def apply_fp(self, data):
    """Compute one fingerprint per molecule in ``data`` and append it to ``self.x``.

    The input column is chosen from ``self.name``: ``'SMILES'`` for the
    ``'PubChem'`` and ``'FP4'`` fingerprints, ``'Conformer'`` for
    ``'3D pharmacophore'`` and ``'Mol'`` otherwise.  The raw result of
    ``self.fp_fun`` is converted before it is stored:

    * a tuple is first replaced by ``np.array(list(fp[0]))``;
    * an RDKit ``ExplicitBitVect`` goes through ``BitVect_to_NumpyArray``;
    * otherwise PubChem/FP4 results go through ``IntListToBitArray``;
    * anything else becomes ``np.array(list(fp))``.

    :param data: column-indexable table providing the column selected above;
        ``'SMILES'`` and ``'Inchi-Key'`` are additionally read, by position
        ``i``, when a fingerprint fails.
    :return: None; results are appended to ``self.x``.
    :raises Exception: re-raises whatever ``self.fp_fun`` raised.  The
        previous code swallowed the error and carried on with an undefined
        ``fp`` (first molecule) or with the previous molecule's fingerprint,
        which silently corrupted ``self.x``.
    """
    if self.name in ('PubChem', 'FP4'):
        column = 'SMILES'
    elif self.name == '3D pharmacophore':
        column = 'Conformer'
    else:
        column = 'Mol'

    for i, mol in enumerate(list(data[column])):
        try:
            fp = self.fp_fun(mol)
        except Exception:
            # Identify the offending entry before giving up.
            print(data['SMILES'][i] + ' ' + data['Inchi-Key'][i])
            try:
                # Best-effort debug picture; ``mol`` may not be drawable
                # (e.g. it is a SMILES string for PubChem/FP4).
                MolToFile(mol, 'd.png')
            except Exception as draw_err:
                print('Could not write d.png: %s' % draw_err)
            raise

        if isinstance(fp, tuple):
            fp = np.array(list(fp[0]))
        if isinstance(fp, rdkit.DataStructs.cDataStructs.ExplicitBitVect):
            fp = BitVect_to_NumpyArray(fp)
        elif self.name in ('PubChem', 'FP4'):
            fp = IntListToBitArray(fp)
        else:
            fp = np.array(list(fp))
        self.x += [fp]

        # Sanity check on the stored representation (first entry only).
        if type(self.x[0]) is not np.ndarray:
            print("WARNING: type for ", self.name, "is ", type(self.x[0]))
def generate_png(mol, pngpath, logfile=devnull, size=300):
    """Draw ``mol`` as a square image and write it to ``pngpath``.

    Hydrogens are removed with ``RemoveHs`` (unsanitised), the result is
    sanitised with ``catchErrors=True`` and prepared for drawing with forced
    coordinates and chiral hydrogens before being handed to ``MolToFile``.

    :param mol: RDKit molecule to draw.
    :param pngpath: destination path of the image.
    :param logfile: target passed to ``stdout_redirected`` for ``sys.stdout``
        (presumably silences toolkit output; confirm against that helper).
    :param size: edge length of the image; the atom label font size is
        ``size / 25``.
    """
    with stdout_redirected(to=sys.stdout, stdout=sys.stderr), \
            stdout_redirected(to=logfile, stdout=sys.stdout):
        stripped = RemoveHs(mol, implicitOnly=False, updateExplicitCount=True, sanitize=False)
        SanitizeMol(stripped, catchErrors=True)

        draw_opts = DrawingOptions()
        draw_opts.atomLabelFontSize = size / 25

        drawable = PrepareMolForDrawing(stripped, forceCoords=True, addChiralHs=True)
        MolToFile(drawable, pngpath, fitImage=True, size=(size, size), options=draw_opts)
def make_structures_for_smiles(ligand_dict):
    """
    Make structure figures from SMILES strings. All image files will be in the IMG_DIR.

    Each entry's SMILES is read from ``SMILE_COLUMNNAME`` and the image is
    written to ``<IMG_DIR>/<orig_id>.svg``.  Entries whose SMILES cannot be
    parsed (``Chem.MolFromSmiles`` returns ``None``) are reported and skipped
    so that one bad ligand does not abort the whole batch.

    :param ligand_dict: ligand dictionary which keeps all ligand information;
        every value must provide ``SMILE_COLUMNNAME`` and ``"orig_id"``
    :return: None
    """
    for entry in ligand_dict.values():
        smile = entry[SMILE_COLUMNNAME]
        filename = entry["orig_id"]
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            print("Unable to parse SMILES {!r} for {}; no image written".format(smile, filename))
            continue
        MolToFile(mol, os.path.join(IMG_DIR, '{}.svg'.format(filename)))
def model_process_fun(model_out, visdom, n):
    """Send diagnostics for the best-rewarded molecule of a batch to ``visdom``.

    Reads ``model_out['info']`` (SMILES list and validity values) and
    ``model_out['rewards']``; rewards with more than one dimension are summed
    over axis 1.  When the best SMILES parses, its SVG picture, score
    components, reward, fraction of valid molecules and atom count are
    appended at position ``n``.  Nothing is appended when it does not parse.

    :param model_out: mapping with ``'info'`` and ``'rewards'`` entries.
    :param visdom: object exposing ``append(name, kind, **kwargs)``.
    :param n: x-coordinate used for every line plot.
    """
    # TODO: rephrase this to return a dict, instead of calling visdom directly
    from rdkit import Chem
    from rdkit.Chem.Draw import MolToFile

    # model_out was formerly the tuple (actions, logits, rewards, terminals, info)
    smiles, valid = model_out['info']
    total_rewards = model_out['rewards']
    if len(total_rewards.shape) > 1:
        total_rewards = total_rewards.sum(1)

    best_ind = torch.argmax(total_rewards).data.item()
    this_smile = smiles[best_ind]
    mol = Chem.MolFromSmiles(this_smile)
    pic_save_path = os.path.realpath(root_location + '/images/' + 'tmp.svg')
    if mol is None:
        return

    # The picture is optional: a rendering failure is printed, not raised.
    try:
        MolToFile(mol, pic_save_path, imageType='svg')
        with open(pic_save_path, 'r') as svg_file:
            svg_text = svg_file.read()
        svg_text = svg_text.replace('svg:', '')
        visdom.append('best molecule of batch', 'svg', svgstr=svg_text)
    except Exception as e:
        print(e)

    scores, norm_scores = scorer.get_scores([this_smile])
    components = (list(norm_scores[0])
                  + [norm_scores[0].sum()]
                  + [scores[0].sum()]
                  + [desc.CalcNumAromaticRings(mol)])
    legend = ['logP', 'SA', 'cycle', 'norm_reward', 'reward', 'Aromatic rings']
    visdom.append('score component', 'line',
                  X=np.array([n]),
                  Y=np.array([components]),
                  opts={'legend': legend})
    visdom.append('reward', 'line',
                  X=np.array([n]),
                  Y=np.array([total_rewards[best_ind].item()]))
    visdom.append('fraction valid', 'line',
                  X=np.array([n]),
                  Y=np.array([valid.mean().data.item()]))
    visdom.append('num atoms', 'line',
                  X=np.array([n]),
                  Y=np.array([mol.GetNumAtoms()]))
def assign_ids(self):
    """Assigns a numerical ID to compounds (and reactions) for ease of
    reference. Unique only to the CURRENT run.

    Compounds are visited in ``(Generation, _id)`` order; each one without an
    ``'ID'`` receives ``'pkc'`` plus a 7-digit zero-padded counter.  When
    ``self.image_dir`` is set and ``self.mine`` is not, a PNG named after the
    new ID is written for it.  Reactions, in the same order, receive
    ``'pkr'`` IDs and an ``'ID_rxn'`` string built from the compound IDs of
    their reactants and products.

    A compound whose SMILES cannot be parsed is reported with the same
    "Unable to generate image" message as a failed write, instead of letting
    the drawing call abort ID assignment.
    """
    # If we were running a multiprocess expansion, this removes the dicts
    # from Manager control
    self.compounds = dict(self.compounds)
    self.reactions = dict(self.reactions)

    def generation_then_id(entry):
        # Sort key shared by compounds and reactions.
        return entry['Generation'], entry['_id']

    def format_side(side):
        # Render one side of a reaction as '(stoich) ID[c0] + ...'.
        return ' + '.join('(%s) %s[c0]' % (x.stoich, self.compounds[x.c_id]["ID"])
                          for x in side)

    # If we are not loading into the mine, we generate the images here.
    make_images = self.image_dir and not self.mine

    i = 1
    for comp in sorted(self.compounds.values(), key=generation_then_id):
        if comp['ID']:
            continue
        # Create ID of form ####### ending with i, padded with zeroes to
        # fill unused spots to the left with zfill (e.g. ID = '0003721' if
        # i = 3721).
        comp['ID'] = 'pkc' + str(i).zfill(7)
        i += 1
        self.compounds[comp['_id']] = comp
        if not make_images:
            continue
        mol = AllChem.MolFromSmiles(comp['SMILES'])
        if mol is None:
            print("Unable to generate image for %s" % comp['SMILES'])
            continue
        try:
            MolToFile(mol, os.path.join(self.image_dir, comp['ID'] + '.png'),
                      fitImage=True, kekulize=False)
        except OSError:
            print("Unable to generate image for %s" % comp['SMILES'])

    i = 1
    for rxn in sorted(self.reactions.values(), key=generation_then_id):
        rxn['ID_rxn'] = format_side(rxn["Reactants"]) + ' => ' + format_side(rxn["Products"])
        # Same zero-padded scheme as for compounds.
        rxn['ID'] = 'pkr' + str(i).zfill(7)
        i += 1
        self.reactions[rxn['_id']] = rxn
def testMakePNG(self):
    """Smoke test: writing a PNG for a sample KMC result creates the file."""
    # Smoke test only--that it doesn't fail, not that it looks correct
    # (that's outside the scope of this package).
    # The choices shown resulted (at last check) in 3 fragments, one of which has a branch
    try:
        silent_remove(TEST_PNG)
        result = create_sample_kmc_result(num_initial_monos=24, max_monos=24, seed=1,
                                          max_time=SHORT_TIME)
        adj_matrix = result[ADJ_MATRIX]
        adj_analysis_to_stdout(analyze_adj_matrix(adj_matrix), break_co_bonds=False)

        mol = MolFromMolBlock(generate_mol(adj_matrix, result[MONO_LIST]))
        Compute2DCoords(mol)
        MolToFile(mol, TEST_PNG, size=(2000, 1200))
        self.assertTrue(os.path.isfile(TEST_PNG))
    finally:
        silent_remove(TEST_PNG, disable=DISABLE_REMOVE)
def make_image_grid(file_label, smi_list, labels=None, out_dir=PNG_DIR, mol_img_size=(400, 300), write_output=True):
    """
    Given a molecular formula (or other label) and the set of SMI, make an image grid of all smiles within
    https://www.rdkit.org/docs/GettingStartedInPython.html

    A single molecule is written with ``MolToFile`` and its label is appended to the file name;
    several molecules are written as one grid image with three molecules per row.

    :param file_label: str, such as chemical formula that corresponds to all smiles in SMILES set
    :param smi_list: list or set of SMILES strings; used to generate images. It is copied to a list
        once so that molecules and default labels share one order and can be indexed (a set cannot)
    :param labels: if None, will use the smi_list as labels; otherwise a list to use
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    """
    # A set is accepted per the docstring, but the code below indexes the labels
    # and pairs them with the molecules by position, so fix the order up front.
    smi_list = list(smi_list)

    mols = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = labels if labels else smi_list
    single_mol = len(mols) == 1

    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not work
        # for one image), so add to file name
        file_label += '_' + img_labels[0]

    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)
    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols, molsPerRow=3, subImgSize=mol_img_size, legends=img_labels)
        img_grid.save(fname)

    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
def generate_comp_svgs(self):
    """Create SVG images of the compounds.

    One file per row of ``self.compounds`` is written into the images
    directory of the frontend, named ``<row.name>-<row['name']>.svg``
    (``row.name`` is presumably the row's index label; confirm against the
    type of ``self.compounds``).

    Notes:
        It is expected that the user has installed an svg capable
        renderer for rdkit. See http://www.rdkit.org for details.

    Returns:
        None

    """
    structure_path = os.path.join(os.path.dirname(__file__),
                                  '../../frontend/app/data/images')

    def write_svg(row):
        # Render the row's structure to its own SVG file.
        target = os.path.join(structure_path,
                              '{}-{}.svg'.format(row.name, row['name']))
        return MolToFile(row.structure, target)

    self.compounds.apply(write_svg, axis=1)
def produce_output(adj_matrix, mono_list, cfg):
    """Write the requested output formats for a structure.

    Unless SMILES output is suppressed and none of JSON/PNG/SVG is requested,
    an RDKit molecule is built from the mol block of ``adj_matrix`` and
    ``mono_list``.  Its SMILES string is either saved (``cfg[SAVE_SMI]``) or
    printed, and 2D coordinates are computed when JSON, PNG or SVG output is
    requested.  Each enabled format is then written to a file name derived
    from ``cfg[BASENAME]`` and ``cfg[OUT_DIR]``.

    :param adj_matrix: adjacency matrix passed to ``generate_mol``/``gen_tcl``
    :param mono_list: monomer list passed to ``generate_mol``/``gen_tcl``
    :param cfg: configuration mapping holding the format flags and output options
    :raises InvalidDataError: if the SMILES string cannot be produced; the
        underlying exception is chained as the cause
    """
    if cfg[SUPPRESS_SMI] and not (cfg[SAVE_JSON] or cfg[SAVE_PNG] or cfg[SAVE_SVG]):
        format_list = [SAVE_TCL]
        mol = None  # Make IDE happy; TCL output does not need the molecule
    else:
        # Default out is SMILES, which requires getting an rdKit molecule object; also required for everything
        #    except the TCL format
        format_list = [SAVE_TCL, SAVE_JSON, SAVE_PNG, SAVE_SVG]
        block = generate_mol(adj_matrix, mono_list)
        mol = MolFromMolBlock(block)
        try:
            smiles = MolToSmiles(mol)
        except Exception as e:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are not
            # converted, and chained so the original cause stays visible.
            raise InvalidDataError("Error in producing SMILES string.") from e
        # if SMI is to be saved, don't output to stdout
        if cfg[SAVE_SMI]:
            fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=SAVE_SMI)
            str_to_file(smiles + '\n', fname, print_info=True)
        else:
            print("\nSMILES representation: \n", smiles, "\n")
        if cfg[SAVE_PNG] or cfg[SAVE_SVG] or cfg[SAVE_JSON]:
            # PNG and SVG make 2D images and thus need coordinates
            # JSON will save coordinates--zero's if not computed; might as well compute and save non-zero values
            Compute2DCoords(mol)

    for save_format in format_list:
        if not cfg[save_format]:
            continue
        fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=save_format)
        if save_format == SAVE_TCL:
            gen_tcl(adj_matrix, mono_list, tcl_fname=fname, chain_id=cfg[CHAIN_ID],
                    psf_fname=cfg[PSF_FNAME], toppar_dir=cfg[TOPPAR_DIR], out_dir=cfg[OUT_DIR])
        elif save_format == SAVE_JSON:
            json_str = MolToJSON(mol)
            str_to_file(json_str + '\n', fname)
        elif save_format == SAVE_PNG or save_format == SAVE_SVG:
            MolToFile(mol, fname, size=cfg[IMAGE_SIZE])
            print(f"Wrote file: {fname}")
def model_process_fun(model_out, visdom, n):
    """Send diagnostics for the best-rewarded molecule of a batch to ``visdom``.

    ``model_out`` is unpacked as ``(actions, logits, rewards, terminals,
    info)``; only ``rewards`` (summed over axis 1) and ``info`` (SMILES list
    and validity values) are used.  When the best SMILES parses, its SVG
    picture, score components and the fraction of valid molecules are
    appended at position ``n``.  Nothing is appended when it does not parse.

    A failure while rendering or sending the picture is printed rather than
    silently discarded, so a broken image path or renderer is visible in the
    logs; the remaining plots are still produced.

    :param model_out: 5-tuple as described above.
    :param visdom: object exposing ``append(name, kind, **kwargs)``.
    :param n: x-coordinate used for every line plot.
    """
    from rdkit import Chem
    from rdkit.Chem.Draw import MolToFile

    # The full unpacking is kept so that a malformed model_out fails here.
    _actions, _logits, rewards, _terminals, info = model_out
    smiles, valid = info
    total_rewards = rewards.sum(1)
    best_ind = torch.argmax(total_rewards).data.item()
    this_smile = smiles[best_ind]
    mol = Chem.MolFromSmiles(this_smile)
    pic_save_path = root_location + 'images/' + 'test.svg'
    if mol is None:
        return

    try:
        MolToFile(mol, pic_save_path, imageType='svg')
        with open(pic_save_path, 'r') as myfile:
            data = myfile.read()
        data = data.replace('svg:', '')
        visdom.append('best molecule of batch', 'svg', svgstr=data)
    except Exception as e:
        # The picture is optional, but report why it is missing.
        print(e)

    scores, norm_scores = scorer.get_scores([this_smile])
    components = (list(norm_scores[0])
                  + [norm_scores[0].sum()]
                  + [scores[0].sum()]
                  + [desc.CalcNumAromaticRings(mol)])
    visdom.append('score component', 'line',
                  X=np.array([n]),
                  Y=np.array([components]),
                  opts={'legend': ['logP', 'SA', 'cycle', 'norm_reward',
                                   'reward', 'Aromatic rings']})
    visdom.append('fraction valid', 'line',
                  X=np.array([n]),
                  Y=np.array([valid.mean().data.item()]))
def plot_molecule(molecule, file_name=None):
    """Plot a molecule using RDKit.

    The molecule is converted with ``get_graph``, its hydrogens are removed
    and the result is written to ``file_name`` as a 200x200 image.

    :param molecule: object accepted by ``get_graph``.
    :param file_name: destination path handed to ``MolToFile``.
    """
    heavy_atom_graph = Chem.RemoveHs(get_graph(molecule))
    MolToFile(heavy_atom_graph, file_name, size=(200, 200))
def weight_visulize_py(smiles, atom_weight,):
    """Draw ``smiles`` with the surroundings of its highest-weighted atom highlighted.

    The atom with the largest entry in ``atom_weight`` is taken as the centre.
    The centre and the atoms reached in one or two neighbour steps receive
    the centre's weight; atoms reached in a third step receive half of it;
    all other atoms receive 0.  Atoms are coloured from the ``'Oranges'``
    colour map (normalised to 0..1).  A bond is coloured by the mean weight
    of its two atoms when both belong to the highlighted fragment, otherwise
    by 0.  The picture is written to ``./CYP2D6/<name>.png``, where
    ``<name>`` is the SMILES with every backslash replaced by ``||``.

    :param smiles: SMILES string of the molecule; also printed to stdout.
    :param atom_weight: per-atom weights; must support
        ``.squeeze().numpy().tolist()`` and integer indexing (presumably a
        torch tensor with one entry per atom; confirm against callers).
    """
    print(smiles)
    atom_weight_list = atom_weight.squeeze().numpy().tolist()
    max_atom_weight_index = atom_weight_list.index(max(atom_weight_list))
    significant_weight = atom_weight[max_atom_weight_index]
    mol = Chem.MolFromSmiles(smiles)

    norm = matplotlib.colors.Normalize(vmin=0, vmax=1)
    cmap = cm.get_cmap('Oranges')
    plt_colors = cm.ScalarMappable(norm=norm, cmap=cmap)

    def neighbor_indices(atom_idx):
        # Indices of the atoms bonded to ``atom_idx``.
        return [nbr.GetIdx() for nbr in mol.GetAtomWithIdx(atom_idx).GetNeighbors()]

    # First shell: direct neighbours of the centre.
    atom_neighbors_1 = neighbor_indices(max_atom_weight_index)

    # Second shell: neighbours of the first shell, without stepping back
    # onto the centre.
    atom_neighbors_2 = []
    for idx in atom_neighbors_1:
        atom_neighbors_2.extend(n_idx for n_idx in neighbor_indices(idx)
                                if n_idx != max_atom_weight_index)

    # Third shell: neighbours of the second shell, excluding the first shell.
    first_shell = set(atom_neighbors_1)
    atom_neighbors_3 = []
    for idx in atom_neighbors_2:
        atom_neighbors_3.extend(n_idx for n_idx in neighbor_indices(idx)
                                if n_idx not in first_shell)

    # Outermost shell first, so that closer shells overwrite it with the
    # full weight where the shells overlap.
    atom_new_weight = [0] * mol.GetNumAtoms()
    for idx in atom_neighbors_3:
        atom_new_weight[idx] = significant_weight * 0.5
    for idx in atom_neighbors_2 + atom_neighbors_1 + [max_atom_weight_index]:
        atom_new_weight[idx] = significant_weight

    significant_fg_index = set(
        [max_atom_weight_index] + atom_neighbors_1 + atom_neighbors_2 + atom_neighbors_3)

    atom_colors = {idx: plt_colors.to_rgba(float(weight))
                   for idx, weight in enumerate(atom_new_weight)}

    bond_colors = {}
    for bond in mol.GetBonds():
        u = bond.GetBeginAtomIdx()
        v = bond.GetEndAtomIdx()
        if u in significant_fg_index and v in significant_fg_index:
            bond_weight = (atom_new_weight[u] + atom_new_weight[v]) / 2
            bond_colors[bond.GetIdx()] = plt_colors.to_rgba(float(abs(bond_weight)))
        else:
            bond_colors[bond.GetIdx()] = plt_colors.to_rgba(0.0)

    rdDepictor.Compute2DCoords(mol)
    mol = rdMolDraw2D.PrepareMolForDrawing(mol)

    # Backslashes cannot be used in the file name.  This yields the same name
    # as the former ``eval(repr(smiles).replace('\\', '|'))`` without
    # evaluating the input string as code.
    # NOTE(review): '/' in a SMILES still ends up in the path - confirm
    # whether such inputs occur.
    smiles_name = smiles.replace('\\', '||')
    MolToFile(mol, r'./CYP2D6/' + smiles_name + '.png',
              highlightAtoms=range(0, mol.GetNumAtoms()),
              highlightBonds=range(0, mol.GetNumBonds()),
              highlightAtomColors=atom_colors,
              highlightBondColors=bond_colors)
help="The molecule to be fragmented in MOL2 format.") parser.add_argument("-outfolder", dest="OUT_FOLDER", required=False, default="fragments", type=str, help="The folder name which will be created (or erased if it already exists) where the " "PNG images of the fragments will be saved.") parser.add_argument("-fpradius", dest="FP_RADIUS", required=False, default=2, type=int, help="The ECFP radius parameter value (distance in number of bonds). Default: %(default)s") args = parser.parse_args() return args if __name__ == "__main__": args = cmdlineparse() if args.SMILES: mol = Chem.MolFromSmiles(args.SMILES) if args.MOL2: mol = MolFromMol2File(args.MOL2, sanitize=False, removeHs=False) if os.path.exists(args.OUT_FOLDER): shutil.rmtree(args.OUT_FOLDER) os.mkdir(args.OUT_FOLDER) MolToFile(mol, "original_molecule.png") shutil.move("original_molecule.png", args.OUT_FOLDER + "/original_molecule.png") bi = {} fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=args.FP_RADIUS, bitInfo=bi) for k in bi.keys(): mfp2_svg = DrawMorganBit(mol, k, bi) mfp2_svg.save(fp="%s/%i_frag.png" % (args.OUT_FOLDER, k), format="PNG") # TODO: show all fragments in one figure # https://stackoverflow.com/questions/37365824/pandas-ipython-notebook-include-and-display-an-image-in-a-dataframe