def pred_multiple_prod(testdf, model, path_vec, k=1):
    """Predict the products of several chemical reactions.

    Every entry of ``testdf['Reactants']`` is passed to
    ``transform_r2p_str`` together with ``model``, ``path_vec`` and ``k``.
    By default one prediction is requested per reactant; increase ``k`` to
    get more candidate products.

    Args:
        testdf: DataFrame with a 'Reactants' column holding the reactant
            SMILES strings.
        model: Forwarded unchanged to ``transform_r2p_str``.
        path_vec: Forwarded unchanged to ``transform_r2p_str``.
        k: Forwarded unchanged to ``transform_r2p_str``.

    Returns:
        A DataFrame with one row per reactant: a leading 'Reactants' column
        followed by one 'Product' column per predicted candidate.

    Side effects:
        Displays a grid image of all predicted products via ``display``.
    """
    # Iterate over the values, not the labels, so a frame whose index is not
    # 0..n-1 (e.g. after filtering) is still processed in row order.
    reactants = testdf['Reactants'].values
    predictions = [
        transform_r2p_str(smi, model, path_vec, k) for smi in reactants
    ]

    # One 'Product' header per candidate of a single row. The header list must
    # match the row width, not the total number of predictions over all rows.
    width = max((len(row) for row in predictions), default=0)
    out = pd.DataFrame(data=predictions, columns=['Product'] * width)
    out.insert(0, 'Reactants', reactants)

    # Flatten all candidates into a single column for drawing.
    all_products = [smiles for row in predictions for smiles in row]
    df = struc2mol(pd.DataFrame(data=all_products, columns=['smiles']))
    display(
        PandasTools.FrameToGridImage(df,
                                     column='mol',
                                     legendsCol='smiles',
                                     molsPerRow=5))
    return out
def output_single_prod(smi, model, path_vec, k=15):
    """Pick and display a single product for one reactant SMILES string.

    ``transform_r2p_str`` is asked for ``k`` candidates (15 by default, the
    value used with beam search). The first candidate equal to either
    ``smi.upper()`` or ``smi`` with every '#' removed is selected. If none
    matches, the candidate at position 14 is used, or the last candidate when
    fewer than 15 were returned.

    NOTE: the matching heuristic is known not to apply to all reactions.

    Args:
        smi: Reactant SMILES string.
        model: Forwarded unchanged to ``transform_r2p_str``.
        path_vec: Forwarded unchanged to ``transform_r2p_str``.
        k: Forwarded unchanged to ``transform_r2p_str``.

    Returns:
        A one-row DataFrame with the columns 'Reactant' and 'Product'.

    Raises:
        ValueError: If ``transform_r2p_str`` returns no candidates.

    Side effects:
        Displays a grid image of the reactant and the chosen product.
    """
    std = transform_r2p_str(smi, model, path_vec, k)
    if len(std) == 0:
        raise ValueError('no product candidates were predicted for %r' % smi)

    # Default choice when no candidate matches. Clamp the historical index 14
    # so that calls with k < 15 no longer raise IndexError.
    prd = std[min(14, len(std) - 1)]
    upper_smi = smi.upper()
    stripped_smi = smi.replace('#', '')
    for candidate in std:
        if candidate == upper_smi or candidate == stripped_smi:
            prd = candidate
            break

    out = pd.DataFrame(data=[prd], columns=['Product'])
    out.insert(0, 'Reactant', smi)

    df = struc2mol(pd.DataFrame(data=[smi, prd], columns=['smiles']))
    df.insert(3, 'legend', ['Reactant', 'Product'])
    display(
        PandasTools.FrameToGridImage(df,
                                     column='mol',
                                     legendsCol='legend',
                                     molsPerRow=5))
    return out
def grid_image(df, filename, molobj=True, smi='smiles'):
    """Draw the molecules of a dataframe as a 2D grid and save it as PNG.

    The molecules are read from the column titled "Molecule", which must
    contain RDKit molecule objects. The image is written to
    ``filename + '.png'``; nothing is returned.

    Keyword Arguments:
        molobj: True (default) if the "Molecule" column already exists in
            ``df``. If False, the column is first created in ``df`` from the
            column named by ``smi``.
        smi: Name of the column used to build the "Molecule" column when
            ``molobj`` is False. Defaults to 'smiles'.
    """
    if not molobj:
        # No molecule objects yet: build them (this adds a column to df).
        PandasTools.AddMoleculeColumnToFrame(df,
                                             smi,
                                             'Molecule',
                                             includeFingerprints=True)

    # Legends are simply the 1-based position of each molecule.
    molecule_count = len(df['Molecule'])
    numbering = [str(position) for position in range(1, molecule_count + 1)]

    grid = PandasTools.FrameToGridImage(df,
                                        column='Molecule',
                                        molsPerRow=3,
                                        subImgSize=(800, 400),
                                        legends=numbering)

    # NOTE: an existing file with the same name is overwritten.
    target = filename + '.png'
    grid.save(target)
def run(self, k=15):
    """Predict a product for ``self.sms``, draw it and emit the image path.

    The model is obtained from ``load_model()`` and the path vector from
    ``self.path_vec``. The first candidate returned by ``tranform`` that
    equals either the upper-cased reactant or the reactant with every '#'
    removed is selected. If none matches, the candidate at position 14 is
    used, or the last candidate when fewer than 15 were returned.

    The reactant/product grid is written to 'outfile.png' in the current
    directory and that file name is emitted through ``self._signal``.

    Args:
        k: Forwarded unchanged to ``tranform``.

    Raises:
        ValueError: If ``tranform`` returns no candidates.
    """
    smi = self.sms
    model = load_model()
    path_vec = self.path_vec

    std = tranform(smi, model, path_vec, k)
    if len(std) == 0:
        raise ValueError('no product candidates were predicted for %r' % smi)

    # Default choice when no candidate matches. Clamp the historical index 14
    # so that calls with k < 15 no longer raise IndexError.
    prd = std[min(14, len(std) - 1)]
    upper_smi = smi.upper()
    stripped_smi = smi.replace('#', '')
    for candidate in std:
        if candidate == upper_smi or candidate == stripped_smi:
            prd = candidate
            break

    df = struc2mol(pd.DataFrame(data=[smi, prd], columns=['smiles']))
    df.insert(3, 'legend', ['Reactant', 'Product'])
    grid = PandasTools.FrameToGridImage(df,
                                        column='mol',
                                        legendsCol='smiles',
                                        molsPerRow=2)

    # scipy.misc.imsave was removed in SciPy 1.2, so save the grid through its
    # own save() method instead.
    # NOTE(review): assumes FrameToGridImage returns a PIL image here (i.e.
    # SVG rendering is not enabled) - confirm.
    figname = 'outfile.png'
    grid.save(figname)
    self._signal.emit(str(figname))
def test_FrameToGridImage(self):
    # Smoke test: passes as long as FrameToGridImage raises no exception.
    # Flip `interactive` to True to look at the generated images.
    interactive = False
    self.assertTrue(True)
    frame = self.df

    def render(**options):
        # Build one grid image and optionally show it.
        image = PandasTools.FrameToGridImage(frame, **options)
        if interactive:
            image.show()

    # Default call, then legends from a data column, then from the index name.
    render()
    render(legendsCol='PUBCHEM_IUPAC_INCHIKEY')
    render(legendsCol=frame.index.name)
def test_force_Kekulize():
    """Visual check of ``find_custom_Kekulize_set`` on the tested molecules.

    Reads the SMILES from 'All_Moles_Tested_Data.csv', sets every atom index
    returned by ``find_custom_Kekulize_set`` to atomic number 32 so the
    selected atoms stand out in the drawing, and displays the molecules in
    rows of five.
    """
    df = pd.read_csv('All_Moles_Tested_Data.csv')

    marked_mols = []
    for smile in df['smiles']:
        mol = MolFromSmiles(smile)
        selected = find_custom_Kekulize_set(smile,
                                            max_atoms=60,
                                            max_degree=5,
                                            printMe=False)
        for atom_index in selected:
            mol.GetAtomWithIdx(atom_index).SetAtomicNum(32)
        marked_mols.append(mol)
    df['mol'] = pd.DataFrame({'mol': marked_mols})

    # Show the frame in consecutive chunks of `unit` rows; the final chunk
    # holds whatever is left over.
    unit = 5
    for start in range(0, len(df), unit):
        chunk = df.iloc[start:start + unit]
        display(
            PandasTools.FrameToGridImage(chunk,
                                         column='mol',
                                         legendsCol='',
                                         molsPerRow=unit))
def output_grid(df, imagename):
    """Save a grid image of ``df['mol_strip']`` with the query highlighted.

    The substructure query is built from the module-level SMARTS string
    ``smartsq``. Every molecule gets its depiction matched to the 2D
    coordinates of that query, the matching atom indices are stored in a new
    'matches' column of ``df``, and the grid (ten molecules per row, legends
    taken from 'molregno') is saved to ``imagename``.
    """
    query = Chem.MolFromSmarts(smartsq)
    AllChem.Compute2DCoords(query)

    # Re-orient each molecule in place so the shared substructure lines up.
    for mol in df['mol_strip']:
        AllChem.GenerateDepictionMatching2DStructure(mol, query)

    def match_atoms(mol):
        # Atom indices of `mol` that match the query.
        return mol.GetSubstructMatch(query)

    df['matches'] = df['mol_strip'].apply(match_atoms)

    highlighted = list(df['matches'])
    grid = PandasTools.FrameToGridImage(df,
                                        column='mol_strip',
                                        legendsCol='molregno',
                                        molsPerRow=10,
                                        highlightAtomLists=highlighted)
    grid.save(imagename)
def pred_single_prod(smi, model, path_vec, k=1):
    """Predict the product(s) of one reaction from its reactant SMILES.

    ``transform_r2p_str`` is called with ``smi``, ``model``, ``path_vec`` and
    ``k``. By default a single prediction is requested; increase ``k`` to get
    more candidate products.

    Returns a one-row DataFrame with a leading 'Reactant' column followed by
    one 'Product' column per predicted candidate. The candidates are also
    displayed as a grid image.
    """
    std = transform_r2p_str(smi, model, path_vec, k)

    # One identically named 'Product' header for each candidate.
    headers = ['Product'] * len(std)
    out = pd.DataFrame(data=[std], columns=headers)
    out.insert(0, 'Reactant', smi)

    drawable = struc2mol(pd.DataFrame(data=std, columns=['smiles']))
    grid = PandasTools.FrameToGridImage(drawable,
                                        column='mol',
                                        legendsCol='smiles',
                                        molsPerRow=5)
    display(grid)
    return out
def output_multiple_prod(testdf, model, path_vec, k=15):
    """Pick and display one product for each of several reactants.

    For every entry of ``testdf['Reactants']``, ``transform_r2p_str`` is asked
    for ``k`` candidates (15 by default). The first candidate equal to either
    the upper-cased reactant or the reactant with every '#' removed is
    selected. If none matches, the candidate at position 14 is used, or the
    last candidate when fewer than 15 were returned.

    Args:
        testdf: DataFrame with a 'Reactants' column holding the reactant
            SMILES strings.
        model: Forwarded unchanged to ``transform_r2p_str``.
        path_vec: Forwarded unchanged to ``transform_r2p_str``.
        k: Forwarded unchanged to ``transform_r2p_str``.

    Returns:
        A DataFrame with the columns 'Reactants' and 'Products', one row per
        reactant.

    Raises:
        ValueError: If ``transform_r2p_str`` returns no candidates for a
            reactant.

    Side effects:
        Displays a grid image with one reactant/product pair per row.
    """
    legends = []
    drawn_smiles = []
    products = []

    # Iterate over the values, not the labels, so a frame whose index is not
    # 0..n-1 (e.g. after filtering) is still processed in row order.
    for smi in testdf['Reactants']:
        std = transform_r2p_str(smi, model, path_vec, k)
        if len(std) == 0:
            raise ValueError(
                'no product candidates were predicted for %r' % smi)

        # Default choice when no candidate matches. Clamp the historical
        # index 14 so that calls with k < 15 no longer raise IndexError.
        prd = std[min(14, len(std) - 1)]
        upper_smi = smi.upper()
        stripped_smi = smi.replace('#', '')
        for candidate in std:
            if candidate == upper_smi or candidate == stripped_smi:
                prd = candidate
                break

        legends.extend(['Reactant', 'Product'])
        drawn_smiles.extend([smi, prd])
        products.append(prd)

    out = pd.DataFrame(data=products, columns=['Products'])
    out.insert(0, 'Reactants', testdf['Reactants'].values)

    df = struc2mol(pd.DataFrame(data=drawn_smiles, columns=['smiles']))
    df.insert(3, 'legend', legends)
    display(
        PandasTools.FrameToGridImage(df,
                                     column='mol',
                                     legendsCol='legend',
                                     molsPerRow=2))
    return out
def test_github2380(self):
    # Regression test named after GitHub issue 2380: FrameToGridImage must
    # not raise when the IPython console is switched to SVG rendering.
    from rdkit.Chem.Draw import IPythonConsole
    previous_setting = IPythonConsole.ipython_useSVG
    IPythonConsole.ipython_useSVG = True
    try:
        df = PandasTools.LoadSDF(getStreamIO(methane + peroxide))
        _ = PandasTools.FrameToGridImage(df)
    finally:
        # ipython_useSVG is module-global state; restore it so the setting
        # does not leak into tests that run afterwards.
        IPythonConsole.ipython_useSVG = previous_setting
'SUBSTANCE_SYNONYM', 'SUBSTANCE_COMMENT', 'SUBSTANCE_URL', 'INCHIKEY_1.05_CHEMAXON_19.27.0' ]] # Display table # thesis_df # fixes mol display in dataframes (RDKit Issue# 2673) from IPython.display import HTML HTML(thesis_df.head(len(thesis_df.index)).to_html()) # In[7]: # we can also display just the molecules like this: PandasTools.FrameToGridImage(thesis_df, column='RDMol', molsPerRow=2, subImgSize=(400, 400), legendsCol="DATASOURCE_REGID") # In[8]: # Now we need to caluclate the InChIs from RDKit and add to thesis_df # These are InChI 1.05 as computed by RDKit 2019.09.2 release. inchi_list = [] for mol in thesis_df['RDMol']: inchi = Chem.MolToInchi(mol) inchi_list.append(inchi) # add to dataframe thesis_df['INCHI_1.05_RDKIT_2019.09.2'] = inchi_list