Example #1
0
def pred_multiple_prod(testdf, model, path_vec, k=1):
    """Predict the products of several reactions and display them as a grid.

    For every reactant SMILES string in ``testdf['Reactants']`` the
    predictions returned by ``transform_r2p_str`` are collected.  All
    predicted structures are drawn in a single grid image and the predictions
    are returned as a table.

    Args:
        testdf: DataFrame with a ``'Reactants'`` column of SMILES strings.
        model: Passed through unchanged to ``transform_r2p_str``.
        path_vec: Passed through unchanged to ``transform_r2p_str``.
        k: Passed through to ``transform_r2p_str``; per the original
            documentation, raise it to get more candidate products
            (default 1).

    Returns:
        DataFrame whose first column is ``'Reactants'`` followed by one
        ``'Product'`` column per prediction.  For an empty ``testdf`` a frame
        containing only the (empty) ``'Reactants'`` column is returned and
        nothing is drawn.
    """
    # Read the column positionally so that frames whose index is not 0..n-1
    # (e.g. after filtering rows) are processed correctly.
    reactants = testdf['Reactants'].values
    if len(reactants) == 0:
        # The column layout below is derived from a prediction, so there is
        # nothing meaningful to build or draw without at least one reactant.
        return pd.DataFrame({'Reactants': reactants})

    predictions = [
        transform_r2p_str(smi, model, path_vec, k) for smi in reactants
    ]
    # Flat list of every predicted structure, in prediction order, for drawing.
    all_products = [product for std in predictions for product in std]

    # One 'Product' header per prediction; sized from the last reactant's
    # result, as the original implementation did.
    columns = ['Product'] * len(predictions[-1])
    out = pd.DataFrame(data=predictions, columns=columns)
    out.insert(0, 'Reactants', reactants)

    df = struc2mol(pd.DataFrame(data=all_products, columns=['smiles']))
    display(
        PandasTools.FrameToGridImage(df,
                                     column='mol',
                                     legendsCol='smiles',
                                     molsPerRow=5))
    return out
Example #2
0
def output_single_prod(smi, model, path_vec, k=15):
    """Predict the product of one reaction and display reactant and product.

    Candidates are obtained from ``transform_r2p_str``.  The first candidate
    equal to ``smi.upper()`` or to ``smi`` with every ``'#'`` removed is
    chosen; if none matches, the candidate at index 14 is used (the last one
    for the default ``k=15``), clamped to the last available candidate when
    fewer were returned.

    Args:
        smi: Reactant SMILES string.
        model: Passed through unchanged to ``transform_r2p_str``.
        path_vec: Passed through unchanged to ``transform_r2p_str``.
        k: Passed through to ``transform_r2p_str``; the original notes say
            15 is the value used with beam search.

    Returns:
        One-row DataFrame with the columns ``'Reactant'`` and ``'Product'``.

    Raises:
        ValueError: If ``transform_r2p_str`` returns no candidates.
    """
    std = transform_r2p_str(smi, model, path_vec, k)
    if len(std) == 0:
        raise ValueError(
            'no product candidates were predicted for %r' % (smi,))

    # Default to index 14 as before, but never index past the end when fewer
    # than 15 candidates are available.
    prd = std[min(14, len(std) - 1)]
    upper_smi = smi.upper()
    stripped_smi = smi.replace('#', '')
    for candidate in std:
        # still need some more work, not applied to all reactions
        if candidate == upper_smi or candidate == stripped_smi:
            prd = candidate
            break

    out = pd.DataFrame(data=[prd], columns=['Product'])
    out.insert(0, 'Reactant', smi)

    df = struc2mol(pd.DataFrame(data=[smi, prd], columns=['smiles']))
    # NOTE(review): position 3 assumes struc2mol returns at least three
    # columns - confirm against its definition.
    df.insert(3, 'legend', ['Reactant', 'Product'])
    display(
        PandasTools.FrameToGridImage(df,
                                     column='mol',
                                     legendsCol='legend',
                                     molsPerRow=5))
    return out
Example #3
0
def grid_image(df, filename, molobj=True, smi='smiles'):
    """Draw the molecules of ``df`` on one 2D grid and save it as a PNG.

    The molecules are read from the column named ``'Molecule'``.  Each grid
    cell is labelled with the 1-based position of its row.

    Args:
        df: DataFrame holding the molecules.
        filename: Output file name without the ``.png`` extension.
        molobj: True if ``df`` already has a ``'Molecule'`` column.  If
            False, that column is first created from the column named by
            ``smi``.
        smi: Name of the column used to build ``'Molecule'`` when ``molobj``
            is False.

    Returns:
        None.  The image is written to ``filename + '.png'``.
    """
    if not molobj:
        # No molecule column yet: derive it from the ``smi`` column.
        PandasTools.AddMoleculeColumnToFrame(
            df, smi, 'Molecule', includeFingerprints=True)

    # Number the molecules 1..n for the legends.
    molecule_count = len(df['Molecule'])
    labels = [str(position) for position in range(1, molecule_count + 1)]

    grid = PandasTools.FrameToGridImage(df,
                                        column='Molecule',
                                        molsPerRow=3,
                                        subImgSize=(800, 400),
                                        legends=labels)
    # An existing file with the same name is overwritten.
    target = filename + '.png'
    grid.save(target)
Example #4
0
 def run(self, k=15):
     """Predict the product for ``self.sms``, save its drawing and emit the path.

     Candidates are obtained from ``tranform`` using a freshly loaded model
     and ``self.path_vec``.  The first candidate equal to ``smi.upper()`` or
     to ``smi`` with every ``'#'`` removed is chosen; otherwise the candidate
     at index 14 is used, clamped to the last available one.  Reactant and
     product are drawn side by side, written to ``outfile.png`` and the file
     name is emitted through ``self._signal``.

     Args:
         k: Passed through to ``tranform`` (default 15).

     Raises:
         ValueError: If ``tranform`` returns no candidates.
     """
     smi = self.sms
     model = load_model()
     path_vec = self.path_vec

     std = tranform(smi, model, path_vec, k)
     if len(std) == 0:
         raise ValueError(
             'no product candidates were predicted for %r' % (smi,))

     # Default to index 14 as before, but never index past the end when
     # fewer than 15 candidates are available.
     prd = std[min(14, len(std) - 1)]
     upper_smi = smi.upper()
     stripped_smi = smi.replace('#', '')
     for candidate in std:
         if candidate == upper_smi or candidate == stripped_smi:
             prd = candidate
             break

     df = struc2mol(pd.DataFrame(data=[smi, prd], columns=['smiles']))
     # NOTE(review): position 3 assumes struc2mol returns at least three
     # columns - confirm against its definition.
     df.insert(3, 'legend', ['Reactant', 'Product'])
     grid = PandasTools.FrameToGridImage(df,
                                         column='mol',
                                         legendsCol='smiles',
                                         molsPerRow=2)

     # scipy.misc.imsave no longer exists in current SciPy releases, so the
     # image is written with its own save method.
     # NOTE(review): assumes FrameToGridImage returns a PIL image here (the
     # previous code converted it with np.array) - confirm.
     figname = 'outfile.png'
     grid.save(figname)
     self._signal.emit(str(figname))
Example #5
0
    def test_FrameToGridImage(self):
        """Smoke test: FrameToGridImage must not raise for several legend settings.

        Nothing about the generated images is asserted.  Set ``interactive``
        to True to open each image for visual inspection.
        """
        interactive = False
        self.assertTrue(True)
        df = self.df

        # Default legends, a named data column, then the name of the index.
        legend_options = (
            {},
            {'legendsCol': 'PUBCHEM_IUPAC_INCHIKEY'},
            {'legendsCol': df.index.name},
        )
        for options in legend_options:
            result = PandasTools.FrameToGridImage(df, **options)
            if interactive:
                result.show()
Example #6
0
def test_force_Kekulize():
    """Display every molecule of the test CSV with the selected atoms re-labelled.

    Reads ``All_Moles_Tested_Data.csv``, builds a molecule from each entry of
    its ``'smiles'`` column, sets the atomic number of every atom index
    returned by ``find_custom_Kekulize_set`` to 32, and displays the
    molecules in grids of five per row.
    """
    df = pd.read_csv('All_Moles_Tested_Data.csv')

    mol_list = []
    for smile in df['smiles']:
        mol = MolFromSmiles(smile)
        selected = find_custom_Kekulize_set(
            smile, max_atoms=60, max_degree=5, printMe=False)
        # Overwrite the element of each selected atom so it stands out in
        # the drawing (presumably the purpose - confirm).
        for atom_index in selected:
            mol.GetAtomWithIdx(atom_index).SetAtomicNum(32)
        mol_list.append(mol)

    df['mol'] = pd.DataFrame({'mol': mol_list})

    # One grid per slice of ``unit`` rows; the final slice may be shorter.
    unit = 5
    for start in range(0, len(df), unit):
        chunk = df.iloc[start:start + unit]
        display(
            PandasTools.FrameToGridImage(chunk,
                                         column='mol',
                                         legendsCol='',
                                         molsPerRow=unit))
Example #7
0
def output_grid(df, imagename):
    """Draw the ``'mol_strip'`` molecules aligned to a template and save the grid.

    The template is built from the module-level SMARTS string ``smartsq``.
    Every molecule in ``df['mol_strip']`` gets a depiction matching the
    template, the matched atom indices are stored in a new ``'matches'``
    column of ``df`` and highlighted in the grid, which is labelled with the
    ``'molregno'`` column.

    Args:
        df: DataFrame with ``'mol_strip'`` and ``'molregno'`` columns;
            modified in place (``'matches'`` is added).
        imagename: Path the grid image is saved to.
    """
    template = Chem.MolFromSmarts(smartsq)
    AllChem.Compute2DCoords(template)

    def _template_match(mol):
        # Atom indices of ``mol`` that match the template.
        return mol.GetSubstructMatch(template)

    # Align each depiction to the template's 2D coordinates (in place).
    for mol in df['mol_strip']:
        AllChem.GenerateDepictionMatching2DStructure(mol, template)

    df['matches'] = df['mol_strip'].apply(_template_match)
    highlights = list(df['matches'])

    frameimage = PandasTools.FrameToGridImage(df,
                                              column='mol_strip',
                                              legendsCol='molregno',
                                              molsPerRow=10,
                                              highlightAtomLists=highlights)
    frameimage.save(imagename)
Example #8
0
def pred_single_prod(smi, model, path_vec, k=1):
    """Predict the products of one reaction and display them as a grid.

    Args:
        smi: Reactant SMILES string.
        model: Passed through unchanged to ``transform_r2p_str``.
        path_vec: Passed through unchanged to ``transform_r2p_str``.
        k: Passed through to ``transform_r2p_str``; raise it to get more
            candidate products (default 1).

    Returns:
        One-row DataFrame with a ``'Reactant'`` column followed by one
        ``'Product'`` column per prediction.
    """
    std = transform_r2p_str(smi, model, path_vec, k)

    # A single row holding all predictions, each under a 'Product' header.
    headers = ['Product'] * len(std)
    out = pd.DataFrame(data=[std], columns=headers)
    out.insert(0, 'Reactant', smi)

    smiles_frame = pd.DataFrame(data=std, columns=['smiles'])
    df = struc2mol(smiles_frame)
    grid = PandasTools.FrameToGridImage(df,
                                        column='mol',
                                        legendsCol='smiles',
                                        molsPerRow=5)
    display(grid)
    return out
Example #9
0
def output_multiple_prod(testdf, model, path_vec, k=15):
    """Predict one product per reaction and display reactant/product pairs.

    For every reactant SMILES string in ``testdf['Reactants']`` candidates
    are obtained from ``transform_r2p_str``.  The first candidate equal to
    ``smi.upper()`` or to ``smi`` with every ``'#'`` removed is chosen;
    otherwise the candidate at index 14 is used (the last one for the default
    ``k=15``), clamped to the last available candidate.

    Args:
        testdf: DataFrame with a ``'Reactants'`` column of SMILES strings.
        model: Passed through unchanged to ``transform_r2p_str``.
        path_vec: Passed through unchanged to ``transform_r2p_str``.
        k: Passed through to ``transform_r2p_str`` (default 15).

    Returns:
        DataFrame with the columns ``'Reactants'`` and ``'Products'``.  For
        an empty ``testdf`` the frame is empty and nothing is drawn.

    Raises:
        ValueError: If ``transform_r2p_str`` returns no candidates for a
            reactant (previously the product of the preceding reactant was
            silently reused).
    """
    # Read the column positionally so that frames whose index is not 0..n-1
    # (e.g. after filtering rows) are processed correctly.
    reactants = testdf['Reactants'].values

    legends = []
    smiles = []
    products = []
    for smi in reactants:
        std = transform_r2p_str(smi, model, path_vec, k)
        if len(std) == 0:
            raise ValueError(
                'no product candidates were predicted for %r' % (smi,))

        # Default to index 14 as before, but never index past the end when
        # fewer than 15 candidates are available.
        prd = std[min(14, len(std) - 1)]
        upper_smi = smi.upper()
        stripped_smi = smi.replace('#', '')
        for candidate in std:
            if candidate == upper_smi or candidate == stripped_smi:
                prd = candidate
                break

        legends.extend(['Reactant', 'Product'])
        smiles.extend([smi, prd])
        products.append(prd)

    out = pd.DataFrame(data=products, columns=['Products'])
    out.insert(0, 'Reactants', reactants)
    if not products:
        # Nothing to draw for an empty input frame.
        return out

    df = struc2mol(pd.DataFrame(data=smiles, columns=['smiles']))
    # NOTE(review): position 3 assumes struc2mol returns at least three
    # columns - confirm against its definition.
    df.insert(3, 'legend', legends)
    display(
        PandasTools.FrameToGridImage(df,
                                     column='mol',
                                     legendsCol='legend',
                                     molsPerRow=2))
    return out
Example #10
0
 def test_github2380(self):
     """Smoke test: FrameToGridImage must not raise with IPython SVG output on.

     Loads an SDF built from ``methane + peroxide`` and draws it after
     ``IPythonConsole.ipython_useSVG`` has been set to True.  The name refers
     to GitHub issue 2380.
     """
     from rdkit.Chem.Draw import IPythonConsole
     IPythonConsole.ipython_useSVG = True
     sdf_stream = getStreamIO(methane + peroxide)
     frame = PandasTools.LoadSDF(sdf_stream)
     # Only the absence of an exception matters; the image is discarded.
     PandasTools.FrameToGridImage(frame)
Example #11
0
    'SUBSTANCE_SYNONYM', 'SUBSTANCE_COMMENT', 'SUBSTANCE_URL',
    'INCHIKEY_1.05_CHEMAXON_19.27.0'
]]

# Display the whole table.
# Evaluating thesis_df on its own is replaced by the explicit HTML rendering
# below, which fixes molecule display in dataframes (RDKit issue #2673).
from IPython.display import HTML
# head(len(index)) selects every row, so the complete frame is rendered.
HTML(thesis_df.head(len(thesis_df.index)).to_html())

# In[7]:

# We can also display just the molecules as a grid: two molecules per row,
# 400x400 pixels each, labelled with the DATASOURCE_REGID column.
PandasTools.FrameToGridImage(thesis_df,
                             column='RDMol',
                             molsPerRow=2,
                             subImgSize=(400, 400),
                             legendsCol="DATASOURCE_REGID")

# In[8]:

# Calculate the InChI of every molecule with RDKit and add the results to
# thesis_df.
# These are InChI 1.05 as computed by the RDKit 2019.09.2 release.

inchi_list = [Chem.MolToInchi(mol) for mol in thesis_df['RDMol']]

# Add the InChI strings to the dataframe as a new column.
thesis_df['INCHI_1.05_RDKIT_2019.09.2'] = inchi_list