コード例 #1
0
def _preprocess(i, row):
    """Clean one record's SMILES with datamol and attach derived identifiers.

    The SMILES is read from ``row[smiles_column]`` (``smiles_column`` is a
    name resolved outside this function), passed through datamol's
    fix / sanitize / standardize steps, and the following fields are
    written back onto ``row``: ``standard_smiles``, ``selfies``, ``inchi``,
    ``inchikey`` and ``onbits_fp`` (the on-bit indices of a chirality-aware
    Morgan fingerprint, radius 2, 8192 bits).

    Args:
        i: Not used in the body; presumably the row index supplied by the
            caller.
        row: Mapping-like record. It is mutated in place.

    Returns:
        The same ``row`` object with the new fields set.
    """
    raw_smiles = str(row[smiles_column])

    # Parse, repair, sanitize and standardize, in that order.
    molecule = dm.to_mol(raw_smiles, ordered=True)
    molecule = dm.fix_mol(molecule)
    molecule = dm.sanitize_mol(molecule, sanifix=True, charge_neutral=False)
    molecule = dm.standardize_mol(
        molecule,
        disconnect_metals=False,
        normalize=True,
        reionize=True,
        uncharge=False,
        stereo=True,
    )

    morgan_bits = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        molecule,
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=True,
        useBondTypes=True,
        useFeatures=False,
    )

    canonical = dm.to_smiles(molecule)
    row["standard_smiles"] = dm.standardize_smiles(canonical)
    row["selfies"] = dm.to_selfies(molecule)
    row["inchi"] = dm.to_inchi(molecule)
    row["inchikey"] = dm.to_inchikey(molecule)
    # Store only the indices of the set bits rather than the full bit vector.
    row["onbits_fp"] = list(morgan_bits.GetOnBits())

    return row
コード例 #2
0
def test_standardize_mol():
    """Check that string-level and molecule-level standardization agree.

    A sodium-containing SMILES is standardized two ways: directly as a
    string via ``dm.standardize_smiles``, and as a molecule via
    ``dm.standardize_mol`` with metal disconnection and uncharging enabled.
    Each result is passed through ``dm.to_smiles`` and the two outputs must
    be equal.
    """
    smiles = "[Na]OC1=CC2CCCCC2N=C1"

    # NOTE(review): both branches hand a SMILES *string* (not a Mol) to
    # dm.to_smiles -- confirm that it accepts strings, otherwise this
    # comparison may not be checking what it appears to check.
    from_string = dm.to_smiles(dm.standardize_smiles(smiles))

    parsed = dm.to_mol(smiles)
    cleaned = dm.standardize_mol(parsed, disconnect_metals=True, uncharge=True)
    from_mol = dm.to_smiles(Chem.MolToSmiles(cleaned))

    assert from_string == from_mol
コード例 #3
0
def _preprocess(i, row):
    """Clean one record's SMILES and enumerate its stereoisomers.

    The SMILES is read from ``row[smiles_column]`` (``smiles_column`` is a
    name resolved outside this function), cleaned with datamol's
    fix / sanitize / standardize steps, and its stereoisomers are
    enumerated with RDKit (at most 20, unique only). The fields
    ``standard_smiles``, ``selfies``, ``inchi``, ``inchikey`` and
    ``enumerated_smiles`` (a sorted list of isomeric SMILES) are written
    back onto ``row``.

    If processing raises ``ValueError`` -- including when the SMILES cannot
    be parsed at all -- every scalar field is set to ``'dropped'`` and
    ``enumerated_smiles`` to ``['dropped']`` so the row can be filtered out
    later instead of aborting the whole run.

    Args:
        i: Not used in the body; presumably the row index supplied by the
            caller.
        row: Mapping-like record. It is mutated in place.

    Returns:
        The same ``row`` object with the new fields set.
    """
    try:
        mol = dm.to_mol(str(row[smiles_column]), ordered=True)
        if mol is None:
            # dm.to_mol signals an unparsable input by returning None rather
            # than raising; route it to the same "dropped" path.
            raise ValueError("could not parse SMILES")
        mol = dm.fix_mol(mol)
        mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
        mol = dm.standardize_mol(mol,
                                 disconnect_metals=False,
                                 normalize=True,
                                 reionize=True,
                                 uncharge=False,
                                 stereo=True)

        # Fixed `rand` value so the enumerated subset is the same on every run.
        opts = StereoEnumerationOptions(unique=True,
                                        maxIsomers=20,
                                        rand=0xf00d)
        enumerated_smiles = sorted(
            Chem.MolToSmiles(isomer, isomericSmiles=True)
            for isomer in EnumerateStereoisomers(mol, options=opts))

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["selfies"] = dm.to_selfies(mol)
        row["inchi"] = dm.to_inchi(mol)
        row["inchikey"] = dm.to_inchikey(mol)
        row["enumerated_smiles"] = enumerated_smiles

    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["selfies"] = 'dropped'
        row["inchi"] = 'dropped'
        row["inchikey"] = 'dropped'
        # A one-element list: list('dropped') would split the marker into
        # individual characters.
        row["enumerated_smiles"] = ['dropped']

    return row
コード例 #4
0
def _clean_mol(smiles):
    """Parse *smiles* and run datamol's fix / sanitize / standardize steps.

    Args:
        smiles: SMILES string to convert.

    Returns:
        The standardized molecule returned by ``dm.standardize_mol``.

    Raises:
        ValueError: If ``dm.to_mol`` cannot parse the string (it returns
            ``None`` in that case instead of raising).
    """
    mol = dm.to_mol(smiles, ordered=True)
    if mol is None:
        raise ValueError("could not parse SMILES")
    mol = dm.fix_mol(mol)
    mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
    return dm.standardize_mol(mol, disconnect_metals=False, normalize=True,
                              reionize=True, uncharge=False, stereo=True)


def _preprocess(i, row):
    """Clean a SMILES, enumerate stereoisomers and fingerprint each isomer.

    The SMILES is read from ``row[smiles_column]`` (``smiles_column`` is a
    name resolved outside this function) and cleaned with datamol. Its
    stereoisomers are enumerated with RDKit (at most 20, unique only); each
    isomer is cleaned again and two Morgan fingerprints (radius 2, 8192
    bits) are computed for it, one with and one without chirality.

    Fields written onto ``row``:
        standard_smiles, smarts, selfies: derived from the cleaned *input*
            molecule. SMARTS is added for later searching.
        enumerated_smiles: standardized SMILES of each isomer, in sorted
            order of the isomeric SMILES.
        achiral_fp, chiral_fp: per-isomer lists of on-bit indices, aligned
            with ``enumerated_smiles``.

    If processing raises ``ValueError`` -- including when the SMILES cannot
    be parsed at all -- scalar fields are set to ``'dropped'`` and list
    fields to ``['dropped']`` so the row can be filtered out later.

    Args:
        i: Not used in the body; presumably the row index supplied by the
            caller.
        row: Mapping-like record. It is mutated in place.

    Returns:
        The same ``row`` object with the new fields set.
    """
    fingerprint_function = rdMolDescriptors.GetMorganFingerprintAsBitVect
    # Settings shared by both fingerprints; only `useChirality` differs.
    shared_pars = {
        "radius": 2,
        "nBits": 8192,
        "invariants": [],
        "fromAtoms": [],
        "useBondTypes": True,
        "useFeatures": False,
    }

    try:
        mol = _clean_mol(str(row[smiles_column]))

        # Fixed `rand` value so the enumerated subset is the same on every run.
        opts = StereoEnumerationOptions(unique=True, maxIsomers=20, rand=0xf00d)
        enum_smiles = sorted(
            Chem.MolToSmiles(isomer, isomericSmiles=True)
            for isomer in EnumerateStereoisomers(mol, options=opts))

        smiles_list = []
        achiral_fp_lis = []
        chiral_fp_lis = []
        for smi in enum_smiles:
            # Use a separate name: reusing `mol` here would make the
            # row-level fields below describe the last isomer instead of the
            # input molecule.
            isomer_mol = _clean_mol(smi)

            chiral_fp = fingerprint_function(
                isomer_mol, useChirality=True, **shared_pars)
            achiral_fp = fingerprint_function(
                isomer_mol, useChirality=False, **shared_pars)

            smiles_list.append(dm.standardize_smiles(smi))
            achiral_fp_lis.append(list(achiral_fp.GetOnBits()))
            chiral_fp_lis.append(list(chiral_fp.GetOnBits()))

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["smarts"] = dm.to_smarts(mol)
        row["selfies"] = dm.to_selfies(mol)
        row["enumerated_smiles"] = smiles_list
        row["achiral_fp"] = achiral_fp_lis
        row["chiral_fp"] = chiral_fp_lis

    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["smarts"] = 'dropped'
        row["selfies"] = 'dropped'
        # One-element lists: list('dropped') would split the marker into
        # individual characters.
        row["enumerated_smiles"] = ['dropped']
        row["achiral_fp"] = ['dropped']
        row["chiral_fp"] = ['dropped']

    return row