Beispiel #1
0
def test_fixmol():
    sm = "C.Cl.CC.[H][N:1]1(C)=CC(O)=CC2CCCCC12"
    mol = Chem.MolFromSmiles(sm, sanitize=False)
    # mol.UpdatePropertyCache(False)
    # Chem.Kekulize(mol)
    res = dm.fix_mol(mol, n_iter=1)  # copy by default

    # should still be invalid in term of valence for nitrogen
    assert not dm.incorrect_valence(res)

    res2 = dm.fix_mol(mol, n_iter=2)
    # not expecting difference between res2 and res3
    assert Chem.MolToSmiles(res) == Chem.MolToSmiles(res2)

    # only largest expected_here
    res_largest = dm.fix_mol(mol, largest_only=True)

    dm.fix_mol(mol, remove_singleton=True, largest_only=True)
    assert len(Chem.rdmolops.GetMolFrags(res_largest)) == 1

    expected_largest_fix = dm.standardize_smiles("OC1=CC2CCCCC2[N:1]=C1")
    assert dm.standardize_smiles(
        Chem.MolToSmiles(res_largest)) == expected_largest_fix

    res_no_singleton = dm.fix_mol(mol, n_iter=2, remove_singleton=True)
    assert len(Chem.rdmolops.GetMolFrags(res_largest)) == 1
    assert len(Chem.rdmolops.GetMolFrags(res_no_singleton)) == 2
def _preprocess(i, row):
#     print('hello')
    mol = dm.to_mol(str(row[smiles_column]), ordered=True)
    mol = dm.fix_mol(mol)
    mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
    mol = dm.standardize_mol(mol, disconnect_metals=False, normalize=True, reionize=True, uncharge=False, stereo=True)
    
    fingerprint_function = rdMolDescriptors.GetMorganFingerprintAsBitVect
    pars = { "radius": 2,
                     "nBits": 8192,
                     "invariants": [],
                     "fromAtoms": [],
                     "useChirality": True,
                     "useBondTypes": True,
                     "useFeatures": False,
            }
    fp = fingerprint_function(mol, **pars)

    row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
    row["selfies"] = dm.to_selfies(mol)
    row["inchi"] = dm.to_inchi(mol)
    row["inchikey"] = dm.to_inchikey(mol)
    row["onbits_fp"] =list(fp.GetOnBits())
    
    return row
Beispiel #3
0
def test_standardize_mol():
    sm = "[Na]OC1=CC2CCCCC2N=C1"
    sm_standard = dm.to_smiles(dm.standardize_smiles(sm))
    standard_mol = dm.standardize_mol(dm.to_mol(sm),
                                      disconnect_metals=True,
                                      uncharge=True)
    mol_standard = dm.to_smiles(Chem.MolToSmiles(standard_mol))
    assert sm_standard == mol_standard
def _preprocess(i, row):
    #     print('hello')
    try:
        mol = dm.to_mol(str(row[smiles_column]), ordered=True)
        mol = dm.fix_mol(mol)
        mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
        mol = dm.standardize_mol(mol,
                                 disconnect_metals=False,
                                 normalize=True,
                                 reionize=True,
                                 uncharge=False,
                                 stereo=True)
        opts = StereoEnumerationOptions(unique=True,
                                        maxIsomers=20,
                                        rand=0xf00d)
        isomers = EnumerateStereoisomers(mol, options=opts)
        enum_smiles = sorted(
            Chem.MolToSmiles(y, isomericSmiles=True) for y in isomers)

        smiles_list = []
        for count, smi in enumerate(enum_smiles):
            smiles_string = smi

            smiles_list.append(smiles_string)
        # fingerprint_function = rdMolDescriptors.GetMorganFingerprintAsBitVect
        # pars = { "radius": 2,
        #                  "nBits": 8192,
        #                  "invariants": [],
        #                  "fromAtoms": [],
        #                  "useChirality": False,
        #                  "useBondTypes": True,
        #                  "useFeatures": False,
        #         }
        # fp = fingerprint_function(mol, **pars)

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["selfies"] = dm.to_selfies(mol)
        row["inchi"] = dm.to_inchi(mol)
        row["inchikey"] = dm.to_inchikey(mol)
        row["enumerated_smiles"] = smiles_list
        # row["onbits_fp"] =list(fp.GetOnBits())

        return row

    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["selfies"] = 'dropped'
        row["inchi"] = 'dropped'
        row["inchikey"] = 'dropped'
        row["enumerated_smiles"] = list('dropped')
        return row
Beispiel #5
0
def test_standardize_smiles_tautomer():
    smiles = "C1=CC=CN=C1"
    std_smiles = dm.standardize_smiles(smiles, tautomer=True)
    assert "c1ccncc1" == std_smiles
Beispiel #6
0
def _preprocess(i, row):
    '''Takes a smiles string and generates a clean rdkit mol with datamol. The stereoisomers
    are then enumerated while holding defined stereochemistry. Morgan fingerprints are then
    generated using RDkit with and without stereochemistry. The try/except logic deals with 
    RDkit mol failures on conversion of an invalid smiles string. Smarts are added for later
    searching.'''
    try:
        mol = dm.to_mol(str(row[smiles_column]), ordered=True)
        mol = dm.fix_mol(mol)
        mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
        mol = dm.standardize_mol(mol, disconnect_metals=False, normalize=True, reionize=True, uncharge=False, stereo=True)
        opts = StereoEnumerationOptions(unique=True,maxIsomers=20,rand=0xf00d)
        isomers = EnumerateStereoisomers(mol, options=opts)
        enum_smiles = sorted(Chem.MolToSmiles(y,isomericSmiles=True) for y in isomers)
#         enum_dm_smiles = sorted(dm.standardize_smiles(dm.to_smiles(x)) for x in isomers)
        
        smiles_list = []
        achiral_fp_lis = []
        chiral_fp_lis = []
        
#         standard_smiles_list = []
        for count, smi in enumerate(enum_smiles):
            smiles_string = smi
            
            mol = dm.to_mol(smi, ordered=True)
            mol = dm.fix_mol(mol)
            mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
            mol = dm.standardize_mol(mol, disconnect_metals=False, normalize=True, reionize=True, uncharge=False, stereo=True)

            fingerprint_function = rdMolDescriptors.GetMorganFingerprintAsBitVect
            
            pars = { "radius": 2,
                             "nBits": 8192,
                             "invariants": [],
                             "fromAtoms": [],
                             "useChirality": True,
                             "useBondTypes": True,
                             "useFeatures": False, }
            
            pars2 = { "radius": 2,
                             "nBits": 8192,
                             "invariants": [],
                             "fromAtoms": [],
                             "useChirality": False,
                             "useBondTypes": True,
                             "useFeatures": False, }

            fp = fingerprint_function(mol, **pars)
            fp1 = fingerprint_function(mol, **pars2)
            smiles_list.append(dm.standardize_smiles(smiles_string))
            achiral_fp_lis.append(list(fp1.GetOnBits()))
            chiral_fp_lis.append(list(fp.GetOnBits()))

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["smarts"] = dm.to_smarts(mol)
        row["selfies"] = dm.to_selfies(mol)
        row["enumerated_smiles"] = smiles_list
        row["achiral_fp"] = achiral_fp_lis
        row["chiral_fp"] = chiral_fp_lis
#         row["dm_enumerated_smiles"] = enum_dm_smiles_lis
        # row["onbits_fp"] =list(fp.GetOnBits())
        
        return row

    except ValueError:
#         row["standard_smiles"] = 'dropped'
#         row["selfies"] = 'dropped'
#         row["inchi"] = 'dropped'
#         row["inchikey"] = 'dropped'
        
        row["standard_smiles"] = 'dropped'
        row["smarts"] = 'dropped'
        row["selfies"] = 'dropped'
        row["enumerated_smiles"] = list('dropped')
        row["achiral_fp"] = list('dropped')
        row["chiral_fp"] = list('dropped')
#         row["dm_enumerated_smiles"] = 'dropped'
        return row