Beispiel #1
0
def standardize_tautomer(iso_can_smi):
    """
    Reduce a SMILES string to a single canonical tautomer via RDKit.

    Parameters
    ----------
    iso_can_smi: str
        isomeric SMILES

    Returns
    -------
    str:
        SMILES of the canonical tautomer, as produced by
        ``MolStandardize.canonicalize_tautomer_smiles``

    Raises
    ------
    ImportError
        If the ``has_rdkit`` flag is false.

    Notes
    -----
    Does not standardize for ionization states.
    In some cases performs better than `oequacpac.OEGetUniqueProtomer`.
    See `notebook <https://github.com/openforcefield/cmiles/blob/master/notebooks/Tautomers.ipynb>`_
    """
    # Guard clause: refuse to continue when rdkit is flagged as unavailable.
    if not has_rdkit:
        raise ImportError("Must have rdkit installed to use this function")

    # Deferred import: only attempted once the availability flag has been checked.
    from rdkit.Chem import MolStandardize

    return MolStandardize.canonicalize_tautomer_smiles(iso_can_smi)
Beispiel #2
0
def preprocess_smi(smi):
    """
    Clean a SMILES string through four successive filters.

    The input is canonicalized, stripped of salts, neutralized, and finally
    converted to its canonical tautomer.

    Parameters
    ----------
    smi: str
        input SMILES

    Returns
    -------
    str or None
        the cleaned SMILES, or None when `smi` cannot be converted to a
        canonical SMILES in the first filter
    """
    # Filter 1- Convert to Canonical Smiles
    try:
        mol = Chem.MolFromSmiles(smi)
        # RDKit signals an unparsable SMILES by returning None, not by raising.
        if mol is None:
            return None
        can_smi = Chem.MolToSmiles(mol, True)
    except Exception:
        # Covers inputs RDKit rejects outright (e.g. non-string values) while
        # letting KeyboardInterrupt / SystemExit propagate.
        return None

    # Filter 2- Remove salt
    remover = SaltRemover()
    mol = Chem.MolFromSmiles(can_smi)
    # dontRemoveEverything keeps the result from being stripped down to nothing.
    res, deleted = remover.StripMolWithDeleted(mol, dontRemoveEverything=True)
    removed_salt_smi = Chem.MolToSmiles(res)

    # Filter 3- Remove Charge
    uncharger = rdMolStandardize.Uncharger()
    m = Chem.MolFromSmiles(removed_salt_smi)
    p = uncharger.uncharge(m)
    uncharged_smi = Chem.MolToSmiles(p)

    # Filter 4 - Standardize the tautomer
    clean_smi = MolStandardize.canonicalize_tautomer_smiles(uncharged_smi)

    return clean_smi
 def testBasic(self):
     """Canonicalize() and the first entry of ReorderTautomers() must give the same SMILES."""
     m = Chem.MolFromSmiles('Oc1c(cccc3)c3nc2ccncc12')
     enumerator = rdMolStandardize.TautomerEnumerator()
     canon = enumerator.Canonicalize(m)
     reord = MolStandardize.ReorderTautomers(m)[0]
     canonSmile = Chem.MolToSmiles(canon)
     reordSmile = Chem.MolToSmiles(reord)
     # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
     self.assertEqual(canonSmile, reordSmile)
def GenCoreFromString(mol, max_c):
    """
    Enumerate core variants by switching every 'x' placeholder on or off.

    Each 'x' in `mol` is either turned into an attachment point or dropped.
    A combination is kept only when its number of attachment points does not
    exceed ``max_c - mol.count('C')``.

    Parameters
    ----------
    mol: str
        scaffold string containing 'x' placeholder characters
    max_c: int
        limit compared against the count of 'C' characters plus attachments

    Returns
    -------
    list
        ``[cleaned, tau_smi]`` pairs, one per kept combination whose cleaned
        string still contains a '*'. ``cleaned`` is the output of
        ``CleanFragBranch``; ``tau_smi`` is the standardized canonical-tautomer
        SMILES used for comparison.
    """
    site_count = mol.count('x')
    budget = max_c - mol.count('C')

    # Every on/off pattern over the placeholders, already as ints, filtered to
    # those whose number of attachment points fits the budget.
    patterns = [
        bits for bits in itertools.product((0, 1), repeat=site_count)
        if sum(bits) <= budget
    ]

    cores = []
    for bits in patterns:
        final_form = mol  # molecule for final use, [*] has no ()
        canon_form = mol  # for canonicalization use, [*] has ()
        for keep in bits:
            # Consume the left-most remaining placeholder in both strings.
            final_form = final_form.replace('x', '[*]' if keep else '', 1)
            canon_form = canon_form.replace('x', '([*])' if keep else '', 1)

        cleaned = CleanFragBranch(final_form)
        # Only molecules that still carry attachment point(s) are saved.
        if re.search(r'\*', cleaned) is None:
            continue

        # Canonicalized version of the molecule, kept for comparison.
        std_smi = MolStandardize.standardize_smiles(canon_form)
        tau_smi = MolStandardize.canonicalize_tautomer_smiles(std_smi)
        cores.append([cleaned, tau_smi])

    return cores
 def MyStandardizer(self, norms):
     """Build a MolStandardize.Standardizer using `norms` as normalizations.

     All other settings are taken from the constants exposed by the
     MolStandardize submodules (normalize, fragment, charge, tautomer).
     """
     ms = MolStandardize
     return ms.Standardizer(
         normalizations=norms,
         max_restarts=ms.normalize.MAX_RESTARTS,
         prefer_organic=ms.fragment.PREFER_ORGANIC,
         acid_base_pairs=ms.charge.ACID_BASE_PAIRS,
         charge_corrections=ms.charge.CHARGE_CORRECTIONS,
         tautomer_transforms=ms.tautomer.TAUTOMER_TRANSFORMS,
         tautomer_scores=ms.tautomer.TAUTOMER_SCORES,
         max_tautomers=ms.tautomer.MAX_TAUTOMERS,
     )
Beispiel #6
0
def convert_to_rdkit_smiles(smiles,
                            allowTautomers=True,
                            sanitize=False,
                            isomericSmiles=False):
    """
    Return an RDKit-written SMILES string for the given SMILES input.

    :param smiles: SMILES string to convert
    :param allowTautomers: when true, the input is only parsed and written
        back by RDKit, so different tautomeric forms stay distinct; when
        false, the canonical tautomer SMILES is returned instead
    :param sanitize: forwarded to ``MolFromSmiles``; only used when
        ``allowTautomers`` is true
    :param isomericSmiles: forwarded to ``MolToSmiles``; only used when
        ``allowTautomers`` is true
    """
    # Tautomer-collapsing path ignores `sanitize` and `isomericSmiles`.
    if not allowTautomers:
        return MolStandardize.canonicalize_tautomer_smiles(smiles)

    parsed = rkc.MolFromSmiles(smiles, sanitize=sanitize)
    return rkc.MolToSmiles(parsed, isomericSmiles=isomericSmiles)
def Stereoisomer(inp):
    """
    Expand one record into one record per enumerated stereoisomer.

    Parameters
    ----------
    inp: sequence
        ``(smiles, name, dgunsat, formula, molwt)``

    Returns
    -------
    list
        ``[tautomer_smiles, name_N, dgunsat, formula, molwt]`` rows, where
        ``N`` is the 1-based position of the stereoisomer in the enumeration.
    """
    smiles, name, dgunsat, formula, molwt = inp

    # Enumerate unique stereoisomers without attempting an embedding.
    options = StereoEnumerationOptions(tryEmbedding=False, unique=True)
    parent = Chem.MolFromSmiles(smiles)
    isomers = tuple(EnumerateStereoisomers(parent, options=options))

    isomer_smiles = [
        Chem.MolToSmiles(isomer, isomericSmiles=True) for isomer in isomers
    ]
    tautomer_smiles = [
        MolStandardize.canonicalize_tautomer_smiles(s) for s in isomer_smiles
    ]

    return [
        [smi, name + '_' + str(rank), dgunsat, formula, molwt]
        for rank, smi in enumerate(tautomer_smiles, start=1)
    ]
Beispiel #8
0
    print("Number of valid SMILES: \t%d" % len(results))
    print("%% Valid: \t\t\t%.2f%%" %
          (len(results) / len(generated_smiles) * 100))

# Determine linkers of generated molecules (one get_linker call per result,
# run in parallel over n_cores workers)
linkers = Parallel(n_jobs=n_cores)(delayed(frag_utils.get_linker)(
    Chem.MolFromSmiles(m[2]), Chem.MolFromSmiles(m[3]), m[1]) for m in results)

# Standardise linkers: drop numeric labels in front of '*', remove
# stereochemistry, and replace each linker by its canonical tautomer SMILES.
# Linkers that cannot be processed are left unchanged (best effort).
for i, linker in enumerate(linkers):
    if linker == "":
        continue
    try:
        # Raw string avoids the invalid "\*" escape in a normal string literal.
        linker_canon = Chem.MolFromSmiles(re.sub(r'[0-9]+\*', '*', linker))
        # MolFromSmiles returns None for an unparsable SMILES; keep the linker as is.
        if linker_canon is None:
            continue
        Chem.rdmolops.RemoveStereochemistry(linker_canon)
        linkers[i] = MolStandardize.canonicalize_tautomer_smiles(
            Chem.MolToSmiles(linker_canon))
    except Exception:
        # Deliberately best effort, but do not swallow KeyboardInterrupt/SystemExit.
        continue

# Update results: append each linker to its corresponding result row
for result, linker in zip(results, linkers):
    result.append(linker)

# Prepare training set database

# Load ZINC training set
linkers_train = []

# Read the training-set file line by line, splitting each stripped line on
# single spaces.
# NOTE(review): within the visible lines `toks` is never used and nothing is
# appended to `linkers_train` -- the loop body presumably continues beyond
# this excerpt; confirm against the full script.
with open(train_set_path, 'r') as f:
    for line in f:
        toks = line.strip().split(' ')
 def testLength(self):
     """ReorderTautomers() must return as many tautomers as Enumerate()."""
     m = Chem.MolFromSmiles('Oc1c(cccc3)c3nc2ccncc12')
     enumerator = rdMolStandardize.TautomerEnumerator()
     tauts = enumerator.Enumerate(m)
     reordtauts = MolStandardize.ReorderTautomers(m)
     # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
     self.assertEqual(len(reordtauts), len(tauts))
Beispiel #10
0
def _weld_mol(frags):
    """Return the standardized, canonical-tautomer Mol for welded fragments.

    `frags` is parsed as SMILES and passed through ``Weld_R_Groups``; the
    result is written back to SMILES, standardized, reduced to its canonical
    tautomer and parsed into a Mol again.
    """
    welded = Weld_R_Groups(Chem.MolFromSmiles(frags))
    welded_smi = Chem.MolToSmiles(welded)
    canonical_smi = MolStandardize.canonicalize_tautomer_smiles(
        MolStandardize.standardize_smiles(welded_smi))
    return Chem.MolFromSmiles(canonical_smi)
Beispiel #11
0
def _comb_mol(frags):
    """Print `frags`, then return its standardized canonical-tautomer Mol.

    The SMILES is canonicalized, standardized, reduced to its canonical
    tautomer and finally parsed into a Mol.
    """
    print(frags)
    canonical_input = Chem.CanonSmiles(frags)
    standardized = MolStandardize.standardize_smiles(canonical_input)
    return Chem.MolFromSmiles(
        MolStandardize.canonicalize_tautomer_smiles(standardized))
Beispiel #12
0
def multiprocess_tautomer(_ref_smi):
    """Map a SMILES to its canonical tautomer SMILES.

    Returns a one-entry dict ``{_ref_smi: tautomer_smiles}`` and prints it
    before returning.
    """
    mapping = {
        _ref_smi: MolStandardize.canonicalize_tautomer_smiles(_ref_smi),
    }
    print(mapping)
    return mapping
Beispiel #13
0
def CleanSmilesForMol(smi):
    """Return a Mol with explicit hydrogens for the cleaned form of `smi`.

    The SMILES is canonicalized, standardized and reduced to its canonical
    tautomer before being parsed; hydrogens are then added with ``Chem.AddHs``.
    """
    standardized = MolStandardize.standardize_smiles(Chem.CanonSmiles(smi))
    canonical_tautomer = MolStandardize.canonicalize_tautomer_smiles(
        standardized)
    return Chem.AddHs(Chem.MolFromSmiles(canonical_tautomer))