예제 #1
0
    def test6Charge(self):
        """Exercise Reionizer and Uncharger from rdMolStandardize.

        Covers the Reionizer built with the default acid/base pair library
        and with an alternative pair file, then the Uncharger with its
        default settings and with ``canonicalOrder`` switched off and on.
        """
        sulfo_mol = Chem.MolFromSmiles(
            "C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O")

        # Reionizer built from the default acid/base pair library.
        default_result = rdMolStandardize.Reionizer().reionize(sulfo_mol)
        self.assertEqual(Chem.MolToSmiles(default_result),
                         "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")

        # Reionizer built from another pair library that lacks the right
        # pairs for this molecule.
        pairs_path = os.path.join(RDConfig.RDDataDir, 'MolStandardize',
                                  'acid_base_pairs2.txt')
        alt_result = rdMolStandardize.Reionizer(pairs_path).reionize(
            sulfo_mol)
        self.assertEqual(Chem.MolToSmiles(alt_result),
                         "O=S([O-])c1ccc(S(=O)(=O)O)cc1")

        # Uncharger with default arguments.
        benzoate = Chem.MolFromSmiles("O=C([O-])c1ccccc1")
        neutral_benzoate = rdMolStandardize.Uncharger().uncharge(benzoate)
        self.assertEqual(Chem.MolToSmiles(neutral_benzoate),
                         "O=C(O)c1ccccc1")

        # Uncharger without and with canonical ordering on the same input.
        zwitterion = Chem.MolFromSmiles("C[N+](C)(C)CC(C(=O)[O-])CC(=O)[O-]")
        expectations = (
            (False, "C[N+](C)(C)CC(CC(=O)[O-])C(=O)O"),
            (True, "C[N+](C)(C)CC(CC(=O)O)C(=O)[O-]"),
        )
        for canonical, expected_smiles in expectations:
            neutralizer = rdMolStandardize.Uncharger(canonicalOrder=canonical)
            outcome = neutralizer.uncharge(zwitterion)
            self.assertEqual(Chem.MolToSmiles(outcome), expected_smiles)
예제 #2
0
def preprocess_smi(smi):
    """Run a SMILES string through four clean-up filters.

    The filters are applied in order: canonicalisation, salt stripping,
    charge neutralisation and tautomer canonicalisation. Each stage passes
    its result to the next one as a SMILES string.

    Parameters
    ----------
    smi : str
        Input SMILES string.

    Returns
    -------
    str or None
        The cleaned SMILES string, or None when ``smi`` cannot be parsed or
        converted to a canonical SMILES in the first filter.
    """
    # Filter 1 - Convert to canonical SMILES.
    # Check the parse result explicitly instead of relying on the next call
    # raising, and catch Exception rather than everything so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            return None
        can_smi = Chem.MolToSmiles(mol, True)
    except Exception:
        return None

    # Filter 2 - Remove salt (the list of deleted fragments is not needed).
    remover = SaltRemover()
    mol = Chem.MolFromSmiles(can_smi)
    res, _ = remover.StripMolWithDeleted(mol, dontRemoveEverything=True)
    removed_salt_smi = Chem.MolToSmiles(res)

    # Filter 3 - Remove charge
    uncharger = rdMolStandardize.Uncharger()
    m = Chem.MolFromSmiles(removed_salt_smi)
    p = uncharger.uncharge(m)
    uncharged_smi = Chem.MolToSmiles(p)

    # Filter 4 - Standardize the tautomer
    clean_smi = MolStandardize.canonicalize_tautomer_smiles(uncharged_smi)

    return clean_smi
예제 #3
0
def predict(mol, uncharged=True):
    """Run the base and acid predictors on a molecule.

    When ``uncharged`` is true the molecule is first passed through an
    rdMolStandardize.Uncharger. Hydrogens are then added with
    AllChem.AddHs before both predictors are called.

    Returns a 2-tuple: the result of predict_base followed by the result
    of predict_acid.
    """
    if uncharged:
        mol = rdMolStandardize.Uncharger().uncharge(mol)
    with_hs = AllChem.AddHs(mol)
    return predict_base(with_hs), predict_acid(with_hs)
예제 #4
0
def run_filter_mol(smiles_info, child_dict):
    """
    Prepare one ligand and run it through the selected filters.

    The SMILES string is parsed without sanitization, then passed through
    MOH.check_sanitization, MOH.try_deprotanation and MOH.check_sanitization
    again, and finally through an rdMolStandardize.Uncharger before the
    filters are applied.

    Inputs:
    :param list smiles_info: A list with info about a ligand, the SMILES string
        is idx=0 and the name/ID is idx=1. example: smiles_info
        ["CCCCCCC","zinc123"]
    :param dict child_dict: The chosen filters, keyed by filter name with the
        filter objects as values, or None if the user specifies no filters

    Returns:
    :returns: list: a two-item list [smiles_info, status] where status is
        "Sanitize_fail" if any preparation step (including uncharging)
            yields None,
        "Filter_fail" if run_all_selected_filters returns False,
        "Filter_Passed" otherwise, including when child_dict is None.
    """

    mol = Chem.MolFromSmiles(smiles_info[0], sanitize=False)

    # Sanitize, deprotonate, then sanitize again; any step handing back
    # None ends the run for this ligand.
    preparation_steps = (
        MOH.check_sanitization,
        MOH.try_deprotanation,
        MOH.check_sanitization,
    )
    for step in preparation_steps:
        mol = step(mol)
        if mol is None:
            return [smiles_info, "Sanitize_fail"]

    # Neutralize the molecule. Charge affects properties such as logP,
    # molar refractivity and polar surface area, which can impact filters
    # such as Ghose and VandeWaterbeemd; logP is traditionally applied to
    # neutral molecules.
    mol = rdMolStandardize.Uncharger().uncharge(mol)
    if mol is None:
        return [smiles_info, "Sanitize_fail"]

    # No filters selected: the ligand passes by default.
    if child_dict is None:
        return [smiles_info, "Filter_Passed"]

    if run_all_selected_filters(mol, child_dict) is False:
        return [smiles_info, "Filter_fail"]
    return [smiles_info, "Filter_Passed"]
def standardize_format(mol):
    """Return a cleaned-up, standardized version of ``mol``.

    Applies rdMolStandardize.Cleanup, keeps the result of
    get_biggest_component, neutralizes it with an Uncharger and finally
    calls remove_isotopes on the result before returning it.
    """
    cleaned = rdMolStandardize.Cleanup(mol)
    largest = get_biggest_component(cleaned)
    neutral = rdMolStandardize.Uncharger().uncharge(largest)
    remove_isotopes(neutral)
    return neutral
예제 #6
0
파일: dataset.py 프로젝트: XuhanLiu/DrugEx
def corpus(input, output, suffix='sdf'):
    """Build a SMILES corpus and its token vocabulary from a molecule file.

    Molecules are read from ``input``, standardized (metal disconnection,
    normalization, largest-fragment selection, uncharging, then metal
    disconnection and normalization again) and converted to SMILES. Each
    unique SMILES is split into tokens with the vocabulary loaded from
    'data/voc_smiles.txt'. A SMILES is kept when its token list contains a
    'C' or 'c' token, contains neither '[Na]' nor '[Zn]', and has more than
    10 and at most 100 tokens (counting the appended 'EOS').

    Arguments:
        input: Path of the input file. With ``suffix == 'sdf'`` it is opened
            with gzip and read as an SD file; otherwise it is read with
            ``pd.read_table`` and its ``Smiles`` column is used.
        output: Path prefix of the two result files, ``<output>_voc.txt``
            (sorted tokens, one per line) and ``<output>_corpus.txt``
            (tab-separated table with ``Smiles`` and ``Token`` columns).
        suffix: Input format selector; 'sdf' selects the gzipped SD reader.

    Returns:
        None. The results are written to the two output files.
    """
    # Track the gzip handle so it can be closed once reading is finished;
    # the original left it open.
    stream = gzip.open(input) if suffix == 'sdf' else None
    smiles = set()
    try:
        if stream is not None:
            mols = Chem.ForwardSDMolSupplier(stream)
        else:
            df = pd.read_table(input).Smiles.dropna()
            mols = [Chem.MolFromSmiles(s) for s in df]
        voc = Voc('data/voc_smiles.txt')
        charger = rdMolStandardize.Uncharger()
        chooser = rdMolStandardize.LargestFragmentChooser()
        disconnector = rdMolStandardize.MetalDisconnector()
        normalizer = rdMolStandardize.Normalizer()
        for mol in tqdm(mols):
            try:
                mol = disconnector.Disconnect(mol)
                mol = normalizer.normalize(mol)
                mol = chooser.choose(mol)
                mol = charger.uncharge(mol)
                mol = disconnector.Disconnect(mol)
                mol = normalizer.normalize(mol)
                smileR = Chem.MolToSmiles(mol, 0)
                smiles.add(Chem.CanonSmiles(smileR))
            except Exception:
                # Best-effort: report and skip molecules that cannot be
                # processed. Exception (not a bare except) keeps Ctrl-C
                # able to stop the run.
                print('Parsing Error:')
    finally:
        if stream is not None:
            # Drop the supplier before closing the stream it reads from.
            mols = None
            stream.close()

    words = set()
    canons = []
    tokens = []
    for smile in tqdm(smiles):
        token = voc.split(smile) + ['EOS']
        if {'C', 'c'}.isdisjoint(token):
            print('Warning:', smile)
            continue
        if not {'[Na]', '[Zn]'}.isdisjoint(token):
            print('Redudent', smile)
            continue
        if 10 < len(token) <= 100:
            words.update(token)
            canons.append(smile)
            tokens.append(' '.join(token))

    with open(output + '_voc.txt', 'w') as voc_file:
        voc_file.write('\n'.join(sorted(words)))

    table = pd.DataFrame()
    table['Smiles'] = canons
    table['Token'] = tokens
    # drop_duplicates returns a new frame; the original discarded it.
    table = table.drop_duplicates(subset='Smiles')
    table.to_csv(output + '_corpus.txt', sep='\t', index=False)
예제 #7
0
def uncharge(mol):
    """Neutralize a molecule with RDKit's default Uncharger.

    Parameters
    ----------
    mol: rdkit.Chem.Mol
        The molecule whose charges should be neutralized.

    Returns
    -------
    mol: rdkit.Chem.Mol
        The result of ``rdMolStandardize.Uncharger().uncharge(mol)``.
    """
    neutralizer = rdMolStandardize.Uncharger()
    neutral_mol = neutralizer.uncharge(mol)
    return neutral_mol
예제 #8
0
def standardize_mol(
    mol: Chem.rdchem.Mol,
    disconnect_metals: bool = False,
    normalize: bool = True,
    reionize: bool = True,
    uncharge: bool = False,
    stereo: bool = True,
):
    r"""
    Return a standardized copy of the given molecule.

    The input is first copied with copy_mol; the enabled steps are then
    applied to that copy in the order of the arguments below.

    Arguments:
        mol: The molecule to standardize.
        disconnect_metals: Whether to run rdMolStandardize.MetalDisconnector.
        normalize: Whether to run rdMolStandardize.Normalize.
        reionize: Whether to run rdMolStandardize.Reionizer.
        uncharge: Whether to run rdMolStandardize.Uncharger.
        stereo: Whether to call Chem.AssignStereochemistry
            (force=False, cleanIt=True) on the result.

    Returns:
        mol: The standardized molecule.
    """
    work = copy_mol(mol)

    if disconnect_metals:
        work = rdMolStandardize.MetalDisconnector().Disconnect(work)
    if normalize:
        work = rdMolStandardize.Normalize(work)
    if reionize:
        work = rdMolStandardize.Reionizer().reionize(work)
    if uncharge:
        work = rdMolStandardize.Uncharger().uncharge(work)
    if stereo:
        # Called for its effect on `work`; the return value is not used.
        Chem.AssignStereochemistry(work, force=False, cleanIt=True)

    return work
예제 #9
0
def standardize_mol(mol):
    """
    Standardize a molecule, returning None on any failure.

    The steps are, in order: Chem.SanitizeMol (called on the input object
    itself), Chem.RemoveHs, metal disconnection, normalization,
    reionization, uncharging and stereochemistry assignment.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule.
    Returns
    -------
    rdkit.Chem.rdchem.Mol or None
        Standardized molecule, or None if any step raised an exception
        (the error is printed).
    """
    try:
        Chem.SanitizeMol(mol)
        std = Chem.RemoveHs(mol)
        std = rdMolStandardize.MetalDisconnector().Disconnect(std)
        std = rdMolStandardize.Normalize(std)
        std = rdMolStandardize.Reionize(std)
        # Uncharging helps to standardize protonation states.
        std = rdMolStandardize.Uncharger().uncharge(std)
        Chem.AssignStereochemistry(std, force=True, cleanIt=True)
    except Exception as e:
        print(f"ERROR in standardization: {e}")
        return None
    else:
        return std
예제 #10
0
def uncharge_mol(m):
    """
    Neutralize ``m`` with an Uncharger created with ``canonicalOrder=True``
    and return the result after a non-strict property cache update.

    >>> def uncharge_smiles(smi): return Chem.MolToSmiles(uncharge_mol(Chem.MolFromSmiles(smi)))
    >>> uncharge_smiles('[NH3+]CCC')
    'CCCN'
    >>> uncharge_smiles('[NH3+]CCC[O-]')
    'NCCCO'
    >>> uncharge_smiles('C[N+](C)(C)CCC[O-]')
    'C[N+](C)(C)CCC[O-]'
    >>> uncharge_smiles('CC[NH+](C)C.[Cl-]')
    'CCN(C)C.Cl'
    >>> uncharge_smiles('CC(=O)[O-]')
    'CC(=O)O'
    >>> uncharge_smiles('CC(=O)[O-].[Na+]')
    'CC(=O)[O-].[Na+]'
    >>> uncharge_smiles('[NH3+]CC(=O)[O-].[Na+]')
    'NCC(=O)[O-].[Na+]'
    >>> uncharge_smiles('CC(=O)[O-].C[NH+](C)C')
    'CC(=O)O.CN(C)C'

    Alcohols are protonated before acids:

    >>> uncharge_smiles('[O-]C([N+](C)C)CC(=O)[O-]')
    'C[N+](C)C(O)CC(=O)[O-]'

    And the neutralization is done in a canonical order, so atom ordering of the input
    structure isn't important:

    >>> uncharge_smiles('C[N+](C)(C)CC([O-])CC[O-]')
    'C[N+](C)(C)CC([O-])CCO'
    >>> uncharge_smiles('C[N+](C)(C)CC(CC[O-])[O-]')
    'C[N+](C)(C)CC([O-])CCO'

    """
    neutral = rdMolStandardize.Uncharger(canonicalOrder=True).uncharge(m)
    neutral.UpdatePropertyCache(strict=False)
    return neutral
예제 #11
0
        frag = r.StripMol(frag)
        if frag.GetNumAtoms() == 0:
            continue
        elif is_nonorganic(frag):
            continue
        else:
            nonorg = contains_nonorg(frag)

            try:
                frag = rdMolStandardize.Normalize(frag)
            except ValueError as e:
                stand_mol_list.append(("Failed at normalize", index, None,
                                       mixture, nonorg, str(e)))
                continue
            try:
                frag = rdMolStandardize.Uncharger().uncharge(frag)
            except ValueError as e:
                stand_mol_list.append(
                    ("Failed at neutralising", index, None, mixture, nonorg,
                     str(e)))
                continue

            if flow_variables['stereo'] == "Remove":
                try:
                    Chem.RemoveStereochemistry(frag)
                except ValueError as e:
                    stand_mol_list.append(
                        ("Failed at stereochem remove", index, None, mixture,
                         nonorg, str(e)))
                    continue
예제 #12
0
    :return:
    """
    out_dict = {}
    conf = mol.GetConformer()
    atoms = mol.GetAtoms()
    for atom in atoms:
        # Get res_name
        res_name, atom_name, position = get_res_atom_name(atom, conf)
        if res_name in out_dict:
            out_dict[res_name][atom_name] = position
        else:
            out_dict[res_name] = {atom_name: position}
    return out_dict


# Module-level Uncharger instance, created once at import time and used by
# standardize() below.
uncharger = rdMolStandardize.Uncharger()


def standardize(mol):
    """Return a standardized version of ``mol``.

    Runs rdMolStandardize.Cleanup, then fragment(), then the module-level
    ``uncharger``, and finally calls remove_isotopes on the result before
    returning it.
    """
    cleaned = rdMolStandardize.Cleanup(mol)
    piece = fragment(cleaned)
    neutral = uncharger.uncharge(piece)
    remove_isotopes(neutral)
    return neutral


def remove_isotopes(mol):
    """Set the isotope of every atom of ``mol`` to 0, in place.

    Nothing is returned; ``mol`` is modified through each atom's
    ``SetIsotope`` method.
    """
    for each_atom in mol.GetAtoms():
        each_atom.SetIsotope(0)