Python Descriptors Examples, rdkit.Chem.Descriptors Python Examples

Example #1

0

Show file

File: prepare_mols.py Project: samoturk/cheminf-notebooks

 def filter_props(self, mol):
     """Return True only if every configured descriptor of ``mol`` is in range.

     Each entry of ``self.descriptors`` is indexed as ``[0]`` descriptor
     function name (looked up on ``Descriptors``), ``[1]`` lower bound and
     ``[2]`` upper bound, both inclusive. Nothing is stored; the scan stops at
     the first descriptor that falls outside its range.
     """
     for key in self.descriptors:
         spec = self.descriptors[key]
         compute = getattr(Descriptors, spec[0])
         if not (spec[1] <= compute(mol) <= spec[2]):
             return False
     # Every descriptor was within its bounds
     return True

Example #2

0

Show file

File: public_spectrum.py Project: chalbori/bmdms-np

    def set_computables_from_mol(self, mol):
        """Compute and store formula, weight, InChI, InChIKey and SMILES of ``mol``.

        Sets ``molecular_formula``, ``molecular_weight`` (from
        ``Descriptors.ExactMolWt``), ``inchi``, ``inchikey`` and ``smiles``
        (generated with ``isomericSmiles=False``) on the instance.

        Raises:
            SpectrumError: if any of the computations raises; the original
                exception is chained as the cause.
            AssertionError: if any computed value is None.
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # e.args is a tuple: concatenating it to a str raised TypeError
            # inside this handler and hid the real failure. Format it instead.
            raise SpectrumError(
                "Error occurred while computing properties: {}".format(e)
            ) from e

        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"

Example #3

0

Show file

File: run_dqn.py Project: shih29242890/AI_google-research

    def _reward(self):
        """Calculates the reward of the current state.

        The reward is 1 when the molecular weight of the current state lies
        within 25 of ``self.target_weight`` (bounds inclusive). Otherwise it is
        the negative distance from the weight to the nearer bound. A state that
        cannot be parsed as SMILES gets ``-self.target_weight**2``.

        Returns:
          The reward value.
        """
        mol = Chem.MolFromSmiles(self._state)
        if mol is None:
            return -self.target_weight**2
        low = self.target_weight - 25
        high = self.target_weight + 25
        weight = Descriptors.MolWt(mol)
        if not (low <= weight <= high):
            return -min(abs(low - weight), abs(high - weight))
        return 1

Example #4

0

Show file

def calc_mol_weight(smiles):
    """Return a heuristically adjusted molecular weight for a SMILES string.

    The RDKit molecular weight is corrected in two ways:
    * for a negative formal charge, 6.941 is added per unit of charge;
    * 40.078 per "Ca" and 24.305 per "Mg" substring of ``smiles`` is removed.

    If the weight or charge cannot be computed, a weight of 100 and a charge
    of 0 are used before the corrections are applied.
    """
    mol = Chem.MolFromSmiles(smiles)
    try:
        wt = Descriptors.MolWt(mol)
        f_charge = Chem.rdmolops.GetFormalCharge(mol)
    except Exception:
        # Best-effort fallback. Catch Exception rather than everything so that
        # KeyboardInterrupt / SystemExit still propagate.
        wt = 100
        f_charge = 0
    # lithium is often omitted in the compound database. add the weight of lithium
    if f_charge < 0:
        wt = wt - 6.941 * f_charge

    # Ca and Mg are often used just to represent the repeating units
    wt = wt - 40.078 * smiles.count("Ca")
    wt = wt - 24.305 * smiles.count("Mg")

    return wt

Example #5

0

Show file

File: pepfun.py Project: AspirinCode/PepFun

    def calculate_properties_from_mol(self):
        """
        Compute static physico-chemical properties with RDKit and store them on the instance

        Attributes set:
        num_hdonors, num_hacceptors, mol_weight and mol_logp (Crippen logP).
        Nothing is returned.
        """

        # Build the molecule from the stored SMILES and name it after the sequence
        peptide_mol = Chem.MolFromSmiles(self.smiles)
        peptide_mol.SetProp("_Name", self.sequence)

        # Hydrogen-bond counts
        self.num_hdonors = Lipinski.NumHDonors(peptide_mol)
        self.num_hacceptors = Lipinski.NumHAcceptors(peptide_mol)
        # Weight and lipophilicity
        self.mol_weight = Descriptors.MolWt(peptide_mol)
        self.mol_logp = Crippen.MolLogP(peptide_mol)

Example #6

0

Show file

    def getDiscriptor(self):
        """Compute RDKit descriptors for each structure and write them to CSV.

        Changes the working directory to a hard-coded data folder, reads
        ``extChronicStrcture.csv``, drops rows missing ``CAS`` or
        ``canonical_smiles``, computes one row of descriptors per remaining
        structure and writes the table to ``Descriptors.csv`` in that folder.
        """
        from rdkit.Chem import Crippen
        from rdkit import Chem
        import pandas as pd
        from rdkit.Chem import Descriptors, Lipinski
        import os

        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')
        df = df[['CAS', 'canonical_smiles']]
        df = df.dropna(how='any')

        columns = [
            'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
            'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
            'AromaticCarbocycles', 'AromaticHeterocycles'
        ]
        CAS = df['CAS']
        SMILES = df['canonical_smiles']

        # Compile the aromatic-atom pattern once rather than once per molecule.
        aromatic_atom = Chem.MolFromSmarts('[a]')

        rows = []
        for cas, smiles in zip(CAS, SMILES):
            mol = Chem.MolFromSmiles(smiles)
            wt = Descriptors.MolWt(mol)
            rot = Lipinski.NumRotatableBonds(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            logp = Crippen.MolLogP(mol)
            aromaticHeavyatoms = len(mol.GetSubstructMatches(aromatic_atom))
            numAtoms = mol.GetNumAtoms()
            # Fraction of the molecule's atoms that are aromatic
            aromprop = float(aromaticHeavyatoms / numAtoms)
            TPSA = Descriptors.TPSA(mol)
            HDonors = Descriptors.NumHDonors(mol)
            HAcceptors = Descriptors.NumHAcceptors(mol)

            FractionCSP3 = Descriptors.FractionCSP3(mol)
            AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
            AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)

            print(HDonors, HAcceptors)
            rows.append([
                cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
                FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
            ])

        # Build the frame once: concatenating inside the loop re-copies every
        # accumulated row on each iteration (quadratic in the number of rows).
        resultDf = pd.DataFrame(rows, columns=columns)
        resultDf.to_csv('Descriptors.csv', index=False)

Example #7

0

Show file

def compute_descriptors(smiles):
    """Compute canonical SMILES and six RDKit descriptors per input SMILES.

    Returns a 7-tuple of parallel lists, in this order: canonical SMILES,
    H-bond acceptors, H-bond donors, rotatable bonds, logP, TPSA and
    molecular weight.
    """
    mols = [Chem.MolFromSmiles(entry) for entry in smiles]

    canonical = [Chem.MolToSmiles(mol) for mol in mols]
    acceptors = [Descriptors.NumHAcceptors(mol) for mol in mols]
    donors = [Descriptors.NumHDonors(mol) for mol in mols]
    rotatable = [Descriptors.NumRotatableBonds(mol) for mol in mols]
    logp = [Descriptors.MolLogP(mol) for mol in mols]
    tpsa = [Descriptors.TPSA(mol) for mol in mols]
    weight = [Descriptors.MolWt(mol) for mol in mols]

    return canonical, acceptors, donors, rotatable, logp, tpsa, weight

Example #8

0

Show file

File: rdock_util.py Project: tsudalab/ChemGE

def score(smiles, num_docking=3):
    """Dock a molecule with rDock and return the lowest score found.

    File names are derived from the MD5 of ``smiles``. The molecule is only
    processed when it parses and its molecular weight is below 500: it is
    embedded, UFF-optimised and written to ``<md5>.sdf``; ``rbdock`` is run
    unless ``<md5>_out.sd`` already exists; then the result file is scanned
    for ``<SCORE.INTER>`` tags, whose value is on the following line.

    Any exception raised along the way is swallowed. The return value is the
    minimum score read so far, or ``1e10`` if none was read.
    """
    digest = str(hashlib.md5(smiles.encode('utf-8')).hexdigest())
    result_prefix = '{}_out'.format(digest)
    sdf_path = '{}.sdf'.format(digest)
    score_tag = '<SCORE.INTER>'  # <SCORE> or <SCORE.INTER>

    best = 1e10

    # An empty string is never handed to the SMILES parser
    mol = None if smiles == '' else Chem.MolFromSmiles(smiles)
    try:
        if mol is not None and Descriptors.MolWt(mol) < 500:
            # SMILES -> 3D structure -> SDF file
            mol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(mol)
            AllChem.UFFOptimizeMolecule(mol, maxIters=200)
            writer = Chem.SDWriter(sdf_path)
            writer.write(mol)
            writer.close()

            # rdock calculation (skipped when a result file is already there)
            command = '$RBT_ROOT/bin/rbdock -r cavity.prm '\
                      '-p $RBT_ROOT/data/scripts/dock.prm '\
                      '-i {} -o {} -T 1 -n {} > /dev/null'\
                      .format(sdf_path, result_prefix, num_docking)
            result_path = result_prefix + '.sd'
            if not os.path.exists(result_path):
                subprocess.call(command, shell=True)

            # keep the minimum over all docking results
            if os.path.exists(result_path):
                with open(result_path, 'r') as handle:
                    lines = handle.readlines()
                # the line after each tag line holds the score
                for tag_line, value_line in zip(lines, lines[1:]):
                    if score_tag in tag_line:
                        best = min(float(value_line), best)
    except Exception:
        pass
    return best

Example #9

0

Show file

    def run_filter(self, mol):
        """
        Run a Ghose-style drug-likeness filter on a molecule.

        The molecule is copied and explicit hydrogens are added before any
        property is computed, so hydrogens count towards the number of atoms.

        To pass the filter a molecule must have:
            exact MW between 160 and 500
            number of atoms between 20 and 70
            molar refractivity between 40 and 130
            logP between -0.4 and +5.6
        All bounds are inclusive. Checks run in this order and stop at the
        first failure.

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool bool: True if the mol passes the filter; False if it
            fails the filter
        """
        h_mol = Chem.AddHs(copy.deepcopy(mol))

        # (lazy property getter, lower bound, upper bound)
        checks = (
            (lambda: Descriptors.ExactMolWt(h_mol), 160, 500),
            (lambda: h_mol.GetNumAtoms(), 20, 70),
            (lambda: Crippen.MolMR(h_mol), 40, 130),
            (lambda: Crippen.MolLogP(h_mol), -0.4, 5.6),
        )
        for compute, lower, upper in checks:
            value = compute()
            if value < lower or value > upper:
                return False

        # passed all filters
        return True

Example #10

0

Show file

File: decoy_utils.py Project: oxpig/DeepCoy

def calc_props_dude_extended(smiles):
    """Compute nine molecular properties for a SMILES string.

    Returns:
        ``(prop_dict, prop_array)``. ``prop_dict`` maps ``mol_wg``, ``log_p``,
        ``hba``, ``hbd``, ``ring_ct``, ``rot_bnds``, ``net_charge``, ``tpsa``
        and ``stereo_cnts`` to their values; ``prop_array`` holds the same
        values in that order. If anything fails, returns
        ``({}, [-10] * 9)`` instead of raising.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)

        prop_dict = {
            # molweight
            'mol_wg': Descriptors.MolWt(mol),
            # logP
            'log_p': Chem.Crippen.MolLogP(mol),
            # HBA
            'hba': Chem.rdMolDescriptors.CalcNumLipinskiHBA(mol),
            # HBD
            'hbd': Chem.rdMolDescriptors.CalcNumLipinskiHBD(mol),
            # ring count
            'ring_ct': Chem.rdMolDescriptors.CalcNumRings(mol),
            # rotatable bonds
            'rot_bnds': Chem.rdMolDescriptors.CalcNumRotatableBonds(mol),
            # Formal (net) charge
            'net_charge': Chem.rdmolops.GetFormalCharge(mol),
            # Topological polar surface area
            'tpsa': Chem.rdMolDescriptors.CalcTPSA(mol),
            # Stereo centers
            'stereo_cnts': len(
                Chem.FindMolChiralCenters(mol,
                                          force=True,
                                          includeUnassigned=True)),
        }

        prop_array = [
            prop_dict['mol_wg'], prop_dict['log_p'], prop_dict['hba'],
            prop_dict['hbd'], prop_dict['ring_ct'], prop_dict['rot_bnds'],
            prop_dict['net_charge'], prop_dict['tpsa'],
            prop_dict['stereo_cnts']
        ]

        return (prop_dict, prop_array)

    except Exception:
        # Sentinel result on failure. Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        return ({}, [-10, -10, -10, -10, -10, -10, -10, -10, -10])

Example #11

0

Show file

def get_score_components_from_mol(this_mol):
    """Return ``(logP, SA_score, cycle_score)`` for a molecule.

    * ``logP`` is ``Descriptors.MolLogP``, or 0.0 if that call fails.
    * ``SA_score`` is the negated ``sascorer.calculateScore`` value.
    * ``cycle_score`` is the negated number of atoms by which the longest
      cycle in the cycle basis of the adjacency graph exceeds 6 (0 when no
      cycle is longer than 6).
    """
    try:
        logP = Descriptors.MolLogP(this_mol)
    except Exception:
        # Exception rather than a bare except: do not swallow
        # KeyboardInterrupt / SystemExit.
        logP = 0.0
    SA_score = -sascorer.calculateScore(this_mol)
    cycle_list = nx.cycle_basis(nx.Graph(
        rdmolops.GetAdjacencyMatrix(this_mol)))
    longest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
    cycle_score = -max(longest_cycle - 6, 0)
    return logP, SA_score, cycle_score

Example #12

0

Show file

def zinc_logp(smile):
    """Print the maximum logP, then the list of all logP values.

    Args:
        smile: sequence of SMILES, each given as a string or as a sequence
            of string pieces; the pieces of each item are joined into one
            SMILES string before parsing.

    Raises:
        ValueError: if ``smile`` is empty (``max`` of an empty list).
    """
    compound = [''.join(pieces) for pieces in smile]
    logp_value = [
        Descriptors.MolLogP(Chem.MolFromSmiles(smiles_str))
        for smiles_str in compound
    ]

    print(max(logp_value))
    print(logp_value)

Example #13

0

Show file

File: add_node_type.py Project: sozenoid/Chem-TS-binding

def logp_calculation(new_compound):
    """Compute logP for the first entry of ``new_compound``.

    Returns:
        ``(logp, valid_smile, all_smile)``. ``logp`` is the computed value, or
        -100 when the SMILES does not parse or the calculation fails.
        ``valid_smile`` is ``[new_compound]`` when logP was computed, else
        ``[]``. ``all_smile`` is ``[str(new_compound[0])]``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3;
    # the original print statement is a SyntaxError on Python 3.
    print(new_compound[0])
    valid_smile = []
    m = Chem.MolFromSmiles(str(new_compound[0]))
    try:
        if m is not None:
            logp = Descriptors.MolLogP(m)
            valid_smile.append(new_compound)
        else:
            logp = -100
    except Exception:
        # Exception rather than a bare except: do not swallow
        # KeyboardInterrupt / SystemExit.
        logp = -100
    all_smile = [str(new_compound[0])]

    return logp, valid_smile, all_smile

Example #14

0

Show file

 def get_mol_set(self, smiles, atsym=('H', 'C', 'N', 'O'), MaxNa=20):
     """Return the molecules from ``smiles`` that pass all selection criteria.

     A SMILES is kept (as an RDKit mol with explicit hydrogens) when:
     it parses; ``AllChem.EmbedMolecule`` returns 0; every symbol returned by
     ``__convert_rdkitmol_to_nparr__`` is in ``atsym``; it has fewer than
     ``MaxNa`` such symbols; its formal charges sum to 0; and it has at least
     one rotatable bond.

     Args:
         smiles: iterable of SMILES strings.
         atsym: allowed atom symbols. The default is a tuple so that no
             mutable object is shared between calls.
         MaxNa: exclusive upper bound on the number of atoms.
     """
     mols = []
     for sm in smiles:
         mol = Chem.MolFromSmiles(sm)
         if not mol:
             continue
         mol = Chem.AddHs(mol)
         check = AllChem.EmbedMolecule(mol)
         fc = sum(a.GetFormalCharge() for a in mol.GetAtoms())
         if check != 0:
             continue
         # Only the symbols are needed; the first returned value is unused.
         _, S = __convert_rdkitmol_to_nparr__(mol)
         if set(S).issubset(atsym) and len(S) < MaxNa and fc == 0:
             if Descriptors.NumRotatableBonds(mol) > 0:
                 mols.append(mol)
     return mols

Example #15

0

Show file

File: tds_uct.py Project: yangxiufengsia/PMCTS

def simulation(chem_model, state):
    """Generate one SMILES from ``state`` and score it.

    The raw score is ``-SA_score + logP - max(longest_cycle - 6, 0)`` and is
    squashed with ``x / (1 + |x|)``. If logP cannot be computed, -1000 is
    used for it. If the SMILES does not parse, the score is
    ``-1000 / (1 + 1000)``.

    Returns:
        ``(score, new_compound[0])``.
    """
    val = [
        '\n', '&', 'C', '(', ')', 'c', '1', '2', 'o', '=', 'O', 'N', '3', 'F',
        '[C@@H]', 'n', '-', '#', 'S', 'Cl', '[O-]', '[C@H]', '[NH+]', '[C@]',
        's', 'Br', '/', '[nH]', '[NH3+]', '4', '[NH2+]', '[C@@]', '[N+]',
        '[nH+]', '\\', '[S@]', '5', '[N-]', '[n+]', '[S@@]', '[S-]', '6', '7',
        'I', '[n-]', 'P', '[OH+]', '[NH-]', '[P@@H]', '[P@@]', '[PH2]', '[P@]',
        '[P+]', '[S+]', '[o+]', '[CH2-]', '[CH-]', '[SH+]', '[O+]', '[s+]',
        '[PH+]', '[PH]', '8', '[S@@+]'
    ]
    all_posible = chem_kn_simulation(chem_model, state, val)
    generate_smile = predict_smile(all_posible, val)
    new_compound = make_input_smile(generate_smile)
    try:
        m = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        m = None

    if m is None:
        score = -1000 / (1 + 1000)
        return score, new_compound[0]

    try:
        logp = Descriptors.MolLogP(m)
    except Exception:
        logp = -1000
    # Reuse the molecule parsed above instead of re-parsing the same SMILES
    # for every descriptor.
    SA_score = -sascorer.calculateScore(m)
    cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
    longest_cycle = max(len(j) for j in cycle_list) if cycle_list else 0
    # Only the part of the longest cycle beyond 6 atoms is penalised
    cycle_score = -max(longest_cycle - 6, 0)

    # Components are used unnormalised here
    score_one = SA_score + logp + cycle_score
    score = score_one / (1 + abs(score_one))
    return score, new_compound[0]

Example #16

0

Show file

File: analysis.py Project: jenniening/Frag20_prepare

 def check_others(self, infile):
     """
     Keep only entries with no radical electrons and no formally charged atoms

     Reads the index file from ``self.outdir``, loads the first molecule of
     ``<index><self.suffix>`` from ``self.datadir`` for every index, and
     writes the surviving rows to ``<infile stem>_rmrpc.csv`` in
     ``self.outdir``. The working directory is set to ``self.olddir``
     afterwards, even if processing fails.

     Args:
         infile: data index file
     """
     os.chdir(self.datadir)
     try:
         data = pd.read_csv(os.path.join(self.outdir, infile))
         update_list = []
         for i in list(data["index"]):
             mol = Chem.SDMolSupplier(str(i) + self.suffix)[0]
             if Descriptors.NumRadicalElectrons(mol) != 0:
                 continue
             if all(atom.GetFormalCharge() == 0 for atom in mol.GetAtoms()):
                 update_list.append(i)
         data = data[data["index"].isin(update_list)]
         data.to_csv(os.path.join(self.outdir, infile.split(".")[0] + "_rmrpc.csv"), index = False)
     finally:
         # Leave the data directory even when an error interrupts processing
         os.chdir(self.olddir)

Example #17

0

Show file

def check_node_type(new_compound, SA_mean, SA_std, logP_mean, logP_std,
                    cycle_mean, cycle_std):
    """Score every valid compound in ``new_compound``.

    A compound is scored when its SMILES parses, is at most 81 characters
    long and logP can be computed without a ``ValueError``. Its score is the
    sum of the standardised negated SA score, logP and cycle penalty
    (negated number of atoms by which the longest cycle exceeds 6), each
    standardised as ``(value - mean) / std`` with the given statistics.

    Returns:
        ``(node_index, score, valid_compound)``: indices into
        ``new_compound``, their scores, and the compounds themselves.
    """
    node_index = []
    valid_compound = []
    score = []

    for i, smiles in enumerate(new_compound):
        try:
            m = Chem.MolFromSmiles(str(smiles))
        except Exception:
            # Reset explicitly: otherwise ``m`` would be unbound on the first
            # item or still hold the molecule of the previous compound.
            m = None

        if m is None or len(smiles) > 81:
            continue
        try:
            logp = Descriptors.MolLogP(m)
        except ValueError:  # handle sanitization error: explicit valence for atom is greater than permitted
            continue
        node_index.append(i)
        valid_compound.append(smiles)
        # Reuse the molecule parsed above instead of re-parsing the SMILES
        SA_score = -sascorer.calculateScore(m)
        cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
        longest_cycle = max(len(j) for j in cycle_list) if cycle_list else 0
        cycle_score = -max(longest_cycle - 6, 0)

        SA_score_norm = (SA_score - SA_mean) / SA_std
        logp_norm = (logp - logP_mean) / logP_std
        cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
        score.append(SA_score_norm + logp_norm + cycle_score_norm)

    return node_index, score, valid_compound

Example #18

0

Show file

File: GUI.py Project: ljmartin/interactive_filtering

 def generate_fingerprints_and_create_list(self):
     """Add similarity, SA score, QED and logP columns to ``self.predicted``.

     For every predicted molecule this computes the highest Tanimoto
     similarity (Morgan fingerprints, radius 2, 2048 bits) to any molecule in
     ``self.true_pos``, the synthetic accessibility score, the QED score and
     logP. ``self.brenk`` is set to an array telling which predicted
     molecules match the BRENK filter catalog. A shortlist mask is built at
     the end but is not used within this body.
     """
     # Morgan fingerprints of predicted ligands and known ligands
     morgan = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                        radius=2)
     predicted_fps = [
         morgan.GetFingerprint(m) for m in self.predicted['molecules']
     ]
     true_fps = [
         morgan.GetFingerprint(m) for m in self.true_pos['molecules']
     ]

     # best similarity of each predicted ligand to any known ligand
     similarities = []
     for query_fp in predicted_fps:
         scores = [
             DataStructs.TanimotoSimilarity(query_fp, ref_fp)
             for ref_fp in true_fps
         ]
         similarities.append(scores[np.argmax(scores)])

     #module code is in: https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     sa_score = [
         sascorer.calculateScore(m)
         for m in list(self.predicted['molecules'])
     ]
     #QED drug-likeness score, reference: https://doi.org/10.1038/nchem.1243
     qeds = [qed(m) for m in self.predicted['molecules']]
     logp = [Descriptors.MolLogP(m) for m in self.predicted['molecules']]

     #filter catalog usage instructions are here: https://github.com/rdkit/rdkit/pull/536
     catalog_params = FilterCatalogParams()
     catalog_params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
     brenk_catalog = FilterCatalog(catalog_params)
     self.brenk = np.array(
         [brenk_catalog.HasMatch(m) for m in self.predicted['molecules']])

     #store the computed lists as columns of the 'predicted' pd.DataFrame
     self.predicted['similarities'] = similarities
     self.predicted['sa_score'] = sa_score
     self.predicted['qeds'] = qeds
     self.predicted['logp'] = logp
     print(self.predicted['logp'] < 6)

     dissimilar = self.predicted['similarities'] < 0.2
     accessible = self.predicted['sa_score'] < 4
     drug_like = self.predicted['qeds'] > 0.25
     low_logp = self.predicted['logp'] < 6
     shortlist_mask = (dissimilar & accessible & drug_like & low_logp &
                       (~self.brenk))

Example #19

0

Show file

File: compound_parsing.py Project: n1mus/CompoundSetUtils

def _make_compound_info(mol_object):
    """Build the compound-info dictionary for an RDKit molecule.

    The result holds the SMILES, InChIKey, average and exact mass, formula,
    formal charge, the on-bits of the MACCS and RDKit fingerprints (each as a
    ``{str(bit): 1}`` mapping) and an empty ``dblinks`` dict.
    """
    return {
        'smiles': AllChem.MolToSmiles(mol_object, True),
        'inchikey': AllChem.InchiToInchiKey(AllChem.MolToInchi(mol_object)),
        'mass': Descriptors.MolWt(mol_object),
        'exactmass': AllChem.CalcExactMolWt(mol_object),
        'formula': AllChem.CalcMolFormula(mol_object),
        'charge': AllChem.GetFormalCharge(mol_object),
        'fingerprints': {
            'maccs': {
                str(bit): 1
                for bit in AllChem.GetMACCSKeysFingerprint(
                    mol_object).GetOnBits()
            },
            'rdkit': {
                str(bit): 1
                for bit in AllChem.RDKFingerprint(mol_object).GetOnBits()
            },
        },
        'dblinks': {},
    }

Example #20

0

Show file

File: sb_models.py Project: SWFarag/NRP-structure-classifier

def make_fingerprints(length, verbose, mols, chosen=None):
    """Build the selected fingerprint and apply it to ``mols``.

    Args:
        length: bit length passed to the fingerprints that accept one.
        verbose: when truthy, print the name of the fingerprint being applied.
        mols: molecules handed to ``apply_fp``.
        chosen: 1 atom pair, 2 topological torsion, 3 Morgan (radius 3),
            4 Estate, 5 Avalon, 6 Avalon with molecular weight appended,
            7 ErG, 8 RDKit, 9 RDKit (``FingerprintMols``); any other value
            selects MACCS keys.

    Returns:
        A one-element list holding the ``fingerprint`` object after
        ``apply_fp(mols)`` has been called on it.
    """
    if chosen == 1:
        # The name previously carried a garbled HTML-entity prefix ("&qfuot;")
        fp_list = [
            fingerprint(lambda x: GetHashedAtomPairFingerprintAsBitVect(x, nBits=length),
                        "Atom pair (1985)")]
    elif chosen == 2:
        fp_list = [
            fingerprint(lambda x: GetHashedTopologicalTorsionFingerprintAsBitVect(x, nBits=length),
                        "Topological torsion (1987)")]
    elif chosen == 3:
        fp_list = [
            fingerprint(lambda x: GetMorganFingerprintAsBitVect(x, 3, nBits=length),
                        "Morgan circular ")]
    elif chosen == 4:
        fp_list = [
            fingerprint(FingerprintMol, "Estate (1995)")]
    elif chosen == 5:
        fp_list = [
            fingerprint(lambda x: GetAvalonFP(x, nBits=length),
                        "Avalon bit based (2006)")]
    elif chosen == 6:
        fp_list = [
            fingerprint(lambda x: np.append(GetAvalonFP(x, nBits=length), Descriptors.MolWt(x)),
                        "Avalon+mol. weight")]
    elif chosen == 7:
        fp_list = [
            fingerprint(lambda x: GetErGFingerprint(x), "ErG fingerprint (2006)")]
    elif chosen == 8:
        fp_list = [
            fingerprint(lambda x: RDKFingerprint(x, fpSize=length),
                        "RDKit fingerprint")]
    elif chosen == 9:
        fp_list = [
            fingerprint(lambda x: FingerprintMols.FingerprintMol(x),
                        "RDKit fingerprint2")]
    else:
        fp_list = [fingerprint(lambda x: MACCSkeys.GenMACCSKeys(x), "RDKit MACCSkeys")]

    for fp in fp_list:
        if verbose:
            print("doing", fp.name)
        fp.apply_fp(mols)

    return fp_list

Example #21

0

Show file

File: tools.py Project: HarvardCS181Practical2016/practical_one

def add_features_to_test_file(input, output):
    """Copy a CSV file, appending five molecule features to every row.

    The first line of ``input`` is treated as the header and gets the new
    column names appended. For each following line, the second
    comma-separated field is parsed as SMILES and the atom count, bond
    count, molecular weight, number of '=' characters in the SMILES and
    valence-electron count are appended. Progress is printed every 10000
    lines.

    Args:
        input: path of the CSV file to read.
        output: path of the CSV file to write.
    """
    with open(input, 'r') as f:
        lines = f.read().splitlines()

    with open(output, 'w') as f:
        # write(), not writelines(): the header is one string, not a list
        f.write(lines[0] +
                ',atom,bonds,molwt,double_bonds,valence_electrons\n')
        for line_no in range(1, len(lines)):
            row = lines[line_no]
            smiles = row.split(',')[1]
            m = Chem.MolFromSmiles(smiles)
            features = [
                m.GetNumAtoms(),
                m.GetNumBonds(),
                Descriptors.MolWt(m),
                smiles.count('='),
                Descriptors.NumValenceElectrons(m),
            ]
            if line_no % 10000 == 0:
                # Single-argument print() works on both Python 2 and 3
                print(line_no)
            f.write(row + ',' + ','.join(str(v) for v in features) + '\n')

Example #22

0

Show file

def fragment(mol, mode, quiet=False):
    """Return the biggest fragment of ``mol``, with ``mol``'s properties copied.

    Args:
        mol: RDKit molecule, possibly made of several disconnected fragments.
        mode: ``'hac'`` to rank fragments by heavy atom count, ``'mw'`` to
            rank them by molecular weight.
        quiet: when False, the choice is reported through ``utils.log``.

    Returns:
        ``mol`` itself when it does not split into several fragments;
        otherwise the biggest fragment (the first one on ties), carrying the
        properties of ``mol`` including ``_Name``.

    Raises:
        ValueError: if ``mode`` is not recognised and ``mol`` has several
            fragments.
    """
    frags = Chem.GetMolFrags(mol, asMols=True)

    if len(frags) <= 1:
        return mol

    if mode == 'hac':
        sizes = [frag.GetNumHeavyAtoms() for frag in frags]
        basis = "based on HAC"
    elif mode == 'mw':
        sizes = [Descriptors.MolWt(frag) for frag in frags]
        basis = "based on MW"
    else:
        raise ValueError('Invalid fragment mode:',mode)

    # max() returns the first index holding the largest size, so ties go to
    # the earliest fragment, and a fragment is chosen even if all sizes are 0.
    biggest_index = max(range(len(frags)), key=sizes.__getitem__)
    biggest_mol = frags[biggest_index]
    if not quiet:
        utils.log("Chose fragment", biggest_index, "from", len(frags), basis)

    # copy the properties across
    for name in mol.GetPropNames():
        biggest_mol.SetProp(name, mol.GetProp(name))

    # _Name is a magical property that is not in the ones returned by
    # GetPropNames, so its presence has to be tested with HasProp.
    if mol.HasProp('_Name'):
        biggest_mol.SetProp("_Name", mol.GetProp("_Name"))

    return biggest_mol

Example #23

0

Show file

File: mpi_thread_leaf_parallel.py Project: tsudalab/DP-ChemTS

def gaussion_workers(chem_model,val,state,m):
    """Generate one SMILES from ``state`` and score it.

    The score is the sum of the standardised negated SA score, logP and
    cycle penalty (negated number of atoms by which the longest cycle
    exceeds 6), using the module-level ``SA_mean``/``SA_std``,
    ``logP_mean``/``logP_std`` and ``cycle_mean``/``cycle_std``. If logP
    cannot be computed, -1000 is used for it. If the SMILES does not parse,
    the score is -1000.

    Args:
        chem_model, val, state, m: forwarded to ``chem_kn_simulation``.

    Returns:
        A two-element list ``[score, new_compound[0]]``.
    """
    all_posible=chem_kn_simulation(chem_model,state,val,m)
    generate_smile=predict_smile(all_posible,val)
    new_compound=make_input_smile(generate_smile)

    try:
        mol = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        mol = None

    if mol is None:
        return [-1000, new_compound[0]]

    try:
        logp = Descriptors.MolLogP(mol)
    except Exception:
        logp = -1000
    # Reuse the molecule parsed above instead of re-parsing the same SMILES
    SA_score = -sascorer.calculateScore(mol)
    cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(mol)))
    longest_cycle = max(len(j) for j in cycle_list) if cycle_list else 0
    cycle_score = -max(longest_cycle - 6, 0)

    SA_score_norm = (SA_score-SA_mean)/SA_std
    logp_norm = (logp-logP_mean)/logP_std
    cycle_score_norm = (cycle_score-cycle_mean)/cycle_std
    score_one = SA_score_norm + logp_norm + cycle_score_norm

    return [score_one, new_compound[0]]

Example #24

0

Show file

File: handler.py Project: stjordanis/of_conformal

def calc_descriptors(rdmol):
    """Return ``[logP, MW, rotatable bonds, HBD, HBA, TPSA, fingerprint]``.

    The last element is a NumPy array of length ``N_BITS`` that is passed to
    ``DataStructs.ConvertToNumpyArray`` together with the Morgan fingerprint
    (radius 2) of ``rdmol``.
    """
    bit_vect = Chem.GetMorganFingerprintAsBitVect(rdmol,
                                                  radius=2,
                                                  nBits=N_BITS,
                                                  useFeatures=False)
    fp_array = np.zeros(N_BITS)
    # The converter's return value is not needed; fp_array is what is returned
    DataStructs.ConvertToNumpyArray(bit_vect, fp_array)
    return [
        Descriptors.MolLogP(rdmol),
        Descriptors.MolWt(rdmol),
        Descriptors.NumRotatableBonds(rdmol),
        Descriptors.NumHDonors(rdmol),
        Descriptors.NumHAcceptors(rdmol),
        Descriptors.TPSA(rdmol),
        fp_array,
    ]

Example #25

0

Show file

File: Preprocessing.py Project: MolFilterGAN/MolFilterGAN

def check_smi(insmi):
    """Return True when the SMILES ``insmi`` passes every filter.

    A SMILES is rejected when it does not parse, has fewer than 10 atoms,
    contains an atom whose symbol is not in ``allowed_elements`` or that has
    an isotope set, has 5 or more atoms whose chiral tag differs from
    ``non_s``, matches any pattern in ``rules_smarts``, weighs more than
    750, has more than 7 rings, or has a ring of more than 8 atoms.
    """
    mol = Chem.MolFromSmiles(insmi)
    if not mol:
        return False

    atoms = mol.GetAtoms()
    ok = len(atoms) >= 10

    stereo_count = 0
    for atom in atoms:
        if atom.GetSymbol() not in allowed_elements:
            ok = False
            break
        if atom.GetIsotope():
            ok = False
            break
        if atom.GetChiralTag() != non_s:
            stereo_count += 1
    if stereo_count >= 5:
        ok = False

    for pattern in rules_smarts:
        if len(mol.GetSubstructMatches(pattern)) > 0:
            ok = False
            break

    # check MW
    if Descriptors.MolWt(mol) > 750:
        ok = False

    ring_info = mol.GetRingInfo()
    if ring_info.NumRings() > 7:
        ok = False
    ring_sizes = [len(ring) for ring in ring_info.AtomRings()]
    if ring_sizes and max(ring_sizes) > 8:
        ok = False

    return ok

Example #26

0

Show file

File: biomin_screening.py Project: andrewtarzia/enzyme_screen

def HOF_examples(output_dir):
    """
    Plot the value of d against molecular weight for all molecules used in
    the BioHOFs from: 10.1021/jacs.9b06589

    Molecules without a ``<name>_diam_result.csv`` file in ``output_dir`` are
    skipped. The figure is saved as ``HOF_examples.pdf``.

    """
    # molecule names and their SMILES, in matching order
    mol_list_1 = [
        'fluorescein', 'hydrogen_peroxide', 'methanol', 'formaldehyde', 'urea'
    ]
    smiles_list_1 = [
        'C1=CC=C2C(=C1)C(=O)OC23C4=C(C=C(C=C4)O)OC5=C3C=CC(=C5)O', 'OO', 'CO',
        'C=O', 'C(=O)(N)N'
    ]
    fig, ax = plt.subplots(figsize=(8, 5))
    for name, smiles in zip(mol_list_1, smiles_list_1):
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        # smallest 'diam2' value in the results
        mid_diam = min(results['diam2'])
        MW = Descriptors.MolWt(Chem.AddHs(Chem.MolFromSmiles(smiles)))
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='#5499C7',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=140)

    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 500),
                             ylim=(2.5, 15))
    fig.tight_layout()
    fig.savefig("HOF_examples.pdf", dpi=720, bbox_inches='tight')

Example #27

0

Show file

File: biomin_screening.py Project: andrewtarzia/enzyme_screen

def cyt_C_perox_assay(output_dir):
    """
    Prepare figure showing the change in intermediate diameter for 3
    peroxide molecules degraded by Cyt-C in ZIF-8 (One-Pot Synthesis of
    Protein-Embedded Metal–Organic Frameworks with Enhanced Biological
    Activities, DOI:10.1021/nl5026419)

    For every molecule, ``<output_dir>/<name>_diam_result.csv`` is read
    (spaces in the name become ``_`` and ``/`` becomes ``__``); molecules
    without such a file are skipped. The smallest value of the ``diam2``
    column is plotted against the molecular weight of the
    hydrogen-added molecule, and the figure is written to
    ``cytC_comp.pdf`` in the current working directory.

    Parameters
    ----------
    output_dir : str
        Directory holding the ``*_diam_result.csv`` files.

    """
    # Peroxide names and their SMILES strings, paired by position.
    mol_list_1 = [
        'hydrogen peroxide', 'methyl ethyl ketone peroxide',
        'tert-butyl hydroperoxide'
    ]
    smiles_list_1 = ['OO', 'CCC(C)(OO)OOC(C)(CC)OO', 'CC(C)(C)OO']
    fig, ax = plt.subplots()
    for name, smiles in zip(mol_list_1, smiles_list_1):
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            # No result available for this molecule.
            continue
        results = pd.read_csv(out_file)
        mid_diam = min(results['diam2'])
        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        MW = Descriptors.MolWt(mol)
        # The name is deliberately printed twice to keep the log format
        # of earlier runs unchanged.
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='k',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=100)

    # Shaded horizontal band between d = 4.0 and d = 6.6.
    ax.axhspan(ymin=4.0, ymax=6.6, facecolor='k', alpha=0.2, hatch="/")
    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 250),
                             ylim=(2.5, 8))
    fig.tight_layout()
    fig.savefig("cytC_comp.pdf", dpi=720, bbox_inches='tight')
    # pyplot keeps every figure it created alive until it is closed.
    plt.close(fig)

Example #28

0

Show file

File: RDKIT.py Project: MolMeDB/rdkit_service

    def getGeneralInfo(self, params=None):
        """
        Compute basic identifiers and properties for one SMILES string.

        Parameters
        ----------
        params : mapping, optional
            Request parameters. The SMILES string is read from the
            ``"smi"`` key. ``None`` is treated like an empty mapping.

        Returns
        -------
        dict or list
            On success, a dict with the keys ``"canonized_smiles"``,
            ``"inchi"`` (note: this holds the InChIKey, not the InChI
            string), ``"MW"`` and ``"LogP"``. An empty list when
            ``"smi"`` is missing or cannot be parsed into a molecule.
        """
        if params is None:
            params = {}

        if "smi" not in params:
            print("Parameter 'smi' not found in argument list")
            return []

        req_smiles = params["smi"]
        mol = Chem.MolFromSmiles(req_smiles)
        if mol is None:
            # RDKit signals an unparsable SMILES by returning None; report
            # it the same way as a missing parameter instead of crashing
            # in the descriptor calls below.
            print("Parameter 'smi' is not a valid SMILES")
            return []

        canonized_smiles = Chem.MolToSmiles(mol)
        inchi = Chem.inchi.MolToInchi(mol)
        inchiKey = Chem.inchi.InchiToInchiKey(inchi)
        MW = Descriptors.MolWt(mol)
        LogP = Crippen.MolLogP(mol)

        return {
            "canonized_smiles": canonized_smiles,
            "inchi": inchiKey,
            "MW": MW,
            "LogP": LogP
        }

Example #29

0

Show file

File: basic.py Project: zmsunnyday/deepchem

    def _featurize(self, mol):
        """
    Calculate molecular weight.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    np.ndarray of length 1 containing the molecular weight.
    """
        try:
            from rdkit.Chem import Descriptors
        except ModuleNotFoundError:
            raise ValueError("This class requires RDKit to be installed.")
        wt = Descriptors.ExactMolWt(mol)
        wt = [wt]
        return np.asarray(wt)

Example #30

0

Show file

File: filter_MW.py Project: danielzaidman/CovaLib

def main(name, argv):
    """Print the molecules of a SMILES file that pass two property limits.

    ``argv`` must hold exactly three items: the path of the input file,
    the maximum molecular weight, and the maximum number of rotatable
    bonds. Otherwise ``print_usage(name)`` is called and nothing else
    happens.

    Every input line is split on whitespace; the first column is read as
    a SMILES string and the second column is echoed next to the result.
    Lines with fewer than two columns, and lines whose SMILES cannot be
    parsed, are skipped. For each remaining molecule within both limits,
    its SMILES (as written by RDKit) and the second column are printed,
    separated by a tab.
    """
    if len(argv) != 3:
        print_usage(name)
        return

    MW = float(argv[1])
    RB = int(argv[2])

    # read molport building blocks
    with open(argv[0], 'r') as f:
        for line in f:
            line_s = line.split()
            if len(line_s) < 2:
                # Blank or incomplete line: nothing to evaluate.
                continue
            mol = Chem.MolFromSmiles(line_s[0])
            if mol is None:
                continue
            if (Descriptors.MolWt(mol) <= MW
                    and rdMolDescriptors.CalcNumRotatableBonds(mol) <= RB):
                # Single-argument call form works on Python 2 and 3 alike.
                print(Chem.MolToSmiles(mol) + "\t" + line_s[1])

Example #31

0

Show file

def cut_some_bonds(frag_weight, mol, seed):
    """Fragment a molecule on randomly chosen cuttable bonds.

    The target number of fragments is the molecular weight of ``mol``
    divided by ``frag_weight``, rounded; one bond fewer than that is cut,
    limited to the number of cuttable bonds reported by
    ``common.find_cuttable_bonds``.

    Parameters
    ----------
    frag_weight : float
        Desired weight of a single fragment.
    mol : RDKit Mol
        Molecule to fragment.
    seed :
        Not read in this function.
        NOTE(review): presumably the random generator is seeded by the
        caller -- confirm.

    Returns
    -------
    When no bond is cut, the tuple ``(smiles, name, {})`` describing the
    unmodified molecule; otherwise the result of
    ``fragment_on_bonds_and_label(mol, to_cut)``.
    """
    cuttable_bonds = common.find_cuttable_bonds(mol)
    cut_bonds_indexes = [b.GetIdx() for b in cuttable_bonds]
    total_weight = Descriptors.MolWt(mol)
    nb_frags = round(total_weight / frag_weight)
    # Clamp at zero: for a molecule much lighter than frag_weight,
    # nb_frags is 0 and a bound of -1 would make the slice below select
    # every bond but the last instead of none.
    max_cuts = max(0, min(len(cut_bonds_indexes), nb_frags - 1))
    random.shuffle(cut_bonds_indexes)
    to_cut = cut_bonds_indexes[:max_cuts]
    if not to_cut:
        # molecule too small: not fragmented
        # still, we output it so that input and output SMILES files can be
        # visualized side-by-side
        smi = Chem.MolToSmiles(mol)
        name = get_name(mol)
        return (smi, name, {})
    return fragment_on_bonds_and_label(mol, to_cut)

Example #32

0

Show file

File: featurizer.py Project: grshukla/ABA_project

# Names of the rdkit ``Descriptors`` functions evaluated by
# ``phys_featurizer``, in output order: entry i names the function whose
# result is stored at index i of the returned list.
PHYS_DESCRIPTOR_NAMES = (
  # 0 - 9
  "BertzCT", "Chi0", "Chi0n", "Chi0v", "Chi1",
  "Chi1n", "Chi1v", "Chi2n", "Chi2v", "Chi3n",
  # 10 - 19
  "Chi3v", "Chi4n", "Chi4v", "EState_VSA1", "EState_VSA10",
  "EState_VSA11", "EState_VSA2", "EState_VSA3", "EState_VSA4", "EState_VSA5",
  # 20 - 29
  "EState_VSA6", "EState_VSA7", "EState_VSA8", "EState_VSA9", "ExactMolWt",
  "FractionCSP3", "HallKierAlpha", "HeavyAtomCount", "HeavyAtomMolWt", "Ipc",
  # 30 - 39
  "Kappa1", "Kappa2", "Kappa3", "LabuteASA", "MaxAbsEStateIndex",
  "MaxAbsPartialCharge", "MaxEStateIndex", "MaxPartialCharge",
  "MinAbsEStateIndex", "MinAbsPartialCharge",
  # 40 - 49
  "MinEStateIndex", "MinPartialCharge", "MolLogP", "MolMR", "MolWt",
  "NHOHCount", "NOCount", "NumAliphaticCarbocycles",
  "NumAliphaticHeterocycles", "NumAliphaticRings",
  # 50 - 59
  "NumAromaticCarbocycles", "NumAromaticHeterocycles", "NumAromaticRings",
  "NumHAcceptors", "NumHDonors", "NumHeteroatoms", "NumRadicalElectrons",
  "NumRotatableBonds", "NumSaturatedCarbocycles", "NumSaturatedHeterocycles",
  # 60 - 69
  "NumSaturatedRings", "NumValenceElectrons", "PEOE_VSA1", "PEOE_VSA10",
  "PEOE_VSA11", "PEOE_VSA12", "PEOE_VSA13", "PEOE_VSA14", "PEOE_VSA2",
  "PEOE_VSA3",
  # 70 - 79
  "PEOE_VSA4", "PEOE_VSA5", "PEOE_VSA6", "PEOE_VSA7", "PEOE_VSA8",
  "PEOE_VSA9", "RingCount", "SMR_VSA1", "SMR_VSA10", "SMR_VSA2",
  # 80 - 89
  "SMR_VSA3", "SMR_VSA4", "SMR_VSA5", "SMR_VSA6", "SMR_VSA7",
  "SMR_VSA8", "SMR_VSA9", "SlogP_VSA1", "SlogP_VSA10", "SlogP_VSA11",
  # 90 - 99
  "SlogP_VSA12", "SlogP_VSA2", "SlogP_VSA3", "SlogP_VSA4", "SlogP_VSA5",
  "SlogP_VSA6", "SlogP_VSA7", "SlogP_VSA8", "SlogP_VSA9", "TPSA",
  # 100 - 109
  "VSA_EState1", "VSA_EState10", "VSA_EState2", "VSA_EState3", "VSA_EState4",
  "VSA_EState5", "VSA_EState6", "VSA_EState7", "VSA_EState8", "VSA_EState9",
  # 110 - 119
  "fr_Al_COO", "fr_Al_OH", "fr_Al_OH_noTert", "fr_ArN", "fr_Ar_COO",
  "fr_Ar_N", "fr_Ar_NH", "fr_Ar_OH", "fr_COO", "fr_COO2",
  # 120 - 129
  "fr_C_O", "fr_C_O_noCOO", "fr_C_S", "fr_HOCCN", "fr_Imine",
  "fr_NH0", "fr_NH1", "fr_NH2", "fr_N_O", "fr_Ndealkylation1",
  # 130 - 139
  "fr_Ndealkylation2", "fr_Nhpyrrole", "fr_SH", "fr_aldehyde",
  "fr_alkyl_carbamate", "fr_alkyl_halide", "fr_allylic_oxid", "fr_amide",
  "fr_amidine", "fr_aniline",
  # 140 - 149
  "fr_aryl_methyl", "fr_azide", "fr_azo", "fr_barbitur", "fr_benzene",
  "fr_benzodiazepine", "fr_bicyclic", "fr_diazo", "fr_dihydropyridine",
  "fr_epoxide",
  # 150 - 159
  "fr_ester", "fr_ether", "fr_furan", "fr_guanido", "fr_halogen",
  "fr_hdrzine", "fr_hdrzone", "fr_imidazole", "fr_imide", "fr_isocyan",
  # 160 - 169
  "fr_isothiocyan", "fr_ketone", "fr_ketone_Topliss", "fr_lactam",
  "fr_lactone", "fr_methoxy", "fr_morpholine", "fr_nitrile", "fr_nitro",
  "fr_nitro_arom",
  # 170 - 179
  "fr_nitro_arom_nonortho", "fr_nitroso", "fr_oxazole", "fr_oxime",
  "fr_para_hydroxylation", "fr_phenol", "fr_phenol_noOrthoHbond",
  "fr_phos_acid", "fr_phos_ester", "fr_piperdine",
  # 180 - 189
  "fr_piperzine", "fr_priamide", "fr_prisulfonamd", "fr_pyridine",
  "fr_quatN", "fr_sulfide", "fr_sulfonamd", "fr_sulfone",
  "fr_term_acetylene", "fr_tetrazole",
  # 190 - 194
  "fr_thiazole", "fr_thiocyan", "fr_thiophene", "fr_unbrch_alkane",
  "fr_urea",
)


def phys_featurizer(s):
  """Compute the descriptor feature vector of a SMILES string.

  The string is parsed with ``Chem.MolFromSmiles`` and every function
  named in ``PHYS_DESCRIPTOR_NAMES`` is looked up on ``Descriptors`` and
  called on the resulting molecule, in table order.

  Parameters
  ----------
  s : str
      SMILES string of the molecule.

  Returns
  -------
  list
      One descriptor value per entry of ``PHYS_DESCRIPTOR_NAMES``
      (195 values), in the same order.

  NOTE(review): the parsed molecule is not checked before use; an
  unparsable SMILES presumably fails inside the first descriptor call --
  confirm that callers only pass valid SMILES.
  """
  m = Chem.MolFromSmiles(s)
  # Featurization: one call per table entry, order preserved.
  return [getattr(Descriptors, name)(m) for name in PHYS_DESCRIPTOR_NAMES]