def filter_props(self, mol):
    """Return True only if every configured descriptor of ``mol`` is in range.

    Each value of ``self.descriptors`` is indexed as ``[0]`` = name of a
    function looked up on ``Descriptors``, ``[1]`` = inclusive lower bound and
    ``[2]`` = inclusive upper bound.  No computed value is stored, and
    evaluation stops at the first descriptor that falls outside its range.
    """
    specs = (self.descriptors[key] for key in self.descriptors)
    return all(
        spec[1] <= getattr(Descriptors, spec[0])(mol) <= spec[2]
        for spec in specs
    )
Example #2
0
    def set_computables_from_mol(self, mol):
        """Populate the structure-derived attributes of this object from ``mol``.

        Sets ``molecular_formula``, ``molecular_weight`` (exact mass),
        ``inchi``, ``inchikey`` and ``smiles`` (written with
        ``isomericSmiles=False``).

        Raises:
            SpectrumError: if any of the computations raises; the original
                exception is chained as the cause.
            AssertionError: if any of the computed values is ``None``.
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # The message used to be built as ``str + e.args``; adding a tuple
            # to a str raises TypeError, which hid the real failure.
            raise SpectrumError(
                "Error occurred while computing properties: {}".format(e)
            ) from e

        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"
    def _reward(self):
        """Score the current state by its molecular weight.

        ``self._state`` is parsed as a SMILES string and its ``MolWt`` is
        compared with the window ``target_weight - 25 .. target_weight + 25``
        (both ends inclusive).

        Returns:
          ``-self.target_weight**2`` when the state cannot be parsed,
          ``1`` when the weight lies inside the window, otherwise the
          negative distance from the weight to the nearer window edge.
        """
        mol = Chem.MolFromSmiles(self._state)
        if mol is None:
            return -self.target_weight**2

        weight = Descriptors.MolWt(mol)
        low = self.target_weight - 25
        high = self.target_weight + 25
        if not low <= weight <= high:
            return -min(abs(low - weight), abs(high - weight))
        return 1
Example #4
0
def calc_mol_weight(smiles):
    """Estimate a molecular weight for ``smiles`` with database-specific fixes.

    The RDKit average molecular weight is adjusted in two ways:

    * for a negative formal charge, one lithium weight is added per unit of
      charge (lithium counter-ions are often omitted in the database);
    * one calcium / magnesium weight is subtracted for every ``"Ca"`` /
      ``"Mg"`` substring in ``smiles`` (these are often only placeholders
      for repeating units).

    Args:
        smiles: SMILES string of the compound.

    Returns:
        The adjusted weight.  When the SMILES cannot be parsed, or RDKit
        fails on it, a base weight of 100 with charge 0 is used instead.
    """
    lithium_weight = 6.941
    calcium_weight = 40.078
    magnesium_weight = 24.305

    mol = Chem.MolFromSmiles(smiles)
    wt, f_charge = 100, 0
    if mol is not None:
        try:
            wt = Descriptors.MolWt(mol)
            f_charge = Chem.rdmolops.GetFormalCharge(mol)
        except Exception:
            # Best-effort fallback, but no longer swallowing
            # KeyboardInterrupt / SystemExit like the bare ``except:`` did.
            wt, f_charge = 100, 0

    # lithium is often omitted in the compound database. add the weight of lithium
    if f_charge < 0:
        wt = wt - lithium_weight * f_charge

    # Ca and Mg are often used just to represent the repeating units
    wt = wt - calcium_weight * smiles.count("Ca")
    wt = wt - magnesium_weight * smiles.count("Mg")

    return wt
Example #5
0
    def calculate_properties_from_mol(self):
        """
        Compute basic physico-chemical descriptors with RDKit and store them.

        The molecule is built from ``self.smiles`` and its ``_Name`` property
        is set to ``self.sequence``.  Nothing is returned; the results are
        written to these attributes, in this order:

        num_hdonors    -- Lipinski hydrogen-bond donor count
        num_hacceptors -- Lipinski hydrogen-bond acceptor count
        mol_weight     -- molecular weight
        mol_logp       -- Crippen logP
        """
        molecule = Chem.MolFromSmiles(self.smiles)
        molecule.SetProp("_Name", self.sequence)

        calculators = (
            ("num_hdonors", Lipinski.NumHDonors),
            ("num_hacceptors", Lipinski.NumHAcceptors),
            ("mol_weight", Descriptors.MolWt),
            ("mol_logp", Crippen.MolLogP),
        )
        for attribute, calculate in calculators:
            setattr(self, attribute, calculate(molecule))
Example #6
0
    def getDiscriptor(self):
        """Compute RDKit descriptors for every structure in the chronic-data CSV.

        Changes the process working directory to the hard-coded data folder,
        reads ``extChronicStrcture.csv``, keeps the rows that have both a
        ``CAS`` and a ``canonical_smiles`` value, computes one descriptor row
        per structure and writes them to ``Descriptors.csv`` in that folder.
        Nothing is returned.
        """
        from rdkit.Chem import Crippen
        from rdkit import Chem
        import pandas as pd
        from rdkit.Chem import Descriptors, Lipinski
        import os

        # NOTE: the working directory is changed and not restored; the
        # relative file names below resolve against it.
        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')
        df = df[['CAS', 'canonical_smiles']]
        df = df.dropna(how='any')

        columns = [
            'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
            'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
            'AromaticCarbocycles', 'AromaticHeterocycles'
        ]

        # The aromatic-atom pattern does not depend on the molecule, so it is
        # compiled once instead of once per row.
        aromatic_atom = Chem.MolFromSmarts('[a]')

        # Rows are collected in a list and turned into a DataFrame once;
        # growing a DataFrame with pd.concat inside the loop copies all
        # previous rows on every iteration.
        rows = []
        for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
            mol = Chem.MolFromSmiles(smiles)
            wt = Descriptors.MolWt(mol)
            rot = Lipinski.NumRotatableBonds(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            logp = Crippen.MolLogP(mol)
            aromaticHeavyatoms = len(mol.GetSubstructMatches(aromatic_atom))
            numAtoms = mol.GetNumAtoms()
            # fraction of atoms that are aromatic
            aromprop = float(aromaticHeavyatoms / numAtoms)
            TPSA = Descriptors.TPSA(mol)
            HDonors = Descriptors.NumHDonors(mol)
            HAcceptors = Descriptors.NumHAcceptors(mol)

            FractionCSP3 = Descriptors.FractionCSP3(mol)
            AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
            AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)

            print(HDonors, HAcceptors)
            rows.append([
                cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
                FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
            ])

        resultDf = pd.DataFrame(rows, columns=columns)
        resultDf.to_csv('Descriptors.csv', index=False)
Example #7
0
def compute_descriptors(smiles):
    """Parse SMILES strings and compute a fixed set of RDKit descriptors.

    Args:
        smiles: iterable of SMILES strings.

    Returns:
        A tuple of seven parallel lists, one entry per input: canonical
        SMILES, H-bond acceptors, H-bond donors, rotatable bonds, logP,
        TPSA and molecular weight.
    """
    mols = [Chem.MolFromSmiles(smi) for smi in smiles]

    canonical = [Chem.MolToSmiles(mol) for mol in mols]
    acceptors = [Descriptors.NumHAcceptors(mol) for mol in mols]
    donors = [Descriptors.NumHDonors(mol) for mol in mols]
    rotatable = [Descriptors.NumRotatableBonds(mol) for mol in mols]
    logp = [Descriptors.MolLogP(mol) for mol in mols]
    tpsa = [Descriptors.TPSA(mol) for mol in mols]
    weights = [Descriptors.MolWt(mol) for mol in mols]

    return canonical, acceptors, donors, rotatable, logp, tpsa, weights
Example #8
0
def score(smiles, num_docking=3):
    """Dock ``smiles`` with rDock and return the lowest score found.

    File names are derived from the MD5 hex digest of ``smiles``: the 3D
    structure is written to ``<digest>.sdf`` and docking results are read
    from ``<digest>_out.sd``.  ``rbdock`` is only launched when that result
    file does not exist yet.  Only molecules that parse and have
    ``MolWt < 500`` are docked.

    Args:
        smiles: SMILES string of the ligand; ``''`` is treated as invalid.
        num_docking: value passed to ``rbdock -n``.

    Returns:
        The minimum of the values on the lines following a
        ``<SCORE.INTER>`` tag, or ``1e10`` when nothing was scored.  Any
        exception raised along the way is swallowed and the best value seen
        so far is returned.
    """
    digest = str(hashlib.md5(smiles.encode('utf-8')).hexdigest())
    docking_result_file = '{}_out'.format(digest)
    sdf_name = '{}.sdf'.format(digest)
    score_tag = '<SCORE.INTER>'  # <SCORE> or <SCORE.INTER>

    best = 1e10

    mol = None if smiles == '' else Chem.MolFromSmiles(smiles)
    try:
        if mol is None or not Descriptors.MolWt(mol) < 500:
            return best

        # Translation from SMILES to sdf
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.UFFOptimizeMolecule(mol, maxIters=200)
        writer = Chem.SDWriter(sdf_name)
        writer.write(mol)
        writer.close()

        # rdock calculation
        cmd_template = ('$RBT_ROOT/bin/rbdock -r cavity.prm '
                        '-p $RBT_ROOT/data/scripts/dock.prm '
                        '-i {} -o {} -T 1 -n {} > /dev/null')
        cmd = cmd_template.format(sdf_name, docking_result_file, num_docking)
        result_path = docking_result_file + '.sd'
        if not os.path.exists(result_path):
            subprocess.call(cmd, shell=True)

        # find the minimum score of rdock from multiple docking results
        if os.path.exists(result_path):
            with open(result_path, 'r') as handle:
                lines = handle.readlines()
            # the line right after a tag line holds the score
            for previous, current in zip(lines, lines[1:]):
                if score_tag in previous:
                    best = min(float(current), best)
    except Exception:
        pass
    return best
Example #9
0
    def run_filter(self, mol):
        """
        Apply a Ghose-style drug-likeness filter to a molecule.

        The molecule is deep-copied and hydrogens are added to the copy
        before anything is measured, because the atom count used here
        includes hydrogens.

        To pass, every one of these must hold (bounds inclusive); the checks
        run in this order and stop at the first failure:
            exact molecular weight: 160 to 500
            number of atoms (hydrogens included): 20 to 70
            molar refractivity: 40 to 130
            logP: -0.4 to 5.6

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool bool: True if the mol passes the filter; False if it
            fails the filter
        """
        protonated = Chem.AddHs(copy.deepcopy(mol))

        criteria = (
            (Descriptors.ExactMolWt, 160, 500),
            (lambda m: m.GetNumAtoms(), 20, 70),
            (Crippen.MolMR, 40, 130),
            (Crippen.MolLogP, -0.4, 5.6),
        )
        for measure, lowest, highest in criteria:
            value = measure(protonated)
            if value < lowest or value > highest:
                return False

        # passed all filters
        return True
Example #10
0
def calc_props_dude_extended(smiles):
    """Compute nine molecular properties for a SMILES string.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        ``(prop_dict, prop_array)``.  ``prop_dict`` maps ``mol_wg``,
        ``log_p``, ``hba``, ``hbd``, ``ring_ct``, ``rot_bnds``,
        ``net_charge``, ``tpsa`` and ``stereo_cnts`` to their values;
        ``prop_array`` holds the same values as a list in that order.
        If anything fails (for example the SMILES cannot be parsed) the
        result is ``({}, [-10] * 9)``.
    """
    keys = ('mol_wg', 'log_p', 'hba', 'hbd', 'ring_ct', 'rot_bnds',
            'net_charge', 'tpsa', 'stereo_cnts')
    try:
        mol = Chem.MolFromSmiles(smiles)

        prop_dict = {
            # molweight
            'mol_wg': Descriptors.MolWt(mol),
            # logP
            'log_p': Chem.Crippen.MolLogP(mol),
            # HBA
            'hba': Chem.rdMolDescriptors.CalcNumLipinskiHBA(mol),
            # HBD
            'hbd': Chem.rdMolDescriptors.CalcNumLipinskiHBD(mol),
            # ring count
            'ring_ct': Chem.rdMolDescriptors.CalcNumRings(mol),
            # rotatable bonds
            'rot_bnds': Chem.rdMolDescriptors.CalcNumRotatableBonds(mol),
            # Formal (net) charge
            'net_charge': Chem.rdmolops.GetFormalCharge(mol),
            # Topological polar surface area
            'tpsa': Chem.rdMolDescriptors.CalcTPSA(mol),
            # Stereo centers (assigned or not)
            'stereo_cnts': len(
                Chem.FindMolChiralCenters(mol,
                                          force=True,
                                          includeUnassigned=True)),
        }
        prop_array = [prop_dict[key] for key in keys]

        return (prop_dict, prop_array)

    except Exception:
        # Sentinel result for any failure; ``Exception`` (rather than a bare
        # ``except:``) lets KeyboardInterrupt / SystemExit propagate.
        return ({}, [-10] * len(keys))
Example #11
0
def get_score_components_from_mol(this_mol):
    """Return the three score components ``(logP, SA_score, cycle_score)``.

    * ``logP``: ``Descriptors.MolLogP``; 0.0 when that calculation raises.
    * ``SA_score``: the negated ``sascorer.calculateScore`` value.
    * ``cycle_score``: ``-(n - 6)`` where ``n`` is the length of the longest
      cycle in the cycle basis of the molecular graph, or 0 when no cycle is
      longer than six atoms.
    """
    try:
        logP = Descriptors.MolLogP(this_mol)
    except Exception:
        # Narrowed from a bare ``except:`` so interrupts still propagate.
        logP = 0.0
    SA_score = -sascorer.calculateScore(this_mol)

    cycle_list = nx.cycle_basis(
        nx.Graph(rdmolops.GetAdjacencyMatrix(this_mol)))
    largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
    # Only the atoms beyond a six-membered ring are penalised.
    cycle_score = -max(largest_cycle - 6, 0)
    return logP, SA_score, cycle_score
Example #12
0
def zinc_logp(smile):
    """Print the largest logP of the given molecules, then all logP values.

    Args:
        smile: sequence whose elements are themselves sequences of string
            pieces; the pieces of each element are concatenated to form one
            SMILES string.

    Nothing is returned.
    """
    compound = [''.join(pieces) for pieces in smile]
    logp_value = [
        Descriptors.MolLogP(Chem.MolFromSmiles(smi)) for smi in compound
    ]

    print(max(logp_value))
    print(logp_value)
Example #13
0
def logp_calculation(new_compound):
    """Compute the logP of the first entry of ``new_compound``.

    The first entry is printed, converted to ``str`` and parsed as SMILES.

    Returns:
        ``(logp, valid_smile, all_smile)`` where ``logp`` is the computed
        value, or -100 when the SMILES is invalid or the calculation raises;
        ``valid_smile`` is ``[new_compound]`` when logP was computed and
        ``[]`` otherwise; ``all_smile`` is a one-element list with the
        SMILES string.
    """
    # Function-call form: valid on Python 3 and prints the same single value
    # on Python 2, unlike the original ``print x`` statement.
    print(new_compound[0])
    smiles = str(new_compound[0])
    valid_smile = []
    m = Chem.MolFromSmiles(smiles)
    try:
        if m is not None:
            logp = Descriptors.MolLogP(m)
            valid_smile.append(new_compound)
        else:
            logp = -100
    except Exception:
        logp = -100
    all_smile = [smiles]

    return logp, valid_smile, all_smile
Example #14
0
 def get_mol_set(self, smiles, atsym=['H', 'C', 'N', 'O'], MaxNa=20):
     """Collect the molecules from ``smiles`` that pass every selection test.

     A molecule (with hydrogens added) is kept when all of these hold:
     the SMILES parses, ``AllChem.EmbedMolecule`` returns 0, every entry of
     the second value returned by ``__convert_rdkitmol_to_nparr__`` is in
     ``atsym``, that value has fewer than ``MaxNa`` entries, the formal
     charges of all atoms sum to zero, and it has at least one rotatable
     bond.

     Returns:
         list of the selected molecules, in input order.
     """
     selected = []
     for smi in smiles:
         mol = Chem.MolFromSmiles(smi)
         if not mol:
             continue
         mol = Chem.AddHs(mol)
         if AllChem.EmbedMolecule(mol) != 0:
             continue

         net_charge = sum(atom.GetFormalCharge() for atom in mol.GetAtoms())
         _, symbols = __convert_rdkitmol_to_nparr__(mol)
         allowed = (set(symbols).issubset(atsym) and len(symbols) < MaxNa
                    and net_charge == 0)
         if not allowed:
             continue
         if Descriptors.NumRotatableBonds(mol) > 0:
             selected.append(mol)
     return selected
Example #15
0
def simulation(chem_model, state):
    """Generate one compound from ``state`` and score it.

    The token vocabulary below is passed to ``chem_kn_simulation`` and
    ``predict_smile``; the first SMILES returned by ``make_input_smile`` is
    the compound that gets scored.

    Returns:
        ``(score, smiles)``.  For a parsable SMILES the raw score is
        ``SA_score + logP + cycle_score`` (SA score negated, logP replaced
        by -1000 if its calculation raises, cycle score penalising rings
        larger than six atoms) and is squashed with ``s / (1 + |s|)``.
        For an unparsable SMILES the score is ``-1000 / (1 + 1000)``.
    """
    val = [
        '\n', '&', 'C', '(', ')', 'c', '1', '2', 'o', '=', 'O', 'N', '3', 'F',
        '[C@@H]', 'n', '-', '#', 'S', 'Cl', '[O-]', '[C@H]', '[NH+]', '[C@]',
        's', 'Br', '/', '[nH]', '[NH3+]', '4', '[NH2+]', '[C@@]', '[N+]',
        '[nH+]', '\\', '[S@]', '5', '[N-]', '[n+]', '[S@@]', '[S-]', '6', '7',
        'I', '[n-]', 'P', '[OH+]', '[NH-]', '[P@@H]', '[P@@]', '[PH2]', '[P@]',
        '[P+]', '[S+]', '[o+]', '[CH2-]', '[CH-]', '[SH+]', '[O+]', '[s+]',
        '[PH+]', '[PH]', '8', '[S@@+]'
    ]
    all_posible = chem_kn_simulation(chem_model, state, val)
    generate_smile = predict_smile(all_posible, val)
    new_compound = make_input_smile(generate_smile)

    try:
        m = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        m = None
    if m is None:
        return -1000 / (1 + 1000), new_compound[0]

    try:
        logp = Descriptors.MolLogP(m)
    except Exception:
        logp = -1000

    # The molecule parsed above is reused; the SMILES used to be parsed two
    # more times for the SA score and the ring analysis.
    SA_score = -sascorer.calculateScore(m)
    cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
    largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
    # Only the atoms beyond a six-membered ring are penalised.
    cycle_score = -max(largest_cycle - 6, 0)

    # The components are summed unnormalised in this function.
    score_one = SA_score + logp + cycle_score
    score = score_one / (1 + abs(score_one))
    return score, new_compound[0]
Example #16
0
 def check_others(self, infile):
     """
     Keep only index entries whose molecule has no radical electrons and
     no atom with a non-zero formal charge.

     For every value in the ``index`` column of the index file, the first
     molecule of the SD file ``<index><self.suffix>`` (looked up in
     ``self.datadir``) is inspected.  The surviving rows are written to
     ``<name before the first '.'>_rmrpc.csv`` in ``self.outdir``.

     NOTE(review): the previous docstring said "MMFF_SMILES is used to
     check"; that cannot be confirmed from this method -- verify.

     Args:
         infile: data index file (a CSV inside ``self.outdir``)
     """
     os.chdir(self.datadir)
     try:
         data = pd.read_csv(os.path.join(self.outdir, infile))
         update_list = []
         for i in list(data["index"]):
             mol = Chem.SDMolSupplier(str(i) + self.suffix)[0]
             if Descriptors.NumRadicalElectrons(mol) != 0:
                 continue
             if all(atom.GetFormalCharge() == 0 for atom in mol.GetAtoms()):
                 update_list.append(i)
         data = data[data["index"].isin(update_list)]
         data.to_csv(os.path.join(self.outdir, infile.split(".")[0] + "_rmrpc.csv"), index = False)
     finally:
         # Always restore the working directory, even if reading or writing
         # fails; previously an exception left the process in datadir.
         os.chdir(self.olddir)
Example #17
0
def check_node_type(new_compound, SA_mean, SA_std, logP_mean, logP_std,
                    cycle_mean, cycle_std):
    """Score every valid compound in ``new_compound``.

    A compound is valid when it parses as SMILES, is at most 81 characters
    long and its logP can be computed (a ``ValueError`` from the logP
    calculation skips the compound).

    Args:
        new_compound: sequence of SMILES strings.
        SA_mean, SA_std, logP_mean, logP_std, cycle_mean, cycle_std:
            mean / standard deviation used to normalise each component.

    Returns:
        ``(node_index, score, valid_compound)``: the positions of the valid
        compounds, their scores (sum of the normalised negated SA score,
        logP and cycle score) and the compounds themselves.
    """
    node_index = []
    valid_compound = []
    score = []

    for i, compound in enumerate(new_compound):
        try:
            m = Chem.MolFromSmiles(str(compound))
        except Exception:
            # Reset explicitly: the old handler left ``m`` untouched, so a
            # failed parse reused the previous iteration's molecule (or hit
            # an unbound name on the first iteration).
            m = None

        if m is None or len(compound) > 81:
            continue
        try:
            logp = Descriptors.MolLogP(m)
        except ValueError:  # handle Sanitization error: Explicit valence for atom is greater than permitted
            continue

        node_index.append(i)
        valid_compound.append(compound)

        # The molecule parsed above is reused instead of parsing again.
        SA_score = -sascorer.calculateScore(m)
        cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
        largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
        # Only the atoms beyond a six-membered ring are penalised.
        cycle_score = -max(largest_cycle - 6, 0)

        SA_score_norm = (SA_score - SA_mean) / SA_std
        logp_norm = (logp - logP_mean) / logP_std
        cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
        score.append(SA_score_norm + logp_norm + cycle_score_norm)

    return node_index, score, valid_compound
Example #18
0
 def generate_fingerprints_and_create_list(self):
     """Annotate ``self.predicted`` with similarity and drug-likeness columns.

     For every molecule in ``self.predicted['molecules']`` this computes the
     highest Tanimoto similarity (Morgan fingerprints, radius 2, 2048 bits)
     to any molecule in ``self.true_pos['molecules']``, the SA score, the
     QED score and logP, and stores them as the columns ``similarities``,
     ``sa_score``, ``qeds`` and ``logp``.  ``self.brenk`` is set to an array
     telling which molecules match the BRENK filter catalog.

     A shortlist mask is built at the end but is not stored or returned by
     the code visible here.
     """
     # fingerprints of predicted ligands and of known ligands
     morgan = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                        radius=2)
     predicted_fps = [
         morgan.GetFingerprint(mol) for mol in self.predicted['molecules']
     ]
     true_fps = [
         morgan.GetFingerprint(mol) for mol in self.true_pos['molecules']
     ]

     # best similarity of each predicted ligand to any known ligand
     similarities = []
     for predicted_fp in predicted_fps:
         tanimoto_values = [
             DataStructs.TanimotoSimilarity(predicted_fp, known_fp)
             for known_fp in true_fps
         ]
         best = np.argmax(tanimoto_values)
         similarities.append(tanimoto_values[best])

     # module code is in: https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     sa_score = [
         sascorer.calculateScore(mol)
         for mol in list(self.predicted['molecules'])
     ]
     # QED drug-likeness score, reference: https://doi.org/10.1038/nchem.1243
     qeds = [qed(mol) for mol in self.predicted['molecules']]
     logp = [Descriptors.MolLogP(mol) for mol in self.predicted['molecules']]

     # filter catalog usage instructions are here: https://github.com/rdkit/rdkit/pull/536
     catalog_params = FilterCatalogParams()
     catalog_params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
     catalog = FilterCatalog(catalog_params)
     self.brenk = np.array(
         [catalog.HasMatch(mol) for mol in self.predicted['molecules']])

     # add the lists as columns to the 'predicted' pd.DataFrame
     self.predicted['similarities'] = similarities
     self.predicted['sa_score'] = sa_score
     self.predicted['qeds'] = qeds
     self.predicted['logp'] = logp
     print(self.predicted['logp'] < 6)

     dissimilar = self.predicted['similarities'] < 0.2
     synthesizable = self.predicted['sa_score'] < 4
     drug_like = self.predicted['qeds'] > 0.25
     moderate_logp = self.predicted['logp'] < 6
     shortlist_mask = (dissimilar & synthesizable & drug_like &
                       moderate_logp & (~self.brenk))
def _make_compound_info(mol_object):
    """Build the compound-record dictionary for an RDKit molecule.

    The record holds the SMILES, InChIKey, molecular weight, exact mass,
    formula, formal charge, the on-bits of the MACCS and RDKit fingerprints
    (each as a ``{bit: 1}`` mapping keyed by the bit index as a string) and
    an empty ``dblinks`` mapping.
    """
    smiles = AllChem.MolToSmiles(mol_object, True)
    inchikey = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol_object))
    mass = Descriptors.MolWt(mol_object)
    exact_mass = AllChem.CalcExactMolWt(mol_object)
    formula = AllChem.CalcMolFormula(mol_object)
    charge = AllChem.GetFormalCharge(mol_object)

    maccs_bits = AllChem.GetMACCSKeysFingerprint(mol_object).GetOnBits()
    maccs = {str(bit): 1 for bit in maccs_bits}
    rdkit_bits = AllChem.RDKFingerprint(mol_object).GetOnBits()
    rdkit_fp = {str(bit): 1 for bit in rdkit_bits}

    return {
        'smiles': smiles,
        'inchikey': inchikey,
        'mass': mass,
        'exactmass': exact_mass,
        'formula': formula,
        'charge': charge,
        'fingerprints': {
            'maccs': maccs,
            'rdkit': rdkit_fp,
        },
        'dblinks': {},
    }
def make_fingerprints(length, verbose, mols, chosen=None):
    """Build the selected fingerprint object and apply it to ``mols``.

    Args:
        length: bit size passed to the fingerprint functions that accept one
            (``nBits`` / ``fpSize``).
        verbose: when true, the name of the fingerprint is printed before it
            is applied.
        mols: passed unchanged to ``apply_fp`` of the fingerprint object.
        chosen: 1 atom pair, 2 topological torsion, 3 Morgan (radius 3),
            4 EState, 5 Avalon, 6 Avalon plus molecular weight, 7 ErG,
            8 RDKit, 9 RDKit via ``FingerprintMols``; any other value
            (including the default ``None``) selects MACCS keys.

    Returns:
        A one-element list holding the fingerprint object.
    """
    if chosen == 1:
        # The display name used to be "&qfuot;Atom pair (1985)" -- a mangled
        # HTML entity; it now matches the style of the other names.
        fp = fingerprint(
            lambda x: GetHashedAtomPairFingerprintAsBitVect(x, nBits=length),
            "Atom pair (1985)")
    elif chosen == 2:
        fp = fingerprint(
            lambda x: GetHashedTopologicalTorsionFingerprintAsBitVect(
                x, nBits=length),
            "Topological torsion (1987)")
    elif chosen == 3:
        fp = fingerprint(
            lambda x: GetMorganFingerprintAsBitVect(x, 3, nBits=length),
            "Morgan circular ")
    elif chosen == 4:
        fp = fingerprint(FingerprintMol, "Estate (1995)")
    elif chosen == 5:
        fp = fingerprint(lambda x: GetAvalonFP(x, nBits=length),
                         "Avalon bit based (2006)")
    elif chosen == 6:
        # Avalon bits with the molecular weight appended as one more value
        fp = fingerprint(
            lambda x: np.append(GetAvalonFP(x, nBits=length),
                                Descriptors.MolWt(x)),
            "Avalon+mol. weight")
    elif chosen == 7:
        fp = fingerprint(lambda x: GetErGFingerprint(x),
                         "ErG fingerprint (2006)")
    elif chosen == 8:
        fp = fingerprint(lambda x: RDKFingerprint(x, fpSize=length),
                         "RDKit fingerprint")
    elif chosen == 9:
        fp = fingerprint(lambda x: FingerprintMols.FingerprintMol(x),
                         "RDKit fingerprint2")
    else:
        fp = fingerprint(lambda x: MACCSkeys.GenMACCSKeys(x),
                         "RDKit MACCSkeys")

    if verbose:
        print("doing", fp.name)
    fp.apply_fp(mols)

    return [fp]
def add_features_to_test_file(input, output):
    """Copy a CSV file and append five molecule features to every row.

    The first line of ``input`` is treated as the header and gets the column
    names ``atom,bonds,molwt,double_bonds,valence_electrons`` appended.
    For every other line the second comma-separated field is parsed as
    SMILES and the atom count, bond count, molecular weight, number of
    ``=`` characters in the SMILES and number of valence electrons are
    appended.  The line number is printed every 10000 lines.

    Args:
        input: path of the CSV file to read.
        output: path of the CSV file to write (overwritten).
    """
    with open(input, 'r') as f:
        lines = f.read().splitlines()

    with open(output, 'w') as f:
        # ``write`` rather than ``writelines``: the header is one string.
        f.write(lines[0] +
                ',atom,bonds,molwt,double_bonds,valence_electrons\n')
        for line_no, row in enumerate(lines[1:], start=1):
            smiles = row.split(',')[1]
            m = Chem.MolFromSmiles(smiles)
            features = [
                m.GetNumAtoms(),
                m.GetNumBonds(),
                Descriptors.MolWt(m),
                smiles.count('='),
                Descriptors.NumValenceElectrons(m),
            ]
            if line_no % 10000 == 0:
                # Function-call form so the block also runs on Python 3.
                print(line_no)
            f.write(','.join([row] + [str(value) for value in features]) + '\n')
Example #22
0
def fragment(mol, mode, quiet=False):
    """Return the biggest fragment of ``mol``.

    Args:
        mol: RDKit molecule, possibly made of several disconnected fragments.
        mode: ``'hac'`` to compare fragments by heavy atom count, ``'mw'``
            to compare them by molecular weight.
        quiet: when false, the choice is reported through ``utils.log``.

    Returns:
        ``mol`` itself when it has at most one fragment; otherwise the
        biggest fragment, carrying a copy of the properties of ``mol``
        (including ``_Name``).  Among equally big fragments the first wins.

    Raises:
        ValueError: for an unknown ``mode`` (only checked when there is more
            than one fragment).
    """
    frags = Chem.GetMolFrags(mol, asMols=True)

    # Nothing to choose from (this also covers a molecule without atoms).
    if len(frags) <= 1:
        return mol

    if mode == 'hac':
        sizes = [frag.GetNumHeavyAtoms() for frag in frags]
        label = "based on HAC"
    elif mode == 'mw':
        sizes = [Descriptors.MolWt(frag) for frag in frags]
        label = "based on MW"
    else:
        raise ValueError('Invalid fragment mode:', mode)

    # TODO - handle ties
    # max() keeps the first of equal sizes and always selects a fragment,
    # even when every size is 0 (the old loop left biggest_mol unbound then).
    biggest_index = max(range(len(frags)), key=sizes.__getitem__)
    biggest_mol = frags[biggest_index]
    if not quiet:
        utils.log("Chose fragment", biggest_index, "from", len(frags), label)

    # copy the properties across
    for name in mol.GetPropNames():
        biggest_mol.SetProp(name, mol.GetProp(name))

    # _Name is a magical property that is not in the ones returned by
    # GetPropNames, so its presence has to be tested with HasProp; the old
    # membership test against GetPropNames() could never succeed.
    if mol.HasProp("_Name"):
        biggest_mol.SetProp("_Name", mol.GetProp("_Name"))

    return biggest_mol
def gaussion_workers(chem_model,val,state,m):
    """Generate one compound from ``state`` and return its normalised score.

    ``chem_model``, ``state``, ``val`` and ``m`` are forwarded to
    ``chem_kn_simulation``; the first SMILES returned by
    ``make_input_smile`` is the compound that gets scored.  The
    normalisation constants (``SA_mean``, ``SA_std``, ``logP_mean``,
    ``logP_std``, ``cycle_mean``, ``cycle_std``) are read from module scope.

    Returns:
        ``[score, smiles]``.  The score is the sum of the normalised negated
        SA score, logP (-1000 if its calculation raises) and cycle score,
        or -1000 when the SMILES cannot be parsed.
    """
    all_posible = chem_kn_simulation(chem_model, state, val, m)
    generate_smile = predict_smile(all_posible, val)
    new_compound = make_input_smile(generate_smile)

    try:
        mol = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        mol = None
    if mol is None:
        return [-1000, new_compound[0]]

    try:
        logp = Descriptors.MolLogP(mol)
    except Exception:
        logp = -1000

    # The molecule parsed above is reused instead of parsing the SMILES
    # again for the SA score and for the ring analysis.
    SA_score = -sascorer.calculateScore(mol)
    cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(mol)))
    largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
    # Only the atoms beyond a six-membered ring are penalised.
    cycle_score = -max(largest_cycle - 6, 0)

    SA_score_norm = (SA_score - SA_mean) / SA_std
    logp_norm = (logp - logP_mean) / logP_std
    cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
    score_one = SA_score_norm + logp_norm + cycle_score_norm

    return [score_one, new_compound[0]]
Example #24
0
def calc_descriptors(rdmol):
    """Return six scalar descriptors followed by the Morgan fingerprint.

    Returns:
        ``[logP, molecular weight, rotatable bonds, H-bond donors,
        H-bond acceptors, TPSA, fingerprint]`` where the fingerprint is a
        numpy array of length ``N_BITS`` filled from a radius-2 Morgan bit
        vector.
    """
    bit_vector = Chem.GetMorganFingerprintAsBitVect(rdmol,
                                                    radius=2,
                                                    nBits=N_BITS,
                                                    useFeatures=False)
    bits = np.zeros(N_BITS)
    # fills ``bits`` in place
    DataStructs.ConvertToNumpyArray(bit_vector, bits)

    calculators = (
        Descriptors.MolLogP,
        Descriptors.MolWt,
        Descriptors.NumRotatableBonds,
        Descriptors.NumHDonors,
        Descriptors.NumHAcceptors,
        Descriptors.TPSA,
    )
    values = [calculate(rdmol) for calculate in calculators]
    values.append(bits)
    return values
def check_smi(insmi):
    """Tell whether a SMILES string passes all structural filters.

    The result is False as soon as one of these is found: the SMILES does
    not parse; fewer than 10 atoms; an atom whose symbol is not in
    ``allowed_elements``; an atom with an isotope label; five or more atoms
    whose chiral tag differs from ``non_s``; a match for any pattern in
    ``rules_smarts``; molecular weight above 750; more than 7 rings; a ring
    with more than 8 atoms.  Otherwise the result is True.
    """
    mol = Chem.MolFromSmiles(insmi)
    if not mol:
        return False

    atoms = mol.GetAtoms()
    if len(atoms) < 10:
        return False

    stereo_count = 0
    for atom in atoms:
        if atom.GetSymbol() not in allowed_elements:
            return False
        if atom.GetIsotope():
            return False
        if atom.GetChiralTag() != non_s:
            stereo_count += 1
    if stereo_count >= 5:
        return False

    if any(len(mol.GetSubstructMatches(patt)) > 0 for patt in rules_smarts):
        return False

    # check MW
    if Descriptors.MolWt(mol) > 750:
        return False

    ring_info = mol.GetRingInfo()
    if ring_info.NumRings() > 7:
        return False
    ring_sizes = [len(ring) for ring in ring_info.AtomRings()]
    if ring_sizes and max(ring_sizes) > 8:
        return False

    return True
def HOF_examples(output_dir):
    """
    Plot the smallest ``diam2`` value against molecular weight for the
    molecules used in the BioHOFs from: 10.1021/jacs.9b06589

    For each molecule the file ``<output_dir>/<name>_diam_result.csv`` is
    read (spaces in the name replaced by ``_`` and ``/`` by ``__``);
    molecules without such a file are skipped.  The figure is saved as
    ``HOF_examples.pdf`` in the current directory.
    """
    molecules = [
        ('fluorescein',
         'C1=CC=C2C(=C1)C(=O)OC23C4=C(C=C(C=C4)O)OC5=C3C=CC(=C5)O'),
        ('hydrogen_peroxide', 'OO'),
        ('methanol', 'CO'),
        ('formaldehyde', 'C=O'),
        ('urea', 'C(=O)(N)N'),
    ]
    fig, ax = plt.subplots(figsize=(8, 5))
    for name, smiles in molecules:
        file_stem = name.replace(' ', '_').replace('/', '__')
        out_file = f"{output_dir}/{file_stem}_diam_result.csv"
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        mid_diam = min(results['diam2'])
        # weight of the molecule with explicit hydrogens
        MW = Descriptors.MolWt(Chem.AddHs(Chem.MolFromSmiles(smiles)))
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='#5499C7',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=140)

    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 500),
                             ylim=(2.5, 15))
    fig.tight_layout()
    fig.savefig("HOF_examples.pdf", dpi=720, bbox_inches='tight')
def cyt_C_perox_assay(output_dir):
    """
    Plot the smallest ``diam2`` value against molecular weight for 3
    peroxide molecules degraded by Cyt-C in ZIF-8 (One-Pot Synthesis of
    Protein-Embedded Metal–Organic Frameworks with Enhanced Biological
    Activities, DOI:10.1021/nl5026419)

    For each molecule the file ``<output_dir>/<name>_diam_result.csv`` is
    read (spaces in the name replaced by ``_`` and ``/`` by ``__``);
    molecules without such a file are skipped.  The band between 4.0 and
    6.6 on the y axis is shaded, and the figure is saved as
    ``cytC_comp.pdf`` in the current directory.
    """
    molecules = [
        ('hydrogen peroxide', 'OO'),
        ('methyl ethyl ketone peroxide', 'CCC(C)(OO)OOC(C)(CC)OO'),
        ('tert-butyl hydroperoxide', 'CC(C)(C)OO'),
    ]
    fig, ax = plt.subplots()
    for name, smiles in molecules:
        file_stem = name.replace(' ', '_').replace('/', '__')
        out_file = f"{output_dir}/{file_stem}_diam_result.csv"
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        mid_diam = min(results['diam2'])
        # weight of the molecule with explicit hydrogens
        MW = Descriptors.MolWt(Chem.AddHs(Chem.MolFromSmiles(smiles)))
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='k',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=100)

    ax.axhspan(ymin=4.0, ymax=6.6, facecolor='k', alpha=0.2, hatch="/")
    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 250),
                             ylim=(2.5, 8))
    fig.tight_layout()
    fig.savefig("cytC_comp.pdf", dpi=720, bbox_inches='tight')
Example #28
0
    def getGeneralInfo(self, params={}):
        """Return identifiers and basic properties for ``params["smi"]``.

        Args:
            params: mapping that must contain the SMILES string under the
                key ``"smi"``.

        Returns:
            ``[]`` (after printing a message) when ``"smi"`` is missing.
            Otherwise a dict with ``canonized_smiles``, ``inchi`` (this
            entry holds the InChIKey), ``MW`` and ``LogP``.
        """
        if "smi" not in params:
            print("Parameter 'smi' not found in argument list")
            return []

        molecule = Chem.MolFromSmiles(params["smi"])

        info = {"canonized_smiles": Chem.MolToSmiles(molecule)}
        info["inchi"] = Chem.inchi.InchiToInchiKey(
            Chem.inchi.MolToInchi(molecule))
        info["MW"] = Descriptors.MolWt(molecule)
        info["LogP"] = Crippen.MolLogP(molecule)
        return info
Example #29
0
    def _featurize(self, mol):
        """
        Calculate the exact molecular weight of a molecule.

        Parameters
        ----------
        mol : RDKit Mol
            Molecule.

        Returns
        -------
        np.ndarray of length 1 containing the exact molecular weight.

        Raises
        ------
        ValueError
            If RDKit cannot be imported.
        """
        try:
            from rdkit.Chem import Descriptors
        except ModuleNotFoundError:
            raise ValueError("This class requires RDKit to be installed.")
        return np.asarray([Descriptors.ExactMolWt(mol)])
Example #30
0
def main(name, argv):
    """Print the building blocks that satisfy the weight and flexibility limits.

    Args:
        name: program name, passed to ``print_usage`` on a usage error.
        argv: exactly three items: the input file path, the maximum
            molecular weight and the maximum number of rotatable bonds.

    Each line of the input file holds a SMILES string and a second
    whitespace-separated field.  For every parsable molecule within both
    limits, its canonical SMILES and that second field are printed
    separated by a tab.  Unparsable SMILES and lines with fewer than two
    fields are skipped.
    """
    if len(argv) != 3:
        print_usage(name)
        return

    MW = float(argv[1])
    RB = int(argv[2])

    #read molport building blocks
    with open(argv[0], 'r') as f:
        for line in f:
            line_s = line.split()
            if len(line_s) < 2:
                # blank or incomplete line: indexing it raised IndexError
                continue
            mol = Chem.MolFromSmiles(line_s[0])
            if mol is None:
                continue
            within_limits = (
                Descriptors.MolWt(mol) <= MW
                and rdMolDescriptors.CalcNumRotatableBonds(mol) <= RB)
            if within_limits:
                # Function-call form so the block also runs on Python 3.
                print(Chem.MolToSmiles(mol) + "\t" + line_s[1])
Example #31
0
def cut_some_bonds(frag_weight, mol, seed):
    """Fragment ``mol`` on a random subset of its cuttable bonds.

    The number of cuts is chosen so that the molecule is split into roughly
    ``MolWt(mol) / frag_weight`` pieces, limited by the number of cuttable
    bonds reported by ``common.find_cuttable_bonds``.

    Parameters
    ----------
    frag_weight : number
        Target weight of one fragment.
    mol : RDKit Mol
        Molecule to fragment.
    seed : object
        Not used inside this function; the shuffle draws from the module
        level ``random`` state. NOTE(review): presumably the caller seeds
        ``random`` beforehand -- confirm.

    Returns
    -------
    tuple
        ``(smiles, name, {})`` when no bond is cut, otherwise whatever
        ``fragment_on_bonds_and_label(mol, to_cut)`` returns.
    """
    cuttable_bonds = common.find_cuttable_bonds(mol)
    cut_bonds_indexes = [b.GetIdx() for b in cuttable_bonds]
    total_weight = Descriptors.MolWt(mol)
    nb_frags = round(total_weight / frag_weight)
    # nb_frags can round down to 0 for a molecule much lighter than
    # frag_weight. Without the clamp the cut count would be -1 and the slice
    # below would select every cuttable bond but the last one, fragmenting
    # exactly the molecules that are meant to be left intact.
    max_cuts = max(0, min(len(cut_bonds_indexes), nb_frags - 1))
    # Shuffle unconditionally so the random stream is consumed the same way
    # whether or not any bond ends up being cut.
    random.shuffle(cut_bonds_indexes)
    to_cut = cut_bonds_indexes[:max_cuts]
    if not to_cut:
        # molecule too small: not fragmented
        # still, we output it so that input and output SMILES files can be
        # visualized side-by-side
        smi = Chem.MolToSmiles(mol)
        name = get_name(mol)
        return (smi, name, {})
    return fragment_on_bonds_and_label(mol, to_cut)
Example #32
0
# Names of the ``Descriptors`` functions evaluated by ``phys_featurizer``.
# The position of a name in this tuple is the index of its value in the
# returned feature list, so the order must not change.
PHYS_FEATURE_NAMES = (
    # 0-9
    "BertzCT", "Chi0", "Chi0n", "Chi0v", "Chi1",
    "Chi1n", "Chi1v", "Chi2n", "Chi2v", "Chi3n",
    # 10-19
    "Chi3v", "Chi4n", "Chi4v", "EState_VSA1", "EState_VSA10",
    "EState_VSA11", "EState_VSA2", "EState_VSA3", "EState_VSA4",
    "EState_VSA5",
    # 20-29
    "EState_VSA6", "EState_VSA7", "EState_VSA8", "EState_VSA9",
    "ExactMolWt", "FractionCSP3", "HallKierAlpha", "HeavyAtomCount",
    "HeavyAtomMolWt", "Ipc",
    # 30-39
    "Kappa1", "Kappa2", "Kappa3", "LabuteASA", "MaxAbsEStateIndex",
    "MaxAbsPartialCharge", "MaxEStateIndex", "MaxPartialCharge",
    "MinAbsEStateIndex", "MinAbsPartialCharge",
    # 40-49
    "MinEStateIndex", "MinPartialCharge", "MolLogP", "MolMR", "MolWt",
    "NHOHCount", "NOCount", "NumAliphaticCarbocycles",
    "NumAliphaticHeterocycles", "NumAliphaticRings",
    # 50-59
    "NumAromaticCarbocycles", "NumAromaticHeterocycles", "NumAromaticRings",
    "NumHAcceptors", "NumHDonors", "NumHeteroatoms", "NumRadicalElectrons",
    "NumRotatableBonds", "NumSaturatedCarbocycles",
    "NumSaturatedHeterocycles",
    # 60-69
    "NumSaturatedRings", "NumValenceElectrons", "PEOE_VSA1", "PEOE_VSA10",
    "PEOE_VSA11", "PEOE_VSA12", "PEOE_VSA13", "PEOE_VSA14", "PEOE_VSA2",
    "PEOE_VSA3",
    # 70-79
    "PEOE_VSA4", "PEOE_VSA5", "PEOE_VSA6", "PEOE_VSA7", "PEOE_VSA8",
    "PEOE_VSA9", "RingCount", "SMR_VSA1", "SMR_VSA10", "SMR_VSA2",
    # 80-89
    "SMR_VSA3", "SMR_VSA4", "SMR_VSA5", "SMR_VSA6", "SMR_VSA7",
    "SMR_VSA8", "SMR_VSA9", "SlogP_VSA1", "SlogP_VSA10", "SlogP_VSA11",
    # 90-99
    "SlogP_VSA12", "SlogP_VSA2", "SlogP_VSA3", "SlogP_VSA4", "SlogP_VSA5",
    "SlogP_VSA6", "SlogP_VSA7", "SlogP_VSA8", "SlogP_VSA9", "TPSA",
    # 100-109
    "VSA_EState1", "VSA_EState10", "VSA_EState2", "VSA_EState3",
    "VSA_EState4", "VSA_EState5", "VSA_EState6", "VSA_EState7",
    "VSA_EState8", "VSA_EState9",
    # 110-119
    "fr_Al_COO", "fr_Al_OH", "fr_Al_OH_noTert", "fr_ArN", "fr_Ar_COO",
    "fr_Ar_N", "fr_Ar_NH", "fr_Ar_OH", "fr_COO", "fr_COO2",
    # 120-129
    "fr_C_O", "fr_C_O_noCOO", "fr_C_S", "fr_HOCCN", "fr_Imine",
    "fr_NH0", "fr_NH1", "fr_NH2", "fr_N_O", "fr_Ndealkylation1",
    # 130-139
    "fr_Ndealkylation2", "fr_Nhpyrrole", "fr_SH", "fr_aldehyde",
    "fr_alkyl_carbamate", "fr_alkyl_halide", "fr_allylic_oxid", "fr_amide",
    "fr_amidine", "fr_aniline",
    # 140-149
    "fr_aryl_methyl", "fr_azide", "fr_azo", "fr_barbitur", "fr_benzene",
    "fr_benzodiazepine", "fr_bicyclic", "fr_diazo", "fr_dihydropyridine",
    "fr_epoxide",
    # 150-159
    "fr_ester", "fr_ether", "fr_furan", "fr_guanido", "fr_halogen",
    "fr_hdrzine", "fr_hdrzone", "fr_imidazole", "fr_imide", "fr_isocyan",
    # 160-169
    "fr_isothiocyan", "fr_ketone", "fr_ketone_Topliss", "fr_lactam",
    "fr_lactone", "fr_methoxy", "fr_morpholine", "fr_nitrile", "fr_nitro",
    "fr_nitro_arom",
    # 170-179
    "fr_nitro_arom_nonortho", "fr_nitroso", "fr_oxazole", "fr_oxime",
    "fr_para_hydroxylation", "fr_phenol", "fr_phenol_noOrthoHbond",
    "fr_phos_acid", "fr_phos_ester", "fr_piperdine",
    # 180-189
    "fr_piperzine", "fr_priamide", "fr_prisulfonamd", "fr_pyridine",
    "fr_quatN", "fr_sulfide", "fr_sulfonamd", "fr_sulfone",
    "fr_term_acetylene", "fr_tetrazole",
    # 190-194
    "fr_thiazole", "fr_thiocyan", "fr_thiophene", "fr_unbrch_alkane",
    "fr_urea",
)


def phys_featurizer(s):
    """Compute the 195 descriptor values of a molecule given as SMILES.

    Parameters
    ----------
    s : str
        SMILES string, parsed with ``Chem.MolFromSmiles``.

    Returns
    -------
    list
        One value per entry of ``PHYS_FEATURE_NAMES``, in the same order;
        element ``i`` is ``getattr(Descriptors, PHYS_FEATURE_NAMES[i])(m)``.

    Notes
    -----
    The parsed molecule is not validated here. NOTE(review): if the SMILES
    cannot be parsed, the first descriptor call presumably receives ``None``
    and fails -- callers should pass valid SMILES.
    """
    m = Chem.MolFromSmiles(s)
    # Descriptors are evaluated strictly in table order, one call each.
    return [getattr(Descriptors, name)(m) for name in PHYS_FEATURE_NAMES]