Beispiel #1
0
  def testMolWt(self):
    """Check the average and exact molecular weights reported for methane.

    The average weight is checked on the molecule as parsed and again after
    ``Chem.AddHs``; in both cases the variant called with ``True`` as second
    argument must give the weight without hydrogens.
    """

    def check_average_weights(candidate):
      # Full weight first, then the hydrogen-free variant.
      full_weight = rdMD._CalcMolWt(candidate)
      self.assertTrue(feq(full_weight, 16.043, .001))
      heavy_weight = rdMD._CalcMolWt(candidate, True)
      self.assertTrue(feq(heavy_weight, 12.011, .001))

    methane = Chem.MolFromSmiles("C")
    check_average_weights(methane)
    check_average_weights(Chem.AddHs(methane))

    # The exact weight is checked on a freshly parsed molecule.
    exact_weight = rdMD.CalcExactMolWt(Chem.MolFromSmiles("C"))
    self.assertTrue(feq(exact_weight, 16.031, .001))
Beispiel #2
0
  def testMolWt(self):
    """Check the average and exact molecular weights reported for methane.

    Uses ``assertTrue`` rather than the ``failUnless`` alias, which was
    deprecated for years and no longer exists in Python 3.12+.
    """
    mol = Chem.MolFromSmiles("C")
    # Average weight, then the same with hydrogens ignored (second arg True).
    amw = rdMD._CalcMolWt(mol)
    self.assertTrue(feq(amw, 16.043, .001))
    amw = rdMD._CalcMolWt(mol, True)
    self.assertTrue(feq(amw, 12.011, .001))

    # The results must not change once AddHs has been applied.
    mol2 = Chem.AddHs(mol)
    amw = rdMD._CalcMolWt(mol2)
    self.assertTrue(feq(amw, 16.043, .001))
    amw = rdMD._CalcMolWt(mol2, True)
    self.assertTrue(feq(amw, 12.011, .001))

    # Exact weight of a freshly parsed methane.
    mol = Chem.MolFromSmiles("C")
    amw = rdMD.CalcExactMolWt(mol)
    self.assertTrue(feq(amw, 16.031, .001))
Beispiel #3
0
    def testMolWt(self):
        """Verify methane's average and exact molecular weights.

        Each case pairs the extra positional arguments handed to
        ``_CalcMolWt`` with the weight expected back (tolerance 0.001).
        """
        average_cases = (
            ((), 16.043),       # default call
            ((True,), 12.011),  # second argument True
        )

        methane = Chem.MolFromSmiles("C")
        for extra_args, expected in average_cases:
            weight = rdMD._CalcMolWt(methane, *extra_args)
            self.assertTrue(feq(weight, expected, .001))

        # Same expectations after AddHs has been applied.
        hydrogenated = Chem.AddHs(methane)
        for extra_args, expected in average_cases:
            weight = rdMD._CalcMolWt(hydrogenated, *extra_args)
            self.assertTrue(feq(weight, expected, .001))

        reparsed = Chem.MolFromSmiles("C")
        exact = rdMD.CalcExactMolWt(reparsed)
        self.assertTrue(feq(exact, 16.031, .001))
Beispiel #4
0
def calc(smi, name):
    """Compute one row of descriptors for a SMILES string.

    Args:
        smi: SMILES string of the molecule.
        name: identifier echoed back as the first element of the row and
            used in error messages.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused)`` with the float values rounded,
        or ``None`` when the SMILES cannot be parsed or any descriptor
        calculation fails (a message is written to stderr in both cases).
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive messages do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: share of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
                round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
                round(fmf, 3), round(qed, 3), hac, nrings_fused)
    except Exception:
        # Deliberately broad: any descriptor failure skips the molecule, but
        # KeyboardInterrupt/SystemExit must still propagate.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
Beispiel #5
0
def properties(mol):
    """Gather the molecular properties that feed the QED descriptor.

    ``Chem.RemoveHs`` is applied to the molecule first. The result is a
    ``QEDproperties`` record with the fields MW, ALOGP, HBA, HBD, PSA, ROTB,
    AROM and ALERTS.

    Raises:
        ValueError: if ``mol`` is None.
    """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')
    mol = Chem.RemoveHs(mol)

    mol_weight = rdmd._CalcMolWt(mol)
    log_p = Crippen.MolLogP(mol)

    # HBA: total number of matches over all acceptor patterns that hit.
    acceptor_count = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            acceptor_count += len(mol.GetSubstructMatches(pattern))

    donor_count = rdmd.CalcNumHBD(mol)
    polar_surface = MolSurf.TPSA(mol)
    strict_mode = rdmd.NumRotatableBondsOptions.Strict
    rotatable_count = rdmd.CalcNumRotatableBonds(mol, strict_mode)

    # AROM: delete the AliphaticRings substructure from a copy and take the
    # SSSR of what remains.
    # Swapping this for Lipinski.NumAromaticRings(mol) is not identical; the
    # expression used here tends to count more rings, e.g. for
    # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1  uses 2, should be 0 ?
    without_aliphatic = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
    aromatic_rings = Chem.GetSSSR(without_aliphatic)

    # ALERTS: number of structural-alert patterns present in the molecule.
    alert_count = 0
    for alert in StructuralAlerts:
        if mol.HasSubstructMatch(alert):
            alert_count += 1

    return QEDproperties(
        MW=mol_weight,
        ALOGP=log_p,
        HBA=acceptor_count,
        HBD=donor_count,
        PSA=polar_surface,
        ROTB=rotatable_count,
        AROM=aromatic_rings,
        ALERTS=alert_count,
    )
Beispiel #6
0
def properties(mol):
  """Return the ``QEDproperties`` record needed to evaluate QED for ``mol``.

  The molecule is passed through ``Chem.RemoveHs`` before any property is
  computed. A ``ValueError`` is raised when ``mol`` is None.
  """
  if mol is None:
    raise ValueError('You need to provide a mol argument.')
  mol = Chem.RemoveHs(mol)

  values = {}
  values['MW'] = rdmd._CalcMolWt(mol)
  values['ALOGP'] = Crippen.MolLogP(mol)
  # Sum of match counts over every acceptor pattern found in the molecule.
  values['HBA'] = sum(
    map(len, (mol.GetSubstructMatches(pattern)
              for pattern in Acceptors
              if mol.HasSubstructMatch(pattern))))
  values['HBD'] = rdmd.CalcNumHBD(mol)
  values['PSA'] = MolSurf.TPSA(mol)
  values['ROTB'] = rdmd.CalcNumRotatableBonds(
    mol, rdmd.NumRotatableBondsOptions.Strict)
  # Note on AROM: Lipinski.NumAromaticRings(mol) is not an identical
  # replacement. The expression below tends to count more rings
  # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
  # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
  # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1  uses 2, should be 0 ?
  stripped_copy = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
  values['AROM'] = Chem.GetSSSR(stripped_copy)
  # Number of structural-alert patterns that match.
  values['ALERTS'] = len(
    [alert for alert in StructuralAlerts if mol.HasSubstructMatch(alert)])

  return QEDproperties(**values)
def main():
    """Add Lipinski and TPSA descriptors to the submission table.

    Reads ``submissions_final_result.csv``, computes the descriptors for each
    row's ``smiles_string``, merges them back onto the input on
    ``submission_id`` and ``smiles_string``, and writes the result to
    ``lipinski_psa_result.csv``.
    """
    submissions = pd.read_csv("submissions_final_result.csv")

    descriptor_rows = []
    for _, entry in submissions.iterrows():
        mol = Chem.MolFromSmiles(entry['smiles_string'])
        descriptor_rows.append({
            'submission_id': entry['submission_id'],
            'smiles_string': entry['smiles_string'],
            # Lipinski's rule inputs
            'h_bond_donor': rd.CalcNumLipinskiHBD(mol),
            'h_bond_acceptor': rd.CalcNumLipinskiHBA(mol),
            'moluclar_mass': rd._CalcMolWt(mol),
            # First element of the Crippen result is the partition coefficient
            'log_p': rd.CalcCrippenDescriptors(mol)[0],
            # Topological polar surface area
            'topological_polar_surface_area': rd.CalcTPSA(mol),
        })

    descriptors = pd.DataFrame(descriptor_rows)
    merged = pd.merge(submissions,
                      descriptors,
                      on=['submission_id', 'smiles_string'])
    merged.to_csv("lipinski_psa_result.csv", index=False, encoding='utf-8')
Beispiel #8
0
def calc(smi, name):
    """Compute one row of descriptors for a SMILES string.

    Args:
        smi: SMILES string of the molecule.
        name: identifier echoed back as the first element of the row and
            used in error messages.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` with the float values
        rounded, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation fails (a message is written to stderr in both
        cases).
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive messages do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # The scaffold feeds both fmf and fcsp3_bm, so build it only once.
        scaffold = GetScaffoldForMol(m)
        # fmf: share of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        fmf = 0 if hac == 0 else scaffold.GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # default=() makes an acyclic molecule report a ring size of 0.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
                round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
                round(fmf, 3), round(qed, 3), hac, nrings_fused,
                n_unique_hba_hbd_atoms, max_ring_size, n_chiral_centers,
                round(fcsp3_bm, 3))
    except Exception:
        # Deliberately broad: any descriptor failure skips the molecule, but
        # KeyboardInterrupt/SystemExit must still propagate.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
Beispiel #9
0
def compute_descs_rdkit(mol):
    """Return a fixed tuple of RDKit descriptors for ``mol``.

    The tuple is ordered as (MW, HBA, HBD, TPSA, aromatic ring count,
    heteroatom count, rotatable bond count). More descriptors can be
    appended later on.
    """
    calc = rdMolDescriptors
    # noinspection PyProtectedMember
    return (
        calc._CalcMolWt(mol),
        calc.CalcNumHBA(mol),
        calc.CalcNumHBD(mol),
        calc.CalcTPSA(mol),
        calc.CalcNumAromaticRings(mol),
        calc.CalcNumHeteroatoms(mol),
        calc.CalcNumRotatableBonds(mol),
    )
def main(in_file, output):
  """Compute descriptors for every molecule in ``in_file`` and export them.

  One row per molecule name is written to ``<output>.csv`` and
  ``<output>.xlsx``.

  Parameters
  ----------
  in_file : passed, wrapped in a list, to ``rdkit_open``
  output : str
      Prefix of the two output files.
  """
  Cmpds  = {}
  InMols = rdkit_open([in_file])
  print('\n # Number of input molecule: {0}'.format(len(InMols)))
  for mol in InMols:
    # The first whitespace-separated token of the title is the identifier;
    # a repeated name overwrites the earlier entry.
    name = mol.GetProp('_Name').split()[0]

    # One Crippen call yields both values (previously computed twice).
    logP, MolRef = rd.CalcCrippenDescriptors(mol)

    Cmpds[name] = {
      'Name':    name,
      'Formula': rd.CalcMolFormula(mol),

      'MW':   rd._CalcMolWt(mol),            # Molecular Weight
      'logP': logP,                          # Partition coefficient
      'HDon': rd.CalcNumLipinskiHBD(mol),    # Lipinski Hbond donor
      'HAcc': rd.CalcNumLipinskiHBA(mol),    # Lipinski Hbond acceptor
      'TPSA': rd.CalcTPSA(mol),              # Topological polar surface area

      'Rotat':   rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bond
      'MolRef':  MolRef,                                      # Molar refractivity
      'AliRing': rd.CalcNumAliphaticRings(mol),     # Aliphatic ring number
      'AroRing': rd.CalcNumAromaticRings(mol),      # Aromatic ring number
#      'Stereo': rd.CalcNumAtomStereoCenters(mol),      # Stereo center number
#      'UnspStereo': rd.CalcNumUnspecifiedAtomStereoCenters(mol),  # unspecified stereo

      # SMILES is generated once; the earlier duplicate assignment was always
      # overwritten by this one.
      'SMILES': Chem.MolToSmiles(mol,
                    isomericSmiles=True, allHsExplicit=False),
    }

  ####################################

  df = pd.DataFrame.from_dict(Cmpds, orient='index')
  df.index.name = 'Name'

  # Columns of data to print out
  Columns = [ 'Formula',
              'MW',    'logP',   'HDon',    'HAcc',    'TPSA',
              'Rotat', 'MolRef', 'AliRing', 'AroRing',
              #'Stereo', 'UnspStereo',
              'SMILES', ]
  reorder = df[Columns]

  # Output to CSV
  reorder.to_csv( output+'.csv', sep=',', na_rep='NA', encoding='utf-8',
                  float_format='%.5f', header=True )

  # Output to Excel
  reorder.to_excel( output+'.xlsx', header=True, na_rep='NA' )
def calc(smi, name):
    """Compute one row of descriptors for a SMILES string.

    Args:
        smi: SMILES string of the molecule.
        name: identifier echoed back as the first element of the row and
            used in the error message.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf)`` with the float values rounded, or ``None`` (after a
        message on stderr) when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive messages do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    # fmf: share of heavy atoms that belong to the scaffold. A molecule with
    # no heavy atoms used to raise ZeroDivisionError here; it now gets 0.
    hac = m.GetNumHeavyAtoms()
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
            round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
            round(fmf, 3))
    def filter_druglikeness_5_rules(self, smiles):
        """Drop SMILES strings that violate the drug-likeness thresholds.

        A molecule is rejected when MW > 600, ALOGP is outside [0, 6],
        HBA > 11, HBD > 7, PSA > 180 or ROTB > 11. The number of rejected
        entries is printed.

        The input list is filtered in place and also returned, as before.
        Entries are no longer removed while the list is being iterated,
        which used to skip the element following every removal.

        NOTE(review): a SMILES string RDKit cannot parse is not handled
        here and presumably still raises inside ``Chem.RemoveHs``.
        """
        kept = []
        count = 0
        for smi in smiles:
            mol = Chem.MolFromSmiles(smi)
            mol = Chem.RemoveHs(mol)

            MW = rdmd._CalcMolWt(mol)
            ALOGP = Crippen.MolLogP(mol)
            HBA = rdmd.CalcNumHBA(mol)
            HBD = rdmd.CalcNumHBD(mol)
            PSA = MolSurf.TPSA(mol)
            ROTB = rdmd.CalcNumRotatableBonds(
                mol, rdmd.NumRotatableBondsOptions.Strict)

            if (MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11 or HBD > 7
                    or PSA > 180 or ROTB > 11):
                count = count + 1
            else:
                kept.append(smi)

        # Only touch the caller's list when something was actually rejected.
        if count:
            smiles[:] = kept
        print("unavaliable rule_5_drug:%i" % count)

        return smiles
Beispiel #13
0
 def calculate_properties(self, smiles=None, mol=None, props=()):
     """Calculate the requested basic properties and store them in self.data.

     Args:
         smiles: SMILES string, used only when ``mol`` is not given.
         mol: molecule object; takes precedence over ``smiles``.
         props: names of the properties to compute (``'py_formula'``,
             ``'py_em'``, ``'py_n_Cl_Br'``, ``'py_na'``, ``'py_mw'``,
             ``'py_fsp3'``, ``'py_rb'``, ``'py_tpsa'``, ``'py_clogp'``,
             ``'py_nar'``, ``'py_nhba'``, ``'py_nhbd'``).

     Returns:
         error (bool): True when nothing was computed (no properties
         requested, no molecule/SMILES supplied, or the SMILES could not be
         turned into a molecule), False otherwise.
     """
     if len(props) == 0:
         return True
     if mol is None:
         # Without a SMILES there is nothing to parse; report the error
         # instead of passing None on to the parser.
         if smiles is None:
             return True
         mol = Chem.MolFromSmiles(smiles)
     if mol is None:
         return True
     if 'py_formula' in props:
         self.data['py_formula'] = desc.CalcMolFormula(mol)
     if 'py_em' in props:
         self.data['py_em'] = round(desc.CalcExactMolWt(mol), 5)
     if 'py_n_Cl_Br' in props:
         # Combined number of chlorine and bromine atoms.
         self.data['py_n_Cl_Br'] = sum(
             1 for atom in mol.GetAtoms()
             if atom.GetSymbol() in ('Cl', 'Br'))
     if 'py_na' in props:
         self.data['py_na'] = mol.GetNumAtoms()
     if 'py_mw' in props:
         self.data['py_mw'] = desc._CalcMolWt(mol)
     if 'py_fsp3' in props:
         self.data['py_fsp3'] = desc.CalcFractionCSP3(mol)
     if 'py_rb' in props:
         self.data['py_rb'] = desc.CalcNumRotatableBonds(mol)
     if 'py_tpsa' in props:
         self.data['py_tpsa'] = desc.CalcTPSA(mol)
     if 'py_clogp' in props:
         # First element of the Crippen result is used as the logP value.
         self.data['py_clogp'] = desc.CalcCrippenDescriptors(mol)[0]
     if 'py_nar' in props:
         self.data['py_nar'] = desc.CalcNumAromaticRings(mol)
     if 'py_nhba' in props:
         self.data['py_nhba'] = desc.CalcNumHBA(mol)
     if 'py_nhbd' in props:
         self.data['py_nhbd'] = desc.CalcNumHBD(mol)
     return False
Beispiel #14
0
def properties(mol):
    """Compute the eight values needed for the QED descriptor.

    Returns a list ordered as
    [MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS].

    Raises:
        TypeError: if ``mol`` is None.
    """
    if mol is None:
        raise TypeError('You need to provide a mol argument.')

    mol_weight = rdmd._CalcMolWt(mol)
    log_p = Crippen.MolLogP(mol)

    # HBA: total number of matches over all acceptor patterns that hit.
    acceptor_count = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            acceptor_count += len(mol.GetSubstructMatches(pattern))

    donor_count = Lipinski.NumHDonors(mol)
    polar_surface = MolSurf.TPSA(mol)
    rotatable_count = Lipinski.NumRotatableBonds(mol)

    # AROM: SSSR of a deep copy with the AliphaticRings substructure deleted.
    ring_source = Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)
    aromatic_rings = Chem.GetSSSR(ring_source)

    # ALERTS: number of structural-alert patterns present.
    alert_count = 0
    for alert in StructuralAlerts:
        if mol.HasSubstructMatch(alert):
            alert_count += 1

    return [
        mol_weight, log_p, acceptor_count, donor_count, polar_surface,
        rotatable_count, aromatic_rings, alert_count
    ]
Beispiel #15
0
        for name in tmp:
            if name[0] != '_' and name[-1] != '_' and name not in others:
                # filter out python reference implementations:
                if name[:2] == 'py' and name[2:] in tmp:
                    continue
                if name == 'print_function':
                    continue
                thing = getattr(mod, name)
                if _isCallable(thing):
                    namespace[name] = thing
                    _descList.append((name, thing))
    descList = _descList


# Average molecular weight descriptor: forwards every positional and keyword
# argument unchanged to _rdMolDescriptors._CalcMolWt.
MolWt = lambda *x, **y: _rdMolDescriptors._CalcMolWt(*x, **y)
# Expose the underlying implementation's version attribute on the wrapper.
MolWt.version = _rdMolDescriptors._CalcMolWt_version
# A lambda cannot carry a docstring, so the doc text is attached by hand.
MolWt.__doc__ = """The average molecular weight of the molecule

  >>> MolWt(Chem.MolFromSmiles('CC'))
  30.07
  >>> MolWt(Chem.MolFromSmiles('[NH4+].[Cl-]'))
  53.49...

"""

# Same calculation with True as the second positional argument, which makes
# the weight ignore hydrogens.
HeavyAtomMolWt = lambda x: MolWt(x, True)
HeavyAtomMolWt.__doc__ = """The average molecular weight of the molecule ignoring hydrogens

  >>> HeavyAtomMolWt(Chem.MolFromSmiles('CC'))
  24.02...
Beispiel #16
0
        for name in tmp:
            if name[0] != "_" and name[-1] != "_" and name not in others:
                # filter out python reference implementations:
                if name[:2] == "py" and name[2:] in tmp:
                    continue
                thing = getattr(mod, name)
                if _isCallable(thing):
                    namespace[name] = thing
                    _descList.append((name, thing))
    descList = _descList


from rdkit.Chem import rdMolDescriptors as _rdMolDescriptors

# Average molecular weight descriptor: forwards every positional and keyword
# argument unchanged to _rdMolDescriptors._CalcMolWt.
MolWt = lambda *x, **y: _rdMolDescriptors._CalcMolWt(*x, **y)
# Expose the underlying implementation's version attribute on the wrapper.
MolWt.version = _rdMolDescriptors._CalcMolWt_version
# A lambda cannot carry a docstring, so the doc text is attached by hand.
MolWt.__doc__ = """The average molecular weight of the molecule

  >>> MolWt(Chem.MolFromSmiles('CC'))
  30.07
  >>> MolWt(Chem.MolFromSmiles('[NH4+].[Cl-]'))
  53.49...

"""

# Same calculation with True as the second positional argument, which makes
# the weight ignore hydrogens.
HeavyAtomMolWt = lambda x: MolWt(x, True)
HeavyAtomMolWt.__doc__ = """The average molecular weight of the molecule ignoring hydrogens

  >>> HeavyAtomMolWt(Chem.MolFromSmiles('CC'))
  24.02...
def calc_molecular_weight(sm):
    """Return the molecular weight of ``sm`` with placeholder atoms excluded.

    Every "Q" in the input is rewritten to ``DUMMY_ATOM`` so the string can
    be parsed; afterwards ``DUMMY_ATOM_WEIGHT`` is subtracted once for each
    occurrence of ``DUMMY_ATOM`` in the rewritten string.
    """
    parseable = sm.replace("Q", DUMMY_ATOM)
    total_weight = rdMolDescriptors._CalcMolWt(Chem.MolFromSmiles(parseable))
    placeholder_count = parseable.count(DUMMY_ATOM)
    return total_weight - DUMMY_ATOM_WEIGHT * placeholder_count
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
import networkx as nx
import re
import copy

from .write_smiles import write_smiles

#***** graph processing funcs ***********
# Parameter names grouped as "shared" -- NOTE(review): their consumers are
# not visible in this part of the file; confirm the intended meaning.
SHARE_PARAMS = ["mw", "mn", "n", "pdi", "d"]
# Element symbol used as a placeholder atom in SMILES strings.
DUMMY_ATOM = "Y"
# Molecular weight RDKit reports for a lone placeholder atom ("[Y]");
# computed once at import time.
DUMMY_ATOM_WEIGHT = rdMolDescriptors._CalcMolWt(
    Chem.MolFromSmiles("[" + DUMMY_ATOM + "]"))
# Presumably an upper bound on atoms per structure -- TODO confirm at the
# point of use.
MAX_ATOMS = 10000


def draw_chem_graph(g):
    """
    draw chemicals from networkX object
    
    Parameters
    ----------
    g : networkX object

    """
    pos = nx.spring_layout(g)
    node_labels = nx.get_node_attributes(g, 'polymer')
    nx.draw_networkx_labels(g,
                            pos,
                            labels=node_labels,
                            font_size=10,
Beispiel #19
0
 if pid != '' and pid not in pubids and row[41].split(
         ',')[0] in prot_id2idx and row[37] != '':
     pubids.append(pid)
     # import pdb
     # pdb.set_trace()
     # print "entered"
     proId = row[41].split(',')[0]
     if proId not in prot_id2idx:
         print proId, pid, row[37]
     pd = psc_array[prot_id2idx[proId]]
     try:
         if (row[9] == '' or row[1] == '' or row[1] is None):
             continue
         else:
             md = Chem.MolFromSmiles(row[1])
             if float(row[9]) < 100 and _CalcMolWt(
                     md) < 1000 and md is not None:
                 fp1 = convToArr(
                     AllChem.GetMorganFingerprintAsBitVect(md, 1))
                 fp2 = convToArr(
                     AllChem.GetMorganFingerprintAsBitVect(md, 2))
                 fp3 = convToArr(
                     AllChem.GetMorganFingerprintAsBitVect(md, 3))
                 out = pid, proId, fp1.tolist(), fp2.tolist(
                 ), fp3.tolist(), pd.tolist(), 1
                 writer.writerow(out)
                 # print "writing done"
                 countp += 1
             elif float(row[9]) > 10000 and _CalcMolWt(
                     md) < 100 and md is not None:
                 fp1 = convToArr(
                     AllChem.GetMorganFingerprintAsBitVect(md, 1))
def calc_MW_from_SMILES_list(SMILES):
    """Return the average molecular weight for a SMILES string.

    Despite the function name, the argument is handed to
    ``Chem.MolFromSmiles`` as a single string.
    """
    return rdMolDescriptors._CalcMolWt(Chem.MolFromSmiles(SMILES))