Esempio n. 1
0
def get_filter_values(mol):
    """
    Compute the per-molecule values that the filters inspect.

    `mol` must be a `Chem.Mol`; this is checked with an assertion.

    Returns a dictionary holding the descriptor values plus three bookkeeping
    entries: "is_good" (initialised to True), "atoms" (the distinct element
    symbols present in the molecule) and "rejections" (an empty list meant to
    collect rejection reasons).
    """

    assert isinstance(mol, Chem.Mol)

    # Values that feed more than one entry are computed up front.
    n_rotatable = lip.NumRotatableBonds(mol)
    n_hetero = lip.NumHeteroatoms(mol)
    n_carbons, n_charges, largest_ring = get_atom_props(mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": n_rotatable,
        # every bond that is not rotatable is counted as rigid
        "rigid_bonds": mol.GetNumBonds() - n_rotatable,
        "num_rings": lip.RingCount(mol),
        "num_hetero_atoms": n_hetero,
        "charge": rdmolops.GetFormalCharge(mol),
        "num_carbons": n_carbons,
        "num_charges": n_charges,
        "max_ring_size": largest_ring,
    }

    try:
        values["hc_ratio"] = float(n_hetero) / float(n_carbons)
    except ZeroDivisionError:
        # no carbons at all: use a very large stand-in ratio
        values["hc_ratio"] = 100000000

    # number of BRICS bonds, used as a complexity indicator
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    values["is_good"] = True  # nothing has rejected the molecule yet
    # distinct element symbols only
    values["atoms"] = list({atom.GetSymbol() for atom in mol.GetAtoms()})
    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []  # filled in later with the reasons for rejection

    return values
Esempio n. 2
0
 def veber_infraction(molecule: Chem.Mol) -> bool:
   """
   Report whether a molecule breaks either limit checked by this filter.

   Returns True when the molecule has more than 10 rotatable bonds, or when
   its hydrogen-bond acceptor and donor counts add up to more than 10.
   """
   n_rotatable = Lipinski.NumRotatableBonds(molecule)
   n_h_bond_partners = Lipinski.NumHAcceptors(molecule) + Lipinski.NumHDonors(molecule)
   within_limits = n_rotatable <= 10 and n_h_bond_partners <= 10
   return not within_limits
Esempio n. 3
0
def get_descriptors(df):
    """
    Compute a table of descriptors for every molecule in ``df['ROMol']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``ROMol`` column of RDKit molecules.

    Returns
    -------
    pandas.DataFrame
        One row per molecule, indexed like ``df``, with the columns
        MW, LogP, TPSA, LabuteASA, HBA, HBD, FCSP3, MQN8, MQN10, NAR, NRB.
        MW, LogP, TPSA and LabuteASA are rounded to one decimal place.
        An empty ``df`` yields an empty frame with the same columns.
    """
    PandasTools.ChangeMoleculeRendering(renderer='String')

    column_names = [
        'MW', 'LogP', 'TPSA', 'LabuteASA', 'HBA', 'HBD', 'FCSP3', 'MQN8',
        'MQN10', 'NAR', 'NRB'
    ]

    rows = []
    for mol in df['ROMol']:
        # The MQN vector is computed once; entries 8 and 10 (1-based) are kept.
        mqns = rdMolDescriptors.MQNs_(mol)
        rows.append([
            round(Descriptors.ExactMolWt(mol), 1),
            round(Descriptors.MolLogP(mol), 1),
            round(Descriptors.TPSA(mol), 1),
            round(Descriptors.LabuteASA(mol), 1),
            Descriptors.NumHAcceptors(mol),
            Descriptors.NumHDonors(mol),
            Lipinski.FractionCSP3(mol),
            mqns[7],
            mqns[9],
            Lipinski.NumAromaticRings(mol),
            Descriptors.NumRotatableBonds(mol),
        ])

    # Passing the column names to the constructor keeps the empty-input case
    # valid (assigning 11 names to a zero-column frame raises).
    prop_df = pd.DataFrame(rows, columns=column_names)
    return prop_df.set_index(df.index)
Esempio n. 4
0
  def test1(self):
    " compare Lipinski descriptors against the reference values stored in the input SD file "
    # (descriptor function, SD property holding the reference value,
    #  label used in the failure message)
    checks = (
      (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS', 'h donors'),
      (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS', 'h acceptors'),
      (Lipinski.NumHDonors, 'NUM_HDONORS', 'h donors'),
      (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS', 'h acceptors'),
      (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS', 'heteroatoms'),
      (Lipinski.NumRotatableBonds, 'NUM_ROTATABLEBONDS', 'rotors'),
    )
    suppl = Chem.SDMolSupplier(self.inFileName)
    # idx is the 1-based record number; unreadable records still advance it
    for idx, m in enumerate(suppl, 1):
      if not m:
        continue
      for descriptor, prop_name, label in checks:
        calc = descriptor(m)
        orig = int(m.GetProp(prop_name))
        assert calc==orig,'bad num %s for mol %d (%s): %d != %d'%(label,idx,m.GetProp('SMILES'),calc,orig)
Esempio n. 5
0
def lipinski(smiles, verbose=False):
    """
    Compute four Lipinski descriptors for each SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings to parse with ``Chem.MolFromSmiles``.
    verbose : bool
        Accepted for backward compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        One row per input, in input order, with float columns
        "MW", "LogP", "NumHDonors" and "NumHAcceptors".  An empty input
        yields an empty frame with those four columns.
    """
    columnNames = ["MW", "LogP", "NumHDonors", "NumHAcceptors"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([
            Descriptors.MolWt(mol),
            Descriptors.MolLogP(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHAcceptors(mol),
        ])

    # Force a 2-D (n, 4) float array so that zero or one molecule produce a
    # well-formed table as well (the stacked 1-D row used before did not).
    baseData = np.array(rows, dtype=float).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)
Esempio n. 6
0
    def calc_lipinski(self, mol):
        """
        Evaluate Lipinski's rules for a molecule.

        Returns a 2-tuple:
            - True when every rule below holds, otherwise False
            - a dictionary with the four measured values

        Rules applied:
            - hydrogen bond donors <= 5
            - hydrogen bond acceptors <= 10
            - molecular weight < 500
            - logP (rounded to 4 decimals) < 5
        """

        report = {
            'hydrogen_bond_donors': Lipi.NumHDonors(mol),
            'hydrogen_bond_acceptors': Lipi.NumHAcceptors(mol),
            'molecular_weight': Descriptors.MolWt(mol),
            'logp': round(Crippen.MolLogP(mol), 4),
        }

        passed = all([
            report['hydrogen_bond_donors'] <= 5,
            report['hydrogen_bond_acceptors'] <= 10,
            report['molecular_weight'] < 500,
            report['logp'] < 5,
        ])

        return (passed, report)
Esempio n. 7
0
def lipinski_rule(mol):
    """
    Return four booleans, one per rule, in this order:
    NHOH count <= 5, NO count <= 10, exact molecular weight <= 500,
    and the value of ``LogP('logP').run(...)`` on the molecule's hashed
    atom-pair bit-vector fingerprint <= 5.
    """
    atom_pair_bits = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    donors_ok = Lipinski.NHOHCount(mol) <= 5
    acceptors_ok = Lipinski.NOCount(mol) <= 10
    weight_ok = Descriptors.ExactMolWt(mol) <= 500
    logp_ok = LogP('logP').run(atom_pair_bits) <= 5

    return [donors_ok, acceptors_ok, weight_ok, logp_ok]
Esempio n. 8
0
def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
  """ Build one database row for a molecule.

  Arguments:
    - mol: the molecule; a falsy value raises ValueError
    - typeConversions: indexable by integer type code; element [1] of each
      entry is called to convert a property string
    - globalProps: dict mapping property name -> type code; updated in place
      with the code that finally converted each property
    - nDone: used to build the fallback name 'Mol_<nDone>'
    - nameProp: molecule property holding the compound name
    - nameCol: a property whose name matches this (case-insensitively) is
      left out of the property dictionary
    - redraw: recompute 2D coordinates for the molecule
    - keepHs: when true, Chem.SanitizeMol() is run on the molecule
    - skipProps: do not collect properties (an empty dict is stored)
    - addComputedProps: store donor/acceptor/rotatable-bond counts, MolWt
      and MolLogP on the molecule as properties before collecting them
    - skipSmiles: leave the SMILES out of the row
    - uniqNames: when true, a name already in namesSeen causes the molecule
      to be skipped
    - namesSeen: set of names encountered so far; updated in place.  When it
      is None no name tracking (and no duplicate check) is done.

  Returns the row as a list: [name, smiles (unless skipSmiles), binary
  molecule holder, property dict], or None for a skipped duplicate.
  """
  if not mol:
    raise ValueError('no molecule')
  if keepHs:
    Chem.SanitizeMol(mol)
  try:
    nm = mol.GetProp(nameProp)
  except KeyError:
    nm = None
  if not nm:
    nm = 'Mol_%d'%nDone
  # namesSeen defaults to None; guard it instead of crashing on .add()
  if namesSeen is not None:
    if uniqNames and nm in namesSeen:
      logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
      return None
    namesSeen.add(nm)
  row = [nm]
  pD={}
  if not skipProps:
    if addComputedProps:
      nHD=Lipinski.NumHDonors(mol)
      mol.SetProp('DonorCount',str(nHD))
      nHA=Lipinski.NumHAcceptors(mol)
      mol.SetProp('AcceptorCount',str(nHA))
      nRot=Lipinski.NumRotatableBonds(mol)
      mol.SetProp('RotatableBondCount',str(nRot))
      MW=Descriptors.MolWt(mol)
      mol.SetProp('AMW',str(MW))
      logp=Crippen.MolLogP(mol)
      mol.SetProp('MolLogP',str(logp))

    for pn in list(mol.GetPropNames()):
      if pn.lower()==nameCol.lower(): continue
      pv = mol.GetProp(pn).strip()
      if pv.find('>')<0 and pv.find('<')<0:
        # start from the type code recorded for this property (default 2)
        # and step down until a conversion succeeds or code 0 is reached
        colTyp = globalProps.get(pn,2)
        while colTyp>0:
          try:
            typeConversions[colTyp][1](pv)
          except Exception:
            colTyp-=1
          else:
            break
        globalProps[pn]=colTyp
        pD[pn]=typeConversions[colTyp][1](pv)
      else:
        # values containing '<' or '>' are stored as the raw string
        pD[pn]=pv
  if redraw:
    # was Compute2DCoords(m): 'm' is not defined in this function
    AllChem.Compute2DCoords(mol)
  if not skipSmiles:
    row.append(Chem.MolToSmiles(mol,True))
  row.append(DbModule.binaryHolder(mol.ToBinary()))
  row.append(pD)
  return row
Esempio n. 9
0
def check_ligand(file_path):
    """
    Report whether an SD file contains at least one acceptable ligand.

    A molecule is acceptable when it satisfies all four base rules
    (H-bond donors <= 5, H-bond acceptors <= 10, exact molecular weight
    <= 500, logP <= 5) and at least four of the five variant rules
    (-0.4 <= logP <= 5.6, 40 <= molar refractivity <= 130,
    180 <= exact molecular weight <= 500, 20 <= heavy atoms <= 70,
    TPSA <= 140).

    Returns False when ``file_path`` is not an existing file or when no
    readable molecule qualifies.
    """
    if not os.path.isfile(file_path):
        return False

    for mol in Chem.SDMolSupplier(file_path):
        if mol is None:
            continue

        # logP and exact weight feed both rule sets; compute them once.
        log_p = Crippen.MolLogP(mol)
        exact_mw = Descriptors.ExactMolWt(mol)

        base_rules_ok = (
            Lipinski.NumHDonors(mol) <= 5
            and Lipinski.NumHAcceptors(mol) <= 10
            and exact_mw <= 500
            and log_p <= 5
        )
        if not base_rules_ok:
            continue

        variants_passed = sum((
            -0.4 <= log_p <= 5.6,
            40 <= Crippen.MolMR(mol) <= 130,
            180 <= exact_mw <= 500,
            20 <= Lipinski.HeavyAtomCount(mol) <= 70,
            MolSurf.TPSA(mol) <= 140,
        ))
        if variants_passed >= 4:
            # one qualifying molecule decides the result
            return True

    return False
Esempio n. 10
0
def score_molecule(smiles):
    """
    Score a SMILES string against the LipinskiRuleOfFiveDecorator limits.

    One point is given for each of logP, molecular weight, H-bond donors,
    H-bond acceptors and rotatable bonds that is strictly below its limit.

    Returns (lipinski_score, qed).  If parsing or any calculation raises,
    the exception is logged and (0, MAX_QED + 1) is returned.
    """
    limits = LipinskiRuleOfFiveDecorator
    lipinski_score = 0
    qed = limits.MAX_QED + 1

    try:
        # The acceptor count used to be compared against MAX_H_DONORS.
        # Use the acceptor limit when the class defines one; otherwise keep
        # the previous threshold.
        # NOTE(review): confirm MAX_H_ACCEPTORS exists on the decorator.
        max_h_acceptors = getattr(limits, 'MAX_H_ACCEPTORS', limits.MAX_H_DONORS)

        m = Chem.MolFromSmiles(smiles)
        logp = Descriptors.MolLogP(m)
        lipinski_score += 1 if logp < limits.MAX_LOGP else 0

        wt = Descriptors.MolWt(m)
        lipinski_score += 1 if wt < limits.MAX_MOL_WT else 0

        hdonor = Lipinski.NumHDonors(m)
        lipinski_score += 1 if hdonor < limits.MAX_H_DONORS else 0

        hacceptor = Lipinski.NumHAcceptors(m)
        lipinski_score += 1 if hacceptor < max_h_acceptors else 0

        rotatable_bond = Lipinski.NumRotatableBonds(m)
        lipinski_score += 1 if rotatable_bond < limits.MAX_ROTATABLE_BONDS else 0

        qed = QED.qed(m)
    except Exception as ex:
        # best effort: a molecule that cannot be scored gets a zero score
        lipinski_score = 0
        logger.exception(ex)

    return lipinski_score, qed
Esempio n. 11
0
def get_ro5_from_mol(mol):
    """
    Evaluate Lipinski's rule of five for one molecule.

    Each criterion is reported as 1 (fulfilled) or 0 (violated):
    exact molecular weight <= 500, logP <= 5, H-bond donors <= 5 and
    H-bond acceptors <= 10.  The final entry, ``lipinski``, is 1 when at
    least three of the four criteria are fulfilled.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule.

    Returns
    -------
    pd.Series
        Flags indexed by "mw", "logp", "hbd", "hba", "lipinski".
    """

    labels = ["mw", "logp", "hbd", "hba"]
    flags = [
        int(Descriptors.ExactMolWt(mol) <= 500),
        int(Descriptors.MolLogP(mol) <= 5),
        int(Lipinski.NumHDonors(mol) <= 5),
        int(Lipinski.NumHAcceptors(mol) <= 10),
    ]
    # overall verdict: one violation is tolerated
    overall = int(sum(flags) >= 3)

    return pd.Series(flags + [overall], index=labels + ["lipinski"])
Esempio n. 12
0
def get_phys_fp(compound):
    """
    Build a four-element list of scaled properties for a compound record.

    Elements, in order: ``mol_weight`` / 500, the value from ``get_logp``
    for the compound's ``dsstox_sid`` divided by 10 (a falsy value is passed
    through unchanged), H-bond donor count / 5 and H-bond acceptor
    count / 10 of the molecule parsed from ``compound['smiles']``.
    """
    scaled_weight = compound['mol_weight'] / 500

    scaled_logp = get_logp(compound['dsstox_sid'])
    if scaled_logp:
        scaled_logp = scaled_logp / 10

    mol = chm.MolFromSmiles(compound['smiles'])
    return [
        scaled_weight,
        scaled_logp,
        lip.NumHDonors(mol) / 5,
        lip.NumHAcceptors(mol) / 10,
    ]
Esempio n. 13
0
def lipinski(smile):
	"""Parse a SMILES string and return the tuple
	(H-bond donors, H-bond acceptors, molecular weight, logP)."""
	mol = Chem.MolFromSmiles(smile)

	# listed in the order of the returned tuple
	descriptor_fns = (
		Lipinski.NumHDonors,
		Lipinski.NumHAcceptors,
		Descriptors.MolWt,
		Descriptors.MolLogP,
	)
	return tuple(fn(mol) for fn in descriptor_fns)
def mole_proper(mol):
    """Return the tuple (H-bond donors, H-bond acceptors, rotatable bonds,
    molecular weight, logP, TPSA) for a molecule."""
    # listed in the order of the returned tuple
    descriptor_fns = (
        Lipinski.NumHDonors,
        Lipinski.NumHAcceptors,
        Lipinski.NumRotatableBonds,
        Descriptors.MolWt,
        Crippen.MolLogP,
        Descriptors.TPSA,
    )
    return tuple(fn(mol) for fn in descriptor_fns)
Esempio n. 15
0
def auto_sampling(mult_factor, mol, log):
    """
    Derive a sample count from the molecule's flexibility.

    Rotatable bonds, NH/OH groups and saturated rings each contribute 3 to
    the base count, which is then multiplied by ``mult_factor``.  When the
    molecule has none of these features, ``mult_factor`` itself is returned.
    ``log`` is not used.
    """
    flexible_features = (
        Lipinski.NumRotatableBonds(mol)    # C3 rotations
        + Lipinski.NHOHCount(mol)          # OH/NH rotations
        + Lipinski.NumSaturatedRings(mol)  # boat/chair/envelope conformations
    )
    base_samples = 3 * flexible_features
    if base_samples:
        return mult_factor * base_samples
    return mult_factor
Esempio n. 16
0
def filter(mol, type = "frags"):
    """
    Decide whether a molecule passes the property limits for ``type``.

    With ``type == "frags"``: H-bond donors <= 8, H-bond acceptors <= 8,
    at least one ring and molecular weight <= 800.
    With any other ``type``: donors <= 5, acceptors <= 5 and weight <= 500.
    """
    n_donors = Lipinski.NumHDonors(mol)
    n_acceptors = Lipinski.NumHAcceptors(mol)
    n_rings = len(Chem.GetSymmSSSR(mol))
    weight = Chem.Descriptors.MolWt(mol)

    if type == "frags":
        return n_donors <= 8 and n_acceptors <= 8 and n_rings >= 1 and weight <= 800
    return n_donors <= 5 and n_acceptors <= 5 and weight <= 500
Esempio n. 17
0
def auto_sampling(mult_factor,mol,args,log):
	"""
	Derive a sample count from the molecule's flexibility.

	For metal complexes with at least one entry in args.metal_idx the
	multiplier is first scaled by 3 per metal index.  Rotatable bonds, NH/OH
	groups and saturated rings each contribute 3 to the base count, which is
	multiplied by the multiplier; with no such features the multiplier
	itself is returned.  ``log`` is not used.
	"""
	if args.metal_complex and len(args.metal_idx) > 0:
		# accounts for possible trans/cis isomers in metal complexes
		mult_factor = mult_factor*3*len(args.metal_idx)

	flexible_features = (
		Lipinski.NumRotatableBonds(mol)    # C3 rotations
		+ Lipinski.NHOHCount(mol)          # OH/NH rotations
		+ Lipinski.NumSaturatedRings(mol)  # boat/chair/envelope conformations
	)
	base_samples = 3*flexible_features
	if base_samples:
		return mult_factor*base_samples
	return mult_factor
Esempio n. 18
0
  def test1(self):
    " compare Lipinski descriptors against the reference values stored in the input SD file "
    # figure out which rotor version we are using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

    rotor_options = rdMolDescriptors.NumRotatableBondsOptions
    # (descriptor function, SD property holding the reference value,
    #  label used in the failure message)
    checks = (
      (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS', 'h donors'),
      (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS', 'h acceptors'),
      (Lipinski.NumHDonors, 'NUM_HDONORS', 'h donors'),
      (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS', 'h acceptors'),
      (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS', 'heteroatoms'),
      (Lipinski.NumRotatableBonds, rot_prop, 'rotors'),
      # test the underlying rotatable-bond counters with explicit options
      (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(mol, rotor_options.NonStrict),
       NonStrict, 'rotors'),
      (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(mol, rotor_options.Strict),
       Strict, 'rotors'),
    )

    suppl = Chem.SDMolSupplier(self.inFileName)
    # idx is the 1-based record number; unreadable records still advance it
    for idx, m in enumerate(suppl, 1):
      if not m:
        continue
      for descriptor, prop_name, label in checks:
        calc = descriptor(m)
        orig = int(m.GetProp(prop_name))
        assert calc==orig,'bad num %s for mol %d (%s): %d != %d'%(label,idx,m.GetProp('SMILES'),calc,orig)
Esempio n. 19
0
def check_lipinski(mol):
    """
    Check a molecule wrapper against donor/acceptor/logP limits.

    Returns (False, False) unless H-bond donors <= 5, H-bond acceptors <= 5
    and logP < 5.  Otherwise returns (True, False); in that case, when the
    molecular weight is at least 450, a random 'terminal_fg' functional
    group is first joined onto ``mol``.
    """
    fgs = load_functional_groups()
    h_donors = Lipinski.NumHDonors(mol.rdmol)
    h_acceptors = Lipinski.NumHAcceptors(mol.rdmol)
    log_p = MolLogP(mol.rdmol)
    wt = MolWt(mol.rdmol)

    within_limits = h_donors <= 5 and h_acceptors <= 5 and log_p < 5
    if not within_limits:
        return False, False

    if wt >= 450:
        mol.join(fgs['terminal_fg'].get_random())
    return True, False
Esempio n. 20
0
 def testIssue2183420(self):
   " acceptor counts for small amines and amides (acceptor definition issue) "
   # (SMILES, expected number of H-bond acceptors)
   expected_counts = (
     ('NC', 1),
     ('CNC', 1),
     ('CN(C)C', 1),
     ('NC(=O)', 1),
     ('NC(=O)C', 1),
     ('CNC(=O)', 1),
     ('CNC(=O)C', 1),
     ('O=CNC(=O)C', 2),
     ('O=C(C)NC(=O)C', 2),
   )
   for smiles, n_acceptors in expected_counts:
     self.assertTrue(Lipinski.NumHAcceptors(Chem.MolFromSmiles(smiles)) == n_acceptors)
Esempio n. 21
0
    def calcScore(self, m, smi):
        """
        Reset ``self.value`` to 0.0 and report whether molecule ``m`` is
        within the accepted property ranges.

        Returns False when the molecular weight is below 100 or above 700,
        when there are more than 5 H-bond donors, or more than 10 H-bond
        acceptors; True otherwise.  ``smi`` is not used.
        """
        self.value = 0.0

        weight = Descriptors.MolWt(m)
        if weight < 100 or weight > 700:
            return False

        donors_ok = Lipinski.NumHDonors(m) <= 5
        acceptors_ok = Lipinski.NumHAcceptors(m) <= 10
        return donors_ok and acceptors_ok
Esempio n. 22
0
    def getDiscriptor(self):
        """
        Compute descriptors for every structure in 'extChronicStrcture.csv'
        and write them to 'Descriptors.csv'.

        The working directory is changed to a hard-coded data folder first.
        Rows with a missing CAS number or SMILES are dropped.  One output
        row is written per remaining structure; the pair (H-bond donors,
        H-bond acceptors) is also printed for each structure.
        """
        from rdkit.Chem import Crippen
        from rdkit import Chem
        import pandas as pd
        from rdkit.Chem import Descriptors, Lipinski
        import os

        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')
        df = df[['CAS', 'canonical_smiles']]
        df = df.dropna(how='any')

        columns = [
            'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
            'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
            'AromaticCarbocycles', 'AromaticHeterocycles'
        ]

        # The pattern never changes, so compile it once.
        aromatic_atom = Chem.MolFromSmarts('[a]')

        # Collect plain rows and build the frame once at the end; growing a
        # DataFrame with concat inside the loop copies it on every iteration.
        rows = []
        for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
            mol = Chem.MolFromSmiles(smiles)
            wt = Descriptors.MolWt(mol)
            rot = Lipinski.NumRotatableBonds(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            logp = Crippen.MolLogP(mol)
            # fraction of the molecule's atoms that are aromatic
            aromaticHeavyatoms = len(mol.GetSubstructMatches(aromatic_atom))
            aromprop = float(aromaticHeavyatoms / mol.GetNumAtoms())
            TPSA = Descriptors.TPSA(mol)
            HDonors = Descriptors.NumHDonors(mol)
            HAcceptors = Descriptors.NumHAcceptors(mol)

            FractionCSP3 = Descriptors.FractionCSP3(mol)
            AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
            AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)

            print(HDonors, HAcceptors)
            rows.append([
                cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
                FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
            ])

        resultDf = pd.DataFrame(rows, columns=columns)
        resultDf.to_csv('Descriptors.csv', index=False)
Esempio n. 23
0
def lipinski_trial(smiles):
    '''
    Check a SMILES string against Lipinski's rules and report each outcome.

    Rules:
    Hydrogen bond donors <= 5
    Hydrogen bond acceptors <= 10
    Molecular weight < 500 daltons
    logP < 5

    Returns a pair of lists (passed, failed) holding one message per rule,
    each list in the rule order above.  Raises Exception when the SMILES
    string cannot be parsed.
    '''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise Exception('%s is not a valid SMILES string' % smiles)

    num_hdonors = Lipinski.NumHDonors(mol)
    num_hacceptors = Lipinski.NumHAcceptors(mol)
    mol_weight = Descriptors.MolWt(mol)
    mol_logp = Crippen.MolLogP(mol)

    # (rule violated?, message if violated, message if satisfied)
    outcomes = (
        (num_hdonors > 5,
         'Over 5 H-bond donors, found %s' % num_hdonors,
         'Found %s H-bond donors' % num_hdonors),
        (num_hacceptors > 10,
         'Over 10 H-bond acceptors, found %s' % num_hacceptors,
         'Found %s H-bond acceptors' % num_hacceptors),
        (mol_weight >= 500,
         'Molecular weight over 500, calculated %s' % mol_weight,
         'Molecular weight: %s' % mol_weight),
        (mol_logp >= 5,
         'Log partition coefficient over 5, calculated %s' % mol_logp,
         'Log partition coefficient: %s' % mol_logp),
    )

    passed = [ok_msg for violated, _, ok_msg in outcomes if not violated]
    failed = [bad_msg for violated, bad_msg, _ in outcomes if violated]

    return passed, failed
    def run_filter(self, mol):
        """
        Apply a strict Lipinski filter: the molecule must satisfy every one
        of the limits below to pass.

        Limits:
            Exact molecular weight: max 500
            Number of H donors: max 5
            Number of H acceptors: max 10
            logP: max 5

        If you use the Lipinski Filter please cite: C.A. Lipinski et al.
        Experimental and computational approaches to estimate solubility and
        permeability in drug discovery and development settings Advanced Drug
        Delivery Reviews, 46 (2001), pp. 3-26

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool bool: True if the mol passes the filter; False if it
          fails the filter
        """

        # (descriptor, largest allowed value); evaluated lazily in this
        # order, stopping at the first limit that is exceeded
        limits = (
            (Descriptors.ExactMolWt, 500),
            (Lipinski.NumHDonors, 5),
            (Lipinski.NumHAcceptors, 10),
            (Crippen.MolLogP, 5),
        )
        return not any(descriptor(mol) > ceiling for descriptor, ceiling in limits)
Esempio n. 25
0
def ProcessMol(session,
               mol,
               globalProps,
               nDone,
               nameProp='_Name',
               nameCol='compound_id',
               redraw=False,
               keepHs=False,
               skipProps=False,
               addComputedProps=False,
               skipSmiles=False):
    """
    Create a Compound for a molecule and add it to the session.

    Arguments:
      - session: object whose ``add()`` receives the new Compound
      - mol: the molecule; a falsy value raises ValueError
      - globalProps: container of property names; only molecule properties
        named in it are copied onto the compound
      - nDone: used to build the fallback name 'Mol_<nDone>'
      - nameProp: molecule property holding the compound name
      - nameCol: compound attribute that receives the name; a property whose
        name matches it (case-insensitively) is not copied
      - redraw: recompute 2D coordinates for the molecule
      - keepHs: when true, Chem.SanitizeMol() is run on the molecule
      - skipProps: do not copy or compute any properties
      - addComputedProps: store donor/acceptor/rotatable-bond counts, MolWt
        and MolLogP on the compound
      - skipSmiles: do not store the SMILES

    Returns the Compound.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # was Compute2DCoords(m): 'm' is not defined in this function
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()
    setattr(cmpd, nameCol, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pn in globalProps:
                # stored under the lower-cased property name
                setattr(cmpd, pn.lower(), pv)
    return cmpd
Esempio n. 26
0
    def testMQNDetails(self):
        """
        Regression test: MQNs computed for every molecule in
        aromat_regress.txt must equal the pickled reference values.
        """
        refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                               'MQNs_regress.pkl')
        refFile2 = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                                'MQNs_non_strict_regress.pkl')
        # figure out which definition we are currently using
        m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(m) == 2:
            refFile = refFile2

        # The reference pickle is read as text so that Windows line endings
        # can be normalised before unpickling; the `with` block closes it.
        with open(refFile, 'r') as intf:
            pkl = intf.read().replace('\r\n', '\n').encode('utf-8')
        # NOTE: unpickling is only acceptable here because the file is
        # test data shipped alongside the code.
        refData = cPickle.loads(pkl, encoding='bytes')

        fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                          'aromat_regress.txt')
        ms = list(Chem.SmilesMolSupplier(fn, delimiter='\t'))
        for i, m in enumerate(ms):
            mqns = rdMolDescriptors.MQNs_(m)
            expected = refData[i][1]
            if mqns != expected:
                # print (position, computed, reference) for each mismatch to
                # make the assertion failure below easier to diagnose
                indices = [
                    (j, x, y)
                    for j, (x, y) in enumerate(zip(mqns, expected))
                    if x != y
                ]
                print(i, Chem.MolToSmiles(m), indices)
            self.assertEqual(mqns, expected)
Esempio n. 27
0
    def testMQNDetails(self):
        """
        Regression test: MQNs computed for every molecule in
        aromat_regress.txt must equal the pickled reference values.
        """
        data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

        # pick the reference set matching the rotatable-bond definition
        # currently in use
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(probe) == 2:
            ref_name = 'MQNs_non_strict_regress.pkl'
        else:
            ref_name = 'MQNs_regress.pkl'

        with open(os.path.join(data_dir, ref_name), 'rb') as intf:
            refData = pickle.load(intf)

        smiles_path = os.path.join(data_dir, 'aromat_regress.txt')
        supplier = Chem.SmilesMolSupplier(smiles_path, delimiter='\t')
        for i, mol in enumerate(list(supplier)):
            mqns = rdMolDescriptors.MQNs_(mol)
            expected = refData[i][1]
            if mqns != expected:
                # print (position, computed, reference) for each mismatch
                indices = [
                    (j, x, y)
                    for j, (x, y) in enumerate(zip(mqns, expected))
                    if x != y
                ]
                print(i, Chem.MolToSmiles(mol), indices)
            self.assertEqual(mqns, expected)
def filter2(filter1_dict, input_nr_of_hetero_atoms):
    """ Keep only the substructures that contain enough heteroatoms.

    filter1_dict: dict mapping a structure identifier to a list of
        substructure SMILES; the placeholder '<NA>' is ignored
    input_nr_of_hetero_atoms: integer, minimum number of heteroatoms a
        substructure must have to be kept

    Returns a dict with the same keys. Each value is the list of kept
    SMILES, or ['<NA>'] when nothing was kept for that structure. The total
    number of kept substructures is printed.
    """

    filter2_dict = {}
    for structure_id, candidates in filter1_dict.items():
        kept = [
            smile for smile in candidates
            if smile != '<NA>'
            and Lipinski.NumHeteroatoms(Chem.MolFromSmiles(smile)) >= input_nr_of_hetero_atoms
        ]
        filter2_dict[structure_id] = kept if kept else ['<NA>']

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('nr of substructures after filter 2 [hetero atoms]')
    nr_of_subs = sum(
        1
        for substructures in filter2_dict.values()
        for smile in substructures
        if smile != '<NA>'
    )
    print(nr_of_subs)
    return filter2_dict
Esempio n. 29
0
def calculate_property(m):
    """Return the tuple (molecular weight, rotatable bond count, logP)."""
    # NOTE: a synthetic-accessibility score (-sascorer.calculateScore(m)) was
    # once the first element of the tuple; it is currently disabled.
    descriptor_fns = (
        Descriptors.MolWt,
        Lipinski.NumRotatableBonds,
        Descriptors.MolLogP,
    )
    return tuple(fn(m) for fn in descriptor_fns)
Esempio n. 30
0
 def testIssue80(self):
     " computing a Lipinski descriptor must not change the logP of the molecule "
     from rdkit.Chem import Lipinski
     m = Chem.MolFromSmiles('CCOC')
     ref = Crippen.MolLogP(m)
     # run an unrelated descriptor in between the two logP calculations
     Lipinski.NHOHCount(m)
     probe = Crippen.MolLogP(m)
     # failUnless is a deprecated alias that was removed in Python 3.12
     self.assertEqual(probe, ref)
Esempio n. 31
0
def generate(smiles):
    """
    Compute four descriptors for each SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings to parse with ``Chem.MolFromSmiles``.

    Returns
    -------
    pandas.DataFrame
        One row per input, in input order, with float columns "MolLogP",
        "MolWt", "NumRotatableBonds" and "AromaticProportion".  An empty
        input yields an empty frame with those four columns.
    """
    columnNames = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([
            Crippen.MolLogP(mol),
            Descriptors.MolWt(mol),
            Lipinski.NumRotatableBonds(mol),
            getAromaticProportion(mol),
        ])

    # Force a 2-D (n, 4) float array so that zero or one molecule produce a
    # well-formed table as well (the stacked 1-D row used before did not).
    baseData = np.array(rows, dtype=float).reshape(len(rows), len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)
Esempio n. 32
0
def pct_rotatable_bonds(mol):
    """Return the number of rotatable bonds divided by the total number of
    bonds, or 0 for a molecule without bonds."""
    total_bonds = mol.GetNumBonds()
    if total_bonds <= 0:
        return 0
    return Lipinski.NumRotatableBonds(mol) / total_bonds