def get_filter_values(mol):
    """
    Compute the per-molecule values that the filters operate on.

    Returns a dictionary keyed by property name ("MW", "logP", "HBA", ...).
    Besides the descriptor values it carries two bookkeeping entries:
    "is_good" (initialised to True) and "rejections" (an empty list meant
    to collect the reasons for rejection).
    """
    assert isinstance(mol, Chem.Mol)

    n_rotatable = lip.NumRotatableBonds(mol)
    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": n_rotatable,
        # every bond that is not rotatable is counted as rigid
        # (assumes the two categories are mutually exclusive)
        "rigid_bonds": mol.GetNumBonds() - n_rotatable,
        "num_rings": lip.RingCount(mol),
        "num_hetero_atoms": lip.NumHeteroatoms(mol),
        # the formal charge reported by RDKit is trusted as-is
        "charge": rdmolops.GetFormalCharge(mol),
    }

    n_carbons, n_charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = n_carbons
    values["num_charges"] = n_charges
    values["max_ring_size"] = largest_ring

    try:
        ratio = float(values["num_hetero_atoms"]) / float(n_carbons)
    except ZeroDivisionError:
        # no carbons at all: fall back to a very large sentinel ratio
        ratio = 100000000
    values["hc_ratio"] = ratio

    # number of BRICS bonds, used as a complexity-related measure
    values["fc"] = sum(1 for _ in Brics.FindBRICSBonds(mol))
    # starts out True; no filter has been applied yet
    values["is_good"] = True

    # distinct element symbols present in the molecule
    values["atoms"] = list({atom.GetSymbol() for atom in mol.GetAtoms()})
    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []
    return values
def veber_infraction(molecule: Chem.Mol) -> bool:
    """
    Report whether a molecule violates the Veber-style limits used here.

    A violation is flagged when the molecule has more than 10 rotatable
    bonds, or when its hydrogen-bond acceptor and donor counts add up to
    more than 10.
    """
    if Lipinski.NumRotatableBonds(molecule) > 10:
        return True
    h_bond_total = (Lipinski.NumHAcceptors(molecule)
                    + Lipinski.NumHDonors(molecule))
    return h_bond_total > 10
def get_descriptors(df):
    """
    Compute a table of RDKit descriptors for every molecule in ``df['ROMol']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'ROMol'`` column holding the molecules to describe.

    Returns
    -------
    pandas.DataFrame
        One row per input molecule, carrying the same index as ``df``, with
        the columns MW, LogP, TPSA, LabuteASA (each rounded to one decimal),
        HBA, HBD, FCSP3, MQN8, MQN10, NAR and NRB.  An empty input yields an
        empty frame with those columns.
    """
    column_names = [
        'MW', 'LogP', 'TPSA', 'LabuteASA', 'HBA', 'HBD', 'FCSP3', 'MQN8',
        'MQN10', 'NAR', 'NRB'
    ]
    PandasTools.ChangeMoleculeRendering(renderer='String')

    rows = []
    for m in df['ROMol']:
        # Compute the chemical properties of this molecule.
        # The MQN vector is computed once; entries 8 and 10 are read from it.
        mqns = rdMolDescriptors.MQNs_(m)
        rows.append([
            round(Descriptors.ExactMolWt(m), 1),
            round(Descriptors.MolLogP(m), 1),
            round(Descriptors.TPSA(m), 1),
            round(Descriptors.LabuteASA(m), 1),
            Descriptors.NumHAcceptors(m),
            Descriptors.NumHDonors(m),
            Lipinski.FractionCSP3(m),
            mqns[7],
            mqns[9],
            Lipinski.NumAromaticRings(m),
            Chem.Descriptors.NumRotatableBonds(m),
        ])

    # Columns and index go to the constructor (instead of being assigned
    # afterwards) so that an empty input does not fail with a length mismatch.
    return pd.DataFrame(rows, columns=column_names, index=df.index)
def test1(self):
    " testing first 200 mols from NCI "
    suppl = Chem.SDMolSupplier(self.inFileName)

    # SMARTS-based counters kept from the original test; the checks below
    # do not call them.
    oldDonorSmarts = Chem.MolFromSmarts('[NH1,NH2,OH1]')
    OldDonorCount = lambda x, y=oldDonorSmarts: Lipinski._NumMatches(x, y)
    oldAcceptorSmarts = Chem.MolFromSmarts('[N,O]')
    OldAcceptorCount = lambda x, y=oldAcceptorSmarts: Lipinski._NumMatches(x, y)

    # (descriptor function, SD property with the reference value, failure message)
    checks = (
        (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHDonors, 'NUM_HDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
         'bad num heteroatoms for mol %d (%s): %d != %d'),
        (Lipinski.NumRotatableBonds, 'NUM_ROTATABLEBONDS',
         'bad num rotors for mol %d (%s): %d != %d'),
    )

    # idx is the 1-based position in the supplier; records that failed to
    # parse are skipped but still consume an index.
    for idx, m in enumerate(suppl, 1):
        if not m:
            continue
        for compute, prop_name, message in checks:
            calc = compute(m)
            orig = int(m.GetProp(prop_name))
            assert calc == orig, message % (idx, m.GetProp('SMILES'), calc, orig)
def lipinski(smiles, verbose=False):
    """
    Build a table of Lipinski descriptors for a collection of SMILES strings.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.
    verbose : bool, optional
        Accepted for backward compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES with the float columns ``MW``, ``LogP``,
        ``NumHDonors`` and ``NumHAcceptors``.  An empty input yields an
        empty frame with those four columns.
    """
    columnNames = ["MW", "LogP", "NumHDonors", "NumHAcceptors"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([
            Descriptors.MolWt(mol),
            Descriptors.MolLogP(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHAcceptors(mol),
        ])

    # Force a (n_molecules, 4) array.  The previous vstack-based accumulation
    # left a 1-D array for a single molecule (and for no molecules), which
    # the DataFrame constructor rejects against four column names.
    data = np.asarray(rows, dtype=float).reshape(-1, len(columnNames))
    return pd.DataFrame(data=data, columns=columnNames)
def calc_lipinski(self, mol):
    """
    Evaluate Lipinski's rules for a molecule.

    The rules applied are:
        - hydrogen bond donors <= 5
        - hydrogen bond acceptors <= 10
        - molecular weight < 500 daltons
        - logP < 5 (logP is rounded to 4 decimals before the comparison)

    Returns:
        a tuple consisting of:
            - a boolean that is True only when all four rules hold
            - a dictionary with the values the rules were checked against
    """
    values = {
        'hydrogen_bond_donors': Lipi.NumHDonors(mol),
        'hydrogen_bond_acceptors': Lipi.NumHAcceptors(mol),
        'molecular_weight': Descriptors.MolWt(mol),
        'logp': round(Crippen.MolLogP(mol), 4),
    }
    passed = (values['hydrogen_bond_donors'] <= 5
              and values['hydrogen_bond_acceptors'] <= 10
              and values['molecular_weight'] < 500
              and values['logp'] < 5)
    return passed, values
def lipinski_rule(mol):
    """
    Evaluate four rule-of-five style checks for a molecule.

    Returns a four-element list, in this order:
        - NH/OH count <= 5
        - N/O count <= 10
        - exact molecular weight <= 500
        - value returned by ``LogP('logP').run(...)`` on the molecule's
          hashed atom-pair fingerprint <= 5
    """
    fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    donors_ok = Lipinski.NHOHCount(mol) <= 5
    acceptors_ok = Lipinski.NOCount(mol) <= 10
    weight_ok = Descriptors.ExactMolWt(mol) <= 500
    predicted_logp = LogP('logP').run(fingerprint)

    return [donors_ok, acceptors_ok, weight_ok, predicted_logp <= 5]
def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
    """
    Turn one molecule into a database row.

    Parameters:
      - mol: the molecule to process; a falsy value raises ValueError.
      - typeConversions: indexable by column-type code; element ``[1]`` of
        each entry is the callable used to convert a property string.
      - globalProps: dict mapping property name -> column-type code.  It is
        updated in place with the type code that worked for each property.
      - nDone: counter used to build a fallback name (``Mol_<nDone>``).
      - nameProp: molecule property holding the compound name.
      - nameCol: name of the id column; a property with this name
        (case-insensitive) is not copied into the property dictionary.
      - redraw: recompute 2D coordinates before storing the molecule.
      - keepHs: sanitize the molecule before processing.
      - skipProps: do not collect molecule properties.
      - addComputedProps: store donor/acceptor/rotatable-bond counts, AMW
        and MolLogP as molecule properties before collecting them.
      - skipSmiles: do not add the SMILES column to the row.
      - uniqNames: when true, a name already present in ``namesSeen`` causes
        the molecule to be skipped.
      - namesSeen: set of names processed so far (updated in place).  When
        it is None no name tracking is done.

    Returns:
      ``[name, (smiles,) binary molecule, property dict]``, or None when the
      molecule was skipped as a duplicate.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d'%nDone

    # Name tracking needs a set to record into; the default namesSeen=None
    # previously crashed on namesSeen.add().
    if namesSeen is not None:
        if uniqNames and nm in namesSeen:
            logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
            return None
        namesSeen.add(nm)

    row = [nm]
    pD = {}
    if not skipProps:
        if addComputedProps:
            mol.SetProp('DonorCount', str(Lipinski.NumHDonors(mol)))
            mol.SetProp('AcceptorCount', str(Lipinski.NumHAcceptors(mol)))
            mol.SetProp('RotatableBondCount', str(Lipinski.NumRotatableBonds(mol)))
            mol.SetProp('AMW', str(Descriptors.MolWt(mol)))
            mol.SetProp('MolLogP', str(Crippen.MolLogP(mol)))

        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pv.find('>') < 0 and pv.find('<') < 0:
                # Start from the type recorded so far (default 2) and step
                # down until a conversion succeeds; type 0 is the last resort.
                colTyp = globalProps.get(pn, 2)
                while colTyp > 0:
                    try:
                        typeConversions[colTyp][1](pv)
                    except Exception:
                        colTyp -= 1
                    else:
                        break
                globalProps[pn] = colTyp
                pD[pn] = typeConversions[colTyp][1](pv)
            else:
                # values containing '<' or '>' are stored unconverted
                pD[pn] = pv

    if redraw:
        # was Compute2DCoords(m): 'm' is not defined in this function
        AllChem.Compute2DCoords(mol)
    if not skipSmiles:
        row.append(Chem.MolToSmiles(mol, True))
    row.append(DbModule.binaryHolder(mol.ToBinary()))
    row.append(pD)
    return row
def check_ligand(file_path):
    """
    Report whether any molecule in an SD file passes both rule sets below.

    Components (all four must hold):
        H-bond donors <= 5, H-bond acceptors <= 10,
        exact molecular weight <= 500, logP <= 5.
    Variants (at least four of five must hold):
        -0.4 <= logP <= 5.6, 40 <= molar refractivity <= 130,
        180 <= exact molecular weight <= 500,
        20 <= heavy atom count <= 70, TPSA <= 140.

    Returns False when ``file_path`` is not an existing file or when no
    readable molecule satisfies the criteria.
    """
    if not os.path.isfile(file_path):
        return False

    passed = False
    for mol in Chem.SDMolSupplier(file_path):
        if mol is None:
            continue

        log_p = Crippen.MolLogP(mol)
        exact_weight = Descriptors.ExactMolWt(mol)

        # components of rule
        components_rank = sum((
            Lipinski.NumHDonors(mol) <= 5,
            Lipinski.NumHAcceptors(mol) <= 10,
            exact_weight <= 500,
            log_p <= 5,
        ))

        # variants
        variants_rank = sum((
            -0.4 <= log_p <= 5.6,
            40 <= Crippen.MolMR(mol) <= 130,
            180 <= exact_weight <= 500,
            20 <= Lipinski.HeavyAtomCount(mol) <= 70,
            MolSurf.TPSA(mol) <= 140,
        ))

        if components_rank == 4 and variants_rank in (4, 5):
            passed = True
    return passed
def score_molecule(smiles):
    """
    Score a SMILES string against the LipinskiRuleOfFiveDecorator limits.

    One point is awarded for each of logP, molecular weight, H-bond donors,
    H-bond acceptors and rotatable bonds that is strictly below its limit.

    Returns a ``(lipinski_score, qed)`` tuple.  If anything raises while
    scoring, the exception is logged, the score is reset to 0 and ``qed``
    stays at ``MAX_QED + 1``.
    """
    limits = LipinskiRuleOfFiveDecorator
    lipinski_score = 0
    qed = limits.MAX_QED + 1

    try:
        m = Chem.MolFromSmiles(smiles)

        if Descriptors.MolLogP(m) < limits.MAX_LOGP:
            lipinski_score += 1
        if Descriptors.MolWt(m) < limits.MAX_MOL_WT:
            lipinski_score += 1
        if Lipinski.NumHDonors(m) < limits.MAX_H_DONORS:
            lipinski_score += 1
        # NOTE(review): the acceptor count is compared with MAX_H_DONORS,
        # exactly as before - confirm whether a separate acceptor limit
        # was intended.
        if Lipinski.NumHAcceptors(m) < limits.MAX_H_DONORS:
            lipinski_score += 1
        if Lipinski.NumRotatableBonds(m) < limits.MAX_ROTATABLE_BONDS:
            lipinski_score += 1

        qed = QED.qed(m)
    except Exception as ex:
        lipinski_score = 0
        logger.exception(ex)

    return lipinski_score, qed
def get_ro5_from_mol(mol):
    """
    Evaluate Lipinski's rule of five criteria for a molecule.

    Each criterion is reported as 1 (fulfilled) or 0 (violated):
    exact molecular weight <= 500, logP <= 5, H-bond donors <= 5 and
    H-bond acceptors <= 10.  The ``lipinski`` entry is 1 when at least
    three of those four criteria are fulfilled.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule.

    Returns
    -------
    pd.Series
        Entries ``mw``, ``logp``, ``hbd``, ``hba`` and ``lipinski``.
    """
    labels = ["mw", "logp", "hbd", "hba", "lipinski"]

    mw = int(Descriptors.ExactMolWt(mol) <= 500)
    logp = int(Descriptors.MolLogP(mol) <= 5)
    hbd = int(Lipinski.NumHDonors(mol) <= 5)
    hba = int(Lipinski.NumHAcceptors(mol) <= 10)
    lipinski = int(mw + logp + hbd + hba >= 3)

    return pd.Series([mw, logp, hbd, hba, lipinski], index=labels)
def get_phys_fp(compound):
    """
    Build a four-element list of scaled physical properties for a compound.

    ``compound`` is indexed with the keys ``'mol_weight'``, ``'dsstox_sid'``
    and ``'smiles'``.  The returned list holds, in order:
        - mol_weight / 500
        - logP / 10, where logP comes from ``get_logp``; a falsy logP
          (e.g. None or 0) is passed through unchanged
        - number of H-bond donors / 5
        - number of H-bond acceptors / 10
    """
    scaled_weight = compound['mol_weight'] / 500

    logp = get_logp(compound['dsstox_sid'])
    if logp:
        logp = logp / 10

    m = chm.MolFromSmiles(compound['smiles'])
    return [
        scaled_weight,
        logp,
        lip.NumHDonors(m) / 5,
        lip.NumHAcceptors(m) / 10,
    ]
def lipinski(smile):
    """
    Compute the four Lipinski descriptors for a single SMILES string.

    Returns a tuple ``(NumHDonors, NumHAcceptors, MolWt, MolLogP)``.
    """
    # Convert the SMILES string into an RDKit molecule
    mol = Chem.MolFromSmiles(smile)

    weight = Descriptors.MolWt(mol)
    log_p = Descriptors.MolLogP(mol)
    donors = Lipinski.NumHDonors(mol)
    acceptors = Lipinski.NumHAcceptors(mol)

    return donors, acceptors, weight, log_p
def mole_proper(mol):
    """
    Collect six properties of a molecule.

    Returns a tuple ``(H-bond donors, H-bond acceptors, rotatable bonds,
    molecular weight, logP, TPSA)``.
    """
    return (
        Lipinski.NumHDonors(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumRotatableBonds(mol),
        Descriptors.MolWt(mol),
        Crippen.MolLogP(mol),
        Descriptors.TPSA(mol),
    )
def auto_sampling(mult_factor, mol, log):
    """
    Derive the number of samples from the flexibility of a molecule.

    Rotatable bonds, NH/OH groups and saturated rings each contribute three
    samples per occurrence.  The total is multiplied by ``mult_factor``;
    when the molecule has none of these features, ``mult_factor`` itself is
    returned.  ``log`` is accepted but not used.
    """
    feature_counts = (
        Lipinski.NumRotatableBonds(mol),   # x3, for C3 rotations
        Lipinski.NHOHCount(mol),           # x3, for OH/NH rotations
        Lipinski.NumSaturatedRings(mol),   # x3, for boat/chair/envelope confs
    )
    auto_samples = sum(3 * count for count in feature_counts)

    if auto_samples == 0:
        return mult_factor
    return mult_factor * auto_samples
def filter(mol, type = "frags"):
    """
    Decide whether a molecule passes the property limits for its kind.

    For ``type == "frags"``: H-bond donors <= 8, H-bond acceptors <= 8,
    at least one ring and molecular weight <= 800.
    For any other ``type``: H-bond donors <= 5, H-bond acceptors <= 5 and
    molecular weight <= 500.

    Returns True when every limit for the selected kind is met.
    """
    donors = Lipinski.NumHDonors(mol)
    acceptors = Lipinski.NumHAcceptors(mol)
    ring_count = len(Chem.GetSymmSSSR(mol))
    weight = Chem.Descriptors.MolWt(mol)

    if type == "frags":
        return (donors <= 8 and acceptors <= 8
                and ring_count >= 1 and weight <= 800)
    return donors <= 5 and acceptors <= 5 and weight <= 500
def auto_sampling(mult_factor,mol,args,log):
    """
    Derive the number of samples from the flexibility of a molecule.

    When ``args.metal_complex`` is set and ``args.metal_idx`` is non-empty,
    ``mult_factor`` is first multiplied by 3 per metal index (to account
    for possible trans/cis isomers in metal complexes).  Rotatable bonds,
    NH/OH groups and saturated rings then each contribute three samples per
    occurrence; the total is multiplied by ``mult_factor``, or
    ``mult_factor`` alone is returned when the molecule has none of these
    features.  ``log`` is accepted but not used.
    """
    if args.metal_complex and len(args.metal_idx) > 0:
        mult_factor = mult_factor * 3 * len(args.metal_idx)

    feature_counts = (
        Lipinski.NumRotatableBonds(mol),   # x3, for C3 rotations
        Lipinski.NHOHCount(mol),           # x3, for OH/NH rotations
        Lipinski.NumSaturatedRings(mol),   # x3, for boat/chair/envelope confs
    )
    auto_samples = sum(3 * count for count in feature_counts)

    if auto_samples == 0:
        return mult_factor
    return mult_factor * auto_samples
def test1(self):
    " testing first 200 mols from NCI "
    # figure out which rotor version we are using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

    suppl = Chem.SDMolSupplier(self.inFileName)

    # SMARTS-based counters kept from the original test; the checks below
    # do not call them.
    oldDonorSmarts = Chem.MolFromSmarts('[NH1,NH2,OH1]')
    OldDonorCount = lambda x, y=oldDonorSmarts: Lipinski._NumMatches(x, y)
    oldAcceptorSmarts = Chem.MolFromSmarts('[N,O]')
    OldAcceptorCount = lambda x, y=oldAcceptorSmarts: Lipinski._NumMatches(x, y)

    rotor_options = rdMolDescriptors.NumRotatableBondsOptions

    def non_strict_rotors(mol):
        return rdMolDescriptors.CalcNumRotatableBonds(mol, rotor_options.NonStrict)

    def strict_rotors(mol):
        return rdMolDescriptors.CalcNumRotatableBonds(mol, rotor_options.Strict)

    # (descriptor function, SD property with the reference value, failure message)
    checks = (
        (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHDonors, 'NUM_HDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
         'bad num heteroatoms for mol %d (%s): %d != %d'),
        (Lipinski.NumRotatableBonds, rot_prop,
         'bad num rotors for mol %d (%s): %d != %d'),
        # test the underlying numrotatable bonds
        (non_strict_rotors, NonStrict,
         'bad num rotors for mol %d (%s): %d != %d'),
        (strict_rotors, Strict,
         'bad num rotors for mol %d (%s): %d != %d'),
    )

    # idx is the 1-based position in the supplier; records that failed to
    # parse are skipped but still consume an index.
    for idx, m in enumerate(suppl, 1):
        if not m:
            continue
        for compute, prop_name, message in checks:
            calc = compute(m)
            orig = int(m.GetProp(prop_name))
            assert calc == orig, message % (idx, m.GetProp('SMILES'), calc, orig)
def check_lipinski(mol):
    """
    Check a wrapped molecule against donor/acceptor/logP limits.

    The limits are: H-bond donors <= 5, H-bond acceptors <= 5 and logP < 5,
    all computed on ``mol.rdmol``.  When they hold and the molecular weight
    is at least 450, a random entry of the ``'terminal_fg'`` functional
    group set is joined onto ``mol``.

    Returns ``(True, False)`` when the limits hold and ``(False, False)``
    otherwise; the second element is always False.
    """
    fgs = load_functional_groups()

    h_donors = Lipinski.NumHDonors(mol.rdmol)
    h_acceptors = Lipinski.NumHAcceptors(mol.rdmol)
    log_p = MolLogP(mol.rdmol)
    wt = MolWt(mol.rdmol)

    within_limits = h_donors <= 5 and h_acceptors <= 5 and log_p < 5
    if not within_limits:
        return False, False

    if wt >= 450:
        mol.join(fgs['terminal_fg'].get_random())
    return True, False
def testIssue2183420(self):
    " testing a problem with the acceptor definition "
    # (SMILES, expected number of H-bond acceptors)
    expected_acceptors = (
        ('NC', 1),
        ('CNC', 1),
        ('CN(C)C', 1),
        ('NC(=O)', 1),
        ('NC(=O)C', 1),
        ('CNC(=O)', 1),
        ('CNC(=O)C', 1),
        ('O=CNC(=O)C', 2),
        ('O=C(C)NC(=O)C', 2),
    )
    for smiles, expected in expected_acceptors:
        found = Lipinski.NumHAcceptors(Chem.MolFromSmiles(smiles))
        self.assertTrue(found == expected)
def calcScore(self, m, smi):
    """
    Check a molecule against weight and hydrogen-bond limits.

    Resets ``self.value`` to 0.0, then returns True only when the molecular
    weight lies in [100, 700], there are at most 5 H-bond donors and at
    most 10 H-bond acceptors.  ``smi`` is accepted but not used.
    """
    self.value = 0.0

    mw = Descriptors.MolWt(m)
    if mw < 100 or mw > 700:
        return False

    num_hdonors = Lipinski.NumHDonors(m)
    num_hacceptors = Lipinski.NumHAcceptors(m)
    return num_hdonors <= 5 and num_hacceptors <= 10
def getDiscriptor(self):
    """
    Compute RDKit descriptors for every structure in 'extChronicStrcture.csv'.

    Side effects:
        - changes the working directory to the hard-coded data folder
        - reads 'extChronicStrcture.csv' (columns 'CAS', 'canonical_smiles');
          rows with a missing value in either column are dropped
        - prints the H-bond donor/acceptor counts of each molecule
        - writes the descriptor table to 'Descriptors.csv' (no index column)

    Returns nothing.
    """
    from rdkit.Chem import Crippen
    from rdkit import Chem
    import pandas as pd
    from rdkit.Chem import Descriptors, Lipinski
    import os

    os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
    df = pd.read_csv('extChronicStrcture.csv', engine='python')
    df = df[['CAS', 'canonical_smiles']]
    df = df.dropna(how='any')

    columns = [
        'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
        'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
        'AromaticCarbocycles', 'AromaticHeterocycles'
    ]

    # The aromatic-atom pattern is the same for every molecule: build it once.
    aromatic_atom = Chem.MolFromSmarts('[a]')

    # Rows are collected in a list and turned into a frame once at the end;
    # concatenating onto a growing DataFrame inside the loop copied the
    # whole table on every iteration.
    rows = []
    for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
        mol = Chem.MolFromSmiles(smiles)
        wt = Descriptors.MolWt(mol)
        rot = Lipinski.NumRotatableBonds(mol)
        heavy = Lipinski.HeavyAtomCount(mol)
        logp = Crippen.MolLogP(mol)
        # fraction of atoms in the molecule that are aromatic
        aromaticHeavyatoms = len(mol.GetSubstructMatches(aromatic_atom))
        aromprop = float(aromaticHeavyatoms / mol.GetNumAtoms())
        TPSA = Descriptors.TPSA(mol)
        HDonors = Descriptors.NumHDonors(mol)
        HAcceptors = Descriptors.NumHAcceptors(mol)
        FractionCSP3 = Descriptors.FractionCSP3(mol)
        AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
        AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)
        print(HDonors, HAcceptors)
        rows.append([
            cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
            FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
        ])

    resultDf = pd.DataFrame(rows, columns=columns)
    resultDf.to_csv('Descriptors.csv', index=False)
def lipinski_trial(smiles):
    '''
    Check a SMILES string against Lipinski's rules.

    Lipinski's rules are:
        Hydrogen bond donors <= 5
        Hydrogen bond acceptors <= 10
        Molecular weight < 500 daltons
        logP < 5

    Returns a tuple (passed, failed) of two lists of messages, one message
    per rule, in the order listed above.  Raises Exception when the SMILES
    string cannot be parsed.
    '''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise Exception('%s is not a valid SMILES string' % smiles)

    num_hdonors = Lipinski.NumHDonors(mol)
    num_hacceptors = Lipinski.NumHAcceptors(mol)
    mol_weight = Descriptors.MolWt(mol)
    mol_logp = Crippen.MolLogP(mol)

    # (rule violated?, value, message when violated, message when satisfied)
    checks = (
        (num_hdonors > 5, num_hdonors,
         'Over 5 H-bond donors, found %s',
         'Found %s H-bond donors'),
        (num_hacceptors > 10, num_hacceptors,
         'Over 10 H-bond acceptors, found %s',
         'Found %s H-bond acceptors'),
        (mol_weight >= 500, mol_weight,
         'Molecular weight over 500, calculated %s',
         'Molecular weight: %s'),
        (mol_logp >= 5, mol_logp,
         'Log partition coefficient over 5, calculated %s',
         'Log partition coefficient: %s'),
    )

    passed = []
    failed = []
    for violated, value, failure_text, success_text in checks:
        if violated:
            failed.append(failure_text % value)
        else:
            passed.append(success_text % value)
    return passed, failed
def run_filter(self, mol):
    """
    Apply a strict Lipinski filter: the molecule must satisfy every limit.

    The limits checked, in this order, are:
        Exact molecular weight: max 500 dalton
        Number of H donors: max 5
        Number of H acceptors: max 10
        logP: max +5.0

    If you use the Lipinski Filter please cite: C.A. Lipinski et al.
    Experimental and computational approaches to estimate solubility and
    permeability in drug discovery and development settings
    Advanced Drug Delivery Reviews, 46 (2001), pp. 3-26

    Inputs:
    :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
        tested if it passes the filters

    Returns:
    :returns: bool bool: True if the mol passes the filter; False if it
        fails the filter
    """
    # (descriptor function, largest value that still passes)
    limits = (
        (Descriptors.ExactMolWt, 500),
        (Lipinski.NumHDonors, 5),
        (Lipinski.NumHAcceptors, 10),
        (Crippen.MolLogP, 5),
    )
    # Descriptors are evaluated one at a time; the first one over its limit
    # ends the check.
    for descriptor, ceiling in limits:
        if descriptor(mol) > ceiling:
            return False

    # Passed all filters
    return True
def ProcessMol(session, mol, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
               redraw=False, keepHs=False, skipProps=False, addComputedProps=False,
               skipSmiles=False):
    """
    Create a Compound record for one molecule and add it to the session.

    Parameters:
      - session: object whose ``add`` method receives the new Compound.
      - mol: the molecule to process; a falsy value raises ValueError.
      - globalProps: container of property names; only molecule properties
        whose name is in it are copied onto the compound (as lower-cased
        attribute names).
      - nDone: counter used to build a fallback name (``Mol_<nDone>``).
      - nameProp: molecule property holding the compound name.
      - nameCol: attribute of the compound that receives the name; a
        property with this name (case-insensitive) is not copied.
      - redraw: recompute 2D coordinates before storing the molecule.
      - keepHs: sanitize the molecule before processing.
      - skipProps: do not copy any properties onto the compound.
      - addComputedProps: also store donor/acceptor/rotatable-bond counts,
        AMW and MolLogP on the compound.
      - skipSmiles: do not store the SMILES string.

    Returns:
      the Compound that was added to the session.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # was Compute2DCoords(m): 'm' is not defined in this function
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()
    setattr(cmpd, nameCol, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pn in globalProps:
                setattr(cmpd, pn.lower(), pv)
    return cmpd
def testMQNDetails(self):
    # Regression test: MQN descriptors of the molecules in
    # aromat_regress.txt must equal the pickled reference values.
    data_dir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')
    strict_ref = os.path.join(data_dir, 'MQNs_regress.pkl')
    non_strict_ref = os.path.join(data_dir, 'MQNs_non_strict_regress.pkl')

    # figure out which definition we are currently using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    refFile = non_strict_ref if Lipinski.NumRotatableBonds(probe) == 2 else strict_ref

    # The reference file is read as text so that '\r\n' line endings can be
    # normalised before the bytes are handed to the unpickler.
    with open(refFile, 'r') as intf:
        pkl = intf.read().replace('\r\n', '\n').encode('utf-8')
    refData = cPickle.loads(pkl, encoding='bytes')

    fn = os.path.join(data_dir, 'aromat_regress.txt')
    ms = list(Chem.SmilesMolSupplier(fn, delimiter='\t'))

    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        expected = refData[i][1]
        if mqns != expected:
            # show which positions differ before the assertion fires
            indices = [(j, x, y)
                       for j, (x, y) in enumerate(zip(mqns, expected))
                       if x != y]
            print(i, Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, expected)
def testMQNDetails(self):
    # Regression test: MQN descriptors of the molecules in
    # aromat_regress.txt must equal the pickled reference values.
    data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
    strict_ref = os.path.join(data_dir, 'MQNs_regress.pkl')
    non_strict_ref = os.path.join(data_dir, 'MQNs_non_strict_regress.pkl')

    # figure out which definition we are currently using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    refFile = non_strict_ref if Lipinski.NumRotatableBonds(probe) == 2 else strict_ref

    with open(refFile, 'rb') as intf:
        refData = pickle.load(intf)

    fn = os.path.join(data_dir, 'aromat_regress.txt')
    ms = list(Chem.SmilesMolSupplier(fn, delimiter='\t'))

    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        expected = refData[i][1]
        if mqns != expected:
            # show which positions differ before the assertion fires
            indices = [(j, x, y)
                       for j, (x, y) in enumerate(zip(mqns, expected))
                       if x != y]
            print(i, Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, expected)
def filter2(filter1_dict, input_nr_of_hetero_atoms):
    """
    Keep only the substructures that contain enough hetero atoms.

    filter1_dict: dict mapping a structure identifier to a list of
        substructure SMILES; the placeholder '<NA>' marks "no substructure"
    input_nr_of_hetero_atoms: integer, minimum number of hetero atoms a
        substructure needs in order to be kept

    Returns a dict with the same keys.  Each key maps to the list of
    substructures that were kept, or to ['<NA>'] when none was kept.
    The number of kept substructures is printed.
    """
    filter2_dict = {}
    for structure_id, substructures in filter1_dict.items():
        for smile in substructures:
            if smile == '<NA>':
                continue
            sub_mol = Chem.MolFromSmiles(smile)
            if Lipinski.NumHeteroatoms(sub_mol) >= input_nr_of_hetero_atoms:
                filter2_dict.setdefault(structure_id, []).append(smile)
        # nothing survived for this structure: record the placeholder
        if structure_id not in filter2_dict:
            filter2_dict[structure_id] = ['<NA>']

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('nr of substructures after filter 2 [hetero atoms]')
    nr_of_subs = sum(
        1
        for kept in filter2_dict.values()
        for val in kept
        if val != '<NA>'
    )
    print(nr_of_subs)
    return filter2_dict
def calculate_property(m):
    """
    Return ``(molecular weight, rotatable bond count, logP)`` for a molecule.

    A synthetic-accessibility score is not part of the result; the call
    that would compute it is disabled.
    """
    # SA_score = -sascorer.calculateScore(m)
    return (
        Descriptors.MolWt(m),
        Lipinski.NumRotatableBonds(m),
        Descriptors.MolLogP(m),
    )
def testIssue80(self):
    # Calling Lipinski.NHOHCount on a molecule must not change the logP
    # that Crippen.MolLogP reports for it afterwards.
    from rdkit.Chem import Lipinski
    m = Chem.MolFromSmiles('CCOC')
    ref = Crippen.MolLogP(m)
    Lipinski.NHOHCount(m)
    probe = Crippen.MolLogP(m)
    # assertTrue replaces the failUnless alias, which newer unittest
    # versions no longer provide.
    self.assertTrue(probe == ref)
def generate(smiles):
    """
    Build a descriptor table for a collection of SMILES strings.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings; each one is parsed with ``Chem.MolFromSmiles``.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES with the float columns ``MolLogP``,
        ``MolWt``, ``NumRotatableBonds`` and ``AromaticProportion``.  An
        empty input yields an empty frame with those four columns.
    """
    columnNames = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([
            Crippen.MolLogP(mol),
            Descriptors.MolWt(mol),
            Lipinski.NumRotatableBonds(mol),
            getAromaticProportion(mol),
        ])

    # Force a (n_molecules, 4) array.  The previous vstack-based accumulation
    # left a 1-D array for a single molecule (and for no molecules), which
    # the DataFrame constructor rejects against four column names.
    data = np.asarray(rows, dtype=float).reshape(-1, len(columnNames))
    return pd.DataFrame(data=data, columns=columnNames)
def pct_rotatable_bonds(mol):
    """
    Return the fraction of a molecule's bonds that are rotatable.

    A molecule without bonds yields 0.
    """
    n_bonds = mol.GetNumBonds()
    if n_bonds <= 0:
        return 0
    return Lipinski.NumRotatableBonds(mol) / n_bonds