def testIssue2183420(self):
    " testing a problem with the acceptor definition "
    # Each entry pairs a SMILES string with the acceptor count that
    # Lipinski.NumHAcceptors is expected to report for it.
    expectations = (
        ('NC', 1),
        ('CNC', 1),
        ('CN(C)C', 1),
        ('NC(=O)', 1),
        ('NC(=O)C', 1),
        ('CNC(=O)', 1),
        ('CNC(=O)C', 1),
        ('O=CNC(=O)C', 2),
        ('O=C(C)NC(=O)C', 2),
    )
    for smi, n_acceptors in expectations:
        mol = Chem.MolFromSmiles(smi)
        self.assertTrue(Lipinski.NumHAcceptors(mol) == n_acceptors)
def score_molecule(smiles):
    """Score a SMILES string against the ``LipinskiRuleOfFiveDecorator`` limits.

    One point is awarded for each of logP, molecular weight, H-bond donor
    count, H-bond acceptor count and rotatable-bond count that is strictly
    below its ``MAX_*`` limit.

    Returns:
        tuple: ``(lipinski_score, qed)``. If anything raises while the
        molecule is parsed or scored, the exception is logged, the score is
        reset to 0 and ``qed`` keeps whatever value it had (``MAX_QED + 1``
        unless ``QED.qed`` had already succeeded).
    """
    limits = LipinskiRuleOfFiveDecorator
    lipinski_score = 0
    qed = limits.MAX_QED + 1

    try:
        m = Chem.MolFromSmiles(smiles)

        logp = Descriptors.MolLogP(m)
        if logp < limits.MAX_LOGP:
            lipinski_score += 1

        wt = Descriptors.MolWt(m)
        if wt < limits.MAX_MOL_WT:
            lipinski_score += 1

        hdonor = Lipinski.NumHDonors(m)
        if hdonor < limits.MAX_H_DONORS:
            lipinski_score += 1

        # NOTE(review): the acceptor count used to be compared against
        # MAX_H_DONORS, which looks like a copy-paste slip. Prefer a dedicated
        # MAX_H_ACCEPTORS limit when the class defines one; otherwise fall back
        # to the previous limit so behaviour is unchanged.
        max_h_acceptors = getattr(limits, 'MAX_H_ACCEPTORS', limits.MAX_H_DONORS)
        hacceptor = Lipinski.NumHAcceptors(m)
        if hacceptor < max_h_acceptors:
            lipinski_score += 1

        rotatable_bond = Lipinski.NumRotatableBonds(m)
        if rotatable_bond < limits.MAX_ROTATABLE_BONDS:
            lipinski_score += 1

        qed = QED.qed(m)
    except Exception as ex:
        # Best effort: a molecule that cannot be scored gets a zero score.
        lipinski_score = 0
        logger.exception(ex)

    return lipinski_score, qed
def lipinski(smiles, verbose=False):
    """Compute Lipinski descriptors for a collection of SMILES strings.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings, one per molecule.
    verbose : bool, optional
        Accepted for backward compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order, with float columns
        ``MW``, ``LogP``, ``NumHDonors`` and ``NumHAcceptors``.
    """
    columnNames = ["MW", "LogP", "NumHDonors", "NumHAcceptors"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([
            Descriptors.MolWt(mol),
            Descriptors.MolLogP(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHAcceptors(mol),
        ])

    # reshape(-1, 4) keeps the data two-dimensional for zero or one molecule.
    # The previous vstack-based accumulation produced a 1-D array in those
    # cases, which the 4-column DataFrame constructor rejects.
    baseData = np.asarray(rows, dtype=float).reshape(-1, len(columnNames))
    return pd.DataFrame(data=baseData, columns=columnNames)
def ProcessMol(mol, typeConversions, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
               redraw=False, keepHs=False, skipProps=False, addComputedProps=False,
               skipSmiles=False, uniqNames=None, namesSeen=None):
    """Build the output row for one molecule.

    The row is ``[name, smiles (unless skipSmiles), binary holder, props]``
    where ``props`` is a dict of converted molecule properties (empty when
    ``skipProps`` is true).

    Args:
        mol: molecule to process; a falsy value raises ``ValueError``.
        typeConversions: indexable by column-type number; element ``[1]`` of
            each entry is called with the stripped property string.
        globalProps: dict mapping property name -> column-type number. It is
            updated in place with the type that was finally used.
        nDone: number used to build the fallback name ``'Mol_%d'``.
        nameProp: molecule property holding the compound name.
        nameCol: a property whose name equals this (case-insensitively) is
            left out of the property dict.
        redraw: if true, 2D coordinates are recomputed for ``mol``.
        keepHs: if true, ``Chem.SanitizeMol(mol)`` is called first.
        skipProps: if true, no properties are collected.
        addComputedProps: if true, donor/acceptor/rotatable-bond counts,
            molecular weight and logP are stored on ``mol`` as properties
            before the properties are collected.
        skipSmiles: if true, the SMILES column is omitted from the row.
        uniqNames: if truthy, a molecule whose name is already in
            ``namesSeen`` is logged and skipped.
        namesSeen: optional set of names seen so far; updated in place.

    Returns:
        The row as a list, or ``None`` when a duplicate name was skipped.

    Raises:
        ValueError: if ``mol`` is falsy.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    # namesSeen defaults to None; previously that crashed on .add().
    if namesSeen is not None:
        if uniqNames and nm in namesSeen:
            logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
            return None
        namesSeen.add(nm)

    row = [nm]
    pD = {}
    if not skipProps:
        if addComputedProps:
            nHD = Lipinski.NumHDonors(mol)
            mol.SetProp('DonorCount', str(nHD))
            nHA = Lipinski.NumHAcceptors(mol)
            mol.SetProp('AcceptorCount', str(nHA))
            nRot = Lipinski.NumRotatableBonds(mol)
            mol.SetProp('RotatableBondCount', str(nRot))
            MW = Descriptors.MolWt(mol)
            mol.SetProp('AMW', str(MW))
            logp = Crippen.MolLogP(mol)
            mol.SetProp('MolLogP', str(logp))

        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pv.find('>') < 0 and pv.find('<') < 0:
                # Walk down from the currently recorded column type until a
                # converter accepts the value.
                colTyp = globalProps.get(pn, 2)
                while colTyp > 0:
                    try:
                        converted = typeConversions[colTyp][1](pv)
                    except Exception:
                        colTyp -= 1
                    else:
                        break
                else:
                    # No positive type succeeded (or none was tried): use the
                    # converter at the remaining index, unguarded as before.
                    converted = typeConversions[colTyp][1](pv)
                globalProps[pn] = colTyp
                pD[pn] = converted
            else:
                # Values containing '<' or '>' are stored unconverted.
                pD[pn] = pv

    if redraw:
        # Fixed: this used to reference an undefined name `m`.
        AllChem.Compute2DCoords(mol)
    if not skipSmiles:
        row.append(Chem.MolToSmiles(mol, True))
    row.append(DbModule.binaryHolder(mol.ToBinary()))
    row.append(pD)
    return row
def get_ro5_from_mol(mol):
    """
    Evaluate Lipinski's rule of five for a single molecule.

    Each criterion is reported as 1 (fulfilled) or 0 (violated): exact
    molecular weight <= 500, logP <= 5, H-bond donors <= 5 and H-bond
    acceptors <= 10. The overall ``lipinski`` flag is 1 when at least three
    of the four criteria are fulfilled.
    (Original author's note: takes about 1s for 2000 mols.)

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule.

    Returns
    -------
    pd.Series
        Flags indexed by ``mw``, ``logp``, ``hbd``, ``hba`` and ``lipinski``.
    """
    flags = [
        int(Descriptors.ExactMolWt(mol) <= 500),
        int(Descriptors.MolLogP(mol) <= 5),
        int(Lipinski.NumHDonors(mol) <= 5),
        int(Lipinski.NumHAcceptors(mol) <= 10),
    ]
    # Overall verdict: at most one of the four criteria may be violated.
    flags.append(int(sum(flags) >= 3))
    return pd.Series(flags, index=["mw", "logp", "hbd", "hba", "lipinski"])
def check_ligand(file_path):
    """Tell whether any molecule in an SD file passes the drug-likeness checks.

    A molecule passes when it meets all four "component" criteria
    (H-bond donors <= 5, H-bond acceptors <= 10, exact molecular weight
    <= 500, logP <= 5) and at least four of the five "variant" criteria
    (logP in [-0.4, 5.6], molar refractivity in [40, 130], exact molecular
    weight in [180, 500], heavy-atom count in [20, 70], TPSA <= 140).

    Args:
        file_path: path to the SD file to read.

    Returns:
        bool: True as soon as one readable molecule passes; False if none
        does or if ``file_path`` is not an existing file.
    """
    if not os.path.isfile(file_path):
        return False

    for mol in Chem.SDMolSupplier(file_path):
        if mol is None:
            # Skip records that could not be read.
            continue

        # These two descriptors feed both rule sets; compute them once.
        log_p = Crippen.MolLogP(mol)
        exact_mass = Descriptors.ExactMolWt(mol)

        # components of rule
        components_rank = sum((
            Lipinski.NumHDonors(mol) <= 5,
            Lipinski.NumHAcceptors(mol) <= 10,
            exact_mass <= 500,
            log_p <= 5,
        ))

        # variants
        variants_rank = sum((
            -0.4 <= log_p <= 5.6,
            40 <= Crippen.MolMR(mol) <= 130,
            180 <= exact_mass <= 500,
            20 <= Lipinski.HeavyAtomCount(mol) <= 70,
            MolSurf.TPSA(mol) <= 140,
        ))

        if components_rank == 4 and variants_rank >= 4:
            return True

    return False
def calc_lipinski(self, mol):
    """
    Run the Lipinski check on a molecule.

    Returns:
        a tuple consisting of:
            - a boolean indicating whether the molecule passed Lipinski test
            - a dictionary giving the values of the Lipinski check.

    NOTE: Lipinski's rules are:
        - Hydrogen bond donors <= 5
        - Hydrogen bond acceptors <= 10
        - Molecular weight < 500 daltons
        - logP < 5 (logP is rounded to 4 decimals before the comparison)
    """
    report = {
        'hydrogen_bond_donors': Lipi.NumHDonors(mol),
        'hydrogen_bond_acceptors': Lipi.NumHAcceptors(mol),
        'molecular_weight': Descriptors.MolWt(mol),
        'logp': round(Crippen.MolLogP(mol), 4),
    }
    passed = (
        report['hydrogen_bond_donors'] <= 5
        and report['hydrogen_bond_acceptors'] <= 10
        and report['molecular_weight'] < 500
        and report['logp'] < 5
    )
    return (passed, report)
def get_filter_values(mol):
    """
    Collect, for one molecule, every value the filters look at.

    Returns a dictionary keyed by value name. Besides the computed
    descriptors it carries ``is_good`` (initialised to True) and
    ``rejections`` (initialised to an empty list).
    """
    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
    }

    rotatable = lip.NumRotatableBonds(mol)
    values["rot_bonds"] = rotatable
    # every bond that is not rotatable is counted as rigid
    values["rigid_bonds"] = mol.GetNumBonds() - rotatable
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    # the formal charge reported by RDKit is taken at face value
    values["charge"] = rdmolops.GetFormalCharge(mol)

    n_carbons, n_charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = n_carbons
    values["num_charges"] = n_charges
    values["max_ring_size"] = largest_ring

    try:
        ratio = float(values["num_hetero_atoms"]) / float(n_carbons)
    except ZeroDivisionError:
        # no carbons at all: use a very large sentinel ratio
        ratio = 100000000
    values["hc_ratio"] = ratio

    # number of BRICS bonds, related to complexity
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    # defaults to True until a filter says otherwise
    values["is_good"] = True
    # the distinct element symbols present in the molecule
    values["atoms"] = list({atom.GetSymbol() for atom in mol.GetAtoms()})
    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    # reasons for rejection are appended here later
    values["rejections"] = []
    return values
def get_descriptors(mol, write=False):
    """Return a fixed-order list of 20 RDKit descriptors for ``mol``.

    Any descriptor that comes back as NaN is replaced by 0. ``write`` is
    accepted but not used by this function.
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )

    cleaned = []
    for calculate in calculators:
        value = calculate(mol)
        # NaN is the only value that does not compare equal to itself.
        cleaned.append(0 if value != value else value)
    return cleaned
def veber_infraction(molecule: Chem.Mol) -> bool:
    """
    Report whether a molecule violates the Veber-style limits used here.

    A violation is more than 10 rotatable bonds, or more than 10 hydrogen
    bond acceptors and donors combined.
    """
    n_rotatable = Lipinski.NumRotatableBonds(molecule)
    n_h_bond_sites = Lipinski.NumHAcceptors(molecule) + Lipinski.NumHDonors(molecule)
    within_limits = n_rotatable <= 10 and n_h_bond_sites <= 10
    return not within_limits
def test1(self):
    " testing first 200 mols from NCI "
    # figure out which rotor version we are using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

    def nonstrict_rotors(mol):
        return rdMolDescriptors.CalcNumRotatableBonds(
            mol, rdMolDescriptors.NumRotatableBondsOptions.NonStrict)

    def strict_rotors(mol):
        return rdMolDescriptors.CalcNumRotatableBonds(
            mol, rdMolDescriptors.NumRotatableBondsOptions.Strict)

    # (calculator, SD property holding the reference value, failure message)
    checks = (
        (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHDonors, 'NUM_HDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
         'bad num heteroatoms for mol %d (%s): %d != %d'),
        (Lipinski.NumRotatableBonds, rot_prop,
         'bad num rotors for mol %d (%s): %d != %d'),
        # test the underlying numrotatable bonds
        (nonstrict_rotors, NonStrict,
         'bad num rotors for mol %d (%s): %d != %d'),
        (strict_rotors, Strict,
         'bad num rotors for mol %d (%s): %d != %d'),
    )

    suppl = Chem.SDMolSupplier(self.inFileName)
    for idx, m in enumerate(suppl, 1):
        if not m:
            continue
        for calculate, prop_name, message in checks:
            calc = calculate(m)
            orig = int(m.GetProp(prop_name))
            assert calc == orig, message % (idx, m.GetProp('SMILES'), calc, orig)
def CalculateHacceptorNumber(mol):
    """
    Count the hydrogen bond acceptors in a molecule.

    Parameters:
        mol: rdkit molecule
    Returns:
        Hacceptor Number, as reported by LPK.NumHAcceptors
    """
    acceptor_count = LPK.NumHAcceptors(mol)
    return acceptor_count
def lipinski(smile):
    """Return (H-bond donors, H-bond acceptors, molecular weight, logP) for a SMILES string."""
    # Convert into Chem object
    molecule = Chem.MolFromSmiles(smile)

    weight = Descriptors.MolWt(molecule)
    log_p = Descriptors.MolLogP(molecule)
    donors = Lipinski.NumHDonors(molecule)
    acceptors = Lipinski.NumHAcceptors(molecule)

    return donors, acceptors, weight, log_p
def mole_proper(mol):
    """Return a 6-tuple of molecular properties.

    The order is: H-bond donors, H-bond acceptors, rotatable bonds,
    molecular weight, logP, TPSA.
    """
    return (
        Lipinski.NumHDonors(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumRotatableBonds(mol),
        Descriptors.MolWt(mol),
        Crippen.MolLogP(mol),
        Descriptors.TPSA(mol),
    )
def get_phys_fp(compound):
    """Build a four-element list of scaled physical properties for a compound.

    ``compound`` is indexed by ``'mol_weight'``, ``'dsstox_sid'`` and
    ``'smiles'``. The result is
    ``[mol_weight / 500, logp / 10, donors / 5, acceptors / 10]``; a falsy
    logP (for example ``None`` or ``0``) is passed through unscaled.
    """
    scaled_weight = compound['mol_weight'] / 500

    logp = get_logp(compound['dsstox_sid'])
    if logp:
        logp = logp / 10

    molecule = chm.MolFromSmiles(compound['smiles'])
    scaled_donors = lip.NumHDonors(molecule) / 5
    scaled_acceptors = lip.NumHAcceptors(molecule) / 10

    return [scaled_weight, logp, scaled_donors, scaled_acceptors]
def CalculateNumHAcceptors(mol):
    """
    Calculation of the number of Hydrogen Bond Acceptors

    --->nHA

    :param mol: molecular
    :type mol: rdkit.Chem.rdchem.Mol
    :return: the number of Hydrogen Bond Acceptors
    :rtype: int
    """
    return Lipinski.NumHAcceptors(mol)
def descriptors(self, mol):
    """Return an 11-element descriptor list for ``mol``.

    Order: aromatic fraction (from ``self.arofrac``), exact molecular
    weight, valence electrons, H-bond acceptors, H-bond donors, NO count,
    NHOH count, rotatable bonds, fraction of sp3 carbons, logP, and the
    number of substructure matches of the SMARTS ``[^1]``.
    """
    # NOTE(review): '[^1]' is presumably RDKit's hybridization query for
    # sp atoms - confirm against the SMARTS documentation.
    sp_query = Chem.MolFromSmarts('[^1]')

    features = [
        self.arofrac(mol),
        Descriptors.ExactMolWt(mol, False),
        Descriptors.NumValenceElectrons(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumHDonors(mol),
        Lipinski.NOCount(mol),
        Lipinski.NHOHCount(mol),
        Lipinski.NumRotatableBonds(mol),
        Lipinski.FractionCSP3(mol),
        Crippen.MolLogP(mol),
        len(mol.GetSubstructMatches(sp_query)),
    ]
    return features
def filter(mol, type = "frags"):
    """Decide whether ``mol`` passes the property filter.

    With ``type == "frags"`` the molecule needs at most 8 H-bond donors, at
    most 8 H-bond acceptors, at least one ring and a molecular weight of at
    most 800. For any other ``type`` it needs at most 5 donors, at most 5
    acceptors and a molecular weight of at most 500.

    The function name and the ``type`` parameter shadow builtins but are
    part of the public interface, so they are left unchanged.
    """
    donors = Lipinski.NumHDonors(mol)
    acceptors = Lipinski.NumHAcceptors(mol)
    ring_count = len(Chem.GetSymmSSSR(mol))
    weight = Chem.Descriptors.MolWt(mol)

    if type == "frags":
        return donors <= 8 and acceptors <= 8 and ring_count >= 1 and weight <= 800
    return donors <= 5 and acceptors <= 5 and weight <= 500
def check_lipinski(mol):
    """Check ``mol.rdmol`` against the donor/acceptor/logP limits used here.

    Returns ``(False, False)`` unless the molecule has at most 5 H-bond
    donors, at most 5 H-bond acceptors and logP below 5. Otherwise returns
    ``(True, False)``; in that case, when the molecular weight is 450 or
    more, a random ``'terminal_fg'`` functional group is first joined onto
    ``mol``.
    """
    fgs = load_functional_groups()

    donors = Lipinski.NumHDonors(mol.rdmol)
    acceptors = Lipinski.NumHAcceptors(mol.rdmol)
    log_p = MolLogP(mol.rdmol)
    weight = MolWt(mol.rdmol)

    if not (donors <= 5 and acceptors <= 5 and log_p < 5):
        return False, False

    if weight >= 450:
        mol.join(fgs['terminal_fg'].get_random())
    return True, False
def calcScore(self, m, smi):
    """Reset ``self.value`` to 0.0 and report whether ``m`` is acceptable.

    Returns False when the molecular weight is above 700 or below 100, or
    when there are more than 5 H-bond donors or more than 10 H-bond
    acceptors; True otherwise. ``smi`` is not used here.
    """
    self.value = 0.0

    weight = Descriptors.MolWt(m)
    if weight < 100 or weight > 700:
        return False

    donors = Lipinski.NumHDonors(m)
    acceptors = Lipinski.NumHAcceptors(m)
    return donors <= 5 and acceptors <= 10
def lipinski_trial(smiles):
    '''
    Report which of Lipinski's rules a molecule passes and which it fails.

    Lipinski's rules are:
    Hydrogen bond donors <= 5
    Hydrogen bond acceptors <= 10
    Molecular weight < 500 daltons
    logP < 5

    Returns a tuple (passed, failed) of two lists of message strings, one
    message per rule; failed is empty when every rule is satisfied.

    Raises ValueError (a subclass of Exception, which was raised before)
    when the SMILES string cannot be parsed.
    '''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError('%s is not a valid SMILES string' % smiles)

    passed = []
    failed = []

    num_hdonors = Lipinski.NumHDonors(mol)
    num_hacceptors = Lipinski.NumHAcceptors(mol)
    mol_weight = Descriptors.MolWt(mol)
    mol_logp = Crippen.MolLogP(mol)

    if num_hdonors > 5:
        failed.append('Over 5 H-bond donors, found %s' % num_hdonors)
    else:
        passed.append('Found %s H-bond donors' % num_hdonors)

    if num_hacceptors > 10:
        failed.append('Over 10 H-bond acceptors, found %s' % num_hacceptors)
    else:
        passed.append('Found %s H-bond acceptors' % num_hacceptors)

    if mol_weight >= 500:
        failed.append('Molecular weight over 500, calculated %s' % mol_weight)
    else:
        passed.append('Molecular weight: %s' % mol_weight)

    if mol_logp >= 5:
        failed.append('Log partition coefficient over 5, calculated %s' % mol_logp)
    else:
        passed.append('Log partition coefficient: %s' % mol_logp)

    return passed, failed
def run_filter(self, mol):
    """
    Apply the Strict Lipinski filter to a molecule.

    The Lipinski filter refines for orally available drugs. It looks at the
    molecular weight (MW), the number of hydrogen donors, the number of
    hydrogen acceptors, and the logP value. Being strict, a ligand must
    satisfy every requirement.

    To pass the Lipinski filter a molecule must be:
        MW: Max 500 dalton
        Number of H acceptors: Max 10
        Number of H donors: Max 5
        logP Max +5.0

    If you use the Lipinski Filter please cite: C.A. Lipinski et al.
    Experimental and computational approaches to estimate solubility and
    permeability in drug discovery and development settings Advanced Drug
    Delivery Reviews, 46 (2001), pp. 3-26

    Inputs:
    :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
        tested if it passes the filters

    Returns:
    :returns: bool bool: True if the mol passes the filter; False if it
        fails the filter
    """
    limits = (
        (Descriptors.ExactMolWt, 500),
        (Lipinski.NumHDonors, 5),
        (Lipinski.NumHAcceptors, 10),
        (Crippen.MolLogP, 5),
    )
    # any() stops at the first violation, so descriptors further down the
    # table are not computed once the molecule has already failed.
    return not any(measure(mol) > ceiling for measure, ceiling in limits)
def ProcessMol(session, mol, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
               redraw=False, keepHs=False, skipProps=False, addComputedProps=False,
               skipSmiles=False):
    """Create a ``Compound`` for a molecule and add it to the session.

    Args:
        session: object whose ``add`` method receives the new compound.
        mol: molecule to process; a falsy value raises ``ValueError``.
        globalProps: container of property names; only molecule properties
            found in it are copied onto the compound (as lower-cased
            attribute names, with the stripped string value).
        nDone: number used to build the fallback name ``'Mol_%d'``.
        nameProp: molecule property holding the compound name.
        nameCol: attribute of the compound that receives the name; a
            property whose name equals this (case-insensitively) is skipped.
        redraw: if true, 2D coordinates are recomputed for ``mol``.
        keepHs: if true, ``Chem.SanitizeMol(mol)`` is called first.
        skipProps: if true, no properties are copied or computed.
        addComputedProps: if true, donor/acceptor/rotatable-bond counts,
            molecular weight and logP are stored on the compound.
        skipSmiles: if true, ``cmpd.smiles`` is not set.

    Returns:
        The newly created ``Compound``.

    Raises:
        ValueError: if ``mol`` is falsy.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # Fixed: this used to reference an undefined name `m`.
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()
    setattr(cmpd, nameCol, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)

        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pn in globalProps:
                setattr(cmpd, pn.lower(), pv)

    return cmpd
def in_Ro5(mol):
    """
    Test whether a molecule is in Lipinski "Rule of 5" space, as
    implemented here:
    - 5 or fewer H bond donors
    - 10 or fewer H bond acceptors
    - MW <= 500 Da
    - logP < 5
    """
    if Lipinski.NumHDonors(mol) > 5:
        donors_ok = False
    else:
        donors_ok = True
    acceptors_ok = Lipinski.NumHAcceptors(mol) <= 10
    weight_ok = Descriptors.MolWt(mol) <= 500
    logp_ok = Descriptors.MolLogP(mol) < 5
    return donors_ok and acceptors_ok and weight_ok and logp_ok
def run_filter(self, mol):
    """
    Apply the Lenient Lipinski filter to a molecule.

    The Lipinski filter refines for orally available drugs. It looks at the
    molecular weight (MW), the number of hydrogen donors, the number of
    hydrogen acceptors, and the logP value. Being lenient, a ligand is
    allowed one violation of the Lipinski Rule of 5 restraints.

    The restraints checked are:
        MW: Max 500 dalton
        Number of H acceptors: Max 10
        Number of H donors: Max 5
        logP Max +5.0

    Inputs:
    :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
        tested if it passes the filters

    Returns:
    :returns: bool bool: True if the mol passes the filter; False if it
        fails the filter
    """
    limits = (
        (Descriptors.ExactMolWt, 500),
        (Lipinski.NumHDonors, 5),
        (Lipinski.NumHAcceptors, 10),
        (Crippen.MolLogP, 5),
    )
    violation_counter = sum(
        1 for measure, ceiling in limits if measure(mol) > ceiling)
    # Zero or one violation passes; two or more fail.
    return violation_counter < 2
def CalculateHacceptorNumber(mol):
    """
    #################################################################
    Count the hydrogen bond acceptors in a molecule

    ---->naccr

    Usage:

        result=CalculateHacceptorNumber(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value.
    #################################################################
    """
    naccr = LPK.NumHAcceptors(mol)
    return naccr
def calculate_properties_from_mol(self):
    """
    Calculate some molecular properties based on RDKit functionalities.

    The molecule is built from ``self.smiles`` and named after
    ``self.sequence``. Nothing is returned; the results are stored on the
    instance as ``num_hdonors``, ``num_hacceptors``, ``mol_weight``
    (molecular weight) and ``mol_logp`` (Crippen logP).
    """
    # Generate molecule from sequence
    molecule = Chem.MolFromSmiles(self.smiles)
    molecule.SetProp("_Name", self.sequence)

    # Calculate the descriptors
    descriptor_sources = (
        ('num_hdonors', Lipinski.NumHDonors),
        ('num_hacceptors', Lipinski.NumHAcceptors),
        ('mol_weight', Descriptors.MolWt),
        ('mol_logp', Crippen.MolLogP),
    )
    for attribute, calculate in descriptor_sources:
        setattr(self, attribute, calculate(molecule))
def PhyChem(smiles):
    """ Calculate the 19D physicochemical descriptors for each molecule and
    pass the resulting matrix through ``Scaler().fit_transform``.

    A molecule for which any descriptor raises is printed and contributes a
    row of 19 zeros.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled PhysChem descriptors.
            m is the No. of samples
    """
    # NOTE(review): the scaling applied depends on what `Scaler` is bound to
    # in this module (the earlier docstring described it as a Gaussian
    # normalization) - confirm.
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            row = [
                desc.MolWt(mol),                        # MW
                Crippen.MolLogP(mol),                   # LOGP
                Lipinski.NumHAcceptors(mol),            # HBA
                Lipinski.NumHDonors(mol),               # HBD
                Lipinski.NumRotatableBonds(mol),        # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),         # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol),    # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),           # hetero atoms
                Lipinski.HeavyAtomCount(mol),           # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),         # spiro atoms
                AllChem.CalcFractionCSP3(mol),          # fraction of sp3 carbons
                Lipinski.RingCount(mol),                # rings
                AllChem.CalcNumAliphaticRings(mol),     # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),      # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),     # saturated rings
                AllChem.CalcNumHeterocycles(mol),       # heterocycles
                MolSurf.TPSA(mol),                      # TPSA
                desc.NumValenceElectrons(mol),          # valence electrons
                Crippen.MolMR(mol),                     # molar refractivity
            ]
        except Exception:
            print(smile)
            row = [0] * 19
        rows.append(row)

    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
def CalculateNumHAcceptors(mol):
    """
    #################################################################
    Calculation of the number of Hydrogen Bond Acceptors

    ---->nHA

    Usage:

        result = CalculateNumHAcceptors(mol)

        Input: mol is a molecular object

        Output: result is a numeric value
    #################################################################
    """
    return Lipinski.NumHAcceptors(mol)
def properties(fnames, labels, is_active=False):
    """ Six structural properties calculated for each molecule in each given
    file: the number of hydrogen bond acceptors, hydrogen bond donors,
    rotatable bonds, aliphatic rings, aromatic rings and heterocycles.

    Each file is read as a table; rows are filtered on ``SCORE`` (if that
    column exists) or else on ``PCHEMBL_VALUE``, de-duplicated on
    ``CANONICAL_SMILES`` and randomly sampled down to 100000 rows if there
    are more.

    Arguments:
        fnames (list): the file path of molecules.
        labels (list): the label for each file in the fnames.
        is_active (bool, optional): selecting only active ligands (True) or
            all of the molecules (False). If it is true, only molecules with
            SCORE > 0.5 or PCHEMBL_VALUE >= 6.5 are kept; otherwise the
            thresholds are SCORE > 0 or PCHEMBL_VALUE >= 0. (Default: False)
    Returns:
        df (DataFrame): the table contains three columns; 'Set' is the label
            of fname the molecule belongs to, 'Property' is the name of one
            of the six properties, 'Number' is the property value.
    """
    # (value written to the 'Property' column, calculator) in output order
    measurements = (
        ('Hydrogen Bond\nAcceptor', Lipinski.NumHAcceptors),
        ('Hydrogen\nBond Donor', Lipinski.NumHDonors),
        ('Rotatable\nBond', Lipinski.NumRotatableBonds),
        ('Aliphatic\nRing', AllChem.CalcNumAliphaticRings),
        ('Aromatic\nRing', Lipinski.NumAromaticRings),
        ('Heterocycle', AllChem.CalcNumHeterocycles),
    )
    score_cutoff = 0.5 if is_active else 0
    pchembl_cutoff = 6.5 if is_active else 0
    sample_cap = int(1e5)

    records = []
    for i, fname in enumerate(fnames):
        table = pd.read_table(fname)
        if 'SCORE' in table.columns:
            table = table[table.SCORE > score_cutoff]
        elif 'PCHEMBL_VALUE' in table.columns:
            table = table[table.PCHEMBL_VALUE >= pchembl_cutoff]
        table = table.drop_duplicates(subset='CANONICAL_SMILES')
        if len(table) > sample_cap:
            table = table.sample(sample_cap)

        for smile in tqdm(table.CANONICAL_SMILES):
            mol = Chem.MolFromSmiles(smile)
            for title, count in measurements:
                number = count(mol)
                records.append([labels[i], title, number])

    return pd.DataFrame(records, columns=['Set', 'Property', 'Number'])