def get_descriptors(mol, write=False):
    """Compute a fixed-order list of 20 RDKit descriptors for one molecule.

    The descriptors are evaluated in the order of the ``calculators`` table
    below (mostly ``Lipinski`` counts, plus a few ``Descriptors`` values such
    as partial charges and electron counts).

    :param mol: molecule object handed to every descriptor function.
    :param write: accepted for interface compatibility; not used here.
    :return: list of 20 descriptor values, with every NaN replaced by 0.
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )
    raw_values = [calculate(mol) for calculate in calculators]

    # A NaN compares unequal to itself, so the self-comparison singles out
    # NaN entries without needing an extra import.
    return [0 if value != value else value for value in raw_values]
def get_descriptors(df):
    """Build a table of chemical descriptors for the molecules in ``df``.

    One row is produced per entry of ``df['ROMol']``:

    * ``MW``, ``LogP``, ``TPSA``, ``LabuteASA`` -- rounded to one decimal;
    * ``HBA``, ``HBD`` -- H-bond acceptor / donor counts;
    * ``FCSP3`` -- ``Lipinski.FractionCSP3``;
    * ``MQN8``, ``MQN10`` -- elements 7 and 9 of ``rdMolDescriptors.MQNs_``;
    * ``NAR`` -- aromatic ring count;
    * ``NRB`` -- rotatable bond count.

    :param df: DataFrame with a ``'ROMol'`` column of molecule objects.
    :return: DataFrame with the columns above, sharing ``df``'s index. An
        empty ``df`` yields an empty frame that still has all 11 columns.
    """
    # Global PandasTools setting; presumably makes molecule cells render as
    # strings instead of images -- kept because callers may rely on it.
    PandasTools.ChangeMoleculeRendering(renderer='String')

    columns = [
        'MW', 'LogP', 'TPSA', 'LabuteASA', 'HBA', 'HBD',
        'FCSP3', 'MQN8', 'MQN10', 'NAR', 'NRB',
    ]

    rows = []
    for m in df['ROMol']:
        # Compute the MQN vector once and pick both entries from it.
        mqns = rdMolDescriptors.MQNs_(m)
        rows.append([
            round(Descriptors.ExactMolWt(m), 1),
            round(Descriptors.MolLogP(m), 1),
            round(Descriptors.TPSA(m), 1),
            round(Descriptors.LabuteASA(m), 1),
            Descriptors.NumHAcceptors(m),
            Descriptors.NumHDonors(m),
            Lipinski.FractionCSP3(m),
            mqns[7],
            mqns[9],
            Lipinski.NumAromaticRings(m),
            Descriptors.NumRotatableBonds(m),
        ])

    # Passing columns/index to the constructor (rather than assigning them
    # afterwards) keeps this working when there are no rows at all.
    return pd.DataFrame(rows, columns=columns, index=df.index)
def CalculateFsp3(mol):
    """
    Carbon bond saturation (Fsp3) of a molecule: the number of sp3
    hybridized carbons divided by the total carbon count.
    --->FSP3

    :param mol: molecule to evaluate
    :type mol: rdkit.Chem.rdchem.Mol
    :return: the carbon bond saturation, rounded to two decimals
    :rtype: float
    """
    fraction_sp3 = Lipinski.FractionCSP3(mol)
    return round(fraction_sp3, 2)
def descriptors(self, mol):
    """Return an 11-element descriptor list for ``mol``.

    Order of the returned values:
    aromatic fraction (``self.arofrac``), exact molecular weight, valence
    electron count, H-bond acceptors, H-bond donors, N/O count, NH/OH
    count, rotatable bonds, fraction of sp3 carbons, Crippen logP, and the
    number of substructure matches of the SMARTS ``[^1]``.

    :param mol: molecule object passed to each RDKit descriptor call.
    :return: list of the 11 values in the order above.
    """
    features = [
        self.arofrac(mol),
        # Second positional argument is forwarded unchanged
        # (presumably ``onlyHeavy=False`` -- confirm against RDKit).
        Descriptors.ExactMolWt(mol, False),
        Descriptors.NumValenceElectrons(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumHDonors(mol),
        Lipinski.NOCount(mol),
        Lipinski.NHOHCount(mol),
        Lipinski.NumRotatableBonds(mol),
        Lipinski.FractionCSP3(mol),
        Crippen.MolLogP(mol),
    ]

    # NOTE(review): ``^1`` looks like RDKit's hybridization query for SP
    # atoms, so this counts matching atoms rather than bonds -- confirm.
    sp_query = Chem.MolFromSmarts('[^1]')
    features.append(len(mol.GetSubstructMatches(sp_query)))
    return features
def CalculateFsp3(mol):
    """
    #################################################################
    Carbon bond saturation (Fsp3): the number of sp3 hybridized
    carbons divided by the total carbon count.

    ---->FSP3

    Usage:

        result = CalculateFsp3(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value rounded to two decimals.
    #################################################################
    """
    saturation = Lipinski.FractionCSP3(mol)
    rounded_saturation = round(saturation, 2)
    return rounded_saturation
## ring counts: all rings, aliphatic rings, aromatic rings
org_rings1 = list(map(Lipinski.RingCount, tqdm(org_mols)))
org_rings2 = list(map(Lipinski.NumAliphaticRings, tqdm(org_mols)))
org_rings3 = list(map(Lipinski.NumAromaticRings, tqdm(org_mols)))

## SA score
# Molecules whose score raises OverflowError/ZeroDivisionError are skipped,
# so org_SA can be shorter than org_mols.
org_SA = []
for mol in tqdm(org_mols):
    try:
        org_SA.append(sascorer.calculateScore(mol))
    except (OverflowError, ZeroDivisionError):
        continue

## NP-likeness
fscore = npscorer.readNPModel()
org_NP = [npscorer.scoreMol(m, fscore) for m in tqdm(org_mols)]

## % sp3 carbons
org_sp3 = list(map(Lipinski.FractionCSP3, org_mols))

## % rotatable bonds
org_rot = list(map(pct_rotatable_bonds, org_mols))

## % of stereocentres
org_stereo = list(map(pct_stereocentres, org_mols))

## Murcko scaffolds
# An explicit loop (not a comprehension) so molecules that raise ValueError
# are dropped instead of aborting the whole run.
org_murcko = []
for mol in org_mols:
    try:
        org_murcko.append(MurckoScaffoldSmiles(mol=mol))
    except ValueError:
        continue
# (unique scaffold SMILES, occurrence counts)
org_murcko_counts = np.unique(org_murcko, return_counts=True)

## hydrogen donors/acceptors
org_donors = list(map(Lipinski.NumHDonors, org_mols))
## logP
logp = list(map(Descriptors.MolLogP, tqdm(mols)))

## Bertz TC
tcs = list(map(BertzCT, tqdm(mols)))

## TPSA
tpsa = list(map(Descriptors.TPSA, mols))

## QED
# Molecules for which QED raises OverflowError are skipped, so qed can be
# shorter than mols.
qed = []
for mol in tqdm(mols):
    try:
        qed.append(Descriptors.qed(mol))
    except OverflowError:
        continue

## % of sp3 carbons
pct_sp3 = list(map(Lipinski.FractionCSP3, tqdm(mols)))

## % heteroatoms
# NOTE(review): divides by the atom count with no guard; a molecule with
# zero atoms would raise ZeroDivisionError -- confirm inputs exclude that.
pct_hetero = [
    Lipinski.NumHeteroatoms(m) / m.GetNumAtoms()
    for m in tqdm(mols)
]

## number of rings
rings = list(map(Lipinski.RingCount, tqdm(mols)))

## SA score
# Same skip-on-error policy as QED above.
SA = []
for mol in tqdm(mols):
    try:
        SA.append(sascorer.calculateScore(mol))
    except (OverflowError, ZeroDivisionError):
        continue

## NP-likeness
fscore = npscorer.readNPModel()
def calculate_metrics(mol):
    """Compute a dictionary of chemical descriptors for a single molecule.

    Fixed keys in the result:
    ``'% sp3 carbons'``, ``'H bond acceptors'``, ``'H bond donors'``,
    ``'% rotatable bonds'`` (rotatable bonds / total bonds, 0 when the
    molecule has no bonds), ``'Rings'``, ``'Rings, aliphatic'``,
    ``'Rings, aromatic'``, ``'Stereocentres'``,
    ``'Topological polar surface area'``, ``'LogP'``,
    ``'Molecular weight'``, ``'Lipinski rule of 5'``,
    ``'% heteroatoms'`` (heteroatoms / total atoms, 0 when the molecule has
    no atoms), ``'Murcko scaffold'`` (scaffold SMILES),
    ``'NP-likeness score'``, ``'Synthetic accessibility score'`` and
    ``'Bertz topological complexity'``.

    In addition, one ``'Atoms with symbol <X>'`` key is added per element
    symbol present in the molecule, holding the number of such atoms.

    Relies on the module-level names ``np_mod`` and ``sa_mod`` (scoring
    models) and on ``in_Ro5``, ``calculateNPScore`` and
    ``calculateSAScore`` being defined elsewhere in the file.

    :param mol: molecule object; ``Chem.AssignStereochemistry`` is called
        on it before stereocentres are counted.
    :return: dict mapping metric name to value. The NP-likeness and
        synthetic-accessibility entries are ``None`` when their scorer
        raises ``ValueError``.
    """
    # calculate chemical descriptors
    ## % of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)
    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)
    ## fraction of rotatable bonds (guarded: a lone atom has no bonds)
    n_bonds = mol.GetNumBonds()
    if n_bonds > 0:
        rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds
    else:
        rot_bonds = 0
    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)
    ## number of stereocentres
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)
    ## polarity
    tpsa = Chem.CalcTPSA(mol)
    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)
    ## molecular weight
    mw = Descriptors.MolWt(mol)
    ## in Lipinski space?
    Ro5 = in_Ro5(mol)
    ## fraction of heteroatoms
    # Guarded like the rotatable-bond ratio above so that a molecule with
    # no atoms yields 0 instead of raising ZeroDivisionError.
    n_atoms = len(mol.GetAtoms())
    if n_atoms > 0:
        pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms
    else:
        pct_hetero = 0
    ## number of each atom
    atom_counts = Counter(atom.GetSymbol() for atom in mol.GetAtoms())
    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))
    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None
    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None
    ## topological complexity
    bertz_idx = BertzCT(mol)

    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx,
    }

    # append atom counts
    for symbol, count in atom_counts.items():
        metrics['Atoms with symbol ' + symbol] = count

    return metrics