def get_filter_values(mol):
    """
    Compute the per-molecule values that the filters operate on.

    The result is a dictionary holding the descriptor values (``MW``,
    ``logP``, ``HBA``, ``HBD``, ``tPSA``, ``rot_bonds``, ``rigid_bonds``,
    ``num_rings``, ``num_hetero_atoms``, ``charge``, ``num_carbons``,
    ``num_charges``, ``max_ring_size``, ``hc_ratio``, ``fc``,
    ``num_chiral_centers``), the list of distinct element symbols
    (``atoms``), and two bookkeeping entries: ``is_good`` (starts as
    ``True``) and ``rejections`` (starts as an empty list).

    ``mol`` must be a ``Chem.Mol``; this is checked with an ``assert``.
    """
    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # assume mutual exclusion: every bond that is not rotatable counts as rigid
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    # trusting this charge calculation method
    values["charge"] = rdmolops.GetFormalCharge(mol)

    carbon_count, charge_count, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbon_count
    values["num_charges"] = charge_count
    values["max_ring_size"] = largest_ring

    try:
        hetero_to_carbon = float(values["num_hetero_atoms"]) / float(carbon_count)
    except ZeroDivisionError:
        # if there are zero carbons, fall back to a very large sentinel
        hetero_to_carbon = 100000000
    values["hc_ratio"] = hetero_to_carbon

    # how many BRICS bonds, related to complexity
    values["fc"] = sum(1 for _ in Brics.FindBRICSBonds(mol))
    # default to true, but not yet observed
    values["is_good"] = True

    # only the distinct element symbols are kept (order is not meaningful)
    values["atoms"] = list({atom.GetSymbol() for atom in mol.GetAtoms()})

    chiral_centers = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    values["num_chiral_centers"] = len(chiral_centers)
    # empty list to store the reasons for rejection
    values["rejections"] = []
    return values
def get_descriptors(mol, write=False):
    """
    Return a flat list of 20 Lipinski/Descriptors values for ``mol``.

    The values appear in the fixed order listed below.  Any entry that is
    NaN is replaced by ``0``.  The ``write`` argument is accepted but not
    used by this function.
    """
    raw_values = (
        Lipinski.NumAromaticHeterocycles(mol),
        Lipinski.NumAromaticRings(mol),
        Lipinski.NumHDonors(mol),
        Lipinski.RingCount(mol),
        Lipinski.NHOHCount(mol),
        Lipinski.NumHeteroatoms(mol),
        Lipinski.NumAliphaticCarbocycles(mol),
        Lipinski.NumSaturatedCarbocycles(mol),
        Lipinski.NumAliphaticHeterocycles(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumSaturatedHeterocycles(mol),
        Lipinski.NumAliphaticRings(mol),
        Descriptors.NumRadicalElectrons(mol),
        Descriptors.MaxPartialCharge(mol),
        Descriptors.NumValenceElectrons(mol),
        Lipinski.FractionCSP3(mol),
        Descriptors.MaxAbsPartialCharge(mol),
        Lipinski.NumAromaticCarbocycles(mol),
        Lipinski.NumSaturatedRings(mol),
        Lipinski.NumRotatableBonds(mol),
    )
    # NaN is the only value that compares unequal to itself
    return [0 if value != value else value for value in raw_values]
def PhyChem(smiles):
    """
    Compute 19 physicochemical descriptors for each molecule and rescale them.

    Arguments:
        smiles (list): list of SMILES strings.

    Returns:
        props (ndarray): m x 19 matrix of descriptors after being passed
            through ``Scaler().fit_transform``; m is the number of samples.

    If any descriptor call raises for a molecule, the offending SMILES is
    printed and that molecule's row is filled with zeros.
    """
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            row = [
                desc.MolWt(mol),                       # molecular weight
                Crippen.MolLogP(mol),                  # logP
                Lipinski.NumHAcceptors(mol),           # H-bond acceptors
                Lipinski.NumHDonors(mol),              # H-bond donors
                Lipinski.NumRotatableBonds(mol),       # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),        # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol),   # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),          # heteroatoms
                Lipinski.HeavyAtomCount(mol),          # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),        # spiro atoms
                AllChem.CalcFractionCSP3(mol),         # fraction of sp3 carbons
                Lipinski.RingCount(mol),               # rings
                AllChem.CalcNumAliphaticRings(mol),    # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),     # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),    # saturated rings
                AllChem.CalcNumHeterocycles(mol),      # heterocycles
                MolSurf.TPSA(mol),                     # topological polar surface area
                desc.NumValenceElectrons(mol),         # valence electrons
                Crippen.MolMR(mol),                    # molar refractivity
            ]
        except Exception:
            # best effort: report the SMILES and keep the row count aligned
            print(smile)
            row = [0] * 19
        rows.append(row)

    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
def __init__(self, *args, **kwargs):
    """
    Build a compound from an RDKit molecule and an optional description.

    The molecule is taken from the first positional argument or, failing
    that, from the ``mol_as_RDmol`` keyword.  The description is taken from
    the second positional argument or, failing that, from the
    ``description`` keyword (default ``''``).  The identifier, SMILES,
    InChI, InChI key, exact weight, heavy-atom count and ring count are
    derived from the molecule and forwarded to the parent constructor.

    When more than two positional arguments are given, everything is handed
    to the parent constructor untouched.

    Raises:
        RuntimeError: if no molecule was supplied.
    """
    if len(args) > 2:
        # more positionals than (mol, description): defer entirely to the parent
        super(Compound, self).__init__(*args, **kwargs)
        return

    rdmol = args[0] if args else None
    if not rdmol:
        rdmol = kwargs.get('mol_as_RDmol')
    if not rdmol:
        raise RuntimeError("No RDMol specified")

    description = args[1] if len(args) > 1 else None
    if not description:
        description = kwargs.get('description', '')

    unique_id = self._generate_id()
    smiles = Chem.MolToSmiles(rdmol, isomericSmiles=True, canonical=True)
    inchi = Chem.MolToInchi(rdmol)

    super(Compound, self).__init__(
        description=description,
        unique_id=unique_id,
        smiles=smiles,
        inchi=inchi,
        inchi_key=Chem.InchiToInchiKey(inchi),
        mol_weight_exact=Descriptors.ExactMolWt(rdmol),
        heavy_atoms_count=Lipinski.HeavyAtomCount(rdmol),
        ring_count=Lipinski.RingCount(rdmol),
        mol=rdmol,
    )
# Descriptor distributions for the molecules in `org_mols`.
# Scores that fail to compute (QED, SA) are skipped, so those two lists
# may be shorter than `org_mols`.

## logP
org_logp = list(map(Descriptors.MolLogP, tqdm(org_mols)))

## Bertz TC
org_tcs = list(map(BertzCT, tqdm(org_mols)))

## TPSA
org_tpsa = list(map(Descriptors.TPSA, org_mols))

## QED
org_qed = []
for mol in org_mols:
    try:
        qed_value = Descriptors.qed(mol)
    except OverflowError:
        continue
    org_qed.append(qed_value)

## number of rings
org_rings1 = list(map(Lipinski.RingCount, tqdm(org_mols)))
org_rings2 = list(map(Lipinski.NumAliphaticRings, tqdm(org_mols)))
org_rings3 = list(map(Lipinski.NumAromaticRings, tqdm(org_mols)))

## SA score
org_SA = []
for mol in tqdm(org_mols):
    try:
        sa_value = sascorer.calculateScore(mol)
    except (OverflowError, ZeroDivisionError):
        continue
    org_SA.append(sa_value)

## NP-likeness
fscore = npscorer.readNPModel()
org_NP = [npscorer.scoreMol(m, fscore) for m in tqdm(org_mols)]

## % sp3 carbons
org_sp3 = list(map(Lipinski.FractionCSP3, org_mols))
def calc_rdkit(mol):
    """
    Evaluate a fixed, ordered panel of RDKit descriptor functions on ``mol``.

    Every function in the table below is called once with ``mol``.  The
    results are collected in table order into a NumPy array and returned as
    a ``pandas.Series`` (no explicit index is supplied).  Entry positions
    are significant, so the table must not be reordered.
    """
    descriptor_fns = (
        # Crippen
        Crippen.MolLogP,
        Crippen.MolMR,
        # Descriptors
        Descriptors.FpDensityMorgan1,
        Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3,
        Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt,
        Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge,
        Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge,
        Descriptors.MolWt,
        Descriptors.NumRadicalElectrons,
        Descriptors.NumValenceElectrons,
        # EState indices
        EState.EState.MaxAbsEStateIndex,
        EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex,
        EState.EState.MinEStateIndex,
        # EState VSA
        EState.EState_VSA.EState_VSA1,
        EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11,
        EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3,
        EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5,
        EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7,
        EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        # Fragments
        Fragments.fr_Al_COO,
        Fragments.fr_Al_OH,
        Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde,
        Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide,
        Fragments.fr_allylic_oxid,
        Fragments.fr_amide,
        Fragments.fr_amidine,
        Fragments.fr_aniline,
        Fragments.fr_Ar_COO,
        Fragments.fr_Ar_N,
        Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH,
        Fragments.fr_ArN,
        Fragments.fr_aryl_methyl,
        Fragments.fr_azide,
        Fragments.fr_azo,
        Fragments.fr_barbitur,
        Fragments.fr_benzene,
        Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic,
        Fragments.fr_C_O,
        Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S,
        Fragments.fr_COO,
        Fragments.fr_COO2,
        Fragments.fr_diazo,
        Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide,
        Fragments.fr_ester,
        Fragments.fr_ether,
        Fragments.fr_furan,
        Fragments.fr_guanido,
        Fragments.fr_halogen,
        Fragments.fr_hdrzine,
        Fragments.fr_hdrzone,
        Fragments.fr_HOCCN,
        Fragments.fr_imidazole,
        Fragments.fr_imide,
        Fragments.fr_Imine,
        Fragments.fr_isocyan,
        Fragments.fr_isothiocyan,
        Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss,
        Fragments.fr_lactam,
        Fragments.fr_lactone,
        Fragments.fr_methoxy,
        Fragments.fr_morpholine,
        Fragments.fr_N_O,
        Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2,
        Fragments.fr_NH0,
        Fragments.fr_NH1,
        Fragments.fr_NH2,
        Fragments.fr_Nhpyrrole,
        Fragments.fr_nitrile,
        Fragments.fr_nitro,
        Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho,
        Fragments.fr_nitroso,
        Fragments.fr_oxazole,
        Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation,
        Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond,
        Fragments.fr_phos_acid,
        Fragments.fr_phos_ester,
        Fragments.fr_piperdine,
        Fragments.fr_piperzine,
        Fragments.fr_priamide,
        Fragments.fr_prisulfonamd,
        Fragments.fr_pyridine,
        Fragments.fr_quatN,
        Fragments.fr_SH,
        Fragments.fr_sulfide,
        Fragments.fr_sulfonamd,
        Fragments.fr_sulfone,
        Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole,
        Fragments.fr_thiazole,
        Fragments.fr_thiocyan,
        Fragments.fr_thiophene,
        Fragments.fr_unbrch_alkane,
        Fragments.fr_urea,
        # GraphDescriptors
        GraphDescriptors.BalabanJ,
        GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0,
        GraphDescriptors.Chi0n,
        GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1,
        GraphDescriptors.Chi1n,
        GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n,
        GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n,
        GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n,
        GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha,
        GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1,
        GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        # Lipinski
        Lipinski.HeavyAtomCount,
        Lipinski.NHOHCount,
        Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors,
        Lipinski.NumHDonors,
        Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        # MolSurf
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1,
        MolSurf.PEOE_VSA10,
        MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12,
        MolSurf.PEOE_VSA13,
        MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2,
        MolSurf.PEOE_VSA3,
        MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5,
        MolSurf.PEOE_VSA6,
        MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8,
        MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1,
        MolSurf.SlogP_VSA10,
        MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12,
        MolSurf.SlogP_VSA2,
        MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4,
        MolSurf.SlogP_VSA5,
        MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7,
        MolSurf.SlogP_VSA8,
        MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1,
        MolSurf.SMR_VSA10,
        MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3,
        MolSurf.SMR_VSA4,
        MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6,
        MolSurf.SMR_VSA7,
        MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    computed = [compute(mol) for compute in descriptor_fns]
    return pd.Series(np.array(computed))
# Descriptor distributions for the molecules in `mols`.
# Scores that fail to compute (QED, SA) are skipped, so those two lists
# may be shorter than `mols`.

## TPSA
tpsa = [Descriptors.TPSA(mol) for mol in mols]

## QED
qed = []
for mol in tqdm(mols):
    try:
        qed.append(Descriptors.qed(mol))
    except OverflowError:
        # best effort: molecules whose QED overflows are left out
        pass

## % of sp3 carbons
pct_sp3 = [Lipinski.FractionCSP3(mol) for mol in tqdm(mols)]

## % heteroatoms
# NOTE(review): divides by the atom count, so a molecule with zero atoms
# raises ZeroDivisionError here -- confirm `mols` never contains one.
pct_hetero = [
    Lipinski.NumHeteroatoms(mol) / mol.GetNumAtoms() for mol in tqdm(mols)
]

## number of rings
rings = [Lipinski.RingCount(mol) for mol in tqdm(mols)]

## SA score
SA = []
for mol in tqdm(mols):
    try:
        SA.append(sascorer.calculateScore(mol))
    except (OverflowError, ZeroDivisionError):
        # best effort: molecules the SA scorer cannot handle are left out
        pass

## NP-likeness
fscore = npscorer.readNPModel()
NP = [npscorer.scoreMol(mol, fscore) for mol in tqdm(mols)]

# add all outcomes to data frame
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
# call and also works on older pandas versions.
res = pd.concat([
    res,
    pd.DataFrame({'outcome': 'Molecular weight', 'value': mws}),
])
def calculate_metrics(mol):
    """
    Calculate a dictionary of chemical descriptors for one molecule.

    The returned dict has one entry per descriptor (sp3 fraction, H-bond
    acceptors/donors, fraction of rotatable bonds, ring counts,
    stereocentres, TPSA, logP, molecular weight, rule-of-5 flag, fraction
    of heteroatoms, Murcko scaffold SMILES, NP-likeness, synthetic
    accessibility, Bertz complexity) plus one
    ``'Atoms with symbol <X>'`` entry per element present in ``mol``.

    The two fractions are reported as ``0`` when the molecule has no bonds
    or no atoms respectively, instead of dividing by zero.  The
    NP-likeness and SA scores are ``None`` when their scorer raises
    ``ValueError``.  The scorers use the module-level ``np_mod`` and
    ``sa_mod`` objects.
    """
    # calculate chemical descriptors
    ## % of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)

    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)

    ## fraction of rotatable bonds
    n_bonds = mol.GetNumBonds()
    if n_bonds > 0:
        rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds
    else:
        rot_bonds = 0

    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)

    ## number of stereocentres (stereochemistry is assigned first)
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)

    ## polarity
    tpsa = Chem.CalcTPSA(mol)

    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)

    ## molecular weight
    mw = Descriptors.MolWt(mol)

    ## in Lipinski space?
    Ro5 = in_Ro5(mol)

    ## number of each atom
    symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
    atom_counts = Counter(symbols)

    ## % heteroatoms
    # guarded like the rotatable-bond fraction above, so an atom-less
    # molecule yields 0 rather than a ZeroDivisionError
    n_atoms = len(symbols)
    if n_atoms > 0:
        pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms
    else:
        pct_hetero = 0

    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))

    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None

    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None

    ## topological complexity
    bertz_idx = BertzCT(mol)

    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx,
    }

    # append atom counts
    for symbol, count in atom_counts.items():
        metrics['Atoms with symbol ' + symbol] = count

    return metrics