def get_filter_values(mol):
    """
    Compute the per-molecule properties that the filtering step inspects.

    mol: the molecule to profile; must be a ``Chem.Mol`` (checked with an
        assertion)

    Returns a dictionary keyed by property name. Besides the computed
    properties it carries two bookkeeping entries: "is_good" (initialised
    to True) and "rejections" (initialised to an empty list, meant to
    collect the reasons for rejection).
    """
    assert isinstance(mol, Chem.Mol)

    exact_mw = desc.CalcExactMolWt(mol)
    log_p = crip.MolLogP(mol)
    acceptors = lip.NumHAcceptors(mol)
    donors = lip.NumHDonors(mol)
    tpsa = desc.CalcTPSA(mol)
    rotatable = lip.NumRotatableBonds(mol)
    # Every bond that is not rotatable is counted as rigid, i.e. the two
    # categories are assumed to be mutually exclusive.
    rigid = mol.GetNumBonds() - rotatable
    ring_count = lip.RingCount(mol)
    hetero_atoms = lip.NumHeteroatoms(mol)
    # The formal charge reported by rdmolops is taken at face value.
    formal_charge = rdmolops.GetFormalCharge(mol)
    carbons, charges, largest_ring = get_atom_props(mol)

    try:
        hetero_carbon_ratio = float(hetero_atoms) / float(carbons)
    except ZeroDivisionError:
        # No carbons at all: use a very large sentinel ratio instead.
        hetero_carbon_ratio = 100000000

    # Number of BRICS bonds, used as a proxy for complexity.
    brics_bond_count = sum(1 for _ in Brics.FindBRICSBonds(mol))

    # Distinct element symbols present in the molecule (order is arbitrary).
    element_types = list({atom.GetSymbol() for atom in mol.GetAtoms()})

    chiral_center_count = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))

    return {
        "MW": exact_mw,
        "logP": log_p,
        "HBA": acceptors,
        "HBD": donors,
        "tPSA": tpsa,
        "rot_bonds": rotatable,
        "rigid_bonds": rigid,
        "num_rings": ring_count,
        "num_hetero_atoms": hetero_atoms,
        "charge": formal_charge,
        "num_carbons": carbons,
        "num_charges": charges,
        "max_ring_size": largest_ring,
        "hc_ratio": hetero_carbon_ratio,
        "fc": brics_bond_count,
        "is_good": True,  # default verdict until a filter says otherwise
        "atoms": element_types,
        "num_chiral_centers": chiral_center_count,
        "rejections": [],  # filled later with the reasons for rejection
    }
def filter2(filter1_dict, input_nr_of_hetero_atoms):
    """
    Keep only the substructures that contain enough heteroatoms.

    filter1_dict: dict mapping a structure identifier to a list of
        substructure SMILES strings; the string '<NA>' marks "no
        substructure" and is never parsed
    input_nr_of_hetero_atoms: integer, minimum number of heteroatoms a
        substructure must contain to be kept

    Returns a dict with the same keys as filter1_dict. Each value is the
    list of SMILES that passed the threshold, or ['<NA>'] when none did.
    SMILES that cannot be parsed into a molecule are skipped.

    Side effect: prints a short summary with the number of substructures
    that survived the filter.
    """
    filter2_dict = {}
    for structure_id, smiles_list in filter1_dict.items():
        for smile in smiles_list:
            if smile == '<NA>':
                continue
            sub_mol = Chem.MolFromSmiles(smile)
            if sub_mol is None:
                # Unparsable SMILES: there is no molecule to count
                # heteroatoms on, so it cannot pass the filter.
                continue
            if Lipinski.NumHeteroatoms(sub_mol) >= input_nr_of_hetero_atoms:
                filter2_dict.setdefault(structure_id, []).append(smile)
        # Every identifier stays in the output, with a placeholder when
        # none of its substructures passed.
        filter2_dict.setdefault(structure_id, ['<NA>'])

    nr_of_subs = sum(
        1
        for kept in filter2_dict.values()
        for smile in kept
        if smile != '<NA>'
    )
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('nr of substructures after filter 2 [hetero atoms]')
    print(nr_of_subs)
    return filter2_dict
def get_descriptors(mol, write=False):
    """Return a flat list of 20 descriptor values computed for ``mol``.

    The values come from the ``Lipinski`` and ``Descriptors`` modules, in
    the fixed order listed below. Any entry that is NaN is replaced by 0.

    ``write`` is accepted for interface compatibility but is not used here.
    """
    raw_values = [
        Lipinski.NumAromaticHeterocycles(mol),
        Lipinski.NumAromaticRings(mol),
        Lipinski.NumHDonors(mol),
        Lipinski.RingCount(mol),
        Lipinski.NHOHCount(mol),
        Lipinski.NumHeteroatoms(mol),
        Lipinski.NumAliphaticCarbocycles(mol),
        Lipinski.NumSaturatedCarbocycles(mol),
        Lipinski.NumAliphaticHeterocycles(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumSaturatedHeterocycles(mol),
        Lipinski.NumAliphaticRings(mol),
        Descriptors.NumRadicalElectrons(mol),
        Descriptors.MaxPartialCharge(mol),
        Descriptors.NumValenceElectrons(mol),
        Lipinski.FractionCSP3(mol),
        Descriptors.MaxAbsPartialCharge(mol),
        Lipinski.NumAromaticCarbocycles(mol),
        Lipinski.NumSaturatedRings(mol),
        Lipinski.NumRotatableBonds(mol),
    ]
    # NaN is the only value that compares unequal to itself, so this
    # self-comparison swaps NaN entries for 0 and leaves the rest alone.
    cleaned = [0 if value != value else value for value in raw_values]
    return cleaned
def test1(self):
    " testing first 200 mols from NCI "
    # Work out which rotatable-bond definition Lipinski.NumRotatableBonds
    # is using by probing a reference molecule: a count of 2 selects the
    # NonStrict reference property, anything else the Strict one.
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

    donors_msg = 'bad num h donors for mol %d (%s): %d != %d'
    acceptors_msg = 'bad num h acceptors for mol %d (%s): %d != %d'
    hetero_msg = 'bad num heteroatoms for mol %d (%s): %d != %d'
    rotors_msg = 'bad num rotors for mol %d (%s): %d != %d'

    # (descriptor function, SD property holding the reference value,
    #  failure message template), checked in this order for every molecule.
    checks = (
        (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS', donors_msg),
        (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS', acceptors_msg),
        (Lipinski.NumHDonors, 'NUM_HDONORS', donors_msg),
        (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS', acceptors_msg),
        (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS', hetero_msg),
        (Lipinski.NumRotatableBonds, rot_prop, rotors_msg),
        # test the underlying numrotatable bonds
        (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
            mol, rdMolDescriptors.NumRotatableBondsOptions.NonStrict),
         NonStrict, rotors_msg),
        (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
            mol, rdMolDescriptors.NumRotatableBondsOptions.Strict),
         Strict, rotors_msg),
    )

    # idx is the 1-based position in the supplier; entries that failed to
    # load still consume an index.
    for idx, m in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
        if not m:
            continue
        for compute, prop_name, template in checks:
            calc = compute(m)
            orig = int(m.GetProp(prop_name))
            assert calc == orig, template % (
                idx, m.GetProp('SMILES'), calc, orig)
def PhyChem(smiles):
    """ Compute 19 physicochemical descriptors for each molecule and
    rescale the resulting matrix with ``Scaler().fit_transform``.

    A SMILES whose descriptors cannot be computed is printed and
    contributes a row of 19 zeros (before scaling).

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled PhysChem descriptors,
            where m is the No. of samples
    """
    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        try:
            row = [
                desc.MolWt(mol),                       # MW
                Crippen.MolLogP(mol),                  # LOGP
                Lipinski.NumHAcceptors(mol),           # HBA
                Lipinski.NumHDonors(mol),              # HBD
                Lipinski.NumRotatableBonds(mol),       # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),        # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol),   # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),          # heteroatoms
                Lipinski.HeavyAtomCount(mol),          # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),        # spiro atoms
                AllChem.CalcFractionCSP3(mol),         # fraction of sp3 C
                Lipinski.RingCount(mol),               # rings
                AllChem.CalcNumAliphaticRings(mol),    # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),     # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),    # saturated rings
                AllChem.CalcNumHeterocycles(mol),      # heterocycles
                MolSurf.TPSA(mol),                     # TPSA
                desc.NumValenceElectrons(mol),         # valence electrons
                Crippen.MolMR(mol),                    # molar refractivity
            ]
        except Exception:
            # Best effort: report the offending SMILES and keep the row
            # count aligned with the input by falling back to zeros.
            print(smi)
            row = [0] * 19
        rows.append(row)

    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
def test1(self):
    " testing first 200 mols from NCI "
    donors_msg = 'bad num h donors for mol %d (%s): %d != %d'
    acceptors_msg = 'bad num h acceptors for mol %d (%s): %d != %d'
    hetero_msg = 'bad num heteroatoms for mol %d (%s): %d != %d'
    rotors_msg = 'bad num rotors for mol %d (%s): %d != %d'

    # (descriptor function, SD property holding the reference value,
    #  failure message template), checked in this order for every molecule.
    checks = (
        (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS', donors_msg),
        (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS', acceptors_msg),
        (Lipinski.NumHDonors, 'NUM_HDONORS', donors_msg),
        (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS', acceptors_msg),
        (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS', hetero_msg),
        (Lipinski.NumRotatableBonds, 'NUM_ROTATABLEBONDS', rotors_msg),
    )

    # idx is the 1-based position in the supplier; entries that failed to
    # load still consume an index.
    for idx, m in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
        if not m:
            continue
        for compute, prop_name, template in checks:
            calc = compute(m)
            orig = int(m.GetProp(prop_name))
            assert calc == orig, template % (
                idx, m.GetProp('SMILES'), calc, orig)
def extract(x, from_smiles):
    """Build the descriptor vector for one molecule.

    x: a SMILES string when ``from_smiles`` is true, otherwise a molecule
        object that is used as-is
    from_smiles: whether ``x`` must first be parsed with
        ``Chem.MolFromSmiles``

    Returns a list of 24 values, or 29 when the enclosing-scope flag
    ``include_3D`` is set (the five extra entries are 3D shape
    descriptors). A missing or atom-less molecule yields a list of zeros
    of the matching length.
    """
    mol = Chem.MolFromSmiles(x) if from_smiles else x

    if mol is None or len(mol.GetAtoms()) == 0:
        return [0] * 29 if include_3D else [0] * 24

    features = [
        Crippen.MolLogP(mol),                    # logP
        Crippen.MolMR(mol),                      # molar refractivity
        Descriptors.MolWt(mol),                  # weight
        Descriptors.ExactMolWt(mol),             # exact weight
        Descriptors.HeavyAtomMolWt(mol),         # heavy-atom weight
        Lipinski.HeavyAtomCount(mol),            # heavy-atom count
        Lipinski.NHOHCount(mol),                 # NH/OH count
        Lipinski.NOCount(mol),                   # N/O count
        Lipinski.NumHAcceptors(mol),             # H-bond acceptors
        Lipinski.NumHDonors(mol),                # H-bond donors
        Lipinski.NumHeteroatoms(mol),            # heteroatoms
        Lipinski.NumRotatableBonds(mol),         # rotatable bonds
        Descriptors.NumValenceElectrons(mol),    # valence electrons
        rdMolDescriptors.CalcNumAmideBonds(mol),  # amide bonds
        Lipinski.NumAliphaticRings(mol),         # aliphatic rings
        Lipinski.NumAromaticRings(mol),          # aromatic rings
        Lipinski.NumSaturatedRings(mol),         # saturated rings
        Lipinski.NumAliphaticCarbocycles(mol),   # aliphatic carbocycles
        Lipinski.NumAliphaticHeterocycles(mol),  # aliphatic heterocycles
        Lipinski.NumAromaticCarbocycles(mol),    # aromatic carbocycles
        Lipinski.NumAromaticHeterocycles(mol),   # aromatic heterocycles
        Lipinski.NumSaturatedCarbocycles(mol),   # saturated carbocycles
        Lipinski.NumSaturatedHeterocycles(mol),  # saturated heterocycles
        rdMolDescriptors.CalcTPSA(mol),          # TPSA
    ]
    if not include_3D:
        return features

    # The shape descriptors are computed on a copy with explicit hydrogens
    # that is embedded and then MMFF-optimised.
    # NOTE(review): the return codes of EmbedMolecule / MMFFOptimizeMolecule
    # are not checked - confirm how embedding failures should be handled.
    mol_3D = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol_3D)
    AllChem.MMFFOptimizeMolecule(mol_3D)
    features.extend([
        rdMolDescriptors.CalcEccentricity(mol_3D),
        rdMolDescriptors.CalcAsphericity(mol_3D),
        rdMolDescriptors.CalcSpherocityIndex(mol_3D),
        rdMolDescriptors.CalcInertialShapeFactor(mol_3D),
        rdMolDescriptors.CalcRadiusOfGyration(mol_3D),
    ])
    return features
def calc_rdkit(mol):
    """Evaluate a fixed battery of descriptor functions on ``mol``.

    The functions are applied in the order listed below (grouped by the
    module they come from) and the results are returned as a ``pd.Series``
    built from a NumPy array, so positions in the series follow that order.
    """
    descriptor_functions = (
        # Crippen
        Crippen.MolLogP,
        Crippen.MolMR,
        # Descriptors
        Descriptors.FpDensityMorgan1,
        Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3,
        Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt,
        Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge,
        Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge,
        Descriptors.MolWt,
        Descriptors.NumRadicalElectrons,
        Descriptors.NumValenceElectrons,
        # EState indices
        EState.EState.MaxAbsEStateIndex,
        EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex,
        EState.EState.MinEStateIndex,
        # EState VSA
        EState.EState_VSA.EState_VSA1,
        EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11,
        EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3,
        EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5,
        EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7,
        EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        # Fragments
        Fragments.fr_Al_COO,
        Fragments.fr_Al_OH,
        Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde,
        Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide,
        Fragments.fr_allylic_oxid,
        Fragments.fr_amide,
        Fragments.fr_amidine,
        Fragments.fr_aniline,
        Fragments.fr_Ar_COO,
        Fragments.fr_Ar_N,
        Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH,
        Fragments.fr_ArN,
        Fragments.fr_aryl_methyl,
        Fragments.fr_azide,
        Fragments.fr_azo,
        Fragments.fr_barbitur,
        Fragments.fr_benzene,
        Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic,
        Fragments.fr_C_O,
        Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S,
        Fragments.fr_COO,
        Fragments.fr_COO2,
        Fragments.fr_diazo,
        Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide,
        Fragments.fr_ester,
        Fragments.fr_ether,
        Fragments.fr_furan,
        Fragments.fr_guanido,
        Fragments.fr_halogen,
        Fragments.fr_hdrzine,
        Fragments.fr_hdrzone,
        Fragments.fr_HOCCN,
        Fragments.fr_imidazole,
        Fragments.fr_imide,
        Fragments.fr_Imine,
        Fragments.fr_isocyan,
        Fragments.fr_isothiocyan,
        Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss,
        Fragments.fr_lactam,
        Fragments.fr_lactone,
        Fragments.fr_methoxy,
        Fragments.fr_morpholine,
        Fragments.fr_N_O,
        Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2,
        Fragments.fr_NH0,
        Fragments.fr_NH1,
        Fragments.fr_NH2,
        Fragments.fr_Nhpyrrole,
        Fragments.fr_nitrile,
        Fragments.fr_nitro,
        Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho,
        Fragments.fr_nitroso,
        Fragments.fr_oxazole,
        Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation,
        Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond,
        Fragments.fr_phos_acid,
        Fragments.fr_phos_ester,
        Fragments.fr_piperdine,
        Fragments.fr_piperzine,
        Fragments.fr_priamide,
        Fragments.fr_prisulfonamd,
        Fragments.fr_pyridine,
        Fragments.fr_quatN,
        Fragments.fr_SH,
        Fragments.fr_sulfide,
        Fragments.fr_sulfonamd,
        Fragments.fr_sulfone,
        Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole,
        Fragments.fr_thiazole,
        Fragments.fr_thiocyan,
        Fragments.fr_thiophene,
        Fragments.fr_unbrch_alkane,
        Fragments.fr_urea,
        # GraphDescriptors
        GraphDescriptors.BalabanJ,
        GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0,
        GraphDescriptors.Chi0n,
        GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1,
        GraphDescriptors.Chi1n,
        GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n,
        GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n,
        GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n,
        GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha,
        GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1,
        GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        # Lipinski
        Lipinski.HeavyAtomCount,
        Lipinski.NHOHCount,
        Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors,
        Lipinski.NumHDonors,
        Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        # MolSurf
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1,
        MolSurf.PEOE_VSA10,
        MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12,
        MolSurf.PEOE_VSA13,
        MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2,
        MolSurf.PEOE_VSA3,
        MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5,
        MolSurf.PEOE_VSA6,
        MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8,
        MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1,
        MolSurf.SlogP_VSA10,
        MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12,
        MolSurf.SlogP_VSA2,
        MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4,
        MolSurf.SlogP_VSA5,
        MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7,
        MolSurf.SlogP_VSA8,
        MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1,
        MolSurf.SMR_VSA10,
        MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3,
        MolSurf.SMR_VSA4,
        MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6,
        MolSurf.SMR_VSA7,
        MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    values = [compute(mol) for compute in descriptor_functions]
    return pd.Series(np.array(values))
## Bertz TC tcs = [BertzCT(mol) for mol in tqdm(mols)] ## TPSA tpsa = [Descriptors.TPSA(mol) for mol in mols] ## QED qed = [] for mol in tqdm(mols): try: qed.append(Descriptors.qed(mol)) except OverflowError: pass ## % of sp3 carbons pct_sp3 = [Lipinski.FractionCSP3(mol) for mol in tqdm(mols)] ## % heteroatoms pct_hetero = [Lipinski.NumHeteroatoms(mol) / mol.GetNumAtoms() for mol in \ tqdm(mols)] ## number of rings rings = [Lipinski.RingCount(mol) for mol in tqdm(mols)] ## SA score SA = [] for mol in tqdm(mols): try: SA.append(sascorer.calculateScore(mol)) except (OverflowError, ZeroDivisionError): pass ## NP-likeness fscore = npscorer.readNPModel() NP = [npscorer.scoreMol(mol, fscore) for mol in tqdm(mols)]
def calculate_metrics(mol):
    """Compute a dictionary of chemical descriptors for one molecule.

    mol: the molecule to describe. ``Chem.AssignStereochemistry`` is
        called on it before the stereocentres are counted.

    Returns a dict keyed by human-readable metric name. In addition to
    the fixed keys, one 'Atoms with symbol <X>' entry is added per element
    symbol present, holding the number of atoms with that symbol.

    Notes:
        * '% rotatable bonds' and '% heteroatoms' are stored as ratios
          (rotatable bonds / bonds, heteroatoms / atoms); each is 0 when
          its denominator is 0.
        * The NP-likeness and synthetic accessibility scores are None
          when their scorer raises ValueError.
    """
    # calculate chemical descriptors
    ## % of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)

    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)

    ## fraction of rotatable bonds (0 for a molecule without bonds)
    n_bonds = mol.GetNumBonds()
    if n_bonds > 0:
        rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds
    else:
        rot_bonds = 0

    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)

    ## number of stereocentres
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)

    ## polarity
    tpsa = Chem.CalcTPSA(mol)

    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)

    ## molecular weight
    mw = Descriptors.MolWt(mol)

    ## in Lipinski space?
    Ro5 = in_Ro5(mol)

    ## number of each atom
    symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
    atom_counts = Counter(symbols)

    ## fraction of heteroatoms; guarded like the rotatable-bond ratio so
    ## that a molecule without atoms does not raise ZeroDivisionError
    n_atoms = len(symbols)
    if n_atoms > 0:
        pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms
    else:
        pct_hetero = 0

    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))

    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None

    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None

    ## topological complexity
    bertz_idx = BertzCT(mol)

    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx,
    }

    # append atom counts
    for symbol, count in atom_counts.items():
        metrics['Atoms with symbol ' + symbol] = count

    return metrics