Example #1
0
def get_descriptors(df):
    """Compute a table of RDKit descriptors for every molecule in ``df``.

    Args:
        df: DataFrame with an ``ROMol`` column holding RDKit molecules.

    Returns:
        A DataFrame sharing ``df``'s index with the columns MW, LogP, TPSA,
        LabuteASA (each rounded to one decimal), HBA, HBD, FCSP3, MQN8,
        MQN10, NAR and NRB. An empty ``df`` yields an empty frame that
        still carries these columns.
    """
    # Side effect kept from the original implementation: switch the
    # PandasTools molecule renderer to its string mode.
    PandasTools.ChangeMoleculeRendering(renderer='String')

    columns = [
        'MW', 'LogP', 'TPSA', 'LabuteASA', 'HBA', 'HBD', 'FCSP3', 'MQN8',
        'MQN10', 'NAR', 'NRB'
    ]

    rows = []
    for m in df['ROMol']:
        # Compute the MQN vector once and pick the two entries we report
        # (positions 8 and 10, i.e. zero-based indices 7 and 9).
        mqns = rdMolDescriptors.MQNs_(m)
        rows.append([
            round(Descriptors.ExactMolWt(m), 1),
            round(Descriptors.MolLogP(m), 1),
            round(Descriptors.TPSA(m), 1),
            round(Descriptors.LabuteASA(m), 1),
            Descriptors.NumHAcceptors(m),
            Descriptors.NumHDonors(m),
            Lipinski.FractionCSP3(m),
            mqns[7],
            mqns[9],
            Lipinski.NumAromaticRings(m),
            Descriptors.NumRotatableBonds(m),
        ])

    # Passing columns/index to the constructor (instead of assigning them
    # afterwards) keeps this working when there are no rows at all.
    return pd.DataFrame(rows, columns=columns, index=df.index)
Example #2
0
def get_descriptors(mol, write=False):
    """Return a fixed-order list of 20 descriptor values for ``mol``.

    Args:
        mol: molecule object passed to each descriptor function.
        write: accepted for interface compatibility; not used here.

    Returns:
        list: descriptor values in the order of the calculators below, with
        any NaN entry replaced by 0.
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )
    raw_values = [calculate(mol) for calculate in calculators]

    cleaned = []
    for value in raw_values:
        # A value that is not equal to itself is NaN; map it to 0.
        cleaned.append(0 if value != value else value)
    return cleaned
Example #3
0
def mole_proper(mol):
    """Return seven basic properties of ``mol`` as a list.

    The order is: H-bond donors, H-bond acceptors, rotatable bonds,
    aromatic rings, molecular weight, logP and TPSA.
    """
    return [
        Lipinski.NumHDonors(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumRotatableBonds(mol),
        Lipinski.NumAromaticRings(mol),
        Descriptors.MolWt(mol),
        Crippen.MolLogP(mol),
        Descriptors.TPSA(mol),
    ]
Example #4
0
def properties(fnames, labels, is_active=False):
    """ Six structural properties calculation for each molecule in each given file.
    These properties are the No. of Hydrogen Bond Acceptors/Donors, Rotatable Bonds,
    Aliphatic Rings, Aromatic Rings and Heterocycles.

    Arguments:
        fnames (list): the file path of molecules.
        labels (list): the label for each file in the fnames.
        is_active (bool, optional): selecting only active ligands (True) or all of the molecules (False)
            if it is true, the molecule with PCHEMBL_VALUE >= 6.5 or SCORE > 0.5 will be selected.
            (Default: False)

    Returns:
        df (DataFrame): the table contains three columns; 'Set' is the label
            of fname the molecule belongs to, 'Property' is the name of one
            of the six properties, 'Number' is the property value.
            SMILES that RDKit cannot parse are skipped.
    """
    # Upper bound on molecules processed per file; larger files are sampled.
    max_mols = int(1e5)

    props = []
    for i, fname in enumerate(fnames):
        df = pd.read_table(fname)
        # Thresholds as in the original: SCORE takes precedence over
        # PCHEMBL_VALUE when both columns exist.
        if 'SCORE' in df.columns:
            df = df[df.SCORE > (0.5 if is_active else 0)]
        elif 'PCHEMBL_VALUE' in df.columns:
            df = df[df.PCHEMBL_VALUE >= (6.5 if is_active else 0)]
        df = df.drop_duplicates(subset='CANONICAL_SMILES')
        if len(df) > max_mols:
            df = df.sample(max_mols)
        for smile in tqdm(df.CANONICAL_SMILES):
            mol = Chem.MolFromSmiles(smile)
            if mol is None:
                # MolFromSmiles returns None for unparsable input; the
                # descriptor functions below would raise on it.
                continue
            label = labels[i]
            props.extend([
                [label, 'Hydrogen Bond\nAcceptor', Lipinski.NumHAcceptors(mol)],
                [label, 'Hydrogen\nBond Donor', Lipinski.NumHDonors(mol)],
                [label, 'Rotatable\nBond', Lipinski.NumRotatableBonds(mol)],
                [label, 'Aliphatic\nRing', AllChem.CalcNumAliphaticRings(mol)],
                [label, 'Aromatic\nRing', Lipinski.NumAromaticRings(mol)],
                [label, 'Heterocycle', AllChem.CalcNumHeterocycles(mol)],
            ])
    return pd.DataFrame(props, columns=['Set', 'Property', 'Number'])
Example #5
0
    def extract(x, from_smiles):
        """Compute a fixed-length descriptor vector for one molecule.

        Args:
            x: a SMILES string when ``from_smiles`` is true, otherwise an
                already-built molecule object.
            from_smiles: whether ``x`` must be parsed with
                ``Chem.MolFromSmiles`` first.

        Returns:
            list: 24 descriptor values, followed by 5 shape descriptors
            (eccentricity, asphericity, spherocity index, inertial shape
            factor, radius of gyration) when the enclosing ``include_3D``
            flag is set. A missing or atom-less molecule yields all zeros;
            if no 3D conformer can be embedded, only the 5 shape entries
            are zero.
        """
        mol = Chem.MolFromSmiles(x) if from_smiles else x

        if mol is None or mol.GetNumAtoms() == 0:
            return [0] * (29 if include_3D else 24)

        # Order matters: callers index into this vector positionally.
        features = [
            Crippen.MolLogP(mol),
            Crippen.MolMR(mol),
            Descriptors.MolWt(mol),
            Descriptors.ExactMolWt(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Descriptors.NumValenceElectrons(mol),
            rdMolDescriptors.CalcNumAmideBonds(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            rdMolDescriptors.CalcTPSA(mol),
        ]
        if not include_3D:
            return features

        mol_3D = Chem.AddHs(mol)
        # EmbedMolecule returns -1 when it cannot generate a conformer; the
        # optimizer and shape descriptors below need one, so fall back to
        # zeros (same convention as for an invalid molecule).
        if AllChem.EmbedMolecule(mol_3D) < 0:
            return features + [0] * 5
        AllChem.MMFFOptimizeMolecule(mol_3D)

        return features + [
            rdMolDescriptors.CalcEccentricity(mol_3D),
            rdMolDescriptors.CalcAsphericity(mol_3D),
            rdMolDescriptors.CalcSpherocityIndex(mol_3D),
            rdMolDescriptors.CalcInertialShapeFactor(mol_3D),
            rdMolDescriptors.CalcRadiusOfGyration(mol_3D),
        ]
Example #6
0
    # Per-molecule descriptor lists for `org_mols` (defined outside this
    # fragment). tqdm only adds a progress bar around the iteration.
    ## Bertz CT value for each molecule
    org_tcs = [BertzCT(mol) for mol in tqdm(org_mols)]
    ## TPSA
    org_tpsa = [Descriptors.TPSA(mol) for mol in org_mols]
    ## QED
    # NOTE(review): molecules raising OverflowError are skipped, so org_qed
    # can be shorter than org_mols and is not index-aligned with the other
    # lists - confirm downstream code does not pair them by position.
    org_qed = []
    for mol in org_mols:
        try:
            org_qed.append(Descriptors.qed(mol))
        except OverflowError:
            pass

    ## number of rings: total, aliphatic, aromatic
    org_rings1 = [Lipinski.RingCount(mol) for mol in tqdm(org_mols)]
    org_rings2 = [Lipinski.NumAliphaticRings(mol) for mol in tqdm(org_mols)]
    org_rings3 = [Lipinski.NumAromaticRings(mol) for mol in tqdm(org_mols)]
    ## SA score
    # NOTE(review): same caveat as for QED - failures are dropped silently,
    # so org_SA may be shorter than org_mols.
    org_SA = []
    for mol in tqdm(org_mols):
        try:
            org_SA.append(sascorer.calculateScore(mol))
        except (OverflowError, ZeroDivisionError):
            pass

    ## NP-likeness
    # The model is read once and reused for every molecule.
    fscore = npscorer.readNPModel()
    org_NP = [npscorer.scoreMol(mol, fscore) for mol in tqdm(org_mols)]
    ## % sp3 carbons
    org_sp3 = [Lipinski.FractionCSP3(mol) for mol in org_mols]
    ## % rotatable bonds (pct_rotatable_bonds is defined outside this fragment)
    org_rot = [pct_rotatable_bonds(mol) for mol in org_mols]
Example #7
0
def calc_rdkit(mol):
    """Evaluate a fixed battery of RDKit descriptors on ``mol``.

    Args:
        mol: molecule object passed to every descriptor function.

    Returns:
        pd.Series: descriptor values (default integer index) in the order
        of the ``calculators`` table below.
    """
    calculators = (
        # Crippen / general descriptors
        Crippen.MolLogP,
        Crippen.MolMR,
        Descriptors.FpDensityMorgan1,
        Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3,
        Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt,
        Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge,
        Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge,
        Descriptors.MolWt,
        Descriptors.NumRadicalElectrons,
        Descriptors.NumValenceElectrons,
        # EState indices and EState VSA bins
        EState.EState.MaxAbsEStateIndex,
        EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex,
        EState.EState.MinEStateIndex,
        EState.EState_VSA.EState_VSA1,
        EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11,
        EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3,
        EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5,
        EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7,
        EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        # Fragment counts
        Fragments.fr_Al_COO,
        Fragments.fr_Al_OH,
        Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde,
        Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide,
        Fragments.fr_allylic_oxid,
        Fragments.fr_amide,
        Fragments.fr_amidine,
        Fragments.fr_aniline,
        Fragments.fr_Ar_COO,
        Fragments.fr_Ar_N,
        Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH,
        Fragments.fr_ArN,
        Fragments.fr_aryl_methyl,
        Fragments.fr_azide,
        Fragments.fr_azo,
        Fragments.fr_barbitur,
        Fragments.fr_benzene,
        Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic,
        Fragments.fr_C_O,
        Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S,
        Fragments.fr_COO,
        Fragments.fr_COO2,
        Fragments.fr_diazo,
        Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide,
        Fragments.fr_ester,
        Fragments.fr_ether,
        Fragments.fr_furan,
        Fragments.fr_guanido,
        Fragments.fr_halogen,
        Fragments.fr_hdrzine,
        Fragments.fr_hdrzone,
        Fragments.fr_HOCCN,
        Fragments.fr_imidazole,
        Fragments.fr_imide,
        Fragments.fr_Imine,
        Fragments.fr_isocyan,
        Fragments.fr_isothiocyan,
        Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss,
        Fragments.fr_lactam,
        Fragments.fr_lactone,
        Fragments.fr_methoxy,
        Fragments.fr_morpholine,
        Fragments.fr_N_O,
        Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2,
        Fragments.fr_NH0,
        Fragments.fr_NH1,
        Fragments.fr_NH2,
        Fragments.fr_Nhpyrrole,
        Fragments.fr_nitrile,
        Fragments.fr_nitro,
        Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho,
        Fragments.fr_nitroso,
        Fragments.fr_oxazole,
        Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation,
        Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond,
        Fragments.fr_phos_acid,
        Fragments.fr_phos_ester,
        Fragments.fr_piperdine,
        Fragments.fr_piperzine,
        Fragments.fr_priamide,
        Fragments.fr_prisulfonamd,
        Fragments.fr_pyridine,
        Fragments.fr_quatN,
        Fragments.fr_SH,
        Fragments.fr_sulfide,
        Fragments.fr_sulfonamd,
        Fragments.fr_sulfone,
        Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole,
        Fragments.fr_thiazole,
        Fragments.fr_thiocyan,
        Fragments.fr_thiophene,
        Fragments.fr_unbrch_alkane,
        Fragments.fr_urea,
        # Graph descriptors
        GraphDescriptors.BalabanJ,
        GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0,
        GraphDescriptors.Chi0n,
        GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1,
        GraphDescriptors.Chi1n,
        GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n,
        GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n,
        GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n,
        GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha,
        GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1,
        GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        # Lipinski counts
        Lipinski.HeavyAtomCount,
        Lipinski.NHOHCount,
        Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors,
        Lipinski.NumHDonors,
        Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        # Molecular surface descriptors
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1,
        MolSurf.PEOE_VSA10,
        MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12,
        MolSurf.PEOE_VSA13,
        MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2,
        MolSurf.PEOE_VSA3,
        MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5,
        MolSurf.PEOE_VSA6,
        MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8,
        MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1,
        MolSurf.SlogP_VSA10,
        MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12,
        MolSurf.SlogP_VSA2,
        MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4,
        MolSurf.SlogP_VSA5,
        MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7,
        MolSurf.SlogP_VSA8,
        MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1,
        MolSurf.SMR_VSA10,
        MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3,
        MolSurf.SMR_VSA4,
        MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6,
        MolSurf.SMR_VSA7,
        MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    values = [calculate(mol) for calculate in calculators]
    return pd.Series(np.array(values))
Example #8
0
def main():
    """Command-line entry point: write molecular descriptors to a CSV file.

    Reads molecules from the SMILES file given with ``-i`` and writes one
    CSV line per molecule to the file given with ``-o`` with the columns
    name, MolW, HA, cLogP, AR, MR, TPSA, RotB, HBA, HBD, FC. Molecules
    flagged by ``is_alien`` are reported on stderr and, with
    ``--remove-aliens``, left out of the output. A summary of counts is
    printed to stderr at the end.

    Exits with status 1 (after printing help) when called without any
    argument, and with argparse's usual error when ``-i`` or ``-o`` is
    missing.
    """
    # CLI options parsing
    parser = argparse.ArgumentParser(
        description="Project molecules read from a SMILES file into a 10D "
        "space whose dimensions are molecular descriptors: "
        "(MolW, HA, cLogP, AR, MR, TPSA, RotB, HBA, HBD, FC)")
    # Both files are mandatory: without them open() below would be handed
    # None and fail with an unhelpful TypeError.
    parser.add_argument("-i", metavar="input_smi", dest="input_smi",
                        required=True, help="input SMILES file")
    parser.add_argument("-o", metavar="output_csv", dest="output_csv",
                        required=True, help="output CSV file")
    parser.add_argument('--no-header', dest='no_header',
                        action='store_true', default=False,
                        help="no CSV header in output file")
    # just warn about aliens by default
    parser.add_argument('--remove-aliens', dest='rm_aliens',
                        action='store_true', default=False,
                        help="don't allow aliens in output file")
    # parse CLI
    if len(sys.argv) == 1:
        # show help in case user has no clue of what to do
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    input_smi = args.input_smi
    output_csv = args.output_csv
    rm_aliens = args.rm_aliens
    no_header = args.no_header
    out_count = 0
    alien_count = 0
    error_count = 0
    with open(output_csv, 'w') as out_file:
        if not no_header:
            print("#name,MolW,HA,cLogP,AR,MR,TPSA,RotB,HBA,HBD,FC", file=out_file)
        for i, mol, name in RobustSmilesMolSupplier(input_smi):
            if mol is None:
                # the supplier yields None for molecules it could not build
                error_count += 1
                continue
            MolW = Descriptors.MolWt(mol)
            HA = Lipinski.HeavyAtomCount(mol)
            cLogP = Descriptors.MolLogP(mol)
            AR = Lipinski.NumAromaticRings(mol)
            MR = Descriptors.MolMR(mol)
            TPSA = Descriptors.TPSA(mol)
            RotB = Descriptors.NumRotatableBonds(mol)
            HBA = Descriptors.NumHAcceptors(mol)
            HBD = Descriptors.NumHDonors(mol)
            FC = Chem.rdmolops.GetFormalCharge(mol)
            alien = is_alien(MolW, cLogP, TPSA, RotB, HBA, HBD, FC)
            if alien:
                alien_str = alien_diagnose(i, name, MolW, cLogP, TPSA,
                                           RotB, HBA, HBD, FC)
                print("WARN: %s" % alien_str, file=sys.stderr)
                alien_count += 1
            if (not alien) or (not rm_aliens):
                csv_line = "%s,%g,%d,%g,%d,%g,%g,%d,%d,%d,%d" % \
                           (name, MolW, HA, cLogP, AR, MR, TPSA, RotB,
                            HBA, HBD, FC)
                print(csv_line, file=out_file)
                out_count += 1
    # Aliens are already part of out_count unless they were filtered out.
    total_count = out_count + error_count
    if rm_aliens:
        total_count += alien_count
    print("encoded: %d aliens: %d errors: %d total: %d" %
          (out_count, alien_count, error_count, total_count),
          file=sys.stderr)
def calculate_metrics(mol):
    """Compute a dictionary of chemical descriptors for one molecule.

    Args:
        mol: molecule object; its stereochemistry is (re)assigned in place
            by ``Chem.AssignStereochemistry``.

    Returns:
        dict: descriptor name -> value, plus one
        ``'Atoms with symbol <X>'`` entry per element symbol present.
        The NP-likeness and synthetic accessibility scores are ``None``
        when their scorer raises ``ValueError``. The two percentage
        entries are 0 for a molecule without bonds / without atoms.
    """
    # calculate chemical descriptors
    ## % of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)
    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)
    ## fraction of rotatable bonds (0 when there are no bonds at all)
    n_bonds = mol.GetNumBonds()
    rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds if n_bonds > 0 else 0
    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)
    ## number of stereocentres (stereo must be assigned before counting)
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)
    ## polarity
    tpsa = Chem.CalcTPSA(mol)
    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)
    ## molecular weight
    mw = Descriptors.MolWt(mol)
    ## in Lipinski space?
    Ro5 = in_Ro5(mol)
    ## fraction of heteroatoms, guarded like the rotatable-bond fraction so
    ## an atom-less molecule does not raise ZeroDivisionError
    n_atoms = mol.GetNumAtoms()
    pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms if n_atoms > 0 else 0
    ## number of each atom
    atom_counts = Counter(atom.GetSymbol() for atom in mol.GetAtoms())
    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))
    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None
    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None
    ## topological complexity
    bertz_idx = BertzCT(mol)
    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx
    }
    # append atom counts
    for symbol, count in atom_counts.items():
        metrics['Atoms with symbol ' + symbol] = count
    return metrics