Example #1
0
def get_filter_values(mol):
    """
    calculate the values, for a given molecule, that are used to filter
    return as a dictionary
    """

    assert isinstance(mol, Chem.Mol)

    values = {}
    values["MW"] = desc.CalcExactMolWt(mol)
    values["logP"] = crip.MolLogP(mol)
    values["HBA"] = lip.NumHAcceptors(mol)
    values["HBD"] = lip.NumHDonors(mol)
    values["tPSA"] = desc.CalcTPSA(mol)
    values["rot_bonds"] = lip.NumRotatableBonds(mol)
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]  # assume mutual exclusion
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    values["charge"] = rdmolops.GetFormalCharge(mol)  # trusting this charge calculation method
    values["num_carbons"], values["num_charges"], values["max_ring_size"] = get_atom_props(mol)
    try:
        values["hc_ratio"] = float(values["num_hetero_atoms"]) / float(values["num_carbons"])
    except ZeroDivisionError:
        values["hc_ratio"] = 100000000  # if there are zero carbons
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))  # how many BRICS bonds, related to complexity
    values["is_good"] = True  # default to true, but not yet observed
    atoms = [atom.GetSymbol() for atom in mol.GetAtoms()]  # get all the atoms, and make the list unique (only types)
    atoms = set(atoms)
    atoms = list(atoms)
    values["atoms"] = atoms
    values["num_chiral_centers"] = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []  # empty list to store the reasons for rejection

    return values
def filter2(filter1_dict, input_nr_of_hetero_atoms):
    """ takes all text from dictionary with the substructures and returns a 
    dictionary with substructures which are filtered based on the presence of
    heteroatoms.
  
    filter1_dict: dict with substructures and structure identifier
    input_nr_of_hetero_atoms: integer, number of hetero atoms input paramater value
    """

    filter2_dict = {}
    for key, values in filter1_dict.items():
        structure_id = key
        for smile in values:
            if smile != '<NA>':
                sub_mol = Chem.MolFromSmiles(smile)
                nr_of_hetero_atoms = Lipinski.NumHeteroatoms(sub_mol)
                if nr_of_hetero_atoms >= input_nr_of_hetero_atoms:
                    if structure_id in filter2_dict:
                        filter2_dict[structure_id].append(smile)
                    if structure_id not in filter2_dict:
                        filter2_dict[structure_id] = [smile]
        if structure_id not in filter2_dict:
            filter2_dict[structure_id] = ['<NA>']

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('nr of substructures after filter 2 [hetero atoms]')
    nr_of_subs = 0
    for key, value in filter2_dict.items():
        for val in value:
            if val != '<NA>':
                nr_of_subs += 1
    print(nr_of_subs)
    return filter2_dict
Example #3
0
def get_descriptors(mol, write=False):
    # Make a copy of the molecule dataframe
    desc = [
        Lipinski.NumAromaticHeterocycles(mol),
        Lipinski.NumAromaticRings(mol),
        Lipinski.NumHDonors(mol),
        Lipinski.RingCount(mol),
        Lipinski.NHOHCount(mol),
        Lipinski.NumHeteroatoms(mol),
        Lipinski.NumAliphaticCarbocycles(mol),
        Lipinski.NumSaturatedCarbocycles(mol),
        Lipinski.NumAliphaticHeterocycles(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumSaturatedHeterocycles(mol),
        Lipinski.NumAliphaticRings(mol),
        Descriptors.NumRadicalElectrons(mol),
        Descriptors.MaxPartialCharge(mol),
        Descriptors.NumValenceElectrons(mol),
        Lipinski.FractionCSP3(mol),
        Descriptors.MaxAbsPartialCharge(mol),
        Lipinski.NumAromaticCarbocycles(mol),
        Lipinski.NumSaturatedRings(mol),
        Lipinski.NumRotatableBonds(mol)
    ]

    desc = [0 if i != i else i for i in desc]
    return desc
Example #4
0
    def test1(self):
        " testing first 200 mols from NCI "
        # figure out which rotor version we are using
        m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(m) == 2:
            rot_prop = NonStrict
        else:
            rot_prop = Strict

        suppl = Chem.SDMolSupplier(self.inFileName)
        idx = 1
        for m in suppl:
            if m:
                calc = Lipinski.NHOHCount(m)
                orig = int(m.GetProp('NUM_LIPINSKIHDONORS'))
                assert calc == orig, 'bad num h donors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NOCount(m)
                orig = int(m.GetProp('NUM_LIPINSKIHACCEPTORS'))
                assert calc == orig, 'bad num h acceptors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumHDonors(m)
                orig = int(m.GetProp('NUM_HDONORS'))
                assert calc == orig, 'bad num h donors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumHAcceptors(m)
                orig = int(m.GetProp('NUM_HACCEPTORS'))
                assert calc == orig, 'bad num h acceptors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumHeteroatoms(m)
                orig = int(m.GetProp('NUM_HETEROATOMS'))
                assert calc == orig, 'bad num heteroatoms for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumRotatableBonds(m)
                orig = int(m.GetProp(rot_prop))
                assert calc == orig, 'bad num rotors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                # test the underlying numrotatable bonds
                calc = rdMolDescriptors.CalcNumRotatableBonds(
                    m, rdMolDescriptors.NumRotatableBondsOptions.NonStrict)
                orig = int(m.GetProp(NonStrict))
                assert calc == orig, 'bad num rotors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = rdMolDescriptors.CalcNumRotatableBonds(
                    m, rdMolDescriptors.NumRotatableBondsOptions.Strict)
                orig = int(m.GetProp(Strict))
                assert calc == orig, 'bad num rotors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

            idx += 1
Example #5
0
def PhyChem(smiles):
    """ Calculating the 19D physicochemical descriptors for each molecules,
    the value has been normalized with Gaussian distribution.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix as normalized PhysChem descriptors.
            m is the No. of samples
    """
    props = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            MW = desc.MolWt(mol)
            LOGP = Crippen.MolLogP(mol)
            HBA = Lipinski.NumHAcceptors(mol)
            HBD = Lipinski.NumHDonors(mol)
            rotable = Lipinski.NumRotatableBonds(mol)
            amide = AllChem.CalcNumAmideBonds(mol)
            bridge = AllChem.CalcNumBridgeheadAtoms(mol)
            heteroA = Lipinski.NumHeteroatoms(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            spiro = AllChem.CalcNumSpiroAtoms(mol)
            FCSP3 = AllChem.CalcFractionCSP3(mol)
            ring = Lipinski.RingCount(mol)
            Aliphatic = AllChem.CalcNumAliphaticRings(mol)
            aromatic = AllChem.CalcNumAromaticRings(mol)
            saturated = AllChem.CalcNumSaturatedRings(mol)
            heteroR = AllChem.CalcNumHeterocycles(mol)
            TPSA = MolSurf.TPSA(mol)
            valence = desc.NumValenceElectrons(mol)
            mr = Crippen.MolMR(mol)
            # charge = AllChem.ComputeGasteigerCharges(mol)
            prop = [
                MW, LOGP, HBA, HBD, rotable, amide, bridge, heteroA, heavy,
                spiro, FCSP3, ring, Aliphatic, aromatic, saturated, heteroR,
                TPSA, valence, mr
            ]
        except Exception:
            print(smile)
            prop = [0] * 19
        props.append(prop)
    props = np.array(props)
    props = Scaler().fit_transform(props)
    return props
Example #6
0
    def test1(self):
        " testing first 200 mols from NCI "
        suppl = Chem.SDMolSupplier(self.inFileName)
        idx = 1
        oldDonorSmarts = Chem.MolFromSmarts('[NH1,NH2,OH1]')
        OldDonorCount = lambda x, y=oldDonorSmarts: Lipinski._NumMatches(x, y)
        oldAcceptorSmarts = Chem.MolFromSmarts('[N,O]')
        OldAcceptorCount = lambda x, y=oldAcceptorSmarts: Lipinski._NumMatches(
            x, y)
        for m in suppl:
            if m:
                calc = Lipinski.NHOHCount(m)
                orig = int(m.GetProp('NUM_LIPINSKIHDONORS'))
                assert calc == orig, 'bad num h donors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NOCount(m)
                orig = int(m.GetProp('NUM_LIPINSKIHACCEPTORS'))
                assert calc == orig, 'bad num h acceptors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumHDonors(m)
                orig = int(m.GetProp('NUM_HDONORS'))
                assert calc == orig, 'bad num h donors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumHAcceptors(m)
                orig = int(m.GetProp('NUM_HACCEPTORS'))
                assert calc == orig, 'bad num h acceptors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumHeteroatoms(m)
                orig = int(m.GetProp('NUM_HETEROATOMS'))
                assert calc == orig, 'bad num heteroatoms for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)

                calc = Lipinski.NumRotatableBonds(m)
                orig = int(m.GetProp('NUM_ROTATABLEBONDS'))
                assert calc == orig, 'bad num rotors for mol %d (%s): %d != %d' % (
                    idx, m.GetProp('SMILES'), calc, orig)
            idx += 1
Example #7
0
    def extract(x, from_smiles):
        if from_smiles:
            mol = Chem.MolFromSmiles(x)
        else:
            mol = x

        if (mol is None) or (len(mol.GetAtoms()) == 0):
            if include_3D:
                return [0] * 29
            else:
                return [0] * 24
        else:
            logP = Crippen.MolLogP(mol)
            refractivity = Crippen.MolMR(mol)

            weight = Descriptors.MolWt(mol)
            exact_weight = Descriptors.ExactMolWt(mol)
            heavy_weight = Descriptors.HeavyAtomMolWt(mol)
            heavy_count = Lipinski.HeavyAtomCount(mol)
            nhoh_count = Lipinski.NHOHCount(mol)
            no_count = Lipinski.NOCount(mol)
            hacceptor_count = Lipinski.NumHAcceptors(mol)
            hdonor_count = Lipinski.NumHDonors(mol)
            hetero_count = Lipinski.NumHeteroatoms(mol)
            rotatable_bond_count = Lipinski.NumRotatableBonds(mol)
            valance_electron_count = Descriptors.NumValenceElectrons(mol)
            amide_bond_count = rdMolDescriptors.CalcNumAmideBonds(mol)
            aliphatic_ring_count = Lipinski.NumAliphaticRings(mol)
            aromatic_ring_count = Lipinski.NumAromaticRings(mol)
            saturated_ring_count = Lipinski.NumSaturatedRings(mol)
            aliphatic_cycle_count = Lipinski.NumAliphaticCarbocycles(mol)
            aliphaticHetero_cycle_count = Lipinski.NumAliphaticHeterocycles(
                mol)
            aromatic_cycle_count = Lipinski.NumAromaticCarbocycles(mol)
            aromaticHetero_cycle_count = Lipinski.NumAromaticHeterocycles(mol)
            saturated_cycle_count = Lipinski.NumSaturatedCarbocycles(mol)
            saturatedHetero_cycle_count = Lipinski.NumSaturatedHeterocycles(
                mol)

            tpsa = rdMolDescriptors.CalcTPSA(mol)

            if include_3D:
                mol_3D = Chem.AddHs(mol)
                AllChem.EmbedMolecule(mol_3D)
                AllChem.MMFFOptimizeMolecule(mol_3D)
                eccentricity = rdMolDescriptors.CalcEccentricity(mol_3D)
                asphericity = rdMolDescriptors.CalcAsphericity(mol_3D)
                spherocity = rdMolDescriptors.CalcSpherocityIndex(mol_3D)
                inertial = rdMolDescriptors.CalcInertialShapeFactor(mol_3D)
                gyration = rdMolDescriptors.CalcRadiusOfGyration(mol_3D)

                return [
                    logP, refractivity, weight, exact_weight, heavy_weight,
                    heavy_count, nhoh_count, no_count, hacceptor_count,
                    hdonor_count, hetero_count, rotatable_bond_count,
                    valance_electron_count, amide_bond_count,
                    aliphatic_ring_count, aromatic_ring_count,
                    saturated_ring_count, aliphatic_cycle_count,
                    aliphaticHetero_cycle_count, aromatic_cycle_count,
                    aromaticHetero_cycle_count, saturated_cycle_count,
                    saturatedHetero_cycle_count, tpsa, eccentricity,
                    asphericity, spherocity, inertial, gyration
                ]
            else:
                return [
                    logP, refractivity, weight, exact_weight, heavy_weight,
                    heavy_count, nhoh_count, no_count, hacceptor_count,
                    hdonor_count, hetero_count, rotatable_bond_count,
                    valance_electron_count, amide_bond_count,
                    aliphatic_ring_count, aromatic_ring_count,
                    saturated_ring_count, aliphatic_cycle_count,
                    aliphaticHetero_cycle_count, aromatic_cycle_count,
                    aromaticHetero_cycle_count, saturated_cycle_count,
                    saturatedHetero_cycle_count, tpsa
                ]
Example #8
0
def calc_rdkit(mol):
    descriptors = pd.Series(
        np.array([
            Crippen.MolLogP(mol),
            Crippen.MolMR(mol),
            Descriptors.FpDensityMorgan1(mol),
            Descriptors.FpDensityMorgan2(mol),
            Descriptors.FpDensityMorgan3(mol),
            Descriptors.FractionCSP3(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.MaxAbsPartialCharge(mol),
            Descriptors.MaxPartialCharge(mol),
            Descriptors.MinAbsPartialCharge(mol),
            Descriptors.MinPartialCharge(mol),
            Descriptors.MolWt(mol),
            Descriptors.NumRadicalElectrons(mol),
            Descriptors.NumValenceElectrons(mol),
            EState.EState.MaxAbsEStateIndex(mol),
            EState.EState.MaxEStateIndex(mol),
            EState.EState.MinAbsEStateIndex(mol),
            EState.EState.MinEStateIndex(mol),
            EState.EState_VSA.EState_VSA1(mol),
            EState.EState_VSA.EState_VSA10(mol),
            EState.EState_VSA.EState_VSA11(mol),
            EState.EState_VSA.EState_VSA2(mol),
            EState.EState_VSA.EState_VSA3(mol),
            EState.EState_VSA.EState_VSA4(mol),
            EState.EState_VSA.EState_VSA5(mol),
            EState.EState_VSA.EState_VSA6(mol),
            EState.EState_VSA.EState_VSA7(mol),
            EState.EState_VSA.EState_VSA8(mol),
            EState.EState_VSA.EState_VSA9(mol),
            Fragments.fr_Al_COO(mol),
            Fragments.fr_Al_OH(mol),
            Fragments.fr_Al_OH_noTert(mol),
            Fragments.fr_aldehyde(mol),
            Fragments.fr_alkyl_carbamate(mol),
            Fragments.fr_alkyl_halide(mol),
            Fragments.fr_allylic_oxid(mol),
            Fragments.fr_amide(mol),
            Fragments.fr_amidine(mol),
            Fragments.fr_aniline(mol),
            Fragments.fr_Ar_COO(mol),
            Fragments.fr_Ar_N(mol),
            Fragments.fr_Ar_NH(mol),
            Fragments.fr_Ar_OH(mol),
            Fragments.fr_ArN(mol),
            Fragments.fr_aryl_methyl(mol),
            Fragments.fr_azide(mol),
            Fragments.fr_azo(mol),
            Fragments.fr_barbitur(mol),
            Fragments.fr_benzene(mol),
            Fragments.fr_benzodiazepine(mol),
            Fragments.fr_bicyclic(mol),
            Fragments.fr_C_O(mol),
            Fragments.fr_C_O_noCOO(mol),
            Fragments.fr_C_S(mol),
            Fragments.fr_COO(mol),
            Fragments.fr_COO2(mol),
            Fragments.fr_diazo(mol),
            Fragments.fr_dihydropyridine(mol),
            Fragments.fr_epoxide(mol),
            Fragments.fr_ester(mol),
            Fragments.fr_ether(mol),
            Fragments.fr_furan(mol),
            Fragments.fr_guanido(mol),
            Fragments.fr_halogen(mol),
            Fragments.fr_hdrzine(mol),
            Fragments.fr_hdrzone(mol),
            Fragments.fr_HOCCN(mol),
            Fragments.fr_imidazole(mol),
            Fragments.fr_imide(mol),
            Fragments.fr_Imine(mol),
            Fragments.fr_isocyan(mol),
            Fragments.fr_isothiocyan(mol),
            Fragments.fr_ketone(mol),
            Fragments.fr_ketone_Topliss(mol),
            Fragments.fr_lactam(mol),
            Fragments.fr_lactone(mol),
            Fragments.fr_methoxy(mol),
            Fragments.fr_morpholine(mol),
            Fragments.fr_N_O(mol),
            Fragments.fr_Ndealkylation1(mol),
            Fragments.fr_Ndealkylation2(mol),
            Fragments.fr_NH0(mol),
            Fragments.fr_NH1(mol),
            Fragments.fr_NH2(mol),
            Fragments.fr_Nhpyrrole(mol),
            Fragments.fr_nitrile(mol),
            Fragments.fr_nitro(mol),
            Fragments.fr_nitro_arom(mol),
            Fragments.fr_nitro_arom_nonortho(mol),
            Fragments.fr_nitroso(mol),
            Fragments.fr_oxazole(mol),
            Fragments.fr_oxime(mol),
            Fragments.fr_para_hydroxylation(mol),
            Fragments.fr_phenol(mol),
            Fragments.fr_phenol_noOrthoHbond(mol),
            Fragments.fr_phos_acid(mol),
            Fragments.fr_phos_ester(mol),
            Fragments.fr_piperdine(mol),
            Fragments.fr_piperzine(mol),
            Fragments.fr_priamide(mol),
            Fragments.fr_prisulfonamd(mol),
            Fragments.fr_pyridine(mol),
            Fragments.fr_quatN(mol),
            Fragments.fr_SH(mol),
            Fragments.fr_sulfide(mol),
            Fragments.fr_sulfonamd(mol),
            Fragments.fr_sulfone(mol),
            Fragments.fr_term_acetylene(mol),
            Fragments.fr_tetrazole(mol),
            Fragments.fr_thiazole(mol),
            Fragments.fr_thiocyan(mol),
            Fragments.fr_thiophene(mol),
            Fragments.fr_unbrch_alkane(mol),
            Fragments.fr_urea(mol),
            GraphDescriptors.BalabanJ(mol),
            GraphDescriptors.BertzCT(mol),
            GraphDescriptors.Chi0(mol),
            GraphDescriptors.Chi0n(mol),
            GraphDescriptors.Chi0v(mol),
            GraphDescriptors.Chi1(mol),
            GraphDescriptors.Chi1n(mol),
            GraphDescriptors.Chi1v(mol),
            GraphDescriptors.Chi2n(mol),
            GraphDescriptors.Chi2v(mol),
            GraphDescriptors.Chi3n(mol),
            GraphDescriptors.Chi3v(mol),
            GraphDescriptors.Chi4n(mol),
            GraphDescriptors.Chi4v(mol),
            GraphDescriptors.HallKierAlpha(mol),
            GraphDescriptors.Ipc(mol),
            GraphDescriptors.Kappa1(mol),
            GraphDescriptors.Kappa2(mol),
            GraphDescriptors.Kappa3(mol),
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.RingCount(mol),
            MolSurf.LabuteASA(mol),
            MolSurf.PEOE_VSA1(mol),
            MolSurf.PEOE_VSA10(mol),
            MolSurf.PEOE_VSA11(mol),
            MolSurf.PEOE_VSA12(mol),
            MolSurf.PEOE_VSA13(mol),
            MolSurf.PEOE_VSA14(mol),
            MolSurf.PEOE_VSA2(mol),
            MolSurf.PEOE_VSA3(mol),
            MolSurf.PEOE_VSA4(mol),
            MolSurf.PEOE_VSA5(mol),
            MolSurf.PEOE_VSA6(mol),
            MolSurf.PEOE_VSA7(mol),
            MolSurf.PEOE_VSA8(mol),
            MolSurf.PEOE_VSA9(mol),
            MolSurf.SlogP_VSA1(mol),
            MolSurf.SlogP_VSA10(mol),
            MolSurf.SlogP_VSA11(mol),
            MolSurf.SlogP_VSA12(mol),
            MolSurf.SlogP_VSA2(mol),
            MolSurf.SlogP_VSA3(mol),
            MolSurf.SlogP_VSA4(mol),
            MolSurf.SlogP_VSA5(mol),
            MolSurf.SlogP_VSA6(mol),
            MolSurf.SlogP_VSA7(mol),
            MolSurf.SlogP_VSA8(mol),
            MolSurf.SlogP_VSA9(mol),
            MolSurf.SMR_VSA1(mol),
            MolSurf.SMR_VSA10(mol),
            MolSurf.SMR_VSA2(mol),
            MolSurf.SMR_VSA3(mol),
            MolSurf.SMR_VSA4(mol),
            MolSurf.SMR_VSA5(mol),
            MolSurf.SMR_VSA6(mol),
            MolSurf.SMR_VSA7(mol),
            MolSurf.SMR_VSA8(mol),
            MolSurf.SMR_VSA9(mol),
            MolSurf.TPSA(mol)
        ]))
    return descriptors
## Bertz TC
tcs = [BertzCT(mol) for mol in tqdm(mols)]
## TPSA
tpsa = [Descriptors.TPSA(mol) for mol in mols]
## QED
qed = []
for mol in tqdm(mols):
    try:
        qed.append(Descriptors.qed(mol))
    except OverflowError:
        pass

## % of sp3 carbons
pct_sp3 = [Lipinski.FractionCSP3(mol) for mol in tqdm(mols)]
## % heteroatoms
pct_hetero = [Lipinski.NumHeteroatoms(mol) / mol.GetNumAtoms() for mol in \
              tqdm(mols)]
## number of rings
rings = [Lipinski.RingCount(mol) for mol in tqdm(mols)]
## SA score
SA = []
for mol in tqdm(mols):
    try:
        SA.append(sascorer.calculateScore(mol))
    except (OverflowError, ZeroDivisionError):
        pass

## NP-likeness
fscore = npscorer.readNPModel()
NP = [npscorer.scoreMol(mol, fscore) for mol in tqdm(mols)]
def calculate_metrics(mol):
    # calculate chemical descriptors
    ## % of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)
    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)
    ## number of rotable bonds
    n_bonds = mol.GetNumBonds()
    if n_bonds > 0:
        rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds
    else:
        rot_bonds = 0
    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)
    ## number of stereocentres
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)
    ## polarity
    tpsa = Chem.CalcTPSA(mol)
    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)
    ## molecular weight
    mw = Descriptors.MolWt(mol)
    ## in Lipinski space?
    Ro5 = in_Ro5(mol)
    ## % heteroatoms
    n_atoms = len(mol.GetAtoms())
    pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms
    ## number of each atom
    symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
    atom_counts = Counter(symbols)
    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))
    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None
    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None
    ## topological complexity
    bertz_idx = BertzCT(mol)
    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx
    }
    # append atom counts
    for key in atom_counts.keys():
        metrics['Atoms with symbol ' + key] = atom_counts[key]
    return (metrics)