Example #1
0
def get_filter_values(mol):
    """
    Collect every per-molecule quantity that the filters look at.

    The result is a dictionary keyed by property name. Besides the computed
    descriptors it carries two bookkeeping entries: ``is_good`` (starts as
    True) and ``rejections`` (starts as an empty list).
    """

    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # every bond that is not rotatable is counted as rigid
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    # formal charge is taken as reported by rdmolops
    values["charge"] = rdmolops.GetFormalCharge(mol)

    carbon_count, charge_count, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbon_count
    values["num_charges"] = charge_count
    values["max_ring_size"] = largest_ring

    hetero = float(values["num_hetero_atoms"])
    carbons = float(carbon_count)
    if carbons:
        values["hc_ratio"] = hetero / carbons
    else:
        # no carbons at all: use a large sentinel instead of dividing by zero
        values["hc_ratio"] = 100000000

    # number of BRICS bonds, used as a rough measure of complexity
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    # assumed good until a filter says otherwise
    values["is_good"] = True
    # distinct element symbols present in the molecule
    values["atoms"] = list(set(atom.GetSymbol() for atom in mol.GetAtoms()))
    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    )
    # filled in later with the reasons for rejection
    values["rejections"] = []

    return values
Example #2
0
def get_descriptors(mol, write=False):
    """Return a 20-element list of Lipinski/Descriptors values for ``mol``.

    Any NaN produced by a descriptor is replaced with 0. The ``write``
    argument is accepted but not used by this function.
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )

    cleaned = []
    for calculate in calculators:
        value = calculate(mol)
        # NaN is the only value that compares unequal to itself
        cleaned.append(0 if value != value else value)
    return cleaned
Example #3
0
def PhyChem(smiles):
    """ Compute 19 physicochemical descriptors per molecule and scale them.

    A SMILES string for which any descriptor raises is printed and
    contributes a row of 19 zeros. The stacked rows are then passed through
    ``Scaler().fit_transform``.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled PhysChem descriptors.
            m is the No. of samples
    """
    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        try:
            row = [
                desc.MolWt(mol),                      # molecular weight
                Crippen.MolLogP(mol),                 # logP
                Lipinski.NumHAcceptors(mol),          # H-bond acceptors
                Lipinski.NumHDonors(mol),             # H-bond donors
                Lipinski.NumRotatableBonds(mol),      # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),       # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol),  # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),         # heteroatoms
                Lipinski.HeavyAtomCount(mol),         # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),       # spiro atoms
                AllChem.CalcFractionCSP3(mol),        # fraction of sp3 carbons
                Lipinski.RingCount(mol),              # rings
                AllChem.CalcNumAliphaticRings(mol),   # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),    # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),   # saturated rings
                AllChem.CalcNumHeterocycles(mol),     # heterocycles
                MolSurf.TPSA(mol),                    # polar surface area
                desc.NumValenceElectrons(mol),        # valence electrons
                Crippen.MolMR(mol),                   # molar refractivity
            ]
        except Exception:
            # report the offending SMILES and fall back to an all-zero row
            print(smi)
            row = [0] * 19
        rows.append(row)
    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
Example #4
0
    def __init__(self, *args, **kwargs):
        """Build a Compound from a molecule and an optional description.

        With more than two positional arguments everything is forwarded
        unchanged to the parent initialiser. Otherwise the molecule is taken
        from the first positional argument or the ``mol_as_RDmol`` keyword,
        the description from the second positional argument or the
        ``description`` keyword (default ``''``), and the remaining fields
        are derived from the molecule before calling the parent initialiser.

        Raises:
            RuntimeError: if no molecule was supplied either way.
        """
        if len(args) > 2:
            super(Compound, self).__init__(*args, **kwargs)
            return

        rdmol = args[0] if args else None
        if not rdmol:
            rdmol = kwargs.get('mol_as_RDmol')
        if not rdmol:
            raise RuntimeError("No RDMol specified")

        text = args[1] if len(args) > 1 else None
        if not text:
            text = kwargs.get('description', '')

        # derived in this order: id, SMILES, InChI, InChI key, then counts
        identifier = self._generate_id()
        smiles = Chem.MolToSmiles(rdmol, isomericSmiles=True, canonical=True)
        inchi = Chem.MolToInchi(rdmol)
        derived = dict(
            unique_id=identifier,
            smiles=smiles,
            inchi=inchi,
            inchi_key=Chem.InchiToInchiKey(inchi),
            mol_weight_exact=Descriptors.ExactMolWt(rdmol),
            heavy_atoms_count=Lipinski.HeavyAtomCount(rdmol),
            ring_count=Lipinski.RingCount(rdmol),
            mol=rdmol,
        )

        super(Compound, self).__init__(description=text, **derived)
0
    # Per-molecule descriptor lists for every entry of org_mols.
    ## logP
    org_logp = [Descriptors.MolLogP(mol) for mol in tqdm(org_mols)]
    ## Bertz TC (topological complexity)
    org_tcs = [BertzCT(mol) for mol in tqdm(org_mols)]
    ## TPSA
    org_tpsa = [Descriptors.TPSA(mol) for mol in org_mols]
    ## QED
    ## molecules whose QED raises OverflowError are skipped, so org_qed can
    ## be shorter than org_mols and is not index-aligned with the other lists
    org_qed = []
    for mol in org_mols:
        try:
            org_qed.append(Descriptors.qed(mol))
        except OverflowError:
            pass

    ## number of rings: all rings, aliphatic rings, aromatic rings
    org_rings1 = [Lipinski.RingCount(mol) for mol in tqdm(org_mols)]
    org_rings2 = [Lipinski.NumAliphaticRings(mol) for mol in tqdm(org_mols)]
    org_rings3 = [Lipinski.NumAromaticRings(mol) for mol in tqdm(org_mols)]
    ## SA score
    ## failures (OverflowError / ZeroDivisionError) are skipped as for QED,
    ## so org_SA can also be shorter than org_mols
    org_SA = []
    for mol in tqdm(org_mols):
        try:
            org_SA.append(sascorer.calculateScore(mol))
        except (OverflowError, ZeroDivisionError):
            pass

    ## NP-likeness: the model is read once and reused for every molecule
    fscore = npscorer.readNPModel()
    org_NP = [npscorer.scoreMol(mol, fscore) for mol in tqdm(org_mols)]
    ## % sp3 carbons
    org_sp3 = [Lipinski.FractionCSP3(mol) for mol in org_mols]
Example #6
0
def calc_rdkit(mol):
    """Compute a fixed-order vector of descriptors for one molecule.

    Each entry below is evaluated on ``mol`` in the order listed
    (Crippen, Descriptors, EState, Fragments, GraphDescriptors, Lipinski,
    MolSurf). The 188 results are packed into a NumPy array and wrapped in a
    ``pd.Series``. No index is passed, so values are addressed by position;
    reordering the list changes the meaning of every position.

    Args:
        mol: molecule object passed unchanged to every descriptor function.

    Returns:
        pd.Series holding the descriptor values in list order.
    """
    descriptors = pd.Series(
        np.array([
            Crippen.MolLogP(mol),
            Crippen.MolMR(mol),
            Descriptors.FpDensityMorgan1(mol),
            Descriptors.FpDensityMorgan2(mol),
            Descriptors.FpDensityMorgan3(mol),
            Descriptors.FractionCSP3(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.MaxAbsPartialCharge(mol),
            Descriptors.MaxPartialCharge(mol),
            Descriptors.MinAbsPartialCharge(mol),
            Descriptors.MinPartialCharge(mol),
            Descriptors.MolWt(mol),
            Descriptors.NumRadicalElectrons(mol),
            Descriptors.NumValenceElectrons(mol),
            EState.EState.MaxAbsEStateIndex(mol),
            EState.EState.MaxEStateIndex(mol),
            EState.EState.MinAbsEStateIndex(mol),
            EState.EState.MinEStateIndex(mol),
            EState.EState_VSA.EState_VSA1(mol),
            EState.EState_VSA.EState_VSA10(mol),
            EState.EState_VSA.EState_VSA11(mol),
            EState.EState_VSA.EState_VSA2(mol),
            EState.EState_VSA.EState_VSA3(mol),
            EState.EState_VSA.EState_VSA4(mol),
            EState.EState_VSA.EState_VSA5(mol),
            EState.EState_VSA.EState_VSA6(mol),
            EState.EState_VSA.EState_VSA7(mol),
            EState.EState_VSA.EState_VSA8(mol),
            EState.EState_VSA.EState_VSA9(mol),
            Fragments.fr_Al_COO(mol),
            Fragments.fr_Al_OH(mol),
            Fragments.fr_Al_OH_noTert(mol),
            Fragments.fr_aldehyde(mol),
            Fragments.fr_alkyl_carbamate(mol),
            Fragments.fr_alkyl_halide(mol),
            Fragments.fr_allylic_oxid(mol),
            Fragments.fr_amide(mol),
            Fragments.fr_amidine(mol),
            Fragments.fr_aniline(mol),
            Fragments.fr_Ar_COO(mol),
            Fragments.fr_Ar_N(mol),
            Fragments.fr_Ar_NH(mol),
            Fragments.fr_Ar_OH(mol),
            Fragments.fr_ArN(mol),
            Fragments.fr_aryl_methyl(mol),
            Fragments.fr_azide(mol),
            Fragments.fr_azo(mol),
            Fragments.fr_barbitur(mol),
            Fragments.fr_benzene(mol),
            Fragments.fr_benzodiazepine(mol),
            Fragments.fr_bicyclic(mol),
            Fragments.fr_C_O(mol),
            Fragments.fr_C_O_noCOO(mol),
            Fragments.fr_C_S(mol),
            Fragments.fr_COO(mol),
            Fragments.fr_COO2(mol),
            Fragments.fr_diazo(mol),
            Fragments.fr_dihydropyridine(mol),
            Fragments.fr_epoxide(mol),
            Fragments.fr_ester(mol),
            Fragments.fr_ether(mol),
            Fragments.fr_furan(mol),
            Fragments.fr_guanido(mol),
            Fragments.fr_halogen(mol),
            Fragments.fr_hdrzine(mol),
            Fragments.fr_hdrzone(mol),
            Fragments.fr_HOCCN(mol),
            Fragments.fr_imidazole(mol),
            Fragments.fr_imide(mol),
            Fragments.fr_Imine(mol),
            Fragments.fr_isocyan(mol),
            Fragments.fr_isothiocyan(mol),
            Fragments.fr_ketone(mol),
            Fragments.fr_ketone_Topliss(mol),
            Fragments.fr_lactam(mol),
            Fragments.fr_lactone(mol),
            Fragments.fr_methoxy(mol),
            Fragments.fr_morpholine(mol),
            Fragments.fr_N_O(mol),
            Fragments.fr_Ndealkylation1(mol),
            Fragments.fr_Ndealkylation2(mol),
            Fragments.fr_NH0(mol),
            Fragments.fr_NH1(mol),
            Fragments.fr_NH2(mol),
            Fragments.fr_Nhpyrrole(mol),
            Fragments.fr_nitrile(mol),
            Fragments.fr_nitro(mol),
            Fragments.fr_nitro_arom(mol),
            Fragments.fr_nitro_arom_nonortho(mol),
            Fragments.fr_nitroso(mol),
            Fragments.fr_oxazole(mol),
            Fragments.fr_oxime(mol),
            Fragments.fr_para_hydroxylation(mol),
            Fragments.fr_phenol(mol),
            Fragments.fr_phenol_noOrthoHbond(mol),
            Fragments.fr_phos_acid(mol),
            Fragments.fr_phos_ester(mol),
            Fragments.fr_piperdine(mol),
            Fragments.fr_piperzine(mol),
            Fragments.fr_priamide(mol),
            Fragments.fr_prisulfonamd(mol),
            Fragments.fr_pyridine(mol),
            Fragments.fr_quatN(mol),
            Fragments.fr_SH(mol),
            Fragments.fr_sulfide(mol),
            Fragments.fr_sulfonamd(mol),
            Fragments.fr_sulfone(mol),
            Fragments.fr_term_acetylene(mol),
            Fragments.fr_tetrazole(mol),
            Fragments.fr_thiazole(mol),
            Fragments.fr_thiocyan(mol),
            Fragments.fr_thiophene(mol),
            Fragments.fr_unbrch_alkane(mol),
            Fragments.fr_urea(mol),
            GraphDescriptors.BalabanJ(mol),
            GraphDescriptors.BertzCT(mol),
            GraphDescriptors.Chi0(mol),
            GraphDescriptors.Chi0n(mol),
            GraphDescriptors.Chi0v(mol),
            GraphDescriptors.Chi1(mol),
            GraphDescriptors.Chi1n(mol),
            GraphDescriptors.Chi1v(mol),
            GraphDescriptors.Chi2n(mol),
            GraphDescriptors.Chi2v(mol),
            GraphDescriptors.Chi3n(mol),
            GraphDescriptors.Chi3v(mol),
            GraphDescriptors.Chi4n(mol),
            GraphDescriptors.Chi4v(mol),
            GraphDescriptors.HallKierAlpha(mol),
            GraphDescriptors.Ipc(mol),
            GraphDescriptors.Kappa1(mol),
            GraphDescriptors.Kappa2(mol),
            GraphDescriptors.Kappa3(mol),
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.RingCount(mol),
            MolSurf.LabuteASA(mol),
            MolSurf.PEOE_VSA1(mol),
            MolSurf.PEOE_VSA10(mol),
            MolSurf.PEOE_VSA11(mol),
            MolSurf.PEOE_VSA12(mol),
            MolSurf.PEOE_VSA13(mol),
            MolSurf.PEOE_VSA14(mol),
            MolSurf.PEOE_VSA2(mol),
            MolSurf.PEOE_VSA3(mol),
            MolSurf.PEOE_VSA4(mol),
            MolSurf.PEOE_VSA5(mol),
            MolSurf.PEOE_VSA6(mol),
            MolSurf.PEOE_VSA7(mol),
            MolSurf.PEOE_VSA8(mol),
            MolSurf.PEOE_VSA9(mol),
            MolSurf.SlogP_VSA1(mol),
            MolSurf.SlogP_VSA10(mol),
            MolSurf.SlogP_VSA11(mol),
            MolSurf.SlogP_VSA12(mol),
            MolSurf.SlogP_VSA2(mol),
            MolSurf.SlogP_VSA3(mol),
            MolSurf.SlogP_VSA4(mol),
            MolSurf.SlogP_VSA5(mol),
            MolSurf.SlogP_VSA6(mol),
            MolSurf.SlogP_VSA7(mol),
            MolSurf.SlogP_VSA8(mol),
            MolSurf.SlogP_VSA9(mol),
            MolSurf.SMR_VSA1(mol),
            MolSurf.SMR_VSA10(mol),
            MolSurf.SMR_VSA2(mol),
            MolSurf.SMR_VSA3(mol),
            MolSurf.SMR_VSA4(mol),
            MolSurf.SMR_VSA5(mol),
            MolSurf.SMR_VSA6(mol),
            MolSurf.SMR_VSA7(mol),
            MolSurf.SMR_VSA8(mol),
            MolSurf.SMR_VSA9(mol),
            MolSurf.TPSA(mol)
        ]))
    return descriptors
## TPSA
tpsa = [Descriptors.TPSA(mol) for mol in mols]
## QED
## molecules whose QED raises OverflowError are skipped on purpose, so `qed`
## may be shorter than `mols` and is not index-aligned with the other lists
qed = []
for mol in tqdm(mols):
    try:
        qed.append(Descriptors.qed(mol))
    except OverflowError:
        pass

## % of sp3 carbons
pct_sp3 = [Lipinski.FractionCSP3(mol) for mol in tqdm(mols)]
## % heteroatoms
pct_hetero = [Lipinski.NumHeteroatoms(mol) / mol.GetNumAtoms()
              for mol in tqdm(mols)]
## number of rings
rings = [Lipinski.RingCount(mol) for mol in tqdm(mols)]
## SA score
## failures are skipped as for QED, so `SA` may also be shorter than `mols`
SA = []
for mol in tqdm(mols):
    try:
        SA.append(sascorer.calculateScore(mol))
    except (OverflowError, ZeroDivisionError):
        pass

## NP-likeness: the model is read once and reused for every molecule
fscore = npscorer.readNPModel()
NP = [npscorer.scoreMol(mol, fscore) for mol in tqdm(mols)]

# add all outcomes to data frame
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with default arguments produces the same rows and index.
res = pd.concat([res, pd.DataFrame({'outcome': 'Molecular weight',
                                    'value': mws})])
def calculate_metrics(mol):
    """Compute a dictionary of descriptors for a single molecule.

    The NP-likeness and synthetic-accessibility scores use ``np_mod`` and
    ``sa_mod``, which are not defined in this function and must exist in the
    enclosing scope. ``Chem.AssignStereochemistry`` is called on ``mol``
    before stereocentres are counted.

    Args:
        mol: molecule object (an RDKit Mol, judging by the calls made on it).

    Returns:
        dict mapping a human-readable metric name to its value. The two ratio
        entries ('% rotatable bonds', '% heteroatoms') are fractions and are
        0 when the molecule has no bonds / no atoms respectively. The two
        score entries are None when the scorer raises ValueError. One extra
        'Atoms with symbol X' entry is added per element present.
    """
    # calculate chemical descriptors
    ## % of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)
    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)
    ## fraction of bonds that are rotatable
    n_bonds = mol.GetNumBonds()
    if n_bonds > 0:
        rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds
    else:
        rot_bonds = 0
    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)
    ## number of stereocentres
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)
    ## polarity
    tpsa = Chem.CalcTPSA(mol)
    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)
    ## molecular weight
    mw = Descriptors.MolWt(mol)
    ## in Lipinski space?
    Ro5 = in_Ro5(mol)
    ## fraction of heteroatoms; guarded like the rotatable-bond fraction so a
    ## molecule with no atoms yields 0 instead of raising ZeroDivisionError
    symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
    n_atoms = len(symbols)
    if n_atoms > 0:
        pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms
    else:
        pct_hetero = 0
    ## number of each atom
    atom_counts = Counter(symbols)
    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))
    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None
    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None
    ## topological complexity
    bertz_idx = BertzCT(mol)
    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx
    }
    # append atom counts
    for symbol, count in atom_counts.items():
        metrics['Atoms with symbol ' + symbol] = count
    return metrics