def calculate_scalar_descriptors(molecule, symbols):
    features = list()
    features.append(rdMD.CalcAsphericity(molecule))
    features += list(rdMD.CalcCrippenDescriptors(molecule))
    features.append(rdMD.CalcExactMolWt(molecule))
    features.append(rdMD.CalcEccentricity(molecule))
    features.append(rdMD.CalcFractionCSP3(molecule))
    features.append(rdMD.CalcLabuteASA(molecule))
    features.append(rdMD.CalcNPR1(molecule))
    features.append(rdMD.CalcNPR2(molecule))
    features.append(rdMD.CalcHallKierAlpha(molecule))

    # elemental distribution
    symbols = np.array(symbols)
    features.append(np.sum(symbols == 'H'))
    features.append(np.sum(symbols == 'C'))
    features.append(np.sum(symbols == 'N'))
    features.append(np.sum(symbols == 'O'))
    features.append(np.sum(symbols == 'F'))

    # ring features
    features.append(rdMD.CalcNumAliphaticCarbocycles(molecule))
    features.append(rdMD.CalcNumAliphaticHeterocycles(molecule))
    features.append(rdMD.CalcNumAromaticCarbocycles(molecule))
    features.append(rdMD.CalcNumAromaticHeterocycles(molecule))
    features.append(rdMD.CalcNumSaturatedCarbocycles(molecule))
    features.append(rdMD.CalcNumSaturatedHeterocycles(molecule))
    features.append(rdMD.CalcNumSpiroAtoms(
        molecule))  # atom shared between rings with one bond
    features.append(rdMD.CalcNumBridgeheadAtoms(
        molecule))  # atom shared between rings with at least two bonds

    # other counts
    features.append(rdMD.CalcNumAmideBonds(molecule))
    features.append(rdMD.CalcNumHBA(molecule))  # number of hydrogen acceptors
    features.append(rdMD.CalcNumHBD(molecule))  # number of hydrogen donors

    return np.array(features)
Beispiel #2
0
    def extract(x, from_smiles):
        if from_smiles:
            mol = Chem.MolFromSmiles(x)
        else:
            mol = x

        if (mol is None) or (len(mol.GetAtoms()) == 0):
            if include_3D:
                return [0] * 29
            else:
                return [0] * 24
        else:
            logP = Crippen.MolLogP(mol)
            refractivity = Crippen.MolMR(mol)

            weight = Descriptors.MolWt(mol)
            exact_weight = Descriptors.ExactMolWt(mol)
            heavy_weight = Descriptors.HeavyAtomMolWt(mol)
            heavy_count = Lipinski.HeavyAtomCount(mol)
            nhoh_count = Lipinski.NHOHCount(mol)
            no_count = Lipinski.NOCount(mol)
            hacceptor_count = Lipinski.NumHAcceptors(mol)
            hdonor_count = Lipinski.NumHDonors(mol)
            hetero_count = Lipinski.NumHeteroatoms(mol)
            rotatable_bond_count = Lipinski.NumRotatableBonds(mol)
            valance_electron_count = Descriptors.NumValenceElectrons(mol)
            amide_bond_count = rdMolDescriptors.CalcNumAmideBonds(mol)
            aliphatic_ring_count = Lipinski.NumAliphaticRings(mol)
            aromatic_ring_count = Lipinski.NumAromaticRings(mol)
            saturated_ring_count = Lipinski.NumSaturatedRings(mol)
            aliphatic_cycle_count = Lipinski.NumAliphaticCarbocycles(mol)
            aliphaticHetero_cycle_count = Lipinski.NumAliphaticHeterocycles(
                mol)
            aromatic_cycle_count = Lipinski.NumAromaticCarbocycles(mol)
            aromaticHetero_cycle_count = Lipinski.NumAromaticHeterocycles(mol)
            saturated_cycle_count = Lipinski.NumSaturatedCarbocycles(mol)
            saturatedHetero_cycle_count = Lipinski.NumSaturatedHeterocycles(
                mol)

            tpsa = rdMolDescriptors.CalcTPSA(mol)

            if include_3D:
                mol_3D = Chem.AddHs(mol)
                AllChem.EmbedMolecule(mol_3D)
                AllChem.MMFFOptimizeMolecule(mol_3D)
                eccentricity = rdMolDescriptors.CalcEccentricity(mol_3D)
                asphericity = rdMolDescriptors.CalcAsphericity(mol_3D)
                spherocity = rdMolDescriptors.CalcSpherocityIndex(mol_3D)
                inertial = rdMolDescriptors.CalcInertialShapeFactor(mol_3D)
                gyration = rdMolDescriptors.CalcRadiusOfGyration(mol_3D)

                return [
                    logP, refractivity, weight, exact_weight, heavy_weight,
                    heavy_count, nhoh_count, no_count, hacceptor_count,
                    hdonor_count, hetero_count, rotatable_bond_count,
                    valance_electron_count, amide_bond_count,
                    aliphatic_ring_count, aromatic_ring_count,
                    saturated_ring_count, aliphatic_cycle_count,
                    aliphaticHetero_cycle_count, aromatic_cycle_count,
                    aromaticHetero_cycle_count, saturated_cycle_count,
                    saturatedHetero_cycle_count, tpsa, eccentricity,
                    asphericity, spherocity, inertial, gyration
                ]
            else:
                return [
                    logP, refractivity, weight, exact_weight, heavy_weight,
                    heavy_count, nhoh_count, no_count, hacceptor_count,
                    hdonor_count, hetero_count, rotatable_bond_count,
                    valance_electron_count, amide_bond_count,
                    aliphatic_ring_count, aromatic_ring_count,
                    saturated_ring_count, aliphatic_cycle_count,
                    aliphaticHetero_cycle_count, aromatic_cycle_count,
                    aromaticHetero_cycle_count, saturated_cycle_count,
                    saturatedHetero_cycle_count, tpsa
                ]
Beispiel #3
0
       https://doi.org/10.1002/9783527618279.ch37

       Definition:
         sqrt(pm3**2 -pm1**2) / pm3**2

    **Arguments**

      - inMol: a molecule

      - confId: (optional) the conformation ID to use

      - useAtomicMasses: (optional) toggles use of atomic masses in the
        calculation. Defaults to True
    """

    Asphericity = lambda *x, **y: rdMolDescriptors.CalcAsphericity(*x, **y)
    Asphericity.version = rdMolDescriptors._CalcAsphericity_version
    Asphericity.__doc__ = """ molecular asphericity

       from Todeschini and Consoni "Descriptors from Molecular Geometry"
       Handbook of Chemoinformatics
       https://doi.org/10.1002/9783527618279.ch37

       Definition:
         0.5 * ((pm3-pm2)**2 + (pm3-pm1)**2 + (pm2-pm1)**2)/(pm1**2+pm2**2+pm3**2)

    **Arguments**

      - inMol: a molecule

      - confId: (optional) the conformation ID to use
Beispiel #4
0
    'CCCCCBr', 'CCCCCCBr', 'CI', 'CCI', 'CCCI', 'CCCCI', 'CCCCCI', 'CCCCCCI',
    'CF', 'CCF', 'CCCF', 'CCCCF', 'CCCCCF', 'CCCCCCF', 'CS', 'CCS', 'CCCS',
    'CCCCS', 'CCCCCS', 'CCCCCCS', 'CN', 'CCN', 'CCCN', 'CCCCN', 'CCCCCN',
    'CCCCCCN'
]
for smi in A:
    m = Chem.MolFromSmiles(smi)
    m = localopt(m, 100)
    #r=get3D(m,True)
    print smi
    print "---------"
    r = rdMD.CalcWHIM(m)
    print "Ei:" + str(r[0]) + "," + str(r[1]) + "," + str(r[2]) + "\n"
    print "Gi:" + str(r[5]) + "," + str(r[6]) + "," + str(r[7]) + "\n"
    print "SI:" + str(rdMD.CalcSpherocityIndex(m))
    print "AS:" + str(rdMD.CalcAsphericity(m))
    print "EX:" + str(rdMD.CalcEccentricity(m))
    for item in r:
        thefile.write("%.3f," % item)
    thefile.write("\n")
    #m.SetProp("smi", smi)
    #writer.write(m)

thefile = open('testBPA.txt', 'w')
writer = Chem.SDWriter('3DBPAmol.sdf')
B = [
    'CN(C)CC(Br)c1ccccc1', 'CN(C)CC(Br)c1ccc(F)cc1', 'CN(C)CC(Br)c1ccc(Cl)cc1',
    'CN(C)CC(Br)c1ccc(Cl)cc1', 'CN(C)CC(Br)c1ccc(I)cc1',
    'CN(C)CC(Br)c1ccc(C)cc1', 'CN(C)CC(Br)c1cccc(F)c1',
    'CN(C)CC(Br)c1cccc(Cl)c1', 'CN(C)CC(Br)c1cccc(Br)c1',
    'CN(C)CC(Br)c1cccc(I)c1', 'CN(C)CC(Br)c1cccc(C)c1',