Example #1
0
def get_descriptors(mols=None):
    """Compute 2D and 3D descriptors for molecules and save them as CSV.

    One table row is filled for every truthy entry of ``mols``.  The
    graph-based descriptors (SMILES, weight, LogP, H-bond counts, ...) are
    written first.  ``AllChem.EmbedMolecule`` and
    ``AllChem.MMFFOptimizeMolecule`` are then called on the molecule and
    TPSA plus the shape descriptors (PMI1-3, PBF, NPR1/2, ISF) are added.
    If anything in that second stage raises, the row keeps whatever was
    set so far and processing moves on to the next molecule.

    The table is written to ``Descriptors.csv`` in the current working
    directory and then passed to ``plot_NPR``.

    Args:
        mols: sized iterable of molecules (presumably RDKit ``Mol``
            objects -- confirm with callers).  Falsy entries such as
            ``None`` are skipped.  ``None`` (the default) is treated as an
            empty collection.

    Returns:
        Whatever ``plot_NPR(table=table)`` returns.
    """
    if mols is None:
        # The declared default used to crash on len(None); behave exactly
        # like an explicitly empty input instead.
        mols = []

    table = pd.DataFrame()
    print('Computing descriptors for {} molecules'.format(len(mols)))

    row = 0
    for mol in mols:
        if not mol:
            continue

        table.loc[row, 'Smiles'] = Chem.MolToSmiles(mol, True)
        table.loc[row, 'MolWt'] = Descriptors.MolWt(mol)
        table.loc[row, 'LogP'] = Descriptors.MolLogP(mol)
        table.loc[row, 'NumHAcceptors'] = Descriptors.NumHAcceptors(mol)
        table.loc[row, 'NumHDonors'] = Descriptors.NumHDonors(mol)
        table.loc[row, 'NumHeteroatoms'] = Descriptors.NumHeteroatoms(mol)
        # NOTE: the column name is misspelt, but it is part of the CSV
        # output consumers may rely on, so it is kept as is.
        table.loc[row, 'NumRotableBonds'] = \
            Descriptors.NumRotatableBonds(mol)

        try:
            AllChem.EmbedMolecule(mol, useRandomCoords=True)
            AllChem.MMFFOptimizeMolecule(mol, mmffVariant='MMFF94s')
            table.loc[row, 'TPSA'] = Descriptors.TPSA(mol)
            table.loc[row, 'PMI1'] = Descriptors3D.PMI1(mol)
            table.loc[row, 'PMI2'] = Descriptors3D.PMI2(mol)
            table.loc[row, 'PMI3'] = Descriptors3D.PMI3(mol)
            table.loc[row, 'PBF'] = rdMolDescriptors.CalcPBF(mol)
            table.loc[row, 'NPR1'] = rdMolDescriptors.CalcNPR1(mol)
            table.loc[row, 'NPR2'] = rdMolDescriptors.CalcNPR2(mol)
            table.loc[row, 'ISF'] = Descriptors3D.InertialShapeFactor(mol)
        except Exception:
            # Deliberate best effort: a molecule that cannot be embedded,
            # optimised or measured keeps its partially filled row.
            pass

        # Every processed molecule owns a row, whether or not the 3D stage
        # succeeded.
        row += 1

    table.to_csv('Descriptors.csv')
    print('Computed descriptors in file Descriptors.csv')
    return (plot_NPR(table=table))
Example #2
0
    def testGithub2037(self):
        """Check NPR1/NPR2 across conformers with and without ``force=False``.

        With default arguments the values of every later conformer must
        differ from those of the first one; with ``force=False`` they must
        all match the first one.
        """
        mol = Chem.AddHs(Chem.MolFromSmiles("CCCCCCC"))
        conf_ids = AllChem.EmbedMultipleConfs(mol, 10)

        def npr_pairs(**extra):
            # For each conformer, in order, NPR1 is evaluated before NPR2
            # (tuple elements are evaluated left to right).
            return [(rdMD.CalcNPR1(mol, confId=conf_id, **extra),
                     rdMD.CalcNPR2(mol, confId=conf_id, **extra))
                    for conf_id in conf_ids]

        # default arguments (no caching of results): values differ
        pairs = npr_pairs()
        for npr1, npr2 in pairs[1:]:
            self.assertNotAlmostEqual(pairs[0][0], npr1)
            self.assertNotAlmostEqual(pairs[0][1], npr2)

        # force=False allows cached results to be reused: values repeat
        pairs = npr_pairs(force=False)
        for npr1, npr2 in pairs[1:]:
            self.assertAlmostEqual(pairs[0][0], npr1)
            self.assertAlmostEqual(pairs[0][1], npr2)
def calculate_scalar_descriptors(molecule, symbols):
    """Build a flat feature vector of scalar descriptors for one molecule.

    The vector is laid out in this order:

    1. asphericity, the Crippen descriptors, exact molecular weight,
       eccentricity, fraction of sp3 carbons, Labute ASA, NPR1, NPR2 and
       the Hall-Kier alpha value;
    2. the number of entries in ``symbols`` equal to 'H', 'C', 'N', 'O'
       and 'F';
    3. ring counts (aliphatic/aromatic/saturated carbo- and heterocycles,
       spiro atoms, bridgehead atoms);
    4. amide bond, H-bond acceptor and H-bond donor counts.

    Args:
        molecule: molecule handed to the ``rdMD.Calc*`` functions.
        symbols: sequence of element symbols (presumably one per atom --
            confirm with callers).

    Returns:
        ``numpy.ndarray`` holding the features in the order above.
    """
    shape_and_bulk = [
        rdMD.CalcAsphericity(molecule),
        *rdMD.CalcCrippenDescriptors(molecule),
        rdMD.CalcExactMolWt(molecule),
        rdMD.CalcEccentricity(molecule),
        rdMD.CalcFractionCSP3(molecule),
        rdMD.CalcLabuteASA(molecule),
        rdMD.CalcNPR1(molecule),
        rdMD.CalcNPR2(molecule),
        rdMD.CalcHallKierAlpha(molecule),
    ]

    # elemental distribution
    symbol_array = np.array(symbols)
    element_counts = [
        np.sum(symbol_array == element)
        for element in ('H', 'C', 'N', 'O', 'F')
    ]

    # ring features
    ring_counters = (
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,  # atom shared between rings with one bond
        rdMD.CalcNumBridgeheadAtoms,  # shared with at least two bonds
    )
    ring_counts = [counter(molecule) for counter in ring_counters]

    # other counts
    other_counts = [
        rdMD.CalcNumAmideBonds(molecule),
        rdMD.CalcNumHBA(molecule),  # number of hydrogen acceptors
        rdMD.CalcNumHBD(molecule),  # number of hydrogen donors
    ]

    return np.array(
        shape_and_bulk + element_counts + ring_counts + other_counts)
Example #4
0
def main(args):
    """Compute NPR1/NPR2 for every ``<smiles> <cid>`` line of an input file.

    ``args[1]`` is the path of a whitespace-separated text file whose lines
    hold a SMILES string followed by an identifier.  For each line the
    molecule is parsed, hydrogens are added, a conformer is embedded and
    the two normalised principal moment ratios (rounded to 4 decimals) are
    appended to ``output_<input file stem>`` in the current directory.

    Lines that cannot be parsed or embedded are skipped silently.
    Molecules whose NPR calculation fails are reported on stdout and
    passed to ``record_error``.

    The output file is opened in append mode; the header line is written
    only when the file is new or still empty, so repeated runs do not
    scatter header lines through the data.

    Args:
        args: argument vector; ``args[1]`` is the input file path.

    Raises:
        IndexError: if ``args`` has no element at index 1.
    """
    print(rdBase.rdkitVersion)
    input_filename = os.path.basename(args[1]).split('.')[0]
    result_file = "output_" + input_filename

    print(result_file)
    try:
        # Appending to an existing, non-empty result file must not add a
        # second header in the middle of the data.
        needs_header = (not os.path.exists(result_file)
                        or os.path.getsize(result_file) == 0)
        with open(result_file, 'a') as output:
            if needs_header:
                header = ['smiles', 'cid', 'npr1', 'npr2']
                output.write((' '.join(header)) + '\n')
            with open(args[1]) as f:
                for line in f:
                    try:
                        smiles, cid = line.strip().split(None, 1)
                        mol = C.MolFromSmiles(smiles)
                        mol = C.AddHs(mol)
                        AllChem.EmbedMolecule(mol,
                                              useExpTorsionAnglePrefs=True,
                                              useBasicKnowledge=True)
                    except Exception:
                        # Malformed line or unparsable molecule: skip it.
                        continue
                    try:
                        npr1 = round(CD.CalcNPR1(mol), 4)
                        npr2 = round(CD.CalcNPR2(mol), 4)
                    except Exception:
                        print("Failed molecules:" + cid)
                        record_error(input_filename, smiles, cid)
                        continue
                    fields = map(str, [smiles, cid, npr1, npr2])
                    output.write((' '.join(fields)) + '\n')
        # Both files are closed by their ``with`` blocks.
    except IOError as err:
        print(err)
Example #5
0
        from Sauer and Schwarz JCIM 43:987-1003 (2003)
        https://doi.org/10.1021/ci025599w


    **Arguments**

      - inMol: a molecule

      - confId: (optional) the conformation ID to use

      - useAtomicMasses: (optional) toggles use of atomic masses in the
        calculation. Defaults to True
    """

    NPR2 = lambda *x, **y: rdMolDescriptors.CalcNPR2(*x, **y)
    NPR2.version = rdMolDescriptors._CalcNPR2_version
    NPR2.__doc__ = """ Normalized principal moments ratio 2 (=I2/I3)

        from Sauer and Schwarz JCIM 43:987-1003 (2003)
        https://doi.org/10.1021/ci025599w


    **Arguments**

      - inMol: a molecule

      - confId: (optional) the conformation ID to use

      - useAtomicMasses: (optional) toggles use of atomic masses in the
        calculation. Defaults to True