def testBalabanJ(self):
    """Check BalabanJ against reference values for small hydrocarbons.

    The reference J values are from Balaban's paper and have had roundoff
    errors and typos corrected.  Each molecule is evaluated twice, first
    with forceDMat=1 and then with the default arguments, and both results
    are compared to the reference with feq.
    """
    alkanes = [
        ('CC', 1.0),
        ('CCC', 1.6330),
        ('CCCC', 1.9747),
        ('CC(C)C', 2.3238),
        ('CCCCC', 2.1906),
        ('CC(C)CC', 2.5396),
        ('CC(C)(C)C', 3.0237),
        ('CCCCCC', 2.3391),
        ('CC(C)CCC', 2.6272),
        ('CCC(C)CC', 2.7542),
        ('CC(C)(C)CC', 3.1685),
        ('CC(C)C(C)C', 2.9935),
    ]
    cycloalkanes = [
        ('C1CCCCC1', 2.0000),
        ('C1C(C)CCCC1', 2.1229),
        ('C1C(CC)CCCC1', 2.1250),
        ('C1C(C)C(C)CCC1', 2.2794),
        ('C1C(C)CC(C)CC1', 2.2307),
        ('C1C(C)CCC(C)C1', 2.1924),
        ('C1C(CCC)CCCC1', 2.0779),
        ('C1C(C(C)C)CCCC1', 2.2284),
        ('C1C(CC)C(C)CCC1', 2.2973),
        ('C1C(CC)CC(C)CC1', 2.2317),
        ('C1C(CC)CCC(C)C1', 2.1804),
        ('C1C(C)C(C)C(C)CC1', 2.4133),
        ('C1C(C)C(C)CC(C)C1', 2.3462),
        ('C1C(C)CC(C)CC1(C)', 2.3409),
    ]
    aromatics = [
        ('c1ccccc1', 3.0000),
        ('c1c(C)cccc1', 3.0215),
        ('c1c(CC)cccc1', 2.8321),
        ('c1c(C)c(C)ccc1', 3.1349),
        ('c1c(C)cc(C)cc1', 3.0777),
        ('c1c(C)ccc(C)c1', 3.0325),
        ('c1c(CCC)cccc1', 2.6149),
        ('c1c(C(C)C)cccc1', 2.8483),
        ('c1c(CC)c(C)ccc1', 3.0065),
        ('c1c(CC)cc(C)cc1', 2.9369),
        ('c1c(CC)ccc(C)c1', 2.8816),
        ('c1c(C)c(C)c(C)cc1', 3.2478),
        ('c1c(C)c(C)cc(C)c1', 3.1717),
        ('c1c(C)cc(C)cc1(C)', 3.1657),
    ]
    for smiles, expected in alkanes + cycloalkanes + aromatics:
        mol = Chem.MolFromSmiles(smiles)

        # First pass: explicit forceDMat=1.
        forced = GraphDescriptors.BalabanJ(mol, forceDMat=1)
        assert feq(forced, expected), 'mol %s (J=%f) should have J=%f' % (
            smiles, forced, expected)

        # Second pass: same molecule object, default arguments.
        default = GraphDescriptors.BalabanJ(mol)
        assert feq(default, expected), 'second pass: mol %s (J=%f) should have J=%f' % (
            smiles, default, expected)

    if doLong:
        self.__testDesc('PP_descrs_regress.rest.2.csv', 1, GraphDescriptors.BalabanJ)
    def testIpc(self):
        """Check Ipc and its averaged form against stored reference values.

        Every molecule is evaluated first with forceDMat=1 and then with the
        default arguments; each result is compared to its reference with feq
        using 1e-3.
        """
        data = [
            ('CCCCC', 1.40564, 11.24511),
            ('CCC(C)C', 1.37878, 9.65148),
            ('CC(C)(C)C', 0.72193, 3.60964),
            ('CN(CC)CCC', 1.67982, 31.91664),
            ('C1CCCCC1', 1.71997, 34.39946),
            ('CC1CCCCC1', 1.68562, 47.19725),
            ('Cc1ccccc1', 1.68562, 47.19725),
            ('CC(C)=C(C)C', 1.36096, 13.60964),
            ('C#N', 1.00000, 2.00000),
            ('OC#N', 0.91830, 2.75489),
        ]
        for smiles, avg_ref, total_ref in data:
            mol = Chem.MolFromSmiles(smiles)

            # First pass: explicit forceDMat=1.
            total = GraphDescriptors.Ipc(mol, forceDMat=1)
            averaged = GraphDescriptors.Ipc(mol, avg=1, forceDMat=1)
            assert feq(averaged, avg_ref, 1e-3), (
                'mol %s (Ipc_avg=%f) should have Ipc_avg=%f' % (smiles, averaged, avg_ref))
            assert feq(total, total_ref, 1e-3), (
                'mol %s (Ipc=%f) should have Ipc=%f' % (smiles, total, total_ref))

            # Second pass: same molecule object, default arguments.
            total = GraphDescriptors.Ipc(mol)
            averaged = GraphDescriptors.Ipc(mol, avg=1)
            assert feq(averaged, avg_ref, 1e-3), (
                '2nd pass: mol %s (Ipc_avg=%f) should have Ipc_avg=%f' % (
                    smiles, averaged, avg_ref))
            assert feq(total, total_ref, 1e-3), (
                '2nd pass: mol %s (Ipc=%f) should have Ipc=%f' % (smiles, total, total_ref))
Exemple #3
0
def CalculateIpc(mol):
    """
    #################################################################
    Return the base-10 logarithm of the Ipc descriptor of a molecule.

    Ipc is the information content of the coefficients of the
    characteristic polynomial of the adjacency matrix of a
    hydrogen-suppressed graph of a molecule.

    From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67,
    4517-4533 (1977)

     ---->Ipc(log value)

    Usage:

        result=CalculateIpc(mol)

        Input: mol is a molecule object

        Output: result is a numeric value; 0 is returned when the raw
        Ipc value is 0, so the logarithm is never taken of zero.
    #################################################################
    """
    # Compute the descriptor once and reuse it; the original evaluated
    # GD.Ipc(mol) a second time just to take its logarithm.
    co = GD.Ipc(mol)
    if co == 0:
        return 0
    return numpy.log10(co)
  def testIpc(self):
    """Check Ipc and its averaged form against stored reference values.

    Each molecule is evaluated with forceDMat=1 and then again with the
    default arguments; both passes must agree with the reference to within
    a delta of 1e-3.  When doLong is set, the CSV regression checks are run
    once, after all molecules have been checked.
    """
    data = [('CCCCC', 1.40564, 11.24511), ('CCC(C)C', 1.37878, 9.65148),
            ('CC(C)(C)C', 0.72193, 3.60964), ('CN(CC)CCC', 1.67982, 31.91664),
            ('C1CCCCC1', 1.71997, 34.39946), ('CC1CCCCC1', 1.68562, 47.19725),
            ('Cc1ccccc1', 1.68562, 47.19725), ('CC(C)=C(C)C', 1.36096, 13.60964),
            ('C#N', 1.00000, 2.00000), ('OC#N', 0.91830, 2.75489)]
    for smi, res1, res2 in data:
      m = Chem.MolFromSmiles(smi)

      # First pass: explicit forceDMat=1.
      Ipc = GraphDescriptors.Ipc(m, forceDMat=1)
      Ipc_avg = GraphDescriptors.Ipc(m, avg=1, forceDMat=1)
      self.assertAlmostEqual(Ipc_avg, res1, delta=1e-3,
                             msg='mol %s (Ipc_avg=%f) should have Ipc_avg=%f' % (smi, Ipc_avg,
                                                                                 res1))
      self.assertAlmostEqual(Ipc, res2, delta=1e-3,
                             msg='mol %s (Ipc=%f) should have Ipc=%f' % (smi, Ipc, res2))

      # Second pass: same molecule object, default arguments.
      Ipc = GraphDescriptors.Ipc(m)
      Ipc_avg = GraphDescriptors.Ipc(m, avg=1)
      self.assertAlmostEqual(Ipc_avg, res1, delta=1e-3,
                             msg='2nd pass: mol %s (Ipc_avg=%f) should have Ipc_avg=%f' % (
                               smi, Ipc_avg, res1))
      self.assertAlmostEqual(Ipc, res2, delta=1e-3,
                             msg='2nd pass: mol %s (Ipc=%f) should have Ipc=%f' % (smi, Ipc, res2))

    # The regression checks do not use the loop variables, so they run once
    # here instead of being repeated for every molecule in the loop.
    if doLong:
      self.__testDesc('PP_descrs_regress.csv', 4, GraphDescriptors.Ipc)
      self.__testDesc('PP_descrs_regress.2.csv', 4, GraphDescriptors.Ipc)
Exemple #5
0
 def testIssue125(self):
     """Regression check for an issue with calculating BalabanJ.

     Two molecules are parsed from the same SMILES.  MolToSmiles is called
     on only the first of them before BalabanJ is computed for both, and
     the two J values are then compared with feq.
     """
     smiles = 'O=C(OC)C1=C(C)NC(C)=C(C(OC)=O)C1C2=CC=CC=C2[N+]([O-])=O'
     touched = Chem.MolFromSmiles(smiles)
     untouched = Chem.MolFromSmiles(smiles)
     # Result deliberately discarded; only the call on `touched` matters here.
     Chem.MolToSmiles(touched)
     j_touched = GraphDescriptors.BalabanJ(touched)
     j_untouched = GraphDescriptors.BalabanJ(untouched)
     assert feq(j_touched, j_untouched)
Exemple #6
0
def CalculateBertzCT(mol):
    """
    #################################################################
    Return the base-10 logarithm of the Bertz CT index of a molecule.

    BertzCT is a topological index meant to quantify "complexity" of
    molecules.  It consists of a sum of two terms, one representing the
    complexity of the bonding, the other representing the complexity of
    the distribution of heteroatoms.

    From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)

    ---->BertzCT(log value)

    Usage:

        result=CalculateBertzCT(mol)

        Input: mol is a molecule object

        Output: result is a numeric value; when the raw index is not
        greater than zero, log10(MINVALUE) is returned instead.
    #################################################################
    """
    raw = GD.BertzCT(mol)
    # Substitute MINVALUE for any value that is not strictly positive so the
    # logarithm is always taken of the fallback rather than of zero.
    return np.log10(raw if raw > 0 else MINVALUE)
Exemple #7
0
  def _testBertzCTLong(self):
   """ Check the Bertz 'C(T)' index against a regression data file.

    NOTE: this is a backwards compatibility test. Because of the changes
      w.r.t. the treatment of aromatic atoms in the new version, molecules
      containing any aromatic atom are skipped.

    Lines of the file starting with '#' are ignored. On every other line the
    first comma-separated field is the SMILES and field `col` is the target.
   """
   col = 1
   path = os.path.join(RDConfig.RDCodeDir,'Chem','test_data','PP_descrs_regress.2.csv')
   with open(path,'r') as inF:
     # Line numbers are 1-based and count comment lines too.
     for lineNum,line in enumerate(inF,1):
       if line[0] == '#':
         continue
       fields = line.split(',')
       smi = fields[0]
       m = Chem.MolFromSmiles(smi)

       assert m,'line %d, smiles: %s'%(lineNum,smi)
       if any(atom.GetIsAromatic() for atom in m.GetAtoms()):
         continue
       tgtVal = float(fields[col])
       try:
         val = GraphDescriptors.BertzCT(m)
       except Exception:
         # Sentinel value: a failed calculation is reported through the
         # assertion below rather than through the raised exception.
         val = 666
       assert feq(val,tgtVal,1e-4),'line %d, mol %s (CT calc = %f) should have CT = %f'%(lineNum,smi,val,tgtVal)
Exemple #8
0
    def testChi0n(self):
        """Check Chi0n against stored reference values.

        Each result is compared to its reference with feq using 1e-3.  When
        doLong is set, the CSV regression check is run afterwards.
        """
        references = [
            ('CCCCCC', 4.828),
            ('CCC(C)CC', 4.992),
            ('CC(C)CCC', 4.992),
            ('CC(C)C(C)C', 5.155),
            ('CC(C)(C)CC', 5.207),
            ('CCCCCO', 4.276),
            ('CCC(O)CC', 4.439),
            ('CC(O)(C)CC', 4.654),
            ('c1ccccc1O', 3.834),
            ('CCCl', 2.085),
            ('CCBr', 2.085),
            ('CCI', 2.085),
        ]
        for smiles, expected in references:
            mol = Chem.MolFromSmiles(smiles)
            value = GraphDescriptors.Chi0n(mol)
            assert feq(value, expected, 1e-3), (
                'mol %s (Chi0n=%f) should have Chi0n=%f' % (smiles, value, expected))

        if doLong:
            self.__testDesc('PP_descrs_regress.rest.2.csv', 6, GraphDescriptors.Chi0n)
    def testChi0n(self):
        """Check Chi0n against stored reference values.

        Each result is compared to its reference with feq using 1e-3.
        """
        cases = (
            ('CCCCCC', 4.828),
            ('CCC(C)CC', 4.992),
            ('CC(C)CCC', 4.992),
            ('CC(C)C(C)C', 5.155),
            ('CC(C)(C)CC', 5.207),
            ('CCCCCO', 4.276),
            ('CCC(O)CC', 4.439),
            ('CC(O)(C)CC', 4.654),
            ('c1ccccc1O', 3.834),
            ('CCCl', 2.085),
            ('CCBr', 2.085),
            ('CCI', 2.085),
        )
        for smi, target in cases:
            computed = GraphDescriptors.Chi0n(Chem.MolFromSmiles(smi))
            assert feq(computed, target, 1e-3), (
                'mol %s (Chi0n=%f) should have Chi0n=%f' % (smi, computed, target))
Exemple #10
0
def CalculateIpc(mol: Chem.Mol) -> float:
    """Return log10 of the Bonchev-Trinajstic complexity index (Ipc).

    From Bonchev D. & Trinajstic N., J. Chem. Phys. (1977) 67,4517-4533.

    The raw value is passed to numpy.log10 without any guard.
    """
    ipc = GD.Ipc(mol)
    return numpy.log10(ipc)
Exemple #11
0
def CalculateBertzCT(mol: Chem.Mol) -> float:
    """Return log10 of the Bertz complexity index (BertzCT).

    From Bertz S. H., J. Am. Chem. Soc. (1981) 103,3599-3601.

    The raw value is passed to numpy.log10 without any guard.
    """
    bertz = GD.BertzCT(mol)
    return numpy.log10(bertz)
Exemple #12
0
    def testKappa2(self):
        """Check the Hall-Kier kappa2 value against reference data.

        Corrected data from Tables 5 and 6 of Rev. Comp. Chem. vol 2,
        367-422, (1991).  Each result is compared to its reference with feq
        using 1e-3.  When doLong is set, the CSV regression check is run
        afterwards.
        """
        references = [
            ('[C+2](C)(C)(C)(C)(C)C', 0.667),
            ('[C+](C)(C)(C)(C)(CC)', 1.240),
            ('C(C)(C)(C)(CCC)', 2.3444),
            ('CC(C)CCCC', 4.167),
            ('CCCCCCC', 6.000),
            ('CCCCCC', 5.000),
            ('CCCCCCC', 6.000),
            ('C1CCCC1', 1.440),
            ('C1CCCC1C', 1.633),
            ('C1CCCCC1', 2.222),
            ('C1CCCCCC1', 3.061),
            ('CCCCC', 4.00),
            ('CC=CCCC', 4.740),
            ('C1=CN=CN1', 0.884),
            ('c1ccccc1', 1.606),
            ('c1cnccc1', 1.552),
            ('n1ccncc1', 1.500),
            ('CCCCF', 3.930),
            ('CCCCCl', 4.290),
            ('CCCCBr', 4.480),
            ('CCC(C)C1CCC(C)CC1', 4.133),
            ('CC(C)CC1CCC(C)CC1', 4.133),
            ('CC(C)C1CCC(C)CCC1', 4.133),
        ]
        for smiles, expected in references:
            mol = Chem.MolFromSmiles(smiles)
            value = GraphDescriptors.Kappa2(mol)
            assert feq(value, expected, 1e-3), (
                'mol %s (kappa2=%f) should have kappa2=%f' % (smiles, value, expected))

        if doLong:
            self.__testDesc('PP_descrs_regress.rest.2.csv', 32, GraphDescriptors.Kappa2)
 def testChi4n(self):
   """Check Chi4n against stored reference values (feq with 1e-3)."""
   references = [
     ('CCCCCC', 0.500),
     ('CCC(C)CC', 0.289),
     ('CC(C)CCC', 0.577),
     ('CC(C)C(C)C', 0.000),
     ('CC(C)(C)CC', 0.000),
     ('CCCCCO', 0.362),
     ('CCC(O)CC', 0.289),
     ('CC(O)(C)CC', 0.000),
     ('c1ccccc1O', 0.428),
   ]
   for smiles, expected in references:
     mol = Chem.MolFromSmiles(smiles)
     value = GraphDescriptors.Chi4n(mol)
     assert feq(value, expected, 1e-3), (
       'mol %s (Chi4n=%f) should have Chi4N=%f' % (smiles, value, expected))
 def testChi5v(self):
   """Check the order-5 ChiNv_ value against stored references (feq with 1e-3)."""
   references = [
     ('CCCCCC', 0.250),
     ('CCC(C)CC', 0.000),
     ('CC(C)CCC', 0.000),
     ('CC(C)C(C)C', 0.000),
     ('CC(C)(C)CC', 0.000),
     ('CCCCCO', 0.112),
     ('CCC(O)CC', 0.000),
     ('CC(O)(C)CC', 0.000),
     ('c1ccccc1O', 0.242),
   ]
   for smiles, expected in references:
     mol = Chem.MolFromSmiles(smiles)
     # There is no dedicated Chi5v call here; the generic ChiNv_ is used with 5.
     value = GraphDescriptors.ChiNv_(mol, 5)
     assert feq(value, expected, 1e-3), (
       'mol %s (Chi5v=%f) should have Chi5V=%f' % (smiles, value, expected))
Exemple #15
0
  def testChi3n(self):
    """Check Chi3n against stored reference values (feq with 1e-3)."""
    references = [
      ('CCCCCC', 0.957),
      ('CCC(C)CC', 1.394),
      ('CC(C)CCC', 0.866),
      ('CC(C)C(C)C', 1.333),
      ('CC(C)(C)CC', 1.061),
      ('CCCCCO', 0.762),
      ('CCC(O)CC', 0.943),
      ('CC(O)(C)CC', 0.865),
      ('c1ccccc1O', 0.756),
    ]
    for smiles, expected in references:
      mol = Chem.MolFromSmiles(smiles)
      value = GraphDescriptors.Chi3n(mol)
      assert feq(value, expected, 1e-3), (
        'mol %s (Chi3n=%f) should have Chi3N=%f' % (smiles, value, expected))
Exemple #16
0
  def testChi2n(self):
    """Check Chi2n against stored reference values (feq with 1e-3)."""
    references = [
      ('CCCCCC', 1.707),
      ('CCC(C)CC', 1.922),
      ('CC(C)CCC', 2.183),
      ('CC(C)C(C)C', 2.488),
      ('CC(C)(C)CC', 2.914),
      ('CCCCCO', 1.431),
      ('CCC(O)CC', 1.470),
      ('CC(O)(C)CC', 2.166),
      ('c1ccccc1O', 1.336),
    ]
    for smiles, expected in references:
      mol = Chem.MolFromSmiles(smiles)
      value = GraphDescriptors.Chi2n(mol)
      assert feq(value, expected, 1e-3), (
        'mol %s (Chi2n=%f) should have Chi2N=%f' % (smiles, value, expected))
Exemple #17
0
  def testChi1n(self):
    """Check Chi1n against stored reference values (feq with 1e-3)."""
    references = [
      ('CCCCCC', 2.914),
      ('CCC(C)CC', 2.808),
      ('CC(C)CCC', 2.770),
      ('CC(C)C(C)C', 2.643),
      ('CC(C)(C)CC', 2.561),
      ('CCCCCO', 2.523),
      ('CCC(O)CC', 2.489),
      ('CC(O)(C)CC', 2.284),
      ('c1ccccc1O', 2.134),
    ]
    for smiles, expected in references:
      mol = Chem.MolFromSmiles(smiles)
      value = GraphDescriptors.Chi1n(mol)
      assert feq(value, expected, 1e-3), (
        'mol %s (Chi1n=%f) should have Chi1N=%f' % (smiles, value, expected))
  def testChi2n(self):
    """Check Chi2n against stored reference values.

    Each result is compared to its reference with feq using 1e-3.  When
    doLong is set, the CSV regression check is run afterwards.
    """
    cases = (
      ('CCCCCC', 1.707),
      ('CCC(C)CC', 1.922),
      ('CC(C)CCC', 2.183),
      ('CC(C)C(C)C', 2.488),
      ('CC(C)(C)CC', 2.914),
      ('CCCCCO', 1.431),
      ('CCC(O)CC', 1.470),
      ('CC(O)(C)CC', 2.166),
      ('c1ccccc1O', 1.336),
    )
    for smi, target in cases:
      computed = GraphDescriptors.Chi2n(Chem.MolFromSmiles(smi))
      assert feq(computed, target, 1e-3), (
        'mol %s (Chi2n=%f) should have Chi2N=%f' % (smi, computed, target))

    if doLong:
      self.__testDesc('PP_descrs_regress.rest.2.csv', 11, GraphDescriptors.Chi2n)
  def testChi1v(self):
    """Check Chi1v against stored reference values.

    Each result is compared to its reference with feq using 1e-3.  When
    doLong is set, the CSV regression check is run afterwards.
    """
    cases = (
      ('CCCCCC', 2.914),
      ('CCC(C)CC', 2.808),
      ('CC(C)CCC', 2.770),
      ('CC(C)C(C)C', 2.643),
      ('CC(C)(C)CC', 2.561),
      ('CCCCCO', 2.523),
      ('CCC(O)CC', 2.489),
      ('CC(O)(C)CC', 2.284),
      ('c1ccccc1O', 2.134),
    )
    for smi, target in cases:
      computed = GraphDescriptors.Chi1v(Chem.MolFromSmiles(smi))
      assert feq(computed, target, 1e-3), (
        'mol %s (Chi1v=%f) should have Chi1V=%f' % (smi, computed, target))

    if doLong:
      self.__testDesc('PP_descrs_regress.rest.2.csv', 10, GraphDescriptors.Chi1v)
  def testChi3n(self):
    """Check Chi3n against stored reference values.

    Each result is compared to its reference with feq using 1e-3.  When
    doLong is set, the CSV regression check is run afterwards with the
    _skip3rings molecule filter.
    """
    cases = (
      ('CCCCCC', 0.957),
      ('CCC(C)CC', 1.394),
      ('CC(C)CCC', 0.866),
      ('CC(C)C(C)C', 1.333),
      ('CC(C)(C)CC', 1.061),
      ('CCCCCO', 0.762),
      ('CCC(O)CC', 0.943),
      ('CC(O)(C)CC', 0.865),
      ('c1ccccc1O', 0.756),
    )
    for smi, target in cases:
      computed = GraphDescriptors.Chi3n(Chem.MolFromSmiles(smi))
      assert feq(computed, target, 1e-3), (
        'mol %s (Chi3n=%f) should have Chi3N=%f' % (smi, computed, target))

    if doLong:
      self.__testDesc('PP_descrs_regress.rest.2.csv', 13, GraphDescriptors.Chi3n,
                      molFilter=_skip3rings)
Exemple #21
0
    def testOrderDepend(self):
        """Check BalabanJ and BertzCT under different evaluation orders.

        For every molecule the descriptors are computed (always with
        forceDMat=1) on a freshly parsed molecule in four arrangements, in
        this order: BalabanJ alone, BertzCT alone, BertzCT then BalabanJ,
        and BalabanJ then BertzCT.  Every result is compared to its
        reference with feq using 1e-4.
        """
        data = [
            ('C=CC=C', 21.01955, 2.73205),
            ('O=CC=O', 25.01955, 2.73205),
            ('FCC(=O)CF', 46.7548875, 2.98816),
            ('O=C1C=CC(=O)C=C1', 148.705216, 2.8265),
            ('C12C(F)=C(O)C(F)C1C(F)=C(O)C(F)2', 315.250442, 2.4509),
            ('C12CC=CCC1C(=O)C3CC=CCC3C(=O)2', 321.539522, 1.95986),
        ]

        def check_balaban(mol, smi, CT, bal):
            newBal = GraphDescriptors.BalabanJ(mol, forceDMat=1)
            assert feq(newBal, bal, 1e-4), 'mol %s %f!=%f' % (smi, newBal, bal)

        def check_bertz(mol, smi, CT, bal):
            newCT = GraphDescriptors.BertzCT(mol, forceDMat=1)
            assert feq(newCT, CT, 1e-4), (
                'mol %s (CT calc = %f) should have CT = %f' % (smi, newCT, CT))

        # Each entry is run on its own freshly parsed molecule.
        sequences = (
            (check_balaban,),
            (check_bertz,),
            (check_bertz, check_balaban),
            (check_balaban, check_bertz),
        )

        for smi, CT, bal in data:
            for sequence in sequences:
                m = Chem.MolFromSmiles(smi)
                for check in sequence:
                    check(m, smi, CT, bal)
Exemple #22
0
def CalculateBertzCT(mol):
    """
    Return log10 of the BertzCT index, which is meant to quantify
    "complexity" of molecules.
        Parameters:
            mol: RDKit molecule object
        Returns:
            BertzCT: log10 of the BertzCT index, or the string "NaN" when
            the raw index is not greater than zero
    """
    bertz = GD.BertzCT(mol)
    # Guard first: the logarithm is only taken of strictly positive values.
    if not bertz > 0:
        return "NaN"
    return numpy.log10(bertz)
Exemple #23
0
def BertzCT(mol, cutoff=100, dMat=None, forceDMat=1):
    """
    Return the Bertz CT index of a molecule.

    BertzCT is a topological index meant to quantify "complexity" of
    molecules.  It consists of a sum of two terms, one representing the
    complexity of the bonding, the other representing the complexity of the
    distribution of heteroatoms.

    Original article: S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)

    Thin wrapper: all four arguments are forwarded positionally, unchanged,
    to GraphDescriptors.BertzCT and its result is returned as-is.
    """
    complexity = GraphDescriptors.BertzCT(mol, cutoff, dMat, forceDMat)
    return complexity
Exemple #24
0
def CalculateIpc(mol):
    """
    Return log10 of the Ipc index, the information for polynomial
    coefficients based information theory.
        Parameters:
            mol: RDKit molecule object
        Returns:
            Ipc: log10 of the Ipc index, or the string "NaN" when the raw
            index is not greater than zero
    """
    ipc = GD.Ipc(mol)
    # The logarithm is only taken of strictly positive values.
    return numpy.log10(ipc) if ipc > 0 else "NaN"
  def testKappa1(self):
    """Check the Hall-Kier kappa1 value against reference data.

    Corrected data from Tables 3 and 6 of Rev. Comp. Chem. vol 2, 367-422,
    (1991).  Each result is compared to its reference with feq using 1e-3.
    When doLong is set, the CSV regression check is run afterwards.
    """
    references = [
      ('C12CC2C3CC13', 2.344),
      ('C1CCC12CC2', 3.061),
      ('C1CCCCC1', 4.167),
      ('CCCCCC', 6.000),
      ('CCC(C)C1CCC(C)CC1', 9.091),
      ('CC(C)CC1CCC(C)CC1', 9.091),
      ('CC(C)C1CCC(C)CCC1', 9.091),
    ]
    for smiles, expected in references:
      mol = Chem.MolFromSmiles(smiles)
      value = GraphDescriptors.Kappa1(mol)
      assert feq(value, expected, 1e-3), (
        'mol %s (kappa1=%f) should have kappa1=%f' % (smiles, value, expected))

    if doLong:
      self.__testDesc('PP_descrs_regress.rest.2.csv', 31, GraphDescriptors.Kappa1)
    def testBertzCTShort(self):
        """Check the Bertz 'C(T)' index against stored reference values.

        BertzCT is called with forceDMat=1 and each result is compared to
        its reference with feq using 1e-3.
        """
        references = [
            ('C=CC=C', 21.01955),
            ('O=CC=O', 25.01955),
            ('FCC(=O)CF', 46.7548875),
            ('O=C1C=CC(=O)C=C1', 148.705216),
            ('C12C(F)=C(O)C(F)C1C(F)=C(O)C(F)2', 315.250442),
            ('C12CC=CCC1C(=O)C3CC=CCC3C(=O)2', 321.539522),
        ]

        for smiles, expected in references:
            mol = Chem.MolFromSmiles(smiles)
            computed = GraphDescriptors.BertzCT(mol, forceDMat=1)
            assert feq(computed, expected, 1e-3), (
                'mol %s (CT calc = %f) should have CT = %f' % (smiles, computed, expected))
Exemple #27
0
def CalculateIpc(mol):
    """
    Return a doubly log-transformed Ipc descriptor of a molecule.

    Ipc is the information content of the coefficients of the
    characteristic polynomial of the adjacency matrix of a
    hydrogen-suppressed graph of a molecule.
    From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977)

    The raw value (replaced by MINVALUE when it is <= 0) is passed through
    log10, shifted by 8 + MINVALUE, and then passed through the natural log.
    """
    raw = GD.Ipc(mol)
    value = MINVALUE if raw <= 0 else raw
    return np.log(np.log10(value) + 8 + MINVALUE)
    def testKappa3(self):
        """Check the Hall-Kier kappa3 value against reference data.

        Corrected data from Tables 3 and 6 of Rev. Comp. Chem. vol 2,
        367-422, (1991).  Each result is compared to its reference with feq
        using 1e-3.
        """
        references = [
            ('C[C+](C)(C)(C)C(C)(C)C', 2.000),
            ('CCC(C)C(C)(C)(CC)', 2.380),
            ('CCC(C)CC(C)CC', 4.500),
            ('CC(C)CCC(C)CC', 5.878),
            ('CC(C)CCCC(C)C', 8.000),
            ('CCC(C)C1CCC(C)CC1', 2.500),
            ('CC(C)CC1CCC(C)CC1', 3.265),
            ('CC(C)C1CCC(C)CCC1', 2.844),
        ]
        for smiles, expected in references:
            mol = Chem.MolFromSmiles(smiles)
            value = GraphDescriptors.Kappa3(mol)
            assert feq(value, expected, 1e-3), (
                'mol %s (kappa3=%f) should have kappa3=%f' % (smiles, value, expected))
  def testBertzCT(self):
    """Check the Bertz 'C(T)' index against stored reference values.

    BertzCT is called with forceDMat=1 and must match each reference to
    within a delta of 1e-3.  When doLong is set, the CSV regression check is
    run afterwards with the _hasAromaticAtoms molecule filter.
    """
    references = (
      ('C=CC=C', 21.01955),
      ('O=CC=O', 25.01955),
      ('FCC(=O)CF', 46.7548875),
      ('O=C1C=CC(=O)C=C1', 148.705216),
      ('C12C(F)=C(O)C(F)C1C(F)=C(O)C(F)2', 315.250442),
      ('C12CC=CCC1C(=O)C3CC=CCC3C(=O)2', 321.539522),
    )

    for smiles, target in references:
      mol = Chem.MolFromSmiles(smiles)
      computed = GraphDescriptors.BertzCT(mol, forceDMat=1)
      failure = 'mol %s (CT calc = %f) should have CT = %f' % (smiles, computed, target)
      self.assertAlmostEqual(computed, target, delta=1e-3, msg=failure)

    if doLong:
      # Molecules with aromatic rings are meant to be skipped because the
      # treatment of aromatic atoms changed.  (The tests actually pass even
      # without the filter.)
      self.__testDesc('PP_descrs_regress.2.csv', 1, GraphDescriptors.BertzCT,
                      molFilter=_hasAromaticAtoms)
Exemple #30
0
# Filter the SMILES in data/smiles/all.txt down to those with a predicted
# logP below 10 and a BertzCT below 1000, write the survivors to
# data/smiles/filtered.txt, and collect per-molecule values for plotting.
logP_model = LogP('logP')

# One SMILES per line.  Only the line terminator is stripped, so a final line
# that lacks a trailing newline keeps its last character.  The file is closed
# as soon as it has been read.
with open('data/smiles/all.txt', 'r') as smiles_file:
    smiles = [line.rstrip('\n') for line in smiles_file]

x = []  # logP of each kept molecule
y = []  # BertzCT of each kept molecule
z = []  # exact molecular weight of each kept molecule

new = []  # SMILES strings that pass the filter

for smile in smiles:
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # MolFromSmiles returns None for SMILES it cannot parse; skip those
        # instead of crashing in the fingerprint call below.
        continue
    fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    logP = logP_model.run(fingerprint)
    bertz = GraphDescriptors.BertzCT(mol)

    if logP < 10 and bertz < 1000:
        new.append(smile)
        x.append(logP)
        y.append(bertz)
        z.append(Descriptors.ExactMolWt(mol))

# The context manager guarantees the output is flushed and closed.
with open('data/smiles/filtered.txt', 'w') as new_file:
    for smile in new:
        new_file.write(smile + '\n')

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

ax.set_xlabel('logP')