def MakePlot(details, final, counts, pickVects, nModels, nTrueActs=-1):
  """Write the gnuplot data (.dat) and command (.gnu) files for an enrichment plot.

  Arguments:
    details: options holder; only `plotFile` (output basename) and the
      optional `showPlot` flag are used here
    final: per-pick accumulated (numCorrect, numPicked) sums
    counts: number of runs contributing to each pick index
    pickVects: per-pick lists of individual results; used for the 90%
      confidence-interval error bars when averaging over multiple models
    nModels: number of models averaged; >1 enables the error-bar output
    nTrueActs: if positive, used to fix the plot's y range

  Returns nothing; writes `<plotFile>.dat` and `<plotFile>.gnu` and, when
  `details.showPlot` is set, asks Gnuplot to display the result.
  """
  if not hasattr(details, 'plotFile') or not details.plotFile:
    return

  # write the data file: one row per pick index, stopping at the first
  # index with no contributing runs
  dataFileName = '%s.dat' % (details.plotFile)
  i = 0
  with open(dataFileName, 'w+') as outF:
    while i < len(final) and counts[i] != 0:
      if nModels > 1:
        _, sd = Stats.MeanAndDev(pickVects[i])
        confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90)
        outF.write('%d %f %f %d %f\n' % (i + 1, final[i][0] / counts[i],
                                         final[i][1] / counts[i], counts[i], confInterval))
      else:
        outF.write('%d %f %f %d\n' % (i + 1, final[i][0] / counts[i],
                                      final[i][1] / counts[i], counts[i]))
      i += 1

  # write the gnuplot command file that plots the data written above
  plotFileName = '%s.gnu' % (details.plotFile)
  with open(plotFileName, 'w+') as gnuF:
    gnuHdr = """# Generated by EnrichPlot.py version: %s
set size square 0.7
set xr [0:]
set data styl points
set ylab 'Num Correct Picks'
set xlab 'Num Picks'
set grid
set nokey
set term postscript enh color solid "Helvetica" 16
set term X
""" % (__VERSION_STRING)
    print(gnuHdr, file=gnuF)
    if nTrueActs > 0:
      print('set yr [0:%d]' % nTrueActs, file=gnuF)
    print('plot x with lines', file=gnuF)
    if nModels > 1:
      # thin the error bars so only ~20 are drawn; integer division keeps
      # gnuplot's `every` argument an integer (the old `i / 20` was a
      # py2-ism that produced a float under Python 3)
      everyGap = i // 20
      print('replot "%s" using 1:2 with lines,' % (dataFileName), end='', file=gnuF)
      print('"%s" every %d using 1:2:5 with yerrorbars' % (dataFileName, everyGap), file=gnuF)
    else:
      print('replot "%s" with points' % (dataFileName), file=gnuF)

  # optionally display the plot; best-effort only, so any failure
  # (e.g. Gnuplot not installed) is reported but not fatal
  if hasattr(details, 'showPlot') and details.showPlot:
    try:
      from Gnuplot import Gnuplot
      p = Gnuplot()
      p('load "%s"' % (plotFileName))
      input('press return to continue...\n')
    except Exception:
      import traceback
      traceback.print_exc()
def testTransform2(self):
  """ testing that rotation of points into PCA frame doesn't change dot products """
  # raw data: column 0 is a row index, columns 1-3 are the features used
  # here, and the final two columns are dropped below
  self.d = numpy.array(
    [[1, 2.068703704, 2.040555556, 2.068703704, 2.141782407, 7.46],
     [2, 1.48537037, -0.186756425, 1.48537037, 1.803819444, 8.16],
     [3, 1.917469136, 0.785465797, 1.917469136, 2.046875, 8.68],
     [4, 2.068703704, 1.125743575, 2.068703704, 2.131944444, 8.89],
     [5, 2.138703704, 1.283243575, 2.138703704, 2.171319444, 9.25],
     [6, 2.152037037, 1.313243575, 2.152037037, 2.178819444, 9.3],
     [7, 1.730740741, 1.457222222, -0.179901738, 1.558449074, 7.52],
     [8, 1.973796296, 1.889320988, 0.792320484, 1.99054784, 8.16],
     [9, 2.058865741, 2.040555556, 1.132598262, 2.141782407, 8.3],
     [10, 2.098240741, 2.110555556, 1.290098262, 2.211782407, 8.4],
     [11, 2.105740741, 2.123888889, 1.320098262, 2.225115741, 8.46],
     [12, 1.390462963, -0.37502803, 0.171950113, 1.652584877, 8.19],
     [13, 1.475532407, -0.223793462, 0.512227891, 1.803819444, 8.57],
     [14, 1.522407407, -0.140460128, 0.699727891, 1.887152778, 8.82],
     [15, 1.822561728, 0.597194192, 0.604048879, 1.895640432, 8.89],
     [16, 1.907631173, 0.74842876, 0.944326657, 2.046875, 8.92],
     [17, 1.954506173, 0.831762094, 1.131826657, 2.130208333, 8.96],
     [18, 1.973796296, 0.93747197, 0.755283447, 1.980709877, 9],
     [19, 2.058865741, 1.088706538, 1.095561224, 2.131944444, 9.35],
     [20, 2.105740741, 1.172039872, 1.283061224, 2.215277778, 9.22],
     [21, 2.189074074, 1.359539872, 1.366394558, 2.262152778, 9.3],
     [22, 2.142199074, 1.276206538, 1.178894558, 2.178819444, 9.52]], 'd')
  # keep only the three feature columns (drop the index column and the
  # trailing two columns)
  self.d = self.d[:, 1:-2]
  eVals, eVects = Stats.PrincipalComponents(self.d)
  pts = Stats.TransformPoints(eVects, self.d)
  # center the original data so it is comparable with the (mean-centered)
  # transformed points; TransformPoints is given the uncentered data, so
  # presumably it centers internally -- TODO confirm
  avg = sum(self.d) / len(self.d)
  self.d -= avg
  for i in range(len(pts)):
    for j in range(len(pts)):
      # NOTE: the /= below normalizes the rows of self.d and pts IN PLACE;
      # on later loop passes the rows are already unit length, so
      # re-normalizing divides by 1 and is a (wasteful but harmless) no-op
      vi = self.d[i]
      vi /= numpy.sqrt(numpy.dot(vi, vi))
      vj = self.d[j]
      vj /= numpy.sqrt(numpy.dot(vj, vj))
      pvi = pts[i]
      pvi /= numpy.sqrt(numpy.dot(pvi, pvi))
      pvj = pts[j]
      pvj /= numpy.sqrt(numpy.dot(pvj, pvj))
      # a rotation must preserve angles: the pairwise dot products of the
      # unit vectors have to agree before and after the transform
      assert feq(numpy.dot(vi, vj), numpy.dot(pvi, pvj)), \
          'bad dot: %4.4f %4.4f' % (numpy.dot(vi, vj), numpy.dot(pvi, pvj))
def testCorrelation(self):
  """Verify FormCorrelationMatrix against precomputed reference values."""
  corr = Stats.FormCorrelationMatrix(self.d)
  expected = numpy.array([[1., 0.66865732, -0.10131374],
                          [0.66865732, 1., -0.28792771],
                          [-0.10131374, -0.28792771, 1.]])
  # every entry must agree with the reference to within FLOAT_TOL
  assert max(abs(corr - expected).ravel()) < FLOAT_TOL, 'correlation matrix incorrect'
def StdDev(mat):
  """ the standard deviation classifier

    This uses _ML.Data.Stats.StandardizeMatrix()_ to do the work

  """
  # NOTE(review): despite the name, this does not return a standard
  # deviation -- it returns the standardized matrix produced by
  # Stats.StandardizeMatrix(); the name is kept for backward compatibility
  return Stats.StandardizeMatrix(mat)
def testTransform(self):
  """ test transformation to PCA frame

  Transforms self.d into the PCA frame and checks three representative
  points against precomputed reference coordinates.
  """
  # only the eigenvectors (the rotation) are needed here; discard the
  # eigenvalues, matching the idiom used by the other testTransform
  _, eVects = Stats.PrincipalComponents(self.d)
  pts = Stats.TransformPoints(eVects, self.d)
  p0 = numpy.array([-1.12488653, -1.84061768, -0.1294482])
  p3 = numpy.array([-3.82295273, -3.09754194, 0.24549203])
  p5 = numpy.array([2.29785176, 3.4726933, -0.36094115])
  assert max(abs(pts[0] - p0)) < FLOAT_TOL, 'p0 comparison failed %s!=%s' % (str(
    pts[0]), str(p0))
  assert max(abs(pts[3] - p3)) < FLOAT_TOL, 'p3 comparison failed %s!=%s' % (str(
    pts[3]), str(p3))
  assert max(abs(pts[5] - p5)) < FLOAT_TOL, 'p5 comparison failed %s!=%s' % (str(
    pts[5]), str(p5))
def testPCA(self):
  """Check the eigenvalues/eigenvectors from PrincipalComponents against references."""
  eVals, eVects = Stats.PrincipalComponents(self.d)
  refVals = numpy.array([1.76877414, 0.92707592, 0.30414995])
  refVects = numpy.array([[-0.64200458, -0.66321742, 0.38467229],
                          [0.34166917, 0.20166619, 0.91792861],
                          [-0.68636164, 0.72074503, 0.09713033]])
  assert max((abs(eVals - refVals)).ravel()) < FLOAT_TOL, 'bad variances from PCA'
  # eigenvectors are only defined up to an overall sign, so accept either
  # the reference row or its negation
  for row in range(eVects.shape[0]):
    matchesPlus = max((abs(eVects[row] - refVects[row])).ravel()) < FLOAT_TOL
    matchesMinus = max((abs(eVects[row] + refVects[row])).ravel()) < FLOAT_TOL
    assert matchesPlus or matchesMinus, 'bad vectors from PCA'
def testTransform(self):
  """Transform the points into the PCA frame and compare selected rows to references."""
  _, eVects = Stats.PrincipalComponents(self.d)
  pts = Stats.TransformPoints(eVects, self.d)
  # full set of expected transformed coordinates; only rows 0, 3 and 5
  # are actually checked below
  refPs = [numpy.array([-1.20362098, -1.79265006, 0.08776266]),
           numpy.array([4.63540648, 1.1669869, 0.47026415]),
           numpy.array([0.8709456, -0.50012821, 0.24763993]),
           numpy.array([-3.94140499, -2.88350573, 0.64863041]),
           numpy.array([-0.97015382, 2.42239972, 0.51066736]),
           numpy.array([2.43084762, 3.3115892, -0.77094542]),
           numpy.array([0.74360559, -2.67765459, 0.73974091]),
           numpy.array([-1.2274861, 3.6819975, -0.07856395]),
           numpy.array([-0.4342764, 0.04320715, 0.28202332]),
           numpy.array([-0.903863, -2.77224188, -2.13721937])]
  for idx in (0, 3, 5):
    ref = refPs[idx]
    assert max(abs(pts[idx] - ref)) < FLOAT_TOL, \
        'p%d comparison failed %s!=%s' % (idx, str(pts[idx]), str(ref))
def testCovariance(self):
  """Covariance-matrix test.

  Reference values taken from the NIST handbook:
  http://www.itl.nist.gov/div898/handbook/pmc/section5/pmc541.htm
  """
  pts = numpy.array([[4., 2, 0.6],
                     [4.2, 2.1, 0.59],
                     [3.9, 2.0, 0.58],
                     [4.3, 2.1, 0.62],
                     [4.1, 2.2, 0.63]])
  cov = Stats.FormCovarianceMatrix(pts)
  expected = numpy.array([[0.025, 0.0075, 0.00175],
                          [0.0075, 0.007, 0.00135],
                          [0.00175, 0.00135, 0.00043]])
  assert max(abs(cov - expected).ravel()) < FLOAT_TOL, 'covariance matrix incorrect'
nModels, nTrueActs=nTrueActives) else: if nModels > 1: print( '#Index\tAvg_num_correct\tConf90Pct\tAvg_num_picked\tNum_picks\tlast_selection' ) else: print( '#Index\tAvg_num_correct\tAvg_num_picked\tNum_picks\tlast_selection' ) i = 0 while i < nPts and counts[i] != 0: if nModels > 1: mean, sd = Stats.MeanAndDev(pickVects[i]) confInterval = Stats.GetConfidenceInterval(sd, len(pickVects[i]), level=90) print('%d\t%f\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], confInterval, final[i][1] / counts[i], counts[i], str(selPts[i]))) else: print('%d\t%f\t%f\t%d\t%s' % (i + 1, final[i][0] / counts[i], final[i][1] / counts[i], counts[i], str(selPts[i]))) i += 1 mean, sd = Stats.MeanAndDev(halfwayPts) print('Halfway point: %.2f(%.2f)' % (mean, sd))
def EmbedOne(mol, name, match, pcophore, count=1, silent=0, **kwargs):
  """ generates statistics for a molecule's embeddings

  Four energies are computed for each embedding:
     1) E1: the energy (with constraints) of the initial embedding
     2) E2: the energy (with constraints) of the optimized embedding
     3) E3: the energy (no constraints) the geometry for E2
     4) E4: the energy (no constraints) of the optimized free-molecule
        (starting from the E3 geometry)

  Returns a 9-tuple:
     1) the mean value of E1
     2) the sample standard deviation of E1
     3) the mean value of E2
     4) the sample standard deviation of E2
     5) the mean value of E3
     6) the sample standard deviation of E3
     7) the mean value of E4
     8) the sample standard deviation of E4
     9) The number of embeddings that failed

  """
  global _times  # module-level dict accumulating optimization wall times
  atomMatch = [list(x.GetAtomIds()) for x in match]
  bm, ms, nFailed = EmbedPharmacophore(mol, atomMatch, pcophore, count=count, silent=silent,
                                       **kwargs)
  e1s = []
  e2s = []
  e3s = []
  e4s = []
  d12s = []
  d23s = []
  d34s = []
  for m in ms:
    # pass 1: optimize with the pharmacophore constraints applied
    t1 = time.time()
    try:
      e1, e2 = OptimizeMol(m, bm, atomMatch)
    except ValueError:
      # failed embeddings are silently skipped; nFailed from
      # EmbedPharmacophore is the reported failure count
      pass
    else:
      t2 = time.time()
      _times['opt1'] = _times.get('opt1', 0) + t2 - t1
      e1s.append(e1)
      e2s.append(e2)
      d12s.append(e1 - e2)
    # pass 2: optimize without constraints
    t1 = time.time()
    try:
      e3, e4 = OptimizeMol(m, bm)
    except ValueError:
      pass
    else:
      t2 = time.time()
      _times['opt2'] = _times.get('opt2', 0) + t2 - t1
      e3s.append(e3)
      e4s.append(e4)
      # NOTE(review): if the constrained optimization above raised, e2 here
      # is stale (from a previous molecule) or undefined (NameError on the
      # very first one) -- looks like this branch should be skipped in that
      # case; confirm before changing
      d23s.append(e2 - e3)
      d34s.append(e3 - e4)
    # NOTE(review): incrementing the `count` parameter here has no visible
    # effect (it is not read again) -- presumably a leftover
    count += 1
  # each statistic falls back to -1.0 when its list is empty (MeanAndDev
  # raises on empty input)
  try:
    e1, e1d = Stats.MeanAndDev(e1s)
  except Exception:
    e1 = -1.0
    e1d = -1.0
  try:
    e2, e2d = Stats.MeanAndDev(e2s)
  except Exception:
    e2 = -1.0
    e2d = -1.0
  try:
    e3, e3d = Stats.MeanAndDev(e3s)
  except Exception:
    e3 = -1.0
    e3d = -1.0
  try:
    e4, e4d = Stats.MeanAndDev(e4s)
  except Exception:
    e4 = -1.0
    e4d = -1.0
  if not silent:
    print('%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)' %
          (name, nFailed, e1, e1d, e2, e2d, e3, e3d, e4, e4d))
  return e1, e1d, e2, e2d, e3, e3d, e4, e4d, nFailed
def ErrorStats(conn, where, enrich=1):
  """Pull composite-run results from the database and summarize them.

  Arguments:
    conn: database connection exposing GetData(fields=..., where=...)
    where: where-clause text used to select the runs
    enrich: target class for the enrichment calculation; pass a negative
      value to skip the enrichment statistics entirely

  Returns a dict with percent error averages/deviations, confidence values,
  averaged result matrices, best-run info and (when enrich >= 0) enrichment
  mean/deviation; holdout entries ('h*') are included only when holdout
  data is present.  Returns None if the query fails or matches no runs.
  """
  fields = ('overall_error,holdout_error,overall_result_matrix,' +
            'holdout_result_matrix,overall_correct_conf,overall_incorrect_conf,' +
            'holdout_correct_conf,holdout_incorrect_conf')
  try:
    data = conn.GetData(fields=fields, where=where)
  except Exception:
    import traceback
    traceback.print_exc()
    return None
  nPts = len(data)
  if not nPts:
    sys.stderr.write('no runs found\n')
    return None
  # numpy.float was removed in numpy >= 1.20; use the explicit 64-bit type
  overall = numpy.zeros(nPts, numpy.float64)
  overallEnrich = numpy.zeros(nPts, numpy.float64)
  oCorConf = 0.0
  oInCorConf = 0.0
  holdout = numpy.zeros(nPts, numpy.float64)
  holdoutEnrich = numpy.zeros(nPts, numpy.float64)
  hCorConf = 0.0
  hInCorConf = 0.0
  overallMatrix = None
  holdoutMatrix = None
  for i in range(nPts):
    if data[i][0] is not None:
      overall[i] = data[i][0]
      oCorConf += data[i][4]
      oInCorConf += data[i][5]
    if data[i][1] is not None:
      holdout[i] = data[i][1]
      haveHoldout = 1
    else:
      haveHoldout = 0
    # SECURITY NOTE: the result matrices are stored as python expressions
    # and rebuilt with eval(); only safe because the data comes from our own
    # database -- never point this at untrusted input
    tmpOverall = 1. * eval(data[i][2])
    if enrich >= 0:
      overallEnrich[i] = ScreenComposite.CalcEnrichment(tmpOverall, tgt=enrich)
    if haveHoldout:
      tmpHoldout = 1. * eval(data[i][3])
      if enrich >= 0:
        holdoutEnrich[i] = ScreenComposite.CalcEnrichment(tmpHoldout, tgt=enrich)
    # accumulate the matrices; the first row initializes them
    if overallMatrix is None:
      if data[i][2] is not None:
        overallMatrix = tmpOverall
      if haveHoldout and data[i][3] is not None:
        holdoutMatrix = tmpHoldout
    else:
      overallMatrix += tmpOverall
      if haveHoldout:
        holdoutMatrix += tmpHoldout
    if haveHoldout:
      hCorConf += data[i][6]
      hInCorConf += data[i][7]
  avgOverall = sum(overall) / nPts
  oCorConf /= nPts
  oInCorConf /= nPts
  overallMatrix /= nPts
  oSort = numpy.argsort(overall)
  oMin = overall[oSort[0]]
  overall -= avgOverall
  # sample standard deviation (nPts - 1 denominator)
  devOverall = numpy.sqrt(sum(overall**2) / (nPts - 1))
  res = {}
  res['oAvg'] = 100 * avgOverall
  res['oDev'] = 100 * devOverall
  res['oCorrectConf'] = 100 * oCorConf
  res['oIncorrectConf'] = 100 * oInCorConf
  res['oResultMat'] = overallMatrix
  res['oBestIdx'] = oSort[0]
  res['oBestErr'] = 100 * oMin
  if enrich >= 0:
    mean, dev = Stats.MeanAndDev(overallEnrich)
    res['oAvgEnrich'] = mean
    res['oDevEnrich'] = dev
  # NOTE(review): haveHoldout carries the value from the LAST loop
  # iteration, so holdout stats are emitted based on the final row only --
  # presumably all rows agree on this; confirm
  if haveHoldout:
    avgHoldout = sum(holdout) / nPts
    hCorConf /= nPts
    hInCorConf /= nPts
    holdoutMatrix /= nPts
    hSort = numpy.argsort(holdout)
    hMin = holdout[hSort[0]]
    holdout -= avgHoldout
    devHoldout = numpy.sqrt(sum(holdout**2) / (nPts - 1))
    res['hAvg'] = 100 * avgHoldout
    res['hDev'] = 100 * devHoldout
    res['hCorrectConf'] = 100 * hCorConf
    res['hIncorrectConf'] = 100 * hInCorConf
    res['hResultMat'] = holdoutMatrix
    res['hBestIdx'] = hSort[0]
    res['hBestErr'] = 100 * hMin
    if enrich >= 0:
      mean, dev = Stats.MeanAndDev(holdoutEnrich)
      res['hAvgEnrich'] = mean
      res['hDevEnrich'] = dev
  return res