Python Stats.MeanAndDev Examples, rdkit.ML.Data.Stats.MeanAndDev Python Examples

Example #1

0

Show file

File: EnrichPlot.py Project: yinxx/rdkit

def MakePlot(details, final, counts, pickVects, nModels, nTrueActs=-1):
    """Write a gnuplot data file and script describing the enrichment results.

    Two files are produced, both named after ``details.plotFile``:

      - ``<plotFile>.dat``: one row per index ``i`` (stopping at the first
        ``counts[i] == 0`` or at the end of ``final``) holding the 1-based
        index, ``final[i][0] / counts[i]``, ``final[i][1] / counts[i]`` and
        ``counts[i]``.  When ``nModels > 1`` a fifth column carries the value
        returned by ``Stats.GetConfidenceInterval(sd, n, level=90)`` for
        ``pickVects[i]``.
      - ``<plotFile>.gnu``: a gnuplot script that plots the data file.

    Arguments:
      - details: object whose ``plotFile`` attribute provides the base file
        name; nothing is done if it is missing or falsy.  If it has a truthy
        ``showPlot`` attribute the script is also loaded through ``Gnuplot``.
      - final, counts, pickVects: per-index sequences as described above.
      - nModels: when greater than 1, error bars are written and plotted.
      - nTrueActs: when positive, used as the upper limit of the y range.

    Returns None.
    """
    if not hasattr(details, 'plotFile') or not details.plotFile:
        return

    dataFileName = '%s.dat' % (details.plotFile)
    i = 0
    # the context manager guarantees the file is closed even if a row fails
    with open(dataFileName, 'w+') as outF:
        while i < len(final) and counts[i] != 0:
            if nModels > 1:
                _, sd = Stats.MeanAndDev(pickVects[i])
                confInterval = Stats.GetConfidenceInterval(sd,
                                                           len(pickVects[i]),
                                                           level=90)
                outF.write('%d %f %f %d %f\n' %
                           (i + 1, final[i][0] / counts[i],
                            final[i][1] / counts[i], counts[i], confInterval))
            else:
                outF.write('%d %f %f %d\n' %
                           (i + 1, final[i][0] / counts[i],
                            final[i][1] / counts[i], counts[i]))
            i += 1

    plotFileName = '%s.gnu' % (details.plotFile)
    gnuHdr = """# Generated by EnrichPlot.py version: %s
  set size square 0.7
  set xr [0:]
  set data styl points
  set ylab 'Num Correct Picks'
  set xlab 'Num Picks'
  set grid
  set nokey
  set term postscript enh color solid "Helvetica" 16
  set term X
  """ % (__VERSION_STRING)
    with open(plotFileName, 'w+') as gnuF:
        print(gnuHdr, file=gnuF)
        if nTrueActs > 0:
            print('set yr [0:%d]' % nTrueActs, file=gnuF)
        print('plot x with lines', file=gnuF)
        if nModels > 1:
            # Draw an error bar roughly every 20th of the rows.  Integer
            # division keeps the value an int, and the lower bound of 1
            # avoids emitting "every 0" when fewer than 20 rows were written.
            everyGap = max(1, i // 20)
            print('replot "%s" using 1:2 with lines,' % (dataFileName),
                  end='',
                  file=gnuF)
            print('"%s" every %d using 1:2:5 with yerrorbars' %
                  (dataFileName, everyGap),
                  file=gnuF)
        else:
            print('replot "%s" with points' % (dataFileName), file=gnuF)

    if hasattr(details, 'showPlot') and details.showPlot:
        # best effort: a missing or broken Gnuplot install only prints a
        # traceback, the files written above are unaffected
        try:
            from Gnuplot import Gnuplot
            p = Gnuplot()
            p('load "%s"' % (plotFileName))
            input('press return to continue...\n')
        except Exception:
            import traceback
            traceback.print_exc()

Example #2

0

Show file

File: EnrichPlot.py Project: sb123456789sb/rdkit

                 nModels,
                 nTrueActs=nTrueActives)
    else:
        if nModels > 1:
            print(
                '#Index\tAvg_num_correct\tConf90Pct\tAvg_num_picked\tNum_picks\tlast_selection'
            )
        else:
            print(
                '#Index\tAvg_num_correct\tAvg_num_picked\tNum_picks\tlast_selection'
            )

        i = 0
        while i < nPts and counts[i] != 0:
            if nModels > 1:
                mean, sd = Stats.MeanAndDev(pickVects[i])
                confInterval = Stats.GetConfidenceInterval(sd,
                                                           len(pickVects[i]),
                                                           level=90)
                print('%d\t%f\t%f\t%f\t%d\t%s' %
                      (i + 1, final[i][0] / counts[i], confInterval,
                       final[i][1] / counts[i], counts[i], str(selPts[i])))
            else:
                print('%d\t%f\t%f\t%d\t%s' %
                      (i + 1, final[i][0] / counts[i], final[i][1] / counts[i],
                       counts[i], str(selPts[i])))
            i += 1

    mean, sd = Stats.MeanAndDev(halfwayPts)
    print('Halfway point: %.2f(%.2f)' % (mean, sd))

Example #3

0

Show file

def _meanAndDevOrDefault(vals, default=-1.0):
    """Return ``Stats.MeanAndDev(vals)``, or ``(default, default)`` if it fails."""
    try:
        mean, dev = Stats.MeanAndDev(vals)
    except Exception:
        # e.g. no successful embeddings to average over
        return default, default
    return mean, dev


def EmbedOne(mol, name, match, pcophore, count=1, silent=0, **kwargs):
    """ generates statistics for a molecule's embeddings

    Four energies are computed for each embedding:
        1) E1: the energy (with constraints) of the initial embedding
        2) E2: the energy (with constraints) of the optimized embedding
        3) E3: the energy (no constraints) the geometry for E2
        4) E4: the energy (no constraints) of the optimized free-molecule
           (starting from the E3 geometry)

    Arguments:
        - mol, pcophore, count, silent and any extra keyword arguments are
          passed through to EmbedPharmacophore()
        - name: label used in the summary line printed when not silent
        - match: sequence of objects providing GetAtomIds(); their atom ids
          form the atom match handed to the embedding and to the
          constrained optimization

    Returns a 9-tuple:
        1) the mean value of E1
        2) the sample standard deviation of E1
        3) the mean value of E2
        4) the sample standard deviation of E2
        5) the mean value of E3
        6) the sample standard deviation of E3
        7) the mean value of E4
        8) the sample standard deviation of E4
        9) The number of embeddings that failed

    A mean/deviation pair is reported as -1.0, -1.0 when the statistics
    for that energy cannot be computed.

    Side effect: the time spent in the two optimization passes is
    accumulated in the module-level _times dict under 'opt1' and 'opt2'.

    """
    global _times
    atomMatch = [list(x.GetAtomIds()) for x in match]
    bm, ms, nFailed = EmbedPharmacophore(mol,
                                         atomMatch,
                                         pcophore,
                                         count=count,
                                         silent=silent,
                                         **kwargs)
    e1s = []
    e2s = []
    e3s = []
    e4s = []
    for m in ms:
        # constrained optimization; embeddings that raise ValueError are
        # skipped entirely (no timing, no energies)
        t1 = time.time()
        try:
            e1, e2 = OptimizeMol(m, bm, atomMatch)
        except ValueError:
            continue
        t2 = time.time()
        _times['opt1'] = _times.get('opt1', 0) + t2 - t1
        e1s.append(e1)
        e2s.append(e2)

        # unconstrained optimization of the same conformer
        t1 = time.time()
        try:
            e3, e4 = OptimizeMol(m, bm)
        except ValueError:
            continue
        t2 = time.time()
        _times['opt2'] = _times.get('opt2', 0) + t2 - t1
        e3s.append(e3)
        e4s.append(e4)

    e1, e1d = _meanAndDevOrDefault(e1s)
    e2, e2d = _meanAndDevOrDefault(e2s)
    e3, e3d = _meanAndDevOrDefault(e3s)
    e4, e4d = _meanAndDevOrDefault(e4s)
    if not silent:
        print('%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)' %
              (name, nFailed, e1, e1d, e2, e2d, e3, e3d, e4, e4d))
    return e1, e1d, e2, e2d, e3, e3d, e4, e4d, nFailed

Example #4

0

Show file

def ErrorStats(conn, where, enrich=1):
    """Summarize the error statistics of a set of runs pulled from a database.

    Arguments:
      - conn: object providing ``GetData(fields=..., where=...)``; each row
        returned must hold, in order: overall_error, holdout_error,
        overall_result_matrix, holdout_result_matrix, overall_correct_conf,
        overall_incorrect_conf, holdout_correct_conf, holdout_incorrect_conf.
        The two result-matrix columns are text that is passed to ``eval()``.
      - where: selection clause handed to ``conn.GetData``.
      - enrich: when >= 0, passed as ``tgt`` to
        ``ScreenComposite.CalcEnrichment`` and enrichment statistics are
        added to the result; a negative value disables them.

    Returns:
      None if the query fails (a traceback is printed) or returns no rows
      ('no runs found' is written to stderr).  Otherwise a dict with the
      keys 'oAvg', 'oDev', 'oCorrectConf', 'oIncorrectConf' and 'oBestErr'
      (all scaled by 100), 'oResultMat' (matrix sum divided by the number
      of rows) and 'oBestIdx' (index of the lowest overall error), plus
      'oAvgEnrich'/'oDevEnrich' when ``enrich >= 0``.  The matching 'h...'
      keys for the holdout data are included when the last row has a
      holdout error.
    """
    fields = (
        'overall_error,holdout_error,overall_result_matrix,' +
        'holdout_result_matrix,overall_correct_conf,overall_incorrect_conf,' +
        'holdout_correct_conf,holdout_incorrect_conf')
    try:
        data = conn.GetData(fields=fields, where=where)
    except Exception:
        import traceback
        traceback.print_exc()
        return None
    nPts = len(data)
    if not nPts:
        sys.stderr.write('no runs found\n')
        return None

    # builtin float: the numpy.float alias no longer exists in current NumPy
    overall = numpy.zeros(nPts, float)
    overallEnrich = numpy.zeros(nPts, float)
    oCorConf = 0.0
    oInCorConf = 0.0
    holdout = numpy.zeros(nPts, float)
    holdoutEnrich = numpy.zeros(nPts, float)
    hCorConf = 0.0
    hInCorConf = 0.0
    overallMatrix = None
    holdoutMatrix = None
    haveHoldout = 0
    for i in range(nPts):
        row = data[i]
        if row[0] is not None:
            overall[i] = row[0]
            oCorConf += row[4]
            oInCorConf += row[5]
        if row[1] is not None:
            holdout[i] = row[1]
            haveHoldout = 1
        else:
            haveHoldout = 0

        # SECURITY NOTE(review): eval() executes whatever text the database
        # holds in the result-matrix columns; only use with trusted data.
        tmpOverall = 1. * eval(row[2])
        if enrich >= 0:
            overallEnrich[i] = ScreenComposite.CalcEnrichment(tmpOverall,
                                                              tgt=enrich)
        if overallMatrix is None:
            overallMatrix = tmpOverall
        else:
            overallMatrix += tmpOverall

        if haveHoldout:
            tmpHoldout = 1. * eval(row[3])
            if enrich >= 0:
                holdoutEnrich[i] = ScreenComposite.CalcEnrichment(tmpHoldout,
                                                                  tgt=enrich)
            # initialise on first use so that leading rows without holdout
            # data do not leave the accumulator as None
            if holdoutMatrix is None:
                holdoutMatrix = tmpHoldout
            else:
                holdoutMatrix += tmpHoldout
            hCorConf += row[6]
            hInCorConf += row[7]

    avgOverall = sum(overall) / nPts
    oCorConf /= nPts
    oInCorConf /= nPts
    overallMatrix /= nPts
    oSort = numpy.argsort(overall)
    oMin = overall[oSort[0]]
    overall -= avgOverall
    # sample standard deviation (n - 1 in the denominator)
    devOverall = numpy.sqrt(sum(overall**2) / (nPts - 1))
    res = {}
    res['oAvg'] = 100 * avgOverall
    res['oDev'] = 100 * devOverall
    res['oCorrectConf'] = 100 * oCorConf
    res['oIncorrectConf'] = 100 * oInCorConf
    res['oResultMat'] = overallMatrix
    res['oBestIdx'] = oSort[0]
    res['oBestErr'] = 100 * oMin

    if enrich >= 0:
        mean, dev = Stats.MeanAndDev(overallEnrich)
        res['oAvgEnrich'] = mean
        res['oDevEnrich'] = dev

    # NOTE(review): haveHoldout reflects only the last row processed, so
    # holdout statistics are reported based on that row alone - confirm
    # that this is intended when rows are mixed.
    if haveHoldout:
        avgHoldout = sum(holdout) / nPts
        hCorConf /= nPts
        hInCorConf /= nPts
        holdoutMatrix /= nPts
        hSort = numpy.argsort(holdout)
        hMin = holdout[hSort[0]]
        holdout -= avgHoldout
        devHoldout = numpy.sqrt(sum(holdout**2) / (nPts - 1))
        res['hAvg'] = 100 * avgHoldout
        res['hDev'] = 100 * devHoldout
        res['hCorrectConf'] = 100 * hCorConf
        res['hIncorrectConf'] = 100 * hInCorConf
        res['hResultMat'] = holdoutMatrix
        res['hBestIdx'] = hSort[0]
        res['hBestErr'] = 100 * hMin
        if enrich >= 0:
            mean, dev = Stats.MeanAndDev(holdoutEnrich)
            res['hAvgEnrich'] = mean
            res['hDevEnrich'] = dev
    return res