def get_timer_groups(merged_log):
    """Compute response-time statistics for each timer group.

    merged_log -- iterable of CSV lines where (0-based) field 5 is the HTTP
    response code, field 8 the elapsed time, and field 10 the timer-group
    name.

    Returns a dict mapping timer-group name -> [count, avg, stdev, min,
    50th, 80th, 90th, 95th, 99th percentile, max].
    """
    # Single pass over the log instead of re-scanning every line once per
    # unique group (the original was O(groups * lines)).  Every group seen
    # is seeded, even one with no valid responses, matching the original
    # behavior of building Stats on an empty list for such groups.
    grouped = {}
    for line in merged_log:
        fields = line.split(',')
        times = grouped.setdefault(fields[10].strip(), [])
        if fields[5] == '200':  # just concerned with valid responses
            # elapsed time kept as a string, as in the original; corestats
            # presumably converts -- TODO confirm against corestats.Stats
            times.append(fields[8])
    timer_group_stats = {}
    for timer_group, elapsed_times in grouped.items():
        stats = corestats.Stats(elapsed_times)
        timer_group_stats[timer_group] = [
            stats.count(), stats.avg(), stats.stdev(), stats.min(),
            stats.percentile(50), stats.percentile(80), stats.percentile(90),
            stats.percentile(95), stats.percentile(99), stats.max()
        ]
    return timer_group_stats
def find_outliers(vals):
    """Calculates the upper and lower bounds of a set of sample/case values

    Uses the Tukey fence rule: hinges at the 25th/75th percentiles, bounds
    at hinge +/- 1.5 * inter-hinge spread.

    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
    (11.252500000000001, 0.5364999999999993)
    >>>
    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
    (32.0, -8.0)

    If there are no vals, returns None for the upper and lower bounds,
    which code that calls it will have to deal with.
    >>> find_outliers([])
    (None, None)
    """
    # Fix: the original was logger.debug("xerxes vals is:", pf(vals)) -- a
    # positional argument with no %-placeholder (plus a stray "xerxes"
    # debug tag), which makes stdlib logging raise a string-formatting
    # error whenever DEBUG is enabled.  Use lazy %-style args instead.
    logger.debug("vals is: %s", pf(vals))
    if vals:
        stats = corestats.Stats(vals)
        low_hinge = stats.percentile(25)
        up_hinge = stats.percentile(75)
        hstep = 1.5 * (up_hinge - low_hinge)
        upper_bound = up_hinge + hstep
        lower_bound = low_hinge - hstep
    else:
        # no samples: callers must handle the (None, None) sentinel
        upper_bound = None
        lower_bound = None
    logger.debug(pf(locals()))
    return upper_bound, lower_bound
def trainForAllFeatures(self):
    # Translated from Chinese: this function is used to check the model's
    # performance after removing the low-gain features; before using it,
    # make sure fileName in main() is set to './features_nomain.csv'
    # (filtering based on the auxiliary eye).
    #
    # For each kernel, runs 2000 random 60-sample train / rest-test splits
    # of self.datafortrain with an SVR on all feature columns, and prints
    # the average PLCC / SROCC / RMSE for that kernel.
    w, h = self.datafortrain.shape  # w = number of samples, h = columns (col 0 is the MOS label)
    kernels = ['linear', 'poly', 'rbf']
    for j in range(3):  # one evaluation pass per kernel
        LCC_rbf = []; SROCC_rbf = []; RMSE_rbf = []
        for i in range(2000):
            print u"第%d次训练" % i
            # random split: 60 rows for training, the remainder for testing
            randlist = range(w)
            np.random.shuffle(randlist)
            trainindex = randlist[0:60]
            testindex = randlist[60:w]
            # if j not in basecolumn:
            #     basecolumn.append(j)
            # if j==0:
            #     basecolumn.remove(j)
            train_feature = self.datafortrain[trainindex, :]
            train_feature = train_feature[:, 1:]  # drop the label column
            train_label = self.datafortrain[trainindex, 0]
            test_feature = self.datafortrain[testindex, :]
            test_feature = test_feature[:, 1:]
            test_label = self.datafortrain[testindex, 0]
            '''============= train and predict by svr with rbf kernel ==============='''
            # NOTE(review): despite the comment above, the kernel actually
            # varies per outer pass (kernels[j]), not just rbf.
            clf = svm.SVR(kernel=kernels[j], C=1000, gamma=0.1)
            pred = clf.fit(train_feature, train_label).predict(test_feature)
            # correlation / error metrics for this split
            srocc, p1 = statstool.spearmanr(pred, test_label)
            plcc, p2 = statstool.pearsonr(pred, test_label)
            rmse = sqrt(mean_squared_error(pred, test_label))
            LCC_rbf.append(plcc); SROCC_rbf.append(srocc); RMSE_rbf.append(rmse)
        # average the metrics over the 2000 runs for this kernel
        sts_lcc = corestats.Stats(LCC_rbf)
        sts_srocc = corestats.Stats(SROCC_rbf)
        sts_rmse = corestats.Stats(RMSE_rbf)
        print "all_feature metrics %s:plcc:%f,srocc:%f,rmse:%f " % (
            kernels[j], sts_lcc.avg(), sts_srocc.avg(), sts_rmse.avg())
def main(): try: optlist, args = getopt.getopt(sys.argv[1:], 'd:h', ["deadline=", "help"]) except getopt.GetoptError as err: print str(err) sys.exit(2) deadline = 0 deadline_miss = 0 for opt, val in optlist: if opt in ("-h", "--help"): print args[0] + " [-d <deadline (ms)>]" elif opt in ("-d", "--deadline"): deadline = float(val) else: assert False, "unhandled option" print "deadline: ", deadline file1 = open(args[0], 'r') items = [] while (True): line = file1.readline() if not line: break tokens = line.split() # print tokens try: num = float(tokens[0]) except ValueError: break items[len(items):] = [num] if deadline > 0 and num > deadline: deadline_miss += 1 stats = corestats.Stats(items) print print "----[", args[0], "]---" print "count: ", stats.count() print "deadline miss: ", deadline_miss print "deadline miss ratio: (%f)" % (float(deadline_miss) / stats.count()) print "min: ", stats.min() print "avg: ", stats.avg() print "90pctile: ", stats.percentile(90) print "95pctile: ", stats.percentile(95) print "99pctile: ", stats.percentile(99) #print "median: ", stats.median() print "max: ", stats.max() print "stdev: ", stats.stdev() #avg min max 99pctile print "LINE(avg|min|max|99pct|stdev): ", stats.avg(), \ stats.min(), stats.max(), stats.percentile(99), stats.stdev()
def stats_per_second(self, *args):
    """Build a dictionary of descriptive statistics for each
    (game_log, matrix) pair and return a list of (game_log, stats)
    tuples, one per input pair.
    """
    results = []
    for game_log, samples in args:
        cstats = corestats.Stats()
        mode = cstats.mode(samples)
        mean = cstats.mean(samples)
        variance = cstats.variance(samples)
        stddev = variance**0.5
        three_sigma = 3 * stddev
        lo = min(samples)
        hi = max(samples)
        # value three standard deviations above the mean, rounded up
        perfect = math.ceil(three_sigma + mean)
        stats = {
            'mode': mode[0][0],
            'modenext': mode[1][0],
            'mean': mean,
            'median': cstats.median(samples),
            #'harmonicmean': mstats.harmonicmean(samples),
            'variance': variance,
            'stddeviation': stddev,
            '3sigma': three_sigma,
            'cumfreq': mstats.cumfreq(samples),
            # frequency of each item (each item being the count of the
            # occurrences for each number of lines per second)
            'itemfreq': mstats.itemfreq(samples),
            'min': lo,
            'max': hi,
            'samplespace': hi - lo,
            'count': len(samples),
            'kurtosis': mstats.kurtosis(samples),
            'perfectvalue': int(perfect),
            'perfectscore': cstats.percentileforvalue(samples, perfect),
        }
        stats['itemscore'] = [
            (pct, cstats.valueforpercentile(samples, pct))
            for pct in (10, 30, 50, 70, 80, 85, 90, 95, 99, 99.9, 99.99)
        ]
        # positive skew: more values below the mean than above it;
        # negative skew: the reverse.
        skew = mstats.skew(samples)
        stats['skew'] = skew
        if skew > 0:
            stats['skewmeaning'] = 'There exist more smaller values from the mean than higher'
        else:
            stats['skewmeaning'] = 'There exist more higher values from the mean than smaller'
        results.append((game_log, stats))
    return results
def generate_results(dir, test_name): print '\nGenerating Results...' try: merged_log = open(dir + '/agent_stats.csv', 'rb').readlines( ) # this log contains commingled results from all agents except IOError: sys.stderr.write('ERROR: Can not find your results log file\n') merged_error_log = merge_error_files(dir) if len(merged_log) == 0: fh = open(dir + '/results.html', 'w') fh.write( r'<html><body><p>None of the agents finished successfully. There is no data to report.</p></body></html>\n' ) fh.close() sys.stdout.write( 'ERROR: None of the agents finished successfully. There is no data to report.\n' ) return timings = list_timings(merged_log) best_times, worst_times = best_and_worst_requests(merged_log) timer_group_stats = get_timer_groups(merged_log) timing_secs = [int(x[0]) for x in timings] # grab just the secs (rounded-down) throughputs = calc_throughputs(timing_secs) # dict of secs and throughputs #save throughputs to file fh = open('%s/agent_throughputs.csv' % dir, 'w') fh.close() for q_tuple in throughputs: through = (q_tuple, throughputs[q_tuple]) f = open('%s/agent_throughputs.csv' % dir, 'a') f.write('%s,%f\n' % through) # log as csv f.flush() f.close() throughput_stats = corestats.Stats(throughputs.values()) resp_data_set = [x[1] for x in timings] # grab just the timings response_stats = corestats.Stats(resp_data_set) # calc the stats and load up a dictionary with the results stats_dict = get_stats(response_stats, throughput_stats) # get the pickled stats dictionaries we saved runtime_stats_dict, workload_dict = load_dat_detail(dir) # get the summary stats and load up a dictionary with the results summary_dict = {} summary_dict['cur_time'] = time.strftime('%m/%d/%Y %H:%M:%S', time.localtime()) summary_dict['duration'] = int(timings[-1][0] - timings[0][0]) + 1 # add 1 to round up summary_dict['num_agents'] = workload_dict['num_agents'] summary_dict['req_count'] = len(timing_secs) summary_dict['err_count'] = len(merged_error_log) 
summary_dict['bytes_received'] = calc_bytes(merged_log) # write html report fh = open(dir + '/results.html', 'w') reportwriter.write_head_html(fh) reportwriter.write_starting_content(fh, test_name) reportwriter.write_summary_results(fh, summary_dict, workload_dict) reportwriter.write_stats_tables(fh, stats_dict) reportwriter.write_images(fh) reportwriter.write_timer_group_stats(fh, timer_group_stats) reportwriter.write_agent_detail_table(fh, runtime_stats_dict) reportwriter.write_best_worst_requests(fh, best_times, worst_times) reportwriter.write_closing_html(fh) fh.close() try: # graphing only works on systems with Matplotlib installed print 'Generating Graphs...' import graph graph.resp_graph(timings, dir=dir + '/') graph.tp_graph(throughputs, dir=dir + '/') except: sys.stderr.write('ERROR: Unable to generate graphs with Matplotlib\n') print '\nDone generating results. You can view your test at:' print '%s/results.html\n' % dir
def lowgaininthebasicfeatures(self): w, h = self.datafortrain.shape kernels = ['linear', 'poly', 'rbf'] LCC = []; SROCC = []; RMSE = []; baseplcc = 0; basesrocc = 0; basermse = 0 for j in [0, 1, 2, 3, 4, 5, 7, 13, 14, 15]: print j LCC_rbf = []; SROCC_rbf = []; RMSE_rbf = [] for i in range(2000): randlist = range(w) np.random.shuffle(randlist) trainindex = randlist[0:60] testindex = randlist[60:w] basecolumn = [6, 7, 8, 9, 10, 11, 12, 13, 15, 14] if j not in basecolumn: basecolumn.append(j) if j == 0: basecolumn.remove(j) train_feature = self.datafortrain[trainindex, :] train_feature = train_feature[:, basecolumn] train_label = self.datafortrain[trainindex, 0] test_feature = self.datafortrain[testindex, :] test_feature = test_feature[:, basecolumn] test_label = self.datafortrain[testindex, 0] '''============= train and predict by svr with rbf kernel ===============''' # rbf kernel clf = svm.SVR(kernel='rbf', C=1000, gamma=1 / 77) pred = clf.fit(train_feature, train_label).predict(test_feature) # rbf metrics srocc, p1 = statstool.spearmanr(pred, test_label) plcc, p2 = statstool.pearsonr(pred, test_label) rmse = sqrt(mean_squared_error(pred, test_label)) LCC_rbf.append(plcc); SROCC_rbf.append(srocc); RMSE_rbf.append(rmse) sts_lcc = corestats.Stats(LCC_rbf) sts_srocc = corestats.Stats(SROCC_rbf) sts_rmse = corestats.Stats(RMSE_rbf) LCC.append(sts_lcc.avg()); SROCC.append(sts_srocc.avg()); RMSE.append(sts_rmse.avg()) if j == 0: baseplcc = sts_lcc.avg(); basesrocc = sts_srocc.avg(); basermse = sts_rmse.avg() else: basecolumn.remove(j) #print "all_feature metrics %s:plcc:%f,srocc:%f,rmse:%f " %(kernels[2],sts_lcc.avg(),sts_srocc.avg(),sts_rmse.avg()) # LCC_r = [(item-baseplcc)/baseplcc for item in LCC] # SROCC_r = [(item-basesrocc)/basesrocc for item in SROCC] # RMSE_r = [(item-basermse)/basermse for item in RMSE] N = 15 ind = np.arange(N) # the x locations for the groups width = 0.35 # the width of the bars fig, ax = plt.subplots() # add some ax.set_xlabel(u'特征序号', 
fontsize=18) ax.set_ylabel(u'相关系数和均方差', fontsize=18) # ax.set_title('The Correlation between features and mos') ax.set_xticks(ind) ax.set_xticklabels(('Orig', 'f1', 'f2', 'f3', 'f4', 'f5', 'f7', 'f13', 'f14', 'f15')) plt.axhline(y=baseplcc) plt.axhline(y=basesrocc) plt.axhline(y=basermse) plt.plot(LCC, 'b--*', label='PLCC') plt.plot(SROCC, 'g--+', label='SROCC') plt.plot(RMSE, 'r--o', label='RMSE') plt.legend(loc='center right') plt.show()