Exemple #1
0
def test_confidence_slice_pass_confidence_level():
    """Check confidence_slice() when the level is given explicitly as '0.8'.

    Two inputs are exercised: an even-sized sample (0.0 .. 9.0) and an
    odd-sized sample (0.0 .. 10.0).
    """
    # (sample size, expected lower bound, expected middle, expected upper bound)
    cases = [
        # Even size: the middle is the mean of the two central values.
        (10, 1, (4 + 5) / 2., 8),
        # Odd size: the middle is the single central value.
        (11, 1, 5, 9),
    ]
    for size, want_low, want_mid, want_high in cases:
        sample = [float(value) for value in range(size)]
        got_low, got_mid, got_high = confidence_slice(sample, '0.8')
        assert got_mid == want_mid
        assert got_low == want_low
        assert got_high == want_high
Exemple #2
0
def confidence_ratio_error_locs(x, y):
    """Return the confidence slice of x's error locations relative to y's.

    The bootstrapped samples of `x` and `y` are paired up in order, and each
    pair is turned into the percentage difference ``(x - y) / y * 100``.
    The resulting list is handed to ``confidence_slice`` with level "0.99".

    Args:
        x: object providing ``bootstrap_error_locs()``.
        y: object providing ``bootstrap_error_locs()``; its samples are the
            baseline (denominator).

    Returns:
        Whatever ``confidence_slice`` returns for the percentage list.
    """
    percent_diffs = [
        (float(x_val - y_val) / float(y_val)) * 100.0
        for x_val, y_val in zip(x.bootstrap_error_locs(),
                                y.bootstrap_error_locs())
    ]
    return confidence_slice(percent_diffs, "0.99")
Exemple #3
0
def confidence_ratio_recovery_means(x, y):
    """Return the confidence slice of x's recovery means as a percentage of y's.

    The bootstrapped samples of `x` and `y` are paired up in order, and each
    pair is turned into the ratio ``x / y * 100``. The resulting list is
    handed to ``confidence_slice`` with level "0.99".

    Args:
        x: object providing ``bootstrap_recovery_means()``.
        y: object providing ``bootstrap_recovery_means()``; its samples are
            the baseline (denominator).

    Returns:
        Whatever ``confidence_slice`` returns for the percentage list.
    """
    # Convert each operand to float *before* dividing (as
    # confidence_ratio_error_locs does) so that integer samples are not
    # truncated by Python 2's integer division.
    ratios = [
        float(x_val) / float(y_val) * 100.0
        for x_val, y_val in zip(x.bootstrap_recovery_means(),
                                y.bootstrap_recovery_means())
    ]
    return confidence_slice(ratios, "0.99")
Exemple #4
0
    def __init__(self, latex_name, pexecs, num_runs):
        """Group executions by name and compute the bootstrapped intervals.

        Progress is reported on stdout while the intervals are computed.

        Args:
            latex_name: label stored on the instance and echoed in the
                progress output.
            pexecs: iterable of execution records, each with a ``name``
                attribute; records sharing a name are grouped together.
            num_runs: stored on the instance unchanged.
        """
        self.latex_name = latex_name

        # self.pexecs becomes a list of lists: one inner list per distinct
        # execution name.
        benches = {}
        for p in pexecs:
            benches.setdefault(p.name, []).append(p)
        self.pexecs = list(benches.values())
        self.num_runs = num_runs

        # These must exist before the bootstrap_* methods are called below.
        # NOTE(review): presumably used as caches by those methods -- confirm.
        self.bootstrapped_recovery_means = None
        self.bootstrapped_error_locs = None

        sys.stdout.write("%s: recovery_times..." % latex_name)
        sys.stdout.flush()
        self.recovery_time_mean_ci = confidence_slice(
            self.bootstrap_recovery_means(), "0.99")
        self.recovery_time_median_ci = confidence_slice(
            self.bootstrap_recovery_medians(), "0.99")

        sys.stdout.write(" failure rates...")
        sys.stdout.flush()
        self.failure_rate_ci = confidence_slice(
            self.bootstrap_failure_rates(), "0.99")

        sys.stdout.write(" error locations...")
        sys.stdout.flush()
        self.error_locs_ci = confidence_slice(
            self.bootstrap_error_locs(), "0.99")

        # The costs interval is skipped for the run labelled "\panic", so
        # self.costs_ci is left undefined on that instance.
        if latex_name != "\\panic":
            sys.stdout.write(" costs...")
            sys.stdout.flush()
            self.costs_ci = confidence_slice(self.bootstrap_costs(), "0.99")

        sys.stdout.write(" input skipped...")
        sys.stdout.flush()
        self.input_skipped_ci = confidence_slice(
            self.bootstrap_input_skipped(), "0.99")

        # Terminate the progress line. A bare `print` only emits a newline
        # under Python 2 (it is a no-op expression under Python 3), so write
        # the newline explicitly.
        sys.stdout.write("\n")
Exemple #5
0
def test_confidence_slice():
    """Check confidence_slice() with its default level on 1000 sorted values.

    The expected bounds are worked out by hand for a 95% interval and then
    compared with what the implementation returns.
    """
    # Pretend these are the means we got back; they are already sorted.
    sample = [x + 15 for x in range(1000)]

    # For 1000 values and a 95% confidence interval, alpha/2 covers 25 values.
    tail = len(sample) * 0.025
    assert tail == 25

    # So 25 values are dropped from each end of the list:
    #   - the first 25 indices are 0, ..., 24, making index 25 the lower bound;
    #   - the last 25 indices are -1, ..., -25 (i.e. 999, ..., 975), making
    #     index -26 (i.e. 974) the upper bound.
    low_index = int(math.floor(tail))
    high_index = int(-math.ceil(tail) - 1)
    want_lower = sample[low_index]
    want_upper = sample[high_index]

    # Each value is its index plus 15, so the interval is [25+15, 974+15].
    assert (want_lower, want_upper) == (25 + 15, 974 + 15)

    # Strictly speaking 1000 items have no median. We use the mean of the two
    # middle items (items 500 and 501, at indices 499 and 500). Their values
    # are 514 and 515, whose mean is 514.5.
    want_median = 514.5

    # Now check the implementation.
    result = confidence_slice(sample)
    (got_lower, got_median, got_upper) = result

    # The result unpacks as a triple and exposes the same values by name.
    assert result.lower == got_lower
    assert result.median == got_median
    assert result.upper == got_upper

    assert got_lower == want_lower
    assert got_upper == want_upper
    assert want_median == got_median

    # The error is the mean distance from the median to the two bounds.
    assert result.error == _mean([want_median - want_lower,
                                  want_upper - want_median])
Exemple #6
0
    def bins_errs(run, num_bins, max_error_locs):
        """Bootstrap a histogram of the number of costs per execution.

        For each of BOOTSTRAP rounds, one execution is drawn at random from
        every group in ``run.pexecs`` and a histogram of ``len(costs)`` is
        built over ``num_bins`` bins spanning ``(0, max_error_locs)``.

        Args:
            run: object whose ``pexecs`` is an iterable of groups of
                executions (each with ``succeeded`` and ``costs``).
            num_bins: number of histogram bins.
            max_error_locs: upper edge of the histogram range.

        Returns:
            A pair ``(bins, errs)``: per bin, the median count and the
            integer-truncated error of its confidence slice at level "0.99".
        """
        # counts_per_bin[k] collects the count seen in bin k in every round.
        counts_per_bin = [[] for _ in range(num_bins)]
        for _ in range(BOOTSTRAP):
            sample = []
            for group in run.pexecs:
                chosen = random.choice(group)
                # Keep executions that succeeded or recorded at least one
                # cost, and -- when `zoom` (from the enclosing scope) is set
                # -- only those with at most `zoom` costs.
                if ((chosen.succeeded or len(chosen.costs) > 0)
                        and (zoom is None or len(chosen.costs) <= zoom)):
                    sample.append(len(chosen.costs))
            round_counts, _ = histogram(sample, bins=num_bins,
                                        range=(0, max_error_locs))
            for bin_index, count in enumerate(round_counts):
                counts_per_bin[bin_index].append(count)

        slices = [confidence_slice(counts, "0.99") for counts in counts_per_bin]
        bins = [ci.median for ci in slices]
        errs = [int(ci.error) for ci in slices]
        return bins, errs
Exemple #7
0
def time_histogram(run, p, budget=RECOVERY_BUDGET):
    """Plot a bootstrapped histogram of recovery times and save it as a PDF.

    For each of BOOTSTRAP rounds, one execution is drawn at random from every
    group in ``run.pexecs`` and its ``recovery_time`` is binned into
    HISTOGRAM_BINS bins spanning ``(0, budget)``. Each bar shows the median
    count for its bin, with an error bar taken from the bin's confidence
    slice at level "0.99".

    Args:
        run: object whose ``pexecs`` is a sized collection of groups of
            executions (each with a ``recovery_time``).
        p: destination handed to ``plt.savefig``.
        budget: upper edge of the histogram range; also scales the x-axis
            tick labels.
    """
    # bbins[k] collects the count seen in bin k in every bootstrap round.
    bbins = [[] for _ in range(HISTOGRAM_BINS)]
    for _ in range(BOOTSTRAP):
        d = [
            float(random.choice(pexecs).recovery_time) for pexecs in run.pexecs
        ]
        hbins, _ = histogram(d, bins=HISTOGRAM_BINS, range=(0, budget))
        for i, cnt in enumerate(hbins):
            bbins[i].append(cnt)

    bins = []
    errs = []
    for bbin in bbins:
        ci = confidence_slice(bbin, "0.99")
        bins.append(ci.median)
        errs.append(int(ci.error))

    # On PyPy, we want to force a collection before we start running subprocesses,
    # otherwise we can sometimes have consumed so much RAM that they can't run.
    bbins = None
    gc.collect()

    sns.set(style="whitegrid")
    plt.rc('text', usetex=True)
    plt.rc('font', family='sans-serif')
    fig, ax = plt.subplots(figsize=(8, 4))
    plt.bar(range(HISTOGRAM_BINS), bins, yerr=errs, align="center", log=True,
            color="#777777",
            error_kw={"ecolor": "black", "elinewidth": 1, "capthick": 0.5,
                      "capsize": 1})
    plt.yscale('symlog')
    ax.set_xlabel('Recovery time (s)')
    ax.set_ylabel('Number of files (log$_{10}$)')
    ax.grid(linewidth=0.25)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    ax.yaxis.set_tick_params(which='minor', size=0)
    ax.yaxis.set_tick_params(which='minor', width=0)
    plt.xlim(xmin=-.2, xmax=HISTOGRAM_BINS)
    plt.ylim(ymin=0, ymax=len(run.pexecs))

    # X ticks: num_labs evenly spaced labels running from 0 to `budget`.
    # Bars are centred on integer positions, hence the -0.5 shift.
    num_labs = 5 if budget <= 0.5 else 8
    locs = []
    labs = []
    for i in range(num_labs + 1):
        locs.append((HISTOGRAM_BINS / float(num_labs)) * i - 0.5)
        labs.append(i / (float(num_labs) / budget))
    plt.xticks(locs, labs)

    # Y ticks: start at the number of groups and step down a factor of ten at
    # a time, finishing with 0. Floor division keeps the tick values integral
    # on Python 3 as well as Python 2 (plain `/=` would produce floats such
    # as 1.5 there).
    ylocs = []
    ylabs = []
    i = len(run.pexecs)
    while i >= 1:
        ylocs.append(i)
        ylabs.append(i)
        i //= 10
    ylocs.append(0)
    ylabs.append(0)
    plt.yticks(ylocs, ylabs)

    formatter = ScalarFormatter()
    formatter.set_scientific(False)
    ax.yaxis.set_major_formatter(formatter)
    plt.tight_layout()
    plt.savefig(p, format="pdf")