Example #1
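All of the examples below exercise bs.bootstrap_ab / bs.bootstrap from Facebook's bootstrapped library (https://github.com/facebookincubator/bootstrapped). They are shown without their import headers; a representative preamble covering the aliases they rely on might look like the sketch below (an assumption pieced together from usage, not copied from any one source repo):

import os
import random
from random import randint
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import sparse, stats
from scipy.stats import ks_2samp, mannwhitneyu, rankdata, ttest_ind

import bootstrapped.bootstrap as bs
import bootstrapped.compare_functions as bs_compare
import bootstrapped.power as bs_power  # power-analysis helpers used in one snippet
import bootstrapped.stats_functions as bs_stats

bas = bs  # a few snippets alias the bootstrap module as 'bas'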
def test_pivotal(self):
    mean = 100
    stdev = 10

    test = np.random.normal(loc=mean, scale=stdev, size=500)
    ctrl = np.random.normal(loc=mean, scale=stdev, size=5000)
    test = test * 1.1

    bsr = bs.bootstrap_ab(test, ctrl, bs_stats.mean, bs_compare.percent_change)

    bsr_percent = bs.bootstrap_ab(test,
                                  ctrl,
                                  bs_stats.mean,
                                  bs_compare.percent_change,
                                  is_pivotal=False)
    self.assertAlmostEqual(bsr.value, bsr_percent.value, delta=.1)

    self.assertAlmostEqual(bsr.lower_bound, bsr_percent.lower_bound, delta=.1)

    self.assertAlmostEqual(bsr.upper_bound, bsr_percent.upper_bound, delta=.1)

    bsr = bs.bootstrap(test, bs_stats.mean)

    bsr_percent = bs.bootstrap(test, bs_stats.mean, num_threads=10)
    self.assertAlmostEqual(bsr.value, bsr_percent.value, delta=.1)

    self.assertAlmostEqual(bsr.lower_bound, bsr_percent.lower_bound, delta=.1)

    self.assertAlmostEqual(bsr.upper_bound, bsr_percent.upper_bound, delta=.1)
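For context, is_pivotal switches the confidence-interval method: pivotal (the library default) versus percentile. A minimal standalone sketch reusing the preamble above:

samples = np.random.normal(loc=100, scale=10, size=1000)
ci_pivotal = bs.bootstrap(samples, bs_stats.mean)                       # pivotal CI (default)
ci_percentile = bs.bootstrap(samples, bs_stats.mean, is_pivotal=False)  # percentile CI
print(ci_pivotal, ci_percentile)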
Example #2
    def test_bootstrap_ab(self):
        mean = 100
        stdev = 10

        test = np.random.normal(loc=mean, scale=stdev, size=500)
        ctrl = np.random.normal(loc=mean, scale=stdev, size=5000)
        test = test * 1.1

        bsr = bs.bootstrap_ab(test, ctrl, bs_stats.mean,
                              bs_compare.percent_change)
        self.assertAlmostEqual(bsr.value, 10, delta=.5)

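        # sums scale with sample size: sum(test) is roughly 500 * 110 while sum(ctrl)
        # is roughly 5000 * 100, so the percent change is about -89%; scale_test_by=10.
        # below corrects for the 10x difference in group sizes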
        bsr2 = bs.bootstrap_ab(test, ctrl, bs_stats.sum,
                               bs_compare.percent_change)
        self.assertAlmostEqual(bsr2.value, -88, delta=2)

        bsr3 = bs.bootstrap_ab(test,
                               ctrl,
                               bs_stats.sum,
                               bs_compare.percent_change,
                               scale_test_by=10.)
        self.assertAlmostEqual(bsr3.value, 10, delta=.5)

        test_denom = np.random.normal(loc=mean, scale=stdev, size=500)
        ctrl_denom = np.random.normal(loc=mean, scale=stdev, size=5000)
        test_denom = test_denom * 1.1

        bsr4 = bs.bootstrap_ab(test,
                               ctrl,
                               bs_stats.mean,
                               bs_compare.percent_change,
                               test_denominator=test_denom,
                               ctrl_denominator=ctrl_denom)
        self.assertAlmostEqual(bsr4.value, 0, delta=.5)
Example #3
    def test_bootstrap_ab_sparse(self):
        mean = 100
        stdev = 10

        test = np.random.normal(loc=mean, scale=stdev, size=500)
        ctrl = np.random.normal(loc=mean, scale=stdev, size=5000)
        test = test * 1.1
        test_sp = sparse.csr_matrix(test)
        ctrl_sp = sparse.csr_matrix(ctrl)

        bsr = bs.bootstrap_ab(test, ctrl, bs_stats.mean,
                              bs_compare.percent_change)
        bsr_sp = bs.bootstrap_ab(test_sp, ctrl_sp, bs_stats.mean,
                                 bs_compare.percent_change)

        self.assertAlmostEqual(
            bsr.value,
            bsr_sp.value,
            delta=.1,
        )
        self.assertAlmostEqual(
            bsr.upper_bound,
            bsr_sp.upper_bound,
            delta=.1,
        )

        self.assertAlmostEqual(
            bsr.lower_bound,
            bsr_sp.lower_bound,
            delta=.1,
        )
Example #4
def compute_stat_from_eval_perfs(data_path, n1, n2):
    eval_perfs = np.empty([n1 + n2, 251]) * np.nan
    scores_our = []
    for i, f in enumerate(sorted(os.listdir(data_path))):
        if 'our' in f:
            scores = np.loadtxt(data_path + f)
            for j in range(scores.shape[0]):
                scores_our.append(scores[j])
        else:
            if '2M' in f:
                tmp = np.concatenate(
                    [np.zeros([20, 1]),
                     np.loadtxt(data_path + f)], axis=1)
            else:
                tmp = np.loadtxt(data_path + f)
            if tmp.shape[1] == 252:
                tmp = tmp[:, :-1]
            eval_perfs[i * 20:(i + 1) * n1, :] = tmp

    # compute statistics
    data1_litt = np.nanmean(eval_perfs[:n1][:, -10:], axis=1)
    data2_litt = np.nanmean(eval_perfs[n1:][:, -10:], axis=1)
    ks_litt, p_ks_litt = ks_2samp(data1_litt, data2_litt)
    ttest_litt, p_ttest_litt = ttest_ind(data1_litt,
                                         data2_litt,
                                         equal_var=False)
    data1_our = np.array(scores_our[:n1])
    data2_our = np.array(scores_our[n1:])
    ks_our, p_ks_our = ks_2samp(data1_our, data2_our)
    ttest_our, p_ttest_our = ttest_ind(data1_our, data2_our, equal_var=False)

    # estimation of confidence intervals with bootstrap method, https://github.com/facebookincubator/bootstrapped
    res_litt = bs.bootstrap_ab(data1_litt,
                               data2_litt,
                               bs_stats.mean,
                               bs_compare.difference,
                               num_iterations=10000)
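    # the difference is significant at the chosen alpha when the CI excludes zero,
    # i.e. when both bounds share the same sign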
    sign_litt = np.sign(res_litt.upper_bound) == np.sign(res_litt.lower_bound)
    res_our = bs.bootstrap_ab(data1_our,
                              data2_our,
                              bs_stats.mean,
                              bs_compare.difference,
                              num_iterations=10000)
    sign_our = np.sign(res_our.upper_bound) == np.sign(res_our.lower_bound)

    toSave = np.zeros([4, 4])
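    # rows 0-1: [KS stat, KS p-value, t stat, t p-value] for the two metrics;
    # rows 2-3: [bootstrap value, lower bound, upper bound, signed significance flag]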
    toSave[0:2, :] = np.array([[ks_litt, p_ks_litt, ttest_litt, p_ttest_litt],
                               [ks_our, p_ks_our, ttest_our, p_ttest_our]])
    toSave[2, :] = np.array([
        res_litt.value, res_litt.lower_bound, res_litt.upper_bound,
        sign_litt * np.sign(res_litt.lower_bound)
    ])
    toSave[3, :] = np.array([
        res_our.value, res_our.lower_bound, res_our.upper_bound,
        sign_our * np.sign(res_our.lower_bound)
    ])

    np.savetxt(data_path + 'stats', toSave)
Example #5
    def test_bootstrap_batch_size(self):
        mean = 100
        stdev = 10

        test = np.random.normal(loc=mean, scale=stdev, size=500)
        ctrl = np.random.normal(loc=mean, scale=stdev, size=5000)
        test = test * 1.1

        bsr = bs.bootstrap_ab(test, ctrl, bs_stats.mean,
                              bs_compare.percent_change)

        bsr_batch = bs.bootstrap_ab(test, ctrl, bs_stats.mean,
                                    bs_compare.percent_change,
                                    iteration_batch_size=10)
        self.assertAlmostEqual(
            bsr.value,
            bsr_batch.value,
            delta=.1
        )

        self.assertAlmostEqual(
            bsr.lower_bound,
            bsr_batch.lower_bound,
            delta=.1
        )

        self.assertAlmostEqual(
            bsr.upper_bound,
            bsr_batch.upper_bound,
            delta=.1
        )

        bsr = bs.bootstrap(test, bs_stats.mean)

        bsr_batch = bs.bootstrap(test, bs_stats.mean,
                                 iteration_batch_size=10)
        self.assertAlmostEqual(
            bsr.value,
            bsr_batch.value,
            delta=.1
        )

        self.assertAlmostEqual(
            bsr.lower_bound,
            bsr_batch.lower_bound,
            delta=.1
        )

        self.assertAlmostEqual(
            bsr.upper_bound,
            bsr_batch.upper_bound,
            delta=.1
        )
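Note that iteration_batch_size only controls how many bootstrap iterations are materialized at once, trading memory for speed; as the assertions above check, it should not move the estimates beyond sampling noise.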
Example #6
def compare_stats(data_path, n1, n2):
    """
    Computes statistical tests to assess whether two algorithms perform statistically differently.
    data_path should include the scores_absolute and scores_final files of the two algorithms, such as:
    1_scores_final_algo1, 2_scores_final_algo2, 3_score_absolute_algo1, scores_absolute_algo2
    These files are created by the 'compute_plot_all' function.
    We compute the Kolmogorov-Smirnov test, the t-test, and a bootstrap confidence interval of the difference in performance
    for the absolute and final metrics.
    """
    print('Running statistical tests..')
    eval_perfs = np.empty([n1+n2,1001])*np.nan # 1001 is the length of an episode
    scores_absolute = []
    scores_final = []
    for i, f in enumerate(sorted(os.listdir(data_path))):
        # print(f)
        if 'absolute' in f:
            scores = np.loadtxt(data_path+f)
            for j in range(scores.shape[0]):
                scores_absolute.append(scores[j])
        if 'final' in f:
            scores = np.loadtxt(data_path+f)
            for j in range(scores.shape[0]):
                scores_final.append(scores[j])

    data1_absolute = np.array(scores_absolute[:n1])
    data2_absolute = np.array(scores_absolute[n1:])
    data1_final = np.array(scores_final[:n1])
    data2_final = np.array(scores_final[n1:])

    ks_final, p_ks_final = ks_2samp(data1_final,data2_final)
    ttest_final, p_ttest_final = ttest_ind(data1_final,data2_final, equal_var=False)

    ks_absolute, p_ks_absolute = ks_2samp(data1_absolute, data2_absolute)
    ttest_absolute, p_ttest_absolute = ttest_ind(data1_absolute, data2_absolute, equal_var=False)

    # estimation of confidence intervals with bootstrap method, https://github.com/facebookincubator/bootstrapped
    res_final = bs.bootstrap_ab(data1_final, data2_final, bs_stats.mean,bs_compare.difference, num_iterations=10000)
    sign_final = np.sign(res_final.upper_bound)==np.sign(res_final.lower_bound)
    res_absolute = bs.bootstrap_ab(data1_absolute, data2_absolute, bs_stats.mean,bs_compare.difference, num_iterations=10000)
    sign_absolute = np.sign(res_absolute.upper_bound) == np.sign(res_absolute.lower_bound)

    toSave=np.zeros([4,4])
    toSave[0:2,:] = np.array([[ks_final, p_ks_final, ttest_final, p_ttest_final],[ks_absolute, p_ks_absolute, ttest_absolute, p_ttest_absolute]])
    toSave[2,:] = np.array([res_final.value,res_final.lower_bound,res_final.upper_bound,sign_final*np.sign(res_final.lower_bound)])
    toSave[3,:] = np.array([res_absolute.value,res_absolute.lower_bound,res_absolute.upper_bound,sign_absolute*np.sign(res_absolute.lower_bound)])

    np.savetxt(data_path + 'stats', toSave)
    print('Done.')
Example #7
    def bootstrap_test(data1, data2, alpha=0.05):
        """
        Wraps around bootstrap test from https://github.com/facebookincubator/bootstrapped/.

        Params
        ------

        - data1 (ndarray of dim 1)
        The performance measures of Algo1.
        - data2 (ndarray of dim 1)
        The performance measures of Algo2.
        - alpha (float in ]0,1[)
        The significance level used by the bootstrap test.

        """
        data1 = data1.squeeze()
        data2 = data2.squeeze()
        assert 0 < alpha < 1, "alpha should be between 0 and 1"

        res = bs.bootstrap_ab(data1, data2, bs_stats.mean, bs_compare.difference, alpha=alpha, num_iterations=10000)
        decision = np.sign(res.upper_bound) == np.sign(res.lower_bound)


        if decision:
            if np.sign(res.upper_bound) < 0:
                print("\n\nResult of the bootstrap test at level %02g: μ2>μ1, the test passed with a confidence interval for μ1-μ2 of (%02g, %02g)."
                    % (alpha, res.lower_bound, res.upper_bound))
            else:
                print("\n\nResult of the bootstrap test at level %02g: μ1>μ2, the test passed with a confidence interval for μ1-μ2 of (%02g, %02g)."
                    % (alpha, res.lower_bound, res.upper_bound))
        else:
            print("\n\nResult of the bootstrap test at level %02g: there is not enough evidence to establish an order between μ1 and μ2." % alpha)
        print("Bootstrap test done.")
Example #8
def bootstrapped_mean_difference_interval_for_continuous(
        data1, data2, alpha=0.05):
    """
    Return the difference of bootstrapped means for continuous metric

    Parameters
    ----------
    data1, data2 : One-dimension arrays with [0, 1] values.
    alpha : The alpha value for the confidence intervals.

    Returns
    -------
    bootstrapped_interval : The bootstrap confidence interval for a given distribution.
    """
    data1['denominator'] = 1
    data2['denominator'] = 1
    bootstrapped_interval = bs.bootstrap_ab(
        data1,
        data2,
        stat_func=bs_stats.sum,
        compare_func=bs_compare.difference,
        test_denominator=data1['denominator'],
        ctrl_denominator=data2['denominator'],
        alpha=alpha,
        return_distribution=False)
    return bootstrapped_interval
Example #9
def run_simulation2(data, data2):
    results = []
    for i in range(3000):
        results.append(
            bas.bootstrap_ab(data, data2, bs_stats.mean,
                             bs_compare.percent_change))
    return results
Example #10
def run_test(test_id, data1, data2, alpha=0.05):
    """
    Compute tests comparing data1 and data2 with confidence level alpha
    :param test_id: (str) refers to what test should be used
    :param data1: (np.ndarray) sample 1
    :param data2: (np.ndarray) sample 2
    :param alpha: (float) confidence level of the test
    :return: (bool) if True, the null hypothesis is rejected
    """
    data1 = data1.squeeze()
    data2 = data2.squeeze()
    n1 = data1.size
    n2 = data2.size

    if test_id == 'bootstrap':
        assert 0 < alpha < 1, "alpha should be between 0 and 1"
        res = bs.bootstrap_ab(data1,
                              data2,
                              bs_stats.mean,
                              bs_compare.difference,
                              alpha=alpha,
                              num_iterations=1000)
        rejection = np.sign(res.upper_bound) == np.sign(res.lower_bound)
        return rejection

    elif test_id == 't-test':
        _, p = ttest_ind(data1, data2, equal_var=True)
        return p < alpha

    elif test_id == "Welch t-test":
        _, p = ttest_ind(data1, data2, equal_var=False)
        return p < alpha

    elif test_id == 'Mann-Whitney':
        _, p = mannwhitneyu(data1, data2, alternative='two-sided')
        return p < alpha

    elif test_id == 'Ranked t-test':
        all_data = np.concatenate([data1.copy(), data2.copy()], axis=0)
        ranks = rankdata(all_data)
        ranks1 = ranks[:n1]
        ranks2 = ranks[n1:n1 + n2]
        assert ranks2.size == n2
        _, p = ttest_ind(ranks1, ranks2, equal_var=True)
        return p < alpha

    elif test_id == 'permutation':
        all_data = np.concatenate([data1.copy(), data2.copy()], axis=0)
        delta = np.abs(data1.mean() - data2.mean())
        num_samples = 1000
        estimates = []
        for _ in range(num_samples):
            estimates.append(run_permutation_test(all_data.copy(), n1, n2))
        estimates = np.abs(np.array(estimates))
        diff_count = len(np.where(estimates <= delta)[0])
        return (1.0 - (float(diff_count) / float(num_samples))) < alpha

    else:
        raise NotImplementedError
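A quick sketch of how the dispatcher might be called (synthetic samples, names assumed):

sample_a = np.random.normal(0.0, 1.0, size=20)
sample_b = np.random.normal(1.0, 1.0, size=20)
print(run_test('Welch t-test', sample_a, sample_b, alpha=0.05))  # True if H0 is rejected
print(run_test('bootstrap', sample_a, sample_b, alpha=0.05))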
Example #11
def run_simulation(data):
    lift = 1.25
    results = []
    for i in range(3000):
        random.shuffle(data)
        test = data[:len(data) // 2] * lift  # integer division: float indices fail in Python 3
        ctrl = data[len(data) // 2:]
        results.append(
            bas.bootstrap_ab(test, ctrl, bs_stats.mean,
                             bs_compare.percent_change))
    return results
Example #12
    def empirical_false_pos_rate(data, alpha=0.05):
        """
        Compute and plot empirical estimates of the probability of type-I error given a list of performance measures.
        If this list is of size N_data
        This is done for N=2:floor(N_data/2). Two different tests are used: the bootstrap confidence interval test and the
        Welch's t-test, both with significance level alpha.

        Params
        ------
        - data1 (ndarray of dim 1)
        The performance measures of the considered algorithm.
        - alpha (float in ]0,1[)
        The significance level used by the two tests.
        """
        print('\n\nComputing empirical false positive rate ..')
        data = data.squeeze()
        sizes = range(2, data.size//2)
        nb_reps = 1000
        results = np.zeros([nb_reps, len(sizes), 2])
        blue = [0,0.447,0.7410,1]
        orange = [0.85,0.325,0.098,1]

        for i_n, n in enumerate(sizes):
            print('     N =', n)
            ind = list(range(2*n))
            for rep in range(nb_reps):
                # take two groups of size n in data, at random
                np.random.shuffle(ind)
                sample_1 = data[ind[:n]]
                sample_2 = data[ind[n:2*n]]
                # perform the two-tail Welch's t-test
                results[rep, i_n, 0] = stats.ttest_ind(sample_1, sample_2, equal_var=False)[1] < alpha
                # perform the bootstrap confidence interval test
                res_final = bs.bootstrap_ab(sample_1, sample_2, bs_stats.mean, bs_compare.difference, num_iterations=10000)
                results[rep, i_n, 1] = np.sign(res_final.upper_bound) == np.sign(res_final.lower_bound)

        res_mean = results.mean(axis=0)
        plt.figure(figsize=(16,10), frameon=False)
        plt.plot(sizes, alpha * np.ones(len(sizes)), c='k', linewidth=5, linestyle='--')
        plt.plot(sizes, res_mean[:,0], color=blue, linewidth=4)
        plt.plot(sizes, res_mean[:,1], color=orange, linewidth=4)

        plt.legend([u'α=%.2f' % alpha, "Welch's $t$-test", 'bootstrap test'])
        plt.xlabel('sample size (N)')
        plt.ylabel('P(false positive)')
        plt.title(u'Estimation of type-I error rate as a function of $N$ when $α=0.05$')
        print("\n   Given N=%i and α=%02g, you can expect false positive rates: \n     For the Welch's t-test: %02g \n     For the bootstrap test: %02g."
            % (data.size //2, alpha, res_mean[-1,0], res_mean[-1,1] ))
        print('Done.')
Example #13
def bootstrap_effect_size(test, ctrl) -> Tuple[float, Tuple[float, float]]:
    """

    Parameters
    ----------
    test
    ctrl

    Returns
    -------
    (effect size, (lower bound, upper bound))
    """
    comp = bs.bootstrap_ab(np.asarray(test),
                           np.asarray(ctrl),
                           bs_stats.median,
                           bs_compare.difference,
                           num_threads=-1)
    return comp.value, (comp.lower_bound, comp.upper_bound)
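For instance, with made-up measurements (num_threads=-1 asks the library to use all available cores; the default alpha of 0.05 yields a 95% interval):

effect, (low, high) = bootstrap_effect_size([12.1, 11.8, 12.5, 13.0],
                                            [11.2, 11.0, 11.6, 11.4])
print("median difference: %.2f, 95%% CI: (%.2f, %.2f)" % (effect, low, high))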
Example #14
def test_hypothesis(group1, group2, name, group_names):
    group1 = group1.groupby(
            ['sessionID']).median()["rating"].rename(group_names[0])
    group2 = group2.groupby(
            ['sessionID']).median()["rating"].rename(group_names[1])
    likert_plot_hypo(group1, group2, name)
    print("Mittelwert von ", group1.name,
          "-", group1.mean())
    print("Mittelwert von ", group2.name,
          "-", group2.mean())
    bs_result = bs.bootstrap_ab(group1.to_numpy(),
                                group2.to_numpy(),
                                bs_stats.median,
                                bs_compare.percent_change)
    mean_change = bs_compare.percent_change(group1.mean(), group2.mean())
    print("Bootstrap Ergebnis:", bs_result)
    print("Unterschied im Mittelwert von",
          group1.name, "zu", group2.name, "(in Prozent)", mean_change)
    print("Ist der Unterschied signifikant?", bs_result.is_significant())
Example #15
def bootstrapped_mean_difference_distribution_for_continuous(data1, data2):
    """
	Return the distribution of bootstrapped mean difference for continuous metric

	Parameters
    ----------
    data1, data2 : One-dimension arrays.

    Returns
    -------
    bootstrapped_mean_difference : Distribution of the mean difference
	"""
    bootstrapped_mean_difference = bs.bootstrap_ab(
        data1,
        data2,
        stat_func=bs_stats.mean,
        compare_func=bs_compare.difference,
        return_distribution=True)
    return bootstrapped_mean_difference
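Since return_distribution=True makes the library return the raw bootstrap distribution instead of an interval, quantiles can be read off directly; a small sketch with synthetic inputs:

dist = bootstrapped_mean_difference_distribution_for_continuous(
    np.random.normal(10.0, 2.0, size=500),
    np.random.normal(9.5, 2.0, size=500))
print(np.percentile(dist, [2.5, 97.5]))  # empirical 95% interval of the mean difference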
Example #16
def bootstrapped_mean_difference_interval_for_continuous(
        data1, data2, alpha=0.05):
    """
	Return the difference of bootstrapped means for continuous metric

	Parameters
    ----------
    data1, data2 : One-dimension arrays.
    alpha : The alpha value for the confidence intervals.

    Returns
    -------
    bootstrapped_interval : The bootstrap confidence interval for a given distribution.
	"""
    bootstrapped_interval = bs.bootstrap_ab(data1,
                                            data2,
                                            stat_func=bs_stats.mean,
                                            compare_func=bs_compare.difference,
                                            alpha=alpha,
                                            return_distribution=False)
    return bootstrapped_interval
Example #17
def bootstrapped_mean_difference_distribution_for_binomial(data1, data2):
    """
    Return the distribution of bootstrapped mean difference for binomial metric

    Parameters
    ----------
    data1, data2 : One-dimension arrays with [0, 1] values.

    Returns
    -------
    bootstrapped_mean_difference : Distribution of the mean difference
    """
    data1['denominator'] = 1
    data2['denominator'] = 1
    bootstrapped_mean_difference = bs.bootstrap_ab(
        data1,
        data2,
        stat_func=bs_stats.sum,
        compare_func=bs_compare.difference,
        test_denominator=data1['denominator'],
        ctrl_denominator=data2['denominator'],
        return_distribution=True)
    return bootstrapped_mean_difference
Example #18
mean = 100
stdev = 10

samples = np.random.normal(loc=mean, scale=stdev, size=5000)
samples_t = np.random.normal(loc=mean, scale=stdev, size=5000)

bsr = bs.bootstrap(samples, bs_stats.mean)
print(bsr)

bsr2 = bs.bootstrap(samples, bs_stats.mean, method="pi")
print("pi:")
print(bsr2)

bsr3 = bs.bootstrap(samples, bs_stats.trimmed_mean)
print("trimmed mean:")
print(bsr3)

bsr4 = bs.bootstrap_ab(samples, samples_t, bs_stats.trimmed_mean,
                       bs_compare.percent_change)
print("ab:")
print(bsr4)

bsr5 = bs.bootstrap_ab(samples,
                       samples_t,
                       bs_stats.trimmed_mean,
                       bs_compare.percent_change,
                       method="pi")
print("ab pi:")
print(bsr5)
Example #19
def run_simulation(group1, group2):
    results = []
    for i in range(3000):
        results.append(
            bs.bootstrap_ab(group1.to_numpy(), group2.to_numpy(),
                            bs_stats.sum, bs_compare.percent_change))
    return results
Example #20
def real_pv_test(sample_size):
    df = pd.read_csv("0928-ctr.sql", sep='\t')
    df_1005 = pd.read_csv("1005-ctr.sql", sep='\t')

    total_pv = float(np.mean(df["exp_pv"]))
    total_pv_1005 = float(np.mean(df_1005["exp_pv"]))
    pv_diff = total_pv - total_pv_1005

    p_out, p_in, flag = 0, 0, 0
    zero_out, zero_in, zero_flag = 0, 0, 0
    ab_out, ab_in, ab_flag = 0, 0, 0

    bucket_num = 50
    #total_click_pv = float(np.mean(df["cli_pv"]))
    c_out, c_in, c_flag = 0, 0, 0
    split_num = sample_size / bucket_num
    for i in range(0, 1000):
        print("{0}th sample--------------------".format(i))
        buck_index = np.floor(np.arange(0, sample_size) / split_num)
        #filename1 = "data/0928A{0}_{1}".format(sample_size,i)
        filename1 = "data/0928A1{1}_{0}".format(i, sample_size)
        filename2 = "data/0928A2{1}_{0}".format(i, sample_size)
        filename3 = "data/1005B{1}_{0}".format(i, sample_size)

        if os.path.exists(filename1) and os.path.exists(
                filename2) and os.path.exists(filename3):
            sample1 = pd.read_csv(filename1, sep='\t')
            sample2 = pd.read_csv(filename2, sep='\t')
            sample3 = pd.read_csv(filename3, sep='\t')
        else:
            sample1 = df.sample(n=sample_size)
            sample2 = df.sample(n=sample_size)
            sample3 = df_1005.sample(n=sample_size)
            sample1["bucket_index"] = buck_index
            sample2["bucket_index"] = buck_index
            sample3["bucket_index"] = buck_index
            sample1.to_csv(filename1, sep='\t')
            sample2.to_csv(filename2, sep='\t')
            sample3.to_csv(filename3, sep='\t')

        sample_0928 = sample1.groupby('bucket_index')[
            ["cli_pv", "exp_pv"]].mean().add_suffix('_sum').reset_index()
        sample_0928_1 = sample2.groupby('bucket_index')[
            ["cli_pv", "exp_pv"]].mean().add_suffix('_sum').reset_index()
        sample_1005 = sample3.groupby('bucket_index')[
            ["cli_pv", "exp_pv"]].mean().add_suffix('_sum').reset_index()

        #####bootstrap#######
        ####total
        r = bs.bootstrap(sample_0928.exp_pv_sum.values, bs_stats.mean)

        point, low, high = r.value, r.lower_bound, r.upper_bound
        if total_pv >= low and total_pv <= high:
            p_in = p_in + 1
            flag = 1
        else:
            p_out = p_out + 1
            flag = 0
        print(
            "total,flag:{0}, diff:{1}, real:{2}, low:{3}, high:{4}, width:{5}".
            format(flag, point - total_pv, total_pv, low, high, high - low))

        ####aa
        r = bs.bootstrap_ab(sample_0928.exp_pv_sum.values,
                            sample_0928_1.exp_pv_sum.values,
                            stat_func=bs_stats.mean,
                            compare_func=bs_compare.difference)

        # r = bs.bootstrap_ab(sample_0928.cli_pv_sum.values / sample_0928.exp_pv_sum.values,
        #                     sample_0928_1.cli_pv_sum.values / sample_0928_1.exp_pv_sum.values,
        #                     stat_func=bs_stats.mean,
        #                     compare_func=bs_compare.difference)
        point, low, high = r.value, r.lower_bound, r.upper_bound
        zero = 0.0
        if zero >= low and zero <= high:
            zero_in = zero_in + 1
            zero_flag = 1
        else:
            zero_out = zero_out + 1
            zero_flag = 0
        print("flag:{0}, diff:{1}, real:{2}, low:{3}, high:{4}, width:{5}".
              format(zero_flag, point - zero, 0.0, low, high, high - low))

        ####ab
        r = bs.bootstrap_ab(sample_0928.exp_pv_sum.values,
                            sample_1005.exp_pv_sum.values,
                            stat_func=bs_stats.mean,
                            compare_func=bs_compare.difference)
        point, low, high = r.value, r.lower_bound, r.upper_bound

        if pv_diff >= low and pv_diff <= high:
            ab_in = ab_in + 1
            ab_flag = 1
        else:
            ab_out = ab_out + 1
            ab_flag = 0
        print("flag:{0}, diff:{1}, real:{2}, low:{3}, high:{4}, width:{5}".
              format(ab_flag, point - pv_diff, pv_diff, low, high, high - low))

        if i % 50 == 0 or i == 999:
            # print("sample_size:{2},total,notcover:{0},cover:{1}".format(p_out, p_in,sample_size))
            # print("sample_size:{2},total,notcover:{0},cover:{1}".format(c_out, c_in,sample_size))
            print("total,not cover:{0},cover:{1}".format(p_out, p_in))
            print("aatest,not cover:{0},cover:{1}".format(zero_out, zero_in))
            print("abtest,not cover:{0},cover:{1}".format(ab_out, ab_in))

    print("end")
Example #21
    elif (pval < 0.05) and (st is not None):
        print('%s Different average' % test)
    else:
        print('%s is not applicable' % test)
    return 0

# 1. Sub-bucket Method
data['subbucket'] = data['user_id'].apply(lambda x: randint(0,1000)) # Variant 1
data['subbucket'] = data['user_id'].apply(lambda x: hash(x)%1000) # Variant 2

# 2. Bootstrap Method
data_a = data[data['group'] == 'experiment_buckets']
data_b = data[data['group'] == 'control_buckets']
bs_ab_estims = bs.bootstrap_ab(data_a.groupby(data['user_id']).target.sum().values,
                               data_b.groupby(data['user_id']).target.sum().values,
                                   bs_stats.mean,
                                   bs_compare.percent_change, num_iterations=5000, alpha=0.10,
                                   iteration_batch_size=100, scale_test_by=1, num_threads=4)

bs_data_a = bs.bootstrap(data_a.groupby(data['user_id']).target.sum().values,
                         stat_func=bs_stats.mean, num_iterations=10000, iteration_batch_size=300,
                         return_distribution=True)
bs_data_b = bs.bootstrap(data_b.groupby(data['user_id']).target.sum().values,
                         stat_func=bs_stats.mean, num_iterations=10000, iteration_batch_size=300,
                         return_distribution=True)

# Task:
# 1. Generate data in the following format:
# group, bucket_id, user_id, target
# target is the target variable;
# bucket_id ranges from 0 to 10;
Example #22
def compute_stats_vs(data_path, n1, n2):
    run = {}
    run['perfs'] = []
    scores_our = []
    for i, trial in enumerate(sorted(os.listdir(data_path))):

        if len(trial) < 3:
            print('Extracting: ', trial)
            filename = data_path + trial + '/progress.json'
            # print(filename)
            steps, eval_rewards = extract_performances(filename)
            run['perfs'].append(eval_rewards)
            scores = np.loadtxt(data_path + trial + '/scores')
            scores_our.append(scores[1, 0])

    n_runs = len(run['perfs'])
    assert n1 + n2 == n_runs
    max_steps = 0
    for i in range(n_runs):
        if len(run['perfs'][i]) > max_steps:
            max_steps = len(run['perfs'][i])
    eval_perfs = np.empty([n_runs, max_steps]) * (np.nan)
    for i in range(n_runs):
        eval_perfs[i, :len(run['perfs'][i])] = run['perfs'][i]

    # steps = steps[:700]
    inds = np.array(range(n_runs))
    # np.random.shuffle(inds)

    # modify GEP source: shift the GEP runs (the last n2 rows) by 251 steps
    for i in range(n1, n_runs):
        eval_perfs[i, 251:] = eval_perfs[i, :750]
        eval_perfs[i, :251] = np.zeros([251])

    print(inds)

    # compute statistics
    data1_litt = np.nanmean(eval_perfs[inds[:n1]][:, -10:], axis=1)
    data2_litt = np.nanmean(eval_perfs[inds[n1:]][:, -10:], axis=1)
    ks_litt, p_ks_litt = ks_2samp(data1_litt, data2_litt)
    ttest_litt, p_ttest_litt = ttest_ind(data1_litt,
                                         data2_litt,
                                         equal_var=False)
    data1_our = np.array(scores_our[:n1])
    data2_our = np.array(scores_our[n1:])
    ks_our, p_ks_our = ks_2samp(data1_our, data2_our)
    ttest_our, p_ttest_our = ttest_ind(data1_our, data2_our, equal_var=False)

    # estimation of confidence intervals with bootstrap method, https://github.com/facebookincubator/bootstrapped
    res_litt = bs.bootstrap_ab(data1_litt,
                               data2_litt,
                               bs_stats.mean,
                               bs_compare.difference,
                               num_iterations=10000)
    sign_litt = np.sign(res_litt.upper_bound) == np.sign(res_litt.lower_bound)
    res_our = bs.bootstrap_ab(data1_our,
                              data2_our,
                              bs_stats.mean,
                              bs_compare.difference,
                              num_iterations=10000)
    sign_our = np.sign(res_our.upper_bound) == np.sign(res_our.lower_bound)

    toSave = np.zeros([4, 4])
    toSave[0:2, :] = np.array([[ks_litt, p_ks_litt, ttest_litt, p_ttest_litt],
                               [ks_our, p_ks_our, ttest_our, p_ttest_our]])
    toSave[2, :] = np.array([
        res_litt.value, res_litt.lower_bound, res_litt.upper_bound,
        sign_litt * np.sign(res_litt.lower_bound)
    ])
    toSave[3, :] = np.array([
        res_our.value, res_our.lower_bound, res_our.upper_bound,
        sign_our * np.sign(res_our.lower_bound)
    ])
    np.savetxt(data_path + 'stats', toSave)
Example #23
def run_simulation(data):
    lift = 1.25
    results = []
    for i in range(3000):
        random.shuffle(data)
        test = data[:len(data) // 2] * lift
        ctrl = data[len(data) // 2:]
        results.append(
            bas.bootstrap_ab(test, ctrl, bs_stats.mean,
                             bs_compare.percent_change))
    return results


def run_simulation2(data, data2):
    results = []
    for i in range(3000):
        results.append(
            bas.bootstrap_ab(data, data2, bs_stats.mean,
                             bs_compare.percent_change))
    return results


print(
    "bootstrap a/b",
    bas.bootstrap_ab(final_average_return, final_average_return2,
                     bs_stats.mean, bs_compare.percent_change))
bab = bas.bootstrap_ab(final_average_return, final_average_return2,
                       bs_stats.mean, bs_compare.percent_change)
x = run_simulation2(final_average_return, final_average_return2)
bootstrap_ab = bs_power.power_stats(x)
print("power analysis bootstrap a/b")
print(bootstrap_ab)

print("***********************************************")
print("Bootstrap analysis")
print("***********************************************")
print("***Arg1****")
sim = bas.bootstrap(final_average_return, stat_func=bs_stats.mean)
print("%.2f (%.2f, %.2f)" % (sim.value, sim.lower_bound, sim.upper_bound))
print("***Arg2****")
sim = bas.bootstrap(final_average_return2, stat_func=bs_stats.mean)