コード例 #1
0
def get_relative_diff(df,
                      name,
                      metric,
                      test_group_column='test_group',
                      experiment_unit='event_date',
                      control='CONTROL',
                      test='TEST'):
    """Return per-unit relative differences between the test and control groups.

    ``df`` is indexed by ``[test_group_column, experiment_unit]`` and the
    group level is unstacked into the columns, so the ``metric`` values of
    the ``control`` and ``test`` groups can be selected side by side.

    Parameters
    ----------
    df : DataFrame holding ``test_group_column``, ``experiment_unit`` and
        ``metric`` columns.
    name : value written into the ``name`` column of the result.
    metric : column whose values are compared.
    test_group_column : column identifying the group of each row.
    experiment_unit : column identifying the unit the groups are paired on.
    control, test : labels of the two groups inside ``test_group_column``.

    Returns
    -------
    dict
        ``data``: frame with columns ``control``, ``test``, ``rel_diff``
        (``test / control - 1``) and ``name``; rows containing NaN or
        infinite values are dropped.
        ``stats``: sums and means of both groups, the ``sms.DescrStatsW``
        summary of ``rel_diff`` with its ``nobs``, ``mean`` and the bounds
        returned by ``tconfint_mean()``, plus the ``runstest_1samp`` result
        for ``rel_diff`` with ``cutoff='mean'``.
    """
    wide = df.set_index([test_group_column, experiment_unit])
    wide = wide.unstack(test_group_column)

    # Select each group's metric and discard missing observations.
    control_series = wide.loc[:, idx[metric, control]]
    control_series = control_series[control_series.notnull()]
    test_series = wide.loc[:, idx[metric, test]]
    test_series = test_series[test_series.notnull()]

    relative_change = test_series / control_series - 1
    combined = pd.concat([control_series, test_series, relative_change], axis=1)
    # A zero control value yields +/-inf; treat it as missing and drop the row.
    result = combined.replace([np.inf, -np.inf], np.nan).dropna()
    result['name'] = name
    result.columns = ['control', 'test', 'rel_diff', 'name']

    # Sums and means use the null-filtered series, not only the rows that
    # survived into ``result``.
    stats = {
        'control_metric_sum': control_series.sum(),
        'test_metric_sum': test_series.sum(),
        'control_metric_mean': control_series.mean(),
        'test_metric_mean': test_series.mean(),
    }

    rel_diff_desc = sms.DescrStatsW(result['rel_diff'])
    stats['desc'] = rel_diff_desc
    stats['nobs'] = rel_diff_desc.nobs
    stats['mean'] = rel_diff_desc.mean
    lower, upper = rel_diff_desc.tconfint_mean()
    stats['metric_delta_lcl'] = lower
    stats['metric_delta_ucl'] = upper
    stats['runs_test'] = runstest_1samp(result['rel_diff'], cutoff='mean')

    return {'data': result, 'stats': stats}
コード例 #2
0
 def check_residuals_correlation(self, alpha=0.05):
   """Run ``runstest_1samp`` on the flattened residuals.

   The nested sequence returned by ``self.get_residuals()`` is flattened
   into one list before the test is applied.

   Returns a tuple ``(p_value, passed)`` where ``passed`` is ``False``
   when ``p_value <= alpha`` and ``True`` otherwise.
   """
   flat_residuals = []
   for group in self.get_residuals():
     flat_residuals.extend(group)

   # Only the p-value is used; the test statistic is discarded.
   _, p_runs = runstest_1samp(flat_residuals)
   return (p_runs, not (p_runs <= alpha))
コード例 #3
0
def autocor_test(data):
    """Compute autocorrelation diagnostics for ``data`` and plot the ACF.

    The number of lags is the integer part of ``sqrt(len(data))``. The
    function calls ``acf`` (with ``qstat=True`` and ``alpha=0.05``) and
    ``runstest_1samp`` (with ``cutoff='mean'``), then draws ``plot_acf``
    with the y-axis limited to ``[-0.15, 0.15]`` and shows the figure.

    Returns a tuple ``(acf_result, runstest_result)``.
    """
    n_lags = int(len(data) ** 0.5)

    acf_output = acf(data, nlags=n_lags, qstat=True, alpha=0.05)
    runs_output = runstest_1samp(data, cutoff='mean')

    plot_acf(data, lags=n_lags, alpha=0.05)
    # NOTE(review): relies on seaborn exposing ``plt``; confirm the installed
    # seaborn version still provides that attribute.
    pyplot = sns.plt
    pyplot.ylim(-0.15, 0.15)
    pyplot.show()

    return (acf_output, runs_output)
コード例 #4
0
def test_runstest():
    """Check ``Runs.runs_test`` and ``runstest_1samp`` against R reference values."""
    # Comparison numbers from R, tseries, runs.test.
    # Only the two-sided result is checked; the one-sided R values for the
    # same data are kept here for reference:
    #   greater: z = 1.386750, p = 0.08275893
    #   less:    z = 1.386750, p = 0.917241
    expected_two_sided = [1.386750, 0.1655179]

    x = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])

    class_result = np.array(Runs(x).runs_test(correction=False))
    assert_almost_equal(class_result, expected_two_sided, decimal=6)

    # compare with runstest_1samp which should have same indicator
    assert_almost_equal(runstest_1samp(x, correction=False), expected_two_sided, decimal=6)

    # Shift to roughly +/-0.5 and add small noise: the sign pattern around
    # each cutoff below still has to reproduce the reference numbers.
    x2 = x - 0.5 + np.random.uniform(-0.1, 0.1, size=len(x))
    for cutoff in (0, "mean", x2.mean()):
        assert_almost_equal(
            runstest_1samp(x2, cutoff=cutoff, correction=False), expected_two_sided, decimal=6
        )

    # check median
    by_keyword = runstest_1samp(x2, cutoff="median", correction=False)
    by_value = runstest_1samp(x2, cutoff=np.median(x2), correction=False)
    assert_almost_equal(by_keyword, by_value, decimal=6)
コード例 #5
0
def test_runstest_2sample():
    """Regression test for ``runstest_2samp`` and its one-sample counterpart.

    Covers three call styles that must agree: two separate samples, the
    pooled data with a group indicator, and ``runstest_1samp`` on the pooled
    data (with the default cutoff and with the explicit mean).
    """
    # regression test, checked with MonteCarlo and looks reasonable

    x = [31.8, 32.8, 39.2, 36, 30, 34.5, 37.4]
    y = [35.5, 27.6, 21.3, 24.8, 36.7, 30]
    y[-1] += 1e-6  # avoid tie that creates warning

    # Pooled observations and the matching 0/1 group labels.
    xy = np.concatenate((x, y))
    groups = np.concatenate((np.zeros(len(x)), np.ones(len(y))))

    res = runstest_2samp(x, y)
    res1 = (0.022428065200812752, 0.98210649318649212)
    assert_allclose(res, res1, rtol=1e-6)

    # check as stacked array: previously this repeated runstest_2samp(x, y),
    # so the ``groups`` code path was never exercised.
    res2 = runstest_2samp(xy, groups=groups)
    assert_allclose(res2, res, rtol=1e-6)

    res_1s = runstest_1samp(xy)
    assert_allclose(res_1s, res1, rtol=1e-6)
    # check cutoff
    res2_1s = runstest_1samp(xy, xy.mean())
    assert_allclose(res2_1s, res_1s, rtol=1e-6)
コード例 #6
0
def test_runstest_2sample():
    """Regression test: every way of calling the runs test must agree.

    The two-sample test is run on separate samples and on the stacked data
    with a group indicator; the one-sample test is run on the stacked data
    with the default cutoff and with the mean passed explicitly.
    """
    # regression test, checked with MonteCarlo and looks reasonable
    expected = (0.022428065200812752, 0.98210649318649212)

    x = [31.8, 32.8, 39.2, 36, 30, 34.5, 37.4]
    y = [35.5, 27.6, 21.3, 24.8, 36.7, 30]
    y[-1] += 1e-6  # avoid tie that creates warning

    stacked = np.concatenate((x, y))
    groups = np.concatenate((np.zeros(len(x)), np.ones(len(y))))

    separate_result = runstest_2samp(x, y)
    assert_allclose(separate_result, expected, rtol=1e-6)

    # check as stacked array. The original repeated the (x, y) call here and
    # left ``groups`` unused, so the stacked interface was not tested at all.
    stacked_result = runstest_2samp(stacked, groups=groups)
    assert_allclose(stacked_result, separate_result, rtol=1e-6)

    one_sample_result = runstest_1samp(stacked)
    assert_allclose(one_sample_result, expected, rtol=1e-6)

    # check cutoff
    explicit_cutoff_result = runstest_1samp(stacked, stacked.mean())
    assert_allclose(explicit_cutoff_result, one_sample_result, rtol=1e-6)
コード例 #7
0
def test_runstest(reset_randomstate):
    """Compare ``Runs.runs_test`` and ``runstest_1samp`` with R reference values.

    ``reset_randomstate`` is not used in the body; presumably it is a pytest
    fixture that controls the NumPy random state -- verify in conftest.
    """
    # comparison numbers from R, tseries, runs.test
    # currently only 2-sided used; the one-sided R values for the same data:
    #   greater: z = 1.386750, p = 0.08275893
    #   less:    z = 1.386750, p = 0.917241
    two_sided = [1.386750, 0.1655179]

    def check_two_sided(result):
        # Every variant below must reproduce the same z statistic and p-value.
        assert_almost_equal(result, two_sided, decimal=6)

    x = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])

    check_two_sided(np.array(Runs(x).runs_test(correction=False)))

    # compare with runstest_1samp which should have same indicator
    check_two_sided(runstest_1samp(x, correction=False))

    noise = np.random.uniform(-0.1, 0.1, size=len(x))
    x2 = x - 0.5 + noise
    check_two_sided(runstest_1samp(x2, cutoff=0, correction=False))
    check_two_sided(runstest_1samp(x2, cutoff='mean', correction=False))
    check_two_sided(runstest_1samp(x2, cutoff=x2.mean(), correction=False))

    # check median
    median_by_name = runstest_1samp(x2, cutoff='median', correction=False)
    median_by_value = runstest_1samp(x2,
                                     cutoff=np.median(x2),
                                     correction=False)
    assert_almost_equal(median_by_name, median_by_value, decimal=6)
コード例 #8
0
def test_mean_cutoff():
    """``runstest_1samp`` with ``cutoff="mean"`` reproduces the stored values."""
    # Five 1s, six 2s and eight 3s, already in sorted order.
    data = [1] * 5 + [2] * 6 + [3] * 8
    actual = runstest_1samp(data, cutoff="mean", correction=False)
    assert_almost_equal((-4.007095978613213, 6.146988816717466e-05), actual)
コード例 #9
0
def test_numeric_cutoff():
    """``runstest_1samp`` with a numeric cutoff of 2 reproduces the stored values."""
    # Five 1s, six 2s and eight 3s, already in sorted order.
    data = [1] * 5 + [2] * 6 + [3] * 8
    actual = runstest_1samp(data, cutoff=2, correction=False)
    assert_almost_equal((-3.944254410803499, 8.004864125547193e-05), actual)
コード例 #10
0
 def test_WaldWolfowitzTest_xResultMean(self):
     """The ``wald_wolfowitz_test`` statistic matches ``runstest_1samp``.

     Both functions are called on the same 50 random integers from
     ``[0, 100)`` with ``cutoff='mean'``; only the first returned value of
     each is compared, the second is ignored.
     """
     sample = np.random.randint(0, 100, 50)
     own_statistic, _ = wald_wolfowitz_test(sample, cutoff='mean')
     reference_statistic, _ = runstest_1samp(sample, cutoff='mean', correction=False)
     assert pytest.approx(reference_statistic) == own_statistic
コード例 #11
0
def def_runstest(x1, cutoff1='mean', correction1=True):
    """Forward the arguments to ``runstest_1samp`` and return its result.

    ``x1``, ``cutoff1`` and ``correction1`` are passed as the ``x``,
    ``cutoff`` and ``correction`` keyword arguments respectively.
    """
    return runstest_1samp(x=x1, cutoff=cutoff1, correction=correction1)
コード例 #12
0
 def _residual_runs(self, residual, **kwargs):
     """Return the p-value of ``runstest_1samp`` applied to ``residual``.

     The call passes ``0`` as the second positional argument (presumably
     the cutoff -- confirm against the ``runstest_1samp`` signature) and
     ``correction=False``; extra keyword arguments are forwarded unchanged.
     """
     # The test statistic is not needed, only the p-value.
     _, p_value = runstest_1samp(residual, 0, correction=False, **kwargs)
     return p_value