def get_relative_diff(df, name, metric, test_group_column='test_group', experiment_unit='event_date', control='CONTROL', test='TEST'):
    """Return per-unit relative differences between test and control groups.

    Pivots `df` so each experiment unit (row) has one column per test group,
    computes test/control - 1 per unit, and summarizes with descriptive
    statistics plus a one-sample runs test on the relative differences.

    Parameters
    ----------
    df : DataFrame with columns [test_group_column, experiment_unit, metric].
    name : label stored in the result's 'name' column.
    metric : column of `df` to compare between groups.
    test_group_column, experiment_unit : grouping/pivot columns.
    control, test : values of `test_group_column` identifying the two groups.

    Returns
    -------
    dict with keys 'data' (DataFrame: control, test, rel_diff, name) and
    'stats' (sums, means, DescrStatsW summary, CI for the mean rel_diff,
    and a runs test for serial dependence).
    """
    # Pivot: rows = experiment units, columns = (metric, test_group).
    summary_df = df.set_index([test_group_column, experiment_unit]).unstack(test_group_column)
    # NOTE(review): `idx` is assumed to be pd.IndexSlice defined at module
    # level — confirm against the file's imports.
    control_df = summary_df.loc[:, idx[metric, control]]
    test_df = summary_df.loc[:, idx[metric, test]]
    control_df = control_df[control_df.notnull()]
    test_df = test_df[test_df.notnull()]
    # rel_diff = test/control - 1; divide-by-zero infinities are dropped
    # along with any unit missing either side.
    result = pd.concat([control_df, test_df, test_df/control_df - 1,], axis=1).replace([np.inf, -np.inf], np.nan).dropna()
    result['name'] = name
    result.columns = ['control', 'test', 'rel_diff', 'name']
    stats = dict()
    stats['control_metric_sum'] = control_df.sum()
    stats['test_metric_sum'] = test_df.sum()
    stats['control_metric_mean'] = control_df.mean()
    stats['test_metric_mean'] = test_df.mean()
    # NOTE(review): `sms` is presumably statsmodels.stats.api — verify.
    stats['desc'] = sms.DescrStatsW(result['rel_diff'])
    stats['nobs'] = stats['desc'].nobs
    stats['mean'] = stats['desc'].mean
    # Two-sided t-based confidence interval for the mean relative difference.
    stats['metric_delta_lcl'], stats['metric_delta_ucl'] = stats['desc'].tconfint_mean()
    # Wald–Wolfowitz runs test: checks rel_diff for non-randomness over units.
    stats['runs_test'] = runstest_1samp(result['rel_diff'], cutoff='mean')
    return dict(data=result, stats=stats)
def check_residuals_correlation(self, alpha=0.05):
    """Test the model residuals for serial correlation via a runs test.

    Flattens the (list-of-lists) residuals, applies the Wald–Wolfowitz
    one-sample runs test, and returns ``(p_value, passed)`` where
    ``passed`` is True when the p-value exceeds *alpha* (i.e. no
    evidence against randomness of the residual signs).
    """
    flat = []
    for chunk in self.get_residuals():
        flat.extend(chunk)
    _, p_value = runstest_1samp(flat)
    # p <= alpha rejects randomness -> the check fails.
    return (p_value, p_value > alpha)
def autocor_test(data):
    """Run autocorrelation diagnostics on a series and plot its ACF.

    Uses sqrt(len(data)) lags, computes the ACF with Ljung-Box Q-stats
    and 95% confidence bounds, runs a one-sample runs test around the
    mean, shows an ACF plot, and returns (acf_result, runstest_result).
    """
    lag = int((len(data))**0.5)
    acf_result = acf(data, nlags=lag, qstat=True, alpha=0.05)
    runstest_result = runstest_1samp(data, cutoff='mean')
    plot_acf(data, lags=lag, alpha=0.05)
    # NOTE(review): `sns.plt` was removed in seaborn 0.9 — this only runs on
    # old seaborn versions. Consider `import matplotlib.pyplot as plt` and
    # calling plt.ylim / plt.show directly.
    sns.plt.ylim(-0.15, 0.15)
    sns.plt.show()
    return (acf_result, runstest_result)
def test_runstest():
    """Runs test against reference values from R (tseries::runs.test).

    Only the two-sided statistic/p-value pair is exercised; the
    one-sided reference values are kept for documentation.
    """
    x = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])
    z_twosided = 1.386750
    pvalue_twosided = 0.1655179
    z_greater = 1.386750
    pvalue_greater = 0.08275893
    z_less = 1.386750
    pvalue_less = 0.917241
    expected = [z_twosided, pvalue_twosided]

    assert_almost_equal(
        np.array(Runs(x).runs_test(correction=False)), expected, decimal=6
    )
    # runstest_1samp must agree with the Runs class on the same indicator.
    assert_almost_equal(runstest_1samp(x, correction=False), expected, decimal=6)

    # Jitter around +-0.5: |noise| < 0.5 never flips a sign, so the run
    # pattern — and hence the statistic — is unchanged for every cutoff.
    x2 = x - 0.5 + np.random.uniform(-0.1, 0.1, size=len(x))
    for cut in (0, "mean", x2.mean()):
        assert_almost_equal(
            runstest_1samp(x2, cutoff=cut, correction=False), expected, decimal=6
        )

    # The 'median' keyword must agree with an explicit numeric median cutoff.
    assert_almost_equal(
        runstest_1samp(x2, cutoff="median", correction=False),
        runstest_1samp(x2, cutoff=np.median(x2), correction=False),
        decimal=6,
    )
def test_runstest_2sample():
    """Regression test for runstest_2samp.

    Reference values were checked with Monte Carlo simulation and look
    reasonable. Also verifies the stacked-array call signature and the
    relation to the one-sample test.
    """
    x = [31.8, 32.8, 39.2, 36, 30, 34.5, 37.4]
    y = [35.5, 27.6, 21.3, 24.8, 36.7, 30]
    y[-1] += 1e-6  # avoid a tie (x contains 30) that creates a warning

    groups = np.concatenate((np.zeros(len(x)), np.ones(len(y))))
    xy = np.concatenate((x, y))

    res = runstest_2samp(x, y)
    res1 = (0.022428065200812752, 0.98210649318649212)
    assert_allclose(res, res1, rtol=1e-6)

    # Check the stacked-array signature: one combined sample plus a group
    # indicator must match the two-sample call. (Bug fix: the original
    # called runstest_2samp(x, y) again here and never used `groups`,
    # so the stacked form was not actually tested.)
    res2 = runstest_2samp(xy, groups=groups)
    assert_allclose(res2, res, rtol=1e-6)

    res_1s = runstest_1samp(xy)
    assert_allclose(res_1s, res1, rtol=1e-6)

    # Check cutoff: the default cutoff is the mean.
    res2_1s = runstest_1samp(xy, xy.mean())
    assert_allclose(res2_1s, res_1s, rtol=1e-6)
def test_runstest(reset_randomstate):
    """Runs test against reference values from R's tseries::runs.test.

    Currently only the two-sided result is compared; the one-sided
    reference numbers are retained for documentation.
    """
    x = np.array([1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1])
    z_twosided = 1.386750
    pvalue_twosided = 0.1655179
    z_greater = 1.386750
    pvalue_greater = 0.08275893
    z_less = 1.386750
    pvalue_less = 0.917241
    ref = [z_twosided, pvalue_twosided]

    assert_almost_equal(
        np.array(Runs(x).runs_test(correction=False)), ref, decimal=6
    )
    # runstest_1samp should produce the same indicator-based result.
    assert_almost_equal(runstest_1samp(x, correction=False), ref, decimal=6)

    # Small jitter around +-0.5 cannot change any sign, so every cutoff
    # below reproduces the same run pattern and statistic.
    x2 = x - 0.5 + np.random.uniform(-0.1, 0.1, size=len(x))
    for cutoff in (0, 'mean', x2.mean()):
        assert_almost_equal(
            runstest_1samp(x2, cutoff=cutoff, correction=False), ref, decimal=6
        )

    # 'median' keyword vs. explicit numeric median must agree.
    assert_almost_equal(
        runstest_1samp(x2, cutoff='median', correction=False),
        runstest_1samp(x2, cutoff=np.median(x2), correction=False),
        decimal=6,
    )
def test_mean_cutoff():
    """runstest_1samp with the 'mean' cutoff keyword on a stepped sample.

    Expected values are a regression benchmark.
    """
    sample = [1] * 5 + [2] * 6 + [3] * 8
    expected = (-4.007095978613213, 6.146988816717466e-05)
    observed = runstest_1samp(sample, cutoff="mean", correction=False)
    assert_almost_equal(expected, observed)
def test_numeric_cutoff():
    """runstest_1samp with an explicit numeric cutoff on a stepped sample.

    Expected values are a regression benchmark.
    """
    sample = [1] * 5 + [2] * 6 + [3] * 8
    expected = (-3.944254410803499, 8.004864125547193e-05)
    observed = runstest_1samp(sample, cutoff=2, correction=False)
    assert_almost_equal(expected, observed)
def test_WaldWolfowitzTest_xResultMean(self):
    """The wald_wolfowitz_test statistic must match statsmodels'
    runstest_1samp (no continuity correction) at the mean cutoff."""
    sample = np.random.randint(0, 100, 50)
    stat_ours, p_ours = wald_wolfowitz_test(sample, cutoff='mean')
    stat_ref, p_ref = runstest_1samp(sample, cutoff='mean', correction=False)
    # Only the test statistic is compared here, not the p-value.
    assert pytest.approx(stat_ref) == stat_ours
def def_runstest(x1, cutoff1='mean', correction1=True):
    """Thin wrapper around statsmodels' runstest_1samp.

    Forwards the data, cutoff, and correction flag unchanged and returns
    the (statistic, p-value) result as-is.
    """
    return runstest_1samp(x=x1, cutoff=cutoff1, correction=correction1)
def _residual_runs(self, residual, **kwargs):
    """Runs test of the residuals against a fixed cutoff of 0.

    Returns only the p-value; the test statistic is discarded.
    Extra keyword arguments are forwarded to runstest_1samp.
    """
    _, p_value = runstest_1samp(residual, 0, correction=False, **kwargs)
    return p_value