def test__trend__index_levels(self):
    """Verify that trend() returns a result with the expected index levels."""
    np.random.seed(0)
    metrics, metadata = generate_random_data()
    metrics['time_since_treatment'] = metrics['treatment_start_time']
    exp = Experiment('B', metrics, metadata, [4, 6])

    # Run the trend analysis.
    result = exp.trend()

    # Expected MultiIndex levels, in positional order.
    expected_levels = [
        pd.Index([u'normal_same', u'normal_shifted',
                  u'normal_shifted_by_feature', u'normal_unequal_variance'],
                 dtype='object', name=u'metric'),
        pd.Index([u'-'], dtype='object', name=u'subgroup_metric'),
        pd.Index([str(x) for x in np.arange(10.)],
                 dtype='object', name=u'time'),
        pd.Float64Index([], dtype='float64', name=u'subgroup'),
        pd.Index([u'sample_size', u'uplift', u'uplift_pctile',
                  u'variant_mean'], dtype='object', name=u'statistic'),
        pd.Float64Index([2.5, 97.5], dtype='float64', name=u'pctile')
    ]
    actual_levels = list(result.df.index.levels)

    # Compare every level against its expectation.
    for position in range(6):
        np.testing.assert_array_equal(expected_levels[position],
                                      actual_levels[position])
def test__trend__computation(self):
    """Verify the numerical output of trend() for the 'normal_shifted' KPI."""
    np.random.seed(0)
    metrics, metadata = generate_random_data()
    metrics['time_since_treatment'] = metrics['treatment_start_time']
    exp = Experiment('B', metrics, metadata, [4, 6])

    # Run the trend analysis.
    result = exp.trend()

    def stat(name):
        # Fetch one trend statistic for the 'normal_shifted' metric.
        return result.statistic('trend', name, 'normal_shifted')

    # Uplift per time bucket.
    np.testing.assert_almost_equal(
        stat('uplift').loc[:, ('value', 'A')],
        np.array([-1.009421, -0.847400, -1.119885, -1.042597, -0.868819,
                  -1.091165, -0.952307, -1.028234, -0.978774, -0.985696]),
        decimal=5)

    # Uplift confidence-interval percentiles (two per time bucket).
    np.testing.assert_almost_equal(
        stat('uplift_pctile').loc[:, ('value', 'A')],
        np.array([-1.137482, -0.881360, -0.970678, -0.724122, -1.245795,
                  -0.993975, -1.178494, -0.906699, -0.993683, -0.743954,
                  -1.225361, -0.956969, -1.082180, -0.822435, -1.151715,
                  -0.904753, -1.095209, -0.862340, -1.109407, -0.861985]),
        decimal=5)

    # Sample sizes per variant and time bucket.
    np.testing.assert_almost_equal(
        stat('sample_size').loc[:, 'value'],
        np.column_stack(([649, 595, 600, 590, 625, 602, 607, 608, 616, 616],
                         [405, 401, 378, 362, 377, 369, 406, 392, 414, 388])),
        decimal=5)

    # Per-variant means.
    np.testing.assert_almost_equal(
        stat('variant_mean').loc[:, 'value'],
        np.column_stack(([0.005761, 0.057487, -0.067107, 0.001125, 0.093085,
                          -0.067894, -0.030500, -0.060996, 0.016257, -0.006091],
                         [1.015182, 0.904887, 1.052778, 1.043721, 0.961904,
                          1.023271, 0.921807, 0.967238, 0.995031, 0.979605])),
        decimal=5)
def test__trend__index_levels(self):
    """Verify that trend() returns a result with the expected index levels."""
    np.random.seed(0)
    metrics, metadata = generate_random_data()
    metrics['time_since_treatment'] = metrics['treatment_start_time']
    exp = Experiment('B', metrics, metadata, [4, 6])

    # Run the trend analysis.
    result = exp.trend()
    actual_levels = list(result.df.index.levels)

    # Expected MultiIndex levels, in positional order.
    expected_levels = [
        pd.Index([u'normal_same', u'normal_shifted',
                  u'normal_shifted_by_feature', u'normal_unequal_variance'],
                 dtype='object', name=u'metric'),
        pd.Index([u'-'], dtype='object', name=u'subgroup_metric'),
        pd.Index(range(10), dtype='object', name=u'time'),
        pd.Float64Index([], dtype='float64', name=u'subgroup'),
        pd.Index([u'sample_size', u'uplift', u'uplift_pctile',
                  u'variant_mean'], dtype='object', name=u'statistic'),
        pd.Float64Index([2.5, 97.5], dtype='float64', name=u'pctile')
    ]

    # Compare every level against its expectation.
    for position in range(6):
        np.testing.assert_array_equal(expected_levels[position],
                                      actual_levels[position])
def test__trend__computation(self):
    """Verify the numerical output of trend() for the 'normal_shifted' KPI."""
    np.random.seed(0)
    metrics, metadata = generate_random_data()
    metrics['time_since_treatment'] = metrics['treatment_start_time']
    exp = Experiment('B', metrics, metadata, [4, 6])

    # Run the trend analysis.
    result = exp.trend()

    # Expected values, one entry per time bucket (pctiles: two per bucket).
    expected_uplift = np.array(
        [-1.009421, -0.847400, -1.119885, -1.042597, -0.868819,
         -1.091165, -0.952307, -1.028234, -0.978774, -0.985696])
    expected_pctile = np.array(
        [-1.137482, -0.881360, -0.970678, -0.724122, -1.245795,
         -0.993975, -1.178494, -0.906699, -0.993683, -0.743954,
         -1.225361, -0.956969, -1.082180, -0.822435, -1.151715,
         -0.904753, -1.095209, -0.862340, -1.109407, -0.861985])
    expected_sizes = np.column_stack(
        ([649, 595, 600, 590, 625, 602, 607, 608, 616, 616],
         [405, 401, 378, 362, 377, 369, 406, 392, 414, 388]))
    expected_means = np.column_stack(
        ([0.005761, 0.057487, -0.067107, 0.001125, 0.093085,
          -0.067894, -0.030500, -0.060996, 0.016257, -0.006091],
         [1.015182, 0.904887, 1.052778, 1.043721, 0.961904,
          1.023271, 0.921807, 0.967238, 0.995031, 0.979605]))

    # check uplift
    frame = result.statistic('trend', 'uplift', 'normal_shifted')
    np.testing.assert_almost_equal(frame.loc[:, ('value', 'A')],
                                   expected_uplift, decimal=5)

    # check pctile
    frame = result.statistic('trend', 'uplift_pctile', 'normal_shifted')
    np.testing.assert_almost_equal(frame.loc[:, ('value', 'A')],
                                   expected_pctile, decimal=5)

    # check samplesize
    frame = result.statistic('trend', 'sample_size', 'normal_shifted')
    np.testing.assert_almost_equal(frame.loc[:, 'value'],
                                   expected_sizes, decimal=5)

    # check variant_mean
    frame = result.statistic('trend', 'variant_mean', 'normal_shifted')
    np.testing.assert_almost_equal(frame.loc[:, 'value'],
                                   expected_means, decimal=5)
def setUp(self):
    """Build the shared Experiment fixture for all StatisticsTestCases.

    Seeds numpy's RNG so that randomized algorithms behave
    deterministically across test runs.
    """
    np.random.seed(0)
    self.data = Experiment('B', *generate_random_data(), dbg=Dbg(dbg_lvl=5))
    # Create time column. TODO: Do this nicer
    self.data.kpis['time_since_treatment'] = self.data.features['treatment_start_time']
    # Make the time column part of the KPI index.
    self.data.kpis.set_index('time_since_treatment', append=True,
                             inplace=True)
def setUp(self):
    """Build the shared Experiment fixture for all TestCases.

    Seeds numpy's RNG so that randomized algorithms behave
    deterministically across test runs.
    """
    np.random.seed(0)
    self.data = Experiment('B', *generate_random_data())
    # Create time column. TODO: Do this nicer
    self.data.kpis['time_since_treatment'] = self.data.features['treatment_start_time']
    # Make the time column part of the KPI index.
    self.data.kpis.set_index('time_since_treatment', append=True,
                             inplace=True)
def setUp(self):
    """Create the test folder and fill it with generated metrics/metadata files."""
    # Ensure the test folder exists.
    if not os.path.exists(TEST_FOLDER):
        os.makedirs(TEST_FOLDER)

    # Generate random metrics and their metadata.
    (metrics, metadata) = td.generate_random_data()

    # Persist metrics as a gzipped CSV inside the test folder.
    metrics.to_csv(path_or_buf=TEST_FOLDER + '/metrics.csv.gz',
                   compression='gzip')

    # Persist metadata as JSON alongside it.
    with open(TEST_FOLDER + '/metadata.json', 'w') as fp:
        json.dump(metadata, fp)
def setUp(self):
    """Build the shared Experiment fixture for all StatisticsTestCases.

    Seeds numpy's RNG so that randomized algorithms behave
    deterministically across test runs.
    """
    np.random.seed(0)
    self.data = Experiment('B', *generate_random_data(), dbg=Dbg(dbg_lvl=5))
    # Create time column. TODO: Do this nicer
    self.data.kpis['time_since_treatment'] = self.data.features['treatment_start_time']
    # Make the time column part of the KPI index.
    self.data.kpis.set_index('time_since_treatment', append=True,
                             inplace=True)
    # Metadata as emitted by generate_random_data(), kept for later checks.
    self.testmetadata = {'primary_KPI': 'normal_shifted',
                         'source': 'simulated',
                         'experiment': 'random_data_generation'}
# NOTE(review): the next three statements are the tail of a test method whose
# `def` line lies outside this chunk — kept verbatim; confirm against the
# full file before moving them.
res = self.data.delta()
df = res.relative_uplift('delta', 'normal_same')
np.testing.assert_almost_equal(df, np.array([[-4.219601, 0]]), decimal=5)


def test_prob_uplift_over_zero_single_metric(self):
    """Check the CI-to-probability conversion for a single metric."""
    res = self.data.delta(kpi_subset=['normal_same'])
    # Probability of uplift over zero as computed by delta() itself.
    observed = res.df.loc[pd.IndexSlice[:, :, :, 'prob_uplift_over_0'],
                          'value']
    np.testing.assert_almost_equal(observed,
                                   np.array([[0.946519, np.nan]]),
                                   decimal=5)


def test_prob_uplift_over_zero_multiple_metric(self):
    """Check the CI-to-probability conversion for multiple metrics."""
    res = self.data.delta(kpi_subset=['normal_same', 'normal_shifted'])
    observed = res.df.loc[pd.IndexSlice[:, :, :, 'prob_uplift_over_0'],
                          'value']
    np.testing.assert_almost_equal(observed,
                                   np.array([[0.946519, np.nan],
                                             [0, np.nan]]),
                                   decimal=5)


if __name__ == '__main__':
    # Ad-hoc manual run (unittest.main() deliberately not invoked here).
    np.random.seed(0)
    exp = Experiment('B', *generate_random_data())
    res = exp.delta(['normal_shifted'])
def test__trend__computation(self):
    """Verify trend() numbers in both non-cumulative and cumulative mode."""
    np.random.seed(0)
    metrics, metadata = generate_random_data()
    metrics['time_since_treatment'] = metrics['treatment_start_time']
    exp = Experiment('B', metrics, metadata, [4, 6])

    def stat(result, name):
        # Fetch one trend statistic for the 'normal_shifted' metric.
        return result.statistic('trend', name, 'normal_shifted')

    # --- Non-cumulative results -------------------------------------------
    result = exp.trend(cumulative=False)

    # check uplift
    np.testing.assert_almost_equal(
        stat(result, 'uplift').loc[:, ('value', 'A')],
        np.array([-1.009421, -0.847400, -1.119885, -1.042597, -0.868819,
                  -1.091165, -0.952307, -1.028234, -0.978774, -0.985696]),
        decimal=5)

    # check pctile
    np.testing.assert_almost_equal(
        stat(result, 'uplift_pctile').loc[:, ('value', 'A')],
        np.array([-1.137482, -0.881360, -0.970678, -0.724122, -1.245795,
                  -0.993975, -1.178494, -0.906699, -0.993683, -0.743954,
                  -1.225361, -0.956969, -1.082180, -0.822435, -1.151715,
                  -0.904753, -1.095209, -0.862340, -1.109407, -0.861985]),
        decimal=5)

    # check samplesize
    np.testing.assert_almost_equal(
        stat(result, 'sample_size').loc[:, 'value'],
        np.column_stack(([649, 595, 600, 590, 625, 602, 607, 608, 616, 616],
                         [405, 401, 378, 362, 377, 369, 406, 392, 414, 388])),
        decimal=5)

    # check variant_mean
    np.testing.assert_almost_equal(
        stat(result, 'variant_mean').loc[:, 'value'],
        np.column_stack(([0.005761, 0.057487, -0.067107, 0.001125, 0.093085,
                          -0.067894, -0.030500, -0.060996, 0.016257, -0.006091],
                         [1.015182, 0.904887, 1.052778, 1.043721, 0.961904,
                          1.023271, 0.921807, 0.967238, 0.995031, 0.979605])),
        decimal=5)

    # --- Cumulative results (the default) ---------------------------------
    result = exp.trend()

    # check uplift
    np.testing.assert_almost_equal(
        stat(result, 'uplift').loc[:, ('value', 'A')],
        np.array([-1.009421, -0.929807, -0.991088, -1.003129, -0.976023,
                  -0.994857, -0.988167, -0.993119, -0.991571, -0.990986]),
        decimal=5)

    # check pctile
    np.testing.assert_almost_equal(
        stat(result, 'uplift_pctile').loc[:, ('value', 'A')],
        np.array([-1.137482, -0.881360, -1.018794, -0.840820, -1.063820,
                  -0.918356, -1.067283, -0.938976, -1.033110, -0.918936,
                  -1.047413, -0.942302, -1.036888, -0.939446, -1.038455,
                  -0.947784, -1.033861, -0.949280, -1.031002, -0.950970]),
        decimal=5)

    # check samplesize
    np.testing.assert_almost_equal(
        stat(result, 'sample_size').loc[:, 'value'],
        np.column_stack(
            ([649, 1244, 1844, 2434, 3059, 3661, 4268, 4876, 5492, 6108],
             [405, 806, 1184, 1546, 1923, 2292, 2698, 3090, 3504, 3892])),
        decimal=5)

    # check variant_mean
    np.testing.assert_almost_equal(
        stat(result, 'variant_mean').loc[:, 'value'],
        np.column_stack(([0.005761, 0.030501, -0.001258, -0.000681, 0.018477,
                          0.004274, -0.000671, -0.008193, -0.005451, -0.005515],
                         [1.015182, 0.960308, 0.989830, 1.002449, 0.994500,
                          0.999132, 0.987496, 0.984926, 0.986120, 0.985470])),
        decimal=5)

    # check metadata is preserved through the analysis
    np.testing.assert_equal(
        True,
        all(item in result.metadata.items()
            for item in self.testmetadata.items()))
# NOTE(review): this chunk begins mid-call — the enclosing function's `def`
# and the opening of the call closed on the first line lie outside the
# visible source; indentation below is a best-effort reconstruction.
                           min_observations=min_observations,
                           nruns=nruns, relative=relative)
        # Accumulate the per-feature result frames into one DataFrame.
        res.df = pd.concat([res.df, res_obj.df])
        # NB: assuming all binning objects based on the same feature are the same
        res.set_binning(res_obj.binning)
    # Return the result object
    return res


if __name__ == '__main__':
    from tests.tests_core.test_data import generate_random_data
    np.random.seed(0)
    metrics, metadata = generate_random_data()
    metrics['time_since_treatment'] = metrics['treatment_start_time']
    exp = Experiment('B', metrics, metadata, [4, 6])
    # Ad-hoc run: delta analysis of a weighted, derived KPI
    # (ratio of the two generated normal metrics).
    res = exp.delta(kpi_subset=['derived'],
                    derived_kpis=[{
                        'name': 'derived',
                        'formula': 'normal_same/normal_shifted'
                    }],
                    weighted_kpis=['derived'])
    # result = time_dependent_deltas(data.metrics.reset_index()
    # [['variant','time_since_treatment','normal_shifted']],variants=['A','B']).df.loc[:,1]
    # result = result.reset_index('subgroup',drop=True)
    # result['subgroup'] = np.nan
    # result = data.feature_check()
def detect_features(metrics):
    """Automatically detect which of the metrics are features.

    NOTE(review): the two branches return different types — a boolean
    Series when a time dimension is present, otherwise a set of column
    names; confirm that callers handle both.
    """
    from warnings import warn

    if 'time_since_treatment' in metrics:
        # TODO: test this!
        # Assuming that time is the only optional extra index for kpis:
        # a column whose value never varies within a primary-index group
        # is flagged as a feature.
        unique_counts = metrics.groupby(ExperimentData.primary_indices).agg(
            lambda col: len(col.unique())).max()
        # TODO: drop time dimension from features (i.e. take first value)
        return unique_counts == 1

    # No time dimension: fall back to the known feature-metric name list.
    matched = {name for name in metrics
               if name.lower() in ExperimentData.known_feature_metrics}
    warn('not tested')
    return matched


if __name__ == '__main__':
    from tests.tests_core.test_data import generate_random_data
    np.random.seed(0)
    metrics, meta = generate_random_data()
    D = ExperimentData(metrics, meta, 'default')