def run_analysis(features_file, kpis_file, metadata_file):
    """Load KPIs and features from file and run ExpAn delta and subgroup analyses.

    Args:
        features_file: features CSV path; falsy to fall back to 'default' features
        kpis_file: KPIs CSV path
        metadata_file: metadata file path, parsed by parse_metadata()

    Returns:
        tuple: (delta analysis results, subgroup analysis results)
    """
    kpis = pd.read_csv(kpis_file)
    # 'default' is the sentinel ExpAn accepts when no feature table is supplied
    features = pd.read_csv(features_file) if features_file else 'default'
    metadata = parse_metadata(metadata_file)
    # NOTE(review): the original also built an unused ExperimentData(...) with the
    # same arguments and printed `features` for debugging; both removed as dead code.
    exp = Experiment(baseline_variant=metadata['baseline_variant'],
                     metrics_or_kpis=kpis,
                     metadata=metadata,
                     features=features)
    return (exp.delta(), exp.sga())
def fixed_horizon(eid):
    """Run a fixed-horizon delta analysis of 'converted' for experiment `eid`.

    Uses only data observed before 100 time units since start, aggregated
    as the per-entity/variant mean conversion rate. Relies on the
    module-level `params` and `metadata` objects.
    """
    experiment_data = load_experiment(eid)
    window = experiment_data[experiment_data.time_since_start < 100]
    # mean (not sum): per-entity conversion rate within the window
    conversion = (window
                  .groupby(['entity', 'variant'])
                  .converted.mean()
                  .reset_index())
    experiment = Experiment(params[eid]['baseline'], conversion, metadata)
    return experiment.delta(kpi_subset=['converted'])
def early_stopping(eid, method, day_index):
    """Evaluate an early-stopping rule for experiment `eid` at one peek.

    Takes the data snapshot before `day_index`, computes per-entity/variant
    mean of 'converted', and runs the requested early-stopping delta method.
    Side effect: writes 'estimatedSampleSize' into the module-level
    `metadata` dict. Also prints the maximum sample size (kept from the
    original debugging behavior).

    Args:
        eid: experiment id, a key into the module-level `params`
        method: a 'bayes*' method name or 'group_sequential'
        day_index: peeking day; only rows with time_since_start < day_index are used

    Returns:
        tuple: (day_index, stop statistic, uplift statistic) for the
        experiment's non-baseline variant.

    Raises:
        NotImplementedError: for any other `method`.
    """
    experiment_data = load_experiment(eid)
    max_sample_size = float(len(np.unique(experiment_data.entity)))
    print(max_sample_size)
    metadata['estimatedSampleSize'] = max_sample_size

    snapshot = experiment_data[experiment_data.time_since_start < day_index]
    # mean (not sum): per-entity conversion rate at this peek
    kpi = (snapshot
           .groupby(['entity', 'variant'])
           .converted.mean()
           .reset_index())
    current_sample_size = kpi.shape[0]

    exp = Experiment(params[eid]['baseline'], kpi, metadata)
    if 'bayes' in method:
        res = exp.delta(method=method,
                        kpi_subset=['converted'],
                        distribution='normal')
    elif method == 'group_sequential':
        res = exp.delta(method='group_sequential',
                        kpi_subset=['converted'],
                        information_fraction=current_sample_size / max_sample_size)
    else:
        raise NotImplementedError

    variant = params[eid]['variant']
    stop_value = res.statistic('delta', 'stop', 'converted') \
        .loc[:, ('value', variant)].values[0]
    uplift_value = res.statistic('delta', 'uplift', 'converted') \
        .loc[:, ('value', variant)].values[0]
    return (day_index, stop_value, uplift_value)
def run_analysis(features_file, kpis_file, metadata_file):
    """Load KPIs and features from file and run ExpAn delta and subgroup analyses.

    Args:
        features_file: features CSV path; falsy to fall back to 'default' features
        kpis_file: KPIs CSV path
        metadata_file: metadata file path, parsed by parse_metadata()

    Returns:
        tuple: (delta analysis results, subgroup analysis results)
    """
    kpis = pd.read_csv(kpis_file)
    # 'default' is the sentinel ExpAn accepts when no feature table is supplied
    features = pd.read_csv(features_file) if features_file else 'default'
    metadata = parse_metadata(metadata_file)
    # NOTE(review): the original also built an unused ExperimentData(...) with the
    # same arguments and printed `features` for debugging; both removed as dead code.
    exp = Experiment(baseline_variant=metadata['baseline_variant'],
                     metrics_or_kpis=kpis,
                     metadata=metadata,
                     features=features)
    return (exp.delta(), exp.sga())
        # Tail of a test method whose `def` line is above this chunk (not
        # visible here): checks the relative uplift of 'normal_same'.
        res = self.data.delta()
        df = res.relative_uplift('delta', 'normal_same')
        np.testing.assert_almost_equal(df, np.array([[-4.219601, 0]]), decimal=5)

    def test_prob_uplift_over_zero_single_metric(self):
        """Check if the conversion from confidence intervals to probability is correct for one metric."""
        res = self.data.delta(kpi_subset=['normal_same'])
        #df = prob_uplift_over_zero_single_metric(res.df, self.data.baseline_variant)
        # NaN is expected for the baseline variant's own probability.
        np.testing.assert_almost_equal(
            res.df.loc[pd.IndexSlice[:, :, :, 'prob_uplift_over_0'], 'value'],
            np.array([[0.946519, np.nan]]), decimal=5)

    def test_prob_uplift_over_zero_multiple_metric(self):
        """Check if the conversion from confidence intervals to probability is correct for multiple metrics."""
        res = self.data.delta(kpi_subset=['normal_same', 'normal_shifted'])
        #res.calculate_prob_uplift_over_zero()
        np.testing.assert_almost_equal(
            res.df.loc[pd.IndexSlice[:, :, :, 'prob_uplift_over_0'], 'value'],
            np.array([[0.946519, np.nan], [0, np.nan]]), decimal=5)


if __name__ == '__main__':
    #unittest.main()
    # Ad-hoc manual run kept alongside the unit tests; fixed seed for
    # reproducible random data.
    np.random.seed(0)
    exp = Experiment('B', *generate_random_data())
    res = exp.delta(['normal_shifted'])
df.set_index(Results.mandatory_index_levels, inplace=True) # df = df.unstack('variant') # df.columns = df.columns.swaplevel(0,1) return df if __name__ == '__main__': #pass np.random.seed(0) from tests.tests_core.test_data import generate_random_data from expan.core.experiment import Experiment data = Experiment('B', *generate_random_data()) res = data.delta(kpi_subset=['normal_same', 'normal_shifted']) # df = res.calculate_prob_uplift_over_zero() # from test_core.test_results import load_example_results # aa = load_example_results() # order_means = aa.means('orders').iloc[0] # net_sales_var = aa.statistic('var', 'net_sales') # import numpy as np # res = Results(None) # res.append_delta('dummy', 'A', *(0.1,{'2.5':0.01,'97.5':0.2},1000,1000)) # res.append_delta('dummy', 'B', *(0,{'2.5':np.nan,'97.5':np.nan},1000,1000)) # from expan.core.experiment import Experiment # # np.random.seed(0)
# Before/after comparison script: runs the same derived-KPI delta analysis on
# the dataset before and after some removal step (presumably header/outlier
# cleanup — TODO confirm with the CSV producer).
df_before = pd.read_csv('nextgen_header.csv')
print("BEFORE")
print("data size:", len(df_before))
print("number of variants:", len(df_before['variant'].unique()))
print("number of client ids:", len(df_before['entity'].unique()))
exp = Experiment(control_variant_name='control',
                 data=df_before,
                 metadata={},
                 report_kpi_names=['revenue_per_user'],
                 # derived KPI: per-user revenue computed from raw columns
                 derived_kpis=[{
                     'name': 'revenue_per_user',
                     'formula': 'revenue/users'
                 }])
print(exp.delta())

###################################################

df_after = pd.read_csv('nextgen_header_after_removal.csv')
print("AFTER")
print("data size:", len(df_after))
print("number of variants:", len(df_after['variant'].unique()))
print("number of client ids:", len(df_after['entity'].unique()))
# NOTE(review): this chunk is truncated mid-call below; the remainder of the
# Experiment(...) construction continues past the visible source.
exp = Experiment(control_variant_name='control',
                 data=df_after,
                 metadata={},
                 report_kpi_names=['revenue_per_user'],
                 derived_kpis=[{
                     'name': 'revenue_per_user',