Beispiel #1
0
def run_analysis(features_file, kpis_file, metadata_file):
    """Load KPIs and features from file and run delta and subgroup analyses.

    Args:
        features_file: features file path; a falsy value means no features
            file and the string 'default' is passed through instead
        kpis_file: kpis file path
        metadata_file: metadata file path

    Returns:
        tuple: (delta analysis results, subgroup analysis results)
    """
    kpis = pd.read_csv(kpis_file)
    if features_file:
        features = pd.read_csv(features_file)
    else:
        features = 'default'
    print(features)  # NOTE(review): debug leftover — consider logging instead
    metadata = parse_metadata(metadata_file)

    # Fixed: the original also built an ExperimentData object here that was
    # never used (dead code); only the Experiment is needed.
    exp = Experiment(baseline_variant=metadata['baseline_variant'],
                     metrics_or_kpis=kpis,
                     metadata=metadata,
                     features=features)

    return (exp.delta(), exp.sga())
Beispiel #2
0
def fixed_horizon(eid):
    """Run a fixed-horizon delta analysis of the 'converted' KPI.

    Args:
        eid: experiment id used to load data and look up its parameters

    Returns:
        delta analysis results restricted to the 'converted' KPI
    """
    experiment_data = load_experiment(eid)
    # Only look at the first 100 time units of the experiment.
    early_window = experiment_data[experiment_data.time_since_start < 100]
    # Per-entity mean conversion within each variant.
    converted_kpi = (early_window
                     .groupby(['entity', 'variant'])
                     .converted.mean()
                     .reset_index())
    experiment = Experiment(params[eid]['baseline'], converted_kpi, metadata)
    return experiment.delta(kpi_subset=['converted'])
Beispiel #3
0
def early_stopping(eid, method, day_index):
    """Evaluate one early-stopping peek at day_index for an experiment.

    Args:
        eid: experiment id used to load data and look up its parameters
        method: delta method; any name containing 'bayes', or
            'group_sequential'
        day_index: only rows with time_since_start < day_index are analysed

    Returns:
        tuple: (day_index, stop statistic, uplift statistic) for the
        treatment variant of the 'converted' KPI

    Raises:
        NotImplementedError: for any unsupported method name
    """
    experiment_data = load_experiment(eid)
    max_sample_size = float(len(np.unique(experiment_data.entity)))
    print(max_sample_size)
    # NOTE: mutates the module-level metadata dict.
    metadata['estimatedSampleSize'] = max_sample_size

    # Simulate a peek on day `day_index`.
    peek = experiment_data[experiment_data.time_since_start < day_index]

    # Per-entity mean conversion within each variant.
    kpi = peek.groupby(['entity', 'variant']).converted.mean().reset_index()

    current_sample_size = kpi.shape[0]
    exp = Experiment(params[eid]['baseline'], kpi, metadata)

    if 'bayes' in method:
        res = exp.delta(method=method,
                        kpi_subset=['converted'],
                        distribution='normal')
    elif method == 'group_sequential':
        information_fraction = current_sample_size / max_sample_size
        res = exp.delta(method='group_sequential',
                        kpi_subset=['converted'],
                        information_fraction=information_fraction)
    else:
        raise NotImplementedError

    # Select the treatment variant's value from the multi-indexed columns.
    variant_column = ('value', params[eid]['variant'])
    stop_value = res.statistic('delta', 'stop',
                               'converted').loc[:, variant_column].values[0]
    uplift_value = res.statistic('delta', 'uplift',
                                 'converted').loc[:, variant_column].values[0]
    return (day_index, stop_value, uplift_value)
Beispiel #4
0
def run_analysis(features_file, kpis_file, metadata_file):
    """Load KPIs and features from file and run delta and subgroup analyses.

    Args:
        features_file: features file path; a falsy value means the string
            'default' is used instead of a features frame
        kpis_file: kpis file path
        metadata_file: metadata file path

    Returns:
        tuple: (delta analysis results, subgroup analysis results)
    """
    kpis = pd.read_csv(kpis_file)
    features = pd.read_csv(features_file) if features_file else 'default'
    print(features)
    metadata = parse_metadata(metadata_file)

    # NOTE(review): exp_data is never used afterwards — looks like dead code;
    # kept here to preserve behavior (the constructor may validate inputs).
    exp_data = ExperimentData(metrics=kpis,
                              metadata=metadata,
                              features=features)

    exp = Experiment(baseline_variant=metadata['baseline_variant'],
                     metrics_or_kpis=kpis,
                     metadata=metadata,
                     features=features)

    return (exp.delta(), exp.sga())
Beispiel #5
0
        res = self.data.delta()
        df = res.relative_uplift('delta', 'normal_same')
        np.testing.assert_almost_equal(df,
                                       np.array([[-4.219601, 0]]),
                                       decimal=5)

    def test_prob_uplift_over_zero_single_metric(self):
        """Check if the conversion from confidence intervals to probability is correct for one metric."""
        res = self.data.delta(kpi_subset=['normal_same'])
        # Pull the prob_uplift_over_0 rows from the multi-indexed results frame.
        actual = res.df.loc[pd.IndexSlice[:, :, :, 'prob_uplift_over_0'],
                            'value']
        expected = np.array([[0.946519, np.nan]])
        np.testing.assert_almost_equal(actual, expected, decimal=5)

    def test_prob_uplift_over_zero_multiple_metric(self):
        """Check if the conversion from confidence intervals to probability is correct for multiple metrics."""
        res = self.data.delta(kpi_subset=['normal_same', 'normal_shifted'])
        # One row of probabilities per KPI, in kpi_subset order.
        actual = res.df.loc[pd.IndexSlice[:, :, :, 'prob_uplift_over_0'],
                            'value']
        expected = np.array([[0.946519, np.nan], [0, np.nan]])
        np.testing.assert_almost_equal(actual, expected, decimal=5)


if __name__ == '__main__':
    # Ad-hoc smoke run: fixed seed so the delta output is reproducible.
    np.random.seed(0)
    experiment = Experiment('B', *generate_random_data())
    res = experiment.delta(['normal_shifted'])
Beispiel #6
0
    df.set_index(Results.mandatory_index_levels, inplace=True)
    # df = df.unstack('variant')
    # df.columns = df.columns.swaplevel(0,1)

    return df


if __name__ == '__main__':
    # Ad-hoc smoke run: fixed seed so the delta output is reproducible.
    np.random.seed(0)

    from tests.tests_core.test_data import generate_random_data
    from expan.core.experiment import Experiment

    experiment = Experiment('B', *generate_random_data())
    res = experiment.delta(kpi_subset=['normal_same', 'normal_shifted'])
    # (Earlier scratch code exercising Results.append_delta and the
    # example-results loaders was removed from this block.)
Beispiel #7
0
# Summarise the raw export and run a revenue-per-user delta analysis on it.
df_before = pd.read_csv('nextgen_header.csv')
print("BEFORE")
print("data size:", len(df_before))
print("number of variants:", len(df_before['variant'].unique()))
print("number of client ids:", len(df_before['entity'].unique()))

exp = Experiment(
    control_variant_name='control',
    data=df_before,
    metadata={},
    report_kpi_names=['revenue_per_user'],
    # revenue_per_user is a derived KPI computed from the raw columns.
    derived_kpis=[{'name': 'revenue_per_user', 'formula': 'revenue/users'}],
)
print(exp.delta())

###################################################

# Same summary on the export after removal.
df_after = pd.read_csv('nextgen_header_after_removal.csv')
print("AFTER")
print("data size:", len(df_after))
print("number of variants:", len(df_after['variant'].unique()))
print("number of client ids:", len(df_after['entity'].unique()))

exp = Experiment(control_variant_name='control',
                 data=df_after,
                 metadata={},
                 report_kpi_names=['revenue_per_user'],
                 derived_kpis=[{
                     'name': 'revenue_per_user',