def feature_check(self, feature_subset=None, variant_subset=None,
                  threshold=0.05, percentiles=[2.5, 97.5],
                  assume_normal=True, min_observations=20,
                  nruns=10000, relative=False):
    """
    Compute the feature check on all features, and return a dataframe with a
    column indicating whether the feature check passed.

    Args:
        feature_subset (list): Features for which to perform the check.
            If set to None, all features are used.
        variant_subset (list): Variants to compare against the baseline.
            If set to None, all variants are used.
        threshold (float): p-value used for dismissing the null hypothesis
            (i.e. no difference between features for variant and baseline).
        percentiles (list): list of percentile values to compute.
        assume_normal (boolean): specifies whether normal distribution
            assumptions can be made.
        min_observations (integer): minimum observations necessary. If
            fewer observations are given, then NaN is returned.
        nruns (integer): number of bootstrap runs to perform if
            assume_normal is set to False.
        relative (boolean): if True, intervals are returned as distances
            below and above the mean rather than as absolute values.

    Returns:
        pd.DataFrame containing a boolean column named 'ok' stating whether
        the feature check passed for the feature and variant combination
        specified in the corresponding columns.
    """
    # TODO: this should return a results structure, like all the others?
    # - can monkey patch it with a function to just get the 'ok' column

    res = Results(None, metadata=self.metadata)

    # Check if data exists. TODO: necessary or guaranteed by __init__()?
    if self.features is None:
        warnings.warn('Empty data set entered to analysis. '
                      'Returning empty result set')
        return res
    # TODO: Check if subsets are valid
    # If no subsets are given, use the superset
    if feature_subset is None:
        feature_subset = self.feature_names
    if variant_subset is None:
        variant_subset = self.variant_names

    deltaWorker = statx.make_delta(assume_normal, percentiles,
                                   min_observations, nruns, relative)
    # Iterate over the features
    for feature in feature_subset:
        df = _feature_check_all_variants(
            self.features.reset_index()[['entity', 'variant', feature]],
            self.baseline_variant, deltaWorker)
        if res.df is None:
            res.df = df
        else:
            res.df = res.df.append(df)

    return res
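# Usage sketch (illustrative only; `exp` is an assumed, already-populated
# Experiment instance, not something defined in this module): running the
# feature check on all features and keeping the variant/feature rows that
# failed.
#
#   res = exp.feature_check(threshold=0.05, assume_normal=True)
#   if res.df is not None:
#       failed = res.df[~res.df['ok']]   # rows where the check did not pass
#       print(failed.index.tolist())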
def delta(self, method='fixed_horizon', kpi_subset=None, derived_kpis=None,
          assume_normal=True, percentiles=[2.5, 97.5], min_observations=20,
          nruns=10000, relative=False, weighted_kpis=None):
    """
    Wrapper for the different delta functions, with 'method' being one of:

        'fixed_horizon':    self.fixed_horizon_delta()
        'group_sequential': self.group_sequential_delta()
        'bayes_factor':     self.bayes_factor_delta()
        'bayes_precision':  self.bayes_precision_delta()
    """
    res = Results(None, metadata=self.metadata)
    res.metadata['reference_kpi'] = {}
    res.metadata['weighted_kpis'] = weighted_kpis
    reference_kpis = {}
    pattern = '([a-zA-Z][0-9a-zA-Z_]*)'

    # determine the complete KPI name list
    kpis_to_analyse = self.kpi_names.copy()
    if derived_kpis is not None:
        for dk in derived_kpis:
            kpiName = dk['name']
            kpis_to_analyse.update([kpiName])
            # assuming the columns in the formula can all be cast to float,
            # create the derived KPI as an additional column
            self.kpis.loc[:, kpiName] = eval(re.sub(
                pattern, r'self.kpis.\1.astype(float)', dk['formula']))
            # store the reference metric name to be used in the weighting
            # TODO: only works for ratios
            res.metadata['reference_kpi'][kpiName] = re.sub(
                pattern + '/', '', dk['formula'])
            reference_kpis[kpiName] = re.sub(pattern + '/', '', dk['formula'])

    if kpi_subset is not None:
        kpis_to_analyse.intersection_update(kpi_subset)
    self.dbg(3, 'kpis_to_analyse: ' + ','.join(kpis_to_analyse))

    defaultArgs = [res, kpis_to_analyse]
    deltaWorker = statx.make_delta(assume_normal, percentiles,
                                   min_observations, nruns, relative)
    method_table = {
        'fixed_horizon': (self.fixed_horizon_delta,
                          defaultArgs + [reference_kpis, weighted_kpis,
                                         deltaWorker]),
        'group_sequential': (self.group_sequential_delta, defaultArgs),
        'bayes_factor': (self.bayes_factor_delta, defaultArgs),
        'bayes_precision': (self.bayes_precision_delta, defaultArgs),
    }

    if method not in method_table:
        raise NotImplementedError("Method '%s' is not implemented." % method)
    f, vargs = method_table[method]
    return f(*vargs)
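# Dispatch sketch (illustrative; `exp` and the KPI names are assumed): every
# call routes through the same wrapper, which looks the method up in
# `method_table` and forwards the shared arguments.
#
#   res_fh = exp.delta(method='fixed_horizon', kpi_subset=['conversion'])
#   # group_sequential additionally needs 'estimatedSampleSize' in the
#   # experiment metadata (see the corresponding test below)
#   res_gs = exp.delta(method='group_sequential', kpi_subset=['conversion'])
#   res_dr = exp.delta(method='fixed_horizon',
#                      derived_kpis=[{'name': 'return_rate',
#                                     'formula': 'returned/ordered'}],
#                      weighted_kpis=['return_rate'])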
def test_group_sequential_delta_no_estimatedSampleSize(self):
    """ group_sequential_delta() should raise an error if
    estimatedSampleSize is not present in the metadata.
    """
    with self.assertRaises(ValueError):
        res = Results(None, metadata=self.data.metadata)
        result = self.data.group_sequential_delta(
            result=res, kpis_to_analyse=['normal_same'])
def test_group_sequential_delta(self):
    """ Check that Experiment.group_sequential_delta() functions properly. """
    # this should work
    # check that the subclassing works
    self.assertTrue(isinstance(self.data, Experiment))
    self.assertTrue(self.data.baseline_variant == 'B')

    self.data.metadata['estimatedSampleSize'] = 100000
    res = Results(None, metadata=self.data.metadata)
    result = self.data.group_sequential_delta(
        result=res, kpis_to_analyse=['normal_same'])

    # check uplift
    df = result.statistic('delta', 'uplift', 'normal_same')
    np.testing.assert_almost_equal(df.loc[:, ('value', 'A')],
                                   np.array([0.033053]), decimal=5)
    # check stop
    df = result.statistic('delta', 'stop', 'normal_same')
    np.testing.assert_equal(df.loc[:, 'value'], np.array([[0, 0]]))
    # check sample_size
    df = result.statistic('delta', 'sample_size', 'normal_same')
    np.testing.assert_almost_equal(df.loc[:, 'value'],
                                   np.array([[6108, 3892]]), decimal=5)
    # check variant_mean
    df = result.statistic('delta', 'variant_mean', 'normal_same')
    np.testing.assert_almost_equal(df.loc[:, 'value'],
                                   np.array([[0.025219, -0.007833]]),
                                   decimal=5)

    # check that metadata is preserved
    np.testing.assert_equal(True, all(item in result.metadata.items()
                                      for item in self.testmetadata.items()))
def trend(self, kpi_subset=None, variant_subset=None, time_step=1,
          cumulative=True, assume_normal=True, percentiles=[2.5, 97.5],
          min_observations=20, nruns=10000, relative=False, **kwargs):
    """
    Compute time deltas (with confidence bounds) on all applicable metrics,
    and return them in the standard Results format. Does this for all
    non-baseline variants.

    Args:
        kpi_subset (list): KPIs for which to perform delta computations.
            If set to None, all KPIs are used.
        variant_subset (list): Variants to compare against the baseline.
            If set to None, all variants are used.
        time_step (integer): time increment over which to aggregate data.
        cumulative (boolean): whether the trend is calculated using data
            from the start up to the current bin, or from the current bin
            only.
        assume_normal (boolean): specifies whether normal distribution
            assumptions can be made.
        percentiles (list): list of percentile values to compute.
        min_observations (integer): minimum observations necessary. If
            fewer observations are given, then NaN is returned.
        nruns (integer): number of bootstrap runs to perform if
            assume_normal is set to False.
        relative (boolean): If relative==True, then the values will be
            returned as distances below and above the mean, respectively,
            rather than the absolute values. In this case, the interval is
            mean-ret_val[0] to mean+ret_val[1]. This is more useful in many
            situations because it corresponds with the sem() and std()
            functions.

    Returns:
        Results object containing the computed deltas.
    """
    res = Results(None, metadata=self.metadata)
    # Check if data exists
    if self.kpis_time is None:
        warnings.warn('Empty data set entered to analysis. '
                      'Returning empty result set')
        res.metadata['warnings']['Experiment.trend'] = \
            UserWarning('Empty data set entered to analysis.')
        return res
    # Check if the time column is present in the dataframe
    if 'time_since_treatment' not in self.kpis_time.index.names:
        warnings.warn('Need time column for trend analysis. '
                      'Returning empty result set')
        res.metadata['warnings']['Experiment.trend'] = \
            UserWarning('Need time column for trend analysis.')
        return res
    # TODO: Check if subsets are valid
    # If no subsets are given, use the superset
    if kpi_subset is None:
        kpi_subset = self.kpi_names
    if variant_subset is None:
        variant_subset = self.variant_names
    # Remove baseline from variant_subset
    variant_subset = variant_subset - set([self.baseline_variant])
    # Iterate over the kpis and variants
    # TODO: Check if this is the right approach
    deltaWorker = statx.make_delta(assume_normal, percentiles,
                                   min_observations, nruns, relative)
    for kpi in kpi_subset:
        for variant in variant_subset:
            # TODO: Add metadata to res.metadata
            res_obj = time_dependent_deltas(
                self.kpis_time.reset_index()[
                    ['variant', 'time_since_treatment', kpi]],
                variants=[variant, self.baseline_variant],
                time_step=time_step,
                cumulative=cumulative,
                deltaWorker=deltaWorker)
            res.df = pd.concat([res.df, res_obj.df])

    # NB: assuming all binning objects based on the same feature are the same
    res.set_binning(res_obj.binning)
    # Return the result object
    return res
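# Usage sketch (illustrative; `exp` with a `kpis_time` frame indexed by
# 'time_since_treatment' is assumed): with cumulative=True each time bin uses
# all data from the start of the experiment up to that bin, while
# cumulative=False computes a delta for each bin in isolation. The binning
# used is stored on the result via res.set_binning().
#
#   res = exp.trend(kpi_subset=['conversion'], time_step=1, cumulative=True)
#   print(res.df)   # one set of delta rows per time bin and variant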
def sga(self, feature_subset=None, kpi_subset=None, variant_subset=None,
        n_bins=4, binning=None, assume_normal=True, percentiles=[2.5, 97.5],
        min_observations=20, nruns=10000, relative=False, **kwargs):
    """
    Compute subgroup deltas (with confidence bounds) on all applicable
    metrics, and return them in the standard Results format. Does this for
    all non-baseline variants.

    Args:
        feature_subset (list): Features which are binned and for which to
            perform delta computations. If set to None, all features are
            used.
        kpi_subset (list): KPIs for which to perform delta computations.
            If set to None, all KPIs are used.
        variant_subset (list): Variants to compare against the baseline.
            If set to None, all variants are used.
        n_bins (integer): number of bins to create if binning is None.
        binning (list of bins): preset binning (if None, binning is created).
        assume_normal (boolean): specifies whether normal distribution
            assumptions can be made.
        percentiles (list): list of percentile values to compute.
        min_observations (integer): minimum observations necessary. If
            fewer observations are given, then NaN is returned.
        nruns (integer): number of bootstrap runs to perform if
            assume_normal is set to False.
        relative (boolean): If relative==True, then the values will be
            returned as distances below and above the mean, respectively,
            rather than the absolute values. In this case, the interval is
            mean-ret_val[0] to mean+ret_val[1]. This is more useful in many
            situations because it corresponds with the sem() and std()
            functions.

    Returns:
        Results object containing the computed deltas.
    """
    res = Results(None, metadata=self.metadata)
    # Check if data exists
    if self.metrics is None:
        warnings.warn('Empty data set entered to analysis. '
                      'Returning empty result set')
        return res
    # TODO: Check if subsets are valid
    # If no subsets are given, use the superset
    if kpi_subset is None:
        kpi_subset = self.kpi_names
    if feature_subset is None:
        feature_subset = self.feature_names
    if variant_subset is None:
        variant_subset = self.variant_names
    # Remove baseline from variant_subset
    variant_subset = variant_subset - set([self.baseline_variant])
    # Iterate over the kpis, features and variants
    # TODO: Check if this is the right approach,
    # groupby and unstack as an alternative?
    deltaWorker = statx.make_delta(assume_normal, percentiles,
                                   min_observations, nruns, relative)
    for kpi in kpi_subset:
        for feature in feature_subset:
            res.df = pd.concat([
                res.df,
                subgroup_deltas(
                    self.metrics.reset_index()[['variant', feature, kpi]],
                    variants=['dummy', self.baseline_variant],
                    n_bins=n_bins,
                    deltaWorker=deltaWorker).df])

    # Return the result object
    return res
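# Usage sketch (illustrative; `exp` with a `metrics` frame and an 'age'
# feature column are assumed): subgroup analysis bins the feature (here into
# 4 bins) and computes one delta per bin, so the result frame carries the
# subgroup as an additional index level.
#
#   res = exp.sga(feature_subset=['age'], kpi_subset=['conversion'], n_bins=4)
#   print(res.df)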
def _binned_deltas(df, variants, n_bins=4, binning=None, cumulative=False,
                   label_format_str='{standard}',
                   deltaWorker=statx.make_delta()):
    """
    Calculates the feature-dependent delta. Only used internally. All
    calculation by subgroup_deltas() and time_dependent_deltas() is pushed
    here.

    Args:
        df (pandas DataFrame): 3 columns. The order of the columns is
            expected to be variant, feature, kpi.
        variants (list of 2): 2 entries, first entry is the treatment
            variant, second entry specifies the baseline variant.
            TODO: currently only the baseline variant is extracted from this
            list and deltas are calculated for all variants (see bug
            OCTO-869)
        n_bins (integer): number of bins to create if binning is None.
        binning (list of bins): preset binning (if None, binning is created).
        cumulative (boolean): whether to accumulate data (for time trend
            analysis).
        label_format_str (string): format string for the binning label
            function.
        deltaWorker: a closure generated by statistics.make_delta(), holding
            the numerical parameters of the delta calculations.

    Returns:
        pandas.DataFrame: bin-name, mean, percentile and corresponding values
        list: binning used
    """
    # Perform binning of the feature column
    if binning is None:
        binning = binmodule.create_binning(df.iloc[:, 1], nbins=n_bins)
    if cumulative and type(binning) is not binmodule.NumericalBinning:
        raise ValueError(
            'Cannot calculate cumulative deltas for non-numerical binnings')

    # Apply binning to the feature column
    df.loc[:, '_tmp_bin_'] = binning.label(data=df.iloc[:, 1],
                                           format_str=label_format_str)

    # Initialize result object as data frame with bin keys as index
    def do_delta(f, bin_name):
        # find the corresponding bin in the baseline variant
        baseline_metric = f.iloc[:, 2][(f.iloc[:, 0] == variants[1])]
        out_df = pd.DataFrame()
        for v in f['variant'].unique():
            v_metric = f.iloc[:, 2][(f.iloc[:, 0] == v)]
            df = delta_to_dataframe_all_variants(
                f.columns[2], *deltaWorker(x=v_metric, y=baseline_metric))
            # add new index levels for variant and binning
            df['_tmp_bin_'] = bin_name
            df['variant'] = v
            df.set_index(['variant', '_tmp_bin_'], append=True, inplace=True)
            df = df.reorder_levels(['variant', '_tmp_bin_', 'metric',
                                    'subgroup_metric', 'subgroup',
                                    'statistic', 'pctile'])
            out_df = out_df.append(df)
        return out_df

    # Actual calculation
    result = pd.DataFrame()
    unique_tmp_bins = df['_tmp_bin_'].unique()
    for bin in unique_tmp_bins:
        if not cumulative:
            result = result.append(do_delta(df[df['_tmp_bin_'] == bin], bin))
        else:
            result = result.append(do_delta(df[df['_tmp_bin_'] <= bin], bin))

    # unstack variant
    result = result.unstack(0)
    # drop _tmp_bin_ in the input data frame
    del df['_tmp_bin_']

    result.index = result.index.swaplevel(0, 2)
    result.index = result.index.swaplevel(0, 1)

    # Return result and binning
    return Results(result, {'binning': binning})
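# Internal-call sketch (illustrative; the three-column frame `df3` and the
# variant labels are assumed): callers first build a delta closure with
# statx.make_delta(), which freezes the numerical settings, and then hand it
# to _binned_deltas() so the binning loop stays free of statistical
# parameters. Arguments follow the positional order used elsewhere in this
# module (assume_normal, percentiles, min_observations, nruns, relative).
#
#   worker = statx.make_delta(True, [2.5, 97.5], 20, 10000, False)
#   res = _binned_deltas(df3[['variant', 'age', 'conversion']],
#                        variants=['A', 'B'], n_bins=4, deltaWorker=worker)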
def delta(self, kpi_subset=None, derived_kpis=None, variant_subset=None,
          assume_normal=True, percentiles=[2.5, 97.5], min_observations=20,
          nruns=10000, relative=False, weighted_kpis=None):
    """
    Compute deltas (with confidence bounds) on all applicable KPIs, and
    return them in the standard Results format. Does this for all
    non-baseline variants.

    TODO: Extend this function to metrics again with type-checking

    Args:
        kpi_subset (list): KPIs for which to perform delta computations.
            If set to None, all KPIs are used.
        derived_kpis (list): definition of additional KPIs derived from the
            primary ones, e.g.
            [{'name': 'return_rate', 'formula': 'returned/ordered'}]
        variant_subset (list): Variants to compare against the baseline.
            If set to None, all variants are used.
        assume_normal (boolean): specifies whether normal distribution
            assumptions can be made.
        percentiles (list): list of percentile values to compute.
        min_observations (integer): minimum observations necessary. If
            fewer observations are given, then NaN is returned.
        nruns (integer): number of bootstrap runs to perform if
            assume_normal is set to False.
        relative (boolean): If relative==True, then the values will be
            returned as distances below and above the mean, respectively,
            rather than the absolute values. In this case, the interval is
            mean-ret_val[0] to mean+ret_val[1]. This is more useful in many
            situations because it corresponds with the sem() and std()
            functions.
        weighted_kpis (list): a list of metric names. For each metric in the
            list, the weighted mean and confidence intervals are calculated,
            which is equivalent to the overall metric. Otherwise the metrics
            are unweighted; this weighted approach is only relevant for
            ratios.

    Returns:
        Results object containing the computed deltas.
    """
    res = Results(None, metadata=self.metadata)
    res.metadata['reference_kpi'] = {}
    res.metadata['weighted_kpis'] = weighted_kpis

    # determine the complete KPI name list
    kpis_to_analyse = self.kpi_names.copy()
    if derived_kpis is not None:
        for dk in derived_kpis:
            kpis_to_analyse.update([dk['name']])
            # assuming the columns in the formula can all be cast to float,
            # create the derived KPI as an additional column
            self.kpis.loc[:, dk['name']] = eval(re.sub(
                '([a-zA-Z_]+)', r'self.kpis.\1.astype(float)',
                dk['formula']))
            # store the reference metric name to be used in the weighting
            # TODO: only works for ratios
            res.metadata['reference_kpi'][dk['name']] = re.sub(
                '([a-zA-Z_]+)/', '', dk['formula'])

    if kpi_subset is not None:
        kpis_to_analyse.intersection_update(kpi_subset)
    self.dbg(3, 'kpis_to_analyse: ' + ','.join(kpis_to_analyse))

    treat_variants = self.variant_names - set([self.baseline_variant])
    self.dbg(3, 'treat_variants before subset: ' + ','.join(treat_variants))
    if variant_subset is not None:
        treat_variants.intersection_update(variant_subset)
    self.dbg(3, 'treat_variants to analyse: ' + ','.join(treat_variants))

    for mname in kpis_to_analyse:
        # the weighted approach implies that derived_kpis is not None
        if weighted_kpis is not None and mname in weighted_kpis:
            reference_kpi = res.metadata['reference_kpi'][mname]
            weighted = True
        else:
            reference_kpi = mname
            weighted = False

        try:
            with warnings.catch_warnings(record=True) as w:
                # Cause all warnings to always be triggered.
                warnings.simplefilter("always")
                df = _delta_all_variants(
                    self.kpis.reset_index()[
                        ['entity', 'variant', mname, reference_kpi]],
                    self.baseline_variant,
                    assume_normal=assume_normal,
                    percentiles=percentiles,
                    min_observations=min_observations,
                    nruns=nruns,
                    relative=relative,
                    weighted=weighted)
                if len(w):
                    res.metadata['warnings']['Experiment.delta'] = \
                        w[-1].message

                if res.df is None:
                    res.df = df
                else:
                    res.df = res.df.append(df)
        except ValueError as e:
            res.metadata['errors']['Experiment.delta'] = e

    res.calculate_prob_uplift_over_zero()
    return res
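# Usage sketch (illustrative; `exp` and the column names 'returned' and
# 'ordered' are assumed): a derived ratio KPI is declared via a formula over
# existing KPI columns, and listing it in weighted_kpis switches its mean and
# confidence intervals to the weighted calculation against the reference KPI
# (the denominator of the formula).
#
#   res = exp.delta(kpi_subset=['return_rate'],
#                   derived_kpis=[{'name': 'return_rate',
#                                  'formula': 'returned/ordered'}],
#                   weighted_kpis=['return_rate'],
#                   assume_normal=True)
#   print(res.metadata['reference_kpi'])   # expected: {'return_rate': 'ordered'}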
def _binned_deltas(df, variants, n_bins=4, binning=None, cumulative=False,
                   assume_normal=True, percentiles=[2.5, 97.5],
                   min_observations=20, nruns=10000, relative=False,
                   label_format_str='{standard}'):
    """
    Calculates the feature-dependent delta. Only used internally. All
    calculation by subgroup_deltas() and time_dependent_deltas() is pushed
    here.

    Args:
        df (pandas DataFrame): 3 columns. The order of the columns is
            expected to be variant, feature, kpi.
        variants (list of 2): 2 entries, first entry is the treatment
            variant, second entry specifies the baseline variant.
            TODO: currently only the baseline variant is extracted from this
            list and deltas are calculated for all variants (see bug
            OCTO-869)
        n_bins (integer): number of bins to create if binning is None.
        binning (list of bins): preset binning (if None, binning is created).
        cumulative (boolean): whether to accumulate data (for time trend
            analysis).
        assume_normal (boolean): specifies whether normal distribution
            assumptions can be made.
        percentiles (list): list of percentile values to compute.
        min_observations (integer): minimum number of observations
            necessary. If fewer observations are given, then NaN is returned.
        nruns (integer): number of bootstrap runs to perform if
            assume_normal is set to False.
        relative (boolean): If relative==True, then the values will be
            returned as distances below and above the mean, respectively,
            rather than the absolute values. In this case, the interval is
            mean-ret_val[0] to mean+ret_val[1]. This is more useful in many
            situations because it corresponds with the sem() and std()
            functions.
        label_format_str (string): format string for the binning label
            function.

    Returns:
        pandas.DataFrame: bin-name, mean, percentile and corresponding values
        list: binning used
    """
    # Perform binning of the feature column
    if binning is None:
        binning = binmodule.create_binning(df.iloc[:, 1], nbins=n_bins)
    if cumulative and type(binning) is not binmodule.NumericalBinning:
        raise ValueError(
            'Cannot calculate cumulative deltas for non-numerical binnings')

    # Apply binning to the feature column
    df.loc[:, '_tmp_bin_'] = binning.label(data=df.iloc[:, 1],
                                           format_str=label_format_str)

    # Initialize result object as data frame with bin keys as index
    def do_delta(f, bin_name):
        # find the corresponding bin in the baseline variant
        baseline_metric = f.iloc[:, 2][(f.iloc[:, 0] == variants[1])]
        out_df = pd.DataFrame()
        for v in f['variant'].unique():
            v_metric = f.iloc[:, 2][(f.iloc[:, 0] == v)]
            df = delta_to_dataframe_all_variants(
                f.columns[2],
                *statx.delta(x=v_metric, y=baseline_metric,
                             assume_normal=assume_normal,
                             percentiles=percentiles,
                             min_observations=min_observations,
                             nruns=nruns,
                             relative=relative))
            # add new index levels for variant and binning
            df['_tmp_bin_'] = bin_name
            df['variant'] = v
            df.set_index(['variant', '_tmp_bin_'], append=True, inplace=True)
            df = df.reorder_levels(['variant', '_tmp_bin_', 'metric',
                                    'subgroup_metric', 'subgroup',
                                    'statistic', 'pctile'])
            out_df = out_df.append(df)
        return out_df

    # Actual calculation
    result = pd.DataFrame()
    unique_tmp_bins = df['_tmp_bin_'].unique()
    for bin in unique_tmp_bins:
        if not cumulative:
            result = result.append(do_delta(df[df['_tmp_bin_'] == bin], bin))
        else:
            result = result.append(do_delta(df[df['_tmp_bin_'] <= bin], bin))

    # unstack variant
    result = result.unstack(0)
    # drop _tmp_bin_ in the input data frame
    del df['_tmp_bin_']

    result.index = result.index.swaplevel(0, 2)
    result.index = result.index.swaplevel(0, 1)

    # Return result and binning
    return Results(result, {'binning': binning})
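# Internal-call sketch (illustrative; the three-column frame `df3` is
# assumed): unlike the closure-based variant above, this version of
# _binned_deltas() takes the statistical settings directly and forwards them
# to statx.delta() for every bin.
#
#   res = _binned_deltas(df3[['variant', 'age', 'conversion']],
#                        variants=['A', 'B'], n_bins=4,
#                        assume_normal=True, min_observations=20)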