Ejemplo n.º 1
0
def _delta_all_variants(metric_df,
                        baseline_variant,
                        assume_normal=True,
                        percentiles=None,
                        min_observations=20,
                        nruns=10000,
                        relative=False,
                        weighted=False):
    """Apply ``statx.delta`` to every variant of a metric against a baseline.

    The frame is split with ``groupby('variant')``. For each group, the
    group's metric column is compared with the baseline variant's metric
    column via ``statx.delta`` and the result is converted with
    ``delta_to_dataframe_all_variants``.

    Args:
        metric_df: data frame with 4 columns, in this order: entity,
            variant, metric, reference_kpi. Columns 1-3 are read by
            position; the grouping uses the column name ``'variant'``.
        baseline_variant: value of the variant column that selects the
            baseline rows.
        assume_normal: forwarded unchanged to ``statx.delta``.
        percentiles: forwarded to ``statx.delta``. ``None`` (the default)
            stands for a freshly created ``[2.5, 97.5]``.
        min_observations: forwarded unchanged to ``statx.delta``.
        nruns: forwarded unchanged to ``statx.delta``.
        relative: forwarded unchanged to ``statx.delta``.
        weighted: if True, weights derived from the reference_kpi column
            are passed to ``statx.delta`` as ``x_weights`` / ``y_weights``;
            if False, no weight arguments are passed at all.

    Returns:
        The result of the per-variant ``groupby(...).apply(...)`` with index
        level 0 unstacked.
    """
    if percentiles is None:
        # Build the default on every call: a list literal in the signature
        # would be a single object shared by all calls and exposed to any
        # mutation performed downstream.
        percentiles = [2.5, 97.5]

    is_baseline = metric_df.iloc[:, 1] == baseline_variant
    baseline_metric = metric_df.iloc[:, 2][is_baseline]
    baseline_weights = metric_df.iloc[:, 3][is_baseline]

    def _rescale(weights):
        # ASSUMPTIONS:
        # - reference KPI is never NaN (such that sum works the same as
        #   np.nansum)
        # - whenever the reference KPI is 0, it means the derived KPI is
        #   NaN, and therefore should not be counted (only works for ratio)
        # Weights are divided by their sum and multiplied by the number of
        # non-zero weights.
        return weights / sum(weights) * sum(weights != 0)

    def do_delta(f):
        delta_kwargs = dict(x=f.iloc[:, 2],
                            y=baseline_metric,
                            assume_normal=assume_normal,
                            percentiles=percentiles,
                            min_observations=min_observations,
                            nruns=nruns,
                            relative=relative)
        if weighted:
            # Only the weighted case passes weights; otherwise statx.delta
            # falls back to its own defaults.
            delta_kwargs['x_weights'] = _rescale(f.iloc[:, 3])
            delta_kwargs['y_weights'] = _rescale(baseline_weights)
        return delta_to_dataframe_all_variants(f.columns[2],
                                               *statx.delta(**delta_kwargs))

    # Actual calculation
    return metric_df.groupby('variant').apply(do_delta).unstack(0)
Ejemplo n.º 2
0
def _delta_all_variants(metric_df, baseline_variant, weighted=False,
                        deltaWorker=statx.make_delta()):
    """Run ``deltaWorker`` for every variant of a metric against a baseline.

    ``metric_df`` has 4 columns: entity, variant, metric, reference_kpi.
    Columns 1-3 are read by position; the grouping uses the column name
    ``'variant'``. ``deltaWorker`` is called once per variant group with the
    group's metric as ``x`` and the baseline variant's metric as ``y``; its
    result is unpacked into ``delta_to_dataframe_all_variants``.

    When ``weighted`` is true, ``x_weights`` / ``y_weights`` are derived from
    the reference_kpi column; otherwise both are the scalar ``1``.

    Returns the per-variant results with index level 0 unstacked.
    """
    is_baseline = metric_df.iloc[:, 1] == baseline_variant
    baseline_metric = metric_df.iloc[:, 2][is_baseline]
    baseline_weights = metric_df.iloc[:, 3][is_baseline]

    def _rescale(raw):
        # ASSUMPTIONS:
        # - reference KPI is never NaN (such that sum works the same as
        #   np.nansum)
        # - whenever the reference KPI is 0, it means the derived KPI is
        #   NaN, and therefore should not be counted (only works for ratio)
        return raw / sum(raw) * sum(raw != 0)

    def _per_variant(group):
        metric_name = group.columns[2]
        if weighted:
            group_w = _rescale(group.iloc[:, 3])
            base_w = _rescale(baseline_weights)
        else:
            group_w = 1
            base_w = 1
        stats = deltaWorker(x=group.iloc[:, 2],
                            y=baseline_metric,
                            x_weights=group_w,
                            y_weights=base_w)
        return delta_to_dataframe_all_variants(metric_name, *stats)

    # Actual calculation
    grouped = metric_df.groupby('variant')
    return grouped.apply(_per_variant).unstack(0)
Ejemplo n.º 3
0
    def do_delta(f, bin_name):
        """Compute the delta of every variant in ``f`` against the baseline.

        For each distinct value of ``f['variant']`` the metric in column 2
        is compared with the baseline metric through ``statx.delta``, and
        the result is converted with ``delta_to_dataframe_all_variants``.
        Each per-variant frame gets ``variant`` and ``_tmp_bin_`` (set to
        ``bin_name``) appended as index levels, and the frames are stacked
        row-wise.

        Reads ``variants``, ``assume_normal``, ``percentiles``,
        ``min_observations``, ``nruns`` and ``relative`` from the enclosing
        scope.

        Args:
            f: data frame for one bin; column 0 is matched against variant
                names and column 2 holds the metric.
                NOTE(review): column 0 is presumably the same column as
                ``f['variant']`` -- confirm against the caller.
            bin_name: label stored in the ``_tmp_bin_`` index level.

        Returns:
            The concatenation of the per-variant frames, or an empty
            ``DataFrame`` when ``f`` contains no variants.
        """
        # find the corresponding bin in the baseline variant
        baseline_metric = f.iloc[:, 2][(f.iloc[:, 0] == variants[1])]
        frames = []

        for v in f['variant'].unique():
            v_metric = f.iloc[:, 2][(f.iloc[:, 0] == v)]
            df = delta_to_dataframe_all_variants(
                f.columns[2],
                *statx.delta(x=v_metric,
                             y=baseline_metric,
                             assume_normal=assume_normal,
                             percentiles=percentiles,
                             min_observations=min_observations,
                             nruns=nruns,
                             relative=relative))

            # add new index levels for variant and binning
            df['_tmp_bin_'] = bin_name
            df['variant'] = v
            df = df.set_index(['variant', '_tmp_bin_'], append=True)
            # The remaining level names are presumably those produced by
            # delta_to_dataframe_all_variants -- confirm if that changes.
            df = df.reorder_levels([
                'variant', '_tmp_bin_', 'metric', 'subgroup_metric',
                'subgroup', 'statistic', 'pctile'
            ])

            frames.append(df)

        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # accumulated frame on every iteration; concatenate once instead.
        # pd.concat rejects an empty list, so keep the empty-frame result.
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)
Ejemplo n.º 4
0
def _delta_all_variants(metric_df,
                        baseline_variant,
                        assume_normal=True,
                        percentiles=None,
                        min_observations=20,
                        nruns=10000,
                        relative=False,
                        weighted=False):
    """Apply ``statx.delta`` to every variant of a metric against a baseline.

    The frame is split with ``groupby('variant')``. For each group, the
    group's metric column is compared with the baseline variant's metric
    column via ``statx.delta`` and the result is converted with
    ``delta_to_dataframe_all_variants``.

    Args:
        metric_df: data frame with 4 columns, in this order: entity,
            variant, metric, reference_kpi. Columns 1-3 are read by
            position; the grouping uses the column name ``'variant'``.
        baseline_variant: value of the variant column that selects the
            baseline rows.
        assume_normal: forwarded unchanged to ``statx.delta``.
        percentiles: forwarded to ``statx.delta``. ``None`` (the default)
            stands for a freshly created ``[2.5, 97.5]``.
        min_observations: forwarded unchanged to ``statx.delta``.
        nruns: forwarded unchanged to ``statx.delta``.
        relative: forwarded unchanged to ``statx.delta``.
        weighted: if True, weights derived from the reference_kpi column
            are passed to ``statx.delta`` as ``x_weights`` / ``y_weights``;
            if False, no weight arguments are passed at all.

    Returns:
        The result of the per-variant ``groupby(...).apply(...)`` with index
        level 0 unstacked.
    """
    if percentiles is None:
        # Build the default on every call: a list literal in the signature
        # would be a single object shared by all calls and exposed to any
        # mutation performed downstream.
        percentiles = [2.5, 97.5]

    is_baseline = metric_df.iloc[:, 1] == baseline_variant
    baseline_metric = metric_df.iloc[:, 2][is_baseline]
    baseline_weights = metric_df.iloc[:, 3][is_baseline]

    def _rescale(weights):
        # Weights are divided by their sum and multiplied by the number of
        # entries.
        return weights / sum(weights) * len(weights)

    def do_delta(f):
        delta_kwargs = dict(x=f.iloc[:, 2],
                            y=baseline_metric,
                            assume_normal=assume_normal,
                            percentiles=percentiles,
                            min_observations=min_observations,
                            nruns=nruns,
                            relative=relative)
        if weighted:
            # Only the weighted case passes weights; otherwise statx.delta
            # falls back to its own defaults.
            delta_kwargs['x_weights'] = _rescale(f.iloc[:, 3])
            delta_kwargs['y_weights'] = _rescale(baseline_weights)
        return delta_to_dataframe_all_variants(f.columns[2],
                                               *statx.delta(**delta_kwargs))

    # Actual calculation
    return metric_df.groupby('variant').apply(do_delta).unstack(0)