def _bayes_sampling(x, y, distribution='normal'): """ Helper function. Args: x (array_like): sample of a treatment group y (array_like): sample of a control group distribution: name of the KPI distribution model, which assumes a Stan model file with the same name exists Returns: tuple: - the posterior samples - sample size of x - sample size of y - absolute mean of x - absolute mean of y """ # Checking if data was provided if x is None or y is None: raise ValueError('Please provide two non-None samples.') # Coercing missing values to right format _x = np.array(x, dtype=float) _y = np.array(y, dtype=float) mu_x = np.nanmean(_x) mu_y = np.nanmean(_y) n_x = statx.sample_size(_x) n_y = statx.sample_size(_y) if distribution == 'normal': fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x, 'y': _y} elif distribution == 'poisson': fit_data = { 'Nc': n_y, 'Nt': n_x, 'x': _x.astype(int), 'y': _y.astype(int) } else: raise NotImplementedError model_file = __location__ + '/../models/' + distribution + '_kpi.stan' sm = StanModel(file=model_file) fit = sm.sampling(data=fit_data, iter=25000, chains=4, n_jobs=1, seed=1, control={ 'stepsize': 0.01, 'adapt_delta': 0.99 }) traces = fit.extract() return traces, n_x, n_y, mu_x, mu_y
def test__sample_size__pdseries_categorical_with_na(self):
    """
    sample_size() of categorical data containing 'NA' markers is expected to
    be the number of elements minus the 'NA' entries (7 - 2 = 5).

    Note: despite the test name, the input here is a plain list rather than
    a pandas series.
    """
    categories = ['1', '1', 'NA', '2', 'NA', '5', '8']
    observed = statx.sample_size(categories)
    self.assertEqual(observed, 5)
def test__sample_size__pdseries_categorical(self):
    """
    sample_size() of a pandas series with categorical data (no missing
    entries) is expected to be the number of elements.
    """
    categories = pd.Series(['1', '7', '2', '5', '8', '0'])
    observed = statx.sample_size(categories)
    self.assertEqual(observed, 6)
def test__sample_size__list_categorical(self):
    """
    sample_size() of a list of categorical data (no missing entries) is
    expected to be the number of elements.
    """
    categories = ['1', '1', '3', '2', '6', '5', '8']
    observed = statx.sample_size(categories)
    self.assertEqual(observed, 7)
def test__sample_size__nparray_numeric_with_nan(self):
    """
    sample_size() of a numeric numpy array is expected to be the number of
    elements minus the number of NaNs (7 - 2 = 5).
    """
    values = np.array([1, 1, np.nan, 2, np.nan, 5, 8])
    observed = statx.sample_size(values)
    self.assertEqual(observed, 5)
def group_sequential(x,
                     y,
                     spending_function='obrien_fleming',
                     estimated_sample_size=None,
                     alpha=0.05,
                     cap=8):
    """
    Group sequential method to determine whether to stop early or not.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        spending_function: name of the alpha spending function, currently
            supports: 'obrien_fleming'
        estimated_sample_size: sample size to be achieved towards the end of
            experiment; when falsy, the information fraction is taken as 1.0
        alpha: type-I error rate
        cap: upper bound of the adapted z-score, used when the computed bound
            is infinite

    Returns:
        dict: with the keys 'stop', 'delta', 'confidence_interval' (a list of
        {'percentile': ..., 'value': ...} dicts), 'treatment_sample_size',
        'control_sample_size', 'treatment_mean' and 'control_mean'

    Raises:
        ValueError: if x or y is None
        NotImplementedError: if spending_function is not supported
    """
    # Checking if data was provided
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')

    # Validate the spending function up front. The former test
    # ``in ('obrien_fleming')`` was a substring test against a plain string
    # (the parentheses do not make a tuple), so names such as 'brien' or ''
    # slipped through and were handed to eval().
    if spending_function not in ('obrien_fleming',):
        raise NotImplementedError
    func = obrien_fleming

    # Coercing missing values to right format
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    if not estimated_sample_size:
        information_fraction = 1.0
    else:
        # The smaller group determines how far the experiment has progressed.
        information_fraction = min(1.0,
                                   min(n_x, n_y) / estimated_sample_size)

    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)
    z = (mu_x - mu_y) / np.sqrt(sigma_x**2 / n_x + sigma_y**2 / n_y)

    # Two-sided test: stop as soon as z leaves [-bound, bound].
    stop = bool(z > bound or z < -bound)

    interval = statx.normal_difference(
        mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
        [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])
    interval = [{'percentile': p, 'value': v} for (p, v) in interval.items()]

    return {
        'stop': stop,
        'delta': float(mu_x - mu_y),
        'confidence_interval': interval,
        'treatment_sample_size': int(n_x),
        'control_sample_size': int(n_y),
        'treatment_mean': float(mu_x),
        'control_mean': float(mu_y)
    }
def _bayes_sampling(x, y, distribution='normal', num_iters=25000, inference="sampling"): """ Helper function. Args: x (array_like): sample of a treatment group y (array_like): sample of a control group distribution: name of the KPI distribution model, which assumes a Stan model file with the same name exists num_iters: number of iterations of sampling Returns: tuple: - the posterior samples - sample size of x - sample size of y - absolute mean of x - absolute mean of y """ # Checking if data was provided if x is None or y is None: raise ValueError('Please provide two non-None samples.') # Coercing missing values to right format _x = np.array(x, dtype=float) _y = np.array(y, dtype=float) _x = drop_nan(_x) _y = drop_nan(_y) key = (str(_x), str(_y), num_iters, inference) if cache_sampling_results and key in sampling_results: return sampling_results[key] mu_x = np.nanmean(_x) mu_y = np.nanmean(_y) n_x = statx.sample_size(_x) n_y = statx.sample_size(_y) if distribution == 'normal': fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x, 'y': _y} elif distribution == 'poisson': fit_data = { 'Nc': n_y, 'Nt': n_x, 'x': _x.astype(int), 'y': _y.astype(int) } else: raise NotImplementedError model_file = __location__ + '/../models/' + distribution + '_kpi.stan' sm = get_or_compile_stan_model(model_file, distribution) if inference == "sampling": fit = sm.sampling(data=fit_data, iter=num_iters, chains=4, n_jobs=1, seed=1, control={ 'stepsize': 0.01, 'adapt_delta': 0.99 }) traces = fit.extract() elif inference == "variational": results_dict = sm.vb(data=fit_data, iter=10000) traces = {} for i in range(len(results_dict['sampler_param_names'])): para_name = results_dict['sampler_param_names'][i] para_values = np.array(results_dict['sampler_params'][i]) traces[para_name] = para_values if cache_sampling_results: sampling_results[key] = (traces, n_x, n_y, mu_x, mu_y) return traces, n_x, n_y, mu_x, mu_y
def do_delta_categorical(df):
    """
    Run the categorical feature check on the third column of ``df``.

    The chi-square p-value of that column against ``baseline_metric`` (taken
    from the enclosing scope) and the column's sample size are passed to
    ``feature_check_to_dataframe``, together with the column name.

    :param df: data frame whose third column (position 2) holds the metric
    :return: whatever ``feature_check_to_dataframe`` returns
    """
    variant_metric = df.iloc[:, 2]
    # chi_square returns a sequence; its first element is used as the p-value.
    p_value = statx.chi_square(x=variant_metric, y=baseline_metric)[0]
    variant_size = statx.sample_size(variant_metric)
    return feature_check_to_dataframe(
        metric=df.columns[2],
        samplesize_variant=variant_size,
        pval=p_value,
    )
def test__sample_size__all_nans(self):
    """
    sample_size() of a list that contains nothing but NaNs is expected to
    be 0.
    """
    only_nans = [np.nan] * 6
    observed = statx.sample_size(only_nans)
    self.assertEqual(observed, 0)
def test__sample_size__list_numeric(self):
    """
    sample_size() of a numeric list without missing values is expected to be
    the number of elements.
    """
    values = [1, 1, 2, 5, 8]
    observed = statx.sample_size(values)
    self.assertEqual(observed, 5)
def group_sequential(x,
                     y,
                     spending_function='obrien_fleming',
                     information_fraction=1,
                     alpha=0.05,
                     cap=8):
    """
    Group sequential method to determine whether to stop early or not.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        spending_function: name of the alpha spending function, currently
            supports: 'obrien_fleming'
        information_fraction: share of the information amount at the point
            of evaluation, e.g. the share of the maximum sample size
        alpha: type-I error rate
        cap: upper bound of the adapted z-score, used when the computed bound
            is infinite

    Returns:
        tuple:
            - stop label (1 to stop, 0 to continue)
            - effect size (delta)
            - confidence interval of delta
            - sample size of x
            - sample size of y
            - absolute mean of x
            - absolute mean of y

    Raises:
        ValueError: if x or y is None
        NotImplementedError: if spending_function is not supported
    """
    # Checking if data was provided
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')

    # Validate the spending function up front. The former test
    # ``in ('obrien_fleming')`` was a substring test against a plain string
    # (the parentheses do not make a tuple), so names such as 'brien' or ''
    # slipped through and were handed to eval().
    if spending_function not in ('obrien_fleming',):
        raise NotImplementedError
    func = obrien_fleming

    # Coercing missing values to right format
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)
    z = (mu_x - mu_y) / np.sqrt(sigma_x**2 / n_x + sigma_y**2 / n_y)

    # Two-sided test: stop as soon as z leaves [-bound, bound].
    stop = 1 if (z > bound or z < -bound) else 0

    interval = statx.normal_difference(
        mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
        [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])

    return stop, mu_x - mu_y, interval, n_x, n_y, mu_x, mu_y
def _bayes_sampling(x, y, distribution='normal', num_iters=25000, inference="sampling"): """ Helper function for bayesian sampling. :param x: sample of a treatment group :type x: pd.Series or list (array-like) :param y: sample of a control group :type y: pd.Series or list (array-like) :param distribution: name of the KPI distribution model, which assumes a Stan model file with the same name exists :type distribution: str :param num_iters: number of iterations of sampling :type num_iters: int :param inference: 'sampling' for MCMC sampling method or 'variational' for variational inference :type inference: str :return: the posterior samples, sample size of x, sample size of y, absolute mean of x, absolute mean of y :rtype: tuple[array-like, array-like, array-like, float, float] """ # Checking if data was provided and it has correct format if x is None or y is None: raise ValueError('Please provide two non-empty samples.') if not isinstance(x, pd.Series) and not isinstance( x, np.ndarray) and not isinstance(x, list): raise TypeError('Please provide samples of type Series or list.') if type(x) != type(y): raise TypeError('Please provide samples of the same type.') logger.info( "Started running bayesian inference with {} procedure, treatment group of size {}, " "control group of size {}, {} distribution.".format( inference, len(x), len(y), distribution, inference)) # Coercing missing values to right format _x = np.array(x, dtype=float) _y = np.array(y, dtype=float) _x = drop_nan(_x) _y = drop_nan(_y) key = (str(_x), str(_y), num_iters, inference) if cache_sampling_results and key in sampling_results: return sampling_results[key] mu_x = np.nanmean(_x) mu_y = np.nanmean(_y) n_x = statx.sample_size(_x) n_y = statx.sample_size(_y) if distribution == 'normal': fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x, 'y': _y} elif distribution == 'poisson': fit_data = { 'Nc': n_y, 'Nt': n_x, 'x': _x.astype(int), 'y': _y.astype(int) } else: raise NotImplementedError model_file = __location__ 
+ '/../models/' + distribution + '_kpi.stan' sm = get_or_compile_stan_model(model_file, distribution) if inference == "sampling": fit = sm.sampling(data=fit_data, iter=num_iters, chains=4, n_jobs=1, seed=1, control={ 'stepsize': 0.01, 'adapt_delta': 0.99 }) traces = fit.extract() elif inference == "variational": results_dict = sm.vb(data=fit_data, iter=10000) traces = {} for i in range(len(results_dict['sampler_param_names'])): para_name = results_dict['sampler_param_names'][i] para_values = np.array(results_dict['sampler_params'][i]) traces[para_name] = para_values if cache_sampling_results: sampling_results[key] = (traces, n_x, n_y, mu_x, mu_y) logger.info( "Finished running bayesian inference with {} procedure, treatment group of size {}, " "control group of size {}, {} distribution.".format( inference, len(x), len(y), distribution)) return traces, n_x, n_y, mu_x, mu_y
def test__sample_size__list_numeric(self):
    """
    sample_size() of a numeric list without missing values is expected to be
    the number of elements.
    """
    values = [1, 1, 2, 5, 8]
    observed = statx.sample_size(values)
    self.assertEqual(observed, 5)
def test__sample_size__empty_list_numeric(self):
    """
    sample_size() of an empty list is expected to be 0.
    """
    nothing = []
    observed = statx.sample_size(nothing)
    self.assertEqual(observed, 0)
def _bayes_sampling(x, y, distribution='normal', num_iters=25000, inference="sampling"): """ Helper function for bayesian sampling. :param x: sample of a treatment group :type x: pd.Series or list (array-like) :param y: sample of a control group :type y: pd.Series or list (array-like) :param distribution: name of the KPI distribution model, which assumes a Stan model file with the same name exists :type distribution: str :param num_iters: number of iterations of sampling :type num_iters: int :param inference: 'sampling' for MCMC sampling method or 'variational' for variational inference :type inference: str :return: the posterior samples, sample size of x, sample size of y, absolute mean of x, absolute mean of y :rtype: tuple[array-like, array-like, array-like, float, float] """ # Checking if data was provided and it has correct format if x is None or y is None: raise ValueError('Please provide two non-empty samples.') if not isinstance(x, pd.Series) and not isinstance(x, np.ndarray) and not isinstance(x, list): raise TypeError('Please provide samples of type Series or list.') if type(x) != type(y): raise TypeError('Please provide samples of the same type.') logger.info("Started running bayesian inference with {} procedure, treatment group of size {}, " "control group of size {}, {} distribution.".format(inference, len(x), len(y), distribution, inference)) # Coercing missing values to right format _x = np.array(x, dtype=float) _y = np.array(y, dtype=float) _x = drop_nan(_x) _y = drop_nan(_y) key = (str(_x), str(_y), num_iters, inference) if cache_sampling_results and key in sampling_results: return sampling_results[key] mu_x = np.nanmean(_x) mu_y = np.nanmean(_y) n_x = statx.sample_size(_x) n_y = statx.sample_size(_y) if distribution == 'normal': fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x, 'y': _y} elif distribution == 'poisson': fit_data = {'Nc': n_y, 'Nt': n_x, 'x': _x.astype(int), 'y': _y.astype(int)} else: raise NotImplementedError model_file = __location__ + 
'/../models/' + distribution + '_kpi.stan' sm = get_or_compile_stan_model(model_file, distribution) if inference == "sampling": fit = sm.sampling(data=fit_data, iter=num_iters, chains=4, n_jobs=1, seed=1, control={'stepsize': 0.01, 'adapt_delta': 0.99}) traces = fit.extract() elif inference == "variational": results_dict = sm.vb(data=fit_data, iter=10000) traces = {} for i in range(len(results_dict['sampler_param_names'])): para_name = results_dict['sampler_param_names'][i] para_values = np.array(results_dict['sampler_params'][i]) traces[para_name] = para_values if cache_sampling_results: sampling_results[key] = (traces, n_x, n_y, mu_x, mu_y) logger.info("Finished running bayesian inference with {} procedure, treatment group of size {}, " "control group of size {}, {} distribution.".format(inference, len(x), len(y), distribution)) return traces, n_x, n_y, mu_x, mu_y
def group_sequential(x, y, spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
    """
    Group sequential method to determine whether to stop early.

    :param x: sample of a treatment group
    :type  x: pd.Series, np.ndarray or list
    :param y: sample of a control group (must be of the same type as x)
    :type  y: pd.Series, np.ndarray or list
    :param spending_function: name of the alpha spending function, currently supports only 'obrien_fleming'.
    :type  spending_function: str
    :param estimated_sample_size: sample size to be achieved towards the end of experiment;
                                  when falsy, the information fraction is taken as 1.0
    :type  estimated_sample_size: int
    :param alpha: type-I error rate
    :type  alpha: float
    :param cap: upper bound of the adapted z-score, used when the computed bound is infinite
    :type  cap: int

    :return: results of type EarlyStoppingTestStatistics
    :rtype:  EarlyStoppingTestStatistics

    :raises ValueError: if x or y is None
    :raises TypeError: if x has an unsupported type or x and y differ in type
    :raises NotImplementedError: if spending_function is not supported
    """
    # Checking if data was provided and it has correct format
    if x is None or y is None:
        raise ValueError('Please provide two non-empty samples.')
    if not isinstance(x, (pd.Series, np.ndarray, list)):
        raise TypeError('Please provide samples of type Series or list.')
    if type(x) != type(y):
        raise TypeError('Please provide samples of the same type.')

    # Validate the spending function up front. The former test
    # ``in ('obrien_fleming')`` was a substring test against a plain string
    # (the parentheses do not make a tuple), so names such as 'brien' or ''
    # slipped through and were handed to eval().
    if spending_function not in ('obrien_fleming',):
        raise NotImplementedError
    func = obrien_fleming

    logger.info("Started running group sequential early stopping; spending function is {}, size of treatment is {} "
                "and size of control is {}".format(spending_function, len(x), len(y)))

    # Coercing missing values to right format
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    if not estimated_sample_size:
        information_fraction = 1.0
    else:
        # Progress is measured on the combined size of both groups.
        information_fraction = min(1.0, (n_x + n_y) / estimated_sample_size)

    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)
    z = (mu_x - mu_y) / np.sqrt(sigma_x ** 2 / n_x + sigma_y ** 2 / n_y)

    # Two-sided test: stop as soon as z leaves [-bound, bound].
    stop = bool(z > bound or z < -bound)

    interval = statx.normal_difference(mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
                                       [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])

    # The means were already computed above; reuse them.
    treatment_statistics = SampleStatistics(int(n_x), float(mu_x), float(np.nanvar(_x)))
    control_statistics = SampleStatistics(int(n_y), float(mu_y), float(np.nanvar(_y)))
    variant_statistics = BaseTestStatistics(control_statistics, treatment_statistics)

    p_value = statx.compute_p_value_from_samples(_x, _y)
    statistical_power = statx.compute_statistical_power_from_samples(_x, _y, alpha)

    logger.info("Finished running group sequential early stopping; spending function is {}, size of treatment is {} "
                "and size of control is {}".format(spending_function, len(x), len(y)))
    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y), interval, p_value, statistical_power, stop)
def test__sample_size__all_nans(self):
    """
    sample_size() of a list that contains nothing but NaNs is expected to
    be 0.
    """
    only_nans = [np.nan] * 6
    observed = statx.sample_size(only_nans)
    self.assertEqual(observed, 0)
def group_sequential(x,
                     y,
                     spending_function='obrien_fleming',
                     estimated_sample_size=None,
                     alpha=0.05,
                     cap=8):
    """
    Group sequential method to determine whether to stop early.

    :param x: sample of a treatment group
    :type  x: pd.Series, np.ndarray or list
    :param y: sample of a control group (must be of the same type as x)
    :type  y: pd.Series, np.ndarray or list
    :param spending_function: name of the alpha spending function, currently supports only 'obrien_fleming'.
    :type  spending_function: str
    :param estimated_sample_size: sample size to be achieved towards the end of experiment;
                                  when falsy, the information fraction is taken as 1.0
    :type  estimated_sample_size: int
    :param alpha: type-I error rate
    :type  alpha: float
    :param cap: upper bound of the adapted z-score, used when the computed bound is infinite
    :type  cap: int

    :return: results of type EarlyStoppingTestStatistics
    :rtype:  EarlyStoppingTestStatistics

    :raises ValueError: if x or y is None
    :raises TypeError: if x has an unsupported type or x and y differ in type
    :raises NotImplementedError: if spending_function is not supported
    """
    # Checking if data was provided and it has correct format
    if x is None or y is None:
        raise ValueError('Please provide two non-empty samples.')
    if not isinstance(x, (pd.Series, np.ndarray, list)):
        raise TypeError('Please provide samples of type Series or list.')
    if type(x) != type(y):
        raise TypeError('Please provide samples of the same type.')

    # Validate the spending function up front. The former test
    # ``in ('obrien_fleming')`` was a substring test against a plain string
    # (the parentheses do not make a tuple), so names such as 'brien' or ''
    # slipped through and were handed to eval().
    if spending_function not in ('obrien_fleming',):
        raise NotImplementedError
    func = obrien_fleming

    logger.info(
        "Started running group sequential early stopping; spending function is {}, size of treatment is {} "
        "and size of control is {}".format(spending_function, len(x), len(y)))

    # Coercing missing values to right format
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    if not estimated_sample_size:
        information_fraction = 1.0
    else:
        # Progress is measured on the combined size of both groups.
        information_fraction = min(1.0, (n_x + n_y) / estimated_sample_size)

    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)
    z = (mu_x - mu_y) / np.sqrt(sigma_x**2 / n_x + sigma_y**2 / n_y)

    # Two-sided test: stop as soon as z leaves [-bound, bound].
    stop = bool(z > bound or z < -bound)

    interval = statx.normal_difference(
        mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
        [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])

    # The means were already computed above; reuse them.
    treatment_statistics = SampleStatistics(int(n_x), float(mu_x),
                                            float(np.nanvar(_x)))
    control_statistics = SampleStatistics(int(n_y), float(mu_y),
                                          float(np.nanvar(_y)))
    variant_statistics = BaseTestStatistics(control_statistics,
                                            treatment_statistics)

    p_value = statx.compute_p_value_from_samples(_x, _y)
    statistical_power = statx.compute_statistical_power_from_samples(
        _x, _y, alpha)

    logger.info(
        "Finished running group sequential early stopping; spending function is {}, size of treatment is {} "
        "and size of control is {}".format(spending_function, len(x), len(y)))
    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y), interval, p_value,
                                       statistical_power, stop)
def test__sample_size__empty_list_numeric(self):
    """
    sample_size() of an empty list is expected to be 0.
    """
    nothing = []
    observed = statx.sample_size(nothing)
    self.assertEqual(observed, 0)