def test__normal_difference__computation(self):
    """
    Result of normal_difference() equals expected result.

    Three inputs are checked in turn: the 'systolic_bp' row of self.metrics,
    the 'serum_cholesterol' row of self.metrics, and a hard-coded subsample
    of systolic blood pressure. For each one the entries stored under the
    keys 2.5 and 97.5 of the returned result are compared with reference
    values.
    """

    def summary_args(label):
        # Flatten one metrics row into the m, s and n fields of the men
        # group followed by the same fields of the women group.
        row = self.metrics.loc[label, :]
        men, women = row.men, row.women
        return men.m, men.s, men.n, women.m, women.s, women.n

    def expect_bounds(result, lower, upper):
        # Lower percentile first, then the upper one.
        self.assertAlmostEqual(result[2.5], lower)
        self.assertAlmostEqual(result[97.5], upper)

    # First test: systolic blood pressure, men vs. women
    expect_bounds(
        statx.normal_difference(*summary_args('systolic_bp')),
        0.44582598543756413,
        2.9541740145624127,
    )

    # Second test: serum cholesterol, men vs. women
    expect_bounds(
        statx.normal_difference(*summary_args('serum_cholesterol')),
        -17.159814380797162,
        -12.240185619202816,
    )

    # Third test: subsample of systolic blood pressure. Example from:
    # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_Confidence_Intervals/BS704_Confidence_Intervals5.html
    expect_bounds(
        statx.normal_difference(117.5, 9.7, 6, 126.8, 12., 4),
        -25.10960582643531,
        6.5096058264353118,
    )
def group_sequential(x,
                     y,
                     spending_function='obrien_fleming',
                     estimated_sample_size=None,
                     alpha=0.05,
                     cap=8):
    """
    Group sequential method to determine whether to stop early.

    :param x: sample of a treatment group
    :type  x: pd.Series or array-like
    :param y: sample of a control group
    :type  y: pd.Series or array-like
    :param spending_function: name of the alpha spending function, currently supports only 'obrien_fleming'.
    :type  spending_function: str
    :param estimated_sample_size: sample size to be achieved towards the end of experiment
    :type  estimated_sample_size: int
    :param alpha: type-I error rate
    :type  alpha: float
    :param cap: upper bound of the adapted z-score
    :type  cap: int

    :return: results of type EarlyStoppingTestStatistics
    :rtype:  EarlyStoppingTestStatistics

    :raises ValueError: if x or y is None
    :raises TypeError: if x is not a Series, ndarray or list, or if x and y are of different types
    :raises NotImplementedError: if spending_function is not exactly 'obrien_fleming'
    """
    # Checking if data was provided and it has correct format
    if x is None or y is None:
        raise ValueError('Please provide two non-empty samples.')
    if not isinstance(x, (pd.Series, np.ndarray, list)):
        raise TypeError('Please provide samples of type Series or list.')
    if type(x) is not type(y):
        raise TypeError('Please provide samples of the same type.')

    # Resolve the alpha spending function by exact name, before any work is
    # done. A membership test against the bare string 'obrien_fleming' would
    # be a substring test (accepting e.g. 'obrien' or ''), and eval() on the
    # caller-supplied name would execute arbitrary expressions.
    if spending_function != 'obrien_fleming':
        raise NotImplementedError(
            "Unsupported spending function: {!r}; only 'obrien_fleming' is implemented."
            .format(spending_function))
    func = obrien_fleming

    logger.info(
        "Started running group sequential early stopping; spending function is {}, size of treatment is {} "
        "and size of control is {}".format(spending_function, len(x), len(y)))

    # Coercing missing values to right format (None entries become NaN)
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    # Share of the planned total sample collected so far, capped at 1.
    # Without a planned size the experiment is treated as complete.
    if not estimated_sample_size:
        information_fraction = 1.0
    else:
        information_fraction = min(1.0, (n_x + n_y) / estimated_sample_size)

    # alpha spent at this point of the experiment
    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the two-sided z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)

    # z statistic of the difference in means with unpooled variances
    z = (mu_x - mu_y) / np.sqrt(sigma_x**2 / n_x + sigma_y**2 / n_y)
    # A NaN z compares False, so it never triggers an early stop.
    stop = bool(abs(z) > bound)

    # Interval of the difference at the adjusted (spent) alpha level
    interval = statx.normal_difference(
        mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
        [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])

    treatment_statistics = SampleStatistics(int(n_x), float(mu_x), float(np.nanvar(_x)))
    control_statistics = SampleStatistics(int(n_y), float(mu_y), float(np.nanvar(_y)))
    variant_statistics = BaseTestStatistics(control_statistics, treatment_statistics)

    # Note: p-value and power are computed with the original alpha, not alpha_new.
    p_value = statx.compute_p_value_from_samples(_x, _y)
    statistical_power = statx.compute_statistical_power_from_samples(_x, _y, alpha)

    logger.info(
        "Finished running group sequential early stopping; spending function is {}, size of treatment is {} "
        "and size of control is {}".format(spending_function, len(x), len(y)))

    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y), interval, p_value,
                                       statistical_power, stop)
def group_sequential(x, y, spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
    """
    Group sequential method to determine whether to stop early.

    :param x: sample of a treatment group
    :type  x: pd.Series or array-like
    :param y: sample of a control group
    :type  y: pd.Series or array-like
    :param spending_function: name of the alpha spending function, currently supports only 'obrien_fleming'.
    :type  spending_function: str
    :param estimated_sample_size: sample size to be achieved towards the end of experiment
    :type  estimated_sample_size: int
    :param alpha: type-I error rate
    :type  alpha: float
    :param cap: upper bound of the adapted z-score
    :type  cap: int

    :return: results of type EarlyStoppingTestStatistics
    :rtype:  EarlyStoppingTestStatistics

    :raises ValueError: if x or y is None
    :raises TypeError: if x is not a Series, ndarray or list, or if x and y are of different types
    :raises NotImplementedError: if spending_function is not exactly 'obrien_fleming'
    """
    # Checking if data was provided and it has correct format
    if x is None or y is None:
        raise ValueError('Please provide two non-empty samples.')
    supported_containers = (pd.Series, np.ndarray, list)
    if not isinstance(x, supported_containers):
        raise TypeError('Please provide samples of type Series or list.')
    if type(x) is not type(y):
        raise TypeError('Please provide samples of the same type.')

    # The spending function is matched by exact name and referenced directly.
    # `name in ('obrien_fleming')` is a substring test on a plain string, so
    # names like 'obrien' or '' used to slip through and reach eval().
    if spending_function != 'obrien_fleming':
        raise NotImplementedError(
            "Unsupported spending function: {!r}; only 'obrien_fleming' is implemented."
            .format(spending_function))
    func = obrien_fleming

    logger.info("Started running group sequential early stopping; spending function is {}, size of treatment is {} "
                "and size of control is {}".format(spending_function, len(x), len(y)))

    # Coercing missing values to right format (None entries become NaN)
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    # Fraction of the planned total sample observed so far, capped at 1;
    # with no planned size the fraction is taken to be 1.
    if not estimated_sample_size:
        information_fraction = 1.0
    else:
        information_fraction = min(1.0, (n_x + n_y) / estimated_sample_size)

    # alpha spent at this point of the experiment
    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the two-sided z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)

    # z statistic of the difference in means with unpooled variances
    z = (mu_x - mu_y) / np.sqrt(sigma_x ** 2 / n_x + sigma_y ** 2 / n_y)
    # Stop only when z falls outside [-bound, bound]; NaN never stops.
    stop = bool(abs(z) > bound)

    # Interval of the difference at the adjusted (spent) alpha level
    interval = statx.normal_difference(mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
                                       [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])

    treatment_statistics = SampleStatistics(int(n_x), float(mu_x), float(np.nanvar(_x)))
    control_statistics = SampleStatistics(int(n_y), float(mu_y), float(np.nanvar(_y)))
    variant_statistics = BaseTestStatistics(control_statistics, treatment_statistics)

    # Note: p-value and power are computed with the original alpha, not alpha_new.
    p_value = statx.compute_p_value_from_samples(_x, _y)
    statistical_power = statx.compute_statistical_power_from_samples(_x, _y, alpha)

    logger.info("Finished running group sequential early stopping; spending function is {}, size of treatment is {} "
                "and size of control is {}".format(spending_function, len(x), len(y)))

    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y), interval, p_value, statistical_power, stop)
def group_sequential(x, y, spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
    """
    Group sequential method to determine whether to stop early or not.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        spending_function: name of the alpha spending function, currently supports: 'obrien_fleming'
        estimated_sample_size: sample size to be achieved towards the end of experiment
        alpha: type-I error rate
        cap: upper bound of the adapted z-score

    Returns:
        dict with the keys:
            - 'stop': bool, whether the z-score lies outside the adjusted bound
            - 'delta': float, mean of x minus mean of y
            - 'confidence_interval': list of {'percentile': ..., 'value': ...} dicts
            - 'treatment_sample_size': int, sample size of x
            - 'control_sample_size': int, sample size of y
            - 'treatment_mean': float, mean of x
            - 'control_mean': float, mean of y

    Raises:
        ValueError: if x or y is None
        NotImplementedError: if spending_function is not exactly 'obrien_fleming'
    """
    # Checking if data was provided
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')

    # Resolve the alpha spending function by exact name before doing any work.
    # `name in ('obrien_fleming')` is a substring test on a plain string, so
    # values such as 'obrien' or '' used to pass and were then fed to eval().
    if spending_function != 'obrien_fleming':
        raise NotImplementedError(
            "Unsupported spending function: {!r}; only 'obrien_fleming' is implemented."
            .format(spending_function))
    func = obrien_fleming

    # Coercing missing values to right format (None entries become NaN)
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    # Fraction of the planned sample size reached by the smaller group,
    # capped at 1; with no planned size the fraction is taken to be 1.
    if not estimated_sample_size:
        information_fraction = 1.0
    else:
        information_fraction = min(1.0, min(n_x, n_y) / estimated_sample_size)

    # alpha spent at this point of the experiment
    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the two-sided z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)

    # z statistic of the difference in means with unpooled variances
    z = (mu_x - mu_y) / np.sqrt(sigma_x**2 / n_x + sigma_y**2 / n_y)
    # A NaN z compares False, so it never triggers an early stop.
    stop = bool(abs(z) > bound)

    # Interval of the difference at the adjusted (spent) alpha level,
    # reshaped into a list of percentile/value records.
    interval = statx.normal_difference(
        mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
        [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])
    interval = [{'percentile': p, 'value': v} for (p, v) in interval.items()]

    return {
        'stop': stop,
        'delta': float(mu_x - mu_y),
        'confidence_interval': interval,
        'treatment_sample_size': int(n_x),
        'control_sample_size': int(n_y),
        'treatment_mean': float(mu_x),
        'control_mean': float(mu_y),
    }
def group_sequential(x, y, spending_function='obrien_fleming', information_fraction=1, alpha=0.05, cap=8):
    """
    Group sequential method to determine whether to stop early or not.

    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        spending_function: name of the alpha spending function, currently supports: 'obrien_fleming'
        information_fraction: share of the information amount at the point of evaluation,
            e.g. the share of the maximum sample size
        alpha: type-I error rate
        cap: upper bound of the adapted z-score

    Returns:
        tuple:
            - stop label (1 if the z-score lies outside the adjusted bound, else 0)
            - effect size (delta)
            - confidence interval of delta
            - sample size of x
            - sample size of y
            - absolute mean of x
            - absolute mean of y

    Raises:
        ValueError: if x or y is None
        NotImplementedError: if spending_function is not exactly 'obrien_fleming'
    """
    # Checking if data was provided
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')

    # Resolve the alpha spending function by exact name before doing any work.
    # `name in ('obrien_fleming')` is a substring test on a plain string, so
    # values such as 'obrien' or '' used to pass and were then fed to eval().
    if spending_function != 'obrien_fleming':
        raise NotImplementedError(
            "Unsupported spending function: {!r}; only 'obrien_fleming' is implemented."
            .format(spending_function))
    func = obrien_fleming

    # Coercing missing values to right format (None entries become NaN)
    _x = np.array(x, dtype=float)
    _y = np.array(y, dtype=float)

    # alpha spent at this point of the experiment
    alpha_new = func(information_fraction, alpha=alpha)

    # calculate the two-sided z-score bound
    bound = norm.ppf(1 - alpha_new / 2)
    # replace potential inf with an upper bound
    if bound == np.inf:
        bound = cap

    mu_x = np.nanmean(_x)
    mu_y = np.nanmean(_y)
    sigma_x = np.nanstd(_x)
    sigma_y = np.nanstd(_y)
    n_x = statx.sample_size(_x)
    n_y = statx.sample_size(_y)

    # z statistic of the difference in means with unpooled variances
    z = (mu_x - mu_y) / np.sqrt(sigma_x**2 / n_x + sigma_y**2 / n_y)
    # The stop label stays an int (1/0) as callers receive it in the tuple;
    # a NaN z compares False and therefore yields 0.
    stop = int(abs(z) > bound)

    # Interval of the difference at the adjusted (spent) alpha level
    interval = statx.normal_difference(
        mu_x, sigma_x, n_x, mu_y, sigma_y, n_y,
        [alpha_new * 100 / 2, 100 - alpha_new * 100 / 2])

    return stop, mu_x - mu_y, interval, n_x, n_y, mu_x, mu_y