def __init__(self, **kwargs):
    """Initialise every field to an empty or zero placeholder.

    ``**kwargs`` is accepted but not read by this constructor.
    """
    # ``np.matrix`` requires a data argument, so an explicitly empty
    # matrix is used as the "not yet supplied" placeholder.
    self.essencex = np.matrix([])
    self.betafunction = np.matrix([])
    self.c_matrix = np.matrix([])
    self.u_matrix = np.matrix([])
    self.sigma = 0
    self.theta_zero = 0
    # Scalar
    # CalcMethod
    # Option
    # CL
    # IP
    # I think there is an object here....
    ######################################
    self.df1 = 0
    self.df2 = 0
    self.dfh = []
    self.dfe2 = 0
    #######################################
    self.alphatest = 0
    self.n2 = 0
    self.cl_type = 0
    self.n_est = 0
    self.rank_est = 0
    self.alpha_cl = 0
    self.alpha_cu = 0
    self.tolerance = 1e-18
    self.omega = 0
    self.power = Power()
    self.exceptions = []
def _multi_power(alpha: float,
                 df1: float,
                 df2: float,
                 omega: float,
                 total_N: float,
                 **kwargs) -> Power:
    """Shared power computation used by the multirep methods.

    Optional keyword arguments read from ``kwargs``:

    * ``noncentrality_distribution`` - when given without ``quantile``, its
      ``unconditional_power_simpson`` method supplies the probability;
      when given together with ``quantile``, ``omega`` is replaced by the
      value from ``__calc_quantile_omega``.
    * ``quantile`` - see above.
    * ``confidence_interval`` - when given, ``glmmpcl`` is called on the
      resulting Power using its ``n_est``, ``rank_est``, ``lower_tail`` and
      ``upper_tail`` attributes.

    :return: the Power object built from the computed power, ``omega`` and
        the method reported by the probability routine.
    """
    distribution = kwargs.get('noncentrality_distribution')
    target_quantile = kwargs.get('quantile')
    interval = kwargs.get('confidence_interval')

    fcrit = finv(1 - alpha, df1, df2)

    if distribution and not target_quantile:
        prob, fmethod = distribution.unconditional_power_simpson(
            fcrit=fcrit, df1=df1, df2=df2)
    else:
        if distribution and target_quantile:
            omega = __calc_quantile_omega(distribution, target_quantile)
        prob, fmethod = probf(fcrit, df1, df2, omega)

    # With the normal approximation a probability of exactly 1 is reported
    # as power == alpha rather than 0.
    saturated = fmethod == Constants.FMETHOD_NORMAL_LR and prob == 1
    powerval = float(alpha if saturated else 1 - prob)

    result = Power(powerval, omega, fmethod)
    if not interval:
        return result

    result.glmmpcl(is_multirep=True,
                   alphatest=alpha,
                   dfh=df1,
                   n2=total_N,
                   dfe2=df2,
                   cl_type=_get_cl_type(interval),
                   n_est=interval.n_est,
                   rank_est=interval.rank_est,
                   alpha_cl=interval.lower_tail,
                   alpha_cu=interval.upper_tail,
                   fcrit=fcrit,
                   tolerance=1e-12,
                   omega=omega)
    return result
def test_multi_power(self):
    """_multi_power matches the reference power to 4 places and keeps omega."""
    reference = Power(0.138179071626, 3, Constants.FMETHOD_NORMAL_LR)

    result = multirep._multi_power(alpha=0.05,
                                   df1=3,
                                   df2=4,
                                   omega=3,
                                   total_N=10)

    self.assertEqual(round(reference.power, 4), round(result.power, 4))
    self.assertEqual(reference.noncentrality_parameter,
                     result.noncentrality_parameter)
def test_glmmpcl(self):
    """Check the confidence limits that ``glmmpcl`` leaves on a Power object.

    ``glmmpcl`` is called for a multirep case with
    ``CLTYPE_DESIRED_KNOWN``; afterwards the object's ``lower_bound`` and
    ``upper_bound`` are compared with stored reference values.
    """
    # TODO: record where these reference values come from and why the
    # comparisons are made on rounded numbers.
    power = Power(0.9, 0.05, Constants.FMETHOD_NOAPPROXIMATION)

    # Defined once so the critical value and the glmmpcl call cannot
    # drift apart.
    alphatest = 0.05
    dfh = 20   # df1
    dfe2 = 28  # df2
    fcrit = finv(1 - alphatest, dfh, dfe2)

    power.glmmpcl(
        is_multirep=True,
        alphatest=alphatest,
        dfh=dfh,
        n2=30,  # total_N - TODO confirm what n2 represents
        dfe2=dfe2,
        cl_type=Constants.CLTYPE_DESIRED_KNOWN,
        n_est=20,
        rank_est=1,
        alpha_cl=0.048,
        alpha_cu=0.052,
        fcrit=fcrit,
        tolerance=0.01,
        omega=200)

    power_l = 0.9999379
    noncen_l = 105.66408
    power_u = 1
    noncen_u = 315.62306
    fmethod = Constants.FMETHOD_NOAPPROXIMATION

    self.assertEqual(round(power.lower_bound.power, 7), power_l)
    self.assertEqual(
        round(power.lower_bound.noncentrality_parameter, 5), noncen_l)
    self.assertEqual(power.lower_bound.fmethod, fmethod)

    self.assertEqual(power.upper_bound.power, power_u)
    self.assertEqual(
        round(power.upper_bound.noncentrality_parameter, 5), noncen_u)
    self.assertEqual(power.upper_bound.fmethod, fmethod)
def samplesize(test,
               rank_C: float,
               rank_X: float,
               relative_group_sizes,
               alpha: float,
               sigma_star: np.matrix,
               delta_es: np.matrix,
               targetPower,
               starting_smallest_group_size=Constants.STARTING_SAMPLE_SIZE.value,
               **kwargs):
    """
    Get the smallest realizable samplesize for the requested target power.

    The search has two phases. First the smallest group size is doubled
    until ``test`` reports a power above ``targetPower`` (or the total
    sample size reaches the allowed maximum). Then, unless half of that
    size already reaches the target, a bisection between the two sizes
    locates the smallest integer per-group size that achieves it.

    :param test: The statistical test chosen. This must be one of the tests
        available in pyglimmpse.multirep or pyglimmpse.unirep. It is called
        with keyword arguments ``rank_C``, ``rank_X``,
        ``relative_group_sizes``, ``rep_N``, ``alpha``, ``sigma_star`` and
        ``delta_es`` plus ``**kwargs``, and must return an object with a
        ``power`` attribute.
    :param rank_C: Rank of the C contrast matrix for your study design.
    :param rank_X: The rank of Es(X), where X is your design matrix.
    :param relative_group_sizes: A list of ratios of size of the groups in
        your design.
    :param alpha: Type one error rate.
    :param sigma_star: Sigma star.
    :param delta_es: (Theta - Theta_0)'M^-1(Theta - Theta_0)
    :param targetPower: The power you wish to achieve.
    :param starting_smallest_group_size: The starting point for the search.
        If this is less than the minimum realizable smallest group size for
        your design, this function will raise an error.
    :param kwargs: Optional arguments, forwarded unchanged to every call of
        ``test``.
    :return: ``(total_N, power)`` - the total sample size and the power
        object ``test`` returned for it.
    :raises ValueError: if ``test`` returns a string as power while
        searching for the upper bound, if the upper-bound power is ``None``
        or NaN, or if the final power is still below ``targetPower``.
    """
    # calculate max valid per group N
    max_n = min(sys.maxsize / rank_X, Constants.MAX_SAMPLE_SIZE.value)
    ratio_total = sum(relative_group_sizes)

    def power_at(per_group_n):
        # Every evaluation forwards **kwargs, so the bracket found below and
        # the function that is bisected describe the same power curve.
        return test(rank_C=rank_C,
                    rank_X=rank_X,
                    relative_group_sizes=relative_group_sizes,
                    rep_N=per_group_n,
                    alpha=alpha,
                    sigma_star=sigma_star,
                    delta_es=delta_es,
                    **kwargs)

    def distance_from_target(per_group_n):
        return subtrtact_target_power(power_at(per_group_n), targetPower)

    upper_power = Power()
    upper_bound_smallest_group_size = starting_smallest_group_size
    upper_bound_total_N = upper_bound_smallest_group_size * ratio_total

    # find a samplesize which produces power greater than or equal to the
    # desired power
    while ((np.isnan(upper_power.power) or upper_power.power <= targetPower)
           and upper_bound_total_N < max_n):
        upper_bound_total_N = upper_bound_smallest_group_size * ratio_total

        if upper_bound_total_N >= max_n:
            # NOTE(review): this assigns a ratio of totals to a per-group
            # size; kept as-is, but confirm it is the intended clamp.
            upper_bound_smallest_group_size = upper_bound_total_N / max_n

        # call power for this sample size
        upper_power = power_at(upper_bound_smallest_group_size)
        if isinstance(upper_power.power, str):
            raise ValueError(
                'Upper power is not calculable. Check that your design is realisable.'
                ' Usually the easies way to do this is to increase sample size'
            )
        upper_bound_smallest_group_size += upper_bound_smallest_group_size

    if upper_power.power is None or math.isnan(upper_power.power):
        raise ValueError(
            'Could not find a samplesize which achieves the target power. Please check your design.'
        )

    # undo last doubling
    upper_bound_smallest_group_size = upper_bound_smallest_group_size / 2
    # the lower bound of the search is half the upper bound
    # (note we are using floor division)
    lower_bound_smallest_group_size = upper_bound_smallest_group_size // 2
    lower_power = power_at(lower_bound_smallest_group_size)

    #
    # At this point we have the boundaries for searching.
    # There are three possible scenarios
    # 1. The upper bound == lower bound.
    # 2. The upper bound != lower bound and lower bound exceeds required power.
    #    In this case we just take the value at the lower bound.
    # 3. The upper bound != lower bound and lower bound is less than the
    #    required power. In this case we bisection search.
    #
    if lower_power.power >= targetPower:
        return lower_bound_smallest_group_size * ratio_total, lower_power

    total_per_group_n = math.floor(
        optimize.bisect(distance_from_target,
                        lower_bound_smallest_group_size,
                        upper_bound_smallest_group_size))
    power = power_at(total_per_group_n)
    if (power.power < targetPower) or np.isnan(power.power):
        # Flooring the root may land just below the target; step up by one.
        total_per_group_n = total_per_group_n + 1
        power = power_at(total_per_group_n)
    if power.power < targetPower:
        raise ValueError(
            'Samplesize cannot be calculated. Please check your design.'
        )
    # total_per_group_n is already an integer here (math.floor result).
    total_N = sum(total_per_group_n * g for g in relative_group_sizes)
    return total_N, power
def _unirep_power_known_sigma_internal_pilot(rank_C,
                                             rank_U,
                                             total_N,
                                             rank_X,
                                             sigma_star,
                                             hypo_sum_square,
                                             expected_epsilon,
                                             epsilon,
                                             alpha,
                                             sigmastareval,
                                             unirep_method,
                                             **kwargs):
    """
    Calculate power for a univariate repeated measures test with known
    Sigma, using multipliers derived from an internal pilot study.

    Parameters
    ----------
    rank_C: float
        rank of the C matrix
    rank_U: float
        rank of the U matrix
    total_N: float
        total number of observations
    rank_X:
        rank of the X matrix
    sigma_star:
        passed to HypothesisError together with hypo_sum_square
    hypo_sum_square: float
        hypothesis sum of squares
    expected_epsilon: float
        expected value epsilon estimator
    epsilon:
        epsilon calculated from U`*SIGMA*U
    alpha:
        Significance level for target GLUM test
    sigmastareval:
        eigenvalues of SIGMASTAR=U`*SIGMA*U
    unirep_method:
        Which method was used to find the expected value of epsilon.
    kwargs:
        Only ``internal_pilot`` is read; its ``n_ip`` and ``rank_ip``
        attributes are passed to the multiplier calculation. Any other
        keyword argument is accepted and ignored.

    Returns
    -------
    power: Power
        power for the univariate test, tagged with Constants.BOX.
    """
    # NOTE(review): internal_pilot is dereferenced unconditionally below, so
    # calling this without that keyword fails with AttributeError.
    pilot = kwargs.get('internal_pilot')

    # E = SIGMASTAR
    # (N - rX)
    nue = total_N - rank_X
    undf1, undf2 = _calc_undf1_undf2(unirep_method, expected_epsilon, nue,
                                     rank_C, rank_U)

    hypothesis_error = HypothesisError(hypo_sum_square, sigma_star, rank_U)
    e_1_2, e_3_5, e_4 = _calc_multipliers_internal_pilot(
        unirep_method,
        expected_epsilon,
        epsilon,
        hypothesis_error,
        sigmastareval,
        rank_C,
        rank_U,
        pilot.n_ip,
        pilot.rank_ip)

    # Error checking
    e_1_2 = _err_checking(e_1_2, rank_U)

    omega = e_3_5 * hypothesis_error.q2 / hypothesis_error.lambar
    fcrit = finv(1 - alpha, undf1 * e_1_2, undf2 * e_1_2)

    _, _, power_value = _calc_power_muller_approx(undf1, undf2, omega, alpha,
                                                  e_3_5, e_4, fcrit)
    return Power(power_value, omega, Constants.BOX)
def _unirep_power_estimated_sigma(rank_C,
                                  rank_U,
                                  total_N,
                                  rank_X,
                                  sigma_star,
                                  hypo_sum_square,
                                  expected_epsilon,
                                  epsilon,
                                  alpha,
                                  unirep_method,
                                  **kwargs):
    """
    Calculate power for a univariate repeated measures test when Sigma is
    estimated, and attach confidence limits to the result.

    Parameters
    ----------
    rank_C: float
        rank of the C matrix
    rank_U: float
        rank of the U matrix
    total_N: float
        total number of observations
    rank_X:
        rank of the X matrix
    sigma_star:
        passed to HypothesisError together with hypo_sum_square
    hypo_sum_square: float
        hypothesis sum of squares
    expected_epsilon: float
        expected value epsilon estimator
    epsilon:
        object whose ``eps`` attribute is the epsilon calculated from
        U`*SIGMA*U
    alpha:
        Significance level for target GLUM test
    unirep_method:
        Which method was used to find the expected value of epsilon.
    kwargs:
        * ``approximation_method`` - approximation used for the cdf;
          defaults to Constants.UCDF_MULLER2004_APPROXIMATION.
        * ``confidence_interval`` - object providing ``n_est``,
          ``rank_est``, ``lower_tail`` and ``upper_tail``.
        * ``tolerance`` - forwarded to ``glmmpcl``; defaults to 1e-12.

    Returns
    -------
    power: Power
        power for the univariate test, tagged with
        Constants.SIGMA_ESTIMATED.
    """
    approximation_method = kwargs.get(
        'approximation_method', Constants.UCDF_MULLER2004_APPROXIMATION)
    tolerance = kwargs.get('tolerance', 1e-12)
    # NOTE(review): n_est / rank_est are read from confidence_interval
    # before it is tested for truthiness, so it is effectively required.
    interval = kwargs.get('confidence_interval')

    # E = SIGMASTAR
    # (N - rX)
    nue = total_N - rank_X
    undf1, undf2 = _calc_undf1_undf2(unirep_method, expected_epsilon, nue,
                                     rank_C, rank_U)

    hypothesis_error = HypothesisError(hypo_sum_square, sigma_star, rank_U)
    cl1df, e_1_2, e_3_5, e_4, omegaua = _calc_multipliers_est_sigma(
        unirep_method=unirep_method,
        eps=epsilon.eps,
        hypothesis_error=hypothesis_error,
        nue=nue,
        rank_C=rank_C,
        rank_U=rank_U,
        approximation_method=approximation_method,
        n_est=interval.n_est,
        rank_est=interval.rank_est)

    # Error checking
    e_1_2 = _err_checking(e_1_2, rank_U)

    omega = e_3_5 * hypothesis_error.q2 / hypothesis_error.lambar
    if unirep_method == Constants.CM:
        omega = omegaua

    fcrit = finv(1 - alpha, undf1 * e_1_2, undf2 * e_1_2)
    df1, df2, power_value = _calc_power_muller_approx(
        undf1, undf2, omega, alpha, e_3_5, e_4, fcrit)
    result = Power(power_value, omega, Constants.SIGMA_ESTIMATED)

    if not interval:
        return result

    result.glmmpcl(is_multirep=False,
                   alphatest=alpha,
                   dfh=cl1df,
                   n2=total_N,
                   rank_est=interval.rank_est,
                   dfe2=df2,
                   cl_type=_get_cl_type(interval),
                   n_est=interval.n_est,
                   alpha_cl=interval.lower_tail,
                   alpha_cu=interval.upper_tail,
                   fcrit=fcrit,
                   tolerance=tolerance,
                   omega=omega,
                   df1_unirep=df1)
    return result
def _unirep_power_known_sigma(rank_C,
                              rank_U,
                              total_N,
                              rank_X,
                              sigma_star,
                              hypo_sum_square,
                              expected_epsilon,
                              epsilon,
                              alpha,
                              unirep_method,
                              **kwargs):
    """
    Calculate power for a univariate repeated measures test with known
    Sigma.

    Parameters
    ----------
    rank_C: float
        rank of the C matrix
    rank_U: float
        rank of the U matrix
    total_N: float
        total number of observations
    rank_X:
        rank of the X matrix
    sigma_star:
        passed to HypothesisError together with hypo_sum_square
    hypo_sum_square: np.matrix
        hypothesis sum of squares
    expected_epsilon: float
        expected value epsilon estimator
    epsilon:
        epsilon calculated from U`*SIGMA*U
    alpha:
        Significance level for target GLUM test
    unirep_method:
        Which method was used to find the expected value of epsilon.
    kwargs:
        * ``noncentrality_distribution`` - without ``quantile`` its
          ``unconditional_power_simpson`` method supplies the power; with
          ``quantile`` it is used to replace omega via
          ``__calc_quantile_omega``.
        * ``quantile`` - see above.
        Other keyword arguments are accepted and ignored.

    Returns
    -------
    power: Power
        power for the univariate test, tagged with Constants.SIGMA_KNOWN.
    """
    distribution = kwargs.get('noncentrality_distribution')
    target_quantile = kwargs.get('quantile')

    nue = total_N - rank_X
    undf1, undf2 = _calc_undf1_undf2(unirep_method, expected_epsilon, nue,
                                     rank_C, rank_U)

    hypothesis_error = HypothesisError(hypo_sum_square, sigma_star, rank_U)
    e_1_2, e_3_5, e_4 = _calc_multipliers_known_sigma(
        epsilon, expected_epsilon, hypothesis_error, rank_C, rank_U,
        Constants.SIGMA_KNOWN)
    omega = e_3_5 * hypothesis_error.q2 / hypothesis_error.lambar

    # Error checking
    e_1_2 = _err_checking(e_1_2, rank_U)
    fcrit = finv(1 - alpha, undf1 * e_1_2, undf2 * e_1_2)

    if distribution and not target_quantile:
        # NOTE(review): _multi_power unpacks the result of this same method
        # as a (prob, fmethod) pair; here the raw return value is stored as
        # the power. Confirm which usage is correct.
        power_value = distribution.unconditional_power_simpson(
            fcrit=fcrit, df1=undf1 * e_3_5, df2=undf2 * e_4)
    else:
        if distribution and target_quantile:
            omega = __calc_quantile_omega(distribution, target_quantile)
        # Muller, Edwards & Taylor 2002 and Muller Barton 1989 CDF approx
        _, _, power_value = _calc_power_muller_approx(
            undf1, undf2, omega, alpha, e_3_5, e_4, fcrit)

    return Power(power_value, omega, Constants.SIGMA_KNOWN)
def _unirep_power(epsilon_estimator,
                  rank_C: float,
                  rank_X: float,
                  relative_group_sizes,
                  rep_N: float,
                  alpha: float,
                  sigma_star: np.matrix,
                  delta_es: np.matrix,
                  unirep_method,
                  **kwargs):
    """Dispatch a univariate repeated measures power calculation.

    ``epsilon_estimator`` is called according to how many parameters its
    signature declares: none, ``rank_U`` only, or ``sigma_star``,
    ``rank_U``, ``total_N`` and ``rank_X``. Sigma is treated as estimated
    when a truthy ``confidence_interval`` keyword argument is supplied and
    as known otherwise. ``**kwargs`` is forwarded to the selected routine.

    :return: the Power object from the selected routine.
    """
    _, hypo_sum_square, rank_U, total_N = calc_properties(
        delta_es=delta_es,
        rank_X=rank_X,
        relative_group_sizes=relative_group_sizes,
        rep_N=rep_N,
        sigma_star=sigma_star)

    estimator_arity = len(inspect.signature(epsilon_estimator).parameters)
    if estimator_arity == 0:
        expected_epsilon = epsilon_estimator()
    elif estimator_arity == 1:
        expected_epsilon = epsilon_estimator(rank_U=rank_U)
    else:
        expected_epsilon = epsilon_estimator(sigma_star=sigma_star,
                                             rank_U=rank_U,
                                             total_N=total_N,
                                             rank_X=rank_X)

    epsilon = _calc_epsilon(sigma_star, rank_U)

    # Placeholder returned only if none of the branches below matches.
    power = Power(power='Not Calculable.')

    if kwargs.get('confidence_interval'):
        sigma_source = Constants.SIGMA_ESTIMATED
    else:
        sigma_source = Constants.SIGMA_KNOWN

    # Arguments common to the known-sigma and estimated-sigma routines.
    shared = dict(rank_C=rank_C,
                  rank_U=rank_U,
                  total_N=total_N,
                  rank_X=rank_X,
                  sigma_star=sigma_star,
                  hypo_sum_square=hypo_sum_square,
                  expected_epsilon=expected_epsilon,
                  alpha=alpha,
                  unirep_method=unirep_method)

    if sigma_source == Constants.SIGMA_KNOWN:
        # The known-sigma routine takes the numeric epsilon value ...
        power = _unirep_power_known_sigma(epsilon=epsilon.eps,
                                          **shared,
                                          **kwargs)
    if sigma_source == Constants.SIGMA_ESTIMATED:
        # ... while the estimated-sigma routine takes the epsilon object.
        power = _unirep_power_estimated_sigma(epsilon=epsilon,
                                              **shared,
                                              **kwargs)
    if sigma_source == Constants.INTERNAL_PILOT:
        # NOTE(review): sigma_source is only ever set to SIGMA_KNOWN or
        # SIGMA_ESTIMATED above, so this branch is not selected here.
        sigmastareval = np.linalg.eigvals(sigma_star)
        power = _unirep_power_known_sigma_internal_pilot(
            rank_C, rank_U, total_N, rank_X, sigma_star, hypo_sum_square,
            expected_epsilon, epsilon, alpha, sigmastareval, unirep_method,
            **kwargs)
    return power
def _undefined_power(error_message=None):
    """Return a Power whose power and noncentrality are NaN and whose
    fmethod is Constants.FMETHOD_MISSING, carrying ``error_message``."""
    undefined_power = float('nan')
    undefined_noncentrality = float('nan')
    return Power(undefined_power,
                 undefined_noncentrality,
                 Constants.FMETHOD_MISSING,
                 error_message)