def __init__(self, chiSquareTerms, normalCoefficient, accuracy):
    """Validate the inputs and cache summary values of the chi-square lambdas.

    :param chiSquareTerms: non-empty sequence of terms, each exposing
        ``getLambda()``
    :param normalCoefficient: coefficient of the normal term; must not be NaN
    :param accuracy: requested accuracy; must be a non-NaN number > 0
    :raises GlimmpseValidationException: if an argument is invalid, or if the
        largest lambda, the smallest lambda and the normal coefficient are
        all zero
    """
    # ``is None`` avoids an element-wise comparison if an array is passed.
    if chiSquareTerms is None or len(chiSquareTerms) == 0:
        raise GlimmpseValidationException("No chi-square terms specified")
    if np.isnan(normalCoefficient):
        raise GlimmpseValidationException(
            "Invalid coefficient for the normal term")
    if np.isnan(accuracy) or accuracy <= 0:
        raise GlimmpseValidationException(
            "Accuracy must be greater than 0")

    self.chiSquareTerms = chiSquareTerms
    self.accuracy = accuracy
    self.normalCoefficient = normalCoefficient

    # Extreme lambdas. Note that the minimum is not truncated at 0 here.
    lambdas = [term.getLambda() for term in chiSquareTerms]
    self.maxLambda = max(lambdas)
    self.minLambda = min(lambdas)
    self.maxLambdaAbsValue = (-self.minLambda
                              if self.maxLambda < -self.minLambda
                              else self.maxLambda)
    if self.maxLambda == 0 and self.minLambda == 0 and normalCoefficient == 0:
        raise GlimmpseValidationException(
            "At least one of min/max lambda values or coefficient of the normal term must be non-zero"
        )

    # Rank the lambdas by absolute value. The map is created per instance so
    # that ranks are never shared between (or missing from) instances.
    self.chiSquareRankMap = {}
    ranked = sorted(lambdas, key=lambda value: np.abs(value))
    for rank, value in enumerate(ranked):
        self.chiSquareRankMap[value] = rank
def parameters_positive_wrapper(**kwargs): Scalar = None for key, value in kwargs.items(): if key == 'Scalar': Scalar = value if Scalar: # Check sigscal if Scalar.sigma_scalar <= Scalar.tolerance: raise GlimmpseValidationException( 'ERROR 12: All SIGSCAL values must be > TOLERANCE > 0.') # Check alpha if Scalar.alpha <= Scalar.tolerance or Scalar.alpha >= 1: raise GlimmpseValidationException( 'ERROR 13: All ALPHA values must be > TOLERANCE > 0 and < 1.' ) # Check tolerance if Scalar.tolerance <= 0: raise GlimmpseValidationException( 'ERROR 17: User specified TOLERANCE <= zero.') if Scalar.tolerance >= 0.01: raise GlimmpseValidationException( 'WARNING 6: User specified TOLERANCE >= 0.01. This is the value assumed to be numeric ' 'zero and affects many calculations. Please check that this value is correct.' ) return function(**kwargs) return parameters_positive_wrapper
def __init__(self,
             cl_desire=False,
             sigma_type=False,
             beta_type=False,
             n_est=None,
             rank_est=None,
             alpha_cl=0.025,
             alpha_cu=0.025):
    """
    Record the confidence-limit settings and derive ``self.cl_type``.

    :param cl_desire: confidence intervals are desired or not
    :param sigma_type: sigma is estimated or not
    :param beta_type: beta is estimated or not
    :param n_est: number of observations in the analysis which yielded the
        beta and sigma estimates
    :param rank_est: rank of the design matrix in the analysis which yielded
        the beta and sigma estimates
    :param alpha_cl: lower tail probability for power CL
    :param alpha_cu: upper tail probability for power CL

    ``self.cl_type`` is set to one of ``CLTYPE_DESIRED_ESTIMATE``,
    ``CLTYPE_DESIRED_KNOWN`` or ``CLTYPE_NOT_DESIRED``.
    """
    self.sigma_type = sigma_type

    if not cl_desire:
        self.cl_type = Constants.CLTYPE_NOT_DESIRED
    elif not sigma_type:
        # Confidence limits were requested but sigma is not an estimate.
        raise GlimmpseValidationException(
            'sigma_type need to be estimated to calculate CL')
    else:
        # Sigma is estimated; the type depends on whether beta is too.
        self.cl_type = (Constants.CLTYPE_DESIRED_ESTIMATE
                        if beta_type else Constants.CLTYPE_DESIRED_KNOWN)
        assert n_est is not None
        assert rank_est is not None

    self.n_est = n_est
    self.rank_est = rank_est

    tail_out_of_range = (alpha_cl < 0 or alpha_cu < 0 or alpha_cl >= 1
                         or alpha_cu >= 1)
    if tail_out_of_range or (alpha_cl + alpha_cu >= 1):
        raise GlimmpseValidationException(
            'ERROR 35: ALPHA_CL and ALPHA_CU must both be >= 0 and <= 1.')
    self.alpha_cl = alpha_cl
    self.alpha_cu = alpha_cu
def calculate_sigma_star(self, hypothesis_type):
    """Build sigma star from the outcome, repeated-measure and cluster parts.

    The three components are combined with ``kronecker_list``. For custom-U
    and polynomial hypotheses with a non-integer ``self.u_matrix`` the result
    is pre- and post-multiplied by the U matrix. The gaussian adjustment is
    then subtracted.

    :raises GlimmpseValidationException: if the Cholesky factorisation of the
        result fails (``ERR_NOT_POSITIVE_DEFINITE``)
    """
    ##############################################################
    # Important! if at any point this logic is changed, be sure to
    # update the corresponding matrix display logic in the results
    # page in the front end web app.
    ##############################################################
    uses_u_matrix = hypothesis_type in [
        HypothesisType.CUSTOM_U_MATRIX.value,
        HypothesisType.POLYNOMIAL.value
    ] and not isinstance(self.u_matrix, int)

    combined = kronecker_list([
        self.sigma_star_outcome_component,
        self.sigma_star_repeated_measure_component,
        self.sigma_star_cluster_component
    ])
    if uses_u_matrix:
        combined = self.u_matrix.T * combined * self.u_matrix

    sigma_star = combined - self.sigma_star_gaussian_adjustment

    # The factorisation is attempted only for its failure signal.
    try:
        np.linalg.cholesky(sigma_star)
    except np.linalg.LinAlgError:
        raise GlimmpseValidationException(
            Constants.ERR_NOT_POSITIVE_DEFINITE.value)
    return sigma_star
def __init__(self, sigma_star, rank_U):
    """
    Compute eigenvalue summaries of the trace-normalised ``sigma_star``.

    :param sigma_star: U` * (SIGMA # SIGSCALTEMP) * U
    :param rank_U: rank of U matrix; must equal the row count of sigma_star

    Attributes set on the instance:
        esig, sigma_star divided by its trace
        d, number of distinct eigenvalues
        mtp, multiplicities of the distinct eigenvalues (column matrix)
        deigval, the distinct eigenvalues (column matrix)
        eps, slam1 / (rank_U * slam2)
        slam1, square of the sum of eigenvalues
        slam2, sum of squared eigenvalues
        slam3, sum of eigenvalues
    """
    if np.shape(sigma_star)[0] != rank_U:
        raise GlimmpseValidationException(
            "rank of U should equal to nrows of sigma_star")

    # Get eigenvalues of covariance matrix associated with E. This is NOT
    # the USUAL sigma. This cov matrix is that of (Y-YHAT)*U, not of (Y-YHAT).
    # The covariance matrix is normalized to minimize numerical problems
    self.esig = sigma_star / np.trace(sigma_star)
    eigenvalues = np.linalg.eigvals(self.esig)
    distinct, counts = np.unique(eigenvalues, return_counts=True)

    self.slam1 = np.sum(eigenvalues)**2
    self.slam2 = np.sum(np.square(eigenvalues))
    self.slam3 = np.sum(eigenvalues)
    self.eps = self.slam1 / (rank_U * self.slam2)

    self.d = len(distinct)
    self.deigval = np.matrix(distinct).T
    self.mtp = np.matrix(counts).T
def calculate_u_matrix(self, isu_factors):
    """Return the within-participant contrast (U) matrix.

    For a custom U-matrix hypothesis the user-supplied values are returned
    when at least one repeated measure is in the hypothesis, otherwise the
    integer 1. For every other hypothesis the matrix is the Kronecker
    product of an identity over the outcomes, the repeated-measures U matrix
    and a 1x1 cluster matrix.

    :raises GlimmpseValidationException: if a matrix result is not of full
        column rank
    """
    custom = isu_factors.uMatrix
    if custom and custom.hypothesis_type == HypothesisType.CUSTOM_U_MATRIX.value:
        any_in_hypothesis = any(
            measure.in_hypothesis
            for measure in isu_factors.get_repeated_measures())
        u_matrix = custom.values if any_in_hypothesis else 1
    else:
        u_outcomes = np.identity(len(isu_factors.get_outcomes()))
        u_cluster = np.matrix([[1]])
        u_repeated_measures = LinearModel._get_repeated_measures_u_matrix(
            self, isu_factors)
        u_matrix = kronecker_list(
            [u_outcomes, u_repeated_measures, u_cluster])

    # The integer placeholder has no rank to check.
    is_matrix = not isinstance(u_matrix, int)
    if is_matrix and np.linalg.matrix_rank(u_matrix) != u_matrix.shape[1]:
        raise GlimmpseValidationException(
            "Your hypothesis is untestable because your within contrast matrix"
            " contains redundant information (is less than full column rank). "
            "Please change your custom contrast matrix.")
    return u_matrix
def calculate_c_matrix(self, isu_factors):
    """Return the between-participant contrast (C) matrix.

    For a custom C-matrix hypothesis the user-supplied values are returned
    when at least one predictor is in the hypothesis, otherwise the integer
    1. For every other hypothesis the matrix is the Kronecker product of the
    partial C matrices of the selected predictors (all of them when
    ``self.full_beta`` is set) followed by a 1x1 identity.

    :raises GlimmpseValidationException: if a matrix result is not of full
        row rank
    """
    custom = isu_factors.cMatrix
    if custom and custom.hypothesis_type == HypothesisType.CUSTOM_C_MATRIX.value:
        any_in_hypothesis = any(
            predictor.in_hypothesis
            for predictor in isu_factors.get_predictors())
        c_matrix = custom.values if any_in_hypothesis else 1
    else:
        predictors = isu_factors.get_predictors()
        if self.full_beta:
            selected = predictors
        else:
            selected = [p for p in predictors if p.in_hypothesis]
        partials = [self.calculate_partial_c_matrix(p) for p in selected]
        partials.append(np.matrix(np.identity(1)))
        c_matrix = kronecker_list(partials)

    # The integer placeholder has no rank to check.
    is_matrix = not isinstance(c_matrix, int)
    if is_matrix and np.linalg.matrix_rank(c_matrix) != c_matrix.shape[0]:
        raise GlimmpseValidationException(
            "Your hypothesis is untestable because your between contrast matrix"
            " contains redundant information (is less than full row rank). "
            "Please change your custom contrast matrix.")
    return c_matrix
def isPositiveDefinite(self, m: np.matrix):
    """Return True when every eigenvalue of the square matrix ``m`` is > 0.

    :param m: matrix to test
    :raises GlimmpseValidationException: if ``m`` is not square
    """
    row_count, column_count = m.shape[0], m.shape[1]
    if row_count != column_count:
        raise GlimmpseValidationException(
            "Matrix must be non-null, square")
    return all(eigenvalue > 0.0 for eigenvalue in np.linalg.eigvals(m))
def repn_positive_wrapper(**kwargs):
    """Validate ``Scalar.rep_n``, then call ``function`` with the same kwargs.

    ``rep_n`` must exceed ``Scalar.tolerance`` (ERROR 10). When an ``Option``
    is also supplied and it does not set ``opt_fracrepn``, ``rep_n`` must be
    a whole number (ERROR 11).

    :raises GlimmpseValidationException: on either violation
    """
    Scalar = kwargs.get('Scalar')
    Option = kwargs.get('Option')

    # Check repn
    if Scalar and Scalar.rep_n <= Scalar.tolerance:
        raise GlimmpseValidationException(
            'ERROR 10: All REPN values must be > TOLERANCE > 0.')

    # A fractional part is rejected only when fractional REPN has NOT been
    # enabled. ``x % 1`` is never 1, so the remainder is compared with 0.
    if (Scalar and Option and not Option.opt_fracrepn
            and Scalar.rep_n % 1 != 0):
        raise GlimmpseValidationException(
            'ERROR 11: All REPN values must be positive integers. To allow fractional REPN values, '
            'specify opt_fracrepn')
    return function(**kwargs)
def valid_internal_pilot_wrapper(**kwargs):
    """Reject an internal-pilot plan combined with an estimated sigma.

    The check runs only when both the ``CL`` and ``IP`` keyword arguments are
    supplied; a missing one no longer causes an UnboundLocalError or an
    AttributeError on None. All keyword arguments are then forwarded
    unchanged to ``function``.

    :raises GlimmpseValidationException: if ``IP.ip_plan`` and
        ``CL.sigma_type`` are both truthy (ERROR 91)
    """
    CL = kwargs.get('CL')
    IP = kwargs.get('IP')

    # Check IP_PLAN and SIGTYPE
    if CL and IP and IP.ip_plan and CL.sigma_type:
        raise GlimmpseValidationException(
            'ERROR 91: SIGMA must be known when planning an internal pilot.'
        )
    return function(**kwargs)
def __unconditional_power_simpson_term(self, fcrit, df1, df2, t):
    """
    Evaluate one integrand term at ``t`` for the unconditional power integral
    (Glueck and Muller; the year and equation number were not recorded in
    the original comment).

    :param fcrit: critical F value
    :param df1: first degrees-of-freedom argument passed to ``ncfdtr``
    :param df2: second degrees-of-freedom argument passed to ``ncfdtr``
    :param t: point at which the term is evaluated; also passed to
        ``ncfdtr`` as its third argument
    :return: 0 when ``H0`` and ``H1`` agree to 12 decimal places, otherwise
        ``self.cdf(t)`` times the difference of the two ``ncfdtr`` values
    :raises GlimmpseValidationException: if ``self.H1 < self.H0``
    """
    # check bounds H0, H1
    if self.H1 < self.H0:
        # The message now matches the condition being tested.
        raise GlimmpseValidationException("H1 is less than H0")
    if round(self.H1, 12) == round(self.H0, 12):
        return 0

    t1 = special.ncfdtr(df1, df2, t, fcrit)
    t2_fcrit = (fcrit * df1) / (df1 + 2)
    t2 = special.ncfdtr(df1 + 2, df2, t, t2_fcrit)
    return self.cdf(t) * (t1 - t2)
def validate_design(self):
    """
    Validate the study design.

    Runs the pre-calculation validation and collects any failure in
    ``self.exceptions`` (reset to an empty list on every call). A
    GlimmpseValidationException is stored as raised; any other exception has
    its traceback printed and is replaced by a generic
    GlimmpseValidationException.

    :return: True if no exception was collected, False otherwise
    """
    self.exceptions = []
    try:
        self.__pre_calc_validation()
    except GlimmpseValidationException as e:
        # Lists have no ``push``; ``append`` is the Python equivalent.
        self.exceptions.append(e)
    except Exception:
        traceback.print_exc()
        self.exceptions.append(
            GlimmpseValidationException(
                "Sorry, something seems to have gone wrong with our calculations. Please contact us."
            ))
    return not self.exceptions
def check_options_wrapper(**kwargs):
    """Reject NONCENCL when confidence limits are not desired.

    The check runs only when both the ``CL`` and ``Option`` keyword arguments
    are supplied and truthy. All keyword arguments are then forwarded
    unchanged to ``function``.

    :raises GlimmpseValidationException: if ``CL.cl_type`` is
        ``ClType.CLTYPE_NOT_DESIRED`` while ``Option.opt_noncencl`` is set
        (ERROR 83)
    """
    cl = kwargs.get('CL')
    option = kwargs.get('Option')

    if cl and option:
        cl_not_desired = cl.cl_type == ClType.CLTYPE_NOT_DESIRED
        if cl_not_desired and option.opt_noncencl:
            raise GlimmpseValidationException(
                "ERROR 83: NONCENCL is not a valid option when CL not desired."
            )
    return function(**kwargs)
def _calc_undf1_undf2(unirep_method, exeps, nue, rank_C, rank_U):
    """Return the pair ``(rank_C * rank_U, rank_U * nue)``.

    A warning is issued, and the calculation continues, when
    ``rank_U > nue`` for the Uncorrected, Geisser-Greenhouse or Box methods.

    :raises GlimmpseValidationException: if ``exeps`` is NaN or ``nue <= 0``
    """
    if rank_U > nue and unirep_method in (Constants.UN, Constants.GG,
                                          Constants.BOX):
        # Deliberately a warning rather than an exception: during the sample
        # size search a smaller sample size can reach this state, and the
        # search must not be stopped by it.
        warnings.warn(
            'Power is missing, because Uncorrected, Geisser-Greenhouse and Box tests are '
            'poorly behaved (super low power and test size) when B > N-R, i.e., HDLSS.'
        )

    if np.isnan(exeps) or nue <= 0:
        raise GlimmpseValidationException("exeps is NaN or total_N <= rank_X")

    return rank_C * rank_U, rank_U * nue
def __init__(self, ip_plan=False, n_ip=None, rank_ip=None):
    """
    Store the internal-pilot settings, validating them when a pilot is planned.

    :param ip_plan: indicates whether power is computed within the context of
        planning an internal pilot
    :param n_ip: number of observations planned for the internal pilot of the
        future study (required if IP_PLAN=True)
    :param rank_ip: rank of the design matrix used in the future study
        (required if IP_PLAN=True)
    :raises GlimmpseValidationException: if a pilot is planned and ``n_ip``
        does not exceed ``rank_ip`` (ERROR 90)
    """
    self.ip_plan = ip_plan

    if ip_plan:
        # Both sizes are mandatory once a pilot is planned.
        assert n_ip is not None
        assert rank_ip is not None
        pilot_too_small = n_ip <= rank_ip
        if pilot_too_small:
            raise GlimmpseValidationException(
                'ERROR 90: N_IP must > RANK_IP')

    self.n_ip = n_ip
    self.rank_ip = rank_ip
def _calc_epsilon(sigma_star: np.matrix, rank_U: float) -> Epsilon:
    r"""
    Build the :class:`.Epsilon` summary needed for Geisser-Greenhouse,
    Huynh-Feldt or uncorrected repeated measures power calculations.
    It is the first step. Program uses approximations of expected values of
    epsilon estimates due to Muller (1985), based on theorem of Fujikoshi
    (1978). Program requires that U be orthonormal and orthogonal to a
    column of 1's.

    The docstring is a raw string so that the LaTeX backslashes below are
    not parsed as (invalid) escape sequences.

    Parameters
    ----------
    sigma_star: np.matrix
        The covariance matrix, :math:`\Sigma_*`, defined as:
        :math:`\Sigma_* = U'\Sigma U`
        This should be scaled in advance by multiplying :math:`\Sigma` by a
        constant SIGMASCALARTEMP
    rank_U: float
        rank of U matrix; must equal the number of rows of ``sigma_star``

    Returns
    -------
    epsilon
        :class:`.Epsilon` object built from ``sigma_star`` and ``rank_U``

    Raises
    ------
    GlimmpseValidationException
        If ``rank_U`` differs from the number of rows of ``sigma_star``.
    """
    # todo is this true for ALL epsilon? If so build into the class and remove this method.
    if rank_U != np.shape(sigma_star)[0]:
        raise GlimmpseValidationException(
            "rank of U should be equal to the number of rows in sigma_star")

    # Get eigenvalues of covariance matrix associated with E. This is NOT
    # the USUAL sigma. This cov matrix is that of (Y-YHAT)*U, not of (Y-YHAT).
    # The covariance matrix is normalized to minimize numerical problems
    return Epsilon(sigma_star, rank_U)
def _samplesize(test, model, **kwargs):
    """Run ``samplesize.samplesize`` for ``model`` and return ``(size, power)``.

    The model's noncentrality distribution, quantile and confidence interval
    are forwarded as keyword arguments when they are truthy, and
    ``tolerance`` is always set to 1e-12.

    :param test: the test passed through to ``samplesize.samplesize``
    :param model: object supplying the matrices and settings read below
    :raises GlimmpseValidationException: if the calculation raises a
        ValueError; the original error is chained as the cause
    """
    if model.noncentrality_distribution:
        kwargs['noncentrality_distribution'] = model.noncentrality_distribution
    if model.quantile:
        kwargs['quantile'] = model.quantile
    if model.confidence_interval:
        kwargs['confidence_interval'] = model.confidence_interval
    kwargs['tolerance'] = 1e-12

    try:
        size, power = samplesize.samplesize(
            test=test,
            rank_C=np.linalg.matrix_rank(model.c_matrix),
            rank_X=model.get_rank_x(),
            relative_group_sizes=model.groups,
            alpha=model.alpha,
            sigma_star=model.sigma_star,
            delta_es=model.delta,
            targetPower=model.target_power,
            starting_smallest_group_size=model.minimum_smallest_group_size,
            **kwargs)
    except ValueError as e:
        # A ValueError raised without arguments must not turn into an
        # IndexError that hides the real failure.
        message = e.args[0] if e.args else str(e)
        raise GlimmpseValidationException(message) from e
    return size, power
def ranksymm(matrix: np.matrix, tolerance: float) -> int:
    """Compute the rank of a square symmetric nonnegative definite matrix
    via the eigenvalues of the matrix scaled by its largest absolute element.

    Parameters
    ----------
    matrix:
        Matrix for which rank is to be calculated
    tolerance:
        Value below which scaled eigenvalues are declared zero

    Returns
    -------
    rankmatrix
        The number of scaled eigenvalues that are >= ``tolerance``.

    Raises
    ------
    GlimmpseValidationException
        If the matrix has no columns, is not square, consists only of NaN,
        has a largest absolute element of exactly zero, is not symmetric
        within sqrt(tolerance), or has a scaled eigenvalue below
        -sqrt(tolerance).
    """
    # empty matrix
    if np.shape(matrix)[1] == 0:
        raise GlimmpseValidationException(
            "Matrix {0} does not exist.".format(matrix))
    # number of rows not equal to number of columns
    if np.shape(matrix)[0] != np.shape(matrix)[1]:
        raise GlimmpseValidationException(
            "Matrix {0} is not square.".format(matrix))
    # matrix with all missing values
    if np.isnan(matrix).all():
        raise GlimmpseValidationException(
            "Matrix {0} is all missing values.".format(matrix))

    maxabsval = abs(matrix).max()
    # matrix with all zero
    if maxabsval == 0:
        raise GlimmpseValidationException(
            "Matrix {0} has MAX(ABS(all elements)) = exact zero.".format(
                matrix))

    nmatrix = matrix / maxabsval
    evals = np.linalg.eigvals(nmatrix)

    # matrix not symmetric
    if abs(nmatrix - nmatrix.T).max() >= tolerance**0.5:
        raise GlimmpseValidationException(
            "Matrix {0} is not symmetric within sqrt(tolerance).".format(
                matrix))
    # matrix not non-negative definite
    if evals.min() < -tolerance**0.5:
        # Adjacent literals are used instead of backslash continuations so
        # the source indentation does not end up inside the message.
        raise GlimmpseValidationException(
            ("Matrix {0} is *NOT* non-negative definite (and has at "
             "least one eigenvalue strictly less than "
             "zero). This may happen due to programming "
             "error or rounding error of a nearly LTFR "
             "matrix. This may be able to be fixed using "
             "usual scaling/centering techniques. The "
             "Eigenvalues/MAX(ABS(original matrix)) are: {1}. "
             "The max(abs(original matrix)) is {2}.").format(
                 matrix, evals, maxabsval))

    # Return a plain int rather than a NumPy scalar.
    return int(np.sum(evals >= tolerance))
def from_study_design(self, study_design: StudyDesign, inputs: ScenarioInputs, orthonormalize_u_matrix):
    """
    Populate this LinearModel with values derived from a study design.

    Nothing is returned; the model is filled in place. Failures are not
    raised to the caller: every exception is caught and recorded in
    ``self.errors``.

    :param study_design: A StudyDesign defined by the user
    :param inputs: the scenario inputs read below (alpha, test, target
        power, scale factors, smallest group size, power method, quantile
        and confidence interval)
    :param orthonormalize_u_matrix: stored as-is on the model
    """
    try:
        self.orthonormalize_u_matrix = orthonormalize_u_matrix
        self.full_beta = study_design.full_beta
        self.essence_design_matrix = self.calculate_design_matrix(
            study_design.isu_factors)
        self.repeated_rows_in_design_matrix = inputs.smallest_group_size
        self.hypothesis_beta = self.get_beta(study_design.isu_factors, inputs)
        self.c_matrix = self.calculate_c_matrix(study_design.isu_factors)
        self.u_matrix = self.calculate_u_matrix(study_design.isu_factors)
        # The sigma star components are set first and the combined
        # sigma_star is computed after them.
        # NOTE(review): presumably calculate_sigma_star reads these
        # attributes, which would make the order matter - confirm.
        self.sigma_star_outcome_component = self.calculate_outcome_sigma_star(
            study_design.isu_factors, inputs)
        self.sigma_star_repeated_measure_component = self.calculate_rep_measure_sigma_star(
            study_design.isu_factors)
        self.sigma_star_cluster_component = self.calculate_cluster_sigma_star(
            study_design.isu_factors)
        self.sigma_star_gaussian_adjustment = self.calculate_gaussian_adjustment(
            study_design.gaussian_covariate)
        self.sigma_star = self.calculate_sigma_star(
            study_design.isu_factors.uMatrix.hypothesis_type)
        self.theta_zero = study_design.isu_factors.theta0
        # NOTE(review): alpha and test are each assigned twice below with the
        # same source value; the second assignments look redundant.
        self.alpha = inputs.alpha
        self.test = inputs.test
        self.alpha = inputs.alpha
        self.target_power = inputs.target_power
        self.scale_factor = inputs.scale_factor
        self.variance_scale_factor = inputs.variance_scale_factor
        self.test = inputs.test
        self.smallest_group_size = inputs.smallest_group_size
        self.total_n = self.calculate_total_n(study_design.isu_factors, inputs)
        self.calc_metadata()
        # Sets numpy's print precision (a process-wide numpy setting).
        np.set_printoptions(precision=18)
        self.groups = self.get_groups(study_design.isu_factors)
        self.power_method = inputs.power_method
        self.quantile = inputs.quantile
        self.confidence_interval = inputs.confidence_interval
        if study_design.solve_for == SolveFor.SAMPLESIZE:
            self.calculate_min_smallest_group_size(
                study_design.isu_factors, inputs)
        # A delta matrix of rank zero is recorded as "no difference".
        if np.linalg.matrix_rank(self.delta) == 0:
            self.errors.add(Constants.ERR_NO_DIFFERENCE)
        if study_design.gaussian_covariate:
            self.noncentrality_distribution = self.calculate_noncentrality_distribution(
                study_design)
            # Carry over any errors reported by the distribution.
            if self.noncentrality_distribution.errors and len(
                    self.noncentrality_distribution.errors) > 0:
                self.errors.update(self.noncentrality_distribution.errors)
        else:
            self.noncentrality_distribution = None
    except (GlimmpseValidationException, GlimmpseCalculationException) as e:
        # Known validation/calculation failures are recorded unchanged.
        self.errors.add(e)
    except Exception as e:
        # Anything unexpected is printed and replaced by a generic message.
        traceback.print_exc()
        self.errors.add(
            GlimmpseValidationException(
                "Sorry, something seems to have gone wrong with our calculations. Please contact us at [email protected]."
            ))
def _calc_multipliers_est_sigma(unirep_method, eps, hypothesis_error, nue, rank_C, rank_U, approximation_method, n_est, rank_est):
    """Compute the epsilon multipliers and CL degrees of freedom when SIGMA
    is estimated.

    :param unirep_method: compared against Constants.UN / BOX / HF / CM / GG
        to select the multipliers
    :param eps: epsilon value used directly for several multipliers
    :param hypothesis_error: object supplying the quantities q1..q5 read below
    :param nue: compared with rank_U in the HF branch
    :param rank_C: rank of the C matrix (used as a divisor)
    :param rank_U: rank of the U matrix
    :param approximation_method: compared against
        Constants.UCDF_MULLER1989_APPROXIMATION to select e_3_5
    :param n_est: with rank_est gives the estimation df, n_est - rank_est
    :param rank_est: see n_est
    :return: tuple (cl1df, e_1_2, e_3_5, e_4, omegaua)
    :raises GlimmpseValidationException: if n_est - rank_est <= 1 (ERROR 81)

    NOTE(review): e_1_2 is only assigned inside the method-specific branches,
    so a unirep_method matching none of UN, BOX, HF, CM or GG would leave it
    unbound at the return statement - confirm callers never pass such a
    value.
    """
    # Case 2
    # Enter loop to compute E1-E5 based on estimated SIGMA
    nu_est = n_est - rank_est
    if nu_est <= 1:
        raise GlimmpseValidationException(
            "ERROR 81: Too few estimation df in LASTUNI. df = N_EST - RANK_EST <= 1."
        )
    # For POWERCALC =6=HF, =7=CM, =8=GG critical values
    epstilde_r = ((nu_est + 1) * hypothesis_error.q3 - 2 * hypothesis_error.q4
                  ) / (rank_U * (nu_est * hypothesis_error.q4 - hypothesis_error.q3))
    # epstilde_r capped at 1
    epstilde_r_min = min(epstilde_r, 1)
    # nu_est^2 + nu_est - 2, shared by the numerator and denominator below
    mult = np.power(nu_est, 2) + nu_est - 2
    epsnhat_num = hypothesis_error.q3 * nu_est * (
        nu_est + 1
    ) + hypothesis_error.q1 * hypothesis_error.q2 * 2 * mult / rank_C - hypothesis_error.q4 * 2 * nu_est
    epsnhat_den = hypothesis_error.q4 * nu_est * nu_est + hypothesis_error.q5 * 2 * mult / rank_C - hypothesis_error.q3 * nu_est
    epsnhat = epsnhat_num / (rank_U * epsnhat_den)
    nua0 = (nu_est - 1) + nu_est * (nu_est - 1) / 2
    tau10 = nu_est * (
        (nu_est + 1) * hypothesis_error.q1 * hypothesis_error.q1 -
        2 * hypothesis_error.q4) / (nu_est * nu_est + nu_est - 2)
    tau20 = nu_est * (nu_est * hypothesis_error.q4 -
                      hypothesis_error.q1 * hypothesis_error.q1) / (nu_est * nu_est + nu_est - 2)
    epsda = tau10 * (nua0 - 2) * (nua0 - 4) / (rank_U * nua0 * nua0 * tau20)
    # Clamp epsda to the interval [1 / rank_U, 1]
    epsda = max(min(epsda, 1), 1 / rank_U)
    epsna = (1 + 2 * (hypothesis_error.q2 / rank_C) / hypothesis_error.q1) / (
        1 / epsda + 2 * rank_U * (hypothesis_error.q5 / rank_C) /
        (hypothesis_error.q1 * hypothesis_error.q1))
    omegaua = hypothesis_error.q2 * epsna * (rank_U / hypothesis_error.q1)
    # Set E_1_2 for all tests
    # for UN or Box critical values
    if unirep_method == Constants.UN or unirep_method == Constants.BOX:
        e_1_2 = epsda
    # for HF crit val
    if unirep_method == Constants.HF:
        if rank_U <= nue:
            e_1_2 = epstilde_r_min
        else:
            e_1_2 = epsda
    # for CM crit val
    if unirep_method == Constants.CM:
        e_1_2 = epsda
    # for GG crit val
    if unirep_method == Constants.GG:
        e_1_2 = eps
    # Set E_3_5 for all tests
    if approximation_method == Constants.UCDF_MULLER1989_APPROXIMATION:
        e_3_5 = eps
    else:
        e_3_5 = epsnhat
    # Set E_4 for all tests
    if unirep_method == Constants.CM:
        e_4 = epsda
    else:
        e_4 = eps
    # Compute DF for confidence limits for all tests
    cl1df = rank_U * nu_est * e_4 / e_3_5
    return cl1df, e_1_2, e_3_5, e_4, omegaua