Exemple #1
0
 def __init__(self, **kwargs):
     """Initialise every attribute to an empty or zero placeholder.

     ``kwargs`` is accepted but not read by this constructor.
     """
     # Matrix-valued inputs start out as empty matrices.  ``np.matrix``
     # requires a data argument, so an empty list is passed explicitly.
     self.essencex = np.matrix([])
     self.betafunction = np.matrix([])
     self.c_matrix = np.matrix([])
     self.u_matrix = np.matrix([])
     self.sigma = 0
     self.theta_zero = 0
     #Scalar
     #CalcMethod
     #Option
     # CL
     # IP
     # I think there is an object here....
     ######################################
     self.df1 = 0
     self.df2 = 0
     self.dfh = []
     self.dfe2 = 0
     #######################################
     self.alphatest = 0
     self.n2 = 0
     self.cl_type = 0
     self.n_est = 0
     self.rank_est = 0
     self.alpha_cl = 0
     self.alpha_cu = 0
     self.tolerance = 1e-18
     self.omega = 0
     self.power = Power()
     self.exceptions = []
def _multi_power(alpha: float, df1: float, df2: float, omega: float,
                 total_N: float, **kwargs) -> Power:
    """Shared power computation used by the multirep methods.

    Parameters
    ----------
    alpha: float
        significance level; the critical value is ``finv(1 - alpha, df1, df2)``
    df1: float
        first degrees of freedom passed to ``finv`` / ``probf``
    df2: float
        second degrees of freedom passed to ``finv`` / ``probf``
    omega: float
        noncentrality parameter; replaced by a quantile-based value when both
        ``noncentrality_distribution`` and ``quantile`` are supplied
    total_N: float
        forwarded as ``n2`` to ``Power.glmmpcl`` when a confidence interval
        is requested
    kwargs:
        optional ``noncentrality_distribution``, ``quantile`` and
        ``confidence_interval`` entries; other keys are ignored

    Returns
    -------
    power: Power
        built from the power value, ``omega`` and the reported fmethod.
    """
    dist = kwargs.get('noncentrality_distribution')
    q = kwargs.get('quantile')
    ci = kwargs.get('confidence_interval')

    fcrit = finv(1 - alpha, df1, df2)
    if dist and not q:
        # A distribution without a quantile: integrate over the distribution.
        prob, fmethod = dist.unconditional_power_simpson(fcrit=fcrit,
                                                         df1=df1,
                                                         df2=df2)
    else:
        if dist:
            # Distribution and quantile both given: swap in the quantile omega.
            omega = __calc_quantile_omega(dist, q)
        prob, fmethod = probf(fcrit, df1, df2, omega)

    degenerate = fmethod == Constants.FMETHOD_NORMAL_LR and prob == 1
    powerval = float(alpha if degenerate else 1 - prob)

    result = Power(powerval, omega, fmethod)
    if ci:
        cl_type = _get_cl_type(ci)

        result.glmmpcl(is_multirep=True,
                       alphatest=alpha,
                       dfh=df1,
                       n2=total_N,
                       dfe2=df2,
                       cl_type=cl_type,
                       n_est=ci.n_est,
                       rank_est=ci.rank_est,
                       alpha_cl=ci.lower_tail,
                       alpha_cu=ci.upper_tail,
                       fcrit=fcrit,
                       tolerance=1e-12,
                       omega=omega)
    return result
Exemple #3
0
    def test_multi_power(self):
        """_multi_power without optional arguments matches the reference Power."""
        reference = Power(0.138179071626, 3, Constants.FMETHOD_NORMAL_LR)

        # Positional order: alpha, df1, df2, omega, total_N
        result = multirep._multi_power(0.05, 3, 4, 3, 10)

        # Power is compared to four decimal places only.
        reference_power = round(reference.power, 4)
        result_power = round(result.power, 4)
        self.assertEqual(reference_power, result_power)
        self.assertEqual(reference.noncentrality_parameter,
                         result.noncentrality_parameter)
Exemple #4
0
    def test_glmmpcl(self):
        """
        glmmpcl should attach the expected lower and upper bounds to the Power
        object it is called on.
        """
        #todo where is this example from? why are we rounding?
        alphatest = 0.05
        dfh = 20  # df1
        dfe2 = 28  # df2
        no_approx = Constants.FMETHOD_NOAPPROXIMATION

        expected = Power(0.9, 0.05, no_approx)
        # The return value is not inspected; the bounds are read back from
        # the Power object itself.
        expected.glmmpcl(
            is_multirep=True,
            alphatest=alphatest,
            dfh=dfh,
            n2=30,  # total_N ??? what is this
            dfe2=dfe2,
            cl_type=Constants.CLTYPE_DESIRED_KNOWN,
            n_est=20,
            rank_est=1,
            alpha_cl=0.048,
            alpha_cu=0.052,
            fcrit=finv(1 - alphatest, dfh, dfe2),
            tolerance=0.01,
            omega=200)

        lower = expected.lower_bound
        upper = expected.upper_bound

        # Lower confidence bound
        self.assertEqual(round(lower.power, 7), 0.9999379)
        self.assertEqual(round(lower.noncentrality_parameter, 5), 105.66408)
        self.assertEqual(lower.fmethod, no_approx)
        # Upper confidence bound
        self.assertEqual(upper.power, 1)
        self.assertEqual(round(upper.noncentrality_parameter, 5), 315.62306)
        self.assertEqual(upper.fmethod, no_approx)
def samplesize(
        test,
        rank_C: float,
        rank_X: float,
        relative_group_sizes,
        alpha: float,
        sigma_star: np.matrix,
        delta_es: np.matrix,
        targetPower,
        starting_smallest_group_size=Constants.STARTING_SAMPLE_SIZE.value,
        **kwargs):
    """
    Get the smallest realizable samplesize for the requested target power.

    The search doubles the smallest group size until the power returned by
    ``test`` exceeds ``targetPower``, then bisects between half of that size
    and that size.

    :param test: The statistical test chosen. It is called with the keyword
        arguments rank_C, rank_X, relative_group_sizes, rep_N, alpha,
        sigma_star and delta_es, plus any extra ``kwargs``, and must return an
        object with a ``power`` attribute.
    :param rank_C: Rank of the C matrix for your study design (forwarded to ``test``).
    :param rank_X: The rank of the design matrix; also used to cap the total sample size.
    :param relative_group_sizes: a list of ratios of size of the groups in your design.
    :param alpha: Type one error rate
    :param sigma_star: Sigma star (forwarded to ``test``).
    :param delta_es: forwarded unchanged to ``test``.
    :param targetPower: The power you wish to achieve
    :param starting_smallest_group_size: The starting point for the search.
    :param kwargs: optional arguments forwarded to every call of ``test`` so
        that the bracketing search and the bisection evaluate the same power
        function.
    :return: tuple ``(total_N, power)`` where ``power`` is the object returned
        by ``test`` at the selected sample size.
    :raises ValueError: if ``test`` reports a string instead of a number for
        power, if no usable upper bound is found, or if the target power is
        still not reached one step above the bisection result.
    """

    def power_at(rep_N):
        """Evaluate ``test`` for a smallest group size of ``rep_N``."""
        return test(rank_C=rank_C,
                    rank_X=rank_X,
                    relative_group_sizes=relative_group_sizes,
                    rep_N=rep_N,
                    alpha=alpha,
                    sigma_star=sigma_star,
                    delta_es=delta_es,
                    **kwargs)

    group_ratio_total = sum(relative_group_sizes)

    # calculate max valid per group N
    max_n = min(sys.maxsize / rank_X, Constants.MAX_SAMPLE_SIZE.value)
    upper_power = Power()
    upper_bound_smallest_group_size = starting_smallest_group_size
    upper_bound_total_N = upper_bound_smallest_group_size * group_ratio_total

    # find a samplesize which produces power greater than the desired power
    while (np.isnan(upper_power.power) or upper_power.power <= targetPower)\
            and upper_bound_total_N < max_n:

        upper_bound_total_N = upper_bound_smallest_group_size * group_ratio_total
        if upper_bound_total_N >= max_n:
            # NOTE(review): this sets the group size to total_N / max_n; a
            # clamp such as max_n / sum(relative_group_sizes) may have been
            # intended. Left unchanged - confirm.
            upper_bound_smallest_group_size = upper_bound_total_N / max_n

        # call power for this sample size
        upper_power = power_at(upper_bound_smallest_group_size)
        if isinstance(upper_power.power, str):
            raise ValueError(
                'Upper power is not calculable. Check that your design is realisable.'
                ' Usually the easies way to do this is to increase sample size'
            )
        upper_bound_smallest_group_size += upper_bound_smallest_group_size

    if upper_power.power is None or math.isnan(upper_power.power):
        raise ValueError(
            'Could not find a samplesize which achieves the target power. Please check your design.'
        )

    # undo the last doubling so the bound matches the size upper_power was
    # evaluated at
    upper_bound_smallest_group_size = upper_bound_smallest_group_size / 2
    # the lower bound of the search is half the upper bound (floor division)
    lower_bound_smallest_group_size = upper_bound_smallest_group_size // 2
    lower_power = power_at(lower_bound_smallest_group_size)

    #
    # At this point we have boundaries for searching.
    # 1. The lower bound already reaches the required power:
    #    take the value at the lower bound.
    # 2. The lower bound is below the required power:
    #    bisection search between the two bounds.
    #
    if lower_power.power >= targetPower:
        total_N = lower_bound_smallest_group_size * group_ratio_total
        power = lower_power
    else:

        def power_gap(n):
            return subtrtact_target_power(power_at(n), targetPower)

        total_per_group_n = math.floor(
            optimize.bisect(power_gap, lower_bound_smallest_group_size,
                            upper_bound_smallest_group_size))
        power = power_at(total_per_group_n)

        # flooring the root may land just below the target; try one more
        if (power.power < targetPower) or np.isnan(power.power):
            total_per_group_n = total_per_group_n + 1
            power = power_at(total_per_group_n)
            if power.power < targetPower:
                raise ValueError(
                    'Samplesize cannot be calculated. Please check your design.'
                )
        total_N = sum(
            math.ceil(total_per_group_n) * g for g in relative_group_sizes)
    return total_N, power
def _unirep_power_known_sigma_internal_pilot(rank_C, rank_U, total_N, rank_X,
                                             sigma_star, hypo_sum_square,
                                             expected_epsilon, epsilon, alpha,
                                             sigmastareval, unirep_method,
                                             **kwargs):
    """
    Calculate univariate repeated measures power using internal pilot
    multipliers.

    Parameters
    ----------
    rank_C: float
        rank of the C matrix
    rank_U: float
        rank of the U matrix
    total_N: float
        total number of observations
    rank_X:
        rank of the X matrix
    sigma_star:
        passed to HypothesisError together with hypo_sum_square and rank_U
    hypo_sum_square:
        hypothesis sum of squares
    expected_epsilon: float
        expected value epsilon estimator
    epsilon:
        epsilon calculated from U`*SIGMA*U
    alpha:
        Significance level for the test
    sigmastareval:
        eigenvalues of SIGMASTAR=U`*SIGMA*U
    unirep_method:
        Which method was used to find the expected value of epsilon.
    kwargs:
        ``internal_pilot`` is required in practice: its ``n_ip`` and
        ``rank_ip`` attributes are read unconditionally. Other keys are
        not used.

    Returns
    -------
    power: Power
        power for the univariate test, tagged with Constants.BOX.
    """
    pilot = kwargs.get('internal_pilot')

    # E = SIGMASTAR # (N - rX)
    nue = total_N - rank_X
    undf1, undf2 = _calc_undf1_undf2(unirep_method, expected_epsilon, nue,
                                     rank_C, rank_U)

    hypothesis_error = HypothesisError(hypo_sum_square, sigma_star, rank_U)
    multipliers = _calc_multipliers_internal_pilot(
        unirep_method, expected_epsilon, epsilon, hypothesis_error,
        sigmastareval, rank_C, rank_U, pilot.n_ip, pilot.rank_ip)
    e_1_2, e_3_5, e_4 = multipliers

    # Error checking
    e_1_2 = _err_checking(e_1_2, rank_U)
    omega = e_3_5 * hypothesis_error.q2 / hypothesis_error.lambar
    fcrit = finv(1 - alpha, undf1 * e_1_2, undf2 * e_1_2)

    # Only the power value of the approximation is used here.
    _, _, power_value = _calc_power_muller_approx(undf1, undf2, omega, alpha,
                                                  e_3_5, e_4, fcrit)

    return Power(power_value, omega, Constants.BOX)
def _unirep_power_estimated_sigma(rank_C, rank_U, total_N, rank_X, sigma_star,
                                  hypo_sum_square, expected_epsilon, epsilon,
                                  alpha, unirep_method, **kwargs):
    """
    Calculate univariate repeated measures power when Sigma is estimated.

    Parameters
    ----------
    rank_C: float
        rank of the C matrix
    rank_U: float
        rank of the U matrix
    total_N: float
        total number of observations
    rank_X:
        rank of the X matrix
    sigma_star:
        passed to HypothesisError together with hypo_sum_square and rank_U
    hypo_sum_square:
        hypothesis sum of squares
    expected_epsilon: float
        expected value epsilon estimator
    epsilon:
        object whose ``eps`` attribute is forwarded to the multiplier
        calculation
    alpha:
        Significance level for the test
    unirep_method:
        Which method was used to find the expected value of epsilon.
    kwargs:
        * ``confidence_interval`` - required in practice: its ``n_est`` and
          ``rank_est`` are read unconditionally; ``lower_tail`` and
          ``upper_tail`` are used for the confidence limits
        * ``approximation_method`` - defaults to
          Constants.UCDF_MULLER2004_APPROXIMATION
        * ``tolerance`` - forwarded to ``Power.glmmpcl``; defaults to 1e-12

    Returns
    -------
    power: Power
        power for the univariate test, tagged with Constants.SIGMA_ESTIMATED.
    """
    approximation_method = kwargs.get(
        'approximation_method', Constants.UCDF_MULLER2004_APPROXIMATION)
    ci = kwargs.get('confidence_interval')
    tolerance = kwargs.get('tolerance', 1e-12)

    # E = SIGMASTAR # (N - rX)
    nue = total_N - rank_X
    undf1, undf2 = _calc_undf1_undf2(unirep_method, expected_epsilon, nue,
                                     rank_C, rank_U)

    hypothesis_error = HypothesisError(hypo_sum_square, sigma_star, rank_U)
    cl1df, e_1_2, e_3_5, e_4, omegaua = _calc_multipliers_est_sigma(
        unirep_method=unirep_method,
        eps=epsilon.eps,
        hypothesis_error=hypothesis_error,
        nue=nue,
        rank_C=rank_C,
        rank_U=rank_U,
        approximation_method=approximation_method,
        n_est=ci.n_est,
        rank_est=ci.rank_est)

    # Error checking
    e_1_2 = _err_checking(e_1_2, rank_U)
    omega = e_3_5 * hypothesis_error.q2 / hypothesis_error.lambar
    if unirep_method == Constants.CM:
        # This method takes omega from the multiplier calculation instead.
        omega = omegaua
    fcrit = finv(1 - alpha, undf1 * e_1_2, undf2 * e_1_2)

    df1, df2, power_value = _calc_power_muller_approx(undf1, undf2, omega,
                                                      alpha, e_3_5, e_4, fcrit)
    result = Power(power_value, omega, Constants.SIGMA_ESTIMATED)
    if not ci:
        return result

    cl_type = _get_cl_type(ci)
    result.glmmpcl(is_multirep=False,
                   alphatest=alpha,
                   dfh=cl1df,
                   n2=total_N,
                   rank_est=ci.rank_est,
                   dfe2=df2,
                   cl_type=cl_type,
                   n_est=ci.n_est,
                   alpha_cl=ci.lower_tail,
                   alpha_cu=ci.upper_tail,
                   fcrit=fcrit,
                   tolerance=tolerance,
                   omega=omega,
                   df1_unirep=df1)
    return result
def _unirep_power_known_sigma(rank_C, rank_U, total_N, rank_X, sigma_star,
                              hypo_sum_square, expected_epsilon, epsilon,
                              alpha, unirep_method, **kwargs):
    """
    Calculate univariate repeated measures power with known Sigma.

    Parameters
    ----------
    rank_C: float
        rank of the C matrix
    rank_U: float
        rank of the U matrix
    total_N: float
        total number of observations
    rank_X:
        rank of the X matrix
    sigma_star:
        passed to HypothesisError together with hypo_sum_square and rank_U
    hypo_sum_square: np.matrix
        hypothesis sum of squares
    expected_epsilon: float
        expected value epsilon estimator
    epsilon:
        epsilon calculated from U`*SIGMA*U
    alpha:
        Significance level for the test
    unirep_method:
        Which method was used to find the expected value of epsilon.
    kwargs:
        optional ``noncentrality_distribution`` and ``quantile``; other keys
        are not used by this function

    Returns
    -------
    power: Power
        power for the univariate test, tagged with Constants.SIGMA_KNOWN.
    """
    dist = kwargs.get('noncentrality_distribution')
    q = kwargs.get('quantile')

    nue = total_N - rank_X
    undf1, undf2 = _calc_undf1_undf2(unirep_method, expected_epsilon, nue,
                                     rank_C, rank_U)

    hypothesis_error = HypothesisError(hypo_sum_square, sigma_star, rank_U)
    e_1_2, e_3_5, e_4 = _calc_multipliers_known_sigma(
        epsilon, expected_epsilon, hypothesis_error, rank_C, rank_U,
        Constants.SIGMA_KNOWN)
    omega = e_3_5 * hypothesis_error.q2 / hypothesis_error.lambar
    # Error checking
    e_1_2 = _err_checking(e_1_2, rank_U)
    fcrit = finv(1 - alpha, undf1 * e_1_2, undf2 * e_1_2)

    if dist and not q:
        # NOTE(review): _multi_power unpacks two values from this same method
        # and uses 1 - the first one as power, whereas the result is used
        # directly here - confirm which usage is intended.
        power_value = dist.unconditional_power_simpson(fcrit=fcrit,
                                                       df1=undf1 * e_3_5,
                                                       df2=undf2 * e_4)
    else:
        if dist:
            # Distribution and quantile both given: swap in the quantile omega.
            omega = __calc_quantile_omega(dist, q)
        # Muller, Edwards & Taylor 2002 and Muller Barton 1989 CDF approx
        _, _, power_value = _calc_power_muller_approx(undf1, undf2, omega,
                                                      alpha, e_3_5, e_4, fcrit)

    return Power(power_value, omega, Constants.SIGMA_KNOWN)
def _unirep_power(epsilon_estimator, rank_C: float, rank_X: float,
                  relative_group_sizes, rep_N: float, alpha: float,
                  sigma_star: np.matrix, delta_es: np.matrix, unirep_method,
                  **kwargs):
    """
    Dispatch a univariate repeated measures power calculation.

    The design properties are derived with ``calc_properties``, the expected
    epsilon is obtained from ``epsilon_estimator`` (called with no arguments,
    with ``rank_U`` only, or with ``sigma_star``, ``rank_U``, ``total_N`` and
    ``rank_X``, depending on how many parameters it declares), and the
    calculation is routed to the estimated-sigma routine when a truthy
    ``confidence_interval`` is present in ``kwargs`` and to the known-sigma
    routine otherwise.

    Returns
    -------
    power: Power
        the Power object produced by the selected routine.
    """
    error_sum_square, hypo_sum_square, rank_U, total_N = calc_properties(
        delta_es=delta_es,
        rank_X=rank_X,
        relative_group_sizes=relative_group_sizes,
        rep_N=rep_N,
        sigma_star=sigma_star)

    n_params = len(inspect.signature(epsilon_estimator).parameters)
    if n_params == 0:
        expected_epsilon = epsilon_estimator()
    elif n_params == 1:
        expected_epsilon = epsilon_estimator(rank_U=rank_U)
    else:
        expected_epsilon = epsilon_estimator(sigma_star=sigma_star,
                                             rank_U=rank_U,
                                             total_N=total_N,
                                             rank_X=rank_X)
    epsilon = _calc_epsilon(sigma_star, rank_U)
    power = Power(power='Not Calculable.')

    confidence_interval = kwargs.get('confidence_interval')
    if confidence_interval:
        sigma_source = Constants.SIGMA_ESTIMATED
    else:
        sigma_source = Constants.SIGMA_KNOWN

    # Keyword arguments common to the known and estimated sigma routines.
    shared = dict(rank_C=rank_C,
                  rank_U=rank_U,
                  total_N=total_N,
                  rank_X=rank_X,
                  sigma_star=sigma_star,
                  hypo_sum_square=hypo_sum_square,
                  expected_epsilon=expected_epsilon,
                  alpha=alpha,
                  unirep_method=unirep_method)

    if sigma_source == Constants.SIGMA_KNOWN:
        # The known-sigma routine receives the bare eps value.
        power = _unirep_power_known_sigma(epsilon=epsilon.eps,
                                          **shared,
                                          **kwargs)

    if sigma_source == Constants.SIGMA_ESTIMATED:
        # The estimated-sigma routine receives the whole epsilon object.
        power = _unirep_power_estimated_sigma(epsilon=epsilon,
                                              **shared,
                                              **kwargs)

    if sigma_source == Constants.INTERNAL_PILOT:
        sigmastareval = np.linalg.eigvals(sigma_star)
        power = _unirep_power_known_sigma_internal_pilot(
            rank_C, rank_U, total_N, rank_X, sigma_star, hypo_sum_square,
            expected_epsilon, epsilon, alpha, sigmastareval, unirep_method,
            **kwargs)
    return power
def _undefined_power(error_message=None):
    """Build a Power with NaN power, NaN noncentrality and a missing fmethod."""
    nan_power, nan_noncentrality = float('nan'), float('nan')
    return Power(nan_power, nan_noncentrality, Constants.FMETHOD_MISSING,
                 error_message)