Example #1
def score_precision_beta(
    endog,
    exog_mean,
    exog_precision,
    bounded_reg_link,
    param_mean,
    param_precision,
):
    """
    Computes the score vector with respect to the precision regression 
    parameters. 
    
    For more details we refer to:
    Ferrari, S. L. P., Cribari-Neto, F. (2004). Beta regression
    for modeling rates and proportions. J. Appl. Statist. 31, 799–815
    
    Simas, A. B., Barreto-Souza, W., Rocha, A. V. (2010). Improved
    estimators for a general class of beta regression models. Computational
    Statistics and Data Analysis, 54, 348–366
    
    :param endog (array_like): 1d array of the endogenous response variable.

    :param exog_mean (array_like): An nobs x k array, where nobs is the number
    of observations and k is the number of mean regressors. An intercept is
    not included by default and should be added by the user.

    :param exog_precision (array_like): An nobs x q array, where nobs is the
    number of observations and q is the number of precision regressors.
    An intercept is not included by default and should be added by the user.
    
    :param bounded_reg_link: An instance of BoundedRegLink. Recall that
    the default mean link is 'logit' and that the default precision link 
    is None.

    :param param_mean: 1d array of mean regression parameters.   
    
    :param param_precision: 1d array of precision regression parameters.
    """
    estimated_mean = estimate_mean(exog_mean, param_mean, bounded_reg_link)

    estimated_precision = estimate_precision(exog_precision, param_precision,
                                             bounded_reg_link)

    # Constant-precision case (no precision regressors): use a constant 1d
    # stand-in design so the matrix product below is well defined.
    if exog_precision is None:
        exog_precision = param_precision * np.ones_like(estimated_mean)

    # Score for the precision parameters: exog_precision.T @ (a * dphi/deta),
    # where a_i = mu_i * (y_i* - mu_i*) + digamma(phi_i)
    #             - digamma((1 - mu_i) * phi_i) + log(1 - y_i),
    # y_i* = log(y_i / (1 - y_i)) and
    # mu_i* = digamma(mu_i * phi_i) - digamma((1 - mu_i) * phi_i)
    # (Ferrari & Cribari-Neto, 2004).
    score_precision = np.matmul(
        exog_precision.T,
        (estimated_mean *
         (np.log(endog) - np.log(1 - endog) -
          digamma(estimated_mean * estimated_precision) + digamma(
              (1 - estimated_mean) * estimated_precision)) +
         digamma(estimated_precision) - digamma(
             (1 - estimated_mean) * estimated_precision) + np.log(1 - endog)) *
        bounded_reg_link.dphideta(estimated_precision),
    )

    return correct_dimension(score_precision)
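
The standalone sketch below (hypothetical, not part of the module) checks the score expression used above against a finite-difference derivative of the beta log-likelihood, under the simplifying assumptions of a constant precision phi and dphideta equal to 1; the data and all names are made up for illustration only.

# Hypothetical sketch: verify the precision-score expression numerically.
import numpy as np
from scipy.special import digamma, gammaln

rng = np.random.default_rng(0)
n = 200
X = np.column_stack([np.ones(n), rng.normal(size=n)])  # mean design matrix
beta = np.array([0.3, -0.8])                           # mean parameters
mu = 1.0 / (1.0 + np.exp(-X @ beta))                   # logit mean link
phi = 5.0                                              # constant precision
y = rng.beta(mu * phi, (1.0 - mu) * phi)               # simulated responses

def loglike(phi_):
    # Beta log-likelihood in the mean/precision parameterization.
    return np.sum(gammaln(phi_) - gammaln(mu * phi_) - gammaln((1 - mu) * phi_)
                  + (mu * phi_ - 1) * np.log(y)
                  + ((1 - mu) * phi_ - 1) * np.log(1 - y))

# Same expression as inside score_precision_beta, with dphideta = 1.
analytic = np.sum(mu * (np.log(y) - np.log(1 - y)
                        - digamma(mu * phi) + digamma((1 - mu) * phi))
                  + digamma(phi) - digamma((1 - mu) * phi) + np.log(1 - y))

eps = 1e-6
numeric = (loglike(phi + eps) - loglike(phi - eps)) / (2 * eps)
print(analytic, numeric)  # the two values agree to several decimals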
Example #2
def grad_q_precision_beta(
    endog,
    exog_mean,
    exog_precision,
    bounded_reg_link,
    param_mean,
    param_precision,
    previous_precision,
):
    """
    Computes the gradient of the Q function with respect to the precision
    regression parameters. 
    
    For more details we refer to:
    Barreto-Souza, W., Simas, A. B. (2017). Improving estimation for beta
    regression models via EM-algorithm and related diagnostic tools. Journal
    of Statistical Computation and Simulation, 87, 2847–2867
    
    :param endog (array_like): 1d array of the endogenous response variable.

    :param exog_mean (array_like): An nobs x k array, where nobs is the number
    of observations and k is the number of mean regressors. An intercept is
    not included by default and should be added by the user.

    :param exog_precision (array_like): An nobs x q array, where nobs is the
    number of observations and q is the number of precision regressors.
    An intercept is not included by default and should be added by the user.
    
    :param bounded_reg_link: An instance of BoundedRegLink. Recall that
    the default mean link is 'logit' and that the default precision link 
    is None.

    :param param_mean: 1d array of mean regression parameters.   
    
    :param param_precision: 1d array of precision regression parameters.
    
    :param previous_precision: 1d array of the regression parameters 
    related to the precision in the previous EM-step.
    """
    estimated_mean = estimate_mean(exog_mean, param_mean, bounded_reg_link)
    estimated_precision = estimate_precision(exog_precision, param_precision,
                                             bounded_reg_link)
    # Constant-precision case (no precision regressors): use a constant 1d
    # stand-in design so the matrix product below is well defined.
    if exog_precision is None:
        exog_precision = param_precision * np.ones_like(estimated_mean)

    # Gradient of Q with respect to the precision parameters: same form as the
    # precision score, except that digamma(previous_precision) (the E-step
    # term from the previous iteration) replaces digamma(estimated_precision);
    # see Barreto-Souza & Simas (2017).
    grad_precision = np.matmul(
        exog_precision.T,
        (estimated_mean * np.log(endog / (1 - endog)) +
         digamma(previous_precision) + np.log(1 - endog) -
         estimated_mean * digamma(estimated_mean * estimated_precision) -
         (1 - estimated_mean) * digamma(
             (1 - estimated_mean) * estimated_precision)) *
        bounded_reg_link.dphideta(estimated_precision),
    )

    return correct_dimension(grad_precision)
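
The only difference from score_precision_beta is that digamma(previous_precision) replaces digamma(estimated_precision), so at a fixed point of the EM the two expressions coincide. The standalone sketch below (hypothetical, not part of the module) illustrates that property for a constant precision with dphideta equal to 1, treating previous_precision as the previous-step fitted precision value.

# Hypothetical sketch: the Q-gradient equals the score when the previous-step
# precision equals the current one (constant precision, dphideta = 1).
import numpy as np
from scipy.special import digamma

rng = np.random.default_rng(1)
n = 200
mu = rng.uniform(0.2, 0.8, size=n)       # arbitrary valid means
phi = 7.0                                # current precision
y = rng.beta(mu * phi, (1 - mu) * phi)

def q_gradient(phi_prev):
    # Same expression as inside grad_q_precision_beta, with dphideta = 1.
    return np.sum(mu * np.log(y / (1 - y)) + digamma(phi_prev) + np.log(1 - y)
                  - mu * digamma(mu * phi)
                  - (1 - mu) * digamma((1 - mu) * phi))

# Precision score from score_precision_beta, same simplifications.
score = np.sum(mu * (np.log(y) - np.log(1 - y)
                     - digamma(mu * phi) + digamma((1 - mu) * phi))
               + digamma(phi) - digamma((1 - mu) * phi) + np.log(1 - y))

print(np.isclose(q_gradient(phi), score))  # True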
Example #3
def em_loop(endog,
            exog_mean,
            exog_precision,
            bounded_reg_link,
            param_mean_start,
            param_precision_start,
            em_optim_params={
                "em_tolerance": 10**(-6),
                "max_em_iterations": 5000,
                "method": "BFGS",
            },
            **kwargs):
    '''
    Runs the EM procedure until convergence (or when reaching the 
    maximum number of iterations).
    
    For more details we refer to:
    Barreto-Souza, W., Simas, A. B. (2017). Improving estimation for beta
    regression models via EM-algorithm and related diagnostic tools. Journal
    of Statistical Computation and Simulation, 87, 2847–2867
    
    :param endog (array_like): 1d array of the endogenous response variable.

    :param exog_mean (array_like): An nobs x k array, where nobs is the number
    of observations and k is the number of mean regressors. An intercept is
    not included by default and should be added by the user.

    :param exog_precision (array_like): An nobs x q array, where nobs is the
    number of observations and q is the number of precision regressors.
    An intercept is not included by default and should be added by the user.
    
    :param bounded_reg_link: An instance of BoundedRegLink. Recall that
    the default mean link is 'logit' and that the default precision link 
    is None.

    :param param_mean_start (array_like): 1d array of initial values for the
    mean regression parameters.

    :param param_precision_start (array_like): 1d array of initial values for
    the precision regression parameters.

    :param em_optim_params (dict): A dictionary of parameters related
    to the optimization:
        em_tolerance: the error tolerance for convergence of the EM procedure.
        max_em_iterations: the maximum number of iterations of the EM
                           procedure.
        method: the optimization method used in the M-step (default 'BFGS').

    **kwargs: additional parameters to be passed to the minimize function
              from the scipy.optimize module.
                  
    The EM tolerance parameter is calculated as described in the above 
    reference.
    '''
    em_params = {
        "endog": endog,
        "exog_mean": exog_mean,
        "bounded_reg_link": bounded_reg_link,
        "exog_precision": exog_precision,
    }

    # Numbers of mean (k) and precision (q) regression parameters; a model
    # without precision regressors has a single precision parameter.
    k = exog_mean.shape[1]
    if exog_precision is None:
        q = 1
    else:
        q = exog_precision.shape[1]

    em_tolerance = 1
    count = 0

    param_start = np.concatenate([param_mean_start, param_precision_start])
    previous_precision_param = param_precision_start

    while (em_tolerance > em_optim_params["em_tolerance"]
           and count < em_optim_params["max_em_iterations"]):
        # Precision values implied by the previous-step parameters (used in
        # the E-step of the algorithm).
        previous_precision = previous_precision_update(
            previous_precision_param, exog_precision, bounded_reg_link)

        # M-step: maximize the Q function given those previous precisions.
        fit = maximize_q_function_beta(
            **em_params,
            param_start=param_start,
            previous_precision=previous_precision,
            method=em_optim_params["method"],
            **kwargs,
        )
        param_new = fit.x

        # Convergence criterion, computed as in Barreto-Souza & Simas (2017).
        em_tolerance = compute_tolerance_beta(
            **em_params,
            param_new=param_new,
            param_start=param_start,
            previous_precision=previous_precision,
        )
        param_start = param_new
        previous_precision_param = correct_dimension(param_new[k:(k + q)])
        count += 1

    return {'estimated_parameters': param_new, 'count': count}
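
em_loop itself depends on module-internal helpers (previous_precision_update, maximize_q_function_beta, compute_tolerance_beta) that are not shown here, so the standalone sketch below only mirrors its control flow on a toy problem: EM for right-censored exponential data, with a plain relative-change stopping rule in place of the tolerance described in Barreto-Souza & Simas (2017). All names and data are hypothetical.

# Hypothetical sketch: the same tolerance-or-max-iterations EM loop, with the
# M-step done numerically via scipy.optimize.minimize, on censored data.
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(2)
x_full = rng.exponential(scale=2.0, size=300)   # true rate = 0.5
c = 3.0
t = np.minimum(x_full, c)                        # observed, right-censored at c
censored = x_full > c

def neg_q(log_lam, expected_x):
    # Negative Q function of the complete-data exponential log-likelihood.
    lam = np.exp(log_lam[0])
    return -(t.size * np.log(lam) - lam * expected_x.sum())

em_optim_params = {"em_tolerance": 1e-6, "max_em_iterations": 5000,
                   "method": "BFGS"}
lam_start, count, em_tolerance = 1.0, 0, 1.0
while (em_tolerance > em_optim_params["em_tolerance"]
       and count < em_optim_params["max_em_iterations"]):
    # E-step: expected latent lifetimes given the previous rate.
    expected_x = np.where(censored, c + 1.0 / lam_start, t)
    # M-step: maximize Q numerically, as em_loop does through
    # maximize_q_function_beta.
    fit = minimize(neg_q, x0=[np.log(lam_start)], args=(expected_x,),
                   method=em_optim_params["method"])
    lam_new = np.exp(fit.x[0])
    # Plain relative-change stopping rule (the library instead uses the
    # tolerance from the reference).
    em_tolerance = abs(lam_new - lam_start) / abs(lam_start)
    lam_start = lam_new
    count += 1

print({"estimated_parameters": lam_start, "count": count})  # rate near 0.5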