예제 #1
0
def _constraints_factor(encoding_matrix,
                        comparison='pairwise',
                        k_params=None,
                        idx_start=None):
    """Create a restriction matrix for a factor from its encoding matrix.

    Parameters
    ----------
    encoding_matrix : ndarray
        Contrast matrix for the encoding of a factor as defined by patsy.
        The number of rows should be equal to the number of levels or
        categories of the factor, the number of columns should be equal to
        the number of parameters for this factor. It has to be 2-dimensional.
    comparison : str
        Currently only pairwise comparison is implemented; the accepted
        spellings are 'pairwise', 'pw' and 'pairs'. The restriction matrix
        can be used for testing the hypothesis that all pairwise differences
        are zero.
    k_params : int, optional
        Number of parameters. If None (default), the restrictions for the
        factor alone are returned without embedding them in a larger matrix.
    idx_start : int, optional
        Index of the first parameter of this factor. The restrictions on the
        factor are inserted as a block in the full restriction matrix
        starting at column with index `idx_start`. Required whenever
        `k_params` is given.

    Returns
    -------
    contrast : ndarray
        Contrast or restriction matrix that can be used in hypothesis test
        of model results. If `k_params` is given, the block is embedded via
        `_embed_constraints` so that the number of columns is k_params.

    Raises
    ------
    NotImplementedError
        If `comparison` is not one of the supported pairwise spellings.
    ValueError
        If `k_params` is given but `idx_start` is None.
    """
    # Unpacking into two names keeps the implicit check that the encoding
    # matrix is 2-dimensional; the column count itself is not needed here.
    k_level, _ = encoding_matrix.shape

    # Validate the options up front so that invalid calls fail before the
    # import and the contrast computation below.
    if comparison not in ('pairwise', 'pw', 'pairs'):
        raise NotImplementedError('currently only pairwise comparison')
    if k_params is not None and idx_start is None:
        raise ValueError("if k_params is not None, then idx_start is "
                         "required")

    import statsmodels.sandbox.stats.multicomp as mc

    # The all-pairs contrast is negated and then mapped from factor levels
    # to the parameters of the factor through the encoding matrix.
    contrasts = (-mc.contrast_allpairs(k_level)).dot(encoding_matrix)
    if k_params is not None:
        contrasts = _embed_constraints(contrasts, k_params, idx_start)
    return contrasts
예제 #2
0
def _constraints_factor(encoding_matrix, comparison='pairwise', k_params=None,
                        idx_start=None):
    """Create a restriction matrix for a factor from its encoding matrix.

    Parameters
    ----------
    encoding_matrix : ndarray
        Contrast matrix for the encoding of a factor as defined by patsy.
        The number of rows should be equal to the number of levels or
        categories of the factor, the number of columns should be equal to
        the number of parameters for this factor. It has to be 2-dimensional.
    comparison : str
        Currently only pairwise comparison is implemented; the accepted
        spellings are 'pairwise', 'pw' and 'pairs'. The restriction matrix
        can be used for testing the hypothesis that all pairwise differences
        are zero.
    k_params : int, optional
        Number of parameters. If None (default), the restrictions for the
        factor alone are returned without embedding them in a larger matrix.
    idx_start : int, optional
        Index of the first parameter of this factor. The restrictions on the
        factor are inserted as a block in the full restriction matrix
        starting at column with index `idx_start`. Required whenever
        `k_params` is given.

    Returns
    -------
    contrast : ndarray
        Contrast or restriction matrix that can be used in hypothesis test
        of model results. If `k_params` is given, the block is embedded via
        `_embed_constraints` so that the number of columns is k_params.

    Raises
    ------
    NotImplementedError
        If `comparison` is not one of the supported pairwise spellings.
    ValueError
        If `k_params` is given but `idx_start` is None.
    """
    # The two-name unpacking doubles as a check that the matrix is 2-d;
    # only the number of levels (rows) is used afterwards.
    k_level, _ = encoding_matrix.shape

    # Reject unsupported options first, before the import and the contrast
    # computation below are attempted.
    if comparison not in ('pairwise', 'pw', 'pairs'):
        raise NotImplementedError('currently only pairwise comparison')
    if k_params is not None and idx_start is None:
        raise ValueError("if k_params is not None, then idx_start is "
                         "required")

    import statsmodels.sandbox.stats.multicomp as mc

    # Negated all-pairs contrast over the levels, translated to the
    # parameters of the factor by the encoding matrix.
    c_all = -mc.contrast_allpairs(k_level)
    contrasts = c_all.dot(encoding_matrix)
    if k_params is None:
        return contrasts
    return _embed_constraints(contrasts, k_params, idx_start)
예제 #3
0
def t_test_pairwise(result, term_name, method='hs', alpha=0.05,
                    factor_labels=None, ignore=False):
    """Run t-tests on all pairwise level differences of a categorical term.

    The contrast matrix that the formula design_info stores for the term is
    combined with an all-pairs contrast, so this should work for all
    encodings of a main effect.

    Parameters
    ----------
    result : result instance
        The results of an estimated model with a categorical main effect.
        Its ``model.data.design_info`` and ``params`` are used.
    term_name : str
        Name of the term for which pairwise comparisons are computed. It is
        looked up in the ``term_names`` of the design_info. Term names for
        categorical effects are created by patsy and correspond to the main
        part of the exog names.
    method : str or list of strings
        Multiple testing p-value correction, default is 'hs'. Passed on to
        `t_test_multi`.
    alpha : float
        Significance level for the multiple testing reject decision.
    factor_labels : None, list of str
        Labels for the factor levels used for the pairwise labels. If not
        provided, then the categories from the formula design_info are used.
        Must have as many entries as the factor has categories.
    ignore : boolean
        Turn off some of the exceptions raised by input checks. Currently
        this skips the check that rejects interaction terms.

    Returns
    -------
    results : MultiCompResult instance
        The results are stored as attributes:

        - result_frame : table returned by `t_test_multi` with t_test
          results and multiple testing corrected p-values.
        - contrasts : matrix of constraints of the null hypothesis in the
          t_test.
        - term : the design_info term that was compared.
        - contrast_labels : labels of the pairwise comparisons.
        - term_encoding_matrix : contrast matrix of the factor encoding.

    Raises
    ------
    ValueError
        If the term has more than one factor and `ignore` is false, or if
        `factor_labels` does not have one label per category.

    Notes
    -----
    Status: experimental. Currently only checked for treatment coding with
    and without specified reference level.

    Currently there are no multiple testing corrected confidence intervals
    available.
    """
    design = result.model.data.design_info
    term = design.terms[design.term_names.index(term_name)]
    idx_start = design.term_slices[term].start
    if not ignore and len(term.factors) > 1:
        raise ValueError('interaction effects not yet supported')

    # Only the first factor of the term is used from here on.
    factor = term.factors[0]
    levels = design.factor_infos[factor].categories
    if factor_labels is not None:
        if len(factor_labels) != len(levels):
            raise ValueError(
                "factor_labels has the wrong length, should be %d"
                % len(levels))
        # User supplied labels replace the categories of the design_info.
        levels = factor_labels

    n_levels = len(levels)
    encoding = design.term_codings[term][0].contrast_matrices[factor].matrix
    n_params = len(result.params)
    pair_labels = _get_pairs_labels(n_levels, levels)

    import statsmodels.sandbox.stats.multicomp as mc

    # Negated all-pairs contrast mapped through the encoding matrix, then
    # placed into the full parameter vector at the position of the term.
    pair_block = (-mc.contrast_allpairs(n_levels)).dot(encoding)
    contrasts = _embed_constraints(pair_block, n_params, idx_start)

    frame = t_test_multi(result, contrasts, method=method, ci_method=None,
                         alpha=alpha, contrast_names=pair_labels)
    return MultiCompResult(result_frame=frame,
                           contrasts=contrasts,
                           term=term,
                           contrast_labels=pair_labels,
                           term_encoding_matrix=encoding)
예제 #4
0
def t_test_pairwise(result,
                    term_name,
                    method='hs',
                    alpha=0.05,
                    factor_labels=None,
                    ignore=False):
    """
    Compute pairwise t_tests for a term with corrected p-values

    The encoding contrast matrix stored in the formula design_info is
    combined with an all-pairs contrast, so this should work for all
    encodings of a main effect.

    Parameters
    ----------
    result : result instance
        The results of an estimated model with a categorical main effect.
        Its ``model.data.design_info`` and ``params`` are used.
    term_name : str
        Name of the term for which pairwise comparisons are computed. It is
        looked up in the ``term_names`` of the design_info. Term names for
        categorical effects are created by patsy and correspond to the main
        part of the exog names.
    method : {str, list[str]}
        Multiple testing p-value correction, default is 'hs'. Passed on to
        `t_test_multi`.
    alpha : float
        Significance level for the multiple testing reject decision.
    factor_labels : {list[str], None}
        Labels for the factor levels used for the pairwise labels. If not
        provided, then the categories from the formula design_info are used.
        Must have as many entries as the factor has categories.
    ignore : bool
        Turn off some of the exceptions raised by input checks. Currently
        this skips the check that rejects interaction terms.

    Returns
    -------
    MultiCompResult
        The results are stored as attributes:

        - result_frame : table returned by `t_test_multi` with t_test
          results and multiple testing corrected p-values.
        - contrasts : matrix of constraints of the null hypothesis in the
          t_test.
        - term : the design_info term that was compared.
        - contrast_labels : labels of the pairwise comparisons.
        - term_encoding_matrix : contrast matrix of the factor encoding.

    Raises
    ------
    ValueError
        If the term has more than one factor and `ignore` is false, or if
        `factor_labels` does not have one label per category.

    Notes
    -----
    Status: experimental. Currently only checked for treatment coding with
    and without specified reference level.

    Currently there are no multiple testing corrected confidence intervals
    available.
    """
    info = result.model.data.design_info
    position = info.term_names.index(term_name)
    term = info.terms[position]
    idx_start = info.term_slices[term].start
    if not ignore and len(term.factors) > 1:
        raise ValueError('interaction effects not yet supported')

    # Only the first factor of the term is used from here on.
    factor = term.factors[0]
    categories = info.factor_infos[factor].categories
    if factor_labels is not None:
        if len(factor_labels) != len(categories):
            raise ValueError(
                "factor_labels has the wrong length, should be %d" %
                len(categories))
        # User supplied labels replace the categories of the design_info.
        categories = factor_labels

    n_levels = len(categories)
    coding = info.term_codings[term][0]
    enc_matrix = coding.contrast_matrices[factor].matrix
    n_params = len(result.params)
    names = _get_pairs_labels(n_levels, categories)

    import statsmodels.sandbox.stats.multicomp as mc

    # Negated all-pairs contrast mapped through the encoding matrix, then
    # placed into the full parameter vector at the position of the term.
    all_pairs = -mc.contrast_allpairs(n_levels)
    contrasts = _embed_constraints(all_pairs.dot(enc_matrix), n_params,
                                   idx_start)

    test_kwargs = dict(method=method, ci_method=None, alpha=alpha,
                       contrast_names=names)
    table = t_test_multi(result, contrasts, **test_kwargs)
    return MultiCompResult(result_frame=table, contrasts=contrasts,
                           term=term, contrast_labels=names,
                           term_encoding_matrix=enc_matrix)