Code Example #1
File: reporting.py, Project: Gilles86/nistats
def plot_contrast_matrix(contrast_def, design_matrix, colorbar=False, ax=None):
    """Creates plot for contrast definition.

    Parameters
    ----------
    contrast_def : str or array of shape (n_col) or list of (string or
                   array of shape (n_col))
        where ``n_col`` is the number of columns of the design matrix,
        (one array per run). If only one array is provided when there
        are several runs, it will be assumed that the same contrast is
        desired for all runs. The string can be a formula compatible with
        the linear constraint of the Patsy library. Basically one can use
        the name of the conditions as they appear in the design matrix of
        the fitted model combined with operators /*+- and numbers.
        Please check the patsy documentation for formula examples:
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint

    design_matrix : pandas DataFrame
        Design matrix of the fitted model; its column names are used to
        resolve string contrasts.

    colorbar : bool, optional (default False)
        Include a colorbar in the contrast matrix plot.

    ax : matplotlib Axes object, optional (default None)
        Axes on which to draw the contrast matrix. If None, a new figure
        and axes are created.

    Returns
    -------
    Plot Axes object
    """

    design_column_names = design_matrix.columns.tolist()
    if isinstance(contrast_def, str):
        di = DesignInfo(design_column_names)
        contrast_def = di.linear_constraint(contrast_def).coefs

    if ax is None:
        plt.figure(figsize=(8, 4))
        ax = plt.gca()

    maxval = np.max(np.abs(contrast_def))

    con_mx = np.asmatrix(contrast_def)
    mat = ax.matshow(con_mx, aspect='equal', extent=[0, con_mx.shape[1],
                     0, con_mx.shape[0]], cmap='gray', vmin=-maxval,
                     vmax=maxval)
    ax.set_label('conditions')
    ax.set_ylabel('')
    ax.set_yticklabels(['' for x in ax.get_yticklabels()])

    # Shift ticks to be at 0.5, 1.5, etc
    ax.xaxis.set(ticks=np.arange(1.0, len(design_column_names) + 1.0),
                 ticklabels=design_column_names)
    ax.set_xticklabels(design_column_names, rotation=90, ha='right')

    if colorbar:
        plt.colorbar(mat, fraction=0.025, pad=0.04)

    plt.tight_layout()

    return ax
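
A minimal usage sketch for the helper above, assuming the module-level imports the excerpt relies on; the design-matrix columns ('faces', 'houses', 'drift') are invented for illustration:

import numpy as np
import pandas as pd
from patsy import DesignInfo

# Hypothetical design matrix with two conditions and a drift regressor.
design_matrix = pd.DataFrame(np.random.randn(20, 3),
                             columns=['faces', 'houses', 'drift'])

# A string contrast is resolved against the column names by patsy,
# exactly as in the isinstance(contrast_def, str) branch above.
di = DesignInfo(design_matrix.columns.tolist())
print(di.linear_constraint('faces - houses').coefs)  # [[ 1. -1.  0.]]

# ax = plot_contrast_matrix('faces - houses', design_matrix, colorbar=True)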
Code Example #2
def _get_contrast(second_level_contrast, design_matrix):
    """ Check and return contrast when testing one contrast at the time """
    if isinstance(second_level_contrast, str):
        if second_level_contrast in design_matrix.columns.tolist():
            contrast = second_level_contrast
        else:
            raise ValueError('"' + second_level_contrast + '" is not a valid' +
                             ' contrast name')
    else:
        # Check contrast definition
        if second_level_contrast is None:
            if design_matrix.shape[1] == 1:
                second_level_contrast = np.ones([1])
            else:
                raise ValueError('No second-level contrast is specified.')
        elif (np.nonzero(second_level_contrast)[0]).size != 1:
            raise ValueError('second_level_contrast must be '
                             'a list of 0s and 1s')
        if isinstance(second_level_contrast, np.ndarray):
            con_val = np.asarray(second_level_contrast, dtype=bool)
        else:
            design_info = DesignInfo(design_matrix.columns.tolist())
            constraint = design_info.linear_constraint(second_level_contrast)
            con_val = np.asarray(constraint.coefs, dtype=bool).ravel()
        contrast = np.asarray(design_matrix.columns.tolist())[con_val][0]
    return contrast
Code Example #3
File: firstlevel.py, Project: NILAB-UvA/nitools
def compute_rfx_contrast(imgs, design_matrix, contrast_def, mask=None, noise_model='ols', stat_type='t', output_type='z_score'):

    design_info = DesignInfo(design_matrix.columns.tolist())
    if isinstance(imgs, list):
        Y = np.stack([i.get_data() for i in imgs]).reshape(len(imgs), -1)        
    elif isinstance(imgs, np.ndarray):
        Y = imgs
    else:
        raise ValueError(f"Unknown format for Y ({type(imgs)}).")

    X = design_matrix.values
    labels, results = run_glm(Y, X, noise_model=noise_model)

    if isinstance(contrast_def, (np.ndarray, str)):
        con_vals = [contrast_def]
    elif isinstance(contrast_def, (list, tuple)):
        con_vals = contrast_def
    else:
        raise ValueError('contrast_def must be an array or str or list of'
                         ' (array or str)')

    for cidx, con in enumerate(con_vals):
        if not isinstance(con, np.ndarray):
            con_vals[cidx] = design_info.linear_constraint(con).coefs

    contrast = compute_contrast(labels, results, con_vals, stat_type)

    values = getattr(contrast, output_type)()
    if isinstance(imgs, list):
        values = nib.Nifti1Image(values.reshape(imgs[0].shape), affine=imgs[0].affine)

    return values
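
A sketch of how this function might be called for a simple one-sample (random-effects) test, assuming the excerpt's imports (numpy, pandas, nibabel and nistats' run_glm/compute_contrast) are available; the shapes and column name are made up:

import numpy as np
import pandas as pd

# 12 subject-level effect maps, each flattened to 500 "voxels".
Y = np.random.randn(12, 500)
# Intercept-only group design: tests the mean effect across subjects.
design_matrix = pd.DataFrame(np.ones((12, 1)), columns=['intercept'])

# z = compute_rfx_contrast(Y, design_matrix, 'intercept', output_type='z_score')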
Code Example #4
    def fit_constrained(self, constraints, start_params=None, **fit_kwds):
        """fit the model subject to linear equality constraints

        The constraints are of the form   `R params = q`
        where R is the constraint_matrix and q is the vector of
        constraint_values.

        The estimation creates a new model with transformed design matrix,
        exog, and converts the results back to the original parameterization.


        Parameters
        ----------
        constraints : formula expression or tuple
            If it is a tuple, then the constraint needs to be given by two
            arrays (constraint_matrix, constraint_value), i.e. (R, q).
            Otherwise, the constraints can be given as strings or list of
            strings.
            see t_test for details
        start_params : None or array_like
            Starting values for the optimization. `start_params` needs to be
            given in the original parameter space and is internally
            transformed.
        **fit_kwds : keyword arguments
            fit_kwds are used in the optimization of the transformed model.

        Returns
        -------
        results : Results instance

        """

        from patsy import DesignInfo
        from statsmodels.base._constraints import fit_constrained

        # same pattern as in base.LikelihoodModel.t_test
        lc = DesignInfo(self.exog_names).linear_constraint(constraints)
        R, q = lc.coefs, lc.constants

        # TODO: add start_params option, need access to transformation
        #       fit_constrained needs to do the transformation
        params, cov, res_constr = fit_constrained(self,
                                                  R,
                                                  q,
                                                  start_params=start_params,
                                                  fit_kwds=fit_kwds)
        #create dummy results Instance, TODO: wire up properly
        res = self.fit(start_params=params, maxiter=0)  # we get a wrapper back
        res._results.params = params
        res._results.normalized_cov_params = cov
        k_constr = len(q)
        res._results.df_resid += k_constr
        res._results.df_model -= k_constr
        res._results.constraints = lc
        res._results.k_constr = k_constr
        res._results.results_constrained = res_constr
        # TODO: the next is not the best. history should be in results
        res._results.model.history = res_constr.model.history
        res._results.mu = res_constr.mu
        return res
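
For comparison, the same constraint machinery is available through statsmodels' public API; a hedged sketch with synthetic Poisson data (the exog names 'x1', 'x2' are the defaults statsmodels assigns to unnamed array columns):

import numpy as np
import statsmodels.api as sm

np.random.seed(0)
X = sm.add_constant(np.random.randn(100, 2))
y = np.random.poisson(np.exp(X @ [0.5, 0.2, 0.2]))

model = sm.GLM(y, X, family=sm.families.Poisson())
# Constrain the two slope coefficients to be equal; the string is parsed
# by patsy's linear_constraint exactly as in the method above.
res = model.fit_constrained('x1 = x2')
print(res.params)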
Code Example #5
def _get_con_val(second_level_contrast, design_matrix):
    """ Check the contrast and return con_val when testing one contrast or more
    """
    if second_level_contrast is None:
        if design_matrix.shape[1] == 1:
            second_level_contrast = np.ones([1])
        else:
            raise ValueError('No second-level contrast is specified.')
    if isinstance(second_level_contrast, np.ndarray):
        con_val = second_level_contrast
        if np.all(con_val == 0):
            raise ValueError('Contrast is null')
    else:
        design_info = DesignInfo(design_matrix.columns.tolist())
        constraint = design_info.linear_constraint(second_level_contrast)
        con_val = constraint.coefs
    return con_val
Code Example #6
def fit_constrained_wrap(model, constraints, start_params=None, **fit_kwds):
    """fit_constraint that returns a results instance

    This is a development version for fit_constrained methods or
    fit_constrained as standalone function.

    It will not work correctly for all models because creating a new
    results instance is not standardized for use outside the `fit` methods,
    and might need adjustments for this.

    This is the prototype for the fit_constrained method that has been added
    to Poisson and GLM.

    Parameters
    ----------
    model : Model
    constraints : tuple (constraint_matrix, constraint_values)
    start_params : array-like or None
    **fit_kwds
    """
    self = model  # alias for use as method

    # TODO: decide whether to move the imports
    from patsy import DesignInfo

    # same pattern as in base.LikelihoodModel.t_test
    lc = DesignInfo(self.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants

    # TODO: add start_params option, need access to transformation
    #       fit_constrained needs to do the transformation
    params, cov, res_constr = fit_constrained(self,
                                              R,
                                              q,
                                              start_params=start_params,
                                              fit_kwds=fit_kwds)
    # create dummy results Instance, TODO: wire up properly
    res = self.fit(start_params=params, maxiter=0,
                   warn_convergence=False)  # we get a wrapper back
    res._results.params = params
    res._results.cov_params_default = cov
    cov_type = fit_kwds.get('cov_type', 'nonrobust')
    if cov_type == 'nonrobust':
        res._results.normalized_cov_params = cov / res_constr.scale
    else:
        res._results.normalized_cov_params = None

    k_constr = len(q)
    res._results.df_resid += k_constr
    res._results.df_model -= k_constr
    res._results.constraints = lc
    res._results.k_constr = k_constr
    res._results.results_constrained = res_constr
    # FIXME: don't alter these in-place
    return res
Code Example #7
def fit_constrained_wrap(model, constraints, start_params=None, **fit_kwds):
    """fit_constraint that returns a results instance

    This is a development version for fit_constrained methods or
    fit_constrained as standalone function.

    It will not work correctly for all models because creating a new
    results instance is not standardized for use outside the `fit` methods,
    and might need adjustments for this.

    This is the prototype for the fit_constrained method that has been added
    to Poisson and GLM.

    """

    self = model  # alias for use as method

    #constraints = (R, q)
    # TODO: temporary trailing underscore to not overwrite the monkey
    #       patched version
    # TODO: decide whether to move the imports
    from patsy import DesignInfo
    # we need this import if we copy it to a different module
    #from statsmodels.base._constraints import fit_constrained

    # same pattern as in base.LikelihoodModel.t_test
    lc = DesignInfo(self.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants

    # TODO: add start_params option, need access to transformation
    #       fit_constrained needs to do the transformation
    params, cov, res_constr = fit_constrained(self,
                                              R,
                                              q,
                                              start_params=start_params,
                                              fit_kwds=fit_kwds)
    #create dummy results Instance, TODO: wire up properly
    res = self.fit(start_params=params, maxiter=0,
                   warn_convergence=False)  # we get a wrapper back
    res._results.params = params
    res._results.cov_params_default = cov
    cov_type = fit_kwds.get('cov_type', 'nonrobust')
    if cov_type == 'nonrobust':
        res._results.normalized_cov_params = cov / res_constr.scale
    else:
        res._results.normalized_cov_params = None

    k_constr = len(q)
    res._results.df_resid += k_constr
    res._results.df_model -= k_constr
    res._results.constraints = lc
    res._results.k_constr = k_constr
    res._results.results_constrained = res_constr
    return res
Code Example #8
File: firstlevel.py, Project: NILAB-UvA/nitools
    def compute_fxe_contrast(self, contrast_def, stat_type='t', run=None, output_type='z_score'):
        """ Computes a fixed effect across multiple runs. """
        
        self.logger.info(f"Computing contrast: {contrast_def} for task {self.task} ...")
        if self.glm is None:
            raise ValueError("GLM has not been run yet!")

        if run is None:
            results = self.glm['results']
            labels = self.glm['labels']
            dms = self.glm['dms']
            design_info = DesignInfo(dms[0].columns.tolist())
        else:
            results = self.glm['results'][run]
            labels = self.glm['labels'][run]
            dms = self.glm['dms'][run]
            design_info = DesignInfo(dms.columns.tolist())

        if isinstance(contrast_def, (np.ndarray, str)):
            con_vals = [contrast_def]
        elif isinstance(contrast_def, (list, tuple)):
            con_vals = contrast_def
        else:
            raise ValueError('contrast_def must be an array or str or list of'
                             ' (array or str)')

        for cidx, con in enumerate(con_vals):
            if not isinstance(con, np.ndarray):
                con_vals[cidx] = design_info.linear_constraint(con).coefs

        if run is None:
            contrast = _fixed_effect_contrast(labels, results, con_vals, stat_type)
        else:
            contrast = compute_contrast(labels, results, con_vals, stat_type)

        values = getattr(contrast, output_type)()
        if self.mask is not None:
            return masking.unmask(values, self.mask)
        else:
            return values
Code Example #9
File: data.py, Project: njsmith/pyrerp
    def compute_symbolic_transform(self, expression, exclude=[]):
        # This converts symbolic expressions like "-A1/2" into
        # matrices which perform that transformation. (Actually it is a bit of
        # a hack. The parser/interpreter from patsy that we re-use actually
        # converts arbitrary *combinations* of linear *constraints* into
        # matrices, and is designed to interpret strings like:
        #    "A1=2, rhz*2=lhz"
        # We re-use this code, but interpret the output differently:
        # only one expression is allowed, and it specifies some value that
        # is computed from the data, and then added to each channel
        # not mentioned in 'exclude'.)
        transform = np.eye(self.num_channels)
        lc = DesignInfo(self.channel_names).linear_constraint(expression)
        # Check for the weird things that make sense for linear
        # constraints, but not for our hack here:
        if lc.coefs.shape[0] != 1:
            raise ValueError("only one expression allowed!")
        if np.any(lc.constants != 0):
            raise ValueError("transformations must be linear, not affine!")
        for i, channel_name in enumerate(self.channel_names):
            if channel_name not in exclude:
                transform[i, :] += lc.coefs[0, :]
        return transform
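
The trick described in the comment can be seen directly with patsy: a single expression over channel names yields one coefficient row, which is then added to every non-excluded row of an identity matrix. A small sketch with made-up channel names:

import numpy as np
from patsy import DesignInfo

channel_names = ['A1', 'A2', 'Cz']
lc = DesignInfo(channel_names).linear_constraint('-A1/2 - A2/2')
print(lc.coefs)  # [[-0.5 -0.5  0. ]]  (one row, constant term 0)

# Re-reference every channel except the mastoids to the linked-mastoid average.
transform = np.eye(len(channel_names))
for i, name in enumerate(channel_names):
    if name not in ('A1', 'A2'):
        transform[i, :] += lc.coefs[0, :]
print(transform)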
Code Example #10
def _multivariate_test(hypotheses, exog_names, endog_names, fn):
    k_xvar = len(exog_names)
    k_yvar = len(endog_names)
    results = {}
    for hypo in hypotheses:
        if len(hypo) == 2:
            name, L = hypo
            M = None
            C = None
        elif len(hypo) == 3:
            name, L, M = hypo
            C = None
        elif len(hypo) == 4:
            name, L, M, C = hypo
        else:
            raise ValueError('hypotheses must be a tuple of length 2, 3 or 4.'
                             ' len(hypotheses)=%d' % len(hypo))
        if any(isinstance(j, string_types) for j in L):
            L = DesignInfo(exog_names).linear_constraint(L).coefs
        else:
            if not isinstance(L, np.ndarray) or len(L.shape) != 2:
                raise ValueError('Contrast matrix L must be a 2-d array!')
            if L.shape[1] != k_xvar:
                raise ValueError('Contrast matrix L should have the same '
                                 'number of columns as exog! %d != %d' %
                                 (L.shape[1], k_xvar))
        if M is None:
            M = np.eye(k_yvar)
        elif any(isinstance(j, string_types) for j in M):
            M = DesignInfo(endog_names).linear_constraint(M).coefs.T
        else:
            if M is not None:
                if not isinstance(M, np.ndarray) or len(M.shape) != 2:
                    raise ValueError('Transform matrix M must be a 2-d array!')
                if M.shape[0] != k_yvar:
                    raise ValueError('Transform matrix M should have the same '
                                     'number of rows as the number of columns '
                                     'of endog! %d != %d' %
                                     (M.shape[0], k_yvar))
        if C is None:
            C = np.zeros([L.shape[0], M.shape[1]])
        elif not isinstance(C, np.ndarray):
            raise ValueError('Constant matrix C must be a 2-d array!')

        if C.shape[0] != L.shape[0]:
            raise ValueError('contrast L and constant C must have the same '
                             'number of rows! %d!=%d' %
                             (L.shape[0], C.shape[0]))
        if C.shape[1] != M.shape[1]:
            raise ValueError('transform M and constant C must have the same '
                             'number of columns! %d!=%d' %
                             (M.shape[1], C.shape[1]))
        E, H, q, df_resid = fn(L, M, C)
        EH = np.add(E, H)
        p = matrix_rank(EH)

        # eigenvalues of inv(E + H)H
        eigv2 = np.sort(eigvals(solve(EH, H)))
        stat_table = multivariate_stats(eigv2, p, q, df_resid)

        results[name] = {
            'stat': stat_table,
            'contrast_L': L,
            'transform_M': M,
            'constant_C': C
        }
    return results
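
As an illustration of the string branch above: when the L entry of a hypothesis tuple is a list of strings, it is stacked into a contrast matrix against the exogenous names (the names here are hypothetical):

from patsy import DesignInfo

exog_names = ['Intercept', 'x1', 'x2']
L = DesignInfo(exog_names).linear_constraint(['x1 = 0', 'x2 = 0']).coefs
print(L)
# [[0. 1. 0.]
#  [0. 0. 1.]]
# Passed as hypotheses=[('x1_x2_joint', ['x1 = 0', 'x2 = 0'])], this tests both
# coefficients jointly for every dependent variable (M defaults to the identity).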
Code Example #11
    def compute_contrast(self,
                         contrast_def,
                         stat_type=None,
                         output_type='z_score'):
        """Generate different outputs corresponding to
        the contrasts provided e.g. z_map, t_map, effects and variance.
        In multi-session case, outputs the fixed effects map.

        Parameters
        ----------
        contrast_def : str or array of shape (n_col) or list of (string or
                       array of shape (n_col))
                       
            where ``n_col`` is the number of columns of the design matrix,
            (one array per run). If only one array is provided when there
            are several runs, it will be assumed that the same contrast is
            desired for all runs. The string can be a formula compatible with
            the linear constraint of the Patsy library. Basically one can use
            the name of the conditions as they appear in the design matrix of
            the fitted model combined with operators /\*+- and numbers.
            Please check the patsy documentation for formula examples:
            http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint

        stat_type : {'t', 'F'}, optional
            type of the contrast

        output_type : str, optional
            Type of the output map. Can be 'z_score', 'stat', 'p_value',
            'effect_size', 'effect_variance' or 'all'

        Returns
        -------
        output : Nifti1Image or dict
            The desired output image(s). If ``output_type == 'all'``, then
            the output is a dictionary of images, keyed by the type of image.

        """
        if self.labels_ is None or self.results_ is None:
            raise ValueError('The model has not been fit yet')

        if isinstance(contrast_def, (np.ndarray, str)):
            con_vals = [contrast_def]
        elif isinstance(contrast_def, (list, tuple)):
            con_vals = contrast_def
        else:
            raise ValueError('contrast_def must be an array or str or list of'
                             ' (array or str)')

        # Translate formulas to vectors with patsy
        design_info = DesignInfo(self.design_matrices_[0].columns.tolist())
        for cidx, con in enumerate(con_vals):
            if not isinstance(con, np.ndarray):
                con_vals[cidx] = design_info.linear_constraint(con).coefs

        n_runs = len(self.labels_)
        if len(con_vals) != n_runs:
            warn('One contrast given, assuming it for all %d runs' % n_runs)
            con_vals = con_vals * n_runs

        # 'all' is assumed to be the final entry; if adding more, place before 'all'
        valid_types = [
            'z_score', 'stat', 'p_value', 'effect_size', 'effect_variance',
            'all'
        ]
        if output_type not in valid_types:
            raise ValueError(
                'output_type must be one of {}'.format(valid_types))

        contrast = _fixed_effect_contrast(self.labels_, self.results_,
                                          con_vals, stat_type)

        output_types = valid_types[:-1] if output_type == 'all' else [
            output_type
        ]

        outputs = {}
        for output_type_ in output_types:
            estimate_ = getattr(contrast, output_type_)()
            # Prepare the returned images
            output = self.masker_.inverse_transform(estimate_)
            contrast_name = str(con_vals)
            output.header['descrip'] = ('%s of contrast %s' %
                                        (output_type_, contrast_name))
            outputs[output_type_] = output

        return outputs if output_type == 'all' else output
Code Example #12
File: second_level_model.py, Project: takhs91/nistats
    def compute_contrast(self,
                         contrast_def,
                         stat_type=None,
                         output_type='z_score'):
        """Generate different outputs corresponding to
        the contrasts provided e.g. z_map, t_map, effects and variance.

        Parameters
        ----------
        contrast_def : str or array of shape (n_col)
            where ``n_col`` is the number of columns of the design matrix,
            The string can be a formula compatible with the linear constraint
            of the Patsy library. Basically one can use the name of the
            conditions as they appear in the design matrix of
            the fitted model combined with operators /*+- and numbers.
            Please check the patsy documentation for formula examples:
            http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint

        stat_type : {'t', 'F'}, optional
            type of the contrast

        output_type : str, optional
            Type of the output map. Can be 'z_score', 'stat', 'p_value',
            'effect_size' or 'effect_variance'

        Returns
        -------
        output_image : Nifti1Image
            The desired output image

        """
        # check model was fit
        if self.labels_ is None or self.results_ is None:
            raise ValueError('The model has not been fit yet')

        # check contrast definition
        if isinstance(contrast_def, np.ndarray):
            con_val = contrast_def
            if np.all(con_val == 0):
                raise ValueError('Contrast is null')
        else:
            design_info = DesignInfo(self.design_matrix_.columns.tolist())
            con_val = design_info.linear_constraint(contrast_def).coefs

        # check output type
        if isinstance(output_type, _basestring):
            if output_type not in [
                    'z_score', 'stat', 'p_value', 'effect_size',
                    'effect_variance'
            ]:
                raise ValueError(
                    'output_type must be one of "z_score", "stat"'
                    ', "p_value", "effect_size" or "effect_variance"')
        else:
            raise ValueError('output_type must be one of "z_score", "stat",'
                             ' "p_value", "effect_size" or "effect_variance"')

        if self.memory is not None:
            arg_ignore = ['labels', 'results']
            mem_contrast = self.memory.cache(compute_contrast,
                                             ignore=arg_ignore)
        else:
            mem_contrast = compute_contrast
        contrast = mem_contrast(self.labels_, self.results_, con_val,
                                stat_type)

        estimate_ = getattr(contrast, output_type)()
        # Prepare the returned images
        output = self.masker_.inverse_transform(estimate_)
        contrast_name = str(con_val)
        output.get_header()['descrip'] = ('%s of contrast %s' %
                                          (output_type, contrast_name))
        return output
Code Example #13
File: second_level_model.py, Project: alpinho/nistats
    def compute_contrast(
            self, second_level_contrast=None, first_level_contrast=None,
            second_level_stat_type=None, output_type='z_score'):
        """Generate different outputs corresponding to
        the contrasts provided e.g. z_map, t_map, effects and variance.

        Parameters
        ----------
        second_level_contrast: str or array of shape (n_col), optional
            Where ``n_col`` is the number of columns of the design matrix,
            The string can be a formula compatible with the linear constraint
            of the Patsy library. Basically one can use the name of the
            conditions as they appear in the design matrix of
            the fitted model combined with operators /\*+- and numbers.
            Please check the patsy documentation for formula examples:
            http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
            The default (None) is accepted if the design matrix has a single
            column, in which case the only possible contrast array([1]) is
            applied; when the design matrix has multiple columns, an error is
            raised.

        first_level_contrast: str or array of shape (n_col) with respect to
                              FirstLevelModel, optional
                              
            If a list of FirstLevelModel was provided as second_level_input,
            this contrast is applied to each first-level model to obtain the
            list of images to be tested at the second level. If a pandas
            DataFrame was provided as second_level_input, this is the map
            name to extract from its map_name column. It has to be a 't'
            contrast.

        second_level_stat_type: {'t', 'F'}, optional
            Type of the second level contrast

        output_type: str, optional
            Type of the output map. Can be 'z_score', 'stat', 'p_value',
            'effect_size' or 'effect_variance'

        Returns
        -------
        output_image: Nifti1Image
            The desired output image

        """
        if self.second_level_input_ is None:
            raise ValueError('The model has not been fit yet')

        # first_level_contrast check
        if isinstance(self.second_level_input_[0], FirstLevelModel):
            if first_level_contrast is None:
                raise ValueError('If second_level_input was a list of '
                                 'FirstLevelModel, then first_level_contrast '
                                 'is mandatory. It corresponds to the '
                                 'second_level_contrast argument of the '
                                 'compute_contrast method of FirstLevelModel')

        # check contrast definition
        if second_level_contrast is None:
            if self.design_matrix_.shape[1] == 1:
                second_level_contrast = np.ones([1])
            else:
                raise ValueError('No second-level contrast is specified.')
        if isinstance(second_level_contrast, np.ndarray):
            con_val = second_level_contrast
            if np.all(con_val == 0):
                raise ValueError('Contrast is null')
        else:
            design_info = DesignInfo(self.design_matrix_.columns.tolist())
            constraint = design_info.linear_constraint(second_level_contrast)
            con_val = constraint.coefs
        # check output type
        if isinstance(output_type, _basestring):
            if output_type not in ['z_score', 'stat', 'p_value', 'effect_size',
                                   'effect_variance']:
                raise ValueError(
                    'output_type must be one of "z_score", "stat"'
                    ', "p_value", "effect_size" or "effect_variance"')
        else:
            raise ValueError('output_type must be one of "z_score", "stat",'
                             ' "p_value", "effect_size" or "effect_variance"')

        # Get effect_maps appropriate for chosen contrast
        effect_maps = _infer_effect_maps(self.second_level_input_,
                                         first_level_contrast)
        # Check design matrix X and effect maps Y agree on number of rows
        if len(effect_maps) != self.design_matrix_.shape[0]:
            raise ValueError(
                'design_matrix does not match the number of maps considered. '
                '%i rows in design matrix do not match with %i maps' %
                (self.design_matrix_.shape[0], len(effect_maps)))

        # Fit an Ordinary Least Squares regression for parametric statistics
        Y = self.masker_.transform(effect_maps)
        if self.memory:
            mem_glm = self.memory.cache(run_glm, ignore=['n_jobs'])
        else:
            mem_glm = run_glm
        labels, results = mem_glm(Y, self.design_matrix_.values,
                                  n_jobs=self.n_jobs, noise_model='ols')
        # We save memory if inspecting model details is not necessary
        if self.minimize_memory:
            for key in results:
                results[key] = SimpleRegressionResults(results[key])
        self.labels_ = labels
        self.results_ = results

        # We compute contrast object
        if self.memory:
            mem_contrast = self.memory.cache(compute_contrast)
        else:
            mem_contrast = compute_contrast
        contrast = mem_contrast(self.labels_, self.results_, con_val,
                                second_level_stat_type)

        # We get desired output from contrast object
        estimate_ = getattr(contrast, output_type)()

        # Prepare the returned images
        output = self.masker_.inverse_transform(estimate_)
        contrast_name = str(con_val)
        output.header['descrip'] = (
            '%s of contrast %s' % (output_type, contrast_name))
        return output
Code Example #14
File: base.py, Project: haytastan/crankshaft
    def wald_test(self,
                  r_matrix,
                  cov_p=None,
                  scale=1.0,
                  invcov=None,
                  use_f=None):
        """
        Compute a Wald-test for a joint linear hypothesis.

        Parameters
        ----------
        r_matrix : array-like, str, or tuple
            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed that the
              linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length p row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            Default is 1.0 for no scaling.
        invcov : array-like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.
        use_f : bool
            If True, then the F-distribution is used. If False, then the
            asymptotic distribution, chisquare is used. If use_f is None, then
            the F distribution is used if the model specifies that use_t is True.
            The test statistic is proportionally adjusted for the distribution
            by the number of constraints in the hypothesis.

        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.

        See Also
        --------
        statsmodels.stats.contrast.ContrastResults
        f_test
        t_test
        patsy.DesignInfo.linear_constraint

        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,
        r_matrix (pX pX.T) r_matrix.T
        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.
        """
        if use_f is None:
            #switch to use_t false if undefined
            use_f = (hasattr(self, 'use_t') and self.use_t)

        from patsy import DesignInfo
        names = self.model.data.param_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants

        if (self.normalized_cov_params is None and cov_p is None
                and invcov is None
                and not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             'F statistics')

        cparams = np.dot(r_matrix, self.params[:, None])
        J = float(r_matrix.shape[0])  # number of restrictions
        if q_matrix is None:
            q_matrix = np.zeros(J)
        else:
            q_matrix = np.asarray(q_matrix)
        if q_matrix.ndim == 1:
            q_matrix = q_matrix[:, None]
            if q_matrix.shape[0] != J:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")
        Rbq = cparams - q_matrix
        if invcov is None:
            cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
            if np.isnan(cov_p).max():
                raise ValueError("r_matrix performs f_test for using "
                                 "dimensions that are asymptotically "
                                 "non-normal")
            invcov = np.linalg.inv(cov_p)

        if (hasattr(self, 'mle_settings')
                and self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
        else:
            F = np.dot(np.dot(Rbq.T, invcov), Rbq)

        df_resid = getattr(self, 'df_resid_inference', self.df_resid)
        if use_f:
            F /= J
            return ContrastResults(F=F,
                                   df_denom=df_resid,
                                   df_num=invcov.shape[0])
        else:
            return ContrastResults(chi2=F,
                                   df_denom=J,
                                   statistic=F,
                                   distribution='chi2',
                                   distargs=(J, ))
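
The docstring refers to examples that are not reproduced in this excerpt; a hedged usage sketch against statsmodels' public OLS results (which expose the same wald_test interface) could look like:

import numpy as np
import statsmodels.api as sm

np.random.seed(0)
X = sm.add_constant(np.random.randn(50, 3))
y = X @ [1.0, 0.5, 0.5, -0.2] + np.random.randn(50)
results = sm.OLS(y, X).fit()

# Two restrictions tested jointly; the string is parsed by
# patsy's DesignInfo.linear_constraint as in the method above.
print(results.wald_test('x1 = x2, x3 = 0'))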
Code Example #15
File: base.py, Project: haytastan/crankshaft
    def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None):
        """
        Compute a t-test for each linear hypothesis of the form Rb = q.

        Parameters
        ----------
        r_matrix : array-like, str, tuple
            - array : If an array is given, a p x k 2d array or length k 1d
              array specifying the linear restrictions. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q). If q is given,
              can be either a scalar or a length p row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            An optional `scale` to use.  Default is the scale specified
            by the model fit.
        use_t : bool, optional
            If use_t is None, then the default of the model is used.
            If use_t is True, then the p-values are based on the t
            distribution.
            If use_t is False, then the p-values are based on the normal
            distribution.

        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.
            The available results have the same elements as the parameter table
            in `summary()`.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> r = np.zeros_like(results.params)
        >>> r[5:] = [1,-1]
        >>> print(r)
        [ 0.  0.  0.  0.  0.  1. -1.]
        r tests that the coefficients on the 5th and 6th independent
        variable are the same.
        >>> T_test = results.t_test(r)
        >>> print(T_test)
        <T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
        t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
        >>> T_test.effect
        -1829.2025687192481
        >>> T_test.sd
        455.39079425193762
        >>> T_test.tvalue
        -4.0167754636411717
        >>> T_test.pvalue
        0.0015163772380899498
        Alternatively, you can specify the hypothesis tests using a string
        >>> from statsmodels.formula.api import ols
        >>> dta = sm.datasets.longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
        >>> t_test = results.t_test(hypotheses)
        >>> print(t_test)

        See Also
        --------
        tvalues : individual t statistics
        f_test : for F tests
        patsy.DesignInfo.linear_constraint
        """
        from patsy import DesignInfo
        names = self.model.data.param_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants
        num_ttests = r_matrix.shape[0]
        num_params = r_matrix.shape[1]

        if (cov_p is None and self.normalized_cov_params is None
                and not hasattr(self, 'cov_params_default')):
            raise ValueError('Need covariance of parameters for computing '
                             'T statistics')
        if num_params != self.params.shape[0]:
            raise ValueError('r_matrix and params are not aligned')
        if q_matrix is None:
            q_matrix = np.zeros(num_ttests)
        else:
            q_matrix = np.asarray(q_matrix)
            q_matrix = q_matrix.squeeze()
        if q_matrix.size > 1:
            if q_matrix.shape[0] != num_ttests:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")

        if use_t is None:
            #switch to use_t false if undefined
            use_t = (hasattr(self, 'use_t') and self.use_t)

        _t = _sd = None

        _effect = np.dot(r_matrix, self.params)
        # nan_dot multiplies with the convention nan * 0 = 0

        # Perform the test
        if num_ttests > 1:
            _sd = np.sqrt(
                np.diag(self.cov_params(r_matrix=r_matrix, cov_p=cov_p)))
        else:
            _sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
        _t = (_effect - q_matrix) * recipr(_sd)

        df_resid = getattr(self, 'df_resid_inference', self.df_resid)

        if use_t:
            return ContrastResults(effect=_effect,
                                   t=_t,
                                   sd=_sd,
                                   df_denom=df_resid)
        else:
            return ContrastResults(effect=_effect,
                                   statistic=_t,
                                   sd=_sd,
                                   df_denom=df_resid,
                                   distribution='norm')
Code Example #16
def _multivariate_test(hypotheses, exog_names, endog_names, fn):
    """
    Multivariate linear model hypotheses testing

    For y = x * params, where y are the dependent variables and x are the
    independent variables, testing L * params * M = 0 where L is the contrast
    matrix for hypotheses testing and M is the transformation matrix for
    transforming the dependent variables in y.

    Algorithm:
        T = L*inv(X'X)*L'
        H = M'B'L'*inv(T)*LBM
        E =  M'(Y'Y - B'X'XB)M
    And then finding the eigenvalues of inv(H + E)*H

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm

    Parameters
    ----------
    %(hypotheses_doc)s
    k_xvar : int
        The number of independent variables
    k_yvar : int
        The number of dependent variables
    fn : function
        a function fn(contrast_L, transform_M) that returns E, H, q, df_resid
        where q is the rank of T matrix

    Returns
    -------
    results : MANOVAResults
    """

    k_xvar = len(exog_names)
    k_yvar = len(endog_names)
    results = {}
    for hypo in hypotheses:
        if len(hypo) == 2:
            name, L = hypo
            M = None
            C = None
        elif len(hypo) == 3:
            name, L, M = hypo
            C = None
        elif len(hypo) == 4:
            name, L, M, C = hypo
        else:
            raise ValueError('hypotheses must be a tuple of length 2, 3 or 4.'
                             ' len(hypotheses)=%d' % len(hypo))
        if any(isinstance(j, str) for j in L):
            L = DesignInfo(exog_names).linear_constraint(L).coefs
        else:
            if not isinstance(L, np.ndarray) or len(L.shape) != 2:
                raise ValueError('Contrast matrix L must be a 2-d array!')
            if L.shape[1] != k_xvar:
                raise ValueError('Contrast matrix L should have the same '
                                 'number of columns as exog! %d != %d' %
                                 (L.shape[1], k_xvar))
        if M is None:
            M = np.eye(k_yvar)
        elif any(isinstance(j, str) for j in M):
            M = DesignInfo(endog_names).linear_constraint(M).coefs.T
        else:
            if M is not None:
                if not isinstance(M, np.ndarray) or len(M.shape) != 2:
                    raise ValueError('Transform matrix M must be a 2-d array!')
                if M.shape[0] != k_yvar:
                    raise ValueError('Transform matrix M should have the same '
                                     'number of rows as the number of columns '
                                     'of endog! %d != %d' %
                                     (M.shape[0], k_yvar))
        if C is None:
            C = np.zeros([L.shape[0], M.shape[1]])
        elif not isinstance(C, np.ndarray):
            raise ValueError('Constant matrix C must be a 2-d array!')

        if C.shape[0] != L.shape[0]:
            raise ValueError('contrast L and constant C must have the same '
                             'number of rows! %d!=%d'
                             % (L.shape[0], C.shape[0]))
        if C.shape[1] != M.shape[1]:
            raise ValueError('transform M and constant C must have the same '
                             'number of columns! %d!=%d'
                             % (M.shape[1], C.shape[1]))
        E, H, q, df_resid = fn(L, M, C)
        EH = np.add(E, H)
        p = matrix_rank(EH)

        # eigenvalues of inv(E + H)H
        eigv2 = np.sort(eigvals(solve(EH, H)))
        stat_table = multivariate_stats(eigv2, p, q, df_resid)

        results[name] = {'stat':stat_table, 'contrast_L':L,
                         'transform_M':M, 'constant_C':C}
    return results
Code Example #17
    def wald_test(self,
                  r_matrix,
                  xname=None,
                  cov_p=None,
                  scale=1.0,
                  invcov=None,
                  use_f=None):
        """
        Compute a Wald-test for a joint linear hypothesis.

        Parameters
        ----------
        r_matrix : {array_like, str, tuple}
            One of:

            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed that the
              linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length p row vector.

        cov_p : array_like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            Default is 1.0 for no scaling.

            .. deprecated:: 0.10.0

        invcov : array_like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.
        use_f : bool
            If True, then the F-distribution is used. If False, then the
            asymptotic distribution, chisquare is used. If use_f is None, then
            the F distribution is used if the model specifies that use_t is True.
            The test statistic is proportionally adjusted for the distribution
            by the number of constraints in the hypothesis.
        df_constraints : int, optional
            The number of constraints. If not provided the number of
            constraints is determined from r_matrix.

        Returns
        -------
        ContrastResults
            The results for the test are attributes of this results instance.
        """
        from patsy import DesignInfo
        names = xname
        params = self.params.ravel()
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants

        cparams = np.dot(r_matrix, params[:, None])
        J = float(r_matrix.shape[0])  # number of restrictions

        if q_matrix is None:
            q_matrix = np.zeros(J)
        else:
            q_matrix = np.asarray(q_matrix)
        if q_matrix.ndim == 1:
            q_matrix = q_matrix[:, None]
            if q_matrix.shape[0] != J:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")
        Rbq = cparams - q_matrix
        if invcov is None:
            cov_p = self.cov_params(r_matrix=r_matrix,
                                    cov_p=self.Hinv(self.params))
            if np.isnan(cov_p).max():
                raise ValueError("r_matrix performs f_test for using "
                                 "dimensions that are asymptotically "
                                 "non-normal")
            invcov = np.linalg.pinv(cov_p)
            J_ = np.linalg.matrix_rank(cov_p)
            if J_ < J:
                import warnings
                warnings.warn(
                    'covariance of constraints does not have full '
                    'rank. The number of constraints is %d, but '
                    'rank is %d' % (J, J_), ValueWarning)
                J = J_

        F = np.dot(np.dot(Rbq.T, invcov), Rbq)
        df_resid = self.df_resid

        return ContrastResults(chi2=F,
                               df_denom=J,
                               statistic=F,
                               distribution='chi2',
                               distargs=(J, ))
Code Example #18
File: first_level_model.py, Project: bthirion/nistats
    def compute_contrast(self, contrast_def, stat_type=None,
                         output_type='z_score'):
        """Generate different outputs corresponding to
        the contrasts provided e.g. z_map, t_map, effects and variance.
        In multi-session case, outputs the fixed effects map.

        Parameters
        ----------
        contrast_def : str or array of shape (n_col) or list of (string or
                       array of shape (n_col))
                       
            where ``n_col`` is the number of columns of the design matrix,
            (one array per run). If only one array is provided when there
            are several runs, it will be assumed that the same contrast is
            desired for all runs. The string can be a formula compatible with
            the linear constraint of the Patsy library. Basically one can use
            the name of the conditions as they appear in the design matrix of
            the fitted model combined with operators /\*+- and numbers.
            Please check the patsy documentation for formula examples:
            http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint

        stat_type : {'t', 'F'}, optional
            type of the contrast

        output_type : str, optional
            Type of the output map. Can be 'z_score', 'stat', 'p_value',
            'effect_size', 'effect_variance' or 'all'

        Returns
        -------
        output : Nifti1Image or dict
            The desired output image(s). If ``output_type == 'all'``, then
            the output is a dictionary of images, keyed by the type of image.

        """
        if self.labels_ is None or self.results_ is None:
            raise ValueError('The model has not been fit yet')

        if isinstance(contrast_def, (np.ndarray, str)):
            con_vals = [contrast_def]
        elif isinstance(contrast_def, (list, tuple)):
            con_vals = contrast_def
        else:
            raise ValueError('contrast_def must be an array or str or list of'
                             ' (array or str)')

        # Translate formulas to vectors with patsy
        design_info = DesignInfo(self.design_matrices_[0].columns.tolist())
        for cidx, con in enumerate(con_vals):
            if not isinstance(con, np.ndarray):
                con_vals[cidx] = design_info.linear_constraint(con).coefs

        n_runs = len(self.labels_)
        if len(con_vals) != n_runs:
            warn('One contrast given, assuming it for all %d runs' % n_runs)
            con_vals = con_vals * n_runs

        # 'all' is assumed to be the final entry; if adding more, place before 'all'
        valid_types = ['z_score', 'stat', 'p_value', 'effect_size',
                       'effect_variance', 'all']
        if output_type not in valid_types:
            raise ValueError('output_type must be one of {}'.format(valid_types))

        contrast = _fixed_effect_contrast(self.labels_, self.results_,
                                          con_vals, stat_type)

        output_types = valid_types[:-1] if output_type == 'all' else [output_type]

        outputs = {}
        for output_type_ in output_types:
            estimate_ = getattr(contrast, output_type_)()
            # Prepare the returned images
            output = self.masker_.inverse_transform(estimate_)
            contrast_name = str(con_vals)
            output.header['descrip'] = (
                '%s of contrast %s' % (output_type_, contrast_name))
            outputs[output_type_] = output

        return outputs if output_type == 'all' else output
Code Example #19
File: second_level_model.py, Project: m9h/nistats
    def compute_contrast(self,
                         second_level_contrast=None,
                         first_level_contrast=None,
                         second_level_stat_type=None,
                         output_type='z_score'):
        """Generate different outputs corresponding to
        the contrasts provided e.g. z_map, t_map, effects and variance.

        Parameters
        ----------
        second_level_contrast: str or array of shape (n_col), optional
            Where ``n_col`` is the number of columns of the design matrix,
            The string can be a formula compatible with the linear constraint
            of the Patsy library. Basically one can use the name of the
            conditions as they appear in the design matrix of
            the fitted model combined with operators /\*+- and numbers.
            Please check the patsy documentation for formula examples:
            http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
            The default (None) is accepted if the design matrix has a single
            column, in which case the only possible contrast array([1]) is
            applied; when the design matrix has multiple columns, an error is
            raised.

        first_level_contrast: str or array of shape (n_col) with respect to
                              FirstLevelModel, optional
                              
            If a list of FirstLevelModel was provided as second_level_input,
            this contrast is applied to each first-level model to obtain the
            list of images to be tested at the second level. If a pandas
            DataFrame was provided as second_level_input, this is the map
            name to extract from its map_name column. It has to be a 't'
            contrast.

        second_level_stat_type: {'t', 'F'}, optional
            Type of the second level contrast

        output_type: str, optional
            Type of the output map. Can be 'z_score', 'stat', 'p_value',
            'effect_size' or 'effect_variance'

        Returns
        -------
        output_image: Nifti1Image
            The desired output image

        """
        if self.second_level_input_ is None:
            raise ValueError('The model has not been fit yet')

        # first_level_contrast check
        if isinstance(self.second_level_input_[0], FirstLevelModel):
            if first_level_contrast is None:
                raise ValueError('If second_level_input was a list of '
                                 'FirstLevelModel, then first_level_contrast '
                                 'is mandatory. It corresponds to the '
                                 'second_level_contrast argument of the '
                                 'compute_contrast method of FirstLevelModel')

        # check contrast definition
        if second_level_contrast is None:
            if self.design_matrix_.shape[1] == 1:
                second_level_contrast = np.ones([1])
            else:
                raise ValueError('No second-level contrast is specified.')
        if isinstance(second_level_contrast, np.ndarray):
            con_val = second_level_contrast
            if np.all(con_val == 0):
                raise ValueError('Contrast is null')
        else:
            design_info = DesignInfo(self.design_matrix_.columns.tolist())
            constraint = design_info.linear_constraint(second_level_contrast)
            con_val = constraint.coefs
        # check output type
        if isinstance(output_type, _basestring):
            if output_type not in [
                    'z_score', 'stat', 'p_value', 'effect_size',
                    'effect_variance'
            ]:
                raise ValueError(
                    'output_type must be one of "z_score", "stat"'
                    ', "p_value", "effect_size" or "effect_variance"')
        else:
            raise ValueError('output_type must be one of "z_score", "stat",'
                             ' "p_value", "effect_size" or "effect_variance"')

        # Get effect_maps appropriate for chosen contrast
        effect_maps = _infer_effect_maps(self.second_level_input_,
                                         first_level_contrast)
        # Check design matrix X and effect maps Y agree on number of rows
        if len(effect_maps) != self.design_matrix_.shape[0]:
            raise ValueError(
                'design_matrix does not match the number of maps considered. '
                '%i rows in design matrix do not match with %i maps' %
                (self.design_matrix_.shape[0], len(effect_maps)))

        # Fit an Ordinary Least Squares regression for parametric statistics
        Y = self.masker_.transform(effect_maps)
        if self.memory:
            mem_glm = self.memory.cache(run_glm, ignore=['n_jobs'])
        else:
            mem_glm = run_glm
        labels, results = mem_glm(Y,
                                  self.design_matrix_.values,
                                  n_jobs=self.n_jobs,
                                  noise_model='ols')
        # We save memory if inspecting model details is not necessary
        if self.minimize_memory:
            for key in results:
                results[key] = SimpleRegressionResults(results[key])
        self.labels_ = labels
        self.results_ = results

        # We compute contrast object
        if self.memory:
            mem_contrast = self.memory.cache(compute_contrast)
        else:
            mem_contrast = compute_contrast
        contrast = mem_contrast(self.labels_, self.results_, con_val,
                                second_level_stat_type)

        # We get desired output from contrast object
        estimate_ = getattr(contrast, output_type)()

        # Prepare the returned images
        output = self.masker_.inverse_transform(estimate_)
        contrast_name = str(con_val)
        output.header['descrip'] = ('%s of contrast %s' %
                                    (output_type, contrast_name))
        return output
Code Example #20
    # L does not have full row rank, calculating constant fails with Singular Matrix

    # transform data xr = T x
    np.random.seed(1)
    x = np.random.randn(10, 5)
    xr = tr1.reduce(x)
    # roundtrip
    x2 = tr1.expand(xr)
    # this does not hold ? do not use constant? do not need it anyway ?
    #assert_allclose(x2, x, rtol=1e-14)


    from patsy import DesignInfo

    names = 'a b c d'.split()
    LC = DesignInfo(names).linear_constraint('a + b = 0')
    LC = DesignInfo(names).linear_constraint(['a + b = 0', 'a + 2*c = 1', 'b-a', 'c-a', 'd-a'])
    #LC = DesignInfo(self.model.exog_names).linear_constraint(r_matrix)
    r_matrix, q_matrix = LC.coefs, LC.constants

    np.random.seed(123)
    nobs = 20
    x = 1 + np.random.randn(nobs, 4)
    exog = np.column_stack((np.ones(nobs), x))
    endog = exog.sum(1) + np.random.randn(nobs)

    from statsmodels.regression.linear_model import OLS
    res2 = OLS(endog, exog).fit()
    #transf = TransformRestriction(np.eye(exog.shape[1])[:2], res2.params[:2] / 2)
    transf = TransformRestriction([[0, 0, 0, 1, 1]], res2.params[-2:].sum())
    exog_st = transf.reduce(exog)
Code Example #21
    def __init__(self, endog, exog, constraints=None, **kwargs):
        # Standardize data
        endog_using_pandas = _is_using_pandas(endog, None)
        if not endog_using_pandas:
            endog = np.asanyarray(endog)

        exog_is_using_pandas = _is_using_pandas(exog, None)
        if not exog_is_using_pandas:
            exog = np.asarray(exog)

        # Make sure we have 2-dimensional array
        if exog.ndim == 1:
            if not exog_is_using_pandas:
                exog = exog[:, None]
            else:
                exog = pd.DataFrame(exog)

        self.k_exog = exog.shape[1]

        # Handle constraints
        self.k_constraints = 0
        self._r_matrix = self._q_matrix = None
        if constraints is not None:
            from patsy import DesignInfo
            from statsmodels.base.data import handle_data
            data = handle_data(endog, exog, **kwargs)
            names = data.param_names
            LC = DesignInfo(names).linear_constraint(constraints)
            self._r_matrix, self._q_matrix = LC.coefs, LC.constants
            self.k_constraints = self._r_matrix.shape[0]

            constraint_endog = np.zeros((len(endog), len(self._r_matrix)))
            if endog_using_pandas:
                constraint_endog = pd.DataFrame(constraint_endog,
                                                index=endog.index)
                endog = concat([endog, constraint_endog], axis=1)
                endog.values[:, 1:] = self._q_matrix[:, 0]
            else:
                endog[:, 1:] = self._q_matrix[:, 0]

        # Handle coefficient initialization
        kwargs.setdefault('initialization', 'diffuse')

        # Initialize the state space representation
        super(RecursiveLS, self).__init__(
            endog, k_states=self.k_exog, exog=exog, **kwargs)

        # Use univariate filtering by default
        self.ssm.filter_univariate = True

        # Concentrate the scale out of the likelihood function
        self.ssm.filter_concentrated = True

        # Setup the state space representation
        self['design'] = np.zeros((self.k_endog, self.k_states, self.nobs))
        self['design', 0] = self.exog[:, :, None].T
        if self._r_matrix is not None:
            self['design', 1:, :] = self._r_matrix[:, :, None]
        self['transition'] = np.eye(self.k_states)

        # Notice that the filter output does not depend on the measurement
        # variance, so we set it here to 1
        self['obs_cov', 0, 0] = 1.
        self['transition'] = np.eye(self.k_states)

        # Linear constraints are technically imposed by adding "fake" endog
        # variables that are used during filtering, but for all model- and
        # results-based purposes we want k_endog = 1.
        if self._r_matrix is not None:
            self.k_endog = 1
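
A hedged usage sketch for the public entry point this constructor backs (statsmodels' RecursiveLS), with synthetic data and a formula-style constraint resolved against the exog column names:

import numpy as np
import pandas as pd
import statsmodels.api as sm

np.random.seed(0)
exog = pd.DataFrame({'x1': np.random.randn(100), 'x2': np.random.randn(100)})
endog = exog['x1'] + exog['x2'] + 0.1 * np.random.randn(100)

# The constraint string is turned into (R, q) by patsy, as in __init__ above.
mod = sm.RecursiveLS(endog, exog, constraints='x1 = x2')
res = mod.fit()
print(res.params)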
Code Example #22
File: modelspecs.py, Project: Fernal73/LearnPython3
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from patsy import dmatrix
from patsy import DesignMatrix, DesignInfo
from patsy import LookupFactor, ModelDesc, Term

X = [[1, 10], [1, 20], [1, -2]]
print(dmatrix(X))
design_info = DesignInfo(["Intercept!", "Not intercept!"])
X_dm = DesignMatrix(X, design_info)
print(dmatrix(X_dm))


def add_predictors(base_formula, extra_predictors):
    desc = ModelDesc.from_formula(base_formula)
    # Using LookupFactor here ensures that everything will work correctly even
    # if one of the column names in extra_predictors is named like "weight.in.kg"
    # or "sys.exit()" or "LittleBobbyTables()".
    desc.rhs_termlist += [Term([LookupFactor(p)]) for p in extra_predictors]
    return desc


extra_predictors = [f"x{i}" for i in range(10)]
desc = add_predictors("np.log(y) ~ a*b + c:d", extra_predictors)
print(desc.describe())
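
A short follow-up sketch (with made-up positive data so that np.log(y) is defined) showing that the ModelDesc returned by add_predictors can be passed straight to patsy's dmatrices:

import numpy as np
import pandas as pd
from patsy import dmatrices

rng = np.random.default_rng(0)
cols = ['y', 'a', 'b', 'c', 'd'] + [f"x{i}" for i in range(10)]
data = pd.DataFrame(rng.random((8, len(cols))) + 0.5, columns=cols)

# desc is the extended ModelDesc built above.
y, X = dmatrices(desc, data)
print(X.design_info.column_names)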