def quadratic_form_test(
    params: ArrayLike,
    cov: ArrayLike,
    restriction: OptionalArrayLike = None,
    value: OptionalArrayLike = None,
    formula: Optional[Union[str, List[str]]] = None,
) -> WaldTestStatistic:
    if formula is not None and restriction is not None:
        raise ValueError("restriction and formula cannot be used"
                         "simultaneously.")
    if formula is not None:
        assert isinstance(params, Series)
        di = DesignInfo(list(params.index))
        lc = di.linear_constraint(formula)
        restriction, value = lc.coefs, lc.constants
    restriction = np.asarray(restriction)
    if value is None:
        value = np.zeros(restriction.shape[0])
    value = np.asarray(value).ravel()[:, None]
    diff = restriction @ np.asarray(params)[:, None] - value
    rcov = restriction @ cov @ restriction.T
    stat = float(diff.T @ np.linalg.inv(rcov) @ diff)
    df = restriction.shape[0]
    null = "Linear equality constraint is valid"
    name = "Linear Equality Hypothesis Test"

    return WaldTestStatistic(stat, null, df, name=name)
Example #2
0
def quadratic_form_test(params, cov, restriction=None, value=None, formula=None):
    if formula is not None and restriction is not None:
        raise ValueError('restriction and formula cannot be used'
                         'simultaneously.')
    if formula is not None:
        di = DesignInfo(list(params.index))
        lc = di.linear_constraint(formula)
        restriction, value = lc.coefs, lc.constants
    restriction = np.asarray(restriction)
    if value is None:
        value = np.zeros(restriction.shape[0])
    value = np.asarray(value).ravel()[:, None]
    diff = restriction @ params.values[:, None] - value
    rcov = restriction @ cov @ restriction.T
    stat = float(diff.T @ np.linalg.inv(rcov) @ diff)
    df = restriction.shape[0]
    null = 'Linear equality constraint is valid'
    name = 'Linear Equality Hypothesis Test'

    return WaldTestStatistic(stat, null, df, name=name)
Example #3
0
 def _regularize_matrix(m, default_column_prefix):
     di = DesignInfo.from_array(m, default_column_prefix)
     if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)):
         orig_index = m.index
     else:
         orig_index = None
     if return_type == "dataframe":
         m = atleast_2d_column_default(m, preserve_pandas=True)
         m = pandas.DataFrame(m)
         m.columns = di.column_names
         m.design_info = di
         return (m, orig_index)
     else:
         return (DesignMatrix(m, di), orig_index)
Example #4
0
 def _regularize_matrix(m, default_column_prefix):
     di = DesignInfo.from_array(m, default_column_prefix)
     if have_pandas and isinstance(m,
                                   (pandas.Series, pandas.DataFrame)):
         orig_index = m.index
     else:
         orig_index = None
     if return_type == "dataframe":
         m = atleast_2d_column_default(m, preserve_pandas=True)
         m = pandas.DataFrame(m)
         m.columns = di.column_names
         m.design_info = di
         return (m, orig_index)
     else:
         return (DesignMatrix(m, di), orig_index)
Example #5
0
def test_DesignInfo_subset():
    # For each combination of:
    #   formula, term names, term objects, mixed term name and term objects
    # check that results match subset of full build
    # and that removed variables don't hurt
    all_data = {"x": [1, 2], "y": [[3.1, 3.2], [4.1, 4.2]], "z": [5, 6]}
    all_terms = make_termlist("x", "y", "z")

    def iter_maker():
        yield all_data

    all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def t(which_terms, variables, columns):
        sub_design_info = all_builder.subset(which_terms)
        sub_data = {}
        for variable in variables:
            sub_data[variable] = all_data[variable]
        sub_matrix = build_design_matrices([sub_design_info], sub_data)[0]
        sub_full_matrix = full_matrix[:, columns]
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(sub_design_info.terms)
        assert np.array_equal(sub_matrix, sub_full_matrix)

    t("~ 0 + x + y + z", ["x", "y", "z"], slice(None))
    t(["x", "y", "z"], ["x", "y", "z"], slice(None))
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("y"),
           unicode("z")], ["x", "y", "z"], slice(None))
    t(all_terms, ["x", "y", "z"], slice(None))
    t([all_terms[0], "y", all_terms[2]], ["x", "y", "z"], slice(None))

    t("~ 0 + x + z", ["x", "z"], [0, 3])
    t(["x", "z"], ["x", "z"], [0, 3])
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("z")], ["x", "z"], [0, 3])
    t([all_terms[0], all_terms[2]], ["x", "z"], [0, 3])
    t([all_terms[0], "z"], ["x", "z"], [0, 3])

    t("~ 0 + z + x", ["x", "z"], [3, 0])
    t(["z", "x"], ["x", "z"], [3, 0])
    t([six.text_type("z"), six.text_type("x")], ["x", "z"], [3, 0])
    t([all_terms[2], all_terms[0]], ["x", "z"], [3, 0])
    t([all_terms[2], "x"], ["x", "z"], [3, 0])

    t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    pytest.raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    pytest.raises(KeyError, all_builder.subset, "~ asdf")
    pytest.raises(KeyError, all_builder.subset, ["asdf"])
    pytest.raises(KeyError, all_builder.subset, [Term(["asdf"])])

    # Also check for a minimal DesignInfo (column names only)
    min_di = DesignInfo(["a", "b", "c"])
    min_di_subset = min_di.subset(["c", "a"])
    assert min_di_subset.column_names == ["c", "a"]
    assert min_di_subset.terms is None
Example #6
0
def design_matrix_builders(termlists, data_iter_maker, eval_env,
                           NA_action="drop"):
    """Construct several :class:`DesignInfo` objects from termlists.

    This is one of Patsy's fundamental functions. This function and
    :func:`build_design_matrices` together form the API to the core formula
    interpretation machinery.

    :arg termlists: A list of termlists, where each termlist is a list of
      :class:`Term` objects which together specify a design matrix.
    :arg data_iter_maker: A zero-argument callable which returns an iterator
      over dict-like data objects. This must be a callable rather than a
      simple iterator because sufficiently complex formulas may require
      multiple passes over the data (e.g. if there are nested stateful
      transforms).
    :arg eval_env: Either a :class:`EvalEnvironment` which will be used to
      look up any variables referenced in `termlists` that cannot be
      found in `data_iter_maker`, or else a depth represented as an
      integer which will be passed to :meth:`EvalEnvironment.capture`.
      ``eval_env=0`` means to use the context of the function calling
      :func:`design_matrix_builders` for lookups. If calling this function
      from a library, you probably want ``eval_env=1``, which means that
      variables should be resolved in *your* caller's namespace.
    :arg NA_action: An :class:`NAAction` object or string, used to determine
      what values count as 'missing' for purposes of determining the levels of
      categorical factors.
    :returns: A list of :class:`DesignInfo` objects, one for each
      termlist passed in.

    This function performs zero or more iterations over the data in order to
    sniff out any necessary information about factor types, set up stateful
    transforms, pick column names, etc.

    See :ref:`formulas` for details.

    .. versionadded:: 0.2.0
       The ``NA_action`` argument.
    .. versionadded:: 0.4.0
       The ``eval_env`` argument.
    """
    # People upgrading from versions prior to 0.4.0 could potentially have
    # passed NA_action as the 3rd positional argument. Fortunately
    # EvalEnvironment.capture only accepts int and EvalEnvironment objects,
    # and we improved its error messages to make this clear.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    if isinstance(NA_action, str):
        NA_action = NAAction(NA_action)
    all_factors = set()
    for termlist in termlists:
        for term in termlist:
            all_factors.update(term.factors)
    factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
    # Now all the factors have working eval methods, so we can evaluate them
    # on some data to find out what type of data they return.
    (num_column_counts,
     cat_levels_contrasts) = _examine_factor_types(all_factors,
                                                   factor_states,
                                                   data_iter_maker,
                                                   NA_action)
    # Now we need the factor infos, which encapsulate the knowledge of
    # how to turn any given factor into a chunk of data:
    factor_infos = {}
    for factor in all_factors:
        if factor in num_column_counts:
            fi = FactorInfo(factor,
                            "numerical",
                            factor_states[factor],
                            num_columns=num_column_counts[factor],
                            categories=None)
        else:
            assert factor in cat_levels_contrasts
            categories = cat_levels_contrasts[factor][0]
            fi = FactorInfo(factor,
                            "categorical",
                            factor_states[factor],
                            num_columns=None,
                            categories=categories)
        factor_infos[factor] = fi
    # And now we can construct the DesignInfo for each termlist:
    design_infos = []
    for termlist in termlists:
        term_to_subterm_infos = _make_subterm_infos(termlist,
                                                    num_column_counts,
                                                    cat_levels_contrasts)
        assert isinstance(term_to_subterm_infos, OrderedDict)
        assert frozenset(term_to_subterm_infos) == frozenset(termlist)
        this_design_factor_infos = {}
        for term in termlist:
            for factor in term.factors:
                this_design_factor_infos[factor] = factor_infos[factor]
        column_names = []
        for subterms in six.itervalues(term_to_subterm_infos):
            for subterm in subterms:
                for column_name in _subterm_column_names_iter(
                        factor_infos, subterm):
                    column_names.append(column_name)
        design_infos.append(DesignInfo(column_names,
                                       factor_infos=this_design_factor_infos,
                                       term_codings=term_to_subterm_infos))
    return design_infos
Example #7
0
def test_DesignInfo_subset():
    # For each combination of:
    #   formula, term names, term objects, mixed term name and term objects
    # check that results match subset of full build
    # and that removed variables don't hurt
    all_data = {"x": [1, 2],
                "y": [[3.1, 3.2],
                      [4.1, 4.2]],
                "z": [5, 6]}
    all_terms = make_termlist("x", "y", "z")
    def iter_maker():
        yield all_data
    all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def t(which_terms, variables, columns):
        sub_design_info = all_builder.subset(which_terms)
        sub_data = {}
        for variable in variables:
            sub_data[variable] = all_data[variable]
        sub_matrix = build_design_matrices([sub_design_info], sub_data)[0]
        sub_full_matrix = full_matrix[:, columns]
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(sub_design_info.terms)
        assert np.array_equal(sub_matrix, sub_full_matrix)

    t("~ 0 + x + y + z", ["x", "y", "z"], slice(None))
    t(["x", "y", "z"], ["x", "y", "z"], slice(None))
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("y"), unicode("z")],
          ["x", "y", "z"], slice(None))
    t(all_terms, ["x", "y", "z"], slice(None))
    t([all_terms[0], "y", all_terms[2]], ["x", "y", "z"], slice(None))

    t("~ 0 + x + z", ["x", "z"], [0, 3])
    t(["x", "z"], ["x", "z"], [0, 3])
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("z")], ["x", "z"], [0, 3])
    t([all_terms[0], all_terms[2]], ["x", "z"], [0, 3])
    t([all_terms[0], "z"], ["x", "z"], [0, 3])

    t("~ 0 + z + x", ["x", "z"], [3, 0])
    t(["z", "x"], ["x", "z"], [3, 0])
    t([six.text_type("z"), six.text_type("x")], ["x", "z"], [3, 0])
    t([all_terms[2], all_terms[0]], ["x", "z"], [3, 0])
    t([all_terms[2], "x"], ["x", "z"], [3, 0])

    t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    assert_raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    assert_raises(KeyError, all_builder.subset, "~ asdf")
    assert_raises(KeyError, all_builder.subset, ["asdf"])
    assert_raises(KeyError,
                  all_builder.subset, [Term(["asdf"])])

    # Also check for a minimal DesignInfo (column names only)
    min_di = DesignInfo(["a", "b", "c"])
    min_di_subset = min_di.subset(["c", "a"])
    assert min_di_subset.column_names == ["c", "a"]
    assert min_di_subset.terms is None
Example #8
0
 def design_info(self):
     """A :class:`DesignInfo` object giving information about the design
     matrices that this DesignMatrixBuilder can be used to create."""
     return DesignInfo(self._column_names, self._term_slices, builder=self)