Beispiel #1
0
def check_result(expect_builders, lhs, rhs, data,
                 expected_rhs_values, expected_rhs_names,
                 expected_lhs_values, expected_lhs_names): # pragma: no cover
    """Assert that a pair of design matrices has the expected contents.

    ``rhs`` is always compared (values via ``np.allclose``, names via
    ``design_info.column_names``) against the ``expected_rhs_*`` arguments.
    ``lhs`` may be None, in which case both ``expected_lhs_*`` arguments
    must be None too.

    When ``expect_builders`` is true the matrices are rebuilt from ``data``
    using the builders stored on their ``design_info`` and the rebuilt
    matrices must match the originals.  When it is false, every
    ``design_info.builder`` is required to be None.
    """
    has_lhs = lhs is not None

    assert np.allclose(rhs, expected_rhs_values)
    assert rhs.design_info.column_names == expected_rhs_names
    if has_lhs:
        assert np.allclose(lhs, expected_lhs_values)
        assert lhs.design_info.column_names == expected_lhs_names
    else:
        assert expected_lhs_values is None
        assert expected_lhs_names is None

    if not expect_builders:
        # Matrices that did not come from a formula carry no builder.
        assert rhs.design_info.builder is None
        assert lhs is None or lhs.design_info.builder is None
        return

    # Round-trip: rebuilding from the stored builders must reproduce the
    # matrices we were handed.
    if has_lhs:
        rebuilt_lhs, rebuilt_rhs = build_design_matrices(
            [lhs.design_info.builder, rhs.design_info.builder], data)
        assert np.allclose(rebuilt_lhs, lhs)
        assert rebuilt_lhs.design_info.column_names == expected_lhs_names
    else:
        (rebuilt_rhs,) = build_design_matrices([rhs.design_info.builder],
                                               data)
    assert np.allclose(rebuilt_rhs, rhs)
    assert rebuilt_rhs.design_info.column_names == expected_rhs_names
Beispiel #2
0
def check_result(expect_builders, lhs, rhs, data, expected_rhs_values,
                 expected_rhs_names, expected_lhs_values,
                 expected_lhs_names):  # pragma: no cover
    """Check design matrices against expected values and column names.

    The right-hand matrix ``rhs`` is always checked.  The left-hand matrix
    ``lhs`` is checked when it is not None; when it is None the two
    ``expected_lhs_*`` arguments must also be None.

    If ``expect_builders`` is true, the matrices are additionally rebuilt
    from ``data`` via ``design_info.builder`` and compared with the
    originals; otherwise each ``design_info.builder`` must be None.
    """
    # --- direct comparison against the expectations -----------------------
    assert np.allclose(rhs, expected_rhs_values)
    assert rhs.design_info.column_names == expected_rhs_names
    if lhs is None:
        assert expected_lhs_values is None
        assert expected_lhs_names is None
    else:
        assert np.allclose(lhs, expected_lhs_values)
        assert lhs.design_info.column_names == expected_lhs_names

    # --- builder handling ---------------------------------------------------
    if expect_builders:
        if lhs is not None:
            builder_pair = [lhs.design_info.builder, rhs.design_info.builder]
            lhs_again, rhs_again = build_design_matrices(builder_pair, data)
            assert np.allclose(lhs_again, lhs)
            assert lhs_again.design_info.column_names == expected_lhs_names
        else:
            rhs_again, = build_design_matrices([rhs.design_info.builder],
                                               data)
        # The right-hand side is rebuilt in both cases.
        assert np.allclose(rhs_again, rhs)
        assert rhs_again.design_info.column_names == expected_rhs_names
    else:
        assert rhs.design_info.builder is None
        assert lhs is None or lhs.design_info.builder is None
Beispiel #3
0
def test_incremental():
    """Check incr_dbuilder / incr_dbuilders on data supplied in two chunks.

    The formula uses nested ``center`` calls, so the expected numeric
    column is computed here from all six ``x`` values across both chunks.
    """
    # incr_dbuilder(s)
    # stateful transformations
    first_chunk = {"a": ["a2", "a2", "a2"], "x": [1, 2, 3]}
    second_chunk = {"a": ["a2", "a2", "a1"], "x": [4, 5, 6]}
    chunks = [first_chunk, second_chunk]

    all_x = np.asarray([1, 2, 3, 4, 5, 6])
    sin_centered = np.sin(all_x - np.mean(all_x))
    expected_x_col = sin_centered - np.mean(sin_centered)

    def data_iter_maker():
        return iter(chunks)

    rhs_names = ["Intercept", "a[T.a2]", "center(np.sin(center(x)))"]
    ones_column = [[1], [1], [1]]

    # Two-sided formula: both matrices are built from the second chunk only,
    # hence the [3:] slice of the expected column.
    builders = incr_dbuilders("1 ~ a + center(np.sin(center(x)))", data_iter_maker)
    lhs, rhs = build_design_matrices(builders, second_chunk)
    assert lhs.design_info.column_names == ["Intercept"]
    assert rhs.design_info.column_names == rhs_names
    assert np.allclose(lhs, ones_column)
    assert np.allclose(rhs, np.column_stack(([1, 1, 1], [1, 1, 0], expected_x_col[3:])))

    # One-sided formula through the single-builder entry point.
    builder = incr_dbuilder("~ a + center(np.sin(center(x)))", data_iter_maker)
    (rhs,) = build_design_matrices([builder], second_chunk)
    assert rhs.design_info.column_names == rhs_names
    # NOTE: lhs is still the matrix from the two-sided call above.
    assert np.allclose(lhs, ones_column)
    assert np.allclose(rhs, np.column_stack(([1, 1, 1], [1, 1, 0], expected_x_col[3:])))

    # Each entry point must reject a formula with the wrong number of sides.
    assert_raises(PatsyError, incr_dbuilder, "x ~ x", data_iter_maker)
    assert_raises(PatsyError, incr_dbuilders, "x", data_iter_maker)
Beispiel #4
0
def test_0d_data():
    """Check that scalar (0-d) data values produce single-row matrices."""
    # Use case from statsmodels/statsmodels#1881
    scalars = {"x1": 1.1, "x2": 1.2, "a": "a1"}
    cases = (
        ("x1 + x2", [[1, 1.1, 1.2]]),
        ("C(a, levels=('a1', 'a2')) + x1", [[1, 0, 1.1]]),
    )

    for formula, expected in cases:
        mat = dmatrix(formula, scalars)
        assert np.allclose(mat, expected)

        # Rebuilding from the stored design_info must give the same row.
        rebuilt = build_design_matrices([mat.design_info], scalars)[0]
        assert np.allclose(rebuilt, expected)

        if not have_pandas:
            continue

        # Same checks with the scalars wrapped in a pandas.Series.
        as_series = pandas.Series(scalars)
        assert np.allclose(dmatrix(formula, as_series), expected)

        rebuilt_from_series = build_design_matrices([mat.design_info], as_series)[0]
        assert np.allclose(rebuilt_from_series, expected)
Beispiel #5
0
def test_evalfactor_reraise():
    """Check that an error raised inside a factor is given a formula origin.

    A design is built from ``data`` and then re-applied to ``new_data``.
    The rebuild is expected to raise PatsyError, and the error's ``origin``
    must equal ``Origin(formula, 0, 4)`` (the ``C(X)`` span of the formula).
    """
    # From issue #11:
    data = {"X": [0, 1, 2, 3], "Y": [1, 2, 3, 4]}
    formula = "C(X) + Y"
    new_data = {"X": [0, 0, 1, 2, 3, 3, 4], "Y": [1, 2, 3, 4, 5, 6, 7]}
    info = dmatrix(formula, data)
    # This will produce a PatsyError, which is originally raised within the
    # call to C() (which has no way to know where it is being called
    # from). But EvalFactor should notice this, and add a useful origin:
    try:
        build_design_matrices([info.design_info.builder], new_data)
    except PatsyError as e:
        assert e.origin == Origin(formula, 0, 4)
    else:
        # Without this branch the test would pass vacuously when no error
        # is raised at all.
        raise AssertionError("expected build_design_matrices to raise PatsyError")
Beispiel #6
0
def test_0d_data():
    """Verify design-matrix construction when every data value is a scalar.

    Each formula is evaluated with ``dmatrix`` and again through
    ``build_design_matrices`` using the resulting ``design_info``; when
    pandas is available the same is repeated with a ``pandas.Series``.
    """
    # Use case from statsmodels/statsmodels#1881
    data_0d = {"x1": 1.1, "x2": 1.2, "a": "a1"}
    formulas_and_rows = [
        ("x1 + x2", [[1, 1.1, 1.2]]),
        ("C(a, levels=('a1', 'a2')) + x1", [[1, 0, 1.1]]),
    ]

    for formula, expected_row in formulas_and_rows:
        direct = dmatrix(formula, data_0d)
        assert np.allclose(direct, expected_row)

        infos = [direct.design_info]
        assert np.allclose(build_design_matrices(infos, data_0d)[0],
                           expected_row)

        if have_pandas:
            series_0d = pandas.Series(data_0d)
            assert np.allclose(dmatrix(formula, series_0d), expected_row)

            infos = [direct.design_info]
            assert np.allclose(build_design_matrices(infos, series_0d)[0],
                               expected_row)
Beispiel #7
0
def test_incremental():
    """Test the incremental builders with stateful transforms.

    Data is supplied as two chunks through ``data_iter_maker``; matrices
    are then built for the second chunk and compared with values computed
    by hand from all six ``x`` observations.
    """
    # incr_dbuilder(s)
    # stateful transformations
    datas = [
        {"a": ["a2", "a2", "a2"], "x": [1, 2, 3]},
        {"a": ["a2", "a2", "a1"], "x": [4, 5, 6]},
    ]
    later_chunk = datas[1]

    # Hand-computed center(np.sin(center(x))) over the full data.
    full_x = np.asarray([1, 2, 3, 4, 5, 6])
    inner = np.sin(full_x - np.mean(full_x))
    x_col = inner - np.mean(inner)

    def data_iter_maker():
        return iter(datas)

    expected_names = ["Intercept", "a[T.a2]", "center(np.sin(center(x)))"]

    def expected_rhs():
        # Rows 3.. of the hand-computed column correspond to the later chunk.
        return np.column_stack(([1, 1, 1], [1, 1, 0], x_col[3:]))

    # --- two-sided formula --------------------------------------------------
    lhs, rhs = build_design_matrices(
        incr_dbuilders("1 ~ a + center(np.sin(center(x)))", data_iter_maker),
        later_chunk)
    assert lhs.design_info.column_names == ["Intercept"]
    assert rhs.design_info.column_names == expected_names
    assert np.allclose(lhs, [[1], [1], [1]])
    assert np.allclose(rhs, expected_rhs())

    # --- one-sided formula --------------------------------------------------
    single = incr_dbuilder("~ a + center(np.sin(center(x)))", data_iter_maker)
    (rhs, ) = build_design_matrices([single], later_chunk)
    assert rhs.design_info.column_names == expected_names
    # lhs here is the left-hand matrix produced by the two-sided call above.
    assert np.allclose(lhs, [[1], [1], [1]])
    assert np.allclose(rhs, expected_rhs())

    # --- wrong number of formula sides for each entry point -----------------
    assert_raises(PatsyError, incr_dbuilder, "x ~ x", data_iter_maker)
    assert_raises(PatsyError, incr_dbuilders, "x", data_iter_maker)
Beispiel #8
0
def t(formula_like, data, depth,
      expect_builders,
      expected_rhs_values, expected_rhs_names,
      expected_lhs_values=None, expected_lhs_names=None): # pragma: no cover
    """Run ``formula_like`` through every entry point and check the results.

    The incremental builders (``incr_dbuilder`` / ``incr_dbuilders``) are
    exercised first, then ``dmatrix`` / ``dmatrices`` and, when pandas is
    available, their ``*_pandas`` counterparts.  Whether a one-matrix or a
    two-matrix result is expected is decided by ``expected_lhs_values``:
    None means one-matrix.  The entry points for the other arity must raise
    PatsyError.  Successful results are verified with ``check_result``.

    All calls that receive ``depth`` are made directly from this function's
    body rather than through helpers.
    """
    # An integer depth is bumped by one (presumably to account for this
    # function's own stack frame -- confirm against the callees).
    if isinstance(depth, int):
        depth += 1

    def data_iter_maker():
        return iter([data])

    one_sided = expected_lhs_values is None

    incremental_ok = (
        isinstance(formula_like, (basestring, ModelDesc, DesignMatrixBuilder))
        or (isinstance(formula_like, tuple)
            and isinstance(formula_like[0], DesignMatrixBuilder))
        or hasattr(formula_like, "__patsy_get_model_desc__"))
    if not incremental_ok:
        # Anything else cannot be turned into builders at all.
        assert_raises(PatsyError, incr_dbuilders,
                      formula_like, data_iter_maker)
        assert_raises(PatsyError, incr_dbuilder,
                      formula_like, data_iter_maker)
    else:
        if one_sided:
            builder = incr_dbuilder(formula_like, data_iter_maker, depth)
            lhs = None
            (rhs,) = build_design_matrices([builder], data)
        else:
            builders = incr_dbuilders(formula_like, data_iter_maker, depth)
            lhs, rhs = build_design_matrices(builders, data)
        check_result(expect_builders, lhs, rhs, data,
                     expected_rhs_values, expected_rhs_names,
                     expected_lhs_values, expected_lhs_names)

    one_mat_fs = [dmatrix, dmatrix_pandas] if have_pandas else [dmatrix]
    two_mat_fs = [dmatrices, dmatrices_pandas] if have_pandas else [dmatrices]

    if not one_sided:
        # One-matrix entry points must refuse a two-sided input.
        for f in one_mat_fs:
            try:
                f(formula_like, data, depth)
            except PatsyError:
                continue
            raise AssertionError

        for f in two_mat_fs:
            (lhs, rhs) = f(formula_like, data, depth)
            check_result(expect_builders, lhs, rhs, data,
                         expected_rhs_values, expected_rhs_names,
                         expected_lhs_values, expected_lhs_names)
    else:
        for f in one_mat_fs:
            rhs = f(formula_like, data, depth)
            check_result(expect_builders, None, rhs, data,
                         expected_rhs_values, expected_rhs_names,
                         expected_lhs_values, expected_lhs_names)

        # We inline assert_raises here to avoid complications with the
        # depth argument.
        for f in two_mat_fs:
            try:
                f(formula_like, data, depth)
            except PatsyError:
                continue
            raise AssertionError
Beispiel #9
0
def _do_highlevel_design(formula_like, data, eval_env, return_type):
    """Produce design matrices for ``formula_like`` in the requested format.

    ``return_type`` must be ``"matrix"`` or ``"dataframe"``; the latter
    requires pandas.  If ``_try_incr_builders`` yields builders, the result
    of ``build_design_matrices`` is returned directly.  Otherwise
    ``formula_like`` is treated as an explicit matrix, or a 2-tuple of
    (lhs, rhs) matrices, and an ``(lhs, rhs)`` pair is returned with a
    ``design_info`` attached to each.

    Raises PatsyError for an unavailable or unrecognised ``return_type``, a
    tuple whose length is not 2, a row-count mismatch, or mismatched pandas
    indexes.
    """
    if return_type == "dataframe" and not have_pandas:
        raise PatsyError("pandas.DataFrame was requested, but pandas is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError(
            "unrecognized output type %r, should be 'matrix' or 'dataframe'" % (return_type,)
        )

    def data_iter_maker():
        return iter([data])

    builders = _try_incr_builders(formula_like, data_iter_maker, eval_env)
    if builders is not None:
        return build_design_matrices(builders, data, return_type=return_type)

    # No builders, but maybe we can still get matrices
    if isinstance(formula_like, tuple):
        if len(formula_like) != 2:
            raise PatsyError(
                "don't know what to do with a length %s matrices tuple" % (len(formula_like),)
            )
        lhs, rhs = formula_like
    else:
        lhs = None
        # subok=True is necessary here to allow DesignMatrixes to pass
        # through
        rhs = asarray_or_pandas(formula_like, subok=True)

    # some sort of explicit matrix or matrices were given. Currently we
    # have them in one of these forms:
    #   -- an ndarray or subclass
    #   -- a DesignMatrix
    #   -- a pandas.Series
    #   -- a pandas.DataFrame
    # and we have to produce a standard output format.
    def _regularize_matrix(m, default_column_prefix):
        di = DesignInfo.from_array(m, default_column_prefix)
        # Remember the pandas index (if any) before m is converted.
        orig_index = None
        if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)):
            orig_index = m.index
        if return_type != "dataframe":
            return (DesignMatrix(m, di), orig_index)
        frame = pandas.DataFrame(atleast_2d_column_default(m, preserve_pandas=True))
        frame.columns = di.column_names
        frame.design_info = di
        return (frame, orig_index)

    rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
    if lhs is None:
        # Stand-in outcome matrix: same number of rows, zero columns.
        lhs = np.zeros((rhs.shape[0], 0), dtype=float)
    lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

    assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
    assert isinstance(getattr(rhs, "design_info", None), DesignInfo)

    if lhs.shape[0] != rhs.shape[0]:
        raise PatsyError(
            "shape mismatch: outcome matrix has %s rows, predictor matrix has %s rows"
            % (lhs.shape[0], rhs.shape[0])
        )

    lhs_indexed = lhs_orig_index is not None
    rhs_indexed = rhs_orig_index is not None
    if rhs_indexed and lhs_indexed and not rhs_orig_index.equals(lhs_orig_index):
        raise PatsyError("index mismatch: outcome and predictor have incompatible indexes")

    if return_type == "dataframe":
        # When only one side came with an index, copy it to the other side.
        if rhs_indexed and not lhs_indexed:
            lhs.index = rhs.index
        elif lhs_indexed and not rhs_indexed:
            rhs.index = lhs.index
    return (lhs, rhs)
Beispiel #10
0
def t(formula_like,
      data,
      depth,
      expect_builders,
      expected_rhs_values,
      expected_rhs_names,
      expected_lhs_values=None,
      expected_lhs_names=None):  # pragma: no cover
    """Exercise all design-matrix entry points on one input.

    ``formula_like`` is fed to ``incr_dbuilder`` / ``incr_dbuilders`` and to
    the functions collected in ``one_mat_fs`` / ``two_mat_fs``.  A None
    ``expected_lhs_values`` selects the one-matrix functions as the ones
    that must succeed; otherwise the two-matrix functions must succeed.
    Functions of the other arity must raise PatsyError.  Every successful
    result is passed to ``check_result``.

    Calls taking ``depth`` are deliberately written inline in this body
    instead of going through wrappers.
    """
    if isinstance(depth, int):
        # NOTE(review): looks like this compensates for the extra frame
        # introduced by this function -- confirm.
        depth += 1

    def data_iter_maker():
        return iter([data])

    lhs_expected = expected_lhs_values is not None

    if isinstance(formula_like, (basestring, ModelDesc, DesignMatrixBuilder)):
        buildable = True
    elif (isinstance(formula_like, tuple)
          and isinstance(formula_like[0], DesignMatrixBuilder)):
        buildable = True
    else:
        buildable = hasattr(formula_like, "__patsy_get_model_desc__")

    if buildable:
        if lhs_expected:
            builders = incr_dbuilders(formula_like, data_iter_maker, depth)
            lhs, rhs = build_design_matrices(builders, data)
        else:
            builder = incr_dbuilder(formula_like, data_iter_maker, depth)
            lhs = None
            (rhs, ) = build_design_matrices([builder], data)
        check_result(expect_builders, lhs, rhs, data, expected_rhs_values,
                     expected_rhs_names, expected_lhs_values,
                     expected_lhs_names)
    else:
        assert_raises(PatsyError, incr_dbuilders, formula_like,
                      data_iter_maker)
        assert_raises(PatsyError, incr_dbuilder, formula_like, data_iter_maker)

    one_mat_fs = [dmatrix]
    two_mat_fs = [dmatrices]
    if have_pandas:
        one_mat_fs += [dmatrix_pandas]
        two_mat_fs += [dmatrices_pandas]

    if lhs_expected:
        for f in one_mat_fs:
            try:
                f(formula_like, data, depth)
            except PatsyError:
                pass
            else:
                raise AssertionError

        for f in two_mat_fs:
            (lhs, rhs) = f(formula_like, data, depth)
            check_result(expect_builders, lhs, rhs, data, expected_rhs_values,
                         expected_rhs_names, expected_lhs_values,
                         expected_lhs_names)
        return

    for f in one_mat_fs:
        rhs = f(formula_like, data, depth)
        check_result(expect_builders, None, rhs, data, expected_rhs_values,
                     expected_rhs_names, expected_lhs_values,
                     expected_lhs_names)

    # We inline assert_raises here to avoid complications with the
    # depth argument.
    for f in two_mat_fs:
        try:
            f(formula_like, data, depth)
        except PatsyError:
            pass
        else:
            raise AssertionError
Beispiel #11
0
def _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type):
    """Build design matrices from ``formula_like`` as matrices or dataframes.

    ``return_type`` is validated first (``"matrix"`` or ``"dataframe"``,
    the latter only when pandas is available).  When
    ``_try_incr_builders`` returns builders, the outcome of
    ``build_design_matrices`` (called with ``NA_action`` and
    ``return_type``) is returned as-is.  Otherwise ``formula_like`` is taken
    to be an explicit matrix or an (lhs, rhs) 2-tuple, and an ``(lhs, rhs)``
    pair carrying ``design_info`` attributes is returned.

    Raises PatsyError on an invalid ``return_type``, a tuple of length other
    than 2, differing row counts, or pandas indexes that are not equal.
    """
    want_frame = return_type == "dataframe"
    if want_frame and not have_pandas:
        raise PatsyError(
            "pandas.DataFrame was requested, but pandas is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError(
            "unrecognized output type %r, should be 'matrix' or 'dataframe'"
            % (return_type, ))

    def data_iter_maker():
        return iter([data])

    builders = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                  NA_action)
    if builders is not None:
        return build_design_matrices(builders,
                                     data,
                                     NA_action=NA_action,
                                     return_type=return_type)

    # No builders, but maybe we can still get matrices
    if not isinstance(formula_like, tuple):
        # subok=True is necessary here to allow DesignMatrixes to pass
        # through
        lhs, rhs = None, asarray_or_pandas(formula_like, subok=True)
    elif len(formula_like) == 2:
        lhs, rhs = formula_like
    else:
        raise PatsyError(
            "don't know what to do with a length %s matrices tuple"
            % (len(formula_like), ))

    # some sort of explicit matrix or matrices were given. Currently we
    # have them in one of these forms:
    #   -- an ndarray or subclass
    #   -- a DesignMatrix
    #   -- a pandas.Series
    #   -- a pandas.DataFrame
    # and we have to produce a standard output format.
    def _regularize_matrix(m, default_column_prefix):
        di = DesignInfo.from_array(m, default_column_prefix)
        is_pandas = have_pandas and isinstance(
            m, (pandas.Series, pandas.DataFrame))
        orig_index = m.index if is_pandas else None
        if return_type == "dataframe":
            m = pandas.DataFrame(
                atleast_2d_column_default(m, preserve_pandas=True))
            m.columns = di.column_names
            m.design_info = di
        else:
            m = DesignMatrix(m, di)
        return (m, orig_index)

    rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
    if lhs is None:
        # No outcome given: use a zero-column array with matching row count.
        lhs = np.zeros((rhs.shape[0], 0), dtype=float)
    lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

    assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
    assert isinstance(getattr(rhs, "design_info", None), DesignInfo)

    if lhs.shape[0] != rhs.shape[0]:
        raise PatsyError(
            "shape mismatch: outcome matrix has %s rows, predictor matrix has %s rows"
            % (lhs.shape[0], rhs.shape[0]))

    if rhs_orig_index is not None and lhs_orig_index is not None:
        if not rhs_orig_index.equals(lhs_orig_index):
            raise PatsyError(
                "index mismatch: outcome and predictor have incompatible indexes")

    if want_frame:
        # Propagate the index from whichever side had one to the side that
        # did not.
        if lhs_orig_index is None and rhs_orig_index is not None:
            lhs.index = rhs.index
        if lhs_orig_index is not None and rhs_orig_index is None:
            rhs.index = lhs.index
    return (lhs, rhs)