Exemple #1
0
def _try_incr_builders(formula_like, data_iter_maker, eval_env):
    """Try to turn `formula_like` into a pair of design matrix builders.

    The following kinds of input are recognised, checked in this order:

    * a single DesignMatrixBuilder -- returned as the second element of a
      tuple whose first element is the builder that
      ``design_matrix_builders`` produces for one empty term list;
    * a 2-tuple of DesignMatrixBuilder objects -- returned unchanged;
    * an object with a ``__patsy_get_model_desc__`` method -- the method is
      called with `eval_env` and must return a ModelDesc, which is then
      handled as described below;
    * a string -- converted with ``ModelDesc.from_formula``;
    * a ModelDesc -- its lhs and rhs term lists are passed to
      ``design_matrix_builders`` together with `data_iter_maker`, and the
      result of that call is returned.

    Returns None when `formula_like` is none of the above.

    Raises PatsyError when ``__patsy_get_model_desc__`` returns something
    that is not a ModelDesc.
    """
    if isinstance(formula_like, DesignMatrixBuilder):
        return (design_matrix_builders([[]], data_iter_maker)[0],
                formula_like)
    if (isinstance(formula_like, tuple)
        and len(formula_like) == 2
        and isinstance(formula_like[0], DesignMatrixBuilder)
        and isinstance(formula_like[1], DesignMatrixBuilder)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        # Keep a handle on the object that supplied the hook so the error
        # message names it, rather than the bad value it returned.
        desc_source = formula_like
        formula_like = desc_source.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__"
                             % (desc_source,))
        # fallthrough
    if isinstance(formula_like, basestring):
        # NOTE(review): _get_env is defined outside this block; presumably
        # it normalises eval_env into an environment object -- confirm.
        eval_env = _get_env(eval_env)
        formula_like = ModelDesc.from_formula(formula_like, eval_env)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        return design_matrix_builders([formula_like.lhs_termlist,
                                       formula_like.rhs_termlist],
                                      data_iter_maker)
    else:
        return None
Exemple #2
0
def _try_incr_builders(formula_like, data_iter_maker, eval_env,
                       NA_action):
    """Try to turn `formula_like` into a pair of DesignInfo objects.

    The following kinds of input are recognised, checked in this order:

    * a single DesignInfo -- returned as the second element of a tuple
      whose first element is what ``design_matrix_builders`` produces for
      one empty term list;
    * a 2-tuple of DesignInfo objects -- returned unchanged;
    * an object with a ``__patsy_get_model_desc__`` method -- the method is
      called with `eval_env` and must return a ModelDesc, which is then
      handled as described below;
    * a ``str`` -- converted with ``ModelDesc.from_formula``;
    * a ModelDesc -- its lhs and rhs term lists are passed to
      ``design_matrix_builders`` together with `data_iter_maker`,
      `eval_env` and `NA_action`, and the result of that call is returned.

    Returns None when `formula_like` is none of the above.

    Raises PatsyError when ``__patsy_get_model_desc__`` returns something
    that is not a ModelDesc.
    """
    if isinstance(formula_like, DesignInfo):
        return (design_matrix_builders([[]], data_iter_maker, eval_env, NA_action)[0],
                formula_like)
    if (isinstance(formula_like, tuple)
        and len(formula_like) == 2
        and isinstance(formula_like[0], DesignInfo)
        and isinstance(formula_like[1], DesignInfo)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        # Keep a handle on the object that supplied the hook so the error
        # message names it, rather than the bad value it returned.
        desc_source = formula_like
        formula_like = desc_source.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__"
                             % (desc_source,))
        # fallthrough
    if isinstance(formula_like, str):
        formula_like = ModelDesc.from_formula(formula_like)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        # By this point the caller must have supplied a real
        # EvalEnvironment; this function does not convert eval_env itself.
        assert isinstance(eval_env, EvalEnvironment)
        return design_matrix_builders([formula_like.lhs_termlist,
                                       formula_like.rhs_termlist],
                                      data_iter_maker,
                                      eval_env,
                                      NA_action)
    else:
        return None
Exemple #3
0
def _try_incr_builders(formula_like, data_iter_maker, eval_env,
                       NA_action):
    """Try to turn `formula_like` into a pair of DesignInfo objects.

    The following kinds of input are recognised, checked in this order:

    * a single DesignInfo -- returned as the second element of a tuple
      whose first element is what ``design_matrix_builders`` produces for
      one empty term list;
    * a 2-tuple of DesignInfo objects -- returned unchanged;
    * an object with a ``__patsy_get_model_desc__`` method -- the method is
      called with `eval_env` and must return a ModelDesc, which is then
      handled as described below;
    * on Python 2 only (``not six.PY3``), a ``unicode`` object -- encoded
      to ASCII and then handled as a ``str``;
    * a ``str`` -- converted with ``ModelDesc.from_formula``;
    * a ModelDesc -- its lhs and rhs term lists are passed to
      ``design_matrix_builders`` together with `data_iter_maker`,
      `eval_env` and `NA_action`, and the result of that call is returned.

    Returns None when `formula_like` is none of the above.

    Raises PatsyError when ``__patsy_get_model_desc__`` returns something
    that is not a ModelDesc, or when a Python 2 ``unicode`` formula cannot
    be encoded as ASCII.
    """
    if isinstance(formula_like, DesignInfo):
        return (design_matrix_builders([[]], data_iter_maker, eval_env, NA_action)[0],
                formula_like)
    if (isinstance(formula_like, tuple)
        and len(formula_like) == 2
        and isinstance(formula_like[0], DesignInfo)
        and isinstance(formula_like[1], DesignInfo)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        # Keep a handle on the object that supplied the hook so the error
        # message names it, rather than the bad value it returned.
        desc_source = formula_like
        formula_like = desc_source.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__"
                             % (desc_source,))
        # fallthrough
    if not six.PY3 and isinstance(formula_like, unicode):
        # Included for the convenience of people who are using py2 with
        # __future__.unicode_literals.
        try:
            formula_like = formula_like.encode("ascii")
        except UnicodeEncodeError:
            raise PatsyError(
                "On Python 2, formula strings must be either 'str' objects, "
                "or else 'unicode' objects containing only ascii "
                "characters. You passed a unicode string with non-ascii "
                "characters. I'm afraid you'll have to either switch to "
                "ascii-only, or else upgrade to Python 3.")
    if isinstance(formula_like, str):
        formula_like = ModelDesc.from_formula(formula_like)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        # By this point the caller must have supplied a real
        # EvalEnvironment; this function does not convert eval_env itself.
        assert isinstance(eval_env, EvalEnvironment)
        return design_matrix_builders([formula_like.lhs_termlist,
                                       formula_like.rhs_termlist],
                                      data_iter_maker,
                                      eval_env,
                                      NA_action)
    else:
        return None
Exemple #4
0
def _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action):
    """Try to turn `formula_like` into a pair of design matrix builders.

    The following kinds of input are recognised, checked in this order:

    * a single DesignMatrixBuilder -- returned as the second element of a
      tuple whose first element is the builder that
      ``design_matrix_builders`` produces for one empty term list;
    * a 2-tuple of DesignMatrixBuilder objects -- returned unchanged;
    * an object with a ``__patsy_get_model_desc__`` method -- the method is
      called with `eval_env` and must return a ModelDesc, which is then
      handled as described below;
    * a string -- converted with ``ModelDesc.from_formula`` using
      `eval_env`, which must already be an EvalEnvironment;
    * a ModelDesc -- its lhs and rhs term lists are passed to
      ``design_matrix_builders`` together with `data_iter_maker` and
      `NA_action`, and the result of that call is returned.

    Returns None when `formula_like` is none of the above.

    Raises PatsyError when ``__patsy_get_model_desc__`` returns something
    that is not a ModelDesc.
    """
    if isinstance(formula_like, DesignMatrixBuilder):
        return (design_matrix_builders([[]], data_iter_maker,
                                       NA_action)[0], formula_like)
    if (isinstance(formula_like, tuple) and len(formula_like) == 2
            and isinstance(formula_like[0], DesignMatrixBuilder)
            and isinstance(formula_like[1], DesignMatrixBuilder)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        # Keep a handle on the object that supplied the hook so the error
        # message names it, rather than the bad value it returned.
        desc_source = formula_like
        formula_like = desc_source.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__" %
                             (desc_source, ))
        # fallthrough
    if isinstance(formula_like, basestring):
        # Parsing a formula string needs a real EvalEnvironment; this
        # function does not convert eval_env itself.
        assert isinstance(eval_env, EvalEnvironment)
        formula_like = ModelDesc.from_formula(formula_like, eval_env)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        return design_matrix_builders(
            [formula_like.lhs_termlist, formula_like.rhs_termlist],
            data_iter_maker, NA_action)
    else:
        return None
Exemple #5
0
def _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action):
    """Try to turn `formula_like` into a pair of DesignInfo objects.

    The following kinds of input are recognised, checked in this order:

    * a single DesignInfo -- returned as the second element of a tuple
      whose first element is what ``design_matrix_builders`` produces for
      one empty term list;
    * a 2-tuple of DesignInfo objects -- returned unchanged;
    * an object with a ``__patsy_get_model_desc__`` method -- the method is
      called with `eval_env` and must return a ModelDesc, which is then
      handled as described below;
    * on Python 2 only (``not six.PY3``), a ``unicode`` object -- encoded
      to ASCII and then handled as a ``str``;
    * a ``str`` -- converted with ``ModelDesc.from_formula``;
    * a ModelDesc -- its lhs and rhs term lists are passed to
      ``design_matrix_builders`` together with `data_iter_maker`,
      `eval_env` and `NA_action`, and the result of that call is returned.

    Returns None when `formula_like` is none of the above.

    Raises PatsyError when ``__patsy_get_model_desc__`` returns something
    that is not a ModelDesc, or when a Python 2 ``unicode`` formula cannot
    be encoded as ASCII.
    """
    if isinstance(formula_like, DesignInfo):
        return (design_matrix_builders([[]], data_iter_maker, eval_env,
                                       NA_action)[0], formula_like)
    if (isinstance(formula_like, tuple) and len(formula_like) == 2
            and isinstance(formula_like[0], DesignInfo)
            and isinstance(formula_like[1], DesignInfo)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        # Keep a handle on the object that supplied the hook so the error
        # message names it, rather than the bad value it returned.
        desc_source = formula_like
        formula_like = desc_source.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__" %
                             (desc_source, ))
        # fallthrough
    if not six.PY3 and isinstance(formula_like, unicode):
        # Included for the convenience of people who are using py2 with
        # __future__.unicode_literals.
        try:
            formula_like = formula_like.encode("ascii")
        except UnicodeEncodeError:
            raise PatsyError(
                "On Python 2, formula strings must be either 'str' objects, "
                "or else 'unicode' objects containing only ascii "
                "characters. You passed a unicode string with non-ascii "
                "characters. I'm afraid you'll have to either switch to "
                "ascii-only, or else upgrade to Python 3.")
    if isinstance(formula_like, str):
        formula_like = ModelDesc.from_formula(formula_like)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        # By this point the caller must have supplied a real
        # EvalEnvironment; this function does not convert eval_env itself.
        assert isinstance(eval_env, EvalEnvironment)
        return design_matrix_builders(
            [formula_like.lhs_termlist, formula_like.rhs_termlist],
            data_iter_maker, eval_env, NA_action)
    else:
        return None
Exemple #6
0
def test_formula_likes():
    """Exercise every kind of "formula-like" input through t / t_invalid.

    Covers plain array-likes, DesignMatrix objects, pandas objects (when
    available), foreign ModelDesc sources, formula strings, ModelDesc
    objects, pre-built builders, and the depth / EvalEnvironment argument.
    """
    # NOTE(review): t and t_invalid are helpers defined outside this block.
    # Judging from the calls below, t takes (formula_like, data,
    # depth-or-env, a boolean flag, expected rhs values, expected rhs
    # column names[, expected lhs values, expected lhs column names]) --
    # confirm against their definitions.

    # Plain array-like, rhs only
    t([[1, 2, 3], [4, 5, 6]], {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t((None, [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t(np.asarray([[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t((None, np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    # A DesignMatrix carries its own column prefix, so names become "foo*".
    dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    t(dm, {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
    t((None, dm), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
      
    # Plain array-likes, lhs and rhs
    t(([1, 2], [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t(([[1], [2]], [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t((np.asarray([1, 2]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t((np.asarray([[1], [2]]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    x_dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    y_dm = DesignMatrix([1, 2], default_column_prefix="bar")
    t((y_dm, x_dm), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"],
      [[1], [2]], ["bar0"])
    # number of rows must match
    t_invalid(([1, 2, 3], [[1, 2, 3], [4, 5, 6]]), {}, 0)

    # tuples must have the right size
    t_invalid(([[1, 2, 3]],), {}, 0)
    t_invalid(([[1, 2, 3]], [[1, 2, 3]], [[1, 2, 3]]), {}, 0)

    # plain Series and DataFrames
    if have_pandas:
        # Names are extracted
        t(pandas.DataFrame({"x": [1, 2, 3]}), {}, 0,
          False,
          [[1], [2], [3]], ["x"])
        t(pandas.Series([1, 2, 3], name="asdf"), {}, 0,
          False,
          [[1], [2], [3]], ["asdf"])
        t((pandas.DataFrame({"y": [4, 5, 6]}),
           pandas.DataFrame({"x": [1, 2, 3]})), {}, 0,
          False,
          [[1], [2], [3]], ["x"],
          [[4], [5], [6]], ["y"])
        t((pandas.Series([4, 5, 6], name="y"),
           pandas.Series([1, 2, 3], name="x")), {}, 0,
          False,
          [[1], [2], [3]], ["x"],
          [[4], [5], [6]], ["y"])
        # Or invented
        t((pandas.DataFrame([[4, 5, 6]]),
           pandas.DataFrame([[1, 2, 3]], columns=[7, 8, 9])), {}, 0,
          False,
          [[1, 2, 3]], ["x7", "x8", "x9"],
          [[4, 5, 6]], ["y0", "y1", "y2"])
        t(pandas.Series([1, 2, 3]), {}, 0,
          False,
          [[1], [2], [3]], ["x0"])
        # indices must match
        t_invalid((pandas.DataFrame([[1]], index=[1]),
                   pandas.DataFrame([[1]], index=[2])),
                  {}, 0)

    # Foreign ModelDesc factories
    class ForeignModelSource(object):
        # NOTE(review): the hook's argument is named `data` here, but it is
        # unused in this class -- confirm what callers actually pass in.
        def __patsy_get_model_desc__(self, data):
            return ModelDesc([Term([LookupFactor("Y")])],
                             [Term([LookupFactor("X")])])
    foreign_model = ForeignModelSource()
    t(foreign_model,
      {"Y": [1, 2],
       "X": [[1, 2], [3, 4]]},
      0,
      True,
      [[1, 2], [3, 4]], ["X[0]", "X[1]"],
      [[1], [2]], ["Y"])
    class BadForeignModelSource(object):
        # Hands back whatever it was given instead of building a ModelDesc;
        # the t_invalid call below expects this source to be rejected.
        def __patsy_get_model_desc__(self, data):
            return data
    t_invalid(BadForeignModelSource(), {}, 0)

    # string formulas
    t("y ~ x", {"y": [1, 2], "x": [3, 4]}, 0,
      True,
      [[1, 3], [1, 4]], ["Intercept", "x"],
      [[1], [2]], ["y"])
    t("~ x", {"y": [1, 2], "x": [3, 4]}, 0,
      True,
      [[1, 3], [1, 4]], ["Intercept", "x"])
    t("x + y", {"y": [1, 2], "x": [3, 4]}, 0,
      True,
      [[1, 3, 1], [1, 4, 2]], ["Intercept", "x", "y"])
    
    # ModelDesc
    desc = ModelDesc([], [Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5]}, 0,
      True,
      [[1.5], [2.5], [3.5]], ["x"])
    # Term([]) is the empty term; the expected names below show it as the
    # "Intercept" column.
    desc = ModelDesc([], [Term([]), Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5]}, 0,
      True,
      [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"])
    desc = ModelDesc([Term([LookupFactor("y")])],
                     [Term([]), Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5], "y": [10, 20, 30]}, 0,
      True,
      [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"],
      [[10], [20], [30]], ["y"])

    # builders
    # Three term lists: empty, "x" only, and intercept + "x".
    termlists = ([],
                 [Term([LookupFactor("x")])],
                 [Term([]), Term([LookupFactor("x")])],
                 )
    builders = design_matrix_builders(termlists,
                                      lambda: iter([{"x": [1, 2, 3]}]))
    # twople but with no LHS
    t((builders[0], builders[2]), {"x": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
    # single DesignMatrixBuilder
    t(builders[2], {"x": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
    # twople with LHS
    t((builders[1], builders[2]), {"x": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"],
      [[10], [20], [30]], ["x"])
    
    # check depth arguments
    x_in_env = [1, 2, 3]
    t("~ x_in_env", {}, 0,
      True,
      [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
    # Values supplied in the data dict win over the local variable.
    t("~ x_in_env", {"x_in_env": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x_in_env"])
    # Trying to pull x_in_env out of our *caller* shouldn't work.
    t_invalid("~ x_in_env", {}, 1, exc=(NameError, PatsyError))
    # But then again it should, if called from one down on the stack:
    def check_nested_call():
        # Decoy local: the expected matrix below uses [1, 2, 3], i.e. the
        # enclosing function's x_in_env, not this string.
        x_in_env = "asdf"
        t("~ x_in_env", {}, 1,
          True,
          [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
    check_nested_call()
    # passing in an explicit EvalEnvironment also works:
    e = EvalEnvironment.capture(1)
    t_invalid("~ x_in_env", {}, e, exc=(NameError, PatsyError))
    e = EvalEnvironment.capture(0)
    def check_nested_call_2():
        # Decoy local again: the environment `e` captured above is expected
        # to supply x_in_env == [1, 2, 3] regardless of this assignment.
        x_in_env = "asdf"
        t("~ x_in_env", {}, e,
          True,
          [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
    check_nested_call_2()
Exemple #7
0
def test_formula_likes():
    """Exercise every kind of "formula-like" input through t / t_invalid.

    Covers plain array-likes, DesignMatrix objects, pandas objects (when
    available), foreign ModelDesc sources, formula strings, ModelDesc
    objects, pre-built builders, and the depth / EvalEnvironment argument.
    """
    # NOTE(review): t and t_invalid are helpers defined outside this block.
    # Judging from the calls below, t takes (formula_like, data,
    # depth-or-env, a boolean flag, expected rhs values, expected rhs
    # column names[, expected lhs values, expected lhs column names]) --
    # confirm against their definitions.

    # Plain array-like, rhs only
    t([[1, 2, 3], [4, 5, 6]], {}, 0, False, [[1, 2, 3], [4, 5, 6]],
      ["x0", "x1", "x2"])
    t((None, [[1, 2, 3], [4, 5, 6]]), {}, 0, False, [[1, 2, 3], [4, 5, 6]],
      ["x0", "x1", "x2"])
    t(np.asarray([[1, 2, 3], [4, 5, 6]]), {}, 0, False, [[1, 2, 3], [4, 5, 6]],
      ["x0", "x1", "x2"])
    t((None, np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0, False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    # A DesignMatrix carries its own column prefix, so names become "foo*".
    dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    t(dm, {}, 0, False, [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
    t((None, dm), {}, 0, False, [[1, 2, 3], [4, 5, 6]],
      ["foo0", "foo1", "foo2"])

    # Plain array-likes, lhs and rhs
    t(([1, 2], [[1, 2, 3], [4, 5, 6]]), {}, 0, False, [[1, 2, 3], [4, 5, 6]],
      ["x0", "x1", "x2"], [[1], [2]], ["y0"])
    t(([[1], [2]], [[1, 2, 3], [4, 5, 6]]), {}, 0, False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"], [[1], [2]], ["y0"])
    t((np.asarray([1, 2]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0, False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"], [[1], [2]], ["y0"])
    t((np.asarray([[1], [2]]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False, [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"], [[1], [2]], ["y0"])
    x_dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    y_dm = DesignMatrix([1, 2], default_column_prefix="bar")
    t((y_dm, x_dm), {}, 0, False, [[1, 2, 3], [4, 5, 6]],
      ["foo0", "foo1", "foo2"], [[1], [2]], ["bar0"])
    # number of rows must match
    t_invalid(([1, 2, 3], [[1, 2, 3], [4, 5, 6]]), {}, 0)

    # tuples must have the right size
    t_invalid(([[1, 2, 3]], ), {}, 0)
    t_invalid(([[1, 2, 3]], [[1, 2, 3]], [[1, 2, 3]]), {}, 0)

    # plain Series and DataFrames
    if have_pandas:
        # Names are extracted
        t(pandas.DataFrame({"x": [1, 2, 3]}), {}, 0, False, [[1], [2], [3]],
          ["x"])
        t(pandas.Series([1, 2, 3], name="asdf"), {}, 0, False, [[1], [2], [3]],
          ["asdf"])
        t((pandas.DataFrame({"y": [4, 5, 6]
                             }), pandas.DataFrame({"x": [1, 2, 3]})), {}, 0,
          False, [[1], [2], [3]], ["x"], [[4], [5], [6]], ["y"])
        t((pandas.Series([4, 5, 6],
                         name="y"), pandas.Series([1, 2, 3], name="x")), {}, 0,
          False, [[1], [2], [3]], ["x"], [[4], [5], [6]], ["y"])
        # Or invented
        t((pandas.DataFrame([[4, 5, 6]]),
           pandas.DataFrame([[1, 2, 3]], columns=[7, 8, 9])), {}, 0, False,
          [[1, 2, 3]], ["x7", "x8", "x9"], [[4, 5, 6]], ["y0", "y1", "y2"])
        t(pandas.Series([1, 2, 3]), {}, 0, False, [[1], [2], [3]], ["x0"])
        # indices must match
        t_invalid((pandas.DataFrame(
            [[1]], index=[1]), pandas.DataFrame([[1]], index=[2])), {}, 0)

    # Foreign ModelDesc factories
    class ForeignModelSource(object):
        # NOTE(review): the hook's argument is named `data` here, but it is
        # unused in this class -- confirm what callers actually pass in.
        def __patsy_get_model_desc__(self, data):
            return ModelDesc([Term([LookupFactor("Y")])],
                             [Term([LookupFactor("X")])])

    foreign_model = ForeignModelSource()
    t(foreign_model, {
        "Y": [1, 2],
        "X": [[1, 2], [3, 4]]
    }, 0, True, [[1, 2], [3, 4]], ["X[0]", "X[1]"], [[1], [2]], ["Y"])

    class BadForeignModelSource(object):
        # Hands back whatever it was given instead of building a ModelDesc;
        # the t_invalid call below expects this source to be rejected.
        def __patsy_get_model_desc__(self, data):
            return data

    t_invalid(BadForeignModelSource(), {}, 0)

    # string formulas
    t("y ~ x", {
        "y": [1, 2],
        "x": [3, 4]
    }, 0, True, [[1, 3], [1, 4]], ["Intercept", "x"], [[1], [2]], ["y"])
    t("~ x", {
        "y": [1, 2],
        "x": [3, 4]
    }, 0, True, [[1, 3], [1, 4]], ["Intercept", "x"])
    t("x + y", {
        "y": [1, 2],
        "x": [3, 4]
    }, 0, True, [[1, 3, 1], [1, 4, 2]], ["Intercept", "x", "y"])

    # ModelDesc
    desc = ModelDesc([], [Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5]}, 0, True, [[1.5], [2.5], [3.5]], ["x"])
    # Term([]) is the empty term; the expected names below show it as the
    # "Intercept" column.
    desc = ModelDesc([], [Term([]), Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5]}, 0, True, [[1, 1.5], [1, 2.5], [1, 3.5]],
      ["Intercept", "x"])
    desc = ModelDesc([Term([LookupFactor("y")])],
                     [Term([]), Term([LookupFactor("x")])])
    t(desc, {
        "x": [1.5, 2.5, 3.5],
        "y": [10, 20, 30]
    }, 0, True, [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"],
      [[10], [20], [30]], ["y"])

    # builders
    # Three term lists: empty, "x" only, and intercept + "x".
    termlists = (
        [],
        [Term([LookupFactor("x")])],
        [Term([]), Term([LookupFactor("x")])],
    )
    builders = design_matrix_builders(termlists, lambda: iter([{
        "x": [1, 2, 3]
    }]))
    # twople but with no LHS
    t((builders[0], builders[2]), {"x": [10, 20, 30]}, 0, True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
    # single DesignMatrixBuilder
    t(builders[2], {"x": [10, 20, 30]}, 0, True, [[1, 10], [1, 20], [1, 30]],
      ["Intercept", "x"])
    # twople with LHS
    t((builders[1], builders[2]), {"x": [10, 20, 30]}, 0, True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"], [[10], [20], [30]],
      ["x"])

    # check depth arguments
    x_in_env = [1, 2, 3]
    t("~ x_in_env", {}, 0, True, [[1, 1], [1, 2], [1, 3]],
      ["Intercept", "x_in_env"])
    # Values supplied in the data dict win over the local variable.
    t("~ x_in_env", {"x_in_env": [10, 20, 30]}, 0, True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x_in_env"])
    # Trying to pull x_in_env out of our *caller* shouldn't work.
    t_invalid("~ x_in_env", {}, 1, exc=(NameError, PatsyError))

    # But then again it should, if called from one down on the stack:
    def check_nested_call():
        # Decoy local: the expected matrix below uses [1, 2, 3], i.e. the
        # enclosing function's x_in_env, not this string.
        x_in_env = "asdf"
        t("~ x_in_env", {}, 1, True, [[1, 1], [1, 2], [1, 3]],
          ["Intercept", "x_in_env"])

    check_nested_call()
    # passing in an explicit EvalEnvironment also works:
    e = EvalEnvironment.capture(1)
    t_invalid("~ x_in_env", {}, e, exc=(NameError, PatsyError))
    e = EvalEnvironment.capture(0)

    def check_nested_call_2():
        # Decoy local again: the environment `e` captured above is expected
        # to supply x_in_env == [1, 2, 3] regardless of this assignment.
        x_in_env = "asdf"
        t("~ x_in_env", {}, e, True, [[1, 1], [1, 2], [1, 3]],
          ["Intercept", "x_in_env"])

    check_nested_call_2()