def test_EvalFactor_memorize_passes_needed():
    """Verify the memorization plan built for a formula with nested transforms.

    The formula ``foo(x) + bar(foo(y)) + quux(z, w)`` uses four stateful
    transform call sites.  The test checks that ``memorize_passes_needed``
    reports two passes and fills ``state`` with the captured environment,
    the transform objects, the rewritten eval/memorize code and the
    per-pass bins.
    """
    from patsy.state import stateful_transform

    # NOTE: the local names foo/bar/quux/e/state are significant -- the
    # formula refers to the first three and the namespace checks below
    # refer to all of them by name.
    foo = stateful_transform(lambda: "FOO-OBJ")
    bar = stateful_transform(lambda: "BAR-OBJ")
    quux = stateful_transform(lambda: "QUUX-OBJ")
    e = EvalFactor("foo(x) + bar(foo(y)) + quux(z, w)")
    state = {}
    captured_env = EvalEnvironment.capture(0)
    n_passes = e.memorize_passes_needed(state, captured_env)
    print(n_passes)
    print(state)
    assert n_passes == 2

    # Only the names the formula actually calls should survive in the
    # stored environment; data variables and unrelated locals must not.
    stored_ns = state["eval_env"].namespace
    for fn_name, fn_obj in [("foo", foo), ("bar", bar), ("quux", quux)]:
        assert stored_ns[fn_name] is fn_obj
    for absent in ("w", "x", "y", "z", "e", "state"):
        assert absent not in stored_ns

    # One transform object per call site, numbered in order of appearance.
    expected_transforms = {
        "_patsy_stobj0__foo__": "FOO-OBJ",
        "_patsy_stobj1__bar__": "BAR-OBJ",
        "_patsy_stobj2__foo__": "FOO-OBJ",
        "_patsy_stobj3__quux__": "QUUX-OBJ",
    }
    assert state["transforms"] == expected_transforms

    expected_eval_code = (
        "_patsy_stobj0__foo__.transform(x)"
        " + _patsy_stobj1__bar__.transform(_patsy_stobj2__foo__.transform(y))"
        " + _patsy_stobj3__quux__.transform(z, w)"
    )
    assert state["eval_code"] == expected_eval_code

    expected_memorize_code = {
        "_patsy_stobj0__foo__":
            "_patsy_stobj0__foo__.memorize_chunk(x)",
        "_patsy_stobj1__bar__":
            "_patsy_stobj1__bar__.memorize_chunk("
            "_patsy_stobj2__foo__.transform(y))",
        "_patsy_stobj2__foo__":
            "_patsy_stobj2__foo__.memorize_chunk(y)",
        "_patsy_stobj3__quux__":
            "_patsy_stobj3__quux__.memorize_chunk(z, w)",
    }
    assert state["memorize_code"] == expected_memorize_code

    # bar memorizes the output of the inner foo, so it sits in the second bin.
    first_bin = {
        "_patsy_stobj0__foo__",
        "_patsy_stobj2__foo__",
        "_patsy_stobj3__quux__",
    }
    second_bin = {"_patsy_stobj1__bar__"}
    assert state["pass_bins"] == [first_bin, second_bin]
def test_EvalFactor_memorize_passes_needed():
    """Check what ``EvalFactor.memorize_passes_needed`` records in ``state``.

    Uses the formula ``foo(x) + bar(foo(y)) + quux(z, w)`` and compares the
    pass count, the stored evaluation environment and the four generated
    ``state`` entries against their expected values.
    """
    from patsy.state import stateful_transform

    # The formula looks these three up by name, so the locals must keep
    # exactly these names (likewise ``e`` and ``state`` for the checks below).
    foo = stateful_transform(lambda: "FOO-OBJ")
    bar = stateful_transform(lambda: "BAR-OBJ")
    quux = stateful_transform(lambda: "QUUX-OBJ")
    e = EvalFactor("foo(x) + bar(foo(y)) + quux(z, w)")
    state = {}
    env = EvalEnvironment.capture(0)
    pass_count = e.memorize_passes_needed(state, env)
    print(pass_count)
    print(state)
    assert pass_count == 2

    # The stored environment holds the called transforms and nothing else
    # from this frame.
    kept = {"foo": foo, "bar": bar, "quux": quux}
    for kept_name in ["foo", "bar", "quux"]:
        assert state["eval_env"].namespace[kept_name] is kept[kept_name]
    for dropped_name in ["w", "x", "y", "z", "e", "state"]:
        assert dropped_name not in state["eval_env"].namespace

    # Expected contents of each generated ``state`` entry, checked in the
    # same order as listed in ``checked_keys``.
    expected = {
        "transforms": {
            "_patsy_stobj0__foo__": "FOO-OBJ",
            "_patsy_stobj1__bar__": "BAR-OBJ",
            "_patsy_stobj2__foo__": "FOO-OBJ",
            "_patsy_stobj3__quux__": "QUUX-OBJ",
        },
        "eval_code": (
            "_patsy_stobj0__foo__.transform(x)"
            " + _patsy_stobj1__bar__.transform("
            "_patsy_stobj2__foo__.transform(y))"
            " + _patsy_stobj3__quux__.transform(z, w)"
        ),
        "memorize_code": {
            "_patsy_stobj0__foo__":
                "_patsy_stobj0__foo__.memorize_chunk(x)",
            "_patsy_stobj1__bar__":
                "_patsy_stobj1__bar__.memorize_chunk("
                "_patsy_stobj2__foo__.transform(y))",
            "_patsy_stobj2__foo__":
                "_patsy_stobj2__foo__.memorize_chunk(y)",
            "_patsy_stobj3__quux__":
                "_patsy_stobj3__quux__.memorize_chunk(z, w)",
        },
        "pass_bins": [
            {
                "_patsy_stobj0__foo__",
                "_patsy_stobj2__foo__",
                "_patsy_stobj3__quux__",
            },
            {"_patsy_stobj1__bar__"},
        ],
    }
    checked_keys = ("transforms", "eval_code", "memorize_code", "pass_bins")
    for state_key in checked_keys:
        assert state[state_key] == expected[state_key]
def test_EvalFactor_memorize_passes_needed():
    """Check the memorization plan EvalFactor builds for nested transforms.

    The factor ``foo(x) + bar(foo(y)) + quux(z, w)`` is constructed together
    with the captured calling environment; ``memorize_passes_needed`` must
    report two passes and populate ``state`` with the transform objects, the
    rewritten eval/memorize code and the per-pass bins.
    """
    from patsy.state import stateful_transform

    # The formula looks these up by name in the captured environment, so
    # the local names must stay foo/bar/quux.
    foo = stateful_transform(lambda: "FOO-OBJ")
    bar = stateful_transform(lambda: "BAR-OBJ")
    quux = stateful_transform(lambda: "QUUX-OBJ")
    e = EvalFactor("foo(x) + bar(foo(y)) + quux(z, w)",
                   EvalEnvironment.capture(0))
    state = {}
    passes = e.memorize_passes_needed(state)
    # Call-style print with a single argument behaves the same on Python 2
    # and Python 3; the bare ``print x`` statement is a SyntaxError on 3.
    print(passes)
    print(state)
    assert passes == 2
    # One transform object per call site, numbered in order of appearance.
    assert state["transforms"] == {
        "_patsy_stobj0__foo__": "FOO-OBJ",
        "_patsy_stobj1__bar__": "BAR-OBJ",
        "_patsy_stobj2__foo__": "FOO-OBJ",
        "_patsy_stobj3__quux__": "QUUX-OBJ",
    }
    assert (state["eval_code"]
            == "_patsy_stobj0__foo__.transform(x)"
               " + _patsy_stobj1__bar__.transform("
               "_patsy_stobj2__foo__.transform(y))"
               " + _patsy_stobj3__quux__.transform(z, w)")
    assert (state["memorize_code"]
            == {"_patsy_stobj0__foo__":
                    "_patsy_stobj0__foo__.memorize_chunk(x)",
                "_patsy_stobj1__bar__":
                    "_patsy_stobj1__bar__.memorize_chunk("
                    "_patsy_stobj2__foo__.transform(y))",
                "_patsy_stobj2__foo__":
                    "_patsy_stobj2__foo__.memorize_chunk(y)",
                "_patsy_stobj3__quux__":
                    "_patsy_stobj3__quux__.memorize_chunk(z, w)",
                })
    # bar memorizes the output of the inner foo, so it sits in the second bin.
    assert state["pass_bins"] == [
        set(["_patsy_stobj0__foo__",
             "_patsy_stobj2__foo__",
             "_patsy_stobj3__quux__"]),
        set(["_patsy_stobj1__bar__"]),
    ]
def test_EvalFactor_end_to_end():
    """Drive an EvalFactor through both memorization passes and evaluate it.

    The formula ``foo(x) + foo(foo(y))`` has three ``foo`` call sites.  The
    test feeds two data chunks per pass, checks the call counters that
    ``_MockTransform`` exposes after each step, and finally compares the
    evaluated result with hand-computed values.
    """
    from patsy.state import stateful_transform

    # ``foo``, ``e`` and ``state`` must keep these names: the formula and
    # the namespace assertions below refer to them by name.
    foo = stateful_transform(_MockTransform)
    e = EvalFactor("foo(x) + foo(foo(y))")
    state = {}
    captured_env = EvalEnvironment.capture(0)
    n_passes = e.memorize_passes_needed(state, captured_env)
    print(n_passes)
    print(state)
    assert n_passes == 2
    assert state["eval_env"].namespace["foo"] is foo
    for absent in ("x", "y", "e", "state"):
        assert absent not in state["eval_env"].namespace

    import numpy as np

    # Raw (x, y) values for the two chunks fed to every pass; fresh arrays
    # are built for each call.
    raw_chunks = [([1, 2], [10, 11]), ([12, -10], [100, 3])]
    # Call sites 0 and 2 are memorized in pass 0, call site 1 in pass 1.
    pass0_keys = ("_patsy_stobj0__foo__", "_patsy_stobj2__foo__")
    pass1_key = "_patsy_stobj1__foo__"

    # --- pass 0 ---
    for chunks_seen, (xs, ys) in enumerate(raw_chunks, 1):
        e.memorize_chunk(state, 0, {"x": np.array(xs), "y": np.array(ys)})
        for key in pass0_keys:
            assert (state["transforms"][key]._memorize_chunk_called
                    == chunks_seen)
    for key in pass0_keys:
        assert state["transforms"][key]._memorize_finish_called == 0
    e.memorize_finish(state, 0)
    for key in pass0_keys:
        assert state["transforms"][key]._memorize_finish_called == 1
    # The outer foo(...) has not been touched yet.
    assert state["transforms"][pass1_key]._memorize_chunk_called == 0
    assert state["transforms"][pass1_key]._memorize_finish_called == 0

    # --- pass 1 ---
    for xs, ys in raw_chunks:
        e.memorize_chunk(state, 1, {"x": np.array(xs), "y": np.array(ys)})
    e.memorize_finish(state, 1)
    for mock in six.itervalues(state["transforms"]):
        assert mock._memorize_chunk_called == 2
        assert mock._memorize_finish_called == 1

    # Hand computation of the expected output (presumably _MockTransform
    # subtracts the memorized sum from its input -- confirm against its
    # definition):
    # sums:
    #   0: 1 + 2 + 12 + -10 == 5
    #   2: 10 + 11 + 100 + 3 == 124
    #   1: (10 - 124) + (11 - 124) + (100 - 124) + (3 - 124) == -372
    # results:
    #   0: -4, -3, 7, -15
    #   2: -114, -113, -24, -121
    #   1: 258, 259, 348, 251
    #   0 + 1: 254, 256, 355, 236
    full_data = {
        "x": np.array([1, 2, 12, -10]),
        "y": np.array([10, 11, 100, 3]),
    }
    assert np.all(e.eval(state, full_data) == [254, 256, 355, 236])
def test_EvalFactor_memorize_passes_needed():
    """Verify the state that ``memorize_passes_needed`` fills in.

    The factor ``foo(x) + bar(foo(y)) + quux(z, w)`` is built together with
    the captured calling environment.  The test checks the reported number
    of passes and the ``transforms``, ``eval_code``, ``memorize_code`` and
    ``pass_bins`` entries written into ``state``.
    """
    from patsy.state import stateful_transform

    # The formula resolves foo/bar/quux by name from the captured frame,
    # so these local names must not change.
    foo = stateful_transform(lambda: "FOO-OBJ")
    bar = stateful_transform(lambda: "BAR-OBJ")
    quux = stateful_transform(lambda: "QUUX-OBJ")
    e = EvalFactor(
        "foo(x) + bar(foo(y)) + quux(z, w)",
        EvalEnvironment.capture(0),
    )
    state = {}
    n_passes = e.memorize_passes_needed(state)
    print(n_passes)
    print(state)
    assert n_passes == 2

    # One transform object per call site, numbered in order of appearance.
    want_transforms = {
        "_patsy_stobj0__foo__": "FOO-OBJ",
        "_patsy_stobj1__bar__": "BAR-OBJ",
        "_patsy_stobj2__foo__": "FOO-OBJ",
        "_patsy_stobj3__quux__": "QUUX-OBJ",
    }
    assert state["transforms"] == want_transforms

    want_eval_code = (
        "_patsy_stobj0__foo__.transform(x)"
        " + _patsy_stobj1__bar__.transform(_patsy_stobj2__foo__.transform(y))"
        " + _patsy_stobj3__quux__.transform(z, w)"
    )
    assert state["eval_code"] == want_eval_code

    want_memorize_code = {
        "_patsy_stobj0__foo__":
            "_patsy_stobj0__foo__.memorize_chunk(x)",
        "_patsy_stobj1__bar__":
            "_patsy_stobj1__bar__.memorize_chunk("
            "_patsy_stobj2__foo__.transform(y))",
        "_patsy_stobj2__foo__":
            "_patsy_stobj2__foo__.memorize_chunk(y)",
        "_patsy_stobj3__quux__":
            "_patsy_stobj3__quux__.memorize_chunk(z, w)",
    }
    assert state["memorize_code"] == want_memorize_code

    # bar memorizes the output of the inner foo, so it sits in the second bin.
    want_pass_bins = [
        {
            "_patsy_stobj0__foo__",
            "_patsy_stobj2__foo__",
            "_patsy_stobj3__quux__",
        },
        {"_patsy_stobj1__bar__"},
    ]
    assert state["pass_bins"] == want_pass_bins
the resulting design matrix. Note that in this example, due to the centering constraint, 6 knots will get computed from the input data ``x`` to achieve 5 degrees of freedom. .. note:: This function reproduce the cubic regression splines 'cr' and 'cs' as implemented in the R package 'mgcv' (GAM modelling). """ __doc__ += CubicRegressionSpline.common_doc def __init__(self): CubicRegressionSpline.__init__(self, name='cr', cyclic=False) cr = stateful_transform(CR) class CC(CubicRegressionSpline): """cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None) Generates a cyclic cubic spline basis for ``x`` (with the option of absorbing centering or more general parameters constraints), allowing non-linear fits. The usual usage is something like:: y ~ 1 + cc(x, df=7, constraints='center') to fit ``y`` as a smooth function of ``x``, with 7 degrees of freedom given to the smooth, and centering constraint absorbed in the resulting design matrix. Note that in this example, due to the centering and cyclic constraints, 9 knots will get computed from the input data ``x``
self._degree = args["degree"] self._all_knots = all_knots def transform(self, x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None): basis = _eval_bspline_basis(x, self._all_knots, self._degree) if not include_intercept: basis = basis[:, 1:] if have_pandas: if isinstance(x, (pandas.Series, pandas.DataFrame)): basis = pandas.DataFrame(basis) basis.index = x.index return basis bs = stateful_transform(BS) def test_bs_compat(): from patsy.test_state import check_stateful from patsy.test_splines_bs_data import (R_bs_test_x, R_bs_test_data, R_bs_num_tests) lines = R_bs_test_data.split("\n") tests_ran = 0 start_idx = lines.index("--BEGIN TEST CASE--") while True: if not lines[start_idx] == "--BEGIN TEST CASE--": break start_idx += 1 stop_idx = lines.index("--END TEST CASE--", start_idx) block = lines[start_idx:stop_idx]
to achieve 5 degrees of freedom. .. note:: This function reproduce the cubic regression splines 'cr' and 'cs' as implemented in the R package 'mgcv' (GAM modelling). """ # Under python -OO, __doc__ will be defined but set to None if __doc__: __doc__ += CubicRegressionSpline.common_doc def __init__(self): CubicRegressionSpline.__init__(self, name='cr', cyclic=False) cr = stateful_transform(CR) class CC(CubicRegressionSpline): """cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None) Generates a cyclic cubic spline basis for ``x`` (with the option of absorbing centering or more general parameters constraints), allowing non-linear fits. The usual usage is something like:: y ~ 1 + cc(x, df=7, constraints='center') to fit ``y`` as a smooth function of ``x``, with 7 degrees of freedom given to the smooth, and centering constraint absorbed in the resulting design matrix. Note that in this example, due to the centering and cyclic constraints, 9 knots will get computed from the input data ``x``
def transform(self, x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None): basis = _eval_bspline_basis(x, self._all_knots, self._degree) if not include_intercept: basis = basis[:, 1:] if have_pandas: if isinstance(x, (pandas.Series, pandas.DataFrame)): basis = pandas.DataFrame(basis) basis.index = x.index return basis __getstate__ = no_pickling bs = stateful_transform(BS) def test_bs_compat(): from patsy.test_state import check_stateful from patsy.test_splines_bs_data import (R_bs_test_x, R_bs_test_data, R_bs_num_tests) lines = R_bs_test_data.split("\n") tests_ran = 0 start_idx = lines.index("--BEGIN TEST CASE--") while True: if not lines[start_idx] == "--BEGIN TEST CASE--": break start_idx += 1 stop_idx = lines.index("--END TEST CASE--", start_idx) block = lines[start_idx:stop_idx]
if levels is not None and data.levels != levels: raise PatsyError("changing levels of categorical data " "not supported yet") return Categorical(data.int_array, data.levels, **kwargs) if levels is None: levels = self._levels_tuple return Categorical.from_sequence(data, levels, **kwargs) # This is for the use of the building code, which uses this transform to # convert string arrays (and similar) into Categoricals, and after # memorizing the data it needs to know what the levels were. def levels(self): assert self._levels_tuple is not None return self._levels_tuple C = stateful_transform(CategoricalTransform) def test_CategoricalTransform(): t1 = CategoricalTransform() t1.memorize_chunk(["a", "b"]) t1.memorize_chunk(["a", "c"]) t1.memorize_finish() c1 = t1.transform(["a", "c"]) assert c1.levels == ("a", "b", "c") assert np.all(c1.int_array == [0, 2]) t2 = CategoricalTransform() t2.memorize_chunk(["a", "b"], contrast="foo", levels=["c", "b", "a"]) t2.memorize_chunk(["a", "c"], contrast="foo", levels=["c", "b", "a"]) t2.memorize_finish() c2 = t2.transform(["a", "c"], contrast="foo", levels=["c", "b", "a"])
assert bs == "bs" or bs == "cc", "Spline basis not defined!" if bs == "bs": self.s = BSplines(x, df=[df], degree=[degree], include_intercept=True, knot_kwds=None) elif bs == "cc": self.s = CyclicCubicSplines(x, df=[df]) self.penalty_matrices = self.s.penalty_matrices def memorize_finish(self): pass def transform(self, x, bs, df=4, degree=3, return_penalty=False, knot_kwds=None): return self.s.transform(np.expand_dims(x.to_numpy(), axis=1)) __getstate__ = no_pickling spline = stateful_transform( Spline) #conversion of Spline class to patsy statefull transform