Beispiel #1
0
def test_Origin():
    o1 = Origin("012345", 2, 4)
    o2 = Origin("012345", 4, 5)
    assert o1.caretize() == "012345\n  ^^"
    assert o2.caretize() == "012345\n    ^"
    o3 = Origin.combine([o1, o2])
    assert o3.code == "012345"
    assert o3.start == 2
    assert o3.end == 5
    assert o3.caretize(indent=2) == "  012345\n    ^^^"
    assert o3 == Origin("012345", 2, 5)

    class ObjWithOrigin(object):
        def __init__(self, origin=None):
            self.origin = origin

    o4 = Origin.combine([ObjWithOrigin(o1), ObjWithOrigin(), None])
    assert o4 == o1
    o5 = Origin.combine([ObjWithOrigin(o1), o2])
    assert o5 == o3

    assert Origin.combine([ObjWithOrigin(), ObjWithOrigin()]) is None

    from patsy.util import assert_no_pickling
    assert_no_pickling(Origin("", 0, 0))
Beispiel #2
0
def test_LinearConstraint():
    from numpy.testing.utils import assert_equal
    lc = LinearConstraint(["foo", "bar"], [1, 1])
    assert lc.variable_names == ["foo", "bar"]
    assert_equal(lc.coefs, [[1, 1]])
    assert_equal(lc.constants, [[0]])

    lc = LinearConstraint(["foo", "bar"], [[1, 1], [2, 3]], [10, 20])
    assert_equal(lc.coefs, [[1, 1], [2, 3]])
    assert_equal(lc.constants, [[10], [20]])
    
    assert lc.coefs.dtype == np.dtype(float)
    assert lc.constants.dtype == np.dtype(float)

    from nose.tools import assert_raises
    assert_raises(ValueError, LinearConstraint, ["a"], [[1, 2]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[[1]]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[1, 2]], [3, 4])
    assert_raises(ValueError, LinearConstraint, ["a", "b"], [[1, 2]], [3, 4])
    assert_raises(ValueError, LinearConstraint, ["a"], [[0]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[1]], [[]])
    assert_raises(ValueError, LinearConstraint, ["a", "b"], [])
    assert_raises(ValueError, LinearConstraint, ["a", "b"],
                  np.zeros((0, 2)))

    assert_no_pickling(lc)
def test_LookupFactor():
    l_a = LookupFactor("a")
    assert l_a.name() == "a"
    assert l_a == LookupFactor("a")
    assert l_a != LookupFactor("b")
    assert hash(l_a) == hash(LookupFactor("a"))
    assert hash(l_a) != hash(LookupFactor("b"))
    assert l_a.eval({}, {"a": 1}) == 1
    assert l_a.eval({}, {"a": 2}) == 2
    assert repr(l_a) == "LookupFactor('a')"
    assert l_a.origin is None
    l_with_origin = LookupFactor("b", origin="asdf")
    assert l_with_origin.origin == "asdf"

    l_c = LookupFactor("c",
                       force_categorical=True,
                       contrast="CONTRAST",
                       levels=(1, 2))
    box = l_c.eval({}, {"c": [1, 1, 2]})
    assert box.data == [1, 1, 2]
    assert box.contrast == "CONTRAST"
    assert box.levels == (1, 2)

    import pytest
    pytest.raises(ValueError, LookupFactor, "nc", contrast="CONTRAST")
    pytest.raises(ValueError, LookupFactor, "nc", levels=(1, 2))

    assert_no_pickling(LookupFactor("a"))
Beispiel #4
0
def test_LookupFactor():
    l_a = LookupFactor("a")
    assert l_a.name() == "a"
    assert l_a == LookupFactor("a")
    assert l_a != LookupFactor("b")
    assert hash(l_a) == hash(LookupFactor("a"))
    assert hash(l_a) != hash(LookupFactor("b"))
    assert l_a.eval({}, {"a": 1}) == 1
    assert l_a.eval({}, {"a": 2}) == 2
    assert repr(l_a) == "LookupFactor('a')"
    assert l_a.origin is None
    l_with_origin = LookupFactor("b", origin="asdf")
    assert l_with_origin.origin == "asdf"

    l_c = LookupFactor("c", force_categorical=True,
                       contrast="CONTRAST", levels=(1, 2))
    box = l_c.eval({}, {"c": [1, 1, 2]})
    assert box.data == [1, 1, 2]
    assert box.contrast == "CONTRAST"
    assert box.levels == (1, 2)

    from nose.tools import assert_raises
    assert_raises(ValueError, LookupFactor, "nc", contrast="CONTRAST")
    assert_raises(ValueError, LookupFactor, "nc", levels=(1, 2))

    assert_no_pickling(LookupFactor("a"))
Beispiel #5
0
def test_LinearConstraint():
    try:
        from numpy.testing import assert_equal
    except ImportError:
        from numpy.testing.utils import assert_equal
    lc = LinearConstraint(["foo", "bar"], [1, 1])
    assert lc.variable_names == ["foo", "bar"]
    assert_equal(lc.coefs, [[1, 1]])
    assert_equal(lc.constants, [[0]])

    lc = LinearConstraint(["foo", "bar"], [[1, 1], [2, 3]], [10, 20])
    assert_equal(lc.coefs, [[1, 1], [2, 3]])
    assert_equal(lc.constants, [[10], [20]])

    assert lc.coefs.dtype == np.dtype(float)
    assert lc.constants.dtype == np.dtype(float)

    # statsmodels wants to be able to create degenerate constraints like this,
    # see:
    #     https://github.com/pydata/patsy/issues/89
    # We used to forbid it, but I guess it's harmless, so why not.
    lc = LinearConstraint(["a"], [[0]])
    assert_equal(lc.coefs, [[0]])

    from pytest import raises
    raises(ValueError, LinearConstraint, ["a"], [[1, 2]])
    raises(ValueError, LinearConstraint, ["a"], [[[1]]])
    raises(ValueError, LinearConstraint, ["a"], [[1, 2]], [3, 4])
    raises(ValueError, LinearConstraint, ["a", "b"], [[1, 2]], [3, 4])
    raises(ValueError, LinearConstraint, ["a"], [[1]], [[]])
    raises(ValueError, LinearConstraint, ["a", "b"], [])
    raises(ValueError, LinearConstraint, ["a", "b"], np.zeros((0, 2)))

    assert_no_pickling(lc)
Beispiel #6
0
def test_NAAction_basic():
    from nose.tools import assert_raises
    assert_raises(ValueError, NAAction, on_NA="pord")
    assert_raises(ValueError, NAAction, NA_types=("NaN", "asdf"))
    assert_raises(ValueError, NAAction, NA_types="NaN")

    assert_no_pickling(NAAction())
Beispiel #7
0
def test_LinearConstraint():
    from numpy.testing.utils import assert_equal
    lc = LinearConstraint(["foo", "bar"], [1, 1])
    assert lc.variable_names == ["foo", "bar"]
    assert_equal(lc.coefs, [[1, 1]])
    assert_equal(lc.constants, [[0]])

    lc = LinearConstraint(["foo", "bar"], [[1, 1], [2, 3]], [10, 20])
    assert_equal(lc.coefs, [[1, 1], [2, 3]])
    assert_equal(lc.constants, [[10], [20]])
    
    assert lc.coefs.dtype == np.dtype(float)
    assert lc.constants.dtype == np.dtype(float)

    from nose.tools import assert_raises
    assert_raises(ValueError, LinearConstraint, ["a"], [[1, 2]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[[1]]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[1, 2]], [3, 4])
    assert_raises(ValueError, LinearConstraint, ["a", "b"], [[1, 2]], [3, 4])
    assert_raises(ValueError, LinearConstraint, ["a"], [[0]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[1]], [[]])
    assert_raises(ValueError, LinearConstraint, ["a", "b"], [])
    assert_raises(ValueError, LinearConstraint, ["a", "b"],
                  np.zeros((0, 2)))

    assert_no_pickling(lc)
Beispiel #8
0
def test_NAAction_basic():
    import pytest
    pytest.raises(ValueError, NAAction, on_NA="pord")
    pytest.raises(ValueError, NAAction, NA_types=("NaN", "asdf"))
    pytest.raises(ValueError, NAAction, NA_types="NaN")

    assert_no_pickling(NAAction())
Beispiel #9
0
def test_NAAction_basic():
    from nose.tools import assert_raises
    assert_raises(ValueError, NAAction, on_NA="pord")
    assert_raises(ValueError, NAAction, NA_types=("NaN", "asdf"))
    assert_raises(ValueError, NAAction, NA_types="NaN")

    assert_no_pickling(NAAction())
Beispiel #10
0
def test_LinearConstraint():
    from numpy.testing.utils import assert_equal
    lc = LinearConstraint(["foo", "bar"], [1, 1])
    assert lc.variable_names == ["foo", "bar"]
    assert_equal(lc.coefs, [[1, 1]])
    assert_equal(lc.constants, [[0]])

    lc = LinearConstraint(["foo", "bar"], [[1, 1], [2, 3]], [10, 20])
    assert_equal(lc.coefs, [[1, 1], [2, 3]])
    assert_equal(lc.constants, [[10], [20]])
    
    assert lc.coefs.dtype == np.dtype(float)
    assert lc.constants.dtype == np.dtype(float)


    # statsmodels wants to be able to create degenerate constraints like this,
    # see:
    #     https://github.com/pydata/patsy/issues/89
    # We used to forbid it, but I guess it's harmless, so why not.
    lc = LinearConstraint(["a"], [[0]])
    assert_equal(lc.coefs, [[0]])

    from nose.tools import assert_raises
    assert_raises(ValueError, LinearConstraint, ["a"], [[1, 2]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[[1]]])
    assert_raises(ValueError, LinearConstraint, ["a"], [[1, 2]], [3, 4])
    assert_raises(ValueError, LinearConstraint, ["a", "b"], [[1, 2]], [3, 4])
    assert_raises(ValueError, LinearConstraint, ["a"], [[1]], [[]])
    assert_raises(ValueError, LinearConstraint, ["a", "b"], [])
    assert_raises(ValueError, LinearConstraint, ["a", "b"],
                  np.zeros((0, 2)))

    assert_no_pickling(lc)
def test_ContrastMatrix():
    cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"])
    assert np.array_equal(cm.matrix, np.eye(2))
    assert cm.column_suffixes == ["a", "b"]
    # smoke test
    repr(cm)

    from nose.tools import assert_raises
    assert_raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"])

    assert_no_pickling(cm)
Beispiel #12
0
def test_EvalFactor_basics():
    e = EvalFactor("a+b")
    assert e.code == "a + b"
    assert e.name() == "a + b"
    e2 = EvalFactor("a    +b", origin="asdf")
    assert e == e2
    assert hash(e) == hash(e2)
    assert e.origin is None
    assert e2.origin == "asdf"

    assert_no_pickling(e)
Beispiel #13
0
def test_Term():
    assert Term([1, 2, 1]).factors == (1, 2)
    assert Term([1, 2]) == Term([2, 1])
    assert hash(Term([1, 2])) == hash(Term([2, 1]))
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    assert Term([f1, f2]).name() == "a:b"
    assert Term([f2, f1]).name() == "b:a"
    assert Term([]).name() == "Intercept"

    assert_no_pickling(Term([]))
Beispiel #14
0
def test_Term():
    assert Term([1, 2, 1]).factors == (1, 2)
    assert Term([1, 2]) == Term([2, 1])
    assert hash(Term([1, 2])) == hash(Term([2, 1]))
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    assert Term([f1, f2]).name() == "a:b"
    assert Term([f2, f1]).name() == "b:a"
    assert Term([]).name() == "Intercept"

    assert_no_pickling(Term([]))
Beispiel #15
0
def test_ContrastMatrix():
    cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"])
    assert np.array_equal(cm.matrix, np.eye(2))
    assert cm.column_suffixes == ["a", "b"]
    # smoke test
    repr(cm)

    from nose.tools import assert_raises
    assert_raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"])

    assert_no_pickling(cm)
Beispiel #16
0
def test_EvalFactor_basics():
    e = EvalFactor("a+b")
    assert e.code == "a + b"
    assert e.name() == "a + b"
    e2 = EvalFactor("a    +b", origin="asdf")
    assert e == e2
    assert hash(e) == hash(e2)
    assert e.origin is None
    assert e2.origin == "asdf"

    assert_no_pickling(e)
Beispiel #17
0
def test_design_matrix():
    from nose.tools import assert_raises

    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    assert mm.design_info.column_names == ["a1", "a2", "a3", "b"]

    bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    mm2 = DesignMatrix([[12, 14, 16, 18]])
    assert mm2.design_info.column_names == [
        "column0", "column1", "column2", "column3"
    ]

    mm3 = DesignMatrix([12, 14, 16, 18])
    assert mm3.shape == (4, 1)

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
    # But not if they are really slices:
    mm5 = DesignMatrix(mm.diagonal())
    assert mm5 is not mm

    mm6 = DesignMatrix([[12, 14, 16, 18]], default_column_prefix="x")
    assert mm6.design_info.column_names == ["x0", "x1", "x2", "x3"]

    assert_no_pickling(mm6)

    # Only real-valued matrices can be DesignMatrixs
    assert_raises(ValueError, DesignMatrix, [1, 2, 3j])
    assert_raises(ValueError, DesignMatrix, ["a", "b", "c"])
    assert_raises(ValueError, DesignMatrix, [1, 2, object()])

    # Just smoke tests
    repr(mm)
    repr(DesignMatrix(np.arange(100)))
    repr(DesignMatrix(np.arange(100) * 2.0))
    repr(mm[1:, :])
    repr(DesignMatrix(np.arange(100).reshape((1, 100))))
    repr(DesignMatrix([np.nan, np.inf]))
    repr(DesignMatrix([np.nan, 0, 1e20, 20.5]))
    # handling of zero-size matrices
    repr(DesignMatrix(np.zeros((1, 0))))
    repr(DesignMatrix(np.zeros((0, 1))))
    repr(DesignMatrix(np.zeros((0, 0))))
Beispiel #18
0
def test_design_matrix():
    from nose.tools import assert_raises

    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    assert mm.design_info.column_names == ["a1", "a2", "a3", "b"]

    bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    mm2 = DesignMatrix([[12, 14, 16, 18]])
    assert mm2.design_info.column_names == ["column0", "column1", "column2",
                                            "column3"]

    mm3 = DesignMatrix([12, 14, 16, 18])
    assert mm3.shape == (4, 1)

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
    # But not if they are really slices:
    mm5 = DesignMatrix(mm.diagonal())
    assert mm5 is not mm

    mm6 = DesignMatrix([[12, 14, 16, 18]], default_column_prefix="x")
    assert mm6.design_info.column_names == ["x0", "x1", "x2", "x3"]

    assert_no_pickling(mm6)

    # Only real-valued matrices can be DesignMatrixs
    assert_raises(ValueError, DesignMatrix, [1, 2, 3j])
    assert_raises(ValueError, DesignMatrix, ["a", "b", "c"])
    assert_raises(ValueError, DesignMatrix, [1, 2, object()])

    # Just smoke tests
    repr(mm)
    repr(DesignMatrix(np.arange(100)))
    repr(DesignMatrix(np.arange(100) * 2.0))
    repr(mm[1:, :])
    repr(DesignMatrix(np.arange(100).reshape((1, 100))))
    repr(DesignMatrix([np.nan, np.inf]))
    repr(DesignMatrix([np.nan, 0, 1e20, 20.5]))
    # handling of zero-size matrices
    repr(DesignMatrix(np.zeros((1, 0))))
    repr(DesignMatrix(np.zeros((0, 1))))
    repr(DesignMatrix(np.zeros((0, 0))))
Beispiel #19
0
def test_ModelDesc():
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    m = ModelDesc([INTERCEPT, Term([f1])], [Term([f1]), Term([f1, f2])])
    assert m.lhs_termlist == [INTERCEPT, Term([f1])]
    assert m.rhs_termlist == [Term([f1]), Term([f1, f2])]
    print(m.describe())
    assert m.describe() == "1 + a ~ 0 + a + a:b"

    assert_no_pickling(m)

    assert ModelDesc([], []).describe() == "~ 0"
    assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0"
    assert ModelDesc([INTERCEPT], [INTERCEPT]).describe() == "1 ~ 1"
    assert (ModelDesc([INTERCEPT],
                      [INTERCEPT, Term([f2])]).describe() == "1 ~ b")
Beispiel #20
0
def test_ModelDesc():
    f1 = _MockFactor("a")
    f2 = _MockFactor("b")
    m = ModelDesc([INTERCEPT, Term([f1])], [Term([f1]), Term([f1, f2])])
    assert m.lhs_termlist == [INTERCEPT, Term([f1])]
    assert m.rhs_termlist == [Term([f1]), Term([f1, f2])]
    print(m.describe())
    assert m.describe() == "1 + a ~ 0 + a + a:b"

    assert_no_pickling(m)

    assert ModelDesc([], []).describe() == "~ 0"
    assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0"
    assert ModelDesc([INTERCEPT], [INTERCEPT]).describe() == "1 ~ 1"
    assert (ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])]).describe()
            == "1 ~ b")
Beispiel #21
0
def test_VarLookupDict():
    d1 = {"a": 1}
    d2 = {"a": 2, "b": 3}
    ds = VarLookupDict([d1, d2])
    assert ds["a"] == 1
    assert ds["b"] == 3
    assert "a" in ds
    assert "c" not in ds
    from nose.tools import assert_raises
    assert_raises(KeyError, ds.__getitem__, "c")
    ds["a"] = 10
    assert ds["a"] == 10
    assert d1["a"] == 1
    assert ds.get("c") is None
    assert isinstance(repr(ds), six.string_types)

    assert_no_pickling(ds)
Beispiel #22
0
def test_VarLookupDict():
    d1 = {"a": 1}
    d2 = {"a": 2, "b": 3}
    ds = VarLookupDict([d1, d2])
    assert ds["a"] == 1
    assert ds["b"] == 3
    assert "a" in ds
    assert "c" not in ds
    import pytest
    pytest.raises(KeyError, ds.__getitem__, "c")
    ds["a"] = 10
    assert ds["a"] == 10
    assert d1["a"] == 1
    assert ds.get("c") is None
    assert isinstance(repr(ds), six.string_types)

    assert_no_pickling(ds)
Beispiel #23
0
def test_C():
    c1 = C("asdf")
    assert isinstance(c1, _CategoricalBox)
    assert c1.data == "asdf"
    assert c1.levels is None
    assert c1.contrast is None
    c2 = C("DATA", "CONTRAST", "LEVELS")
    assert c2.data == "DATA"
    assert c2.contrast == "CONTRAST"
    assert c2.levels == "LEVELS"
    c3 = C(c2, levels="NEW LEVELS")
    assert c3.data == "DATA"
    assert c3.contrast == "CONTRAST"
    assert c3.levels == "NEW LEVELS"
    c4 = C(c2, "NEW CONTRAST")
    assert c4.data == "DATA"
    assert c4.contrast == "NEW CONTRAST"
    assert c4.levels == "LEVELS"

    assert_no_pickling(c4)
Beispiel #24
0
def test_C():
    c1 = C("asdf")
    assert isinstance(c1, _CategoricalBox)
    assert c1.data == "asdf"
    assert c1.levels is None
    assert c1.contrast is None
    c2 = C("DATA", "CONTRAST", "LEVELS")
    assert c2.data == "DATA"
    assert c2.contrast == "CONTRAST"
    assert c2.levels == "LEVELS"
    c3 = C(c2, levels="NEW LEVELS")
    assert c3.data == "DATA"
    assert c3.contrast == "CONTRAST"
    assert c3.levels == "NEW LEVELS"
    c4 = C(c2, "NEW CONTRAST")
    assert c4.data == "DATA"
    assert c4.contrast == "NEW CONTRAST"
    assert c4.levels == "LEVELS"

    assert_no_pickling(c4)
Beispiel #25
0
def test_Origin():
    o1 = Origin("012345", 2, 4)
    o2 = Origin("012345", 4, 5)
    assert o1.caretize() == "012345\n  ^^"
    assert o2.caretize() == "012345\n    ^"
    o3 = Origin.combine([o1, o2])
    assert o3.code == "012345"
    assert o3.start == 2
    assert o3.end == 5
    assert o3.caretize(indent=2) == "  012345\n    ^^^"
    assert o3 == Origin("012345", 2, 5)

    class ObjWithOrigin(object):
        def __init__(self, origin=None):
            self.origin = origin
    o4 = Origin.combine([ObjWithOrigin(o1), ObjWithOrigin(), None])
    assert o4 == o1
    o5 = Origin.combine([ObjWithOrigin(o1), o2])
    assert o5 == o3

    assert Origin.combine([ObjWithOrigin(), ObjWithOrigin()]) is None

    from patsy.util import assert_no_pickling
    assert_no_pickling(Origin("", 0, 0))
Beispiel #26
0
def test_EvalEnvironment_capture_namespace():
    c0, c, b1, b2, a1, a2 = _a()
    assert "test_EvalEnvironment_capture_namespace" in c0.namespace
    assert "test_EvalEnvironment_capture_namespace" in c.namespace
    assert "test_EvalEnvironment_capture_namespace" in b1.namespace
    assert "test_EvalEnvironment_capture_namespace" in b2.namespace
    assert "test_EvalEnvironment_capture_namespace" in a1.namespace
    assert "test_EvalEnvironment_capture_namespace" in a2.namespace
    assert c0.namespace["_c"] == 1
    assert c.namespace["_c"] == 1
    assert b1.namespace["_b"] == 1
    assert b2.namespace["_b"] == 1
    assert a1.namespace["_a"] == 1
    assert a2.namespace["_a"] == 1
    assert b1.namespace["_c"] is _c
    assert b2.namespace["_c"] is _c
    from nose.tools import assert_raises
    assert_raises(ValueError, EvalEnvironment.capture, 10 ** 6)

    assert EvalEnvironment.capture(b1) is b1

    assert_raises(TypeError, EvalEnvironment.capture, 1.2)

    assert_no_pickling(EvalEnvironment.capture())
Beispiel #27
0
def test_EvalEnvironment_capture_namespace():
    c0, c, b1, b2, a1, a2 = _a()
    assert "test_EvalEnvironment_capture_namespace" in c0.namespace
    assert "test_EvalEnvironment_capture_namespace" in c.namespace
    assert "test_EvalEnvironment_capture_namespace" in b1.namespace
    assert "test_EvalEnvironment_capture_namespace" in b2.namespace
    assert "test_EvalEnvironment_capture_namespace" in a1.namespace
    assert "test_EvalEnvironment_capture_namespace" in a2.namespace
    assert c0.namespace["_c"] == 1
    assert c.namespace["_c"] == 1
    assert b1.namespace["_b"] == 1
    assert b2.namespace["_b"] == 1
    assert a1.namespace["_a"] == 1
    assert a2.namespace["_a"] == 1
    assert b1.namespace["_c"] is _c
    assert b2.namespace["_c"] is _c
    import pytest
    pytest.raises(ValueError, EvalEnvironment.capture, 10**6)

    assert EvalEnvironment.capture(b1) is b1

    pytest.raises(TypeError, EvalEnvironment.capture, 1.2)

    assert_no_pickling(EvalEnvironment.capture())
Beispiel #28
0
def test_DesignInfo():
    from nose.tools import assert_raises
    class _MockFactor(object):
        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name
    f_x = _MockFactor("x")
    f_y = _MockFactor("y")
    t_x = Term([f_x])
    t_y = Term([f_y])
    factor_infos = {f_x:
                      FactorInfo(f_x, "numerical", {}, num_columns=3),
                    f_y:
                      FactorInfo(f_y, "numerical", {}, num_columns=1),
                   }
    term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
                                (t_y, [SubtermInfo([f_y], {}, 1)])])
    di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
    assert di.column_names == ["x1", "x2", "x3", "y"]
    assert di.term_names == ["x", "y"]
    assert di.terms == [t_x, t_y]
    assert di.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert di.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert di.term_slices == {t_x: slice(0, 3), t_y: slice(3, 4)}
    assert di.describe() == "x + y"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("x1") == slice(0, 1)
    assert di.slice("x2") == slice(1, 2)
    assert di.slice("x3") == slice(2, 3)
    assert di.slice("x") == slice(0, 3)
    assert di.slice(t_x) == slice(0, 3)
    assert di.slice("y") == slice(3, 4)
    assert di.slice(t_y) == slice(3, 4)
    assert di.slice(slice(2, 4)) == slice(2, 4)
    assert_raises(PatsyError, di.slice, "asdf")

    # smoke test
    repr(di)

    assert_no_pickling(di)

    # One without term objects
    di = DesignInfo(["a1", "a2", "a3", "b"])
    assert di.column_names == ["a1", "a2", "a3", "b"]
    assert di.term_names == ["a1", "a2", "a3", "b"]
    assert di.terms is None
    assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)

    # Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "y", "y2"], factor_infos, term_codings)
    # duplicate name
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x1", "x1", "y"], factor_infos, term_codings)

    # f_y is in factor_infos, but not mentioned in any term
    term_codings_x_only = OrderedDict(term_codings)
    del term_codings_x_only[t_y]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings_x_only)

    # f_a is in a term, but not in factor_infos
    f_a = _MockFactor("a")
    t_a = Term([f_a])
    term_codings_with_a = OrderedDict(term_codings)
    term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y", "a"],
                  factor_infos, term_codings_with_a)

    # bad factor_infos
    not_factor_infos = dict(factor_infos)
    not_factor_infos[f_x] = "what is this I don't even"
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], not_factor_infos, term_codings)

    mismatch_factor_infos = dict(factor_infos)
    mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)

    # bad term_codings
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))

    not_term_codings = OrderedDict(term_codings)
    not_term_codings["this is a string"] = term_codings[t_x]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, not_term_codings)

    non_list_term_codings = OrderedDict(term_codings)
    non_list_term_codings[t_y] = tuple(term_codings[t_y])
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)

    non_subterm_term_codings = OrderedDict(term_codings)
    non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)

    bad_subterm = OrderedDict(term_codings)
    # f_x is a factor in this model, but it is not a factor in t_y
    term_codings[t_y][0] = SubtermInfo([f_x], {}, 1)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, bad_subterm)

    # contrast matrix has wrong number of rows
    factor_codings_a = {f_a:
                          FactorInfo(f_a, "categorical", {},
                                     categories=["a1", "a2"])}
    term_codings_a_bad_rows = OrderedDict([
        (t_a,
         [SubtermInfo([f_a],
                      {f_a: ContrastMatrix(np.ones((3, 2)),
                                           ["[1]", "[2]"])},
                      2)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]", "a[2]"],
                  factor_codings_a,
                  term_codings_a_bad_rows)

    # have a contrast matrix for a non-categorical factor
    t_ax = Term([f_a, f_x])
    factor_codings_ax = {f_a:
                           FactorInfo(f_a, "categorical", {},
                                      categories=["a1", "a2"]),
                         f_x:
                           FactorInfo(f_x, "numerical", {},
                                      num_columns=2)}
    term_codings_ax_extra_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
                       f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])},
                      4)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_extra_cm)

    # no contrast matrix for a categorical factor
    term_codings_ax_missing_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {},
                      4)])])
    # This actually fails before it hits the relevant check with a KeyError,
    # but that's okay... the previous test still exercises the check.
    assert_raises((ValueError, KeyError), DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_missing_cm)

    # subterm num_columns doesn't match the value computed from the individual
    # factors
    term_codings_ax_wrong_subterm_columns = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 3)),
                                           ["[1]", "[2]", "[3]"])},
                      # should be 2 * 3 = 6
                      5)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]",
                   "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_wrong_subterm_columns)
Beispiel #29
0
def test_DesignInfo():
    from nose.tools import assert_raises
    class _MockFactor(object):
        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name
    f_x = _MockFactor("x")
    f_y = _MockFactor("y")
    t_x = Term([f_x])
    t_y = Term([f_y])
    factor_infos = {f_x:
                      FactorInfo(f_x, "numerical", {}, num_columns=3),
                    f_y:
                      FactorInfo(f_y, "numerical", {}, num_columns=1),
                   }
    term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
                                (t_y, [SubtermInfo([f_y], {}, 1)])])
    di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
    assert di.column_names == ["x1", "x2", "x3", "y"]
    assert di.term_names == ["x", "y"]
    assert di.terms == [t_x, t_y]
    assert di.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert di.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert di.term_slices == {t_x: slice(0, 3), t_y: slice(3, 4)}
    assert di.describe() == "x + y"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("x1") == slice(0, 1)
    assert di.slice("x2") == slice(1, 2)
    assert di.slice("x3") == slice(2, 3)
    assert di.slice("x") == slice(0, 3)
    assert di.slice(t_x) == slice(0, 3)
    assert di.slice("y") == slice(3, 4)
    assert di.slice(t_y) == slice(3, 4)
    assert di.slice(slice(2, 4)) == slice(2, 4)
    assert_raises(PatsyError, di.slice, "asdf")

    # smoke test
    repr(di)

    assert_no_pickling(di)

    # One without term objects
    di = DesignInfo(["a1", "a2", "a3", "b"])
    assert di.column_names == ["a1", "a2", "a3", "b"]
    assert di.term_names == ["a1", "a2", "a3", "b"]
    assert di.terms is None
    assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)

    # Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "y", "y2"], factor_infos, term_codings)
    # duplicate name
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x1", "x1", "y"], factor_infos, term_codings)

    # f_y is in factor_infos, but not mentioned in any term
    term_codings_x_only = OrderedDict(term_codings)
    del term_codings_x_only[t_y]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings_x_only)

    # f_a is in a term, but not in factor_infos
    f_a = _MockFactor("a")
    t_a = Term([f_a])
    term_codings_with_a = OrderedDict(term_codings)
    term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y", "a"],
                  factor_infos, term_codings_with_a)

    # bad factor_infos
    not_factor_infos = dict(factor_infos)
    not_factor_infos[f_x] = "what is this I don't even"
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], not_factor_infos, term_codings)

    mismatch_factor_infos = dict(factor_infos)
    mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)

    # bad term_codings
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))

    not_term_codings = OrderedDict(term_codings)
    not_term_codings["this is a string"] = term_codings[t_x]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, not_term_codings)

    non_list_term_codings = OrderedDict(term_codings)
    non_list_term_codings[t_y] = tuple(term_codings[t_y])
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)

    non_subterm_term_codings = OrderedDict(term_codings)
    non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)

    bad_subterm = OrderedDict(term_codings)
    # f_x is a factor in this model, but it is not a factor in t_y
    term_codings[t_y][0] = SubtermInfo([f_x], {}, 1)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, bad_subterm)

    # contrast matrix has wrong number of rows
    factor_codings_a = {f_a:
                          FactorInfo(f_a, "categorical", {},
                                     categories=["a1", "a2"])}
    term_codings_a_bad_rows = OrderedDict([
        (t_a,
         [SubtermInfo([f_a],
                      {f_a: ContrastMatrix(np.ones((3, 2)),
                                           ["[1]", "[2]"])},
                      2)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]", "a[2]"],
                  factor_codings_a,
                  term_codings_a_bad_rows)

    # have a contrast matrix for a non-categorical factor
    t_ax = Term([f_a, f_x])
    factor_codings_ax = {f_a:
                           FactorInfo(f_a, "categorical", {},
                                      categories=["a1", "a2"]),
                         f_x:
                           FactorInfo(f_x, "numerical", {},
                                      num_columns=2)}
    term_codings_ax_extra_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
                       f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])},
                      4)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_extra_cm)

    # no contrast matrix for a categorical factor
    term_codings_ax_missing_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {},
                      4)])])
    # This actually fails before it hits the relevant check with a KeyError,
    # but that's okay... the previous test still exercises the check.
    assert_raises((ValueError, KeyError), DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_missing_cm)

    # subterm num_columns doesn't match the value computed from the individual
    # factors
    term_codings_ax_wrong_subterm_columns = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 3)),
                                           ["[1]", "[2]", "[3]"])},
                      # should be 2 * 3 = 6
                      5)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]",
                   "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_wrong_subterm_columns)