def test_contrast():
    """Check matrices built from the factor ``a`` under several contrasts.

    Covers ``C(values)`` with no explicit contrast, ``Sum`` given as a class,
    as an instance and with ``omit=0``, and ad hoc contrasts given either as
    a nested list or as an explicit ``ContrastMatrix``.
    """
    from patsy.contrasts import ContrastMatrix, Sum

    values = ["a1", "a3", "a1", "a2"]

    def check(coded, n, terms, names, expected):
        # Build the matrix for the single factor "a" and compare it with the
        # expected rows; ``n`` is forwarded unchanged as make_matrix's second
        # positional argument.
        built = make_matrix({"a": coded}, n, terms, column_names=names)
        assert np.allclose(built, expected)

    # Expected rows for the Sum codings (output from R).
    sum_rows = [[1, 1, 0], [1, -1, -1], [1, 1, 0], [1, 0, 1]]
    sum_omit0_rows = [[1, -1, -1], [1, 0, 1], [1, -1, -1], [1, 1, 0]]
    # Expected rows for the ad hoc two-column coding.
    custom_rows = [[7, 12], [8, -1], [7, 12], [2, 13]]

    # No intercept in model, full-rank coding of 'a'
    check(
        C(values), 3, [["a"]],
        ["a[a1]", "a[a2]", "a[a3]"],
        [[1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]],
    )
    for sum_spec in (Sum, Sum()):
        check(
            C(values, sum_spec), 3, [["a"]],
            ["a[mean]", "a[S.a1]", "a[S.a2]"],
            sum_rows,
        )
    check(
        C(values, Sum(omit=0)), 3, [["a"]],
        ["a[mean]", "a[S.a2]", "a[S.a3]"],
        sum_omit0_rows,
    )

    # Intercept in model, non-full-rank coding of 'a'
    check(
        C(values), 3, [[], ["a"]],
        ["Intercept", "a[T.a2]", "a[T.a3]"],
        [[1, 0, 0], [1, 0, 1], [1, 0, 0], [1, 1, 0]],
    )
    for sum_spec in (Sum, Sum()):
        check(
            C(values, sum_spec), 3, [[], ["a"]],
            ["Intercept", "a[S.a1]", "a[S.a2]"],
            sum_rows,
        )
    check(
        C(values, Sum(omit=0)), 3, [[], ["a"]],
        ["Intercept", "a[S.a2]", "a[S.a3]"],
        sum_omit0_rows,
    )

    # Weird ad hoc less-than-full-rank coding of 'a'
    check(
        C(values, [[7, 12], [2, 13], [8, -1]]), 2, [["a"]],
        ["a[custom0]", "a[custom1]"],
        custom_rows,
    )
    named_contrast = ContrastMatrix(
        [[7, 12], [2, 13], [8, -1]], ["[foo]", "[bar]"]
    )
    check(
        C(values, named_contrast), 2, [["a"]],
        ["a[foo]", "a[bar]"],
        custom_rows,
    )
def test__ColumnBuilder():
    """Exercise ``_ColumnBuilder`` column naming and matrix filling.

    Three mock factors are combined: ``f1`` and ``f3`` are given column
    counts, ``f2`` is given a two-column contrast matrix.  The test checks
    the generated column names, the values written by ``build``, that a
    ``-1`` categorical code raises ``PatsyError``, the two-column ``f1``
    variant, and the factor-less ("Intercept") builder.
    """
    # pytest is used instead of nose.tools so this test matches the other
    # tests in this file; nose no longer imports on current Pythons.
    import pytest
    from patsy.contrasts import ContrastMatrix

    f1 = _MockFactor("f1")
    f2 = _MockFactor("f2")
    f3 = _MockFactor("f3")
    contrast = ContrastMatrix(np.array([[0, 0.5], [3, 0]]), ["[c1]", "[c2]"])

    cb = _ColumnBuilder([f1, f2, f3], {f1: 1, f3: 1}, {f2: contrast})
    mat = np.empty((3, 2))
    assert cb.column_names() == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"]
    cb.build(
        {
            f1: atleast_2d_column_default([1, 2, 3]),
            f2: np.asarray([0, 0, 1]),
            f3: atleast_2d_column_default([7.5, 2, -12]),
        },
        mat,
    )
    assert np.allclose(
        mat, [[0, 0.5 * 1 * 7.5], [0, 0.5 * 2 * 2], [3 * 3 * -12, 0]]
    )

    # Check that missing categorical values blow up
    pytest.raises(
        PatsyError,
        cb.build,
        {
            f1: atleast_2d_column_default([1, 2, 3]),
            f2: np.asarray([0, -1, 1]),
            f3: atleast_2d_column_default([7.5, 2, -12]),
        },
        mat,
    )

    cb2 = _ColumnBuilder([f1, f2, f3], {f1: 2, f3: 1}, {f2: contrast})
    mat2 = np.empty((3, 4))
    cb2.build(
        {
            f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]),
            f2: np.asarray([0, 0, 1]),
            f3: atleast_2d_column_default([7.5, 2, -12]),
        },
        mat2,
    )
    assert cb2.column_names() == [
        "f1[0]:f2[c1]:f3",
        "f1[1]:f2[c1]:f3",
        "f1[0]:f2[c2]:f3",
        "f1[1]:f2[c2]:f3",
    ]
    assert np.allclose(
        mat2,
        [
            [0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5],
            [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2],
            [3 * 5 * -12, 3 * 6 * -12, 0, 0],
        ],
    )

    # Check intercept building:
    cb_intercept = _ColumnBuilder([], {}, {})
    assert cb_intercept.column_names() == ["Intercept"]
    mat3 = np.empty((3, 1))
    cb_intercept.build({f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}, mat3)
    assert np.allclose(mat3, 1)
def test_SubtermInfo():
    """Check ``SubtermInfo`` attribute storage and argument validation.

    A valid instance must expose ``factors`` (as a tuple),
    ``contrast_matrices`` and ``num_columns``; each malformed argument
    combination below must raise the indicated exception.
    """
    # pytest is used instead of nose.tools so this test matches the other
    # tests in this file; nose no longer imports on current Pythons.
    import pytest

    cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])
    s = SubtermInfo(["a", "x"], {"a": cm}, 4)
    assert s.factors == ("a", "x")
    assert s.contrast_matrices == {"a": cm}
    assert s.num_columns == 4

    # smoke test
    repr(s)

    pytest.raises(TypeError, SubtermInfo, 1, {}, 1)
    pytest.raises(ValueError, SubtermInfo, ["a", "x"], 1, 1)
    pytest.raises(ValueError, SubtermInfo, ["a", "x"], {"z": cm}, 1)
    pytest.raises(ValueError, SubtermInfo, ["a", "x"], {"a": 1}, 1)
    pytest.raises(ValueError, SubtermInfo, ["a", "x"], {}, 1.5)
def test_SubtermInfo():
    """Check ``SubtermInfo`` attribute storage and argument validation.

    Besides the basic attribute checks, a ``long`` column count must be
    accepted when not running on Python 3, and each malformed argument
    combination in the table below must raise the listed exception.
    """
    cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])

    info = SubtermInfo(["a", "x"], {"a": cm}, 4)
    assert info.factors == ("a", "x")
    assert info.contrast_matrices == {"a": cm}
    assert info.num_columns == 4

    # Make sure longs are accepted for num_columns
    if not six.PY3:
        info = SubtermInfo(["a", "x"], {"a": cm}, long(4))
        assert info.num_columns == 4

    # smoke test
    repr(info)

    import pytest

    # (expected exception, positional arguments for SubtermInfo)
    rejected = [
        (TypeError, (1, {}, 1)),
        (ValueError, (["a", "x"], 1, 1)),
        (ValueError, (["a", "x"], {"z": cm}, 1)),
        (ValueError, (["a", "x"], {"a": 1}, 1)),
        (ValueError, (["a", "x"], {}, 1.5)),
    ]
    for expected_error, bad_args in rejected:
        with pytest.raises(expected_error):
            SubtermInfo(*bad_args)
def test__subterm_column_names_iter_and__build_subterm():
    """Exercise ``_subterm_column_names_iter`` and ``_build_subterm``.

    Three mock factors are combined: ``f1`` and ``f3`` are numerical,
    ``f2`` is categorical with a two-column contrast matrix.  The test
    checks the generated column names, the values written into the output
    matrix, that a ``-1`` categorical code raises ``PatsyError``, the
    two-column ``f1`` variant, and the factor-less ("Intercept") subterm.
    """
    import pytest
    from patsy.contrasts import ContrastMatrix

    f1 = _MockFactor("f1")
    f2 = _MockFactor("f2")
    f3 = _MockFactor("f3")
    contrast = ContrastMatrix(np.array([[0, 0.5], [3, 0]]), ["[c1]", "[c2]"])

    factor_infos1 = {
        f1: FactorInfo(f1, "numerical", {},
                       num_columns=1, categories=None),
        f2: FactorInfo(f2, "categorical", {},
                       num_columns=None, categories=["a", "b"]),
        f3: FactorInfo(f3, "numerical", {},
                       num_columns=1, categories=None),
    }
    contrast_matrices = {f2: contrast}
    subterm1 = SubtermInfo([f1, f2, f3], contrast_matrices, 2)
    assert (list(_subterm_column_names_iter(factor_infos1, subterm1))
            == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"])

    mat = np.empty((3, 2))
    _build_subterm(subterm1, factor_infos1,
                   {f1: atleast_2d_column_default([1, 2, 3]),
                    f2: np.asarray([0, 0, 1]),
                    f3: atleast_2d_column_default([7.5, 2, -12])},
                   mat)
    assert np.allclose(mat, [[0, 0.5 * 1 * 7.5],
                             [0, 0.5 * 2 * 2],
                             [3 * 3 * -12, 0]])

    # Check that missing categorical values blow up
    pytest.raises(PatsyError, _build_subterm, subterm1, factor_infos1,
                  {f1: atleast_2d_column_default([1, 2, 3]),
                   f2: np.asarray([0, -1, 1]),
                   f3: atleast_2d_column_default([7.5, 2, -12])},
                  mat)

    # Same term, but with f1 contributing two columns.
    factor_infos2 = dict(factor_infos1)
    factor_infos2[f1] = FactorInfo(f1, "numerical", {},
                                   num_columns=2, categories=None)
    subterm2 = SubtermInfo([f1, f2, f3], contrast_matrices, 4)
    assert (list(_subterm_column_names_iter(factor_infos2, subterm2))
            == ["f1[0]:f2[c1]:f3",
                "f1[1]:f2[c1]:f3",
                "f1[0]:f2[c2]:f3",
                "f1[1]:f2[c2]:f3"])

    mat2 = np.empty((3, 4))
    _build_subterm(subterm2, factor_infos2,
                   {f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]),
                    f2: np.asarray([0, 0, 1]),
                    f3: atleast_2d_column_default([7.5, 2, -12])},
                   mat2)
    assert np.allclose(mat2, [[0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5],
                              [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2],
                              [3 * 5 * -12, 3 * 6 * -12, 0, 0]])

    # A subterm with no factors is named "Intercept" and fills with ones.
    subterm_int = SubtermInfo([], {}, 1)
    assert list(_subterm_column_names_iter({}, subterm_int)) == ["Intercept"]

    mat3 = np.empty((3, 1))
    _build_subterm(subterm_int, {},
                   {f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]},
                   mat3)
    assert np.allclose(mat3, 1)
def test_DesignInfo():
    """Check ``DesignInfo`` accessors, ``slice`` lookup and validation.

    The first part builds a ``DesignInfo`` with term metadata and one
    without, and checks names, slices and ``describe()``.  The second part
    feeds inconsistent ``factor_infos`` / ``term_codings`` combinations to
    the constructor and expects each to be rejected.
    """
    # pytest is used instead of nose.tools so this test matches the other
    # tests in this file; nose no longer imports on current Pythons.
    import pytest

    class _MockFactor(object):
        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name

    f_x = _MockFactor("x")
    f_y = _MockFactor("y")
    t_x = Term([f_x])
    t_y = Term([f_y])
    factor_infos = {
        f_x: FactorInfo(f_x, "numerical", {}, num_columns=3),
        f_y: FactorInfo(f_y, "numerical", {}, num_columns=1),
    }
    term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
                                (t_y, [SubtermInfo([f_y], {}, 1)])])
    di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
    assert di.column_names == ["x1", "x2", "x3", "y"]
    assert di.term_names == ["x", "y"]
    assert di.terms == [t_x, t_y]
    assert di.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert di.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert di.term_slices == {t_x: slice(0, 3), t_y: slice(3, 4)}
    assert di.describe() == "x + y"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("x1") == slice(0, 1)
    assert di.slice("x2") == slice(1, 2)
    assert di.slice("x3") == slice(2, 3)
    assert di.slice("x") == slice(0, 3)
    assert di.slice(t_x) == slice(0, 3)
    assert di.slice("y") == slice(3, 4)
    assert di.slice(t_y) == slice(3, 4)
    assert di.slice(slice(2, 4)) == slice(2, 4)
    pytest.raises(PatsyError, di.slice, "asdf")

    # smoke test
    repr(di)

    assert_no_pickling(di)

    # One without term objects
    di = DesignInfo(["a1", "a2", "a3", "b"])
    assert di.column_names == ["a1", "a2", "a3", "b"]
    assert di.term_names == ["a1", "a2", "a3", "b"]
    assert di.terms is None
    assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)

    # Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "y", "y2"], factor_infos, term_codings)
    # duplicate name
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x1", "x1", "y"], factor_infos, term_codings)

    # f_y is in factor_infos, but not mentioned in any term
    term_codings_x_only = OrderedDict(term_codings)
    del term_codings_x_only[t_y]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings_x_only)

    # f_a is in a term, but not in factor_infos
    f_a = _MockFactor("a")
    t_a = Term([f_a])
    term_codings_with_a = OrderedDict(term_codings)
    term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y", "a"],
                  factor_infos, term_codings_with_a)

    # bad factor_infos
    not_factor_infos = dict(factor_infos)
    not_factor_infos[f_x] = "what is this I don't even"
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], not_factor_infos, term_codings)

    mismatch_factor_infos = dict(factor_infos)
    mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {},
                                            num_columns=3)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"],
                  mismatch_factor_infos, term_codings)

    # bad term_codings
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))

    not_term_codings = OrderedDict(term_codings)
    not_term_codings["this is a string"] = term_codings[t_x]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, not_term_codings)

    non_list_term_codings = OrderedDict(term_codings)
    non_list_term_codings[t_y] = tuple(term_codings[t_y])
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"],
                  factor_infos, non_list_term_codings)

    # OrderedDict(term_codings) is a shallow copy whose value lists are
    # shared with term_codings, so install a fresh list on the copy rather
    # than writing through to the shared fixture.
    non_subterm_term_codings = OrderedDict(term_codings)
    non_subterm_term_codings[t_y] = ["not a SubtermInfo"]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"],
                  factor_infos, non_subterm_term_codings)

    bad_subterm = OrderedDict(term_codings)
    # f_x is a factor in this model, but it is not a factor in t_y
    bad_subterm[t_y] = [SubtermInfo([f_x], {}, 1)]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, bad_subterm)

    # contrast matrix has wrong number of rows
    factor_codings_a = {
        f_a: FactorInfo(f_a, "categorical", {}, categories=["a1", "a2"]),
    }
    term_codings_a_bad_rows = OrderedDict([
        (t_a,
         [SubtermInfo([f_a],
                      {f_a: ContrastMatrix(np.ones((3, 2)),
                                           ["[1]", "[2]"])},
                      2)])])
    pytest.raises(ValueError, DesignInfo,
                  ["a[1]", "a[2]"],
                  factor_codings_a,
                  term_codings_a_bad_rows)

    # have a contrast matrix for a non-categorical factor
    t_ax = Term([f_a, f_x])
    factor_codings_ax = {
        f_a: FactorInfo(f_a, "categorical", {}, categories=["a1", "a2"]),
        f_x: FactorInfo(f_x, "numerical", {}, num_columns=2),
    }
    term_codings_ax_extra_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
                       f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])},
                      4)])])
    pytest.raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_extra_cm)

    # no contrast matrix for a categorical factor
    term_codings_ax_missing_cm = OrderedDict([
        (t_ax, [SubtermInfo([f_a, f_x], {}, 4)])])
    # This actually fails before it hits the relevant check with a KeyError,
    # but that's okay... the previous test still exercises the check.
    pytest.raises((ValueError, KeyError), DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_missing_cm)

    # subterm num_columns doesn't match the value computed from the individual
    # factors
    term_codings_ax_wrong_subterm_columns = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 3)),
                                           ["[1]", "[2]", "[3]"])},
                      # should be 2 * 3 = 6
                      5)])])
    pytest.raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]",
                   "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_wrong_subterm_columns)
def code_without_intercept(self, levels):
    """Return the ``ContrastMatrix`` for *levels* without a leading ones column.

    The matrix is whatever ``self._simple_contrast(levels)`` produces; its
    columns are named from every level except the last, using the
    ``"Simp."`` prefix.
    """
    matrix = self._simple_contrast(levels)
    column_suffixes = _name_levels("Simp.", levels[:-1])
    return ContrastMatrix(matrix, column_suffixes)
def code_with_intercept(self, levels):
    """Return the ``ContrastMatrix`` for *levels* with a leading ones column.

    A column of ones (one entry per level) is placed in front of the output
    of ``self._simple_contrast(levels)``; columns are named from all of
    *levels* using the ``"Simp."`` prefix.
    """
    ones_column = np.ones(len(levels))
    simple_part = self._simple_contrast(levels)
    matrix = np.column_stack((ones_column, simple_part))
    column_suffixes = _name_levels("Simp.", levels)
    return ContrastMatrix(matrix, column_suffixes)