Python ContrastMatrixの例、patsy.contrasts.ContrastMatrix Pythonの例

コード例 #1

0

ファイルを表示

def test_contrast():
    """Check the matrices ``make_matrix`` yields for several codings of 'a'.

    Covers the default coding, ``Sum`` (given both as a class and as an
    instance), ``Sum(omit=0)``, a raw list-of-lists contrast and an explicit
    ``ContrastMatrix``. Each result is compared against hard-coded rows.
    """
    from patsy.contrasts import ContrastMatrix, Sum

    def check(coded, size_arg, terms, names, expected_rows):
        # Build the matrix for the single factor "a" and compare it with the
        # expected rows. `size_arg` is forwarded untouched as make_matrix's
        # second positional argument (presumably the expected rank -- confirm
        # against make_matrix).
        built = make_matrix({"a": coded}, size_arg, terms, column_names=names)
        assert np.allclose(built, expected_rows)

    values = ["a1", "a3", "a1", "a2"]

    # Output from R: rows shared by the Sum-coded cases below.
    sum_rows = [[1, 1, 0], [1, -1, -1], [1, 1, 0], [1, 0, 1]]
    sum_omit0_rows = [[1, -1, -1], [1, 0, 1], [1, -1, -1], [1, 1, 0]]
    # Rows expected when a custom 3x2 matrix is looked up per observation.
    custom_rows = [[7, 12], [8, -1], [7, 12], [2, 13]]

    # --- No intercept in model, full-rank coding of 'a' ---
    check(C(values), 3, [["a"]],
          ["a[a1]", "a[a2]", "a[a3]"],
          [[1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]])

    for coding in (Sum, Sum()):
        check(C(values, coding), 3, [["a"]],
              ["a[mean]", "a[S.a1]", "a[S.a2]"],
              sum_rows)

    check(C(values, Sum(omit=0)), 3, [["a"]],
          ["a[mean]", "a[S.a2]", "a[S.a3]"],
          sum_omit0_rows)

    # --- Intercept in model, non-full-rank coding of 'a' ---
    check(C(values), 3, [[], ["a"]],
          ["Intercept", "a[T.a2]", "a[T.a3]"],
          [[1, 0, 0], [1, 0, 1], [1, 0, 0], [1, 1, 0]])

    for coding in (Sum, Sum()):
        check(C(values, coding), 3, [[], ["a"]],
              ["Intercept", "a[S.a1]", "a[S.a2]"],
              sum_rows)

    check(C(values, Sum(omit=0)), 3, [[], ["a"]],
          ["Intercept", "a[S.a2]", "a[S.a3]"],
          sum_omit0_rows)

    # --- Weird ad hoc less-than-full-rank coding of 'a' ---
    # A bare list of lists gets the automatic "custom<N>" column names.
    check(C(values, [[7, 12], [2, 13], [8, -1]]), 2, [["a"]],
          ["a[custom0]", "a[custom1]"],
          custom_rows)

    # The same numbers wrapped in a ContrastMatrix carry their own suffixes.
    named_contrast = ContrastMatrix([[7, 12], [2, 13], [8, -1]],
                                    ["[foo]", "[bar]"])
    check(C(values, named_contrast), 2, [["a"]],
          ["a[foo]", "a[bar]"],
          custom_rows)

コード例 #2

0

ファイルを表示

ファイル: build.py プロジェクト: joaonatali/patsy

def test__ColumnBuilder():
    """Check ``_ColumnBuilder`` column names and the values it writes.

    Three builders are exercised: one where every numerical factor has a
    single column, one where ``f1`` has two columns, and an empty builder
    that is expected to produce the "Intercept" column.
    """
    from nose.tools import assert_raises
    from patsy.contrasts import ContrastMatrix
    # NOTE: C is imported but not referenced anywhere in this test.
    from patsy.categorical import C
    f1, f2, f3 = (_MockFactor(label) for label in ("f1", "f2", "f3"))
    coding_rows = np.array([[0, 0.5], [3, 0]])
    contrast = ContrastMatrix(coding_rows, ["[c1]", "[c2]"])

    # Case 1: f1 and f3 mapped to 1, f2 coded through `contrast`.
    single_builder = _ColumnBuilder([f1, f2, f3],
                                    {f1: 1, f3: 1},
                                    {f2: contrast})
    single_out = np.empty((3, 2))
    single_names = single_builder.column_names()
    assert single_names == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"]
    good_inputs = {
        f1: atleast_2d_column_default([1, 2, 3]),
        f2: np.asarray([0, 0, 1]),
        f3: atleast_2d_column_default([7.5, 2, -12]),
    }
    single_builder.build(good_inputs, single_out)
    expected_single = [
        [0, 0.5 * 1 * 7.5],
        [0, 0.5 * 2 * 2],
        [3 * 3 * -12, 0],
    ]
    assert np.allclose(single_out, expected_single)

    # Check that missing categorical values blow up (-1 in the f2 codes).
    inputs_with_missing = {
        f1: atleast_2d_column_default([1, 2, 3]),
        f2: np.asarray([0, -1, 1]),
        f3: atleast_2d_column_default([7.5, 2, -12]),
    }
    assert_raises(PatsyError, single_builder.build,
                  inputs_with_missing, single_out)

    # Case 2: f1 is mapped to 2, so the output has four columns.
    wide_builder = _ColumnBuilder([f1, f2, f3],
                                  {f1: 2, f3: 1},
                                  {f2: contrast})
    wide_out = np.empty((3, 4))
    wide_inputs = {
        f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]),
        f2: np.asarray([0, 0, 1]),
        f3: atleast_2d_column_default([7.5, 2, -12]),
    }
    wide_builder.build(wide_inputs, wide_out)
    wide_names = wide_builder.column_names()
    assert wide_names == [
        "f1[0]:f2[c1]:f3",
        "f1[1]:f2[c1]:f3",
        "f1[0]:f2[c2]:f3",
        "f1[1]:f2[c2]:f3",
    ]
    expected_wide = [
        [0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5],
        [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2],
        [3 * 5 * -12, 3 * 6 * -12, 0, 0],
    ]
    assert np.allclose(wide_out, expected_wide)

    # Check intercept building: no factors at all.
    intercept_builder = _ColumnBuilder([], {}, {})
    assert intercept_builder.column_names() == ["Intercept"]
    intercept_out = np.empty((3, 1))
    unused_inputs = {f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}
    intercept_builder.build(unused_inputs, intercept_out)
    assert np.allclose(intercept_out, 1)

コード例 #3

0

ファイルを表示

def test_SubtermInfo():
    """Check ``SubtermInfo`` attribute storage and its argument validation."""
    contrast = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])
    info = SubtermInfo(["a", "x"], {"a": contrast}, 4)

    # The factor list is exposed as a tuple; the other arguments round-trip.
    assert info.factors == ("a", "x")
    assert info.contrast_matrices == {"a": contrast}
    assert info.num_columns == 4

    # smoke test: repr() must simply not raise
    repr(info)

    from nose.tools import assert_raises

    # (expected exception, positional arguments handed to SubtermInfo)
    invalid_calls = [
        (TypeError, (1, {}, 1)),
        (ValueError, (["a", "x"], 1, 1)),
        (ValueError, (["a", "x"], {"z": contrast}, 1)),
        (ValueError, (["a", "x"], {"a": 1}, 1)),
        (ValueError, (["a", "x"], {}, 1.5)),
    ]
    for expected_error, bad_args in invalid_calls:
        assert_raises(expected_error, SubtermInfo, *bad_args)

コード例 #4

0

ファイルを表示

ファイル: design_info.py プロジェクト: MarceloDL-A/metodos_python

def test_SubtermInfo():
    """Check ``SubtermInfo`` attribute storage and its argument validation."""
    contrast = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])
    info = SubtermInfo(["a", "x"], {"a": contrast}, 4)

    # The factor list is exposed as a tuple; the other arguments round-trip.
    assert info.factors == ("a", "x")
    assert info.contrast_matrices == {"a": contrast}
    assert info.num_columns == 4

    # Make sure longs are accepted for num_columns (branch only runs when
    # six reports a non-Python-3 interpreter, where `long` exists).
    if not six.PY3:
        info = SubtermInfo(["a", "x"], {"a": contrast}, long(4))
        assert info.num_columns == 4

    # smoke test: repr() must simply not raise
    repr(info)

    import pytest

    # (expected exception, positional arguments handed to SubtermInfo)
    invalid_calls = [
        (TypeError, (1, {}, 1)),
        (ValueError, (["a", "x"], 1, 1)),
        (ValueError, (["a", "x"], {"z": contrast}, 1)),
        (ValueError, (["a", "x"], {"a": 1}, 1)),
        (ValueError, (["a", "x"], {}, 1.5)),
    ]
    for expected_error, bad_args in invalid_calls:
        pytest.raises(expected_error, SubtermInfo, *bad_args)

コード例 #5

0

ファイルを表示

ファイル: build.py プロジェクト: MarceloDL-A/metodos_python

def test__subterm_column_names_iter_and__build_subterm():
    """Check subterm column naming and the values ``_build_subterm`` writes.

    Three subterms are exercised: one where each numerical factor has one
    column, one where ``f1`` has two columns, and an empty subterm that is
    expected to produce the "Intercept" column.
    """
    import pytest
    from patsy.contrasts import ContrastMatrix
    # NOTE: C is imported but not referenced anywhere in this test.
    from patsy.categorical import C
    f1, f2, f3 = (_MockFactor(label) for label in ("f1", "f2", "f3"))
    coding_rows = np.array([[0, 0.5], [3, 0]])
    contrast = ContrastMatrix(coding_rows, ["[c1]", "[c2]"])

    def numeric_info(factor, width):
        # FactorInfo for a numerical factor with `width` columns.
        return FactorInfo(factor, "numerical", {},
                          num_columns=width, categories=None)

    factor_infos1 = {
        f1: numeric_info(f1, 1),
        f2: FactorInfo(f2, "categorical", {},
                       num_columns=None, categories=["a", "b"]),
        f3: numeric_info(f3, 1),
    }
    contrast_matrices = {f2: contrast}

    # Case 1: every numerical factor contributes a single column.
    subterm1 = SubtermInfo([f1, f2, f3], contrast_matrices, 2)
    names1 = list(_subterm_column_names_iter(factor_infos1, subterm1))
    assert names1 == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"]

    out1 = np.empty((3, 2))
    good_inputs = {
        f1: atleast_2d_column_default([1, 2, 3]),
        f2: np.asarray([0, 0, 1]),
        f3: atleast_2d_column_default([7.5, 2, -12]),
    }
    _build_subterm(subterm1, factor_infos1, good_inputs, out1)
    expected1 = [
        [0, 0.5 * 1 * 7.5],
        [0, 0.5 * 2 * 2],
        [3 * 3 * -12, 0],
    ]
    assert np.allclose(out1, expected1)

    # Check that missing categorical values blow up (-1 in the f2 codes).
    inputs_with_missing = {
        f1: atleast_2d_column_default([1, 2, 3]),
        f2: np.asarray([0, -1, 1]),
        f3: atleast_2d_column_default([7.5, 2, -12]),
    }
    pytest.raises(PatsyError, _build_subterm, subterm1, factor_infos1,
                  inputs_with_missing, out1)

    # Case 2: same factors, but f1 now has two columns.
    factor_infos2 = dict(factor_infos1)
    factor_infos2[f1] = numeric_info(f1, 2)
    subterm2 = SubtermInfo([f1, f2, f3], contrast_matrices, 4)
    names2 = list(_subterm_column_names_iter(factor_infos2, subterm2))
    assert names2 == [
        "f1[0]:f2[c1]:f3",
        "f1[1]:f2[c1]:f3",
        "f1[0]:f2[c2]:f3",
        "f1[1]:f2[c2]:f3",
    ]

    out2 = np.empty((3, 4))
    wide_inputs = {
        f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]),
        f2: np.asarray([0, 0, 1]),
        f3: atleast_2d_column_default([7.5, 2, -12]),
    }
    _build_subterm(subterm2, factor_infos2, wide_inputs, out2)
    expected2 = [
        [0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5],
        [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2],
        [3 * 5 * -12, 3 * 6 * -12, 0, 0],
    ]
    assert np.allclose(out2, expected2)

    # Case 3: a subterm with no factors names and fills the intercept column.
    subterm_int = SubtermInfo([], {}, 1)
    intercept_names = list(_subterm_column_names_iter({}, subterm_int))
    assert intercept_names == ["Intercept"]

    out3 = np.empty((3, 1))
    unused_inputs = {f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}
    _build_subterm(subterm_int, {}, unused_inputs, out3)
    assert np.allclose(out3, 1)

コード例 #6

0

ファイルを表示

def test_DesignInfo():
    """Exercise ``DesignInfo`` construction, accessors, slicing and validation.

    The first half builds a ``DesignInfo`` with and without term metadata and
    checks its attributes, ``slice()`` lookups and ``describe()`` output. The
    second half feeds deliberately inconsistent arguments to the constructor
    and expects ``ValueError`` each time.

    The "bad" inputs are shallow ``OrderedDict``/``dict`` copies of the shared
    fixtures, so they must only be altered by rebinding a key. Writing into a
    value (for example ``copy[t_y][0] = ...``) would change the list shared
    with ``term_codings`` and leak into later checks.
    """
    from nose.tools import assert_raises
    class _MockFactor(object):
        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name
    f_x = _MockFactor("x")
    f_y = _MockFactor("y")
    t_x = Term([f_x])
    t_y = Term([f_y])
    factor_infos = {f_x:
                      FactorInfo(f_x, "numerical", {}, num_columns=3),
                    f_y:
                      FactorInfo(f_y, "numerical", {}, num_columns=1),
                   }
    term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
                                (t_y, [SubtermInfo([f_y], {}, 1)])])
    di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
    assert di.column_names == ["x1", "x2", "x3", "y"]
    assert di.term_names == ["x", "y"]
    assert di.terms == [t_x, t_y]
    assert di.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert di.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert di.term_slices == {t_x: slice(0, 3), t_y: slice(3, 4)}
    assert di.describe() == "x + y"

    # slice() accepts an index, a column name, a term name, a Term or a slice
    assert di.slice(1) == slice(1, 2)
    assert di.slice("x1") == slice(0, 1)
    assert di.slice("x2") == slice(1, 2)
    assert di.slice("x3") == slice(2, 3)
    assert di.slice("x") == slice(0, 3)
    assert di.slice(t_x) == slice(0, 3)
    assert di.slice("y") == slice(3, 4)
    assert di.slice(t_y) == slice(3, 4)
    assert di.slice(slice(2, 4)) == slice(2, 4)
    assert_raises(PatsyError, di.slice, "asdf")

    # smoke test
    repr(di)

    assert_no_pickling(di)

    # One without term objects
    di = DesignInfo(["a1", "a2", "a3", "b"])
    assert di.column_names == ["a1", "a2", "a3", "b"]
    assert di.term_names == ["a1", "a2", "a3", "b"]
    assert di.terms is None
    assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)

    # Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "y", "y2"], factor_infos, term_codings)
    # duplicate name
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x1", "x1", "y"], factor_infos, term_codings)

    # f_y is in factor_infos, but not mentioned in any term
    term_codings_x_only = OrderedDict(term_codings)
    del term_codings_x_only[t_y]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings_x_only)

    # f_a is in a term, but not in factor_infos
    f_a = _MockFactor("a")
    t_a = Term([f_a])
    term_codings_with_a = OrderedDict(term_codings)
    term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y", "a"],
                  factor_infos, term_codings_with_a)

    # bad factor_infos
    not_factor_infos = dict(factor_infos)
    not_factor_infos[f_x] = "what is this I don't even"
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], not_factor_infos, term_codings)

    # the FactorInfo stored under f_x was built for a different factor (f_a)
    mismatch_factor_infos = dict(factor_infos)
    mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)

    # bad term_codings
    # a plain dict instead of an OrderedDict
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))

    # a key that is not a Term
    not_term_codings = OrderedDict(term_codings)
    not_term_codings["this is a string"] = term_codings[t_x]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, not_term_codings)

    # a value that is a tuple rather than a list
    non_list_term_codings = OrderedDict(term_codings)
    non_list_term_codings[t_y] = tuple(term_codings[t_y])
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)

    # a list entry that is not a SubtermInfo. Rebind the key to a fresh list:
    # the copy is shallow, so assigning to [t_y][0] would also overwrite the
    # entry held by term_codings.
    non_subterm_term_codings = OrderedDict(term_codings)
    non_subterm_term_codings[t_y] = ["not a SubtermInfo"]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)

    bad_subterm = OrderedDict(term_codings)
    # f_x is a factor in this model, but it is not a factor in t_y
    # (set on the copy under test, again via a fresh list, not on the fixture)
    bad_subterm[t_y] = [SubtermInfo([f_x], {}, 1)]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, bad_subterm)

    # contrast matrix has wrong number of rows
    # (two categories declared, but the matrix has three rows)
    factor_codings_a = {f_a:
                          FactorInfo(f_a, "categorical", {},
                                     categories=["a1", "a2"])}
    term_codings_a_bad_rows = OrderedDict([
        (t_a,
         [SubtermInfo([f_a],
                      {f_a: ContrastMatrix(np.ones((3, 2)),
                                           ["[1]", "[2]"])},
                      2)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]", "a[2]"],
                  factor_codings_a,
                  term_codings_a_bad_rows)

    # have a contrast matrix for a non-categorical factor
    t_ax = Term([f_a, f_x])
    factor_codings_ax = {f_a:
                           FactorInfo(f_a, "categorical", {},
                                      categories=["a1", "a2"]),
                         f_x:
                           FactorInfo(f_x, "numerical", {},
                                      num_columns=2)}
    term_codings_ax_extra_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]),
                       f_x: ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])},
                      4)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_extra_cm)

    # no contrast matrix for a categorical factor
    term_codings_ax_missing_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {},
                      4)])])
    # This actually fails before it hits the relevant check with a KeyError,
    # but that's okay... the previous test still exercises the check.
    assert_raises((ValueError, KeyError), DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_missing_cm)

    # subterm num_columns doesn't match the value computed from the individual
    # factors
    term_codings_ax_wrong_subterm_columns = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 3)),
                                           ["[1]", "[2]", "[3]"])},
                      # should be 2 * 3 = 6
                      5)])])
    assert_raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]",
                   "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
                  factor_codings_ax,
                  term_codings_ax_wrong_subterm_columns)

コード例 #7

0

ファイルを表示

ファイル: contrasts.py プロジェクト: patmosxx-v2/Pyto

 def code_without_intercept(self, levels):
     """Build the ContrastMatrix for `levels` without an intercept column.

     The matrix is whatever ``self._simple_contrast(levels)`` returns; the
     column names come from ``_name_levels`` applied to the "Simp." prefix
     and every level except the last one.
     """
     matrix = self._simple_contrast(levels)
     # One name per level, dropping the final level.
     column_names = _name_levels("Simp.", levels[:-1])
     return ContrastMatrix(matrix, column_names)

コード例 #8

0

ファイルを表示

ファイル: contrasts.py プロジェクト: patmosxx-v2/Pyto

 def code_with_intercept(self, levels):
     """Build the ContrastMatrix for `levels` including an intercept column.

     A column of ones (one entry per level) is placed in front of the
     result of ``self._simple_contrast(levels)``; the column names come from
     ``_name_levels`` applied to the "Simp." prefix and all of `levels`.
     """
     ones_column = np.ones(len(levels))
     simple_columns = self._simple_contrast(levels)
     # column_stack puts the ones column first, then the contrast columns.
     stacked = np.column_stack((ones_column, simple_columns))
     return ContrastMatrix(stacked, _name_levels("Simp.", levels))