Example #1
0
def test_DesignInfo_subset():
    # For each combination of:
    #   formula, term names, term objects, mixed term name and term objects
    # check that results match subset of full build
    # and that removed variables don't hurt
    all_data = {"x": [1, 2], "y": [[3.1, 3.2], [4.1, 4.2]], "z": [5, 6]}
    all_terms = make_termlist("x", "y", "z")

    def iter_maker():
        yield all_data

    all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def t(which_terms, variables, columns):
        sub_design_info = all_builder.subset(which_terms)
        sub_data = {}
        for variable in variables:
            sub_data[variable] = all_data[variable]
        sub_matrix = build_design_matrices([sub_design_info], sub_data)[0]
        sub_full_matrix = full_matrix[:, columns]
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(sub_design_info.terms)
        assert np.array_equal(sub_matrix, sub_full_matrix)

    t("~ 0 + x + y + z", ["x", "y", "z"], slice(None))
    t(["x", "y", "z"], ["x", "y", "z"], slice(None))
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("y"),
           unicode("z")], ["x", "y", "z"], slice(None))
    t(all_terms, ["x", "y", "z"], slice(None))
    t([all_terms[0], "y", all_terms[2]], ["x", "y", "z"], slice(None))

    t("~ 0 + x + z", ["x", "z"], [0, 3])
    t(["x", "z"], ["x", "z"], [0, 3])
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("z")], ["x", "z"], [0, 3])
    t([all_terms[0], all_terms[2]], ["x", "z"], [0, 3])
    t([all_terms[0], "z"], ["x", "z"], [0, 3])

    t("~ 0 + z + x", ["x", "z"], [3, 0])
    t(["z", "x"], ["x", "z"], [3, 0])
    t([six.text_type("z"), six.text_type("x")], ["x", "z"], [3, 0])
    t([all_terms[2], all_terms[0]], ["x", "z"], [3, 0])
    t([all_terms[2], "x"], ["x", "z"], [3, 0])

    t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    pytest.raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    pytest.raises(KeyError, all_builder.subset, "~ asdf")
    pytest.raises(KeyError, all_builder.subset, ["asdf"])
    pytest.raises(KeyError, all_builder.subset, [Term(["asdf"])])

    # Also check for a minimal DesignInfo (column names only)
    min_di = DesignInfo(["a", "b", "c"])
    min_di_subset = min_di.subset(["c", "a"])
    assert min_di_subset.column_names == ["c", "a"]
    assert min_di_subset.terms is None
Example #2
0
def test_DesignInfo_subset():
    # For each combination of:
    #   formula, term names, term objects, mixed term name and term objects
    # check that results match subset of full build
    # and that removed variables don't hurt
    all_data = {"x": [1, 2],
                "y": [[3.1, 3.2],
                      [4.1, 4.2]],
                "z": [5, 6]}
    all_terms = make_termlist("x", "y", "z")
    def iter_maker():
        yield all_data
    all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def t(which_terms, variables, columns):
        sub_design_info = all_builder.subset(which_terms)
        sub_data = {}
        for variable in variables:
            sub_data[variable] = all_data[variable]
        sub_matrix = build_design_matrices([sub_design_info], sub_data)[0]
        sub_full_matrix = full_matrix[:, columns]
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(sub_design_info.terms)
        assert np.array_equal(sub_matrix, sub_full_matrix)

    t("~ 0 + x + y + z", ["x", "y", "z"], slice(None))
    t(["x", "y", "z"], ["x", "y", "z"], slice(None))
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("y"), unicode("z")],
          ["x", "y", "z"], slice(None))
    t(all_terms, ["x", "y", "z"], slice(None))
    t([all_terms[0], "y", all_terms[2]], ["x", "y", "z"], slice(None))

    t("~ 0 + x + z", ["x", "z"], [0, 3])
    t(["x", "z"], ["x", "z"], [0, 3])
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("z")], ["x", "z"], [0, 3])
    t([all_terms[0], all_terms[2]], ["x", "z"], [0, 3])
    t([all_terms[0], "z"], ["x", "z"], [0, 3])

    t("~ 0 + z + x", ["x", "z"], [3, 0])
    t(["z", "x"], ["x", "z"], [3, 0])
    t([six.text_type("z"), six.text_type("x")], ["x", "z"], [3, 0])
    t([all_terms[2], all_terms[0]], ["x", "z"], [3, 0])
    t([all_terms[2], "x"], ["x", "z"], [3, 0])

    t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    assert_raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    assert_raises(KeyError, all_builder.subset, "~ asdf")
    assert_raises(KeyError, all_builder.subset, ["asdf"])
    assert_raises(KeyError,
                  all_builder.subset, [Term(["asdf"])])

    # Also check for a minimal DesignInfo (column names only)
    min_di = DesignInfo(["a", "b", "c"])
    min_di_subset = min_di.subset(["c", "a"])
    assert min_di_subset.column_names == ["c", "a"]
    assert min_di_subset.terms is None