Python design_matricesの例、formulae.matrices.design_matrices Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_eval_new_data.py プロジェクト: bambinos/formulae

def test_model_categoric_common(data, data2):
    """Categoric common terms compare equal and produce the expected columns on new data."""
    # Categoric predictor next to the default intercept
    fitted = design_matrices("y ~ g1", data).common
    refreshed = fitted.evaluate_new_data(data2)
    expected = np.array(
        [0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]
    )

    assert fitted.terms["g1"] == refreshed.terms["g1"]
    assert np.allclose(refreshed["g1"].flatten(), expected)

    # Explicit categoric term without intercept
    fitted = design_matrices("y ~ 0 + C(u)", data).common
    refreshed = fitted.evaluate_new_data(data2)
    # Position of the single 1 in each of the 16 expected rows
    active_column = [0, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 2, 2, 0, 2, 1]
    expected = np.eye(3, dtype=int)[active_column]

    assert fitted.terms["C(u)"] == refreshed.terms["C(u)"]
    assert (refreshed["C(u)"] == expected).all()

コード例 #2

0

ファイルを表示

ファイル: test_eval_new_data.py プロジェクト: bambinos/formulae

def test_model_numeric_common(data, data2):
    """Numeric common terms are re-evaluated on new data, reusing stateful parameters."""

    def transform_of(matrix, term_name):
        # Stateful transform object attached to the first component of a term
        return matrix.terms[term_name].components[0].call.stateful_transform

    # Stateless transformations are simply applied to the new data
    refreshed = design_matrices("y ~ np.exp(x) + z", data).common.evaluate_new_data(data2)
    assert np.allclose(np.exp(data2["x"]), refreshed["np.exp(x)"].flatten())
    assert np.allclose(data2["z"], refreshed["z"].flatten())

    fitted = design_matrices("y ~ center(x) + scale(z)", data).common
    refreshed = fitted.evaluate_new_data(data2)

    # Step 1: the parameters stored by the transforms are the same before and after
    center_mean_old = transform_of(fitted, "center(x)").mean
    center_mean_new = transform_of(refreshed, "center(x)").mean
    assert np.allclose(center_mean_old, 0, atol=1)
    assert np.allclose(center_mean_old, center_mean_new)

    scale_old = transform_of(fitted, "scale(z)")
    scale_new = transform_of(refreshed, "scale(z)")
    scale_mean_old, scale_std_old = scale_old.mean, scale_old.std
    scale_mean_new, scale_std_new = scale_new.mean, scale_new.std
    assert np.allclose(scale_mean_old, 0, atol=1)
    assert np.allclose(scale_std_old, 1, atol=1)
    assert np.allclose(scale_mean_old, scale_mean_new)
    assert np.allclose(scale_std_old, scale_std_new)

    # Step 2: the new data is transformed with those original parameters
    assert np.allclose(refreshed["center(x)"].flatten(), data2["x"] - center_mean_old)
    assert np.allclose(
        refreshed["scale(z)"].flatten(), (data2["z"] - scale_mean_old) / scale_std_old
    )

コード例 #3

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: zwelitunyiswa/formulae

def test_external_transforms(data):
    """Functions that are not part of the formula language can be called inside a formula."""

    # Must keep this exact name: the formula below refers to it.
    def add_ten(x):
        return x + 10

    # A function reached through the numpy module
    exp_column = design_matrices("y ~ np.exp(x1)", data).common["np.exp(x1)"][:, 0]
    assert np.allclose(exp_column, np.exp(data["x1"]))

    # A function defined locally in the test
    shifted_column = design_matrices("y ~ add_ten(x1)", data).common["add_ten(x1)"][:, 0]
    assert np.allclose(shifted_column, data["x1"] + 10)

コード例 #4

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: zwelitunyiswa/formulae

def test_interactions(data):
    """Check how interaction formulas expand and which encoding "g" receives in each term."""
    # "f * g" and "f + g + f:g" must describe the same model
    starred = design_matrices("y ~ f * g", data)
    spelled_out = design_matrices("y ~ f + g + f:g", data)
    assert compare_dicts(spelled_out.common.terms_info, starred.common.terms_info)

    # ... also when the intercept is removed
    starred = design_matrices("y ~ 0 + f * g", data)
    spelled_out = design_matrices("y ~ 0 + f + g + f:g", data)
    assert compare_dicts(spelled_out.common.terms_info, starred.common.terms_info)

    # Numeric and categoric predictors mixed together.
    # "g" alone is reduced; "g" within "x1:g" is reduced too since x1 is in the formula.
    info = design_matrices("y ~ x1 + g + x1:g", data).common.terms_info
    assert list(info) == ["Intercept", "x1", "g", "x1:g"]
    assert info["g"]["type"] == "categoric"
    assert info["g"]["encoding"] == "reduced"
    assert info["x1:g"]["terms"]["g"]["encoding"] == "reduced"

    # "g" alone is reduced; "g" within "x1:g" is full since x1 is missing from the formula.
    info = design_matrices("y ~ g + x1:g", data).common.terms_info
    assert list(info) == ["Intercept", "g", "x1:g"]
    assert info["g"]["type"] == "categoric"
    assert info["g"]["encoding"] == "reduced"
    assert info["x1:g"]["terms"]["g"]["encoding"] == "full"

    # "g" within "x1:x2:g" is full: x1:x2 forms a new group that is not in the model.
    info = design_matrices("y ~ x1 + g + x1:g + x1:x2:g", data).common.terms_info
    assert list(info) == ["Intercept", "x1", "g", "x1:g", "x1:x2:g"]
    assert info["g"]["type"] == "categoric"
    assert info["g"]["encoding"] == "reduced"
    assert info["x1:g"]["terms"]["g"]["encoding"] == "reduced"
    assert info["x1:x2:g"]["terms"]["g"]["encoding"] == "full"

    # "g" within "x1:x2:g" is reduced once x1:x2 is itself part of the model.
    info = design_matrices("y ~ x1 + g + x1:x2 + x1:g + x1:x2:g", data).common.terms_info
    assert list(info) == ["Intercept", "x1", "g", "x1:x2", "x1:g", "x1:x2:g"]
    assert info["g"]["type"] == "categoric"
    assert info["g"]["encoding"] == "reduced"
    assert info["x1:g"]["terms"]["g"]["encoding"] == "reduced"
    assert info["x1:x2:g"]["terms"]["g"]["encoding"] == "reduced"

    # Without intercept, x1 and x1:x2, every occurrence of "g" is full.
    info = design_matrices("y ~ 0 + g + x1:g + x1:x2:g", data).common.terms_info
    assert list(info) == ["g", "x1:g", "x1:x2:g"]
    assert info["g"]["type"] == "categoric"
    assert info["g"]["encoding"] == "full"
    assert info["x1:g"]["terms"]["g"]["encoding"] == "full"
    assert info["x1:x2:g"]["terms"]["g"]["encoding"] == "full"

    # Interaction of two numeric predictors is their element-wise product
    numeric = design_matrices("y ~ x1:x2", data)
    assert "x1:x2" in numeric.common.terms_info
    assert np.allclose(numeric.common["x1:x2"][:, 0], data["x1"] * data["x2"])

コード例 #5

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_interactions(data):
    """Check how interaction formulas expand and whether "g" spans the intercept in each term."""
    # "f * g" and "f + g + f:g" must describe the same model
    starred = design_matrices("y ~ f * g", data)
    spelled_out = design_matrices("y ~ f + g + f:g", data)
    assert spelled_out.common.terms == starred.common.terms

    # ... also when the intercept is removed
    starred = design_matrices("y ~ 0 + f * g", data)
    spelled_out = design_matrices("y ~ 0 + f + g + f:g", data)
    assert spelled_out.common.terms == starred.common.terms

    # Numeric and categoric predictors mixed together.
    # "g" alone does not span the intercept; neither does "g" within "x1:g",
    # since x1 is in the formula.
    terms = design_matrices("y ~ x1 + g + x1:g", data).common.terms
    assert list(terms) == ["Intercept", "x1", "g", "x1:g"]
    assert terms["g"].kind == "categoric"
    assert terms["g"].spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is False

    # "g" alone does not span the intercept; "g" within "x1:g" does,
    # since x1 is missing from the formula.
    terms = design_matrices("y ~ g + x1:g", data).common.terms
    assert list(terms) == ["Intercept", "g", "x1:g"]
    assert terms["g"].kind == "categoric"
    assert terms["g"].spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is True

    # "g" within "x1:x2:g" spans the intercept: x1:x2 forms a new group not in the model.
    terms = design_matrices("y ~ x1 + g + x1:g + x1:x2:g", data).common.terms
    assert list(terms) == ["Intercept", "x1", "g", "x1:g", "x1:x2:g"]
    assert terms["g"].kind == "categoric"
    assert terms["g"].spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is False
    assert terms["x1:x2:g"].components[2].spans_intercept is True

    # "g" within "x1:x2:g" no longer spans the intercept once x1:x2 is part of the model.
    terms = design_matrices("y ~ x1 + g + x1:x2 + x1:g + x1:x2:g", data).common.terms
    assert list(terms) == ["Intercept", "x1", "g", "x1:x2", "x1:g", "x1:x2:g"]
    assert terms["g"].kind == "categoric"
    assert terms["g"].spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is False
    assert terms["x1:x2:g"].components[2].spans_intercept is False

    # Without intercept, x1 and x1:x2, every occurrence of "g" spans the intercept.
    terms = design_matrices("y ~ 0 + g + x1:g + x1:x2:g", data).common.terms
    assert list(terms) == ["g", "x1:g", "x1:x2:g"]
    assert terms["g"].kind == "categoric"
    assert terms["g"].spans_intercept is True
    assert terms["x1:g"].components[1].spans_intercept is True
    assert terms["x1:x2:g"].components[2].spans_intercept is True

    # Interaction of two numeric predictors is their element-wise product
    numeric = design_matrices("y ~ x1:x2", data)
    assert "x1:x2" in numeric.common.terms
    assert np.allclose(numeric.common["x1:x2"][:, 0], data["x1"] * data["x2"])

コード例 #6

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_predict_prop(beetle):
    """Evaluating a prop() response on new data returns the number of trials."""
    trials = {"n": [10, 10, 30, 30]}

    # Trials given as a variable: it is read from the new dataset
    response = design_matrices("prop(y, n) ~ x", beetle).response
    evaluated = response.evaluate_new_data(pd.DataFrame(trials))
    assert (evaluated == np.array([10, 10, 30, 30])).all()

    # Trials given as a constant: that constant comes back for every row
    response = design_matrices("prop(y, 70) ~ x", beetle).response
    evaluated = response.evaluate_new_data(pd.DataFrame(trials))
    assert (evaluated == np.array([70, 70, 70, 70])).all()

コード例 #7

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_predict_offset(beetle):
    """Evaluating an offset() term on new data returns a column with the offset values."""
    new_values = {"x": [1, 2, 3]}

    # Offset given as a variable: it is read from the new dataset
    common = design_matrices("y ~ x + offset(x)", beetle).common
    offset = common.evaluate_new_data(pd.DataFrame(new_values))["offset(x)"]
    assert (offset == np.array([1, 2, 3])[:, np.newaxis]).all()

    # Offset given as a constant: that constant comes back for every row
    common = design_matrices("y ~ x + offset(10)", beetle).common
    offset = common.evaluate_new_data(pd.DataFrame(new_values))["offset(10)"]
    assert (offset == np.array([10, 10, 10])[:, np.newaxis]).all()

コード例 #8

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_categoric_group_specific():
    """Check which group-specific terms are created when the expression is categoric."""
    data = pd.DataFrame(
        {
            "BP": np.random.normal(size=30),
            "BMI": np.random.normal(size=30),
            "age_grp": np.random.choice([0, 1, 2], size=30),
        }
    )

    # NOTE(review): both comparisons below used to be bare expressions without `assert`,
    # so this test could never fail. The previously written expectations listed one key per
    # level ("C(age_grp)[1]|BMI", ...), but the other group-specific tests index
    # `group.terms` by "<expr>|<factor>" and keep per-level names in `.labels`.
    # The expected keys follow that convention -- confirm against the library.
    dm = design_matrices("BP ~ 0 + (C(age_grp)|BMI)", data)
    assert list(dm.group.terms) == ["1|BMI", "C(age_grp)|BMI"]

    dm = design_matrices("BP ~ 0 + (0 + C(age_grp)|BMI)", data)
    assert list(dm.group.terms) == ["C(age_grp)|BMI"]

コード例 #9

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_categoric_responses():
    """Check levels and values of categoric responses, with and without subset notation."""
    data = pd.DataFrame(
        {
            "y1": np.random.choice(["A", "B", "C"], size=30),
            "y2": np.random.choice(["A", "B"], size=30),
            "y3": np.random.choice(["Hi there", "Bye bye", "What??"], size=30),
            "x": np.random.normal(size=30),
        }
    )

    # Responses used as they are: the response is a design matrix of dummies that span the
    # intercept and the levels are kept. A two-level response is not flagged as binary
    # because it was not explicitly converted.
    # TODO: Revisit if this logic is fine
    untouched = [
        ("y1 ~ x", ["A", "B", "C"]),
        ("y2 ~ x", ["A", "B"]),
    ]
    for formula, levels in untouched:
        response = design_matrices(formula, data).response
        assert list(np.unique(response.design_matrix)) == [0, 1]
        assert response.levels == levels

    # Responses explicitly converted to binary through subset notation
    converted = [
        "y1['A'] ~ x",  # multi-level response, level passed as a string
        "y2['B'] ~ x",  # two-level response, level passed as a string
        "y2[B] ~ x",  # level passed as an identifier
        "y3['Bye bye'] ~ x",  # level containing spaces
    ]
    for formula in converted:
        response = design_matrices(formula, data).response
        assert list(np.unique(response.design_matrix)) == [0, 1]
        assert response.levels is None

    # Nested brackets are rejected
    nested_message = re.escape("Are you using nested brackets? Why?")
    with pytest.raises(ParseError, match=nested_message):
        design_matrices("y3[A[B]] ~ x", data)

    # A number is not accepted as the level in subset notation
    number_message = re.escape("Subset notation only allows a string or an identifer")
    with pytest.raises(ParseError, match=number_message):
        design_matrices("y3[1] ~ x", data)

コード例 #10

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_common_predictor(data):
    """Check the metadata of a single numeric and a single categoric common predictor."""
    dm = design_matrices("y ~ x1", data)
    assert list(dm.common.terms) == ["Intercept", "x1"]
    assert dm.common.terms["x1"].kind == "numeric"
    assert dm.common.terms["x1"].labels == ["x1"]
    assert dm.common.terms["x1"].levels is None

    # 'f' does not span intercept because the intercept is already included
    dm = design_matrices("y ~ f", data)
    # Levels are compared in sorted order; the first one is dropped from labels and levels
    levels = sorted(data["f"].unique())
    assert list(dm.common.terms) == ["Intercept", "f"]
    assert dm.common.terms["f"].kind == "categoric"
    assert dm.common.terms["f"].labels == [f"f[{level}]" for level in levels[1:]]
    assert dm.common.terms["f"].levels == levels[1:]
    # Identity check, consistent with the other `spans_intercept` assertions in this file
    assert dm.common.terms["f"].spans_intercept is False

コード例 #11

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_design_matrices_categoric_call(data):
    """Smoke test: user functions returning categoricals can be used as predictors."""

    # The two helper names are referenced by the formulas below and must not change.
    def f(values):
        return pd.Categorical(values)

    def f_ordered(values):
        return pd.Categorical(values, categories=np.unique(values), ordered=True)

    # With an intercept present the terms do not span it
    design_matrices("y ~ f(x3) + f_ordered(g)", data)

    # Without an intercept the term spans it; the result must also accept new data
    no_intercept = design_matrices("y ~ 0 + f(g)", data)
    no_intercept.common.evaluate_new_data(data)

コード例 #12

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_categorical_ordered_series():
    """Ordered categorical columns are accepted and their category order is respected."""
    data = pd.DataFrame({"x": list("abcd") * 10})
    data["x"] = pd.Categorical(data["x"], list("bcda"), ordered=True)

    # It simply has to run
    design_matrices("S(x)", data)

    # Levels follow the order of the categories ("bcda"), not alphabetical order.
    # Each formula consists of a single term whose name equals the formula.
    levels_by_term = {"x": "cda", "T(x)": "cda", "S(x)": "bcd"}
    for term, expected in levels_by_term.items():
        levels = design_matrices(term, data).common.terms[term].levels
        assert levels == list(expected)

コード例 #13

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_attempt_to_evaluate_non_proportion_response(data):
    """Evaluating new data on a response that is not a proportion raises ValueError."""
    expected_message = "Can't evaluate response term with kind different to 'proportion'"
    response, _common, _group = design_matrices("y ~ x1", data)

    with pytest.raises(ValueError, match=expected_message):
        response.evaluate_new_data(data)

コード例 #14

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_group_specific_intercept_only(data):
    """A model with only a group-specific intercept has one group term and no common part."""
    dm = design_matrices("y ~ 0 + (1|g)", data)
    assert len(dm.group.terms) == 1
    assert dm.group.terms["1|g"].kind == "intercept"
    assert dm.group.terms["1|g"].groups == ["A", "B"]
    assert dm.group.terms["1|g"].labels == ["1|g[A]", "1|g[B]"]
    # Identity check: `== None` would go through a user-defined `__eq__` if one exists
    assert dm.common is None

コード例 #15

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_extra_namespace(data):
    """Names passed through `extra_namespace` can be called from within the formula."""
    namespace = {"myfunc": np.log}
    matrices = design_matrices(
        "y ~ myfunc(x3) + x3",
        data,
        na_action="drop",
        extra_namespace=namespace,
    )
    frame = matrices.common.as_dataframe()
    # The column built with "myfunc" must equal np.log applied to the plain x3 column
    expected = np.log(frame["x3"])
    assert frame["myfunc(x3)"].equals(expected)

コード例 #16

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_bs_categorical_interaction():
    """Levels of a bs():categoric interaction combine basis index and category."""
    states = ["lonely", "depressed", "hopeful", "stressed", "positive", "isolated"]
    data = pd.DataFrame(
        {
            "state": np.tile(states, 10),
            "time": np.repeat(np.arange(0, 5), 12),
        }
    )

    dm = design_matrices("0 + bs(time, degree=2, df=3) : state", data)
    term = dm.common.terms["bs(time, degree = 2, df = 3):state"]

    # Expected: basis indices 0..2 in the outer position, states alphabetically in the inner
    expected_levels = [f"{basis}, {state}" for basis in range(3) for state in sorted(states)]
    assert term.levels == expected_levels

コード例 #17

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_common_as_data_frame(data):
    """The common matrix converts to a data frame with one column per term."""
    _, common, _ = design_matrices("g ~ x1 + x2", data)
    frame = common.as_dataframe()

    assert frame.columns.tolist() == ["Intercept", "x1", "x2"]
    # The intercept column is all ones; numeric columns match the input data
    assert (frame["Intercept"] == 1).all()
    for column in ("x1", "x2"):
        assert (frame[column] == data[column]).all()

コード例 #18

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_common_intercept_only_model(data):
    """An intercept-only model has a single common term, a column of ones, and no group part."""
    dm = design_matrices("y ~ 1", data)
    assert len(dm.common.terms) == 1
    assert dm.common.terms["Intercept"].kind == "intercept"
    assert dm.common.terms["Intercept"].labels == ["Intercept"]
    # Reduce over the whole array rather than iterating rows with the builtin `all`
    assert (dm.common.design_matrix == 1).all()
    # Identity check: `== None` would go through a user-defined `__eq__` if one exists
    assert dm.group is None

コード例 #19

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: zwelitunyiswa/formulae

def test_common_intercept_only_model(data):
    """An intercept-only model has a single common term, a column of ones, and no group part."""
    dm = design_matrices("y ~ 1", data)
    assert len(dm.common.terms_info) == 1
    assert dm.common.terms_info["Intercept"]["type"] == "intercept"
    assert dm.common.terms_info["Intercept"]["full_names"] == ["Intercept"]
    # Reduce over the whole array rather than iterating rows with the builtin `all`
    assert (dm.common.design_matrix == 1).all()
    # Identity check: `== None` would go through a user-defined `__eq__` if one exists
    assert dm.group is None

コード例 #20

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: zwelitunyiswa/formulae

def test_group_specific_intercept_only(data):
    """A model with only a group-specific intercept has one group term and no common part."""
    dm = design_matrices("y ~ 0 + (1|g)", data)
    assert len(dm.group.terms_info) == 1
    assert dm.group.terms_info["1|g"]["type"] == "intercept"
    assert dm.group.terms_info["1|g"]["groups"] == ["A", "B"]
    assert dm.group.terms_info["1|g"]["full_names"] == ["1|g[A]", "1|g[B]"]
    # Identity check: `== None` would go through a user-defined `__eq__` if one exists
    assert dm.common is None

コード例 #21

0

ファイルを表示

ファイル: test_eval_new_data.py プロジェクト: bambinos/formulae

def test_components_arent_shared():
    """
    Components used in full interaction operator used to be shared between terms, which may save
    space and time, but result in unexpected behavior if components have different encodings
    in the different terms.
    """

    def random_frame():
        # Fresh random dataset with the columns used by the formula below
        return pd.DataFrame(
            {
                "y": np.random.normal(size=100),
                "x": np.random.normal(size=100),
                "g": np.random.choice(["A", "B", "C"], size=100),
            }
        )

    data = random_frame()

    common = design_matrices("y ~ 0 + x*g", data).common
    # Each term must own its components: compare identity directly
    assert common.terms["x"].components[0] is not common.terms["x:g"].components[0]
    assert common.terms["g"].components[0] is not common.terms["x:g"].components[1]

    # The original `new_data = data = ...` also rebound `data` by accident
    new_data = random_frame()

    new_common = common.evaluate_new_data(new_data)
    assert new_common.design_matrix.shape[1] == 6

コード例 #22

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: zwelitunyiswa/formulae

def test_categoric_encoding_with_numeric_interaction():
    """Check the encodings chosen for categoric variables inside interaction terms."""
    np.random.seed(1234)
    n_obs = 20
    data = pd.DataFrame(
        {
            "y": np.random.uniform(size=n_obs),
            "x1": np.random.uniform(size=n_obs),
            "x2": np.random.uniform(size=n_obs),
            "x3": [1, 2, 3, 4] * 5,
            "f": np.random.choice(["A", "B"], size=n_obs),
            "g": np.random.choice(["A", "B"], size=n_obs),
            "h": np.random.choice(["A", "B"], size=n_obs),
            "j": np.random.choice(["A", "B"], size=n_obs),
        }
    )
    info = design_matrices("y ~ x1 + x2 + f:g + h:j:x2", data).common.terms_info

    # Main effects "g" and "j" show up even though the formula does not list them
    assert list(info) == ["Intercept", "x1", "x2", "g", "f:g", "j", "h:j:x2"]

    # Categoric-by-categoric interaction
    f_by_g = info["f:g"]
    assert info["g"]["encoding"] == "reduced"
    assert f_by_g["type"] == "interaction"
    assert f_by_g["terms"]["f"]["encoding"] == "reduced"
    assert f_by_g["terms"]["g"]["encoding"] == "full"
    assert f_by_g["full_names"] == ["f[B]:g[A]", "f[B]:g[B]"]

    # Interaction of two categorics with a numeric
    h_j_x2 = info["h:j:x2"]["terms"]
    assert info["j"]["encoding"] == "reduced"
    assert h_j_x2["h"]["encoding"] == "reduced"
    assert h_j_x2["j"]["encoding"] == "full"
    assert h_j_x2["x2"]["type"] == "numeric"

コード例 #23

0

ファイルを表示

ファイル: test_eval_new_data.py プロジェクト: bambinos/formulae

def test_nested_transform(data, data2):
    """Stateful transforms nested in other calls keep the parameters from the original data."""
    # center() wrapped in I(): the mean must come from `data`, not from `data2`
    term = "I(center(x) ** 2)"
    common = design_matrices(term, data).common
    observed = common.evaluate_new_data(data2)[term]
    expected = (data2["x"] - data["x"].mean()) ** 2
    assert np.allclose(observed.flatten(), np.array(expected).flatten())

    # scale() applied to an external function call combined with a binary operator:
    # mean and standard deviation must come from the transformed original data
    term = "scale(np.exp(x) + 1)"
    common = design_matrices(term, data).common
    observed = common.evaluate_new_data(data2)[term]
    original_values = np.exp(data["x"]) + 1
    expected = (np.exp(data2["x"]) + 1 - np.mean(original_values)) / np.std(original_values)
    assert np.allclose(observed.flatten(), np.array(expected).flatten())

コード例 #24

0

ファイルを表示

ファイル: test_eval_new_data.py プロジェクト: bambinos/formulae

def test_model_categoric_group(data, data2):
    """A categoric group-specific term compares equal and yields the expected matrix on new data."""
    fitted = design_matrices("y ~ (0 + g1|g2)", data).group
    refreshed = fitted.evaluate_new_data(data2)

    assert fitted.terms["g1|g2"] == refreshed.terms["g1|g2"]

    # Position of the single 1 in each of the 16 expected rows
    active_column = [0, 3, 1, 3, 1, 2, 0, 2, 1, 3, 1, 2, 0, 3, 0, 3]
    expected = np.eye(4, dtype=int)[active_column]
    assert (refreshed["g1|g2"] == expected).all()

コード例 #25

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_interactions_in_group_specific(pixel):
    """Compare group-specific design matrices and labels against stored reference matrices.

    The group-specific terms covered are:
      1. expr=categoric, factor=categoric
      2. expr=intercept, factor=categoric
      3. expr=intercept, factor=interaction between categorics
    Reference matrices are read from text files; the encoding is checked implicitly through
    the comparison of labels.
    """
    from os.path import dirname, join

    data_dir = join(dirname(__file__), "data/group_specific")
    # Load every reference matrix up front, keyed by file stem
    stems = ["slope_by_dog", "intercept_by_side", "intercept_by_side_dog", "dog_and_side_by_day"]
    reference = {stem: np.loadtxt(join(data_dir, f"{stem}.txt")) for stem in stems}

    dm = design_matrices("pixel ~ day +  (0 + day | Dog) + (1 | Side/Dog)", pixel)

    # Values of the design matrices
    assert (dm.group["day|Dog"] == reference["slope_by_dog"]).all()
    assert (dm.group["1|Side"] == reference["intercept_by_side"]).all()
    assert (dm.group["1|Side:Dog"] == reference["intercept_by_side_dog"]).all()

    # Labels of each term
    dogs, sides, days = [1, 2, 3], ["L", "R"], [2, 4, 6]
    expected = [f"day[{d}]|Dog[{g}]" for g in dogs for d in days]
    assert dm.group.terms["day|Dog"].labels == expected
    expected = [f"1|Side[{s}]" for s in sides]
    assert dm.group.terms["1|Side"].labels == expected
    expected = [f"1|Side[{s}]:Dog[{d}]" for s in sides for d in dogs]
    assert dm.group.terms["1|Side:Dog"].labels == expected

    # A second model where the expression is an interaction
    dm = design_matrices("(0 + Dog:Side | day)", pixel)

    # Values of the design matrix
    assert (dm.group["Dog:Side|day"] == reference["dog_and_side_by_day"]).all()

    # Labels of the term
    expected = [f"Dog[{d}]:Side[{s}]|day[{g}]" for g in days for d in dogs for s in sides]
    assert dm.group.terms["Dog:Side|day"].labels == expected

コード例 #26

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_response_repr_and_str(data):
    """Check the text returned by `str()` and `repr()` for different kinds of response."""
    # Numeric response
    response, _, _ = design_matrices("y ~ x1", data)
    text = (
        "ResponseMatrix  \n"
        "  name: y\n"
        "  kind: numeric\n"
        "  shape: (20,)\n\n"
        "To access the actual design matrix do 'np.array(this_obj)'"
    )
    assert str(response) == text
    assert repr(response) == text

    # Categoric response (this stanza used to appear twice, verbatim; one copy was removed)
    response, _, _ = design_matrices("g ~ x1", data)
    text = (
        "ResponseMatrix  \n"
        "  name: g\n"
        "  kind: categoric\n"
        "  shape: (20, 2)\n"
        "  levels: ['A', 'B']\n\n"
        "To access the actual design matrix do 'np.array(this_obj)'"
    )
    assert str(response) == text
    assert repr(response) == text

    # Categoric response converted to binary through subset notation
    response, _, _ = design_matrices("g[A] ~ x1", data)
    text = (
        "ResponseMatrix  \n"
        "  name: g\n"
        "  kind: categoric\n"
        "  length: shape: (20,)\n\n"
        "To access the actual design matrix do 'np.array(this_obj)'"
    )
    # NOTE(review): this comparison is not asserted, so it checks nothing. The expected text
    # also looks malformed ("length: shape:"). Confirm the real output for a binary response,
    # fix `text`, and turn this line into an `assert`.
    str(response) == text

コード例 #27

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: zwelitunyiswa/formulae

def test_common_predictor(data):
    """Check the metadata of a single numeric and a single categoric common predictor."""
    info = design_matrices("y ~ x1", data).common.terms_info
    assert list(info) == ["Intercept", "x1"]
    assert info["x1"]["type"] == "numeric"
    assert info["x1"]["full_names"] == ["x1"]

    # Categoric predictor: levels are in alphabetic order, the reference is the first level
    # and the encoding is reduced because the model includes an intercept.
    info = design_matrices("y ~ f", data).common.terms_info
    f_info = info["f"]
    assert list(info) == ["Intercept", "f"]
    assert f_info["type"] == "categoric"
    assert f_info["levels"] == sorted(data["f"].unique())
    assert f_info["reference"] == sorted(data["f"].unique())[0]
    assert f_info["encoding"] == "reduced"
    assert f_info["full_names"] == [f"f[{level}]" for level in sorted(data["f"].unique())[1:]]

コード例 #28

0

ファイルを表示

ファイル: test_eval_new_data.py プロジェクト: bambinos/formulae

def test_model_numeric_group(data, data2):
    """Group-specific terms compare equal after being evaluated on new data."""
    fitted = design_matrices("y ~ (x|g1)", data).group
    refreshed = fitted.evaluate_new_data(data2)

    # Only the terms are compared; the design matrices are built from different data
    # and are not checked here.
    for name in ("1|g1", "x|g1"):
        assert fitted.terms[name] == refreshed.terms[name]

コード例 #29

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_prop_response_fails():
    """Invalid arguments to prop() in the response raise ValueError."""
    invalid_cases = [
        # x larger than n
        ("prop(x, n) ~ 1", {"x": [2, 3], "n": [1, 2]}),
        # x and/or n not integer
        ("prop(x, n) ~ 1", {"x": [2, 3.3], "n": [4, 4]}),
        ("prop(x, n) ~ 1", {"x": [2, 3], "n": [4.3, 4]}),
        # x not a variable name
        ("prop(10, n) ~ 1", {"x": [2, 3], "n": [1, 2]}),
        # trials must be integer, not float
        ("prop(x, 3.4) ~ 1", {"x": [2, 3], "n": [1, 2]}),
    ]
    for formula, columns in invalid_cases:
        with pytest.raises(ValueError):
            design_matrices(formula, pd.DataFrame(columns))

コード例 #30

0

ファイルを表示

ファイル: test_design_matrices.py プロジェクト: bambinos/formulae

def test_C_aliases():
    """S(x) and T(x) give the same data as the equivalent explicit C(x, ...) calls."""
    n_obs = 100
    rng = np.random.default_rng(1234)
    data = pd.DataFrame(
        {
            "x": rng.integers(5, 10, size=n_obs),
            "g": rng.choice(["a", "b", "c"], size=n_obs),
        }
    )

    # (alias, explicit form); each formula is a single term named like the formula itself
    alias_pairs = [("S(x)", "C(x, Sum)"), ("T(x)", "C(x, Treatment)")]
    for alias, explicit in alias_pairs:
        from_alias = design_matrices(alias, data).common.terms[alias]
        from_explicit = design_matrices(explicit, data).common.terms[explicit]
        assert np.array_equal(from_alias.data, from_explicit.data)