def test_model_categoric_common(data, data2):
    """Categoric common terms are evaluated consistently on a new data set.

    Two formulas are exercised: ``g1`` alongside an intercept and ``C(u)``
    with the intercept removed. In both cases the term stored in the original
    matrix must compare equal to the term in the re-evaluated matrix.
    """
    # Formula with an intercept.
    fitted = design_matrices("y ~ g1", data).common
    refreshed = fitted.evaluate_new_data(data2)
    expected_g1 = np.array(
        [0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]
    )
    assert fitted.terms["g1"] == refreshed.terms["g1"]
    assert np.allclose(refreshed["g1"].flatten(), expected_g1)

    # Formula without an intercept: one indicator column per row of `expected_u`.
    fitted = design_matrices("y ~ 0 + C(u)", data).common
    refreshed = fitted.evaluate_new_data(data2)
    expected_u = np.array(
        [
            [1, 0, 0],
            [0, 1, 0],
            [0, 1, 0],
            [0, 1, 0],
            [0, 1, 0],
            [0, 0, 1],
            [0, 0, 1],
            [0, 1, 0],
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [0, 0, 1],
            [0, 0, 1],
            [1, 0, 0],
            [0, 0, 1],
            [0, 1, 0],
        ]
    )
    assert fitted.terms["C(u)"] == refreshed.terms["C(u)"]
    assert (refreshed["C(u)"] == expected_u).all()
def test_model_numeric_common(data, data2):
    """Numeric common terms, with and without stateful transforms, on new data."""

    def _transform_of(common, term_name):
        # Stateful transform object attached to the first component of a term.
        return common.terms[term_name].components[0].call.stateful_transform

    # Plain function calls and bare variables are recomputed from the new data.
    predicted = design_matrices("y ~ np.exp(x) + z", data).common.evaluate_new_data(data2)
    assert np.allclose(np.exp(data2["x"]), predicted["np.exp(x)"].flatten())
    assert np.allclose(data2["z"], predicted["z"].flatten())

    fitted = design_matrices("y ~ center(x) + scale(z)", data).common
    predicted = fitted.evaluate_new_data(data2)

    # Step 1: the transforms keep the parameters learnt from the original data.
    center_mean_fit = _transform_of(fitted, "center(x)").mean
    center_mean_new = _transform_of(predicted, "center(x)").mean
    assert np.allclose(center_mean_fit, 0, atol=1)
    assert np.allclose(center_mean_fit, center_mean_new)

    scale_mean_fit = _transform_of(fitted, "scale(z)").mean
    scale_mean_new = _transform_of(predicted, "scale(z)").mean
    scale_std_fit = _transform_of(fitted, "scale(z)").std
    scale_std_new = _transform_of(predicted, "scale(z)").std
    assert np.allclose(scale_mean_fit, 0, atol=1)
    assert np.allclose(scale_std_fit, 1, atol=1)
    assert np.allclose(scale_mean_fit, scale_mean_new)
    assert np.allclose(scale_std_fit, scale_std_new)

    # Step 2: the new data is transformed with those original parameters.
    assert np.allclose(predicted["center(x)"].flatten(), data2["x"] - center_mean_fit)
    assert np.allclose(
        predicted["scale(z)"].flatten(), (data2["z"] - scale_mean_fit) / scale_std_fit
    )
def test_external_transforms(data):
    """Functions that are not built into the formula language can be called in a formula.

    One call goes through a module attribute (``np.exp``) and the other through
    a function defined locally inside this test.
    """
    exp_column = design_matrices("y ~ np.exp(x1)", data).common["np.exp(x1)"][:, 0]
    assert np.allclose(exp_column, np.exp(data["x1"]))

    # The name must stay `add_ten`: the formula string below refers to it.
    def add_ten(x):
        return x + 10

    shifted_column = design_matrices("y ~ add_ten(x1)", data).common["add_ten(x1)"][:, 0]
    assert np.allclose(shifted_column, data["x1"] + 10)
def test_interactions(data):
    """Interaction terms: operator expansion and the encoding chosen for categorics."""
    # "f * g" and "f + g + f:g" describe the same model, with or without intercept.
    equivalent_formulas = [
        ("y ~ f * g", "y ~ f + g + f:g"),
        ("y ~ 0 + f * g", "y ~ 0 + f + g + f:g"),
    ]
    for shorthand, expanded in equivalent_formulas:
        dm_shorthand = design_matrices(shorthand, data)
        dm_expanded = design_matrices(expanded, data)
        assert compare_dicts(dm_expanded.common.terms_info, dm_shorthand.common.terms_info)

    # Mix of numeric/categoric. Every case lists: the formula, the expected term
    # names, the encoding of "g" as a main effect, and the encoding of "g" inside
    # each interaction it takes part in.
    encoding_cases = [
        # "g" in "x1:g" is reduced because x1 is present in the formula
        (
            "y ~ x1 + g + x1:g",
            ["Intercept", "x1", "g", "x1:g"],
            "reduced",
            {"x1:g": "reduced"},
        ),
        # "g" in "x1:g" is full because x1 is not present in the formula
        (
            "y ~ g + x1:g",
            ["Intercept", "g", "x1:g"],
            "reduced",
            {"x1:g": "full"},
        ),
        # "g" in "x1:x2:g" is full: x1:x2 is a new group that is not in the model
        (
            "y ~ x1 + g + x1:g + x1:x2:g",
            ["Intercept", "x1", "g", "x1:g", "x1:x2:g"],
            "reduced",
            {"x1:g": "reduced", "x1:x2:g": "full"},
        ),
        # "g" in "x1:x2:g" is reduced: x1:x2 is a new group that is in the model
        (
            "y ~ x1 + g + x1:x2 + x1:g + x1:x2:g",
            ["Intercept", "x1", "g", "x1:x2", "x1:g", "x1:x2:g"],
            "reduced",
            {"x1:g": "reduced", "x1:x2:g": "reduced"},
        ),
        # No intercept, no x1 and no x1:x2: every "g" is full
        (
            "y ~ 0 + g + x1:g + x1:x2:g",
            ["g", "x1:g", "x1:x2:g"],
            "full",
            {"x1:g": "full", "x1:x2:g": "full"},
        ),
    ]
    for formula, term_names, main_encoding, nested_encodings in encoding_cases:
        info = design_matrices(formula, data).common.terms_info
        assert list(info.keys()) == term_names
        assert info["g"]["type"] == "categoric"
        assert info["g"]["encoding"] == main_encoding
        for interaction, encoding in nested_encodings.items():
            assert info[interaction]["terms"]["g"]["encoding"] == encoding

    # Two numerics: the interaction column is the element-wise product.
    numeric_dm = design_matrices("y ~ x1:x2", data)
    assert "x1:x2" in numeric_dm.common.terms_info.keys()
    assert np.allclose(numeric_dm.common["x1:x2"][:, 0], data["x1"] * data["x2"])
def test_interactions(data):
    """Interaction terms: operator expansion and whether categorics span the intercept."""
    # "f * g" and "f + g + f:g" describe the same model...
    shorthand = design_matrices("y ~ f * g", data)
    expanded = design_matrices("y ~ f + g + f:g", data)
    assert expanded.common.terms == shorthand.common.terms

    # ...and the same holds when the intercept is removed.
    shorthand = design_matrices("y ~ 0 + f * g", data)
    expanded = design_matrices("y ~ 0 + f + g + f:g", data)
    assert expanded.common.terms == shorthand.common.terms

    # Mix of numeric/categoric.
    # "g" alone does not span the intercept; neither does "g" within "x1:g",
    # because x1 is present in the formula.
    terms = design_matrices("y ~ x1 + g + x1:g", data).common.terms
    assert list(terms) == ["Intercept", "x1", "g", "x1:g"]
    main_g = terms["g"]
    assert main_g.kind == "categoric"
    assert main_g.spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is False

    # "g" alone does not span the intercept, but "g" within "x1:g" does,
    # because x1 is not present in the formula.
    terms = design_matrices("y ~ g + x1:g", data).common.terms
    assert list(terms) == ["Intercept", "g", "x1:g"]
    main_g = terms["g"]
    assert main_g.kind == "categoric"
    assert main_g.spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is True

    # "g" within "x1:x2:g" spans the intercept: x1:x2 is a new group that is not
    # in the model.
    terms = design_matrices("y ~ x1 + g + x1:g + x1:x2:g", data).common.terms
    assert list(terms) == ["Intercept", "x1", "g", "x1:g", "x1:x2:g"]
    main_g = terms["g"]
    assert main_g.kind == "categoric"
    assert main_g.spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is False
    assert terms["x1:x2:g"].components[2].spans_intercept is True

    # "g" within "x1:x2:g" does not span the intercept: x1:x2 is a new group that
    # is in the model.
    terms = design_matrices("y ~ x1 + g + x1:x2 + x1:g + x1:x2:g", data).common.terms
    assert list(terms) == ["Intercept", "x1", "g", "x1:x2", "x1:g", "x1:x2:g"]
    main_g = terms["g"]
    assert main_g.kind == "categoric"
    assert main_g.spans_intercept is False
    assert terms["x1:g"].components[1].spans_intercept is False
    assert terms["x1:x2:g"].components[2].spans_intercept is False

    # Without intercept, x1 and x1:x2, every "g" spans the intercept.
    terms = design_matrices("y ~ 0 + g + x1:g + x1:x2:g", data).common.terms
    assert list(terms) == ["g", "x1:g", "x1:x2:g"]
    main_g = terms["g"]
    assert main_g.kind == "categoric"
    assert main_g.spans_intercept is True
    assert terms["x1:g"].components[1].spans_intercept is True
    assert terms["x1:x2:g"].components[2].spans_intercept is True

    # Two numerics: the interaction column is the element-wise product.
    numeric_common = design_matrices("y ~ x1:x2", data).common
    assert "x1:x2" in numeric_common.terms
    assert np.allclose(numeric_common["x1:x2"][:, 0], data["x1"] * data["x2"])
def test_predict_prop(beetle):
    """Evaluating a ``prop()`` response on new data returns the number of trials."""
    trials = [10, 10, 30, 30]

    # Trials given as a variable: the values are read from the new data set.
    response = design_matrices("prop(y, n) ~ x", beetle).response
    evaluated = response.evaluate_new_data(pd.DataFrame({"n": trials}))
    assert (evaluated == np.array([10, 10, 30, 30])).all()

    # Trials given as a constant: that constant is returned for every row.
    response = design_matrices("prop(y, 70) ~ x", beetle).response
    evaluated = response.evaluate_new_data(pd.DataFrame({"n": trials}))
    assert (evaluated == np.array([70, 70, 70, 70])).all()
def test_predict_offset(beetle):
    """Evaluating an ``offset()`` term on new data yields a single column."""
    # Offset given as a variable: the values are read from the new data set.
    common = design_matrices("y ~ x + offset(x)", beetle).common
    evaluated = common.evaluate_new_data(pd.DataFrame({"x": [1, 2, 3]}))
    offset_column = evaluated["offset(x)"]
    assert (offset_column == np.array([[1], [2], [3]])).all()

    # Offset given as a constant: that constant is returned for every row.
    common = design_matrices("y ~ x + offset(10)", beetle).common
    evaluated = common.evaluate_new_data(pd.DataFrame({"x": [1, 2, 3]}))
    offset_column = evaluated["offset(10)"]
    assert (offset_column == np.array([[10], [10], [10]])).all()
def test_categoric_group_specific():
    """Build group-specific terms whose expression is a categoric call.

    As written this is a smoke test: it only checks that the design matrices
    can be built and that the group terms can be listed.
    """
    n_obs = 30
    data = pd.DataFrame(
        {
            "BP": np.random.normal(size=n_obs),
            "BMI": np.random.normal(size=n_obs),
            "age_grp": np.random.choice([0, 1, 2], size=n_obs),
        }
    )

    # NOTE(review): the two comparisons below are evaluated and then discarded;
    # there is no `assert`, so they can never fail. Before asserting them,
    # confirm that the expected names still match what the library produces.
    with_intercept = design_matrices("BP ~ 0 + (C(age_grp)|BMI)", data)
    list(with_intercept.group.terms.keys()) == [
        "1|BMI",
        "C(age_grp)[1]|BMI",
        "C(age_grp)[2]|BMI",
    ]

    without_intercept = design_matrices("BP ~ 0 + (0 + C(age_grp)|BMI)", data)
    list(without_intercept.group.terms) == [
        "C(age_grp)[0]|BMI",
        "C(age_grp)[1]|BMI",
        "C(age_grp)[2]|BMI",
    ]
def test_categoric_responses():
    """Categoric responses, with and without the level-subset notation."""
    n_obs = 30
    data = pd.DataFrame(
        {
            "y1": np.random.choice(["A", "B", "C"], size=n_obs),
            "y2": np.random.choice(["A", "B"], size=n_obs),
            "y3": np.random.choice(["Hi there", "Bye bye", "What??"], size=n_obs),
            "x": np.random.normal(size=n_obs),
        }
    )

    # (formula, expected `levels`). `None` is expected whenever a level is selected
    # explicitly with the subset notation.
    expectations = [
        # Multi-level response: a design matrix of dummies that span the intercept
        ("y1 ~ x", ["A", "B", "C"]),
        # Multi-level response, explicitly converted to binary
        ("y1['A'] ~ x", None),
        # Two levels, but not flagged as binary because it was not converted.
        # TODO: Revisit if this logic is fine
        ("y2 ~ x", ["A", "B"]),
        # Binary response with explicit level
        ("y2['B'] ~ x", None),
        # Binary response with explicit level passed as identifier
        ("y2[B] ~ x", None),
        # Binary response with explicit level with spaces
        ("y3['Bye bye'] ~ x", None),
    ]
    for formula, expected_levels in expectations:
        response = design_matrices(formula, data).response
        assert list(np.unique(response.design_matrix)) == [0, 1]
        if expected_levels is None:
            assert response.levels is None
        else:
            assert response.levels == expected_levels

    # Nested brackets are rejected by the parser
    nested_message = re.escape("Are you using nested brackets? Why?")
    with pytest.raises(ParseError, match=nested_message):
        design_matrices("y3[A[B]] ~ x", data)

    # A number is not a valid level in the subset notation
    number_message = re.escape("Subset notation only allows a string or an identifer")
    with pytest.raises(ParseError, match=number_message):
        design_matrices("y3[1] ~ x", data)
def test_common_predictor(data):
    """Common numeric and categoric predictors expose the expected term metadata."""
    dm = design_matrices("y ~ x1", data)
    assert list(dm.common.terms) == ["Intercept", "x1"]
    assert dm.common.terms["x1"].kind == "numeric"
    assert dm.common.terms["x1"].labels == ["x1"]
    assert dm.common.terms["x1"].levels is None

    # 'f' does not span the intercept because the intercept is already included,
    # so the first of the sorted levels is left out of both labels and levels.
    dm = design_matrices("y ~ f", data)
    sorted_levels = sorted(data["f"].unique())
    assert list(dm.common.terms) == ["Intercept", "f"]
    assert dm.common.terms["f"].kind == "categoric"
    assert dm.common.terms["f"].labels == [f"f[{level}]" for level in sorted_levels[1:]]
    assert dm.common.terms["f"].levels == sorted_levels[1:]
    # Identity check rather than `== False`, matching how `spans_intercept` is
    # asserted in the interaction tests of this module.
    assert dm.common.terms["f"].spans_intercept is False
def test_design_matrices_categoric_call(data):
    """Smoke test: user functions that return categoricals can be used in formulas.

    Nothing is asserted; the test passes as long as no call raises.
    """

    # The names `f` and `f_ordered` must stay: the formula strings refer to them.
    def f(values):
        return pd.Categorical(values)

    def f_ordered(values):
        observed_levels = np.unique(values)
        return pd.Categorical(values, categories=observed_levels, ordered=True)

    # Intercept present: the categoric terms do not span it
    design_matrices("y ~ f(x3) + f_ordered(g)", data)

    # No intercept: the categoric term spans it
    without_intercept = design_matrices("y ~ 0 + f(g)", data)

    # The fitted matrix can be evaluated on (here, the same) data
    without_intercept.common.evaluate_new_data(data)
def test_categorical_ordered_series():
    """Ordered categorical columns keep their declared category order."""
    category_order = list("bcda")
    data = pd.DataFrame({"x": list("abcd") * 10})
    data["x"] = pd.Categorical(data["x"], category_order, ordered=True)

    # It works at all
    design_matrices("S(x)", data)

    # It works and the levels follow the declared order ("b", "c", "d", "a"):
    # (formula, expected levels); the term name equals the formula text.
    expectations = [
        ("x", ["c", "d", "a"]),
        ("T(x)", ["c", "d", "a"]),
        ("S(x)", ["b", "c", "d"]),
    ]
    for formula, expected_levels in expectations:
        levels = design_matrices(formula, data).common.terms[formula].levels
        assert levels == expected_levels
def test_attempt_to_evaluate_non_proportion_response(data):
    """Evaluating a numeric response on new data raises a ``ValueError``."""
    response, _common, _group = design_matrices("y ~ x1", data)
    expected_message = "Can't evaluate response term with kind different to 'proportion'"
    with pytest.raises(ValueError, match=expected_message):
        response.evaluate_new_data(data)
def test_group_specific_intercept_only(data):
    """A model with only a group-specific intercept has one group term and no common part."""
    dm = design_matrices("y ~ 0 + (1|g)", data)
    assert len(dm.group.terms) == 1

    intercept_term = dm.group.terms["1|g"]
    assert intercept_term.kind == "intercept"
    assert intercept_term.groups == ["A", "B"]
    assert intercept_term.labels == ["1|g[A]", "1|g[B]"]

    # Identity check: `== None` would defer to a custom `__eq__` if one exists.
    assert dm.common is None
def test_extra_namespace(data):
    """Names supplied through ``extra_namespace`` can be called from the formula."""
    dm = design_matrices(
        "y ~ myfunc(x3) + x3",
        data,
        na_action="drop",
        extra_namespace={"myfunc": np.log},
    )
    frame = dm.common.as_dataframe()
    # `myfunc` is bound to np.log, so its column must equal the log of the x3 column.
    assert frame["myfunc(x3)"].equals(np.log(frame["x3"]))
def test_bs_categorical_interaction():
    """Levels of a ``bs()``-by-categoric interaction combine basis index and level."""
    states = ["lonely", "depressed", "hopeful", "stressed", "positive", "isolated"]
    data = pd.DataFrame(
        {
            "state": np.tile(states, 10),
            "time": np.repeat(np.arange(0, 5), 12),
        }
    )
    dm = design_matrices("0 + bs(time, degree=2, df=3) : state", data)

    # Three basis indices (0, 1, 2), each paired with the alphabetically sorted states.
    expected_levels = [f"{basis}, {state}" for basis in range(3) for state in sorted(states)]
    assert dm.common.terms["bs(time, degree = 2, df = 3):state"].levels == expected_levels
def test_common_as_data_frame(data):
    """The common matrix converts to a data frame with one column per term."""
    _response, common, _group = design_matrices("g ~ x1 + x2", data)
    frame = common.as_dataframe()

    assert frame.columns.tolist() == ["Intercept", "x1", "x2"]
    assert (frame["Intercept"] == 1).all()
    # Numeric predictors are carried over unchanged.
    for column in ("x1", "x2"):
        assert (frame[column] == data[column]).all()
def test_common_intercept_only_model(data):
    """An intercept-only model has a single common term, all ones, and no group part."""
    dm = design_matrices("y ~ 1", data)
    assert len(dm.common.terms) == 1

    intercept_term = dm.common.terms["Intercept"]
    assert intercept_term.kind == "intercept"
    assert intercept_term.labels == ["Intercept"]

    # Element-wise reduction: the builtin `all()` iterates over rows and only
    # works while every row holds exactly one value.
    assert (dm.common.design_matrix == 1).all()

    # Identity check: `== None` would defer to a custom `__eq__` if one exists.
    assert dm.group is None
def test_common_intercept_only_model(data):
    """An intercept-only model has a single common term, all ones, and no group part."""
    dm = design_matrices("y ~ 1", data)
    assert len(dm.common.terms_info) == 1

    intercept_info = dm.common.terms_info["Intercept"]
    assert intercept_info["type"] == "intercept"
    assert intercept_info["full_names"] == ["Intercept"]

    # Element-wise reduction: the builtin `all()` iterates over rows and only
    # works while every row holds exactly one value.
    assert (dm.common.design_matrix == 1).all()

    # Identity check: `== None` would defer to a custom `__eq__` if one exists.
    assert dm.group is None
def test_group_specific_intercept_only(data):
    """A model with only a group-specific intercept has one group term and no common part."""
    dm = design_matrices("y ~ 0 + (1|g)", data)
    assert len(dm.group.terms_info) == 1

    intercept_info = dm.group.terms_info["1|g"]
    assert intercept_info["type"] == "intercept"
    assert intercept_info["groups"] == ["A", "B"]
    assert intercept_info["full_names"] == ["1|g[A]", "1|g[B]"]

    # Identity check: `== None` would defer to a custom `__eq__` if one exists.
    assert dm.common is None
def test_components_arent_shared():
    """
    Components used in the full interaction operator used to be shared between terms.
    Sharing may save space and time, but it results in unexpected behavior when a
    component needs a different encoding in each of the terms it belongs to.
    """
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.normal(size=100),
            "g": np.random.choice(["A", "B", "C"], size=100),
        }
    )

    common = design_matrices("y ~ 0 + x*g", data).common
    # Main-effect components must be distinct objects from the interaction's components.
    assert common.terms["x"].components[0] is not common.terms["x:g"].components[0]
    assert common.terms["g"].components[0] is not common.terms["x:g"].components[1]

    # Only `new_data` is bound here; the original also rebound `data` through a
    # chained assignment, which nothing used afterwards.
    new_data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.normal(size=100),
            "g": np.random.choice(["A", "B", "C"], size=100),
        }
    )
    new_common = common.evaluate_new_data(new_data)
    assert new_common.design_matrix.shape[1] == 6
def test_categoric_encoding_with_numeric_interaction():
    """Encodings chosen for categorics that appear only inside interaction terms."""
    np.random.seed(1234)
    size = 20
    columns = {
        "y": np.random.uniform(size=size),
        "x1": np.random.uniform(size=size),
        "x2": np.random.uniform(size=size),
        "x3": [1, 2, 3, 4] * 5,
        "f": np.random.choice(["A", "B"], size=size),
        "g": np.random.choice(["A", "B"], size=size),
        "h": np.random.choice(["A", "B"], size=size),
        "j": np.random.choice(["A", "B"], size=size),
    }
    data = pd.DataFrame(columns)

    info = design_matrices("y ~ x1 + x2 + f:g + h:j:x2", data).common.terms_info
    # "g" and "j" show up as terms of their own although the formula never lists them.
    assert list(info.keys()) == ["Intercept", "x1", "x2", "g", "f:g", "j", "h:j:x2"]

    assert info["g"]["encoding"] == "reduced"
    f_by_g = info["f:g"]
    assert f_by_g["type"] == "interaction"
    assert f_by_g["terms"]["f"]["encoding"] == "reduced"
    assert f_by_g["terms"]["g"]["encoding"] == "full"
    assert f_by_g["full_names"] == ["f[B]:g[A]", "f[B]:g[B]"]

    assert info["j"]["encoding"] == "reduced"
    h_by_j_by_x2 = info["h:j:x2"]["terms"]
    assert h_by_j_by_x2["h"]["encoding"] == "reduced"
    assert h_by_j_by_x2["j"]["encoding"] == "full"
    assert h_by_j_by_x2["x2"]["type"] == "numeric"
def test_nested_transform(data, data2):
    """Stateful transforms nested inside other calls keep their original parameters."""
    # center() wrapped by I(): the mean must come from the original data.
    term_name = "I(center(x) ** 2)"
    observed = design_matrices(term_name, data).common.evaluate_new_data(data2)[term_name]
    expected = (data2["x"] - data["x"].mean()) ** 2
    assert np.allclose(observed.flatten(), np.array(expected).flatten())

    # scale() wrapping an external function call combined with a binary operator:
    # mean and standard deviation must come from the original data.
    term_name = "scale(np.exp(x) + 1)"
    observed = design_matrices(term_name, data).common.evaluate_new_data(data2)[term_name]
    original_values = np.exp(data["x"]) + 1
    expected = (np.exp(data2["x"]) + 1 - np.mean(original_values)) / np.std(original_values)
    assert np.allclose(observed.flatten(), np.array(expected).flatten())
def test_model_categoric_group(data, data2):
    """A categoric group-specific term is evaluated consistently on new data."""
    fitted = design_matrices("y ~ (0 + g1|g2)", data).group
    refreshed = fitted.evaluate_new_data(data2)
    assert fitted.terms["g1|g2"] == refreshed.terms["g1|g2"]

    # Expected matrix for the new data: exactly one active column per row.
    expected = np.array(
        [
            [1, 0, 0, 0],
            [0, 0, 0, 1],
            [0, 1, 0, 0],
            [0, 0, 0, 1],
            [0, 1, 0, 0],
            [0, 0, 1, 0],
            [1, 0, 0, 0],
            [0, 0, 1, 0],
            [0, 1, 0, 0],
            [0, 0, 0, 1],
            [0, 1, 0, 0],
            [0, 0, 1, 0],
            [1, 0, 0, 0],
            [0, 0, 0, 1],
            [1, 0, 0, 0],
            [0, 0, 0, 1],
        ]
    )
    assert (refreshed["g1|g2"] == expected).all()
def test_interactions_in_group_specific(pixel):
    """Group-specific terms built from categorics and interactions between categorics.

    The terms under test have the following characteristics:

    1. expr=categoric, factor=categoric
    2. expr=intercept, factor=categoric
    3. expr=intercept, factor=interaction between categorics

    The reference design matrices are loaded from text files stored next to this
    module. The encoding is checked implicitly when the labels are compared.
    """
    from os.path import dirname, join

    data_dir = join(dirname(__file__), "data/group_specific")
    reference = {
        stem: np.loadtxt(join(data_dir, f"{stem}.txt"))
        for stem in (
            "slope_by_dog",
            "intercept_by_side",
            "intercept_by_side_dog",
            "dog_and_side_by_day",
        )
    }
    day_levels = [2, 4, 6]
    dog_levels = [1, 2, 3]
    side_levels = ["L", "R"]

    group = design_matrices("pixel ~ day + (0 + day | Dog) + (1 | Side/Dog)", pixel).group

    # Values in the design matrix
    assert (group["day|Dog"] == reference["slope_by_dog"]).all()
    assert (group["1|Side"] == reference["intercept_by_side"]).all()
    assert (group["1|Side:Dog"] == reference["intercept_by_side_dog"]).all()

    # Labels
    assert group.terms["day|Dog"].labels == [
        f"day[{day}]|Dog[{dog}]" for dog in dog_levels for day in day_levels
    ]
    assert group.terms["1|Side"].labels == [f"1|Side[{side}]" for side in side_levels]
    assert group.terms["1|Side:Dog"].labels == [
        f"1|Side[{side}]:Dog[{dog}]" for side in side_levels for dog in dog_levels
    ]

    # Another design matrix: the interaction is now the expression, not the factor
    group = design_matrices("(0 + Dog:Side | day)", pixel).group

    # Values in the design matrix
    assert (group["Dog:Side|day"] == reference["dog_and_side_by_day"]).all()

    # Labels
    assert group.terms["Dog:Side|day"].labels == [
        f"Dog[{dog}]:Side[{side}]|day[{day}]"
        for day in day_levels
        for dog in dog_levels
        for side in side_levels
    ]
def test_response_repr_and_str(data):
    """``str()`` and ``repr()`` of a response matrix produce the same summary text."""
    # Numeric response
    response, _, _ = design_matrices("y ~ x1", data)
    text = (
        "ResponseMatrix \n"
        " name: y\n"
        " kind: numeric\n"
        " shape: (20,)\n\n"
        "To access the actual design matrix do 'np.array(this_obj)'"
    )
    assert str(response) == text
    assert repr(response) == text

    # Categoric response (this check used to appear twice, verbatim)
    response, _, _ = design_matrices("g ~ x1", data)
    text = (
        "ResponseMatrix \n"
        " name: g\n"
        " kind: categoric\n"
        " shape: (20, 2)\n"
        " levels: ['A', 'B']\n\n"
        "To access the actual design matrix do 'np.array(this_obj)'"
    )
    assert str(response) == text
    assert repr(response) == text

    # Categoric response reduced to one level with the subset notation
    response, _, _ = design_matrices("g[A] ~ x1", data)
    text = (
        "ResponseMatrix \n"
        " name: g\n"
        " kind: categoric\n"
        " length: shape: (20,)\n\n"
        "To access the actual design matrix do 'np.array(this_obj)'"
    )
    # NOTE(review): this comparison is evaluated but not asserted, so it only
    # checks that `str()` does not raise. The expected text looks stale
    # ("length: shape:"); confirm the real output before turning it into an assert.
    str(response) == text
def test_common_predictor(data):
    """Common numeric and categoric predictors expose the expected ``terms_info``."""
    numeric_info = design_matrices("y ~ x1", data).common.terms_info
    assert list(numeric_info.keys()) == ["Intercept", "x1"]
    assert numeric_info["x1"]["type"] == "numeric"
    assert numeric_info["x1"]["full_names"] == ["x1"]

    # Levels follow alphabetic order, the reference is the first level by default,
    # and the encoding is reduced because the intercept is included.
    categoric_info = design_matrices("y ~ f", data).common.terms_info
    sorted_levels = sorted(data["f"].unique())
    assert list(categoric_info.keys()) == ["Intercept", "f"]

    f_info = categoric_info["f"]
    assert f_info["type"] == "categoric"
    assert f_info["levels"] == sorted_levels
    assert f_info["reference"] == sorted_levels[0]
    assert f_info["encoding"] == "reduced"
    assert f_info["full_names"] == [f"f[{level}]" for level in sorted_levels[1:]]
def test_model_numeric_group(data, data2):
    """Numeric group-specific terms compare equal after evaluation on new data."""
    fitted = design_matrices("y ~ (x|g1)", data).group
    refreshed = fitted.evaluate_new_data(data2)

    # NOTE: the terms are the same, but the design matrices change because they
    # are built from different data.
    for term_name in ("1|g1", "x|g1"):
        assert fitted.terms[term_name] == refreshed.terms[term_name]
def test_prop_response_fails():
    """Invalid ``prop()`` responses are rejected with a ``ValueError``."""
    # Every case pairs a formula with the columns of the data frame it is built on.
    invalid_cases = [
        # x larger than n
        ("prop(x, n) ~ 1", {"x": [2, 3], "n": [1, 2]}),
        # x not integer
        ("prop(x, n) ~ 1", {"x": [2, 3.3], "n": [4, 4]}),
        # n not integer
        ("prop(x, n) ~ 1", {"x": [2, 3], "n": [4.3, 4]}),
        # x not a variable name
        ("prop(10, n) ~ 1", {"x": [2, 3], "n": [1, 2]}),
        # constant trials must be integer, not float
        ("prop(x, 3.4) ~ 1", {"x": [2, 3], "n": [1, 2]}),
    ]
    for formula, columns in invalid_cases:
        with pytest.raises(ValueError):
            design_matrices(formula, pd.DataFrame(columns))
def test_C_aliases():
    """``S(x)`` and ``T(x)`` produce the same data as the matching ``C(x, ...)`` calls."""
    size = 100
    rng = np.random.default_rng(1234)
    data = pd.DataFrame(
        {
            "x": rng.integers(5, 10, size=size),
            "g": rng.choice(["a", "b", "c"], size=size),
        }
    )

    # (alias, explicit call); the term name equals the formula text in both cases.
    alias_pairs = [
        ("S(x)", "C(x, Sum)"),
        ("T(x)", "C(x, Treatment)"),
    ]
    for alias, explicit in alias_pairs:
        alias_term = design_matrices(alias, data).common.terms[alias]
        explicit_term = design_matrices(explicit, data).common.terms[explicit]
        assert np.array_equal(alias_term.data, explicit_term.data)