def test_select_one_from_empty_rel_should_return_empty_if_not_keep_missing():
    """Unknown id on an empty relationship is dropped when discarding empties.

    With ``discard_empty=True`` the result must have no rows but still expose
    the two "from" / "to" columns.
    """
    expected_columns = ["from", "to"]
    relationship = Relationship(seed=1)

    result = relationship.select_one(
        from_ids=["non_existing"],
        discard_empty=True,
    )

    assert result.shape == (0, len(expected_columns))
    assert list(result.columns) == expected_columns
def test_select_one_from_all_ids_should_return_one_line_per_id():
    """``select_one`` without ids covers every "from" id of the relationship."""
    relationship = Relationship(seed=1)
    relationship.add_relations(
        from_ids=["a", "b", "b", "c"],
        to_ids=["b", "c", "a", "b"],
    )

    result = relationship.select_one()

    # every distinct "from" id that was added must show up in the selection
    picked_sources = set(result["from"])
    assert picked_sources == {"a", "b", "c"}
def test_select_one_from_empty_rel_should_return_none_if_keep_missing():
    """Unknown id on an empty relationship is kept with a ``None`` target.

    With ``discard_empty=False`` the requested id must appear in the result,
    paired with ``None`` in the "to" column.
    """
    relationship = Relationship(seed=1)

    result = relationship.select_one(
        from_ids=["non_existing"],
        discard_empty=False,
    )

    assert result.shape == (1, 2)
    assert list(result.columns) == ["from", "to"]

    only_row = result.iloc[0]
    assert only_row["from"] == "non_existing"
    assert only_row["to"] is None
def test_select_one_nonexistingids_should_return_empty_if_not_keep_missing():
    """Ids absent from a populated relationship are dropped when discarding."""
    relationship = Relationship(seed=1)
    relationship.add_relations(
        from_ids=["a", "b", "b", "c"],
        to_ids=["b", "c", "a", "b"],
    )

    # none of the requested ids was ever added as a "from" id
    unknown_ids = ["non_existing_id", "neither"]
    outcome = relationship.select_one(unknown_ids, discard_empty=True)

    assert len(outcome) == 0
    assert list(outcome.columns) == ["from", "to"]
def test_pop_one_relationship_should_remove_element():
    """``remove_selected=True`` pops the selected relations for good.

    Covers three consecutive selections of the same ids: the first one
    returns and removes them, the second one finds nothing, and the third one
    (with ``discard_empty=False``) reports them with ``None`` targets.
    """
    expected_columns = ["from", "to"]
    remaining_sources = {"b", "c", "e"}

    # this test removes relations, so it builds its own dedicated instance
    # instead of sharing one with other tests
    one_to_one = Relationship(seed=1)
    one_to_one.add_relations(
        from_ids=["a", "b", "c", "d", "e"],
        to_ids=["ta", "tb", "tc", "td", "te"],
    )

    first_pick = one_to_one.select_one(
        from_ids=["a", "d"],
        remove_selected=True,
    )

    # each "from" has a single "to", so that unique value must be returned
    assert first_pick.sort_values("from")["to"].tolist() == ["ta", "td"]
    assert list(first_pick.columns) == expected_columns

    # and removed from the relationship
    assert set(one_to_one.grouped.keys()) == remaining_sources

    # asking for the same ids again should just return nothing...
    second_pick = one_to_one.select_one(
        from_ids=["a", "d"],
        remove_selected=True,
    )
    assert len(second_pick) == 0
    assert list(second_pick.columns) == expected_columns

    # ...and leave the relationship untouched
    assert set(one_to_one.grouped.keys()) == remaining_sources

    # asking once more, this time keeping empty results, should return a
    # 2-row dataframe whose "to" values are None
    third_pick = one_to_one.select_one(
        from_ids=["a", "d"],
        remove_selected=True,
        discard_empty=False,
    )
    assert len(third_pick) == 2
    assert sorted(third_pick.columns.tolist()) == expected_columns
    assert third_pick["to"].tolist() == [None, None]
    assert sorted(third_pick["from"].tolist()) == ["a", "d"]
def test_weighted_relationship_should_take_weights_into_account():
    """Zero-weighted targets are never picked by ``select_one``."""
    sources = [src for src in ("a", "b", "c") for _ in range(3)]
    targets = ["x", "y", "z"] * 3
    # a, b and c are all connected to x, y and z, but the weight is 0
    # everywhere except towards y
    weights = [0, 1, 0] * 3

    weighted = Relationship(seed=1234)
    weighted.add_relations(from_ids=sources, to_ids=targets, weights=weights)

    picked = weighted.select_one()

    # => with those weights, only y can be selected
    assert picked["to"].tolist() == ["y", "y", "y"]
    assert sorted(picked["from"].tolist()) == ["a", "b", "c"]
def test_weighted_relationship_should_take_overridden_weights_into_account():
    """Weights passed to ``select_one`` replace the ones stored at creation."""
    sources = [src for src in ("a", "b", "c") for _ in range(3)]
    targets = ["x", "y", "z"] * 3
    # a, b and c are all connected to x, y and z, but the stored weight is 0
    # everywhere except towards y
    stored_weights = [0, 1, 0] * 3

    weighted = Relationship(seed=1234)
    weighted.add_relations(
        from_ids=sources,
        to_ids=targets,
        weights=stored_weights,
    )

    # an override only carries one weight per "to" value; here all the
    # weight goes to z
    override = pd.Series(data=[0, 0, 1], index=["x", "y", "z"])

    picked = weighted.select_one(overridden_to_weights=override)

    # the stored weights must have been ignored in favour of the override
    assert picked["to"].tolist() == ["z", "z", "z"]
    assert sorted(picked["from"].tolist()) == ["a", "b", "c"]
def test_select_one_nonexistingids_should_insert_none_if_keep_missing():
    """Unknown ids get a ``None`` target when ``discard_empty=False``.

    The request mixes known ids (one of them repeated) with unknown ones; the
    result must contain one row per requested id.
    """
    relationship = Relationship(seed=1)
    relationship.add_relations(
        from_ids=["a", "b", "b", "c"],
        to_ids=["a1", "b1", "b2", "c1"],
    )

    requested = ["c", "b_non_existing_id", "a", "neither", "a"]
    outcome = relationship.select_one(requested, discard_empty=False)

    assert len(outcome) == 5
    assert list(outcome.columns) == ["from", "to"]

    # compare on a "from"-sorted view so row order does not matter
    by_source = outcome.sort_values("from")
    expected_sources = ["a", "a", "b_non_existing_id", "c", "neither"]
    expected_targets = ["a1", "a1", None, "c1", None]
    assert by_source["from"].tolist() == expected_sources
    assert by_source["to"].tolist() == expected_targets
def test_seeded_relationship_should_always_return_same_selection():
    """Two identically seeded relationships make identical selections."""
    sources = ["a", "a", "a", "b", "b", "b", "c", "c", "c"]
    targets = ["af1", "af2", "af3", "bf1", "bf2", "bf3", "cf1", "cf2", "cf3"]

    # two relationships built with the same seed and the same content
    first = Relationship(seed=1345)
    second = Relationship(seed=1345)
    first.add_relations(from_ids=sources, to_ids=targets)
    second.add_relations(from_ids=sources, to_ids=targets)

    # the same sequence of requests must give equal frames on both sides;
    # each call receives its own list, as separate literals would
    for requested in (["a"], ["b"], ["a", "b", "d"]):
        from_first = first.select_one(from_ids=list(requested))
        from_second = second.select_one(from_ids=list(requested))
        assert from_first.equals(from_second)
def test_select_one_from_empty_relationship_should_return_void():
    """Selecting with no ids on an empty relationship returns an empty frame.

    The result must have no rows and still expose the "from" / "to" columns.
    """
    tested = Relationship(seed=1)

    # The dtype is given explicitly: a dtype-less empty Series triggers a
    # deprecation warning on pandas 1.x and its default dtype differs between
    # pandas versions. ``object`` matches the string ids used by these tests.
    no_ids = pd.Series([], dtype=object)
    result = tested.select_one(no_ids)

    assert result.shape[0] == 0
    assert result.columns.tolist() == ["from", "to"]