Beispiel #1
0
def test_select_one_from_empty_rel_should_return_empty_if_not_keep_missing():
    """
    select_one on a relationship without any relation, with
    discard_empty=True, must produce a zero-row "from"/"to" dataframe
    """
    rel = Relationship(seed=1)
    unknown_ids = ["non_existing"]

    result = rel.select_one(from_ids=unknown_ids, discard_empty=True)

    # no row at all, but the two expected columns are still there
    assert result.shape == (0, 2)
    assert list(result.columns) == ["from", "to"]
Beispiel #2
0
def test_select_one_from_all_ids_should_return_one_line_per_id():
    """
    select_one called without from_ids must return every "from" id
    present in the relationship
    """
    edges = [("a", "b"), ("b", "c"), ("b", "a"), ("c", "b")]

    rel = Relationship(seed=1)
    rel.add_relations(from_ids=[src for src, _ in edges],
                      to_ids=[dst for _, dst in edges])

    picked = rel.select_one()

    seen_from_ids = set(picked["from"].unique())
    assert seen_from_ids == {"a", "b", "c"}
Beispiel #3
0
def test_select_all_function_from_empty_relationship_should_return_empty():
    """
    select_all_horizontal on a relationship without any relation must
    produce a zero-row "from"/"to" dataframe
    """
    rel = Relationship(seed=1)
    unknown_ids = ["non_existing"]

    result = rel.select_all_horizontal(from_ids=unknown_ids)

    assert result.shape == (0, 2)
    assert list(result.columns) == ["from", "to"]
Beispiel #4
0
def test_select_one_from_empty_rel_should_return_none_if_keep_missing():
    """
    select_one on a relationship without any relation, with
    discard_empty=False, must keep the requested id and pair it with None
    """
    rel = Relationship(seed=1)

    result = rel.select_one(from_ids=["non_existing"], discard_empty=False)

    assert result.shape == (1, 2)
    assert list(result.columns) == ["from", "to"]

    # the single row echoes the requested id, with no "to" side
    only_row = result.iloc[0]
    assert only_row["from"] == "non_existing"
    assert only_row["to"] is None
Beispiel #5
0
def test_select_one_nonexistingids_should_return_empty_if_not_keep_missing():
    """
    select_one with ids absent from a populated relationship, with
    discard_empty=True, must produce a zero-row "from"/"to" dataframe
    """
    edges = [("a", "b"), ("b", "c"), ("b", "a"), ("c", "b")]

    rel = Relationship(seed=1)
    rel.add_relations(from_ids=[src for src, _ in edges],
                      to_ids=[dst for _, dst in edges])

    unknown_ids = ["non_existing_id", "neither"]
    selected = rel.select_one(unknown_ids, discard_empty=True)

    assert selected.shape[0] == 0
    assert list(selected.columns) == ["from", "to"]
Beispiel #6
0
def test_weighted_relationship_should_take_weights_into_account():
    """
    select_one must honour the weights given to add_relations: a "to" id
    with weight 0 is never expected in the selection
    """

    # a, b and c are each connected to x, y and z, but only the relation
    # towards y carries a non-zero weight
    sources = ["a", "b", "c"]
    targets = ["x", "y", "z"]
    target_weights = [0, 1, 0]

    weighted = Relationship(seed=1234)
    weighted.add_relations(
        from_ids=[src for src in sources for _ in targets],
        to_ids=targets * len(sources),
        weights=target_weights * len(sources))

    picked = weighted.select_one()

    # => with those weights, only y can be selected, once per "from" id
    assert picked["to"].tolist() == ["y"] * 3
    assert sorted(picked["from"].tolist()) == sources
Beispiel #7
0
def test_weighted_relationship_should_take_overridden_weights_into_account():
    """
    weights passed to select_one through overridden_to_weights must
    replace the weights the relationship was built with
    """

    # a, b and c are each connected to x, y and z; the weights stored in
    # the relationship are 0 everywhere except towards y
    sources = ["a", "b", "c"]
    targets = ["x", "y", "z"]

    weighted = Relationship(seed=1234)
    weighted.add_relations(
        from_ids=[src for src in sources for _ in targets],
        to_ids=targets * len(sources),
        weights=[0, 1, 0] * len(sources))

    # an override holds exactly one weight per "to" value: here everything
    # is moved onto z
    override = pd.Series([0, 0, 1], index=targets)
    picked = weighted.select_one(overridden_to_weights=override)

    # the stored weights are ignored, the override drives the selection
    assert picked["to"].tolist() == ["z"] * 3
    assert sorted(picked["from"].tolist()) == sources
Beispiel #8
0
def test_add_grouped():
    """
    the add_grouped operation must create one relation per item of the
    grouped field, from the id found in the "from" field of the same row
    """
    fruits_by_box = {
        "b1": ["f11", "f12", "f13", "f14"],
        "b2": ["f21", "f22", "f23", "f24"],
    }

    story_data = pd.DataFrame({
        "boxes": list(fruits_by_box),
        "fruits": [list(fruits) for fruits in fruits_by_box.values()],
    })

    rel = Relationship(seed=1)
    operation = rel.ops.add_grouped(from_field="boxes",
                                    grouped_items_field="fruits")
    operation(story_data)

    # each box is the origin of exactly 4 relations...
    for box in fruits_by_box:
        from_side = rel.get_relations(from_ids=[box])["from"].tolist()
        assert from_side == [box] * 4

    # ... each pointing to one fruit of that box, in the original order
    for box, fruits in fruits_by_box.items():
        to_side = rel.get_relations(from_ids=[box])["to"].tolist()
        assert to_side == fruits
    def create_relationship(self, name, seed=None):
        """
        Creates an empty relationship from the members of this population
        and registers it in self.relationships under the given name.

        :param name: key under which the new relationship is stored
        :param seed: seed handed over to the new Relationship. Any falsy
            value (None, but also 0) is replaced by the next value of
            self.circus.seeder
        :return: the newly created Relationship
        :raises ValueError: if a relationship is already registered under
            that name
        """

        # this has to be a membership test (`in`): an identity test (`is`)
        # between the name and the container is never true and would let a
        # second call silently overwrite the existing relationship
        if name in self.relationships:
            raise ValueError("cannot create a second relationship with "
                             "existing name {}".format(name))

        self.relationships[name] = Relationship(
            seed=seed if seed else next(self.circus.seeder))

        return self.relationships[name]
Beispiel #10
0
def test_select_one_nonexistingids_should_insert_none_if_keep_missing():
    """
    select_one with discard_empty=False must return one row per requested
    id (duplicates included), pairing unknown ids with None
    """
    rel = Relationship(seed=1)
    rel.add_relations(from_ids=["a", "b", "b", "c"],
                      to_ids=["a1", "b1", "b2", "c1"])

    requested = ["c", "b_non_existing_id", "a", "neither", "a"]
    selected = rel.select_one(requested, discard_empty=False)

    assert selected.shape[0] == 5
    assert list(selected.columns) == ["from", "to"]

    # sorting on "from" makes the row order predictable
    ordered = selected.sort_values("from")

    expected_from = ["a", "a", "b_non_existing_id", "c", "neither"]
    expected_to = ["a1", "a1", None, "c1", None]

    assert ordered["from"].tolist() == expected_from
    assert ordered["to"].tolist() == expected_to
Beispiel #11
0
def test_select_many_with_drop_should_remove_elements():
    """
    select_many with remove_selected=True must remove every selected "to"
    value from the relations of the "from" id it was selected for
    """

    story_data_index = build_ids(5, prefix="cl_", max_length=1)

    # works on a private relationship since we're going to drop some
    # elements: a shared fixture would leak the removal into other tests
    four_to_plenty_copy = Relationship(seed=1)
    for i in range(100):
        four_to_plenty_copy.add_relations(
            from_ids=["a", "b", "c", "d"],
            to_ids=["a_%d" % i, "b_%d" % i,
                    "c_%d" % i, "d_%d" % i])

    # kept in a variable: the result rows are looked up below through the
    # labels of this series, so it is needed to recover the "from" id of
    # each row
    from_ids = pd.Series(["a", "b", "c", "b", "a"], index=story_data_index)

    selection = four_to_plenty_copy.select_many(from_ids=from_ids,
                                                named_as="selected_sets",
                                                quantities=[4, 5, 6, 7, 8],
                                                remove_selected=True,
                                                discard_empty=False)

    # makes sure all selected values have been removed
    for row_id in selection.index:
        from_id = from_ids[row_id]
        rels = four_to_plenty_copy.get_relations(from_ids=[from_id])

        # a python collection of the values: `in` applied to a pandas
        # Series looks at its index, not at its values
        remaining_tos = set(rels["to"].tolist())

        for to_id in selection.loc[row_id]["selected_sets"].tolist():
            assert to_id not in remaining_tos
Beispiel #12
0
def test_io_round_trip():
    """
    a relationship saved with save_to and read back with load_from must
    expose the same seed, "to" values, "from" keys and relations
    """

    def sorted_relations(relationship):
        # ordering on both sides makes the two dataframes comparable
        relations = relationship.get_relations()
        return relations.sort_values(["from", "to"]).reset_index()

    with path.tempdir() as tmp_dir:
        csv_path = os.path.join(tmp_dir, "relationship.csv")
        four_to_plenty.save_to(csv_path)

        reloaded = Relationship.load_from(csv_path)

        assert four_to_plenty.seed == reloaded.seed
        assert four_to_plenty.unique_tos() == reloaded.unique_tos()
        assert four_to_plenty.grouped.keys() == reloaded.grouped.keys()

        expected = sorted_relations(four_to_plenty)
        actual = sorted_relations(reloaded)

        for column in ("from", "to", "weight"):
            assert expected[column].equals(actual[column])
    def load_from(folder, circus):
        """
        Rebuilds a population from the files persisted in a folder.

        The member ids are read from "ids.csv". Each file found in the
        optional "attributes" and "relationships" sub-folders is loaded as
        one Attribute, resp. one Relationship, registered under the file
        name stripped of its last 4 characters.

        :param folder: folder containing all CSV files of this population
        :param circus: parent circus containing this population
        :return: the loaded Population
        """

        ids = pd.read_csv(os.path.join(folder, "ids.csv"),
                          index_col=0, names=[]).index

        # a missing sub-folder simply means there is nothing to load
        attributes = {}
        attribute_dir = os.path.join(folder, "attributes")
        if os.path.exists(attribute_dir):
            for file_name in os.listdir(attribute_dir):
                file_path = os.path.join(attribute_dir, file_name)
                attributes[file_name[:-4]] = Attribute.load_from(file_path)

        relationships = {}
        relationships_dir = os.path.join(folder, "relationships")
        if os.path.exists(relationships_dir):
            for file_name in os.listdir(relationships_dir):
                file_path = os.path.join(relationships_dir, file_name)
                relationships[file_name[:-4]] = Relationship.load_from(
                    file_path)

        # created with size 0, then filled in with the loaded content
        loaded = Population(circus=circus, size=0)
        loaded.attributes = attributes
        loaded.relationships = relationships
        loaded.ids = ids
        loaded.size = len(ids)

        return loaded
Beispiel #14
0
def test_pop_one_relationship_should_remove_element():
    """
    select_one with remove_selected=True must take the selected relations
    out of the relationship, so that later selections on the same ids
    come back empty (or filled with None when discard_empty=False)
    """
    # relations are removed below => the test works on its own
    # relationship so that other tests are not influenced
    sources = ["a", "b", "c", "d", "e"]
    rel = Relationship(seed=1)
    rel.add_relations(from_ids=list(sources),
                      to_ids=["t" + src for src in sources])

    left_over = {"b", "c", "e"}

    first = rel.select_one(from_ids=["a", "d"], remove_selected=True)

    # each id has a single "to" value, which must be the one returned
    assert first.sort_values("from")["to"].tolist() == ["ta", "td"]
    assert list(first.columns) == ["from", "to"]

    # and that value is gone from the relationship
    assert set(rel.grouped.keys()) == left_over

    # asking for the same ids again finds nothing
    second = rel.select_one(from_ids=["a", "d"], remove_selected=True)

    assert second.shape[0] == 0
    assert list(second.columns) == ["from", "to"]

    # and leaves the relationship as it was
    assert set(rel.grouped.keys()) == left_over

    # once more, this time keeping the ids without relation: 2 rows are
    # expected, with None on the "to" side
    third = rel.select_one(from_ids=["a", "d"],
                           remove_selected=True,
                           discard_empty=False)

    assert third.shape[0] == 2
    assert sorted(third.columns.tolist()) == ["from", "to"]
    assert third["to"].tolist() == [None, None]
    assert sorted(third["from"].tolist()) == ["a", "d"]
Beispiel #15
0
def test_seeded_relationship_should_always_return_same_selection():
    """
    two relationships built with the same seed and the same relations
    must answer the same sequence of select_one calls identically
    """

    sources = ["a", "b", "c"]
    from_ids = [src for src in sources for _ in range(3)]
    to_ids = ["%sf%d" % (src, rank) for src in sources for rank in (1, 2, 3)]

    # two relationships seeded and populated identically
    first = Relationship(seed=1345)
    second = Relationship(seed=1345)

    first.add_relations(from_ids=from_ids, to_ids=to_ids)
    second.add_relations(from_ids=from_ids, to_ids=to_ids)

    # the last query includes "d", which is not part of the relationships
    for query in (["a"], ["b"], ["a", "b", "d"]):
        from_first = first.select_one(from_ids=query)
        from_second = second.select_one(from_ids=query)
        assert from_first.equals(from_second)
Beispiel #16
0
import path
import pandas as pd
import logging
import os
import numpy as np
import functools

from trumania.core.util_functions import setup_logging
from trumania.core.util_functions import build_ids
from trumania.core.relationship import Relationship

# Module-level setup shared by the tests of this file: logging is configured
# through the project helper, then a few small seeded relationships are built
# once at import time.
setup_logging()

# 5 "from" ids, each one related to a single, distinct "to" id
oneto1 = Relationship(seed=1)
oneto1.add_relations(from_ids=pd.Series(["a", "b", "c", "d", "e"]),
                     to_ids=pd.Series(["ta", "tb", "tc", "td", "te"]))

# 4 "from" ids, all related to the same "to" id "z"
four_to_one = Relationship(seed=1)
four_to_one.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                          to_ids=pd.Series(["z", "z", "z", "z"]))

# 4 "from" ids, each one related to both "y" and "z"
four_to_two = Relationship(seed=1)
four_to_two.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                          to_ids=pd.Series(["y", "y", "y", "y"]))
four_to_two.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                          to_ids=pd.Series(["z", "z", "z", "z"]))

# 4 "from" ids; this first batch relates each of them to its own "y<id>"
two_per_from = Relationship(seed=1)
two_per_from.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                           to_ids=pd.Series(["ya", "yb", "yc", "yd"]))
two_per_from.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
Beispiel #17
0
def test_select_many_several_times_with_pop_should_empty_all_data():
    """
    repeated select_many calls with remove_selected=True must drain the
    relationship, returning shorter and shorter selections until nothing
    is left
    """

    relations_per_id = [("id1", 2500), ("id2", 1500), ("id3", 500)]

    rel = Relationship(seed=1234)
    froms = [from_id
             for from_id, count in relations_per_id
             for _ in range(count)]
    tos = np.random.choice(a=range(10), size=len(froms))
    rel.add_relations(from_ids=froms, to_ids=tos)

    assert rel.get_relations().shape[0] == 2500 + 1500 + 500

    row_labels = ["f1", "f2", "f3"]

    # 1000 values are requested for each of the 3 ids at every round; each
    # entry is (expected size of the 3 selections, relations left after)
    rounds = [
        # id3 only has 500 relations: it is exhausted right away
        ([1000, 1000, 500], 1500 + 500 + 0),
        # id2 has 500 relations left and id3 has nothing left, hence an
        # empty selection (the row is kept since discard_empty is False)
        ([1000, 500, 0], 500 + 0 + 0),
        # id1 delivers its last 500 relations: the relationship is empty
        ([500, 0, 0], 0 + 0 + 0),
        # selecting from a fully empty relationship changes nothing
        ([0, 0, 0], 0),
    ]

    for expected_sizes, expected_left in rounds:
        selection = rel.select_many(
            from_ids=pd.Series(["id1", "id2", "id3"], index=row_labels),
            named_as="the_selection",
            quantities=[1000, 1000, 1000],
            remove_selected=True,
            discard_empty=False)

        assert selection.columns.tolist() == ["the_selection"]
        assert sorted(selection.index.tolist()) == row_labels

        selection_sizes = selection["the_selection"].map(len)
        assert selection_sizes[row_labels].tolist() == expected_sizes

        # remove_selected => the relationship shrinks accordingly
        assert rel.get_relations().shape[0] == expected_left
Beispiel #18
0
def test_select_one_from_empty_relationship_should_return_void():
    """
    select_one called with an empty series of ids on a relationship
    without any relation must produce a zero-row "from"/"to" dataframe
    """
    rel = Relationship(seed=1)
    no_ids = pd.Series([])

    selected = rel.select_one(no_ids)

    assert selected.shape[0] == 0
    assert list(selected.columns) == ["from", "to"]