Exemple #1
0
def test_select_one_from_all_ids_should_return_one_line_per_id():
    """Calling select_one() with no argument must cover every "from" id."""
    relationship = Relationship(seed=1)
    relationship.add_relations(
        from_ids=["a", "b", "b", "c"],
        to_ids=["b", "c", "a", "b"])

    picked = relationship.select_one()

    # all three distinct origins must be represented in the result
    distinct_origins = set(picked["from"].unique())
    assert distinct_origins == {"a", "b", "c"}
Exemple #2
0
def test_select_one_nonexistingids_should_return_empty_if_not_keep_missing():
    """Unknown ids with discard_empty=True must yield an empty from/to frame."""
    relationship = Relationship(seed=1)
    relationship.add_relations(
        from_ids=["a", "b", "b", "c"],
        to_ids=["b", "c", "a", "b"])

    # none of these ids has been added to the relationship
    unknown_ids = ["non_existing_id", "neither"]
    result = relationship.select_one(unknown_ids, discard_empty=True)

    # no row at all, but the usual columns are still present
    row_count = result.shape[0]
    assert row_count == 0
    assert result.columns.tolist() == ["from", "to"]
Exemple #3
0
def test_weighted_relationship_should_take_weights_into_account():
    """A "to" id carrying all the weight must be the only one selected."""
    origins = ["a", "b", "c"]
    targets = ["x", "y", "z"]

    # a, b and c are each connected to x, y and z, but every weight is 0
    # except the one towards y
    weighted = Relationship(seed=1234)
    weighted.add_relations(
        from_ids=[origin for origin in origins for _ in targets],
        to_ids=targets * 3,
        weights=[0, 1, 0] * 3)

    picked = weighted.select_one()

    # => with those weights, only y can be selected, once per origin
    assert picked["to"].tolist() == ["y"] * 3
    assert sorted(picked["from"].tolist()) == origins
Exemple #4
0
def test_weighted_relationship_should_take_overridden_weights_into_account():
    """Weights passed to select_one() must replace the stored ones."""
    origins = ["a", "b", "c"]
    targets = ["x", "y", "z"]

    # a, b and c are each connected to x, y and z; as stored, every weight
    # is 0 except the one towards y
    weighted = Relationship(seed=1234)
    weighted.add_relations(
        from_ids=[origin for origin in origins for _ in targets],
        to_ids=targets * 3,
        weights=[0, 1, 0] * 3)

    # an override carries exactly one weight per "to" value: here all the
    # weight is moved from y to z
    z_only_weights = pd.Series(data=[0, 0, 1], index=["x", "y", "z"])
    picked = weighted.select_one(overridden_to_weights=z_only_weights)

    # the stored weights must have been ignored in favour of the override
    assert picked["to"].tolist() == ["z"] * 3
    assert sorted(picked["from"].tolist()) == origins
Exemple #5
0
def test_seeded_relationship_should_always_return_same_selection():
    """Two identically seeded and populated relationships select alike."""
    sources = ["a", "b", "c"]

    # three targets per source: af1, af2, af3, bf1, ...
    from_ids = [src for src in sources for _ in range(3)]
    to_ids = ["%sf%d" % (src, rank) for src in sources for rank in (1, 2, 3)]

    # two relationships sharing the same seed and the same content
    tested1 = Relationship(seed=1345)
    tested2 = Relationship(seed=1345)
    for tested in (tested1, tested2):
        tested.add_relations(from_ids=from_ids, to_ids=to_ids)

    # the same sequence of queries ("d" is not a known id) must produce
    # the same sequence of answers on both sides
    for query in (("a",), ("b",), ("a", "b", "d")):
        first = tested1.select_one(from_ids=list(query))
        second = tested2.select_one(from_ids=list(query))
        assert first.equals(second)
Exemple #6
0
def test_select_many_with_drop_should_remove_elements():
    """select_many(remove_selected=True) must delete what it hands out.

    A private relationship with 100 "to" ids for each of a, b, c and d is
    queried once for five stories; every "to" id returned for a story must
    afterwards be absent from the relations of that story's "from" id.
    """
    story_data_index = build_ids(5, prefix="cl_", max_length=1)
    story_from_ids = ["a", "b", "c", "b", "a"]

    # private instance since we're going to drop some elements: the shared
    # module-level relationships must stay untouched for the other tests
    four_to_plenty_copy = Relationship(seed=1)
    for i in range(100):
        four_to_plenty_copy.add_relations(
            from_ids=["a", "b", "c", "d"],
            to_ids=["a_%d" % i, "b_%d" % i,
                    "c_%d" % i, "d_%d" % i])

    # select from the private instance, i.e. the one inspected below
    selection = four_to_plenty_copy.select_many(
        from_ids=pd.Series(story_from_ids, index=story_data_index),
        named_as="selected_sets",
        quantities=[4, 5, 6, 7, 8],
        remove_selected=True,
        discard_empty=False)

    # makes sure all selected values have been removed; the selection is
    # looked up by story id, whereas the relationship is keyed by "from" id
    for story_id, from_id in zip(story_data_index, story_from_ids):
        rels = four_to_plenty_copy.get_relations(from_ids=[from_id])

        # `in` on a pandas Series tests the index labels, so the remaining
        # "to" values are materialised before checking membership
        remaining_to_ids = set(rels["to"])

        for to_id in selection.loc[story_id]["selected_sets"]:
            assert to_id not in remaining_to_ids
Exemple #7
0
def test_pop_one_relationship_should_remove_element():
    """select_one(remove_selected=True) must pop the relation it returns."""
    # relations get removed below => use a private instance so that other
    # tests are not influenced
    oneto1_copy = Relationship(seed=1)
    oneto1_copy.add_relations(
        from_ids=list("abcde"),
        to_ids=["t" + letter for letter in "abcde"])

    def pop_a_and_d(**extra):
        # every step of this test pops from the same two ids
        return oneto1_copy.select_one(from_ids=["a", "d"],
                                      remove_selected=True,
                                      **extra)

    first_pop = pop_a_and_d()

    # the unique "to" value of each id should have been taken
    assert first_pop.sort_values("from")["to"].tolist() == ["ta", "td"]
    assert first_pop.columns.tolist() == ["from", "to"]

    # and removed from the relationship
    assert set(oneto1_copy.grouped.keys()) == {"b", "c", "e"}

    # popping the same ids again should just return nothing
    second_pop = pop_a_and_d()

    assert second_pop.shape[0] == 0
    assert second_pop.columns.tolist() == ["from", "to"]

    # and leave the relationship as it was
    assert set(oneto1_copy.grouped.keys()) == {"b", "c", "e"}

    # popping once more while keeping empty results should now return a
    # 2-row dataframe filled with Nones
    third_pop = pop_a_and_d(discard_empty=False)

    assert third_pop.shape[0] == 2
    assert sorted(third_pop.columns.tolist()) == ["from", "to"]
    assert third_pop["to"].tolist() == [None, None]
    assert sorted(third_pop["from"].tolist()) == ["a", "d"]
Exemple #8
0
def test_select_one_nonexistingids_should_insert_none_if_keep_missing():
    """With discard_empty=False, unknown ids get a row whose "to" is None."""
    relationship = Relationship(seed=1)
    relationship.add_relations(
        from_ids=["a", "b", "b", "c"],
        to_ids=["a1", "b1", "b2", "c1"])

    # "a" is requested twice, two of the ids are unknown
    requested = ["c", "b_non_existing_id", "a", "neither", "a"]
    result = relationship.select_one(requested, discard_empty=False)

    # one row per requested id, duplicates and unknown ids included
    assert result.shape[0] == 5
    assert result.columns.tolist() == ["from", "to"]

    # expected (from, to) pairs once the rows are ordered by "from"
    expected_pairs = [
        ("a", "a1"),
        ("a", "a1"),
        ("b_non_existing_id", None),
        ("c", "c1"),
        ("neither", None),
    ]

    ordered = result.sort_values("from")

    assert ordered["from"].tolist() == [pair[0] for pair in expected_pairs]
    assert ordered["to"].tolist() == [pair[1] for pair in expected_pairs]
Exemple #9
0
def test_select_many_several_times_with_pop_should_empty_all_data():
    """Repeated select_many(remove_selected=True) calls drain the relations.

    id1, id2 and id3 start with 2500, 1500 and 500 relations. Each round
    requests 1000 values from each of them, so the ids run dry one after
    the other and a last round hits a fully empty relationship.
    """
    rel = Relationship(seed=1234)
    froms = ["id1"] * 2500 + ["id2"] * 1500 + ["id3"] * 500
    tos = np.random.choice(a=range(10), size=len(froms))
    rel.add_relations(from_ids=froms, to_ids=tos)

    assert rel.get_relations().shape[0] == 2500 + 1500 + 500

    story_ids = ["f1", "f2", "f3"]

    # (sizes obtained for f1, f2, f3 ; relations left afterwards), per round
    rounds = [
        # id3 only holds 500 values, so it is exhausted right away
        ([1000, 1000, 500], 1500 + 500 + 0),
        # same story for id2, while id3 now yields an empty selection
        # (it is still listed since discard_empty is False)
        ([1000, 500, 0], 500 + 0 + 0),
        # id1 gives its last 500 values: the relationship is now empty
        ([500, 0, 0], 0 + 0 + 0),
        # selecting from a fully empty relationship changes nothing
        ([0, 0, 0], 0),
    ]

    for expected_sizes, expected_remaining in rounds:
        selection = rel.select_many(
            from_ids=pd.Series(["id1", "id2", "id3"],
                               index=list(story_ids)),
            named_as="the_selection",
            quantities=[1000, 1000, 1000],
            remove_selected=True,
            discard_empty=False)

        # one row per story, values stored under the requested name
        assert selection.columns.tolist() == ["the_selection"]
        assert sorted(selection.index.tolist()) == story_ids

        # number of values actually obtained for each story
        obtained_sizes = selection["the_selection"].map(len)
        assert obtained_sizes[story_ids].tolist() == expected_sizes

        # remove_selected => the relationship shrinks accordingly
        assert rel.get_relations().shape[0] == expected_remaining
Exemple #10
0
import path
import pandas as pd
import logging
import os
import numpy as np
import functools

from trumania.core.util_functions import setup_logging
from trumania.core.util_functions import build_ids
from trumania.core.relationship import Relationship

# logging is configured once, when the module is imported, through the
# helper imported from trumania.core.util_functions
setup_logging()

# Module-level relationships, all built with seed=1. Tests in this file that
# remove relations build their own instances rather than touching these.

# one "to" id per "from" id: a -> ta, b -> tb, ..., e -> te
oneto1 = Relationship(seed=1)
oneto1.add_relations(from_ids=pd.Series(["a", "b", "c", "d", "e"]),
                     to_ids=pd.Series(["ta", "tb", "tc", "td", "te"]))

# the four "from" ids a, b, c and d all point to the single "to" id z
four_to_one = Relationship(seed=1)
four_to_one.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                          to_ids=pd.Series(["z", "z", "z", "z"]))

# the four "from" ids a, b, c and d each point to both y and z (one
# add_relations call per "to" id)
four_to_two = Relationship(seed=1)
four_to_two.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                          to_ids=pd.Series(["y", "y", "y", "y"]))
four_to_two.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                          to_ids=pd.Series(["z", "z", "z", "z"]))

# first batch of relations: each "from" id gets a "to" id of its own
# (a -> ya, ..., d -> yd)
two_per_from = Relationship(seed=1)
two_per_from.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),
                           to_ids=pd.Series(["ya", "yb", "yc", "yd"]))
two_per_from.add_relations(from_ids=pd.Series(["a", "b", "c", "d"]),