Esempio n. 1
0
def test_add_nested_to_plain():
    """Adding a nested MetaPartition equals adding its members one at a time.

    Two children are attached to the same plain base partition in two ways:
    pre-nested into a single object, and sequentially. Both results must
    compare equal.
    """
    base = MetaPartition(
        label="label_1",
        file="file",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": [1, 2, 3]},
    )

    first_child = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )
    second_child = MetaPartition(
        label="label_22",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )

    # Route 1: nest the two children first, then attach the nested object.
    nested_children = first_child.add_metapartition(second_child)
    added_as_nested = base.add_metapartition(nested_children)

    # Route 2: attach the children to the base one after the other.
    added_one_by_one = base.add_metapartition(first_child)
    added_one_by_one = added_one_by_one.add_metapartition(second_child)

    assert added_as_nested == added_one_by_one
def test_eq_nested():
    """Check equality and inequality of nested MetaPartitions.

    A nested partition equals itself, differs from one of its own members
    (in both operand orders), and differs from another nested partition
    built from the same base but a different second member.
    """
    base = MetaPartition(
        label="label_1",
        file="file",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": [1, 2, 3]},
    )
    indexed_child = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )
    nested = base.add_metapartition(indexed_child)

    # Self-comparison is intentional: it exercises the equality operator.
    assert nested == nested
    # A nested object must not equal a plain member, whichever side it is on.
    assert nested != indexed_child
    assert indexed_child != nested

    # Same base, but the second member has another label and no indices.
    unindexed_child = MetaPartition(
        label="label_3",
        data=pd.DataFrame({"test": [4, 5, 6]}),
    )
    other_nested = base.add_metapartition(unindexed_child)
    assert nested != other_nested
    assert other_nested != nested
Esempio n. 3
0
def test_nested_incompatible_meta():
    """Nesting partitions whose "test" column dtypes differ raises ValueError.

    One partition holds the column as int8, the other as float64; both use
    metadata version 4.
    """
    int8_frame = pd.DataFrame({"test": np.array([1, 2, 3], dtype=np.int8)})
    float64_frame = pd.DataFrame({"test": np.array([4, 5, 6], dtype=np.float64)})

    int8_partition = MetaPartition(
        label="label_1", data=int8_frame, metadata_version=4
    )
    float64_partition = MetaPartition(
        label="label_2", data=float64_frame, metadata_version=4
    )

    with pytest.raises(ValueError):
        int8_partition.add_metapartition(float64_partition)
def test_nested_copy():
    """Copying a nested MetaPartition gives an equal but distinct object."""
    keyed_indices = {"test": {1: "label_1", 2: "label_2", 3: "label_3"}}
    first = MetaPartition(
        label="label_1",
        file="file",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices=keyed_indices,
    )
    second = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )

    nested = first.add_metapartition(second)
    assert len(nested.metapartitions) == 2

    duplicate = nested.copy()

    # The copy carries the same number of members and compares equal ...
    assert len(duplicate.metapartitions) == len(nested.metapartitions)
    assert duplicate == nested
    # ... yet it must be a different object, not an alias of the original.
    assert duplicate is not nested
Esempio n. 5
0
def test_partition_on_nested():
    """Partitioning a nested MetaPartition splits every member.

    Two members share a six-row frame in which each (level1, level2) pair
    occurs once. After ``partition_on(["level1", "level2"])`` the test
    expects 12 sub-partitions (6 per member), each holding a one-row "core"
    table without the partition columns, labelled
    ``level1=<v>/level2=<v>/<original label>``.
    """
    original_df = pd.DataFrame(
        {
            "level1": [1, 2, 3, 1, 2, 3],
            "level2": [1, 1, 1, 2, 2, 2],
            "no_index_col": np.arange(0, 6),
        }
    )

    def _make_partition(label):
        # Both members are identical apart from their label.
        return MetaPartition(
            label=label,
            files={"core": "file"},
            data={"core": original_df},
            dataset_metadata={"dataset": "metadata"},
            metadata_version=4,
        )

    nested = _make_partition("label_1").add_metapartition(
        _make_partition("label_2")
    )
    partitioned = nested.partition_on(["level1", "level2"])
    assert len(partitioned.metapartitions) == 12

    seen_labels = []
    for sub_partition in partitioned:
        seen_labels.append(sub_partition.label)
        tables = sub_partition.data
        assert len(tables) == 1
        assert "core" in tables
        core = tables["core"]
        assert core._is_view

        # Order of the sub-partitions is not fixed, so only check per-item
        # properties here and compare the labels as a sorted list below.
        assert len(core) == 1
        assert "level1" not in core
        assert "level2" not in core
        assert "no_index_col" in core

    expected_labels = [
        "level1=1/level2=1/label_1",
        "level1=1/level2=2/label_1",
        "level1=2/level2=1/label_1",
        "level1=2/level2=2/label_1",
        "level1=3/level2=1/label_1",
        "level1=3/level2=2/label_1",
        "level1=1/level2=1/label_2",
        "level1=1/level2=2/label_2",
        "level1=2/level2=1/label_2",
        "level1=2/level2=2/label_2",
        "level1=3/level2=1/label_2",
        "level1=3/level2=2/label_2",
    ]
    assert sorted(seen_labels) == sorted(expected_labels)
Esempio n. 6
0
def test_partition_label_helper(labels, flat_labels):
    """``partition_labels_from_mps`` yields the flat set of all labels.

    ``labels`` may contain plain labels and lists of labels; a list is
    turned into one nested MetaPartition. ``flat_labels`` holds the labels
    expected from the helper, compared as a set.
    """

    def _build(entry):
        # Anything that is not a list is a single, plain partition.
        if not isinstance(entry, list):
            return MetaPartition(label=entry)
        # A list becomes a nested partition: the first label is the base,
        # the remaining ones are attached in order.
        nested = MetaPartition(entry[0])
        for extra_label in entry[1:]:
            nested = nested.add_metapartition(MetaPartition(label=extra_label))
        return nested

    mps = [_build(entry) for entry in labels]

    assert set(partition_labels_from_mps(mps)) == set(flat_labels)
Esempio n. 7
0
def test_eq_nested():
    """A nested MetaPartition built from dict-style inputs equals itself."""
    with_files = MetaPartition(
        label="label_1",
        files={"core": "file"},
        data={"core": pd.DataFrame({"test": [1, 2, 3]})},
        indices={"test": [1, 2, 3]},
        dataset_metadata={"dataset": "metadata"},
    )
    without_files = MetaPartition(
        label="label_2",
        data={"core": pd.DataFrame({"test": [4, 5, 6]})},
        indices={"test": [4, 5, 6]},
    )

    nested = with_files.add_metapartition(without_files)

    # Self-comparison is intentional: it exercises the equality operator.
    assert nested == nested
Esempio n. 8
0
def test_add_metapartition_duplicate_labels():
    """Adding a MetaPartition with an already-used label raises RuntimeError."""
    shared_label = "label"
    existing = MetaPartition(label=shared_label)
    duplicate = MetaPartition(label=shared_label)

    with pytest.raises(RuntimeError):
        existing.add_metapartition(duplicate)
Esempio n. 9
0
def test_add_metapartition():
    """Nesting MetaPartitions keeps members in order and hides single-object attributes.

    After ``add_metapartition`` the result must

    * raise ``AttributeError`` for attributes that only make sense on a
      single partition (``indices``, ``label``, ``data``, ``file``), and
    * expose its members through ``metapartitions`` in insertion order, each
      entry subscriptable by ``"label"`` and ``"indices"``.

    The same holds when a third partition is added to an already nested one.
    """

    def _assert_members(partitions, expected):
        # Compare each member's label and indices against the expectation,
        # position by position.
        assert len(partitions) == len(expected)
        for member, (label, indices) in zip(partitions, expected):
            assert member["label"] == label
            assert member["indices"] == indices

    mp = MetaPartition(
        label="label_1",
        data=pd.DataFrame({"test": [1, 2, 3]}),
        indices={"test": [1, 2, 3]},
    )

    mp_2 = MetaPartition(
        label="label_2",
        data=pd.DataFrame({"test": [4, 5, 6]}),
        indices={"test": [4, 5, 6]},
    )

    new_mp = mp.add_metapartition(mp_2)

    # Cannot access single object attributes on a nested MetaPartition.
    # Each attribute is checked once (the original repeated ``indices``
    # three times).
    for attribute in ("indices", "label", "data", "file"):
        with pytest.raises(AttributeError):
            getattr(new_mp, attribute)

    partition_list = new_mp.metapartitions
    assert len(partition_list) == 2
    _assert_members(
        partition_list,
        [
            ("label_1", {"test": [1, 2, 3]}),
            ("label_2", {"test": [4, 5, 6]}),
        ],
    )

    # This tests whether it is possible to add to an already nested MetaPartition
    mp_3 = MetaPartition(
        label="label_3",
        data=pd.DataFrame({"test": [7, 8, 9]}),
        indices={"test": [7, 8, 9]},
    )
    new_mp = new_mp.add_metapartition(mp_3)

    partition_list = new_mp.metapartitions
    assert len(partition_list) == 3
    _assert_members(
        partition_list,
        [
            ("label_1", {"test": [1, 2, 3]}),
            ("label_2", {"test": [4, 5, 6]}),
            ("label_3", {"test": [7, 8, 9]}),
        ],
    )