Example #1
0
def test_eq():
    """Exercise ``==`` / ``!=`` on ``MetaPartition`` objects built via ``from_dict``.

    A reference partition (label ``"test_label"``, single-column integer
    frame) is compared against itself, against an equal copy, and against
    variants that differ in label, extra ``"file"`` entry, column name,
    column dtype or cell value. Comparison against ``None`` data and against
    a plain string is covered as well.
    """

    def build(data, label="test_label", **extra):
        # Same key order as a literal {"label": ..., "data": ..., <extra>}.
        return MetaPartition.from_dict({"label": label, "data": data, **extra})

    base_frame = pd.DataFrame({"a": [1]})

    reference = build(base_frame)
    # Reflexive comparison.
    assert reference == reference

    # A distinct frame object with identical content.
    equal_copy = build(pd.DataFrame({"a": [1]}))
    assert reference == equal_copy

    # Only the label differs; inequality is checked in both directions.
    other_label = build(base_frame, label="another_label")
    assert reference != other_label
    assert other_label != reference

    # NOTE(review): this case changes the label *and* adds "file", so it does
    # not isolate the "file" difference -- confirm whether that is intended.
    other_file = build(base_frame, label="another_label", file="something")
    assert reference != other_file
    assert other_file != reference

    # Same value, different column name.
    renamed_column = build(pd.DataFrame({"b": [1]}))
    assert reference != renamed_column
    assert renamed_column != reference

    # Column "b" holding a float instead of an int.
    float_frame = pd.DataFrame({"b": [1.0]})
    other_dtype = build(float_frame)
    assert reference != other_dtype
    assert other_dtype != reference

    # NOTE(review): built from exactly the same input as the dtype case above;
    # presumably a metadata-specific field was meant to differ -- verify.
    other_metadata = build(float_frame)
    assert reference != other_metadata
    assert other_metadata != reference

    # Same column, different cell value.
    other_value = build(pd.DataFrame({"a": [2]}))
    assert not (reference == other_value)

    # A partition without data still equals itself.
    without_data = build(None)
    assert without_data == without_data

    # Comparing with an unrelated type must not report equality.
    assert not (reference == "abc")
Example #2
0
def test_from_dict():
    """Build a ``MetaPartition`` from a dict and check its data and version.

    The dict carries only ``"data"`` and ``"label"``; the resulting object is
    expected to expose the same frame via ``.data`` and to report
    ``DEFAULT_METADATA_VERSION`` as its ``metadata_version``.
    """
    frame = pd.DataFrame({"a": [1]})
    partition = MetaPartition.from_dict({"data": frame, "label": "test_label"})

    # The frame handed in must come back unchanged.
    pdt.assert_frame_equal(partition.data, frame)
    # No version was supplied, so the default one is expected.
    assert DEFAULT_METADATA_VERSION == partition.metadata_version
Example #3
0
def test_concatenate_identical_col_df_naming():
    """Check the result of ``concat_dataframes`` for tables with equal columns.

    Tables ``"some"`` and ``"name"`` both have columns A and B, while
    ``"second"`` additionally has C. The test expects two tables afterwards:
    ``"some_name"`` holding the rows of the first two, and ``"second"``
    unchanged.
    """
    tables = {
        "some": pd.DataFrame({"A": [1], "B": [1]}),
        "name": pd.DataFrame({"A": [2], "B": [2]}),
        "second": pd.DataFrame({"A": [3], "B": [3], "C": [3]}),
    }
    partition = MetaPartition.from_dict({"label": "test_label", "data": tables})
    concatenated = partition.concat_dataframes().data

    assert len(concatenated) == 2

    # The two tables with identical columns end up under a combined key.
    assert "some_name" in concatenated
    merged_expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]})
    pdt.assert_frame_equal(concatenated["some_name"], merged_expected)

    # The table with the extra column keeps its key and content.
    assert "second" in concatenated
    untouched_expected = pd.DataFrame({"A": [3], "B": [3], "C": [3]})
    pdt.assert_frame_equal(concatenated["second"], untouched_expected)
Example #4
0
def test_concatenate_no_change():
    """Check that ``concat_dataframes`` is a no-op when columns differ.

    The two input tables do not share the same column set (``"second"`` has
    an extra column C), and the test expects the returned object to compare
    equal to the original partition.
    """
    tables = {
        "first_0": pd.DataFrame({"A": [1], "B": [1]}),
        "second": pd.DataFrame({"A": [3], "B": [3], "C": [3]}),
    }
    original = MetaPartition.from_dict({"label": "test_label", "data": tables})

    concatenated = original.concat_dataframes()

    assert concatenated == original
Example #5
0
def test_eq():
    """Exercise ``==`` on ``MetaPartition`` objects whose data is a dict of tables.

    Every partition uses the label ``"test_label"``; the variants differ only
    in the ``"data"`` mapping (table content, table key, number of tables, or
    no tables at all). A comparison against a plain string is included too.
    """

    def build(tables):
        # All cases share the label; only the table mapping varies.
        return MetaPartition.from_dict({"label": "test_label", "data": tables})

    base_frame = pd.DataFrame({"a": [1]})
    equal_frame = pd.DataFrame({"a": [1]})

    reference = build({"core": base_frame})
    # Reflexive comparison.
    assert reference == reference

    # Same table key, distinct frame object with identical content.
    equal_copy = build({"core": equal_frame})
    assert reference == equal_copy

    # Same table key, different cell value.
    other_value = build({"core": pd.DataFrame({"a": [2]})})
    assert not (reference == other_value)

    # Equal content stored under a different table key (the partition label
    # itself is unchanged in this case).
    other_table_key = build({"not_core": equal_frame})
    assert not (reference == other_table_key)

    # A partition with an empty table mapping still equals itself.
    without_tables = build({})
    assert without_tables == without_tables

    # A superset of tables; both entries reference the same frame object.
    extra_table = build({"core": base_frame, "not_core": base_frame})
    assert not (reference == extra_table)

    # Comparing with an unrelated type must not report equality.
    assert not (reference == "abc")