Beispiel #1
0
def test_column_names(unittest):
    """Verify the ``column_names`` duplicate check for every ``keep`` option.

    Each scenario patches ``dtale.global_state.DATA`` with a fresh dict holding
    one frame under data id ``"1"`` and then drives ``DuplicateCheck``:

    * ``keep`` of ``first`` / ``last`` / ``none`` on ``duplicates_data()``:
      ``test()`` reports the four column names grouped under ``"foo"`` and
      ``execute()`` stores a frame whose remaining columns match the option.
    * ``non_duplicate_data()``: removing duplicates raises
      ``NoDuplicatesException``.
    * ``duplicates_data()`` without its ``bar`` column and ``keep="none"``:
      ``execute()`` raises ``RemoveAllDataException``.

    :param unittest: assertion helper injected as a fixture; presumably a
        ``unittest.TestCase`` instance -- confirm against the project's conftest.
    """
    data_id, duplicates_type = "1", "column_names"
    expected_groups = {"foo": ["Foo", "foo", "fOo", "foO"]}
    # (keep option, columns expected to remain after execute())
    expected_columns_by_keep = (
        ("first", ["Foo", "bar"]),
        ("last", ["foO", "bar"]),
        ("none", ["bar"]),
    )

    for keep, expected_columns in expected_columns_by_keep:
        # A fresh dict per scenario so results of earlier runs cannot leak in.
        data = {data_id: duplicates_data()}
        with mock.patch("dtale.global_state.DATA", data):
            builder = DuplicateCheck(data_id, duplicates_type, {"keep": keep})
            # assertEqual rather than the assertEquals alias, which was removed
            # from unittest.TestCase in Python 3.12.
            unittest.assertEqual(builder.test(), expected_groups)
            new_data_id = builder.execute()
            unittest.assertEqual(list(data[new_data_id].columns), expected_columns)

    data = {data_id: non_duplicate_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        with pytest.raises(NoDuplicatesException):
            builder.checker.remove(data[data_id])

    # With "bar" dropped up front, keep="none" leaves no column to retain.
    data = {data_id: duplicates_data().drop("bar", axis=1)}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        with pytest.raises(RemoveAllDataException):
            builder.execute()
Beispiel #2
0
def test_show_duplicates(unittest):
    """Verify the ``show`` duplicate check: grouping, counts and filtering.

    Each scenario patches ``dtale.global_state.DATA`` with a fresh dict holding
    ``duplicates_data()`` under data id ``"1"``:

    * grouping on ``foo``: ``test()`` returns an empty dict and ``execute()``
      raises ``NoDuplicatesToShowException``;
    * grouping on ``foO`` and ``bar``: ``test()`` reports two groups with
      counts 3 and 2, and ``execute()`` stores five rows under data id ``"2"``;
    * the same grouping with ``filter=["4", "5"]``: ``execute()`` stores only
      the three rows of that group under data id ``"2"``.

    :param unittest: assertion helper injected as a fixture; presumably a
        ``unittest.TestCase`` instance -- confirm against the project's conftest.
    """
    data_id, duplicates_type = "1", "show"

    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(data_id, duplicates_type, {"group": ["foo"]})
        # assertEqual rather than the assertEquals alias, which was removed
        # from unittest.TestCase in Python 3.12.
        unittest.assertEqual(builder.test(), {})
        with pytest.raises(NoDuplicatesToShowException):
            builder.execute()

    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(data_id, duplicates_type, {"group": ["foO", "bar"]})
        unittest.assertEqual(
            builder.test(),
            {
                "4, 5": dict(count=3, filter=["4", "5"]),
                "4, 6": dict(count=2, filter=["4", "6"]),
            },
        )
        new_data_id = builder.execute()
        assert new_data_id == "2"
        unittest.assertEqual(data[new_data_id]["Foo"].tolist(), [1, 2, 3, 4, 5])

    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id, duplicates_type, {"group": ["foO", "bar"], "filter": ["4", "5"]}
        )
        new_data_id = builder.execute()
        assert new_data_id == "2"
        unittest.assertEqual(data[new_data_id]["Foo"].tolist(), [1, 2, 3])
Beispiel #3
0
def test_rows(unittest):
    """Verify the ``rows`` duplicate check for each ``keep`` option.

    Each scenario patches ``dtale.global_state.DATA`` with a fresh dict holding
    ``duplicates_data()`` under data id ``"1"``:

    * ``subset="foo"``: ``test()`` returns 0 and ``execute()`` stores a frame
      with the same number of rows as the input;
    * ``subset=["foO", "bar"]`` with ``keep`` of ``first`` / ``last``:
      ``test()`` returns 3 and two rows remain (``Foo`` values ``[1, 4]`` and
      ``[3, 5]`` respectively);
    * the same subset with ``keep="none"``: ``test()`` returns 5 and
      ``execute()`` raises ``RemoveAllDataException``.

    :param unittest: assertion helper injected as a fixture; presumably a
        ``unittest.TestCase`` instance -- confirm against the project's conftest.
    """
    data_id, duplicates_type = "1", "rows"

    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id, duplicates_type, {"keep": "first", "subset": "foo"}
        )
        # assertEqual rather than the assertEquals alias, which was removed
        # from unittest.TestCase in Python 3.12.
        unittest.assertEqual(builder.test(), 0)
        pre_length = len(data[data_id])
        new_data_id = builder.execute()
        assert pre_length == len(data[new_data_id])

    # (keep option, "Foo" values expected to remain after execute())
    for keep, expected_foo in (("first", [1, 4]), ("last", [3, 5])):
        data = {data_id: duplicates_data()}
        with mock.patch("dtale.global_state.DATA", data):
            builder = DuplicateCheck(
                data_id, duplicates_type, {"keep": keep, "subset": ["foO", "bar"]}
            )
            unittest.assertEqual(builder.test(), 3)
            new_data_id = builder.execute()
            assert len(data[new_data_id]) == 2
            unittest.assertEqual(data[new_data_id]["Foo"].tolist(), expected_foo)

    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id, duplicates_type, {"keep": "none", "subset": ["foO", "bar"]}
        )
        unittest.assertEqual(builder.test(), 5)
        with pytest.raises(RemoveAllDataException):
            builder.execute()
Beispiel #4
0
def test_column_names(unittest):
    """Run the ``column_names`` duplicate check against the global data store.

    After clearing the store, each scenario registers a frame under data id
    ``"1"`` via ``build_data_inst`` and drives ``DuplicateCheck``: the three
    ``keep`` options on ``duplicates_data()``, then the no-duplicates error on
    ``non_duplicate_data()``, then the remove-everything error once ``bar``
    has been dropped from the input.

    :param unittest: assertion helper injected as a fixture; presumably a
        ``unittest.TestCase`` instance -- confirm against the project's conftest.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "column_names"
    case_variants = {"foo": ["Foo", "foo", "fOo", "foO"]}
    # (keep option, columns expected to remain after execute())
    scenarios = (
        ("first", ["Foo", "bar"]),
        ("last", ["foO", "bar"]),
        ("none", ["bar"]),
    )

    for keep_option, remaining in scenarios:
        frames = {data_id: duplicates_data()}
        build_data_inst(frames)

        checker = DuplicateCheck(data_id, duplicates_type, {"keep": keep_option})
        unittest.assertEqual(checker.test(), case_variants)
        result_id = checker.execute()
        result_columns = global_state.get_data(result_id).columns
        unittest.assertEqual(list(result_columns), remaining)

    frames = {data_id: non_duplicate_data()}
    build_data_inst(frames)

    checker = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
    with pytest.raises(NoDuplicatesException):
        checker.checker.remove(frames[data_id])

    # Input with "bar" dropped before registering it.
    without_bar = duplicates_data().drop("bar", axis=1)
    frames = {data_id: without_bar}
    build_data_inst(frames)

    checker = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
    with pytest.raises(RemoveAllDataException):
        checker.execute()
Beispiel #5
0
def test_show_duplicates(unittest):
    """Run the ``show`` duplicate check against the global data store.

    After clearing the store, each scenario registers ``duplicates_data()``
    under data id ``"1"`` via ``build_data_inst``: grouping on ``foo`` (no
    groups, ``execute`` raises), grouping on ``foO`` and ``bar`` (two groups,
    five rows stored under data id ``2``), and the same grouping narrowed by
    ``filter=["4", "5"]`` (three rows stored).

    :param unittest: assertion helper injected as a fixture; presumably a
        ``unittest.TestCase`` instance -- confirm against the project's conftest.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "show"

    # Scenario 1: grouping that produces no duplicate groups.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)

    checker = DuplicateCheck(data_id, duplicates_type, {"group": ["foo"]})
    unittest.assertEqual(checker.test(), {})
    with pytest.raises(NoDuplicatesToShowException):
        checker.execute()

    # Scenario 2: grouping that produces two duplicate groups.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)

    group_columns = ["foO", "bar"]
    checker = DuplicateCheck(data_id, duplicates_type, {"group": group_columns})
    expected_groups = {
        "4, 5": dict(count=3, filter=["4", "5"]),
        "4, 6": dict(count=2, filter=["4", "6"]),
    }
    unittest.assertEqual(checker.test(), expected_groups)
    result_id = checker.execute()
    assert result_id == 2
    shown_foo = global_state.get_data(result_id)["Foo"].tolist()
    unittest.assertEqual(shown_foo, [1, 2, 3, 4, 5])

    # Scenario 3: same grouping, restricted to a single group via "filter".
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)

    filtered_cfg = {"group": ["foO", "bar"], "filter": ["4", "5"]}
    checker = DuplicateCheck(data_id, duplicates_type, filtered_cfg)
    result_id = checker.execute()
    shown_foo = global_state.get_data(result_id)["Foo"].tolist()
    unittest.assertEqual(shown_foo, [1, 2, 3])
Beispiel #6
0
def test_rows(unittest):
    """Run the ``rows`` duplicate check against the global data store.

    After clearing the store, each scenario registers ``duplicates_data()``
    under data id ``"1"`` via ``build_data_inst``: a ``foo`` subset where
    nothing is removed, the ``foO``/``bar`` subset with ``keep`` of ``first``
    and ``last`` (three of five rows removed), and the same subset with
    ``keep="none"``, where all rows would be removed and ``execute`` raises.

    :param unittest: assertion helper injected as a fixture; presumably a
        ``unittest.TestCase`` instance -- confirm against the project's conftest.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "rows"

    # Scenario 1: subset with no duplicate rows.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)

    unique_cfg = {"keep": "first", "subset": "foo"}
    checker = DuplicateCheck(data_id, duplicates_type, unique_cfg)
    unittest.assertEqual(checker.test(), dict(removed=0, total=5, remaining=5))
    rows_before = len(frames[data_id])
    result_id = checker.execute()
    assert rows_before == len(global_state.get_data(result_id))

    # Scenarios 2 and 3: (keep option, "Foo" values expected to remain).
    for keep_option, surviving_foo in (("first", [1, 4]), ("last", [3, 5])):
        frames = {data_id: duplicates_data()}
        build_data_inst(frames)

        cfg = {"keep": keep_option, "subset": ["foO", "bar"]}
        checker = DuplicateCheck(data_id, duplicates_type, cfg)
        unittest.assertEqual(checker.test(), dict(removed=3, total=5, remaining=2))
        result_id = checker.execute()
        assert len(global_state.get_data(result_id)) == 2
        unittest.assertEqual(
            global_state.get_data(result_id)["Foo"].tolist(), surviving_foo
        )

    # Scenario 4: dropping every duplicated row would leave nothing.
    frames = {data_id: duplicates_data()}
    build_data_inst(frames)

    drop_all_cfg = {"keep": "none", "subset": ["foO", "bar"]}
    checker = DuplicateCheck(data_id, duplicates_type, drop_all_cfg)
    unittest.assertEqual(checker.test(), dict(removed=5, total=5, remaining=0))
    with pytest.raises(RemoveAllDataException):
        checker.execute()