Exemple #1
0
def test_rows(unittest):
    """Check the "rows" duplicate check for each ``keep`` mode.

    Every scenario patches ``dtale.global_state.DATA`` with a fresh
    ``duplicates_data()`` frame stored under id "1", so no state is shared
    between scenarios.
    """
    data_id, duplicates_type = "1", "rows"

    # Subset "foo": nothing is reported as removable and executing the check
    # leaves the row count unchanged.
    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id, duplicates_type, {"keep": "first", "subset": "foo"}
        )
        unittest.assertEqual(
            builder.test(), {"removed": 0, "total": 5, "remaining": 5}
        )
        rows_before = len(data[data_id])
        new_data_id = builder.execute()
        assert rows_before == len(data[new_data_id])

    # Subset ["foO", "bar"]: "first" and "last" both leave two rows, but
    # different "Foo" values survive.
    for keep, surviving_foo in (("first", [1, 4]), ("last", [3, 5])):
        data = {data_id: duplicates_data()}
        with mock.patch("dtale.global_state.DATA", data):
            builder = DuplicateCheck(
                data_id, duplicates_type, {"keep": keep, "subset": ["foO", "bar"]}
            )
            unittest.assertEqual(
                builder.test(), {"removed": 3, "total": 5, "remaining": 2}
            )
            new_data_id = builder.execute()
            assert len(data[new_data_id]) == 2
            unittest.assertEqual(data[new_data_id]["Foo"].tolist(), surviving_foo)

    # keep="none" would drop every row, so execute() is expected to refuse.
    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id, duplicates_type, {"keep": "none", "subset": ["foO", "bar"]}
        )
        unittest.assertEqual(
            builder.test(), {"removed": 5, "total": 5, "remaining": 0}
        )
        with pytest.raises(RemoveAllDataException):
            builder.execute()
Exemple #2
0
def test_columns(unittest):
    """Check the "columns" duplicate check against the global data store.

    The store is cleared once up front; each scenario then re-registers a
    fresh frame under id "1" via ``build_data_inst``.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "columns"

    # (keep mode, expected test() report, expected columns after execute())
    scenarios = (
        ("first", {"Foo": ["foo"]}, ["Foo", "fOo", "foO", "bar"]),
        ("last", {"foo": ["Foo"]}, ["foo", "fOo", "foO", "bar"]),
        ("none", {"Foo": ["foo"]}, ["fOo", "foO", "bar"]),
    )
    for keep, expected_report, expected_columns in scenarios:
        data = {data_id: duplicates_data()}
        build_data_inst(data)

        builder = DuplicateCheck(data_id, duplicates_type, {"keep": keep})
        unittest.assertEqual(builder.test(), expected_report)
        new_data_id = builder.execute()
        remaining_columns = list(global_state.get_data(new_data_id).columns)
        unittest.assertEqual(remaining_columns, expected_columns)

    # With the other columns dropped up front, keep="none" would remove
    # everything, so execute() is expected to refuse.
    only_duplicate_columns = duplicates_data().drop(["fOo", "foO", "bar"], axis=1)
    data = {data_id: only_duplicate_columns}
    build_data_inst(data)

    builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
    with pytest.raises(RemoveAllDataException):
        builder.execute()

    # Data without duplicate columns: calling the checker's remove() directly
    # is expected to signal that there is nothing to do.
    data = {data_id: non_duplicate_data()}
    build_data_inst(data)

    builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
    with pytest.raises(NoDuplicatesException):
        builder.checker.remove(data[data_id])
Exemple #3
0
def test_columns(unittest):
    """Check the "columns" duplicate check for each ``keep`` mode.

    Every scenario patches ``dtale.global_state.DATA`` with a fresh frame
    stored under id "1", so no state is shared between scenarios.

    Uses ``assertEqual`` rather than the ``assertEquals`` alias, which was
    deprecated and then removed from ``unittest.TestCase`` in Python 3.12.
    """
    data_id, duplicates_type = "1", "columns"

    # keep="first": "foo" is reported as a duplicate of "Foo" and is dropped.
    with ExitStack() as stack:
        data = {data_id: duplicates_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))

        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "first"})
        unittest.assertEqual(builder.test(), {"Foo": ["foo"]})
        new_data_id = builder.execute()
        unittest.assertEqual(
            list(data[new_data_id].columns), ["Foo", "fOo", "foO", "bar"]
        )

    # keep="last": the report is inverted and "Foo" is the column dropped.
    with ExitStack() as stack:
        data = {data_id: duplicates_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))

        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "last"})
        unittest.assertEqual(builder.test(), {"foo": ["Foo"]})
        new_data_id = builder.execute()
        unittest.assertEqual(
            list(data[new_data_id].columns), ["foo", "fOo", "foO", "bar"]
        )

    # keep="none": both "Foo" and "foo" are dropped.
    with ExitStack() as stack:
        data = {data_id: duplicates_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))

        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        unittest.assertEqual(builder.test(), {"Foo": ["foo"]})
        new_data_id = builder.execute()
        unittest.assertEqual(list(data[new_data_id].columns), ["fOo", "foO", "bar"])

    # With the other columns dropped up front, keep="none" would remove
    # everything, so execute() is expected to refuse.
    with ExitStack() as stack:
        data = {data_id: duplicates_data().drop(["fOo", "foO", "bar"], axis=1)}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))

        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        with pytest.raises(RemoveAllDataException):
            builder.execute()

    # Data without duplicate columns: calling the checker's remove() directly
    # is expected to signal that there is nothing to do.
    with ExitStack() as stack:
        data = {data_id: non_duplicate_data()}
        stack.enter_context(mock.patch("dtale.global_state.DATA", data))

        builder = DuplicateCheck(data_id, duplicates_type, {"keep": "none"})
        with pytest.raises(NoDuplicatesException):
            builder.checker.remove(data[data_id])
Exemple #4
0
def test_rows(unittest):
    """Check the "rows" duplicate check against the global data store.

    The store is cleared once up front; each scenario then re-registers a
    fresh ``duplicates_data()`` frame under id "1" via ``build_data_inst``.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "rows"

    # Subset "foo": nothing is reported as removable and executing the check
    # leaves the row count unchanged.
    data = {data_id: duplicates_data()}
    build_data_inst(data)
    builder = DuplicateCheck(
        data_id, duplicates_type, {"keep": "first", "subset": "foo"}
    )
    unittest.assertEqual(builder.test(), {"removed": 0, "total": 5, "remaining": 5})
    rows_before = len(data[data_id])
    new_data_id = builder.execute()
    assert rows_before == len(global_state.get_data(new_data_id))

    # Subset ["foO", "bar"]: "first" and "last" both leave two rows, but
    # different "Foo" values survive.
    for keep, surviving_foo in (("first", [1, 4]), ("last", [3, 5])):
        data = {data_id: duplicates_data()}
        build_data_inst(data)
        builder = DuplicateCheck(
            data_id, duplicates_type, {"keep": keep, "subset": ["foO", "bar"]}
        )
        unittest.assertEqual(
            builder.test(), {"removed": 3, "total": 5, "remaining": 2}
        )
        new_data_id = builder.execute()
        assert len(global_state.get_data(new_data_id)) == 2
        unittest.assertEqual(
            global_state.get_data(new_data_id)["Foo"].tolist(), surviving_foo
        )

    # keep="none" would drop every row, so execute() is expected to refuse.
    data = {data_id: duplicates_data()}
    build_data_inst(data)
    builder = DuplicateCheck(
        data_id, duplicates_type, {"keep": "none", "subset": ["foO", "bar"]}
    )
    unittest.assertEqual(builder.test(), {"removed": 5, "total": 5, "remaining": 0})
    with pytest.raises(RemoveAllDataException):
        builder.execute()
Exemple #5
0
def test_show_duplicates(unittest):
    """Check the "show" duplicate view for grouped and filtered requests.

    Every scenario patches ``dtale.global_state.DATA`` with a fresh
    ``duplicates_data()`` frame stored under id "1".
    """
    data_id, duplicates_type = "1", "show"

    # Grouping on "foo" reports no duplicate groups, so execute() is expected
    # to raise instead of producing a view.
    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(data_id, duplicates_type, {"group": ["foo"]})
        unittest.assertEqual(builder.test(), {})
        with pytest.raises(NoDuplicatesToShowException):
            builder.execute()

    # Grouping on ["foO", "bar"] reports two groups; without a filter the new
    # dataset (id "2") holds all five rows.
    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id, duplicates_type, {"group": ["foO", "bar"]}
        )
        expected_groups = {
            "4, 5": {"count": 3, "filter": ["4", "5"]},
            "4, 6": {"count": 2, "filter": ["4", "6"]},
        }
        unittest.assertEqual(builder.test(), expected_groups)
        new_data_id = builder.execute()
        assert new_data_id == "2"
        unittest.assertEqual(data[new_data_id]["Foo"].tolist(), [1, 2, 3, 4, 5])

    # Adding filter ["4", "5"] narrows the new dataset to that group's rows.
    data = {data_id: duplicates_data()}
    with mock.patch("dtale.global_state.DATA", data):
        builder = DuplicateCheck(
            data_id,
            duplicates_type,
            {"group": ["foO", "bar"], "filter": ["4", "5"]},
        )
        new_data_id = builder.execute()
        assert new_data_id == "2"
        unittest.assertEqual(data[new_data_id]["Foo"].tolist(), [1, 2, 3])
Exemple #6
0
def test_show_duplicates(unittest):
    """Check the "show" duplicate view against the global data store.

    The store is cleared once up front; each scenario then re-registers a
    fresh ``duplicates_data()`` frame under id "1" via ``build_data_inst``.
    """
    import dtale.global_state as global_state

    global_state.clear_store()
    data_id, duplicates_type = "1", "show"

    # Grouping on "foo" reports no duplicate groups, so execute() is expected
    # to raise instead of producing a view.
    data = {data_id: duplicates_data()}
    build_data_inst(data)
    builder = DuplicateCheck(data_id, duplicates_type, {"group": ["foo"]})
    unittest.assertEqual(builder.test(), {})
    with pytest.raises(NoDuplicatesToShowException):
        builder.execute()

    # Grouping on ["foO", "bar"] reports two groups; without a filter the new
    # dataset (id 2) holds all five rows.
    data = {data_id: duplicates_data()}
    build_data_inst(data)
    builder = DuplicateCheck(data_id, duplicates_type, {"group": ["foO", "bar"]})
    expected_groups = {
        "4, 5": {"count": 3, "filter": ["4", "5"]},
        "4, 6": {"count": 2, "filter": ["4", "6"]},
    }
    unittest.assertEqual(builder.test(), expected_groups)
    new_data_id = builder.execute()
    assert new_data_id == 2
    shown_foo = global_state.get_data(new_data_id)["Foo"].tolist()
    unittest.assertEqual(shown_foo, [1, 2, 3, 4, 5])

    # Adding filter ["4", "5"] narrows the new dataset to that group's rows.
    data = {data_id: duplicates_data()}
    build_data_inst(data)
    builder = DuplicateCheck(
        data_id, duplicates_type, {"group": ["foO", "bar"], "filter": ["4", "5"]}
    )
    new_data_id = builder.execute()
    shown_foo = global_state.get_data(new_data_id)["Foo"].tolist()
    unittest.assertEqual(shown_foo, [1, 2, 3])