Example #1
0
def test_find_by(data, columns, expected_messages, expected_err_items_count):
    """Check the "Duplicates" result returned by ``duplicates.find_by``.

    A DataFrame is built from ``data`` and passed to ``find_by`` together
    with ``columns``. The outcome must equal the result that
    ``create_result`` builds from ``expected_messages``, the number of rows
    in the frame and ``expected_err_items_count``.
    """
    frame = pd.DataFrame(data)
    # Run the rule first, then build the reference result to compare against.
    actual = duplicates.find_by(frame, columns)
    expected = create_result(
        "Duplicates",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )
    assert actual == expected
Example #2
0
 def run_general_rules(self):
     """Run the general rules on the source items and save every result.

     In order, the saved results are: the garbage-symbols check, the field
     coverage check (computed without the columns whose name starts with
     "_"), the categories, and - only when the instance has a truthy
     ``uniques`` attribute - the duplicates found by those uniques.
     """
     self.save_result(garbage_symbols(self.source_items.df))

     items = self.source_items.df
     # Columns whose name starts with "_" are left out of the coverage check.
     underscore_columns = items.columns[items.columns.str.startswith("_")]
     coverage = coverage_rules.check_fields_coverage(
         items.drop(columns=underscore_columns)
     )
     self.save_result(coverage)

     self.save_result(category_rules.get_categories(items))

     # The duplicates rule is optional: skip it when no uniques are set.
     if not getattr(self, "uniques", None):
         return
     self.save_result(
         duplicate_rules.find_by(self.source_items.df, self.uniques)
     )
Example #3
0
def test_find_by(data, columns, expected_messages):
    """Check the "Duplicates" result returned by ``duplicates.find_by``.

    A DataFrame is built from ``data`` and passed to ``find_by`` together
    with ``columns``. The outcome is compared, through
    ``assert_results_equal``, with the result that ``create_result`` builds
    from ``expected_messages`` and the number of rows in the frame.
    """
    frame = pd.DataFrame(data)
    # Run the rule first, then build the reference result to compare against.
    actual = duplicates.find_by(frame, columns)
    expected = create_result(
        "Duplicates", expected_messages, items_count=len(frame)
    )
    assert_results_equal(actual, expected)