def test_find_by(data, columns, expected_messages, expected_err_items_count):
    """Check that ``duplicates.find_by`` yields the expected "Duplicates" result.

    Args:
        data: Input handed to ``pd.DataFrame`` to build the frame under test.
        columns: Forwarded to ``duplicates.find_by`` as its second argument.
        expected_messages: Messages the expected result is created with.
        expected_err_items_count: Value passed as ``err_items_count`` when
            building the expected result.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    frame = pd.DataFrame(data)

    # Compute the actual result first, then the expected one, so the calls
    # happen in the same order as a direct ``actual == expected`` comparison.
    actual = duplicates.find_by(frame, columns)
    expected = create_result(
        "Duplicates",
        expected_messages,
        items_count=len(frame),
        err_items_count=expected_err_items_count,
    )

    assert actual == expected
def run_general_rules(self):
    """Run the general rules against the source items and save every result.

    In order, this saves the results of:
      1. ``garbage_symbols`` on the source items frame;
      2. ``coverage_rules.check_fields_coverage`` on the frame with all
         columns whose name starts with ``"_"`` dropped;
      3. ``category_rules.get_categories`` on the full frame;
      4. ``duplicate_rules.find_by`` on the frame and ``self.uniques`` --
         only when the ``uniques`` attribute exists and is truthy.
    """
    self.save_result(garbage_symbols(self.source_items.df))

    df = self.source_items.df

    # Coverage is checked without the underscore-prefixed columns.
    underscore_columns = df.columns[df.columns.str.startswith("_")]
    coverage_input = df.drop(columns=underscore_columns)
    self.save_result(coverage_rules.check_fields_coverage(coverage_input))

    self.save_result(category_rules.get_categories(df))

    # ``uniques`` is optional: skip the duplicates rule when it is missing
    # or empty.
    if not getattr(self, "uniques", None):
        return

    self.save_result(
        duplicate_rules.find_by(self.source_items.df, self.uniques)
    )
def test_find_by(data, columns, expected_messages):
    """Check that ``duplicates.find_by`` yields the expected "Duplicates" result.

    Args:
        data: Input handed to ``pd.DataFrame`` to build the frame under test.
        columns: Forwarded to ``duplicates.find_by`` as its second argument.
        expected_messages: Messages the expected result is created with.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    frame = pd.DataFrame(data)

    # Actual result is computed before the expected one, matching the
    # left-to-right argument evaluation of a single nested call.
    actual = duplicates.find_by(frame, columns)
    expected = create_result(
        "Duplicates",
        expected_messages,
        items_count=len(frame),
    )

    assert_results_equal(actual, expected)