def test_returns_empty_list_for_unique_columns(self):
    """Distinct header names must produce no duplicate-column reports."""
    raw_csv = (
        "aaa,bbb\n"
        "1,1\n"
        "2,2\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # Sanity-check the header the parser produced before exercising the check.
    assert list(frame.columns) == ["aaa", "bbb"]

    reported = check_duplicate_columns(frame, "file")
    assert reported == []
def test_returns_empty_list_if_column_end_with_number_but_is_not_duplicate(self):
    """A name that merely ends in ``.1`` is not reported as a duplicate.

    The second header, ``taxon taxon f.1``, ends with a numeric suffix but
    is a different name from ``taxon taxon``, so nothing should be flagged.
    """
    raw_csv = (
        "taxon taxon,taxon taxon f.1\n"
        "1,1\n"
        "2,2\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # Both headers are expected to come through unchanged.
    assert list(frame.columns) == ["taxon taxon", "taxon taxon f.1"]

    reported = check_duplicate_columns(frame, "file")
    assert reported == []
def test_returns_False_if_space_columns_have_similiar_names_but_different_values(self):
    """A trailing-space twin with different data is flagged, same_value False.

    The headers ``a`` and ``a `` differ only by a trailing space, and the
    two columns hold different values.
    """
    raw_csv = (
        "a,a \n"
        "1,3\n"
        "2,4\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # The trailing space must survive parsing for this scenario to be valid.
    assert list(frame.columns) == ["a", "a "]

    reported = check_duplicate_columns(frame, "file")
    wanted = [
        {"filename": "file", "bad_column": "a ", "same_value": False},
    ]
    assert reported == wanted
def test_returns_True_if_space_columns_have_similiar_names_and_same_values(self):
    """A trailing-space twin with identical data is flagged, same_value True.

    The headers ``a`` and ``a `` differ only by a trailing space, and the
    two columns hold the same values.
    """
    raw_csv = (
        "a,a \n"
        "1,1\n"
        "2,2\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # The trailing space must survive parsing for this scenario to be valid.
    assert list(frame.columns) == ["a", "a "]

    reported = check_duplicate_columns(frame, "file")
    wanted = [
        {"filename": "file", "bad_column": "a ", "same_value": True},
    ]
    assert reported == wanted
def test_returns_False_if_duplicate_columns_have_different_values(self):
    """Repeated headers with differing data are each flagged, same_value False.

    The header ``a`` appears three times; the test expects the parsed frame
    to expose the repeats as ``a.1`` and ``a.2``, and both to be reported.
    """
    raw_csv = (
        "a,a,a\n"
        "1,3,3\n"
        "2,4,4\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # Repeated headers are expected to arrive with numeric suffixes.
    assert list(frame.columns) == ["a", "a.1", "a.2"]

    reported = check_duplicate_columns(frame, "file")
    wanted = [
        {"filename": "file", "bad_column": column, "same_value": False}
        for column in ("a.1", "a.2")
    ]
    assert reported == wanted
def test_returns_True_if_duplicate_columns_have_same_value(self):
    """Repeated headers with identical data are each flagged, same_value True.

    The header ``a`` appears three times with the same values in every
    column; the test expects ``a.1`` and ``a.2`` to both be reported.
    """
    raw_csv = (
        "a,a,a\n"
        "1,1,1\n"
        "2,2,2\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # Repeated headers are expected to arrive with numeric suffixes.
    assert list(frame.columns) == ["a", "a.1", "a.2"]

    reported = check_duplicate_columns(frame, "file")
    wanted = [
        {"filename": "file", "bad_column": column, "same_value": True}
        for column in ("a.1", "a.2")
    ]
    assert reported == wanted
def test_returns_empty_list_if_space_columns_have_differnt_names(self):
    """A trailing space on an otherwise unique header is not a duplicate.

    The headers ``a`` and ``b `` share no base name, so nothing should be
    reported even though one of them ends with a space.
    """
    raw_csv = (
        "a,b \n"
        "1,1\n"
        "2,2\n"
    )
    frame = pd.read_csv(StringIO(raw_csv))

    # The trailing space must survive parsing for this scenario to be valid.
    assert list(frame.columns) == ["a", "b "]

    reported = check_duplicate_columns(frame, "file")
    assert reported == []