def test_format_index_column(self):
    """format_index_column marks the ``id`` header entry as the index.

    The header starts with an empty ``column_type`` for ``id``; after the
    call the first header entry must carry the type ``"index"``.
    """
    subject = Compare(order=0)

    # (column_name, column_location, column_type) for each header entry.
    column_specs = (
        ("id", 1, ""),
        ("first_name", 2, "mapped"),
        ("last_name", 3, "duplicate"),
        ("last_name", 4, "duplicate"),
        ("requirement", 5, "mapped"),
        ("alternate_name", 6, ""),
    )
    subject.header = [
        {"column_name": name, "column_location": spot, "column_type": kind}
        for name, spot, kind in column_specs
    ]
    subject.index_column_name = [
        {"column_name": "id", "column_location": 1, "column_type": ""},
    ]

    header_validator.format_index_column(subject)

    first_entry = subject.header[0]
    assert first_entry['column_type'] == "index"
def test_set_map_column_number(self):
    """set_map_column_number counts the header entries typed ``mapped``.

    The fixture header contains exactly two ``mapped`` columns
    (``first_name`` and ``requirement``), so the comparable must report 2.
    """
    subject = Compare(order=1)

    # (column_name, column_location, column_type) for each header entry.
    column_specs = (
        ("id", 1, ""),
        ("first_name", 2, "mapped"),
        ("last_name", 3, "duplicate"),
        ("last_name", 4, "duplicate"),
        ("requirement", 5, "mapped"),
        ("alternate_name", 6, ""),
    )
    subject.header = [
        {"column_name": name, "column_location": spot, "column_type": kind}
        for name, spot, kind in column_specs
    ]
    subject.index_column_name = [
        {"column_name": "id", "column_location": 1, "column_type": ""},
    ]

    header_validator.set_map_column_number(subject)

    mapped_total = subject.number_of_mapped_columns
    assert 2 == mapped_total
def test_stringify_index(self, mock_df, mock_index):
    """After stringify_index every value in the ``id`` column is a ``str``."""
    subject = Compare()
    subject.data_frame = mock_df
    subject.index_column_name = mock_index

    index_validator.stringify_index(subject)

    id_values = subject.data_frame['id']
    assert all(isinstance(entry, str) for entry in id_values)
def test_extract_not_checked_column(self):
    """extract_not_checked_column collects the ``not_checked`` columns.

    The header marks ``last_name`` and ``middle_name`` as ``not_checked``;
    after the call ``comparable.not_checked_column`` must equal a frame
    holding exactly those two columns, in that order.
    """
    comparable = Compare()
    comparable.header = [
        {"column_name": "id",
         "column_location": 1, "column_type": ""},
        {"column_name": "first_name",
         "column_location": 2, "column_type": ""},
        {"column_name": "last_name",
         "column_location": 3, "column_type": "not_checked"},
        # middle_name is the fourth column of the frame built below; the
        # fixture previously reused location 3, colliding with last_name
        # and leaving position 4 unassigned.
        {"column_name": "middle_name",
         "column_location": 4, "column_type": "not_checked"},
        {"column_name": "requirement",
         "column_location": 5, "column_type": "mapped"},
        {"column_name": "alternate_name",
         "column_location": 6, "column_type": ""},
    ]
    comparable.index_column_name = [
        {"column_name": "id", "column_location": 1},
    ]

    data = {
        'id': [1, 2, 3],
        'first_name': ['f1', 'f2', 'f3'],
        'last_name': ['l1', 'l2', 'l3'],
        'middle_name': ['m1', 'm2', 'm3'],
        'requirement': ['r1', 'r2', 'r3'],
        'alternate_name': ['a1', 'a2', 'a3'],
    }
    expected = pd.DataFrame({
        'last_name': ['l1', 'l2', 'l3'],
        'middle_name': ['m1', 'm2', 'm3'],
    })
    comparable.data_frame = pd.DataFrame(data)

    data_importer.extract_not_checked_column(comparable)

    assert expected.equals(comparable.not_checked_column)
def test_validate_index_identity(self):
    """validate_index_identity returns None for two identical comparables.

    Both sides share the same header, index definition and source data
    taken from the ``TestIndexValidator`` class fixtures.
    """
    left, right = Compare(), Compare()

    for side in (left, right):
        side.header = TestIndexValidator.mock_header
        side.index_column_name = TestIndexValidator.mock_index

    rows = TestIndexValidator.mock_data_a
    # Each side gets its own DataFrame built from the same raw data.
    for side in (left, right):
        side.data_frame = pd.DataFrame(rows)

    outcome = cell_comparator.validate_index_identity(left, right)
    assert outcome is None
def test_sort_index(self, mock_index):
    """sort_index orders the ``id`` column values ascending as strings."""
    subject = Compare()
    subject.index_column_name = mock_index

    unsorted_ids = {'id': ['1', '2', 'str', 'abc', 'abc']}
    subject.data_frame = pd.DataFrame(data=unsorted_ids, dtype="object")

    index_validator.sort_index(subject)

    sorted_ids = list(subject.data_frame['id'])
    assert ['1', '2', 'abc', 'abc', 'str'] == sorted_ids
def test_strip_index(self, mock_df, mock_index):
    """strip_index leaves the ``mock_df`` ids as ``[1, 2, 'str', 'abc', 'abc']``."""
    subject = Compare()
    subject.data_frame = mock_df
    subject.index_column_name = mock_index

    index_validator.strip_index(subject)

    stripped_ids = list(subject.data_frame["id"])
    assert [1, 2, 'str', 'abc', 'abc'] == stripped_ids
def test_check_for_duplicate_index(self, mock_index):
    """check_for_duplicate_index records every row whose id is repeated.

    ``dup1`` appears twice in the input, so ``duplicate_index`` must hold
    both occurrences and nothing else.
    """
    subject = Compare()
    subject.index_column_name = mock_index

    raw_ids = {'id': ['dup1', '1', '2', 'dup1', '3', '4']}
    subject.data_frame = pd.DataFrame(data=raw_ids, dtype="object")

    index_validator.check_for_duplicate_index(subject)

    flagged = list(subject.duplicate_index['id'].values)
    assert ['dup1', 'dup1'] == flagged
def test_check_for_empty_index(self, mock_index):
    """check_for_empty_index stores the row positions of empty ids.

    Empty strings, ``None`` and ``np.nan`` all count as empty here: the
    expected positions are 0, 1, 5, 6 and 7.
    """
    subject = Compare()
    subject.index_column_name = mock_index

    raw_ids = {'id': ['', '', 'str', 'abc', 'abc', '', None, np.nan]}
    subject.data_frame = pd.DataFrame(data=raw_ids, dtype="object")

    index_validator.check_for_empty_index(subject)

    empty_positions = subject.empty_index
    assert [0, 1, 5, 6, 7] == empty_positions
def test_check_for_disjunctive_index(self, mock_index):
    """check_for_disjunctive_index records ids missing from the other side.

    Ids ``'1'`` and ``'2'`` exist only in the first frame, so they must be
    reported in the first comparable's ``disjunctive_index``.
    """
    first, second = Compare(), Compare()
    first.index_column_name = mock_index
    second.index_column_name = mock_index

    first_rows = {
        'id': ['a', 'b', '1', '2', 'c'],
        "fname": ['p1', 'q1', 'r1', 's1', 'j1'],
    }
    second_rows = {
        'id': ['a', 'b', '3', '4', 'c'],
        "fname": ['p2', 'q2', 'r2', 's2', 'j2'],
    }
    first.data_frame = pd.DataFrame(data=first_rows, dtype="object")
    second.data_frame = pd.DataFrame(data=second_rows, dtype="object")

    index_validator.check_for_disjunctive_index(first, second)

    only_in_first = list(first.disjunctive_index['id'].values)
    assert ['1', '2'] == only_in_first
def test_drop_empty_index(self, mock_index):
    """drop_empty_index removes the rows listed in ``empty_index``.

    Rows 0, 1, 5 and 6 are pre-marked as empty; only the three non-empty
    ids must remain in the frame.
    """
    subject = Compare()
    subject.index_column_name = mock_index

    raw_ids = {'id': ['', '', 'str', 'abc', 'abc', '', None]}
    subject.data_frame = pd.DataFrame(data=raw_ids, dtype="object")
    subject.empty_index = [0, 1, 5, 6]

    index_validator.drop_empty_index(subject)

    remaining = list(subject.data_frame["id"])
    assert ['str', 'abc', 'abc'] == remaining
def test_drop_duplicate_index(self, mock_index):
    """drop_duplicate_index removes every row listed in ``duplicate_index``.

    All three ``dup1`` rows are pre-marked as duplicates; only ``AAA`` and
    ``BBB`` must remain in the frame.
    """
    subject = Compare()
    subject.index_column_name = mock_index

    all_ids = {'id': ['dup1', 'dup1', 'dup1', 'AAA', 'BBB']}
    repeated_ids = {'id': ['dup1', 'dup1', 'dup1']}
    subject.data_frame = pd.DataFrame(data=all_ids, dtype="object")
    subject.duplicate_index = pd.DataFrame(data=repeated_ids, dtype="object")

    index_validator.drop_duplicate_index(subject)

    remaining = list(subject.data_frame['id'].values)
    assert ['AAA', 'BBB'] == remaining
def test_set_index_column_location_when_multiple_index_exist(self):
    """set_index_column_location raises when two columns match the index name.

    The header carries two ``id`` columns (locations 1 and 2), which must
    make the call raise ``AppErrorHandler``.
    """
    subject = Compare()
    subject.index_column_name = [{"column_name": "id"}]
    subject.header = [
        {"column_name": "id", "column_location": location}
        for location in (1, 2)
    ]

    with pytest.raises(AppErrorHandler):
        header_validator.set_index_column_location(subject)
def test_drop_disjunctive_index(self, mock_index):
    """drop_disjunctive_index removes the rows listed in ``disjunctive_index``.

    Ids ``'1'`` and ``'2'`` are pre-marked as disjunctive; only ``a``,
    ``b`` and ``c`` must remain in the frame.
    """
    subject = Compare()
    subject.index_column_name = mock_index

    rows = {
        'id': ['1', '2', 'a', 'b', 'c'],
        "fname": ['p1', 'q1', 'r1', 's1', 'j1'],
    }
    subject.data_frame = pd.DataFrame(data=rows, dtype="object")

    unmatched = {'id': ['1', '2']}
    subject.disjunctive_index = pd.DataFrame(data=unmatched, dtype="object")

    index_validator.drop_disjunctive_index(subject)

    remaining = list(subject.data_frame['id'].values)
    assert ['a', 'b', 'c'] == remaining
def test_remove_white_space_char(self):
    """remove_white_space_char yields the expected cleaned frame.

    The comparable is loaded from
    ``TestIndexValidator.mock_data_white_space_char`` and first passed
    through ``remove_non_printable_char``; the frame left after
    ``remove_white_space_char`` must equal ``cleaned``.
    """
    cleaned = pd.DataFrame({
        'id': [1, 2, 3],
        'first_name': ['f1', 'f 2', 'f3'],
        'last_name': ['l1', 'l2', 'l3'],
        'middle_name': ['m1', 'm2', 'm3'],
        'requirement': ['r1', 'r 2', 'r 3'],
        'alternate_name': ['a 1', 'a 2', 'a 3'],
    })

    subject = Compare()
    subject.header = TestIndexValidator.mock_header
    subject.index_column_name = TestIndexValidator.mock_index
    subject.data_frame = pd.DataFrame(
        TestIndexValidator.mock_data_white_space_char)

    # Same two-step order as before: non-printable characters first,
    # white-space characters second.
    cell_comparator.remove_non_printable_char(subject)
    cell_comparator.remove_white_space_char(subject)

    assert cleaned.equals(subject.data_frame)
def test_strip_index_2(self, mock_index):
    """strip_index trims spaces, tabs and newlines around string ids.

    The non-index ``fname`` column must come through unchanged.
    """
    padded_ids = [' 1 ', '2 ', ' a \n\n', '\t\t\t b \n', ' c ']
    names = ['p1', 'q1', 'r1', 's1', 'j1']

    subject = Compare()
    raw_frame = pd.DataFrame({'id': padded_ids, "fname": names})
    trimmed_frame = pd.DataFrame({
        'id': ['1', '2', 'a', 'b', 'c'],
        "fname": ['p1', 'q1', 'r1', 's1', 'j1'],
    })
    subject.index_column_name = mock_index
    subject.data_frame = raw_frame

    index_validator.strip_index(subject)

    # The check is made on the frame object that was handed to the
    # comparable, not on subject.data_frame.
    assert trimmed_frame.equals(raw_frame)
def test_get_index_name(self, mock_index):
    """get_index_name returns ``'id'`` for the ``mock_index`` fixture."""
    subject = Compare()
    subject.index_column_name = mock_index

    resolved_name = index_validator.get_index_name(subject)

    assert resolved_name == 'id'
def test_create_the_entire_excel_file(self):
    """Run the complete data_exporter sequence for two comparables.

    This test contains no assertions: it passes when every exporter step,
    from naming the output file through closing the workbook, completes
    without raising. It sets ``Compare.output_path`` and
    ``Compare.worksheet_name`` on the class itself.
    """

    def entry(name, location, kind):
        # One header record in the dict layout used throughout the tests.
        return {
            "column_name": name,
            "column_location": location,
            "column_type": kind,
        }

    def both():
        # A fresh list for every exporter call, as in a literal argument.
        return [comparable_1, comparable_2]

    comparable_1 = Compare()
    comparable_2 = Compare()

    # ---- first comparable: layout, then file name, header and index ----
    first_layout = (
        ("order", 0),
        ("start_column", 1),
        ("end_column", 12),
        ("index_column_start", 1),
        ("index_column_end", 1),
        ("checked_column_start", 2),
        ("checked_column_end", 3),
        ("not_checked_column_start", 4),
        ("not_checked_column_end", 5),
        ("disjunctive_column_start", 6),
        ("disjunctive_column_end", 7),
        ("duplicate_column_start", 8),
        ("duplicate_column_end", 9),
        ("unnamed_column_start", 10),
        ("unnamed_column_end", 11),
        ("file_name", "/path/to.my/file/my_first_file___1.csv"),
    )
    for attribute, value in first_layout:
        setattr(comparable_1, attribute, value)

    comparable_1.header = [
        entry("id", 1, "index"),
        entry("first_name", 2, "not_checked"),
        entry("last_name(@duplicate)", 3, "duplicate"),
        entry("last_name(@duplicate)", 4, "duplicate"),
        entry("requirement1", 5, "mapped"),
        entry("requirement2", 6, "mapped"),
        entry("alternate_name", 7, "not_checked"),
        entry("@unnamed", 8, "unnamed"),
        entry("@unnamed", 9, "unnamed"),
        entry("some_name(@NotFound)", 10, "disjunctive"),
        entry("some_name(@NotFound)", 11, "disjunctive"),
    ]
    comparable_1.index_column_name = [entry("id", 1, "")]

    # ---- second comparable: header and index first, then layout ----
    comparable_2.header = [
        entry("id", 1, "index"),
        entry("first_name", 2, "not_checked"),
        # The two duplicate entries are listed with location 4 before 3.
        entry("last_name(@duplicate.4)", 4, "duplicate"),
        entry("last_name(@duplicate.3)", 3, "duplicate"),
        entry("requirement1", 5, "mapped"),
        entry("requirement2", 6, "mapped"),
        entry("alternate_name", 7, "not_checked"),
        entry("@unnamed.8", 8, "unnamed"),
        entry("@unnamed.9", 9, "unnamed"),
        entry("some_name(@NotFound.10)", 10, "disjunctive"),
        entry("some_name(@NotFound.11)", 11, "disjunctive"),
    ]
    comparable_2.index_column_name = [entry("id", 1, "")]

    second_layout = (
        ("order", 1),
        ("start_column", 12),
        ("end_column", 22),
        ("index_column_start", 12),
        ("index_column_end", 12),
        ("checked_column_start", 13),
        ("checked_column_end", 14),
        ("not_checked_column_start", 15),
        ("not_checked_column_end", 16),
        ("disjunctive_column_start", 17),
        ("disjunctive_column_end", 18),
        ("duplicate_column_start", 19),
        ("duplicate_column_end", 20),
        ("unnamed_column_start", 21),
        ("unnamed_column_end", 22),
        ("hide_duplicate_columns", True),
        ("hide_unnamed_columns", True),
        ("hide_disjunctive_columns", True),
        ("file_name", "/path/to.my/file/my_second_file__2.csv"),
    )
    for attribute, value in second_layout:
        setattr(comparable_2, attribute, value)

    # Class-level settings read by the exporter.
    Compare.output_path = "./"
    Compare.worksheet_name = "Master"

    # ---- workbook setup ----
    data_exporter.create_name_for_output_file(comparable_1, comparable_2)
    data_exporter.create_excel_workbook()
    data_exporter.create_excel_worksheet()
    data_exporter.add_local_excel_format(both())
    data_exporter.apply_column_general_format(both())

    # ---- file name row ----
    data_exporter.write_file_name_label()
    data_exporter.write_file_name_title(both())

    # ---- column type row ----
    data_exporter.write_column_type_label()
    data_exporter.write_index_column_type_title(both())
    data_exporter.write_checked_column_type_title(both())
    data_exporter.write_not_checked_column_type_title(both())
    data_exporter.write_disjunctive_column_type_title(both())
    data_exporter.write_duplicate_column_type_title(both())
    data_exporter.write_unnamed_column_type_title(both())

    # ---- hide conditions ----
    data_exporter.apply_checked_column_hide_condition(both())
    data_exporter.apply_not_checked_column_hide_condition(both())
    data_exporter.apply_disjunctive_column_hide_condition(both())
    data_exporter.apply_duplicate_column_hide_condition(both())
    data_exporter.apply_unnamed_column_hide_condition(both())

    # ---- column name row ----
    data_exporter.write_column_name_label()
    data_exporter.write_index_column_name(both())
    data_exporter.write_checked_column_name(both())
    data_exporter.write_not_checked_column_name(both())
    data_exporter.write_disjunctive_column_name(both())
    data_exporter.write_duplicate_column_name(both())
    data_exporter.write_unnamed_column_name(both())

    data_exporter.close_excel_workbook()
def test_set_index_column_location(self):
    """set_index_column_location returns None for a single matching column."""
    subject = Compare()
    subject.index_column_name = [{"column_name": "id"}]
    subject.header = [{"column_name": "id", "column_location": 1}]

    outcome = header_validator.set_index_column_location(subject)

    assert outcome is None
def test_check_for_index_name_existence_when_index_does_not_exist(self):
    """check_for_index_name_existence raises when the index name is absent.

    The index is named ``id`` but the header only contains ``id1``, which
    must make the call raise ``AppErrorHandler``.
    """
    subject = Compare()
    subject.index_column_name = [{"column_name": "id"}]
    subject.header = [{"column_name": "id1", "column_location": 1}]

    with pytest.raises(AppErrorHandler):
        header_validator.check_for_index_name_existence(subject)
def test_check_for_index_name_existence(self):
    """check_for_index_name_existence returns None when the index is present."""
    subject = Compare()
    subject.index_column_name = [{"column_name": "id"}]
    subject.header = [{"column_name": "id", "column_location": 1}]

    outcome = header_validator.check_for_index_name_existence(subject)

    assert outcome is None