Beispiel #1
0
    def test_set_map_column_number(self):
        """Exercise ``header_validator.set_map_column_number``.

        The header fixture holds six entries, two of which carry the
        ``"mapped"`` column type; the test expects
        ``number_of_mapped_columns`` to be 2 after the call.
        """
        # (column_name, column_location, column_type)
        column_specs = [
            ("id", 1, ""),
            ("first_name", 2, "mapped"),
            ("last_name", 3, "duplicate"),
            ("last_name", 4, "duplicate"),
            ("requirement", 5, "mapped"),
            ("alternate_name", 6, ""),
        ]
        comparable = Compare(order=1)
        comparable.header = [
            {"column_name": name, "column_location": location, "column_type": kind}
            for name, location, kind in column_specs
        ]
        comparable.index_column_name = [
            {"column_name": "id", "column_location": 1, "column_type": ""},
        ]

        header_validator.set_map_column_number(comparable)

        assert 2 == comparable.number_of_mapped_columns
    def test_extract_not_checked_column(self):
        """Exercise ``data_importer.extract_not_checked_column``.

        The header types ``last_name`` and ``middle_name`` as
        ``"not_checked"``; the test expects ``comparable.not_checked_column``
        to equal a frame holding exactly those two columns of the input data.
        """
        # (column_name, column_location, column_type)
        header_rows = [
            ("id", 1, ""),
            ("first_name", 2, ""),
            ("last_name", 3, "not_checked"),
            # NOTE(review): location 3 repeats last_name's location -
            # confirm whether 4 was intended.
            ("middle_name", 3, "not_checked"),
            ("requirement", 5, "mapped"),
            ("alternate_name", 6, ""),
        ]
        source_columns = {
            'id': [1, 2, 3],
            'first_name': ['f1', 'f2', 'f3'],
            'last_name': ['l1', 'l2', 'l3'],
            'middle_name': ['m1', 'm2', 'm3'],
            'requirement': ['r1', 'r2', 'r3'],
            'alternate_name': ['a1', 'a2', 'a3'],
        }

        comparable = Compare()
        comparable.header = [
            {"column_name": name, "column_location": location, "column_type": kind}
            for name, location, kind in header_rows
        ]
        comparable.index_column_name = [{"column_name": "id", "column_location": 1}]
        comparable.data_frame = pd.DataFrame(source_columns)

        data_importer.extract_not_checked_column(comparable)

        expected = pd.DataFrame({
            'last_name': ['l1', 'l2', 'l3'],
            'middle_name': ['m1', 'm2', 'm3'],
        })
        assert expected.equals(comparable.not_checked_column)
Beispiel #3
0
    def test_format_index_column(self):
        """Exercise ``header_validator.format_index_column``.

        ``id`` is declared as the index column; after the call the first
        header entry is expected to carry the column type ``"index"``.
        """
        # (column_name, column_location, column_type)
        column_specs = [
            ("id", 1, ""),
            ("first_name", 2, "mapped"),
            ("last_name", 3, "duplicate"),
            ("last_name", 4, "duplicate"),
            ("requirement", 5, "mapped"),
            ("alternate_name", 6, ""),
        ]
        comparable = Compare(order=0)
        comparable.header = [
            {"column_name": name, "column_location": location, "column_type": kind}
            for name, location, kind in column_specs
        ]
        comparable.index_column_name = [
            {"column_name": "id", "column_location": 1, "column_type": ""},
        ]

        header_validator.format_index_column(comparable)

        first_entry = comparable.header[0]
        assert first_entry['column_type'] == "index"
Beispiel #4
0
 def test_set_start_end_index_column(self):
     """Exercise ``header_validator.set_start_end_index_column``.

     With ``start_column == 1`` and a single index column, both
     ``index_column_start`` and ``index_column_end`` are expected to be 1.
     """
     comparable = Compare(order=0)
     comparable.start_column = 1
     comparable.number_of_index_column = 1

     header_validator.set_start_end_index_column(comparable)

     expected_start, expected_end = 1, 1
     assert expected_start == comparable.index_column_start
     assert expected_end == comparable.index_column_end
 def test_check_for_delimiter(self):
     """``check_for_delimiter`` is expected to return ``None`` for
     ``simple_csv_file.csv`` read as utf-8 with a comma delimiter."""
     csv_path = os.path.join(self.MOCK_DATA_DIR, "simple_csv_file.csv")

     comparable = Compare()
     comparable.file_name = csv_path
     comparable.encoding = "utf-8"
     comparable.delimiter = ","

     outcome = input_validator.check_for_delimiter(comparable)
     assert outcome is None
 def test_check_for_input_file_existence_when_file_exist(self, mock_isfile):
     """``check_for_input_file_existence`` is expected to return ``None``
     when the injected mock reports ``True``.

     ``mock_isfile`` is supplied from outside this view (presumably a
     patch of ``os.path.isfile`` - confirm against the decorator).
     """
     file_parameters = mock_input_dict.parameter_for_input_file_existence

     comparator_obj = Compare()
     comparator_obj.file_name = file_parameters["file_1_name"]
     mock_isfile.return_value = True

     outcome = input_validator.check_for_input_file_existence(comparator_obj)
     assert outcome is None
Beispiel #7
0
 def test_set_map_column_location(self):
     """Exercise ``set_map_column_location`` when every mapped column exists.

     ``Compare.map_columns`` is a class-level attribute, so it is replaced
     only for the duration of this test (``patch.object`` restores the
     previous state on exit) instead of being assigned permanently, which
     would leak the fixture into every test that runs afterwards.

     After the call, each header entry is expected to equal the first
     element of the corresponding ``map_columns`` pair.
     """
     from unittest.mock import patch  # local import keeps the block self-contained

     mapped_pairs = [
         ({"column_name": "first_name"}, {"column_name": "first_name"}),
         ({"column_name": "last_name"}, {"column_name": "last_name"}),
     ]
     # create=True: tolerate the attribute not being declared on the class.
     with patch.object(Compare, "map_columns", mapped_pairs, create=True):
         comparable = Compare(order=0)
         comparable.header = [
             {"column_name": "first_name", "column_location": 2},
             {"column_name": "last_name", "column_location": 3},
         ]

         header_validator.set_map_column_location(comparable)

         assert comparable.header[0] == Compare.map_columns[0][0]
         assert comparable.header[1] == Compare.map_columns[1][0]
 def test_stringify_index(self, mock_df, mock_index):
     """After ``index_validator.stringify_index`` every value in the
     ``'id'`` column of the data frame is expected to be a ``str``.

     ``mock_df`` and ``mock_index`` are injected from outside this view.
     """
     comparable = Compare()
     comparable.data_frame = mock_df
     comparable.index_column_name = mock_index

     index_validator.stringify_index(comparable)

     id_values = comparable.data_frame['id']
     assert all(isinstance(entry, str) for entry in id_values)
Beispiel #9
0
 def test_set_map_column_location_when_not_exist(self):
     """Exercise ``set_map_column_location`` when mapped columns are missing.

     The header names (``first_name1``, ``last_name1``) do not match the
     names in ``Compare.map_columns``; the call is expected to raise
     ``AppErrorHandler``.

     ``Compare.map_columns`` is a class-level attribute, so it is replaced
     only for the duration of this test (``patch.object`` restores the
     previous state on exit) rather than assigned permanently, which would
     leak the fixture into later tests.
     """
     from unittest.mock import patch  # local import keeps the block self-contained

     mapped_pairs = [
         ({"column_name": "first_name"}, {"column_name": "first_name"}),
         ({"column_name": "last_name"}, {"column_name": "last_name"}),
     ]
     # create=True: tolerate the attribute not being declared on the class.
     with patch.object(Compare, "map_columns", mapped_pairs, create=True):
         comparable = Compare(order=0)
         comparable.header = [
             {"column_name": "first_name1", "column_location": 2},
             {"column_name": "last_name1", "column_location": 3},
         ]
         with pytest.raises(AppErrorHandler):
             header_validator.set_map_column_location(comparable)
 def test_check_for_non_comma_delimiter(self):
     """``check_for_delimiter`` is expected to raise ``AppErrorHandler``
     for ``none_csv_file.txt`` when a comma delimiter is configured."""
     text_path = os.path.join(self.MOCK_DATA_DIR, "none_csv_file.txt")

     comparable = Compare()
     comparable.file_name = text_path
     comparable.encoding = "utf-8"
     comparable.delimiter = ","

     with pytest.raises(AppErrorHandler):
         input_validator.check_for_delimiter(comparable)
 def test_freeze_pandas_index(self):
     """After ``data_importer.freeze_pandas_index`` the frame assigned to
     ``original_data_frame`` is expected to carry an extra column, named
     by ``Field.pandas_original_index.value``, holding 0, 1, 2."""
     index_column = Field.pandas_original_index.value
     frame = pd.DataFrame({'a': ['x', 'y', 'z']})
     expected = pd.DataFrame({'a': ['x', 'y', 'z'], index_column: [0, 1, 2]})

     comparable = Compare()
     comparable.original_data_frame = frame

     data_importer.freeze_pandas_index(comparable)

     # The same frame object is inspected, so the change must be in place.
     assert frame.equals(expected)
    def test_strip_index(self, mock_df, mock_index):
        """After ``index_validator.strip_index`` the ``"id"`` column is
        expected to read ``[1, 2, 'str', 'abc', 'abc']``.

        ``mock_df`` and ``mock_index`` are injected from outside this view.
        """
        comparable = Compare()
        comparable.index_column_name = mock_index
        comparable.data_frame = mock_df

        index_validator.strip_index(comparable)

        stripped_ids = list(comparable.data_frame["id"])
        assert [1, 2, 'str', 'abc', 'abc'] == stripped_ids
 def test_sort_index(self, mock_index):
     """After ``index_validator.sort_index`` the ``'id'`` column is
     expected in the order ``'1', '2', 'abc', 'abc', 'str'``."""
     unsorted_ids = {'id': ['1', '2', 'str', 'abc', 'abc']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=unsorted_ids, dtype="object")

     index_validator.sort_index(comparable)

     sorted_ids = list(comparable.data_frame['id'])
     assert ['1', '2', 'abc', 'abc', 'str'] == sorted_ids
 def test_check_for_duplicate_index(self, mock_index):
     """After ``index_validator.check_for_duplicate_index`` the
     ``duplicate_index`` frame is expected to hold both ``'dup1'`` rows."""
     id_column = {'id': ['dup1', '1', '2', 'dup1', '3', '4']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=id_column, dtype="object")

     index_validator.check_for_duplicate_index(comparable)

     reported = list(comparable.duplicate_index['id'].values)
     assert ['dup1', 'dup1'] == reported
 def test_check_for_empty_index(self, mock_index):
     """After ``index_validator.check_for_empty_index`` the
     ``empty_index`` attribute is expected to list the positions of the
     empty-string, ``None`` and NaN entries: ``[0, 1, 5, 6, 7]``."""
     id_column = {'id': ['', '', 'str', 'abc', 'abc', '', None, np.nan]}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=id_column, dtype="object")

     index_validator.check_for_empty_index(comparable)

     assert [0, 1, 5, 6, 7] == comparable.empty_index
 def test_check_for_output_file_write_access_when_file_is_not_writeable(
         self, mock_os_access):
     """``check_for_file_write_access`` is expected to raise
     ``AppErrorHandler`` when the injected mock reports ``False``.

     ``mock_os_access`` is supplied from outside this view (presumably a
     patch of ``os.access`` - confirm against the decorator).
     """
     comparator_obj = Compare()
     comparator_obj.file_name = mock_input_dict.parameter_for_input_file_existence[
         "file_1_name"]
     mock_os_access.return_value = False

     with pytest.raises(AppErrorHandler):
         input_validator.check_for_file_write_access(comparator_obj)

     # Checked outside the ``raises`` block: a statement placed after the
     # raising call inside the block is never executed. The earlier
     # expectation compared against a literal string rather than real call
     # arguments, so only "the mock was consulted" is asserted here.
     # NOTE(review): tighten to assert_called_once_with(<path>, os.W_OK)
     # once the exact call made by the validator is confirmed.
     mock_os_access.assert_called()
 def test_get_file_encoding_for_failing_encoding_cases(self):
     """For every name yielded by the ``failing_encoding_names.csv``
     helper call, ``set_file_encoding`` on ``<name>.csv`` is expected to
     raise ``AppErrorHandler``."""
     encoding_names = self.create_mock_data_file_for_encoding_types(
         "failing_encoding_names.csv")
     comparator_obj = Compare()

     for encoding_name in encoding_names:
         csv_path = os.path.join(self.MOCK_DATA_DIR, f'{encoding_name}.csv')
         with pytest.raises(AppErrorHandler):
             comparator_obj.file_name = csv_path
             input_validator.set_file_encoding(comparator_obj)
 def test_drop_empty_index(self, mock_index):
     """With ``empty_index`` preset to ``[0, 1, 5, 6]``,
     ``index_validator.drop_empty_index`` is expected to leave only
     ``'str', 'abc', 'abc'`` in the ``"id"`` column."""
     id_column = {'id': ['', '', 'str', 'abc', 'abc', '', None]}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=id_column, dtype="object")
     comparable.empty_index = [0, 1, 5, 6]

     index_validator.drop_empty_index(comparable)

     remaining_ids = list(comparable.data_frame["id"])
     assert ['str', 'abc', 'abc'] == remaining_ids
 def test_check_for_input_file_existence_when_file_does_not_exist(
         self, mock_isfile):
     """``check_for_input_file_existence`` is expected to raise
     ``AppErrorHandler`` when the injected mock reports ``False``.

     ``mock_isfile`` is supplied from outside this view (presumably a
     patch of ``os.path.isfile`` - confirm against the decorator).
     """
     comparator_obj = Compare()
     comparator_obj.file_name = mock_input_dict.parameter_for_input_file_existence[
         "file_1_name"]
     mock_isfile.return_value = False

     with pytest.raises(AppErrorHandler):
         input_validator.check_for_input_file_existence(comparator_obj)

     # Checked outside the ``raises`` block: a statement placed after the
     # raising call inside the block is never executed. The earlier
     # expectation compared against a literal string rather than real call
     # arguments, so only "the mock was consulted" is asserted here.
     # NOTE(review): tighten to assert_called_once_with(<path>) once the
     # exact call made by the validator is confirmed.
     mock_isfile.assert_called()
Beispiel #20
0
def set_start_end_checked_column(comparable: Compare):
    """Set ``checked_column_start`` / ``checked_column_end`` on *comparable*.

    "Checked" columns are the regular plus the mapped columns. When there
    are none, neither attribute is touched. Otherwise the range starts
    right after the index columns (``start_column +
    number_of_index_column``) and its end is inclusive.
    """
    checked_total = (comparable.number_of_regular_columns
                     + comparable.number_of_mapped_columns)
    if checked_total <= 0:
        return

    # The original spelled this offset as "+ 1 - 1", which cancels out.
    first_checked = comparable.start_column + comparable.number_of_index_column
    comparable.checked_column_start = first_checked
    comparable.checked_column_end = first_checked + checked_total - 1
 def test_verify_file_encoding_when_can_not_detect_encoding(self):
     """For every name yielded by the ``failing_encoding_names.csv``
     helper call, ``check_for_file_encoding`` on ``<name>.csv`` with the
     encoding forced to ``'ascii'`` is expected to raise
     ``AppErrorHandler``."""
     encoding_names = self.create_mock_data_file_for_encoding_types(
         "failing_encoding_names.csv")
     comparator_obj = Compare()

     for encoding_name in encoding_names:
         csv_path = os.path.join(self.MOCK_DATA_DIR, f'{encoding_name}.csv')
         with pytest.raises(AppErrorHandler):
             comparator_obj.file_name = csv_path
             comparator_obj.encoding = 'ascii'
             input_validator.check_for_file_encoding(comparator_obj)
 def test_verify_file_encoding_when_can_detect_encoding(self):
     """For every name yielded by the ``failing_encoding_names.csv``
     helper call, ``check_for_file_encoding`` on ``<name>.csv`` with the
     encoding set to that same name is expected to return ``None``."""
     encoding_names = self.create_mock_data_file_for_encoding_types(
         "failing_encoding_names.csv")
     comparator_obj = Compare()

     for encoding_name in encoding_names:
         comparator_obj.file_name = os.path.join(
             self.MOCK_DATA_DIR, f'{encoding_name}.csv')
         comparator_obj.encoding = encoding_name

         outcome = input_validator.check_for_file_encoding(comparator_obj)
         assert outcome is None
 def test_drop_duplicate_index(self, mock_index):
     """With ``duplicate_index`` preset to the three ``'dup1'`` rows,
     ``index_validator.drop_duplicate_index`` is expected to leave only
     ``'AAA'`` and ``'BBB'`` in the ``'id'`` column."""
     all_ids = {'id': ['dup1', 'dup1', 'dup1', 'AAA', 'BBB']}
     duplicated_ids = {'id': ['dup1', 'dup1', 'dup1']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=all_ids, dtype="object")
     comparable.duplicate_index = pd.DataFrame(data=duplicated_ids, dtype="object")

     index_validator.drop_duplicate_index(comparable)

     remaining_ids = list(comparable.data_frame['id'].values)
     assert ['AAA', 'BBB'] == remaining_ids
    def comparable(self, request):
        """Build a ``Compare`` whose attributes are taken from ``request.param``.

        Every key/value pair of ``request.param`` is set as an attribute.
        If a ``"file_name"`` key is present, ``file_name`` is then
        overwritten with that name joined onto the ``mock_data`` directory
        located next to this module's parent directory.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        mock_data_dir = os.path.join(module_dir, os.pardir, 'mock_data')

        comparable_obj = Compare()
        for attr_name, attr_value in request.param.items():
            setattr(comparable_obj, attr_name, attr_value)

        if "file_name" not in request.param:
            return comparable_obj

        # Replace the bare name set in the loop above with a full path.
        comparable_obj.file_name = os.path.join(
            mock_data_dir, f'{request.param["file_name"]}')
        return comparable_obj
Beispiel #25
0
def set_start_column(comparable_a: Compare, comparable_b: Compare):
    """Assign ``start_column`` on both comparables.

    ``comparable_a`` always starts at column 1. ``comparable_b`` starts at
    the total number of columns counted on ``comparable_a`` (index,
    unnamed, duplicate, disjunctive, not-checked, mapped, regular) plus 1.
    """
    comparable_a.start_column = 1  # not supporting multi index

    columns_of_a = sum((
        comparable_a.number_of_index_column,
        comparable_a.number_of_unnamed_columns,
        comparable_a.number_of_duplicate_columns,
        comparable_a.number_of_disjunctive_columns,
        comparable_a.number_of_not_checked_columns,
        comparable_a.number_of_mapped_columns,
        comparable_a.number_of_regular_columns,
    ))
    # The first column is for labels and xlsxwriter is 0 based
    comparable_b.start_column = columns_of_a + 1
Beispiel #26
0
 def test_set_index_column_location_when_multiple_index_exist(self):
     """``set_index_column_location`` is expected to raise
     ``AppErrorHandler`` when the index column name ``id`` appears twice
     in the header (locations 1 and 2)."""
     comparable = Compare()
     comparable.index_column_name = [{"column_name": "id"}]
     comparable.header = [
         {"column_name": "id", "column_location": location}
         for location in (1, 2)
     ]

     with pytest.raises(AppErrorHandler):
         header_validator.set_index_column_location(comparable)
Beispiel #27
0
 def test_set_start_column(self):
     """Exercise ``header_validator.set_start_column``.

     ``comparable_a`` gets one index column and two columns of every other
     kind; the test expects ``comparable_a.start_column == 1`` and
     ``comparable_b.start_column == 14``.
     """
     comparable_a = Compare(order=0)
     comparable_b = Compare(order=1)

     comparable_a.number_of_index_column = 1  # not supporting multi index
     for counter_name in (
             "number_of_unnamed_columns",
             "number_of_duplicate_columns",
             "number_of_disjunctive_columns",
             "number_of_not_checked_columns",
             "number_of_mapped_columns",
             "number_of_regular_columns",
     ):
         setattr(comparable_a, counter_name, 2)

     header_validator.set_start_column(comparable_a, comparable_b)

     assert 14 == comparable_b.start_column
     assert 1 == comparable_a.start_column
Beispiel #28
0
    def test_set_start_end_unnamed_column_2(self):
        """Exercise ``header_validator.set_start_end_unnamed_column``.

        With ``start_column == 14``, one index column and two columns of
        every other kind, the unnamed range is expected to be 25..26.
        """
        comparable = Compare(order=0)
        comparable.start_column = 14
        comparable.number_of_index_column = 1

        for counter_name in (
                "number_of_mapped_columns",
                "number_of_regular_columns",
                "number_of_not_checked_columns",
                "number_of_disjunctive_columns",
                "number_of_duplicate_columns",
                "number_of_unnamed_columns",
        ):
            setattr(comparable, counter_name, 2)

        header_validator.set_start_end_unnamed_column(comparable)

        assert 25 == comparable.unnamed_column_start
        assert 26 == comparable.unnamed_column_end
Beispiel #29
0
def set_start_end_duplicate_column(comparable: Compare):
    """Set ``duplicate_column_start`` / ``duplicate_column_end`` on *comparable*.

    When there are no duplicate columns, neither attribute is touched.
    Otherwise the range starts after the index, regular, mapped,
    not-checked and disjunctive columns, and its end is inclusive.
    """
    duplicate_total = comparable.number_of_duplicate_columns
    if duplicate_total <= 0:
        return

    columns_before = sum((
        comparable.number_of_index_column,
        comparable.number_of_regular_columns,
        comparable.number_of_mapped_columns,
        comparable.number_of_not_checked_columns,
        comparable.number_of_disjunctive_columns,
    ))
    # The original spelled this offset with a trailing "+ 1 - 1", which
    # cancels out.
    first_duplicate = comparable.start_column + columns_before
    comparable.duplicate_column_start = first_duplicate
    comparable.duplicate_column_end = first_duplicate + duplicate_total - 1
def add_local_excel_format(comparable: Compare):
    """Register the per-comparable formats on ``comparable.workbook``.

    For each format, the entry of the matching ``excel_schema`` collection
    selected by ``comparable.order`` is passed to ``workbook.add_format``
    and the result is stored on *comparable*.
    """
    # (attribute set on comparable, attribute read from excel_schema);
    # note that the first pair uses different names on the two sides.
    format_bindings = (
        ("column_general_format", "general_column_format"),
        ("header_format_left_border", "header_format_left_border"),
        ("header_format_left_bottom_border", "header_format_left_bottom_border"),
        ("header_format_bottom_border", "header_format_bottom_border"),
        ("header_format", "header_format"),
    )
    for target_attr, schema_attr in format_bindings:
        setattr(
            comparable,
            target_attr,
            comparable.workbook.add_format(
                getattr(excel_schema, schema_attr)[comparable.order]),
        )