def drop_duplicate_column(comparable: Compare):
    """Remove the columns typed as duplicate from ``comparable.data_frame``.

    A column is selected when its entry in ``comparable.header`` carries the
    column type ``Field.duplicate``. The frame stored on ``comparable`` is
    replaced by the result of ``DataFrame.drop``.
    """
    name_key = Field.column_name.value
    type_key = Field.column_type.value
    duplicate_type = Field.duplicate.value

    columns_to_remove = []
    for entry in comparable.header:
        if entry[type_key] == duplicate_type:
            columns_to_remove.append(entry[name_key])

    trimmed = comparable.data_frame.drop(columns=columns_to_remove)
    comparable.data_frame = trimmed
def drop_not_checked_column(comparable: Compare):
    """Remove the columns typed as not-checked from ``comparable.data_frame``.

    A column is selected when its entry in ``comparable.header`` carries the
    column type ``Field.not_checked``. The frame stored on ``comparable`` is
    replaced by the result of ``DataFrame.drop``.
    """
    name_key = Field.column_name.value
    type_key = Field.column_type.value
    not_checked_type = Field.not_checked.value

    columns_to_remove = []
    for entry in comparable.header:
        if entry[type_key] == not_checked_type:
            columns_to_remove.append(entry[name_key])

    trimmed = comparable.data_frame.drop(columns=columns_to_remove)
    comparable.data_frame = trimmed
    def test_extract_not_checked_column(self):
        """Expect only the ``not_checked`` columns in ``not_checked_column``."""
        # NOTE(review): "last_name" and "middle_name" both declare
        # column_location 3 -- possibly a typo for 4; confirm before changing.
        header = [
            {"column_name": "id", "column_location": 1, "column_type": ""},
            {"column_name": "first_name", "column_location": 2, "column_type": ""},
            {"column_name": "last_name", "column_location": 3, "column_type": "not_checked"},
            {"column_name": "middle_name", "column_location": 3, "column_type": "not_checked"},
            {"column_name": "requirement", "column_location": 5, "column_type": "mapped"},
            {"column_name": "alternate_name", "column_location": 6, "column_type": ""},
        ]
        index = [{"column_name": "id", "column_location": 1}]
        rows = {
            'id': [1, 2, 3],
            'first_name': ['f1', 'f2', 'f3'],
            'last_name': ['l1', 'l2', 'l3'],
            'middle_name': ['m1', 'm2', 'm3'],
            'requirement': ['r1', 'r2', 'r3'],
            'alternate_name': ['a1', 'a2', 'a3'],
        }
        # Only the two columns typed "not_checked" are expected to be extracted.
        expected = pd.DataFrame({
            'last_name': ['l1', 'l2', 'l3'],
            'middle_name': ['m1', 'm2', 'm3'],
        })

        comparable = Compare()
        comparable.header = header
        comparable.index_column_name = index
        comparable.data_frame = pd.DataFrame(rows)

        data_importer.extract_not_checked_column(comparable)
        assert expected.equals(comparable.not_checked_column)
Beispiel #4
0
def drop_disjunctive_column(comparable: Compare):
    """Remove the columns typed as disjunctive from ``comparable.data_frame``.

    A column is selected when its entry in ``comparable.header`` carries the
    column type ``Field.disjunctive``. The frame stored on ``comparable`` is
    replaced by the result of ``DataFrame.drop``.
    """
    name_key = Field.column_name.value
    type_key = Field.column_type.value
    disjunctive_type = Field.disjunctive.value

    columns_to_remove = []
    for entry in comparable.header:
        if entry[type_key] == disjunctive_type:
            columns_to_remove.append(entry[name_key])

    trimmed = comparable.data_frame.drop(columns=columns_to_remove)
    comparable.data_frame = trimmed
 def test_stringify_index(self, mock_df, mock_index):
     """After ``stringify_index`` every value in the 'id' column is a ``str``."""
     comparable = Compare()
     comparable.data_frame = mock_df
     comparable.index_column_name = mock_index

     index_validator.stringify_index(comparable)

     id_values = comparable.data_frame['id']
     assert all(isinstance(value, str) for value in id_values)
    def test_validate_index_identity(self):
        """Expect ``None`` when both comparables share header, index and data."""
        pair = []
        for _ in range(2):
            # Each comparable gets its own DataFrame built from the same mock data.
            comparable = Compare()
            comparable.header = TestIndexValidator.mock_header
            comparable.index_column_name = TestIndexValidator.mock_index
            comparable.data_frame = pd.DataFrame(TestIndexValidator.mock_data_a)
            pair.append(comparable)
        first, second = pair

        outcome = cell_comparator.validate_index_identity(first, second)
        assert outcome is None
 def test_sort_index(self, mock_index):
     """``sort_index`` is expected to order the ids '1', '2', 'abc', 'abc', 'str'."""
     unsorted_ids = {'id': ['1', '2', 'str', 'abc', 'abc']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=unsorted_ids, dtype="object")

     index_validator.sort_index(comparable)

     sorted_ids = list(comparable.data_frame['id'])
     assert ['1', '2', 'abc', 'abc', 'str'] == sorted_ids
    def test_strip_index(self, mock_df, mock_index):
        """After ``strip_index`` the 'id' column reads 1, 2, 'str', 'abc', 'abc'."""
        comparable = Compare()
        comparable.data_frame = mock_df
        comparable.index_column_name = mock_index
        index_validator.strip_index(comparable)

        stripped_ids = list(comparable.data_frame["id"])
        assert [1, 2, 'str', 'abc', 'abc'] == stripped_ids
 def test_check_for_duplicate_index(self, mock_index):
     """Both 'dup1' rows are expected in ``comparable.duplicate_index``."""
     ids = {'id': ['dup1', '1', '2', 'dup1', '3', '4']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=ids, dtype="object")

     index_validator.check_for_duplicate_index(comparable)

     found = list(comparable.duplicate_index['id'].values)
     assert ['dup1', 'dup1'] == found
 def test_check_for_empty_index(self, mock_index):
     """Empty-string, ``None`` and NaN ids are expected at 0, 1, 5, 6 and 7."""
     ids = {'id': ['', '', 'str', 'abc', 'abc', '', None, np.nan]}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=ids, dtype="object")

     index_validator.check_for_empty_index(comparable)

     reported = comparable.empty_index
     assert [0, 1, 5, 6, 7] == reported
 def test_drop_empty_index(self, mock_index):
     """Rows listed in ``empty_index`` are dropped, leaving 'str', 'abc', 'abc'."""
     ids = {'id': ['', '', 'str', 'abc', 'abc', '', None]}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=ids, dtype="object")
     # Row labels of the empty / None ids in the frame above.
     comparable.empty_index = [0, 1, 5, 6]

     index_validator.drop_empty_index(comparable)

     remaining = list(comparable.data_frame["id"])
     assert ['str', 'abc', 'abc'] == remaining
 def test_check_for_disjunctive_index(self, mock_index):
     """Ids '1' and '2', absent from the second frame, are expected in
     the first comparable's ``disjunctive_index``."""
     first_rows = {
         'id': ['a', 'b', '1', '2', 'c'],
         "fname": ['p1', 'q1', 'r1', 's1', 'j1'],
     }
     second_rows = {
         'id': ['a', 'b', '3', '4', 'c'],
         "fname": ['p2', 'q2', 'r2', 's2', 'j2'],
     }

     first = Compare()
     second = Compare()
     first.index_column_name = mock_index
     second.index_column_name = mock_index
     first.data_frame = pd.DataFrame(data=first_rows, dtype="object")
     second.data_frame = pd.DataFrame(data=second_rows, dtype="object")

     index_validator.check_for_disjunctive_index(first, second)

     only_in_first = list(first.disjunctive_index['id'].values)
     assert ['1', '2'] == only_in_first
 def test_drop_duplicate_index(self, mock_index):
     """Rows recorded in ``duplicate_index`` are dropped, leaving 'AAA', 'BBB'."""
     all_rows = {'id': ['dup1', 'dup1', 'dup1', 'AAA', 'BBB']}
     duplicate_rows = {'id': ['dup1', 'dup1', 'dup1']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=all_rows, dtype="object")
     comparable.duplicate_index = pd.DataFrame(data=duplicate_rows,
                                               dtype="object")

     index_validator.drop_duplicate_index(comparable)

     remaining = list(comparable.data_frame['id'].values)
     assert ['AAA', 'BBB'] == remaining
 def test_drop_disjunctive_index(self, mock_index):
     """Rows recorded in ``disjunctive_index`` are dropped, leaving 'a', 'b', 'c'."""
     all_rows = {
         'id': ['1', '2', 'a', 'b', 'c'],
         "fname": ['p1', 'q1', 'r1', 's1', 'j1'],
     }
     disjunctive_rows = {'id': ['1', '2']}

     comparable = Compare()
     comparable.index_column_name = mock_index
     comparable.data_frame = pd.DataFrame(data=all_rows, dtype="object")
     comparable.disjunctive_index = pd.DataFrame(data=disjunctive_rows,
                                                 dtype="object")

     index_validator.drop_disjunctive_index(comparable)

     remaining = list(comparable.data_frame['id'].values)
     assert ['a', 'b', 'c'] == remaining
 def test_strip_index_2(self, mock_index):
     """``strip_index`` removes surrounding spaces, tabs and newlines from ids.

     The assertion reads the frame back from ``comparable.data_frame`` (the
     object under test) rather than from the local input variable, so the
     check holds whether ``strip_index`` mutates the frame in place or
     assigns a new frame to the comparable.
     """
     comparable = Compare()
     data1 = pd.DataFrame({
         'id': [
             '        1 ', '2           ', '     a \n\n',
             '\t\t\t b        \n', ' c '
         ],
         "fname": ['p1', 'q1', 'r1', 's1', 'j1']
     })
     expected = pd.DataFrame({
         'id': ['1', '2', 'a', 'b', 'c'],
         "fname": ['p1', 'q1', 'r1', 's1', 'j1']
     })
     comparable.index_column_name = mock_index
     comparable.data_frame = data1
     index_validator.strip_index(comparable)
     # Compare against the comparable's frame, matching test_strip_index.
     assert expected.equals(comparable.data_frame)
    def test_remove_white_space_char(self):
        """Run ``remove_non_printable_char`` then ``remove_white_space_char``
        on the white-space mock data and compare with the expected frame."""
        expected = pd.DataFrame({
            'id': [1, 2, 3],
            'first_name': ['f1', 'f 2', 'f3'],
            'last_name': ['l1', 'l2', 'l3'],
            'middle_name': ['m1', 'm2', 'm3'],
            'requirement': ['r1', 'r 2', 'r 3'],
            'alternate_name': ['a 1', 'a 2', 'a 3'],
        })

        comparable = Compare()
        comparable.header = TestIndexValidator.mock_header
        comparable.index_column_name = TestIndexValidator.mock_index
        source_rows = TestIndexValidator.mock_data_white_space_char
        comparable.data_frame = pd.DataFrame(source_rows)

        # Same call order as before: non-printable characters first, then white space.
        cell_comparator.remove_non_printable_char(comparable)
        cell_comparator.remove_white_space_char(comparable)

        assert expected.equals(comparable.data_frame)
def remove_nan(comparable: Compare):
    """Replace every missing value in ``comparable.data_frame`` with ``''``."""
    filled = comparable.data_frame.fillna('')
    comparable.data_frame = filled
Beispiel #18
0
def drop_empty_index(comparable: Compare):
    """Drop the labels in ``comparable.empty_index`` from the data frame.

    The frame stored on ``comparable`` is replaced by the result of
    ``DataFrame.drop``.
    """
    frame = comparable.data_frame
    comparable.data_frame = frame.drop(comparable.empty_index)
def drop_duplicate_index(comparable: Compare):
    """Drop the rows recorded in ``comparable.duplicate_index``.

    The labels to drop are the index values of the index-name column of
    ``comparable.duplicate_index``; the frame stored on ``comparable`` is
    replaced by the result of ``DataFrame.drop``.
    """
    index_column = idx.get_index_name(comparable)
    duplicate_rows = comparable.duplicate_index[index_column]
    labels_to_drop = duplicate_rows.index.values
    comparable.data_frame = comparable.data_frame.drop(labels_to_drop)
def set_data_frame(comparable: Compare):
    """Set ``data_frame`` to a deep copy of ``original_data_frame``."""
    source = comparable.original_data_frame
    comparable.data_frame = source.copy(deep=True)
def drop_disjunctive_index(comparable: Compare):
    """Drop the rows recorded in ``comparable.disjunctive_index``.

    The labels to drop are the index values of the index-name column of
    ``comparable.disjunctive_index``; the frame stored on ``comparable`` is
    replaced by the result of ``DataFrame.drop``.
    """
    index_column = idx.get_index_name(comparable)
    disjunctive_rows = comparable.disjunctive_index[index_column]
    labels_to_drop = disjunctive_rows.index.values
    comparable.data_frame = comparable.data_frame.drop(labels_to_drop)