Beispiel #1
0
    def test_match_columns(self):
        """Check ``Table.match_columns`` against a fixed set of desired columns.

        Scenarios covered, in order: default (fuzzy) matching, strict matching
        that must raise ``TypeError`` for missing and for extra columns, a table
        that already has the desired columns, a table missing a column, a table
        with an extra column, and tables whose columns collide with each other.
        """
        # Same people, but with headers that differ from the target in case,
        # spacing and underscores.
        messy_rows = [
            {'first name': 'Mary', 'LASTNAME': 'Nichols', 'Middle__Name': 'D'},
            {'first name': 'Lucy', 'LASTNAME': 'Peterson', 'Middle__Name': 'S'},
        ]
        source = Table(messy_rows)

        clean_rows = [
            {'first_name': 'Mary', 'middle_name': 'D', 'last_name': 'Nichols'},
            {'first_name': 'Lucy', 'middle_name': 'S', 'last_name': 'Peterson'},
        ]
        expected = Table(clean_rows)

        # Fuzzy matching (the default) should line the messy headers up.
        source.match_columns(expected.columns)
        assert_matching_tables(expected, source)

        # Fuzzy matching disabled: fail because of the missing columns.
        # The table and column list are built outside the ``with`` so only the
        # match_columns call itself is expected to raise.
        strict_tbl = Table(messy_rows)
        wanted = expected.columns
        with self.assertRaises(TypeError):
            strict_tbl.match_columns(wanted,
                                     fuzzy_match=False,
                                     if_missing_columns='fail')

        # Fuzzy matching disabled: fail because of the extra columns.
        strict_tbl = Table(messy_rows)
        wanted = expected.columns
        with self.assertRaises(TypeError):
            strict_tbl.match_columns(wanted,
                                     fuzzy_match=False,
                                     if_extra_columns='fail')

        # A table that already has the right columns passes even in the
        # strictest mode, with no fuzzy matching needed.
        already_clean = Table(clean_rows)
        already_clean.match_columns(
            expected.columns,
            fuzzy_match=False,
            if_missing_columns='fail',
            if_extra_columns='fail',
        )
        assert_matching_tables(expected, already_clean)

        # Missing column: by default it gets added to the table.
        no_middle = Table([
            {'first name': 'Mary', 'LASTNAME': 'Nichols'},
            {'first name': 'Lucy', 'LASTNAME': 'Peterson'},
        ])
        no_middle.match_columns(expected.columns)
        # Expected result: the clean table with 'middle_name' dropped and
        # re-added (without values) at position 1.
        expected = Table(clean_rows)
        expected = expected.remove_column('middle_name')
        expected = expected.add_column('middle_name', index=1)
        assert_matching_tables(expected, no_middle)

        # Extra column: by default it gets removed from the table.
        with_age = Table([
            {'first name': 'Mary', 'LASTNAME': 'Nichols', 'Age': 32, 'Middle__Name': 'D'},
            {'first name': 'Lucy', 'LASTNAME': 'Peterson', 'Age': 26, 'Middle__Name': 'S'},
        ])
        expected = Table(clean_rows)
        with_age.match_columns(expected.columns)
        assert_matching_tables(expected, with_age)

        # Two extra columns that normalize the same and are not desired:
        # both of them should be removed.
        double_age = Table([
            {
                'first name': 'Mary', 'LASTNAME': 'Nichols',
                'Age': 32, 'Middle__Name': 'D', 'AGE': None,
            },
            {
                'first name': 'Lucy', 'LASTNAME': 'Peterson',
                'Age': 26, 'Middle__Name': 'S', 'AGE': None,
            },
        ])
        double_age.match_columns(expected.columns)
        assert_matching_tables(expected, double_age)

        # Two columns that both match a desired column: only the first is kept.
        double_first = Table([
            {
                'first name': 'Mary', 'LASTNAME': 'Nichols',
                'First Name': None, 'Middle__Name': 'D',
            },
            {
                'first name': 'Lucy', 'LASTNAME': 'Peterson',
                'First Name': None, 'Middle__Name': 'S',
            },
        ])
        double_first.match_columns(expected.columns)
        assert_matching_tables(expected, double_first)