Esempio n. 1
0
    def test_phonetic_does_not_exist(self):
        """Requesting an unknown phonetic algorithm must raise ValueError."""
        names = [np.nan, u'John', u'Mary Ann', u'billy',
                 u'Jonathan', u'Gretha', u'Micheal', u'Sjors']
        values = pd.Series(names)

        # Callable form of assertRaises: invokes phonetic(values, ...) and
        # fails the test unless a ValueError is raised.
        self.assertRaises(ValueError, phonetic, values, 'unknown_algorithm')
Esempio n. 2
0
    def test_phonetic_does_not_exist(self):
        """Check that an unsupported method name is rejected with ValueError."""
        bogus_method = 'unknown_algorithm'
        sample = pd.Series([
            np.nan,
            u'John',
            u'Mary Ann',
            u'billy',
            u'Jonathan',
            u'Gretha',
            u'Micheal',
            u'Sjors',
        ])

        with self.assertRaises(ValueError):
            phonetic(sample, bogus_method)
    def test_encode_nysiis(self):
        """Compare the 'nysiis' output of ``phonetic`` with the expected codes."""
        # (input value, expected code) pairs; the missing value is expected
        # to stay missing in the result.
        cases = [
            (np.nan, np.nan),
            (u'John', u'JAN'),
            (u'Mary Ann', u'MARYAN'),
            (u'billy', u'BALY'),
            (u'Jonathan', u'JANATAN'),
            (u'Gretha', u'GRAT'),
            (u'Micheal', u'MACAL'),
            (u'Sjors', u'SJAR'),
        ]
        values = pd.Series([raw for raw, _ in cases])
        expected = pd.Series([code for _, code in cases])

        pdt.assert_series_equal(phonetic(values, 'nysiis'), expected)
    def test_encode_soundex(self):
        """Compare the 'soundex' output of ``phonetic`` with the expected codes."""
        raw_names = [
            np.nan, u'John', u'Mary Ann', u'billy',
            u'Jonathan', u'Gretha', u'Micheal', u'Sjors',
        ]
        soundex_codes = [
            np.nan, u'J500', u'M650', u'B400',
            u'J535', u'G630', u'M240', u'S620',
        ]
        values = pd.Series(raw_names)
        expected = pd.Series(soundex_codes)

        pdt.assert_series_equal(phonetic(values, 'soundex'), expected)
Esempio n. 5
0
    def test_encode_match_rating(self):
        """Compare the 'match_rating' output of ``phonetic`` with expectations."""
        # Each row pairs an input value with the code the test expects for it.
        table = [
            (np.nan, np.nan),
            (u'John', u'JHN'),
            (u'Mary Ann', u'MRYNN'),
            (u'billy', u'BLLY'),
            (u'Jonathan', u'JNTHN'),
            (u'Gretha', u'GRTH'),
            (u'Micheal', u'MCHL'),
            (u'Sjors', u'SJRS'),
        ]
        inputs, codes = zip(*table)
        values = pd.Series(list(inputs))
        expected = pd.Series(list(codes))

        result = phonetic(values, method='match_rating')
        pdt.assert_series_equal(result, expected)
Esempio n. 6
0
    def test_encode_metaphone(self):
        """Compare the 'metaphone' output of ``phonetic`` with expectations."""
        source_names = [np.nan, u'John', u'Mary Ann', u'billy',
                        u'Jonathan', u'Gretha', u'Micheal', u'Sjors']
        metaphone_codes = [np.nan, u'JN', u'MRYN', u'BL',
                           u'JN0N', u'KR0', u'MXL', u'SJRS']

        values = pd.Series(source_names)
        expected = pd.Series(metaphone_codes)

        pdt.assert_series_equal(
            phonetic(values, method='metaphone'),
            expected,
        )
Esempio n. 7
0
    def test_encode_metaphone(self):
        """Verify ``phonetic`` with method='metaphone' against known codes."""
        # (input value, expected code) pairs; NaN input is expected to give NaN.
        cases = [
            (np.nan, np.nan),
            (u'John', u'JN'),
            (u'Mary Ann', u'MRYN'),
            (u'billy', u'BL'),
            (u'Jonathan', u'JN0N'),
            (u'Gretha', u'KR0'),
            (u'Micheal', u'MXL'),
            (u'Sjors', u'SJRS'),
        ]
        values = pd.Series([name for name, _ in cases])
        expected = pd.Series([code for _, code in cases])

        encoded = phonetic(values, method='metaphone')
        pdt.assert_series_equal(encoded, expected)
Esempio n. 8
0
    def test_encode_match_rating(self):
        """Verify ``phonetic`` with method='match_rating' against known codes."""
        names = [
            np.nan,
            u'John',
            u'Mary Ann',
            u'billy',
            u'Jonathan',
            u'Gretha',
            u'Micheal',
            u'Sjors',
        ]
        wanted = [
            np.nan,
            u'JHN',
            u'MRYNN',
            u'BLLY',
            u'JNTHN',
            u'GRTH',
            u'MCHL',
            u'SJRS',
        ]
        values = pd.Series(names)
        expected = pd.Series(wanted)

        pdt.assert_series_equal(
            phonetic(values, method='match_rating'), expected)
Esempio n. 9
0
    def test_encode_nysiis(self):
        """Verify ``phonetic`` with the 'nysiis' method against known codes."""
        # Rows of (input value, expected code), split into two columns below.
        rows = [
            (np.nan, np.nan),
            (u'John', u'JAN'),
            (u'Mary Ann', u'MARYAN'),
            (u'billy', u'BALY'),
            (u'Jonathan', u'JANATAN'),
            (u'Gretha', u'GRAT'),
            (u'Micheal', u'MACAL'),
            (u'Sjors', u'SJAR'),
        ]
        inputs, codes = zip(*rows)
        values = pd.Series(list(inputs))
        expected = pd.Series(list(codes))

        outcome = phonetic(values, 'nysiis')
        pdt.assert_series_equal(outcome, expected)
Esempio n. 10
0
    def test_encode_soundex(self):
        """Verify ``phonetic`` with the 'soundex' method against known codes."""
        # (input value, expected code) pairs; NaN input is expected to give NaN.
        pairs = [
            (np.nan, np.nan),
            (u'John', u'J500'),
            (u'Mary Ann', u'M650'),
            (u'billy', u'B400'),
            (u'Jonathan', u'J535'),
            (u'Gretha', u'G630'),
            (u'Micheal', u'M240'),
            (u'Sjors', u'S620'),
        ]
        values = pd.Series([name for name, _ in pairs])
        expected = pd.Series([code for _, code in pairs])

        actual = phonetic(values, 'soundex')
        pdt.assert_series_equal(actual, expected)