def compare(self, string1, string2):
    '''Compare soundex of given strings

       This function checks whether 2 given strings sound phonetically
       the same by comparing their soundex codes.

       :param string1: First string for comparison
       :param string2: Second string for comparison

       :return: 0 if both strings are identical,
                1 if the soundex codes match completely (same first
                character and same remaining code),
                2 if only the first character of the soundex codes
                differs while the remaining code matches,
                -1 if the strings do not sound the same, if exactly
                one of them is empty, or if exactly one of them is
                English (English can't be compared across languages).
    '''
    # Quick check: identical strings need no soundex computation.
    if string1 == string2:
        return 0

    # An empty string has no first character to detect a language from
    # and cannot sound like a non-empty one; report "not same" instead
    # of failing with IndexError on string[0] below.
    if not string1 or not string2:
        return -1

    first_is_english = get_language(string1[0]) == 'en_US'
    second_is_english = get_language(string2[0]) == 'en_US'

    if first_is_english != second_is_english:
        # Can't Soundex compare English and Indic string
        return -1

    soundex1 = self.soundex(string1)
    soundex2 = self.soundex(string2)

    if soundex1[1:] != soundex2[1:]:
        # Strings are not same
        return -1

    if soundex1[0] == soundex2[0]:
        # Strings sound phonetically same and same language
        return 1

    # Strings sound phonetically same but different language
    return 2
Example #2
0
    def compare(self, string1, string2):
        '''Compare soundex of given strings

           This function checks whether 2 given strings sound
           phonetically the same by comparing their soundex codes.
           The first character of each soundex code is ignored in the
           comparison; only the remaining code has to match.

           :param string1: First string for comparison
           :param string2: Second string for comparison

           :return: 0 if both strings are identical, 1 if the strings
                    sound phonetically the same, 2 if they do not
                    (including the case where exactly one of them is
                    empty), -1 if exactly one of the strings is
                    English, because English can't be compared across
                    languages.
        '''
        # Quick check: identical strings need no soundex computation.
        if string1 == string2:
            return 0

        # An empty string has no first character to detect a language
        # from and cannot sound like a non-empty one; report "not same"
        # instead of failing with IndexError on string[0] below.
        if not string1 or not string2:
            return 2

        first_is_english = get_language(string1[0]) == 'en_US'
        second_is_english = get_language(string2[0]) == 'en_US'

        if first_is_english != second_is_english:
            # Can't Soundex compare English and Indic string
            return -1

        soundex1 = self.soundex(string1)
        soundex2 = self.soundex(string2)

        # Strings sound phonetically same when everything after the
        # first character of the codes matches; otherwise not same.
        return 1 if soundex1[1:] == soundex2[1:] else 2
Example #3
0
    def soundex(self, name, length=8):
        '''Calculate soundex of given string

           This function calculates soundex for Indian language string
           as well as English string.

           This function is exposed as service method for JSONRPC in
           SILPA framework.

           The first character of `name' is kept as-is at the start of
           the result; every following character is lower-cased and
           translated with soundexCode(). Codes equal to '0' are
           dropped and runs of identical consecutive codes are
           collapsed into one.

           :param name: String whose Soundex value to be calculated
           :param length: Length of final Soundex string for
                          non-English input: shorter results are
                          padded with '0', longer ones are truncated.
                          English results are returned unpadded and
                          untruncated.
           :return: Soundex string of `name', or an empty string when
                    `name' is empty.
        '''
        # Nothing to encode; also avoids IndexError on name[0] below.
        if not name:
            return ''

        first_char = name[0]
        codes = []

        # translate alpha chars in name to soundex digits
        for char in name[1:].lower():
            code = str(self.soundexCode(char))

            # remove all 0s from the soundex code
            if code == '0':
                continue

            # duplicate consecutive soundex digits are skipped
            if not codes or code != codes[-1]:
                codes.append(code)

        # The first character leads the result. It is kept out of the
        # duplicate check above so it can never suppress a code.
        sndx = [first_char] + codes

        if get_language(first_char) == 'en_US':
            # Don't pad
            return ''.join(sndx)

        # Pad with just enough '0's to reach `length` (none when the
        # code is already long enough), then truncate.
        sndx.extend('0' * (length - len(sndx)))
        return ''.join(sndx[:length])
    def soundex(self, name, length=8):
        '''Calculate soundex of given string

           This function calculates soundex for Indian language string
           as well as English string.

           This function is exposed as service method for JSONRPC in
           SILPA framework.

           The first character of `name' is kept as-is at the start of
           the result; every following character is lower-cased and
           translated with soundexCode(). Codes equal to '0' are
           dropped and runs of identical consecutive codes are
           collapsed into one.

           :param name: String whose Soundex value to be calculated
           :param length: Length of final Soundex string for
                          non-English input: shorter results are
                          padded with '0', longer ones are truncated.
                          English results are returned unpadded and
                          untruncated.
           :return: Soundex string of `name', or an empty string when
                    `name' is empty.
        '''
        # Nothing to encode; also avoids IndexError on name[0] below.
        if not name:
            return ''

        first_char = name[0]
        codes = []

        # translate alpha chars in name to soundex digits
        for char in name[1:].lower():
            code = str(self.soundexCode(char))

            # remove all 0s from the soundex code
            if code == '0':
                continue

            # duplicate consecutive soundex digits are skipped
            if not codes or code != codes[-1]:
                codes.append(code)

        # The first character leads the result. It is kept out of the
        # duplicate check above so it can never suppress a code.
        sndx = [first_char] + codes

        if get_language(first_char) == 'en_US':
            # Don't pad
            return ''.join(sndx)

        # Pad with just enough '0's to reach `length` (none when the
        # code is already long enough), then truncate.
        sndx.extend('0' * (length - len(sndx)))
        return ''.join(sndx[:length])
Example #5
0
    def soundexCode(self, char):
        '''Return the soundex code for given character

           The character's position in charmap for its detected
           language is used as the index into the soundex table:
           "soundex_en" for English, "soundex" for everything else.

           :param char:
               Character whose soundex code is needed
           :return:
               Returns soundex code if character is found in charmap
               else returns 0
        '''
        lang = get_language(char)
        table = "soundex_en" if lang == "en_US" else "soundex"

        try:
            return _soundex_map[table][charmap[lang].index(char)]
        except (LookupError, ValueError):
            # No soundex mapping for the character: the language has
            # no charmap entry, the character is not listed in that
            # charmap, or its position lies outside the soundex table
            # (assuming both are dict-of-sequence tables). Only these
            # lookup failures are swallowed, so KeyboardInterrupt and
            # SystemExit are no longer hidden by a bare except.
            return 0
    def soundexCode(self, char):
        '''Return the soundex code for given character

           The character's position in charmap for its detected
           language is used as the index into the soundex table:
           "soundex_en" for English, "soundex" for everything else.

           :param char:
               Character whose soundex code is needed
           :return:
               Returns soundex code if character is found in charmap
               else returns 0
        '''
        lang = get_language(char)
        table = "soundex_en" if lang == "en_US" else "soundex"

        try:
            return _soundex_map[table][charmap[lang].index(char)]
        except (LookupError, ValueError):
            # No soundex mapping for the character: the language has
            # no charmap entry, the character is not listed in that
            # charmap, or its position lies outside the soundex table
            # (assuming both are dict-of-sequence tables). Only these
            # lookup failures are swallowed, so KeyboardInterrupt and
            # SystemExit are no longer hidden by a bare except.
            return 0
Example #7
0
def test_get_language():
    '''TEST: Get language

       Feeds one sample character per script to get_language and
       checks the language code it reports. A character that belongs
       to no known script must yield None.
    '''
    # (sample character, expected language code), checked in order.
    samples = (
        (u'ನ', 'kn_IN'),
        (u'അ', 'ml_IN'),
        (u'அ', 'ta_IN'),
        (u'అ', 'te_IN'),
        (u'અ', 'gu_IN'),
        (u'অ', 'bn_IN'),
        (u'ਅ', 'pa_IN'),
        (u'अ', 'hi_IN'),
        (u'ଅ', 'or_IN'),
        ('a', 'en_US'),
        ('eː', 'IPA'),
        ('ê', 'ISO15919'),
    )
    for sample, expected in samples:
        assert get_language(sample) == expected

    # Punctuation is not part of any script.
    assert get_language('!') is None