Ejemplo n.º 1
0
def bigram_encode(title):
    """Encode a title using the module-level ``bigram`` substitution table.

    The title is first passed through ``SearchKey.strip_accents`` and then
    scanned left to right:

    * a character rejected by ``SearchKey.is_valid_character`` is dropped
      (no placeholder is emitted);
    * otherwise, if the two characters starting at the current position form
      a key of ``bigram``, the mapped value is emitted and both characters
      are consumed (only the first of the two is validated);
    * otherwise the single character is emitted as-is.

    Args:
        title: the title to encode.

    Returns:
        The encoded text after ``SearchKey.compact_spaces`` has been applied.
    """
    title = SearchKey.strip_accents(title)
    length = len(title)
    pieces = []
    pos = 0

    # Main scan: runs for as long as a full pair is still available.
    while length - pos >= 2:
        if not SearchKey.is_valid_character(title[pos]):
            pos += 1  # invalid character: skip it silently
            continue

        pair = title[pos:pos + 2]
        if pair in bigram:
            pieces.append(bigram[pair])
            pos += 2
        else:
            pieces.append(chr(ord(title[pos:pos + 1])))
            pos += 1

    # At most one trailing character can remain; it can never start a pair.
    if length - pos == 1 and SearchKey.is_valid_character(title[pos]):
        pieces.append(chr(ord(title[pos])))

    return SearchKey.compact_spaces(''.join(pieces))
Ejemplo n.º 2
0
def bigram_encode(title):
    """Encode a title using the module-level ``bigram`` substitution table.

    The title is first passed through ``SearchKey.strip_accents`` and then
    scanned left to right:

    * a character rejected by ``SearchKey.is_valid_character`` is dropped
      (no placeholder is emitted);
    * otherwise, if the two characters starting at the current position form
      a key of ``bigram``, the mapped value is emitted and both characters
      are consumed (only the first of the two is validated);
    * otherwise the single character is emitted as-is.

    Args:
        title: the title to encode.

    Returns:
        The encoded text after ``SearchKey.compact_spaces`` has been applied.
    """
    title = SearchKey.strip_accents(title)
    length = len(title)
    pieces = []
    pos = 0

    # Main scan: runs for as long as a full pair is still available.
    while length - pos >= 2:
        if not SearchKey.is_valid_character(title[pos]):
            pos += 1  # invalid character: skip it silently
            continue

        pair = title[pos:pos + 2]
        if pair in bigram:
            pieces.append(bigram[pair])
            pos += 2
        else:
            pieces.append(chr(ord(title[pos:pos + 1])))
            pos += 1

    # At most one trailing character can remain; it can never start a pair.
    if length - pos == 1 and SearchKey.is_valid_character(title[pos]):
        pieces.append(chr(ord(title[pos])))

    return SearchKey.compact_spaces(''.join(pieces))
Ejemplo n.º 3
0
def generate_bigram(text):
    """Tally the first two character pairs of ``text`` in the ``bigram`` table.

    The pair ``text[0:2]`` is counted when ``text`` is longer than 2
    characters, and the pair ``text[2:4]`` when it is longer than 4.  A pair
    is only counted if both of its characters are accepted by
    ``SearchKey.is_valid_character``.

    Args:
        text: the text whose leading character pairs are tallied.

    Returns:
        None; the module-level ``bigram`` mapping is updated in place
        (a pair's count starts at 1 and is incremented on every later hit).
    """
    # NOTE(review): the strict comparisons skip a text of exactly 2 (or 4)
    # characters even though the pair exists -- confirm this is intended.
    for start in (0, 2):
        if len(text) <= start + 2:
            break  # the next pair would need an even longer text

        if not (SearchKey.is_valid_character(text[start])
                and SearchKey.is_valid_character(text[start + 1])):
            continue

        pair = text[start:start + 2]
        # Only the increment is guarded, so a KeyError here can only mean
        # "pair not seen yet" and never an error raised by the validator.
        try:
            bigram[pair] += 1
        except KeyError:
            bigram[pair] = 1
Ejemplo n.º 4
0
def generate_bigram(text):
    """Tally the first two character pairs of ``text`` in the ``bigram`` table.

    The pair ``text[0:2]`` is counted when ``text`` is longer than 2
    characters, and the pair ``text[2:4]`` when it is longer than 4.  A pair
    is only counted if both of its characters are accepted by
    ``SearchKey.is_valid_character``.

    Args:
        text: the text whose leading character pairs are tallied.

    Returns:
        None; the module-level ``bigram`` mapping is updated in place
        (a pair's count starts at 1 and is incremented on every later hit).
    """
    # NOTE(review): the strict comparisons skip a text of exactly 2 (or 4)
    # characters even though the pair exists -- confirm this is intended.
    for start in (0, 2):
        if len(text) <= start + 2:
            break  # the next pair would need an even longer text

        if not (SearchKey.is_valid_character(text[start])
                and SearchKey.is_valid_character(text[start + 1])):
            continue

        pair = text[start:start + 2]
        # Only the increment is guarded, so a KeyError here can only mean
        # "pair not seen yet" and never an error raised by the validator.
        try:
            bigram[pair] += 1
        except KeyError:
            bigram[pair] = 1