def bigram_encode(title):
    """Encode a title using the module-level ``bigram`` substitution table.

    The title is first passed through ``SearchKey.strip_accents``.  It is
    then scanned left to right:

    * a character rejected by ``SearchKey.is_valid_character`` is dropped
      (no placeholder is emitted);
    * if the current character is valid and the two-character slice that
      starts at it is a key of ``bigram``, the mapped value is emitted and
      both characters are consumed (only the first character of the pair
      is validity-checked);
    * otherwise the single character is emitted.

    The collected output is returned after being passed through
    ``SearchKey.compact_spaces``.
    """
    text = SearchKey.strip_accents(title)
    length = len(text)
    pieces = []
    pos = 0

    # Main scan: runs while at least two characters remain.
    while length - pos >= 2:
        current = text[pos]
        if not SearchKey.is_valid_character(current):
            pos += 1
            continue

        pair = text[pos:pos + 2]
        if pair in bigram:
            pieces.append(bigram[pair])
            pos += 2
        else:
            # chr(ord(...)) round-trip kept from the original code.
            # NOTE(review): presumably a Python 2 coercion to a byte
            # string -- confirm before simplifying.
            pieces.append(chr(ord(current)))
            pos += 1

    # At most one trailing character can be left over.
    if length - pos == 1:
        last = text[pos]
        if SearchKey.is_valid_character(last):
            pieces.append(chr(ord(last)))

    return SearchKey.compact_spaces(''.join(pieces))
def bigram_encode(title):
    """Encode a title using the module-level ``bigram`` substitution table.

    The title is first passed through ``SearchKey.strip_accents``.  It is
    then scanned left to right:

    * a character rejected by ``SearchKey.is_valid_character`` is dropped
      (no placeholder is emitted);
    * if the current character is valid and the two-character slice that
      starts at it is a key of ``bigram``, the mapped value is emitted and
      both characters are consumed (only the first character of the pair
      is validity-checked);
    * otherwise the single character is emitted.

    The collected output is returned after being passed through
    ``SearchKey.compact_spaces``.
    """
    text = SearchKey.strip_accents(title)
    length = len(text)
    pieces = []
    pos = 0

    # Main scan: runs while at least two characters remain.
    while length - pos >= 2:
        current = text[pos]
        if not SearchKey.is_valid_character(current):
            pos += 1
            continue

        pair = text[pos:pos + 2]
        if pair in bigram:
            pieces.append(bigram[pair])
            pos += 2
        else:
            # chr(ord(...)) round-trip kept from the original code.
            # NOTE(review): presumably a Python 2 coercion to a byte
            # string -- confirm before simplifying.
            pieces.append(chr(ord(current)))
            pos += 1

    # At most one trailing character can be left over.
    if length - pos == 1:
        last = text[pos]
        if SearchKey.is_valid_character(last):
            pieces.append(chr(ord(last)))

    return SearchKey.compact_spaces(''.join(pieces))
def generate_bigram(text):
    """Count the first two character pairs of ``text`` in ``bigram``.

    The pair ``text[0:2]`` is considered when ``len(text) > 2`` and the
    pair ``text[2:4]`` when ``len(text) > 4``.  A pair is counted only if
    both of its characters pass ``SearchKey.is_valid_character``; the
    count lives in the module-level ``bigram`` mapping and starts at 1 the
    first time a pair is seen.

    Note that the length checks are strict: a text of exactly two (or
    exactly four) characters does not have its last complete pair counted.
    """
    global bigram

    size = len(text)
    # (length that must be exceeded, offset of the pair's first character)
    for must_exceed, start in ((2, 0), (4, 2)):
        if size <= must_exceed:
            # Too short for this pair, and therefore for any later one.
            break

        pair = text[start:start + 2]
        try:
            first_ok = SearchKey.is_valid_character(text[start])
            if first_ok and SearchKey.is_valid_character(text[start + 1]):
                bigram[pair] += 1
        except KeyError:
            # Pair not in the table yet: start its count.
            bigram[pair] = 1
def generate_bigram(text):
    """Count the first two character pairs of ``text`` in ``bigram``.

    The pair ``text[0:2]`` is considered when ``len(text) > 2`` and the
    pair ``text[2:4]`` when ``len(text) > 4``.  A pair is counted only if
    both of its characters pass ``SearchKey.is_valid_character``; the
    count lives in the module-level ``bigram`` mapping and starts at 1 the
    first time a pair is seen.

    Note that the length checks are strict: a text of exactly two (or
    exactly four) characters does not have its last complete pair counted.
    """
    global bigram

    size = len(text)
    # (length that must be exceeded, offset of the pair's first character)
    for must_exceed, start in ((2, 0), (4, 2)):
        if size <= must_exceed:
            # Too short for this pair, and therefore for any later one.
            break

        pair = text[start:start + 2]
        try:
            first_ok = SearchKey.is_valid_character(text[start])
            if first_ok and SearchKey.is_valid_character(text[start + 1]):
                bigram[pair] += 1
        except KeyError:
            # Pair not in the table yet: start its count.
            bigram[pair] = 1