def train_bigram_words(bi_words_count, out_filename):
    """Group word-bigram counts by first word and code, then save them as JSON.

    Every key of ``bi_words_count`` is split on whitespace.  The first token
    is the context word and the second token is the word that follows it;
    any further tokens are ignored.  The structure written to disk is::

        {first_word: {word_to_number(second_word): [(second_word, freq), ...]}}

    ``json.dump`` serializes each ``(second_word, freq)`` tuple as a JSON
    array.

    Args:
        bi_words_count: Mapping from a bigram string to its frequency.
        out_filename: Path of the JSON file to create or overwrite.

    Returns:
        Always ``1``.

    Raises:
        IndexError: If a key holds fewer than two whitespace-separated tokens.
    """
    bigram_words_trained = {}
    total = str(len(bi_words_count))  # loop-invariant, so computed once

    # The progress counter is zero-based here, so the last line printed is
    # "<N-1>/<N>".
    for index, (bigram, freq) in enumerate(bi_words_count.items()):
        print(str(index) + "/" + total, end="\r")

        # str.split() with no argument already collapses runs of whitespace.
        words = bigram.split()
        first_word = words[0]
        second_word = words[1]
        # word_to_number is defined outside this function.
        second_number = word_to_number(second_word)

        possibilities = bigram_words_trained.setdefault(first_word, {})
        possibilities.setdefault(second_number, []).append((second_word, freq))

    with open(out_filename, 'w') as fp:
        json.dump(bigram_words_trained, fp)
    return 1
def train_bigram_letters(bi_letters_count, out_filename):
    """Group letter-bigram counts by lookup code and save them as JSON.

    For each key, the lookup code is the key's first element followed by
    ``word_to_number`` applied to its second element.  Every bigram sharing
    a code is collected, in input order, as ``(bigram, freq)``::

        {code: [(bigram, freq), ...]}

    ``json.dump`` serializes each tuple as a JSON array.

    Note:
        This file defines ``train_bigram_letters`` a second time directly
        after this definition; at module level the later one replaces this
        one.

    Args:
        bi_letters_count: Mapping from a letter bigram to its frequency.
            Keys must be indexable with at least two elements.
        out_filename: Path of the JSON file to create or overwrite.

    Returns:
        None.

    Raises:
        IndexError: If a key has fewer than two elements.
    """
    bigram_letters_trained = {}
    total = str(len(bi_letters_count))  # loop-invariant, so computed once

    for position, (bigram, freq) in enumerate(bi_letters_count.items(), start=1):
        print(str(position) + "/" + total, end="\r")

        first_letter = bigram[0]
        second_letter = bigram[1]
        # word_to_number is defined outside this function; its result is
        # concatenated to first_letter, so it is assumed to return a str.
        code = first_letter + word_to_number(second_letter)

        bigram_letters_trained.setdefault(code, []).append((bigram, freq))

    with open(out_filename, 'w') as outfile:
        json.dump(bigram_letters_trained, outfile)
def train_bigram_letters(bi_letters_count, out_filename):
    """Build the letter-bigram lookup table and write it to a JSON file.

    Each key of ``bi_letters_count`` contributes one ``(bigram, freq)`` entry
    to the bucket named ``bigram[0] + word_to_number(bigram[1])``, giving::

        {code: [(bigram, freq), ...]}

    Entries keep the iteration order of the input mapping.  ``json.dump``
    writes each tuple as a JSON array.

    Note:
        An earlier definition with the same name appears just above this one
        in the file; being the later definition, this is the one that stays
        bound at module level.

    Args:
        bi_letters_count: Mapping from a letter bigram to its frequency.
            Keys must be indexable with at least two elements.
        out_filename: Path of the JSON file to create or overwrite.

    Returns:
        None.

    Raises:
        IndexError: If a key has fewer than two elements.
    """
    bigram_letters_trained = {}
    total = str(len(bi_letters_count))  # hoisted: does not change in the loop

    # Progress is reported one-based: "1/N" through "N/N".
    for count, (bigram, freq) in enumerate(bi_letters_count.items(), start=1):
        print(str(count) + "/" + total, end="\r")

        first_letter = bigram[0]
        second_letter = bigram[1]
        # word_to_number comes from outside this function; it is assumed to
        # return a str because the result is appended to first_letter.
        number = word_to_number(second_letter)
        code = first_letter + number

        bigram_letters_trained.setdefault(code, []).append((bigram, freq))

    with open(out_filename, 'w') as outfile:
        json.dump(bigram_letters_trained, outfile)
def train_unigram_words(words_count, out_filename):
    """Group word counts by their numeric code and save them as JSON.

    Each word is bucketed under ``functions.word_to_number(word)``; a bucket
    lists, in input order, every ``(word, freq)`` pair sharing that code::

        {code: [(word, freq), ...]}

    ``json.dump`` serializes each tuple as a JSON array.

    Note:
        This file defines ``train_unigram_words`` a second time directly
        after this definition; at module level the later one replaces this
        one.

    Args:
        words_count: Mapping from a word to its frequency.
        out_filename: Path of the JSON file to create or overwrite.

    Returns:
        None.
    """
    unigram_words_trained = {}
    total = str(len(words_count))  # loop-invariant, so computed once

    # The progress counter is zero-based here, so the last line printed is
    # "<N-1>/<N>".
    for index, (word, freq) in enumerate(words_count.items()):
        print(str(index) + "/" + total, end="\r")

        # `functions` is a module referenced from outside this function.
        number = functions.word_to_number(word)
        unigram_words_trained.setdefault(number, []).append((word, freq))

    with open(out_filename, 'w') as fp:
        json.dump(unigram_words_trained, fp)
def train_unigram_words(words_count, out_filename):
    """Build the unigram lookup table and write it to a JSON file.

    The table maps ``functions.word_to_number(word)`` to the list of
    ``(word, freq)`` pairs that produce that code, in the iteration order of
    ``words_count``::

        {code: [(word, freq), ...]}

    ``json.dump`` writes each tuple as a JSON array.

    Note:
        An earlier definition with the same name appears just above this one
        in the file; being the later definition, this is the one that stays
        bound at module level.

    Args:
        words_count: Mapping from a word to its frequency.
        out_filename: Path of the JSON file to create or overwrite.

    Returns:
        None.
    """
    unigram_words_trained = {}
    total = str(len(words_count))  # hoisted: does not change in the loop

    # Progress starts at 0, so the final line printed reads "<N-1>/<N>".
    for seen, (word, freq) in enumerate(words_count.items()):
        print(str(seen) + "/" + total, end="\r")

        # `functions` is a module referenced from outside this function.
        code = functions.word_to_number(word)
        bucket = unigram_words_trained.setdefault(code, [])
        bucket.append((word, freq))

    with open(out_filename, 'w') as fp:
        json.dump(unigram_words_trained, fp)