def get_word_signatures_tag(word, dict_e, unk_tag_list):
    """Build a mapping from the signatures of ``word`` to their possible tags.

    The signatures are obtained from ``WordSignatures.get_word_signatures``.

    Args:
        word: The word whose signatures are looked up.
        dict_e: Dictionary handed to ``DictUtils.possible_tags`` for each
            signature (presumably the emission table; confirm with caller).
        unk_tag_list: Value returned under the ``UNK`` key when the word has
            no signature other than its own lower-cased form.

    Returns:
        ``{UNK: unk_tag_list}`` when the signature list equals
        ``[word.lower()]``; otherwise a dict mapping every signature to the
        result of ``DictUtils.possible_tags(signature, dict_e)``.
    """
    word_signatures = WordSignatures.get_word_signatures(word)

    # The only signature is the lower-cased word itself: use the UNK entry.
    if word_signatures == [word.lower()]:
        return {UNK: unk_tag_list}

    return {
        sig: DictUtils.possible_tags(sig, dict_e)
        for sig in word_signatures
    }
def main(input_file_name, q_mle, e_mle, greedy_hmm_output, extra_file_name):
    """Tag the input file with ``greedy`` and write the result.

    Reads the lines of ``input_file_name``, ``q_mle`` and ``e_mle``, converts
    the latter two into dictionaries, looks up the tags for ``UNK``, runs
    ``greedy`` and writes the tagged text to ``greedy_hmm_output``. The
    elapsed wall-clock time is printed at the end.

    Args:
        input_file_name: Path of the file whose lines are tagged.
        q_mle: Path of the file converted into ``dict_q``.
        e_mle: Path of the file converted into ``dict_e``.
        greedy_hmm_output: Path the tagged text is written to.
        extra_file_name: Accepted but not used by this function.
    """
    started_at = datetime.now()

    sentences = FileUtils.read_lines(input_file_name)

    q_lines = FileUtils.read_lines(q_mle)
    dict_q = DictUtils.convert_line_to_dict(q_lines)

    e_lines = FileUtils.read_lines(e_mle)
    dict_e = DictUtils.convert_line_to_dict(e_lines)

    unk_tag_list = DictUtils.possible_tags(UNK, dict_e)

    tagged = greedy(sentences, dict_q, dict_e, unk_tag_list)
    FileUtils.write_tagged_text(greedy_hmm_output, tagged)

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
def main(input_file_name, q_mle, e_mle, hmm_viterbi_predictions, extra_file_name):
    """Tag the input file with ``viterbi`` and write the result.

    Reads the lines of ``input_file_name``, ``q_mle`` and ``e_mle``, converts
    the latter two into dictionaries, looks up the tags for ``'*UNK*'``, runs
    ``viterbi`` and writes the tagged text to ``hmm_viterbi_predictions``.
    The elapsed wall-clock time is printed at the end.

    Args:
        input_file_name: Path of the file whose lines are tagged.
        q_mle: Path of the file converted into ``dict_q``.
        e_mle: Path of the file converted into ``dict_e``.
        hmm_viterbi_predictions: Path the tagged text is written to.
        extra_file_name: Accepted but not used by this function.
    """
    started_at = datetime.now()

    sentences = FileUtils.read_lines(input_file_name)

    q_lines = FileUtils.read_lines(q_mle)
    dict_q = DictUtils.convert_line_to_dict(q_lines)

    e_lines = FileUtils.read_lines(e_mle)
    dict_e = DictUtils.convert_line_to_dict(e_lines)

    # NOTE(review): the literal is presumably the unknown-word marker used as
    # a key in the e_mle data; confirm against the file format.
    unk_tag_list = DictUtils.possible_tags('*UNK*', dict_e)

    tagged = viterbi(sentences, dict_q, dict_e, unk_tag_list)
    FileUtils.write_tagged_text(hmm_viterbi_predictions, tagged)

    elapsed = datetime.now() - started_at
    print('Running Time: {0}'.format(elapsed))
def possible_tags(word, dict_e, unk_tag_list):
    """Return the possible tags for ``word``, falling back to its signatures.

    Args:
        word: The word to look up.
        dict_e: Dictionary passed to ``DictUtils.possible_tags``.
        unk_tag_list: Forwarded to ``get_word_signatures_tag`` for the
            fallback case.

    Returns:
        ``{word: tags}`` when ``DictUtils.possible_tags(word, dict_e)`` is
        non-empty; otherwise whatever ``get_word_signatures_tag`` returns for
        the same arguments.
    """
    known_tags = DictUtils.possible_tags(word, dict_e)

    # Direct hit: the word itself has at least one tag.
    if len(known_tags) != 0:
        return {word: known_tags}

    # No tags for the word itself: use its signatures instead.
    return get_word_signatures_tag(word, dict_e, unk_tag_list)