def build_word_dict(args, examples):
    """Build the word dictionary for a set of examples.

    Every word that ``load_words(args, examples)`` yields (question and
    document words of the provided examples) is added to a new
    ``Dictionary``, which is then returned.
    """
    vocabulary = Dictionary()
    tokens = load_words(args, examples)
    for token in tokens:
        vocabulary.add(token)
    return vocabulary
def build_char_dict(args, examples):
    """Build the character dictionary for a set of examples.

    Every character that ``load_chars(args, examples)`` yields (taken from
    the question and document words of the provided examples) is added to a
    new ``Dictionary``, which is then returned.
    """
    alphabet = Dictionary()
    symbols = load_chars(args, examples)
    for symbol in symbols:
        alphabet.add(symbol)
    return alphabet
# Convert the raw dictionary dump into Word entries, grouping the definitions
# of each headword by part-of-speech tag, then save everything as YAML.
#
# NOTE(review): `dict` below is bound outside this section and shadows the
# builtin; presumably it is a dictionary container exposing add()/save().
pos_count = {}  # not referenced anywhere else in this section
data = joblib.load(UTS_DICT_DATA)
count = 0  # only a disabled debugging cap (stop after 30 entries) used this
logger.info("End loading")

for headword in data:
    groups_by_tag = {}  # POS tag -> its group; the same objects are in tag_groups
    tag_groups = []     # groups in order of first appearance of each tag
    for sense in data[headword]:
        tag = sense['pos']
        if tag not in groups_by_tag:
            # First definition seen for this tag: open a new group.
            group = {'tag': tag, 'defs': []}
            groups_by_tag[tag] = group
            tag_groups.append(group)
        groups_by_tag[tag]["defs"].append({
            "def": sense['definition'],
            'examples': [sense['example']],
        })
    dict.add(Word(headword, tag_groups))
logger.info("End Process")

output_path = join(DICT_FOLDER, 'underthesea_dictionary.yaml')
dict.save(output_path)
logger.info("End Save")
print('[+] Done')