def load_mentions_vocab_from_files(mentions_files, filter_stop_words=False):
    """Read every mention-JSON file and build a vocabulary from the union.

    Args:
        mentions_files: iterable of paths to mention JSON files.
        filter_stop_words: forwarded unchanged to ``load_mentions_vocab``.

    Returns:
        The result of ``load_mentions_vocab`` over all loaded mentions.
    """
    logger.info("Loading mentions files...")
    # Flatten the per-file mention lists into one combined list.
    combined = [
        mention
        for path in mentions_files
        for mention in MentionData.read_mentions_json_to_mentions_data_list(path)
    ]
    return load_mentions_vocab(combined, filter_stop_words)
def load_mentions_vocab(mentions_files, filter_stop_words=False):
    """Load mention JSON files and extract their vocabulary.

    Args:
        mentions_files: iterable of paths to mention JSON files.
        filter_stop_words: forwarded unchanged to ``extract_vocab``.

    Returns:
        The result of ``extract_vocab`` over all loaded mentions.

    NOTE(review): another ``load_mentions_vocab`` definition appears later in
    this file and shadows this one at import time — confirm which is intended.
    """
    logger.info('Loading mentions files...')
    mentions = []
    for _file in mentions_files:
        mentions.extend(
            MentionData.read_mentions_json_to_mentions_data_list(_file))
    # Bug fix: this log previously fired BEFORE the loading loop, falsely
    # reporting loading as done; it now runs after all files are read.
    logger.info('Done loading mentions files, starting local dump creation...')
    return extract_vocab(mentions, filter_stop_words)
def load_mentions_vocab(mentions_files, filter_stop_words=False):
    """Load mention JSON files and extract the vocabulary they contain.

    Args:
        mentions_files: iterable of paths to mention JSON files.
        filter_stop_words: forwarded unchanged to ``extract_vocab``.

    Returns:
        The result of ``extract_vocab`` over all loaded mentions.
    """
    logger.info('Loading mentions files...')
    # Gather every mention from every file into a single flat list.
    all_mentions = [
        mention
        for mention_file in mentions_files
        for mention in MentionData.read_mentions_json_to_mentions_data_list(
            mention_file)
    ]
    vocab = extract_vocab(all_mentions, filter_stop_words)
    logger.info('Done loading mentions files...')
    return vocab
def wordnet_dump():
    """Create a local JSON dump of WordNet pages for all mentions.

    Reads the mention file named by ``args.mentions``, fetches one WordNet
    page per mention via ``WordnetOnline``, keys the results by each page's
    original phrase, and writes the mapping as JSON to ``args.output``.
    """
    out_file = args.output
    mentions_file = args.mentions

    logger.info('Loading mentions files...')
    mentions = MentionData.read_mentions_json_to_mentions_data_list(mentions_file)
    logger.info('Done loading mentions files, starting local dump creation...')

    wordnet = WordnetOnline()
    # One page lookup per mention; later duplicates of the same phrase
    # overwrite earlier ones, exactly as sequential dict assignment would.
    pages = (wordnet.get_pages(mention) for mention in mentions)
    result_dump = {page.orig_phrase: page for page in pages}

    with open(out_file, 'w') as out:
        json.dump(result_dump, out, default=json_dumper)

    logger.info('Wordnet Dump Created Successfully, '
                'extracted total of %d wn pages', len(result_dump))
    logger.info('Saving dump to file-%s', out_file)
def elmo_dump():
    """Pickle ELMo embeddings for every mention under ``args.mentions``.

    ``args.mentions`` may be a single mention file or a directory tree of
    them (``.DS_Store`` entries are skipped). All mentions are loaded,
    embedded via ``load_elmo_for_vocab``, and pickled to ``args.output``.
    """
    out_file = args.output

    # Collect the mention file paths: walk a directory, or take the one file.
    mention_files = []
    if os.path.isdir(args.mentions):
        for dirpath, _, filenames in os.walk(args.mentions):
            mention_files.extend(
                join(dirpath, name) for name in filenames
                if name != '.DS_Store')
    else:
        mention_files.append(args.mentions)

    mentions = []
    for path in mention_files:
        mentions.extend(MentionData.read_mentions_json_to_mentions_data_list(path))

    elmo_ecb_embeddings = load_elmo_for_vocab(mentions)

    with open(out_file, 'wb') as handle:
        pickle.dump(elmo_ecb_embeddings, handle)

    logger.info('Saving dump to file-%s', out_file)