# Example no. 1 (score: 0)
def load_mentions_vocab_from_files(mentions_files, filter_stop_words=False):
    """Read every mentions JSON file and build a vocabulary from them.

    Args:
        mentions_files: iterable of paths to mentions JSON files.
        filter_stop_words: when True, stop words are filtered out of the
            resulting vocabulary.

    Returns:
        Whatever ``load_mentions_vocab`` produces for the loaded mentions.
    """
    logger.info("Loading mentions files...")
    all_mentions = []
    for mentions_path in mentions_files:
        all_mentions.extend(
            MentionData.read_mentions_json_to_mentions_data_list(mentions_path))

    return load_mentions_vocab(all_mentions, filter_stop_words)
# Example no. 2 (score: 0)
def load_mentions_vocab(mentions_files, filter_stop_words=False):
    """Load mentions from the given JSON files and extract their vocabulary.

    Args:
        mentions_files: iterable of paths to mentions JSON files.
        filter_stop_words: when True, stop words are filtered out of the
            resulting vocabulary.

    Returns:
        The vocabulary produced by ``extract_vocab``.
    """
    logger.info('Loading mentions files...')
    mentions = []
    for _file in mentions_files:
        mentions.extend(
            MentionData.read_mentions_json_to_mentions_data_list(_file))

    # Fix: this message was previously logged *before* the loading loop ran,
    # which made the log stream misleading; emit it once loading is done.
    logger.info('Done loading mentions files, starting local dump creation...')
    return extract_vocab(mentions, filter_stop_words)
# Example no. 3 (score: 0)
def load_mentions_vocab(mentions_files, filter_stop_words=False):
    """Collect all mentions from the given JSON files and return their vocabulary.

    Args:
        mentions_files: iterable of paths to mentions JSON files.
        filter_stop_words: when True, stop words are filtered out of the
            resulting vocabulary.

    Returns:
        The vocabulary produced by ``extract_vocab``.
    """
    logger.info('Loading mentions files...')
    collected = []
    for json_path in mentions_files:
        collected.extend(
            MentionData.read_mentions_json_to_mentions_data_list(json_path))

    extracted = extract_vocab(collected, filter_stop_words)
    logger.info('Done loading mentions files...')
    return extracted
def wordnet_dump():
    """Build a local WordNet dump for every mention and save it as JSON.

    Reads its inputs from the module-level ``args`` namespace:
    ``args.mentions`` (mentions JSON file) and ``args.output`` (dump path).
    Each mention is resolved to a WordNet page via ``WordnetOnline`` and
    stored keyed by the page's original phrase; the resulting dict is
    JSON-serialized with ``json_dumper`` handling non-primitive objects.
    """
    out_file = args.output
    mentions_file = args.mentions
    logger.info('Loading mentions files...')
    mentions = MentionData.read_mentions_json_to_mentions_data_list(mentions_file)
    logger.info('Done loading mentions files, starting local dump creation...')
    result_dump = {}  # idiomatic literal instead of dict()
    wordnet = WordnetOnline()
    for mention in mentions:
        page = wordnet.get_pages(mention)
        result_dump[page.orig_phrase] = page

    # Fix: announce the save before performing it — previously this message
    # was logged after the file had already been written (and after the
    # success message), which misrepresented the order of operations.
    logger.info('Saving dump to file-%s', out_file)
    with open(out_file, 'w') as out:
        json.dump(result_dump, out, default=json_dumper)

    logger.info('Wordnet Dump Created Successfully, '
                'extracted total of %d wn pages', len(result_dump))
def elmo_dump():
    """Compute ELMo embeddings for all mentions and pickle them to disk.

    Reads its inputs from the module-level ``args`` namespace:
    ``args.mentions`` (a single mentions JSON file, or a directory that is
    walked recursively) and ``args.output`` (pickle dump path).
    """
    out_file = args.output

    # Gather mention files: a directory is walked recursively; a plain file
    # path is used as-is. macOS '.DS_Store' entries are skipped.
    mention_files = []
    if os.path.isdir(args.mentions):
        for dirpath, _, filenames in os.walk(args.mentions):
            mention_files.extend(
                join(dirpath, name) for name in filenames if name != '.DS_Store')
    else:
        mention_files.append(args.mentions)

    mentions = []
    for mentions_path in mention_files:
        mentions.extend(
            MentionData.read_mentions_json_to_mentions_data_list(mentions_path))

    elmo_ecb_embeddings = load_elmo_for_vocab(mentions)

    with open(out_file, 'wb') as f:
        pickle.dump(elmo_ecb_embeddings, f)

    logger.info('Saving dump to file-%s', out_file)