def save_data_graph_feat(out_folder, features, examples, graphs, f_type,
                         config):
    """Pickle features, examples and graphs to gzip-compressed cache files.

    Args:
        out_folder: directory the cache files are written into.
        features: picklable collection of features to persist.
        examples: picklable collection of examples to persist.
        graphs: picklable collection of graphs to persist.
        f_type: tag prefixed to each cache filename (e.g. 'train').
        config: forwarded to ``get_cached_filename`` to build the filenames.
    """
    def _dump(step, label, data):
        # Shared save routine — the original repeated this block verbatim
        # for features, examples and graphs.
        path = os.path.join(
            out_folder,
            get_cached_filename('{}_{}'.format(f_type, label), config))
        print('({}). Start saving {} {}'.format(step, len(data), label))
        with gzip.open(path, 'wb') as fout:
            pickle.dump(data, fout)
        print('Save {} {} into {}'.format(len(data), label, path))

    _dump(1, 'features', features)
    _dump(2, 'examples', examples)
    _dump(3, 'graphs', graphs)
# Example no. 2 (aggregator-site marker, score: 0)
 def get_graph_file(self, tag, f_type=None):
     """Return the path of this dataset's cached graph file for *tag*."""
     graph_key = '{}_graphs'.format(f_type)
     cached = get_cached_filename(graph_key, self.config)
     return join(self.data_dir, tag, cached)
def get_graph_file(data_dir, tag, f_type, config):
    """Return ``data_dir/tag/<cached graph filename>`` for *f_type*."""
    graph_key = '{}_graphs'.format(f_type)
    return join(data_dir, tag, get_cached_filename(graph_key, config))
def get_example_file(data_dir, tag, f_type, config):
    """Return ``data_dir/tag/<cached example filename>`` for *f_type*."""
    example_key = '{}_examples'.format(f_type)
    return join(data_dir, tag, get_cached_filename(example_key, config))
# Example no. 5 (aggregator-site marker, score: 0)
        # NOTE(review): fragment begins mid-function — the branch that binds
        # `ranker` starts above this view; this line is the tail of it.
        data_source_name = "{}".format(ranker)
    # Only training data carries a concrete source type; other splits pass
    # None (presumably disabling source filtering downstream — TODO confirm
    # against read_hotpot_examples).
    if "train" in data_type:
        data_source_type = data_source_name
    else:
        data_source_type = None
    print('data type = {} \n data source type = {} \n data source name = {}'.
          format(data_type, data_source_type, data_source_name))
    # Build examples from the paragraph/NER/doc-link files named in `args`.
    examples = read_hotpot_examples(para_file=args.para_path,
                                    full_file=args.full_data,
                                    ner_file=args.ner_path,
                                    doc_link_file=args.doc_link_ner,
                                    data_source_type=data_source_type)

    # Cache the raw examples as a gzip-compressed pickle under the output dir.
    # Note: the filename is keyed on data_source_name (the ranker), not on
    # data_source_type, so train and eval runs of the same ranker share a name.
    cached_examples_file = os.path.join(
        args.output_dir,
        get_cached_filename('{}_examples'.format(data_source_name), args))

    with gzip.open(cached_examples_file, 'wb') as fout:
        pickle.dump(examples, fout)

    # Tokenize/convert examples into model features; RoBERTa gets special
    # handling (is_roberta flag) because its cls/sep conventions differ.
    features = convert_examples_to_features(
        examples,
        tokenizer,
        max_seq_length=args.max_seq_length,
        max_query_length=args.max_query_length,
        max_entity_num=args.max_entity_num,
        cls_token=tokenizer.cls_token,
        sep_token=tokenizer.sep_token,
        is_roberta=bool(args.model_type in ['roberta']),
        filter_no_ans=args.filter_no_ans)
 def get_example_file(self, tag, f_type):
     """Return the path of this dataset's cached example file for *tag*."""
     example_key = '{}_examples'.format(f_type)
     cached = get_cached_filename(example_key, self.config)
     return join(self.data_dir, tag, cached)