Esempio n. 1
0
def generate_raw_data(input_file, embed_map, gen_type='gold'):
    """Build the flat list of anaphor data items for every document in a file.

    Each record read from the JSON Lines file ``input_file`` is wrapped in a
    ``Document`` (together with ``embed_map``), and the items produced by
    that document are appended to a single list.

    Args:
        input_file: Path of the JSON Lines file to read, one document per
            record.
        embed_map: Passed through unchanged to the ``Document`` constructor.
        gen_type: ``'gold'`` selects ``Document.generate_gold_anaphor_data``;
            any other value selects
            ``Document.generate_candidate_anaphor_data``.

    Returns:
        A list with the items of all documents, in file order.
    """
    print('loading data from %s' % input_file)

    raw_data = []
    # Use the reader as a context manager so the underlying file is closed
    # even if building a Document or generating its data raises.
    with jsonlines.open(input_file) as data_reader:
        for doc_data in data_reader.iter():
            doc = Document(doc_data, embed_map)
            if gen_type == 'gold':
                doc_items = doc.generate_gold_anaphor_data()
            else:
                doc_items = doc.generate_candidate_anaphor_data()
            raw_data.extend(doc_items)

    print("---> total number of training pairs: %s" % len(raw_data))
    return raw_data