def convert(data_home):
    """Convert the WikiCoref evaluation set into the processed format.

    Reads the OntoNotes-scheme key file shipped with WikiCoref and writes the
    converted dataset under <data_home>/processed/wikicoref/test.

    Args:
      data_home: root directory holding the "original" and "processed" trees.
    """
    out_dir = os.path.join(data_home, "processed", "wikicoref", "test")
    convert_lib.create_processed_data_dir(out_dir)
    key_file = os.path.join(
        data_home, "original", "WikiCoref", "Evaluation", "key-OntoNotesScheme")
    dataset = create_dataset(key_file)
    convert_lib.write_converted(dataset, out_dir)
def convert(data_home):
    """Convert the CoNLL-2012 (OntoNotes) train and dev splits.

    Reads <data_home>/original/CoNLL12/flat/<split>.txt for each split and
    writes the converted output under <data_home>/processed/<CONLL12>/<split>.

    Args:
      data_home: root directory holding the "original" and "processed" trees.
    """
    ontonotes_directory = os.path.join(data_home, "original", "CoNLL12/flat/")
    output_directory = os.path.join(data_home, "processed", CONLL12)
    convert_lib.create_processed_data_dir(output_directory)
    for split in [convert_lib.DatasetSplit.train, convert_lib.DatasetSplit.dev]:
        # os.path.join instead of raw ''.join concatenation, which silently
        # depended on ontonotes_directory ending with a slash.
        input_filename = os.path.join(
            ontonotes_directory, split + "." + convert_lib.FormatName.txt)
        converted_dataset = create_dataset(input_filename, ONTONOTES_FIELD_MAP)
        convert_lib.write_converted(
            converted_dataset, os.path.join(output_directory, split))
def convert(data_home):
    """Convert WikiCoref and additionally emit a singleton-free variant.

    Writes the converted evaluation set under processed/wikicoref/test, then
    writes a copy with singleton clusters removed under the corresponding
    wikicoref_mult directory.

    Args:
      data_home: root directory holding the "original" and "processed" trees.
    """
    output_directory = os.path.join(data_home, "processed", "wikicoref", "test")
    convert_lib.create_processed_data_dir(output_directory)
    test_set = os.path.join(
        data_home, "original", "WikiCoref", "Evaluation", "key-OntoNotesScheme")
    converted_dataset = create_dataset(test_set)
    convert_lib.write_converted(converted_dataset, output_directory)
    mult_directory = output_directory.replace(
        convert_lib.DatasetName.wikicoref, "wikicoref_mult")
    convert_lib.create_processed_data_dir(mult_directory)
    # remove_singletons mutates converted_dataset in place; the full version
    # was already written above, so only the mult copy is affected.
    converted_dataset.remove_singletons()
    convert_lib.write_converted(converted_dataset, mult_directory)
def convert(data_home):
    """Convert the GAP coreference splits into the processed format.

    GAP ships its splits as gap-development.tsv / gap-validation.tsv /
    gap-test.tsv; these map to the dev/valid/test split names used in the
    processed output tree.

    Args:
      data_home: root directory holding the "original" and "processed" trees.
    """
    gap_directory = os.path.join(data_home, "original", "GAP", "gap-coreference")
    output_directory = os.path.join(data_home, "processed", "gap")
    convert_lib.create_processed_data_dir(output_directory)
    splits = [convert_lib.DatasetSplit.dev, convert_lib.DatasetSplit.valid,
              convert_lib.DatasetSplit.test]
    for split, split_name in zip(splits, ["development", "validation", "test"]):
        input_filename = os.path.join(gap_directory, "gap-" + split_name + ".tsv")
        converted_dataset = create_dataset(input_filename)
        convert_lib.write_converted(
            converted_dataset, os.path.join(output_directory, split))
def convert(data_home):
    """Resplit and convert PreCo, plus a singleton-free variant.

    Resplits the original PreCo release into train/dev/test jsonl files,
    converts each split into the processed format, then writes copies with
    singleton clusters removed under the preco_mult directory.

    Args:
      data_home: root directory holding the "original" and "processed" trees.
    """
    preco_directory = os.path.join(data_home, "original", "PreCo_1.0")
    resplit_directory = os.path.join(data_home, "processed", PRECO, "resplit")
    convert_lib.create_processed_data_dir(resplit_directory)
    output_directory = os.path.join(data_home, "processed", PRECO)
    resplit(preco_directory, resplit_directory)
    convert_lib.create_processed_data_dir(output_directory)
    preco_datasets = {}
    for split in [convert_lib.DatasetSplit.train, convert_lib.DatasetSplit.dev,
                  convert_lib.DatasetSplit.test]:
        input_filename = os.path.join(
            resplit_directory, split + "." + convert_lib.FormatName.jsonl)
        converted_dataset = create_dataset(input_filename)
        convert_lib.write_converted(
            converted_dataset, os.path.join(output_directory, split))
        preco_datasets[split] = converted_dataset
    mult_directory = output_directory.replace(PRECO, "preco_mult")
    convert_lib.create_processed_data_dir(mult_directory)
    for split, dataset in preco_datasets.items():
        # remove_singletons mutates in place; the full splits were written above.
        dataset.remove_singletons()
        convert_lib.write_converted(dataset, os.path.join(mult_directory, split))