class Parser:
    """End-to-end AMR parser: loads an AllenNLP model archive and the
    am-tools Java backend, then turns raw sentences into evaluated AMR output.

    NOTE(review): relies on module-level imports (jnius_config, load_archive,
    prepare_environment, DatasetReader, AMconllPredictor, AMRInterface,
    spacy_tokenize, from_raw_text, TemporaryDirectory) being present in this
    file — confirm against the full module.
    """

    def __init__(self, archive_path, cuda_device, overrides, weights_file,
                 lookup_path, wordnet_path, am_tools_path):
        """Load the trained model and set up the AMR evaluation backend.

        Args:
            archive_path: path to the AllenNLP model archive (.tar.gz).
            cuda_device: device index for inference (-1 for CPU).
            overrides: JSON string overriding archived configuration.
            weights_file: optional alternative weights file for the archive.
            lookup_path: path to AMR lookup data for postprocessing.
            wordnet_path: path to the WordNet data used by the AMR interface.
            am_tools_path: path to the am-tools jar, added to the JVM classpath.
        """
        # Must configure the classpath before any jnius class is loaded.
        jnius_config.set_classpath(".", am_tools_path)

        # Load model
        archive = load_archive(archive_path, cuda_device, overrides, weights_file)
        config = archive.config
        # The reader requires *some* formalism entry; the real one is passed
        # per-call to parse_and_save below.
        config.formalism = "DUMMY"
        prepare_environment(config)
        model = archive.model
        model.eval()  # inference mode: disable dropout etc.

        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
        self.predictor = AMconllPredictor(dataset_reader, k=6, give_up=0,
                                          threads=1, model=model)
        self.formalism = AMRInterface(lookup_path, wordnet_path)

    def parse(self, sentence):
        """Parse a single raw sentence and return its evaluated AMR as a string.

        The sentence is tokenized, written to a temporary .amconll file,
        parsed by the predictor, and postprocessed by the AMR interface.
        """
        # (fixed) removed stray debug `print("")` that wrote a blank line to
        # stdout on every call.
        words = spacy_tokenize(sentence)
        am_sentence = from_raw_text(rawstr=sentence, words=words,
                                    add_art_root=False, attributes=dict(),
                                    contract_ne=True)
        with TemporaryDirectory() as direc:
            temp_path = direc + "/sentences.amconll"
            output_filename = direc + "/parsed.amconll"

            with open(temp_path, "w") as f:
                f.write(str(am_sentence))
                f.write("\n\n")  # blank line terminates the amconll sentence block

            self.predictor.parse_and_save(formalism="AMR-2017",
                                          input_file=temp_path,
                                          output_file=output_filename)
            interpreted_output = self.formalism.evaluate(output_filename, direc)
            with open(interpreted_output) as f:
                return str(f.read()).rstrip()
def __init__(self, archive_path, cuda_device, overrides, weights_file,
             lookup_path, wordnet_path, am_tools_path):
    """Set up the parser: JVM classpath, AllenNLP model, predictor, AMR backend.

    Args:
        archive_path: AllenNLP model archive to load.
        cuda_device: CUDA device index (-1 runs on CPU).
        overrides: JSON configuration overrides for the archive.
        weights_file: optional replacement weights for the archived model.
        lookup_path: AMR lookup data location.
        wordnet_path: WordNet data location for the AMR interface.
        am_tools_path: am-tools jar added to the Java classpath.
    """
    # The classpath has to be set before the JVM starts via jnius.
    jnius_config.set_classpath(".", am_tools_path)

    # Restore the trained model from its archive.
    model_archive = load_archive(archive_path, cuda_device, overrides,
                                 weights_file)
    cfg = model_archive.config
    cfg.formalism = "DUMMY"  # placeholder; the real formalism is given at parse time
    prepare_environment(cfg)

    loaded_model = model_archive.model
    loaded_model.eval()

    reader = DatasetReader.from_params(cfg.pop('dataset_reader'))
    self.predictor = AMconllPredictor(reader,
                                      k=6,
                                      give_up=0,
                                      threads=1,
                                      model=loaded_model)
    self.formalism = AMRInterface(lookup_path, wordnet_path)
else: dataset_reader = DatasetReader.from_params(config.pop('dataset_reader')) evaluation_data_path = args.input_file embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}) if args.extend_vocab: logger.info("Vocabulary is being extended with test instances.") logger.info("Reading evaluation data from %s", evaluation_data_path) instances = dataset_reader.read(evaluation_data_path) model.vocab.extend_from_instances(Params({}), instances=instances) model.extend_embedder_vocab(embedding_sources) predictor = AMconllPredictor(dataset_reader, args.k, args.give_up, args.threads, model=model) requires_art_root = { "DM": True, "PAS": True, "PSD": True, "EDS": False, "AMR-2015": False, "AMR-2017": False } sentences = [] with open(args.input_file) as f: for sentence in f:
else: dataset_reader = DatasetReader.from_params(config.pop('dataset_reader')) evaluation_data_path = args.input_file embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}) if args.extend_vocab: logger.info("Vocabulary is being extended with test instances.") logger.info("Reading evaluation data from %s", evaluation_data_path) instances = dataset_reader.read(evaluation_data_path) model.vocab.extend_from_instances(Params({}), instances=instances) model.extend_embedder_vocab(embedding_sources) predictor = AMconllPredictor(dataset_reader, args.k, args.give_up, args.threads, model=model) formalism = args.formalism instances = dataset_reader.read([[ formalism, args.input_file ]]) # we need to give the formalism to amconll dataset_reader model.train(False) data_iterator = DataIterator.from_params(config.pop('iterator')) with open(args.input_file) as f: conll_sentences = list(amconll_tools.parse_amconll(f)) predictions = dataset_reader.restore_order( forward_on_instances(model, instances, data_iterator))