Code Example #1
File: parser.py Project: jgontrum/am-parser
class Parser:
    def __init__(self, archive_path, cuda_device, overrides, weights_file,
                 lookup_path, wordnet_path, am_tools_path):
        jnius_config.set_classpath(".", am_tools_path)

        # Load model
        archive = load_archive(archive_path, cuda_device, overrides,
                               weights_file)
        config = archive.config
        config.formalism = "DUMMY"
        prepare_environment(config)
        model = archive.model
        model.eval()
        dataset_reader = DatasetReader.from_params(
            config.pop('dataset_reader'))

        self.predictor = AMconllPredictor(dataset_reader,
                                          k=6,
                                          give_up=0,
                                          threads=1,
                                          model=model)

        self.formalism = AMRInterface(lookup_path, wordnet_path)

    def parse(self, sentence):
        print("")  # emit a blank line before parsing output
        words = spacy_tokenize(sentence)
        am_sentence = from_raw_text(rawstr=sentence,
                                    words=words,
                                    add_art_root=False,
                                    attributes=dict(),
                                    contract_ne=True)

        with TemporaryDirectory() as direc:
            temp_path = direc + "/sentences.amconll"
            output_filename = direc + "/parsed.amconll"

            with open(temp_path, "w") as f:
                f.write(str(am_sentence))
                f.write("\n\n")

            self.predictor.parse_and_save(formalism="AMR-2017",
                                          input_file=temp_path,
                                          output_file=output_filename)

            interpreted_output = self.formalism.evaluate(
                output_filename, direc)

            with open(interpreted_output) as f:
                return str(f.read()).rstrip()
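
A minimal usage sketch of the class above; every constructor argument here is a hypothetical placeholder, not a value taken from the project:

# All paths below are hypothetical placeholders; point them at your own
# model archive, AMR lookup data, WordNet copy, and am-tools jar.
parser = Parser(archive_path="model.tar.gz",
                cuda_device=-1,          # -1 = run on CPU
                overrides="",
                weights_file=None,
                lookup_path="lookup/",
                wordnet_path="wordnet/",
                am_tools_path="am-tools.jar")
print(parser.parse("The boy wants to sleep."))  # prints the evaluated graph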
Code Example #2
else:
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
evaluation_data_path = args.input_file

embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                     if args.embedding_sources_mapping else {})
if args.extend_vocab:
    logger.info("Vocabulary is being extended with test instances.")
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)
    model.vocab.extend_from_instances(Params({}), instances=instances)
    model.extend_embedder_vocab(embedding_sources)

predictor = AMconllPredictor(dataset_reader,
                             args.k,
                             args.give_up,
                             args.threads,
                             model=model)

# Per-formalism flag: does this graphbank need an artificial root token?
requires_art_root = {
    "DM": True,
    "PAS": True,
    "PSD": True,
    "EDS": False,
    "AMR-2015": False,
    "AMR-2017": False
}

sentences = []
with open(args.input_file) as f:
    for sentence in f:
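
The requires_art_root table records, for each graphbank formalism, whether an artificial root token has to be added when sentences are built from raw text. The loop at the end of this example is cut off in the listing; below is a hedged sketch of a body that would fit it, reusing the from_raw_text call from Example #1. This is an assumption for illustration, not the project's actual loop body:

for sentence in f:
    sentence = sentence.rstrip("\n")
    # look up add_art_root per formalism, as the table above suggests
    am_sentence = from_raw_text(rawstr=sentence,
                                words=spacy_tokenize(sentence),
                                add_art_root=requires_art_root[args.formalism],
                                attributes=dict(),
                                contract_ne=True)
    sentences.append(am_sentence)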
Code Example #3
else:
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
evaluation_data_path = args.input_file

embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                     if args.embedding_sources_mapping else {})
if args.extend_vocab:
    logger.info("Vocabulary is being extended with test instances.")
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)
    model.vocab.extend_from_instances(Params({}), instances=instances)
    model.extend_embedder_vocab(embedding_sources)

predictor = AMconllPredictor(dataset_reader,
                             args.k,
                             args.give_up,
                             args.threads,
                             model=model)

formalism = args.formalism
# The amconll dataset_reader needs the formalism alongside the input file.
instances = dataset_reader.read([[formalism, args.input_file]])
model.train(False)  # equivalent to model.eval(): switch to inference mode
data_iterator = DataIterator.from_params(config.pop('iterator'))

with open(args.input_file) as f:
    conll_sentences = list(amconll_tools.parse_amconll(f))

predictions = dataset_reader.restore_order(
    forward_on_instances(model, instances, data_iterator))
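
restore_order puts the batched model outputs back into the reading order of the input file, so they can be lined up with the conll_sentences read from the same file. A hedged consumption sketch follows; that each prediction is a per-sentence dict of model outputs, and the pairing by zip, are assumptions for illustration:

# Pair each gold sentence with the restored prediction for its position
# and inspect which output arrays the model produced for it.
for gold_sentence, prediction in zip(conll_sentences, predictions):
    print(list(prediction.keys()))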