def load_amrs_cached(amr_fpath):
    global pgraph_cache
    pgraphs = pgraph_cache.get(amr_fpath, None)
    if pgraphs is None:
        pgraphs = penman.load(amr_fpath, model=NoOpModel())
        pgraph_cache[amr_fpath] = pgraphs
    return pgraphs
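# A minimal usage sketch for the cached loader above, assuming `penman` is
# imported, `NoOpModel` comes from `penman.models.noop`, and the module defines
# the cache as an empty dict. A second call with the same (illustrative)
# filename is served from the cache instead of re-parsing the file.
pgraph_cache = {}
# graphs_a = load_amrs_cached('corpus.txt')   # parses the file
# graphs_b = load_amrs_cached('corpus.txt')   # returned from pgraph_cache
# assert graphs_a is graphs_b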
def wikify_file(self, infpath, outfpath):
    print('Loading', infpath)
    pgraphs = penman.load(infpath)
    winfo_list = self.find_wiki_nodes_for_graphs(pgraphs)
    print('Running BLINK to get wiki values')
    winfo_list = self.predict_blink(winfo_list)
    print('Adding and saving graphs to', outfpath)
    pgraphs = self.add_wiki_to_graphs(pgraphs, winfo_list)
    penman.dump(pgraphs, outfpath, indent=6)
def read_from_file(filepath, graph=False):
    graphs = penman.load(filepath, cls=CODEC)
    if graph:
        return graphs
    codec = CODEC()
    amrs = []
    for g in graphs:
        amr = codec.encode(g)
        amrs.append(amr)
    return amrs
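# Hypothetical call pattern for read_from_file. It assumes CODEC is a penman
# codec class (older penman releases accepted a `cls=` argument to load). With
# the default graph=False the graphs are re-encoded to AMR strings, while
# graph=True returns the decoded penman Graph objects directly.
#
#   amr_strings = read_from_file('amrs.txt')              # list of str
#   amr_graphs  = read_from_file('amrs.txt', graph=True)  # list of Graph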
def load(fh, model):
    """
    Deserialize PENMAN graphs from a file (handle or filename)

    Args:
        fh: filename or file object
        model: Xmrs subclass instantiated from decoded triples
    Returns:
        a list of objects (of class *model*)
    """
    graphs = penman.load(fh, cls=XMRSCodec)
    xs = [model.from_triples(g.triples()) for g in graphs]
    return xs
def read_file(source):
    # read preprocessed amr file
    token, lemma, pos, ner, amrs = [], [], [], [], []
    graphs = penman.load(source)
    logger.info('read from %s, %d amrs' % (source, len(graphs)))
    for g in graphs:
        # Load the metadata
        token.append(json.loads(g.metadata['tokens']))
        lemma.append(json.loads(g.metadata['lemmas']))
        pos.append(json.loads(g.metadata['pos_tags']))
        ner.append(json.loads(g.metadata['ner_tags']))
        # Build the AMRGraph from the penman graph
        amr_graph = AMRGraph(g)
        amrs.append(amr_graph)
    return amrs, token, lemma, pos, ner
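# For read_file to succeed, every AMR entry in the input file needs metadata
# lines holding JSON arrays, which penman exposes through g.metadata. A
# hypothetical entry (all field values are illustrative):
#
#   # ::tokens ["The", "boy", "wants", "to", "go"]
#   # ::lemmas ["the", "boy", "want", "to", "go"]
#   # ::pos_tags ["DT", "NN", "VBZ", "TO", "VB"]
#   # ::ner_tags ["O", "O", "O", "O", "O"]
#   (w / want-01
#      :ARG0 (b / boy)
#      :ARG1 (g / go-02 :ARG0 b))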
def load(source):
    """
    Deserialize PENMAN graphs from a file (handle or filename)

    Args:
        source: filename or file object
    Returns:
        a list of DMRS objects
    """
    if not hasattr(source, 'read'):
        source = Path(source).expanduser()
    try:
        graphs = penman.load(source)
    except penman.PenmanError as exc:
        raise PyDelphinException('could not decode with Penman') from exc
    xs = [from_triples(g.triples) for g in graphs]
    return xs
def load_amr_file(source, dereify=None, remove_wiki=False):
    assert remove_wiki in (False, 'replace', 'remove')
    # Select the model to use
    if dereify is None or dereify:  # None or True (odd way to do default logic)
        model = Model()             # default penman model, same as load(..., model=None)
    else:                           # False
        model = noop_model
    # Load the data
    out = penman.load(source=source, model=model)
    # Remove or replace the wiki tags
    if remove_wiki == 'remove':
        for i in range(len(out)):
            out[i] = _remove_wiki(out[i])
    elif remove_wiki == 'replace':
        for i in range(len(out)):
            out[i] = _replace_wiki(out[i])
    return out
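# Hedged usage sketch (the filename is illustrative). Leaving dereify at None
# selects the default penman Model; remove_wiki controls what happens to the
# :wiki attributes via the module's _remove_wiki/_replace_wiki helpers.
#
#   graphs = load_amr_file('amrs.txt')                         # keep :wiki as-is
#   graphs = load_amr_file('amrs.txt', remove_wiki='remove')   # drop :wiki triples
#   graphs = load_amr_file('amrs.txt', remove_wiki='replace')  # rewrite :wiki values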
def gather_test_graphs():
    # These are for amr_annotation_3.0/data/multisentence/ms-amr-split/test/msamr_dfa_007.xml
    fn = 'data/amr_annotation_3.0/data/amrs/unsplit/amr-release-3.0-amrs-dfa.txt'
    gids = ["DF-200-192400-625_7046.1",  "DF-200-192400-625_7046.2",
            "DF-200-192400-625_7046.3",  "DF-200-192400-625_7046.4",
            "DF-200-192400-625_7046.5",  "DF-200-192400-625_7046.6",
            "DF-200-192400-625_7046.7",  "DF-200-192400-625_7046.8",
            "DF-200-192400-625_7046.9",  "DF-200-192400-625_7046.10",
            "DF-200-192400-625_7046.11", "DF-200-192400-625_7046.12",
            "DF-200-192400-625_7046.13", "DF-200-192400-625_7046.14",
            "DF-200-192400-625_7046.15", "DF-200-192400-625_7046.16",
            "DF-200-192400-625_7046.17", "DF-200-192400-625_7046.18"]
    # Load the AMR file with penman and then extract the specific ids and put them in order
    pgraphs = penman.load(fn, model=NoOpModel())
    ordered_pgraphs = [None] * len(gids)
    for pgraph in pgraphs:
        gid = pgraph.metadata['id']
        doc_idx = gids.index(gid) if gid in gids else None
        if doc_idx is not None:
            ordered_pgraphs[doc_idx] = pgraph
    assert None not in ordered_pgraphs
    return ordered_pgraphs
    top = [(x[0], x[1], x[1] * 100 / len(sample)) for x in counter.most_common(top_k)]
    return top

if __name__ == "__main__":
    fname = sys.argv[1]
    print("Loading Verb-Brasil framesets")
    framesets = []
    with open("verbo-brasil.dic", "r", encoding="utf8") as f:
        framesets = [line.strip() for line in f]
    print(f'Verb-Brasil ({len(framesets)}) loaded')
    amrs = penman.load(fname)
    nodes = []
    instance_nodes = []
    edges = []
    tokens = []
    freq_concepts = {"general concepts": 0, "named-entities": 0, "modal verbs": 0,
                     "amr-unknown": 0, "Verbo-Brasil framesets": 0, "constants": 0,
                     "negative": 0, "special frames": 0}
    for amr in amrs:
        if "snt" in amr.metadata:
            tokens += [token.lower() for token in amr.metadata["snt"].split()]
        else:
def wikify_file(self, infn, outfn):
    new_graphs = []
    for graph in tqdm(penman.load(infn)):
        new_graph = self.wikify_graph(graph)
        new_graphs.append(new_graph)
    penman.dump(new_graphs, outfn, indent=6)
def get_sents_from_AMR(infn):
    sents = []
    for graph in penman.load(infn):
        sents.append(graph.metadata['snt'])
    return sents
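# Minimal input this helper expects: each graph preceded by a ::snt metadata
# line (the example sentence and filename are illustrative).
#
#   # ::snt The boy wants to go.
#   (w / want-01 :ARG0 (b / boy) :ARG1 (g / go-02 :ARG0 b))
#
#   sents = get_sents_from_AMR('amrs.txt')   # -> ['The boy wants to go.']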
if __name__ == '__main__':
    if 1:   # dev dataset
        gold_alignments_fn = 'amrlib/alignments/isi_hand_alignments/dev-gold.txt'
        test_amr_fn = 'amrlib/data/alignments/dev-aligned.txt'
    else:   # test dataset
        gold_alignments_fn = 'amrlib/alignments/isi_hand_alignments/test-gold.txt'
        test_amr_fn = 'amrlib/data/alignments/test-aligned.txt'
    # Load the gold alignments
    print('Loading alignments from', gold_alignments_fn)
    gold_alignments, gold_ids = load_gold_alignments(gold_alignments_fn)
    # Load the aligned corpus and extract the data
    print('Loading corpus data from', test_amr_fn)
    pgraphs = penman.load(test_amr_fn, model=NoOpModel())
    test_alignments = [g.metadata['rbw_alignments'].strip().split() for g in pgraphs]
    test_alignments = [a for a in test_alignments if a]
    test_ids = [g.metadata['id'] for g in pgraphs]
    # Sanity check that things match up
    assert len(gold_alignments) == len(test_alignments), '%s != %s' % (len(gold_alignments), len(test_alignments))
    assert len(gold_alignments) == 100, len(gold_alignments)
    for gold_id, test_id in zip(gold_ids, test_ids):
        assert gold_id == test_id, '%s != %s' % (gold_id, test_id)
    print('Gold and Test alignment files match')
    # Score against isi automated alignments
#!/usr/bin/python3
import setup_run_dir    # Set the working directory and python sys.path to 2 levels above
import os
import penman

if __name__ == '__main__':
    data_dir = 'amrlib/data/LDC2020T02'
    for fn in ('dev.txt', 'test.txt', 'train.txt'):
        fpath = os.path.join(data_dir, fn)
        print('Loading', fpath)
        graphs = penman.load(fpath)
        print('Loaded {:,} graphs'.format(len(graphs)))
        print()