from edge_to_irtg import edge2irtg
from convert_irtg_to_mrp import get_edges, get_id2lex, get_input, get_mrp_edges, get_nodes, get_tops, irtg2mrp
from process_c import compress_c_edge, decompress_c

# Post-processing script: reads one MRP graph (JSON) per input line, runs
# decompress_c and raise_edge over its edges, converts the result back with
# irtg2mrp, and appends one JSON record per graph to the output file.
#
# Usage: <script> INFILE OUTFILE
#
# NOTE(review): `sys`, `json` and `raise_edge` are used below but are not
# imported in the visible part of this file -- confirm they are imported
# earlier.

infile = sys.argv[1]
outfile = sys.argv[2]

# Top-level fields that are copied verbatim from each input record onto the
# corresponding output record.
non_deducible = ["id", "flavor", "framework", "version", "time"]

# The output file is opened once (still in append mode, so existing content
# is preserved) instead of being reopened for every input record.
with open(infile, 'r', encoding='utf-8') as f:
    with open(outfile, 'a', encoding='utf-8') as out:
        for line in f:
            mrp_dict = json.loads(line)

            # Remember the pass-through fields of this record.
            extras = {
                category: value
                for category, value in mrp_dict.items()
                if category in non_deducible
            }

            edges = get_mrp_edges(mrp_dict)
            labels = get_id2lex(mrp_dict)
            decompressed_c = decompress_c(edges)
            raised_d = raise_edge(decompressed_c, 'D', ['P', 'S'], mark=True)

            print(labels)
            # Every edge source that has no label yet is given the
            # placeholder label 'Non-Terminal'.
            for (u, v) in decompressed_c.keys():
                if u not in labels:
                    labels[u] = 'Non-Terminal'
            print(labels)

            postprocessed_mrp = irtg2mrp(raised_d, labels)
            # Restore the pass-through fields on the rebuilt record.
            postprocessed_mrp.update(extras)

            out.write(json.dumps(postprocessed_mrp))
            out.write('\n')
framework = mrp_dict["framework"] version = mrp_dict["version"] time = mrp_dict["time"] for token_file in os.listdir(tokenized_dir): #print(token_file) if token_file[:3] == filename[:3]: companion_data = json.load( open(tokenized_dir + token_file, encoding='utf-8')) if id not in companion_data.keys(): continue else: spans = ' '.join( list(companion_data[id]["spans"].keys())) tokens = companion_data[id]['tokenization'] edges = get_mrp_edges(mrp_dict, get_remote=False) edges = eliminate_h(edges) labels = get_id2lex(mrp_dict) compressed_edges = compress_c_edge(edges) compressed_labels = update_id_labels( compressed_edges, labels) irtg_format_compressed = edge2irtg( compressed_edges, labels) node_tokens = node_to_token_index( companion_data, mrp_dict, compressed_labels, id) aligned = percolate(compressed_edges, priority_queue, compressed_labels) alignments = '' for alignment in aligned.keys():
import sys
import json
import collections
import os
import random
from tqdm import tqdm
from edge_to_irtg import edge2irtg
from get_edges_from_mrp import get_id2lex, get_mrp_edges
from convert_irtg_to_mrp import get_edges, get_mrp_edges, get_nodes, get_tops, irtg2mrp
from eliminate_h_top import eliminate_h
from a_star_mrp import *

# Debug script: for every MRP graph (one JSON object per line) in the file
# named by the first command-line argument, print the graph id, then the
# result of edge2irtg for its edges and labels, then a 40-underscore rule.

mrp_in = sys.argv[1]

with open(mrp_in) as infile:
    # Each input line is decoded lazily, one graph at a time.
    for mrp in map(json.loads, infile):
        print(mrp['id'])
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp, get_remote=False)
        irtg = edge2irtg(edges, labels)
        # Same two output lines as separate print calls would produce.
        print(irtg, '_' * 40, sep='\n')
label_dict[u] = label_dict[int(str(u)[:-4])] else: label_dict[u] = 'Non-Terminal' nodes_in_edge_dict = list(set([node for edge in edge_dict.keys() for node in edge])) label_dict_nodes = list(label_dict.keys()) for edge in edge_dict.keys(): for node in edge: if node not in label_dict.keys(): label_dict[node] = 'Non-Terminal' return label_dict with open(mrp_data_path,encoding='utf8', errors='ignore') as infile: counter = 0 for line in infile: #print(line) mrp_dict = json.loads(line) id = mrp_dict["id"] print(id) edges = get_mrp_edges(mrp_dict, get_remote = True) edges = eliminate_h(edges) labels = get_id2lex(mrp_dict) compressed_edges = compress_c_edge(edges) compressed_labels = update_id_labels(compressed_edges, labels) irtg_format_compressed = edge2irtg(compressed_edges, labels) print(irtg_format_compressed) node_tokens = node_to_token_index(companion_data, mrp_dict, compressed_labels, id) #print(companion_data) #print(compressed_labels) #print(node_tokens) alignments = align(compressed_edges, priority_dict, mrp_dict, node_tokens, compressed_labels)
from edge_to_irtg import edge2irtg
from process_c import *
from move_edges import lower_edge, raise_edge
from test_head_percolation import update_id_labels
from utils import number_edges
from a_star_mrp import get_roots

# Debug script: for every MRP graph (one JSON object per line) in the file
# named by the first command-line argument, print the graph after each
# transformation step: original, after compress_c_edge, after raising 'U'.
#
# NOTE(review): `sys`, `json`, `get_id2lex` and `get_mrp_edges` are not
# imported in the visible part of this file -- confirm they are imported
# earlier (or provided by `from process_c import *`).

mrp_in = sys.argv[1]

with open(mrp_in) as infile:
    for line in infile:
        mrp = json.loads(line)
        # NOTE: `id` shadows the builtin; the name is kept unchanged in case
        # code further down the file refers to it.
        id = mrp['id']
        print(id)

        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp, get_remote=True)
        edges = number_edges(edges, 'A')

        print('original')
        irtg_original = edge2irtg(edges, labels)
        print(irtg_original)

        compressed = compress_c_edge(edges)
        labels = update_id_labels(compressed, labels)

        print('COMPRESSED')
        # Fix: render the compressed edges here. The previous code passed
        # `edges`, so the graph shown under this heading was not the result
        # of compress_c_edge.
        irtg_compressed = edge2irtg(compressed, labels)
        print(irtg_compressed)

        print('RAISED U')
        raised_u = raise_edge(
            compressed,
            'U',
            ['L', 'H', 'P', 'S', 'A', 'D'],
            label_dict=labels,
        )
        labels = update_id_labels(raised_u, labels)
        irtg_raised_u = edge2irtg(raised_u, labels)
import json
import sys
from get_edges_from_mrp import get_id2lex, get_mrp_edges

# For every MRP graph (one JSON object per line) in the corpus file named by
# the first command-line argument, collect two sets: the node ids that have a
# label entry, and the node ids that occur as an endpoint of some edge.

corpus = sys.argv[1]

with open(corpus) as infile:
    for line in infile:
        mrp = json.loads(line)
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp)

        # All ids that appear as keys of the label mapping.
        nodes = set(labels.keys())

        # Both endpoints of every edge key.
        node_mentions_in_edges = set()
        for (u, v) in edges.keys():
            node_mentions_in_edges.update((u, v))