open(tokenized_dir + token_file, encoding='utf-8')) if id not in companion_data.keys(): continue else: spans = ' '.join( list(companion_data[id]["spans"].keys())) tokens = companion_data[id]['tokenization'] edges = get_mrp_edges(mrp_dict, get_remote=False) edges = eliminate_h(edges) labels = get_id2lex(mrp_dict) compressed_edges = compress_c_edge(edges) compressed_labels = update_id_labels( compressed_edges, labels) irtg_format_compressed = edge2irtg( compressed_edges, labels) node_tokens = node_to_token_index( companion_data, mrp_dict, compressed_labels, id) aligned = percolate(compressed_edges, priority_queue, compressed_labels) alignments = '' for alignment in aligned.keys(): for node in aligned[alignment]: if type(node) == str: if '<root>' in node: node = node[:-6] alignments += str(node) + '|' alignments += str( alignment) + '!' + '||' + str(
# Script: print an IRTG rendering of every graph in a JSON-lines input file.
#
# Usage: the path of the input file is taken from the first command-line
# argument. Each non-blank line is parsed as one JSON object; for every object
# the script prints its 'id', the string returned by edge2irtg, and a
# 40-character underscore separator.
import sys
import json
import collections
import os
import random

from tqdm import tqdm

from edge_to_irtg import edge2irtg
from get_edges_from_mrp import get_id2lex, get_mrp_edges
# NOTE(review): get_mrp_edges is imported a second time on the next line, so
# the convert_irtg_to_mrp version rebinds the name (and the star import from
# a_star_mrp below may rebind it again). Import order is kept exactly as it
# was -- confirm which implementation is actually intended here.
from convert_irtg_to_mrp import get_edges, get_mrp_edges, get_nodes, get_tops, irtg2mrp
from eliminate_h_top import eliminate_h
from a_star_mrp import *

mrp_in = sys.argv[1]
# JSON text is UTF-8; be explicit instead of relying on the platform's
# default locale encoding.
with open(mrp_in, encoding='utf-8') as infile:
    for line in infile:
        # A blank (e.g. trailing) line is not a JSON document; skip it rather
        # than letting json.loads raise.
        if not line.strip():
            continue
        mrp = json.loads(line)
        print(mrp['id'])
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp, get_remote=False)
        irtg = edge2irtg(edges, labels)
        print(irtg)
        print('_' * 40)
from edge_to_irtg import edge2irtg
from process_c import *
from move_edges import lower_edge, raise_edge
from test_head_percolation import update_id_labels
from utils import number_edges
from a_star_mrp import get_roots

# Script: read a JSON-lines input file (path given as the first command-line
# argument), lower and decompress the edges of every graph, and report only
# those graphs for which get_roots() finds no root after decompression.
mrp_in = sys.argv[1]
# JSON text is UTF-8; be explicit instead of relying on the platform's
# default locale encoding.
with open(mrp_in, encoding='utf-8') as infile:
    for line in infile:
        # A blank (e.g. trailing) line is not a JSON document; skip it rather
        # than letting json.loads raise.
        if not line.strip():
            continue
        mrp = json.loads(line)
        id = mrp['id']
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp, get_remote=False)
        irtg_original = edge2irtg(edges, labels)
        lowered = lower_edge(edges)
        # NOTE(review): decompress_c (presumably provided by the star import
        # from process_c) is given `edges`, not `lowered`, while the labels
        # below are updated from `lowered`. Left as in the original because
        # lower_edge may modify `edges` in place -- confirm.
        decompressed = decompress_c(edges, labels)
        labels = update_id_labels(lowered, labels)
        irtg_decompressed = edge2irtg(decompressed, labels)
        # Compute the roots once and reuse the result for the check and the
        # diagnostic output.
        decompressed_roots = get_roots(decompressed)
        if len(decompressed_roots) == 0:
            print(decompressed_roots)
            print(id)
            print(irtg_decompressed)
from test_head_percolation import update_id_labels from utils import number_edges from a_star_mrp import get_roots mrp_in = sys.argv[1] with open(mrp_in) as infile: for line in infile: mrp = json.loads(line) id = mrp['id'] print(id) labels = get_id2lex(mrp) edges = get_mrp_edges(mrp, get_remote=True) edges = number_edges(edges, 'A') print('original') irtg_original = edge2irtg(edges, labels) print(irtg_original) compressed = compress_c_edge(edges) labels = update_id_labels(compressed, labels) print('COMPRESSED') irtg_compressed = edge2irtg(edges, labels) print(irtg_compressed) print('RAISED U') raised_u = raise_edge(compressed, 'U', ['L', 'H', 'P', 'S', 'A', 'D'], label_dict=labels) labels = update_id_labels(raised_u, labels) irtg_raised_u = edge2irtg(raised_u, labels) print(irtg_raised_u) print('RAISED F') raised_f = raise_edge(
raised_t = raise_edge( raised_r, 'T', ['L', 'H', 'P', 'S', 'Q', 'E', 'F', 'D'], label_dict=labels) labels = update_id_labels(raised_t, labels) raised_q = raise_edge( raised_t, 'Q', [ 'L', 'H', 'P', 'S', 'T', 'Q', 'E', 'F', 'D' ], label_dict=labels) updated_id_labels = update_id_labels( raised_q, labels) irtg_format_raised = edge2irtg( raised_q, updated_id_labels) #node_tokens = node_to_token_index(companion_data, mrp_dict, updated_id_labels, id) node_tokens = node_to_token_index_mod( companion_data, mrp_dict, updated_id_labels, id) #print(node_tokens) aligned = percolate(raised_q, priority_queue, updated_id_labels) alignments = '' for alignment in aligned.keys(): for node in aligned[alignment]: if type(node) == str: if '<root>' in node: node = node[:-6] alignments += str(node) + '|' alignments += str(