Example #1
0
     open(tokenized_dir + token_file,
          encoding='utf-8'))
 if id not in companion_data.keys():
     continue
 else:
     spans = ' '.join(
         list(companion_data[id]["spans"].keys()))
     tokens = companion_data[id]['tokenization']
     edges = get_mrp_edges(mrp_dict,
                           get_remote=False)
     edges = eliminate_h(edges)
     labels = get_id2lex(mrp_dict)
     compressed_edges = compress_c_edge(edges)
     compressed_labels = update_id_labels(
         compressed_edges, labels)
     irtg_format_compressed = edge2irtg(
         compressed_edges, labels)
     node_tokens = node_to_token_index(
         companion_data, mrp_dict,
         compressed_labels, id)
     aligned = percolate(compressed_edges,
                         priority_queue,
                         compressed_labels)
     alignments = ''
     for alignment in aligned.keys():
         for node in aligned[alignment]:
             if type(node) == str:
                 if '<root>' in node:
                     node = node[:-6]
             alignments += str(node) + '|'
         alignments += str(
             alignment) + '!' + '||' + str(
Example #2
0
import sys
import json
import collections
import os
import random
from tqdm import tqdm

from edge_to_irtg import edge2irtg
from get_edges_from_mrp import get_id2lex, get_mrp_edges
from convert_irtg_to_mrp import get_edges, get_mrp_edges, get_nodes, get_tops, irtg2mrp
from eliminate_h_top import eliminate_h
from a_star_mrp import *

# Path of the input file, taken from the first command-line argument.
# Each non-blank line is parsed as one JSON-encoded graph.
mrp_in = sys.argv[1]

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, so non-ASCII tokens are read identically on every system.
with open(mrp_in, encoding='utf-8') as infile:
    for line in infile:
        # Tolerate blank lines (e.g. a stray newline at the end of the file);
        # json.loads would otherwise raise on them.
        if not line.strip():
            continue
        mrp = json.loads(line)
        print(mrp['id'])
        labels = get_id2lex(mrp)
        # Remote edges are excluded from the extracted edge set.
        edges = get_mrp_edges(mrp, get_remote=False)
        irtg = edge2irtg(edges, labels)
        print(irtg)
        # Visual separator between consecutive graphs.
        print('_' * 40)
from edge_to_irtg import edge2irtg
from process_c import *
from move_edges import lower_edge, raise_edge
from test_head_percolation import update_id_labels
from utils import number_edges
from a_star_mrp import get_roots

# Path of the input file, taken from the first command-line argument.
# Each non-blank line is parsed as one JSON-encoded graph.
mrp_in = sys.argv[1]

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, so non-ASCII tokens are read identically on every system.
with open(mrp_in, encoding='utf-8') as infile:
    for line in infile:
        # Tolerate blank lines (e.g. a stray newline at the end of the file);
        # json.loads would otherwise raise on them.
        if not line.strip():
            continue
        mrp = json.loads(line)
        # Named graph_id rather than `id` to avoid shadowing the builtin.
        graph_id = mrp['id']
        labels = get_id2lex(mrp)
        edges = get_mrp_edges(mrp, get_remote=False)
        # Rendered before any transformation; the result is not used below
        # but the call is kept so the script exercises the same code path.
        irtg_original = edge2irtg(edges, labels)
        lowered = lower_edge(edges)
        # NOTE(review): decompress_c receives `edges`, not `lowered`, while
        # the labels are updated from `lowered` -- this is only equivalent if
        # lower_edge modifies `edges` in place. Confirm against move_edges.
        decompressed = decompress_c(edges, labels)
        labels = update_id_labels(lowered, labels)
        irtg_decompressed = edge2irtg(decompressed, labels)
        # Report only the graphs for which no root is found after
        # decompression; compute the roots once and reuse the result.
        roots = get_roots(decompressed)
        if not roots:
            print(roots)
            print(graph_id)
            print(irtg_decompressed)
Example #4
0
from test_head_percolation import update_id_labels
from utils import number_edges
from a_star_mrp import get_roots

# Path of the input file, taken from the first command-line argument.
# Each non-blank line is parsed as one JSON-encoded graph.
mrp_in = sys.argv[1]

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, so non-ASCII tokens are read identically on every system.
with open(mrp_in, encoding='utf-8') as infile:
    for line in infile:
        # Tolerate blank lines (e.g. a stray newline at the end of the file);
        # json.loads would otherwise raise on them.
        if not line.strip():
            continue
        mrp = json.loads(line)
        id = mrp['id']
        print(id)
        labels = get_id2lex(mrp)
        # Unlike the other scripts here, remote edges are included.
        edges = get_mrp_edges(mrp, get_remote=True)
        edges = number_edges(edges, 'A')
        print('original')
        irtg_original = edge2irtg(edges, labels)
        print(irtg_original)
        compressed = compress_c_edge(edges)
        labels = update_id_labels(compressed, labels)
        print('COMPRESSED')
        # Render the compressed edges together with the labels that were just
        # updated for them; passing the pre-compression `edges` here would
        # print the wrong graph under this heading unless compress_c_edge
        # happens to modify its argument in place.
        irtg_compressed = edge2irtg(compressed, labels)
        print(irtg_compressed)
        print('RAISED U')
        # Raise 'U' edges on the compressed graph; the list is passed through
        # to raise_edge unchanged (its exact meaning is defined in move_edges).
        raised_u = raise_edge(compressed,
                              'U', ['L', 'H', 'P', 'S', 'A', 'D'],
                              label_dict=labels)
        labels = update_id_labels(raised_u, labels)
        irtg_raised_u = edge2irtg(raised_u, labels)
        print(irtg_raised_u)
        print('RAISED F')
        raised_f = raise_edge(
 raised_t = raise_edge(
     raised_r,
     'T',
     ['L', 'H', 'P', 'S', 'Q', 'E', 'F', 'D'],
     label_dict=labels)
 labels = update_id_labels(raised_t, labels)
 raised_q = raise_edge(
     raised_t,
     'Q', [
         'L', 'H', 'P', 'S', 'T', 'Q', 'E', 'F',
         'D'
     ],
     label_dict=labels)
 updated_id_labels = update_id_labels(
     raised_q, labels)
 irtg_format_raised = edge2irtg(
     raised_q, updated_id_labels)
 #node_tokens = node_to_token_index(companion_data, mrp_dict, updated_id_labels, id)
 node_tokens = node_to_token_index_mod(
     companion_data, mrp_dict,
     updated_id_labels, id)
 #print(node_tokens)
 aligned = percolate(raised_q, priority_queue,
                     updated_id_labels)
 alignments = ''
 for alignment in aligned.keys():
     for node in aligned[alignment]:
         if type(node) == str:
             if '<root>' in node:
                 node = node[:-6]
         alignments += str(node) + '|'
     alignments += str(