annotations = codecs.open('../web/annotation.txt', 'r', 'utf-8').read().strip() sentence_obj_list = [] for s_idx, sentence in enumerate(annotations.split('===')): if sentence.strip() != '': S = Sentence(s_idx, '', '', '') prev_matches = None for sent in sentence.split('\n'): if sent.strip() != '': action = sent.split(':')[1].strip() matches = re.findall(r'\[.*?\]', sent.strip()) matches = [m[1:-1] for m in matches] if action.strip() == '': S.graphs = [] nodes_in_visible_order = [] for m_idx, m in enumerate(matches): g = Graph(m_idx) S.graphs.append(g) for w_idx, w in enumerate(m.split()): n = Node(id=len(g.nodes), s=w, en_id=w_idx, de_id=None, lang='en', visible=True) n.visible = True n.to_en = False n.to_de = True n.graph = g g.nodes.append(n) nodes_in_visible_order.append(n) for node in nodes_in_visible_order: in_left_gids = get_neighbor(node, nodes_in_visible_order, 'left') in_right_gids = get_neighbor(node, nodes_in_visible_order, 'right') if node.er_lang == "en": node.en_left = in_left_gids
'utf-8').read().strip() sentence_obj_list = [] for s_idx, sentence in enumerate(annotations.split('===')): if sentence.strip() != '': S = Sentence(s_idx, '', '', '') prev_matches = None for sent in sentence.split('\n'): if sent.strip() != '': action = sent.split(':')[1].strip() matches = re.findall(r'\[.*?\]', sent.strip()) matches = [m[1:-1] for m in matches] if action.strip() == '': S.graphs = [] nodes_in_visible_order = [] for m_idx, m in enumerate(matches): g = Graph(m_idx) S.graphs.append(g) for w_idx, w in enumerate(m.split()): n = Node(id=len(g.nodes), s=w, en_id=w_idx, de_id=None, lang='en', visible=True) n.visible = True n.to_en = False n.to_de = True n.graph = g g.nodes.append(n) nodes_in_visible_order.append(n)
wa_no_null = insert_epsilon_edge(wa, input_sent[inp_span[0]:inp_span[1] + 1], output_sent[out_span[0]:out_span[1] + 1]) sym_coverage, sym_wa = make_symmetric(wa_no_null) assert sym_coverage == 0 untangle = untangle_wa(sym_wa) final_groups = {} for iu in sorted(untangle): ou = untangle[iu] if len(iu) > 1: assert len(ou) == 1 # or (len(iu) == 2 and len(ou) == 2) pass if len(ou) > 1: assert len(iu) == 1 # or (len(iu) == 2 and len(ou) == 2) pass final_groups[group_idx] = (iu, ou, inp_span, out_span) coe_graph = Graph(group_idx) to_nodes = [] node_idx = 0 for iu_idx in iu: assert inp_phrase[iu_idx] == input_sent[inp_span[0] + iu_idx] input_coverage[inp_span[0] + iu_idx] = 1 input_tok_group[inp_span[0] + iu_idx] = group_idx n = Node(node_idx, input_sent[inp_span[0] + iu_idx], None, inp_span[0] + iu_idx, DE_LANG, VIS_LANG == DE_LANG, True, False, False) node_idx += 1 to_nodes.append(n) from_nodes = [] for ou_idx in ou: assert out_phrase[ou_idx] == output_sent[out_span[0] + ou_idx] output_tok_group[out_span[0] + ou_idx] = group_idx
wa, input_sent[inp_span[0]:inp_span[1] + 1], output_sent[out_span[0]:out_span[1] + 1]) sym_coverage, sym_wa = make_symmetric(wa_no_null) assert sym_coverage == 0 untangle = untangle_wa(sym_wa) final_groups = {} for iu in sorted(untangle): ou = untangle[iu] if len(iu) > 1: assert len(ou) == 1 # or (len(iu) == 2 and len(ou) == 2) pass if len(ou) > 1: assert len(iu) == 1 # or (len(iu) == 2 and len(ou) == 2) pass final_groups[group_idx] = (iu, ou, inp_span, out_span) coe_graph = Graph(group_idx) to_nodes = [] node_idx = 0 for iu_idx in iu: assert inp_phrase[iu_idx] == input_sent[inp_span[0] + iu_idx] input_coverage[inp_span[0] + iu_idx] = 1 input_tok_group[inp_span[0] + iu_idx] = group_idx n = Node(node_idx, input_sent[inp_span[0] + iu_idx], None, inp_span[0] + iu_idx, DE_LANG, VIS_LANG == DE_LANG, True, False, False) node_idx += 1 to_nodes.append(n) from_nodes = [] for ou_idx in ou: