예제 #1
0
    annotations = codecs.open('../web/annotation.txt', 'r', 'utf-8').read().strip()
    sentence_obj_list = []
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip() != '':
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip() != '':
                    action = sent.split(':')[1].strip()
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    if action.strip() == '':
                        S.graphs = []
                        nodes_in_visible_order = []
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                n = Node(id=len(g.nodes), s=w, en_id=w_idx, de_id=None, lang='en', visible=True)
                                n.visible = True
                                n.to_en = False
                                n.to_de = True
                                n.graph = g
                                g.nodes.append(n)
                                nodes_in_visible_order.append(n)

                        for node in nodes_in_visible_order:
                            in_left_gids = get_neighbor(node, nodes_in_visible_order, 'left')
                            in_right_gids = get_neighbor(node, nodes_in_visible_order, 'right')
                            if node.er_lang == "en":
                                node.en_left = in_left_gids
예제 #2
0
                              'utf-8').read().strip()
    sentence_obj_list = []
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip() != '':
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip() != '':
                    action = sent.split(':')[1].strip()
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    if action.strip() == '':
                        S.graphs = []
                        nodes_in_visible_order = []
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                n = Node(id=len(g.nodes),
                                         s=w,
                                         en_id=w_idx,
                                         de_id=None,
                                         lang='en',
                                         visible=True)
                                n.visible = True
                                n.to_en = False
                                n.to_de = True
                                n.graph = g
                                g.nodes.append(n)
                                nodes_in_visible_order.append(n)
예제 #3
0
            wa_no_null = insert_epsilon_edge(wa, input_sent[inp_span[0]:inp_span[1] + 1],
                                             output_sent[out_span[0]:out_span[1] + 1])
            sym_coverage, sym_wa = make_symmetric(wa_no_null)
            assert sym_coverage == 0
            untangle = untangle_wa(sym_wa)
            final_groups = {}
            for iu in sorted(untangle):
                ou = untangle[iu]
                if len(iu) > 1:
                    assert len(ou) == 1  # or (len(iu) == 2 and len(ou) == 2)
                    pass
                if len(ou) > 1:
                    assert len(iu) == 1  # or (len(iu) == 2 and len(ou) == 2)
                    pass
                final_groups[group_idx] = (iu, ou, inp_span, out_span)
                coe_graph = Graph(group_idx)
                to_nodes = []
                node_idx = 0
                for iu_idx in iu:
                    assert inp_phrase[iu_idx] == input_sent[inp_span[0] + iu_idx]
                    input_coverage[inp_span[0] + iu_idx] = 1
                    input_tok_group[inp_span[0] + iu_idx] = group_idx
                    n = Node(node_idx, input_sent[inp_span[0] + iu_idx], None, inp_span[0] + iu_idx, DE_LANG,
                             VIS_LANG == DE_LANG, True, False, False)
                    node_idx += 1
                    to_nodes.append(n)

                from_nodes = []
                for ou_idx in ou:
                    assert out_phrase[ou_idx] == output_sent[out_span[0] + ou_idx]
                    output_tok_group[out_span[0] + ou_idx] = group_idx
예제 #4
0
                wa, input_sent[inp_span[0]:inp_span[1] + 1],
                output_sent[out_span[0]:out_span[1] + 1])
            sym_coverage, sym_wa = make_symmetric(wa_no_null)
            assert sym_coverage == 0
            untangle = untangle_wa(sym_wa)
            final_groups = {}
            for iu in sorted(untangle):
                ou = untangle[iu]
                if len(iu) > 1:
                    assert len(ou) == 1  # or (len(iu) == 2 and len(ou) == 2)
                    pass
                if len(ou) > 1:
                    assert len(iu) == 1  # or (len(iu) == 2 and len(ou) == 2)
                    pass
                final_groups[group_idx] = (iu, ou, inp_span, out_span)
                coe_graph = Graph(group_idx)
                to_nodes = []
                node_idx = 0
                for iu_idx in iu:
                    assert inp_phrase[iu_idx] == input_sent[inp_span[0] +
                                                            iu_idx]
                    input_coverage[inp_span[0] + iu_idx] = 1
                    input_tok_group[inp_span[0] + iu_idx] = group_idx
                    n = Node(node_idx, input_sent[inp_span[0] + iu_idx], None,
                             inp_span[0] + iu_idx, DE_LANG,
                             VIS_LANG == DE_LANG, True, False, False)
                    node_idx += 1
                    to_nodes.append(n)

                from_nodes = []
                for ou_idx in ou: