if __name__ == '__main__':
    # Script entry point: turn the annotation file into Sentence / Graph / Node
    # objects. Records in the file are separated by '===', and every non-blank
    # line inside a record is handled as one annotation step.
    # annotation_file = sys.argv[1]
    # Read through a context manager so the file handle is closed as soon as
    # the text is loaded (a bare codecs.open(...).read() never closes it).
    with codecs.open('../web/annotation.txt', 'r', 'utf-8') as annotation_fh:
        annotations = annotation_fh.read().strip()
    sentence_obj_list = []
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip() != '':
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip() != '':
                    # The action is the text between the first and second ':'.
                    # NOTE(review): a line with no ':' raises IndexError here.
                    action = sent.split(':')[1].strip()
                    # Collect every non-greedy '[...]' span, then drop the brackets.
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    # `action` is already stripped, so compare it directly.
                    if action == '':
                        # An empty action resets the sentence: one Graph per
                        # bracketed span, one visible 'en' Node per word in it.
                        S.graphs = []
                        nodes_in_visible_order = []
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                # Node ids are assigned per graph, in insertion order.
                                n = Node(id=len(g.nodes), s=w, en_id=w_idx, de_id=None, lang='en', visible=True)
                                n.visible = True
                                # presumably direction flags (en -> de) — confirm against Node
                                n.to_en = False
                                n.to_de = True
                                n.graph = g
                                g.nodes.append(n)
                                nodes_in_visible_order.append(n)

                        # Look up the left-hand neighbour of every node, in display order.
                        for node in nodes_in_visible_order:
                            in_left_gids = get_neighbor(node, nodes_in_visible_order, 'left')
Example #2
0
if __name__ == '__main__':
    # Script entry point: turn the annotation file into Sentence / Graph / Node
    # objects. Records in the file are separated by '===', and every non-blank
    # line inside a record is handled as one annotation step.
    # annotation_file = sys.argv[1]
    # Read through a context manager so the file handle is closed as soon as
    # the text is loaded (a bare codecs.open(...).read() never closes it).
    with codecs.open('../web/annotation.txt', 'r',
                     'utf-8') as annotation_fh:
        annotations = annotation_fh.read().strip()
    sentence_obj_list = []
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip() != '':
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip() != '':
                    # The action is the text between the first and second ':'.
                    # NOTE(review): a line with no ':' raises IndexError here.
                    action = sent.split(':')[1].strip()
                    # Collect every non-greedy '[...]' span, then drop the brackets.
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    # `action` is already stripped, so compare it directly.
                    if action == '':
                        # An empty action resets the sentence: one Graph per
                        # bracketed span, one visible 'en' Node per word in it.
                        S.graphs = []
                        nodes_in_visible_order = []
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                # Node ids are assigned per graph, in insertion order.
                                n = Node(id=len(g.nodes),
                                         s=w,
                                         en_id=w_idx,
                                         de_id=None,
                                         lang='en',
                                         visible=True)
                                n.visible = True
                                # presumably direction flags (en -> de) — confirm against Node
                                n.to_en = False
                                n.to_de = True
                                n.graph = g
Example #3
0
                    pass
                # A group with several target nodes must have exactly one source node.
                if len(to_nodes) > 1:
                    assert len(from_nodes) == 1  # or (len(iu) == 2 and len(ou) == 2)
                    pass
                # The graph holds the source nodes followed by the target nodes.
                coe_graph.nodes = from_nodes + to_nodes
                # NOTE(review): this result is overwritten by the next statement;
                # unless make_edges has side effects the call is redundant — confirm.
                coe_graph.edges = make_edges(from_nodes, to_nodes)
                coe_graph.edges = make_edges_with_intermediate_nodes(from_nodes, to_nodes,
                                                                     intermediate=intermediate_nodes, graph=coe_graph)
                # Attach the finished graph to the sentence and move to the next group.
                coe_sentence.graphs.append(coe_graph)
                group_idx += 1

        # NOTE(review): the assert below contradicts the enclosing condition, so
        # with assertions enabled it always fails once this branch is entered
        # (right after the counter is incremented). Confirm whether the counter
        # or the assert is the intended behaviour.
        if 0 in input_coverage:
            eps_word_alignment += 1
            assert 0 not in input_coverage

        # Reorder the sentence's graphs by language, then log both token-group
        # sequences to stderr as space-separated values.
        coe_sentence.graphs = sort_groups_by_lang(coe_sentence.graphs, VIS_LANG)
        sys.stderr.write(' '.join([str(i) for i in input_tok_group]) + '\n')
        sys.stderr.write(' '.join([str(i) for i in output_tok_group]) + '\n')

        # Derive the split structures from the two token-group sequences and
        # use them to compute the swap rules, logging each rule to stderr.
        split_inp, split_out, split_orderings = mark_swaps_transfers_interrupts(
            input_tok_group,
            output_tok_group)
        split_sets = get_split_sets(split_inp, split_out)
        swap_rules = get_swap_rules(coe_sentence, input_tok_group, output_tok_group, input_parse, split_sets, VIS_LANG)
        for sr in swap_rules:
            sys.stderr.write('swaps-pets:' + str(sr) + '\n')

        # Log each split mapping as "key-a,b,c" items, where a,b,c are the
        # elements of the first entry (j[0]) of the mapped value.
        split_inp_str = ' '.join([str(i) + "-" + ','.join([str(k) for k in j[0]]) for i, j in split_inp.items()])
        sys.stderr.write('split inp:' + split_inp_str + '\n')
        split_out_str = ' '.join([str(i) + "-" + ','.join([str(k) for k in j[0]]) for i, j in split_out.items()])
        sys.stderr.write('split out:' + split_out_str + '\n')
Example #4
0
                    pass
                # The graph holds the source nodes followed by the target nodes.
                coe_graph.nodes = from_nodes + to_nodes
                # NOTE(review): this result is overwritten by the next statement;
                # unless make_edges has side effects the call is redundant — confirm.
                coe_graph.edges = make_edges(from_nodes, to_nodes)
                coe_graph.edges = make_edges_with_intermediate_nodes(
                    from_nodes,
                    to_nodes,
                    intermediate=intermediate_nodes,
                    graph=coe_graph)
                # Attach the finished graph to the sentence and move to the next group.
                coe_sentence.graphs.append(coe_graph)
                group_idx += 1

        # NOTE(review): the assert below contradicts the enclosing condition, so
        # with assertions enabled it always fails once this branch is entered
        # (right after the counter is incremented). Confirm whether the counter
        # or the assert is the intended behaviour.
        if 0 in input_coverage:
            eps_word_alignment += 1
            assert 0 not in input_coverage

        # Reorder the sentence's graphs by language, then log both token-group
        # sequences to stderr as space-separated values.
        coe_sentence.graphs = sort_groups_by_lang(coe_sentence.graphs,
                                                  VIS_LANG)
        sys.stderr.write(' '.join([str(i) for i in input_tok_group]) + '\n')
        sys.stderr.write(' '.join([str(i) for i in output_tok_group]) + '\n')

        # Derive the split structures from the two token-group sequences and
        # use them to compute the swap rules, logging each rule to stderr.
        split_inp, split_out, split_orderings = mark_swaps_transfers_interrupts(
            input_tok_group, output_tok_group)
        split_sets = get_split_sets(split_inp, split_out)
        swap_rules = get_swap_rules(coe_sentence, input_tok_group,
                                    output_tok_group, input_parse, split_sets,
                                    VIS_LANG)
        for sr in swap_rules:
            sys.stderr.write('swaps-pets:' + str(sr) + '\n')

        split_inp_str = ' '.join([
            str(i) + "-" + ','.join([str(k) for k in j[0]])
            for i, j in split_inp.items()