if __name__ == '__main__':
    # Script entry point: parse the annotation file into Sentence / Graph /
    # Node objects.
    #
    # File layout, as read by the code below:
    #   * sentences are separated by '==='
    #   * each non-blank line of a sentence has an "action" field after its
    #     first ':' and zero or more bracketed groups "[w1 w2 ...]"
    #
    # annotation_file = sys.argv[1]
    # Read through a context manager so the handle is closed even if read()
    # raises (the original left the file object unclosed).
    with codecs.open('../web/annotation.txt', 'r', 'utf-8') as annotation_fh:
        annotations = annotation_fh.read().strip()
    sentence_obj_list = []
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip():
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip():
                    # Field between the first and second ':'; raises
                    # IndexError when the line contains no ':' at all.
                    action = sent.split(':')[1].strip()
                    # Non-greedy match of every "[...]" group, then drop the
                    # surrounding brackets.
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    if not action:
                        # A line with an empty action field (re)builds the
                        # sentence's graphs: one Graph per bracketed group,
                        # one Node per whitespace-separated word.
                        S.graphs = []
                        nodes_in_visible_order = []
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                # Node id is its position inside its graph.
                                n = Node(id=len(g.nodes), s=w, en_id=w_idx,
                                         de_id=None, lang='en', visible=True)
                                n.visible = True
                                n.to_en = False
                                n.to_de = True
                                n.graph = g
                                g.nodes.append(n)
                                nodes_in_visible_order.append(n)
                        # NOTE(review): nesting of this loop is reconstructed
                        # from data flow (it needs the fully built node list)
                        # - confirm against the original layout.
                        for node in nodes_in_visible_order:
                            in_left_gids = get_neighbor(
                                node, nodes_in_visible_order, 'left')
if __name__ == '__main__':
    # Script entry point: parse the annotation file into Sentence / Graph /
    # Node objects.
    #
    # File layout, as read by the code below:
    #   * sentences are separated by '==='
    #   * each non-blank line of a sentence has an "action" field after its
    #     first ':' and zero or more bracketed groups "[w1 w2 ...]"
    #
    # annotation_file = sys.argv[1]
    # Read through a context manager so the handle is closed even if read()
    # raises (the original left the file object unclosed).
    with codecs.open('../web/annotation.txt', 'r', 'utf-8') as annotation_fh:
        annotations = annotation_fh.read().strip()
    sentence_obj_list = []
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip():
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip():
                    # Field between the first and second ':'; raises
                    # IndexError when the line contains no ':' at all.
                    action = sent.split(':')[1].strip()
                    # Non-greedy match of every "[...]" group, then drop the
                    # surrounding brackets.
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    if not action:
                        # A line with an empty action field (re)builds the
                        # sentence's graphs: one Graph per bracketed group,
                        # one Node per whitespace-separated word.
                        S.graphs = []
                        nodes_in_visible_order = []
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                # Node id is its position inside its graph.
                                n = Node(id=len(g.nodes), s=w, en_id=w_idx,
                                         de_id=None, lang='en', visible=True)
                                n.visible = True
                                n.to_en = False
                                n.to_de = True
                                n.graph = g
# NOTE(review): the line below is a whitespace-collapsed fragment from the
# interior of a larger routine whose header is not visible here. Statement
# boundaries and indentation are lost, so loop/branch nesting cannot be
# recovered from this line alone; as written, the '#' after
# `assert len(from_nodes) == 1` turns the rest of the line into comment text.
# Visible statements, in order:
#   * assert there is exactly one from-node when there is more than one to-node
#   * set coe_graph.nodes to from_nodes + to_nodes
#   * assign coe_graph.edges from make_edges(...), then assign it again from
#     make_edges_with_intermediate_nodes(...) (if the two run back to back the
#     first result is discarded - TODO confirm intent)
#   * append coe_graph to coe_sentence.graphs and increment group_idx
#   * increment eps_word_alignment when 0 is in input_coverage, and assert that
#     0 is not in input_coverage
#   * reorder coe_sentence.graphs via sort_groups_by_lang(..., VIS_LANG)
#   * write input_tok_group and output_tok_group to stderr, space-separated
#   * compute split_inp / split_out / split_orderings, split_sets, swap_rules
#   * write each swap rule ('swaps-pets:' prefix) and the 'split inp:' /
#     'split out:' summaries to stderr
pass if len(to_nodes) > 1: assert len(from_nodes) == 1 # or (len(iu) == 2 and len(ou) == 2) pass coe_graph.nodes = from_nodes + to_nodes coe_graph.edges = make_edges(from_nodes, to_nodes) coe_graph.edges = make_edges_with_intermediate_nodes(from_nodes, to_nodes, intermediate=intermediate_nodes, graph=coe_graph) coe_sentence.graphs.append(coe_graph) group_idx += 1 if 0 in input_coverage: eps_word_alignment += 1 assert 0 not in input_coverage coe_sentence.graphs = sort_groups_by_lang(coe_sentence.graphs, VIS_LANG) sys.stderr.write(' '.join([str(i) for i in input_tok_group]) + '\n') sys.stderr.write(' '.join([str(i) for i in output_tok_group]) + '\n') split_inp, split_out, split_orderings = mark_swaps_transfers_interrupts( input_tok_group, output_tok_group) split_sets = get_split_sets(split_inp, split_out) swap_rules = get_swap_rules(coe_sentence, input_tok_group, output_tok_group, input_parse, split_sets, VIS_LANG) for sr in swap_rules: sys.stderr.write('swaps-pets:' + str(sr) + '\n') split_inp_str = ' '.join([str(i) + "-" + ','.join([str(k) for k in j[0]]) for i, j in split_inp.items()]) sys.stderr.write('split inp:' + split_inp_str + '\n') split_out_str = ' '.join([str(i) + "-" + ','.join([str(k) for k in j[0]]) for i, j in split_out.items()]) sys.stderr.write('split out:' + split_out_str + '\n')
# NOTE(review): the line below is a whitespace-collapsed fragment from the
# interior of a larger routine whose header is not visible here, and it stops
# inside an unclosed list comprehension (the `split_inp_str` join). Statement
# boundaries and indentation are lost, so nesting cannot be recovered here.
# Visible statements, in order:
#   * set coe_graph.nodes to from_nodes + to_nodes
#   * assign coe_graph.edges from make_edges(...), then assign it again from
#     make_edges_with_intermediate_nodes(...) (if the two run back to back the
#     first result is discarded - TODO confirm intent)
#   * append coe_graph to coe_sentence.graphs and increment group_idx
#   * increment eps_word_alignment when 0 is in input_coverage, and assert that
#     0 is not in input_coverage
#   * reorder coe_sentence.graphs via sort_groups_by_lang(..., VIS_LANG)
#   * write input_tok_group and output_tok_group to stderr, space-separated
#   * compute split_inp / split_out / split_orderings, split_sets, swap_rules
#   * write each swap rule to stderr with the 'swaps-pets:' prefix
#   * begin building split_inp_str from split_inp.items() (truncated)
pass coe_graph.nodes = from_nodes + to_nodes coe_graph.edges = make_edges(from_nodes, to_nodes) coe_graph.edges = make_edges_with_intermediate_nodes( from_nodes, to_nodes, intermediate=intermediate_nodes, graph=coe_graph) coe_sentence.graphs.append(coe_graph) group_idx += 1 if 0 in input_coverage: eps_word_alignment += 1 assert 0 not in input_coverage coe_sentence.graphs = sort_groups_by_lang(coe_sentence.graphs, VIS_LANG) sys.stderr.write(' '.join([str(i) for i in input_tok_group]) + '\n') sys.stderr.write(' '.join([str(i) for i in output_tok_group]) + '\n') split_inp, split_out, split_orderings = mark_swaps_transfers_interrupts( input_tok_group, output_tok_group) split_sets = get_split_sets(split_inp, split_out) swap_rules = get_swap_rules(coe_sentence, input_tok_group, output_tok_group, input_parse, split_sets, VIS_LANG) for sr in swap_rules: sys.stderr.write('swaps-pets:' + str(sr) + '\n') split_inp_str = ' '.join([ str(i) + "-" + ','.join([str(k) for k in j[0]]) for i, j in split_inp.items()