def test_even_tree_for_string_2():
    """Check farach.T_even against a hand-built tree for '1111122122'.

    The expected tree is assembled node by node with utils.Node and the two
    trees are compared through their fancyprint() output.
    """
    # Source string: aaaaabbabb, written over the integer alphabet: 1111122122
    encoded = farach.str2int('1111122122')
    odd_tree = farach.T_odd(encoded)
    even_tree = farach.T_even(odd_tree, encoded)

    # Hand-built expected tree; the order of add_child calls is kept as-is
    # because child order may influence the printed form.
    expected = utils.Node(aId="root")
    upper = utils.Node(1, "inner")
    lower = utils.Node(2, "inner")

    upper.add_child(lower)
    upper.add_child(utils.Node(3, 8))

    lower.add_child(utils.Node(9, 2))
    lower.add_child(utils.Node(7, 4))

    expected.add_child(upper)
    expected.add_child(utils.Node(5, 6))
    expected.add_child(utils.Node(1, 10))
    expected.update_leaf_list()

    actual_repr = even_tree.fancyprint(encoded)
    expected_repr = expected.fancyprint(encoded)
    assert actual_repr == expected_repr
def test_even_tree_for_string_1():
    """Check farach.T_even against a hand-built tree for "11112122".

    The expected tree is assembled node by node with utils.Node and the two
    trees are compared through their fancyprint() output.
    """
    encoded = utils.str2int("11112122")
    odd_tree = farach.T_odd(encoded)
    even_tree = farach.T_even(odd_tree, encoded)

    # Hand-built expected tree; the order of add_child calls is kept as-is
    # because child order may influence the printed form.
    expected = utils.Node(aId="root")
    upper = utils.Node(1, "inner")
    expected.add_child(upper)
    expected.add_child(utils.Node(1, 8))

    upper.add_child(utils.Node(7, 2))
    lower = utils.Node(2, "inner")
    upper.add_child(lower)

    lower.add_child(utils.Node(5, 4))
    lower.add_child(utils.Node(3, 6))
    expected.update_leaf_list()

    actual_repr = even_tree.fancyprint(encoded)
    expected_repr = expected.fancyprint(encoded)
    assert actual_repr == expected_repr
def conll_to_action(self, tgt, num_, permutation):
    """Convert a tab-separated dependency file into plain-text and oracle files.

    The output paths are chosen from the first of 'dev', 'test', 'train'
    found as a substring of ``tgt``. The input is split into sentence blocks
    on runs of two or more newlines; blocks are visited in the order given by
    ``permutation``. For each block one line of lower-cased, space-joined
    tokens is written to the text file, and the arcs returned by
    ``utils.write_oracle`` are written one per line to the oracle file,
    followed by a blank line.

    Args:
        tgt: Path of the tagged input file. Each line is expected to have at
            least eight tab-separated columns (index in column 0, token in
            column 1, head in column 6, label in column 7).
        num_: Stop after this many blocks have been written. A value that the
            running count never equals (e.g. 0) means no early stop.
        permutation: Iterable of block indices to process, in order.

    Returns:
        A ``(txt_fname, oracle_fname)`` tuple with the two output paths.

    Raises:
        SystemExit: With code 1 when ``tgt`` names none of dev/test/train.
    """
    # First match wins, mirroring the dev -> test -> train priority.
    for split in ('dev', 'test', 'train'):
        if split in tgt:
            oracle_fname = './data/processed/' + split + '.oracle.en'
            txt_fname = './data/processed/' + split + '.en'
            break
    else:
        print('Error: invalid file name of ' + tgt)
        # The bare exit() helper is injected by the site module for
        # interactive use and may be missing; raise SystemExit directly.
        raise SystemExit(1)

    cnt = 0
    # Context managers guarantee all three handles are closed even when
    # parsing or utils.write_oracle raises part-way through.
    with open(oracle_fname, 'w', encoding='utf-8') as oracle_f, \
            open(txt_fname, 'w', encoding='utf-8') as plain_f, \
            open(tgt, 'r', encoding='utf-8') as tagged_file:
        bulk = tagged_file.read()
        # Drop empty fragments produced by leading/trailing blank lines.
        blocks = [blk for blk in re.split(r"\n{2,}", bulk) if blk]

        for block_idx in permutation:
            tokens = []
            buffer = []
            child_to_head_dict = {}
            for line in blocks[block_idx].splitlines():
                attr_list = line.split('\t')
                # Sentence-final '.' and '?' tokens are skipped entirely.
                if attr_list[1] in ('.', '?'):
                    continue
                tokens.append(attr_list[1])
                num = int(attr_list[0])
                head = int(attr_list[6])
                label = attr_list[7]
                child_to_head_dict[num] = head
                buffer.append(utils.Node(num, head, label))

            arcs = utils.write_oracle(buffer, child_to_head_dict)

            plain_f.write(' '.join(token.lower() for token in tokens))
            plain_f.write('\n')

            for arc in arcs:
                oracle_f.write(arc + '\n')
            oracle_f.write('\n')

            cnt += 1
            if cnt == num_:
                break

    return txt_fname, oracle_fname
def insert(head, val):
    '''
    Add a new node holding val to the linked list, placing it directly
    before the first node whose value is >= val (or at the tail if there
    is no such node). If the list is already in ascending order it stays so.
    @params:
        head: Node (may be empty/None)
    @params:
        val: int
    @return:
        head: Node (the new node when it becomes the first element)
    '''
    fresh = utils.Node(val)

    # Empty list, or val belongs in front of the current head.
    if not head or head.val >= val:
        fresh.next = head
        return fresh

    # Advance to the last node whose value is still smaller than val.
    prev = head
    while prev.next and prev.next.val < val:
        prev = prev.next

    successor = prev.next
    if successor:
        # Splice between prev and its successor.
        fresh.next = successor
    prev.next = fresh
    return head
def remove(head, target):
    '''
    Unlink every node whose val equals target.
    @params:
        head: Node
    @params:
        target: int
    @return:
        head: Node (first remaining node, or whatever followed the last
              removed node if the original head was removed)
    '''
    # A sentinel in front of head lets the head be removed like any other node.
    sentinel = utils.Node(-1)
    sentinel.next = head

    prev = sentinel
    while prev.next:
        if prev.next.val == target:
            # Skip over the match; stay on prev so consecutive matches are caught.
            prev.next = prev.next.next
            continue
        prev = prev.next

    return sentinel.next