def transform_to_ZhangShasha_tree(parse_tree, offset=0, treepos=0, nuclearity=True):
    """Flatten a binary parse tree into a list of ``'parent-child'`` edge strings.

    Internal nodes are written as ``'<class index>:<node id>'`` and leaves as
    ``'e<k>'``, where ``k`` is the 1-based position of the leaf
    (``offset + 1`` for the first leaf below ``parse_tree``).  Edges are
    emitted depth-first, pre-order: parent->left, the whole left subtree,
    parent->right, the whole right subtree.  Judging by the name, the edge
    list is meant to feed a Zhang-Shasha tree-edit-distance tool -- confirm
    against the caller.

    Args:
        parse_tree: A ``Tree`` whose children ``[0]`` and ``[1]`` are taken as
            the left and right sub-constituents.  Any other value produces an
            empty list.
        offset: Number of leaves that precede this subtree.
        treepos: Numeric id assigned to the root of this subtree.
        nuclearity: If true, node labels are looked up unchanged in
            ``rel_status_classes``.  Otherwise the last six characters of each
            label are dropped (except for ``'NO-REL'``) and the remainder is
            looked up among the keys of ``class2rel``.

    Returns:
        A list of ``'<parent>-<child>'`` strings.

    Raises:
        ValueError: If a (possibly trimmed) node label is not a known class.
    """
    if not isinstance(parse_tree, Tree):
        return []

    # Work on a private copy: the module-level class list must not grow on
    # every call, and a dict key view (Python 3) offers neither append() nor
    # index().  'NO-REL' goes last so the indices of real classes are stable.
    rel_classes = list(rel_status_classes if nuclearity else class2rel)
    if 'NO-REL' not in rel_classes:
        rel_classes.append('NO-REL')

    def node_label(subtree, node_id):
        """Return '<class index>:<node id>' for an internal node."""
        label = subtree.node
        if not nuclearity and label != 'NO-REL':
            label = label[:-6]
        return str(rel_classes.index(label)) + ':' + str(node_id)

    # '-' separates the two ends of an edge, so it is masked in the parent id
    # (as the original code did; only the parent side is rewritten).
    parent_label = node_label(parse_tree, treepos).replace('-', '_')

    left = parse_tree[0]
    right = parse_tree[1]

    left_treepos = treepos + 1
    if isinstance(left, Tree):
        left_label = node_label(left, left_treepos)
        left_span = len(left.leaves())
        # Jump past the block of ids set aside for the left subtree.
        right_treepos = treepos + len(left.treepositions()) + left_span
    else:
        left_label = 'e%d' % (offset + 1)
        left_span = 1
        right_treepos = treepos + 1

    if isinstance(right, Tree):
        right_label = node_label(right, right_treepos)
    else:
        right_label = 'e%d' % (offset + 1 + left_span)

    # `nuclearity` is forwarded so that a node receives the same label when it
    # appears as a child here and as a parent one recursion level down.
    edges = [parent_label + '-' + left_label]
    edges.extend(transform_to_ZhangShasha_tree(
        left, offset, left_treepos, nuclearity))
    edges.append(parent_label + '-' + right_label)
    edges.extend(transform_to_ZhangShasha_tree(
        right, offset + left_span, right_treepos, nuclearity))
    return edges
def transform_to_shift_reduce_actions(parse_tree, relation=True, nuclearity=True,
                                      compressed=True, offset=1):
    """Linearise a binary parse tree into a list of shift-reduce action strings.

    The tree is walked post-order: the actions of the left child, then those
    of the right child, then one reduce action for the node itself.

    Args:
        parse_tree: A ``Tree`` whose children ``[0]`` and ``[1]`` are taken as
            the left and right sub-constituents, or a leaf (anything that is
            not a ``Tree``).
        relation: If true, each reduce action carries the index of the node's
            class: ``'R<index>'``.
        nuclearity: With ``relation`` true, selects whether the full node
            label is looked up in ``rel_status_classes`` (true) or the label
            minus its last six characters is looked up among the keys of
            ``class2rel`` (false).  With ``relation`` false, a true value
            appends the last six characters of the node label to ``'R'``;
            a false value leaves the action as plain ``'R'``.
        compressed: If true, leaves emit nothing and every reduce action is
            suffixed with ``'[(ls,le),(rs,re)]'``, the inclusive leaf ranges
            covered by the left and right child.  If false, each leaf emits
            ``'S'`` and no ranges are added.
        offset: Index of the first leaf below ``parse_tree`` (1 by default).

    Returns:
        A list of action strings.

    Raises:
        ValueError: If ``relation`` is true and the (possibly trimmed) node
            label is not a known class.
    """
    if not isinstance(parse_tree, Tree):
        return [] if compressed else ['S']

    left = parse_tree[0]
    right = parse_tree[1]
    left_span = len(left.leaves()) if isinstance(left, Tree) else 1

    actions = []
    actions.extend(transform_to_shift_reduce_actions(
        left, relation, nuclearity, compressed, offset))
    actions.extend(transform_to_shift_reduce_actions(
        right, relation, nuclearity, compressed, offset + left_span))

    head_action = 'R'
    if relation:
        if nuclearity:
            rel_classes = rel_status_classes
            action = parse_tree.node
        else:
            # list(...) rather than .keys(): a Python 3 key view has no index().
            rel_classes = list(class2rel)
            action = parse_tree.node[:-6]
        head_action += str(rel_classes.index(action))
    elif nuclearity:
        head_action += parse_tree.node[-6:]

    if compressed:
        right_span = len(right.leaves()) if isinstance(right, Tree) else 1
        left_start = offset
        left_end = offset + left_span - 1
        right_start = left_end + 1
        right_end = right_start + right_span - 1
        head_action += '[(%d,%d),(%d,%d)]' % (
            left_start, left_end, right_start, right_end)

    actions.append(head_action)
    return actions