예제 #1
0
def convert_tree(t):
    """Recursively convert ``t`` into a binary ``ParseTree``.

    A node labelled ``"text"`` is converted to ``slice_text(t[0])``.  For any
    other node, children that are ``Tree`` instances labelled ``"span"``,
    ``"rel2par"`` or ``"leaf"`` are dropped and every other child is kept.
    The relation node is the first ``"rel2par"`` sub-node found under a kept
    child; the search continues into later kept children for as long as no
    relation has been found or the first element of the one found so far
    equals ``"span"``.

    A node with exactly one kept child collapses to that child's conversion.
    Otherwise the new label is ``rel2class[<relation>.lower()]`` followed by
    the first character of the first two kept children's labels, each in
    square brackets.  More than two kept children produce a right-branching
    result: the first child on the left, and on the right a node with the
    same label holding the second child and the conversion of a temporary
    ``"temp"`` node wrapping the remaining children.
    """
    if t.label() == "text":
        return slice_text(t[0])

    dropped_labels = ("span", "rel2par", "leaf")
    rel_node = None
    kept = []
    for child in t:
        child_is_tree = isinstance(child, Tree)
        if child_is_tree and child.label() in dropped_labels:
            continue
        kept.append(child)

        # Keep searching while no relation is known or the known one is "span".
        still_searching = rel_node is None or rel_node[0] == "span"
        if child_is_tree and still_searching:
            for grandchild in child:
                if isinstance(grandchild, Tree) and grandchild.label() == "rel2par":
                    rel_node = grandchild
                    break

    if len(kept) == 1:
        return convert_tree(kept[0])

    # NOTE(review): nothing guards against rel_node still being None or
    # fewer than two kept children here; both cases raise on the lines below.
    relation_class = rel2class[rel_node[0].lower()]
    left_tag = kept[0].label()[0:1]
    right_tag = kept[1].label()[0:1]
    label_rel = "".join([relation_class, "[", left_tag, "][", right_tag, "]"])

    if len(kept) <= 2:
        return ParseTree(label_rel, [convert_tree(kept[0]), convert_tree(kept[1])])

    # Presumably detaches the nodes so they can be placed under the
    # temporary ParseTree below -- confirm against ParseTree's parent handling.
    for node in kept[1:]:
        node._parent = None

    left = convert_tree(kept[0])
    middle = convert_tree(kept[1])
    remainder = convert_tree(ParseTree("temp", kept[2:]))
    return ParseTree(label_rel, [left, ParseTree(label_rel, [middle, remainder])])
예제 #2
0
def load_tree_from_file(filename, tokenize = False):
    """Load a parse tree from a ``.dis`` or ``.tree`` file.

    ``.dis`` files are delegated to ``rst_lib.load_tree``.  ``.tree`` files
    are read in full and handed to ``ParseTree.parse``; leaves are matched by
    their ``_!...!_`` delimiters, which are stripped from each leaf.

    Args:
        filename: Path of the file to load; the loader is chosen from its
            extension.
        tokenize: Only affects ``.tree`` files.  When true, each leaf is
            returned as the list obtained by splitting its text on single
            spaces instead of as one string.

    Returns:
        Whatever the selected loader returns.

    Raises:
        ValueError: If ``filename`` ends with neither ``.dis`` nor ``.tree``.
            (Previously this case failed with an ``UnboundLocalError`` on the
            final ``return``.)
    """
    def preprocess_leaf(leaf):
        # Drop the ``_!`` / ``!_`` markers that delimit a leaf in the file.
        leaf = re.sub(r'_!(.+?)!_', r'\1', leaf)
        if tokenize:
            return leaf.split(' ')
        return leaf

    if filename.endswith('.dis'):
        return rst_lib.load_tree(filename)

    if filename.endswith('.tree'):
        # Read inside ``with`` so the handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(filename) as tree_file:
            text = tree_file.read()
        return ParseTree.parse(text, leaf_pattern = '_!.+?!_', parse_leaf = preprocess_leaf)

    raise ValueError("unsupported tree file extension: %r" % (filename,))
예제 #3
0
def load_tree_from_file(filename, tokenize = False):
    """Load a parse tree from a ``.dis`` or ``.tree`` file.

    ``.dis`` files are delegated to ``rst_lib.load_tree``.  ``.tree`` files
    are read in full and handed to ``ParseTree.parse``; leaves are matched by
    their ``_!...!_`` delimiters, which are stripped from each leaf.

    Args:
        filename: Path of the file to load; the loader is chosen from its
            extension.
        tokenize: Only affects ``.tree`` files.  When true, each leaf is
            returned as the list obtained by splitting its text on single
            spaces instead of as one string.

    Returns:
        Whatever the selected loader returns.

    Raises:
        ValueError: If ``filename`` ends with neither ``.dis`` nor ``.tree``.
            (Previously this case failed with an ``UnboundLocalError`` on the
            final ``return``.)
    """
    def preprocess_leaf(leaf):
        # Drop the ``_!`` / ``!_`` markers that delimit a leaf in the file.
        leaf = re.sub(r'_!(.+?)!_', r'\1', leaf)
        if tokenize:
            return leaf.split(' ')
        return leaf

    if filename.endswith('.dis'):
        return rst_lib.load_tree(filename)

    if filename.endswith('.tree'):
        # Read inside ``with`` so the handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(filename) as tree_file:
            text = tree_file.read()
        return ParseTree.parse(text, leaf_pattern = '_!.+?!_', parse_leaf = preprocess_leaf)

    raise ValueError("unsupported tree file extension: %r" % (filename,))
예제 #4
0
def make_new_subtree(label, subtree1, subtree2, deepcopy = False):
    """Return a new ``ParseTree`` labelled ``label`` with two children.

    When ``deepcopy`` is true the children are the results of
    ``copy_subtree(subtreeN, True)``; otherwise the given objects are used
    directly.  Any child that is a ``ParseTree`` has its ``_parent`` reset to
    ``None`` before the new tree is built, so with ``deepcopy`` false the
    caller's subtrees are modified in place.
    """
    if deepcopy:
        left = copy_subtree(subtree1, True)
        right = copy_subtree(subtree2, True)
    else:
        left, right = subtree1, subtree2

    # Clear the parent link on tree children before handing them to the
    # new ParseTree (non-tree children are left untouched).
    for child in (left, right):
        if isinstance(child, ParseTree):
            child._parent = None

    return ParseTree(label, [left, right])
예제 #5
0
    def build_tree(self, doc):
        """Build the discourse tree for ``doc``.

        A document with exactly one EDU short-circuits to a one-element list
        holding an ``"n/a"`` ``ParseTree`` around that EDU.  Otherwise every
        sentence is passed to ``self.intra_parser.parse_each_sentence``, the
        whole document is passed to ``self.multi_parser.parse_document``, and
        ``doc.discourse_tree`` is returned.

        Args:
            doc: Document object exposing ``edus``, ``sentences``, ``cuts``
                (one ``(start_edu, end_edu)`` pair per sentence) and
                ``discourse_tree``.

        Returns:
            ``[ParseTree("n/a", [edu])]`` for a single-EDU document, else
            ``doc.discourse_tree`` (presumably populated by the parsers --
            confirm).
        """
        # Check if only one EDU: nothing to parse in that case.
        if len(doc.edus) == 1:
            return [ParseTree("n/a", [doc.edus[0]])]

        for i, sentence in enumerate(doc.sentences):
            start_edu, end_edu = doc.cuts[i]

            if self.verbose:
                # Single-argument print calls emit the same text under
                # Python 2 and Python 3; the former print statements were a
                # SyntaxError on Python 3.
                print('sentence %d' % i)
                print('start_edu %s end_edu %s' % (start_edu, end_edu))

            self.intra_parser.parse_each_sentence(sentence)

        self.multi_parser.parse_document(doc)

        return doc.discourse_tree
예제 #6
0
    def build_tree(self, doc):
        """Run the sentence-level and document-level parsers over ``doc``.

        If ``doc`` has exactly one EDU, a one-element list containing an
        ``"n/a"`` ``ParseTree`` wrapped around that EDU is returned and no
        parser is invoked.  Otherwise each sentence goes through
        ``self.intra_parser.parse_each_sentence``, then the document goes
        through ``self.multi_parser.parse_document``, and
        ``doc.discourse_tree`` is returned.  With ``self.verbose`` set, the
        sentence index and its EDU boundaries from ``doc.cuts`` are printed.
        """
        edus = doc.edus
        if len(edus) == 1:
            # Single EDU: wrap it directly, there is nothing to combine.
            return [ParseTree("n/a", [edus[0]])]

        for index, sentence in enumerate(doc.sentences):
            start_edu, end_edu = doc.cuts[index]
            if self.verbose:
                print('sentence %d' % index)
                print('start_edu', start_edu, 'end_edu', end_edu)
            self.intra_parser.parse_each_sentence(sentence)

        self.multi_parser.parse_document(doc)
        return doc.discourse_tree
예제 #7
0
def simplify_tree(tree, start):
    """Return a copy of ``tree`` whose leaves are replaced by numbered nodes.

    A falsy ``tree`` yields ``None``.  Anything that is not a ``ParseTree``
    is treated as a leaf and becomes ``ParseTree('leaf', [str(start + 1)])``.
    A ``ParseTree`` is deep-copied and its children at index 0 and 1 are
    replaced by their simplified versions; the right child is numbered
    starting after the leaves of the simplified left child (as reported by
    its ``leaves()`` method).  Only the first two children are processed.
    """
    if not tree:
        return None

    if not isinstance(tree, ParseTree):
        return ParseTree('leaf', [str(start + 1)])

    simplified = tree.__deepcopy__(None)

    left = simplify_tree(tree[0], start)
    # Offset the right side by however many leaves the left side produced.
    right = simplify_tree(tree[1], start + len(left.leaves()))

    simplified[0] = left
    simplified[1] = right
    return simplified