Python Tree Examples, nltk.tree.Tree Python Examples

Example #1

0

Show file

def parsed(element):
    """Convert an XML element into an NLTK tree.

    An element that has subelements becomes a tree labelled with the
    element's tag; its children are the converted subelements, with skipped
    (None) results dropped.  An element without subelements is a terminal
    and becomes a one-leaf tree whose label is its 'pos' attribute (falling
    back to 'ne', then to the literal 'unk') and whose leaf is its 'wd'
    attribute.

    element -- an xml.etree.ElementTree.Element (or a subelement)

    Returns a tree.Tree, or None for a terminal with elliptic="yes".
    """
    # Count subelements with len() instead of truth-testing the element:
    # the truth value of an Element is deprecated, len() is the supported
    # way to ask "does it have children?".
    if len(element) > 0:
        converted = (parsed(child) for child in element)
        subtrees = [t for t in converted if t is not None]
        return tree.Tree(element.tag, subtrees)

    # No subelements: we are at a terminal.
    if element.get('elliptic') == 'yes':
        return None
    label = element.get('pos') or element.get('ne') or 'unk'
    return tree.Tree(label, [element.get('wd')])

Example #2

0

Show file

def btree_xrc(relation_list, words):
    """
    Build a binary tree and display it.

    Words are consumed in pairs: a word is pushed on the stack when the
    stack is empty, and the following word pops it again.  For each such
    pair, every relation in relation_list that links the two words (in
    either direction) produces a two-leaf tree labelled with that
    relation's index in relation_list.  Only the tree built last is kept.

    :param relation_list: collection of dependency relations (objects with
        ``mw`` and ``cw`` attributes)
    :param words: collection of the original words
    :return: None -- the resulting tree is shown with ``T.draw()`` rather
        than returned
    """
    #
    # for relation in relation_list:
    #     print relation.mw, " - ", relation.relation, " - ", relation.cw

    # while len(relation_list) >0:

    # relation_dict = {}
    # count = 1
    # for r in relation_list:
    #     relation_dict[count] = r
    #     count += 1
    # The list index is used directly as the relation id, so ids start at 0.

    # T = []
    # NOTE(review): T_list is never used below.
    T_list = []

    # Stack holding the first word of the pair currently being formed.
    stack = []
    # Placeholder tree; it is what gets drawn if no relation ever matches.
    T = tree.Tree("root", ["", ""])
    # Word pairs for which a tree was built (only appended to, never read).
    T_word = []
    for w in words:

        if len(stack) == 0:
            # First word of a pair: remember it and wait for the next one.
            print w
            stack.append(w)
        else:
            # Second word of a pair: the stack becomes empty again.
            w0 = stack.pop()
            print w, w0
            for r in relation_list:
                if r.mw == w and r.cw == w0:
                    # Relation points from the current word to the stacked one.
                    temp = tree.Tree(relation_list.index(r), [w, w0])
                    T_word.append([w, w0])


                    T = temp
                elif r.mw == w0 and r.cw == w:
                    # Relation points from the stacked word to the current one.
                    temp = tree.Tree(relation_list.index(r), [w0, w])
                    T_word.append([w, w0])
                    T = temp

    # Opens NLTK's tree viewer on the last tree built.
    T.draw()

Example #3

0

Show file

    def unary_parses(self, p, t, i, j):
        """Return (p, t) followed by the parses reachable from it by stopping.

        The label of `t` carries a `mark`:
          * '|'  -- already stopped on both sides; nothing is added.
          * '<>' -- one parse is added: `t` wrapped under a '|' node, scored
                    p_stop_left(...) + p.
          * '>'  -- `t` is wrapped under a '<>' node, scored
                    p_stop_right(...) + p, and that parse is expanded
                    recursively.

        p    -- score of `t` (scores are combined by addition)
        t    -- tree whose label has mark, word, index, l_val and r_val
        i, j -- passed through unchanged to the recursive call

        Returns a list of (score, tree) pairs whose first item is (p, t).
        """
        head = t.label()
        left_valence, right_valence = head.l_val, head.r_val
        mark = head.mark

        if mark == '|':
            derived = []
        elif mark == '<>':
            stop_score = self.p_stop_left(head.word, left_valence, self.harmonic) + p
            sealed_label = Node('|', head.word, head.index, left_valence, right_valence)
            derived = [(stop_score, tree.Tree(sealed_label, [t]))]
        elif mark == '>':
            stop_score = self.p_stop_right(head.word, right_valence, self.harmonic) + p
            half_label = Node('<>', head.word, head.index, left_valence, right_valence)
            derived = self.unary_parses(stop_score, tree.Tree(half_label, [t]), i, j)

        return [(p, t)] + derived

Example #4

0

Show file

def parsed(element):
    """Converts a 'sentence' XML element (xml.etree.ElementTree.Element) to
    an NLTK tree.

    An element with subelements becomes a tree labelled with its tag whose
    children are the converted subelements (None results are dropped).  An
    element without subelements is a terminal: it becomes a one-leaf tree
    labelled with its 'pos' attribute (falling back to 'ne', then 'unk')
    whose leaf is its 'wd' attribute.

    element -- the XML sentence element (or a subelement)

    Returns a tree.Tree, or None for a terminal that is marked
    elliptic="yes" and carries no 'wd' attribute value.
    """
    # Count subelements with len() instead of truth-testing the element:
    # the truth value of an Element is deprecated, len() is the supported
    # way to ask "does it have children?".
    if len(element) > 0:
        converted = (parsed(child) for child in element)  # recursive call here!
        subtrees = [t for t in converted if t is not None]
        return tree.Tree(element.tag, subtrees)

    # No subelements: we are at a terminal.
    if element.get('elliptic') == 'yes' and not element.get('wd'):
        return None
    label = element.get('pos') or element.get('ne') or 'unk'
    return tree.Tree(label, [element.get('wd')])

Example #5

0

Show file

def gold_pos_strategy(line, abp_domain_size, gold_pos_dict=None, **kwargs):
    """Build a random binary tree over `line` with gold POS preterminals.

    A two-leaf tree is grown by repeatedly replacing a randomly chosen leaf
    with a new two-leaf branch until there is one leaf per token.  Every
    branch label is a random integer in [1, abp_domain_size] rendered as a
    string.  Finally each leaf, in left-to-right order, is replaced by a
    preterminal ``Tree(str(gold_pos_dict[token]), [token])``.

    Args:
        line: sequence of tokens; it must contain at least two tokens
            (shorter input raises IndexError while filling in the leaves).
        abp_domain_size: upper bound (inclusive) for the random branch labels.
        gold_pos_dict: mapping from token to its gold POS tag.
        **kwargs: accepted and ignored.

    Returns:
        The generated tree.Tree.

    Raises:
        ValueError: if gold_pos_dict is not provided.
    """
    if gold_pos_dict is None:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(
            "Gold pos dictionary must be provided when using the gold pos strategy!"
        )
    placeholder = '-REPLACE-'

    def random_branch():
        # A branch with a random label and two placeholder leaves.
        return tree.Tree(str(random.randint(1, abp_domain_size)),
                         [placeholder, placeholder])

    new_tree = random_branch()
    # Each expansion turns one leaf into two, i.e. adds exactly one leaf;
    # starting from two leaves, len(line) - 2 expansions are needed.
    for _ in range(len(line) - 2):
        random_pick = random.choice(new_tree.treepositions('leaves'))
        new_tree[random_pick] = random_branch()
    # The leaf positions are computed once; replacing a leaf by a
    # preterminal does not move the remaining leaves.
    for index, position in enumerate(new_tree.treepositions('leaves')):
        token = line[index]
        new_tree[position] = tree.Tree(str(gold_pos_dict[token]), [token])
    return new_tree

Example #6

0

Show file

File: depgraph.py Project: speedcell4/dmvccm

 def constree(self):
     """Return the constituency tree of this graph as a treebank.Tree.

     The root addresses are read from the 'deps' entry of node 0.  With
     exactly one root the result wraps that root's subtree; otherwise all
     root subtrees are gathered under a 'TOP' node.
     """
     # Some depgraphs have several roots (for instance, 512th of Turkish).
     root_addresses = self.nodelist[0]['deps']
     if len(root_addresses) != 1:
         # TODO: check projectivity here also.
         subtrees = [self._constree(address) for address in root_addresses]
         return treebank.Tree(tree.Tree('TOP', subtrees))
     return treebank.Tree(self._constree(root_addresses[0]))

Example #7

0

Show file

File: listops.py Project: avena554/listops_sandbox

def to_tensor(tree: tree_mod.Tree):
    """
    Maps a tree of int(s) to a tree of torch.LongTensor(s) which contain the same values.

    :param tree: tree whose label is accepted by int(); every child is
        converted recursively and therefore must provide label() as well
    :return: a new tree_mod.Tree whose label is a one-element LongTensor
        and whose children are the converted children
    """
    # Convert this node's label first, then descend into the children.
    label_tensor = torch.LongTensor([int(tree.label())])
    converted = []
    for subtree in tree:
        converted.append(to_tensor(subtree))
    return tree_mod.Tree(label_tensor, converted)

Example #8

0

Show file

File: listops.py Project: avena554/listops_sandbox

def to_numbers(tree: tree_mod.Tree, interner: interners.Interner):
    """
    Maps a tree to a tree of int(s) which are the numbers assigned to the node labels by the interner.

    :param tree: tree whose label provides strip(); every child is
        converted recursively and therefore must provide label() as well
    :param interner: callable applied to each stripped label
    :return: a new tree_mod.Tree labelled with the interner's result, with
        recursively converted children
    """
    # Intern this node's (whitespace-stripped) label before its children.
    label_number = interner(tree.label().strip())
    numbered_children = []
    for subtree in tree:
        numbered_children.append(to_numbers(subtree, interner))
    return tree_mod.Tree(label_number, numbered_children)

Example #9

0

Show file

 def recursion(t, f):
     """Return a copy of `t` keeping only the children accepted by `f`.

     Anything that is not a tree.Tree is returned unchanged.  For a tree,
     every child for which f(child) is true is filtered recursively; if no
     child survives, the tree collapses to its bare label.
     """
     if isinstance(t, tree.Tree):
         kept = [recursion(child, f) for child in t if f(child)]
         if kept:
             return tree.Tree(t.label(), kept)
         # Nothing left below this node: only its label remains.
         return t.label()
     return t

Example #10

0

Show file

 def recursion(t, f):
     """Return a copy of `t` keeping only the children accepted by `f`.

     Trees of height 2 are returned unchanged (terminals are pos tags, so
     the recursion stops there rather than at non-tree leaves).  Otherwise
     every child for which f(child) is true is filtered recursively; if no
     child survives, the bare label of `t` is returned.
     """
     if t.height() == 2:
         return t
     kept = [recursion(child, f) for child in t if f(child)]
     if not kept:
         # ideally, subtrees cannot be empty
         return t.label()
     return tree.Tree(t.label(), kept)

Example #11

0

Show file

File: SentenceSplitter.py Project: OctopusMD/ARGUS-Chatbot

 def createTree1(self, tags):
     """Group tagged words into alternating NP/VP branches and store the tree.

     `tags` is walked in reverse.  Each item is indexed as item[0] (the
     word) and item[1] (its tag).  'VERB'/'ADV' words belong to a VP
     branch, 'NOUN'/'ADJ' words to an NP branch, and any other tag simply
     joins the branch currently being collected.  Whenever the branch type
     changes, the words collected so far are closed off into a tree that
     is chained to the previously built one.

     The result is stored in self.myTree as an 'S' tree over the last two
     branches; nothing is returned.
     """
     isVerb = False
     tempList = []
     curTree = NULL
     prevTree = NULL

     for words in reversed(tags):
         tag = words[1]
         if tag == 'VERB' or tag == 'ADV':
             startsVerb = True
         elif tag == 'NOUN' or tag == 'ADJ':
             startsVerb = False
         else:
             # Any other tag just extends the branch being collected.
             tempList.append(words[0])
             continue

         if startsVerb == isVerb:
             # continuing on current branch
             tempList.append(words[0])
             continue

         # The branch type changes: finish the previous branch (labelled
         # after the type we are leaving) so that the new one can start.
         label = 'VP' if isVerb else 'NP'
         if prevTree == NULL and curTree == NULL:
             curTree = tree.Tree(label, tempList)
         else:
             prevTree = curTree
             curTree = tree.Tree(label, [tempList, prevTree])
         isVerb = startsVerb
         # Start a fresh list instead of clearing the old one: the old
         # list may now be a child of curTree, and clearing it in place
         # would wipe the words out of the branch that was just built.
         tempList = [words[0]]

     # Close the branch still being collected and join the last two.
     prevTree = curTree
     curTree = tree.Tree('VP' if isVerb else 'NP', tempList)
     self.myTree = tree.Tree('S', [curTree, prevTree])

Example #12

0

Show file

    def filter_subtrees(self, f):
        """Filter this tree in place, keeping only subtrees accepted by `f`.

        Children for which f(child) is false are dropped at every level; a
        tree left without children collapses to its label.  The object is
        then re-initialised from the filtered result together with
        self.labels (a collapsed root becomes a childless tree).
        """
        def prune(node, keep):
            # Non-tree leaves are kept as they are.
            if not isinstance(node, tree.Tree):
                return node
            kept = [prune(child, keep) for child in node if keep(child)]
            if not kept:
                return node.label()
            return tree.Tree(node.label(), kept)

        pruned = prune(self, f)
        if not isinstance(pruned, tree.Tree):
            # Everything was filtered out: keep a bare root node.
            pruned = tree.Tree(pruned, [])
        self.__init__(pruned, self.labels)

Example #13

0

Show file

def creat_tree(mw, cw, relation, id):
    """Build the two-leaf tree for a "VOB" relation.

    :param mw: word placed as the second leaf
    :param cw: word placed as the first leaf
    :param relation: relation name; only "VOB" is handled
    :param id: label given to the new tree
    :return: ``tree.Tree(id, [cw, mw])`` for "VOB", otherwise None
    """
    if relation != "VOB":
        # No other relation is handled here.
        return None
    return tree.Tree(id, [cw, mw])
    # elif



#
# a = [1,2,3,4]
# b=[]
# for i in a:
#     b.append(i)
#
#
# for n in range(len(a)):
#     print n, a.pop()
#     print
#
#
#
# print b

Example #14

0

Show file

    def filter_subtrees(self, f):
        """Filter this tree in place, keeping only subtrees accepted by `f`.

        Trees of height 2 are never descended into (terminals are pos
        tags).  Above that level, children for which f(child) is false are
        dropped; a tree left without children collapses to its label.  The
        object is then re-initialised from the filtered result together
        with self.labels (a collapsed root becomes a childless tree).
        """
        def prune(node, keep):
            if node.height() == 2:
                return node
            kept = [prune(child, keep) for child in node if keep(child)]
            if not kept:
                # ideally, subtrees cannot be empty
                return node.label()
            return tree.Tree(node.label(), kept)

        pruned = prune(self, f)
        if not isinstance(pruned, tree.Tree):
            # Everything was filtered out: keep a bare root node.
            pruned = tree.Tree(pruned, [])
        self.__init__(pruned, self.labels)

Example #15

0

Show file

File: util_test.py Project: mideind/Annotald

    def test_queryVersionCookie(self):
        """Check util.queryVersionCookie on valid, invalid, ambiguous and empty input."""
        query = util.queryVersionCookie

        # Well-formed cookie: plain keys, dotted keys and subtree results.
        cookie = "( (VERSION (FORMAT dash) (FOO (BAR baz))))"
        self.assertEqual(query(cookie, "FORMAT"), "dash")
        self.assertEqual(query(cookie, "FOO.BAR"), "baz")
        self.assertEqual(query(cookie, "FOO"), T.Tree("BAR", ["baz"]))
        self.assertIsNone(query(cookie, "ABC"))

        # Invalid version cookie gives null result
        not_a_cookie = "( (FOO bar))"
        self.assertIsNone(query(not_a_cookie, "foo"))

        # multiple matches gives null result, only for aberrant key
        ambiguous = "( (VERSION (FOO bar) (FOO baz) (BAR quux)))"
        self.assertIsNone(query(ambiguous, "FOO"))
        self.assertEqual(query(ambiguous, "BAR"), "quux")

        # Empty input gives null result
        for empty in ("", None):
            self.assertIsNone(query(empty, "FOO"))

Example #16

0

Show file

    def dep_parse(self, s):
        """
        Parse sentence `s` bottom-up and return the best (tree, score) pair.

        A chart `parse` maps each span (i, j) to a ParseDict of scored
        trees.  Scores are combined by addition (the single-word spans start
        from math.log(1.0)).  After the chart is filled, every word is tried
        as head of the whole sentence; the candidates tied for the highest
        score are handed to self.choice.

        output:
            returned t is a nltk.tree.Tree without root node
        """
        parse = {}
        # OPTIMIZATION: END considered only explicitly
        # s = s + [self.end_symbol]

        n = len(s)

        # Spans of length 1: each word starts as a '>' node with both
        # valence counters at 0, expanded through its unary parses.
        for i in range(n):
            j = i + 1
            w = str(s[i])
            t1 = tree.Tree(Node('>', w, i, 0, 0), [w])

            parse[i, j] = ParseDict(self.unary_parses(math.log(1.0), t1, i, j))

        # Longer spans, shortest first, so both halves of every split
        # (i, k) / (k, j) are already in the chart.
        for l in range(2, n+1):
            for i in range(n-l+1):
                j = i + l
                parse_dict = ParseDict()
                for k in range(i+1, j):
                    for (p1, t1) in parse[i, k].itervalues():
                        for (p2, t2) in parse[k, j].itervalues():
                            n1 = t1.label()
                            n2 = t2.label()
                            # Left tree is a head still open to the right,
                            # right tree is fully stopped: attach rightwards.
                            if n1.mark == '>' and n2.mark == '|':
                                m = n1.index
                                h = n1.word
                                p = self.p_nonstop_right(h, n1.r_val, self.harmonic) + \
                                    self.p_attach_right(n2.word, h, self.harmonic, n2.index - m) + \
                                    p1 + p2
                                # Same head, one more right dependent.
                                new_node = Node(n1.mark, n1.word, n1.index, n1.l_val, n1.r_val + 1)
                                t = tree.Tree(new_node, [t1, t2])
                                parse_dict.add(p, t)
                            # Left tree is fully stopped, right tree is a
                            # head marked '<>': attach leftwards.
                            if n1.mark == '|' and n2.mark == '<>':
                                m = n2.index
                                h = n2.word
                                p = self.p_nonstop_left(h, n2.l_val, self.harmonic) + \
                                    self.p_attach_left(n1.word, h, self.harmonic, m - n1.index) + \
                                    p1 + p2
                                # Same head, one more left dependent.
                                new_node = Node(n2.mark, n2.word, n2.index, n2.l_val + 1, n2.r_val)
                                t = tree.Tree(new_node, [t1, t2])
                                parse_dict.add(p, t)

                # Expand every binary parse through its unary parses and
                # flatten the resulting lists into one.
                parse[i, j] = ParseDict(sum((self.unary_parses(p, t, i, j) \
                                    for (p, t) in parse_dict.itervalues()), []))

        # Choose the head of the sentence, starting with word 0.
        # NOTE(review): unlike the first loop, w is not passed through str()
        # here, so the key concatenation assumes the words are strings.
        w = s[0]
        (p1, t1) = parse[0, n].val('|'+w+'0')
        t_max, p_max = t1, p1 + self.p_attach_left(w, self.end_symbol, self.harmonic)
        # l collects every candidate tied for the best score so far.
        l = [(t_max, p_max)]
        for i in range(1, n):
            w = s[i]
            (p1, t1) = parse[0, n].val('|'+w+str(i))
            p = p1 + self.p_attach_left(w, self.end_symbol, self.harmonic)
            if p > p_max:
                p_max = p
                l = [(t1, p)]
            elif p == p_max:
                l += [(t1, p)]
        # Tie-breaking is delegated to self.choice.
        (t_max, p_max) = self.choice(l, self.args.choice)

        return (t_max, p_max)

Example #17

0

Show file

    current = ""
    while stack:
        current = stack.pop()

        if isinstance(current, tree.Tree):
            for i in range(len(current)):
                stack.append(current[i])

        elif isinstance(current, str):
            # print "[输出] ",current
            print current


if __name__ == "__main__":
    # Build a small demo tree bottom-up: each Tree takes a label and a list
    # of children, which may be plain strings (leaves) or other trees.
    C = tree.Tree("C", ["E", "F"])
    B = tree.Tree("B", [C, "D"])
    M = tree.Tree("M", ["O", "P"])
    H = tree.Tree("H", [M, "N"])
    G = tree.Tree("G", ["X", "Y"])
    A = tree.Tree("A", [G, H])
    K = tree.Tree("K", ["L", "Q"])

    root = tree.Tree("Root", [A, B, K])

    # Inspect the root: first child, height, number of children, type.
    print root[0]
    print root.height()
    print len(root)
    print type(root)

    import time

Example #18

0

Show file

def treebank_from_sentences(S):
    """Returns a treebank with sentences S and trivial trees.

    Each sentence becomes a 'ROOT' tree with one child per word; the child
    is a one-leaf tree that uses the word both as label and as leaf.
    """
    trees = []
    for sentence in S:
        preterminals = [tree.Tree(word, [word]) for word in sentence]
        trees.append(Tree(tree.Tree('ROOT', preterminals)))
    return Treebank(trees)

Example #19

0

Show file

File: cast3lb.py Project: sustcsonglin/struct-learning-with-flow

 def parsed(self, files=None):
     """Yield every tree of the saved treebank wrapped under a 'ROOT' node.

     Each tree produced by treebank.SavedTreebank.parsed is placed as the
     only child of a new 'ROOT' tree and returned as a Cast3LBTree that
     reuses the original tree's labels.
     """
     saved_trees = treebank.SavedTreebank.parsed(self, files)
     for saved in saved_trees:
         rooted = tree.Tree('ROOT', [saved])
         yield Cast3LBTree(rooted, saved.labels)

Example #20

0

Show file

File: left_corner2normal_tree_converter.py Project: lifengjin/db-pcfg

from State import State

Example #21

0

Show file

def btree(relation_list, words):
    """
    Build and display one binary tree per sentiment word.

    For every word in `words`, the dependency relations connected to it
    (directly or through words already collected) are gathered, ordered by
    the priority in `sort_dict`, and combined into a tree that is then
    drawn.

    :param relation_list: collection of dependency relations (objects with
        ``mw``, ``cw`` and ``relation`` attributes)
    :param words: collection of sentiment words
    :return: None -- each tree is shown with ``T1.draw()`` rather than
        returned
    """
    #
    # for relation in relation_list:
    #     print relation.mw, " - ", relation.relation, " - ", relation.cw

    # while len(relation_list) >0:

    # NOTE(review): T and T_list are never used below.
    T = []
    T_list = []
    for w in words:
        # Relations collected for this word, mapped to their sort priority.
        es = {}
        es_word = [w]   # words associated with the current sentiment word

        # Priority of each relation type; lower values are processed first.
        # A relation type missing from this table raises KeyError below.
        sort_dict = {
            "VOB": 1,
            "ATT": 2,
            "ADV": 3,
            "VV": 4,
            "COO": 5,
            "SMP": 6,
            "SBV": 7,
            "CNJ": 8
        }


        # sort_dict = {
        #     "ATT": 1,
        #     "ADV": 2,
        #     "VV": 3,
        #     "COO": 4,
        #     "SMP": 5,
        #     "VOB": 6,
        #     "SBV": 7,
        #     "CNJ": 8
        # }

        # Find the dependency relations associated with the sentiment word.
        # es_word grows while scanning, so a relation can be picked up
        # through a word added by an earlier relation.
        for relation in relation_list:
            if es_word.__contains__(relation.mw) or es_word.__contains__(relation.cw):

                es_word.append(relation.mw)
                es_word.append(relation.cw)

                print relation.mw, " - ", relation.relation, " - ", relation.cw
                es[relation] = sort_dict[relation.relation]

                # relation_list.remove(relation)

        # Sort the dependency relations in es by priority to obtain rs.
        es_sort = sorted(es.iteritems(), key=lambda d: d[1], reverse=False)

        rs = []
        for e in es_sort:
            rs.append(e[0])

        print "-----------------"
        for r in rs:
            print r.mw, " - ", r.relation, " - ", r.cw

        # Remove CNJ from rs to obtain rs1 (not implemented).

        # Start traversing the relations and building the tree.
        T1 = tree.Tree("root", ["", ""])  # all dependencies associated with this sentiment word
        T1_word = []   # words already placed in some tree
        T1_list = []   # collection of subtrees

        for r in rs:
            name = r.relation
            w1 = r.cw
            w2 = r.mw

            if not T1_word.__contains__(w1) and not T1_word.__contains__(w2):
                # [1] Neither word has been seen: build a new two-leaf subtree.
                print "[1]",w1,w2
                n1 = w1
                n2 = w2
                T1_word.append(w1)
                T1_word.append(w2)

                print T1.leaves()[1].__len__()
                # An empty first leaf means T1 is still the placeholder
                # root, so the new subtree becomes the main tree.
                if T1.leaves()[0].__len__() == 0:
                    T1 = tree.Tree(name, [n2, n1])
                    T1_list.append(T1)
                else:
                    temp = tree.Tree(name, [n2, n1])
                    T1_list.append(temp)
                    # T1 = tree.Tree(name, [T1, temp])

            elif T1_word.__contains__(w1) and not T1_word.__contains__(w2):
                # [2] Only w1 is known: put w2 and the current tree under a
                # new node named after the relation.
                print "[2]", w1, w2
                T1_word.append(w2)
                n1 = T1
                n2 = w2

                T1 = tree.Tree(name, [n2, n1])

            elif T1_word.__contains__(w2) and not T1_word.__contains__(w1):
                # [3] Only w2 is known.
                # NOTE(review): n1/n2 are assigned but T1 is not rebuilt in
                # this branch, so w1 never appears in the tree.
                print "[3]", w1, w2
                n1 = "RIGHT"
                n2 = w2
                T1_word.append(w1)
            else:
                # [4] Both words are known: join the subtrees containing
                # them (placeholders are used when no subtree is found).
                print "[4]", w1, w2
                n1 = tree.Tree("n1", ["", ""])
                n2 = tree.Tree("n2", ["", ""])

                for t in T1_list:
                    if t.leaves().__contains__(w1):
                        n1 = t
                    if t.leaves().__contains__(w2):
                        n2 = t

                T1 = tree.Tree(name, [n2, n1])

        # Show the tree built for this sentiment word.
        T1.draw()

Example #22

0

Show file

File: dmvccm.py Project: speedcell4/dmvccm

    def dep_parse(self, s):
        """Parse sentence `s` bottom-up and return the best (tree, probability).

        A chart `parse` maps each span (i, j) to a dmv.ParseDict of scored
        trees.  Probabilities are combined by multiplication, and every
        span longer than one word is additionally weighted by
        self.phi(i, j, s2).  After the chart is filled, every word is tried
        as head of the sentence and one of the candidates tied for the
        highest probability is picked at random.
        """
        parse = {}
        # OPTIMIZATION: END considered only explicitly
        # s = s + [self.end_symbol]
        # only used as the argument to phi
        s2 = s + [self.end_symbol]
        n = len(s)

        # Spans of length 1.
        for i in range(n):
            j = i + 1
            # >w -> w
            # <w -> w
            w = s[i]
            # DMVCCM: multiply by phi:
            # it makes no difference here:
            # phi = self.phi(i, j, s)
            phi = 1.0
            pl = self.p_order('left', w) * phi
            pr = self.p_order('right', w) * phi
            t0 = tree.Tree(dmv.Node('<', w, i), [w])
            t1 = tree.Tree(dmv.Node('>', w, i), [w])

            parse[i, j] = dmv.ParseDict(self.unary_parses(pl, t0, i, j) + self.unary_parses(pr, t1, i, j))

        # Longer spans, shortest first.
        for l in range(2, n + 1):
            for i in range(n - l + 1):
                j = i + l
                # we have parse[a, b] for every span inside (i, j).
                parse_dict = dmv.ParseDict()
                phi = self.phi(i, j, s2)
                for k in range(i + 1, j):
                    # here, best parses between parse[i, k] and parse[k, j]
                    for (p1, t1) in parse[i, k].itervalues():
                        for (p2, t2) in parse[k, j].itervalues():
                            # NOTE(review): `.node` is read here, whereas
                            # newer NLTK code uses `.label()` -- confirm the
                            # NLTK version this targets.
                            n1 = t1.node
                            n2 = t2.node
                            # Left tree is a head whose mark starts with
                            # '>', right tree is fully stopped.
                            if n1.mark[0] == '>' and n2.mark == '|':
                                m = n1.index
                                h = n1.word
                                # n2.index-m = distance between the two
                                # DMVCCM: multiply by phi:
                                p = self.p_nonstop_right(h, m == k - 1) * \
                                    self.p_attach_right(n2.word, h, n2.index - m) * \
                                    p1 * p2 * phi
                                t = tree.Tree(n1, [t1, t2])
                                parse_dict.add(p, t)
                            # Left tree is fully stopped, right tree is a
                            # head whose mark starts with '<'.
                            if n1.mark == '|' and n2.mark[0] == '<':
                                m = n2.index
                                h = n2.word
                                # m-n1.index = distance between the two
                                # DMVCCM: multiply by phi:
                                p = self.p_nonstop_left(h, m == k) * \
                                    self.p_attach_left(n1.word, h, m - n1.index) * \
                                    p1 * p2 * phi
                                t = tree.Tree(n2, [t1, t2])
                                parse_dict.add(p, t)

                # here is where the stops are generated:
                parse[i, j] = dmv.ParseDict(sum((self.unary_parses(p, t, i, j) \
                                                 for (p, t) in parse_dict.itervalues()), []))

        # OPTIMIZATION: finally, choose the head of the sentence.
        # t_max, p_max = None, 0.0
        w = s[0]
        (p1, t1) = parse[0, n].val('|' + w + '0')
        t_max, p_max = t1, p1 * self.p_attach_left(w, self.end_symbol, n)
        # unbiased: l collects every candidate tied for the best probability
        l = [(t_max, p_max)]
        for i in range(1, n):
            w = s[i]
            (p1, t1) = parse[0, n].val('|' + w + str(i))
            p = p1 * self.p_attach_left(w, self.end_symbol, n - i)
            # there is a bias here (> would choose the first head on a tie):
            # apparently this bias only affects the model when it is untrained.
            # bias towards RBRANCH:
            # if p > p_max:
            #    t_max, p_max = t1, p
            # bias towards LBRANCH:
            # if p >= p_max:
            #    t_max, p_max = t1, p
            # unbiased:
            if p > p_max:
                p_max = p
                l = [(t1, p)]
            elif p == p_max:
                l += [(t1, p)]
        # Break ties uniformly at random.
        (t_max, p_max) = random.choice(l)

        return (t_max, p_max)

Example #23

0

Show file

def btree_zx(relation_list):
    """
    Build a binary tree from dependency relations and display it.

    Relations are taken from the END of relation_list one at a time (the
    list is emptied in the process) and merged into a growing tree whose
    internal nodes are labelled with each relation's index in the original
    list.

    :param relation_list: collection of dependency relations (objects with
        ``mw``, ``cw`` and ``relation`` attributes); consumed by this call
    :return: None -- the resulting tree is shown with ``T.draw()`` rather
        than returned
    """

    # while len(relation_list) >0:

    # relation_dict = {}
    # count = 1
    # for r in relation_list:
    #     relation_dict[count] = r
    #     count += 1
    # The list index is used directly as the relation id, so ids start at 0.

    # Stack
    # NOTE(review): stack is never used below.
    stack = []

    # Keep an untouched copy so a relation's original index can still be
    # looked up after it has been popped from relation_list.
    list_copy = []
    for r in relation_list:
        list_copy.append(r)

    T = tree.Tree("root", ["", ""])
    T_list = []   # every tree built so far, in order
    T_word = []   # words already placed in some tree

    for n in range(len(relation_list)):
        r = relation_list.pop()

        mw = r.mw
        cw = r.cw

        id = list_copy.index(r)

        # If it is a coordination relation: COO
        # NOTE(review): apart from the debug output, this branch and the
        # else branch below perform the same steps.
        if r.relation == "COO":
            print "\n[COO]---"
            if not T_word.__contains__(mw) and not T_word.__contains__(cw):
                # Neither word seen yet: start a new subtree.
                temp = creat_tree(mw, cw, r.relation, id)
                T_word.extend([mw, cw])
                T = temp
                T_list.append(T)
            elif T_word.__contains__(mw) and not T_word.__contains__(cw):
                # Only mw is known: the current tree goes left, cw right.
                print "[2]", mw, cw
                T_word.append(cw)

                temp = T
                T = tree.Tree(id, [temp, cw])
                T_list.append(T)
            elif not T_word.__contains__(mw) and T_word.__contains__(cw):
                # Only cw is known: mw goes left, the current tree right.
                print "[3]", mw, cw
                T_word.append(mw)

                temp = T
                T = tree.Tree(id, [mw, temp])
                T_list.append(T)
            else:
                # Both words are known: join the latest trees containing
                # them (placeholders are used when no tree is found).
                print "[4]", mw, cw
                n1 = tree.Tree("n1", ["", ""])
                n2 = tree.Tree("n2", ["", ""])

                for t in T_list:
                    if t.leaves().__contains__(cw):
                        n1 = t
                    if t.leaves().__contains__(mw):
                        n2 = t
                T = tree.Tree(id, [n2, n1])
                T_list.append(T)
        else:
            print "\n[不是COO]---"
            if not T_word.__contains__(mw) and not T_word.__contains__(cw):
                # Neither word seen yet: start a new subtree.
                print "[1]", mw, cw
                temp = creat_tree(mw, cw, r.relation, id)
                T_word.extend([mw, cw])
                T = temp
                T_list.append(T)
            elif T_word.__contains__(mw) and not T_word.__contains__(cw):
                # Only mw is known: the current tree goes left, cw right.
                print "[2]", mw, cw
                T_word.append(cw)

                temp = T
                T = tree.Tree(id, [temp, cw])
                T_list.append(T)
            elif not T_word.__contains__(mw) and T_word.__contains__(cw):
                # Only cw is known: mw goes left, the current tree right.
                print "[3]", mw, cw
                T_word.append(mw)
                temp = T
                T = tree.Tree(id, [mw, temp])
                T_list.append(T)
            else:
                # Both words are known: join the latest trees containing
                # them (placeholders are used when no tree is found).
                print "[4]", mw, cw
                n1 = tree.Tree("n1", ["", ""])
                n2 = tree.Tree("n2", ["", ""])

                for t in T_list:
                    if t.leaves().__contains__(cw):
                        n1 = t
                    if t.leaves().__contains__(mw):
                        n2 = t
                T = tree.Tree(id, [n2, n1])
                T_list.append(T)


    # Show the final tree.
    T.draw()

Example #24

0

Show file

    current = ""
    while stack:
        current = stack.pop()

        if isinstance(current, tree.Tree):
            for i in range(len(current)):
                stack.append(current[i])

        elif isinstance(current, str):
            # print "[输出] ",current
            print (current)


if __name__ == "__main__":
    # Build a small demo tree bottom-up: each Tree takes a label and a list
    # of children, which may be plain strings (leaves) or other trees.
    C = tree.Tree("我", ["E", "F"])
    B = tree.Tree("是", [C, "D"])
    H = tree.Tree("好", ["M", "N"])
    A = tree.Tree("人", ["G", H])
    root = tree.Tree("Root", [A, B])


    # Inspect the root: first child, height, number of children, type.
    print (root[0])
    print (root.height())
    print( len(root))
    print (type(root))

    # Run the traversal demo on the tree, then open NLTK's tree viewer.
    test(root)
    # test_2(root)

    root.draw()

Example #25

0

Show file

File: dmv.py Project: dugu9sword/dmvccm

    def dep_parse(self, s):
        """Parse sentence `s` bottom-up and return the best (tree, probability).

        A chart `parse` maps each span (i, j) to a ParseDict of scored
        trees.  Probabilities are combined by multiplication.  After the
        chart is filled, every word is tried as head of the sentence and one
        of the candidates tied for the highest probability is picked at
        random.
        """
        parse = {}
        # OPTIMIZATION: END considered only explicitly
        # s = s + [self.end_symbol]
        n = len(s)

        # Spans of length 1: each word may look for dependents to the left
        # first (probability p) or to the right first (probability 1 - p).
        for i in range(n):
            j = i + 1
            # >w -> w
            # <w -> w
            w = s[i]
            p = self.p_order('left', w)
            t0 = tree.Tree(Node('<', w, i), [w])
            t1 = tree.Tree(Node('>', w, i), [w])

            parse[i, j] = ParseDict(self.unary_parses(p, t0, i, j) + \
                                    self.unary_parses(1.0 - p, t1, i, j))

        # Longer spans, shortest first.
        for l in range(2, n + 1):
            for i in range(n - l + 1):
                j = i + l
                # we have parse[a, b] for every span inside (i, j).
                parse_dict = ParseDict()
                for k in range(i + 1, j):
                    # here, best parses between parse[i, k] and parse[k, j]
                    for (p1, t1) in parse[i, k].itervalues():
                        for (p2, t2) in parse[k, j].itervalues():
                            # NOTE(review): `.node` is read here, whereas
                            # newer NLTK code uses `.label()` -- confirm the
                            # NLTK version this targets.
                            n1 = t1.node
                            n2 = t2.node
                            # Left tree is a head whose mark starts with
                            # '>', right tree is fully stopped.
                            if n1.mark[0] == '>' and n2.mark == '|':
                                m = n1.index
                                h = n1.word
                                # n2.index-m = distance between the two
                                p = self.p_nonstop_right(h, m==k-1) * \
                                    self.p_attach_right(n2.word, h, n2.index-m) * \
                                    p1 * p2
                                t = tree.Tree(n1, [t1, t2])
                                parse_dict.add(p, t)
                            # Left tree is fully stopped, right tree is a
                            # head whose mark starts with '<'.
                            if n1.mark == '|' and n2.mark[0] == '<':
                                m = n2.index
                                h = n2.word
                                # m-n1.index = distance between the two
                                p = self.p_nonstop_left(h, m==k) * \
                                    self.p_attach_left(n1.word, h, m-n1.index) * \
                                    p1 * p2
                                t = tree.Tree(n2, [t1, t2])
                                parse_dict.add(p, t)

                # the stops are generated here
                parse[i, j] = ParseDict(sum((self.unary_parses(p, t, i, j) \
                                    for (p, t) in parse_dict.itervalues()), []))

        # all that remains is to choose the head of the sentence:
        #t_max, p_max = None, 0.0
        w = s[0]
        (p1, t1) = parse[0, n].val('|' + w + '0')
        t_max, p_max = t1, p1 * self.p_attach_left(w, self.end_symbol, n)
        # unbiased: l collects every candidate tied for the best probability
        l = [(t_max, p_max)]
        for i in range(1, n):
            w = s[i]
            (p1, t1) = parse[0, n].val('|' + w + str(i))
            p = p1 * self.p_attach_left(w, self.end_symbol, n - i)
            # there is a bias here (> would choose the first head on a tie):
            # apparently this bias only affects the model when it is untrained.
            # bias towards RBRANCH:
            #if p > p_max:
            #    t_max, p_max = t1, p
            # bias towards LBRANCH:
            #if p >= p_max:
            #    t_max, p_max = t1, p
            # unbiased:
            if p > p_max:
                p_max = p
                l = [(t1, p)]
            elif p == p_max:
                l += [(t1, p)]
        # Break ties uniformly at random.
        (t_max, p_max) = random.choice(l)

        return (t_max, p_max)