def _create_subtree(self, tree, parent_idx, emb, pos):
    """Recursive subroutine used for `ids_to_tree()`, do not use otherwise.

    Solves a subtree (starting just after the opening bracket, returning a position
    just after the corresponding closing bracket).

    @param tree: the tree to work on (will be enhanced by the subtree)
    @param parent_idx: the ID of the parent for the current subtree
    @param emb: the source embeddings
    @param pos: starting position in the source embeddings
    @return: the final position used in the current subtree
    """
    if pos >= len(emb):  # avoid running out of the tree (for invalid trees)
        return pos
    # create a placeholder node; its data are filled in at the end, once the
    # t-lemma/formeme have been collected
    node_idx = tree.create_child(parent_idx, len(tree), NodeData(None, None))
    t_lemma = None
    formeme = None
    while pos < len(emb) and emb[pos] not in [self.BR_CLOSE, self.STOP, self.VOID]:
        if emb[pos] == self.BR_OPEN:
            # recurse into subtree
            pos = self._create_subtree(tree, node_idx, emb, pos + 1)
        elif emb[pos] == self.UNK_T_LEMMA:
            if t_lemma is None:
                t_lemma = self.id_to_string[self.UNK_T_LEMMA]
            pos += 1
        elif emb[pos] == self.UNK_FORMEME:
            if formeme is None:
                formeme = self.id_to_string[self.UNK_FORMEME]
            pos += 1
        elif emb[pos] >= self.MIN_VALID:
            # remember the t-lemma and formeme for normal nodes
            token = self.id_to_string.get(emb[pos])
            if t_lemma is None:
                t_lemma = token
            elif formeme is None:
                formeme = token
            # move the node to its correct position
            # (which we now know it's at the current end of the tree)
            if node_idx != len(tree) - 1:
                tree.move_node(node_idx, len(tree) - 1)
                node_idx = len(tree) - 1
            pos += 1
        else:
            # FIX: skip any other special/invalid token id (< MIN_VALID and not
            # handled above); previously such a token never advanced `pos`, so
            # a malformed embedding sequence would loop forever here
            pos += 1
    if pos < len(emb) and emb[pos] == self.BR_CLOSE:
        # skip this closing bracket so that we don't process it next time
        pos += 1
    # fill in the t-lemma and formeme that we've found
    if t_lemma is not None or formeme is not None:
        tree.nodes[node_idx] = NodeData(t_lemma, formeme)
    return pos
def lexicalize(self, gen_trees, abst_file):
    """Lexicalize nodes in the generated trees (which may represent trees, tokens, or tagged lemmas).
    Expects lexicalization file (and surface forms file) to be loaded in the Lexicalizer object,
    otherwise nothing will happen. The actual operation depends on the generator mode.

    Modifies `gen_trees` in place (replaces 'X-<slot>' placeholder nodes with surface forms).

    @param gen_trees: list of TreeData objects representing generated trees/tokens/tagged lemmas
    @param abst_file: abstraction/delexicalization instructions file path
    @return: None
    """
    # one list of abstraction instructions per generated sentence
    abstss = smart_load_absts(abst_file, len(gen_trees))
    for sent_no, (tree, absts) in enumerate(zip(gen_trees, abstss)):
        log_debug("Lexicalizing sentence %d: %s" % ((sent_no + 1), str(tree)))
        # flat token view of the tree (shape depends on self.mode)
        sent = self._tree_to_sentence(tree)
        log_debug(str(sent))
        for idx, tok in enumerate(sent):
            if tok and tok.startswith('X-'):  # we would like to lexicalize
                slot = tok[2:]  # strip the 'X-' prefix to get the slot name
                # check if we have a value to substitute; if yes, do it
                abst = self._first_abst(absts, slot)
                if abst:
                    # tagged lemmas: one token with appropriate value
                    # (the following sentence item is presumably the POS tag -- verify
                    # against _tree_to_sentence)
                    if self.mode == 'tagged_lemmas':
                        tag = sent[idx+1] if idx < len(sent) - 1 else None
                        val = self.get_surface_form(sent, idx, slot, abst.value, tag=tag)
                        tree.nodes[idx+1] = NodeData(t_lemma=val, formeme='x')
                    # trees: one node with appropriate value, keep formeme
                    # (sentence alternates lemma/formeme, hence old_div(idx,2)+1 maps the
                    # sentence position back to the tree node index -- TODO confirm)
                    elif self.mode == 'trees':
                        formeme = sent[idx+1] if idx < len(sent) - 1 else None
                        val = self.get_surface_form(sent, idx, slot, abst.value, formeme=formeme)
                        tree.nodes[old_div(idx,2)+1] = NodeData(t_lemma=val,
                                                                formeme=tree[old_div(idx,2)+1].formeme)
                    # tokens: one token with all words from the value (postprocessed below)
                    else:
                        val = self.get_surface_form(sent, idx, slot, abst.value)
                        tree.nodes[idx+1] = NodeData(t_lemma=val, formeme='x')
                    sent[idx] = val  # save value to be used in LM next time
        # postprocess tokens (split multi-word nodes)
        if self.mode == 'tokens':
            idx = 1  # skip the technical root node
            while idx < len(tree):
                if ' ' in tree[idx].t_lemma:
                    # replace the multi-word node by one node per word, all attached
                    # to the root (the tree is flat in 'tokens' mode)
                    value = tree[idx].t_lemma
                    tree.remove_node(idx)
                    for shift, tok in enumerate(value.split(' ')):
                        tree.create_child(0, idx + shift, NodeData(t_lemma=tok, formeme='x'))
                    # jump past the newly inserted nodes (`shift` is the last loop value)
                    idx += shift
                idx += 1
def ids_to_tree(self, emb, postprocess=True):
    """Create a fake (flat) t-tree from token embeddings (IDs).

    @param emb: source embeddings (token IDs)
    @param postprocess: postprocess the sentence (capitalize sentence start, merge plural \
        markers)? True by default.
    @return: the corresponding tree
    """
    tree = TreeData()
    tokens = self.ids_to_strings(emb)
    for token in tokens:
        if token in ['<GO>', '<STOP>', '<VOID>']:
            continue
        if postprocess:
            # casing (only if set to lowercase): capitalize at sentence start
            # (tree holds just the root) or right after sentence-final punctuation.
            # FIX: parenthesized the condition -- previously `A and B or C` also
            # capitalized after punctuation when self.lowercase was off,
            # contradicting the intended "only if set to lowercase" behavior
            if self.lowercase and (len(tree) == 1 or tree.nodes[-1].t_lemma in ['.', '?', '!']):
                token = token[0].upper() + token[1:]
            # plural merging (if plural tokens come up): replace the preceding
            # node's lemma by its plural form and drop the marker
            if token == '<-s>' and tree.nodes[-1].t_lemma is not None:
                token = self._singular_to_plural(tree.nodes[-1].t_lemma)
                tree.remove_node(len(tree) - 1)
            elif token == '<-s>':
                # plural marker with nothing to attach to -- just skip it
                continue
        # flat tree: every token becomes a child of the root
        tree.create_child(0, len(tree), NodeData(token, 'x'))
    return tree
def ids_to_tree(self, emb, postprocess=True):
    """Create a fake (flat) t-tree from token embeddings (IDs).

    @param emb: source embeddings (token IDs)
    @param postprocess: kept for interface compatibility; this variant applies \
        no postprocessing. True by default.
    @return: the corresponding tree
    """
    special = ('<GO>', '<STOP>', '<VOID>')
    tree = TreeData()
    # attach every real token as a child of the technical root, in order
    for tok in self.ids_to_strings(emb):
        if tok in special:
            continue
        tree.create_child(0, len(tree), NodeData(tok, 'x'))
    return tree
# Stress test for CandidateList keyed by TreeData objects: insert 10000 random
# trees with random priorities, pop everything back out, and print a CRC32 of
# the resulting order so runs can be compared for reproducibility.
# NOTE: Python 2 only (`xrange`, `print` statement).

from tgen.planner import CandidateList
from tgen.tree import TreeData, NodeData

import random
import zlib

# fixed seed -> deterministic key/priority sequence -> stable final checksum
random.seed(1206)

l = CandidateList()

for i in xrange(10000):
    # earlier key-type experiments, kept for reference:
    # l[str(i)] = random.randint(0, 100)
    # l[str(random.randint(0,1000))] = random.randint(0, 100)
    # l[(str(random.randint(0,1000)), str(random.randint(0,1000)))] = random.randint(0, 100)
    # tree = TreeData()
    # tree.create_child(0, 1, NodeData(str(random.randint(0, 1000)), str(random.randint(0, 1000))))
    # l[tree] = random.randint(0, 100)

    # build a random tree of 1-10 nodes under random parents
    # (TreeData starts with a root node, so len(tree) >= 1 and the
    # randint(0, len(tree) - 1) parent choice is always valid)
    tree = TreeData()
    for j in xrange(random.randint(1, 10)):
        tree.create_child(
            random.randint(0, len(tree) - 1),
            random.randint(0, 1) == 1,
            NodeData(str(random.randint(0, 1000)), str(random.randint(0, 1000))))
    l[tree] = random.randint(0, 100)

# drain the list; pop() presumably yields entries in priority order -- verify
# against CandidateList
x = []
while l:
    x.append(l.pop())

# checksum of the full pop sequence (keys + priorities)
print zlib.crc32(str(x))