Beispiel #1
0
    def newnode(self, start, count, compound=False):
        """Build a parent node that spans ``count`` consecutive nodes.

        The node is only constructed here; nothing in this method relinks
        the list. Its ``text``/``norm``/``atom`` are the covered nodes'
        values joined either with "_" (``compound``) or with as many spaces
        as the offset gap between two neighbouring nodes.

        For every covered node whose ``UpperRelationship`` is set and is not
        'H', the feature named "has" + that relationship is applied to the
        new node.

        Args:
            start: index, as understood by ``self.get``, of the first node.
            count: number of consecutive nodes to cover; must be at least 1.
            compound: join the pieces with "_" instead of offset-based spaces.

        Returns:
            The tuple ``(NewNode, startnode, endnode)``.

        Raises:
            RuntimeError: if the list is empty, if ``count`` is smaller
                than 1, or if ``start + count`` exceeds ``self.size``.
        """
        #logging.info("new node: start=" + str(start) + " count=" + str(count))
        if not self.head:
            raise RuntimeError(
                "This SentenceLinkedList is null! Can't combine.")
        if count < 1:
            # Without this guard the loop below never runs and endnode would
            # be looked up at start - 1, i.e. before startnode, yielding an
            # empty node with inverted boundaries.
            raise RuntimeError("Can't combine " + str(count) +
                               " items: count must be at least 1.")
        if start + count > self.size:
            logging.error(str(self))
            raise RuntimeError("Can't get " + str(count) +
                               " items start from " + str(start) +
                               " from the sentence!")

        startnode = self.get(start)
        endnode = self.get(start + count - 1)

        sons = []
        text_parts = []
        norm_parts = []
        atom_parts = []
        hasUpperRelations = []

        p = startnode
        previous_end = p.StartOffset
        for i in range(count):
            if i == 0:
                separator = ""
            elif compound:
                separator = "_"
            else:
                # Reproduce the original gap between the two nodes.
                separator = " " * (p.StartOffset - previous_end)
            previous_end = p.EndOffset

            text_parts.append(separator + p.text)
            norm_parts.append(separator + p.norm)
            atom_parts.append(separator + p.atom)

            # 'H' is skipped: only the other relationships produce a
            # "has<relationship>" feature on the parent.
            if p.UpperRelationship and p.UpperRelationship != 'H':
                hasUpperRelations.append(
                    FeatureOntology.GetFeatureID("has" + p.UpperRelationship))
            sons.append(p)
            p = p.next

        NewNode = SentenceNode("".join(text_parts))
        NewNode.norm = "".join(norm_parts)
        NewNode.atom = "".join(atom_parts)
        NewNode.sons = sons
        NewNode.StartOffset = startnode.StartOffset
        NewNode.EndOffset = endnode.EndOffset
        Lexicon.ApplyWordLengthFeature(NewNode)
        for haverelation in hasUpperRelations:
            NewNode.ApplyFeature(haverelation)
        return NewNode, startnode, endnode
Beispiel #2
0
    def transform(self, nodelist):
        """Transform a SentenceLinkedList (``nodelist``) into this object.

        Steps, in order:

        1. Copy text/norm/atom of ``nodelist.root()`` into ``self.fulltext``,
           ``self.fullnorm`` and ``self.fullatom``.
        2. Walk every top-level node (following ``next`` from
           ``nodelist.head``), store a deep copy of each leaf in
           ``self.nodes`` keyed by its ``ID``, and create one ``SubGraph``
           per top-level node, recording its ID in ``self._roots``.
        3. Fill each subgraph: find its head leaf (the leaf carrying
           ``utils.FeatureID_H``), collect its other children as
           ``[ID, UpperRelationship]`` pairs in ``subgraph.leaves``, and
           queue non-leaf, non-H children as subgraphs of their own.
        4. Replace non-leaf IDs in ``self._roots`` and in the collected
           relations with the head ID of the matching subgraph, then add
           one edge per relation through ``self._AddEdge``.
        5. Number the stored nodes by ``StartOffset`` and link neighbours
           with "RIGHT"/"LEFT" edges, call ``self._MarkNext()`` and set
           ``self.root`` to the first entry of ``self._roots``.

        Args:
            nodelist: object exposing ``root()`` and ``head``; the nodes
                reached from it are read through ``text``, ``norm``,
                ``atom``, ``features``, ``sons``, ``next``, ``ID``,
                ``UpperRelationship`` and ``StartOffset``.

        NOTE(review): ``self._roots[0]`` at the end presumably raises
        IndexError when no top-level node was collected -- confirm that
        callers never pass such a list.
        """
        if logging.root.isEnabledFor(logging.DEBUG):
            # Serialising the whole list is costly, so only do it when the
            # message will actually be emitted.
            logging.debug("Start to transform:\n {}".format(
                jsonpickle.dumps(nodelist)))
        self.fulltext = nodelist.root().text
        self.fullnorm = nodelist.root().norm
        self.fullatom = nodelist.root().atom
        root = nodelist.head
        if root.text == '' and utils.FeatureID_JS in root.features:
            root = root.next  #ignore the first empty (virtual) JS node

        # Subgraphs still waiting to be filled in the second phase.
        temp_subgraphs = []
        # Collect all the leaf nodes into self.nodes.
        while root is not None:
            #each "root" has a tree, independent from others.
            node = root
            # Siblings to come back to after descending into a node's sons.
            # This is a set, so pop() returns them in arbitrary order.
            nodestack = set()
            while node:
                if node.sons:
                    # A two-character node made of two one-character sons is
                    # recorded in DanziDict (node -> its sons).
                    if len(node.sons) == 2 and len(node.text) == 2 and len(
                            node.sons[0].text) == 1 and len(
                                node.sons[1].text) == 1:
                        DanziDict.update({node: node.sons})
                    # NOTE(review): this also runs when node is the current
                    # root, so root.next is queued and the walk continues
                    # into the following top-level nodes, which the outer
                    # loop then walks again. Looks redundant -- confirm.
                    if node.next:
                        nodestack.add(node.next)
                    node = node.sons[0]
                else:
                    # Leaf: keep a private copy, except for the empty JM node.
                    if not (node.text == ''
                            and utils.FeatureID_JM in node.features):
                        self.nodes.update({node.ID: copy.deepcopy(node)
                                           })  # add leaf node to self.nodes.

                    if node == root:  #if node is in root level, don't get next.
                        if nodestack:
                            node = nodestack.pop()
                        else:
                            node = None
                        continue

                    # Move on to the next sibling, or resume a queued one
                    # when this sibling chain is exhausted.
                    node = node.next
                    if node is None and nodestack:
                        node = nodestack.pop()
            # Every top-level node except the empty JM node becomes a
            # subgraph and a root candidate.
            if not (root.text == '' and utils.FeatureID_JM in root.features):
                temp_subgraphs.append(SubGraph(root))
                self._roots.append(root.ID)
            root = root.next

        #filling up the subgraphs.
        while temp_subgraphs:
            # Taken from the end of the list; subgraphs discovered below are
            # appended to the same list and processed later.
            subgraph = temp_subgraphs.pop()
            node = subgraph.startnode

            if node.sons:
                subnode = node.sons[0]
                # Same arbitrary-order set of pending siblings as above.
                nodestack = set()
                while subnode:
                    if subnode.sons:
                        if utils.FeatureID_H not in subnode.features:
                            # Not descended into: it is queued as its own
                            # subgraph and only referenced from this one.
                            temp_subgraphs.append(SubGraph(
                                subnode))  # non-leaf, non-H. it is a subgraph.
                            subgraph.leaves.append(
                                [subnode.ID, subnode.UpperRelationship])
                            subnode = subnode.next
                            if subnode is None and nodestack:
                                subnode = nodestack.pop()
                        else:
                            # Non-leaf carrying H: descend into it, after
                            # remembering the sibling to return to.
                            if subnode.next:
                                nodestack.add(subnode.next)
                            subnode = subnode.sons[0]
                    else:  # this is a leaf node.
                        #  use the copy in self.nodes to apply feature modification
                        if utils.FeatureID_H in subnode.features:
                            # Head leaf: it receives the features of the
                            # subgraph's start node, then the word-length
                            # feature is applied to the stored copy.
                            subgraph.headID = subnode.ID
                            self.nodes[subnode.ID].features.update(
                                subgraph.startnode.features)
                            Lexicon.ApplyWordLengthFeature(
                                self.nodes[subnode.ID])
                        else:
                            # Ordinary leaf: recorded with its relationship,
                            # unless it is the empty JM node.
                            if not (subnode.text == '' and utils.FeatureID_JM
                                    in subnode.features):
                                subgraph.leaves.append(
                                    [subnode.ID, subnode.UpperRelationship])
                        subnode = subnode.next
                        if subnode is None and nodestack:
                            subnode = nodestack.pop()
            else:
                # A start node without sons is its own head.
                subgraph.headID = subgraph.startnode.ID

            self._subgraphs.append(subgraph)  # add to the permanent subgraphs

        # now set the roots, from the top node to the head.
        for i in range(len(self._roots)):
            # IDs missing from self.nodes belong to non-leaf nodes; swap them
            # for the head ID of the subgraph that starts at that node.
            # self._roots[i] is re-read on every comparison and there is no
            # break, so the scan continues after a replacement.
            if self._roots[i] not in self.nodes:
                for _subgraph in self._subgraphs:
                    if _subgraph.startnode.ID == self._roots[i]:
                        self._roots[i] = _subgraph.headID

        # now process the non-leaf, non-H points.
        # copy information to self.graph
        for subgraph in self._subgraphs:
            for relation in subgraph.leaves:
                # relation is [ID, UpperRelationship]; a non-leaf ID is
                # replaced in place by the head ID of its own subgraph.
                if relation[0] not in self.nodes:
                    for _subgraph in self._subgraphs:
                        if _subgraph.startnode.ID == relation[0]:
                            relation[0] = _subgraph.headID
                            #print("The previous ID" + str(relation[0]) + " is replaced by head ID" + str(_subgraph.headID))
                            break
                self._AddEdge(relation[0], relation[1], subgraph.headID)
        # Number the stored nodes in StartOffset order and connect each node
        # with its predecessor through a RIGHT and a LEFT edge.
        index = 0
        prevnode = None
        for node in sorted(self.nodes.values(),
                           key=operator.attrgetter("StartOffset")):
            node.Index = index
            if prevnode:
                self._AddEdge(node.ID, "RIGHT", prevnode.ID)
                self._AddEdge(prevnode.ID, "LEFT", node.ID)
            prevnode = node
            index += 1

        self._MarkNext()
        self.root = self._roots[0]

        if logging.root.isEnabledFor(logging.DEBUG):
            logging.debug("End of transform:\n {}".format(self))