def newnode(self, start, count, compound=False):
    """Build one merged node that covers ``count`` consecutive nodes.

    The nodes are taken from position ``start`` onwards by following
    ``.next`` links.  Their ``text``, ``norm`` and ``atom`` strings are
    concatenated; between two members the separator is ``"_"`` when
    ``compound`` is true, otherwise as many spaces as the distance between
    the previous member's ``EndOffset`` and the current ``StartOffset``.

    The merged node gets the covered nodes as ``sons``, the offsets of the
    first/last member, the word-length feature from ``Lexicon``, and one
    ``"has" + UpperRelationship`` feature for every member whose
    ``UpperRelationship`` is set and is not ``'H'``.

    This method does not relink the list itself; the first and last covered
    nodes are handed back alongside the merged node.

    Args:
        start: position of the first node to cover (passed to ``self.get``).
        count: number of consecutive nodes to cover.
        compound: join the pieces with ``"_"`` instead of offset spacing.

    Returns:
        A ``(NewNode, startnode, endnode)`` tuple.

    Raises:
        RuntimeError: if the list has no head, or if ``start + count``
            exceeds ``self.size``.
    """
    if not self.head:
        raise RuntimeError(
            "This SentenceLinkedList is null! Can't combine.")
    if start + count > self.size:
        # Dump the whole sentence first so the failure can be diagnosed.
        logging.error(self.__str__())
        raise RuntimeError("Can't get " + str(count) +
                           " items start from " + str(start) +
                           " from the sentence!")

    startnode = self.get(start)
    endnode = self.get(start + count - 1)

    children = []
    relation_features = []
    merged_text = ""
    merged_norm = ""
    merged_atom = ""

    cursor = startnode
    previous_end = cursor.StartOffset
    for position in range(count):
        # The first member never gets a separator in front of it.
        if position == 0:
            gap = ""
        elif compound:
            gap = "_"
        else:
            # Reproduce the original spacing between the two members.
            gap = " " * (cursor.StartOffset - previous_end)
        previous_end = cursor.EndOffset

        merged_text += gap + cursor.text
        merged_norm += gap + cursor.norm
        merged_atom += gap + cursor.atom

        relation = cursor.UpperRelationship
        if relation and relation != 'H':
            relation_features.append(
                FeatureOntology.GetFeatureID("has" + relation))

        children.append(cursor)
        cursor = cursor.next

    NewNode = SentenceNode(merged_text)
    NewNode.norm = merged_norm
    NewNode.atom = merged_atom
    NewNode.sons = children
    NewNode.StartOffset = startnode.StartOffset
    NewNode.EndOffset = endnode.EndOffset
    Lexicon.ApplyWordLengthFeature(NewNode)
    # Relation features are applied only after the word-length feature.
    for feature_id in relation_features:
        NewNode.ApplyFeature(feature_id)
    return NewNode, startnode, endnode
def transform(self, nodelist):
    """Populate this object from a SentenceLinkedList-style ``nodelist``.

    Steps, in order:

    1. Copy ``text``/``norm``/``atom`` of ``nodelist.root()`` into
       ``self.fulltext``/``self.fullnorm``/``self.fullatom``.
    2. Walk every top-level node starting at ``nodelist.head`` (a leading
       node with empty text and the JS feature is skipped).  Every node
       without ``sons`` is deep-copied into ``self.nodes`` keyed by its
       ``ID``, except empty-text nodes carrying the JM feature.  Each
       top-level node (same JM exception) gets a ``SubGraph`` and its ID is
       appended to ``self._roots``.
    3. Fill each subgraph: record its head ID and its ``[ID,
       UpperRelationship]`` leaves; sons that have sons of their own and
       lack the H feature become new subgraphs.
    4. Replace IDs in ``self._roots`` and in the leaves that are not keys of
       ``self.nodes`` by the ``headID`` of the subgraph starting at that ID.
    5. Add one edge per leaf via ``self._AddEdge``, then number the nodes by
       ``StartOffset`` and add "RIGHT"/"LEFT" edges between neighbours.
    6. Call ``self._MarkNext()`` and set ``self.root`` to the first root.

    Args:
        nodelist: the parsed sentence; must provide ``root()`` and ``head``.

    NOTE(review): ``self._roots[0]`` at the end fails if no root was
    collected, and ``root.text`` fails if ``nodelist.head`` is None --
    confirm callers never pass an empty sentence.
    """
    # jsonpickle.dumps is only evaluated when DEBUG logging is enabled.
    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("Start to transform:\n {}".format(
            jsonpickle.dumps(nodelist)))
    self.fulltext = nodelist.root().text
    self.fullnorm = nodelist.root().norm
    self.fullatom = nodelist.root().atom
    root = nodelist.head
    if root.text == '' and utils.FeatureID_JS in root.features:
        root = root.next  # ignore the first empty (virtual) JS node
    temp_subgraphs = []

    # Collect all the leaf nodes into self.nodes.
    while root is not None:  # each "root" has a tree, independent from others.
        node = root
        # Siblings postponed while descending into sons.  This is a set, so
        # pop() hands back an arbitrary element: visiting order is not fixed.
        nodestack = set()
        while node:
            if node.sons:
                # Two-character node made of two one-character sons:
                # remember the pair in DanziDict.
                if len(node.sons) == 2 and len(node.text) == 2 and len(
                        node.sons[0].text) == 1 and len(
                            node.sons[1].text) == 1:
                    DanziDict.update({node: node.sons})
                # NOTE(review): when node is root itself, root.next is pushed
                # here too, so the following top-level trees look like they
                # are walked in this pass as well as in their own outer
                # iteration -- confirm this is intended.
                if node.next:
                    nodestack.add(node.next)
                node = node.sons[0]
            else:
                # Leaf: keep a deep copy so later feature changes do not
                # touch the node inside nodelist.  Empty JM nodes are skipped.
                if not (node.text == '' and utils.FeatureID_JM in node.features):
                    self.nodes.update({node.ID: copy.deepcopy(node)
                                       })  # add leaf node to self.nodes.
                if node == root:  # if node is in root level, don't get next.
                    if nodestack:
                        node = nodestack.pop()
                    else:
                        node = None
                    continue
                node = node.next
                if node is None and nodestack:
                    node = nodestack.pop()
        # Empty JM roots get neither a subgraph nor a root entry.
        if not (root.text == '' and utils.FeatureID_JM in root.features):
            temp_subgraphs.append(SubGraph(root))
            self._roots.append(root.ID)
        root = root.next

    # Filling up the subgraphs.  temp_subgraphs is a work list: processing
    # one subgraph may append further subgraphs to it.
    while temp_subgraphs:
        subgraph = temp_subgraphs.pop()
        node = subgraph.startnode
        if node.sons:
            subnode = node.sons[0]
            nodestack = set()  # postponed siblings; pop() order is arbitrary
            while subnode:
                if subnode.sons:
                    if utils.FeatureID_H not in subnode.features:
                        temp_subgraphs.append(SubGraph(
                            subnode))  # non-leaf, non-H. it is a subgraph.
                        # Recorded under the non-leaf ID for now; it is
                        # swapped for that subgraph's head ID further down.
                        subgraph.leaves.append(
                            [subnode.ID, subnode.UpperRelationship])
                        subnode = subnode.next
                        if subnode is None and nodestack:
                            subnode = nodestack.pop()
                    else:
                        # Non-leaf carrying H: descend into it within the
                        # same subgraph, remembering its sibling for later.
                        if subnode.next:
                            nodestack.add(subnode.next)
                        subnode = subnode.sons[0]
                else:  # this is a leaf node.
                    # use the copy in self.nodes to apply feature modification
                    if utils.FeatureID_H in subnode.features:
                        # Head leaf: it takes over the start node's features.
                        subgraph.headID = subnode.ID
                        self.nodes[subnode.ID].features.update(
                            subgraph.startnode.features)
                        Lexicon.ApplyWordLengthFeature(
                            self.nodes[subnode.ID])
                    else:
                        if not (subnode.text == ''
                                and utils.FeatureID_JM in subnode.features):
                            subgraph.leaves.append(
                                [subnode.ID, subnode.UpperRelationship])
                    subnode = subnode.next
                    if subnode is None and nodestack:
                        subnode = nodestack.pop()
        else:
            # A start node without sons is its own head.
            subgraph.headID = subgraph.startnode.ID
        self._subgraphs.append(subgraph)  # add to the permanent subgraphs

    # now set the roots, from the top node to the head.
    # There is no break here: after a replacement the scan continues and
    # compares the remaining subgraphs against the updated value.
    for i in range(len(self._roots)):
        if self._roots[i] not in self.nodes:
            for _subgraph in self._subgraphs:
                if _subgraph.startnode.ID == self._roots[i]:
                    self._roots[i] = _subgraph.headID

    # now process the non-leaf, non-H points.
    # copy information to self.graph
    for subgraph in self._subgraphs:
        for relation in subgraph.leaves:
            # relation is [ID, UpperRelationship]; relation[0] is rewritten
            # in place when it is not a key of self.nodes.
            if relation[0] not in self.nodes:
                for _subgraph in self._subgraphs:
                    if _subgraph.startnode.ID == relation[0]:
                        relation[0] = _subgraph.headID
                        #print("The previous ID" + str(relation[0]) + " is replaced by head ID" + str(_subgraph.headID))
                        break
            self._AddEdge(relation[0], relation[1], subgraph.headID)

    # Number the nodes by StartOffset and link each one to its predecessor
    # with a "RIGHT"/"LEFT" edge pair.
    index = 0
    prevnode = None
    for node in sorted(self.nodes.values(),
                       key=operator.attrgetter("StartOffset")):
        node.Index = index
        if prevnode:
            self._AddEdge(node.ID, "RIGHT", prevnode.ID)
            self._AddEdge(prevnode.ID, "LEFT", node.ID)
        prevnode = node
        index += 1
    self._MarkNext()
    self.root = self._roots[0]
    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("End of transform:\n {}".format(self))