def asDictionary(self):
    """Return a plain-dict representation of this object.

    The returned dict has four keys, inserted in this order:

    * ``u"surface"``   -- the value of ``self.getSurface()``.
    * ``u"tokenized"`` -- the surfaces of all tokens joined by single spaces.
    * ``u"tokens"``    -- a list with one dict per token yielded by
      ``self.getTokens()`` (in iteration order), holding the token's
      surface, nodeid, position, relations, dependency, dependedTokenIds
      and typedDependedTokenIds as returned by the matching getters.
    * ``u"nodes"``     -- a list with one dict per value of
      ``self.getNodes()``, holding the node's ``tag``, ``parent``
      (from ``getParentNodeid()``) and ``id`` (from ``getNodeid()``).
    """
    outdic = {u"surface": self.getSurface()}

    # Walk the tokens once; the "tokenized" string is derived from the
    # surfaces collected here instead of iterating getTokens() a second time.
    mytokens = [
        {
            u"surface": token.getSurface(),
            u"nodeid": token.getNodeid(),
            u"position": token.getPosition(),
            u"relations": token.getRelations(),
            u"dependency": token.getDependency(),
            u"dependedTokenIds": token.getDependedTokenIds(),
            u"typedDependedTokenIds": token.getTypedDependedTokenIds(),
        }
        for token in self.getTokens()
    ]
    outdic[u"tokenized"] = u" ".join(entry[u"surface"] for entry in mytokens)
    outdic[u"tokens"] = mytokens

    # Only the node objects are needed, so iterate the values directly
    # (the mapping's keys were unused and the old loop variable shadowed
    # the builtin ``id``).
    outdic[u"nodes"] = [
        {
            u"tag": node.getTag(),
            u"parent": node.getParentNodeid(),
            u"id": node.getNodeid(),
        }
        for node in self.getNodes().values()
    ]
    return outdic
def append(self, token):
    """Add ``token`` to the end of this object's token list.

    Before the token is stored, the list index it is about to occupy is
    recorded under the token's node id in the node-id -> token-index map.
    The token's position (``token.getPosition()``) is appended to the
    parallel list of token ids.

    ``token`` must be a ``slex.corpus.token.Token``; this is checked with
    an ``assert``.
    """
    # TODO: do the tokens need to be re-sorted by token id after appending?
    assert isinstance(token, slex.corpus.token.Token)

    # The new token lands at the current end of the list.
    next_index = len(self.__tokens)
    nodeid = token.getNodeid()
    self.__nid2tokenid[nodeid] = next_index

    self.__tokens.append(token)
    position = token.getPosition()
    self.__token_ids.append(position)
def getNode(self, token):
    """Look up a node either by key or by the token that refers to it.

    ``token`` may be a plain ``int``, which is used directly as the lookup
    key, or a ``slex.corpus.token.Token``, in which case the token's node
    id (``token.getNodeid()``) is used as the key. Any other argument
    fails the ``assert``.
    """
    key = token
    # Exact type check: only a plain int is treated as a ready-made key;
    # everything else must be a Token and is resolved to its node id.
    if type(token) is not int:
        assert isinstance(token, slex.corpus.token.Token)
        key = token.getNodeid()
    return self.__nodes[key]