def asDictionary(self):
    """Return a plain-dictionary snapshot of this object.

    The result has four keys:

    * ``u"surface"``   -- whatever ``self.getSurface()`` returns.
    * ``u"tokenized"`` -- the token surfaces joined by single spaces.
    * ``u"tokens"``    -- one dict per token from ``self.getTokens()``
      holding its surface, node id, position, relations, dependency,
      depended token ids and typed depended token ids.
    * ``u"nodes"``     -- one dict per value of ``self.getNodes()`` holding
      its tag, parent node id and own node id.
    """
    result = {}
    result[u"surface"] = self.getSurface()
    result[u"tokenized"] = u" ".join(
        tok.getSurface() for tok in self.getTokens()
    )
    result[u"tokens"] = [
        {
            u"surface": tok.getSurface(),
            u"nodeid": tok.getNodeid(),
            u"position": tok.getPosition(),
            u"relations": tok.getRelations(),
            u"dependency": tok.getDependency(),
            u"dependedTokenIds": tok.getDependedTokenIds(),
            u"typedDependedTokenIds": tok.getTypedDependedTokenIds(),
        }
        for tok in self.getTokens()
    ]
    # Only the node objects are needed; the mapping keys are ignored.
    result[u"nodes"] = [
        {
            u"tag": node.getTag(),
            u"parent": node.getParentNodeid(),
            u"id": node.getNodeid(),
        }
        for _, node in self.getNodes().items()
    ]
    return result
def append(self, token):
    """Add ``token`` to the end of this container.

    Besides storing the token, this records two lookups: the token's node
    id is mapped to the index the token occupies in the token list, and the
    token's position is appended to the parallel list of positions.

    ``token`` must be a ``slex.corpus.token.Token`` (checked by assertion).
    """
    # TODO: decide whether the tokens need to be re-sorted (by token id)
    # after appending.
    # NOTE(review): getIndex() bisects the position list, which presumably
    # relies on positions being appended in ascending order -- confirm.
    assert isinstance(token, slex.corpus.token.Token)

    # The new token lands at the current end of the list.
    index = len(self.__tokens)
    self.__nid2tokenid[token.getNodeid()] = index
    self.__tokens.append(token)
    self.__token_ids.append(token.getPosition())
def getTokenIdByPosition(self, position, getRight=False):
    """Return the index of the token located at ``position``.

    Tokens are scanned in stored order and the first one whose end offset
    (``token.getPosition() + len(token)``) is greater than or equal to
    ``position`` is selected.  If no token qualifies, the last token is
    selected (index 0 when there are no tokens at all).

    :param position: offset to look up; must be an ``int``.
    :param getRight: when ``True``, return the index of the token to the
        right of the selected one instead; must be a ``bool``.
    :returns: the selected index, or, with ``getRight=True``, the following
        index, or ``None`` if there is no following token.
    """
    assert isinstance(position, int)
    assert isinstance(getRight, bool)

    tokens = self.__tokens
    total = len(tokens)

    # Default when nothing matches: the last token, or 0 for an empty list.
    found = total - 1 if total != 0 else 0
    for index, token in enumerate(tokens):
        # Only the end of the token is compared (inclusive); the start of
        # the token is deliberately not checked.
        if position <= token.getPosition() + len(token):
            found = index
            break

    if getRight is False:
        return found

    right = found + 1
    return right if right < total else None
def getIndex(self, token):
    """Return where ``token``'s position falls in the stored position list.

    The lookup is a left bisection over the list of positions recorded by
    ``append``, so the result is the leftmost insertion point for
    ``token.getPosition()``; it is the token's index only if that position
    is actually present in the list.

    ``token`` must be a ``slex.corpus.token.Token`` (checked by assertion).
    """
    assert isinstance(token, slex.corpus.token.Token)
    import bisect

    # NOTE(review): bisect requires the position list to be sorted in
    # ascending order -- presumably guaranteed by the callers of append().
    target = token.getPosition()
    return bisect.bisect_left(self.__token_ids, target)