Example #1
0
    def asDictionary(self):
        """Return a plain-dictionary snapshot of this object.

        The result has four keys:

        * ``surface``   -- value of ``self.getSurface()``.
        * ``tokenized`` -- the surfaces of all tokens joined by one space.
        * ``tokens``    -- one dictionary per token from ``self.getTokens()``
          holding the values of the token's getter methods.
        * ``nodes``     -- one dictionary per value of ``self.getNodes()``
          with the node's tag, parent node id and own node id.
        """
        surface = self.getSurface()
        tokenized = u" ".join(tok.getSurface() for tok in self.getTokens())

        # Per-token records; keys mirror the token getter names.
        token_records = [
            {
                u"surface": tok.getSurface(),
                u"nodeid": tok.getNodeid(),
                u"position": tok.getPosition(),
                u"relations": tok.getRelations(),
                u"dependency": tok.getDependency(),
                u"dependedTokenIds": tok.getDependedTokenIds(),
                u"typedDependedTokenIds": tok.getTypedDependedTokenIds(),
            }
            for tok in self.getTokens()
        ]

        # The mapping keys are not needed: every node reports its own id.
        node_records = [
            {
                u"tag": node.getTag(),
                u"parent": node.getParentNodeid(),
                u"id": node.getNodeid(),
            }
            for _, node in self.getNodes().items()
        ]

        return {
            u"surface": surface,
            u"tokenized": tokenized,
            u"tokens": token_records,
            u"nodes": node_records,
        }
Example #2
0
    def append(self, token):
        """Add ``token`` at the end of this container and index it.

        The token's node id is mapped to the list index the token will
        occupy, the token itself is stored, and its position is recorded
        in the parallel list of token positions.

        ``token`` must be a ``slex.corpus.token.Token`` (checked by assert).
        """
        # TODO: decide whether the tokens need to be re-sorted by token id.
        assert isinstance(token, slex.corpus.token.Token)

        # The current length is the index the new token gets once appended.
        self.__nid2tokenid[token.getNodeid()] = len(self.__tokens)

        self.__tokens.append(token)
        self.__token_ids.append(token.getPosition())
Example #3
0
    def getTokenIdByPosition(self, position, getRight=False):
        """Return the index of the token located at ``position``.

        The stored tokens are scanned in order and the first one whose end
        offset (``token.getPosition() + len(token)``) is greater than or
        equal to ``position`` is selected.  When no token qualifies, the
        index of the last token is used (``0`` if there are no tokens).

        With ``getRight=True`` the index following the selected one is
        returned instead, or ``None`` when that index is past the last
        token.

        ``position`` must be an ``int`` and ``getRight`` a ``bool``
        (both checked by assert).
        """
        assert isinstance(position, int)
        assert isinstance(getRight, bool)

        tokens = self.__tokens
        # Fallback: last valid index, or 0 when the list is empty.
        found = max(len(tokens) - 1, 0)
        for index, token in enumerate(tokens):
            # NOTE: only the token's end offset is compared; the token's
            # start is not checked, so a token beginning after ``position``
            # still matches if no earlier token did.
            if position <= token.getPosition() + len(token):
                found = index
                break

        if getRight is not False:
            right = found + 1
            return right if right < len(tokens) else None
        return found
Example #4
0
 def getIndex(self, token):
     """Return the leftmost insertion index of ``token``'s position.

     The token's position is looked up with ``bisect_left`` in the list of
     recorded token positions.

     ``token`` must be a ``slex.corpus.token.Token`` (checked by assert).

     NOTE(review): bisect only gives a meaningful answer on a sorted list;
     confirm that the recorded positions are kept in ascending order.
     """
     assert isinstance(token, slex.corpus.token.Token)
     from bisect import bisect_left

     recorded_positions = self.__token_ids
     return bisect_left(recorded_positions, token.getPosition())