def getFull(self, length, idfs = None):
     """Return the document as a dense bag-of-words (BOW) vector.

     The result is a 1-D numpy array with `length` entries. Entry `tokenid`
     holds the frequency that utils_dml.vect2bow reports for that token id
     in self.getTokenIds(); all other entries are zero.

     If `idfs` is given, each frequency is multiplied by idfs[tokenid] and
     the array has dtype numpy.float32; otherwise the dtype is int.
     """
     bow = utils_dml.vect2bow(self.getTokenIds())

     # Use an identity test: if idfs is a numpy array, `idfs == None` is an
     # elementwise comparison whose result has no single truth value.
     if idfs is None:
         result = numpy.zeros(length, dtype = int)
         # items() rather than the Python-2-only iteritems(); assumes bow is
         # dict-like.
         for tokenid, freq in bow.items():
             result[tokenid] = freq
     else:
         result = numpy.zeros(length, dtype = numpy.float32)
         for tokenid, freq in bow.items():
             result[tokenid] = freq * idfs[tokenid]
     return result
    def getFull(self, length, idfs=None):
        """Return the document as a dense bag-of-words (BOW) vector.

        The result is a 1-D numpy array with `length` entries. Entry
        `tokenid` holds the frequency that utils_dml.vect2bow reports for
        that token id in self.getTokenIds(); all other entries are zero.

        If `idfs` is given, each frequency is multiplied by idfs[tokenid]
        and the array has dtype numpy.float32; otherwise the dtype is int.
        """
        bow = utils_dml.vect2bow(self.getTokenIds())

        # Use an identity test: if idfs is a numpy array, `idfs == None` is
        # an elementwise comparison whose result has no single truth value.
        if idfs is None:
            result = numpy.zeros(length, dtype=int)
            # items() rather than the Python-2-only iteritems(); assumes bow
            # is dict-like.
            for tokenid, freq in bow.items():
                result[tokenid] = freq
        else:
            result = numpy.zeros(length, dtype=numpy.float32)
            for tokenid, freq in bow.items():
                result[tokenid] = freq * idfs[tokenid]
        return result
 def getSparse(self, length, idfs = None):
     """Return the document as a sparse bag-of-words (BOW) vector.

     The result is a scipy.sparse.lil_matrix of shape (1, length), i.e. a
     single row, where result[0, tokenid] is the frequency that
     utils_dml.vect2bow reports for that token id in self.getTokenIds().

     If `idfs` is given, each frequency is multiplied by idfs[tokenid] and
     the matrix has dtype numpy.float32; otherwise the dtype is int.
     """
     # lil_matrix access to element is O(log), so build a hash first to get O(1)
     bow = utils_dml.vect2bow(self.getTokenIds())

     # Use an identity test: if idfs is a numpy array, `idfs == None` is an
     # elementwise comparison whose result has no single truth value.
     if idfs is None:
         # The shape tuple is passed positionally: lil_matrix requires its
         # first argument, so a keyword-only `shape=` call is rejected.
         result = scipy.sparse.lil_matrix((1, length), dtype = int)
         # items() rather than the Python-2-only iteritems(); assumes bow is
         # dict-like.
         for tokenid, freq in bow.items():
             result[0, tokenid] = freq
     else:
         result = scipy.sparse.lil_matrix((1, length), dtype = numpy.float32)
         for tokenid, freq in bow.items():
             result[0, tokenid] = freq * idfs[tokenid]
     return result
    def getSparse(self, length, idfs=None):
        """Return the document as a sparse bag-of-words (BOW) vector.

        The result is a scipy.sparse.lil_matrix of shape (1, length), i.e. a
        single row, where result[0, tokenid] is the frequency that
        utils_dml.vect2bow reports for that token id in self.getTokenIds().

        If `idfs` is given, each frequency is multiplied by idfs[tokenid]
        and the matrix has dtype numpy.float32; otherwise the dtype is int.
        """
        # lil_matrix access to element is O(log), so build a hash first to get O(1)
        bow = utils_dml.vect2bow(self.getTokenIds())

        # Use an identity test: if idfs is a numpy array, `idfs == None` is
        # an elementwise comparison whose result has no single truth value.
        if idfs is None:
            # The shape tuple is passed positionally: lil_matrix requires its
            # first argument, so a keyword-only `shape=` call is rejected.
            result = scipy.sparse.lil_matrix((1, length), dtype=int)
            # items() rather than the Python-2-only iteritems(); assumes bow
            # is dict-like.
            for tokenid, freq in bow.items():
                result[0, tokenid] = freq
        else:
            result = scipy.sparse.lil_matrix((1, length),
                                             dtype=numpy.float32)
            for tokenid, freq in bow.items():
                result[0, tokenid] = freq * idfs[tokenid]
        return result