Beispiel #1
0
    def __init__(self, b, t, **kw):
        '''Initialise the base class and create the helper objects.

        b  -- forwarded to the base class and to ``Hashing``.
        t  -- forwarded to the base class and to ``Hashing``.
        kw -- any further keyword arguments, handed to the base class as-is.
        '''
        parent = super(KLSH, self)
        parent.__init__(b=b, t=t, **kw)
        # NOTE: parameter validation (self._check_parameters) is currently
        # switched off.

        # Collaborators used by the rest of the class.
        hasher = Hashing(b=b, t=t)
        self.hash = hasher
        self.bucket = KlshBucket()
        self.storage = PickleStorage()
Beispiel #2
0
class KLSH(KLSHBase):
    '''
    KLSH index (presumably kernelized locality-sensitive hashing) over the
    rows of a numeric data matrix.

    Rows are hashed through ``Hashing``, grouped by ``KlshBucket``, and the
    ``W`` matrix returned by ``Hashing.creatHashTable`` is persisted through
    ``PickleStorage``.

    Usage sketch (skipped by doctest: it needs the klsh package and real
    values for ``b`` and ``t``):

    >>> from klsh import KLSH          # doctest: +SKIP
    >>> klsh = KLSH(                   # doctest: +SKIP
    ...     b=b,    # number of hash bits
    ...     t=t,    # second hashing parameter (meaning not documented)
    ... )
    '''

    def __init__(self, b, t, **kw):
        '''Initialise the base class and create the helper objects.

        b  -- number of hash bits.
        t  -- second hashing parameter, forwarded unchanged to the base
              class and to ``Hashing`` (exact meaning not visible here --
              TODO confirm).
        kw -- extra keyword arguments passed through to ``KLSHBase``.

        ``insert_matrix`` later reads ``self.b`` and ``self.t``; those are
        presumably set by ``KLSHBase.__init__`` -- verify against the base.
        '''
        super(KLSH, self).__init__(b=b, t=t, **kw)
        # NOTE: parameter validation (self._check_parameters) is currently
        # switched off.

        self.hash = Hashing(b=b, t=t)
        self.bucket = KlshBucket()
        self.storage = PickleStorage()

    def loadDataSet(self, filename, delim=','):
        '''Read a delimited numeric text file into ``self.dataMat``.

        filename -- path (or file object) accepted by ``numpy.loadtxt``.
        delim    -- column delimiter, a comma by default.
        '''
        self.dataMat = np.loadtxt(filename, delimiter=delim)

    def preprocessing(self):
        '''Hash every row of ``self.dataMat`` and persist the result.

        ``loadDataSet`` must have been called first. After the rows are
        inserted into the buckets, both ``W`` and the buckets are stored.
        '''
        data = self.dataMat
        # One data point per row; dataMat is assumed to be 2-D.
        row_count = data.shape[0]
        self.insert_matrix(data, row_count)
        self.store_Wmat()
        self.bucket.store_buckets()

    def insert_matrix(self, matrix, num):
        '''Build the hash table for ``matrix`` and fill the buckets.

        matrix -- 2-D array with one data point per row.
        num    -- number of leading rows to insert into the buckets.

        Side effect: ``self.W`` is replaced by the matrix returned from
        ``creatHashTable``.
        '''
        kernel = self.hash.kernelMatrix(matrix)
        centered = self.hash.center(kernel)
        hash_table, self.W = self.hash.creatHashTable(centered, self.b,
                                                      self.t)
        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for i in range(num):
            self.bucket.insert_buckets(matrix[i, :], hash_table[i, :])

    def knn(self, vector, knum, stored=False):
        '''Return the result of ``bucket.select_knn`` for ``vector``.

        vector -- query point to hash.
        knum   -- number of neighbours requested.
        stored -- when true, ``W`` and the buckets are first reloaded from
                  disk; otherwise the in-memory state is used.

        ``self.dataMat`` must be set in either case.
        '''
        if stored:
            self.W = self.load_Wmat()
            self.bucket.load_buckets()

        # vector must be the first argument of do_hashing
        hashed_array = self.hash.do_hashing(vector, self.dataMat, self.W)
        return self.bucket.select_knn(knum, hashed_array)

    def store_Wmat(self, filename='Wmat.data'):
        '''Save ``self.W`` to ``filename`` through ``self.storage``.

        The ``with`` block closes the file even if saving raises.
        '''
        with open(filename, 'wb') as fw:
            self.storage.save(self.W, fw)

    def load_Wmat(self, filename='Wmat.data'):
        '''Load and return the matrix written by ``store_Wmat``.

        The file is opened in binary mode to match the ``'wb'`` mode used
        when writing; text mode breaks binary serialisation on Python 3 and
        on Windows.

        NOTE(review): the storage backend is presumably pickle-based, so
        only load files from a trusted source.
        '''
        with open(filename, 'rb') as fr:
            return self.storage.load(fr)
Beispiel #3
0
    def __init__(self, b, t, **kw):
        """Initialise the base class and create the helper objects.

        b  -- forwarded to the base class and to ``Hashing``.
        t  -- forwarded to the base class and to ``Hashing``.
        kw -- any further keyword arguments, handed to the base class as-is.
        """
        parent = super(KLSH, self)
        parent.__init__(b=b, t=t, **kw)
        # NOTE: parameter validation (self._check_parameters) is currently
        # switched off.

        # Collaborators used by the rest of the class.
        hasher = Hashing(b=b, t=t)
        self.hash = hasher
        self.bucket = KlshBucket()
        self.storage = PickleStorage()
Beispiel #4
0
class KLSH(KLSHBase):
    """
    KLSH index (presumably kernelized locality-sensitive hashing) over the
    rows of a numeric data matrix.

    Rows are hashed through ``Hashing``, grouped by ``KlshBucket``, and the
    ``W`` matrix returned by ``Hashing.creatHashTable`` is persisted through
    ``PickleStorage``.

    Usage sketch (skipped by doctest: it needs the klsh package and real
    values for ``b`` and ``t``):

    >>> from klsh import KLSH          # doctest: +SKIP
    >>> klsh = KLSH(                   # doctest: +SKIP
    ...     b=b,    # number of hash bits
    ...     t=t,    # second hashing parameter (meaning not documented)
    ... )
    """

    def __init__(self, b, t, **kw):
        """Initialise the base class and create the helper objects.

        b  -- number of hash bits.
        t  -- second hashing parameter, forwarded unchanged to the base
              class and to ``Hashing`` (exact meaning not visible here --
              TODO confirm).
        kw -- extra keyword arguments passed through to ``KLSHBase``.

        ``insert_matrix`` later reads ``self.b`` and ``self.t``; those are
        presumably set by ``KLSHBase.__init__`` -- verify against the base.
        """
        super(KLSH, self).__init__(b=b, t=t, **kw)
        # NOTE: parameter validation (self._check_parameters) is currently
        # switched off.

        self.hash = Hashing(b=b, t=t)
        self.bucket = KlshBucket()
        self.storage = PickleStorage()

    def loadDataSet(self, filename, delim=","):
        """Read a delimited numeric text file into ``self.dataMat``.

        filename -- path (or file object) accepted by ``numpy.loadtxt``.
        delim    -- column delimiter, a comma by default.
        """
        self.dataMat = np.loadtxt(filename, delimiter=delim)

    def preprocessing(self):
        """Hash every row of ``self.dataMat`` and persist the result.

        ``loadDataSet`` must have been called first. After the rows are
        inserted into the buckets, both ``W`` and the buckets are stored.
        """
        data = self.dataMat
        # One data point per row; dataMat is assumed to be 2-D.
        row_count = data.shape[0]
        self.insert_matrix(data, row_count)
        self.store_Wmat()
        self.bucket.store_buckets()

    def insert_matrix(self, matrix, num):
        """Build the hash table for ``matrix`` and fill the buckets.

        matrix -- 2-D array with one data point per row.
        num    -- number of leading rows to insert into the buckets.

        Side effect: ``self.W`` is replaced by the matrix returned from
        ``creatHashTable``.
        """
        kernel = self.hash.kernelMatrix(matrix)
        centered = self.hash.center(kernel)
        hash_table, self.W = self.hash.creatHashTable(centered, self.b, self.t)
        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for i in range(num):
            self.bucket.insert_buckets(matrix[i, :], hash_table[i, :])

    def knn(self, vector, knum, stored=False):
        """Return the result of ``bucket.select_knn`` for ``vector``.

        vector -- query point to hash.
        knum   -- number of neighbours requested.
        stored -- when true, ``W`` and the buckets are first reloaded from
                  disk; otherwise the in-memory state is used.

        ``self.dataMat`` must be set in either case.
        """
        if stored:
            self.W = self.load_Wmat()
            self.bucket.load_buckets()

        # vector must be the first argument of do_hashing
        hashed_array = self.hash.do_hashing(vector, self.dataMat, self.W)
        return self.bucket.select_knn(knum, hashed_array)

    def store_Wmat(self, filename="Wmat.data"):
        """Save ``self.W`` to ``filename`` through ``self.storage``.

        The ``with`` block closes the file even if saving raises.
        """
        with open(filename, "wb") as fw:
            self.storage.save(self.W, fw)

    def load_Wmat(self, filename="Wmat.data"):
        """Load and return the matrix written by ``store_Wmat``.

        The file is opened in binary mode to match the ``"wb"`` mode used
        when writing; text mode breaks binary serialisation on Python 3 and
        on Windows.

        NOTE(review): the storage backend is presumably pickle-based, so
        only load files from a trusted source.
        """
        with open(filename, "rb") as fr:
            return self.storage.load(fr)