Esempio n. 1
0
    def build(self, pardic=None):
        """Train the coarse quantizer, then build the parent encoder on residuals.

        ``pardic`` is a parameter mapping read as follows:

        * ``'vals'``    -- training data (required).
          NOTE(review): assumed to be a 2-D NumPy array with one vector per
          row -- confirm against the caller.
        * ``'coarsek'`` -- number of coarse centroids (default 1024).

        All other entries are forwarded unchanged to the parent ``build``.
        Neither the caller's mapping nor its ``'vals'`` array is modified:
        the parent receives a shallow copy whose ``'vals'`` holds the
        residuals (each training vector minus its assigned coarse centroid).

        After the parent build, the coarse centroids and ``coarsek`` are
        stored in ``self.ecdat`` (expected to be created by the parent).

        Raises:
            TypeError: if ``pardic`` is None.
        """
        if pardic is None:
            raise TypeError(
                "build() requires a 'pardic' parameter dict, got None")

        # training data
        vals = pardic['vals']
        # the number of coarse centroids
        coarsek = pardic.get('coarsek', 1024)

        logging.info('Building coarse quantizer - BEGIN')
        coa_centroids = kmeans(vals.astype(np.float32), coarsek, niter=100)
        cids = pq_kmeans_assign(coa_centroids, vals)
        logging.info('Building coarse quantizer - DONE')

        # Out-of-place subtraction: the result dtype follows NumPy promotion,
        # so integer training data no longer trips the in-place casting rule,
        # and the caller's array is never touched.
        residual_pardic = dict(pardic)
        residual_pardic['vals'] = vals - coa_centroids[cids, :]
        super(IVFPQEncoder, self).build(residual_pardic)

        self.ecdat['coa_centroids'] = coa_centroids
        self.ecdat['coarsek'] = coarsek
Esempio n. 2
0
File: pq.py Progetto: xdTin/hdidx
    def build(self, pardic=None):
        """Fit the coarse quantizer and delegate residual training to the parent.

        Keys read from ``pardic``:

        * ``'vals'``    -- training data (required).
          NOTE(review): assumed to be a 2-D NumPy array, one vector per
          row -- confirm against the caller.
        * ``'coarsek'`` -- number of coarse centroids (default 1024).

        The parent ``build`` is called with a shallow copy of ``pardic`` in
        which ``'vals'`` is replaced by the residuals (training vectors minus
        their assigned coarse centroid); the caller's mapping and array are
        left as they were.  Afterwards ``self.ecdat`` (expected to be set up
        by the parent) gains ``'coa_centroids'`` and ``'coarsek'``.

        Raises:
            TypeError: if ``pardic`` is None.
        """
        if pardic is None:
            raise TypeError(
                "build() requires a 'pardic' parameter dict, got None")

        # training data
        vals = pardic['vals']
        # the number of coarse centroids
        coarsek = pardic.get('coarsek', 1024)

        logging.info('Building coarse quantizer - BEGIN')
        coa_centroids = kmeans(vals.astype(np.float32), coarsek, niter=100)
        cids = pq_kmeans_assign(coa_centroids, vals)
        logging.info('Building coarse quantizer - DONE')

        # Compute residuals into a new array rather than with ``-=``: an
        # in-place subtract cannot write float results into an integer
        # array, and a new array keeps the caller's data intact.
        residual_pardic = dict(pardic)
        residual_pardic['vals'] = vals - coa_centroids[cids, :]
        super(IVFPQEncoder, self).build(residual_pardic)

        self.ecdat['coa_centroids'] = coa_centroids
        self.ecdat['coarsek'] = coarsek
Esempio n. 3
0
    def build(self, pardic=None):
        """Train one k-means codebook per subspace and store them on ``self``.

        Keys read from ``pardic``:

        * ``'vals'``      -- training data (required); ``vals.shape[1]`` is
          taken as the vector dimension.
        * ``'nsubq'``     -- number of subquantizers (required).
        * ``'nsubqbits'`` -- bits per subquantizer (default 8), giving
          ``2 ** nsubqbits`` centroids per subspace.
        * ``'blksize'``   -- number of items in one block (default 16384);
          only recorded here, not used during training.

        On success ``self.ecdat`` is replaced by a dict with the keys
        ``'nsubq'``, ``'ksub'``, ``'dsub'``, ``'blksize'`` and
        ``'centroids'`` (a list with one ``kmeans`` result per subspace).

        Raises:
            TypeError: if ``pardic`` is None.
        """
        if pardic is None:
            raise TypeError(
                "build() requires a 'pardic' parameter dict, got None")

        # training data
        vals = pardic['vals']
        # the number of subquantizers
        nsubq = pardic['nsubq']
        # the number bits of each subquantizer
        nsubqbits = pardic.get('nsubqbits', 8)
        # the number of items in one block
        blksize = pardic.get('blksize', 16384)

        # vector dimension
        dim = vals.shape[1]
        # dimension of the subvectors to quantize; floor division because
        # dsub is used as a slice bound and must stay an int on Python 3
        dsub = dim // nsubq
        # number of centroids per subquantizer
        ksub = 2 ** nsubqbits

        if dim % nsubq:
            # Columns past nsubq * dsub never reach any subquantizer.
            logging.warning(
                "dimension %d is not divisible by nsubq %d; "
                "the trailing %d dimension(s) are ignored",
                dim, nsubq, dim % nsubq)

        # Initializing indexer data
        ecdat = {
            'nsubq': nsubq,
            'ksub': ksub,
            'dsub': dsub,
            'blksize': blksize,
            'centroids': [None] * nsubq,
        }

        logging.info("Building codebooks in subspaces - BEGIN")
        for q in range(nsubq):
            logging.info("\tsubspace %d/%d", q, nsubq)
            # kmeans is handed a C-contiguous float32 copy of this subspace
            vs = np.require(vals[:, q * dsub:(q + 1) * dsub],
                            requirements='C', dtype=np.float32)
            ecdat['centroids'][q] = kmeans(vs, ksub, niter=100)
        logging.info("Building codebooks in subspaces - DONE")

        self.ecdat = ecdat
Esempio n. 4
0
File: pq.py Progetto: xdTin/hdidx
    def build(self, pardic=None):
        """Learn the per-subspace codebooks and publish them as ``self.ecdat``.

        Keys read from ``pardic``:

        * ``'vals'``      -- training data (required); its second axis
          length (``vals.shape[1]``) is the vector dimension.
        * ``'nsubq'``     -- number of subquantizers (required).
        * ``'nsubqbits'`` -- bits per subquantizer (default 8); each
          subspace gets ``2 ** nsubqbits`` centroids.
        * ``'blksize'``   -- number of items in one block (default 16384);
          stored for later use, not consumed by training.

        ``self.ecdat`` is assigned only after every subspace has been
        trained; it holds ``'nsubq'``, ``'ksub'``, ``'dsub'``, ``'blksize'``
        and ``'centroids'`` (one ``kmeans`` result per subspace).

        Raises:
            TypeError: if ``pardic`` is None.
        """
        if pardic is None:
            raise TypeError(
                "build() requires a 'pardic' parameter dict, got None")

        # training data
        vals = pardic['vals']
        # the number of subquantizers
        nsubq = pardic['nsubq']
        # the number bits of each subquantizer
        nsubqbits = pardic.get('nsubqbits', 8)
        # the number of items in one block
        blksize = pardic.get('blksize', 16384)

        # vector dimension
        dim = vals.shape[1]
        # dimension of the subvectors to quantize; must be an int because it
        # is used in slice bounds below (true division breaks on Python 3)
        dsub = dim // nsubq
        # number of centroids per subquantizer
        ksub = 2 ** nsubqbits

        leftover = dim % nsubq
        if leftover:
            # The slices below cover only the first nsubq * dsub columns.
            logging.warning(
                "dimension %d is not divisible by nsubq %d; "
                "the trailing %d dimension(s) are ignored",
                dim, nsubq, leftover)

        # Initializing indexer data
        ecdat = {
            'nsubq': nsubq,
            'ksub': ksub,
            'dsub': dsub,
            'blksize': blksize,
            'centroids': [None] * nsubq,
        }

        logging.info("Building codebooks in subspaces - BEGIN")
        for q in range(nsubq):
            logging.info("\tsubspace %d/%d", q, nsubq)
            lo, hi = q * dsub, (q + 1) * dsub
            # C-contiguous float32 copy of the q-th block of columns
            vs = np.require(vals[:, lo:hi],
                            requirements='C',
                            dtype=np.float32)
            ecdat['centroids'][q] = kmeans(vs, ksub, niter=100)
        logging.info("Building codebooks in subspaces - DONE")

        self.ecdat = ecdat