Example #1
0
 def test_random_lloyd_host_ptr(self):
     """Run random-init Lloyd on a pointer tuple, then feed malformed inputs.

     The samples are handed over as an ``(address, -1, shape)`` tuple built
     from the array's ``__array_interface__`` instead of the array itself.
     Afterwards a tuple whose address is a string must raise ValueError and
     a bare string must raise TypeError.
     """
     options = dict(init="random", device=0, verbosity=2, seed=3,
                    tolerance=0.05, yinyang_t=0)
     address = self.samples.__array_interface__["data"][0]
     shape = self.samples.shape
     with self.stdout:
         centroids, assignments = kmeans_cuda(
             (address, -1, shape), 50, **options)
     self.assertEqual(self._get_iters_number(self.stdout), 7)
     self.assertEqual(centroids.shape, (50, 2))
     self.assertEqual(assignments.shape, (13000, ))
     self._validate(centroids, assignments, 0.05)
     # Each malformed "samples" argument paired with the error it must raise.
     rejected = (
         (("bullshit", -1, shape), ValueError),
         ("bullshit", TypeError),
     )
     for bad_samples, error in rejected:
         with self.assertRaises(error):
             kmeans_cuda(bad_samples, 50, **options)
Example #2
0
 def test_256_features(self):
     """Cluster 1000 row-normalised 256-feature samples with the cosine metric.

     Only the number of iterations reported on the captured output is checked.
     """
     data = numpy.random.rand(1000, 256).astype(numpy.float32)
     lengths = numpy.linalg.norm(data, axis=1)
     # Scale every row to unit length in place.
     data /= lengths[:, None]
     with self.stdout:
         kmeans_cuda(data, 10, init="kmeans++", metric="cos", device=0,
                     verbosity=3, yinyang_t=0.1, seed=3)
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 9)
Example #3
0
File: test.py Project: src-d/kmcuda
 def test_256_features(self):
     """Cosine-metric k-means on 1000 unit-length rows of 256 features.

     The assertion covers only the iteration count parsed from the captured
     output.
     """
     features = numpy.random.rand(1000, 256).astype(numpy.float32)
     row_lengths = numpy.linalg.norm(features, axis=1)
     # In-place division keeps the float32 dtype of the array.
     features /= row_lengths[:, None]
     options = dict(init="kmeans++", metric="cos", device=0, verbosity=3,
                    yinyang_t=0.1, seed=3)
     with self.stdout:
         kmeans_cuda(features, 10, **options)
     self.assertEqual(self._get_iters_number(self.stdout), 9)
Example #4
0
 def test_fp16_kmeanspp_validate(self):
     """Compare the leading centroids of a float32 run and a float16 run.

     Both runs use identical options; the first four centroids must agree
     to within 1.5e-4 (maximum absolute difference).
     """
     options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                    tolerance=1.0, yinyang_t=0)
     centroids32, _ = kmeans_cuda(self.samples, 50, **options)
     half_samples = self.samples.astype(numpy.float16)
     centroids16, _ = kmeans_cuda(half_samples, 50, **options)
     difference = abs(centroids16[:4] - centroids32[:4])
     self.assertLess(numpy.max(difference), 1.5e-4)
Example #5
0
File: test.py Project: src-d/kmcuda
 def test_fp16_kmeanspp_validate(self):
     """Check that float16 input reproduces the float32 centroids closely.

     The same kmeans++ configuration is run on the original samples and on
     their float16 copy; the largest absolute gap over the first four
     centroids has to stay below 1.5e-4.
     """
     shared = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                   tolerance=1.0, yinyang_t=0)
     centroids32, _ = kmeans_cuda(self.samples, 50, **shared)
     samples16 = self.samples.astype(numpy.float16)
     centroids16, _ = kmeans_cuda(samples16, 50, **shared)
     head16, head32 = centroids16[:4], centroids32[:4]
     delta = numpy.max(abs(head16 - head32))
     self.assertLess(delta, 1.5e-4)
Example #6
0
 def test_afkmc2_big_k_lloyd(self):
     """Run 200-cluster Lloyd initialised with ``("afkmc2", 100)``.

     Only the iteration count parsed from the captured output is asserted.
     """
     with self.stdout:
         kmeans_cuda(
             self.samples, 200, init=("afkmc2", 100), device=0,
             verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 4)
Example #7
0
File: test.py Project: src-d/kmcuda
 def test_import_lloyd(self):
     """Seed a second run with the centroids produced by a coarser first run.

     The first call uses tolerance 0.25 with random init; its centroids are
     passed as ``init`` to a second call with tolerance 0.05. Both calls
     write to the same captured output.
     """
     shared = dict(device=1, verbosity=2, seed=3, yinyang_t=0)
     with self.stdout:
         coarse_centroids, _ = kmeans_cuda(
             self.samples, 50, init="random", tolerance=0.25, **shared)
         centroids, assignments = kmeans_cuda(
             self.samples, 50, init=coarse_centroids, tolerance=0.05,
             **shared)
     # one is 2nd stage init
     self.assertEqual(self._get_iters_number(self.stdout), 8)
     self._validate(centroids, assignments, 0.05)
Example #8
0
 def test_import_lloyd(self):
     """Two-stage clustering: a coarse run provides the init of a finer run.

     Stage one (tolerance 0.25, random init) and stage two (tolerance 0.05,
     init taken from stage one's centroids) share one captured output, so
     the asserted iteration count covers both.
     """
     common = dict(device=1, verbosity=2, seed=3, yinyang_t=0)
     with self.stdout:
         first_stage = kmeans_cuda(
             self.samples, 50, init="random", tolerance=0.25, **common)
         stage_one_centroids, _ = first_stage
         centroids, assignments = kmeans_cuda(
             self.samples, 50, init=stage_one_centroids, tolerance=0.05,
             **common)
     # one is 2nd stage init
     total_iterations = self._get_iters_number(self.stdout)
     self.assertEqual(total_iterations, 8)
     self._validate(centroids, assignments, 0.05)
Example #9
0
 def test_crap(self):
     """Check that invalid arguments raise the expected exception types."""
     verbose = dict(device=1, verbosity=2, seed=3, tolerance=0.05,
                    yinyang_t=0)
     # (expected error, clusters argument, keyword arguments) per bad call.
     bad_calls = (
         # the cluster count is a string
         (TypeError, "bullshit", dict(init="random", **verbose)),
         # the init name is not a known method
         (ValueError, 50, dict(init="bullshit", **verbose)),
         # tolerance of 100
         (ValueError, 50,
          dict(init="random", device=1, tolerance=100, yinyang_t=0)),
         # yinyang_t of 10
         (ValueError, 50, dict(init="random", device=1, yinyang_t=10)),
     )
     for error, clusters, options in bad_calls:
         with self.assertRaises(error):
             kmeans_cuda(self.samples, clusters, **options)
Example #10
0
 def test_fp16(self):
     """Compare knn_cuda on float16 samples with sklearn's NearestNeighbors.

     Fewer than 500 neighbour indices may differ between the two results.
     """
     half_samples = self.samples.astype(numpy.float16)
     clustering = kmeans_cuda(half_samples, 50, seed=777, verbosity=1)
     gpu_neighbors = knn_cuda(
         10, half_samples, *clustering, verbosity=2, device=1)
     finder = NearestNeighbors(n_neighbors=10).fit(half_samples)
     reference = finder.kneighbors()[1]
     mismatches = (gpu_neighbors != reference).sum()
     print("diff: %d" % mismatches)
     self.assertTrue(mismatches < 500)
Example #11
0
 def _test_large(self, k, dev):
     """Check knn_cuda on 40000 random 48-feature samples.

     Args:
         k: number of neighbours requested from knn_cuda.
         dev: value forwarded to knn_cuda as ``device``.

     The k-means result (800 clusters) is cached in a pickle under /tmp and
     reused when it can be loaded. For every sample the returned neighbours
     must be ordered by non-decreasing distance (within 3e-7), and 100
     randomly drawn samples that are not in the neighbour list must not be
     closer than the last neighbour.
     """
     cache = "/tmp/kmcuda_knn_cache_large.pickle"
     samples = numpy.random.rand(40000, 48).astype(numpy.float32)
     samples[:10000] += 1.0
     samples[10000:20000] -= 1.0
     samples[20000:30000, 0] += 2.0
     samples[30000:, 0] -= 2.0
     try:
         # NOTE(review): unpickling from a fixed /tmp path trusts whoever can
         # write there; acceptable for a local test only.
         # NOTE(review): the samples above are drawn without a visible seed,
         # so a cached result may stem from different samples - confirm.
         with open(cache, "rb") as fin:
             ca = pickle.load(fin)
     except Exception:
         # Missing or unreadable cache: recompute and refresh it. Catching
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         ca = kmeans_cuda(samples, 800, seed=777, verbosity=1)
         with open(cache, "wb") as fout:
             pickle.dump(ca, fout, protocol=-1)
     # x != x holds only for NaN, so this prints rows whose first centroid
     # coordinate is NaN.
     print("nan: %s" % numpy.nonzero(ca[0][:, 0] != ca[0][:, 0])[0])
     nb = knn_cuda(k, samples, *ca, verbosity=2, device=dev)
     print("checking...")
     for i, sn in enumerate(nb):
         # Neighbours must come in non-decreasing distance order.
         for j in range(len(sn) - 1):
             self.assertLessEqual(
                 numpy.linalg.norm(samples[i] - samples[sn[j]]) -
                 numpy.linalg.norm(samples[i] - samples[sn[j + 1]]),
                 .0000003)
         mdist = numpy.linalg.norm(samples[i] - samples[sn[-1]])
         sn = set(sn)
         # Spot-check: no random non-neighbour may beat the farthest one.
         for r in numpy.random.randint(0, high=len(samples), size=100):
             if r not in sn:
                 if i == r:
                     continue
                 try:
                     self.assertLessEqual(
                         mdist, numpy.linalg.norm(samples[i] - samples[r]))
                 except AssertionError as e:
                     # Report the offending pair before failing.
                     print(i, r)
                     raise e from None
Example #12
0
 def test_kmeanspp_yinyang(self):
     """Run kmeans++ init with yinyang_t=0.1 and validate at tolerance 0.01."""
     tolerance = 0.01
     options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                    tolerance=tolerance, yinyang_t=0.1)
     with self.stdout:
         centroids, assignments = kmeans_cuda(self.samples, 50, **options)
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 15 + 3)
     self._validate(centroids, assignments, tolerance)
Example #13
0
 def test_afkmc2_lloyd_2gpus(self):
     """Run afkmc2-initialised Lloyd with ``device=0`` and validate the result."""
     tolerance = 0.05
     with self.stdout:
         result = kmeans_cuda(
             self.samples, 50, init="afkmc2", device=0, verbosity=2,
             seed=3, tolerance=tolerance, yinyang_t=0)
         centroids, assignments = result
     self.assertEqual(self._get_iters_number(self.stdout), 4)
     self._validate(centroids, assignments, tolerance)
Example #14
0
File: test.py Project: src-d/kmcuda
 def test_kmeanspp_yinyang(self):
     """kmeans++ initialisation combined with ``yinyang_t=0.1``.

     Asserts the iteration count from the captured output and validates the
     clustering at the same 0.01 tolerance that was passed to the solver.
     """
     with self.stdout:
         outcome = kmeans_cuda(
             self.samples, 50, init="kmeans++", device=1, verbosity=2,
             seed=3, tolerance=0.01, yinyang_t=0.1)
         centroids, assignments = outcome
     self.assertEqual(self._get_iters_number(self.stdout), 15 + 3)
     self._validate(centroids, assignments, 0.01)
Example #15
0
 def cluster(bins, flattened_val, val, inertia=None):
     """Quantise ``flattened_val`` against a k-means codebook.

     Args:
         bins: number of codebook entries requested.
         flattened_val: array of values; it is reshaped to a single column
             before clustering.
         val: original array; only its ``dtype`` and ``shape`` are used.
         inertia: optional list; when given, the clustering inertia is
             appended to it.

     Returns:
         ``(compressed_val, codes, codebook)`` where the first two come from
         ``ConstantStore.codes_and_compressed`` and ``codebook`` is the
         flattened centre array cast to ``val.dtype``.
     """
     column = flattened_val.reshape((-1, 1))
     if USE_KMEANS_CUDA and kmeans_cuda:
         kmeans = None
         invalids = None
         int_bins = bins
         # Re-run with extra clusters until at least `bins` centres are
         # finite (non-finite centres are dropped below).
         while invalids is None or int_bins - invalids < bins:
             if invalids:
                 int_bins = bins + invalids
             codebook, _ = kmeans_cuda(column, int_bins, device=1)
             # The codebook has one column, so "row contains NaN/-inf/+inf"
             # is the same as "row is not finite".
             finite_rows = np.isfinite(codebook).all(axis=1)
             invalids = np.count_nonzero(~finite_rows)
         codebook = codebook[finite_rows]
     else:
         kmeans = KMeans(n_clusters=bins)
         kmeans.fit(column)
         codebook = kmeans.cluster_centers_
     centers = codebook
     codebook = codebook.astype(val.dtype).flatten()
     compressed_val, codes = ConstantStore.codes_and_compressed(
         flattened_val, codebook, val.shape)
     if inertia is not None:
         if kmeans is not None:
             inertia.append(kmeans.inertia_)
         else:
             # No estimator object exists on the CUDA path (the original
             # code raised here), so compute the sum of squared distances
             # to the nearest centre directly; the data is one-dimensional,
             # so a sorted search finds the nearest centre.
             ordered = np.sort(
                 np.asarray(centers, dtype=np.float64).ravel())
             values = np.asarray(flattened_val, dtype=np.float64).ravel()
             upper = np.minimum(
                 np.searchsorted(ordered, values), ordered.size - 1)
             lower = np.maximum(upper - 1, 0)
             nearest = np.minimum(np.abs(values - ordered[lower]),
                                  np.abs(values - ordered[upper]))
             inertia.append(float(np.sum(nearest ** 2)))
     return compressed_val, codes, codebook
Example #16
0
 def test_random_lloyd_same_device_ptr_all_devs(self):
     """Cluster samples supplied as a ``(pointer, 0, shape)`` device tuple.

     The samples are copied to a buffer allocated through ``cuda.api``; the
     two values returned by kmeans_cuda must be ints, are copied back to
     the host for validation, and the sample buffer must come back
     unchanged. All three buffers are freed in the ``finally`` clause.
     """
     cuda = CUDA()
     options = dict(init="random", device=0, verbosity=2, seed=3,
                    tolerance=0.05, yinyang_t=0)
     # 4 bytes per element.
     devptr = cuda.api.allocate(self.samples.size * 4, 0)
     cuda.api.copy_to_device(devptr, self.samples)
     with self.stdout:
         cdevptr, adevptr = kmeans_cuda(
             (devptr, 0, self.samples.shape), 50, **options)
     for returned_ptr in (cdevptr, adevptr):
         cuda.api.wrap(returned_ptr, 0)
     try:
         self.assertEqual(self._get_iters_number(self.stdout), 7)
         for returned_ptr in (cdevptr, adevptr):
             self.assertIsInstance(returned_ptr, int)
         flat_centroids = cuda.api.copy_to_host(cdevptr, 100, numpy.float32)
         centroids = flat_centroids.reshape((50, 2))
         assignments = cuda.api.copy_to_host(adevptr, 13000, numpy.uint32)
         self._validate(centroids, assignments, 0.05)
         new_samples = cuda.api.copy_to_host(
             devptr, self.samples.size, numpy.float32)
     finally:
         for buffer_ptr in (devptr, cdevptr, adevptr):
             cuda.api.free(buffer_ptr)
     unchanged = self.samples.ravel() == new_samples.ravel()
     self.assertTrue(unchanged.all())
Example #17
0
 def test_hostptr(self):
     """Call knn_cuda with pointer tuples and with malformed arguments.

     The valid call must reproduce sklearn's NearestNeighbors exactly; each
     malformed call must raise the listed exception type.
     """
     cents, asses = kmeans_cuda(self.samples, 50, seed=777, verbosity=1)
     samples_ptr = self.samples.__array_interface__["data"][0]
     centroids_ptr = cents.__array_interface__["data"][0]
     asses_ptr = asses.__array_interface__["data"][0]
     shape = self.samples.shape
     n_centroids = len(cents)
     nb = knn_cuda(10, (samples_ptr, -1, shape),
                   (centroids_ptr, n_centroids), asses_ptr, verbosity=2)
     bn = NearestNeighbors(n_neighbors=10).fit(self.samples).kneighbors()[1]
     print("diff: %d" % (nb != bn).sum())
     self.assertTrue((nb == bn).all())
     # NOTE(review): the last three cases pass the literal string
     # "samples_ptr" as the address, not the samples_ptr variable, so the
     # samples argument is itself malformed there - confirm this is intended.
     literal_samples = ("samples_ptr", -1, shape)
     # (expected error, samples, centroids, assignments) per bad call.
     bad_calls = (
         (ValueError, ("bullshit", -1, shape),
          (centroids_ptr, n_centroids), asses_ptr),
         (TypeError, "bullshit",
          (centroids_ptr, n_centroids), asses_ptr),
         (ValueError, literal_samples,
          ("bullshit", n_centroids), asses_ptr),
         (ValueError, literal_samples, "bullshit", asses_ptr),
         (ValueError, literal_samples,
          (centroids_ptr, n_centroids), "bullshit"),
     )
     for error, samples_arg, centroids_arg, assignments_arg in bad_calls:
         with self.assertRaises(error):
             knn_cuda(10, samples_arg, centroids_arg, assignments_arg,
                      verbosity=2)
Example #18
0
def k_means_vector_gpu_fp32(weight_vector,
                            n_clusters,
                            verbosity=0,
                            seed=int(time.time()),
                            gpu_id=7):
    """Replace every row of ``weight_vector`` by the centre of its cluster.

    Args:
        weight_vector: 2-D array whose rows are the vectors to cluster.
        n_clusters: number of clusters.
        verbosity: forwarded to ``kmeans_cuda``.
        seed: seed for the k-means++ initialisation and for ``kmeans_cuda``.
            NOTE: the default is evaluated once, when the function is
            defined, so all calls relying on it share the same seed.
        gpu_id: forwarded to ``kmeans_cuda`` as ``device``.

    Returns:
        An array with the shape of ``weight_vector``. With one cluster
        every row is the mean row. When the vectors have a single column,
        or there are exactly as many rows as clusters, the input is
        returned unchanged. Otherwise a new float32 array is returned that
        holds, for each row, the centre of the cluster it was assigned to.
    """
    if n_clusters == 1:
        # The single centroid is the mean over the rows (axis 0); averaging
        # over axis 1 would produce an (N, N) result after tiling.
        mean_sample = np.mean(weight_vector, axis=0)
        return np.tile(mean_sample, (weight_vector.shape[0], 1))
    if weight_vector.shape[1] == 1:
        return weight_vector
    if weight_vector.shape[0] == n_clusters:
        return weight_vector

    # NOTE(review): _k_init is a private scikit-learn helper; its location
    # and signature may differ between scikit-learn versions - confirm.
    init_centers = sklearn.cluster.k_means_._k_init(
        X=weight_vector,
        n_clusters=n_clusters,
        x_squared_norms=row_norms(weight_vector, squared=True),
        random_state=RandomState(seed))
    centers, labels = kmeans_cuda(samples=weight_vector,
                                  clusters=n_clusters,
                                  init=init_centers,
                                  yinyang_t=0,
                                  seed=seed,
                                  device=gpu_id,
                                  verbosity=verbosity)
    # Gather the centre of every row's cluster in one indexing operation;
    # the copy is float32 like the buffer the row-by-row loop used to fill.
    return centers[labels, :].astype(np.float32)
Example #19
0
    def __init__(self, X, n_clusters, n_init=3, method="kmcuda"):
        """Cluster ``X`` and keep centres, labels and one row index per cluster.

        Args:
            X: 2-D array of samples, one per row.
            n_clusters: number of clusters.
            n_init: number of restarts. The "kmcuda" path keeps the restart
                with the smallest summed sample-to-centre distance; the
                "sklearn" path forwards the value to ``KMeans``.
            method: "kmcuda" or "sklearn".

        Sets ``self.centers``, ``self.y_pred`` and ``self.centroids_idxs``
        (for each cluster, the index of a row of ``X`` close to its centre;
        0 for an empty cluster on the "kmcuda" path). The "kmcuda" path
        also sets ``self.inertia``.

        Raises:
            NotImplementedError: if ``method`` is neither of the two names.
        """
        if method == "kmcuda":
            self.inertia = np.inf
            # Loop-invariant: the float32 copy and the row index vector.
            samples32 = X.astype(np.float32)
            full_idx = np.arange(len(X))
            for _ in range(n_init):
                centers, y_pred = kmeans_cuda(samples32, n_clusters)
                centroids_idxs = []
                inertia = 0
                for i in range(n_clusters):
                    idx = full_idx[y_pred == i]
                    if len(idx) != 0:
                        X_sub = X[idx]
                        norm = la.norm(X_sub - centers[i], axis=1)
                        # Member of the cluster closest to its centre.
                        centroids_idxs.append(idx[norm.argmin()])
                        inertia += np.sum(norm)
                    else:
                        # Empty cluster: fall back to row 0.
                        centroids_idxs.append(0)
                centroids_idxs = np.array(centroids_idxs)

                if inertia < self.inertia:
                    # Record the new best score; without this every restart
                    # compared against infinity and the last one always won.
                    self.inertia = inertia
                    self.centers = centers
                    self.y_pred = y_pred
                    self.centroids_idxs = centroids_idxs
        elif method == "sklearn":
            km = KMeans(n_clusters, n_init=n_init)
            self.y_pred = km.fit_predict(X)
            self.centers = km.cluster_centers_
            self.centroids_idxs = km.transform(X).argmin(axis=0)
        else:
            raise NotImplementedError
def main(argv):
    """Cluster gallery embeddings with kmcuda and log NMI and recall scores.

    Parses ``argv``, loads the gallery identities and the ``emb`` dataset
    of the embeddings file, clusters the embeddings into as many clusters
    as there are distinct identities, and logs the normalized mutual
    information followed by the result of ``evaluate_emb``.
    """
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)

    gallery_pids, _gallery_fids = common.load_dataset(
        args.gallery_dataset, None)

    log_path = os.path.join(exp_root, "recall_eval")
    logging.config.dictConfig(common.get_logging_dict(log_path))
    log = logging.getLogger('recall_eval')

    with h5py.File(args.gallery_embeddings, 'r') as embeddings_file:
        gallery_embs = np.array(embeddings_file['emb'])

    # One cluster per distinct identity.
    num_clusters = len(np.unique(gallery_pids))
    print('Start clustering K ={}'.format(num_clusters))

    _centroids, assignments = kmeans_cuda(gallery_embs, num_clusters, seed=3)
    nmi = normalized_mutual_info_score(gallery_pids, assignments)
    log.info(exp_root)
    log.info('NMI :: {}'.format(nmi))
    log.info('Clustering complete')

    log.info('Eval with Recall-K')
    names, accs = evaluate_emb(gallery_embs, gallery_pids)
    for report in (names, accs):
        log.info(report)
Example #21
0
 def test_fp16_cosine_metric(self):
     """Cosine-metric clustering of float16 points on the unit circle.

     Four centroids are requested; each must have a norm within 5e-4 of 1,
     their rounded pairwise cosine distances must match the expected
     matrix, and the assignments must span labels 0 to 3.
     """
     n_points = 10000
     arr = numpy.empty((n_points, 2), dtype=numpy.float16)
     angs = numpy.random.rand(n_points) * 2 * numpy.pi
     for row, angle in enumerate(angs):
         arr[row] = numpy.sin(angle), numpy.cos(angle)
     with self.stdout:
         centroids, assignments = kmeans_cuda(
             arr, 4, init="kmeans++", metric="cos", device=1,
             verbosity=2, seed=3)
     self.assertEqual(self._get_iters_number(self.stdout), 5)
     self.assertEqual(len(centroids), 4)
     for centroid in centroids:
         self.assertTrue(0.9995 < numpy.linalg.norm(centroid) < 1.0005)
     expected = [
         [0, 2, 1, 1],
         [2, 0, 1, 1],
         [1, 1, 0, 2],
         [1, 1, 2, 0],
     ]
     dists = numpy.round(cosine_distances(centroids)).astype(int)
     self.assertTrue((dists == expected).all())
     self.assertEqual(numpy.min(assignments), 0)
     self.assertEqual(numpy.max(assignments), 3)
Example #22
0
 def _test_device_ptr(self, dev):
     """Chain kmeans_cuda and knn_cuda entirely through pointer tuples.

     Args:
         dev: value forwarded to knn_cuda as ``device`` (kmeans_cuda is
             always called with ``device=0`` here).

     The neighbour table copied back to the host must equal the one
     computed by sklearn's NearestNeighbors. All four buffers are freed in
     the ``finally`` clause.
     """
     cuda = CUDA()
     shape = self.samples.shape
     # 4 bytes per element.
     sdevptr = cuda.api.allocate(self.samples.size * 4, 0)
     cuda.api.copy_to_device(sdevptr, self.samples)
     kmeans_options = dict(init="random", device=0, verbosity=2, seed=3,
                           tolerance=0.05, yinyang_t=0)
     cdevptr, adevptr = kmeans_cuda((sdevptr, 0, shape), 50,
                                    **kmeans_options)
     cuda.api.wrap(cdevptr, 0)
     cuda.api.wrap(adevptr, 0)
     ndevptr = knn_cuda(10, (sdevptr, 0, shape), (cdevptr, 50), adevptr,
                        device=dev, verbosity=2)
     cuda.api.wrap(ndevptr, 0)
     try:
         flat_neighbors = cuda.api.copy_to_host(
             ndevptr, shape[0] * 10, numpy.uint32)
         nb = flat_neighbors.reshape((shape[0], 10))
         finder = NearestNeighbors(n_neighbors=10).fit(self.samples)
         bn = finder.kneighbors()[1]
         self.assertEqual((nb != bn).sum(), 0)
     finally:
         for buffer_ptr in (sdevptr, cdevptr, adevptr, ndevptr):
             cuda.api.free(buffer_ptr)
def basis_cluster(weight, num_basis, num_clusters, cuda=False):
    """Split `weight` column-wise into `num_basis` chunks and cluster each one

    Params:
        - weight: weight matrix to do basis clustering
        - num_basis: number of basis, also the dimension of coordinates
        - num_clusters: number of clusters per basis
        - cuda: cluster with libKMCUDA when true, scikit-learn otherwise

    Return:
        - basis: (Nb, Nc, E/Nb)the cluster centers for each basis.
        - coordinates: (V, Nb) the belongings for basis of each token.
    """
    chunks = weight.chunk(num_basis, dim=1)

    # Import only the backend that is actually used.
    if cuda:
        from libKMCUDA import kmeans_cuda
    else:
        from sklearn.cluster import KMeans
        estimator = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)

    basis, coordinates = [], []
    for chunk in chunks:
        if cuda:
            centers, labels = kmeans_cuda(chunk.numpy(), num_clusters, seed=7)
            # some clusters may have zero elements, thus the centroids becomes [nan] in libKMCUDA
            centers = np.nan_to_num(centers)
        else:
            estimator.fit(chunk.numpy())
            centers = estimator.cluster_centers_
            labels = estimator.labels_
        basis.append(torch.from_numpy(centers.astype('float')))
        coordinates.append(torch.from_numpy(labels.astype('int32')))

    stacked_basis = torch.stack(basis).float()  # Nb X Nc(clusters) X E/Nb
    stacked_coordinates = torch.stack(coordinates).t().long()  # V X Nb(number of basis)
    return stacked_basis, stacked_coordinates
Example #24
0
    def fit(self, raw_batch):
        """
        Fit the packet clusters on one batch of flows.

        https://github.com/src-d/kmcuda#python-api
        kmcuda is used instead of sklearn's KMeans for performance reasons.
        The centers found are stored and reused as the initialisation of
        the next call.
        :param raw_batch: table of flows; rows whose ``raw_packet1`` is 0
            are ignored
        :return: None
        """
        # do not consider single-packet flows
        multi_packet = raw_batch[raw_batch.raw_packet1 != 0]
        # form matrix (n_packet x (packet_size, IAT))
        features = multi_packet[self.raw_columns].values.reshape(-1, 2)
        # omit non_packet values
        features = drop_nan_packets(features)
        if self._cluster_centers is None:
            init_clusters = "k-means++"
        else:
            init_clusters = self._cluster_centers
        message = 'fitting on {} packets, init clusters from data: {}'.format(
            features.shape[0], isinstance(init_clusters, str))
        logger.info(message)
        scaled = self.scaler.transform(features)

        options = dict(tolerance=0.01,
                       init=init_clusters,
                       yinyang_t=0,
                       metric="L2",
                       average_distance=False,
                       seed=1,
                       device=0,
                       verbosity=1)
        centers, assignments = kmeans_cuda(samples=scaled,
                                           clusters=self.n_clusters,
                                           **options)
        self._cluster_centers = centers
        self._evaluate(scaled, centers[assignments])
Example #25
0
 def test_cosine_metric2(self):
     """Centroids of unit-length samples under the cosine metric have unit norm.

     Every returned centroid must have a norm within 1e-4 of 1.
     """
     samples = numpy.random.random((16000, 4)).astype(numpy.float32)
     lengths = numpy.linalg.norm(samples, axis=1)
     samples /= lengths[:, numpy.newaxis]
     centroids, _ = kmeans_cuda(
         samples, 50, metric="cos", verbosity=2, seed=3)
     for centroid in centroids:
         self.assertTrue(0.9999 < numpy.linalg.norm(centroid) < 1.0001)
Example #26
0
 def test_fp16_kmeanspp_lloyd(self):
     """kmeans++-initialised Lloyd on the float16 copy of the samples.

     The centroids are converted to float32 before validation.
     """
     tolerance = 0.05
     half_samples = self.samples.astype(numpy.float16)
     with self.stdout:
         half_centroids, assignments = kmeans_cuda(
             half_samples, 50, init="kmeans++", device=1, verbosity=2,
             seed=3, tolerance=tolerance, yinyang_t=0)
     self.assertEqual(self._get_iters_number(self.stdout), 5)
     self._validate(
         half_centroids.astype(numpy.float32), assignments, tolerance)
Example #27
0
File: test.py Project: src-d/kmcuda
 def test_fp16_afkmc2_lloyd(self):
     """afkmc2-initialised Lloyd on the float16 copy of the samples.

     The centroids are converted to float32 before validation.
     """
     tolerance = 0.05
     options = dict(init="afkmc2", device=1, verbosity=2, seed=3,
                    tolerance=tolerance, yinyang_t=0)
     with self.stdout:
         half_centroids, assignments = kmeans_cuda(
             self.samples.astype(numpy.float16), 50, **options)
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 4)
     centroids = half_centroids.astype(numpy.float32)
     self._validate(centroids, assignments, tolerance)
Example #28
0
 def test_random_lloyd_all_gpus(self):
     """Random-init Lloyd with ``device=0``; check count, shapes and quality."""
     options = dict(init="random", device=0, verbosity=2, seed=3,
                    tolerance=0.05, yinyang_t=0)
     with self.stdout:
         centroids, assignments = kmeans_cuda(self.samples, 50, **options)
     self.assertEqual(self._get_iters_number(self.stdout), 7)
     # Expected result shapes: 50 two-feature centroids, 13000 labels.
     for array, shape in ((centroids, (50, 2)), (assignments, (13000,))):
         self.assertEqual(array.shape, shape)
     self._validate(centroids, assignments, 0.05)
Example #29
0
File: test.py Project: src-d/kmcuda
 def test_random_lloyd_all_gpus(self):
     """Run random-init Lloyd with ``device=0`` and verify the outcome.

     Asserts the iteration count from the captured output, the shapes of
     both returned arrays, and the clustering quality at tolerance 0.05.
     """
     tolerance = 0.05
     with self.stdout:
         outcome = kmeans_cuda(
             self.samples, 50, init="random", device=0, verbosity=2,
             seed=3, tolerance=tolerance, yinyang_t=0)
         centroids, assignments = outcome
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 7)
     self.assertEqual(centroids.shape, (50, 2))
     self.assertEqual(assignments.shape, (13000,))
     self._validate(centroids, assignments, tolerance)
Example #30
0
 def test_random_lloyd_all_explicit_gpus(self):
     """Passing ``device=0xFFFF`` must raise ValueError."""
     options = dict(init="random", device=0xFFFF, verbosity=2, seed=3,
                    tolerance=0.05, yinyang_t=0)
     with self.assertRaises(ValueError):
         _centroids, _assignments = kmeans_cuda(self.samples, 50, **options)
Example #31
0
def kmeans_data(i_file, o_file, c_array):
    """Cluster the features stored in ``i_file`` once per entry of ``c_array``.

    For every requested cluster count the centroids are passed through
    ``delete_same_rows`` and ``sortd.custum_sort_matrix``, saved with
    ``np.save`` under ``o_file`` followed by the number of remaining
    centroid rows, and the saved ``.npy`` file is handed to
    ``rebm.rebuild_mnist``.

    Args:
        i_file: input accepted by ``np.load``; the loaded array is cast to
            float32 before clustering.
        o_file: string prefix of the output file names.
        c_array: iterable of cluster counts, processed in order.
    """
    feature = np.load(i_file).astype('float32')
    print(feature.shape)
    for n_clusters in c_array:
        # NOTE: a cosine-metric variant of this call (same arguments plus
        # metric="cos") was previously kept here as commented-out code.
        centroids, _ = kmeans_cuda(
            feature, n_clusters, init="random", yinyang_t=0, verbosity=1)
        # Presumably drops duplicate centroid rows (judging by the helper's
        # name) -- the row count below is taken after this step.
        center_feature = delete_same_rows(centroids)
        # Sort the matrix.
        center_feature, _ = sortd.custum_sort_matrix(center_feature, rule=True)
        out_path = o_file + str(center_feature.shape[0])
        # np.save appends ".npy" because out_path does not end with it.
        np.save(out_path, center_feature)
        # Rebuild the data from the file that was just written.
        rebm.rebuild_mnist(out_path + ".npy")
Example #32
0
 def test_fp16_kmeanspp_yinyang(self):
     """Cluster float16 samples with ``init="kmeans++"`` and ``yinyang_t=0.1``.

     The captured log must report ``19 + 5`` iterations, and the centroids,
     converted to float32, must pass ``self._validate`` at 0.01.
     """
     half_samples = self.samples.astype(numpy.float16)
     options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                    tolerance=0.01, yinyang_t=0.1)
     with self.stdout:
         centroids, assignments = kmeans_cuda(half_samples, 50, **options)
     # fp16 precision increases the number of iterations
     expected_iterations = 19 + 5
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, expected_iterations)
     self._validate(centroids.astype(numpy.float32), assignments, 0.01)
Example #33
0
 def _test_average_distance(self, dev):
     """Compare the average distance returned by kmeans_cuda with a host value.

     With ``average_distance=True`` the call yields a third value; it must
     lie within 1e-6 of the mean Euclidean distance between each sample and
     the centroid it was assigned to, recomputed here with numpy.

     Args:
         dev: value forwarded as the ``device`` argument of kmeans_cuda.
     """
     centroids, assignments, reported = kmeans_cuda(
         self.samples,
         50,
         init="kmeans++",
         device=dev,
         verbosity=2,
         seed=3,
         tolerance=0.05,
         yinyang_t=0,
         average_distance=True,
     )
     # Accumulate sample-to-centroid distances one sample at a time.
     total = 0.0
     for point, cluster in zip(self.samples, assignments):
         total += numpy.linalg.norm(point - centroids[cluster])
     expected = total / self.samples.shape[0]
     self.assertLess(numpy.abs(expected - reported), 1e-6)
Example #34
0
File: test.py Project: src-d/kmcuda
 def test_fp16_kmeanspp_yinyang(self):
     """Cluster float16 samples with ``init="kmeans++"`` and ``yinyang_t=0.1``.

     The captured log must report ``16 + 7`` iterations, and the centroids,
     converted to float32, must pass ``self._validate`` at 0.0105 (the
     clustering itself runs with tolerance 0.01).
     """
     half_samples = self.samples.astype(numpy.float16)
     options = dict(init="kmeans++", device=1, verbosity=2, seed=3,
                    tolerance=0.01, yinyang_t=0.1)
     with self.stdout:
         centroids, assignments = kmeans_cuda(half_samples, 50, **options)
     # fp16 precision increases the number of iterations
     expected_iterations = 16 + 7
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, expected_iterations)
     self._validate(centroids.astype(numpy.float32), assignments, 0.0105)
Example #35
0
File: test.py Project: src-d/kmcuda
 def test_random_lloyd_host_ptr(self):
     """Pass the samples as an ``(address, -1, shape)`` tuple instead of an array.

     The address comes from the array's ``__array_interface__``.  The valid
     call must behave like the array-based test (7 iterations, (50, 2)
     centroids, 13000 assignments); a non-integer address in the tuple
     must raise ValueError and a bare string must raise TypeError.
     """
     address = self.samples.__array_interface__["data"][0]
     shape = self.samples.shape
     options = dict(init="random", device=0, verbosity=2, seed=3,
                    tolerance=0.05, yinyang_t=0)

     with self.stdout:
         centroids, assignments = kmeans_cuda(
             (address, -1, shape), 50, **options)
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 7)
     self.assertEqual(centroids.shape, (50, 2))
     self.assertEqual(assignments.shape, (13000,))
     self._validate(centroids, assignments, 0.05)

     # Malformed sample descriptors must be rejected.
     with self.assertRaises(ValueError):
         kmeans_cuda(("bullshit", -1, shape), 50, **options)
     with self.assertRaises(TypeError):
         kmeans_cuda("bullshit", 50, **options)
Example #36
0
 def test_cosine_metric(self):
     """Compare knn_cuda with scikit-learn under the angular metric.

     Samples are scaled to unit length, clustered with kmeans_cuda and fed
     to knn_cuda (40 neighbours).  The number of entries that differ from a
     NearestNeighbors reference is printed and must not exceed 114918.
     """
     def angular_distance(x, y):
         # Clamp the dot product into [-1, 1] before taking arccos.
         return numpy.arccos(max(min(x.dot(y), 1), -1))

     samples = self.samples.copy()
     row_norms = numpy.linalg.norm(samples, axis=1)
     samples /= row_norms.reshape(-1, 1)

     clustering = kmeans_cuda(
         samples, 50, seed=777, verbosity=1, metric="angular")
     gpu_neighbors = knn_cuda(
         40, samples, *clustering, verbosity=2, device=1, metric="angular")

     reference = NearestNeighbors(n_neighbors=40, metric=angular_distance)
     ref_neighbors = reference.fit(samples).kneighbors()[1]

     mismatches = (gpu_neighbors != ref_neighbors).sum()
     print("diff: %d" % mismatches)
     self.assertLessEqual(mismatches, 114918)
 def fit(self, dscData):
     """Cluster ``dscData`` with kmcu.kmeans_cuda and store the result on self.

     ``seed`` is only forwarded when ``self.seed`` is not None.  The raw
     result is kept in ``self.kmdata``; its first item becomes
     ``self.centroids_`` (NaN entries are overwritten in place with -2 and
     a warning is printed when any were found) and its second item becomes
     ``self.labels_``.

     Returns:
         self
     """
     options = dict(metric=self.metric, verbosity=self.verbosity)
     if self.seed is not None:
         options["seed"] = self.seed
     self.kmdata = kmcu.kmeans_cuda(dscData, self.n_clusters, **options)

     self.centroids_ = self.kmdata[0]
     nan_mask = np.isnan(self.centroids_)
     nan_count = nan_mask.sum()
     if nan_count > 0:
         print('\t!!!warning!!! found {} NAN in kmean-centroids'.format(
             nan_count))
     # centroids_ is the same object as kmdata[0], so this edits both.
     self.centroids_[nan_mask] = -2.
     self.labels_ = self.kmdata[1]
     return self
Example #38
0
 def fit(self, X):
     """Compute cluster centres with kmeans_cuda and install them on kmeans_obj.

     ``X`` is cast to float32 for clustering; the first item of the result,
     again as float32, is stored in ``self.cluster_centers_`` and on
     ``self.kmeans_obj``.  When the wrapped object has a ``_check_params``
     method it is called with a zero array shaped like ``X``.

     Returns:
         ``self.kmeans_obj``
     """
     logging.info('Using GPU-accelerated K-Means...')
     result = kmeans_cuda(
         X.astype(np.float32),
         clusters=self.k,
         seed=self.seed,
         init=self.init,
     )
     self.cluster_centers_ = result[0].astype(np.float32)
     self.kmeans_obj.cluster_centers_ = self.cluster_centers_
     if hasattr(self.kmeans_obj, '_check_params'):
         placeholder = np.zeros_like(X)
         self.kmeans_obj._check_params(placeholder)  # properly initialize
     return self.kmeans_obj
Example #39
0
File: test.py Project: src-d/kmcuda
 def test_crap(self):
     """Check that invalid kmeans_cuda arguments raise the expected errors.

     Each table row is (expected exception, clusters argument, keyword
     arguments); the rows are tried in order and the first one that does
     not raise fails the test.
     """
     invalid_calls = (
         # non-numeric cluster count
         (TypeError, "bullshit",
          dict(init="random", device=1, verbosity=2, seed=3,
               tolerance=0.05, yinyang_t=0)),
         # unknown init method
         (ValueError, 50,
          dict(init="bullshit", device=1, verbosity=2, seed=3,
               tolerance=0.05, yinyang_t=0)),
         # tolerance of 100
         (ValueError, 50,
          dict(init="random", device=1, tolerance=100, yinyang_t=0)),
         # yinyang_t of 10
         (ValueError, 50,
          dict(init="random", device=1, yinyang_t=10)),
     )
     for expected_error, clusters, options in invalid_calls:
         with self.assertRaises(expected_error):
             kmeans_cuda(self.samples, clusters, **options)
Example #40
0
File: test.py Project: src-d/kmcuda
 def test_fp16_random_lloyd(self):
     """Cluster float16 samples with random init and check dtype, refs, shapes.

     Besides the usual iteration-count, shape and ``self._validate`` checks,
     this test asserts that the returned centroids keep the float16 dtype
     and that the reference counts of the involved arrays are exactly 2.
     Statement order matters here: any extra name bound to these arrays
     would change the reference counts.
     """
     samples = self.samples.astype(numpy.float16)
     with self.stdout:
         centroids, assignments = kmeans_cuda(
             samples, 50, init="random", device=1,
             verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
     # The output dtype must follow the float16 input.
     self.assertEqual(centroids.dtype, numpy.float16)
     # From here on ``centroids`` names the float32 copy, not the array
     # returned by kmeans_cuda.
     centroids = centroids.astype(numpy.float32)
     self.assertEqual(self._get_iters_number(self.stdout), 7)
     # sys.getrefcount() counts its own temporary argument reference, so 2
     # means exactly one other reference (the local name or the attribute).
     self.assertEqual(sys.getrefcount(centroids), 2)
     self.assertEqual(sys.getrefcount(assignments), 2)
     self.assertEqual(sys.getrefcount(self.samples), 2)
     self.assertEqual(centroids.shape, (50, 2))
     self.assertEqual(assignments.shape, (13000,))
     self._validate(centroids, assignments, 0.05)
Example #41
0
File: test.py Project: src-d/kmcuda
 def test_kmeanspp_lloyd_uint32_overflow(self):
     """Run k-means++ on a 167772160 x 8 float32 array built by tiling.

     The array holds 5 GiB of data, which is more than a 32-bit unsigned
     byte count can express (presumably the overflow the test name refers
     to).  The test expects 2 logged iterations and is skipped when
     MemoryError is raised.
     """
     print("initializing samples...")
     samples = numpy.empty((167772160, 8), dtype=numpy.float32)
     # Four copies of the fixture side by side give one 8-column tile.
     tile = numpy.hstack((self.samples,) * 4)
     total_rows = samples.shape[0]
     tile_rows = self.samples.shape[0]
     for start in range(0, total_rows, tile_rows):
         # The last tile is truncated to the rows that are still free.
         stop = min(start + tile_rows, total_rows)
         samples[start:stop] = tile[:stop - start]
     print("running k-means...")
     try:
         with self.stdout:
             centroids, assignments = kmeans_cuda(
                 samples,
                 50,
                 init="kmeans++",
                 device=0,
                 verbosity=2,
                 seed=3,
                 tolerance=0.142,
                 yinyang_t=0,
             )
         self.assertEqual(self._get_iters_number(self.stdout), 2)
     except MemoryError:
         self.skipTest("Not enough GPU memory.")
Example #42
0
File: test.py Project: src-d/kmcuda
 def _test_device_ptr(self, dev):
     """Run kmeans_cuda and knn_cuda on ``(pointer, 0, shape)`` sample tuples.

     The samples are copied into a buffer obtained from ``cuda.api``; the
     clustering results and the neighbour list come back as pointers that
     are passed to ``cuda.api.wrap``.  The neighbour list is copied to the
     host and must match scikit-learn's NearestNeighbors exactly.  All four
     buffers are freed afterwards.

     Args:
         dev: value forwarded as the ``device`` argument of knn_cuda
             (kmeans_cuda always runs with ``device=0`` here).
     """
     cuda = CUDA()
     api = cuda.api
     shape = self.samples.shape
     n_samples = shape[0]

     # 4 bytes per element -- assumes the fixture is float32; TODO confirm.
     samples_ptr = api.allocate(self.samples.size * 4, 0)
     api.copy_to_device(samples_ptr, self.samples)
     centroids_ptr, assignments_ptr = kmeans_cuda(
         (samples_ptr, 0, shape), 50, init="random", device=0,
         verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
     api.wrap(centroids_ptr, 0)
     api.wrap(assignments_ptr, 0)
     neighbors_ptr = knn_cuda(
         10, (samples_ptr, 0, shape), (centroids_ptr, 50), assignments_ptr,
         device=dev, verbosity=2)
     api.wrap(neighbors_ptr, 0)
     try:
         flat = api.copy_to_host(neighbors_ptr, n_samples * 10, numpy.uint32)
         gpu_neighbors = flat.reshape((n_samples, 10))
         reference = NearestNeighbors(n_neighbors=10).fit(self.samples)
         ref_neighbors = reference.kneighbors()[1]
         self.assertEqual((gpu_neighbors != ref_neighbors).sum(), 0)
     finally:
         for ptr in (samples_ptr, centroids_ptr, assignments_ptr,
                     neighbors_ptr):
             api.free(ptr)
Example #43
0
File: test.py Project: src-d/kmcuda
 def test_fp16_cosine_metric(self):
     """Cluster random float16 points on the unit circle with ``metric="cos"``.

     10000 points (sin a, cos a) with uniformly random angles are split
     into 4 clusters.  The test expects 5 logged iterations, centroids of
     (almost) unit length, a fixed pattern of rounded pairwise cosine
     distances, and assignments covering the labels 0..3.
     """
     angs = numpy.random.rand(10000) * 2 * numpy.pi
     arr = numpy.empty((10000, 2), dtype=numpy.float16)
     # Fill both columns with vectorized calls instead of a Python loop.
     arr[:, 0] = numpy.sin(angs)
     arr[:, 1] = numpy.cos(angs)
     with self.stdout:
         centroids, assignments = kmeans_cuda(
             arr, 4, init="kmeans++", metric="cos", device=1, verbosity=2,
             seed=3)
     self.assertEqual(self._get_iters_number(self.stdout), 5)
     self.assertEqual(len(centroids), 4)
     for c in centroids:
         norm = numpy.linalg.norm(c)
         self.assertTrue(0.9995 < norm < 1.0005)
     # After rounding, every centroid is at distance 2 from exactly one
     # other centroid and at distance 1 from the remaining two.
     dists = numpy.round(cosine_distances(centroids)).astype(int)
     self.assertTrue((dists == [
         [0, 2, 1, 1],
         [2, 0, 1, 1],
         [1, 1, 0, 2],
         [1, 1, 2, 0],
     ]).all())
     self.assertEqual(numpy.min(assignments), 0)
     self.assertEqual(numpy.max(assignments), 3)
Example #44
0
File: test.py Project: src-d/kmcuda
 def _test_small(self, k, dev, dmax=0):
     """Compare knn_cuda with scikit-learn for ``k`` neighbours.

     Args:
         k: number of neighbours requested from both implementations.
         dev: value forwarded as the ``device`` argument of knn_cuda.
         dmax: largest tolerated number of differing neighbour entries.
     """
     clustering = kmeans_cuda(self.samples, 50, seed=777, verbosity=1)
     gpu_neighbors = knn_cuda(
         k, self.samples, *clustering, verbosity=2, device=dev)
     reference = NearestNeighbors(n_neighbors=k).fit(self.samples)
     ref_neighbors = reference.kneighbors()[1]
     print("diff: %d" % (gpu_neighbors != ref_neighbors).sum())
     mismatches = (gpu_neighbors != ref_neighbors).sum()
     self.assertTrue(mismatches <= dmax)
Example #45
0
File: test.py Project: src-d/kmcuda
 def test_random_lloyd_all_explicit_gpus(self):
     """Check that ``device=0xFFFF`` makes kmeans_cuda raise ValueError.

     The return value is deliberately not unpacked inside the
     ``assertRaises`` block: a failed tuple unpacking raises ValueError
     too, so unpacking there could let the test pass even though
     kmeans_cuda itself did not raise.
     """
     with self.assertRaises(ValueError):
         kmeans_cuda(
             self.samples, 50, init="random", device=0xFFFF,
             verbosity=2, seed=3, tolerance=0.05, yinyang_t=0)
Example #46
0
File: test.py Project: src-d/kmcuda
 def test_afkmc2_big_k_lloyd(self):
     """Cluster into 200 groups with ``init=("afkmc2", 100)``.

     Only the iteration count parsed from the captured log is checked; it
     must equal 4.
     """
     options = dict(init=("afkmc2", 100), device=0, verbosity=2, seed=3,
                    tolerance=0.05, yinyang_t=0)
     with self.stdout:
         kmeans_cuda(self.samples, 200, **options)
     iterations = self._get_iters_number(self.stdout)
     self.assertEqual(iterations, 4)