def run_tests(nTests, nPts, nDim, nClusters, nReps=1, verbose = VERBOSE, print_times = PRINT_TIMES, verify = 1): # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]] # Generate nPts random data elements with nDim dimensions and nCluster random clusters, # then run kmeans for nReps and compare gpu and cpu results. This is repeated nTests times if(nPts * nDim *nClusters > CPU_SIZE_LIMIT): #print "Too big to verify wiht cpu calculation" verify = 0 # too big to run on cpu cpu_time = 0. gpu_time = 0. gpu_data_time = 0. gpu_module_time = 0. gpu_ccdist_time = 0. gpu_hdclosest_time = 0. gpu_init_time = 0. gpu_step3_time = 0. gpu_step4_time = 0. gpu_step56_time = 0. np.random.seed(SEED) data = np.random.rand(nDim, nPts).astype(np.float32) clusters = np.random.rand(nDim, nClusters).astype(np.float32) if verbose: print "data" print data print "\nclusters" print clusters nErrors = 0 # repeat this test nTests times for iTest in range(nTests): if verify: #run the cpu algorithm t1 = time.time() (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps) cpu_assign.shape = (nPts,) t2 = time.time() cpu_time += t2-t1 if verbose: print "------------------------ cpu results ------------------------" print "cpu_assignments" print cpu_assign print "cpu_clusters" print cpu_clusters print "-------------------------------------------------------------" #run the gpu algorithm t1 = time.time() (gpu_ccdist, gpu_hdClosest, gpu_assign, gpu_lower, gpu_upper, \ gpu_clusters, gpu_cluster_movement, \ data_time, module_time, init_time, ccdist_time, hdclosest_time, \ step3_time, step4_time, step56_time) = \ trikmeans_gpu(data, clusters, nReps, 1) t2 = time.time() gpu_time += t2-t1 gpu_data_time += data_time gpu_module_time += module_time gpu_ccdist_time += ccdist_time gpu_hdclosest_time += hdclosest_time gpu_init_time += init_time gpu_step3_time += step3_time gpu_step4_time += step4_time gpu_step56_time += step56_time if verbose: print "------------------------ gpu results ------------------------" print "gpu_assignments" print gpu_assign print "gpu_clusters" print gpu_clusters print "-------------------------------------------------------------" if verify: # calculate the number of data points in each cluster c = np.arange(nClusters) c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0) # verify the results... nErrors += verify_assignments(gpu_assign.get(), cpu_assign, data, gpu_clusters, cpu_clusters, verbose, iTest) nErrors += verify_clusters(gpu_clusters, cpu_clusters, cpu_assign, verbose, iTest) if print_times: print "\n---------------------------------------------" print "nPts =", nPts print "nDim =", nDim print "nClusters =", nClusters print "nReps =", nReps if verify: print "average cpu time (ms) =", cpu_time/nTests*1000. else: print "average cpu time (ms) = N/A" print "average gpu time (ms) =", gpu_time/nTests*1000. print " data time (ms) =", gpu_data_time/nTests*1000. print " module time (ms) =", gpu_module_time/nTests*1000. print " init time (ms) =", gpu_init_time/nTests*1000. print " ccdist time (ms) =", gpu_ccdist_time/nTests*1000. print " hdclosest time (ms) =", gpu_hdclosest_time/nTests*1000. print " step3 time (ms) =", gpu_step3_time/nTests*1000. print " step4 time (ms) =", gpu_step4_time/nTests*1000. print " step56 time (ms) =", gpu_step56_time/nTests*1000. print "---------------------------------------------" if verify: return nErrors else: return -1
def run_labels(data, nClusters, nReps, seed=SEED): random.seed(seed) # run py_kmeans.kmeans once to get a starting label assignment, # which will be used by the scipy routine and others clusters, dist, labels = py_kmeans.kmeans(data, nClusters, 1, 0) if VERBOSE: print "data" print data print "initial clusters:" print clusters (nPts, nDim) = data.shape nClusters = clusters.shape[0] print "[nPts:{0:6}][nDim:{1:4}][nClusters:{2:4}][nReps:{3:3}]...".format(nPts, nDim, nClusters, nReps), data2 = np.swapaxes(data, 0, 1).astype(np.float32).copy('C') clusters2 = np.swapaxes(clusters, 0, 1).astype(np.float32).copy('C') if VERBOSE: print "data2" print data2 print "clusters2" print clusters2 """ t1 = time.time() (cuda_clusters, cuda_labels) = cuda_kmeans.kmeans_gpu(data2, clusters2, nReps+1) if VERBOSE: print "cuda_kmeans labels:" print cuda_labels t2 = time.time() if PRINT_TIMES: print "\ncuda ", t2-t1 """ t1 = time.time() (tri_clusters, tri_labels) = cuda_kmeans_tri.trikmeans_gpu(data2, clusters2, nReps+1) if VERBOSE: print "cuda_kmeans_tri labels:" print tri_labels t2 = time.time() if PRINT_TIMES: print "tri ", t2-t1 t1 = time.time() labels_mpi = mpi_labels(data, nClusters, nReps+1, seed) if VERBOSE: print "mpi labels:" print labels_mpi[0] t2 = time.time() if PRINT_TIMES: print "mpi ", t2-t1 if scipyFlag: t1 = time.time() labels_scipy = scipy_labels(data, clusters, nReps) if VERBOSE: print "scipy labels:" print labels_scipy[0] t2 = time.time() if PRINT_TIMES: print "scipy", t2-t1 t1 = time.time() (cpu_clusters, cpu_labels) = cpu_kmeans.kmeans_cpu(data2, clusters2, nReps+1) if VERBOSE: print "cpu_kmeans labels:" print cpu_labels t2 = time.time() if PRINT_TIMES: print "cpu ", t2-t1 error = 0 if scipyFlag: try: np.testing.assert_array_equal(labels_mpi[0], labels_scipy[0]) except AssertionError: print "mpi<>scipy", error = 1 try: np.testing.assert_array_equal(labels_mpi[0], cpu_labels) except AssertionError: print "mpi<>cpu", error = 1 """ try: np.testing.assert_array_equal(cuda_labels, tri_labels) except AssertionError: print "cuda<>tri", error = 1 """ try: np.testing.assert_array_equal(tri_labels, cpu_labels) except AssertionError: print "tri<>cpu", error = 1 try: np.testing.assert_array_equal(labels_mpi[0], tri_labels) except AssertionError: print "tri<>mpi", error = 1 if error == 0: print "Labels OK ..." else: print ""
def run_tests(nTests, nPts, nDim, nClusters, nReps=1, verbose=VERBOSE, print_times=PRINT_TIMES, verify=1): # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]] # Generate nPts random data elements with nDim dimensions and nCluster random clusters, # then run kmeans for nReps and compare gpu and cpu results. This is repeated nTests times if (nPts * nDim * nClusters > CPU_SIZE_LIMIT): #print "Too big to verify wiht cpu calculation" verify = 0 # too big to run on cpu cpu_time = 0. gpu_time = 0. gpu_data_time = 0. gpu_module_time = 0. gpu_ccdist_time = 0. gpu_hdclosest_time = 0. gpu_init_time = 0. gpu_step3_time = 0. gpu_step4_time = 0. gpu_step56_time = 0. nErrors = 0 # repeat this test nTests times for iTest in range(nTests): np.random.seed(SEED + iTest) data = np.random.rand(nDim, nPts).astype(np.float32) clusters = np.random.rand(nDim, nClusters).astype(np.float32) if verbose: print "data" print data print "\nclusters" print clusters if verify: #run the cpu algorithm t1 = time.time() (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps) cpu_assign.shape = (nPts, ) t2 = time.time() cpu_time += t2 - t1 if verbose: print "------------------------ cpu results ------------------------" print "cpu_assignments" print cpu_assign print "cpu_clusters" print cpu_clusters print "-------------------------------------------------------------" #run the gpu algorithm t1 = time.time() (gpu_ccdist, gpu_hdClosest, gpu_assign, gpu_lower, gpu_upper, \ gpu_clusters, gpu_cluster_movement, \ data_time, module_time, init_time, ccdist_time, hdclosest_time, \ step3_time, step4_time, step56_time) = \ trikmeans_gpu(data, clusters, nReps, 1) t2 = time.time() gpu_time += t2 - t1 gpu_data_time += data_time gpu_module_time += module_time gpu_ccdist_time += ccdist_time gpu_hdclosest_time += hdclosest_time gpu_init_time += init_time gpu_step3_time += step3_time gpu_step4_time += step4_time gpu_step56_time += step56_time if verbose: print "------------------------ gpu results ------------------------" print "gpu_assignments" print gpu_assign print "gpu_clusters" print gpu_clusters print "-------------------------------------------------------------" if verify: # calculate the number of data points in each cluster c = np.arange(nClusters) c_counts = np.sum(cpu_assign.reshape(nPts, 1) == c, axis=0) # verify the results... err = verify_assignments(gpu_assign.get(), cpu_assign, data, gpu_clusters, cpu_clusters, verbose, iTest) err += verify_clusters(gpu_clusters, cpu_clusters, cpu_assign, verbose, iTest) if err: nErrors += 1 if print_times: print "\n---------------------------------------------" print "nPts =", nPts print "nDim =", nDim print "nClusters =", nClusters print "nReps =", nReps if verify: print "average cpu time (ms) =", cpu_time / nTests * 1000. else: print "average cpu time (ms) = N/A" print "average gpu time (ms) =", gpu_time / nTests * 1000. print " data time (ms) =", gpu_data_time / nTests * 1000. print " module time (ms) =", gpu_module_time / nTests * 1000. print " init time (ms) =", gpu_init_time / nTests * 1000. print " ccdist time (ms) =", gpu_ccdist_time / nTests * 1000. print " hdclosest time (ms) =", gpu_hdclosest_time / nTests * 1000. print " step3 time (ms) =", gpu_step3_time / nTests * 1000. print " step4 time (ms) =", gpu_step4_time / nTests * 1000. print " step56 time (ms) =", gpu_step56_time / nTests * 1000. print "---------------------------------------------" if verify: return nErrors else: return -1
def run_tests(nTests, nPts, nDim, nClusters, nReps=1, verbose = VERBOSE, print_times = PRINT_TIMES, verify = 1): # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]] # Generate nPts random data elements with nDim dimensions and nCluster random clusters, # then run kmeans for nReps and compare gpu and cpu results. This is repeated nTests times if(nPts * nDim *nClusters > CPU_SIZE_LIMIT): #print "Too big to verify wiht cpu calculation" verify = 0 # too big to run on cpu cpu_time = 0. gpu_time = 0. gpu_data_time = 0. gpu_module_time = 0. gpu_assign_time = 0. gpu_calc_time = 0. nErrors = 0 # repeat this test nTests times for iTest in range(nTests): np.random.seed(SEED+iTest) data = np.random.rand(nDim, nPts).astype(np.float32) clusters = np.random.rand(nDim, nClusters).astype(np.float32) if verbose: print "data" print data print "\nclusters" print clusters if verify: #run the cpu algorithm t1 = time.time() (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps) cpu_assign.shape = (nPts,) t2 = time.time() cpu_time += t2-t1 if verbose: print "------------------------ cpu results ------------------------" print "cpu_assignments" print cpu_assign print "cpu_clusters" print cpu_clusters print "-------------------------------------------------------------" #run the gpu algorithm t1 = time.time() (gpu_assign, gpu_clusters, \ data_time, module_time, assign_time, calc_time) = \ kmeans_gpu(data, clusters, nReps, 1) pycuda.autoinit.context.synchronize() t2 = time.time() gpu_time += t2-t1 gpu_data_time += data_time gpu_module_time += module_time gpu_assign_time += assign_time gpu_calc_time += calc_time if verbose: print "------------------------ gpu results ------------------------" print "gpu_assignments" print gpu_assign print "gpu_clusters" print gpu_clusters print "-------------------------------------------------------------" if verify: # calculate the number of data points in each cluster c = np.arange(nClusters) c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0) # verify the results... err = verify_assignments(gpu_assign.get(), cpu_assign, data, gpu_clusters, cpu_clusters, verbose, iTest) err += verify_clusters(gpu_clusters, cpu_clusters, cpu_assign, verbose, iTest) if err: nErrors += 1 if print_times: print "\n---------------------------------------------" print "nPts =", nPts print "nDim =", nDim print "nClusters =", nClusters print "nReps =", nReps if verify: print "average cpu time (ms) =", cpu_time/nTests*1000. else: print "average cpu time (ms) = N/A" print "average gpu time (ms) =", gpu_time/nTests*1000. print " data time (ms) =", gpu_data_time/nTests*1000. print " module time (ms) =", gpu_module_time/nTests*1000. print " assign time (ms) =", gpu_assign_time/nTests*1000. print " calc time (ms) =", gpu_calc_time/nTests*1000. print "---------------------------------------------" if verify: return nErrors else: return -1
def run_tests(nTests, nPts, nDim, nClusters, nReps, verbose = VERBOSE, print_times = PRINT_TIMES): # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]] # Generate nPts random data elements with nDim dimensions and nCluster random clusters, # then run kmeans for nReps and compare gpu and cpu results. This is repeated nTests times nErrors = 0 nCalcErrors = 0 cpu_time = 0. gpu_time = 0. gpu_data_time = 0. gpu_module_time = 0. gpu_assign_time = 0. gpu_calc_time = 0. np.random.seed(100); #data = np.random.rand(nDim, nPts).astype(np.float32) #clusters = np.random.rand(nDim, nClusters).astype(np.float32) data = np.array([[3., 4., 4., 9., 5., 6., 9., 5., 5., 7., 6.], \ [3., 3., 2., 2., 1., 2., 4., 2., 4., 4., 5.]]).astype(np.float32) clusters = np.array([[4.57142878, 7.75], \ [2.42857146, 3.75]]).astype(np.float32) print "nPts =", nPts print "nDim =", nDim print "nClusters =", nClusters print "nReps =", nReps if verbose: print "data" print data print "\nclusters" print clusters for i in range(nTests): t1 = time.time() (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps) print cpu_assign.shape cpu_assign.shape = (nPts,) t2 = time.time() cpu_time += t2-t1 if verbose: print "cpu assignments" print cpu_assign print "cpu clusters" print cpu_clusters print "cpu time = ", t2-t1 t1 = time.time() (gpu_clusters, gpu_assign, data_time, module_time, assign_time, calc_time) = kmeans_gpu(data, clusters, nReps, 1) t2 = time.time() gpu_time += t2-t1 gpu_data_time += data_time gpu_module_time += module_time gpu_assign_time += assign_time gpu_calc_time += calc_time if verbose: print "gpu assignments" print gpu_assign print "gpu clusters" print gpu_clusters print "gpu time = ", t2-t1 # calculate the number of data points in each cluster c = np.arange(nClusters) c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0) # verify results differences = sum(gpu_assign != cpu_assign) if(differences > 0): nErrors += 1 if verbose: print "Test",i,"*** ERROR ***", differences, "differences" iDiff = np.arange(nPts)[gpu_assign != cpu_assign] print "iDiff", iDiff for ii in iDiff: print "data point is", data[:,ii] print "cpu assigned to", cpu_assign[ii] print " with center at (cpu)", cpu_clusters[:,cpu_assign[ii]] print " with center at (gpu)", gpu_clusters.get()[:,cpu_assign[ii]] print "gpu assigned to", gpu_assign[ii] print " with center at (cpu)", cpu_clusters[:,gpu_assign[ii]] print " with center at (gpu)", gpu_clusters.get()[:, gpu_assign[ii]] else: if verbose: print "Cluster assignment OK" diff = np.max(np.abs(gpu_clusters.get() - cpu_clusters)) if verbose: print "max error in cluster centers is", diff print "avg error in cluster centers is", print np.mean(np.abs(gpu_clusters.get()-cpu_clusters)) if diff > 1e-7 * max(c_counts) or math.isnan(diff): nCalcErrors += 1 if verbose: print "Test",i,"*** ERROR *** max diff was", diff print else: if verbose: print "Test", i, "OK" if print_times: print "\n---------------------------------------------" print "nPts =", nPts print "nDim =", nDim print "nClusters =", nClusters print "nReps =", nReps print "Assignment errors =", nErrors, "out of", nTests, "tests" print "Calculation errors =", nCalcErrors, "out of", nTests, "tests" print "average cpu time (ms) =", cpu_time/nTests*1000. print "average gpu time (ms) =", gpu_time/nTests*1000. print " data time (ms) =", gpu_data_time/nTests*1000. print " module time (ms) =", gpu_module_time/nTests*1000. print " assign time (ms) =", gpu_assign_time/nTests*1000. print " calc time (ms) =", gpu_calc_time/nTests*1000. print "---------------------------------------------" return nErrors + nCalcErrors
def run_labels(data, nClusters, nReps, skip_cpu = SKIP_CPU, seed=SEED): random.seed(seed) # run py_kmeans.kmeans once to get a starting label assignment, # which will be used by the scipy routine and others clusters, dist, labels = py_kmeans.kmeans(data, nClusters, 1, 0) if VERBOSE: print "data" print data print "initial clusters:" print clusters (nPts, nDim) = data.shape nClusters = clusters.shape[0] print "[nPts:{0:6}][nDim:{1:4}][nClusters:{2:4}][nReps:{3:3}]...".format(nPts, nDim, nClusters, nReps), data2 = np.swapaxes(data, 0, 1).astype(np.float32).copy('C') clusters2 = np.swapaxes(clusters, 0, 1).astype(np.float32).copy('C') if VERBOSE: print "data2" print data2 print "clusters2" print clusters2 """ t1 = time.time() (cuda_clusters, cuda_labels) = cuda_kmeans.kmeans_gpu(data2, clusters2, nReps+1) if VERBOSE: print "cuda_kmeans labels:" print cuda_labels t2 = time.time() if PRINT_TIMES: print "\ncuda ", t2-t1 """ t1 = time.time() (tri_clusters, tri_labels) = cuda_kmeans_tri.trikmeans_gpu(data2, clusters2, nReps+1) if VERBOSE: print "\ncuda_kmeans_tri labels:" print tri_labels t2 = time.time() if PRINT_TIMES: print "\ntri ", t2-t1 t1 = time.time() labels_mpi = mpi_labels(data, nClusters, nReps+1, seed) if VERBOSE: print "mpi labels:" print labels_mpi[0] t2 = time.time() if PRINT_TIMES: print "mpi ", t2-t1 if scipyFlag: t1 = time.time() labels_scipy = scipy_labels(data, clusters, nReps) if VERBOSE: print "scipy labels:" print labels_scipy[0] t2 = time.time() if PRINT_TIMES: print "scipy", t2-t1 t1 = time.time() if not skip_cpu: (cpu_clusters, cpu_labels) = cpu_kmeans.kmeans_cpu(data2, clusters2, nReps+1) if VERBOSE: print "cpu_kmeans labels:" print cpu_labels t2 = time.time() if PRINT_TIMES: print "cpu ", t2-t1 error = 0 if scipyFlag: try: np.testing.assert_array_equal(labels_mpi[0], labels_scipy[0]) except AssertionError: print "mpi<>scipy", error = 1 if not skip_cpu: try: np.testing.assert_array_equal(labels_mpi[0], cpu_labels) except AssertionError: print "mpi<>cpu", error = 1 try: np.testing.assert_array_equal(tri_labels, cpu_labels) except AssertionError: print "tri<>cpu", error = 1 """ try: np.testing.assert_array_equal(cuda_labels, tri_labels) except AssertionError: print "cuda<>tri", error = 1 """ try: np.testing.assert_array_equal(labels_mpi[0], tri_labels) except AssertionError: print "tri<>mpi", error = 1 if error == 0: print "Labels OK ..." else: print ""