def run_tests1(nTests, nPts, nDim, nClusters, nReps=1, verbose=VERBOSE, print_times=PRINT_TIMES):
    # run_tests1(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]])
    # Runs one repetition and checks various intermediate values against a cpu calculation.
    if nReps > 1:
        print "This method only runs tests for nReps == 1"
        return 1

    # Generate nPts random data elements with nDim dimensions and nClusters random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times.
    cpu_time = 0.
    gpu_time = 0.
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_ccdist_time = 0.
    gpu_hdclosest_time = 0.
    gpu_init_time = 0.
    gpu_step3_time = 0.
    gpu_step4_time = 0.
    gpu_step56_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):
        # run the gpu algorithm
        t1 = time.time()
        (gpu_ccdist, gpu_hdClosest, gpu_assignments, gpu_lower, gpu_upper, \
            gpu_clusters2, gpu_cluster_movement, \
            data_time, module_time, init_time, ccdist_time, hdclosest_time, \
            step3_time, step4_time, step56_time) = \
            trikmeans_gpu(data, clusters, nReps, 1)
        t2 = time.time()
        gpu_time += t2 - t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_ccdist_time += ccdist_time
        gpu_hdclosest_time += hdclosest_time
        gpu_init_time += init_time
        gpu_step3_time += step3_time
        gpu_step4_time += step4_time
        gpu_step56_time += step56_time

        if verbose:
            print "------------------------ gpu results ------------------------"
            print "cluster-cluster distances"
            print gpu_ccdist
            print "half distance to closest"
            print gpu_hdClosest
            print "gpu time = ", t2 - t1
            print "gpu_assignments"
            print gpu_assignments
            print "gpu_lower"
            print gpu_lower
            print "gpu_upper"
            print gpu_upper
            print "gpu_clusters2"
            print gpu_clusters2
            print "-------------------------------------------------------------"

        # check ccdist and hdClosest
        ccdist = np.array(gpu_ccdist.get())
        hdClosest = np.array(gpu_hdClosest.get())

        t1 = time.time()
        cpu_ccdist = 0.5 * np.sqrt(((clusters[:, :, np.newaxis] - clusters[:, np.newaxis, :]) ** 2).sum(0))
        t2 = time.time()
        cpu_ccdist_time = t2 - t1

        if verbose:
            print "cpu_ccdist"
            print cpu_ccdist

        error = np.abs(cpu_ccdist - ccdist)
        if np.max(error) > 1e-7 * nDim * 2:
            print "iteration", iTest,
            print "***ERROR*** max ccdist error =", np.max(error)
            nErrors += 1
        if verbose:
            print "average ccdist error =", np.mean(error)
            print "max ccdist error =", np.max(error)

        t1 = time.time()
        cpu_ccdist[cpu_ccdist == 0.] = 1e10
        good_hdClosest = np.min(cpu_ccdist, 0)
        t2 = time.time()
        cpu_hdclosest_time = t2 - t1

        if verbose:
            print "good_hdClosest"
            print good_hdClosest
        err = np.abs(good_hdClosest - hdClosest)
        if np.max(err) > 1e-7 * nDim:
            print "***ERROR*** max hdClosest error =", np.max(err)
            nErrors += 1
        if verbose:
            print "errors on hdClosest"
            print err
            print "max error on hdClosest =", np.max(err)

        # calculate cpu initial assignments
        t1 = time.time()
        cpu_assign = assign_cpu(data, clusters)
        t2 = time.time()
        cpu_assign_time = t2 - t1

        if verbose:
            print "assignments shape =", cpu_assign.shape
            print "data shape =", data.shape
            print "cpu assignments"
            print cpu_assign
            print "gpu assignments"
            print gpu_assignments
            print "gpu new clusters"
            print gpu_clusters2

        # count mismatched assignments (compare directly; signed differences could cancel out)
        differences = np.sum(gpu_assignments.get() != cpu_assign)
        if differences > 0:
            nErrors += 1
            print differences, "errors in initial assignment"
        else:
            if verbose:
                print "initial cluster assignments match"

        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts, 1) == c, axis=0)

        # calculate cpu new cluster values
        t1 = time.time()
        cpu_new_clusters = calc_cpu(data, cpu_assign, clusters)
        t2 = time.time()
        cpu_calc_time = t2 - t1

        if verbose:
            print "cpu new clusters"
            print cpu_new_clusters

        diff = np.max(np.abs(gpu_clusters2 - cpu_new_clusters))
        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            iDiff = np.arange(nClusters)[((gpu_clusters2 - cpu_new_clusters) ** 2).sum(0) > 1e-7]
            print "clusters that differ:"
            print iDiff
            nErrors += 1
            if verbose:
                print "Test", iTest, "*** ERROR *** max diff was", diff
                for x in iDiff:
                    print "\ndata for cluster ", x
                    print "gpu:"
                    print gpu_clusters2[:, x]
                    print "cpu:"
                    print cpu_new_clusters[:, x]
                    print "points assigned:"
                    for ii in range(nPts):
                        if cpu_assign[ii] == x:
                            print "data point #", ii
                            print data[:, ii]
        else:
            if verbose:
                print "Test", iTest, "OK"

        # check if the cluster movement values are correct
        cpu_cluster_movement = np.sqrt(((clusters - cpu_new_clusters) ** 2).sum(0))
        diff = np.max(np.abs(cpu_cluster_movement - gpu_cluster_movement.get()))
        if diff > 1e-6 * nDim:
            print "*** ERROR *** max cluster movement error =", diff
            nErrors += 1
        if verbose:
            print "cpu cluster movements"
            print cpu_cluster_movement
            print "gpu cluster movements"
            print gpu_cluster_movement
            print "max diff in cluster movements is", diff

        cpu_time = cpu_assign_time + cpu_calc_time

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "average cpu time (ms) =", cpu_time / nTests * 1000.
        print "     assign time (ms) =", cpu_assign_time / nTests * 1000.
        if nReps == 1:
            print "       calc time (ms) =", cpu_calc_time / nTests * 1000.
            print "average gpu time (ms) =", gpu_time / nTests * 1000.
        else:
            print "       calc time (ms) ="
            print "average gpu time (ms) ="
        print "       data time (ms) =", gpu_data_time / nTests * 1000.
        print "     module time (ms) =", gpu_module_time / nTests * 1000.
        print "       init time (ms) =", gpu_init_time / nTests * 1000.
        print "     ccdist time (ms) =", gpu_ccdist_time / nTests * 1000.
        print "  hdclosest time (ms) =", gpu_hdclosest_time / nTests * 1000.
        print "      step3 time (ms) =", gpu_step3_time / nTests * 1000.
        print "      step4 time (ms) =", gpu_step4_time / nTests * 1000.
        print "     step56 time (ms) =", gpu_step56_time / nTests * 1000.
        print "---------------------------------------------"

    return nErrors
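
# --------------------------------------------------------------------------
# For reference, a minimal NumPy sketch of the cpu helpers the tests above
# rely on.  The real assign_cpu / calc_cpu are defined elsewhere in this
# project; the versions below are only illustrative and assume the shapes
# the tests use: data is (nDim, nPts), clusters is (nDim, nClusters),
# assignments are integer labels per point, and (by assumption here) an
# empty cluster keeps its previous center.
def assign_cpu_sketch(data, clusters):
    # squared distance from every point to every cluster: shape (nClusters, nPts)
    d2 = ((data[:, np.newaxis, :] - clusters[:, :, np.newaxis]) ** 2).sum(0)
    # index of the nearest cluster for each data point
    return np.argmin(d2, axis=0).astype(np.int32)

def calc_cpu_sketch(data, assignments, clusters):
    # new center = mean of the points assigned to each cluster, one column per cluster
    new_clusters = np.empty_like(clusters)
    for c in range(clusters.shape[1]):
        members = data[:, assignments == c]
        if members.shape[1] > 0:
            new_clusters[:, c] = members.mean(1)
        else:
            new_clusters[:, c] = clusters[:, c]   # assumed: empty cluster keeps old center
    return new_clusters
# --------------------------------------------------------------------------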
def run_tests1(nTests, nPts, nDim, nClusters, nReps=1, verbose=VERBOSE, print_times=PRINT_TIMES):
    # run_tests1(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]])
    # Runs one repetition and checks various intermediate values against a cpu calculation.
    if nReps > 1:
        print "This method only runs tests for nReps == 1"
        return 1

    # Generate nPts random data elements with nDim dimensions and nClusters random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times.
    cpu_time = 0.
    gpu_time = 0.
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_assign_time = 0.
    gpu_calc_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):
        # run the gpu algorithm
        t1 = time.time()
        (gpu_assignments, gpu_clusters2, \
            data_time, module_time, assign_time, calc_time) = \
            kmeans_gpu(data, clusters, nReps, 1)
        pycuda.autoinit.context.synchronize()
        t2 = time.time()
        gpu_time += t2 - t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_assign_time += assign_time
        gpu_calc_time += calc_time

        if verbose:
            print "------------------------ gpu results ------------------------"
            print "gpu time = ", t2 - t1
            print "gpu_assignments"
            print gpu_assignments
            print "gpu_clusters2"
            print gpu_clusters2
            print "-------------------------------------------------------------"

        # calculate cpu initial assignments
        t1 = time.time()
        cpu_assign = assign_cpu(data, clusters)
        t2 = time.time()
        cpu_assign_time = t2 - t1

        if verbose:
            print "assignments shape =", cpu_assign.shape
            print "data shape =", data.shape
            print "cpu assignments"
            print cpu_assign
            print "gpu assignments"
            print gpu_assignments
            print "gpu new clusters"
            print gpu_clusters2

        # count mismatched assignments (compare directly; signed differences could cancel out)
        differences = np.sum(gpu_assignments.get() != cpu_assign)
        if differences > 0:
            nErrors += 1
            print differences, "errors in initial assignment"
        else:
            if verbose:
                print "initial cluster assignments match"

        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts, 1) == c, axis=0)

        # calculate cpu new cluster values
        t1 = time.time()
        cpu_new_clusters = calc_cpu(data, cpu_assign, clusters)
        t2 = time.time()
        cpu_calc_time = t2 - t1

        if verbose:
            print "cpu new clusters"
            print cpu_new_clusters

        diff = np.max(np.abs(gpu_clusters2 - cpu_new_clusters))
        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            iDiff = np.arange(nClusters)[((gpu_clusters2 - cpu_new_clusters) ** 2).sum(0) > 1e-7]
            print "clusters that differ:"
            print iDiff
            nErrors += 1
            if verbose:
                print "Test", iTest, "*** ERROR *** max diff was", diff
                for x in iDiff:
                    print "\ndata for cluster ", x
                    print "gpu:"
                    print gpu_clusters2[:, x]
                    print "cpu:"
                    print cpu_new_clusters[:, x]
                    print "points assigned:"
                    for ii in range(nPts):
                        if cpu_assign[ii] == x:
                            print "data point #", ii
                            print data[:, ii]
        else:
            if verbose:
                print "Test", iTest, "OK"

        cpu_time = cpu_assign_time + cpu_calc_time

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "average cpu time (ms) =", cpu_time / nTests * 1000.
        print "     assign time (ms) =", cpu_assign_time / nTests * 1000.
        print "       calc time (ms) =", cpu_calc_time / nTests * 1000.
        print "average gpu time (ms) =", gpu_time / nTests * 1000.
        print "       data time (ms) =", gpu_data_time / nTests * 1000.
        print "     module time (ms) =", gpu_module_time / nTests * 1000.
        print "     assign time (ms) =", gpu_assign_time / nTests * 1000.
        print "       calc time (ms) =", gpu_calc_time / nTests * 1000.
        print "---------------------------------------------"

    return nErrors