Example #1
0
def run_tests(nTests, nPts, nDim, nClusters, nReps=1, verbose = VERBOSE, print_times = PRINT_TIMES,
                 verify = 1):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    
    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    
    if(nPts * nDim *nClusters > CPU_SIZE_LIMIT):
        #print "Too big to verify wiht cpu calculation"
        verify = 0  # too big to run on cpu
        
    cpu_time = 0.
    gpu_time = 0.
    
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_ccdist_time = 0.
    gpu_hdclosest_time = 0.
    gpu_init_time = 0.
    gpu_step3_time = 0.
    gpu_step4_time = 0.
    gpu_step56_time = 0.

    np.random.seed(SEED)
    data = np.random.rand(nDim, nPts).astype(np.float32)
    clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):
    
        if verify:
            #run the cpu algorithm
            t1 = time.time()
            (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps)
            cpu_assign.shape = (nPts,)
            t2 = time.time()
            cpu_time += t2-t1
            
            if verbose:
                print "------------------------ cpu results ------------------------"
                print "cpu_assignments"
                print cpu_assign
                print "cpu_clusters"
                print cpu_clusters
                print "-------------------------------------------------------------"
        
        #run the gpu algorithm
        t1 = time.time()
        (gpu_ccdist, gpu_hdClosest, gpu_assign, gpu_lower, gpu_upper, \
            gpu_clusters, gpu_cluster_movement, \
            data_time, module_time, init_time, ccdist_time, hdclosest_time, \
            step3_time, step4_time, step56_time) = \
            trikmeans_gpu(data, clusters, nReps, 1)
        t2 = time.time()        
        gpu_time += t2-t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_ccdist_time += ccdist_time
        gpu_hdclosest_time += hdclosest_time
        gpu_init_time += init_time
        gpu_step3_time += step3_time
        gpu_step4_time += step4_time
        gpu_step56_time += step56_time
        
        if verbose:
            print "------------------------ gpu results ------------------------"
            print "gpu_assignments"
            print gpu_assign
            print "gpu_clusters"
            print gpu_clusters
            print "-------------------------------------------------------------"
            

        if verify:
            # calculate the number of data points in each cluster
            c = np.arange(nClusters)
            c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0)

            # verify the results...
            nErrors += verify_assignments(gpu_assign.get(), cpu_assign, data, gpu_clusters, 
                                            cpu_clusters, verbose, iTest)
            nErrors += verify_clusters(gpu_clusters, cpu_clusters, cpu_assign, verbose, iTest)


    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        if verify:
            print "average cpu time (ms) =", cpu_time/nTests*1000.
        else:
            print "average cpu time (ms) = N/A"
        print "average gpu time (ms) =", gpu_time/nTests*1000.
        print "       data time (ms) =", gpu_data_time/nTests*1000.
        print "     module time (ms) =", gpu_module_time/nTests*1000.
        print "       init time (ms) =", gpu_init_time/nTests*1000.        
        print "     ccdist time (ms) =", gpu_ccdist_time/nTests*1000.        
        print "  hdclosest time (ms) =", gpu_hdclosest_time/nTests*1000.        
        print "      step3 time (ms) =", gpu_step3_time/nTests*1000.        
        print "      step4 time (ms) =", gpu_step4_time/nTests*1000.        
        print "     step56 time (ms) =", gpu_step56_time/nTests*1000.        
        print "---------------------------------------------"

    if verify:
        return nErrors
    else:
        return -1
Example #2
0
def run_labels(data, nClusters, nReps, seed=SEED):
    random.seed(seed)
    # run py_kmeans.kmeans once to get a starting label assignment,
    # which will be used by the scipy routine and others
    clusters, dist, labels = py_kmeans.kmeans(data, nClusters, 1, 0)
    if VERBOSE:
        print "data"
        print data
        print "initial clusters:"
        print clusters
 
    (nPts, nDim) = data.shape
    nClusters = clusters.shape[0] 
    print "[nPts:{0:6}][nDim:{1:4}][nClusters:{2:4}][nReps:{3:3}]...".format(nPts, nDim, 
                                                                            nClusters, nReps),

    data2 = np.swapaxes(data, 0, 1).astype(np.float32).copy('C')
    clusters2 = np.swapaxes(clusters, 0, 1).astype(np.float32).copy('C')

    if VERBOSE:
        print "data2"
        print data2
        print "clusters2"
        print clusters2

    """
    t1 = time.time()
    (cuda_clusters, cuda_labels) = cuda_kmeans.kmeans_gpu(data2, clusters2, nReps+1)
    if VERBOSE:
        print "cuda_kmeans labels:"
        print cuda_labels
    t2 = time.time()
    if PRINT_TIMES:
        print "\ncuda ", t2-t1
    """
    
    t1 = time.time()
    (tri_clusters, tri_labels) = cuda_kmeans_tri.trikmeans_gpu(data2, clusters2, nReps+1)
    if VERBOSE:
        print "cuda_kmeans_tri labels:"
        print tri_labels
    t2 = time.time()
    if PRINT_TIMES:
        print "tri  ", t2-t1

    t1 = time.time()
    labels_mpi = mpi_labels(data, nClusters, nReps+1, seed)
    if VERBOSE:
        print "mpi labels:"
        print labels_mpi[0]
    t2 = time.time()
    if PRINT_TIMES:
        print "mpi  ", t2-t1

    if scipyFlag:
        t1 = time.time()
        labels_scipy = scipy_labels(data, clusters, nReps)
        if VERBOSE:
            print "scipy labels:"
            print labels_scipy[0]
        t2 = time.time()
        if PRINT_TIMES:
            print "scipy", t2-t1
    
    t1 = time.time()
    (cpu_clusters, cpu_labels) = cpu_kmeans.kmeans_cpu(data2, clusters2, nReps+1)
    if VERBOSE:
        print "cpu_kmeans labels:"
        print cpu_labels
    t2 = time.time()
    if PRINT_TIMES:
        print "cpu  ", t2-t1

    error = 0
    
    if scipyFlag:
        try:
            np.testing.assert_array_equal(labels_mpi[0], labels_scipy[0])
        except AssertionError:
            print "mpi<>scipy",
            error = 1
    
    try:
        np.testing.assert_array_equal(labels_mpi[0], cpu_labels)
    except AssertionError:
        print "mpi<>cpu",
        error = 1
    
    """
    try:
        np.testing.assert_array_equal(cuda_labels, tri_labels)
    except AssertionError:
        print "cuda<>tri",
        error = 1
    """
    
    try:
        np.testing.assert_array_equal(tri_labels, cpu_labels)
    except AssertionError:
        print "tri<>cpu",
        error = 1

    try:
        np.testing.assert_array_equal(labels_mpi[0], tri_labels)
    except AssertionError:
        print "tri<>mpi",
        error = 1

    if error == 0:
        print "Labels OK ..."
    else:
        print ""
Example #3
0
def run_tests(nTests,
              nPts,
              nDim,
              nClusters,
              nReps=1,
              verbose=VERBOSE,
              print_times=PRINT_TIMES,
              verify=1):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]

    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times

    if (nPts * nDim * nClusters > CPU_SIZE_LIMIT):
        #print "Too big to verify wiht cpu calculation"
        verify = 0  # too big to run on cpu

    cpu_time = 0.
    gpu_time = 0.

    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_ccdist_time = 0.
    gpu_hdclosest_time = 0.
    gpu_init_time = 0.
    gpu_step3_time = 0.
    gpu_step4_time = 0.
    gpu_step56_time = 0.

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):

        np.random.seed(SEED + iTest)
        data = np.random.rand(nDim, nPts).astype(np.float32)
        clusters = np.random.rand(nDim, nClusters).astype(np.float32)

        if verbose:
            print "data"
            print data
            print "\nclusters"
            print clusters

        if verify:
            #run the cpu algorithm
            t1 = time.time()
            (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps)
            cpu_assign.shape = (nPts, )
            t2 = time.time()
            cpu_time += t2 - t1

            if verbose:
                print "------------------------ cpu results ------------------------"
                print "cpu_assignments"
                print cpu_assign
                print "cpu_clusters"
                print cpu_clusters
                print "-------------------------------------------------------------"

        #run the gpu algorithm
        t1 = time.time()
        (gpu_ccdist, gpu_hdClosest, gpu_assign, gpu_lower, gpu_upper, \
            gpu_clusters, gpu_cluster_movement, \
            data_time, module_time, init_time, ccdist_time, hdclosest_time, \
            step3_time, step4_time, step56_time) = \
            trikmeans_gpu(data, clusters, nReps, 1)
        t2 = time.time()
        gpu_time += t2 - t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_ccdist_time += ccdist_time
        gpu_hdclosest_time += hdclosest_time
        gpu_init_time += init_time
        gpu_step3_time += step3_time
        gpu_step4_time += step4_time
        gpu_step56_time += step56_time

        if verbose:
            print "------------------------ gpu results ------------------------"
            print "gpu_assignments"
            print gpu_assign
            print "gpu_clusters"
            print gpu_clusters
            print "-------------------------------------------------------------"

        if verify:
            # calculate the number of data points in each cluster
            c = np.arange(nClusters)
            c_counts = np.sum(cpu_assign.reshape(nPts, 1) == c, axis=0)

            # verify the results...
            err = verify_assignments(gpu_assign.get(), cpu_assign, data,
                                     gpu_clusters, cpu_clusters, verbose,
                                     iTest)
            err += verify_clusters(gpu_clusters, cpu_clusters, cpu_assign,
                                   verbose, iTest)
            if err:
                nErrors += 1

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        if verify:
            print "average cpu time (ms) =", cpu_time / nTests * 1000.
        else:
            print "average cpu time (ms) = N/A"
        print "average gpu time (ms) =", gpu_time / nTests * 1000.
        print "       data time (ms) =", gpu_data_time / nTests * 1000.
        print "     module time (ms) =", gpu_module_time / nTests * 1000.
        print "       init time (ms) =", gpu_init_time / nTests * 1000.
        print "     ccdist time (ms) =", gpu_ccdist_time / nTests * 1000.
        print "  hdclosest time (ms) =", gpu_hdclosest_time / nTests * 1000.
        print "      step3 time (ms) =", gpu_step3_time / nTests * 1000.
        print "      step4 time (ms) =", gpu_step4_time / nTests * 1000.
        print "     step56 time (ms) =", gpu_step56_time / nTests * 1000.
        print "---------------------------------------------"

    if verify:
        return nErrors
    else:
        return -1
Example #4
0
def run_tests(nTests, nPts, nDim, nClusters, nReps=1, verbose = VERBOSE, print_times = PRINT_TIMES,
                 verify = 1):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    
    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    
    if(nPts * nDim *nClusters > CPU_SIZE_LIMIT):
        #print "Too big to verify wiht cpu calculation"
        verify = 0  # too big to run on cpu
        
    cpu_time = 0.
    gpu_time = 0.
    
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_assign_time = 0.
    gpu_calc_time = 0.

    nErrors = 0

    # repeat this test nTests times
    for iTest in range(nTests):
    
        np.random.seed(SEED+iTest)
        data = np.random.rand(nDim, nPts).astype(np.float32)
        clusters = np.random.rand(nDim, nClusters).astype(np.float32)

        if verbose:
            print "data"
            print data
            print "\nclusters"
            print clusters

        if verify:
            #run the cpu algorithm
            t1 = time.time()
            (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps)
            cpu_assign.shape = (nPts,)
            t2 = time.time()
            cpu_time += t2-t1
            
            if verbose:
                print "------------------------ cpu results ------------------------"
                print "cpu_assignments"
                print cpu_assign
                print "cpu_clusters"
                print cpu_clusters
                print "-------------------------------------------------------------"
        
        #run the gpu algorithm
        t1 = time.time()
        (gpu_assign, gpu_clusters, \
            data_time, module_time, assign_time, calc_time) = \
            kmeans_gpu(data, clusters, nReps, 1)
        pycuda.autoinit.context.synchronize()
        t2 = time.time()        
        gpu_time += t2-t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_assign_time += assign_time
        gpu_calc_time += calc_time
        
        if verbose:
            print "------------------------ gpu results ------------------------"
            print "gpu_assignments"
            print gpu_assign
            print "gpu_clusters"
            print gpu_clusters
            print "-------------------------------------------------------------"
            

        if verify:
            # calculate the number of data points in each cluster
            c = np.arange(nClusters)
            c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0)

            # verify the results...
            err = verify_assignments(gpu_assign.get(), cpu_assign, data, gpu_clusters, 
                                            cpu_clusters, verbose, iTest)
            err += verify_clusters(gpu_clusters, cpu_clusters, cpu_assign, verbose, iTest)
            if err:
                nErrors += 1

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        if verify:
            print "average cpu time (ms) =", cpu_time/nTests*1000.
        else:
            print "average cpu time (ms) = N/A"
        print "average gpu time (ms) =", gpu_time/nTests*1000.
        print "       data time (ms) =", gpu_data_time/nTests*1000.
        print "     module time (ms) =", gpu_module_time/nTests*1000.
        print "     assign time (ms) =", gpu_assign_time/nTests*1000.        
        print "       calc time (ms) =", gpu_calc_time/nTests*1000.        
        print "---------------------------------------------"

    if verify:
        return nErrors
    else:
        return -1
Example #5
0
def run_tests(nTests, nPts, nDim, nClusters, nReps, verbose = VERBOSE, print_times = PRINT_TIMES):
    # run_tests(nTests, nPts, nDim, nClusters, nReps [, verbose [, print_times]]
    
    # Generate nPts random data elements with nDim dimensions and nCluster random clusters,
    # then run kmeans for nReps and compare gpu and cpu results.  This is repeated nTests times
    nErrors = 0
    nCalcErrors = 0
    cpu_time = 0.
    gpu_time = 0.
    
    gpu_data_time = 0.
    gpu_module_time = 0.
    gpu_assign_time = 0.
    gpu_calc_time = 0.

    np.random.seed(100);
    #data = np.random.rand(nDim, nPts).astype(np.float32)
    #clusters = np.random.rand(nDim, nClusters).astype(np.float32)

    data = np.array([[3., 4., 4., 9., 5., 6., 9., 5., 5., 7., 6.], \
                     [3., 3., 2., 2., 1., 2., 4., 2., 4., 4., 5.]]).astype(np.float32)
    clusters = np.array([[4.57142878, 7.75], \
                         [2.42857146, 3.75]]).astype(np.float32)
                         
    print "nPts =", nPts
    print "nDim =", nDim
    print "nClusters =", nClusters
    print "nReps =", nReps

    if verbose:
        print "data"
        print data
        print "\nclusters"
        print clusters
        
    for i in range(nTests):
        t1 = time.time()
        (cpu_clusters, cpu_assign) = kmeans_cpu(data, clusters, nReps)
        print cpu_assign.shape
        cpu_assign.shape = (nPts,)
        t2 = time.time()
        cpu_time += t2-t1
        if verbose:
            print "cpu assignments"
            print cpu_assign
            print "cpu clusters"
            print cpu_clusters
            print "cpu time = ", t2-t1
            
        t1 = time.time()
        (gpu_clusters, gpu_assign, data_time, module_time, assign_time, calc_time) = kmeans_gpu(data, clusters, nReps, 1)
        t2 = time.time()
        
        gpu_time += t2-t1
        gpu_data_time += data_time
        gpu_module_time += module_time
        gpu_assign_time += assign_time
        gpu_calc_time += calc_time
        
        if verbose:
            print "gpu assignments"
            print gpu_assign
            print "gpu clusters"
            print gpu_clusters
            print "gpu time = ", t2-t1
    
        # calculate the number of data points in each cluster
        c = np.arange(nClusters)
        c_counts = np.sum(cpu_assign.reshape(nPts,1) == c, axis=0)
        
        # verify results
        differences = sum(gpu_assign != cpu_assign)
        if(differences > 0):
            nErrors += 1
            if verbose:
                print "Test",i,"*** ERROR ***", differences, "differences"
                iDiff = np.arange(nPts)[gpu_assign != cpu_assign]
                print "iDiff", iDiff
                for ii in iDiff:
                    print "data point is", data[:,ii]
                    print "cpu assigned to", cpu_assign[ii]
                    print "   with center at (cpu)", cpu_clusters[:,cpu_assign[ii]]
                    print "   with center at (gpu)", gpu_clusters.get()[:,cpu_assign[ii]]
                    print "gpu assigned to", gpu_assign[ii]
                    print "   with center at (cpu)", cpu_clusters[:,gpu_assign[ii]]
                    print "   with center at (gpu)", gpu_clusters.get()[:, gpu_assign[ii]]
        else:
            if verbose:
                print "Cluster assignment OK"

        diff = np.max(np.abs(gpu_clusters.get() - cpu_clusters))

        if verbose:
            print "max error in cluster centers is", diff
            print "avg error in cluster centers is", 
            print np.mean(np.abs(gpu_clusters.get()-cpu_clusters))

        if diff > 1e-7 * max(c_counts) or math.isnan(diff):
            nCalcErrors += 1
            if verbose:
                print "Test",i,"*** ERROR *** max diff was", diff
                print 
        else:
            if verbose:
                print "Test", i, "OK"

    if print_times:
        print "\n---------------------------------------------"
        print "nPts      =", nPts
        print "nDim      =", nDim
        print "nClusters =", nClusters
        print "nReps     =", nReps
        print "Assignment errors  =", nErrors, "out of", nTests, "tests"
        print "Calculation errors =", nCalcErrors, "out of", nTests, "tests"
        print "average cpu time (ms) =", cpu_time/nTests*1000.
        print "average gpu time (ms) =", gpu_time/nTests*1000.
        print "       data time (ms) =", gpu_data_time/nTests*1000.
        print "     module time (ms) =", gpu_module_time/nTests*1000.
        print "     assign time (ms) =", gpu_assign_time/nTests*1000.        
        print "       calc time (ms) =", gpu_calc_time/nTests*1000.        
        print "---------------------------------------------"

    return nErrors + nCalcErrors
def run_labels(data, nClusters, nReps, skip_cpu = SKIP_CPU, seed=SEED):
    random.seed(seed)
    # run py_kmeans.kmeans once to get a starting label assignment,
    # which will be used by the scipy routine and others
    clusters, dist, labels = py_kmeans.kmeans(data, nClusters, 1, 0)
    if VERBOSE:
        print "data"
        print data
        print "initial clusters:"
        print clusters
 
    (nPts, nDim) = data.shape
    nClusters = clusters.shape[0] 
    print "[nPts:{0:6}][nDim:{1:4}][nClusters:{2:4}][nReps:{3:3}]...".format(nPts, nDim, 
                                                                            nClusters, nReps),

    data2 = np.swapaxes(data, 0, 1).astype(np.float32).copy('C')
    clusters2 = np.swapaxes(clusters, 0, 1).astype(np.float32).copy('C')

    if VERBOSE:
        print "data2"
        print data2
        print "clusters2"
        print clusters2

    """
    t1 = time.time()
    (cuda_clusters, cuda_labels) = cuda_kmeans.kmeans_gpu(data2, clusters2, nReps+1)
    if VERBOSE:
        print "cuda_kmeans labels:"
        print cuda_labels
    t2 = time.time()
    if PRINT_TIMES:
        print "\ncuda ", t2-t1
    """
    
    t1 = time.time()
    (tri_clusters, tri_labels) = cuda_kmeans_tri.trikmeans_gpu(data2, clusters2, nReps+1)
    if VERBOSE:
        print "\ncuda_kmeans_tri labels:"
        print tri_labels
    t2 = time.time()
    if PRINT_TIMES:
        print "\ntri  ", t2-t1

    t1 = time.time()
    labels_mpi = mpi_labels(data, nClusters, nReps+1, seed)
    if VERBOSE:
        print "mpi labels:"
        print labels_mpi[0]
    t2 = time.time()
    if PRINT_TIMES:
        print "mpi  ", t2-t1

    if scipyFlag:
        t1 = time.time()
        labels_scipy = scipy_labels(data, clusters, nReps)
        if VERBOSE:
            print "scipy labels:"
            print labels_scipy[0]
        t2 = time.time()
        if PRINT_TIMES:
            print "scipy", t2-t1
    
    t1 = time.time()
    if not skip_cpu:
        (cpu_clusters, cpu_labels) = cpu_kmeans.kmeans_cpu(data2, clusters2, nReps+1)
        if VERBOSE:
            print "cpu_kmeans labels:"
            print cpu_labels
        t2 = time.time()
        if PRINT_TIMES:
            print "cpu  ", t2-t1

    error = 0
    
    if scipyFlag:
        try:
            np.testing.assert_array_equal(labels_mpi[0], labels_scipy[0])
        except AssertionError:
            print "mpi<>scipy",
            error = 1
    
    if not skip_cpu:
        try:
            np.testing.assert_array_equal(labels_mpi[0], cpu_labels)
        except AssertionError:
            print "mpi<>cpu",
            error = 1
    
        try:
            np.testing.assert_array_equal(tri_labels, cpu_labels)
        except AssertionError:
            print "tri<>cpu",
            error = 1

    """
    try:
        np.testing.assert_array_equal(cuda_labels, tri_labels)
    except AssertionError:
        print "cuda<>tri",
        error = 1
    """
    
    try:
        np.testing.assert_array_equal(labels_mpi[0], tri_labels)
    except AssertionError:
        print "tri<>mpi",
        error = 1


    if error == 0:
        print "Labels OK ..."
    else:
        print ""