Python cpuGFLOPS Examples

Programming Language: Python

Namespace/Package Name: cpuFunctions

Method/Function: cpuGFLOPS

Examples at hotexamples.com: 5

Python cpuGFLOPS - 5 examples found. These are the top-rated real-world Python examples of cpuFunctions.cpuGFLOPS, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: cu_gflops.py Project: fgreve/python-cuda

def main(device, loops=1):
    """Time the GPU and CPU GFLOPS routines and print one summary line.

    The ``gpuGFLOPS`` function is looked up on ``device``, launched
    ``loops`` times with a context synchronisation before and after each
    launch, and the total wall-clock time is recorded.  ``cpuGFLOPS`` is
    then called ``loops`` times and timed the same way.

    Args:
        device: Object whose ``functions`` mapping contains the
            ``"gpuGFLOPS"`` entry.
        loops: Number of times each routine is executed.

    Prints:
        CPU seconds, CPU rate, GPU seconds, GPU rate and, in brackets,
        the hard-coded peak figure.  Rates are operation counts scaled by
        1e-9 and divided by elapsed seconds.
    """
    gpuGFLOPS = device.functions["gpuGFLOPS"]

    cuFuncSetBlockShape(gpuGFLOPS, BLOCK_SIZE_G, 1, 1)

    # GPU pass: synchronise around every launch so each one is fully timed.
    gpu_start = time()
    for _ in range(loops):
        cuCtxSynchronize()
        cuLaunchGrid(gpuGFLOPS, GRID_SIZE_G, 1)
        cuCtxSynchronize()
    gpu_elapsed = time() - gpu_start

    # Operation counts for one pass.
    # NOTE(review): 4096 is presumably the per-thread, per-iteration flop
    # count of the kernels -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G

    # Scale to giga-operations over all passes.
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU pass.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    cpu_elapsed = time() - cpu_start

    # Alternative for an 8600GTS: 4.*8.*2.*1.458
    #   (4MP*8SP/MP*2flops/SP/clock*clock[GHz])
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        cpu_elapsed, flopsc / cpu_elapsed,
        gpu_elapsed, flopsg / gpu_elapsed,
        peakg))

Example #2

Show file

File: cuda_gflops.py Project: jamak/epimorphism

def main(loops=1):
    """Time the GPU and CPU GFLOPS routines and print one summary line.

    ``gpuGFLOPS`` is configured and called ``loops`` times between two
    ``cudaThreadSynchronize()`` calls, then ``cudaThreadExit()`` is called.
    ``cpuGFLOPS`` is afterwards called ``loops`` times and timed.

    Args:
        loops: Number of times each routine is executed.

    Prints:
        CPU seconds, CPU rate, GPU seconds, GPU rate and, in brackets,
        the hard-coded peak figure.  Rates are operation counts scaled by
        1e-9 and divided by elapsed seconds.
    """
    blockDim = dim3(BLOCK_SIZE, 1, 1)
    gridDim = dim3(GRID_SIZE, 1, 1)

    # GPU pass: one synchronise before the loop and one after it, so the
    # measured interval covers all launches.
    gpu_start = time()
    cudaThreadSynchronize()
    for _ in range(loops):
        cudaConfigureCall(gridDim, blockDim, 0, 0)
        gpuGFLOPS()
    cudaThreadSynchronize()
    gpu_elapsed = time() - gpu_start
    cudaThreadExit()

    # Operation counts for one pass; the GPU count is the per-block count
    # multiplied by the number of blocks in the grid.
    # NOTE(review): 4096 is presumably the per-thread, per-iteration flop
    # count of the kernels -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS * BLOCK_SIZE
    flopsg = flopsc * GRID_SIZE

    # Scale to giga-operations over all passes.
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU pass.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    cpu_elapsed = time() - cpu_start

    # The original comment said "2MP", but the leading factor is 4.
    peakg = 4. * 8. * 2. * 1.458  # 4MP*8SP/MP*2flops/SP/clock*clock[GHz]

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        cpu_elapsed, flopsc / cpu_elapsed,
        gpu_elapsed, flopsg / gpu_elapsed,
        peakg))

Example #3

Show file

File: cu_gflops.py Project: npinto/python-cuda

def main(device, loops=1):
    """Time the GPU and CPU GFLOPS routines and print one summary line.

    The ``gpuGFLOPS`` function is looked up on ``device``, launched
    ``loops`` times with a context synchronisation before and after each
    launch, and the total wall-clock time is recorded.  ``cpuGFLOPS`` is
    then called ``loops`` times and timed the same way.

    Args:
        device: Object whose ``functions`` mapping contains the
            ``"gpuGFLOPS"`` entry.
        loops: Number of times each routine is executed.

    Prints:
        CPU seconds, CPU rate, GPU seconds, GPU rate and, in brackets,
        the hard-coded peak figure.  Rates are operation counts scaled by
        1e-9 and divided by elapsed seconds.
    """
    gpuGFLOPS = device.functions["gpuGFLOPS"]

    cuFuncSetBlockShape(gpuGFLOPS, BLOCK_SIZE_G, 1, 1)

    # GPU pass: synchronise around every launch so each one is fully timed.
    gpu_start = time()
    for _ in range(loops):
        cuCtxSynchronize()
        cuLaunchGrid(gpuGFLOPS, GRID_SIZE_G, 1)
        cuCtxSynchronize()
    gpu_elapsed = time() - gpu_start

    # Operation counts for one pass.
    # NOTE(review): 4096 is presumably the per-thread, per-iteration flop
    # count of the kernels -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G

    # Scale to giga-operations over all passes.
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU pass.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    cpu_elapsed = time() - cpu_start

    # Alternative for an 8600GTS: 4.*8.*2.*1.458
    #   (4MP*8SP/MP*2flops/SP/clock*clock[GHz])
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        cpu_elapsed, flopsc / cpu_elapsed,
        gpu_elapsed, flopsg / gpu_elapsed,
        peakg))

Example #4

Show file

File: cuda_gflops.py Project: npinto/python-cuda

def main(loops=1):
    """Time the GPU and CPU GFLOPS routines and print two summary lines.

    ``gpuGFLOPS`` is configured and called ``loops`` times between two
    ``cudaThreadSynchronize()`` calls, then ``cudaThreadExit()`` is called.
    ``cpuGFLOPS`` is afterwards called ``loops`` times and timed.

    Args:
        loops: Number of times each routine is executed.

    Prints:
        Line 1: CPU seconds, CPU rate, GPU seconds, GPU rate and, in
        brackets, the hard-coded peak figure.  Rates are operation counts
        scaled by 1e-9 and divided by elapsed seconds.
        Line 2: the CPU rate multiplied by 2.8 and the GPU rate multiplied
        by 1.512/112.
    """
    blockDim = dim3(BLOCK_SIZE_G, 1, 1)
    gridDim = dim3(GRID_SIZE_G, 1, 1)

    # GPU pass: one synchronise before the loop and one after it, so the
    # measured interval covers all launches.
    gpu_start = time()
    cudaThreadSynchronize()
    for _ in range(loops):
        cudaConfigureCall(gridDim, blockDim, 0, 0)
        gpuGFLOPS()
    cudaThreadSynchronize()
    gpu_elapsed = time() - gpu_start
    cudaThreadExit()

    # Operation counts for one pass.
    # NOTE(review): 4096 is presumably the per-thread, per-iteration flop
    # count of the kernels -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G

    # Scale to giga-operations over all passes.
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU pass.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    cpu_elapsed = time() - cpu_start

    # Alternative for an 8600GTS: 4.*8.*2.*1.458
    #   (4MP*8SP/MP*2flops/SP/clock*clock[GHz])
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        cpu_elapsed, flopsc / cpu_elapsed,
        gpu_elapsed, flopsg / gpu_elapsed,
        peakg))

    # NOTE(review): 2.8 and 1.512/112 are unexplained scaling factors;
    # 1.512 and 112 (= 14*8) also appear as terms of peakg -- confirm the
    # intended meaning before relying on this line.
    print("%8.3f%8.2f" % (
        flopsc / cpu_elapsed * 2.8,
        flopsg / gpu_elapsed * 1.512 / 112))

Example #5

Show file

File: cuda_gflops.py Project: fgreve/python-cuda

def main(loops=1):
    """Time the GPU and CPU GFLOPS routines and print two summary lines.

    ``gpuGFLOPS`` is configured and called ``loops`` times between two
    ``cudaThreadSynchronize()`` calls, then ``cudaThreadExit()`` is called.
    ``cpuGFLOPS`` is afterwards called ``loops`` times and timed.

    Args:
        loops: Number of times each routine is executed.

    Prints:
        Line 1: CPU seconds, CPU rate, GPU seconds, GPU rate and, in
        brackets, the hard-coded peak figure.  Rates are operation counts
        scaled by 1e-9 and divided by elapsed seconds.
        Line 2: the CPU rate multiplied by 2.8 and the GPU rate multiplied
        by 1.512/112.
    """
    blockDim = dim3(BLOCK_SIZE_G, 1, 1)
    gridDim = dim3(GRID_SIZE_G, 1, 1)

    # GPU pass: one synchronise before the loop and one after it, so the
    # measured interval covers all launches.
    gpu_start = time()
    cudaThreadSynchronize()
    for _ in range(loops):
        cudaConfigureCall(gridDim, blockDim, 0, 0)
        gpuGFLOPS()
    cudaThreadSynchronize()
    gpu_elapsed = time() - gpu_start
    cudaThreadExit()

    # Operation counts for one pass.
    # NOTE(review): 4096 is presumably the per-thread, per-iteration flop
    # count of the kernels -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G

    # Scale to giga-operations over all passes.
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU pass.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    cpu_elapsed = time() - cpu_start

    # Alternative for an 8600GTS: 4.*8.*2.*1.458
    #   (4MP*8SP/MP*2flops/SP/clock*clock[GHz])
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        cpu_elapsed, flopsc / cpu_elapsed,
        gpu_elapsed, flopsg / gpu_elapsed,
        peakg))

    # NOTE(review): 2.8 and 1.512/112 are unexplained scaling factors;
    # 1.512 and 112 (= 14*8) also appear as terms of peakg -- confirm the
    # intended meaning before relying on this line.
    print("%8.3f%8.2f" % (
        flopsc / cpu_elapsed * 2.8,
        flopsg / gpu_elapsed * 1.512 / 112))