Example #1
0
def main(device, loops=1):
    """Benchmark the "gpuGFLOPS" device function against cpuGFLOPS().

    Args:
        device: Object whose ``functions`` mapping contains the
            "gpuGFLOPS" entry that is launched on the GPU.
        loops: How many times each benchmark is executed (default 1).

    Prints a single row: CPU seconds, CPU rate, GPU seconds, GPU rate and,
    in brackets, the hard-coded peak figure.  Each rate is the matching
    operation count (scaled by 1e-9) divided by the elapsed seconds.
    """
    gpuGFLOPS = device.functions["gpuGFLOPS"]
    cuFuncSetBlockShape(gpuGFLOPS, BLOCK_SIZE_G, 1, 1)

    # GPU pass: every launch is bracketed by cuCtxSynchronize(), and both
    # calls fall inside the timed interval.
    gpu_start = time()
    for _ in range(loops):
        cuCtxSynchronize()
        cuLaunchGrid(gpuGFLOPS, GRID_SIZE_G, 1)
        cuCtxSynchronize()
    t0 = time() - gpu_start

    # Operation counts for the whole run, scaled by 1e-9.
    # NOTE(review): 4096. is presumably the flop count per thread per
    # iteration -- confirm against the kernel source.
    scale = 1.e-9 * float(loops)
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G
    flopsc *= scale
    flopsg *= scale

    # CPU pass, timed the same way.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    t1 = time() - cpu_start

    # Alternative peak for an 8600GTS:
    #    peakg = 4.*8.*2.*1.458 # 4MP*8SP/MP*2flops/SP/clock*clock[GHz]
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        t1, flopsc / t1, t0, flopsg / t0, peakg))
Example #2
0
def main(loops=1):
    """Benchmark gpuGFLOPS() against cpuGFLOPS() and print one result row.

    Args:
        loops: How many times each benchmark is executed (default 1).

    Prints: CPU seconds, CPU rate, GPU seconds, GPU rate and, in brackets,
    the hard-coded peak figure.  Each rate is the matching operation count
    (scaled by 1e-9) divided by the elapsed seconds.
    """
    blockDim = dim3(BLOCK_SIZE, 1, 1)
    gridDim = dim3(GRID_SIZE, 1, 1)

    # GPU pass.  The timer starts before the first cudaThreadSynchronize(),
    # so that call's duration is part of the measured GPU time.
    # NOTE(review): confirm that including it is intended.
    gpu_start = time()
    cudaThreadSynchronize()
    for _ in range(loops):
        cudaConfigureCall(gridDim, blockDim, 0, 0)
        gpuGFLOPS()
    cudaThreadSynchronize()
    t0 = time() - gpu_start
    cudaThreadExit()

    # Operation counts for the whole run, scaled by 1e-9; the GPU count is
    # the CPU count multiplied by GRID_SIZE.
    # NOTE(review): 4096. is presumably the flop count per thread per
    # iteration -- confirm against the kernel source.
    scale = 1.e-9 * float(loops)
    flopsc = 4096. * ITERATIONS * BLOCK_SIZE
    flopsg = flopsc * GRID_SIZE
    flopsc *= scale
    flopsg *= scale

    # CPU pass, timed the same way.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    t1 = time() - cpu_start

    # The leading factor is 4, so the breakdown is written as 4MP here
    # (it previously read "2MP", which did not match the expression).
    peakg = 4. * 8. * 2. * 1.458  # 4MP*8SP/MP*2flops/SP/clock*clock[GHz]

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        t1, flopsc / t1, t0, flopsg / t0, peakg))
Example #3
0
def main(device, loops=1):
    """Run the GPU and CPU GFLOPS benchmarks and report both timings.

    Args:
        device: Object exposing a ``functions`` mapping from which the
            "gpuGFLOPS" entry is looked up and launched.
        loops: Repetition count applied to both benchmarks (default 1).

    Output is one formatted row: CPU seconds, CPU rate, GPU seconds,
    GPU rate, then the hard-coded peak value in square brackets.  A rate
    is the 1e-9-scaled operation count divided by the elapsed seconds.
    """
    kernel = device.functions["gpuGFLOPS"]
    cuFuncSetBlockShape(kernel, BLOCK_SIZE_G, 1, 1)

    # GPU timing: cuCtxSynchronize() runs before and after each launch,
    # inside the timed interval.
    started = time()
    for _ in range(loops):
        cuCtxSynchronize()
        cuLaunchGrid(kernel, GRID_SIZE_G, 1)
        cuCtxSynchronize()
    t0 = time() - started

    # Whole-run operation counts, scaled by 1e-9.
    # NOTE(review): the 4096. factor is presumably flops per thread per
    # iteration -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU timing.
    started = time()
    for _ in range(loops):
        cpuGFLOPS()
    t1 = time() - started

    # Alternative peak for an 8600GTS:
    #    peakg = 4.*8.*2.*1.458 # 4MP*8SP/MP*2flops/SP/clock*clock[GHz]
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # Parenthesised single-argument print: identical output on Python 2,
    # and no longer a SyntaxError on Python 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        t1, flopsc / t1, t0, flopsg / t0, peakg))
Example #4
0
def main(loops=1):
    """Benchmark gpuGFLOPS() against cpuGFLOPS() and print two result rows.

    Args:
        loops: How many times each benchmark is executed (default 1).

    Row 1: CPU seconds, CPU rate, GPU seconds, GPU rate and, in brackets,
    the hard-coded peak figure.  Each rate is the matching operation count
    (scaled by 1e-9) divided by the elapsed seconds.
    Row 2: the CPU rate multiplied by 2.8 and the GPU rate multiplied by
    1.512/112.
    """
    blockDim = dim3(BLOCK_SIZE_G, 1, 1)
    gridDim = dim3(GRID_SIZE_G, 1, 1)

    # GPU pass.  The timer starts before the first cudaThreadSynchronize(),
    # so that call's duration is part of the measured GPU time.
    # NOTE(review): confirm that including it is intended.
    gpu_start = time()
    cudaThreadSynchronize()
    for _ in range(loops):
        cudaConfigureCall(gridDim, blockDim, 0, 0)
        gpuGFLOPS()
    cudaThreadSynchronize()
    t0 = time() - gpu_start
    cudaThreadExit()

    # Operation counts for the whole run, scaled by 1e-9.
    # NOTE(review): 4096. is presumably the flop count per thread per
    # iteration -- confirm against the kernel source.
    scale = 1.e-9 * float(loops)
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G
    flopsc *= scale
    flopsg *= scale

    # CPU pass, timed the same way.
    cpu_start = time()
    for _ in range(loops):
        cpuGFLOPS()
    t1 = time() - cpu_start

    # Alternative peak for an 8600GTS:
    #    peakg = 4.*8.*2.*1.458 # 4MP*8SP/MP*2flops/SP/clock*clock[GHz]
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        t1, flopsc / t1, t0, flopsg / t0, peakg))

    # NOTE(review): 1.512 and 112 (= 14*8) match the clock and SP count
    # used in peakg, but both rates are *multiplied* by a clock-like
    # factor here; the intended unit of this row is not visible in this
    # function -- confirm before relying on it.
    print("%8.3f%8.2f" % (flopsc / t1 * 2.8, flopsg / t0 * 1.512 / 112))
Example #5
0
def main(loops=1):
    """Run the GPU and CPU GFLOPS benchmarks and print two summary rows.

    Args:
        loops: Repetition count applied to both benchmarks (default 1).

    The first row holds CPU seconds, CPU rate, GPU seconds, GPU rate and
    the hard-coded peak value in square brackets; a rate is the
    1e-9-scaled operation count divided by the elapsed seconds.  The
    second row holds the CPU rate times 2.8 and the GPU rate times
    1.512/112.
    """
    blockDim = dim3(BLOCK_SIZE_G, 1, 1)
    gridDim = dim3(GRID_SIZE_G, 1, 1)

    # GPU timing.  The clock is read before the initial
    # cudaThreadSynchronize(), so that call counts toward the GPU time.
    # NOTE(review): confirm that including it is intended.
    started = time()
    cudaThreadSynchronize()
    for _ in range(loops):
        cudaConfigureCall(gridDim, blockDim, 0, 0)
        gpuGFLOPS()
    cudaThreadSynchronize()
    t0 = time() - started
    cudaThreadExit()

    # Whole-run operation counts, scaled by 1e-9.
    # NOTE(review): the 4096. factor is presumably flops per thread per
    # iteration -- confirm against the kernel source.
    flopsc = 4096. * ITERATIONS_C * BLOCK_SIZE_C
    flopsg = 4096. * ITERATIONS_G * BLOCK_SIZE_G * GRID_SIZE_G
    flopsc *= 1.e-9 * float(loops)
    flopsg *= 1.e-9 * float(loops)

    # CPU timing.
    started = time()
    for _ in range(loops):
        cpuGFLOPS()
    t1 = time() - started

    # Alternative peak for an 8600GTS:
    #    peakg = 4.*8.*2.*1.458 # 4MP*8SP/MP*2flops/SP/clock*clock[GHz]
    peakg = 14. * 8. * 2. * 1.512  # 14MP*8SP/MP*2flops/SP/clock*clock[GHz] (9800GT)

    # Parenthesised single-argument print: identical output on Python 2,
    # and no longer a SyntaxError on Python 3.
    print("%8.3f%8.2f%8.3f%8.2f [%.2f]" % (
        t1, flopsc / t1, t0, flopsg / t0, peakg))

    # NOTE(review): 1.512 and 112 (= 14*8) match the clock and SP count
    # used in peakg, but both rates are *multiplied* by a clock-like
    # factor here; the intended unit of this row is not visible in this
    # function -- confirm before relying on it.
    print("%8.3f%8.2f" % (flopsc / t1 * 2.8, flopsg / t0 * 1.512 / 112))