Beispiel #1
0
def main(device, vlength=128, loops=1):
    """Benchmark the "gpuSAXPY" device function against cpuSAXPY.

    Allocates three host vectors of ``vlength`` c_float elements, launches
    the kernel ``loops`` times, copies the device result back, then runs
    cpuSAXPY the same number of times. Prints one line holding the vector
    length followed by the CPU rate and the GPU rate, and, when the
    module-level ``checkErrorFlag`` is true, a second line with the values
    returned by checkError.

    The rate is ``2e-9 * vlength * loops`` divided by the elapsed time; a
    zero elapsed time is reported as an infinite rate instead of raising
    ZeroDivisionError.

    Args:
        device: object whose ``functions`` mapping contains "gpuSAXPY".
        vlength: number of elements in each vector.
        loops: how many times each implementation is executed.
    """
    alfa = c_float(.5)
    n2 = vlength  # vector length
    gpuSAXPY = device.functions["gpuSAXPY"]

    h_X = (c_float * n2)()
    h_Y = (c_float * n2)()
    g_Y = (c_float * n2)()  # receives the device result

    fixedInit(h_X)

    # Two operations per element per loop, scaled by 1e-9.
    flops = (2.e-9 * n2) * float(loops)

    d_X = d_Y = None
    try:
        d_X = getMemory(h_X)
        d_Y = getMemory(h_Y)

        cuFuncSetBlockShape(gpuSAXPY, BLOCK_SIZE, 1, 1)
        # NOTE(review): byte offsets 0/4/8/12 and the total size of 16 assume
        # four 4-byte parameters; confirm device pointers fit in 4 bytes on
        # the target platform.
        cuParamSetf(gpuSAXPY, 0, alfa)
        cuParamSeti(gpuSAXPY, 4, d_X)
        cuParamSeti(gpuSAXPY, 8, d_Y)
        cuParamSeti(gpuSAXPY, 12, n2)
        cuParamSetSize(gpuSAXPY, 16)

        # Synchronize before starting the clock so that only the launches
        # below are timed.
        cuCtxSynchronize()
        t0 = time()
        for _ in range(loops):
            cuLaunchGrid(gpuSAXPY, GRID_SIZE, 1)
        cuCtxSynchronize()
        t0 = time() - t0

        # S4 is presumably the size in bytes of one c_float -- confirm.
        cuMemcpyDtoH(g_Y, d_Y, n2 * S4)
        cuCtxSynchronize()
    finally:
        # Release whatever was allocated even if a call above raised.
        for d_ptr in (d_X, d_Y):
            if d_ptr is not None:
                cuMemFree(d_ptr)

    t1 = time()
    for _ in range(loops):
        cpuSAXPY(alfa, h_X, h_Y)
    t1 = time() - t1

    # A coarse timer can report zero elapsed time for short vectors.
    cpu_rate = flops / t1 if t1 else float("inf")
    gpu_rate = flops / t0 if t0 else float("inf")
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("%10d%6.2f%6.2f" % (vlength, cpu_rate, gpu_rate))

    if checkErrorFlag:
        err, mxe = checkError(h_Y, g_Y)
        print("Avg and max rel error = %.2e %.2e" % (err, mxe))
Beispiel #2
0
def main(device, vlength=128, loops=1):
    """Time the "gpuSAXPY" kernel and cpuSAXPY on the same input vectors.

    Builds host vectors of ``vlength`` c_float elements, runs the kernel
    ``loops`` times, fetches the device output, and then runs cpuSAXPY
    ``loops`` times on the host. One line is printed with the vector
    length, the CPU rate and the GPU rate; if the module-level
    ``checkErrorFlag`` is true, the two values returned by checkError are
    printed as well.

    Rates are ``2e-9 * vlength * loops`` over the elapsed time. When the
    elapsed time is zero the rate is printed as infinity rather than
    raising ZeroDivisionError.

    Args:
        device: object exposing a ``functions`` mapping with a "gpuSAXPY"
            entry.
        vlength: number of elements in each vector.
        loops: repetition count for both the GPU and the CPU run.
    """
    alfa = c_float(0.5)
    n2 = vlength  # vector length
    gpuSAXPY = device.functions["gpuSAXPY"]

    h_X = (c_float * n2)()
    h_Y = (c_float * n2)()
    g_Y = (c_float * n2)()  # host buffer for the device result

    fixedInit(h_X)

    # Two operations per element per loop, scaled by 1e-9.
    flops = (2.0e-9 * n2) * float(loops)

    d_X = d_Y = None
    try:
        d_X = getMemory(h_X)
        d_Y = getMemory(h_Y)

        cuFuncSetBlockShape(gpuSAXPY, BLOCK_SIZE, 1, 1)
        # NOTE(review): the 0/4/8/12 offsets and size 16 assume every
        # parameter occupies 4 bytes; verify for device pointers on the
        # target platform.
        cuParamSetf(gpuSAXPY, 0, alfa)
        cuParamSeti(gpuSAXPY, 4, d_X)
        cuParamSeti(gpuSAXPY, 8, d_Y)
        cuParamSeti(gpuSAXPY, 12, n2)
        cuParamSetSize(gpuSAXPY, 16)

        # Synchronize first so the timed region covers only the launches.
        cuCtxSynchronize()
        t0 = time()
        for _ in range(loops):
            cuLaunchGrid(gpuSAXPY, GRID_SIZE, 1)
        cuCtxSynchronize()
        t0 = time() - t0

        # S4 is presumably the byte size of one c_float -- confirm.
        cuMemcpyDtoH(g_Y, d_Y, n2 * S4)
        cuCtxSynchronize()
    finally:
        # Free device buffers on both the success and the error path.
        for d_ptr in (d_X, d_Y):
            if d_ptr is not None:
                cuMemFree(d_ptr)

    t1 = time()
    for _ in range(loops):
        cpuSAXPY(alfa, h_X, h_Y)
    t1 = time() - t1

    # Guard against a timer too coarse to resolve a short run.
    cpu_rate = flops / t1 if t1 else float("inf")
    gpu_rate = flops / t0 if t0 else float("inf")
    # Single-argument print in parentheses works on Python 2 and Python 3.
    print("%10d%6.2f%6.2f" % (vlength, cpu_rate, gpu_rate))

    if checkErrorFlag:
        err, mxe = checkError(h_Y, g_Y)
        print("Avg and max rel error = %.2e %.2e" % (err, mxe))
Beispiel #3
0
def main(vlength=128, loops=1):
    """Benchmark gpuSAXPY (runtime-API launch) against cpuSAXPY.

    Allocates host vectors of ``vlength`` c_float elements, configures and
    calls gpuSAXPY ``loops`` times, copies the device result back, frees the
    device buffers, calls cudaThreadExit, and then runs cpuSAXPY ``loops``
    times. Prints one line holding the vector length, the CPU rate and the
    GPU rate, and, when the module-level ``checkErrorFlag`` is true, the two
    values returned by checkError.

    Rates are ``2e-9 * vlength * loops`` divided by the elapsed time; a zero
    elapsed time is reported as an infinite rate instead of raising
    ZeroDivisionError.

    Args:
        vlength: number of elements in each vector.
        loops: how many times each implementation is executed.
    """
    alfa = c_float(.5)
    n2 = vlength  # vector length

    h_X = (c_float * n2)()
    h_Y = (c_float * n2)()
    g_Y = (c_float * n2)()  # receives the device result

    fixedInit(h_X)

    # Two operations per element per loop, scaled by 1e-9.
    flops = (2.e-9 * n2) * float(loops)

    d_X = d_Y = None
    try:
        d_X = getMemory(h_X)
        d_Y = getMemory(h_Y)

        blockDim = dim3(BLOCK_SIZE, 1, 1)
        gridDim = dim3(GRID_SIZE, 1, 1)

        # Synchronize BEFORE reading the clock: otherwise the time spent
        # waiting for earlier work is charged to the kernel launches.
        cudaThreadSynchronize()
        t0 = time()
        for _ in range(loops):
            cudaConfigureCall(gridDim, blockDim, 0, 0)
            gpuSAXPY(alfa, d_X, d_Y, n2)
        cudaThreadSynchronize()
        t0 = time() - t0

        # S4 is presumably the size in bytes of one c_float -- confirm.
        cudaMemcpy(g_Y, d_Y, S4 * n2, cudaMemcpyDeviceToHost)
        cudaThreadSynchronize()
    finally:
        # Release whatever was allocated even if a call above raised.
        for d_ptr in (d_X, d_Y):
            if d_ptr is not None:
                cudaFree(d_ptr)

    cudaThreadExit()

    t1 = time()
    for _ in range(loops):
        cpuSAXPY(alfa, h_X, h_Y)
    t1 = time() - t1

    # A coarse timer can report zero elapsed time for short vectors.
    cpu_rate = flops / t1 if t1 else float("inf")
    gpu_rate = flops / t0 if t0 else float("inf")
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("%10d%6.2f%6.2f" % (vlength, cpu_rate, gpu_rate))

    if checkErrorFlag:
        err, mxe = checkError(h_Y, g_Y)
        print("Avg and max rel error = %.2e %.2e" % (err, mxe))
Beispiel #4
0
def main(vlength=128, loops=1):
    """Time gpuSAXPY (runtime-API launch) and cpuSAXPY on the same vectors.

    Creates host vectors of ``vlength`` c_float elements, issues
    cudaConfigureCall followed by gpuSAXPY ``loops`` times, copies the
    device output to the host, frees the device buffers, calls
    cudaThreadExit, and finally runs cpuSAXPY ``loops`` times. One line is
    printed with the vector length, the CPU rate and the GPU rate; if the
    module-level ``checkErrorFlag`` is true, the two values returned by
    checkError are printed too.

    Rates are ``2e-9 * vlength * loops`` over the elapsed time. When the
    elapsed time is zero the rate is printed as infinity rather than
    raising ZeroDivisionError.

    Args:
        vlength: number of elements in each vector.
        loops: repetition count for both the GPU and the CPU run.
    """
    alfa = c_float(.5)
    n2 = vlength  # vector length

    h_X = (c_float * n2)()
    h_Y = (c_float * n2)()
    g_Y = (c_float * n2)()  # host buffer for the device result

    fixedInit(h_X)

    # Two operations per element per loop, scaled by 1e-9.
    flops = (2.e-9 * n2) * float(loops)

    d_X = d_Y = None
    try:
        d_X = getMemory(h_X)
        d_Y = getMemory(h_Y)

        blockDim = dim3(BLOCK_SIZE, 1, 1)
        gridDim = dim3(GRID_SIZE, 1, 1)

        # The clock must start after this synchronization so that waiting
        # for earlier work is not counted as kernel time.
        cudaThreadSynchronize()
        t0 = time()
        for _ in range(loops):
            cudaConfigureCall(gridDim, blockDim, 0, 0)
            gpuSAXPY(alfa, d_X, d_Y, n2)
        cudaThreadSynchronize()
        t0 = time() - t0

        # S4 is presumably the byte size of one c_float -- confirm.
        cudaMemcpy(g_Y, d_Y, S4 * n2, cudaMemcpyDeviceToHost)
        cudaThreadSynchronize()
    finally:
        # Free device buffers on both the success and the error path.
        for d_ptr in (d_X, d_Y):
            if d_ptr is not None:
                cudaFree(d_ptr)

    cudaThreadExit()

    t1 = time()
    for _ in range(loops):
        cpuSAXPY(alfa, h_X, h_Y)
    t1 = time() - t1

    # Guard against a timer too coarse to resolve a short run.
    cpu_rate = flops / t1 if t1 else float("inf")
    gpu_rate = flops / t0 if t0 else float("inf")
    # Single-argument print in parentheses works on Python 2 and Python 3.
    print("%10d%6.2f%6.2f" % (vlength, cpu_rate, gpu_rate))

    if checkErrorFlag:
        err, mxe = checkError(h_Y, g_Y)
        print("Avg and max rel error = %.2e %.2e" % (err, mxe))