Example #1
0
def main(vlength = 128,loops = 1):
    """Time gpuBLSC against cpuBLSC on random inputs and print both rates.

    Builds five c_float vectors of length ``vlength`` (inputs S, X, T and
    outputs C, P), launches ``gpuBLSC`` ``loops`` times, copies the device
    results back, then runs ``cpuBLSC`` ``loops`` times on the host.  One
    line is printed holding ``vlength``, the host rate and the device rate,
    each computed as ``2e-6 * vlength * loops / elapsed``.  When the
    module-level ``checkErrorFlag`` is true, the first value returned by
    ``checkError`` for the call and put results is printed as well.

    Device buffers are released even when a launch or copy raises.

    Args:
        vlength: number of elements in every vector.
        loops: how many times each implementation is invoked inside its
            timed region.
    """
    n2 = vlength ## Vector length

    # Work estimate shared by both rate figures; it does not depend on timing.
    flops = (2.e-6*n2)*float(loops)

    def _rate(elapsed):
        # A timer too coarse to see the run reports zero elapsed time;
        # report an infinite rate instead of raising ZeroDivisionError.
        if elapsed > 0:
            return flops/elapsed
        return float("inf")

    # Host buffers: S, X, T are inputs, C and P receive the host results.
    h_S = (c_float*n2)()
    h_X = (c_float*n2)()
    h_T = (c_float*n2)()
    h_C = (c_float*n2)()
    h_P = (c_float*n2)()

    randInit(h_S,5.,30.)
    randInit(h_X,1.,100.)
    randInit(h_T,.25,10.)
    R,V = .03,.3

    # Every buffer obtained so far is tracked so the finally clause can
    # release exactly what was acquired, even if a later step fails.
    device_buffers = []
    try:
        # NOTE(review): getMemory presumably allocates a device buffer and
        # uploads the host data -- confirm against its definition.
        for host_vec in (h_S,h_X,h_T,h_C,h_P):
            device_buffers.append(getMemory(host_vec))
        d_S,d_X,d_T,d_C,d_P = device_buffers

        blockDim  = dim3(BLOCK_SIZE,1,1)
        gridDim   = dim3(GRID_SIZE,1,1)

        # Synchronize on both sides of the timed region so only the
        # launches issued in the loop are measured.
        cudaThreadSynchronize()
        t0 = time()
        for _ in range(loops):
            cudaConfigureCall(gridDim,blockDim,0,0)
            gpuBLSC(d_C,d_P,d_S,d_X,d_T,R,V,n2)
        cudaThreadSynchronize()
        t0 = time()-t0

        # Fetch the device results into fresh host buffers for comparison.
        g_C = (c_float*n2)()
        g_P = (c_float*n2)()
        cudaMemcpy(g_C,d_C,S4*n2,cudaMemcpyDeviceToHost)
        cudaMemcpy(g_P,d_P,S4*n2,cudaMemcpyDeviceToHost)
        cudaThreadSynchronize()
    finally:
        for dev_buf in device_buffers:
            cudaFree(dev_buf)

    cudaThreadExit()

    t1 = time()
    for _ in range(loops):
        cpuBLSC(h_C,h_P,h_S,h_X,h_T,R,V,n2)
    t1 = time()-t1
    # Single-argument parenthesised print emits the same text under
    # Python 2 and Python 3.
    print("%10d%10.2f%10.2f" % (vlength,_rate(t1),_rate(t0)))

    if checkErrorFlag:
        # Only the first value from checkError is reported; the second is
        # unpacked to keep the two-value contract but otherwise unused.
        err,_unused = checkError(h_C,g_C)
        print("Avg rel error (call) = %.2e" % (err,))
        err,_unused = checkError(h_P,g_P)
        print("Avg rel error (put)  = %.2e" % (err,))
Example #2
0
def main(vlength = 128,loops = 1):
    """Time gpuBLSC against cpuBLSC on random inputs and print both rates.

    Builds five c_float vectors of length ``vlength`` (inputs S, X, T and
    outputs C, P), launches ``gpuBLSC`` ``loops`` times, copies the device
    results back, then runs ``cpuBLSC`` ``loops`` times on the host.  One
    line is printed holding ``vlength``, the host rate and the device rate,
    each computed as ``2e-6 * vlength * loops / elapsed``.  When the
    module-level ``checkErrorFlag`` is true, the first value returned by
    ``checkError`` for the call and put results is printed as well.

    Device buffers are released even when a launch or copy raises.

    Args:
        vlength: number of elements in every vector.
        loops: how many times each implementation is invoked inside its
            timed region.
    """
    n2 = vlength ## Vector length

    # Work estimate shared by both rate figures; it does not depend on timing.
    flops = (2.e-6*n2)*float(loops)

    def _rate(elapsed):
        # A timer too coarse to see the run reports zero elapsed time;
        # report an infinite rate instead of raising ZeroDivisionError.
        if elapsed > 0:
            return flops/elapsed
        return float("inf")

    # Host buffers: S, X, T are inputs, C and P receive the host results.
    h_S = (c_float*n2)()
    h_X = (c_float*n2)()
    h_T = (c_float*n2)()
    h_C = (c_float*n2)()
    h_P = (c_float*n2)()

    randInit(h_S,5.,30.)
    randInit(h_X,1.,100.)
    randInit(h_T,.25,10.)
    R,V = .03,.3

    # Every buffer obtained so far is tracked so the finally clause can
    # release exactly what was acquired, even if a later step fails.
    device_buffers = []
    try:
        # NOTE(review): getMemory presumably allocates a device buffer and
        # uploads the host data -- confirm against its definition.
        for host_vec in (h_S,h_X,h_T,h_C,h_P):
            device_buffers.append(getMemory(host_vec))
        d_S,d_X,d_T,d_C,d_P = device_buffers

        blockDim  = dim3(BLOCK_SIZE,1,1)
        gridDim   = dim3(GRID_SIZE,1,1)

        # Synchronize on both sides of the timed region so only the
        # launches issued in the loop are measured.
        cudaThreadSynchronize()
        t0 = time()
        for _ in range(loops):
            cudaConfigureCall(gridDim,blockDim,0,0)
            gpuBLSC(d_C,d_P,d_S,d_X,d_T,R,V,n2)
        cudaThreadSynchronize()
        t0 = time()-t0

        # Fetch the device results into fresh host buffers for comparison.
        g_C = (c_float*n2)()
        g_P = (c_float*n2)()
        cudaMemcpy(g_C,d_C,S4*n2,cudaMemcpyDeviceToHost)
        cudaMemcpy(g_P,d_P,S4*n2,cudaMemcpyDeviceToHost)
        cudaThreadSynchronize()
    finally:
        for dev_buf in device_buffers:
            cudaFree(dev_buf)

    cudaThreadExit()

    t1 = time()
    for _ in range(loops):
        cpuBLSC(h_C,h_P,h_S,h_X,h_T,R,V,n2)
    t1 = time()-t1
    # Single-argument parenthesised print emits the same text under
    # Python 2 and Python 3.
    print("%10d%10.2f%10.2f" % (vlength,_rate(t1),_rate(t0)))

    if checkErrorFlag:
        # Only the first value from checkError is reported; the second is
        # unpacked to keep the two-value contract but otherwise unused.
        err,_unused = checkError(h_C,g_C)
        print("Avg rel error (call) = %.2e" % (err,))
        err,_unused = checkError(h_P,g_P)
        print("Avg rel error (put)  = %.2e" % (err,))
Example #3
0
def main(device,vlength = 128,loops = 1):
    """Time the device's "gpuBLSC" function against cpuBLSC and print rates.

    Looks up ``gpuBLSC`` in ``device.functions``, builds five c_float
    vectors of length ``vlength`` (inputs S, X, T and outputs C, P), sets
    the kernel's block shape and parameters, launches it ``loops`` times,
    copies the device results back, then runs ``cpuBLSC`` ``loops`` times
    on the host.  One line is printed holding ``vlength``, the host rate
    and the device rate, each computed as
    ``2e-6 * vlength * loops / elapsed``.  When the module-level
    ``checkErrorFlag`` is true, the first value returned by ``checkError``
    for the call and put results is printed as well.

    Device buffers are released even when setup, a launch or a copy raises.

    Args:
        device: object whose ``functions`` mapping holds the "gpuBLSC"
            kernel handle.
        vlength: number of elements in every vector.
        loops: how many times each implementation is invoked inside its
            timed region.
    """
    n2 = vlength ## Vector length

    gpuBLSC = device.functions["gpuBLSC"]

    # Work estimate shared by both rate figures; it does not depend on timing.
    flops = (2.e-6*n2)*float(loops)

    def _rate(elapsed):
        # A timer too coarse to see the run reports zero elapsed time;
        # report an infinite rate instead of raising ZeroDivisionError.
        if elapsed > 0:
            return flops/elapsed
        return float("inf")

    # Host buffers: S, X, T are inputs, C and P receive the host results.
    h_S = (c_float*n2)()
    h_X = (c_float*n2)()
    h_T = (c_float*n2)()
    h_C = (c_float*n2)()
    h_P = (c_float*n2)()

    randInit(h_S,5.,30.)
    randInit(h_X,1.,100.)
    randInit(h_T,.25,10.)
    R,V = .03,.3

    # Every buffer obtained so far is tracked so the finally clause can
    # release exactly what was acquired, even if a later step fails.
    device_buffers = []
    try:
        # NOTE(review): getMemory presumably allocates a device buffer and
        # uploads the host data -- confirm against its definition.
        for host_vec in (h_S,h_X,h_T,h_C,h_P):
            device_buffers.append(getMemory(host_vec))
        d_S,d_X,d_T,d_C,d_P = device_buffers

        cuFuncSetBlockShape(gpuBLSC,BLOCK_SIZE,1,1)
        # Parameters are laid out at 4-byte strides: five device pointers
        # at offsets 0..16, then R, V and n2 at 20, 24 and 28 (32 in all).
        # NOTE(review): pointer slots are 4 bytes wide here -- confirm this
        # matches the target platform's device pointer size.
        for slot,dev_ptr in enumerate((d_C,d_P,d_S,d_X,d_T)):
            cuParamSeti(gpuBLSC,4*slot,dev_ptr)
        cuParamSetf(gpuBLSC,20,R)
        cuParamSetf(gpuBLSC,24,V)
        cuParamSeti(gpuBLSC,28,n2)
        cuParamSetSize(gpuBLSC,32)

        # Synchronize on both sides of the timed region so only the
        # launches issued in the loop are measured.
        cuCtxSynchronize()
        t0 = time()
        for _ in range(loops):
            cuLaunchGrid(gpuBLSC,GRID_SIZE,1)
        cuCtxSynchronize()
        t0 = time()-t0

        # Fetch the device results into fresh host buffers for comparison.
        g_C = (c_float*n2)()
        g_P = (c_float*n2)()
        cuMemcpyDtoH(g_C,d_C,n2*S4)
        cuMemcpyDtoH(g_P,d_P,n2*S4)
        cuCtxSynchronize()
    finally:
        for dev_buf in device_buffers:
            cuMemFree(dev_buf)

    t1 = time()
    for _ in range(loops):
        cpuBLSC(h_C,h_P,h_S,h_X,h_T,R,V,n2)
    t1 = time()-t1
    # Single-argument parenthesised print emits the same text under
    # Python 2 and Python 3.
    print("%10d%10.2f%10.2f" % (vlength,_rate(t1),_rate(t0)))

    if checkErrorFlag:
        # Only the first value from checkError is reported; the second is
        # unpacked to keep the two-value contract but otherwise unused.
        err,_unused = checkError(h_C,g_C)
        print("Avg rel error (call) = %.2e" % (err,))
        err,_unused = checkError(h_P,g_P)
        print("Avg rel error (put)  = %.2e" % (err,))