Example #1
def test_dot():
    A1 = np.float32(np.random.rand(2,4))
    A2 = np.float32(np.random.rand(4,2))
    B1 = gpu.array(A1)
    B2 = gpu.array(A2)
    B3 = gpu.dot(B1,B2)
    C = B3.tocpu()


    t.assert_array_almost_equal(np.dot(A1,A2),C,4,"gpu.dot result does not match np.dot!")
    B1 = gpu.array(A1)
    B2 = gpu.array(A2)
    B3 = gpu.empty((2,2))

    gpu.dot(B1,B2,B3)

    t.assert_array_almost_equal(np.dot(A1,A2),B3.tocpu(),4,"gpu.dot result does not match np.dot!")
Example #2
def test_dot():
    A1 = np.float32(np.random.rand(2, 4))
    A2 = np.float32(np.random.rand(4, 2))
    B1 = gpu.array(A1)
    B2 = gpu.array(A2)
    B3 = gpu.dot(B1, B2)
    C = B3.tocpu()

    t.assert_array_almost_equal(np.dot(A1, A2), C, 4,
                                "gpu.dot result does not match np.dot!")
    B1 = gpu.array(A1)
    B2 = gpu.array(A2)
    B3 = gpu.empty((2, 2))

    gpu.dot(B1, B2, B3)

    t.assert_array_almost_equal(np.dot(A1, A2), B3.tocpu(), 4,
                                "gpu.dot result does not match np.dot!")
Example #3
def test_timer():
    if gpu.lib.pt_clusterNet == gpu.lib.pt_clusterNetCPU: return
    t = gpu.Timer()
    A = gpu.rand(100, 100)
    B = gpu.rand(100, 100)
    C = gpu.rand(100, 100)
    time = 0

    t.tick()
    for i in range(10):
        gpu.dot(A, B, C)
    time = t.tock()
    assert time > 0

    time = 0
    t.tick("Timer test")
    gpu.dot(A, B, C)
    time = t.tock("Timer test")
    assert time > 0

    accumulative_time = 0
    for i in range(100):
        t.tick('cumulative')
        gpu.dot(A, B, C)
        t.tick('cumulative')
    accumulative_time = t.tock('cumulative')

    assert accumulative_time > 5 * time
Example #4
def test_timer():
    if gpu.lib.pt_clusterNet == gpu.lib.pt_clusterNetCPU: return    
    t = gpu.Timer()
    A = gpu.rand(100,100)
    B = gpu.rand(100,100)
    C = gpu.rand(100,100)
    time = 0

    t.tick()
    for i in range(10):
        gpu.dot(A,B,C)
    time = t.tock()
    assert time > 0

    time = 0
    t.tick("Timer test")
    gpu.dot(A,B,C)
    time = t.tock("Timer test")
    assert time > 0

    accumulative_time = 0
    for i in range(100):
        t.tick('cumulative')
        gpu.dot(A,B,C)
        t.tick('cumulative')
    accumulative_time = t.tock('cumulative')

    assert accumulative_time > 5*time
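Both timer listings rely on the same toggle pattern: tick(label) opens a timed region, a second tick with the same label closes it and accumulates the elapsed time, and tock(label) closes any still-open region, returns the total in milliseconds, and resets the label. That reading is inferred from the tests themselves (tick is called twice per iteration of the 100-call loop, tock only once afterwards, and the cumulative total is asserted to exceed 5 times a single timed call); the early return when gpu.lib.pt_clusterNet equals gpu.lib.pt_clusterNetCPU also shows the timer is only exercised on the GPU backend. The class below is a minimal host-side sketch of those assumed semantics; ToggleTimer is a hypothetical stand-in, not the library's gpu.Timer.

import time


class ToggleTimer(object):
    # Hypothetical host-side stand-in for the assumed gpu.Timer semantics.

    def __init__(self):
        self.start = {}    # label -> timestamp of a currently open region
        self.elapsed = {}  # label -> accumulated seconds

    def tick(self, label='default'):
        # The first tick with a label opens a region; the next tick with the
        # same label closes it and adds the elapsed time to that label's total.
        if label in self.start:
            self.elapsed[label] = (self.elapsed.get(label, 0.0)
                                   + time.time() - self.start.pop(label))
        else:
            self.start[label] = time.time()

    def tock(self, label='default'):
        # Close a still-open region if there is one, then return and reset
        # the accumulated total in milliseconds.
        if label in self.start:
            self.tick(label)
        return self.elapsed.pop(label, 0.0) * 1000.0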
Example #5
    input = gpu.rand(dim_inner,dim1)
    W = gpu.rand(dim_outer,dim_inner)
    output = gpu.rand(dim_outer,dim1)
    
    input2 = gpu2.random.rand(dim_inner,dim1)
    W2 = gpu2.random.rand(dim_outer,dim_inner)
    output2 = gpu2.random.rand(dim_outer,dim1)


mean_time = 0
for i in range(5):
    iters = 100
    #warmup
    for j in range(1000):
        if batch_first_mode:
            gpu.dot(input,W,output)
        else:
            gpu.dot(W, input, output)
    t.tick(str(dim_inner))
    for j in range(iters):
        if batch_first_mode:
            gpu.dot(input,W,output)
        else:
            gpu.dot(W, input, output)
    t.tick(str(dim_inner))
    
print t.tock(str(dim_inner))/5/iters



mean_time = 0
dim_inner = 32
dim_outer = 256
for i in range(1000):
    dim_inner += 32
    A = gpu.rand(dim1, dim_inner)
    B = gpu.rand(dim_inner, dim_outer)
    C = gpu.rand(dim1, dim_outer)

    if dim_inner > 0: iters = 1000
    if dim_inner > 100: iters = 100
    if dim_inner > 1000: iters = 10
    if dim_inner > 3000: iters = 4

    #warmup
    for j in range(2):
        gpu.dot(A, B, C)
    t.tick(str(dim_inner))
    for j in range(iters):
        gpu.dot(A, B, C)
    sec = t.tock(str(dim_inner)) / 1000.
    tilesA = (dim1 / 16) * ((dim_inner / 64) +
                            (1 if dim_inner % 64 > 0 else 0))
    tilesB = ((dim_inner / 64) + (1 if dim_inner % 64 > 0 else 0)) * (
        (dim_inner / 16) * ((dim_outer / 64) +
                            (1 if dim_outer % 64 > 0 else 0)))
    memops = (tilesA + tilesB) * 16 * 64 + (dim_inner * dim_outer)

    #print sec / (memops*iters)
    #print (memops/sec)*4*(1024**-3)*iters
    #print iters*(dim**3)/(sec*1000*1000*1000)
    #print iters*(dim1*dim_inner*dim_outer)/(sec*1000*1000*1000)
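The commented-out prints at the end of this benchmark loop are the usual throughput figures for a dim1 x dim_inner by dim_inner x dim_outer GEMM: effective bandwidth derived from the tile-based memops estimate (4 bytes per float32, converted to GiB), and arithmetic rate from dim1*dim_inner*dim_outer multiply-accumulates per iteration. The helper below is a sketch that mirrors those formulas, assuming sec already holds the measured seconds for iters calls as in the listing; note the original comments count multiply-accumulates, so the conventional GFLOP/s figure is twice that number.

def throughput_report(dim1, dim_inner, dim_outer, iters, sec, memops):
    # sec    -- measured seconds for `iters` GEMM calls (t.tock(...) / 1000.)
    # memops -- estimated float32 elements moved per GEMM (tile estimate above)
    bytes_per_float = 4
    # Effective bandwidth in GiB/s, mirroring (memops/sec)*4*(1024**-3)*iters.
    gib_per_s = memops * iters * bytes_per_float / sec * (1024.0 ** -3)
    # Billions of multiply-accumulates per second, mirroring
    # iters*(dim1*dim_inner*dim_outer)/(sec*1e9).
    gmacs = iters * (dim1 * dim_inner * dim_outer) / (sec * 1e9)
    return gib_per_s, gmacs, 2.0 * gmacs

For instance, 100 iterations of a 128 x 1024 x 256 GEMM measured at 0.01 s come out to 100*128*1024*256 / (0.01*1e9), about 336 G-MACs/s, or roughly 670 GFLOP/s.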
Example #7
else:
    input = gpu.rand(dim_inner, dim1)
    W = gpu.rand(dim_outer, dim_inner)
    output = gpu.rand(dim_outer, dim1)

    input2 = gpu2.random.rand(dim_inner, dim1)
    W2 = gpu2.random.rand(dim_outer, dim_inner)
    output2 = gpu2.random.rand(dim_outer, dim1)

mean_time = 0
for i in range(5):
    iters = 100
    #warmup
    for j in range(1000):
        if batch_first_mode:
            gpu.dot(input, W, output)
        else:
            gpu.dot(W, input, output)
    t.tick(str(dim_inner))
    for j in range(iters):
        if batch_first_mode:
            gpu.dot(input, W, output)
        else:
            gpu.dot(W, input, output)
    t.tick(str(dim_inner))

print t.tock(str(dim_inner)) / 5 / iters

mean_time = 0
for i in range(5):
    iters = 100
dim_inner = 32
dim_outer = 256
for i in range(1000):
    dim_inner += 32
    A = gpu.rand(dim1,dim_inner)
    B = gpu.rand(dim_inner,dim_outer)
    C = gpu.rand(dim1,dim_outer)
    
    if dim_inner > 0: iters = 1000
    if dim_inner > 100: iters = 100
    if dim_inner > 1000: iters = 10
    if dim_inner > 3000: iters = 4
    
    #warmup
    for j in range(2):
        gpu.dot(A,B,C)
    t.tick(str(dim_inner))
    for j in range(iters):
        gpu.dot(A,B,C)
    sec = t.tock(str(dim_inner))/1000.
    tilesA = (dim1/16)*((dim_inner/64) + (1 if dim_inner % 64 > 0 else 0))
    tilesB = ((dim_inner/64) + (1 if dim_inner % 64 > 0 else 0))*((dim_inner/16)*((dim_outer/64) + (1 if dim_outer % 64 > 0 else 0)))
    memops = (tilesA+tilesB)*16*64 + (dim_inner*dim_outer)
    
    #print sec / (memops*iters)    
    #print (memops/sec)*4*(1024**-3)*iters
    #print iters*(dim**3)/(sec*1000*1000*1000)
    #print iters*(dim1*dim_inner*dim_outer)/(sec*1000*1000*1000)
    print iters*dim1*dim_inner*dim_outer/(6144.*1000*1000*1000)*24, sec
    
    A2 = gpu.rand(dim1,dim_inner)