Esempio n. 1
0
def check_fence_and_lock(gatype):
    if 0 == me:
        print '> Checking ga.fence and ga.lock',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    if not ga.create_mutexes(1):
        ga.error('ga.create_mutexes failed')
    if n < 2:
        ga.error('insufficient n to test ga.fence', n)
    ga.lock(0)
    a = ga.get(g_a) # get original values
    a[:,0] += 1 # add my contribution
    # need to use fence to assure that coms complete before leaving
    # critical section
    ga.init_fence()
    ga.put(g_a, a)
    ga.fence()
    ga.unlock(0)
    if not ga.destroy_mutexes():
        ga.error('mutex not destroyed')
    ga.sync()
    if 0 == me:
        a = ga.get(g_a)
        if not np.all(a[:,0] == nproc):
            ga.error('fence failed')
    if 0 == me:
        print 'OK'
Esempio n. 2
0
def check_zero(gatype):
    if 0 == me:
        print '> Checking zero ...',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    a = ga.get(g_a)
    if not np.all(a == 0):
        ga.error('ga.zero failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Esempio n. 3
0
def test2D():
    """Run the get/put/acc timing driver on a 1024 x 1024 double array.

    Creates the global array, zeroes it, fills a local buffer with 0.01
    and hands both to TestPutGetAcc twice: first with the last argument
    True (local ops), then False (remote ops).
    """
    size = 1024
    shape = (size, size)
    payload = np.zeros(shape, dtype=np.float64)
    # section sizes forwarded unchanged to TestPutGetAcc
    sections = np.asarray(
        [1, 3, 4, 9, 16, 24, 30, 48, 64, 91, 128, 171, 256, 353, 440, 512])
    handle = ga.create(ga.C_DBL, shape, 'a')
    if 0 == handle:
        ga.error('ga.create failed')
    payload.fill(0.01)
    ga.zero(handle)
    if 0 == me:
        banner = (' Performance of GA get, put & acc'
                  ' for square sections of array[%d,%d]' % shape)
        print(banner)
    lo, hi = ga.distribution(handle, me)
    # local ops first, then remote ops
    for local in (True, False):
        TestPutGetAcc(handle, size, sections, payload, lo, hi, local)
Esempio n. 4
0
def test2D():
    """Time GA get, put and acc for square sections of a 2-D array.

    A 1024 x 1024 ga.C_DBL array is created and zeroed, a same-shaped
    local buffer is set to 0.01, and TestPutGetAcc is invoked once for
    local operations and once for remote operations.
    """
    edge = 1024
    dims = (edge, edge)
    local_buf = np.zeros(dims, dtype=np.float64)
    chunk_list = [1, 3, 4, 9, 16, 24, 30, 48,
                  64, 91, 128, 171, 256, 353, 440, 512]
    chunk_arr = np.asarray(chunk_list)
    g_arr = ga.create(ga.C_DBL, dims, 'a')
    if 0 == g_arr:
        ga.error('ga.create failed')
    local_buf[:] = 0.01
    ga.zero(g_arr)
    if 0 == me:
        header = (' Performance of GA get, put & acc'
                  ' for square sections of array[%d,%d]')
        print(header % dims)
    bounds = ga.distribution(g_arr, me)
    lo, hi = bounds
    # local ops
    TestPutGetAcc(g_arr, edge, chunk_arr, local_buf, lo, hi, True)
    # remote ops
    TestPutGetAcc(g_arr, edge, chunk_arr, local_buf, lo, hi, False)
Esempio n. 5
0
def test1D():
    n = 1024*1024
    buf = np.zeros(n/4, dtype=np.float64)
    chunk = np.asarray([1,9,16,81,256,576,900,2304,4096,8281,
        16384,29241,65536,124609,193600,262144])
    g_a = ga.create(ga.C_DBL, (n,), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print ''
        print ''
        print ''
        print (' Performance of GA get, put & acc'
                ' for 1-dimensional sections of array[%d]' % n)
    lo,hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, False)
Esempio n. 6
0
def check_accumulate_overlap(gatype):
    if 0 == me:
        print '> Checking overlapping accumulate ...',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    ga.acc(g_a, [1], (n/2,n/2), (n/2+1,n/2+1), 1)
    ga.sync()
    if MIRROR:
        if 0 == iproc:
            x = abs(ga.get(g_a, (n/2,n/2), (n/2+1,n/2+1))[0,0] - lprocs)
            if not 0 == x:
                ga.error('overlapping accumulate failed -- expected %s got %s'%(
                        x, lprocs))
    else:
        if 0 == me:
            x = abs(ga.get(g_a, (n/2,n/2), (n/2+1,n/2+1))[0,0] - nproc)
            if not 0 == x:
                ga.error('overlapping accumulate failed -- expected %s got %s'%(
                        x, nproc))
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Esempio n. 7
0
def test1D():
    n = 1024 * 1024
    buf = np.zeros(n / 4, dtype=np.float64)
    chunk = np.asarray([
        1, 9, 16, 81, 256, 576, 900, 2304, 4096, 8281, 16384, 29241, 65536,
        124609, 193600, 262144
    ])
    g_a = ga.create(ga.C_DBL, (n, ), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print ''
        print ''
        print ''
        print(
            ' Performance of GA get, put & acc'
            ' for 1-dimensional sections of array[%d]' % n)
    lo, hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, False)
Esempio n. 8
0
# Number of processes and this process's id, as reported by GA.
nprocs = ga.nnodes()
myrank = ga.nodeid()

# One-element double array into which every process accumulates its share.
g_pi = ga.create(ga.C_DBL, [1])

# A single command-line argument supplies n and selects a single pass.
one_time = (len(sys.argv) == 2)
if one_time:
    n = int(sys.argv[1])

while True:
    if not one_time:
        # Every process calls ga.brdcst: rank 0 passes the value obtained
        # from get_n(), all other ranks pass 0.  n == 0 ends the loop.
        n = ga.brdcst(get_n() if myrank == 0 else 0)
        if n == 0:
            break
    ga.zero(g_pi)
    mypi = comp_pi(n, myrank, nprocs)
    ga.acc(g_pi, mypi)
    ga.sync()
    if myrank == 0:
        # rank 0 reads the accumulated value and reports it against PI
        pi = ga.get(g_pi)[0]
        prn_pi(pi, PI)
    if one_time:
        break

ga.destroy(g_pi)
Esempio n. 9
0
    v = np.dot(a, b)
    val = int(np.abs(np.sum(c - v)) > 0.0001)
    val = ga.gop_add(val)
    return val == 0


if __name__ == '__main__':
    if nproc > MULTIPLIER**3:
        if 0 == me:
            print "You must use less than %s processors" % (MULTIPLIER**3 + 1)
    else:
        g_a = ga.create(ga.C_DBL, [N, N])
        g_b = ga.create(ga.C_DBL, [N, N])
        g_c = ga.create(ga.C_DBL, [N, N])
        g_counter = ga.create(ga.C_INT, [1])
        ga.zero(g_counter)
        # put some fake data into input arrays A and B
        if me == 0:
            ga.put(g_a, np.random.random(N * N))
            ga.put(g_b, np.random.random(N * N))
        ga.sync()
        if me == 0:
            print "srumma...",
        srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER, g_counter)
        if me == 0:
            print "done"
        if me == 0:
            print "verifying using ga.gemm...",
        ok = verify_using_ga(g_a, g_b, g_c)
        if me == 0:
            if ok:
Esempio n. 10
0
def main():
    # TODO there's got to be a loopless, more pythonic way to do this
    ii = 0
    for i in range(num1*num1):
        ii += 1
        if ii > num1:
            ii = 0
        h0[i] = ii
    # compute times assuming 500 mflops and 5 second target time
    # ntimes = max(3.0, 5.0/(4.0-9*num**3))
    ntimes = 5

    for ii in range(howmany):
        num_m = nums_m[ii]
        num_n = nums_n[ii]
        num_k = nums_k[ii]
        a = 0.5/(num_m*num_n)
        if num_m > nummax or num_n > nummax or num_k > nummax:
            ga.error('Insufficient memory: check nummax')
        
        if BLOCK_CYCLIC:
            block_size = [128,128]
            g_c = ga.create_handle()
            ga.set_data(g_c, (num_m,num_n), ga.C_DBL)
            ga.set_array_name(g_c, 'g_c')
            ga.set_block_cyclic(g_c, block_size)
            if not ga.allocate(g_c):
                ga.error('create failed')
            block_size = [128,128]
            g_b = ga.create_handle()
            ga.set_data(g_b, (num_k,num_n), ga.C_DBL)
            ga.set_array_name(g_b, 'g_b')
            ga.set_block_cyclic(g_b, block_size)
            if not ga.allocate(g_b):
                ga.error('create failed')
            block_size = [128,128]
            g_a = ga.create_handle()
            ga.set_data(g_a, (num_m,num_k), ga.C_DBL)
            ga.set_array_name(g_a, 'g_a')
            ga.set_block_cyclic(g_a, block_size)
            if not ga.allocate(g_a):
                ga.error('create failed')
        else:
            g_a = ga.create(ga.C_DBL, (num_m,num_k), 'g_a')
            g_b = ga.create(ga.C_DBL, (num_k,num_n), 'g_b')
            g_c = ga.create(ga.C_DBL, (num_m,num_n), 'g_c')
            for handle in [g_a,g_b,g_c]:
                if 0 == handle:
                    ga.error('create failed')

        # initialize matrices A and B
        if 0 == me:
            load_ga(g_a, h0, num_m, num_k)
            load_ga(g_b, h0, num_k, num_n)
        ga.zero(g_c)
        ga.sync()

        if 0 == me:
            print '\nMatrix Multiplication C = A[%d,%d] x B[%d,%d]\n' % (
                    num_m, num_k, num_k, num_n)
            print ' %4s  %12s  %12s  %7s  %7s'%(
                    "Run#", "Time (seconds)", "mflops/proc",
                    "A trans", "B trans")
        avg_t[:] = 0
        avg_mf[:] = 0
        for itime in range(ntimes):
            for i in range(ntrans):
                ga.sync()
                ta = transa[i]
                tb = transb[i]
                t1 = time.time()
                ga.gemm(ta,tb,num_m,num_n,num_k,1,g_a,g_b,0,g_c)
                t1 = time.time() - t1
                if 0 == me:
                    mf = 2*num_m*num_n*num_k/t1*10**-6/nproc
                    avg_t[i] += t1
                    avg_mf[i] += mf
                    print ' %4d  %12.4f  %12.1f  %7s  %7s'%(
                            itime+1, t1, mf, ta, tb)
                    if VERIFY and itime == 0:
                        verify_ga_gemm(ta, tb, num_m, num_n, num_k,
                                1.0, g_a, g_b, 0.0, g_c)
        if 0 == me:
            print ''
            for i in range(ntrans):
                print 'Average: %12.4f seconds %12.1f mflops/proc %s %s'%(
                            avg_t[i]/ntimes, avg_mf[i]/ntimes,
                            transa[i], transb[i])
            if VERIFY:
                print 'All ga.gemms are verified...O.K.'
def convergence_test_L2(g_a, g_b):
    """Report whether the change between g_a and g_b is below EPSILON.

    g_b is overwritten with the difference g_a - g_b; the value compared
    against EPSILON is ga.dot of that difference with itself.

    Returns the result of ``value < EPSILON``.
    """
    # g_b <- g_a - g_b (the difference replaces g_b's contents)
    ga.add(g_a, g_b, g_b, beta=-1)
    # dot product of the difference with itself, arrays treated as vectors
    change = ga.dot(g_b, g_b)
    if DEBUG:
        print_sync(change)
    converged = change < EPSILON
    return converged

# create GA, distribute entire rows
# (dim x dim array of ga.C_FLOAT; the (1,1) argument is presumably the ghost
# cell width per dimension -- confirm against the ga.create_ghosts docs)
g_a = ga.create_ghosts(ga.C_FLOAT, (dim,dim), (1,1), chunk=(0,dim))
# create a duplicate GA for the convergence test
g_b = ga.duplicate(g_a)

# start from an all-zero array
ga.zero(g_a)
# bounds returned by ga.distribution for g_a; the names suggest
# (row lo, col lo), (row hi, col hi) -- TODO confirm whether hi is inclusive
(rlo,clo),(rhi,chi) = ga.distribution(g_a)

def set_boundary_conditions_put(g_a):
    """Write the boundary values into g_a with a single ga.put from rank 0.

    Rank 0 builds a dim x dim float32 array of zeros, sets the top row to
    100, the left column to 75 and the right column to 50 (in that order,
    so later assignments win at the shared corners) and puts the whole
    array into g_a.  Because the entire array is written, this is only
    suitable for setting initial conditions.  All processes then sync.
    """
    if rank == 0:
        grid = np.zeros((dim, dim), dtype=np.float32)
        grid[0, :] = 100   # top row
        grid[:, 0] = 75    # left column
        grid[:, -1] = 50   # right column
        ga.put(g_a, grid)
    ga.sync()

def set_boundary_conditions_access(g_a):
    """Return True when the global sum of per-process mismatch flags is zero.

    NOTE(review): the body does not match the function name and never uses
    the ``g_a`` parameter; it reads ``g_c``, ``a`` and ``b``, which are not
    defined in this function and must come from an enclosing scope.  This
    looks like the tail of a matrix-product verification routine -- confirm
    against the original source before relying on it.
    """
    # fetch the contents of g_c (not a parameter of this function)
    c = ga.get(g_c)
    # reference product computed locally with NumPy
    v = np.dot(a,b)
    # 1 if the absolute value of the summed difference exceeds 1e-4, else 0
    val = int(np.abs(np.sum(c-v))>0.0001)
    # combine the flags via ga.gop_add (presumably a sum across processes)
    val = ga.gop_add(val)
    return val == 0

if __name__ == '__main__':
    if nproc > MULTIPLIER**3:
        if 0 == me:
            print "You must use less than %s processors" % (MULTIPLIER**3+1)
    else:
        g_a = ga.create(ga.C_DBL, [N,N])
        g_b = ga.create(ga.C_DBL, [N,N])
        g_c = ga.create(ga.C_DBL, [N,N])
        g_counter = ga.create(ga.C_INT, [1])
        ga.zero(g_counter)
        # put some fake data into input arrays A and B
        if me == 0:
            ga.put(g_a, np.random.random(N*N))
            ga.put(g_b, np.random.random(N*N))
        ga.sync()
        if me == 0:
            print "srumma...",
        srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER, g_counter)
        if me == 0:
            print "done"
        if me == 0:
            print "verifying using ga.gemm...",
        ok = verify_using_ga(g_a, g_b, g_c)
        if me == 0:
            if ok:
Esempio n. 13
0
# Process count and this process's id from the GA runtime.
nprocs = ga.nnodes()
myrank = ga.nodeid()

# Global accumulator: a single double.
g_pi = ga.create(ga.C_DBL, [1])

# When exactly one argument is given it is n, and the loop runs once.
one_time = len(sys.argv) == 2
if one_time:
    n = int(sys.argv[1])

while True:
    if not one_time:
        # rank 0 contributes the value from get_n(); every other rank
        # contributes 0 to the same ga.brdcst call
        if myrank == 0:
            n = ga.brdcst(get_n())
        else:
            n = ga.brdcst(0)
        # n == 0 terminates the loop
        if n == 0:
            break
    ga.zero(g_pi)
    # each process adds its partial result to the shared element
    ga.acc(g_pi, comp_pi(n, myrank, nprocs))
    ga.sync()
    if myrank == 0:
        pi = ga.get(g_pi)[0]
        prn_pi(pi, PI)
    if one_time:
        break

ga.destroy(g_pi)