Ejemplo n.º 1
0
def check_gather(gatype):
    if 0 == me:
        print '> Checking gather (might be slow)...',
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    if 0 == me:
        ga.put(g_a, a)
    ga.sync()
    ijv = np.zeros((m,2), dtype=np.int64)
    random.seed(ga.nodeid()*51 + 1) # different seed for each proc
    for j in range(10):
        itmp = None
        if MIRROR:
            itmp = random.randint(0,lprocs-1)
        else:
            itmp = random.randint(0,nproc-1)
        if itmp == me:
            for loop in range(m):
                ijv[loop,:] = (random.randint(0,n-1),random.randint(0,n-1))
                #if ijv[loop,0] > ijv[loop,1]:
                #    ijv[loop,:] = ijv[loop,::-1] # reverse
            result = ga.gather(g_a, ijv)
            for loop in range(m):
                value = ga.get(g_a, ijv[loop], ijv[loop]+1).flatten()
                if not result[loop] == value:
                    ga.error('gather failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 2
0
def verify(g_a, g_b):
    """Exercise skeleton: check that "b" equals "a" reversed.

    The fetch of "a" and "b" from the global arrays is intentionally left
    for the reader to fill in (see the ### instruction below); until then
    the names "a" and "b" are undefined in this function.
    """
    ### copy the entire block of data from the global array "g_a" into the
    ### local array "a" and similarly for "g_b" and "b".
    if not np.all(a[::-1] == b):
        print "Mismatch: a[::-1] is not equal to b"
        ga.error("verify failed")
    print "Transpose OK"
def verify(g_a, g_b):
    a = ga.get(g_a)
    b = ga.get(g_b)
    if not np.all(a[::-1] == b):
        print "Mismatch: a[::-1] is not equal to b"
        ga.error("verify failed")
    print "Transpose OK"
Ejemplo n.º 4
0
def check_dot(gatype):
    if 0 == me:
        print '> Checking dot ...',
    np.random.seed(12345) # everyone has same seed
    g_a = create_global_array(gatype)
    g_b = create_global_array(gatype)
    a = create_local_a(gatype)
    b = np.random.random_sample((n,n))
    if MIRROR:
        if 0 == iproc:
            ga.put(g_b, b)
            ga.put(g_a, a)
    else:
        if 0 == me:
            ga.put(g_b, b)
            ga.put(g_a, a)
    ga.sync()
    sum1 = np.sum(a*b)
    sum2 = ga.dot(g_a, g_b)
    if mismatch(sum1, sum2):
        ga.error('dot wrong %s != %s' % (sum1, sum2))
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
    ga.destroy(g_b)
Ejemplo n.º 5
0
def main():
    if 4 != nproc and 0 == me:
        ga.error('Program requires 4 GA processes; nproc=%s' % nproc)
    test2D()
    test1D()
    if 0 == me:
        print 'All tests successful'
Ejemplo n.º 6
0
def main():
    if 4 != nproc and 0 == me:
        ga.error('Program requires 4 GA processes; nproc=%s' % nproc)
    test2D()
    test1D()
    if 0 == me:
        print 'All tests successful'
Ejemplo n.º 7
0
def check_put_disjoint(gatype):
    """each node fills in disjoint sections of the array"""
    if 0 == me:
        print '> Checking disjoint put ...',
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    inc = (n-1)/20 + 1
    ij = 0
    for i in range(0,n,inc):
        for j in range(0,n,inc):
            check = False
            if MIRROR:
                check = ij % lprocs == iproc
            else:
                check = ij % nproc == me
            if check:
                lo = [i,j]
                hi = [min(i+inc,n), min(j+inc,n)]
                piece = a[ga.zip(lo,hi)]
                ga.put(g_a, piece, lo, hi)
                # the following check is not part of the original test.F
                result = ga.get(g_a, lo, hi)
                if not np.all(result == piece):
                    ga.error("put followed by get failed", 1)
            ga.sync()
            ij += 1
    ga.sync()
    # all nodes check all of a
    b = ga.get(g_a)
    if not np.all(a == b):
        ga.error('put failed, exiting')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 8
0
def check_get(gatype):
    """check nloop random gets from each node"""
    if 0 == me:
        print '> Checking random get (%d calls)...' % nloop
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    if 0 == me:
        ga.put(g_a, a)
    ga.sync()
    nwords = 0
    random.seed(ga.nodeid()*51+1) # different seed for each proc
    for loop in range(nloop):
        ilo,ihi = random.randint(0, nloop-1),random.randint(0, nloop-1)
        if ihi < ilo: ilo,ihi = ihi,ilo
        jlo,jhi = random.randint(0, nloop-1),random.randint(0, nloop-1)
        if jhi < jlo: jlo,jhi = jhi,jlo
        nwords += (ihi-ilo+1)*(jhi-jlo+1)
        ihi += 1
        jhi += 1
        result = ga.get(g_a, (ilo,jlo), (ihi,jhi))
        if not np.all(result == a[ilo:ihi,jlo:jhi]):
            ga.error('random get failed')
        if 0 == me and loop % max(1,nloop/20) == 0:
            print ' call %d node %d checking get((%d,%d),(%d,%d)) total %f' % (
                    loop, me, ilo, ihi, jlo, jhi, nwords)
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 9
0
def create_global_array(gatype):
    """Create and return an n x n global array named 'a' of type gatype.

    The module flags NEW_API, USE_RESTRICTED, BLOCK_CYCLIC,
    USE_SCALAPACK_DISTR and MIRROR select how the array is configured.
    A zero handle is reported through ga.error.
    """
    if NEW_API:
        g_a = ga.create_handle()
        ga.set_data(g_a, [n,n], gatype)
        ga.set_array_name(g_a, 'a')
        if USE_RESTRICTED:
            num_restricted = nproc/2 or 1
            restricted_list = np.arange(num_restricted) + num_restricted/2
            ga.set_restricted(g_a, restricted_list)
        if BLOCK_CYCLIC:
            if USE_SCALAPACK_DISTR:
                # complain only when the count is NOT divisible by 2
                if nproc % 2 != 0:
                    ga.error('Available procs must be divisible by 2',nproc)
                ga.set_block_cyclic_proc_grid(g_a, block_size, proc_grid)
            else:
                ga.set_block_cyclic(g_a, block_size)
        if MIRROR:
            p_mirror = ga.pgroup_get_mirror()
            ga.set_pgroup(g_a, p_mirror)
        ga.allocate(g_a)
    else:
        if MIRROR:
            p_mirror = ga.pgroup_get_mirror()
            # keep the handle; it is checked and returned below
            g_a = ga.create_config(gatype, (n,n), 'a', None, p_mirror)
        else:
            g_a = ga.create(gatype, (n,n), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    # NOTE(review): lo/hi are not used afterwards; the distribution query is
    # kept as in the original test.
    if MIRROR:
        lproc = me - ga.cluster_procid(inode, 0)
        lo,hi = ga.distribution(g_a, lproc)
    else:
        lo,hi = ga.distribution(g_a, me)
    ga.sync()
    return g_a
Ejemplo n.º 10
0
def verify(g_a, g_b):
    a = ga.get(g_a)
    b = ga.get(g_b)
    if not np.all(a[::-1] == b):
        print "Mismatch: a[::-1] is not equal to b"
        ga.error("verify failed")
    print "Transpose OK"
Ejemplo n.º 11
0
def check_scatter(gatype):
    nptype = ga.dtype(gatype)
    if 0 == me:
        print '> Checking scatter (might be slow)...',
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    if 0 == me:
        ga.put(g_a, a)
    ga.sync()
    ijv = np.zeros((m,2), dtype=np.int64)
    v = np.zeros(m, dtype=nptype)
    random.seed(ga.nodeid()*51 + 1) # different seed for each proc
    for j in range(10):
        check = None
        if MIRROR:
            check = random.randint(0,lprocs-1) == iproc
        else:
            check = random.randint(0,nproc-1) == me
        if check:
            for loop in range(m):
                ijv[loop,:] = (random.randint(0,n-1),random.randint(0,n-1))
                v[loop] = ijv[loop,0]+ijv[loop,1]
            ga.scatter(g_a, v, ijv)
            for loop in range(m):
                value = ga.get(g_a, ijv[loop], ijv[loop]+1).flatten()
                if not v[loop] == value:
                    ga.error('scatter failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 12
0
def verify(g_a, g_b):
    """Exercise skeleton: check that "b" equals "a" reversed.

    Fetching "a" and "b" from the global arrays is left for the reader (see
    the ### instruction below); until that is filled in, "a" and "b" are
    undefined inside this function.
    """
    ### copy the entire block of data from the global array "g_a" into the
    ### local array "a" and similarly for "g_b" and "b".
    if not np.all(a[::-1] == b):
        print "Mismatch: a[::-1] is not equal to b"
        ga.error("verify failed")
    print "Transpose OK"
Ejemplo n.º 13
0
def check_gop(nptype):
    if 0 == me:
        print '> checking ga.gop (%s)' % nptype,
    input = np.arange(n, dtype=nptype) + me
    sum = np.arange(n, dtype=nptype)*nproc + (nproc-1)*nproc/2
    output = ga.gop(input, '+')
    if not np.all(output == sum):
        ga.error('ga.gop (%s) error' % nptype)
    if 0 == me:
        print 'OK'
Ejemplo n.º 14
0
def check_zero(gatype):
    if 0 == me:
        print '> Checking zero ...',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    a = ga.get(g_a)
    if not np.all(a == 0):
        ga.error('ga.zero failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 15
0
def matrix_multiply():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [TOTALELEMS]*NDIM
    chunk = [TOTALELEMS/nprocs-1]*NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not g_b or not g_c: ga.eror("duplicate failed")
    if not me: print "Created Arrays B and C"

    # Initialize data in matrices a and b.
    if not me: print "Initializing matrix A and B"
    a = np.random.rand(*dims)*29
    b = np.random.rand(*dims)*37

    # Copy data to global arrays g_a and g_b.
    if not me:
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo,hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b.
    a = ga.get(g_a, (lo[0],0), (hi[0],dims[0]))
    b = ga.get(g_b, (0,lo[1]), (dims[1],hi[1]))

    # Do local matrix multiplication and store the result in local
    # buffer c. Start by evaluating the transpose of b.
    btrns = b.transpose()

    # Multiply a and b to get c.
    c = np.dot(a,b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)
Ejemplo n.º 16
0
def TRANSPOSE1D():
    """Exercise skeleton: reverse a 1-D global array into a second one.

    Only the scaffolding is present; each "###" comment describes a Global
    Arrays call the reader is expected to add.  Until those are filled in,
    names such as g_a, g_b, a, lo and hi are undefined here.
    """
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs * TOTALELEMS + nprocs / 2]
    chunk = [TOTALELEMS]  # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    ### create GA of integers with dimension "dims" with minimum block size
    ### "chunk" and name of "Array A" and assign the handle to the variable
    ### "g_a"
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    ### create a second global array assigned to the handle "g_b" by
    ### duplicating "g_a" and assigning the name "Array B"
    if not g_b: ga.error("duplicate failed")
    if not me: print "Created Array B"

    # initialize data in g_a
    if not me:
        print "Initializing matrix A"
        ### copy contents of a numpy range array into the remote
        ### global array "g_a"
        ### HINT: use numpy's arange() e.g. np.arange(###, dtype=np.int32)

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ### synchronize all processors

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    ### find out which block of data my node owns for the global array "g_a"
    ### and store the contents of the arrays into "lo" and "hi"

    # Get locally held data and copy it into local buffer a
    ### use the arrays "lo" and "hi" to copy the locally held block of data
    ### from the global array "g_a" into the local array "a".

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    lo2 = [dims[0] - hi[0]]
    hi2 = [dims[0] - lo[0]]
    ### copy data from the local array "b" into the block of the global
    ### array "g_b" described by the integer arrays "lo2" and "hi2"

    # Synchronize all processors to make sure inversion is complete
    ### synchronize all processors

    # Check to see if inversion is correct
    if not me: verify(g_a, g_b)
Ejemplo n.º 17
0
def TRANSPOSE1D():
    """Exercise skeleton: reverse a 1-D global array into a second one.

    Only the scaffolding is present; each "###" comment describes a Global
    Arrays call the reader is expected to add.  Until those are filled in,
    names such as g_a, g_b, a, lo and hi are undefined here.
    """
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs*TOTALELEMS + nprocs/2]
    chunk = [TOTALELEMS] # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    ### create GA of integers with dimension "dims" with minimum block size
    ### "chunk" and name of "Array A" and assign the handle to the variable
    ### "g_a"
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    ### create a second global array assigned to the handle "g_b" by
    ### duplicating "g_a" and assigning the name "Array B"
    if not g_b: ga.error("duplicate failed")
    if not me: print "Created Array B"

    # initialize data in g_a
    if not me:
        print "Initializing matrix A"
        ### copy contents of a numpy range array into the remote
        ### global array "g_a"
        ### HINT: use numpy's arange() e.g. np.arange(###, dtype=np.int32)

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ### synchronize all processors

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    ### find out which block of data my node owns for the global array "g_a"
    ### and store the contents of the arrays into "lo" and "hi"

    # Get locally held data and copy it into local buffer a
    ### use the arrays "lo" and "hi" to copy the locally held block of data
    ### from the global array "g_a" into the local array "a".

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    lo2 = [dims[0]-hi[0]]
    hi2 = [dims[0]-lo[0]]
    ### copy data from the local array "b" into the block of the global
    ### array "g_b" described by the integer arrays "lo2" and "hi2"

    # Synchronize all processors to make sure inversion is complete
    ### synchronize all processors

    # Check to see if inversion is correct
    if not me: verify(g_a, g_b)
Ejemplo n.º 18
0
def check_broadcast():
    if 0 == me:
        print '> Checking ga.brdcst',
    buf = [0,0]
    if nproc-1 == me:
        buf = [me,nproc]
    buf = ga.brdcst(buf,nproc-1)
    if buf[0] != nproc-1:
        ga.error('ga.brdcst buf[0] failed')
    if buf[1] != nproc:
        ga.error('ga.brdcst buf[1] failed')
    if 0 == me:
        print 'OK'
Ejemplo n.º 19
0
def check_fence_and_lock(gatype):
    if 0 == me:
        print '> Checking ga.fence and ga.lock',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    if not ga.create_mutexes(1):
        ga.error('ga.create_mutexes failed')
    if n < 2:
        ga.error('insufficient n to test ga.fence', n)
    ga.lock(0)
    a = ga.get(g_a) # get original values
    a[:,0] += 1 # add my contribution
    # need to use fence to assure that coms complete before leaving
    # critical section
    ga.init_fence()
    ga.put(g_a, a)
    ga.fence()
    ga.unlock(0)
    if not ga.destroy_mutexes():
        ga.error('mutex not destroyed')
    ga.sync()
    if 0 == me:
        a = ga.get(g_a)
        if not np.all(a[:,0] == nproc):
            ga.error('fence failed')
    if 0 == me:
        print 'OK'
Ejemplo n.º 20
0
def check_copy(gatype):
    if 0 == me:
        print '> Checking copy ...',
    g_a = create_global_array(gatype)
    g_b = create_global_array(gatype)
    a = create_local_a(gatype)
    if 0 == me:
        ga.put(g_a, a)
    ga.copy(g_a, g_b)
    if not np.all(a == ga.get(g_b)):
        ga.error('copy failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
    ga.destroy(g_b)
Ejemplo n.º 21
0
def check_scale(gatype):
    if 0 == me:
        print '> Checking scale ...',
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    if 0 == me:
        ga.put(g_a, a)
    ga.sync()
    ga.scale(g_a, 0.123)
    a *= 0.123
    if np.any(np.vectorize(mismatch)(a,ga.get(g_a))):
        ga.error('add failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 22
0
def test2D():
    """Run TestPutGetAcc on a 1024x1024 double array, local then remote.

    The buffer is filled with 0.01 and the global array is zeroed before
    the two runs, which differ only in the final boolean argument.
    """
    n = 1024
    buf = np.zeros((n,n), dtype=np.float64)
    chunk = np.asarray([1,3,4,9,16,24,30,48,64,91,128,171,256,353,440,512])
    g_a = ga.create(ga.C_DBL, (n,n), 'a')
    if g_a == 0:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if me == 0:
        banner = (' Performance of GA get, put & acc'
                ' for square sections of array[%d,%d]' % (n,n))
        print(banner)
    lo,hi = ga.distribution(g_a, me)
    # first pass: local ops (True), second pass: remote ops (False)
    for local in (True, False):
        TestPutGetAcc(g_a, n, chunk, buf, lo, hi, local)
Ejemplo n.º 23
0
def TRANSPOSE1D():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs * TOTALELEMS + nprocs / 2]
    chunk = [TOTALELEMS]  # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    g_b = ga.duplicate(g_a, "array B")
    if not g_b: ga.error("duplicate failed")
    if not me: print "Created Array B"

    # initialize data in g_a
    if not me:
        print "Initializing matrix A"
        ga.put(g_a, np.arange(dims[0], dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    lo, hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    ga.put(g_b, b, dims[0] - hi[0], dims[0] - lo[0])

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me: verify(g_a, g_b)

    # Deallocate arrays
    ga.destroy(g_a)
    ga.destroy(g_b)
def TRANSPOSE1D():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs*TOTALELEMS + nprocs/2]
    chunk = [TOTALELEMS] # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    g_b = ga.duplicate(g_a, "array B")
    if not g_b: ga.error("duplicate failed")
    if not me: print "Created Array B"

    # initialize data in g_a
    if not me:
        print "Initializing matrix A"
        ga.put(g_a, np.arange(dims[0], dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    lo,hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    ga.put(g_b, b, dims[0]-hi[0], dims[0]-lo[0])

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me: verify(g_a, g_b)

    # Deallocate arrays
    ga.destroy(g_a)
    ga.destroy(g_b)
Ejemplo n.º 25
0
def test2D():
    """Exercise TestPutGetAcc on a 1024x1024 double-precision global array.

    The helper is called twice with the same arguments except the last
    flag: True for local operations, then False for remote operations.
    """
    n = 1024
    shape = (n, n)
    buf = np.zeros(shape, dtype=np.float64)
    chunk = np.asarray(
        [1, 3, 4, 9, 16, 24, 30, 48, 64, 91, 128, 171, 256, 353, 440, 512])
    g_a = ga.create(ga.C_DBL, shape, 'a')
    if not g_a != 0:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if me == 0:
        title = (' Performance of GA get, put & acc'
                 ' for square sections of array[%d,%d]' % shape)
        print(title)
    lo, hi = ga.distribution(g_a, me)
    common_args = (g_a, n, chunk, buf, lo, hi)
    # local ops
    TestPutGetAcc(*(common_args + (True,)))
    # remote ops
    TestPutGetAcc(*(common_args + (False,)))
Ejemplo n.º 26
0
def verify_ga_gemm(ta, tb, num_m, num_n, num_k, alpha, g_a, g_b, beta, g_c):
    """Compare the contents of g_c with a locally computed dgemm result.

    g_a, g_b and g_c are fetched in full into float64 buffers of shape
    (num_m, num_k), (num_k, num_n) and (num_m, num_n).  ga.error is called
    when any element of g_c differs from the dgemm result by more than 1.

    ta and tb are forwarded to dgemm as trans_a / trans_b.
    """
    tmpa = np.ndarray((num_m, num_k), dtype=np.float64)
    tmpb = np.ndarray((num_k, num_n), dtype=np.float64)
    tmpc = np.ndarray((num_m, num_n), dtype=np.float64)
    tmpa = ga.get(g_a, buffer=tmpa)
    tmpb = ga.get(g_b, buffer=tmpb)
    tmpc = ga.get(g_c, buffer=tmpc)
    # The transpose flags are passed straight through, so one call covers
    # all four ta/tb combinations.
    result = dgemm(alpha, tmpa, tmpb, beta=beta, trans_a=ta, trans_b=tb)
    abs_value = np.abs(tmpc-result)
    if np.any(abs_value > 1):
        ga.error('verify ga.gemm failed')
Ejemplo n.º 27
0
def verify(g_a, g_b, g_c):
    g_chk = ga.duplicate(g_a, "array check")
    if not g_chk: ga.error("duplicate failed")
    ga.sync()

    ga.gemm(False, False, TOTALELEMS, TOTALELEMS, TOTALELEMS, 1.0, g_a, g_b,
            0.0, g_chk);
    ga.sync()

    ga.add(g_c, g_chk, g_chk, 1.0, -1.0)
    rchk = ga.dot(g_chk, g_chk)

    if not me:
        print "Normed difference in matrices: %12.4f" % rchk
        if not (-TOLERANCE < rchk < TOLERANCE):
            ga.error("Matrix multiply verify failed")
        else:
            print "Matrix Multiply OK"

    ga.destroy(g_chk)
Ejemplo n.º 28
0
def test1D():
    n = 1024*1024
    buf = np.zeros(n/4, dtype=np.float64)
    chunk = np.asarray([1,9,16,81,256,576,900,2304,4096,8281,
        16384,29241,65536,124609,193600,262144])
    g_a = ga.create(ga.C_DBL, (n,), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print ''
        print ''
        print ''
        print (' Performance of GA get, put & acc'
                ' for 1-dimensional sections of array[%d]' % n)
    lo,hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, False)
Ejemplo n.º 29
0
def check_accumulate_overlap(gatype):
    if 0 == me:
        print '> Checking overlapping accumulate ...',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    ga.acc(g_a, [1], (n/2,n/2), (n/2+1,n/2+1), 1)
    ga.sync()
    if MIRROR:
        if 0 == iproc:
            x = abs(ga.get(g_a, (n/2,n/2), (n/2+1,n/2+1))[0,0] - lprocs)
            if not 0 == x:
                ga.error('overlapping accumulate failed -- expected %s got %s'%(
                        x, lprocs))
    else:
        if 0 == me:
            x = abs(ga.get(g_a, (n/2,n/2), (n/2+1,n/2+1))[0,0] - nproc)
            if not 0 == x:
                ga.error('overlapping accumulate failed -- expected %s got %s'%(
                        x, nproc))
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 30
0
def check_add(gatype):
    if 0 == me:
        print '> Checking add ...',
    g_a = create_global_array(gatype)
    g_b = create_global_array(gatype)
    a = create_local_a(gatype)
    b = create_local_b(gatype)
    alpha = None
    beta = None
    if 0 == me:
        ga.put(g_a, a)
    ga.sync();
    np.random.seed(12345) # everyone has same seed
    if gatype in [ga.C_SCPL,ga.C_DCPL]:
        b_real = np.random.random_sample((n,n))
        b_imag = np.random.random_sample((n,n))
        b[:] = np.vectorize(complex)(b_real,b_imag)
        alpha = complex(0.1,-0.1)
        beta = complex(0.9,-0.9)
    else:
        b[:] = np.random.random_sample((n,n))
        alpha = 0.1
        beta = 0.9
    a = alpha*a + beta*b
    if MIRROR:
        if 0 == iproc:
            ga.put(g_b, b)
    else:
        if 0 == me:
            ga.put(g_b, b)
    ga.sync()
    ga.add(g_a, g_b, g_b, alpha, beta)
    b = ga.get(g_b, buffer=b)
    if np.any(np.vectorize(mismatch)(b,a)):
        ga.error('add failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
    ga.destroy(g_b)
Ejemplo n.º 31
0
def test1D():
    n = 1024 * 1024
    buf = np.zeros(n / 4, dtype=np.float64)
    chunk = np.asarray([
        1, 9, 16, 81, 256, 576, 900, 2304, 4096, 8281, 16384, 29241, 65536,
        124609, 193600, 262144
    ])
    g_a = ga.create(ga.C_DBL, (n, ), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print ''
        print ''
        print ''
        print(
            ' Performance of GA get, put & acc'
            ' for 1-dimensional sections of array[%d]' % n)
    lo, hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, False)
Ejemplo n.º 32
0
def check_accumulate_disjoint(gatype):
    """Each node accumulates into disjoint sections of the array."""
    if 0 == me:
        print '> Checking disjoint accumulate ...',
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    b = np.fromfunction(lambda i,j: i+j+2, (n,n), dtype=ga.dtype(gatype))
    if 0 == me:
        ga.put(g_a, a)
    ga.sync()
    inc = (n-1)/20 + 1
    ij = 0
    for i in range(0,n,inc):
        for j in range(0,n,inc):
            x = 10.0
            lo = [i,j]
            hi = [min(i+inc,n), min(j+inc,n)]
            piece = b[ga.zip(lo,hi)]
            check = False
            if MIRROR:
                check = ij % lprocs == iproc
            else:
                check = ij % nproc == me
            if check:
                ga.acc(g_a, piece, lo, hi, x)
            ga.sync()
            ij += 1
            # each process applies all updates to its local copy
            a[ga.zip(lo,hi)] += x * piece
    ga.sync()
    # all nodes check all of a
    if not np.all(ga.get(g_a) == a):
        ga.error('acc failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 33
0
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays

import numpy as np

# Id of this process and the number of processes, as reported by GA.
me = ga.nodeid()
nproc = ga.nnodes()


def print_distribution(g_a):
    for i in range(ga.nnodes()):
        lo, hi = ga.distribution(g_a, i)
        print "P=%s lo=%s hi=%s" % (i, lo, hi)


# Create an irregularly distributed 8x10 array on a 3x2 block grid.
# NOTE(review): map presumably lists the starting index of each of the 3
# blocks along the first dimension, then of the 2 blocks along the second
# -- confirm against the create_irreg documentation.
block = [3, 2]
map = [0, 2, 6, 0, 5]
if np.prod(block) > nproc:
    raise ValueError("ERROR: fewer procs than requested blocks")
g_a = ga.create_irreg(ga.C_DBL, [8, 10], block, map, "Array A")
if not g_a:
    ga.error("Could not create global array A", g_a)
# A regularly distributed 5-dimensional integer array for comparison.
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# Only the process with id 0 prints the two distributions.
if not me:
    print_distribution(g_a)
    print_distribution(g_b)
Ejemplo n.º 34
0
def matrix_multiply():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [TOTALELEMS]*NDIM
    chunk = [TOTALELEMS/nprocs-1]*NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    ### create GA of doubles with dimensions "dims", with minimum block size
    ### "chunk", and with name "array A", and assign the handle to the integer
    ### variable "g_a".
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    ### Duplicate array "g_a" to create arrays "g_b" and "g_c" with array
    ### names "array B" and "array C", respectively.
    if not g_b or not g_c: ga.eror("duplicate failed")
    if not me: print "Created Arrays B and C"

    # Initialize data in matrices a and b.
    if not me: print "Initializing matrix A and B"
    a = np.random.rand(*dims)*29
    b = np.random.rand(*dims)*37

    # Copy data to global arrays g_a and g_b.
    if not me:
        ### copy the contents of array "a" into the global array "g_a"
        ### similarly for "b"

    # Synchronize all processors to make sure everyone has data.
    ### Synchronize all processors

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    ### find out which block of data my node owns for the global array "g_c"
    ### and store the contents in the integer arrays "lo" and "hi"

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b.
    lo2 = (lo[0],0)
    hi2 = (hi[0],dims[0]))
    ### copy the block of data described by the arrays "lo2" and "hi2" from
    ### the global array "g_a" in to the local array "a"

    lo3 = (0,lo[1])
    hi3 = (dims[1],hi[1]))
    ### copy the block of data described by the arrays "lo3" and "hi3" from
    ### the global array "g_b" in to the local array "b"

    # Do local matrix multiplication and store the result in local
    # buffer c. Start by evaluating the transpose of b.
    btrns = b.transpose()

    # Multiply a and b to get c.
    c = np.dot(a,b)

    # Copy c back to g_c.
    ### copy data from the local array "c" into the block of the global array
    ### "g_c" described by the integer arrays "lo" and "hi".

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ### destroy the global arrays "g_a", "g_b", "g_c"

# Script entry point: the process whose id "me" is 0 prints the process
# count before and a termination notice after matrix_multiply(), which
# every process runs.
if __name__ == '__main__':
    if not me: print "\nUsing %d processes\n" % nprocs
    matrix_multiply()
    if not me: print "\nTerminating..."
Ejemplo n.º 35
0
def main():
    # TODO there's got to be a loopless, more pythonic way to do this
    ii = 0
    for i in range(num1*num1):
        ii += 1
        if ii > num1:
            ii = 0
        h0[i] = ii
    # compute times assuming 500 mflops and 5 second target time
    # ntimes = max(3.0, 5.0/(4.0-9*num**3))
    ntimes = 5

    for ii in range(howmany):
        num_m = nums_m[ii]
        num_n = nums_n[ii]
        num_k = nums_k[ii]
        a = 0.5/(num_m*num_n)
        if num_m > nummax or num_n > nummax or num_k > nummax:
            ga.error('Insufficient memory: check nummax')
        
        if BLOCK_CYCLIC:
            block_size = [128,128]
            g_c = ga.create_handle()
            ga.set_data(g_c, (num_m,num_n), ga.C_DBL)
            ga.set_array_name(g_c, 'g_c')
            ga.set_block_cyclic(g_c, block_size)
            if not ga.allocate(g_c):
                ga.error('create failed')
            block_size = [128,128]
            g_b = ga.create_handle()
            ga.set_data(g_b, (num_k,num_n), ga.C_DBL)
            ga.set_array_name(g_b, 'g_b')
            ga.set_block_cyclic(g_b, block_size)
            if not ga.allocate(g_b):
                ga.error('create failed')
            block_size = [128,128]
            g_a = ga.create_handle()
            ga.set_data(g_a, (num_m,num_k), ga.C_DBL)
            ga.set_array_name(g_a, 'g_a')
            ga.set_block_cyclic(g_a, block_size)
            if not ga.allocate(g_a):
                ga.error('create failed')
        else:
            g_a = ga.create(ga.C_DBL, (num_m,num_k), 'g_a')
            g_b = ga.create(ga.C_DBL, (num_k,num_n), 'g_b')
            g_c = ga.create(ga.C_DBL, (num_m,num_n), 'g_c')
            for handle in [g_a,g_b,g_c]:
                if 0 == handle:
                    ga.error('create failed')

        # initialize matrices A and B
        if 0 == me:
            load_ga(g_a, h0, num_m, num_k)
            load_ga(g_b, h0, num_k, num_n)
        ga.zero(g_c)
        ga.sync()

        if 0 == me:
            print '\nMatrix Multiplication C = A[%d,%d] x B[%d,%d]\n' % (
                    num_m, num_k, num_k, num_n)
            print ' %4s  %12s  %12s  %7s  %7s'%(
                    "Run#", "Time (seconds)", "mflops/proc",
                    "A trans", "B trans")
        avg_t[:] = 0
        avg_mf[:] = 0
        for itime in range(ntimes):
            for i in range(ntrans):
                ga.sync()
                ta = transa[i]
                tb = transb[i]
                t1 = time.time()
                ga.gemm(ta,tb,num_m,num_n,num_k,1,g_a,g_b,0,g_c)
                t1 = time.time() - t1
                if 0 == me:
                    mf = 2*num_m*num_n*num_k/t1*10**-6/nproc
                    avg_t[i] += t1
                    avg_mf[i] += mf
                    print ' %4d  %12.4f  %12.1f  %7s  %7s'%(
                            itime+1, t1, mf, ta, tb)
                    if VERIFY and itime == 0:
                        verify_ga_gemm(ta, tb, num_m, num_n, num_k,
                                1.0, g_a, g_b, 0.0, g_c)
        if 0 == me:
            print ''
            for i in range(ntrans):
                print 'Average: %12.4f seconds %12.1f mflops/proc %s %s'%(
                            avg_t[i]/ntimes, avg_mf[i]/ntimes,
                            transa[i], transb[i])
            if VERIFY:
                print 'All ga.gemms are verified...O.K.'
Ejemplo n.º 36
0
import mpi4py.MPI # initialize Message Passing Interface
from ga4py import ga # initialize Global Arrays

import numpy as np

# Id of this process and the number of processes, as reported by GA.
me = ga.nodeid()
nproc = ga.nnodes()

def print_distribution(g_a):
    for i in range(ga.nnodes()):
        lo,hi = ga.distribution(g_a, i)
        print "P=%s lo=%s hi=%s" % (i,lo,hi)

# Create an irregularly distributed 8x10 array on a 3x2 block grid.
# NOTE(review): map presumably lists the starting index of each of the 3
# blocks along the first dimension, then of the 2 blocks along the second
# -- confirm against the create_irreg documentation.
block = [3,2]
map = [0,2,6,0,5]
if np.prod(block) > nproc:
    raise ValueError("ERROR: fewer procs than requested blocks")
g_a = ga.create_irreg(ga.C_DBL, [8,10], block, map, "Array A")
if not g_a:
    ga.error("Could not create global array A",g_a)
# A regularly distributed 5-dimensional integer array for comparison.
g_b = ga.create(ga.C_INT, (2,3,4,5,6))

# Only the process with id 0 prints the two distributions.
if not me:
    print_distribution(g_a)
    print_distribution(g_b)