def create_global_array(gatype):
    """Create the n-by-n global array named 'a' and return its handle.

    The creation path is chosen by module-level flags:

    * NEW_API: build the array through a handle (set_data / set_array_name /
      allocate), optionally restricted to a subset of processes
      (USE_RESTRICTED), block-cyclic (BLOCK_CYCLIC, with a processor grid
      when USE_SCALAPACK_DISTR is set) and/or bound to the mirror process
      group (MIRROR).
    * otherwise: a single ga.create / ga.create_config call.

    gatype is the GA element type passed straight through to the library.
    ga.error is called if the array cannot be created.
    """
    if NEW_API:
        g_a = ga.create_handle()
        ga.set_data(g_a, [n, n], gatype)
        ga.set_array_name(g_a, 'a')
        if USE_RESTRICTED:
            # Half of the processes (at least one), offset by half of that
            # count. '//' keeps the integer division the original relied on.
            num_restricted = nproc // 2 or 1
            restricted_list = np.arange(num_restricted) + num_restricted // 2
            ga.set_restricted(g_a, restricted_list)
        if BLOCK_CYCLIC:
            if USE_SCALAPACK_DISTR:
                # Fixed: the test was inverted and aborted exactly when the
                # process count *was* divisible by 2.
                if nproc % 2 != 0:
                    ga.error('Available procs must be divisible by 2', nproc)
                ga.set_block_cyclic_proc_grid(g_a, block_size, proc_grid)
            else:
                ga.set_block_cyclic(g_a, block_size)
        if MIRROR:
            ga.set_pgroup(g_a, ga.pgroup_get_mirror())
        # Fixed: an allocation failure used to go unnoticed because only the
        # handle value was checked afterwards.
        if not ga.allocate(g_a):
            ga.error('ga.create failed')
    elif MIRROR:
        # Fixed: the handle returned by create_config was discarded, leaving
        # g_a undefined for the rest of the function.
        g_a = ga.create_config(gatype, (n, n), 'a', None,
                               ga.pgroup_get_mirror())
    else:
        g_a = ga.create(gatype, (n, n), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    # The distribution is queried as before; its result is not used here.
    if MIRROR:
        lproc = me - ga.cluster_procid(inode, 0)
        ga.distribution(g_a, lproc)
    else:
        ga.distribution(g_a, me)
    ga.sync()
    return g_a
def parallel_task():
    """Create a 3x4x5 double array, randomize it, and report from process 0.

    Process 0 of the group first prints the default process-group handle
    and, after ga.randomize, the sum of the data returned by ga.access.
    """
    rank = ga.pgroup_nodeid()
    group_size = ga.pgroup_nnodes()  # queried as before; not used below
    is_master = (rank == 0)

    if is_master:
        print("This is process 0 on group %s" % ga.pgroup_get_default())

    handle = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(handle)

    if is_master:
        local_data = ga.access(handle)
        print(np.sum(local_data))
def parallel_task():
    """Create a 3x4x5 double array, randomize it, and report from process 0.

    Process 0 of the group first prints the default process-group handle
    and, after ga.randomize, the sum of the data returned by ga.access.
    """
    rank = ga.pgroup_nodeid()
    group_size = ga.pgroup_nnodes()  # queried as before; not used below
    is_master = (rank == 0)

    if is_master:
        print("This is process 0 on group %s" % ga.pgroup_get_default())

    handle = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(handle)

    if is_master:
        local_data = ga.access(handle)
        print(np.sum(local_data))
def matrix_multiply():
    """Multiply two random global matrices block by block and verify C = A.B.

    Three global arrays of shape [TOTALELEMS]*NDIM are created. Process 0
    fills A and B with random data; every process then fetches the rows of
    A and the columns of B that it needs, computes its own block of C with
    np.dot, and writes that block back. The result is handed to verify()
    and the arrays are destroyed.

    Relies on the module-level names TOTALELEMS, NDIM, nprocs, me and verify.
    """
    # Configure array dimensions. Force an unequal data distribution.
    # '//' keeps the integer chunk size the original relied on.
    dims = [TOTALELEMS] * NDIM
    chunk = [TOTALELEMS // nprocs - 1] * NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not g_b or not g_c:
        # Fixed: 'ga.eror' was a typo that raised AttributeError instead of
        # reporting the failure.
        ga.error("duplicate failed")
    if not me:
        print("Created Arrays B and C")

    # Initialize matrices A and B. Only process 0 writes them, so only
    # process 0 needs to generate the random data.
    if not me:
        print("Initializing matrix A and B")
        ga.put(g_a, np.random.rand(*dims) * 29)
        ga.put(g_b, np.random.rand(*dims) * 37)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo, hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in g_c.
    a = ga.get(g_a, (lo[0], 0), (hi[0], dims[0]))
    b = ga.get(g_b, (0, lo[1]), (dims[1], hi[1]))

    # Local matrix multiplication. (The unused transpose of b was dropped.)
    c = np.dot(a, b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    # Make sure every process has finished writing its block of C before
    # anyone reads it back for verification.
    ga.sync()
    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)
def test2D():
    """Drive TestPutGetAcc on a 1024x1024 double-precision global array.

    A buffer filled with 0.01 and a table of section sizes are passed to
    TestPutGetAcc twice: first with the last argument True (local ops),
    then with False (remote ops).
    """
    n = 1024
    section_sizes = [1, 3, 4, 9, 16, 24, 30, 48,
                     64, 91, 128, 171, 256, 353, 440, 512]
    chunk = np.asarray(section_sizes)

    # Transfer buffer: every element is 0.01.
    buf = np.zeros((n, n), dtype=np.float64)
    buf.fill(0.01)

    g_a = ga.create(ga.C_DBL, (n, n), 'a')
    if g_a == 0:
        ga.error('ga.create failed')
    ga.zero(g_a)

    if me == 0:
        header = (' Performance of GA get, put & acc'
                  ' for square sections of array[%d,%d]')
        print(header % (n, n))

    lo, hi = ga.distribution(g_a, me)
    # local ops first, then remote ops
    for local in (True, False):
        TestPutGetAcc(g_a, n, chunk, buf, lo, hi, local)
def TRANSPOSE1D():
    """Reverse a 1-D global integer array in parallel and check the result.

    Process 0 fills array A with 0..N-1. Each process then reverses the
    patch it owns and writes it to the mirrored position in array B.
    Process 0 finally calls verify(g_a, g_b), and both arrays are destroyed.
    """
    # Configure array dimensions. Force an unequal data distribution.
    total = nprocs * TOTALELEMS + nprocs / 2
    dims = [total]
    chunk = [TOTALELEMS]  # minimum data on each process
    is_root = not me

    # Create global array A and duplicate it to obtain B.
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if is_root:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    if not g_b:
        ga.error("duplicate failed")
    if is_root:
        print("Created Array B")

    # Initialize data in g_a.
    if is_root:
        print("Initializing matrix A")
        ga.put(g_a, np.arange(total, dtype=np.int32))

    # Everyone must see the initial data before the next step.
    ga.sync()

    # Find out which data this process owns and fetch it.
    lo, hi = ga.distribution(g_a)
    local_patch = ga.get(g_a, lo, hi)

    # Reverse the patch locally, then store it at its mirrored position in
    # g_b so that the array as a whole is reversed.
    mirrored_lo = total - hi[0]
    mirrored_hi = total - lo[0]
    ga.put(g_b, local_patch[::-1], mirrored_lo, mirrored_hi)

    # Make sure the inversion is complete everywhere before checking it.
    ga.sync()
    if is_root:
        verify(g_a, g_b)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
def TRANSPOSE1D():
    """Reverse a 1-D global integer array in parallel and check the result.

    Process 0 fills array A with 0..N-1. Each process then reverses the
    patch it owns and writes it to the mirrored position in array B.
    Process 0 finally calls verify(g_a, g_b), and both arrays are destroyed.
    """
    # Configure array dimensions. Force an unequal data distribution.
    total = nprocs * TOTALELEMS + nprocs / 2
    dims = [total]
    chunk = [TOTALELEMS]  # minimum data on each process
    is_root = not me

    # Create global array A and duplicate it to obtain B.
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if is_root:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    if not g_b:
        ga.error("duplicate failed")
    if is_root:
        print("Created Array B")

    # Initialize data in g_a.
    if is_root:
        print("Initializing matrix A")
        ga.put(g_a, np.arange(total, dtype=np.int32))

    # Everyone must see the initial data before the next step.
    ga.sync()

    # Find out which data this process owns and fetch it.
    lo, hi = ga.distribution(g_a)
    local_patch = ga.get(g_a, lo, hi)

    # Reverse the patch locally, then store it at its mirrored position in
    # g_b so that the array as a whole is reversed.
    mirrored_lo = total - hi[0]
    mirrored_hi = total - lo[0]
    ga.put(g_b, local_patch[::-1], mirrored_lo, mirrored_hi)

    # Make sure the inversion is complete everywhere before checking it.
    ga.sync()
    if is_root:
        verify(g_a, g_b)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
def test2D():
    """Drive TestPutGetAcc on a 1024x1024 double-precision global array.

    A buffer filled with 0.01 and a table of section sizes are passed to
    TestPutGetAcc twice: first with the last argument True (local ops),
    then with False (remote ops).
    """
    n = 1024
    section_sizes = [1, 3, 4, 9, 16, 24, 30, 48,
                     64, 91, 128, 171, 256, 353, 440, 512]
    chunk = np.asarray(section_sizes)

    # Transfer buffer: every element is 0.01.
    buf = np.zeros((n, n), dtype=np.float64)
    buf.fill(0.01)

    g_a = ga.create(ga.C_DBL, (n, n), 'a')
    if g_a == 0:
        ga.error('ga.create failed')
    ga.zero(g_a)

    if me == 0:
        header = (' Performance of GA get, put & acc'
                  ' for square sections of array[%d,%d]')
        print(header % (n, n))

    lo, hi = ga.distribution(g_a, me)
    # local ops first, then remote ops
    for local in (True, False):
        TestPutGetAcc(g_a, n, chunk, buf, lo, hi, local)
def test1D():
    """Drive TestPutGetAcc1 on a 1-D double-precision array of 1024*1024 items.

    A buffer of n/4 elements filled with 0.01 and a table of section
    lengths are passed to TestPutGetAcc1 twice: first with the last
    argument True (local ops), then with False (remote ops).
    """
    n = 1024 * 1024
    section_lengths = [1, 9, 16, 81, 256, 576, 900, 2304,
                       4096, 8281, 16384, 29241, 65536, 124609,
                       193600, 262144]
    chunk = np.asarray(section_lengths)

    # Transfer buffer: every element is 0.01.
    buf = np.zeros(n / 4, dtype=np.float64)
    buf.fill(0.01)

    g_a = ga.create(ga.C_DBL, (n,), 'a')
    if g_a == 0:
        ga.error('ga.create failed')
    ga.zero(g_a)

    if me == 0:
        # three blank lines separate this report from earlier output
        for _ in range(3):
            print('')
        header = (' Performance of GA get, put & acc'
                  ' for 1-dimensional sections of array[%d]')
        print(header % n)

    lo, hi = ga.distribution(g_a, me)
    # local ops first, then remote ops
    for local in (True, False):
        TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, local)
def test1D():
    """Drive TestPutGetAcc1 on a 1-D double-precision array of 1024*1024 items.

    A buffer of n/4 elements filled with 0.01 and a table of section
    lengths are passed to TestPutGetAcc1 twice: first with the last
    argument True (local ops), then with False (remote ops).
    """
    n = 1024 * 1024
    section_lengths = [1, 9, 16, 81, 256, 576, 900, 2304,
                       4096, 8281, 16384, 29241, 65536, 124609,
                       193600, 262144]
    chunk = np.asarray(section_lengths)

    # Transfer buffer: every element is 0.01.
    buf = np.zeros(n / 4, dtype=np.float64)
    buf.fill(0.01)

    g_a = ga.create(ga.C_DBL, (n,), 'a')
    if g_a == 0:
        ga.error('ga.create failed')
    ga.zero(g_a)

    if me == 0:
        # three blank lines separate this report from earlier output
        for _ in range(3):
            print('')
        header = (' Performance of GA get, put & acc'
                  ' for 1-dimensional sections of array[%d]')
        print(header % n)

    lo, hi = ga.distribution(g_a, me)
    # local ops first, then remote ops
    for local in (True, False):
        TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, local)
# the (i,j) element has the value j*NSIZE + i. This corresponds to
# numbering each of the elements consecutively in column-major order.
#
# Find local processor ID and number of processors
me = ga.nodeid()
nprocs = ga.nnodes()
if me == 0:
    print("Initialized GA library on %d processes" % nprocs)

# Create a square NSIZE x NSIZE integer GA; a chunk of -1 in each dimension
# is passed through to ga.create unchanged.
dims = (NSIZE, NSIZE)
chunk = (-1, -1)
ld = NSIZE
g_a = ga.create(ga.C_INT, dims, "test_a", chunk)
if me == 0 and g_a:
    print("\nSuccessfully created Global Array")

# Initialize data in GA. Find the patch owned by the neighboring processor.
nghbr = (me + 1) % nprocs
lo, hi = ga.distribution(g_a, nghbr)

# Create data in a local buffer shaped like that patch, starting from the
# patch-local value j*NSIZE + i for each element ...
patch_shape = hi - lo
a_buf = np.fromfunction(lambda i, j: j * NSIZE + i, patch_shape,
                        dtype=ga.dtype(ga.C_INT))
# ... then shift every element by offsets derived from the patch origin.
# NOTE(review): for the global value j*NSIZE + i described above one would
# expect lo[0] + lo[1]*NSIZE here; the code adds lo[1] + lo[0]*dims[0].
# Behaviour is kept as-is -- confirm against the checking code.
a_buf += lo[1]
a_buf += lo[0] * dims[0]
s = 0.0 for i in xrange(myrank + 1, n + 1, nprocs): x = h * (i - 0.5) s += 4.0 / (1.0 + x**2) return s * h def prn_pi(pi, PI): message = "pi is approximately %.16f, error is %.16f" print(message % (pi, abs(pi - PI))) nprocs = ga.nnodes() myrank = ga.nodeid() g_pi = ga.create(ga.C_DBL, [1]) one_time = False if len(sys.argv) == 2: n = int(sys.argv[1]) one_time = True while True: if not one_time: if myrank == 0: n = get_n() n = ga.brdcst(n) else: n = ga.brdcst(0) if n == 0: break
# Report the trajectory length before timing starts.
u = mda.Universe(PSF, longXTC1)
print(len(u.trajectory))

start1 = time.time()
# The universe is rebuilt here so that its construction falls inside the
# region timed from start1.
u = mda.Universe(PSF, longXTC1)
mobile = u.select_atoms(
    "(resid 1:29 or resid 60:121 or resid 160:214) and name CA")
index = mobile.indices
topology = mobile.universe.filename
trajectory = mobile.universe.trajectory.filename

# Reference structure: same selection, coordinates shifted to the centre of
# mass.
uref = mda.Universe(PSF, longXTC0)
ref0 = uref.select_atoms(
    "(resid 1:29 or resid 60:121 or resid 160:214) and name CA")
xref0 = ref0.positions - ref0.center_of_mass()

# NOTE(review): bsize equals the full frame count (it is not divided by
# 'size'), so blocks for rank > 0 start at or beyond n_frames -- confirm
# that this is intended.
bsize = int(np.ceil(mobile.universe.trajectory.n_frames))
g_a = ga.create(ga.C_DBL, [bsize * size, 2], "RMSD")
buf = np.zeros([bsize * size, 2], dtype=float)

# Create each segment for each process: row k holds [k*bsize, (k+1)*bsize).
start2 = time.time()
frames_seg = np.zeros([size, 2], dtype=int)
for iblock in range(size):
    frames_seg[iblock, 0] = iblock * bsize
    frames_seg[iblock, 1] = (iblock + 1) * bsize
d = {key: frames_seg[key] for key in range(size)}
start, stop = d[rank]

# Block-RMSD in Parallel
start3 = time.time()
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays

me = ga.nodeid()


def print_distribution(g_a):
    """Print, for every process id, the lo/hi bounds ga.distribution reports."""
    for proc in range(ga.nnodes()):
        lower, upper = ga.distribution(g_a, proc)
        print("%s lo=%s hi=%s" % (proc, lower, upper))


# create some arrays
g_a = ga.create(ga.C_DBL, (10, 20, 30))
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# only process 0 reports
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)
def main(): # TODO there's got to be a loopless, more pythonic way to do this ii = 0 for i in range(num1*num1): ii += 1 if ii > num1: ii = 0 h0[i] = ii # compute times assuming 500 mflops and 5 second target time # ntimes = max(3.0, 5.0/(4.0-9*num**3)) ntimes = 5 for ii in range(howmany): num_m = nums_m[ii] num_n = nums_n[ii] num_k = nums_k[ii] a = 0.5/(num_m*num_n) if num_m > nummax or num_n > nummax or num_k > nummax: ga.error('Insufficient memory: check nummax') if BLOCK_CYCLIC: block_size = [128,128] g_c = ga.create_handle() ga.set_data(g_c, (num_m,num_n), ga.C_DBL) ga.set_array_name(g_c, 'g_c') ga.set_block_cyclic(g_c, block_size) if not ga.allocate(g_c): ga.error('create failed') block_size = [128,128] g_b = ga.create_handle() ga.set_data(g_b, (num_k,num_n), ga.C_DBL) ga.set_array_name(g_b, 'g_b') ga.set_block_cyclic(g_b, block_size) if not ga.allocate(g_b): ga.error('create failed') block_size = [128,128] g_a = ga.create_handle() ga.set_data(g_a, (num_m,num_k), ga.C_DBL) ga.set_array_name(g_a, 'g_a') ga.set_block_cyclic(g_a, block_size) if not ga.allocate(g_a): ga.error('create failed') else: g_a = ga.create(ga.C_DBL, (num_m,num_k), 'g_a') g_b = ga.create(ga.C_DBL, (num_k,num_n), 'g_b') g_c = ga.create(ga.C_DBL, (num_m,num_n), 'g_c') for handle in [g_a,g_b,g_c]: if 0 == handle: ga.error('create failed') # initialize matrices A and B if 0 == me: load_ga(g_a, h0, num_m, num_k) load_ga(g_b, h0, num_k, num_n) ga.zero(g_c) ga.sync() if 0 == me: print '\nMatrix Multiplication C = A[%d,%d] x B[%d,%d]\n' % ( num_m, num_k, num_k, num_n) print ' %4s %12s %12s %7s %7s'%( "Run#", "Time (seconds)", "mflops/proc", "A trans", "B trans") avg_t[:] = 0 avg_mf[:] = 0 for itime in range(ntimes): for i in range(ntrans): ga.sync() ta = transa[i] tb = transb[i] t1 = time.time() ga.gemm(ta,tb,num_m,num_n,num_k,1,g_a,g_b,0,g_c) t1 = time.time() - t1 if 0 == me: mf = 2*num_m*num_n*num_k/t1*10**-6/nproc avg_t[i] += t1 avg_mf[i] += mf print ' %4d %12.4f %12.1f %7s %7s'%( itime+1, t1, 
mf, ta, tb) if VERIFY and itime == 0: verify_ga_gemm(ta, tb, num_m, num_n, num_k, 1.0, g_a, g_b, 0.0, g_c) if 0 == me: print '' for i in range(ntrans): print 'Average: %12.4f seconds %12.1f mflops/proc %s %s'%( avg_t[i]/ntimes, avg_mf[i]/ntimes, transa[i], transb[i]) if VERIFY: print 'All ga.gemms are verified...O.K.'
def verify_using_np(g_a, g_b, g_c): a = ga.get(g_a) b = ga.get(g_b) c = ga.get(g_c) v = np.dot(a,b) val = int(np.abs(np.sum(c-v))>0.0001) val = ga.gop_add(val) return val == 0 if __name__ == '__main__': if nproc > MULTIPLIER**3: if 0 == me: print "You must use less than %s processors" % (MULTIPLIER**3+1) else: g_a = ga.create(ga.C_DBL, [N,N]) g_b = ga.create(ga.C_DBL, [N,N]) g_c = ga.create(ga.C_DBL, [N,N]) # put some fake data into input arrays A and B if me == 0: ga.put(g_a, np.random.random(N*N)) ga.put(g_b, np.random.random(N*N)) ga.sync() if me == 0: print "srumma...", srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER) if me == 0: print "done" if me == 0: print "verifying using ga.gemm...", ok = verify_using_ga(g_a, g_b, g_c)
"""Use ga.access() to sum locally per SMP node."""
import mpi4py.MPI
from ga4py import ga
import numpy as np

# Fixed: world_id was used below without ever being defined in this script
# (NameError). ga.nodeid() is the rank query used for this purpose elsewhere.
# NOTE(review): confirm no other definition of world_id was intended.
world_id = ga.nodeid()

# Okay, we create the global array
g_a = ga.create(ga.C_DBL, (3,4,5,6))
if world_id == 0:
    # a single process writes the values 0 .. 3*4*5*6-1 into the array
    ga.put(g_a, np.arange(3*4*5*6))
ga.sync()
# You're on your own!
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays

me = ga.nodeid()


def print_distribution(g_a):
    """Print, for every process id, the lo/hi bounds ga.distribution reports."""
    for proc in range(ga.nnodes()):
        lower, upper = ga.distribution(g_a, proc)
        print("%s lo=%s hi=%s" % (proc, lower, upper))


# create some arrays: same shape and type, different chunk arguments
g_a = ga.create(ga.C_DBL, (10, 20, 30), chunk=(-1, 20, -1))
g_b = ga.create(ga.C_DBL, (10, 20, 30), chunk=(10, -1, -1))

# only process 0 reports the two distributions
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)

# fill g_a with 6 and copy it into g_b
ga.fill(g_a, 6)
ga.copy(g_a, g_b)

# process 0 shows what ga.access returns for g_b
if me == 0:
    buffer = ga.access(g_b)
    print(buffer.shape)
    print(buffer)
def verify_using_np(g_a, g_b, g_c): a = ga.get(g_a) b = ga.get(g_b) c = ga.get(g_c) v = np.dot(a,b) val = int(np.abs(np.sum(c-v))>0.0001) val = ga.gop_add(val) return val == 0 if __name__ == '__main__': if nproc > MULTIPLIER**3: if 0 == me: print "You must use less than %s processors" % (MULTIPLIER**3+1) else: g_a = ga.create(ga.C_DBL, [N,N]) g_b = ga.create(ga.C_DBL, [N,N]) g_c = ga.create(ga.C_DBL, [N,N]) g_counter = ga.create(ga.C_INT, [1]) ga.zero(g_counter) # put some fake data into input arrays A and B if me == 0: ga.put(g_a, np.random.random(N*N)) ga.put(g_b, np.random.random(N*N)) ga.sync() if me == 0: print "srumma...", srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER, g_counter) if me == 0: print "done" if me == 0:
def verify_using_np(g_a, g_b, g_c): a = ga.get(g_a) b = ga.get(g_b) c = ga.get(g_c) v = np.dot(a, b) val = int(np.abs(np.sum(c - v)) > 0.0001) val = ga.gop_add(val) return val == 0 if __name__ == '__main__': if nproc > MULTIPLIER**3: if 0 == me: print "You must use less than %s processors" % (MULTIPLIER**3 + 1) else: g_a = ga.create(ga.C_DBL, [N, N]) g_b = ga.create(ga.C_DBL, [N, N]) g_c = ga.create(ga.C_DBL, [N, N]) g_counter = ga.create(ga.C_INT, [1]) ga.zero(g_counter) # put some fake data into input arrays A and B if me == 0: ga.put(g_a, np.random.random(N * N)) ga.put(g_b, np.random.random(N * N)) ga.sync() if me == 0: print "srumma...", srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER, g_counter) if me == 0: print "done" if me == 0:
def verify_using_np(g_a, g_b, g_c): a = ga.get(g_a) b = ga.get(g_b) c = ga.get(g_c) v = np.dot(a, b) val = int(np.abs(np.sum(c - v)) > 0.0001) val = ga.gop_add(val) return val == 0 if __name__ == '__main__': if nproc > MULTIPLIER**3: if 0 == me: print "You must use less than %s processors" % (MULTIPLIER**3 + 1) else: g_a = ga.create(ga.C_DBL, [N, N]) g_b = ga.create(ga.C_DBL, [N, N]) g_c = ga.create(ga.C_DBL, [N, N]) # put some fake data into input arrays A and B if me == 0: ga.put(g_a, np.random.random(N * N)) ga.put(g_b, np.random.random(N * N)) ga.sync() if me == 0: print "srumma...", srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER) if me == 0: print "done" if me == 0: print "verifying using ga.gemm...", ok = verify_using_ga(g_a, g_b, g_c)
if DEBUG: print_sync(value) return value < EPSILON def convergence_test_L2(g_a, g_b): # compute L2 norm of change # subtract g_b from g_a, results stored in g_b ga.add(g_a, g_b, g_b, beta=-1) # compute elementwise dot product (i.e. treats N-d arrays as vectors) value = ga.dot(g_b, g_b) if DEBUG: print_sync(value) return value < EPSILON # create GA, distribute entire rows g_a = ga.create(ga.C_FLOAT, (dim,dim), chunk=(0,dim)) # create a duplicate GA for the convergence test g_b = ga.duplicate(g_a) # process 0 initializes global array # Note: alternatively, each process could initialize its local data using # ga.access() and ga.distribution() a = np.zeros((dim,dim), dtype=np.float32) if rank == 0: a[0,:] = 100 #top row a[:,0] = 75 #left column a[:,a.shape[0] - 1] = 50 #right column ga.put(g_a, a) ga.sync() # which piece of array do I own?
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays

me = ga.nodeid()


def print_distribution(g_a):
    """Print, for every process id, the lo/hi bounds ga.distribution reports."""
    for proc in range(ga.nnodes()):
        lower, upper = ga.distribution(g_a, proc)
        print("%s lo=%s hi=%s" % (proc, lower, upper))


# create some arrays: same shape and type, different chunk arguments
g_a = ga.create(ga.C_DBL, (10, 20, 30), chunk=(-1, 20, -1))
g_b = ga.create(ga.C_DBL, (10, 20, 30), chunk=(10, -1, -1))

# only process 0 reports the two distributions
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)

# fill g_a with 6 and copy it into g_b
ga.fill(g_a, 6)
ga.copy(g_a, g_b)

# process 0 shows what ga.access returns for g_b
if me == 0:
    buffer = ga.access(g_b)
    print(buffer.shape)
    print(buffer)
"""Use ga.access() to sum locally per SMP node."""
import mpi4py.MPI
from ga4py import ga
import numpy as np

# Fixed: world_id was used below without ever being defined in this script
# (NameError). ga.nodeid() is the rank query used for this purpose elsewhere.
# NOTE(review): confirm no other definition of world_id was intended.
world_id = ga.nodeid()

# Okay, we create the global array
g_a = ga.create(ga.C_DBL, (3, 4, 5, 6))
if world_id == 0:
    # a single process writes the values 0 .. 3*4*5*6-1 into the array
    ga.put(g_a, np.arange(3 * 4 * 5 * 6))
ga.sync()
# You're on your own!
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays
import numpy as np

me = ga.nodeid()
nproc = ga.nnodes()


def print_distribution(g_a):
    """Print, for every process id, the lo/hi bounds ga.distribution reports."""
    for proc in range(ga.nnodes()):
        lower, upper = ga.distribution(g_a, proc)
        print("P=%s lo=%s hi=%s" % (proc, lower, upper))


# create some irregular arrays
block = [3, 2]
map = [0, 2, 6, 0, 5]

# the irregular array needs at least prod(block) processes
if nproc < np.prod(block):
    raise ValueError("ERROR: fewer procs than requested blocks")

g_a = ga.create_irreg(ga.C_DBL, [8, 10], block, map, "Array A")
if not g_a:
    ga.error("Could not create global array A", g_a)
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# only process 0 reports
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)
def comp_pi(n, myrank=0, nprocs=1): h = 1.0 / n; s = 0.0; for i in xrange(myrank + 1, n + 1, nprocs): x = h * (i - 0.5); s += 4.0 / (1.0 + x**2); return s * h def prn_pi(pi, PI): message = "pi is approximately %.16f, error is %.16f" print (message % (pi, abs(pi - PI))) nprocs = ga.nnodes() myrank = ga.nodeid() g_pi = ga.create(ga.C_DBL, [1]) one_time = False if len(sys.argv) == 2: n = int(sys.argv[1]) one_time = True while True: if not one_time: if myrank == 0: n = get_n() n = ga.brdcst(n) else: n = ga.brdcst(0) if n == 0: break
def parallel_task():
    """Create a 3x4x5 double-precision global array and call ga.randomize on it.

    The process id and process count of the group are queried but not used
    yet; the marked comment is where a message from the group master is
    meant to be printed.
    """
    rank = ga.pgroup_nodeid()
    group_size = ga.pgroup_nnodes()
    ### print a message from the master of the group
    handle = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(handle)
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays

me = ga.nodeid()


def print_distribution(g_a):
    """Print, for every process id, the lo/hi bounds ga.distribution reports."""
    for proc in range(ga.nnodes()):
        lower, upper = ga.distribution(g_a, proc)
        print("%s lo=%s hi=%s" % (proc, lower, upper))


# create some arrays
g_a = ga.create(ga.C_DBL, (10, 20, 30))
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# only process 0 reports
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)
import mpi4py.MPI  # initialize Message Passing Interface
from ga4py import ga  # initialize Global Arrays
import numpy as np

me = ga.nodeid()
nproc = ga.nnodes()


def print_distribution(g_a):
    """Print, for every process id, the lo/hi bounds ga.distribution reports."""
    for proc in range(ga.nnodes()):
        lower, upper = ga.distribution(g_a, proc)
        print("P=%s lo=%s hi=%s" % (proc, lower, upper))


# create some irregular arrays
block = [3, 2]
map = [0, 2, 6, 0, 5]

# the irregular array needs at least prod(block) processes
if nproc < np.prod(block):
    raise ValueError("ERROR: fewer procs than requested blocks")

g_a = ga.create_irreg(ga.C_DBL, [8, 10], block, map, "Array A")
if not g_a:
    ga.error("Could not create global array A", g_a)
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# only process 0 reports
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)