import mpi4py.MPI
import ga
import numpy as np

me = ga.nodeid()
nproc = ga.nnodes()

def parallel_task():
    me = ga.pgroup_nodeid()
    nproc = ga.pgroup_nnodes()
    ### print a message from the master of the group
    g_a = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(g_a)
    ### sum the g_a and print the sum
    ###     -OR- do something else with g_a...

midproc = nproc // 2
### assign to 'proclist_first' the first half of the process range
### assign to 'proclist_last' the last half of the process range
### create the 'group_id_first' process group
### create the 'group_id_last' process group
if me in proclist_first:
    ### set the default group to 'group_id_first'
    parallel_task()
### reset the default group to the world group
### synchronize
if me in proclist_last:
    ### set the default group to 'group_id_last'
    parallel_task()
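One possible way to fill in the exercise above, assuming the bindings expose the C process-group calls as ga.pgroup_create, ga.pgroup_set_default, and ga.pgroup_get_world (a sketch, not the official solution):

import mpi4py.MPI
import ga
import numpy as np

me = ga.nodeid()
nproc = ga.nnodes()

def parallel_task():
    me = ga.pgroup_nodeid()
    nproc = ga.pgroup_nnodes()
    if not me:
        print "group master: %s processes in my group" % nproc
    g_a = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(g_a)
    if not me:
        print "sum of g_a = %s" % np.sum(ga.get(g_a))

midproc = nproc // 2
proclist_first = range(0, midproc)
proclist_last = range(midproc, nproc)
# group creation is collective, so every process creates both groups
# (assumption: these pgroup_* wrappers mirror the C GA_Pgroup_* API)
group_id_first = ga.pgroup_create(proclist_first)
group_id_last = ga.pgroup_create(proclist_last)
if me in proclist_first:
    ga.pgroup_set_default(group_id_first)
    parallel_task()
ga.pgroup_set_default(ga.pgroup_get_world())
ga.sync()
if me in proclist_last:
    ga.pgroup_set_default(group_id_last)
    parallel_task()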
"""A way over-simplified SRUMMA matrix multiplication implementation. Assumes square matrices with the shape as a multiple of the block size. """ import mpi4py.MPI import ga import numpy as np CHUNK_SIZE = 256 MULTIPLIER = 3 N = CHUNK_SIZE*MULTIPLIER me = ga.nodeid() nproc = ga.nnodes() class Task(object): def __init__(self, alo, ahi, blo, bhi, clo, chi): self.alo = alo self.ahi = ahi self.blo = blo self.bhi = bhi self.clo = clo self.chi = chi def __repr__(self): return "Task(%s,%s,%s,%s,%s,%s)" % ( self.alo, self.ahi, self.blo, self.bhi, self.clo, self.chi) def get_task_list(chunk_size, multiplier): count = 0 task_list = [None]*multiplier**3
def print_distribution(g_a):
    for i in range(ga.nnodes()):
        lo, hi = ga.distribution(g_a, i)
        print "%s lo=%s hi=%s" % (i, lo, hi)
"""Use ga.access() to sum locally per SMP node.""" import mpi4py.MPI import ga import numpy as np world_id = ga.nodeid() world_nproc = ga.nnodes() node_id = ga.cluster_nodeid() node_nproc = ga.cluster_nprocs(node_id) node_me = ga.cluster_procid(node_id,ga.nodeid()) g_a = ga.create(ga.C_DBL, (3,4,5,6)) if world_id == 0: ga.put(g_a, np.arange(3*4*5*6)) ga.sync() if node_me == 0: sum = 0 for i in range(node_nproc): smp_neighbor_world_id = ga.cluster_procid(node_id,i) buffer = ga.access(g_a, proc=smp_neighbor_world_id) sum += np.sum(buffer) print sum
"""Multiplication of two square matrices with randomly generated contents."""
import mpi4py.MPI
import ga
import numpy as np

NDIM = 2
TOTALELEMS = 1007
MAXPROC = 128
NBUF = 4
TOLERANCE = 0.1

me = ga.nodeid()
nprocs = ga.nnodes()

def verify(g_a, g_b, g_c):
    g_chk = ga.duplicate(g_a, "array check")
    if not g_chk: ga.error("duplicate failed")
    ga.sync()
    # recompute the product with the library gemm and compare against g_c
    ga.gemm(False, False, TOTALELEMS, TOTALELEMS, TOTALELEMS, 1.0,
            g_a, g_b, 0.0, g_chk)
    ga.sync()
    ga.add(g_c, g_chk, g_chk, 1.0, -1.0)
    rchk = ga.dot(g_chk, g_chk)
    if not me:
        print "Normed difference in matrices: %s" % rchk
        if not (-TOLERANCE < rchk < TOLERANCE):
            ga.error("Matrix multiply verify failed")
        else:
            print "Matrix multiply OK"
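Here ga.add(g_c, g_chk, g_chk, 1.0, -1.0) overwrites g_chk with 1.0*g_c - 1.0*g_chk, so rchk is the squared norm of the difference between the computed and reference products. A minimal driver sketch (illustrative only; the full example fills g_c with an explicit block-by-block multiply, for which ga.gemm merely stands in here, so this check passes trivially):

def matrix_multiply():
    # create and fill the global arrays (names are illustrative)
    dims = (TOTALELEMS, TOTALELEMS)
    g_a = ga.create(ga.C_DBL, dims, "array A")
    g_b = ga.create(ga.C_DBL, dims, "array B")
    g_c = ga.create(ga.C_DBL, dims, "array C")
    ga.randomize(g_a)
    ga.randomize(g_b)
    ga.sync()
    # form g_c = g_a * g_b
    ga.gemm(False, False, TOTALELEMS, TOTALELEMS, TOTALELEMS, 1.0,
            g_a, g_b, 0.0, g_c)
    verify(g_a, g_b, g_c)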
def print_distribution(g_a):
    for i in range(ga.nnodes()):
        lo, hi = ga.distribution(g_a, i)
        print "P=%s lo=%s hi=%s" % (i, lo, hi)
""" transpose of 1-d array. e.g. (1 2 3 4 5 6 7 8 9 10) => (10 9 8 7 6 5 4 3 2 1) """ import mpi4py.MPI import ga import numpy as np # Find local processor ID and number of processors. me = ga.nodeid() nprocs = ga.nnodes() TOTALELEMS = 197 def verify(g_a, g_b): a = ga.get(g_a) b = ga.get(g_b) if not np.all(a[::-1] == b): print "Mismatch: a[::-1] is not equal to b" ga.error("verify failed") print "Transpose OK" def TRANSPOSE1D(): # Configure array dimensions. Force an unequal data distribution. dims = [nprocs*TOTALELEMS + nprocs/2] chunk = [TOTALELEMS] # minimum data on each process # create a global array g_a and duplicate it to get g_b g_a = ga.create(ga.C_INT, dims, "array A", chunk) if not g_a: ga.error("create failed: A")
"""Use ga.access() to sum locally per SMP node.""" import mpi4py.MPI import ga import numpy as np world_id = ga.nodeid() world_nproc = ga.nnodes() node_id = ga.cluster_nodeid() node_nproc = ga.cluster_nprocs(node_id) node_me = ga.cluster_procid(node_id, ga.nodeid()) g_a = ga.create(ga.C_DBL, (3, 4, 5, 6)) if world_id == 0: ga.put(g_a, np.arange(3 * 4 * 5 * 6)) ga.sync() if node_me == 0: sum = 0 for i in range(node_nproc): smp_neighbor_world_id = ga.cluster_procid(node_id, i) buffer = ga.access(g_a, proc=smp_neighbor_world_id) sum += np.sum(buffer) print sum
import mpi4py.MPI # initialize Message Passing Interface
import ga # initialize Global Arrays

print "hello from %s out of %s" % (ga.nodeid(), ga.nnodes())
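Every example here is an ordinary MPI program: importing mpi4py.MPI initializes the message-passing runtime, and the subsequent ga import brings up Global Arrays on top of it, so the import order matters. The scripts are launched with the usual MPI starter, e.g. mpiexec -n 4 python hello.py (assuming the script above is saved as hello.py).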