import numpy as np
from numpy.fft import fft2, ifft2
from math import ceil, fabs

from mpi4py import MPI
from parutils import pprint

#=============================================================================
# Main

size = 10000        # length of vector v
iter = 20           # number of iterations to run

comm = MPI.COMM_WORLD

pprint("============================================================================")
pprint(" Running %d parallel MPI processes" % comm.size)

my_size = size // comm.size     # Every process computes a vector of length *my_size*
size = comm.size * my_size      # Make sure size is an integer multiple of comm.size
my_offset = comm.rank * my_size

# This is the complete vector
vec = np.zeros(size)            # Every element zero...
vec[0] = 1.0                    # ... except vec[0]

# Create my (local) slice of the matrix
my_M = np.zeros((my_size, size))
for i in xrange(my_size):
    j = (my_offset + i - 1) % size
    my_M[i, j] = 1.0
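# Every listing in this section imports `pprint` from a local `parutils`
# module that is not reproduced here. A minimal sketch of what such a helper
# might look like (an assumption, not the actual parutils code): print a
# message exactly once, from rank 0 only, so collective status lines are not
# repeated by every process.
from mpi4py import MPI

def pprint(msg="", comm=MPI.COMM_WORLD):
    """Hypothetical stand-in for parutils.pprint: print only on rank 0."""
    if comm.rank == 0:
        print(msg)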
#!/usr/bin/env python

from __future__ import division
from __future__ import print_function

import numpy as np
from mpi4py import MPI

from parutils import pprint

comm = MPI.COMM_WORLD

pprint("-" * 78)
pprint(" Running on %d cores" % comm.size)
pprint("-" * 78)

comm.Barrier()

# Prepare a vector of N=5 elements to be broadcast...
N = 5
if comm.rank == 0:
    A = np.arange(N, dtype=np.float64)    # rank 0 has the actual data
else:
    A = np.empty(N, dtype=np.float64)     # all other ranks just allocate an empty buffer

# Broadcast A from rank 0 to everybody
comm.Bcast([A, MPI.DOUBLE])

# Everybody should now have the same data
print("[%02d] %s" % (comm.rank, A))
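# Run with a standard MPI launcher, e.g.  mpiexec -n 4 python <this script>.
# After the broadcast every rank should print the same five values, one line
# per rank (in some interleaved order), e.g. "[02] [0. 1. 2. 3. 4.]".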
import sys
sys.path.insert(0, "../pylib")

from time import time

from mpi4py import MPI
import numpy as np

from parutils import pprint

sizes = [2**n for n in xrange(1, 24)]
runs = 50

comm = MPI.COMM_WORLD

pprint("Benchmarking Reduce performance on %d parallel MPI processes..." % comm.size)
pprint()
pprint("%15s | %12s | %12s" % ("Size (bytes)", "Time (msec)", "Bandwidth (MiBytes/s)"))

for s in sizes:
    data = np.ones(s)
    res = np.empty_like(data)

    comm.Barrier()

    t_min = np.inf
    for i in xrange(runs):
        t0 = time()
        comm.Reduce([data, MPI.DOUBLE], [res, MPI.DOUBLE])
        t = time() - t0
        t_min = min(t, t_min)
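    # The excerpt stops before the per-size report. One way the measured
    # minimum time could be turned into the table row announced by the header
    # above (an illustrative sketch, not necessarily the original code):
    transferred = data.nbytes                          # s doubles, 8 bytes each
    bandwidth = transferred / t_min / 1024. / 1024.    # MiBytes/s
    pprint("%15d | %12.3f | %12.3f" % (transferred, t_min * 1000, bandwidth))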
rem = (right - left) % comm.size
if rem != 0:
    right += comm.size - rem

images = pyfits.getdata(in_fname)[left:right, :, :]    #h5in.root.images
image_count, height, width = images.shape
image_count = min(image_count, 200)

# rem = image_count % comm.size
# if (image_count % comm.size != 0):
#     extra_col = comm.size - rem
#     pprint("image_count % comm.size != 0")
#     sys.exit(1)

pprint("============================================================================")
pprint(" Running %d parallel MPI processes" % comm.size)
pprint(" Reading images from '%s'" % in_fname)
pprint(" Processing %d images of size %d x %d" % (image_count, width, height))
pprint(" Writing transformed images into '%s'" % out_fname)

# Prepare convolution kernel in frequency space
kernel_ = np.ones((height, width))

# rank 0 needs buffer space to gather data
if comm.rank == 0:
    gbuf = np.empty((comm.size, height, width))
    origin_header = pyfits.open(in_fname)[0].header
    new_images = np.zeros((image_count, height, width))
else:
import sys

import tables
from mpi4py import MPI

from bernstein.utils import autotable
from parutils import pprint

#=============================================================================
# Main

comm = MPI.COMM_WORLD

in_fname = sys.argv[-2]
out_fname = sys.argv[-1]

try:
    h5in = tables.openFile(in_fname, 'r')
except:
    pprint("Error: Could not open file %s" % in_fname)
    exit(1)

#h5out = autotable.AutoTable(out_fname)
#
images = h5in.root.images
image_count, height, width = images.shape
image_count = min(image_count, 200)

pprint("============================================================================")
pprint(" Running %d parallel MPI processes" % comm.size)
pprint(" Reading images from '%s'" % in_fname)
pprint(" Processing %d images of size %d x %d" % (image_count, width, height))
#!/usr/bin/env python

from __future__ import division

import numpy as np
from mpi4py import MPI

from parutils import pprint

comm = MPI.COMM_WORLD

pprint("-"*78)
pprint(" Running on %d cores" % comm.size)
pprint("-"*78)

comm.Barrier()

# Prepare a vector of N=5 elements to be broadcast...
N = 5
if comm.rank == 0:
    A = np.arange(N, dtype=np.float64)    # rank 0 has the actual data
else:
    A = np.empty(N, dtype=np.float64)     # all other ranks just allocate an empty buffer

# Broadcast A from rank 0 to everybody
comm.Bcast([A, MPI.DOUBLE])

# Everybody should now have the same data
print "[%02d] %s" % (comm.rank, A)
SOUTH = 1
EAST = 2
WEST = 3

if __name__ == "__main__":
    comm = MPI.COMM_WORLD

    mpi_rows = int(np.floor(np.sqrt(comm.size)))
    mpi_cols = comm.size // mpi_rows
    if mpi_rows*mpi_cols > comm.size:
        mpi_cols -= 1
    if mpi_rows*mpi_cols > comm.size:
        mpi_rows -= 1

    pprint("="*78)
    pprint("Running %d parallel processes (ranks)" % (comm.size))
    pprint("Creating a %d x %d processor grid..." % (mpi_rows, mpi_cols))

    # Create a 2d cartesian grid with periodic boundary conditions
    ccomm = comm.Create_cart((mpi_rows, mpi_cols), periods=(True, True), reorder=True)

    my_mpi_row, my_mpi_col = ccomm.Get_coords(ccomm.rank)

    # Identify our neighbours on the grid
    neigh = [0, 0, 0, 0]
    neigh[NORTH], neigh[SOUTH] = ccomm.Shift(0, 1)
    neigh[EAST],  neigh[WEST]  = ccomm.Shift(1, 1)

    # Create matrices
    my_A = np.random.normal(size=(my_N, my_M)).astype(np.float32)
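    # The ranks obtained from Shift above are what point-to-point halo
    # exchanges along the grid are usually built on. A minimal sketch of such
    # an exchange along the row dimension (illustrative only -- the buffers
    # and the direction chosen here are assumptions, not the remainder of the
    # original program):
    send_row = my_A[0, :].copy()          # boundary row passed to our NORTH neighbour
    recv_row = np.empty_like(send_row)    # boundary row arriving from our SOUTH neighbour
    ccomm.Sendrecv(send_row, dest=neigh[NORTH],
                   recvbuf=recv_row, source=neigh[SOUTH])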
(options, args) = parser.parse_args()

# Parse dtype argument
if options.dtype == "float32":
    dtype_str = "np.float32"
    dtype = np.float32
elif options.dtype == "float64":
    dtype_str = "np.float64"
    dtype = np.float64
else:
    print "[FATAL] Unknown type %s" % options.dtype
    exit(1)

benches = options.benches.split(",")

comm = MPI.COMM_WORLD

pprint()
pprint("Running %d parallel MPI processes: Results display collective performance" % comm.size)
pprint()

# Calculate sizes
nbytes = options.nbytes * 1024 * 1024
size = nbytes // np.dtype(dtype).itemsize

if 'O1' in benches:
    linear_benchcodes = (
        ("x = 1 * a"           , 1  , 2  ),
        ("x = a * a"           , 1  , 3  ),
        ("x = a * b"           , 1  , 3  ),
        ("x = a * b * c"       , 2  , 6  ),
        ("x = a[::2] * b[::2]" , 0.5, 1.5),
        ("x = np.exp(a)"       , 1  , 2  ),
from __future__ import division

import sys
sys.path.insert(0, "../pylib")

import numpy as np
from mpi4py import MPI

from parutils import pprint

#=============================================================================
# Main

comm = MPI.COMM_WORLD

pprint("-"*78)
pprint(" Running %d parallel processes..." % comm.size)
pprint("-"*78)

my_N = 10 + comm.rank
my_a = comm.rank * np.ones(my_N)

N = comm.allreduce(my_N)

#a = np.empty(N)
a = comm.gather(my_a)

pprint("Gathered array: %s" % a)
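# The lowercase comm.gather above pickles each rank's array, which is the
# simplest way to collect pieces of different length (every rank contributes
# 10 + rank elements). A buffer-based alternative would be Gatherv; the sketch
# below is illustrative only -- the counts/displacements bookkeeping is an
# assumption, not part of the original listing.
counts = comm.allgather(my_N)                           # elements contributed by each rank
displs = [sum(counts[:r]) for r in range(comm.size)]    # offset of each piece in the result

if comm.rank == 0:
    buf = np.empty(N)
    comm.Gatherv([my_a, MPI.DOUBLE], [buf, counts, displs, MPI.DOUBLE], root=0)
    print("Gathered with Gatherv: %s" % buf)
else:
    comm.Gatherv([my_a, MPI.DOUBLE], None, root=0)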
(options, args) = parser.parse_args()

# Parse dtype argument
if options.dtype == "float32":
    dtype_str = "np.float32"
    dtype = np.float32
elif options.dtype == "float64":
    dtype_str = "np.float64"
    dtype = np.float64
else:
    print "[FATAL] Unknown type %s" % options.dtype
    exit(1)

benches = options.benches.split(",")

comm = MPI.COMM_WORLD

pprint()
pprint("Running %d parallel MPI processes: Results display collective performance" % comm.size)
pprint()

# Calculate sizes
runs = int(options.runs)
nbytes = options.nbytes * 1024 * 1024
size = nbytes // np.dtype(dtype).itemsize

if 'O1' in benches:
    linear_benchcodes = (
        ("x = 1 * a"           , 1  , 2  ),
        ("x = a * a"           , 1  , 2  ),
        ("x = a * b"           , 1  , 3  ),
        ("x = a * b * c"       , 2  , 4  ),
        ("x = a[::2] * b[::2]" , 0.5, 1.5),
#!/usr/bin/env python

from __future__ import division
from __future__ import print_function

import numpy as np
from mpi4py import MPI

from parutils import pprint

comm = MPI.COMM_WORLD

pprint("-" * 78)
pprint(" Running on %d cores" % comm.size)
pprint("-" * 78)

my_N = 4
N = my_N * comm.size

if comm.rank == 0:
    A = np.arange(N, dtype=np.float64)
else:
    A = np.empty(N, dtype=np.float64)

my_A = np.empty(my_N, dtype=np.float64)

# Scatter data into my_A arrays
comm.Scatter([A, MPI.DOUBLE], [my_A, MPI.DOUBLE])

pprint("After Scatter:")
for r in range(comm.size):
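    # A common way to finish the loop begun above: let each rank print its
    # scattered slice in turn, serialised with a barrier so the output stays
    # ordered (a sketch of the usual pattern, not necessarily the original
    # continuation of this listing).
    if comm.rank == r:
        print("[%02d] %s" % (comm.rank, my_A))
    comm.Barrier()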
import numpy as np
from numpy.fft import fft2, ifft2
from math import ceil, fabs

from mpi4py import MPI
from parutils import pprint

#=============================================================================
# Main

size = 10000        # length of vector v
iter = 20           # number of iterations to run

comm = MPI.COMM_WORLD

pprint("============================================================================")
pprint(" Running %d parallel MPI processes" % comm.size)

my_size = size // comm.size     # Every process computes a vector of length *my_size*
size = comm.size * my_size      # Make sure size is an integer multiple of comm.size
my_offset = comm.rank * my_size

# This is the complete vector
vec = np.zeros(size)            # Every element zero...
vec[0] = 1.0                    # ... except vec[0]

# Create my (local) slice of the matrix
my_M = np.zeros((my_size, size))
for i in xrange(my_size):
    j = (my_offset + i - 1) % size
    my_M[i, j] = 1.0
#!/usr/bin/env python

from __future__ import division

import numpy as np
from mpi4py import MPI

from parutils import pprint

comm = MPI.COMM_WORLD

pprint("-"*78)
pprint(" Running on %d cores" % comm.size)
pprint("-"*78)

my_N = 4
N = my_N * comm.size

if comm.rank == 0:
    A = np.arange(N, dtype=np.float64)
else:
    A = np.empty(N, dtype=np.float64)

my_A = np.empty(my_N, dtype=np.float64)

# Scatter data into my_A arrays
comm.Scatter([A, MPI.DOUBLE], [my_A, MPI.DOUBLE])

pprint("After Scatter:")
for r in xrange(comm.size):
    if comm.rank == r:
import sys

import tables
from mpi4py import MPI

from bernstein.utils import autotable
from parutils import pprint

#=============================================================================
# Main

comm = MPI.COMM_WORLD

in_fname = sys.argv[-2]
out_fname = sys.argv[-1]

try:
    h5in = tables.openFile(in_fname, 'r')
except:
    pprint("Error: Could not open file %s" % in_fname)
    exit(1)

#h5out = autotable.AutoTable(out_fname)
#
images = h5in.root.images
image_count, height, width = images.shape
image_count = min(image_count, 200)

pprint("============================================================================")
pprint(" Running %d parallel MPI processes" % comm.size)
pprint(" Reading images from '%s'" % in_fname)
pprint(" Processing %d images of size %d x %d" % (image_count, width, height))
pprint(" Writing whitened images into '%s'" % out_fname)