def load_seismic_volume(filename, key, dist, use_hdf5):
    ''' Load the seismic volume, from HDF5 or .dnpy files. '''
    # Create context.
    context = Context()
    if use_hdf5:
        print('Loading from .hdf5 file...')
        da = load_hdf5_distarray(context, filename, key, dist)
    else:
        print('Loading from .dnpy files...')
        da = load_dnpy_distarray(context, filename)
    # Print some stuff about the array.
    if False:
        dump_distarray_info(da)
    return da
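# Hedged usage sketch for load_seismic_volume.  The file name, HDF5 key, and
# distribution value below are illustrative placeholders, not values taken
# from the source.
if __name__ == '__main__':
    volume = load_seismic_volume('seismic.hdf5', key='seismic',
                                 dist='bnn', use_hdf5=True)
    print(volume.shape)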
class DistArrayTransport(TransportControl):

    def __init__(self):
        self.targets = None
        self.context = None
        self.n_processes = None
        self.count = None
        self.history = []

    def _transport_initialise(self, options):
        # self.exp is not available here
        MPI_setup(options)  # change this?
        with closing(Context()) as context:
            self.targets = context.targets
        # set mpi logging here?

    def _transport_pre_plugin_list_run(self):
        self.n_processes = \
            self.exp.meta_data.plugin_list._get_n_processing_plugins()
        self.context = Context(targets=self.targets)
        closing(self.context).__enter__()

    def _transport_pre_plugin(self):
        # store all datasets and associated patterns
        self.__update_history(self.exp.index)
        self.__distribute_arrays(self.exp.index)

    def _transport_post_plugin(self):
        # if you wish to output datasets that have been removed from the index
        # then do that here (data.remove is True)
        pass

    def _transport_post_plugin_list_run(self):
        # convert distarrays to hdf5
        for data in self.exp.index['in_data'].values():
            name = data.get_name()
            fname = self.exp.meta_data.get('filename')[name]
            gname = self.exp.meta_data.get('group_name')[name]
            data.data.context.save_hdf5(fname, data.data, gname, mode='w')
            self.exp._get_experiment_collection()['saver_plugin']\
                ._open_read_only(data, fname, gname)
        closing(self.context).__exit__()

    def __update_history(self, data_index):
        for dtype, data_dict in data_index.iteritems():
            for name, dobj in data_dict.iteritems():
                pattern = dobj._get_plugin_data().get_pattern()
                self.history.append({name: pattern})

    def __distribute_arrays(self, data_index):
        if not self.history:
            self.__load_data_from_hdf5(data_index['in_data'])
            # expand this later for other types (or first set should always
            # be treated as hdf5 dataset?)
            # - i.e. get data as before directly from file and output to
            #   distributed array
        else:
            self.__redistribute_data(data_index['in_data'])
        self.__create_out_data(data_index['out_data'])

    def __redistribute_data(self, data_list):
        """ Calculate the pattern distributions and, if they are not the
        same, redistribute. """
        for data in data_list.values():
            patterns = self.__get_distribution_history(data.get_name())
            if patterns[0] != patterns[1]:
                temp = data.data.toarray()  # *** temporarily creating ndarray
                # distarray (create empty dist array and populate?)
                distribution = \
                    Distribution(self.context, data.get_shape(),
                                 patterns[-1])  # currently redundant
                data.data = self.context.fromarray(temp, patterns[-1])

    def __load_data_from_hdf5(self, data_list):
        ''' Create a distarray from the specified section of the HDF5 file.
        '''
        for data in data_list:
            input_file = data.backing_file.filename
            dist = self.__calculate_distribution(
                data._get_plugin_data().get_pattern())
            distribution = \
                Distribution(self.context, data.get_shape(), dist=dist)
            data.data = self.context.load_hdf5(
                input_file, distribution=distribution, key=data.name)

    def __create_out_data(self, out_data):
        for data in out_data.values():
            dist = self.__calculate_distribution(
                data._get_plugin_data().get_pattern())
            dist = Distribution(self.context, data.get_shape(), dist)
            data.data = self.context.zeros(dist, dtype=np.int32)

    def __get_distribution_history(self, name):
        hist = [i for i in range(len(self.history))
                if self.history[i].keys()[0] == name][-2:]
        return [self.__calculate_distribution(
            self.history[p].values()[0]) for p in hist]

    def __calculate_distribution(self, pattern):
        core_dirs = pattern.values()[0]['core_dir']
        slice_dirs = pattern.values()[0]['slice_dir']
        nDims = len(core_dirs + slice_dirs)
        dist = ['n']*nDims
        for sl in slice_dirs:
            dist[sl] = 'b'
        return ''.join(dist)

    def _transport_process(self, plugin):
        # self.distributed_process(self.process, plugin)
        print self.testing
        pickler.dump(self)
        self.distributed_process()

    def distributed_process(self, kernel):
        self.context.register(kernel)
        iters_key = \
            self.context.apply(self.local_process, (), {'kernel': kernel})
        return iters_key

    def local_process(frames, output, params, kernel):
        from distarray.localapi import LocalArray
        recon = kernel(frames, output, params)
        res = LocalArray(output.distribution, buf=recon)
        return proxyize(res)  # noqa

    def testing(self):
        print "running the testing function"
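# Illustration of the pattern -> dist-string mapping performed by
# __calculate_distribution above.  The pattern name and axis numbers are made
# up for this example; real patterns come from the plugin data.
example_pattern = {'SINOGRAM': {'core_dir': (0, 2), 'slice_dir': (1,)}}
# Core dimensions stay undistributed ('n'), slice dimensions become
# block-distributed ('b'), so a 3-d dataset with this pattern maps to 'nbn'.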
def cli(cmd):
    """
    Process command line arguments, set default params, and do_julia_runs.

    Parameters
    ----------
    cmd : list of str
        sys.argv
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('resolution_list', metavar='N', type=int, nargs='+',
                        help="resolutions of the Julia set to benchmark (NxN)")
    parser.add_argument("-r", "--repeat", type=int, dest='repeat_count',
                        default=3,
                        help=("number of repetitions of each unique parameter "
                              "set, default: 3"))
    parser.add_argument("-o", "--output-filename", type=str,
                        dest='output_filename', default='out.json',
                        help=("filename to write the json data to."))
    parser.add_argument("-k", "--kernel", type=str, default='fancy',
                        choices=("fancy", "numpy", "cython"),
                        help=("kernel to use for computation. "
                              "Options are 'fancy', 'numpy', or 'cython'."))
    parser.add_argument("-s", "--scaling", type=str, default="strong",
                        choices=("strong", "weak"),
                        help=("Kind of scaling test. "
                              "Options are 'strong' or 'weak'"))
    args = parser.parse_args()

    ## Default parameters
    with closing(Context()) as context:
        # use all available targets
        engine_count_list = list(range(1, len(context.targets) + 1))
    dist_list = ['bn', 'cn', 'bb', 'cc']
    # This Julia set has many points inside, needing all iterations.
    c_list = [complex(-0.045, 0.45)]
    re_ax = (-1.5, 1.5)
    im_ax = (-1.5, 1.5)
    z_max = 2.0
    n_max = 100

    fn_from_kernel = {'fancy': fancy_numpy_julia_calc,
                      'numpy': numpy_julia_calc}
    if args.kernel == 'cython':
        from kernel import cython_julia_calc
        fn_from_kernel['cython'] = cython_julia_calc

    results = do_julia_runs(args.repeat_count, engine_count_list, dist_list,
                            args.resolution_list, c_list, re_ax, im_ax,
                            z_max, n_max,
                            output_filename=args.output_filename,
                            kernel=fn_from_kernel[args.kernel],
                            scaling=args.scaling)
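# Example invocation of this benchmark (hedged; the script name and the
# resolution/option values below are illustrative, not taken from the source):
#
#   dacluster start -n4
#   python benchmark_julia.py 256 512 -r 3 -k numpy -s strong -o results.json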
def do_julia_runs(repeat_count, engine_count_list, dist_list, resolution_list,
                  c_list, re_ax, im_ax, z_max, n_max, output_filename,
                  kernel=fancy_numpy_julia_calc, scaling="strong"):
    """Perform a series of Julia set calculations, and print the results.

    Loop over all parameter lists.

    Parameters
    ----------
    repeat_count : int
        Number of times to repeat each unique parameter set.  Later we can
        take the average or minimum of these values to reduce noise in the
        output.
    engine_count_list : list of int
        List of numbers of engines to test.  Example: list(range(1, 5))
    dist_list : list of 2-element sequences
        List of distribution types to test.  Example: ['bn', 'cn', 'bb', 'cc']
    resolution_list : list of int
        List of resolutions of the Julia set to test.
    c_list : list of complex
        Constants to use to compute the Julia set.
        Example: [complex(-0.045, 0.45)]
    re_ax : 2-tuple of float
        Min and max for the real axis.
    im_ax : 2-tuple of float
        Min and max for the imaginary axis.
    z_max : float
        Size of number that we consider as going off to infinity.  I think
        that 2.0 is sufficient to be sure that the point will escape.
    n_max : int
        Maximum iteration count.  Points in the set will hit this limit, so
        increasing this has a large effect on the run-time.
    output_filename : str
        Filename to write the json data to.
    kernel : function
        Kernel to use for computation of the Julia set.  Options are 'fancy',
        'numpy', or 'cython'.
    scaling : str, either "strong" or "weak"
        Kind of scaling test.
    """
    max_engine_count = max(engine_count_list)
    with closing(Context()) as context:
        # Check that we have enough engines available.
        num_engines = len(context.targets)
        if max_engine_count > num_engines:
            msg = 'Require %d engines, but only %d are available.' % (
                max_engine_count, num_engines)
            raise ValueError(msg)

    # Loop over everything and time the calculations.
    results = []
    hdr = ('Start', 'End', 'Dist', 'Resolution', 'c', 'Engines', 'Iters')
    print("(n/n_runs: time)", hdr)

    # progress stats
    n_regular_runs = repeat_count * (len(resolution_list) * len(c_list) *
                                     len(engine_count_list) * len(dist_list))
    n_numpy_runs = repeat_count * (len(resolution_list) * len(c_list))
    n_runs = n_regular_runs + n_numpy_runs
    prog_fmt = "({:d}/{:d}: {:0.3f}s)"
    n = 0

    for i in range(repeat_count):
        for resolution in resolution_list:
            dimensions = (resolution, resolution)
            for c in c_list:
                with closing(Context(targets=[0])) as context:
                    # numpy julia run
                    complex_plane = create_complex_plane(
                        context, dimensions, 'bn', re_ax, im_ax)
                    result = do_julia_run(context, 'numpy', dimensions, c,
                                          complex_plane, z_max, n_max,
                                          benchmark_numpy=True, kernel=kernel)
                    results.append({h: r for h, r in zip(hdr, result)})
                    n += 1
                    print(prog_fmt.format(n, n_runs, result[1] - result[0]),
                          result)
                for engine_count in engine_count_list:
                    if scaling == "weak":
                        factor = sqrt(engine_count)
                        dimensions = (int(floor(resolution * factor)), ) * 2
                    for dist in dist_list:
                        targets = list(range(engine_count))
                        with closing(Context(targets=targets)) as context:
                            context.register(kernel)
                            complex_plane = create_complex_plane(
                                context, dimensions, dist, re_ax, im_ax)
                            result = do_julia_run(context, dist, dimensions,
                                                  c, complex_plane, z_max,
                                                  n_max,
                                                  benchmark_numpy=False,
                                                  kernel=kernel)
                            results.append(
                                {h: r for h, r in zip(hdr, result)})
                            n += 1
                            print(prog_fmt.format(n, n_runs,
                                                  result[1] - result[0]),
                                  result)
    with open(output_filename, 'wt') as fp:
        json.dump(results, fp, sort_keys=True, indent=4,
                  separators=(',', ': '))
    return results
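# Weak-scaling illustration (hedged sketch): the loop above scales each side
# of the grid by sqrt(engine_count), so the number of points per engine stays
# roughly constant as engines are added.  The resolution and engine counts
# here are examples only.
from math import floor, sqrt

resolution = 512
for engine_count in (1, 2, 4):
    side = int(floor(resolution * sqrt(engine_count)))
    # points per engine is ~resolution**2 for every engine count
    print(engine_count, (side, side), side * side // engine_count)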
""" Script to test launching an MPI-only client. $ mpiexec -np <np> python launch_mpi.py If exits cleanly, then everything is fine. If exits with an error code, then there's a problem. """ from __future__ import print_function from distarray.globalapi import Context, Distribution import numpy as np c = Context(kind='MPI') fmt = lambda s: "{:.<25s}:".format(s) print(fmt("Context"), c) print(fmt("targets"), c.targets) if __name__ == '__main__': size = len(c.targets) * 100 print(fmt("size"), size) dist = Distribution(c, (size,)) print(fmt("Distribution"), dist) da = c.ones(dist, dtype=np.int64) print(fmt("DistArray"), da) factor = 2 db = da * factor print(fmt("DistArray"), db)
# ---------------------------------------------------------------------------
# Copyright (C) 2008-2014, IPython Development Team and Enthought, Inc.
# Distributed under the terms of the BSD License.  See COPYING.rst.
# ---------------------------------------------------------------------------

"""
Estimate pi using a Monte Carlo method with distarray.
"""

from __future__ import division, print_function

from util import timer

from distarray.globalapi import Context, Distribution, hypot
from distarray.globalapi.random import Random

context = Context()
random = Random(context)


@timer
def calc_pi(n):
    """Estimate pi using distributed NumPy arrays."""
    distribution = Distribution(context=context, shape=(n,))
    x = random.rand(distribution)
    y = random.rand(distribution)
    r = hypot(x, y)
    mask = (r < 1)
    return 4 * mask.sum().toarray() / n


def main(N):
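# Why the estimate works: points drawn uniformly from the unit square fall
# inside the quarter circle of radius 1 with probability pi/4, so
# 4 * mask.sum() / n converges to pi.  A plain-NumPy sketch of the same idea
# (no distarray involved; the sample count is arbitrary):
#
#     import numpy as np
#     n = 1000000
#     x, y = np.random.rand(n), np.random.rand(n)
#     print(4 * np.count_nonzero(np.hypot(x, y) < 1) / n)  # ~3.14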
'''
Needs IPython da cluster running: dacluster start -n4
'''

import time

import numpy as np

# Assumed imports: the code below uses Context() and da.sin / da.cos from the
# distarray global API.
from distarray.globalapi import Context
import distarray.globalapi as da


def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        print 'Time: %2.6f sec' % (te-ts)
        return result
    return timed


context = Context()


@timeit
def task_np(arr):
    return (np.sin(arr) + np.cos(arr)).sum(axis=1) / arr.sum(axis=2)


@timeit
def task_da(arr):
    return (da.sin(arr) + da.cos(arr)).sum(axis=1) / arr.sum(axis=2)


N = 400
np_arr = np.random.random_sample(size=(N, N, N))
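# A possible continuation of this comparison (hedged sketch).
# Context.fromarray is used elsewhere in this code to build a DistArray from
# an ndarray; its default distribution is assumed to be acceptable here.
da_arr = context.fromarray(np_arr)

task_np(np_arr)   # local NumPy timing
task_da(da_arr)   # distributed distarray timing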