def test_from_global_dim_data_bc(self):
    """Build a 5x9 block-cyclic DistArray and check every local shape."""
    n_rows, n_cols = 5, 9

    def cyclic_dim(size):
        # Both dimensions use the same cyclic layout; only the size differs.
        return {
            'dist_type': 'c',
            'proc_grid_size': 2,
            'size': size,
            'block_size': 2,
        }

    global_dim_data = (cyclic_dim(n_rows), cyclic_dim(n_cols))
    dist = Distribution.from_global_dim_data(self.context, global_dim_data)
    distarr = DistArray(dist, dtype=int)
    distarr.toarray()

    shapes = []
    for local in distarr.get_localarrays():
        shapes.append(local.local_shape)
    self.assertSequenceEqual(shapes, [(3, 5), (3, 4), (2, 5), (2, 4)])
def test_from_global_dim_data_irregular_block(self):
    """Create a 1-D block ('b') DistArray from explicit bounds and call toarray()."""
    block_dim = {"dist_type": "b", "bounds": (0, 2, 3, 4, 10)}
    dist = Distribution.from_global_dim_data(self.context, (block_dim,))
    # The test passes as long as construction and toarray() do not raise.
    DistArray(dist, dtype=int).toarray()
def test___init__(self):
    """A (5, 5) DistArray filled with 42 must equal the same NumPy array."""
    shape = (5, 5)
    fill_value = 42

    # Reference array built with plain NumPy.
    expected = numpy.empty(shape, dtype=int)
    expected.fill(fill_value)

    dist = Distribution(self.context, shape, ('b', 'c'))
    distarr = DistArray(dist, dtype=int)
    distarr.fill(fill_value)

    assert_array_equal(distarr.tondarray(), expected)
def test_from_global_dim_data_irregular_block(self):
    """Smoke-test a block distribution whose bounds are unevenly spaced."""
    irregular_bounds = (0, 2, 3, 4, 10)
    dim0 = dict(dist_type='b', bounds=irregular_bounds)
    glb_dim_data = (dim0,)

    distribution = Distribution.from_global_dim_data(self.context,
                                                     glb_dim_data)
    irregular = DistArray(distribution, dtype=int)
    # No assertion: the call only has to complete without raising.
    irregular.toarray()
def test___init__(self):
    """Construct a DistArray from a Distribution, fill it, and compare it
    against a NumPy array filled with the same value."""
    shape = (5, 5)
    value = 42

    def filled_reference():
        # numpy.empty + fill mirrors how the DistArray itself is populated.
        reference = numpy.empty(shape, dtype=int)
        reference.fill(value)
        return reference

    da = DistArray(Distribution(self.context, shape, ("b", "c")), dtype=int)
    da.fill(value)
    assert_array_equal(da.tondarray(), filled_reference())
def test_from_global_dim_data_uu(self):
    """Build a DistArray whose two dimensions are both unstructured ('u').

    Each dimension's indices are a random permutation split into two groups;
    the test only requires construction and ``toarray()`` to succeed.
    """
    rows, cols = 6, 20
    # Draw the row permutation first, then the column one.
    row_perm = numpy.random.permutation(range(rows))
    col_perm = numpy.random.permutation(range(cols))

    row_cut = rows // 2
    col_cut = cols // 4
    row_dim = {"dist_type": "u",
               "indices": [row_perm[:row_cut], row_perm[row_cut:]]}
    col_dim = {"dist_type": "u",
               "indices": [col_perm[:col_cut], col_perm[col_cut:]]}

    dist = Distribution.from_global_dim_data(self.context, (row_dim, col_dim))
    DistArray(dist, dtype=int).toarray()
def test_irregular_block_assignment(self):
    """Create a 2-D block-distributed array with irregular bounds on dim 1."""
    dim0 = {'dist_type': 'b', 'bounds': (0, 5)}
    dim1 = {'dist_type': 'b', 'bounds': (0, 2, 6, 7, 9)}

    dist = Distribution.from_global_dim_data(self.context, (dim0, dim1))
    # Only checks that building the array and calling toarray() succeed.
    DistArray(dist, dtype=int).toarray()
def test_from_global_dim_data_bu(self):
    """Mix a block ('b') row dimension with an unstructured ('u') column one."""
    rows, cols = 9, 10

    shuffled_cols = numpy.random.permutation(range(cols))
    split_at = len(shuffled_cols) // 3

    block_dim = {"dist_type": "b", "bounds": (0, rows // 2, rows)}
    unstructured_dim = {
        "dist_type": "u",
        "indices": [shuffled_cols[:split_at], shuffled_cols[split_at:]],
    }

    dist = Distribution.from_global_dim_data(
        self.context, (block_dim, unstructured_dim))
    # Smoke test: construction and toarray() must not raise.
    DistArray(dist, dtype=int).toarray()
def load_npy(self, filename, distribution):
    """Load a DistArray from a dataset in a ``.npy`` file.

    Parameters
    ----------
    filename : str
        Filename to load.
    distribution : Distribution object
        Supplies the per-rank dim data, communicator and targets used
        for the load.

    Returns
    -------
    result : DistArray
        A DistArray encapsulating the file loaded.
    """
    def _local_load_npy(filename, ddpr, comm):
        from distarray.localapi import load_npy
        # An empty ddpr means there is no per-rank dim data to select.
        dim_data = ddpr[comm.Get_rank()] if len(ddpr) else ()
        return proxyize(load_npy(comm, filename, dim_data))

    call_args = (filename,
                 distribution.get_dim_data_per_rank(),
                 distribution.comm)
    keys = self.apply(_local_load_npy, call_args,
                      targets=distribution.targets)
    return DistArray.from_localarrays(keys[0], distribution=distribution)
def proxy_func(a, b, *args, **kwargs):
    """Apply the local binary function ``name`` to `a` and `b`.

    ``name`` comes from the enclosing scope. At least one operand must be
    a DistArray and the other a DistArray or a scalar; two DistArrays must
    have compatible distributions.

    Raises
    ------
    ValueError
        If both operands are DistArrays with incompatible distributions.
    TypeError
        If the operands are not a DistArray/scalar combination.
    """
    context = determine_context(a, b)
    a_is_distarray = isinstance(a, DistArray)
    b_is_distarray = isinstance(b, DistArray)

    if a_is_distarray and b_is_distarray:
        if not a.distribution.is_compatible(b.distribution):
            raise ValueError("distributions not compatible.")
        a_key, b_key, distribution = a.key, b.key, a.distribution
    elif a_is_distarray and numpy.isscalar(b):
        a_key, b_key, distribution = a.key, b, a.distribution
    elif b_is_distarray and numpy.isscalar(a):
        a_key, b_key, distribution = a, b.key, b.distribution
    else:
        raise TypeError("only DistArray or scalars are accepted")

    def func_call(func_name, a, b, args, kwargs):
        from distarray.utils import get_from_dotted_name
        func = get_from_dotted_name("distarray.localapi.%s" % (func_name,))
        res = func(a, b, *args, **kwargs)
        return proxyize(res), res.dtype  # noqa

    results = context.apply(func_call,
                            args=(name, a_key, b_key, args, kwargs),
                            targets=distribution.targets)
    # Only the first entry's (key, dtype) pair is used.
    first = results[0]
    return DistArray.from_localarrays(first[0], distribution=distribution,
                                      dtype=first[1])
def test_from_global_dim_data_1d(self):
    """Assign 0..39 to a 1-D unstructured DistArray and check that each
    local array holds exactly the indices listed for it."""
    total_size = 40
    list_of_indices = [
        [29, 38, 18, 19, 11, 33, 10, 1, 22, 25],
        [5, 15, 34, 12, 16, 24, 23, 39, 6, 36],
        [0, 7, 27, 4, 32, 37, 21, 26, 9, 17],
        [35, 14, 20, 13, 3, 30, 2, 8, 28, 31],
    ]
    unstructured_dim = {"dist_type": "u", "indices": list_of_indices}
    dist = Distribution.from_global_dim_data(self.context,
                                             (unstructured_dim,))
    distarr = DistArray(dist, dtype=int)

    # Store each global index as its own value.
    for global_index in range(total_size):
        distarr[global_index] = global_index

    for position, local in enumerate(distarr.get_localarrays()):
        assert_allclose(local, list_of_indices[position])
def test_from_global_dim_data_bc(self):
    """Check the local shapes of a 5x9 array distributed cyclically
    (block size 2, process grid size 2) along both dimensions."""
    rows, cols = 5, 9
    expected_shapes = [(3, 5), (3, 4), (2, 5), (2, 4)]

    dim0 = {"dist_type": "c", "proc_grid_size": 2,
            "size": rows, "block_size": 2}
    dim1 = {"dist_type": "c", "proc_grid_size": 2,
            "size": cols, "block_size": 2}

    distribution = Distribution.from_global_dim_data(self.context,
                                                     (dim0, dim1))
    distarr = DistArray(distribution, dtype=int)
    distarr.toarray()

    actual_shapes = [local.local_shape
                     for local in distarr.get_localarrays()]
    self.assertSequenceEqual(actual_shapes, expected_shapes)
def distributed_julia_calc(la, la2, kernel=numpy_julia_calc):
    """Run ``local_julia_calc`` on the keys of `la` and `la2`.

    `kernel` is first registered with the context of `la` and then passed
    to ``local_julia_calc`` as the ``kernel`` keyword. The result is
    wrapped in a DistArray created with dtype ``np.int32``.
    """
    ctx = la.context
    ctx.register(kernel)
    result_keys = ctx.apply(local_julia_calc,
                            (la.key, la2.key),
                            {'kernel': kernel})
    return DistArray.from_localarrays(result_keys[0], context=ctx,
                                      dtype=np.int32)
def distributed_julia_calc(distarray, c, z_max, n_max,
                           kernel=fancy_numpy_julia_calc):
    """Calculate the Julia set for an array of points in the complex plane.

    Parameters
    ----------
    distarray : DistArray
        DistArray of complex values whose iterations we will count.
    c : complex
        Complex number to add at each iteration.
    z_max : float
        Magnitude of complex value that we assume goes to infinity.
    n_max : int
        Maximum number of iterations.
    kernel : function
        Kernel forwarded to ``local_julia_calc`` as its ``kernel`` keyword.

    Returns
    -------
    DistArray
        Built from the first key returned by ``context.apply``, with dtype
        ``numpy.int32``.
    """
    ctx = distarray.context
    positional = (distarray.key, c, z_max, n_max)
    keyword = {'kernel': kernel}
    result_keys = ctx.apply(local_julia_calc, positional, keyword)
    return DistArray.from_localarrays(result_keys[0], context=ctx,
                                      dtype=numpy.int32)
def _process_local_results(self, results, targets):
    """Figure out what to return on the Client.

    Parameters
    ----------
    results : sequence
        Values returned for the wrapped function; each entry is inspected
        to decide what the client receives.
    targets : object
        Passed through unchanged as ``targets`` to
        ``DistArray.from_localarrays`` when a DistArray is built.

    Returns
    -------
    Varied
        A DistArray (if every entry is a Proxy for a LocalArray), None (if
        every entry is None), the single non-None entry (if
        ``has_exactly_one(results)`` is true), or else `results` unchanged.
    """
    def is_LocalArray(pxy):
        return (isinstance(pxy, Proxy) and
                pxy.type_str ==
                "<class 'distarray.localapi.localarray.LocalArray'>")

    if all(is_LocalArray(r) for r in results):
        # The DistArray is built from the first proxy only.
        return DistArray.from_localarrays(results[0], context=self,
                                          targets=targets)
    if all(r is None for r in results):
        return None
    if has_exactly_one(results):
        return next(x for x in results if x is not None)
    return results
def distributed_process(data, output, processes, process, params,
                        kernel=timeseries_correction_set_up):
    """Run ``local_process`` through the context of `data`.

    `kernel` is registered with ``data.context`` and then passed to
    ``local_process`` as the ``kernel`` keyword, together with
    ``data.key`` and `params`.

    Parameters
    ----------
    data : object
        Must expose ``context`` and ``key`` attributes.
    output, processes, process :
        Accepted for interface compatibility; their incoming values are
        not read by this function.
    params :
        Forwarded to ``local_process`` as its second positional argument.
    kernel : function
        Kernel to register and forward.

    Returns
    -------
    The object built by ``da.from_localarrays`` from the first returned
    key, with dtype ``np.int32``.
    """
    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2 for a single string argument.
    print("IN THE DISTRIBUTED_PROCESS FUNCTION")
    context = data.context
    context.register(kernel)
    iters_key = context.apply(local_process, (data.key, params),
                              {'kernel': kernel})
    output = da.from_localarrays(iters_key[0], context=context,
                                 dtype=np.int32)
    return output
def distributed_julia_calc(distarray, c, z_max, n_max,
                           kernel=numpy_julia_calc):
    """Apply ``local_julia_calc`` to `distarray` and wrap the result.

    `c`, `z_max` and `n_max` are forwarded positionally after the array's
    key; `kernel` is forwarded as the ``kernel`` keyword. The returned
    DistArray is created with dtype ``numpy.int32``.
    """
    context = distarray.context
    keys = context.apply(
        local_julia_calc,
        (distarray.key, c, z_max, n_max),
        {'kernel': kernel},
    )
    first_key = keys[0]
    return DistArray.from_localarrays(first_key, context=context,
                                      dtype=numpy.int32)
def test_from_global_dim_data_1d(self):
    """Each local array of a 1-D unstructured DistArray must contain the
    index group assigned to it once ``distarr[i] = i`` is set for all i."""
    total_size = 40
    list_of_indices = [
        [29, 38, 18, 19, 11, 33, 10, 1, 22, 25],
        [5, 15, 34, 12, 16, 24, 23, 39, 6, 36],
        [0, 7, 27, 4, 32, 37, 21, 26, 9, 17],
        [35, 14, 20, 13, 3, 30, 2, 8, 28, 31],
    ]
    glb_dim_data = (dict(dist_type='u', indices=list_of_indices),)
    distribution = Distribution.from_global_dim_data(self.context,
                                                     glb_dim_data)
    distarr = DistArray(distribution, dtype=int)

    idx = 0
    while idx < total_size:
        # Value equals global index, so local contents reveal the mapping.
        distarr[idx] = idx
        idx += 1

    localarrays = distarr.get_localarrays()
    for n, local_values in enumerate(localarrays):
        assert_allclose(local_values, list_of_indices[n])
def test_from_global_dim_data_uu(self):
    """Smoke-test a 6x20 DistArray that is unstructured in both dimensions."""
    rows = 6
    cols = 20

    def split(permutation, cut):
        # Two index groups: everything before `cut`, and everything after.
        return [permutation[:cut], permutation[cut:]]

    # Rows are permuted before columns.
    row_ixs = numpy.random.permutation(range(rows))
    col_ixs = numpy.random.permutation(range(cols))

    glb_dim_data = (
        {'dist_type': 'u', 'indices': split(row_ixs, rows // 2)},
        {'dist_type': 'u', 'indices': split(col_ixs, cols // 4)},
    )
    distribution = Distribution.from_global_dim_data(self.context,
                                                     glb_dim_data)
    unstructured = DistArray(distribution, dtype=int)
    unstructured.toarray()
def test_from_global_dim_data_bu(self):
    """Smoke-test a 9x10 DistArray: block rows, unstructured columns."""
    rows = 9
    cols = 10
    row_bounds = (0, rows // 2, rows)

    permuted = numpy.random.permutation(range(cols))
    cut = len(permuted) // 3
    first_group = permuted[:cut]
    second_group = permuted[cut:]

    glb_dim_data = (
        dict(dist_type='b', bounds=row_bounds),
        dict(dist_type='u', indices=[first_group, second_group]),
    )
    distribution = Distribution.from_global_dim_data(self.context,
                                                     glb_dim_data)
    mixed = DistArray(distribution, dtype=int)
    # Success means no exception during construction or toarray().
    mixed.toarray()
def proxy_func(a, *args, **kwargs):
    """Apply the local unary function ``name`` to the DistArray `a`.

    ``name`` comes from the enclosing scope. The result is a new DistArray
    sharing `a`'s distribution, with the dtype reported by the first
    returned result.
    """
    def func_call(func_name, arr_name, args, kwargs):
        from distarray.utils import get_from_dotted_name
        func = get_from_dotted_name("distarray.localapi.%s" % (func_name,))
        res = func(arr_name, *args, **kwargs)
        return proxyize(res), res.dtype  # noqa

    context = determine_context(a)
    results = context.apply(func_call, args=(name, a.key, args, kwargs),
                            targets=a.targets)
    # Only the first entry's (key, dtype) pair is used.
    first = results[0]
    return DistArray.from_localarrays(first[0],
                                      distribution=a.distribution,
                                      dtype=first[1])
def process(self, plugin, in_data, out_data, processes, process, params,
            kernel):
    """Resolve `kernel` by name, run it, and store the result on `out_data`.

    Parameters
    ----------
    plugin, in_data, out_data, processes, process, params :
        Forwarded unchanged to ``du.distributed_process``.
    kernel : str
        One of ``"timeseries_correction_set_up"``,
        ``"reconstruction_set_up"`` or ``"filter_set_up"``; it is replaced
        by the attribute of ``du`` with the same name.

    Notes
    -----
    An unrecognised kernel prints a message and exits the interpreter
    with status 1 via ``sys.exit``. On success ``out_data.data`` is
    rebuilt from the first returned key using ``in_data.data.context``
    and dtype ``np.int32``.
    """
    known_kernels = (
        "timeseries_correction_set_up",
        "reconstruction_set_up",
        "filter_set_up",
    )
    # Compare by value: ``is`` against a string literal tests object
    # identity, which only works when the interpreter interns both strings.
    if kernel not in known_kernels:
        print("The kernel", kernel, "has not been registered in dist_array_transport")
        sys.exit(1)
    # Each accepted name maps to the identically named attribute of ``du``.
    kernel = getattr(du, kernel)

    iters_key = du.distributed_process(plugin, in_data, out_data, processes,
                                       process, params, kernel)
    out_data.data = da.from_localarrays(iters_key[0],
                                        context=in_data.data.context,
                                        dtype=np.int32)
def proxy_func(a, *args, **kwargs):
    """Run the ``distarray.localapi`` function called ``name`` on `a`.

    ``name`` is a free variable supplied by the enclosing scope. Extra
    positional and keyword arguments are forwarded to the local function.
    """
    context = determine_context(a)

    def func_call(func_name, arr_name, args, kwargs):
        from distarray.utils import get_from_dotted_name
        dotted_name = 'distarray.localapi.%s' % (func_name, )
        local_func = get_from_dotted_name(dotted_name)
        res = local_func(arr_name, *args, **kwargs)
        return proxyize(res), res.dtype  # noqa

    call_args = (name, a.key, args, kwargs)
    res = context.apply(func_call, args=call_args, targets=a.targets)
    head = res[0]
    result_key = head[0]
    result_dtype = head[1]
    return DistArray.from_localarrays(result_key,
                                      distribution=a.distribution,
                                      dtype=result_dtype)
def fromfunction(self, function, shape, **kwargs):
    """Create a DistArray from a function over global indices.

    Unlike numpy's `fromfunction`, the result of distarray's
    `fromfunction` is restricted to the same Distribution as the
    index array generated from `shape`.

    See numpy.fromfunction for more details.
    """
    func_name = function.__name__
    # Push the function first so it can be looked up by name remotely.
    self.push_function(func_name, function, targets=self.targets)

    def _local_fromfunction(func_name, comm, ddpr, kwargs):
        from distarray.localapi import fromfunction
        from distarray.localapi.maps import Distribution
        from importlib import import_module

        main = import_module('__main__')
        dim_data = ddpr[comm.Get_rank()] if len(ddpr) else ()
        func = getattr(main, func_name)
        dist = Distribution(comm, dim_data=dim_data)
        return proxyize(fromfunction(func, dist, **kwargs))

    # 'dist' and 'grid_shape' are read here, but the complete kwargs dict
    # is still forwarded to the local call below.
    distribution = Distribution(context=self, shape=shape,
                                dist=kwargs.get('dist'),
                                grid_shape=kwargs.get('grid_shape'))
    ddpr = distribution.get_dim_data_per_rank()
    keys = self.apply(_local_fromfunction,
                      (func_name, distribution.comm, ddpr, kwargs),
                      targets=distribution.targets)
    return DistArray.from_localarrays(keys[0], distribution=distribution)
def fromfunction(self, function, shape, **kwargs):
    """Create a DistArray from a function over global indices.

    Unlike numpy's `fromfunction`, the result of distarray's
    `fromfunction` is restricted to the same Distribution as the
    index array generated from `shape`.

    See numpy.fromfunction for more details.
    """
    self.push_function(function.__name__, function, targets=self.targets)

    def _local_fromfunction(func_name, comm, ddpr, kwargs):
        from distarray.localapi import fromfunction
        from distarray.localapi.maps import Distribution
        from importlib import import_module

        if len(ddpr):
            local_dim_data = ddpr[comm.Get_rank()]
        else:
            local_dim_data = ()
        # The pushed function is looked up by name on __main__.
        target = getattr(import_module('__main__'), func_name)
        local_dist = Distribution(comm, dim_data=local_dim_data)
        return proxyize(fromfunction(target, local_dist, **kwargs))

    layout = {
        'dist': kwargs.get('dist', None),
        'grid_shape': kwargs.get('grid_shape', None),
    }
    distribution = Distribution(context=self, shape=shape, **layout)
    apply_args = (function.__name__, distribution.comm,
                  distribution.get_dim_data_per_rank(), kwargs)
    da_name = self.apply(_local_fromfunction, apply_args,
                         targets=distribution.targets)
    return DistArray.from_localarrays(da_name[0],
                                      distribution=distribution)
def _create_local(self, local_call, shape_or_dist, dtype):
    """Creates LocalArrays with the method named in `local_call`.

    `shape_or_dist` is converted with ``asdistribution``; the resulting
    DistArray is built from the first returned key with the given `dtype`.
    """
    def create_local(local_call, ddpr, dtype, comm):
        from distarray.localapi.maps import Distribution
        dim_data = () if len(ddpr) == 0 else ddpr[comm.Get_rank()]
        # eval() resolves the name against this function's scope, so the
        # local names here are deliberately left as they were.
        local_call = eval(local_call)
        distribution = Distribution(comm=comm, dim_data=dim_data)
        return proxyize(local_call(distribution=distribution, dtype=dtype))

    distribution = asdistribution(self, shape_or_dist)
    args = [local_call,
            distribution.get_dim_data_per_rank(),
            dtype,
            distribution.comm]
    keys = self.apply(create_local, args=args, targets=distribution.targets)
    return DistArray.from_localarrays(keys[0], distribution=distribution,
                                      dtype=dtype)
def _local_rand_call(self, local_func_name, shape_or_dist, kwargs=None):
    """Call the ``distarray.localapi.random`` function `local_func_name`.

    `shape_or_dist` is converted with ``asdistribution``; `kwargs`
    (an empty dict when falsy) is forwarded to the local function. The
    first returned key is wrapped in a DistArray.
    """
    if not kwargs:
        kwargs = {}

    def _local_call(comm, local_func_name, ddpr, kwargs):
        import distarray.localapi.random as local_random
        from distarray.localapi.maps import Distribution
        local_func = getattr(local_random, local_func_name)
        dim_data = ddpr[comm.Get_rank()] if len(ddpr) else ()
        dist = Distribution(dim_data=dim_data, comm=comm)
        return proxyize(local_func(distribution=dist, **kwargs))

    distribution = asdistribution(self.context, shape_or_dist)
    call_args = (distribution.comm,
                 local_func_name,
                 distribution.get_dim_data_per_rank(),
                 kwargs)
    keys = self.context.apply(_local_call, call_args,
                              targets=distribution.targets)
    return DistArray.from_localarrays(keys[0], distribution=distribution)
def load_hdf5(self, filename, distribution, key='buffer'):
    """Load a DistArray from a dataset in an ``.hdf5`` file.

    Parameters
    ----------
    filename : str
        Filename to load.
    distribution : Distribution object
        Supplies the per-rank dim data, communicator and targets.
    key : str, optional
        The identifier for the group to load the DistArray from (the
        default is 'buffer').

    Returns
    -------
    result : DistArray
        A DistArray encapsulating the file loaded.

    Raises
    ------
    ImportError
        If ``h5py`` cannot be imported.
    """
    # Imported only to fail early with a clearer message.
    try:
        import h5py
    except ImportError:
        raise ImportError(
            "An MPI-enabled h5py must be available to use load_hdf5.")

    def _local_load_hdf5(filename, ddpr, comm, key):
        from distarray.localapi import load_hdf5
        dim_data = ddpr[comm.Get_rank()] if len(ddpr) else ()
        return proxyize(load_hdf5(comm, filename, dim_data, key))

    call_args = (filename,
                 distribution.get_dim_data_per_rank(),
                 distribution.comm,
                 key)
    keys = self.apply(_local_load_hdf5, call_args,
                      targets=distribution.targets)
    return DistArray.from_localarrays(keys[0], distribution=distribution)
def proxy_func(a, b, *args, **kwargs):
    """Run the binary ``distarray.localapi`` function ``name`` on `a`, `b`.

    ``name`` is a free variable from the enclosing scope. Accepted operand
    combinations are DistArray/DistArray (with compatible distributions),
    DistArray/scalar and scalar/DistArray; anything else raises TypeError.
    Incompatible distributions raise ValueError.
    """
    context = determine_context(a, b)
    a_is_da = isinstance(a, DistArray)
    b_is_da = isinstance(b, DistArray)

    # `source` is the operand whose distribution the result inherits.
    if a_is_da and b_is_da:
        if not a.distribution.is_compatible(b.distribution):
            raise ValueError("distributions not compatible.")
        source = a
        a_key = a.key
        b_key = b.key
    elif a_is_da and numpy.isscalar(b):
        source = a
        a_key = a.key
        b_key = b
    elif b_is_da and numpy.isscalar(a):
        source = b
        a_key = a
        b_key = b.key
    else:
        raise TypeError('only DistArray or scalars are accepted')
    distribution = source.distribution

    def func_call(func_name, a, b, args, kwargs):
        from distarray.utils import get_from_dotted_name
        dotted_name = 'distarray.localapi.%s' % (func_name, )
        local_func = get_from_dotted_name(dotted_name)
        res = local_func(a, b, *args, **kwargs)
        return proxyize(res), res.dtype  # noqa

    call_args = (name, a_key, b_key, args, kwargs)
    res = context.apply(func_call, args=call_args,
                        targets=distribution.targets)
    head = res[0]
    return DistArray.from_localarrays(head[0], distribution=distribution,
                                      dtype=head[1])
def with_context_and_dtype(self):
    """Rebuild the fixture array from its key using `context` and `dtype`,
    then compare its contents with ``self.expected``."""
    key = self.distarray.key
    rebuilt = DistArray.from_localarrays(key, context=self.context,
                                         dtype=int)
    actual = rebuilt.toarray()
    assert_array_equal(actual, self.expected)
def with_distribution_and_dtype(self):
    """Rebuild the fixture array from its key using `distribution` and
    `dtype`, then compare its contents with ``self.expected``."""
    key = self.distarray.key
    rebuilt = DistArray.from_localarrays(key,
                                         distribution=self.distribution,
                                         dtype=int)
    actual = rebuilt.toarray()
    assert_array_equal(actual, self.expected)
def with_distribution_and_context(self):
    """Expect RuntimeError when ``from_localarrays`` is given both a
    `context` and a `distribution`."""
    # unittest's context manager is ``assertRaises``; ``assertRaise`` does
    # not exist and would fail with AttributeError before the call ran.
    with self.assertRaises(RuntimeError):
        DistArray.from_localarrays(self.distarray.key,
                                   context=self.context,
                                   distribution=self.distribution)
def load_dnpy(self, name):
    """Load a distributed array from ``.dnpy`` files.

    The ``.dnpy`` file format is a binary format inspired by NumPy's
    ``.npy`` format. The header of a particular ``.dnpy`` file contains
    information about which portion of a DistArray is saved in it (using
    the metadata outlined in the Distributed Array Protocol), and the data
    portion contains the output of NumPy's `save` function for the local
    array data. See the module docstring for `distarray.localapi.format`
    for full details.

    Parameters
    ----------
    name : str or list of str
        If a str, this is used as the prefix for the filename used by each
        engine. Each engine will load a file named ``<name>_<rank>.dnpy``.
        If a list of str, each engine will use the name at the index
        corresponding to its rank. An exception is raised if the length of
        this list is not the same as the number of targets.

    Returns
    -------
    result : DistArray
        A DistArray encapsulating the file loaded on each engine.

    Raises
    ------
    TypeError
        If `name` is a sequence whose length differs from
        ``len(self.targets)``, or if `name` is neither a string nor a
        sequence.

    See Also
    --------
    save_dnpy : Saving files to load with load_dnpy.
    """
    # The ``collections.Sequence`` alias was removed in Python 3.10; the
    # ABC lives in ``collections.abc``. Fall back for Python 2, which has
    # no ``collections.abc`` module.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    def _local_load_dnpy(comm, fname_base):
        from distarray.localapi import load_dnpy
        fname = "%s_%s.dnpy" % (fname_base, comm.Get_rank())
        local_arr = load_dnpy(comm, fname)
        return proxyize(local_arr)

    def _local_load_dnpy_names(comm, fnames):
        from distarray.localapi import load_dnpy
        fname = fnames[comm.Get_rank()]
        local_arr = load_dnpy(comm, fname)
        return proxyize(local_arr)

    # Strings are checked first because they are sequences too.
    if isinstance(name, six.string_types):
        func = _local_load_dnpy
    elif isinstance(name, Sequence):
        if len(name) != len(self.targets):
            errmsg = "`name` must be the same length as `self.targets`."
            raise TypeError(errmsg)
        func = _local_load_dnpy_names
    else:
        errmsg = "`name` must be a string or a list."
        raise TypeError(errmsg)

    da_key = self.apply(func, (self.comm, name), targets=self.targets)
    return DistArray.from_localarrays(da_key[0], context=self)
def distributed_filter(distarray, local_filter):
    """Apply `local_filter` to `distarray` and return a new DistArray.

    `local_filter` is run through the array's context with the array's key
    as its only argument; the first returned key is wrapped in a DistArray.
    """
    ctx = distarray.context
    keys = ctx.apply(local_filter, (distarray.key, ))
    return DistArray.from_localarrays(keys[0], context=ctx)
def distributed_filter(distarray, local_filter):
    """Filter a DistArray, returning a new DistArray.

    The new array is created in the same context as `distarray`.
    """
    context = distarray.context
    apply_args = (distarray.key,)
    result_keys = context.apply(local_filter, apply_args)
    first_key = result_keys[0]
    return DistArray.from_localarrays(first_key, context=context)
def test_irregular_block_assignment(self):
    """Smoke-test a 2-D block distribution built from explicit bounds."""
    all_bounds = [(0, 5), (0, 2, 6, 7, 9)]
    global_dim_data = tuple(
        {"dist_type": "b", "bounds": bounds} for bounds in all_bounds
    )
    distribution = Distribution.from_global_dim_data(self.context,
                                                     global_dim_data)
    blocked = DistArray(distribution, dtype=int)
    # Passing means construction and toarray() raised nothing.
    blocked.toarray()