def test_dist_sizes(self):
    """Check the per-rank local shapes of a (2, 3, 4) 'n', 'b', 'c' layout.

    The expected shapes are only asserted when the context has exactly
    four engines; with any other engine count the test does nothing.
    """
    distribution = Distribution(self.context, (2, 3, 4),
                                dist=('n', 'b', 'c'))
    dim_data_per_rank = distribution.get_dim_data_per_rank()
    local_shapes = metadata_utils.shapes_from_dim_data_per_rank(
        dim_data_per_rank)

    # The expected values below are written for a four-engine context.
    if self.context.nengines != 4:
        return

    expected_shapes = [(2, 2, 2), (2, 2, 2), (2, 1, 2), (2, 1, 2)]
    self.assertEqual(local_shapes, expected_shapes)
def create_complex_plane(context, resolution, dist, re_ax, im_ax):
    """Build a DistArray holding points on the complex plane.

    Parameters
    ----------
    context : DistArray Context
    resolution : 2-tuple
        The number of points along Re and Im axes.
    dist : 2-element sequence or dict
        dist_type of the DistArray Distribution.
    re_ax : 2-tuple
        The (lower, upper) range of the Re axis.
    im_ax : 2-tuple
        The (lower, upper) range of the Im axis.

    Returns
    -------
    The complex64 DistArray allocated here, after ``fill_complex_plane``
    has been applied to it through the context.
    """
    import numpy as np
    from kernel import fill_complex_plane

    # Allocate an uninitialised distributed array of the requested size.
    plane_shape = (resolution[0], resolution[1])
    plane_distribution = Distribution(context, plane_shape, dist=dist)
    plane = context.empty(plane_distribution, dtype=np.complex64)

    # The kernel is handed the array's key together with both axis ranges
    # and the full resolution.
    kernel_args = (plane.key, re_ax, im_ax, resolution)
    context.apply(fill_complex_plane, kernel_args)
    return plane
def setUpClass(cls):
    """Prepare the plotting module and a shared 64 x 64 array of ones."""
    # import_or_skip is expected to raise a skipTest when the plotting
    # import fails (most likely because matplotlib isn't installed), so it
    # runs before any other class-level setup.
    cls.plt = import_or_skip("distarray.plotting")
    super(TestPlotting, cls).setUpClass()

    distribution = Distribution(cls.context, (64, 64))
    cls.da = distribution
    cls.arr = cls.context.ones(distribution)
def calc_pi(n):
    """Estimate pi using distributed NumPy arrays.

    Draws ``n`` random (x, y) pairs, counts those whose distance from the
    origin is below 1, and returns four times that count divided by ``n``.
    """
    sample_distribution = Distribution(context=context, shape=(n, ))
    xs = random.rand(sample_distribution)
    ys = random.rand(sample_distribution)
    inside = (hypot(xs, ys) < 1)
    hits = inside.sum().toarray()
    return 4 * hits / n
def test_c_size(self):
    """A 'c' dimension of size 42 on a 2-wide grid yields two shapes of 21."""
    global_dim_data = (
        {
            'dist_type': 'c',
            'size': 42,
            'proc_grid_size': 2,
            'proc_grid_rank': 0,
            'start': 0,
        },
    )
    distribution = Distribution.from_global_dim_data(self.context,
                                                     global_dim_data)
    dim_data_per_rank = distribution.get_dim_data_per_rank()
    local_shapes = metadata_utils.shapes_from_dim_data_per_rank(
        dim_data_per_rank)
    self.assertEqual(local_shapes, [(21,), (21,)])
def __load_data_from_hdf5(self, data_list):
    """Load every data object in ``data_list`` from its HDF5 backing file.

    For each object a Distribution is built from its shape and the
    distribution calculated for its plugin pattern; the resulting
    distarray is stored on the object's ``data`` attribute.
    """
    for dataset in data_list:
        hdf5_path = dataset.backing_file.filename
        pattern = dataset._get_plugin_data().get_pattern()
        dist_spec = self.__calculate_distribution(pattern)
        distribution = Distribution(self.context, dataset.get_shape(),
                                    dist=dist_spec)
        # The HDF5 dataset is looked up under the data object's name.
        dataset.data = self.context.load_hdf5(hdf5_path,
                                              distribution=distribution,
                                              key=dataset.name)
def __redistribute_data(self, data_list):
    """Redistribute data objects whose pattern distribution has changed.

    For every data object in ``data_list`` (a mapping; only its values are
    used) the distribution history is looked up by name.  When its first
    two entries differ, the array is rebuilt with a Distribution made from
    the most recent entry.

    The Distribution constructed here is what gets passed to
    ``context.fromarray``; previously it was built and then discarded, and
    the raw pattern was handed to ``fromarray`` instead.
    """
    for data in data_list.values():
        patterns = self.__get_distribution_history(data.get_name())
        if patterns[0] != patterns[1]:
            # NOTE: this gathers the whole array into a local ndarray
            # before scattering it again, which is memory hungry.
            # TODO: create an empty distarray and populate it instead.
            gathered = data.data.toarray()
            distribution = Distribution(self.context, data.get_shape(),
                                        patterns[-1])
            data.data = self.context.fromarray(gathered, distribution)
def load_hdf5_distarray(context, filename, key, dist):
    """Load one dataset of an HDF5 file into a DistArray.

    The dataset stored under ``key`` in ``filename`` is read through
    ``context.load_hdf5`` using a Distribution built from the dataset's
    shape and ``dist``.  Progress messages are printed along the way.
    Returns the loaded DistArray.
    """
    # load_hdf5() needs the full path, not a relative one.
    full_path = os.path.abspath(filename)

    print('Getting array shape...')
    dataset_shape = get_hdf5_dataset_shape(full_path, key)

    print('Creating distribution...')
    array_distribution = Distribution(context, dataset_shape, dist=dist)

    print('Loading HDF5 file...')
    loaded = context.load_hdf5(filename=full_path,
                               distribution=array_distribution,
                               key=key)
    print('Loaded.')
    return loaded
def __create_out_data(self, out_data):
    """Attach a zero-filled int32 distarray to every output data object.

    ``out_data`` is a mapping; only its values are used.  Each array is
    distributed according to the distribution calculated for the data
    object's plugin pattern.
    """
    for dataset in out_data.values():
        pattern = dataset._get_plugin_data().get_pattern()
        dist_spec = self.__calculate_distribution(pattern)
        distribution = Distribution(self.context, dataset.get_shape(),
                                    dist_spec)
        dataset.data = self.context.zeros(distribution, dtype=np.int32)
If exits cleanly, then everything is fine. If exits with an error code, then there's a problem. """ from __future__ import print_function from distarray.globalapi import Context, Distribution import numpy as np c = Context(kind='MPI') fmt = lambda s: "{:.<25s}:".format(s) print(fmt("Context"), c) print(fmt("targets"), c.targets) if __name__ == '__main__': size = len(c.targets) * 100 print(fmt("size"), size) dist = Distribution(c, (size,)) print(fmt("Distribution"), dist) da = c.ones(dist, dtype=np.int64) print(fmt("DistArray"), da) factor = 2 db = da * factor print(fmt("DistArray"), db) sum = db.sum().tondarray() print(fmt("sum"), sum) print(fmt("sum == factor * size"), sum == size * factor) assert sum == size * factor
def create_distribution_plot_and_documentation(context, params):
    """Create an array distribution plot and the related .rst documentation.

    ``params`` must provide 'title', 'labels' and 'shape', and either
    'dist' or 'dimdata'.  Optional entries are 'grid_shape', 'text',
    'filename' and 'skip'; a true 'skip' makes the function return without
    doing anything.  Raises ValueError when neither 'dist' nor 'dimdata'
    is given, or when the shape is not 1, 2 or 3 dimensional.
    """

    def shape_text(shape):
        """Render a shape as text such as ``4 X 8``."""
        # Always want to display at least N X M.
        dims = (1, shape[0]) if len(shape) == 1 else shape
        return ' X '.join('%d' % (extent) for extent in dims)

    # Required entries first, so a missing one raises KeyError up front.
    title = params['title']
    labels = params['labels']
    shape = params['shape']
    if params.get('skip', False):
        return
    grid_shape = params.get('grid_shape', None)
    text = params.get('text', None)
    dist = params.get('dist', None)
    dimdata = params.get('dimdata', None)
    filename = params.get('filename', None)

    # Build the distribution from dist when given, otherwise from dimdata.
    if dist is None and dimdata is None:
        raise ValueError('Must provide either dist or dimdata.')
    if dist is not None:
        distribution = Distribution(context, shape, dist=dist,
                                    grid_shape=grid_shape)
    else:
        distribution = Distribution.from_global_dim_data(context, dimdata)
    array = context.empty(distribution)

    # TODO: Even better would be to generalize this to any dimensions.
    ndim = len(shape)
    if ndim not in (1, 2, 3):
        raise ValueError('Array must be 1, 2, or 3 dimensional.')

    # Fill the array with 0.0, 1.0, 2.0, ... in row-major order.
    # Element-by-element assignment is slow but not a real problem here.
    next_value = 0.0
    if ndim == 1:
        for i in range(shape[0]):
            array[i] = next_value
            next_value += 1.0
    elif ndim == 2:
        for row in range(shape[0]):
            for col in range(shape[1]):
                array[row, col] = next_value
                next_value += 1.0
    else:
        for i in range(shape[0]):
            for j in range(shape[1]):
                for k in range(shape[2]):
                    array[i, j, k] = next_value
                    next_value += 1.0

    # Get all process grid coordinates.
    # This is duplicating work in print_array_documentation(),
    # but it is needed for the local array plots.
    def _get_process_coords(local_arr):
        return local_arr.cart_coords

    process_coords = context.apply(_get_process_coords,
                                   (array.key, ),
                                   targets=array.targets)

    # Plot title; 1-D plots are cramped, so they get an extra blank line.
    title_suffix = '\n\n' if ndim == 1 else '\n'
    plot_title = title + ' ' + shape_text(shape) + title_suffix
    xlabel = 'Axis 1, %s' % (labels[1])
    ylabel = 'Axis 0, %s' % (labels[0])

    # Documentation title and text description.
    doc_title = title
    dist_text = ' X '.join("'%s'" % (label) for label in labels)
    # Choose 'a' vs 'an' appropriately.
    article = 'an' if title[0] in 'aeiouAEIOU' else 'a'
    doc_text = 'A (%s) array, with %s %s (%s) distribution over a (%s) process grid.' % (
        shape_text(shape), article, title, dist_text,
        shape_text(array.grid_shape))
    if text is not None:
        doc_text = doc_text + "\n\n" + text

    # The local plot filename is the global one with '_local' before the
    # extension.
    global_plot_filename = filename
    local_plot_filename = None
    if global_plot_filename is not None:
        root, ext = os.path.splitext(global_plot_filename)
        local_plot_filename = root + '_local' + ext

    if ndim == 3:
        # Not plottable, avoid writing links to missing plots.
        global_plot_filename = None
        local_plot_filename = None
    else:
        plotting.plot_array_distribution(
            array,
            process_coords,
            title=plot_title,
            xlabel=xlabel,
            ylabel=ylabel,
            legend=True,
            global_plot_filename=global_plot_filename,
            local_plot_filename=local_plot_filename)

    # Print documentation.
    print_array_documentation(context,
                              array,
                              title=doc_title,
                              text=doc_text,
                              global_plot_filename=global_plot_filename,
                              local_plot_filename=local_plot_filename)
def create_distribution_plot_and_documentation(context, params):
    """Create an array distribution plot and the related .rst documentation.

    Reads 'title', 'labels' and 'shape' (required) plus the optional
    'grid_shape', 'text', 'dist', 'dimdata', 'filename' and 'skip' entries
    of ``params``.  Returns immediately when 'skip' is true.  Raises
    ValueError if neither 'dist' nor 'dimdata' is supplied, or if the
    shape has more than three (or zero) dimensions.
    """

    def shape_text(shape):
        """Return the shape's extents joined by ' X '."""
        # Always want to display at least N X M.
        if len(shape) == 1:
            shape = (1, shape[0])
        return ' X '.join(['%d' % (extent) for extent in shape])

    # Mandatory entries are read first; a missing one raises KeyError.
    title = params['title']
    labels = params['labels']
    shape = params['shape']
    if params.get('skip', False):
        return
    grid_shape = params.get('grid_shape', None)
    text = params.get('text', None)
    dist = params.get('dist', None)
    dimdata = params.get('dimdata', None)
    filename = params.get('filename', None)

    # Create array, either from dist or dimdata (dist takes precedence).
    if dist is None and dimdata is None:
        raise ValueError('Must provide either dist or dimdata.')
    if dist is None:
        distribution = Distribution.from_global_dim_data(context, dimdata)
    else:
        distribution = Distribution(context, shape, dist=dist,
                                    grid_shape=grid_shape)
    array = context.empty(distribution)

    # TODO: Even better would be to generalize this to any dimensions.
    rank = len(shape)
    if rank not in (1, 2, 3):
        raise ValueError('Array must be 1, 2, or 3 dimensional.')

    # Fill the array with a running counter, last axis varying fastest.
    # This is slow but not a real problem here.
    counter = 0.0
    if rank == 1:
        for i in range(shape[0]):
            array[i] = counter
            counter += 1.0
    elif rank == 2:
        for row in range(shape[0]):
            for col in range(shape[1]):
                array[row, col] = counter
                counter += 1.0
    else:
        for i in range(shape[0]):
            for j in range(shape[1]):
                for k in range(shape[2]):
                    array[i, j, k] = counter
                    counter += 1.0

    # Get all process grid coordinates.
    # This is duplicating work in print_array_documentation(),
    # but it is needed for the local array plots.
    def _get_process_coords(local_arr):
        return local_arr.cart_coords

    process_coords = context.apply(_get_process_coords,
                                   (array.key,),
                                   targets=array.targets)

    # Plot title and axis labels.
    plot_title = title + ' ' + shape_text(shape) + '\n'
    if rank == 1:
        # Add more space for the cramped 1-D plot.
        plot_title += '\n'
    xlabel = 'Axis 1, %s' % (labels[1])
    ylabel = 'Axis 0, %s' % (labels[0])

    # Documentation title and text description.
    doc_title = title
    quoted_labels = ["'%s'" % (label) for label in labels]
    dist_text = ' X '.join(quoted_labels)
    # Choose 'a' vs 'an' appropriately.
    article = 'an' if title[0] in 'aeiouAEIOU' else 'a'
    doc_text = 'A (%s) array, with %s %s (%s) distribution over a (%s) process grid.' % (
        shape_text(shape), article, title, dist_text,
        shape_text(array.grid_shape))
    if text is not None:
        doc_text = doc_text + "\n\n" + text

    # Filenames for array plots: the local one inserts '_local' before the
    # extension of the global one.
    global_plot_filename = filename
    local_plot_filename = None
    if global_plot_filename is not None:
        root, ext = os.path.splitext(global_plot_filename)
        local_plot_filename = root + '_local' + ext

    # Only 1-D and 2-D arrays are plotted.
    if rank in [1, 2]:
        plotting.plot_array_distribution(
            array,
            process_coords,
            title=plot_title,
            xlabel=xlabel,
            ylabel=ylabel,
            legend=True,
            global_plot_filename=global_plot_filename,
            local_plot_filename=local_plot_filename)
    else:
        # Not plottable, avoid writing links to missing plots.
        global_plot_filename = None
        local_plot_filename = None

    # Print documentation.
    print_array_documentation(
        context,
        array,
        title=doc_title,
        text=doc_text,
        global_plot_filename=global_plot_filename,
        local_plot_filename=local_plot_filename)