def open_dset(filename, dataset_path, distaxes='all'):
    """Create a pyDive.h5.h5_ndarray instance from file.

    :param filename: name of hdf5 file.
    :param dataset_path: path within hdf5 file to a single dataset.
    :param ints distaxes: distributed axes. Defaults to 'all' meaning each axis is distributed.
    :return: pyDive.h5.h5_ndarray instance
    """
    fileHandle = h5.File(filename, "r")
    dataset = fileHandle[dataset_path]
    dtype = dataset.dtype
    shape = dataset.shape
    fileHandle.close()

    result = h5_ndarray(shape, dtype, distaxes, None, None, True)

    target_shapes = result.target_shapes()
    target_offset_vectors = result.target_offset_vectors()

    view = com.getView()
    view.scatter("shape", target_shapes, targets=result.target_ranks)
    view.scatter("offset", target_offset_vectors, targets=result.target_ranks)
    view.execute("{0} = pyDive.arrays.local.h5_ndarray.h5_ndarray('{1}','{2}',shape=shape[0],offset=offset[0])"\
        .format(result.name, filename, dataset_path), targets=result.target_ranks)

    return result
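# Hedged usage sketch (not part of the original module): assumes an
# IPython.parallel cluster is running and pyDive has been initialized, and
# that the returned disk array provides a load() method as sketched further
# below. File name and dataset path are hypothetical placeholders.
def _example_open_dset():
    # only metadata (dtype, shape) is read here; the data itself stays on disk
    temperature = open_dset("sample.h5", "/fields/temperature", distaxes=(0,))
    # reading the data into the engines' memory is an explicit, separate step
    return temperature.load()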
def GPU_copier(source, dest):
    view = com.getView()

    # send
    view.execute('{0}_send_tasks = interengine.scatterArrayGPU_async({0}, src_commData[0], target2rank)'\
        .format(source.name), targets=source.target_ranks)

    # receive
    view.execute("""\
{0}_recv_tasks, {0}_recv_bufs = interengine.gatherArraysGPU_async({1}, dest_commData[0], target2rank)
""".format(source.name, dest.name),\
        targets=dest.target_ranks)

    # finish communication
    view.execute('''\
if "{0}_send_tasks" in locals():
    MPI.Request.Waitall({0}_send_tasks)
    del {0}_send_tasks
if "{0}_recv_tasks" in locals():
    MPI.Request.Waitall({0}_recv_tasks)
    interengine.finish_GPUcommunication({1}, dest_commData[0], {0}_recv_bufs)
    del {0}_recv_tasks, {0}_recv_bufs
'''.format(source.name, dest.name),
        targets=tuple(set(source.target_ranks + dest.target_ranks)))
def open_variable(filename, variable_path, distaxis=0):
    """Create a pyDive.adios.ad_ndarray instance from file.

    :param filename: name of adios file.
    :param variable_path: path within adios file to a single variable.
    :param int distaxis: distributed axis
    :return: pyDive.adios.ad_ndarray instance
    """
    fileHandle = ad.file(filename)
    variable = fileHandle.var[variable_path]
    dtype = variable.type
    shape = tuple(variable.dims)
    fileHandle.close()

    result = ad_ndarray(shape, dtype, distaxis, None, None, True)

    target_shapes = result.target_shapes()
    target_offset_vectors = result.target_offset_vectors()

    view = com.getView()
    view.scatter("shape", target_shapes, targets=result.target_ranks)
    view.scatter("offset", target_offset_vectors, targets=result.target_ranks)
    view.execute("{0} = pyDive.arrays.local.ad_ndarray.ad_ndarray('{1}','{2}',shape=shape[0],offset=offset[0])"\
        .format(result.name, filename, variable_path), targets=result.target_ranks)

    return result
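# Hedged usage sketch (not part of the original module): mirrors the hdf5
# example above for an adios file; "sample.bp" and the variable path are
# hypothetical placeholders.
def _example_open_variable():
    density = open_variable("sample.bp", "/fields/density", distaxis=0)
    # each engine reads only its own slice of the variable
    return density.load()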
def to_cpu(self):
    """Copy array data to cpu main memory.

    :result pyDive.ndarray: distributed cpu array.
    """
    result = pyDive.arrays.ndarray.hollow_like(self)
    view = com.getView()
    view.execute("{0} = {1}.to_cpu()".format(result.name, self.name), targets=result.target_ranks)
    return result
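# Hedged usage sketch (not part of the original module): assumes to_cpu is a
# method of the distributed gpu array and that the gpu "array" factory defined
# further below is available in the same module.
def _example_to_cpu():
    gpu_data = array(np.linspace(0.0, 1.0, 1000), distaxes=(0,))
    # copies every engine's local gpu buffer back into a distributed cpu array
    return gpu_data.to_cpu()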
def test_cloned_ndarray(init_pyDive):
    view = com.getView()

    for size in sizes:
        ref_array = np.arange(np.prod(size))

        test_array = pyDive.cloned.empty(size, dtype=np.int)
        test_array[:] = ref_array

        assert np.array_equal(ref_array * len(view), test_array.sum())
def load(self):
    """Load array from file into main memory of all engines in parallel.

    :return: pyDive.ndarray instance
    """
    result = hollow_like(self)
    view = com.getView()
    view.execute("{0} = {1}.load()".format(result.name, self.name), targets=result.target_ranks)
    return result
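# Hedged usage sketch (not part of the original module): load() is the step
# that follows open_dset/open_variable above; "disk_array" is a placeholder
# for the object those functions return.
def _example_load(disk_array):
    # every engine reads its local portion of the file in parallel; the
    # result is an ordinary in-memory distributed array
    return disk_array.load()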
def factory_like_wrapper(factory_name, other, kwargs):
    result = arraytype(other.shape, other.dtype, other.distaxes, other.target_offsets,
                       other.target_ranks, True, **kwargs)
    view = com.getView()
    view.push({'kwargs': kwargs}, targets=result.target_ranks)
    view.execute("{0} = {1}({2}, **kwargs)".format(result.name, factory_name, other.name),
                 targets=result.target_ranks)
    return result
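# Hedged sketch (not part of the original module): shows how a module-level
# "*_like" factory could be built on top of factory_like_wrapper; the name
# "empty_like" and the local factory string "np.empty_like" are assumptions
# based on the wrapper's calling convention.
def _example_make_empty_like():
    def empty_like(other, **kwargs):
        # each engine ends up executing np.empty_like(<local array of other>, **kwargs)
        return factory_like_wrapper("np.empty_like", other, kwargs)
    return empty_like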
def factory_wrapper(factory_name, shape, dtype, distaxes, kwargs):
    result = arraytype(shape, dtype, distaxes, None, None, True, **kwargs)
    target_shapes = result.target_shapes()
    view = com.getView()
    view.scatter('target_shape', target_shapes, targets=result.target_ranks)
    view.push({'kwargs': kwargs, 'dtype': dtype}, targets=result.target_ranks)
    view.execute("{0} = {1}(shape=target_shape[0], dtype=dtype, **kwargs)".format(result.name, factory_name),\
        targets=result.target_ranks)
    return result
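# Hedged sketch (not part of the original module): a plain factory such as
# "empty" could be derived from factory_wrapper like this; the names "empty"
# and "np.empty" are assumptions based on the wrapper's calling convention.
def _example_make_empty():
    def empty(shape, dtype=np.float64, distaxes='all', **kwargs):
        # each engine allocates only its local block via np.empty(shape=target_shape[0], ...)
        return factory_wrapper("np.empty", shape, dtype, distaxes, kwargs)
    return empty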
def ufunc_wrapper(ufunc_name, args, kwargs):
    arg0 = args[0]
    args = [arg.dist_like(arg0) if hasattr(arg, "target_ranks") else arg for arg in args]
    arg_names = [repr(arg) for arg in args]
    arg_string = ",".join(arg_names)
    view = com.getView()
    result = arg0.__class__(arg0.shape, arg0.dtype, arg0.distaxes, arg0.target_offsets,
                            arg0.target_ranks, no_allocation=True, **arg0.kwargs)
    view.execute("{0} = {1}({2}); dtype={0}.dtype".format(repr(result), ufunc_name, arg_string),
                 targets=arg0.target_ranks)
    result.dtype = view.pull("dtype", targets=result.target_ranks[0])
    result.nbytes = np.dtype(result.dtype).itemsize * np.prod(result.shape)
    return result
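# Hedged sketch (not part of the original module): a distributed ufunc such as
# "sin" could be generated on top of ufunc_wrapper; the commented-out
# generate_ufuncs call further below suggests this is normally automated.
def _example_make_sin():
    def sin(*args, **kwargs):
        # applies np.sin to each engine's local array and wraps the result
        return ufunc_wrapper("np.sin", args, kwargs)
    return sin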
def array(array_like, distaxes="all"): """Create a pyDive.gpu_ndarray instance from an array-like object. :param array_like: Any object exposing the array interface, e.g. numpy-array, python sequence, ... :param ints distaxis: distributed axes. Defaults to 'all' meaning each axis is distributed. """ result_cpu = pyDive.arrays.ndarray.array(array_like, distaxes) result = hollow_like(result_cpu) view = com.getView() view.execute( "{0} = pyDive.arrays.local.gpu_ndarray.gpu_ndarray_cast(pycuda.gpuarray.to_gpu({1}))".format( repr(result), repr(result_cpu) ), targets=result.target_ranks, ) return result
def array(array_like, distaxes='all'):
    """Create a pyDive.gpu_ndarray instance from an array-like object.

    :param array_like: Any object exposing the array interface, e.g. numpy-array, python sequence, ...
    :param ints distaxes: distributed axes. Defaults to 'all' meaning each axis is distributed.
    """
    result_cpu = pyDive.arrays.ndarray.array(array_like, distaxes)
    result = hollow_like(result_cpu)
    view = com.getView()
    view.execute("{0} = pyDive.arrays.local.gpu_ndarray.gpu_ndarray_cast(pycuda.gpuarray.to_gpu({1}))"\
        .format(repr(result), repr(result_cpu)), targets=result.target_ranks)
    return result

#ufunc_names = [key for key, value in np.__dict__.items() if isinstance(value, np.ufunc)]
#ufuncs = multiple_axes.generate_ufuncs(ufunc_names, "np")
#globals().update(ufuncs)
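# Hedged usage sketch (not part of the original module): assumes a running
# cluster whose engines have pycuda available; the input data is arbitrary.
def _example_gpu_array():
    # the data is first distributed as a cpu array, then pushed to each engine's gpu
    gpu_data = array(np.arange(16).reshape(4, 4), distaxes=(0,))
    return gpu_data.to_cpu()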
def MPI_copier(source, dest):
    view = com.getView()

    # send
    view.execute('{0}_send_tasks = interengine.scatterArrayMPI_async({0}, src_commData[0], target2rank)'\
        .format(source.name), targets=source.target_ranks)

    # receive
    view.execute("""\
{0}_recv_tasks, {0}_recv_bufs = interengine.gatherArraysMPI_async({1}, dest_commData[0], target2rank)
""".format(source.name, dest.name),\
        targets=dest.target_ranks)

    # finish communication
    view.execute('''\
if "{0}_send_tasks" in locals():
    MPI.Request.Waitall({0}_send_tasks)
    del {0}_send_tasks
if "{0}_recv_tasks" in locals():
    MPI.Request.Waitall({0}_recv_tasks)
    interengine.finish_MPIcommunication({1}, dest_commData[0], {0}_recv_bufs)
    del {0}_recv_tasks, {0}_recv_bufs
'''.format(source.name, dest.name),
        targets=tuple(set(source.target_ranks + dest.target_ranks)))
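# Hedged illustration (hypothetical, not from the original module): both
# copiers above move the local blocks of `source` into the decomposition of
# `dest`; the communication metadata ("src_commData", "dest_commData",
# "target2rank") is assumed to have been pushed to the engines beforehand.
def _example_redistribute(source, dest, on_gpu=False):
    copier = GPU_copier if on_gpu else MPI_copier
    copier(source, dest)
    return dest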
def __init__(self, shape, dtype=np.float, distaxes='all', target_offsets=None, target_ranks=None,
             no_allocation=False, **kwargs):
    """Creates an instance of {arraytype_name}. This is a low-level method of instantiating an array,
    it should rather be constructed using factory functions ("empty", "zeros", "open", ...)

    :param ints shape: shape of array
    :param dtype: datatype of a single element
    :param ints distaxes: distributed axes. Accepts a single integer too. Defaults to 'all' meaning each axis is distributed.
    :param target_offsets: For each distributed axis there is an (inner) list in the outer list.
        The inner list contains the offsets of the local array.
    :type target_offsets: list of lists
    :param ints target_ranks: linear list of :term:`engine` ranks holding the local arrays.
        The last distributed axis is iterated over first.
    :param bool no_allocation: if ``True`` no instance of {local_arraytype_name} will be created on engine.
        Useful for manual instantiation of the local array.
    :param kwargs: additional keyword arguments are forwarded to the constructor of the local array.
    """
    #: size of the array on each axis
    if type(shape) not in (list, tuple):
        shape = (shape,)
    elif type(shape) is not tuple:
        shape = tuple(shape)
    self.shape = shape
    #: datatype of a single data value
    self.dtype = dtype
    if distaxes == 'all':
        distaxes = tuple(range(len(shape)))
    elif type(distaxes) not in (list, tuple):
        distaxes = (distaxes,)
    elif type(distaxes) is not tuple:
        distaxes = tuple(distaxes)
    #: axes on which memory is distributed across :term:`engines <engine>`
    self.distaxes = distaxes
    #: total bytes consumed by elements of this array.
    self.nbytes = np.dtype(dtype).itemsize * np.prod(self.shape)
    self.view = com.getView()
    self.kwargs = kwargs
    self.local_copy_is_dirty = False

    assert len(distaxes) <= len(shape),\
        "more distributed axes ({}) than dimensions ({})".format(len(distaxes), len(shape))
    for distaxis in distaxes:
        assert distaxis >= 0 and distaxis < len(self.shape),\
            "distributed axis ({}) has to be within [0,{}]".format(distaxis, len(self.shape)-1)

    if target_offsets is None and target_ranks is None:
        # create hypothetical patch with best surface-to-volume ratio
        patch_volume = np.prod([self.shape[distaxis] for distaxis in distaxes]) / float(len(self.view.targets))
        patch_edge_length = pow(patch_volume, 1.0 / len(distaxes))

        def factorize(n):
            if n == 1:
                yield 1
                return
            for f in range(2, n // 2 + 1) + [n]:
                while n % f == 0:
                    n //= f
                    yield f

        # get prime factors of number of engines in descending order
        prime_factors = list(factorize(len(self.view.targets)))[::-1]
        # sort distributed axes in ascending order
        sorted_distaxes = sorted(distaxes, key=lambda axis: self.shape[axis])
        # calculate number of available targets (engines) per distributed axis
        # This value should be close to array_edge_length / patch_edge_length
        num_targets_av = [1] * len(self.shape)
        for distaxis in sorted_distaxes[:-1]:
            num_patches = self.shape[distaxis] / patch_edge_length
            while float(num_targets_av[distaxis]) < num_patches and prime_factors:
                num_targets_av[distaxis] *= prime_factors.pop()
        # the largest axis gets the remaining (largest) prime_factors
        if prime_factors:
            num_targets_av[sorted_distaxes[-1]] *= np.prod(prime_factors)

        # calculate local shapes
        localshape = np.array(self.shape)
        for distaxis in distaxes:
            localshape[distaxis] = (self.shape[distaxis] - 1) / num_targets_av[distaxis] + 1

        # number of occupied targets for each distributed axis by this ndarray instance
        num_targets = [(self.shape[distaxis] - 1) / localshape[distaxis] + 1 for distaxis in distaxes]

        # calculate target_offsets
        target_offsets = [np.arange(num_targets[i]) * localshape[distaxes[i]] for i in range(len(distaxes))]

        # generate target_ranks list
        target_ranks = tuple(range(np.prod(num_targets)))

    if target_offsets is None:
        localshape = np.array(self.shape)
        for distaxis in distaxes:
            localshape[distaxis] = (self.shape[distaxis] - 1) / len(target_ranks[distaxis]) + 1
        target_offsets = [np.arange(num_targets[i]) * localshape[distaxes[i]] for i in range(len(distaxes))]

    if target_ranks is None:
        num_targets = [len(target_offsets_axis) for target_offsets_axis in target_offsets]
        target_ranks = tuple(range(np.prod(num_targets)))
    elif type(target_ranks) is not tuple:
        target_ranks = tuple(target_ranks)

    self.target_offsets = target_offsets
    self.target_ranks = target_ranks

    # generate a unique variable name used on target representing this instance
    global array_id
    #: Unique variable name of the local *array* on *engine*.
    #: Unless you are doing manual stuff on the *engines* there is no need for dealing with this attribute.
    self.name = 'dist_array' + str(array_id)
    array_id += 1

    if no_allocation:
        self.view.push({self.name: None}, targets=self.target_ranks)
    else:
        target_shapes = self.target_shapes()
        self.view.scatter('target_shape', target_shapes, targets=self.target_ranks)
        self.view.push({'kwargs': kwargs, 'dtype': dtype}, targets=self.target_ranks)
        self.view.execute('%s = %s(shape=target_shape[0], dtype=dtype, **kwargs)' % \
            (self.name, self.__class__.target_modulename + "." + self.__class__.local_arraytype.__name__),
            targets=self.target_ranks)
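# Hedged usage sketch (not part of the original module): the docstring above
# recommends the factory functions, but the low-level constructor can also be
# called directly. "arraytype" stands for the concrete distributed array class
# this __init__ belongs to; the class name is not shown in this excerpt.
def _example_low_level_construction(arraytype):
    # distribute a 2D array along its first axis only
    a = arraytype((1024, 64), dtype=np.float64, distaxes=(0,))
    # a "hollow" counterpart with the same decomposition but no allocation,
    # useful when the local arrays are created manually on the engines later
    b = arraytype(a.shape, a.dtype, a.distaxes, a.target_offsets, a.target_ranks, no_allocation=True)
    return a, b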