def _compute_rechunk(x, chunks):
    """Build the array obtained by rechunking *x* to the given *chunks*.

    Every new block is described by one "merge" task.  Pieces of old blocks
    that are only partially needed are cut out by separate "split" tasks;
    old blocks that are needed in full are referenced directly by key.
    """
    if x.size == 0:
        # The general algorithm below does not behave correctly for empty
        # arrays, so hand back a freshly created empty array instead.
        return empty(x.shape, chunks=chunks, dtype=x.dtype)

    ndim = x.ndim
    crossed = intersect_chunks(x.chunks, chunks)
    merge_tasks = {}
    split_tasks = {}
    token = tokenize(x, chunks)
    merge_name = "rechunk-merge-" + token
    split_name = "rechunk-split-" + token
    split_counter = count()

    # Build each old block key exactly once so that the same tuple object
    # is shared by every task referring to it (smaller graph footprint).
    old_blocks = np.empty([len(c) for c in x.chunks], dtype="O")
    for index in np.ndindex(old_blocks.shape):
        old_blocks[index] = (x.name,) + index

    # Walk over the new blocks together with the old pieces they consist of.
    new_indices = product(*(range(len(c)) for c in chunks))
    for new_idx, pieces in zip(new_indices, crossed):
        # Number of distinct old block indices involved along every axis.
        grid_shape = [
            len({piece[axis][0] for piece in pieces}) for axis in range(ndim)
        ]
        parts = np.empty(grid_shape, dtype="O")
        parts_flat = parts.flat

        # Fill the grid with one entry per old piece of this new block.
        for position, piece in enumerate(pieces):
            old_block_index, slices = zip(*piece)
            # The counter advances for every piece, used or not, so the
            # numbering of split keys stays unchanged.
            suffix = next(split_counter)
            old_key = old_blocks[old_block_index]
            old_index = old_key[1:]
            covers_whole_block = all(
                slc.start == 0 and slc.stop == x.chunks[axis][ind]
                for axis, (slc, ind) in enumerate(zip(slices, old_index))
            )
            if covers_whole_block:
                # No slicing required: reuse the old block as it is.
                parts_flat[position] = old_key
            else:
                split_key = (split_name, suffix)
                split_tasks[split_key] = (getitem, old_key, slices)
                parts_flat[position] = split_key

        assert position == parts.size - 1

        # A single piece is used directly; several pieces are concatenated.
        merge_key = (merge_name,) + new_idx
        if all(extent == 1 for extent in parts.shape):
            merge_tasks[merge_key] = parts.flat[0]
        else:
            merge_tasks[merge_key] = (concatenate3, parts.tolist())

    del old_blocks, new_indices

    layer = toolz.merge(merge_tasks, split_tasks)
    graph = HighLevelGraph.from_collections(merge_name, layer, dependencies=[x])
    return Array(graph, merge_name, chunks, meta=x)
def indices(dimensions, dtype=int, chunks="auto"):
    """
    Implements NumPy's ``indices`` for Dask Arrays.

    Produces a grid of indices spanning the given dimensions.  The result
    has shape ``(len(dimensions), *dimensions)``.  ``chunks`` describes the
    chunking of axis 1 through ``len(dimensions)``; axis 0 always uses
    chunks of length 1.

    Parameters
    ----------
    dimensions : sequence of ints
        The shape of the index grid.
    dtype : dtype, optional
        Type to use for the array. Default is ``int``.
    chunks : sequence of ints, str
        The size of each block.  Must be one of the following forms:

        - A blocksize like (500, 1000)
        - A size in bytes, like "100 MiB" which will choose a uniform
          block-like shape
        - The word "auto" which acts like the above, but uses a configuration
          value ``array.chunk-size`` for the chunk size

        Note that the last block will have fewer samples if
        ``len(array) % chunks != 0``.

    Returns
    -------
    grid : dask array

    Raises
    ------
    ValueError
        If the normalized chunks do not have one entry per dimension.
    """
    dimensions = tuple(dimensions)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape=dimensions, dtype=dtype)

    ndim = len(dimensions)
    if ndim != len(chunks):
        raise ValueError("Need same number of chunks as dimensions.")

    # One 1-D coordinate array per axis, chunked like that axis.
    axes = [
        arange(size, dtype=dtype, chunks=(axis_chunks,))
        for size, axis_chunks in zip(dimensions, chunks)
    ]

    # A grid is only meshed when no dimension has length zero.
    grids = meshgrid(*axes, indexing="ij") if all(dimensions) else []
    if grids:
        return stack(grids)

    # Nothing was meshed: return an empty array of the expected shape.
    return empty((ndim,) + dimensions, dtype=dtype, chunks=(1,) + chunks)
def empty_like(a, dtype=None, order="C", chunks=None, name=None, shape=None):
    """
    Return a new array with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes of the
        returned array.
    dtype : data-type, optional
        Overrides the data type of the result.  Only ``None`` selects the
        data type of `a`; any other value is forwarded unchanged.
    order : {'C', 'F'}, optional
        Whether to store multidimensional data in C- or Fortran-contiguous
        (row- or column-wise) order in memory.
    chunks : sequence of ints
        The number of samples on each block. Note that the last block will have
        fewer samples if ``len(array) % chunks != 0``.
    name : str, optional
        An optional keyname for the array. Defaults to hashing the input
        keyword arguments.
    shape : int or sequence of ints, optional.
        Overrides the shape of the result.

    Returns
    -------
    out : ndarray
        Array of uninitialized (arbitrary) data with the same
        shape and type as `a`.

    See Also
    --------
    ones_like : Return an array of ones with shape and type of input.
    zeros_like : Return an array of zeros with shape and type of input.
    empty : Return a new uninitialized array.
    ones : Return a new array setting values to one.
    zeros : Return a new array setting values to zero.

    Notes
    -----
    This function does *not* initialize the returned array; to do that use
    `zeros_like` or `ones_like` instead.  It may be marginally faster than
    the functions that do set the array values.
    """
    a = asarray(a, name=False)
    shape, chunks = _get_like_function_shapes_chunks(a, chunks, shape)

    # Test against None explicitly rather than relying on truthiness: a
    # non-structured ``np.dtype`` instance has length 0 and is therefore
    # falsy, so ``dtype or a.dtype`` would silently drop such an override.
    result_dtype = dtype if dtype is not None else a.dtype

    return empty(
        shape,
        dtype=result_dtype,
        order=order,
        chunks=chunks,
        name=name,
        meta=a._meta,
    )
def tile(A, reps):
    """Construct an array by repeating *A* the number of times given by *reps*.

    Parameters
    ----------
    A : array_like
        The input array.
    reps : int or sequence of ints
        Number of repetitions of `A` along each axis.  A non-iterable value
        is treated as a one-element sequence.

    Returns
    -------
    The tiled array.  If any repetition count is zero, an empty array with
    the resulting shape and the dtype of `A` is returned instead.

    Raises
    ------
    ValueError
        If any entry of `reps` is negative.
    """
    try:
        reps_seq = tuple(reps)
    except TypeError:
        # A scalar repetition count is not iterable.
        reps_seq = (reps,)

    for rep in reps_seq:
        if rep < 0:
            raise ValueError("Negative `reps` are not allowed.")

    arr = asarray(A)

    if not all(reps_seq):
        # At least one axis is repeated zero times: only the output shape
        # matters, so work it out and return an empty array.
        n_reps = len(reps_seq)
        if n_reps < arr.ndim:
            reps_seq = (1,) * (arr.ndim - n_reps) + reps_seq
        base_shape = arr.shape
        if arr.ndim < n_reps:
            base_shape = (1,) * (n_reps - arr.ndim) + base_shape
        out_shape = tuple(size * rep for size, rep in zip(base_shape, reps_seq))
        return empty(shape=out_shape, dtype=arr.dtype)

    # Build the nested list layout from the innermost (last) axis outwards
    # and let ``block`` assemble it.
    nested = arr
    for rep in reversed(reps_seq):
        nested = rep * [nested]
    return block(nested)