def map_uop(self,
            op_name: str,
            arr: BlockArray,
            out: BlockArray = None,
            where=True,
            args=None,
            kwargs=None) -> BlockArray:
    """Apply an element-wise unary operator to every block of ``arr``.

    Args:
        op_name: Name of the element-wise unary operator.
        arr: The input BlockArray.
        out: Optional BlockArray that receives the result. It must have the
            same shape and block shape as ``arr``.
        where: Only ``True`` is accepted; any other value is rejected.
        args: Positional args forwarded to the operator.
        kwargs: Keyword args forwarded to the operator.

    Returns:
        ``out`` when it was supplied, otherwise a newly created BlockArray.

    Raises:
        NotImplementedError: If ``where`` is anything other than ``True``.
    """
    if where is not True:
        raise NotImplementedError("'where' argument is not yet supported.")
    if args is None:
        args = ()
    if kwargs is None:
        kwargs = {}

    src_shape = arr.shape
    src_block_shape = arr.block_shape
    result_dtype = array_utils.get_uop_output_type(op_name, arr.dtype)
    assert len(src_shape) == len(src_block_shape)

    if out is not None:
        # Write into the caller-provided array; it must line up with the input.
        result = out
        result_grid = result.grid
        assert (result.shape == arr.shape
                and result.block_shape == arr.block_shape)
    else:
        result_grid = ArrayGrid(src_shape, src_block_shape,
                                result_dtype.__name__)
        result = BlockArray(result_grid, self.cm)

    for entry in result_grid.get_entry_iterator():
        # TODO(hme): Faster to create ndarray first,
        #  and instantiate block array on return
        #  to avoid instantiating blocks on BlockArray initialization.
        result.blocks[entry] = arr.blocks[entry].uop_map(
            op_name, args=args, kwargs=kwargs)
    return result
def test_compute_block_shape(app_inst: ArrayApplication):
    """Check computed block shapes against the ideal tall-skinny/square shapes.

    For each array size, the block shape returned by
    ``app_inst.cm.compute_block_shape`` and the grid shape derived from it
    must equal the values produced by the ``ideal_*_shapes`` helpers.
    """
    dtype = np.float32
    cores_per_node = 64

    # Tall-skinny.
    for size in (64, 128, 256, 512, 1024):
        size_str = "%sGB" % size
        node_count = size // 64
        cluster_shape = (16, 1)
        shape, want_block_shape, want_grid_shape = ideal_tall_skinny_shapes(
            size_str, dtype)
        got_block_shape = app_inst.cm.compute_block_shape(
            shape, dtype, cluster_shape, node_count * cores_per_node)
        grid: ArrayGrid = ArrayGrid(shape, got_block_shape, dtype.__name__)
        print("tall-skinny",
              "cluster_shape=%s" % str(cluster_shape),
              "grid_shape=%s" % str(want_grid_shape),
              "size=%s" % size_str,
              "bytes computed=%s" % (grid.nbytes() / 10**9))
        assert want_grid_shape == grid.grid_shape
        assert want_block_shape == got_block_shape

    # Square.
    for size in (4, 16, 64, 256, 1024):
        size_str = "%sGB" % size
        if size < 64:
            node_count = 1
        else:
            node_count = size // 64
        side = int(np.sqrt(node_count))
        cluster_shape = (side, side)
        shape, want_block_shape, want_grid_shape = ideal_square_shapes(
            size_str, dtype)
        got_block_shape = app_inst.cm.compute_block_shape(
            shape, dtype, cluster_shape, node_count * cores_per_node)
        grid: ArrayGrid = ArrayGrid(shape, got_block_shape, dtype.__name__)
        print("square",
              "cluster_shape=%s" % str(cluster_shape),
              "grid_shape=%s" % str(want_grid_shape),
              "size=%s" % size_str,
              "bytes computed=%s" % (grid.nbytes() / 10**9))
        assert want_grid_shape == grid.grid_shape, \
            "%s != %s" % (want_grid_shape, grid.grid_shape)
        assert want_block_shape == got_block_shape, \
            "%s != %s" % (want_block_shape, got_block_shape)
def eye(self, shape: tuple, block_shape: tuple, dtype: np.dtype = None):
    """Build a 2-D block array from "eye" and "zeros" blocks.

    Blocks whose grid entry has equal coordinates (the block diagonal) are
    created with the "eye" operation; every other block is created with
    "zeros".

    Args:
        shape: Shape of the array; must have exactly two axes.
        block_shape: Block shape; must have exactly two axes.
        dtype: Element type; defaults to ``np.float64``.

    Returns:
        The newly created BlockArray.
    """
    assert len(shape) == len(block_shape) == 2
    dtype = np.float64 if dtype is None else dtype
    grid = ArrayGrid(shape, block_shape, dtype.__name__)
    grid_meta = grid.to_meta()
    result = BlockArray(grid, self.cm)
    for entry in grid.get_entry_iterator():
        placement = {
            "grid_entry": entry,
            "grid_shape": grid.grid_shape
        }
        # All coordinates equal <=> this is a diagonal block.
        on_diagonal = np.all(np.diff(entry) == 0)
        block_op = "eye" if on_diagonal else "zeros"
        result.blocks[entry].oid = self.cm.new_block(
            block_op, entry, grid_meta, syskwargs=placement)
    return result
def delete_fs(self, filename: str):
    """Delete every stored block of ``filename`` and then its metadata.

    The metadata read through ``self._fs.read_meta_fs`` supplies the grid
    description and, per grid entry, the address string of the device that
    holds the block. One delete is issued per recorded address.

    Args:
        filename: Name of the stored array.

    Returns:
        A BlockArray with dtype name ``"dict"`` holding one block of shape
        all-ones per block of the stored array; each block's ``oid`` is the
        value returned by ``self._fs.delete_block_fs`` for that entry.
    """
    meta = self._fs.read_meta_fs(filename)
    addresses = meta["addresses"]
    grid_meta = meta["grid_meta"]
    grid = ArrayGrid.from_meta(grid_meta)
    # The result array has the source's grid shape as its shape and
    # all-ones blocks. ``np.int`` was removed in NumPy 1.24, so the builtin
    # ``int`` is used for the dtype.
    result_grid = ArrayGrid(
        grid.grid_shape,
        tuple(np.ones_like(grid.shape, dtype=int)),
        dtype=dict.__name__,
    )
    rarr = BlockArray(result_grid, self.cm)
    for grid_entry in addresses:
        # Run the delete on the device recorded for this block.
        device_id: DeviceID = DeviceID.from_str(addresses[grid_entry])
        rarr.blocks[grid_entry].oid = self._fs.delete_block_fs(
            filename, grid_entry, grid_meta,
            syskwargs={"device_id": device_id})
    self._fs.delete_meta_fs(filename)
    return rarr
def vec_from_oids(self, oids, shape, block_shape, dtype):
    """Wrap existing object ids in a BlockArray, then reshape its blocks.

    The array is first built with ``block_shape == shape`` and its blocks
    are pointed at ``oids`` in grid-entry iteration order. If the requested
    ``block_shape`` differs from ``shape``, the array is re-blocked via
    ``reshape``.

    Args:
        oids: Object ids, indexed in grid-entry iteration order.
        shape: Shape of the array.
        block_shape: Block shape of the returned array.
        dtype: Element type; its ``__name__`` is passed to the grid.

    Returns:
        The resulting BlockArray.
    """
    whole_grid = ArrayGrid(shape=shape, block_shape=shape,
                           dtype=dtype.__name__)
    vec = BlockArray(whole_grid, self.cm)
    # Make sure resulting grid shape is a vector (1 dimensional).
    grid_shape = vec.grid.grid_shape
    assert np.sum(grid_shape) == (max(grid_shape) + len(grid_shape) - 1)
    for position, entry in enumerate(vec.grid.get_entry_iterator()):
        vec.blocks[entry].oid = oids[position]
    if block_shape != shape:
        return vec.reshape(block_shape=block_shape)
    return vec
def _group_index_lists_by_block(self, dst_slice_tuples, src_grid: ArrayGrid, dst_index_list, src_index_list):
    """Group paired (dst, src) indices by the source block containing them.

    Args:
        dst_slice_tuples: Slice tuples for the destination selection; they
            are turned into an array and transposed, so row 0 and row 1 are
            presumably the per-axis starts and stops -- TODO confirm.
        src_grid: Grid of the source array.
        dst_index_list: Destination indices, paired by position with
            ``src_index_list``.
        src_index_list: Source indices.

    Returns:
        A dict mapping each source grid entry that contains at least one
        source index to a list of ``(dst_offset, src_offset)`` pairs. The
        dst offset is relative to row 0 of the destination slice array and
        the src offset is relative to row 0 of the block's slice array;
        both are cast to the selected unsigned integer type.

    Raises:
        Exception: If no unsigned type up to ``np.uint64`` can bound both
            the source block shape and row 1 of the destination slice array.
    """
    # TODO(hme): Keep this function here until it's needed for greater support of
    #  selection/assignment operations.
    # Block grid entries needed to write to given dst_slice_selection.
    src_blocks = {}
    dst_slice_np = np.array(dst_slice_tuples).T
    dst_index_arr = np.array(dst_index_list)
    src_index_arr = np.array(src_index_list)
    # Pick the smallest type to represent indices.
    # A set of these indices may be transmitted over the network,
    # so we want to pick the smallest encoding possible.
    index_types = [(2**8, np.uint8), (2**16, np.uint16), (2**32, np.uint32), (2**64, np.uint64)]
    index_type = None
    for bound, curr_index_type in index_types:
        # Candidates are ordered narrowest first; stop at the first that fits.
        if np.all(np.array(src_grid.block_shape) < bound) and np.all(
                dst_slice_np[1] < bound):
            index_type = curr_index_type
            break
    if index_type is None:
        raise Exception(
            "Unable to encode block indices, blocks are too large.")
    for grid_entry in src_grid.get_entry_iterator():
        src_slice_np = np.array(src_grid.get_slice_tuples(grid_entry)).T
        index_pairs = []
        for i in range(src_index_arr.shape[0]):
            src_index = src_index_arr[i]
            dst_index = dst_index_arr[i]
            # Keep the pair only if the source index lies inside this block
            # on every axis (row 0 inclusive, row 1 exclusive).
            if np.all((src_slice_np[0] <= src_index) & (src_index < src_slice_np[1])):
                index_pair = ((dst_index - dst_slice_np[0]).astype(index_type),
                              (src_index - src_slice_np[0]).astype(index_type))
                index_pairs.append(index_pair)
        # Blocks that received no indices are left out of the result.
        if len(index_pairs) > 0:
            src_blocks[grid_entry] = index_pairs
    return src_blocks
def indirect_tsr(app: ArrayApplication, X: BlockArray, reshape_output=True):
    """Compute R for a 2-D block array with two rounds of ``qr`` calls.

    First, ``app.cm.qr(..., mode="r", axis=1)`` is called once per block row
    of ``X``. The per-row results are then passed together to a single
    ``app.cm.qr(..., mode="r", axis=0)`` call, whose result becomes the only
    block of the output.

    Args:
        app: Application whose compute manager schedules the ``qr`` calls.
        X: A 2-D BlockArray whose row block size is at least its number of
            columns.
        reshape_output: When true and the single-block result does not
            already match the column block size of ``X``, the result is
            re-blocked to ``(block_shape[1], block_shape[1])``.

    Returns:
        A BlockArray of shape ``(X.shape[1], X.shape[1])``.
    """
    assert len(X.shape) == 2
    # TODO (hme): This assertion is temporary and ensures returned
    #  shape of qr of block is correct.
    assert X.block_shape[0] >= X.shape[1]

    grid_shape = X.grid.grid_shape
    num_cols = X.shape[1]
    col_block = X.block_shape[1]

    # Compute R for each block.
    # Assume no blocking along second dim.
    per_row_oids = []
    for row_idx in range(grid_shape[0]):
        # Select a row according to block_shape.
        row_oids = [X.blocks[row_idx, col_idx].oid
                    for col_idx in range(grid_shape[1])]
        row_syskwargs = {
            "grid_entry": (row_idx, 0),
            "grid_shape": (grid_shape[0], 1),
            "options": {"num_returns": 1}
        }
        per_row_oids.append(
            app.cm.qr(*row_oids, mode="r", axis=1, syskwargs=row_syskwargs))

    # Construct R by summing over R blocks.
    # TODO (hme): Communication may be inefficient due to redundancy of data.
    R_shape = (num_cols, num_cols)
    R_block_shape = (col_block, col_block)
    single_block_grid = ArrayGrid(shape=R_shape,
                                  block_shape=R_shape,
                                  dtype=X.dtype.__name__)
    tsR = BlockArray(single_block_grid, app.cm)
    tsR.blocks[0, 0].oid = app.cm.qr(*per_row_oids, mode="r", axis=0,
                                     syskwargs={
                                         "grid_entry": (0, 0),
                                         "grid_shape": (1, 1),
                                         "options": {"num_returns": 1}
                                     })

    # If blocking is "tall-skinny," then we're done.
    if R_shape != R_block_shape and reshape_output:
        return tsR.reshape(R_shape, block_shape=R_block_shape)
    return tsR
def _new_array(self, op_name: str, shape: tuple, block_shape: tuple, dtype: np.dtype = None):
    """Create a BlockArray whose blocks all come from ``cm.new_block``.

    Args:
        op_name: Operation name handed to ``self.cm.new_block`` for every
            block.
        shape: Shape of the array.
        block_shape: Block shape; must have as many axes as ``shape``.
        dtype: Element type; defaults to ``np.float64``.

    Returns:
        The newly created BlockArray.
    """
    assert len(shape) == len(block_shape)
    dtype = np.float64 if dtype is None else dtype
    grid = ArrayGrid(shape, block_shape, dtype.__name__)
    meta = grid.to_meta()
    result = BlockArray(grid, self.cm)
    for entry in grid.get_entry_iterator():
        placement = {
            "grid_entry": entry,
            "grid_shape": grid.grid_shape
        }
        result.blocks[entry].oid = self.cm.new_block(
            op_name, entry, meta, syskwargs=placement)
    return result
def train(params: Dict, data: NumsDMatrix, *args, evals=(), **kwargs):
    """Schedule one "xgb_train" call per row block of ``data.X``.

    A rabit tracker is started with one worker per row block of ``X``, and
    its environment is encoded into arguments passed to every call. Each
    evaluation set is re-blocked into a single block so its object ids can
    be passed along.

    Args:
        params: Parameters forwarded to each "xgb_train" call.
        data: Matrix wrapper exposing the features ``X`` (2-D) and labels
            ``y`` (1-D, or 2-D with a single column). ``X`` and ``y`` must
            have the same number of rows and the same row block size.
        *args: Extra positional arguments forwarded to each call.
        evals: Iterable of ``(eval_X, eval_y, eval_method)`` triples.
        **kwargs: Extra keyword arguments forwarded to each call.

    Returns:
        A 1-D BlockArray with dtype ``"dict"`` and one block of size 1 per
        row block of ``X``; each block's ``oid`` is the result of the
        corresponding "xgb_train" call.
    """
    X: BlockArray = data.X
    y: BlockArray = data.y
    assert len(X.shape) == 2
    # Fixed: the row-count check used to compare X.shape[0] with itself,
    # so mismatched X/y row counts were never caught.
    assert X.shape[0] == y.shape[0] and X.block_shape[0] == y.block_shape[0]
    assert len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1)

    app: ArrayApplication = _instance()
    cm: ComputeManager = app.cm
    cm.register("xgb_train", xgb_train_remote, {})

    # Start tracker
    num_workers = X.grid.grid_shape[0]
    env = _start_rabit_tracker(num_workers)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    # Flatten the evaluation sets into [X_oid, y_oid, method, ...]; each
    # array is first collapsed into a single block.
    evals_flat = []
    for eval_X, eval_y, eval_method in evals:
        if eval_X.shape != eval_X.block_shape:
            eval_X = eval_X.reshape(shape=eval_X.shape,
                                    block_shape=eval_X.shape)
        if eval_y.shape != eval_y.block_shape:
            eval_y = eval_y.reshape(shape=eval_y.shape,
                                    block_shape=eval_y.shape)
        eval_X_oid = eval_X.blocks.item().oid
        eval_y_oid = eval_y.blocks.item().oid
        evals_flat += [eval_X_oid, eval_y_oid, eval_method]

    # Re-block X so that each block spans all columns.
    X: BlockArray = X.reshape(block_shape=(X.block_shape[0], X.shape[1]))
    result: BlockArray = BlockArray(
        ArrayGrid(shape=(X.grid.grid_shape[0], ),
                  block_shape=(1, ),
                  dtype="dict"),
        cm)
    for grid_entry in X.grid.get_entry_iterator():
        X_block: Block = X.blocks[grid_entry]
        i = grid_entry[0]
        if len(y.shape) == 1:
            y_block: Block = y.blocks[i]
        else:
            y_block: Block = y.blocks[i, 0]
        syskwargs = {"grid_entry": grid_entry,
                     "grid_shape": X.grid.grid_shape}
        result.blocks[i].oid = cm.call("xgb_train",
                                       X_block.oid,
                                       y_block.oid,
                                       rabit_args,
                                       params,
                                       args,
                                       kwargs,
                                       *evals_flat,
                                       syskwargs=syskwargs)
    return result
def from_blocks(cls, arr: np.ndarray, result_shape, cm):
    """Assemble a BlockArray from an array of blocks, or from a single Block.

    Args:
        arr: An ndarray of ``Block`` objects, or one ``Block``. A lone
            ``Block`` always yields ``result_shape == ()``.
        result_shape: Shape of the resulting array. When ``None`` (and
            ``arr`` is not a single ``Block``) it is derived with
            ``array_utils.shape_from_block_array``.
        cm: Compute manager handed to the new BlockArray.

    Returns:
        A BlockArray whose grid uses the shape and dtype of a sample block
        (the first entry of ``arr``) as block shape and dtype, and whose
        block entries are the given block(s).
    """
    origin = (0,) * len(arr.shape)
    single_block = isinstance(arr, Block)
    if single_block:
        sample_block = arr
        result_shape = ()
    else:
        sample_block = arr[origin]
        if result_shape is None:
            result_shape = array_utils.shape_from_block_array(arr)

    result_grid = ArrayGrid(shape=result_shape,
                            block_shape=sample_block.shape,
                            dtype=sample_block.dtype.__name__)
    assert arr.shape == result_grid.grid_shape

    result = BlockArray(result_grid, cm)
    for entry in result_grid.get_entry_iterator():
        result.blocks[entry] = arr if single_block else arr[entry]
    return result
def read_fs(self, filename: str):
    """Load a stored array by scheduling one block read per recorded address.

    Args:
        filename: Name of the stored array.

    Returns:
        A BlockArray built from the stored grid metadata; for each grid
        entry listed in the metadata's addresses, the block's ``oid`` is the
        result of ``self._fs.read_block_fs`` run with that entry's device.
    """
    stored_meta = self._fs.read_meta_fs(filename)
    block_addresses = stored_meta["addresses"]
    grid_meta = stored_meta["grid_meta"]
    loaded: BlockArray = BlockArray(ArrayGrid.from_meta(grid_meta), self.cm)
    for entry in block_addresses:
        # Read each block on the device recorded for it.
        target_device: DeviceID = DeviceID.from_str(block_addresses[entry])
        loaded.blocks[entry].oid = self._fs.read_block_fs(
            filename,
            entry,
            grid_meta,
            syskwargs={"device_id": target_device},
        )
    return loaded
def _simple_reshape(self, arr, shape, block_shape):
    """Reshape ``arr`` block by block.

    Only valid when the old and new shapes differ by factors of 1 and the
    order of the remaining factors is unchanged.

    Args:
        arr: Source BlockArray.
        shape: Target shape.
        block_shape: Target block shape.

    Returns:
        A new BlockArray in which each block's ``oid`` is the result of
        ``arr.cm.reshape`` applied to the matching source block.
    """
    # Reshape the array of blocks only.
    # This is only used when the difference in shape are factors of 1s,
    # and the ordering of other factors are maintained.

    # Check assumptions.
    assert len(self._strip_ones(arr.shape)) == len(self._strip_ones(shape))

    # Create new grid, and perform reshape on blocks
    # to simplify access to source blocks.
    new_grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__)
    reshaped_sources = arr.blocks.reshape(new_grid.grid_shape)
    result = BlockArray(new_grid, arr.cm)
    for entry in new_grid.get_entry_iterator():
        source: Block = reshaped_sources[entry]
        target: Block = result.blocks[entry]
        placement = {
            "grid_entry": entry,
            "grid_shape": new_grid.grid_shape
        }
        target.oid = arr.cm.reshape(source.oid, target.shape,
                                    syskwargs=placement)
    return result
def test_array_rwd():
    """Round-trip a small random vector through ``StoredArrayS3``.

    Creates the "darrays" bucket, stores a single-block grid and the array,
    checks the array reads back unchanged, then deletes the array and grid.
    """
    s3 = boto3.resource("s3", region_name="us-east-1")
    assert s3.Bucket("darrays") not in s3.buckets.all()
    s3.create_bucket(Bucket="darrays")

    data: np.ndarray = np.random.random(3)
    store = StoredArrayS3("darrays/%s_X" % "__test__")
    # The whole array is stored as one block.
    single_block_grid = ArrayGrid(shape=data.shape,
                                  block_shape=data.shape,
                                  dtype=np.float64.__name__)
    store.put_grid(single_block_grid)
    store.init_grid()
    store.put_array(data)
    assert np.allclose(data, store.get_array())
    store.del_array()
    store.delete_grid()
def test_bounds():
    """With all-ones cluster shapes, every grid entry maps to the zero entry.

    Checked for both ``CyclicDeviceGrid`` and ``PackedDeviceGrid`` with
    cluster shapes of one to four axes.
    """
    grid: ArrayGrid = ArrayGrid(shape=(2, 6, 10),
                                block_shape=(1, 2, 5),
                                dtype="float32")
    for num_axes in range(1, 5):
        cluster_shape = (1,) * num_axes
        cyclic: CyclicDeviceGrid = CyclicDeviceGrid(
            cluster_shape, "cpu", mock_device_ids(1))
        packed: PackedDeviceGrid = PackedDeviceGrid(
            cluster_shape, "cpu", mock_device_ids(1))
        for entry in grid.get_entry_iterator():
            for device_grid in (cyclic, packed):
                mapped = device_grid.get_cluster_entry(entry, grid.grid_shape)
                assert mapped == tuple([0] * len(device_grid.grid_shape))
def _delete(self, filename, store_cls, remote_func):
    """Schedule ``remote_func`` once per block of a stored array.

    Args:
        filename: Name of the stored array.
        store_cls: Store class passed to ``self._get_array_grid`` to look
            up the array's grid.
        remote_func: Callable invoked as
            ``remote_func(filename, grid_entry, grid_meta, syskwargs=...)``
            for every grid entry.

    Returns:
        A BlockArray with dtype name ``"dict"`` holding one block of shape
        all-ones per block of the stored array; each block's ``oid`` is the
        value returned by ``remote_func`` for that entry.
    """
    grid = self._get_array_grid(filename, store_cls)
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
    # documented replacement.
    result_grid = ArrayGrid(
        grid.grid_shape,
        tuple(np.ones_like(grid.shape, dtype=int)),
        dtype=dict.__name__,
    )
    rarr = BlockArray(result_grid, self.cm)
    # The grid metadata is the same for every block; build it once.
    grid_meta = grid.to_meta()
    for grid_entry in grid.get_entry_iterator():
        rarr.blocks[grid_entry].oid = remote_func(
            filename,
            grid_entry,
            grid_meta,
            syskwargs={"grid_entry": grid_entry,
                       "grid_shape": grid.grid_shape},
        )
    return rarr
def permutation(self, size, block_size):
    """Create a 1-D int64 permutation array of length ``size``.

    The permutation is produced as a single block by
    ``self._cm.permutation`` and then re-blocked to ``block_size``.

    Args:
        size: Length of the permutation.
        block_size: Block size of the returned array.

    Returns:
        The re-blocked BlockArray.
    """
    full_shape = (size, )
    target_block_shape = (block_size, )
    # Build the array as one block first; it is re-blocked on return.
    grid: ArrayGrid = ArrayGrid(shape=full_shape,
                                block_shape=full_shape,
                                dtype=np.int64.__name__)
    perm = BlockArray(grid, self._cm)
    for entry in perm.grid.get_entry_iterator():
        rng_params = list(self._rng.new_block_rng_params())
        target: Block = perm.blocks[entry]
        placement = {
            "grid_entry": entry,
            "grid_shape": grid.grid_shape
        }
        target.oid = self._cm.permutation(rng_params, size,
                                          syskwargs=placement)
    return perm.reshape(block_shape=target_block_shape)
def _write(self, ba: BlockArray, filename, remote_func):
    """Schedule ``remote_func`` once per block of ``ba``.

    Args:
        ba: The BlockArray whose blocks are written.
        filename: Name passed through to ``remote_func``.
        remote_func: Callable invoked as
            ``remote_func(block_oid, filename, grid_entry, grid_meta,
            syskwargs=...)`` for every grid entry of ``ba``.

    Returns:
        A BlockArray with dtype name ``"dict"`` holding one block of shape
        all-ones per block of ``ba``; each block's ``oid`` is the value
        returned by ``remote_func`` for that entry.
    """
    grid = ba.grid
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
    # documented replacement.
    result_grid = ArrayGrid(grid.grid_shape,
                            tuple(np.ones_like(grid.shape, dtype=int)),
                            dtype=dict.__name__)
    rarr = BlockArray(result_grid, self.cm)
    # The grid metadata is the same for every block; build it once.
    grid_meta = grid.to_meta()
    for grid_entry in grid.get_entry_iterator():
        rarr.blocks[grid_entry].oid = remote_func(
            ba.blocks[grid_entry].oid,
            filename,
            grid_entry,
            grid_meta,
            syskwargs={
                "grid_entry": grid_entry,
                "grid_shape": grid.grid_shape
            })
    return rarr
def test_split(app_inst: ArrayApplication):
    """Split a one-block vector in two and check the halves reassemble."""
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    values = [1.0, 2.0, 3.0, 4.0]
    x = app_inst.array(np.array(values), block_shape=(4,))
    source_block = x.blocks[0]
    placement = {
        "grid_entry": source_block.grid_entry,
        "grid_shape": source_block.grid_shape,
        "options": {"num_returns": 2}
    }
    first_half, second_half = x.cm.split(source_block.oid,
                                         2,
                                         axis=0,
                                         transposed=False,
                                         syskwargs=placement)
    # Re-wrap the two halves as a two-block array.
    ba = BlockArray(ArrayGrid((4,), (2,), x.dtype.__name__), x.cm)
    ba.blocks[0].oid = first_half
    ba.blocks[1].oid = second_half
    assert np.allclose(values, ba.get())
def __getattr__(self, item):
    """Provide ``ndim`` and ``T``, and reject NumPy array-protocol lookups.

    Args:
        item: Name of the attribute being looked up.

    Returns:
        For ``"ndim"``, the number of axes. For ``"T"``, a new BlockArray
        with reversed shape and block shape whose blocks are the transposed
        blocks of this array.

    Raises:
        ValueError: For ``__array_priority__`` / ``__array_struct__``.
        NotImplementedError: For any other attribute name.
    """
    if item in ("__array_priority__", "__array_struct__"):
        # This is triggered by a numpy array on the LHS.
        raise ValueError("Unable to covert numpy array to block array.")
    if item == "ndim":
        return len(self.shape)
    if item != "T":
        # NOTE(review): unknown names raise NotImplementedError rather than
        # AttributeError, so hasattr()/getattr(obj, name, default) will
        # propagate it -- confirm this is intended.
        raise NotImplementedError(item)

    # Transpose: reverse the grid metadata, then transpose each block.
    meta_t = self.grid.to_meta()
    meta_t["shape"] = tuple(meta_t["shape"][::-1])
    meta_t["block_shape"] = tuple(meta_t["block_shape"][::-1])
    transposed = BlockArray(ArrayGrid.from_meta(meta_t), self.cm)
    transposed.blocks = np.copy(self.blocks.T)
    for entry in transposed.grid.get_entry_iterator():
        transposed.blocks[entry] = transposed.blocks[entry].transpose()
    return transposed
def __inequality__(self, op, other):
    """Apply the comparison ``op`` block-wise between ``self`` and ``other``.

    Args:
        op: Name of the comparison operator passed to each block's ``bop``.
        other: Right-hand operand; it is first passed through
            ``self.check_or_convert_other``. After conversion its shape must
            be ``()`` or equal to ``self.shape``.

    Returns:
        A BlockArray with dtype name ``"bool"`` holding the block-wise
        results.
    """
    other = self.check_or_convert_other(other)
    assert (other.shape == () or other.shape == self.shape
            ), "Currently supports comparison with scalars only."
    out_shape = array_utils.broadcast(self.shape, other.shape).shape
    out_block_shape = array_utils.broadcast_block_shape(
        self.shape, other.shape, self.block_shape)
    out = BlockArray(ArrayGrid(out_shape, out_block_shape, bool.__name__),
                     self.cm)
    for entry in out.grid.get_entry_iterator():
        # A scalar operand has a single block that is paired with every
        # block of self.
        if other.shape == ():
            rhs_block: Block = other.blocks.item()
        else:
            rhs_block: Block = other.blocks[entry]
        out.blocks[entry] = self.blocks[entry].bop(op, rhs_block, args={})
    return out
def test_errors():
    """An out-of-bounds grid entry must raise ``ValueError``.

    The last valid grid entry is checked first; it is expected to map to
    cluster entry ``(0, 1, 2)``. The same entry pushed one past the end
    along axis 0 must then be rejected.
    """
    cluster_shape = (1, 2, 3)
    # ``np.product`` was removed in NumPy 2.0; ``np.prod`` is the supported
    # spelling.
    device_ids = mock_device_ids(int(np.prod(cluster_shape)))
    grid: ArrayGrid = ArrayGrid(shape=(8, 20, 12),
                                block_shape=(2, 5, 3),
                                dtype="float32")
    packed_grid: PackedDeviceGrid = PackedDeviceGrid(cluster_shape, "cpu",
                                                     device_ids)
    grid_shape = grid.grid_shape

    # Last valid entry along every axis.
    grid_entry = tuple(np.array(grid_shape, dtype=int) - 1)
    assert packed_grid.get_cluster_entry(grid_entry, grid_shape) == (0, 1, 2)

    # One past the end along the first axis.
    grid_entry = np.array(grid_shape, dtype=int) - 1
    grid_entry[0] += 1
    grid_entry = tuple(grid_entry)
    with pytest.raises(ValueError):
        # Out of bounds grid entry.
        packed_grid.get_cluster_entry(grid_entry, grid_shape)
def __init__(self, source, sel: BasicSelection = None, block_shape: tuple = None):
    """Set up a view of ``source`` restricted to a basic selection.

    Args:
        source: The block array being viewed.
        sel: The selection; defaults to one built from the full shape of
            ``source``. It must have one axis per axis of ``source``.
        block_shape: Block shape of the view. When omitted, it is derived
            from the selection and the block shape of ``source``.
    """
    self._source: BlockArrayBase = source
    self._cm: ComputeManager = source.cm
    source_shape = source.shape
    if sel is None:
        sel = BasicSelection.from_shape(source_shape)
    # Currently, this is all we support.
    assert len(sel.axes) == len(source_shape)
    self.sel = sel
    self.shape: tuple = sel.get_output_shape()
    if block_shape is None:
        block_shape = array_utils.block_shape_from_subscript(
            sel.selector(), source.block_shape)
    self.block_shape = block_shape
    assert len(self.block_shape) == len(self.shape)
    self.grid: ArrayGrid = ArrayGrid(self.shape,
                                     self.block_shape,
                                     dtype=source.dtype.__name__)
def broadcast_to(self, shape):
    """Return a ``BlockArrayBase`` with this array's blocks broadcast to ``shape``.

    The result's blocks are a view over ``self.blocks`` produced by
    ``np.nditer`` with the result grid's shape as ``itershape``.

    Args:
        shape: The shape to broadcast against.

    Returns:
        A ``BlockArrayBase`` whose grid has the broadcast shape and block
        shape.
    """
    target = array_utils.broadcast(self.shape, shape)
    target_block_shape = array_utils.broadcast_block_shape(
        self.shape, shape, self.block_shape)
    target_grid = ArrayGrid(target.shape, target_block_shape,
                            self.grid.dtype.__name__)
    result: BlockArrayBase = BlockArrayBase(target_grid, self.cm)
    # Below taken directly from _broadcast_to in numpy's stride_tricks.py.
    block_iter = np.nditer(
        (self.blocks, ),
        flags=['multi_index', 'refs_ok', 'zerosize_ok'],
        op_flags=['readonly'],
        itershape=result.grid.grid_shape,
        order='C')
    with block_iter:
        # never really has writebackifcopy semantics
        broadcast_view = block_iter.itviews[0]
    result.blocks = broadcast_view
    return result
def _inspect_block_shape(nps_app_inst):
    """Print the block shape computed for a range of array configurations.

    For every combination of dtype, array shape and cluster shape, this
    calls ``compute_block_shape`` on the application and prints the
    resulting grid shape, block shape, and the array and block sizes in GB.

    Args:
        nps_app_inst: Application instance providing
            ``compute_block_shape``.
    """
    app = nps_app_inst
    dtypes = [np.float32, np.float64]
    shapes = [(10**9, 250), (10**4, 10**4), (10**7, 10), (10, 10**7)]
    cluster_shapes = [(1, 1), (2, 1), (4, 1), (16, 1)]
    cores_per_node = 64
    combos = itertools.product(dtypes, shapes, cluster_shapes)
    for dtype, shape, cluster_shape in combos:
        # ``np.product`` was removed in NumPy 2.0; ``np.prod`` is the
        # supported spelling.
        num_cores = np.prod(cluster_shape) * cores_per_node
        block_shape = app.compute_block_shape(shape=shape,
                                              dtype=dtype,
                                              cluster_shape=cluster_shape,
                                              num_cores=num_cores)
        grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype.__name__)
        print()
        print("dtype=%s" % dtype.__name__,
              "cluster_shape=%s" % str(cluster_shape),
              "shape=%s" % str(shape))
        print("grid_shape", grid.grid_shape, "block_shape", block_shape)
        # ``dtype().nbytes`` is the size in bytes of one element.
        print("array size (GB)", np.prod(shape) * dtype().nbytes / 10**9,
              "block size (GB)",
              np.prod(block_shape) * dtype().nbytes / 10**9)
def _sample_basic(self, rfunc_name, shape, block_shape, dtype, rfunc_args) -> BlockArray:
    """Create a BlockArray whose blocks come from ``cm.random_block``.

    Every block gets its own RNG parameters from
    ``self._rng.new_block_rng_params()``.

    Args:
        rfunc_name: Name of the random function. "random" and "integers"
            place the size argument differently from the other functions.
        shape: Shape of the array, or ``None`` for a scalar (in which case
            ``block_shape`` must also be ``None``).
        block_shape: Block shape; required whenever ``shape`` is given.
        dtype: Element type (a class); defaults to ``np.float64``.
        rfunc_args: Positional arguments for the random function, without
            the size argument.

    Returns:
        The newly created BlockArray.
    """
    if shape is None:
        assert block_shape is None
        shape = ()
        block_shape = ()
    else:
        assert block_shape is not None
    if dtype is None:
        dtype = np.float64
    assert isinstance(dtype, type)
    grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype=dtype.__name__)
    ba: BlockArray = BlockArray(grid, self._cm)
    for grid_entry in ba.grid.get_entry_iterator():
        rng_params = list(self._rng.new_block_rng_params())
        # Size and dtype to begin with.
        this_block_shape = grid.get_block_shape(grid_entry)
        # ``np.product`` was removed in NumPy 2.0; ``np.prod`` is the
        # supported spelling.
        size = int(np.prod(this_block_shape))
        # Inconsistent param orderings.
        if rfunc_name == "random":
            # Size comes first.
            rfunc_args_final = tuple([size] + list(rfunc_args))
        elif rfunc_name == "integers":
            # rfunc_args == (low, high, dtype, endpoint)
            rfunc_args_final = tuple(
                list(rfunc_args[:2]) + [size] + list(rfunc_args[2:]))
        else:
            # Size comes last.
            rfunc_args_final = tuple(list(rfunc_args) + [size])
        block: Block = ba.blocks[grid_entry]
        block.oid = self._cm.random_block(
            rng_params,
            rfunc_name,
            rfunc_args_final,
            this_block_shape,
            dtype,
            syskwargs={
                "grid_entry": grid_entry,
                "grid_shape": grid.grid_shape
            },
        )
    return ba
def predict(self, X: BlockArray):
    """Schedule one "xgb_predict" call per block of ``X``.

    Args:
        X: The input BlockArray.

    Returns:
        A 1-D BlockArray with ``X.shape[0]`` entries, blocked like the
        first axis of ``X``. The result block at index ``i`` receives the
        ``oid`` of the call made for the block of ``X`` whose grid entry
        starts with ``i``.
    """
    app: ArrayApplication = _instance()
    cm: ComputeManager = app.cm
    cm.register("xgb_predict", xgb_predict_remote, {})
    # The first block of the model is passed to every call.
    model_block: Block = self.model.blocks[0]
    predictions_grid = ArrayGrid(shape=(X.shape[0], ),
                                 block_shape=(X.block_shape[0], ),
                                 dtype=nps.int.__name__)
    predictions: BlockArray = BlockArray(predictions_grid, cm)
    for entry in X.grid.get_entry_iterator():
        row = entry[0]
        features: Block = X.blocks[entry]
        target: Block = predictions.blocks[row]
        placement = {
            "grid_entry": entry,
            "grid_shape": X.grid.grid_shape
        }
        target.oid = cm.call("xgb_predict",
                             model_block.oid,
                             features.oid,
                             syskwargs=placement)
    return predictions
def swapaxes(self, axis1, axis2):
    """Return a BlockArray with ``axis1`` and ``axis2`` interchanged.

    The shape and block shape are swapped in the grid metadata, every block
    is swapped individually, and the array of blocks is swapped as well.

    Args:
        axis1: First axis.
        axis2: Second axis.

    Returns:
        The swapped BlockArray.

    Raises:
        ValueError: If either axis is greater than or equal to the number
            of dimensions.
    """
    meta = self.grid.to_meta()
    shape = list(meta["shape"])
    block_shape = list(meta["block_shape"])
    ndim = len(shape)
    if max(axis1, axis2) >= ndim:
        raise ValueError("axis is larger than the array dimension")

    # Swap the two axes in the shape first, then in the block shape.
    for dims in (shape, block_shape):
        dims[axis1], dims[axis2] = dims[axis2], dims[axis1]
    meta["shape"] = tuple(shape)
    meta["block_shape"] = tuple(block_shape)
    swapped_grid = ArrayGrid.from_meta(meta)

    # Swap each block in place in the original layout, then swap the
    # layout of the block array itself.
    swapped_blocks = np.empty(self.blocks.shape, dtype="O")
    for entry in self.grid.get_entry_iterator():
        swapped_blocks[entry] = self.blocks[entry].swapaxes(axis1, axis2)
    swapped_blocks = swapped_blocks.swapaxes(axis1, axis2)
    return BlockArray(swapped_grid, self.cm, swapped_blocks)
def _fast_element_wise(self, op_name, other):
    """
    Implements fast scheduling for basic element-wise operations.

    Each output block is created directly, copying the grid shape, rect and
    shape of the matching block of ``self``, and its ``oid`` is set to the
    result of ``self.cm.bop`` on the two operand blocks. The output
    BlockArray is built from these blocks at the end.

    Args:
        op_name: Name of the binary operator.
        other: Right-hand BlockArray, indexed with the grid entries of
            ``self``.

    Returns:
        A BlockArray with the shape and block shape of ``self`` and the
        operator's output dtype.
    """
    result_dtype = array_utils.get_bop_output_type(op_name, self.dtype,
                                                   other.dtype)
    # Schedule the op first.
    out_blocks = np.empty(shape=self.grid.grid_shape, dtype=Block)
    for entry in self.grid.get_entry_iterator():
        lhs: Block = self.blocks[entry]
        rhs: Block = other.blocks[entry]
        new_block = Block(
            grid_entry=entry,
            grid_shape=lhs.grid_shape,
            rect=lhs.rect,
            shape=lhs.shape,
            dtype=result_dtype,
            transposed=False,
            cm=self.cm,
        )
        out_blocks[entry] = new_block
        placement = {
            "grid_entry": entry,
            "grid_shape": self.grid.grid_shape,
        }
        new_block.oid = self.cm.bop(
            op_name,
            lhs.oid,
            rhs.oid,
            lhs.transposed,
            rhs.transposed,
            axes={},
            syskwargs=placement,
        )
    result_grid = ArrayGrid(self.shape, self.block_shape,
                            result_dtype.__name__)
    return BlockArray(result_grid, self.cm, blocks=out_blocks)
def get_parts_fs(filename: AnyStr, grid_meta: Dict):
    """Report which block files of an array exist under ``filename``.

    Args:
        filename: Directory expected to hold one file per block, named by
            the grid entry joined with underscores plus the array file
            extension.
        grid_meta: Grid metadata used to enumerate the expected entries.

    Returns:
        ``None`` if ``filename`` is not a directory or no block file is
        present, the string ``"all"`` if every block file is present, and
        otherwise a ``uint32`` array of the grid entries that are present.
    """
    if not pathlib.Path(filename).is_dir():
        return None

    grid: ArrayGrid = ArrayGrid.from_meta(grid_meta)
    # This is a multi-dimensional array of blocks, so entries should be relatively small.
    # NOTE(review): the bound is checked on block_shape, while the values
    # stored as uint32 below are grid entries -- confirm this is intended.
    assert np.all(np.array(grid.block_shape) < 2**32)

    present = []
    any_missing = False
    for grid_entry in grid.get_entry_iterator():
        entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
        entry_path = pathlib.Path(settings.pj(filename, entry_name))
        if entry_path.is_file():
            present.append(grid_entry)
        else:
            any_missing = True

    if not any_missing:
        return "all"
    if not present:
        return None
    return np.array(present, dtype=np.uint32)
def diag(self, X: BlockArray) -> BlockArray:
    """Build a diagonal matrix from a vector, or extract a matrix diagonal.

    For a 1-D ``X`` the result is a square 2-D array: diagonal blocks come
    from ``self.cm.diag`` on the matching block of ``X`` and all other
    blocks are "zeros" blocks. For a 2-D ``X`` (square, with square blocks)
    the result is a 1-D array whose blocks come from ``self.cm.diag`` on
    the diagonal blocks of ``X``.

    Args:
        X: A 1-D or 2-D BlockArray.

    Returns:
        The resulting BlockArray.

    Raises:
        ValueError: If ``X`` has neither 1 nor 2 axes.
    """
    num_axes = len(X.shape)
    if num_axes not in (1, 2):
        raise ValueError("X must have 1 or 2 axes.")

    if num_axes == 1:
        length = X.shape[0]
        block_length = X.block_shape[0]
        grid = ArrayGrid((length, length), (block_length, block_length),
                         X.dtype.__name__)
        grid_meta = grid.to_meta()
        rarr = BlockArray(grid, self.cm)
        for grid_entry in grid.get_entry_iterator():
            syskwargs = {
                "grid_entry": grid_entry,
                "grid_shape": grid.grid_shape
            }
            if np.all(np.diff(grid_entry) == 0):
                # This is a diagonal block.
                oid = self.cm.diag(X.blocks[grid_entry[0]].oid,
                                   syskwargs=syskwargs)
            else:
                oid = self.cm.new_block("zeros", grid_entry, grid_meta,
                                        syskwargs=syskwargs)
            rarr.blocks[grid_entry].oid = oid
        return rarr

    assert X.shape[0] == X.shape[1], "X must be a square array."
    assert X.block_shape[0] == X.block_shape[1], "block_shape must be square."
    grid = ArrayGrid((X.shape[0], ), (X.block_shape[0], ), X.dtype.__name__)
    rarr = BlockArray(grid, self.cm)
    for grid_entry in X.grid.get_entry_iterator():
        if not np.all(np.diff(grid_entry) == 0):
            # Off-diagonal blocks contribute nothing to the output.
            continue
        # This is a diagonal block.
        out_grid_entry = grid_entry[:1]
        syskwargs = {
            "grid_entry": out_grid_entry,
            "grid_shape": grid.grid_shape[:1]
        }
        rarr.blocks[out_grid_entry].oid = self.cm.diag(
            X.blocks[grid_entry].oid, syskwargs=syskwargs)
    return rarr