Beispiel #1
0
    def map_uop(self,
                op_name: str,
                arr: BlockArray,
                out: BlockArray = None,
                where=True,
                args=None,
                kwargs=None) -> BlockArray:
        """Apply an element-wise unary operator to each block of an array.

        Args:
            op_name: Name of the element-wise unary operator.
            arr: The BlockArray to operate on.
            out: Optional BlockArray that receives the result. It must have
                the same shape and block shape as ``arr``.
            where: Must be ``True``; any other value is rejected.
            args: Positional args forwarded to the operator.
            kwargs: Keyword args forwarded to the operator.

        Returns:
            ``out`` if it was given, otherwise a newly created BlockArray.

        Raises:
            NotImplementedError: If ``where`` is not ``True``.
        """
        if where is not True:
            raise NotImplementedError("'where' argument is not yet supported.")
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}
        src_shape, src_block_shape = arr.shape, arr.block_shape
        out_dtype = array_utils.get_uop_output_type(op_name, arr.dtype)
        assert len(src_shape) == len(src_block_shape)
        if out is not None:
            result = out
            result_grid = result.grid
            assert (result.shape == arr.shape
                    and result.block_shape == arr.block_shape)
        else:
            result_grid = ArrayGrid(src_shape, src_block_shape,
                                    out_dtype.__name__)
            result = BlockArray(result_grid, self.cm)
        for entry in result_grid.get_entry_iterator():
            # TODO(hme): Faster to create ndarray first,
            #  and instantiate block array on return
            #  to avoid instantiating blocks on BlockArray initialization.
            src_block = arr.blocks[entry]
            result.blocks[entry] = src_block.uop_map(op_name,
                                                     args=args,
                                                     kwargs=kwargs)
        return result
Beispiel #2
0
def test_compute_block_shape(app_inst: ArrayApplication):
    """Check computed block shapes against the ideal tall-skinny and square layouts."""
    dtype = np.float32
    cores_per_node = 64

    # Tall-skinny.
    for size in [64, 128, 256, 512, 1024]:
        size_str = "%sGB" % size
        num_nodes = size // 64
        cluster_shape = (16, 1)
        ideal = ideal_tall_skinny_shapes(size_str, dtype)
        shape, expected_block_shape, expected_grid_shape = ideal
        num_cores = num_nodes * cores_per_node
        block_shape = app_inst.cm.compute_block_shape(
            shape, dtype, cluster_shape, num_cores)
        grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype.__name__)
        print("tall-skinny",
              "cluster_shape=%s" % str(cluster_shape),
              "grid_shape=%s" % str(expected_grid_shape),
              "size=%s" % size_str,
              "bytes computed=%s" % (grid.nbytes() / 10**9))
        assert expected_grid_shape == grid.grid_shape
        assert expected_block_shape == block_shape

    # Square.
    for size in [4, 16, 64, 256, 1024]:
        size_str = "%sGB" % size
        # Anything smaller than 64GB is placed on a single node.
        num_nodes = max(1, size // 64)
        side = int(np.sqrt(num_nodes))
        cluster_shape = (side, side)
        ideal = ideal_square_shapes(size_str, dtype)
        shape, expected_block_shape, expected_grid_shape = ideal
        num_cores = num_nodes * cores_per_node
        block_shape = app_inst.cm.compute_block_shape(
            shape, dtype, cluster_shape, num_cores)
        grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype.__name__)
        print("square",
              "cluster_shape=%s" % str(cluster_shape),
              "grid_shape=%s" % str(expected_grid_shape),
              "size=%s" % size_str,
              "bytes computed=%s" % (grid.nbytes() / 10**9))
        grid_shape_msg = "%s != %s" % (expected_grid_shape, grid.grid_shape)
        assert expected_grid_shape == grid.grid_shape, grid_shape_msg
        block_shape_msg = "%s != %s" % (expected_block_shape, block_shape)
        assert expected_block_shape == block_shape, block_shape_msg
Beispiel #3
0
 def eye(self, shape: tuple, block_shape: tuple, dtype: np.dtype = None):
     """Create a 2-D block array with "eye" blocks on the block diagonal.

     Args:
         shape: Shape of the array; must have two axes.
         block_shape: Shape of each block; must have two axes.
         dtype: Element type; ``np.float64`` when omitted.

     Returns:
         A BlockArray whose diagonal grid entries are created with the
         "eye" op and all remaining entries with the "zeros" op.
     """
     assert len(shape) == len(block_shape) == 2
     dtype = np.float64 if dtype is None else dtype
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     meta = grid.to_meta()
     result = BlockArray(grid, self.cm)
     for entry in grid.get_entry_iterator():
         # An entry lies on the diagonal when all its coordinates are equal.
         on_diagonal = np.all(np.diff(entry) == 0)
         block_op = "eye" if on_diagonal else "zeros"
         result.blocks[entry].oid = self.cm.new_block(
             block_op,
             entry,
             meta,
             syskwargs={"grid_entry": entry, "grid_shape": grid.grid_shape},
         )
     return result
Beispiel #4
0
 def delete_fs(self, filename: str):
     """Delete every stored block of ``filename`` and then its metadata.

     The stored metadata supplies the block addresses and the grid layout.
     One delete call is issued per addressed block, passing the device id
     parsed from that block's address; the metadata is removed afterwards.

     Args:
         filename: Name of the stored array to delete.

     Returns:
         A BlockArray of dtype "dict" with one single-element block per grid
         entry of the stored array. Each addressed entry holds the object id
         returned by its delete call.
     """
     meta = self._fs.read_meta_fs(filename)
     addresses = meta["addresses"]
     grid_meta = meta["grid_meta"]
     grid = ArrayGrid.from_meta(grid_meta)
     result_grid = ArrayGrid(
         grid.grid_shape,
         # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated
         # in NumPy 1.20 and removed in NumPy 1.24.
         tuple(np.ones_like(grid.shape, dtype=int)),
         dtype=dict.__name__,
     )
     rarr = BlockArray(result_grid, self.cm)
     for grid_entry in addresses:
         device_id: DeviceID = DeviceID.from_str(addresses[grid_entry])
         rarr.blocks[grid_entry].oid = self._fs.delete_block_fs(
             filename,
             grid_entry,
             grid_meta,
             syskwargs={"device_id": device_id})
     # Remove the metadata only after all block deletions were scheduled.
     self._fs.delete_meta_fs(filename)
     return rarr
Beispiel #5
0
 def vec_from_oids(self, oids, shape, block_shape, dtype):
     """Wrap object ids in a vector-shaped BlockArray.

     The array is first created with ``block_shape`` equal to ``shape`` and
     its blocks are assigned the given object ids in grid-iteration order.
     It is then reshaped to ``block_shape`` if that differs from ``shape``.

     Args:
         oids: Indexable collection of object ids.
         shape: Shape of the resulting array.
         block_shape: Requested block shape of the resulting array.
         dtype: Element type; its ``__name__`` is given to the grid.

     Returns:
         The resulting BlockArray.
     """
     grid = ArrayGrid(shape=shape, block_shape=shape, dtype=dtype.__name__)
     vec = BlockArray(grid, self.cm)
     # Make sure resulting grid shape is a vector (1 dimensional):
     # at most one axis of the grid may be larger than one.
     grid_shape = vec.grid.grid_shape
     expected_sum = max(vec.grid.grid_shape) + len(vec.grid.grid_shape) - 1
     assert np.sum(grid_shape) == expected_sum
     for position, entry in enumerate(vec.grid.get_entry_iterator()):
         vec.blocks[entry].oid = oids[position]
     return vec.reshape(block_shape=block_shape) if block_shape != shape else vec
Beispiel #6
0
 def _group_index_lists_by_block(self, dst_slice_tuples,
                                 src_grid: ArrayGrid, dst_index_list,
                                 src_index_list):
     """Group (destination, source) index pairs by the source block holding them.

     Args:
         dst_slice_tuples: Per-axis (start, stop) pairs of the destination slice.
         src_grid: Grid describing how the source array is split into blocks.
         dst_index_list: Destination indices, aligned with ``src_index_list``.
         src_index_list: Source indices.

     Returns:
         A dict mapping each source grid entry that contains at least one
         source index to a list of ``(dst_offset, src_offset)`` pairs. Offsets
         are relative to the start of the destination slice and of the source
         block respectively, stored in the smallest fitting unsigned type.

     Raises:
         Exception: If no supported unsigned integer type can hold the offsets.
     """
     # TODO(hme): Keep this function here until it's needed for greater support of
     #  selection/assignment operations.
     dst_bounds = np.array(dst_slice_tuples).T
     dst_indices = np.array(dst_index_list)
     src_indices = np.array(src_index_list)
     # Pick the smallest type to represent indices.
     # A set of these indices may be transmitted over the network,
     # so we want to pick the smallest encoding possible.
     candidates = ((2**8, np.uint8), (2**16, np.uint16),
                   (2**32, np.uint32), (2**64, np.uint64))
     block_dims = np.array(src_grid.block_shape)
     index_type = next(
         (np_type for bound, np_type in candidates
          if np.all(block_dims < bound) and np.all(dst_bounds[1] < bound)),
         None)
     if index_type is None:
         raise Exception(
             "Unable to encode block indices, blocks are too large.")

     # Block grid entries needed to write to given dst_slice_selection.
     grouped = {}
     for entry in src_grid.get_entry_iterator():
         src_bounds = np.array(src_grid.get_slice_tuples(entry)).T
         pairs = []
         for i in range(src_indices.shape[0]):
             src_index = src_indices[i]
             dst_index = dst_indices[i]
             inside = (src_bounds[0] <= src_index) & (src_index < src_bounds[1])
             if not np.all(inside):
                 continue
             dst_offset = (dst_index - dst_bounds[0]).astype(index_type)
             src_offset = (src_index - src_bounds[0]).astype(index_type)
             pairs.append((dst_offset, src_offset))
         if pairs:
             grouped[entry] = pairs
     return grouped
Beispiel #7
0
def indirect_tsr(app: ArrayApplication, X: BlockArray, reshape_output=True):
    """Compute the R factor of a 2-D block array in two QR stages.

    An R factor is computed for every row of blocks, and a second QR over
    all of those factors yields the final single-block R.

    Args:
        app: Application whose compute manager schedules the QR calls.
        X: 2-D BlockArray whose block height is at least its column count.
        reshape_output: When true and the single-block R does not already
            match the column blocking of ``X``, reshape R to that blocking.

    Returns:
        A BlockArray of shape ``(X.shape[1], X.shape[1])``.
    """
    assert len(X.shape) == 2
    # TODO (hme): This assertion is temporary and ensures returned
    #  shape of qr of block is correct.
    assert X.block_shape[0] >= X.shape[1]
    num_block_rows, num_block_cols = X.grid.grid_shape

    # Compute R for each block.
    # Assume no blocking along second dim.
    R_oids = [
        app.cm.qr(
            # Select a row according to block_shape.
            *[X.blocks[i, j].oid for j in range(num_block_cols)],
            mode="r",
            axis=1,
            syskwargs={
                "grid_entry": (i, 0),
                "grid_shape": (num_block_rows, 1),
                "options": {"num_returns": 1}
            })
        for i in range(num_block_rows)
    ]

    # Construct R by summing over R blocks.
    # TODO (hme): Communication may be inefficient due to redundancy of data.
    num_cols = X.shape[1]
    col_block = X.block_shape[1]
    R_shape = (num_cols, num_cols)
    R_block_shape = (col_block, col_block)
    tsR_grid = ArrayGrid(shape=R_shape,
                         block_shape=R_shape,
                         dtype=X.dtype.__name__)
    tsR = BlockArray(tsR_grid, app.cm)
    tsR.blocks[0, 0].oid = app.cm.qr(*R_oids,
                                     mode="r",
                                     axis=0,
                                     syskwargs={
                                         "grid_entry": (0, 0),
                                         "grid_shape": (1, 1),
                                         "options": {"num_returns": 1}
                                     })
    # If blocking is "tall-skinny," then we're done.
    if R_shape != R_block_shape and reshape_output:
        return tsR.reshape(R_shape, block_shape=R_block_shape)
    return tsR
Beispiel #8
0
 def _new_array(self,
                op_name: str,
                shape: tuple,
                block_shape: tuple,
                dtype: np.dtype = None):
     """Create a BlockArray whose blocks are all produced by one creation op.

     Args:
         op_name: Name of the op handed to the compute manager's ``new_block``.
         shape: Shape of the array.
         block_shape: Shape of each block; same number of axes as ``shape``.
         dtype: Element type; ``np.float64`` when omitted.

     Returns:
         The newly created BlockArray.
     """
     assert len(shape) == len(block_shape)
     dtype = np.float64 if dtype is None else dtype
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     meta = grid.to_meta()
     result = BlockArray(grid, self.cm)
     for entry in grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         result.blocks[entry].oid = self.cm.new_block(
             op_name, entry, meta, syskwargs=placement)
     return result
Beispiel #9
0
def train(params: Dict, data: NumsDMatrix, *args, evals=(), **kwargs):
    """Schedule one remote "xgb_train" call per row block of the training data.

    Args:
        params: Passed through to every remote training call.
        data: Provides the feature matrix ``X`` and the labels ``y``.
        *args: Passed through to every remote training call.
        evals: Iterable of ``(eval_X, eval_y, eval_method)`` triples. Each
            evaluation array is reshaped to a single block before use.
        **kwargs: Passed through to every remote training call.

    Returns:
        A 1-D BlockArray of dtype "dict" with one single-element block per
        row block of ``X``, holding the object id of that block's call.
    """
    X: BlockArray = data.X
    y: BlockArray = data.y
    assert len(X.shape) == 2
    # X and y must agree on the number of rows and on the row blocking, so
    # that row block i of X lines up with block i of y. (The row-count check
    # previously compared X.shape[0] with itself and could never fail.)
    assert X.shape[0] == y.shape[0] and X.block_shape[0] == y.block_shape[0]
    assert len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1)

    app: ArrayApplication = _instance()
    cm: ComputeManager = app.cm
    cm.register("xgb_train", xgb_train_remote, {})

    # Start tracker with one worker per row block of X.
    num_workers = X.grid.grid_shape[0]
    env = _start_rabit_tracker(num_workers)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    # Flatten the evaluation sets into (X oid, y oid, method) triples so they
    # can be passed as extra positional arguments to the remote call.
    evals_flat = []
    for eval_X, eval_y, eval_method in evals:
        if eval_X.shape != eval_X.block_shape:
            eval_X = eval_X.reshape(shape=eval_X.shape,
                                    block_shape=eval_X.shape)
        if eval_y.shape != eval_y.block_shape:
            eval_y = eval_y.reshape(shape=eval_y.shape,
                                    block_shape=eval_y.shape)
        eval_X_oid = eval_X.blocks.item().oid
        eval_y_oid = eval_y.blocks.item().oid
        evals_flat += [eval_X_oid, eval_y_oid, eval_method]

    # Remove any blocking along the feature axis: each row block holds all
    # columns of X.
    X: BlockArray = X.reshape(block_shape=(X.block_shape[0], X.shape[1]))
    result: BlockArray = BlockArray(
        ArrayGrid(shape=(X.grid.grid_shape[0], ),
                  block_shape=(1, ),
                  dtype="dict"), cm)
    for grid_entry in X.grid.get_entry_iterator():
        X_block: Block = X.blocks[grid_entry]
        i = grid_entry[0]
        if len(y.shape) == 1:
            y_block: Block = y.blocks[i]
        else:
            y_block: Block = y.blocks[i, 0]
        syskwargs = {"grid_entry": grid_entry, "grid_shape": X.grid.grid_shape}
        result.blocks[i].oid = cm.call("xgb_train",
                                       X_block.oid,
                                       y_block.oid,
                                       rabit_args,
                                       params,
                                       args,
                                       kwargs,
                                       *evals_flat,
                                       syskwargs=syskwargs)
    return result
Beispiel #10
0
 def from_blocks(cls, arr: np.ndarray, result_shape, cm):
     """Build a BlockArray from a Block or from an array of Blocks.

     Args:
         arr: Either a single Block or an array whose entries are Blocks.
         result_shape: Shape of the result. It is forced to ``()`` for a
             single Block and derived from ``arr`` when ``None``.
         cm: Compute manager handed to the new BlockArray.

     Returns:
         A BlockArray whose blocks are the given Block objects. The block
         shape and dtype are taken from a sample block.
     """
     origin = (0,) * len(arr.shape)
     single_block = isinstance(arr, Block)
     if single_block:
         sample = arr
         result_shape = ()
     else:
         sample = arr[origin]
         if result_shape is None:
             result_shape = array_utils.shape_from_block_array(arr)
     grid = ArrayGrid(shape=result_shape,
                      block_shape=sample.shape,
                      dtype=sample.dtype.__name__)
     assert arr.shape == grid.grid_shape
     result = BlockArray(grid, cm)
     for entry in grid.get_entry_iterator():
         block: Block = arr if single_block else arr[entry]
         result.blocks[entry] = block
     return result
Beispiel #11
0
 def read_fs(self, filename: str):
     """Load a stored array by scheduling one read per addressed block.

     Args:
         filename: Name of the stored array.

     Returns:
         A BlockArray laid out as described by the stored grid metadata.
         Every addressed block holds the object id returned by its read call.
     """
     stored = self._fs.read_meta_fs(filename)
     block_addresses = stored["addresses"]
     meta = stored["grid_meta"]
     result: BlockArray = BlockArray(ArrayGrid.from_meta(meta), self.cm)
     for entry in block_addresses:
         # The read is scheduled with the device id parsed from the address.
         device: DeviceID = DeviceID.from_str(block_addresses[entry])
         result.blocks[entry].oid = self._fs.read_block_fs(
             filename, entry, meta, syskwargs={"device_id": device})
     return result
Beispiel #12
0
    def _simple_reshape(self, arr, shape, block_shape):
        """Reshape ``arr`` block by block.

        This is only used when the shapes differ by factors of 1 and the
        ordering of the other factors is maintained.

        Args:
            arr: The source block array.
            shape: Target shape.
            block_shape: Target block shape.

        Returns:
            A new BlockArray whose blocks are reshaped source blocks.
        """
        # Check assumptions.
        src_dims = self._strip_ones(arr.shape)
        dst_dims = self._strip_ones(shape)
        assert len(src_dims) == len(dst_dims)

        # Create new grid, and perform reshape on blocks
        # to simplify access to source blocks.
        new_grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__)
        reshaped_src = arr.blocks.reshape(new_grid.grid_shape)
        result = BlockArray(new_grid, arr.cm)
        for entry in new_grid.get_entry_iterator():
            source: Block = reshaped_src[entry]
            target: Block = result.blocks[entry]
            target.oid = arr.cm.reshape(
                source.oid,
                target.shape,
                syskwargs={
                    "grid_entry": entry,
                    "grid_shape": new_grid.grid_shape,
                })
        return result
Beispiel #13
0
def test_array_rwd():
    """Write a small array to S3, read it back, then delete it and its grid."""
    bucket_name = "darrays"
    s3 = boto3.resource("s3", region_name="us-east-1")
    # The bucket must not exist before the test creates it.
    assert s3.Bucket(bucket_name) not in s3.buckets.all()
    s3.create_bucket(Bucket=bucket_name)

    X: np.ndarray = np.random.random(3)
    stored_X = StoredArrayS3("darrays/%s_X" % "__test__")
    # The whole array is stored as a single block.
    single_block_grid = ArrayGrid(shape=X.shape,
                                  block_shape=X.shape,
                                  dtype=np.float64.__name__)
    stored_X.put_grid(single_block_grid)
    stored_X.init_grid()
    stored_X.put_array(X)
    round_tripped = stored_X.get_array()
    assert np.allclose(X, round_tripped)
    stored_X.del_array()
    stored_X.delete_grid()
Beispiel #14
0
def test_bounds():
    """With a single device, every grid entry must map to the all-zero cluster entry."""
    grid: ArrayGrid = ArrayGrid(shape=(2, 6, 10),
                                block_shape=(1, 2, 5),
                                dtype="float32")
    for cluster_shape in [(1, ), (1, 1), (1, 1, 1), (1, 1, 1, 1)]:
        cyclic_grid: CyclicDeviceGrid = CyclicDeviceGrid(
            cluster_shape, "cpu", mock_device_ids(1))
        packed_grid: PackedDeviceGrid = PackedDeviceGrid(
            cluster_shape, "cpu", mock_device_ids(1))
        for entry in grid.get_entry_iterator():
            cyclic_entry = cyclic_grid.get_cluster_entry(entry, grid.grid_shape)
            assert cyclic_entry == (0,) * len(cyclic_grid.grid_shape)
            packed_entry = packed_grid.get_cluster_entry(entry, grid.grid_shape)
            assert packed_entry == (0,) * len(packed_grid.grid_shape)
Beispiel #15
0
 def _delete(self, filename, store_cls, remote_func):
     """Schedule ``remote_func`` once per block of a stored array.

     Args:
         filename: Name of the stored array.
         store_cls: Store class used to look up the array's grid.
         remote_func: Callable invoked as
             ``remote_func(filename, grid_entry, grid_meta, syskwargs=...)``
             for every grid entry.

     Returns:
         A BlockArray of dtype "dict" with one single-element block per grid
         entry, holding the object id returned by ``remote_func``.
     """
     grid = self._get_array_grid(filename, store_cls)
     result_grid = ArrayGrid(
         grid.grid_shape,
         # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated
         # in NumPy 1.20 and removed in NumPy 1.24.
         tuple(np.ones_like(grid.shape, dtype=int)),
         dtype=dict.__name__,
     )
     rarr = BlockArray(result_grid, self.cm)
     for grid_entry in grid.get_entry_iterator():
         rarr.blocks[grid_entry].oid = remote_func(
             filename,
             grid_entry,
             grid.to_meta(),
             syskwargs={"grid_entry": grid_entry, "grid_shape": grid.grid_shape},
         )
     return rarr
Beispiel #16
0
 def permutation(self, size, block_size):
     """Create a 1-D block array from the compute manager's ``permutation`` op.

     The array is first created with a block shape equal to its full shape
     and is reshaped to the requested block size before being returned.

     Args:
         size: Length of the array; also passed to the ``permutation`` op.
         block_size: Block length of the returned array.

     Returns:
         A 1-D BlockArray of dtype int64 with blocks of length ``block_size``.
     """
     full_shape = (size, )
     grid: ArrayGrid = ArrayGrid(shape=full_shape,
                                 block_shape=full_shape,
                                 dtype=np.int64.__name__)
     result = BlockArray(grid, self._cm)
     for entry in result.grid.get_entry_iterator():
         rng_params = list(self._rng.new_block_rng_params())
         target: Block = result.blocks[entry]
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         target.oid = self._cm.permutation(rng_params,
                                           size,
                                           syskwargs=placement)
     return result.reshape(block_shape=(block_size, ))
Beispiel #17
0
 def _write(self, ba: BlockArray, filename, remote_func):
     """Schedule ``remote_func`` once per block of ``ba``.

     Args:
         ba: The block array to write.
         filename: Name under which the array is stored.
         remote_func: Callable invoked as ``remote_func(block_oid, filename,
             grid_entry, grid_meta, syskwargs=...)`` for every grid entry.

     Returns:
         A BlockArray of dtype "dict" with one single-element block per grid
         entry of ``ba``, holding the object id returned by ``remote_func``.
     """
     grid = ba.grid
     # Builtin ``int`` replaces the ``np.int`` alias, which was deprecated in
     # NumPy 1.20 and removed in NumPy 1.24.
     result_grid = ArrayGrid(grid.grid_shape,
                             tuple(np.ones_like(grid.shape, dtype=int)),
                             dtype=dict.__name__)
     rarr = BlockArray(result_grid, self.cm)
     for grid_entry in grid.get_entry_iterator():
         rarr.blocks[grid_entry].oid = remote_func(
             ba.blocks[grid_entry].oid,
             filename,
             grid_entry,
             grid.to_meta(),
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             })
     return rarr
Beispiel #18
0
def test_split(app_inst: ArrayApplication):
    """Split a single-block vector in two and reassemble it as a two-block array."""
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    values = [1.0, 2.0, 3.0, 4.0]
    x = app_inst.array(np.array([1.0, 2.0, 3.0, 4.0]), block_shape=(4,))
    placement = {
        "grid_entry": x.blocks[0].grid_entry,
        "grid_shape": x.blocks[0].grid_shape,
        "options": {"num_returns": 2}
    }
    halves = x.cm.split(x.blocks[0].oid,
                        2,
                        axis=0,
                        transposed=False,
                        syskwargs=placement)
    first_half, second_half = halves
    two_blocks = BlockArray(ArrayGrid((4,), (2,), x.dtype.__name__), x.cm)
    two_blocks.blocks[0].oid = first_half
    two_blocks.blocks[1].oid = second_half
    assert np.allclose(values, two_blocks.get())
Beispiel #19
0
 def __getattr__(self, item):
     """Resolve the computed attributes ``ndim`` and ``T``.

     Args:
         item: Name of the requested attribute.

     Returns:
         The number of axes for ``ndim``, or a transposed BlockArray for ``T``.

     Raises:
         ValueError: For ``__array_priority__`` and ``__array_struct__``.
         NotImplementedError: For every other attribute name.
     """
     if item in ("__array_priority__", "__array_struct__"):
         # This is triggered by a numpy array on the LHS.
         raise ValueError("Unable to covert numpy array to block array.")
     if item == "ndim":
         return len(self.shape)
     if item != "T":
         raise NotImplementedError(item)

     # Transpose: reverse the grid metadata, transpose the array of blocks,
     # then transpose every individual block.
     meta = self.grid.to_meta()
     meta["shape"] = tuple(reversed(meta["shape"]))
     meta["block_shape"] = tuple(reversed(meta["block_shape"]))
     transposed = BlockArray(ArrayGrid.from_meta(meta), self.cm)
     transposed.blocks = np.copy(self.blocks.T)
     for entry in transposed.grid.get_entry_iterator():
         transposed.blocks[entry] = transposed.blocks[entry].transpose()
     return transposed
Beispiel #20
0
    def __inequality__(self, op, other):
        """Compare this array block-wise with a scalar or a same-shaped array.

        Args:
            op: Comparison operator handed to each block's ``bop``.
            other: Right-hand operand; converted with
                ``check_or_convert_other`` before use.

        Returns:
            A BlockArray of dtype bool holding the block-wise results.
        """
        other = self.check_or_convert_other(other)
        other_is_scalar = other.shape == ()
        assert (other_is_scalar or other.shape == self.shape
                ), "Currently supports comparison with scalars only."
        out_shape = array_utils.broadcast(self.shape, other.shape).shape
        out_block_shape = array_utils.broadcast_block_shape(
            self.shape, other.shape, self.block_shape)
        result = BlockArray(
            ArrayGrid(out_shape, out_block_shape, bool.__name__), self.cm)
        for entry in result.grid.get_entry_iterator():
            # A scalar operand has one block that is reused for every entry.
            rhs: Block = (other.blocks.item()
                          if other_is_scalar else other.blocks[entry])
            result.blocks[entry] = self.blocks[entry].bop(op, rhs, args={})

        return result
Beispiel #21
0
def test_errors():
    """The last valid grid entry maps to the last device; one past it raises."""
    cluster_shape = (1, 2, 3)
    # ``np.prod`` replaces ``np.product``, which was deprecated in NumPy 1.25
    # and removed in NumPy 2.0.
    device_ids = mock_device_ids(int(np.prod(cluster_shape)))
    grid: ArrayGrid = ArrayGrid(shape=(8, 20, 12),
                                block_shape=(2, 5, 3),
                                dtype="float32")
    packed_grid: PackedDeviceGrid = PackedDeviceGrid(cluster_shape, "cpu",
                                                     device_ids)

    # The largest in-bounds grid entry.
    grid_shape = grid.grid_shape
    grid_entry = tuple(np.array(grid_shape, dtype=int) - 1)
    assert packed_grid.get_cluster_entry(grid_entry, grid_shape) == (0, 1, 2)

    # Step one past the end along the first axis.
    grid_entry = np.array(grid_shape, dtype=int) - 1
    grid_entry[0] += 1
    grid_entry = tuple(grid_entry)
    with pytest.raises(ValueError):
        # Out of bounds grid entry.
        packed_grid.get_cluster_entry(grid_entry, grid_shape)
Beispiel #22
0
    def __init__(self, source, sel: BasicSelection = None, block_shape: tuple = None):
        """Set up a selection over ``source``.

        Args:
            source: The block array being selected from.
            sel: The selection; defaults to one built from the source shape.
            block_shape: Block shape of the output; when omitted it is derived
                from the selection and the source block shape.
        """
        self._source: BlockArrayBase = source
        self._cm: ComputeManager = self._source.cm

        chosen = sel
        if chosen is None:
            chosen = BasicSelection.from_shape(self._source.shape)
        # Currently, this is all we support.
        assert len(chosen.axes) == len(self._source.shape)
        self.sel = chosen

        self.shape: tuple = self.sel.get_output_shape()
        if block_shape is not None:
            self.block_shape = block_shape
        else:
            self.block_shape = array_utils.block_shape_from_subscript(
                self.sel.selector(), self._source.block_shape
            )
        assert len(self.block_shape) == len(self.shape)
        self.grid: ArrayGrid = ArrayGrid(
            self.shape, self.block_shape, dtype=self._source.dtype.__name__
        )
Beispiel #23
0
 def broadcast_to(self, shape):
     """Broadcast this array's grid of blocks to ``shape``.

     Args:
         shape: The shape to broadcast against.

     Returns:
         A BlockArrayBase on the broadcast grid. Its ``blocks`` attribute is
         the operand view obtained from ``np.nditer`` over ``self.blocks``.
     """
     # Resulting array shape and block shape after broadcasting.
     b = array_utils.broadcast(self.shape, shape)
     result_block_shape = array_utils.broadcast_block_shape(
         self.shape, shape, self.block_shape)
     result: BlockArrayBase = BlockArrayBase(
         ArrayGrid(b.shape, result_block_shape, self.grid.dtype.__name__),
         self.cm)
     extras = []
     # Below taken directly from _broadcast_to in numpy's stride_tricks.py.
     # The iterator is given the result's grid shape as its iteration shape.
     it = np.nditer(
         (self.blocks, ),
         flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras,
         op_flags=['readonly'],
         itershape=result.grid.grid_shape,
         order='C')
     with it:
         # never really has writebackifcopy semantics
         # NOTE(review): presumably a view of self.blocks rather than a copy,
         # as in numpy's own _broadcast_to -- confirm.
         broadcast = it.itviews[0]
     result.blocks = broadcast
     return result
Beispiel #24
0
def _inspect_block_shape(nps_app_inst):
    """Print the computed block shape for a range of dtypes, shapes and clusters.

    For every combination, the block shape is computed with 64 cores per
    node and printed with the resulting grid shape and the array and block
    sizes in GB.

    Args:
        nps_app_inst: Application providing ``compute_block_shape``.
    """
    app = nps_app_inst
    dtypes = [np.float32, np.float64]
    shapes = [(10**9, 250), (10**4, 10**4), (10**7, 10), (10, 10**7)]
    cluster_shapes = [(1, 1), (2, 1), (4, 1), (16, 1)]
    cores_per_node = 64
    combos = itertools.product(dtypes, shapes, cluster_shapes)
    for dtype, shape, cluster_shape in combos:
        # ``np.prod`` replaces ``np.product``, which was deprecated in
        # NumPy 1.25 and removed in NumPy 2.0.
        num_cores = np.prod(cluster_shape) * cores_per_node
        block_shape = app.compute_block_shape(shape=shape,
                                              dtype=dtype,
                                              cluster_shape=cluster_shape,
                                              num_cores=num_cores)
        grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype.__name__)
        print()
        print("dtype=%s" % dtype.__name__,
              "cluster_shape=%s" % str(cluster_shape), "shape=%s" % str(shape))
        print("grid_shape", grid.grid_shape, "block_shape", block_shape)
        print("array size (GB)",
              np.prod(shape) * dtype().nbytes / 10**9, "block size (GB)",
              np.prod(block_shape) * dtype().nbytes / 10**9)
Beispiel #25
0
 def _sample_basic(self, rfunc_name, shape, block_shape, dtype,
                   rfunc_args) -> BlockArray:
     """Create a block array whose blocks are drawn by a random function.

     Args:
         rfunc_name: Name of the random function. "random" and "integers"
             take the sample size at a different argument position than
             the remaining functions.
         shape: Shape of the array, or ``None`` for a scalar (in which
             case ``block_shape`` must be ``None`` too).
         block_shape: Shape of each block; required when ``shape`` is given.
         dtype: Element type (a class); ``np.float64`` when omitted.
         rfunc_args: Arguments of the random function, without the size.

     Returns:
         A BlockArray with one randomly sampled block per grid entry.
     """
     if shape is None:
         assert block_shape is None
         shape = ()
         block_shape = ()
     else:
         assert block_shape is not None
     if dtype is None:
         dtype = np.float64
     assert isinstance(dtype, type)
     grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype=dtype.__name__)
     ba: BlockArray = BlockArray(grid, self._cm)
     for grid_entry in ba.grid.get_entry_iterator():
         rng_params = list(self._rng.new_block_rng_params())
         # Size and dtype to begin with.
         this_block_shape = grid.get_block_shape(grid_entry)
         # ``np.prod`` replaces ``np.product``, which was deprecated in
         # NumPy 1.25 and removed in NumPy 2.0.
         size = int(np.prod(this_block_shape))
         # Inconsistent param orderings.
         if rfunc_name == "random":
             rfunc_args_final = tuple([size] + list(rfunc_args))
         elif rfunc_name == "integers":
             # rfunc_args == (low, high, dtype, endpoint)
             rfunc_args_final = tuple(
                 list(rfunc_args[:2]) + [size] + list(rfunc_args[2:]))
         else:
             rfunc_args_final = tuple(list(rfunc_args) + [size])
         block: Block = ba.blocks[grid_entry]
         block.oid = self._cm.random_block(
             rng_params,
             rfunc_name,
             rfunc_args_final,
             this_block_shape,
             dtype,
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             },
         )
     return ba
Beispiel #26
0
 def predict(self, X: BlockArray):
     """Schedule one remote "xgb_predict" call per block of ``X``.

     Args:
         X: The input block array.

     Returns:
         A 1-D BlockArray with ``X.shape[0]`` entries, blocked like the
         first axis of ``X``. Block ``i`` holds the object id of the call
         made for the grid entry of ``X`` whose first coordinate is ``i``.
     """
     app: ArrayApplication = _instance()
     cm: ComputeManager = app.cm
     cm.register("xgb_predict", xgb_predict_remote, {})
     # The first block of the model is passed to every prediction call.
     model_block: Block = self.model.blocks[0]
     out_grid = ArrayGrid(shape=(X.shape[0], ),
                          block_shape=(X.block_shape[0], ),
                          dtype=nps.int.__name__)
     predictions: BlockArray = BlockArray(out_grid, cm)
     for entry in X.grid.get_entry_iterator():
         row = entry[0]
         features: Block = X.blocks[entry]
         target: Block = predictions.blocks[row]
         placement = {"grid_entry": entry, "grid_shape": X.grid.grid_shape}
         target.oid = cm.call("xgb_predict",
                              model_block.oid,
                              features.oid,
                              syskwargs=placement)
     return predictions
Beispiel #27
0
    def swapaxes(self, axis1, axis2):
        """Return a block array with two axes interchanged.

        Args:
            axis1: First axis.
            axis2: Second axis.

        Returns:
            A new BlockArray whose shape, block shape, grid of blocks and
            individual blocks all have ``axis1`` and ``axis2`` swapped.

        Raises:
            ValueError: If either axis is not smaller than the number of axes.
        """
        meta = self.grid.to_meta()
        new_shape = list(meta["shape"])
        new_block_shape = list(meta["block_shape"])
        ndim = len(new_shape)
        if axis1 >= ndim or axis2 >= ndim:
            raise ValueError("axis is larger than the array dimension")
        for dims in (new_shape, new_block_shape):
            dims[axis1], dims[axis2] = dims[axis2], dims[axis1]
        meta["shape"] = tuple(new_shape)
        meta["block_shape"] = tuple(new_block_shape)
        swapped_grid = ArrayGrid.from_meta(meta)

        # Swap the axes inside every block first, then swap the axes of the
        # array that holds the blocks.
        swapped_blocks = np.ndarray(self.blocks.shape, dtype="O")
        for entry in self.grid.get_entry_iterator():
            swapped_blocks[entry] = self.blocks[entry].swapaxes(axis1, axis2)
        swapped_blocks = swapped_blocks.swapaxes(axis1, axis2)

        return BlockArray(swapped_grid, self.cm, swapped_blocks)
Beispiel #28
0
 def _fast_element_wise(self, op_name, other):
     """Schedule a binary element-wise op directly, one call per block.

     Each result block copies the grid shape, rect and shape of the
     matching block of ``self`` and is created non-transposed.

     Args:
         op_name: Name of the binary operator.
         other: Right-hand operand, indexed with the grid entries of ``self``.

     Returns:
         A BlockArray with the shape and block shape of ``self`` and the
         output dtype of the operator.
     """
     out_dtype = array_utils.get_bop_output_type(op_name, self.dtype,
                                                 other.dtype)
     # Schedule the op first.
     out_blocks = np.empty(shape=self.grid.grid_shape, dtype=Block)
     for entry in self.grid.get_entry_iterator():
         lhs: Block = self.blocks[entry]
         rhs: Block = other.blocks[entry]
         out_block = Block(
             grid_entry=entry,
             grid_shape=lhs.grid_shape,
             rect=lhs.rect,
             shape=lhs.shape,
             dtype=out_dtype,
             transposed=False,
             cm=self.cm,
         )
         out_blocks[entry] = out_block
         placement = {
             "grid_entry": entry,
             "grid_shape": self.grid.grid_shape,
         }
         out_block.oid = self.cm.bop(
             op_name,
             lhs.oid,
             rhs.oid,
             lhs.transposed,
             rhs.transposed,
             axes={},
             syskwargs=placement,
         )
     out_grid = ArrayGrid(self.shape, self.block_shape, out_dtype.__name__)
     return BlockArray(out_grid, self.cm, blocks=out_blocks)
Beispiel #29
0
def get_parts_fs(filename: AnyStr, grid_meta: Dict):
    """Report which blocks of a stored array exist on the file system.

    Args:
        filename: Directory expected to hold one file per block.
        grid_meta: Grid metadata describing the array's blocks.

    Returns:
        ``None`` if ``filename`` is not a directory or no block file exists,
        the string "all" if every block file exists, and otherwise a uint32
        array of the grid entries whose files exist.
    """
    if not pathlib.Path(filename).is_dir():
        return None
    grid: ArrayGrid = ArrayGrid.from_meta(grid_meta)
    # This is a multi-dimensional array of blocks, so entries should be relatively small.
    assert np.all(np.array(grid.block_shape) < 2**32)
    present = []
    any_missing = False
    for entry in grid.get_entry_iterator():
        # Block files are named by their underscore-joined grid entry.
        entry_name = "_".join(str(index) for index in entry) + "." + ARRAY_FILETYPE
        entry_path = settings.pj(filename, entry_name)
        if pathlib.Path(entry_path).is_file():
            present.append(entry)
        else:
            any_missing = True
    if not any_missing:
        return "all"
    if len(present) == 0:
        return None
    return np.array(present, dtype=np.uint32)
Beispiel #30
0
 def diag(self, X: BlockArray) -> BlockArray:
     """Build a diagonal matrix from a vector, or extract a matrix diagonal.

     Args:
         X: A 1-D block array, or a square 2-D block array with square blocks.

     Returns:
         For 1-D input, a 2-D BlockArray whose diagonal blocks come from the
         ``diag`` op and whose other blocks come from the "zeros" op. For
         2-D input, a 1-D BlockArray built from the diagonal blocks of ``X``.

     Raises:
         ValueError: If ``X`` has neither 1 nor 2 axes.
     """
     num_axes = len(X.shape)
     if num_axes not in (1, 2):
         raise ValueError("X must have 1 or 2 axes.")

     if num_axes == 1:
         out_shape = (X.shape[0], X.shape[0])
         out_block_shape = (X.block_shape[0], X.block_shape[0])
         out_grid = ArrayGrid(out_shape, out_block_shape, X.dtype.__name__)
         out_meta = out_grid.to_meta()
         result = BlockArray(out_grid, self.cm)
         for entry in out_grid.get_entry_iterator():
             placement = {
                 "grid_entry": entry,
                 "grid_shape": out_grid.grid_shape
             }
             if np.all(np.diff(entry) == 0):
                 # This is a diagonal block.
                 result.blocks[entry].oid = self.cm.diag(
                     X.blocks[entry[0]].oid, syskwargs=placement)
             else:
                 result.blocks[entry].oid = self.cm.new_block(
                     "zeros", entry, out_meta, syskwargs=placement)
         return result

     assert X.shape[0] == X.shape[1], "X must be a square array."
     assert X.block_shape[0] == X.block_shape[
         1], "block_shape must be square."
     out_grid = ArrayGrid((X.shape[0], ), (X.block_shape[0], ),
                          X.dtype.__name__)
     result = BlockArray(out_grid, self.cm)
     for entry in X.grid.get_entry_iterator():
         if not np.all(np.diff(entry) == 0):
             # Off-diagonal blocks do not contribute to the diagonal.
             continue
         # This is a diagonal block.
         out_entry = entry[:1]
         placement = {
             "grid_entry": out_entry,
             "grid_shape": out_grid.grid_shape[:1]
         }
         result.blocks[out_entry].oid = self.cm.diag(
             X.blocks[entry].oid, syskwargs=placement)
     return result