Esempio n. 1
0
 def assign_references(self, dst_sel: BasicSelection, value):
     """Replace the blocks selected by ``dst_sel`` with copies of ``value``'s blocks.

     For every block position covered by the destination selection, the
     matching block of ``self._source`` is overwritten with the result of
     calling ``.copy()`` on the corresponding source block.

     Args:
         dst_sel: Selection over ``self._source`` identifying the destination
             region.
         value: An ``ArrayView`` or a ``BlockArrayBase``. A value of any other
             type matches neither branch below and nothing is assigned.
     """
     # TODO (hme): This seems overly complicated, but correct. Double check it.
     #  Also, revisit some of the variable names. They will likely
     #  be confusing in the future.
     # The destination has same block shape as value,
     # but the destination selection may not have the same shape as value.
     # May need to broadcast value to destination selection output shape.
     #
     # The builtin ``int`` is used as dtype throughout: ``np.int`` was only an
     # alias for it and has been removed from recent NumPy releases.
     dst_offset = dst_sel.position().value // np.array(
         self._source.block_shape, dtype=int)
     # Do we need to broadcast?
     if (isinstance(value, ArrayView) and
             dst_sel.get_output_shape() != value.sel.get_output_shape()):
         # Output shapes differ, so swap the view for whatever create() returns
         # and let the type checks below decide how to handle it.
         value = value.create()
     if isinstance(value, ArrayView):
         # This is the best case.
         # We don't need to create value to perform the reference copy.
         # No broadcasting required, so this should be okay.
         src_offset = value.sel.position().value // np.array(
             value._source.block_shape, dtype=int)
         src_inflated_shape = dst_sel.get_broadcastable_shape()
         src_inflated_block_shape = dst_sel.get_broadcastable_block_shape(
             value.block_shape)
         src_inflated_grid: ArrayGrid = ArrayGrid(src_inflated_shape,
                                                  src_inflated_block_shape,
                                                  self.grid.dtype.__name__)
         for src_grid_entry_inflated in src_inflated_grid.get_entry_iterator():
             # Num axes in value grid may be too small, so both the source and
             # destination entries are derived from the inflated grid entry.
             inflated_entry = np.array(src_grid_entry_inflated, dtype=int)
             dst_grid_entry = tuple((inflated_entry + dst_offset).tolist())
             src_grid_entry = tuple((inflated_entry + src_offset).tolist())
             self._source.blocks[dst_grid_entry] = value._source.blocks[
                 src_grid_entry].copy()
     elif isinstance(value, BlockArrayBase):
         # The value has already been created, so just leverage value's
         # existing grid iterator.
         if value.shape != dst_sel.get_output_shape():
             # Need to broadcast.
             src_ba: BlockArrayBase = value.broadcast_to(
                 dst_sel.get_output_shape())
         else:
             src_ba: BlockArrayBase = value
         src_inflated_shape = dst_sel.get_broadcastable_shape()
         src_inflated_block_shape = dst_sel.get_broadcastable_block_shape(
             src_ba.block_shape)
         src_inflated_grid: ArrayGrid = ArrayGrid(src_inflated_shape,
                                                  src_inflated_block_shape,
                                                  self.grid.dtype.__name__)
         # The inflated grid and the source grid are walked in lockstep:
         # the n-th inflated entry is paired with the n-th source entry.
         src_grid_entry_iterator = list(src_ba.grid.get_entry_iterator())
         for src_index, src_grid_entry_inflated in enumerate(
                 src_inflated_grid.get_entry_iterator()):
             src_grid_entry = src_grid_entry_iterator[src_index]
             dst_grid_entry = tuple(
                 (np.array(src_grid_entry_inflated, dtype=int) +
                  dst_offset).tolist())
             self._source.blocks[dst_grid_entry] = src_ba.blocks[
                 src_grid_entry].copy()
Esempio n. 2
0
    def diag(self, X: BlockArray) -> BlockArray:
        """Build a diagonal matrix from a vector, or extract a matrix diagonal.

        Args:
            X: A BlockArray with one or two axes.

        Returns:
            For a 1-axis input, a square BlockArray whose diagonal blocks come
            from ``self.cm.diag`` and whose other blocks are created with
            ``self.cm.new_block("zeros", ...)``. For a 2-axis input, a 1-axis
            BlockArray assembled from per-block diagonals.

        Raises:
            ValueError: If ``X`` has neither 1 nor 2 axes.
        """
        num_axes = len(X.shape)
        if num_axes not in (1, 2):
            raise ValueError("X must have 1 or 2 axes.")

        if num_axes == 1:
            side, block_side = X.shape[0], X.block_shape[0]
            out_grid = ArrayGrid(
                (side, side), (block_side, block_side), X.dtype.__name__
            )
            out_meta = out_grid.to_meta()
            out = BlockArray(out_grid, self.cm)
            for entry in out_grid.get_entry_iterator():
                placement = {"grid_entry": entry, "grid_shape": out_grid.grid_shape}
                # All coordinates equal means the block sits on the diagonal.
                on_diagonal = np.all(np.diff(entry) == 0)
                if on_diagonal:
                    out.blocks[entry].oid = self.cm.diag(
                        X.blocks[entry[0]].oid, 0, syskwargs=placement
                    )
                else:
                    out.blocks[entry].oid = self.cm.new_block(
                        "zeros", entry, out_meta, syskwargs=placement
                    )
            return out

        # Two axes: gather the diagonal piece of every block that touches it.
        diag_length = min(X.shape)
        concat_block_size = min(X.block_shape)
        diag_meta = array_utils.find_diag_output_blocks(X.blocks, diag_length)
        pieces_grid_shape = (len(diag_meta),)
        pieces = []
        for position, (block_indices, offset, total_elements) in enumerate(diag_meta):
            source_block = X.blocks[block_indices]
            piece_shape = (total_elements,)
            # Each piece is a single-block vector holding one block's diagonal.
            piece = BlockArray(
                ArrayGrid(piece_shape, piece_shape, source_block.dtype.__name__),
                self.cm,
            )
            piece.blocks[0].oid = self.cm.diag(
                source_block.oid,
                offset,
                syskwargs={
                    "grid_entry": (position,),
                    "grid_shape": pieces_grid_shape,
                },
            )
            pieces.append(piece)

        if len(pieces) > 1:
            # Several pieces: stitch them into one vector.
            return self.concatenate(
                pieces, axis=0, axis_block_size=concat_block_size
            )
        return pieces[0]
Esempio n. 3
0
def test_compute_block_shape(app_inst: ArrayApplication):
    """Check compute_block_shape against the ideal tall-skinny and square layouts.

    For each array size, the block shape returned by
    ``app_inst.cm.compute_block_shape`` and the grid shape it induces are
    compared with the values supplied by the ``ideal_*_shapes`` helpers.
    """
    cores_per_node = 64
    dtype = np.float32

    # Tall-skinny arrays, always on a fixed (16, 1) cluster.
    tall_cluster_shape = (16, 1)
    for gigabytes in (64, 128, 256, 512, 1024):
        label = "%sGB" % gigabytes
        node_count = gigabytes // 64
        shape, want_block_shape, want_grid_shape = ideal_tall_skinny_shapes(
            label, dtype)
        got_block_shape = app_inst.cm.compute_block_shape(
            shape, dtype, tall_cluster_shape, node_count * cores_per_node)
        got_grid: ArrayGrid = ArrayGrid(shape, got_block_shape, dtype.__name__)
        print(
            "tall-skinny",
            "cluster_shape=%s" % str(tall_cluster_shape),
            "grid_shape=%s" % str(want_grid_shape),
            "size=%s" % label,
            "bytes computed=%s" % (got_grid.nbytes() / 10**9),
        )
        assert want_grid_shape == got_grid.grid_shape
        assert want_block_shape == got_block_shape

    # Square arrays on a square cluster sized from the node count.
    for gigabytes in (4, 16, 64, 256, 1024):
        label = "%sGB" % gigabytes
        # Sizes below 64GB still use a single node.
        node_count = max(1, gigabytes // 64)
        cluster_side = int(np.sqrt(node_count))
        square_cluster_shape = (cluster_side, cluster_side)
        shape, want_block_shape, want_grid_shape = ideal_square_shapes(
            label, dtype)
        got_block_shape = app_inst.cm.compute_block_shape(
            shape, dtype, square_cluster_shape, node_count * cores_per_node)
        got_grid: ArrayGrid = ArrayGrid(shape, got_block_shape, dtype.__name__)
        print(
            "square",
            "cluster_shape=%s" % str(square_cluster_shape),
            "grid_shape=%s" % str(want_grid_shape),
            "size=%s" % label,
            "bytes computed=%s" % (got_grid.nbytes() / 10**9),
        )
        assert want_grid_shape == got_grid.grid_shape, "%s != %s" % (
            want_grid_shape,
            got_grid.grid_shape,
        )
        assert want_block_shape == got_block_shape, "%s != %s" % (
            want_block_shape,
            got_block_shape,
        )
Esempio n. 4
0
 def _stack_copy(self, X):
     """Expand a 1-axis BlockArray into a square 2-axis BlockArray.

     Every output block at grid position ``(i, j)`` is produced by
     ``self.cm.triu_copy`` from the ``j``-th block of ``X`` together with the
     output block's row and column counts.

     Args:
         X: A BlockArray with exactly one axis.

     Returns:
         A BlockArray of shape ``(n, n)`` where ``n = max(X.shape)``.
     """
     assert len(X.shape) == 1
     side = max(X.shape)
     block_side = X.block_shape[0]
     out_grid = ArrayGrid((side, side), (block_side, block_side), X.dtype.__name__)
     out = BlockArray(out_grid, self.cm)
     num_block_rows = out.blocks.shape[0]
     num_block_cols = out.blocks.shape[1]
     for i in range(num_block_rows):
         for j in range(num_block_cols):
             placement = {
                 "grid_entry": (i, j),
                 "grid_shape": out_grid.grid_shape,
             }
             target = out.blocks[(i, j)]
             rows, cols = target.shape[0], target.shape[1]
             # The source block depends only on the output column index.
             out.blocks[(i, j)].oid = self.cm.triu_copy(
                 X.blocks[j].oid, rows, cols, syskwargs=placement
             )
     return out
Esempio n. 5
0
def _inspect_block_shape(nps_app_inst):
    """Print the block shape chosen for a sweep of dtypes, shapes and clusters.

    For every combination of dtype, array shape and cluster shape, calls
    ``nps_app_inst.compute_block_shape`` and prints the resulting grid shape,
    block shape, and the array and block sizes in gigabytes.

    Args:
        nps_app_inst: Object exposing ``compute_block_shape(shape=..., dtype=...,
            cluster_shape=..., num_cores=...)``.
    """
    app = nps_app_inst
    dtypes = [np.float32, np.float64]
    shapes = [(10**9, 250), (10**4, 10**4), (10**7, 10), (10, 10**7)]
    cluster_shapes = [(1, 1), (2, 1), (4, 1), (16, 1)]
    cores_per_node = 64
    combos = itertools.product(dtypes, shapes, cluster_shapes)
    for dtype, shape, cluster_shape in combos:
        # np.prod is used instead of np.product, which NumPy 2.0 removed.
        num_cores = np.prod(cluster_shape) * cores_per_node
        block_shape = app.compute_block_shape(shape=shape,
                                              dtype=dtype,
                                              cluster_shape=cluster_shape,
                                              num_cores=num_cores)
        grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype.__name__)
        # Bytes per element, taken from a default-constructed scalar.
        itemsize = dtype().nbytes
        print()
        print(
            "dtype=%s" % dtype.__name__,
            "cluster_shape=%s" % str(cluster_shape),
            "shape=%s" % str(shape),
        )
        print("grid_shape", grid.grid_shape, "block_shape", block_shape)
        print(
            "array size (GB)",
            np.prod(shape) * itemsize / 10**9,
            "block size (GB)",
            np.prod(block_shape) * itemsize / 10**9,
        )
Esempio n. 6
0
 def predict(self, X: BlockArray):
     """Run the remote "xgb_predict" function on every block of ``X``.

     Args:
         X: BlockArray of inputs; its first-axis shape and block size define
             the layout of the result.

     Returns:
         A 1-axis BlockArray whose block at index ``i`` holds the object id
         returned by the remote call for the block of ``X`` at grid row ``i``.
     """
     app: ArrayApplication = _instance()
     cm: ComputeManager = app.cm
     cm.register("xgb_predict", xgb_predict_remote, {})
     # The model lives in the first block of self.model.
     model_block: Block = self.model.blocks[0]
     out_grid = ArrayGrid(
         shape=(X.shape[0], ),
         block_shape=(X.block_shape[0], ),
         dtype=nps.int.__name__,
     )
     predictions: BlockArray = BlockArray(out_grid, cm)
     for entry in X.grid.get_entry_iterator():
         row = entry[0]
         features: Block = X.blocks[entry]
         target: Block = predictions.blocks[row]
         placement = {
             "grid_entry": entry,
             "grid_shape": X.grid.grid_shape
         }
         target.oid = cm.call("xgb_predict",
                              model_block.oid,
                              features.oid,
                              syskwargs=placement)
     return predictions
Esempio n. 7
0
 def argop(self, op_name: str, arr: BlockArray, axis=None):
     """Apply an arg-style reduction named ``op_name`` over a 1-axis array.

     The blocks are folded one after another: each call to ``self.cm.arg_op``
     receives the current block, its slice within the array, and the pair
     returned by the previous call (``None, None`` for the first block).

     Args:
         op_name: Name of the reduction forwarded to ``self.cm.arg_op``.
         arr: BlockArray with at most one axis.
         axis: Must be ``None`` or ``0``.

     Returns:
         A scalar-shaped BlockArray whose single block holds the first element
         of the final reduction pair.

     Raises:
         NotImplementedError: If ``arr`` has more than one axis.
     """
     if len(arr.shape) > 1:
         raise NotImplementedError(
             "%s currently supports one-dimensional arrays." % op_name)
     axis = 0 if axis is None else axis
     assert axis == 0
     scalar_grid = ArrayGrid(shape=(), block_shape=(), dtype=np.int64.__name__)
     out = BlockArray(scalar_grid, self.cm)
     # Running pair carried from block to block.
     carry = (None, None)
     for entry in arr.grid.get_entry_iterator():
         span: slice = arr.grid.get_slice(entry)[0]
         current: Block = arr.blocks[entry]
         placement = {
             "grid_entry": entry,
             "grid_shape": arr.grid.grid_shape,
             "options": {
                 "num_returns": 2
             },
         }
         carry = self.cm.arg_op(op_name,
                                current.oid,
                                span,
                                *carry,
                                syskwargs=placement)
     best_index, _ = carry
     out.blocks[()].oid = best_index
     return out
Esempio n. 8
0
    def _broadcast_bop(self, op_name, arr_1, arr_2) -> BlockArray:
        """Apply a binary operation block by block, broadcasting if needed.

        Prefer other code paths where possible; the original author notes that
        NumPy's own implementation is faster than this one.

        Args:
            op_name: Name of binary operation.
            arr_1: A BlockArray.
            arr_2: A BlockArray.

        Returns:
            A BlockArray with one result block per grid entry.
        """
        shapes_differ = arr_1.shape != arr_2.shape
        if shapes_differ:
            # NOTE(review): the broadcast target is derived from the two *grid*
            # shapes and then handed to broadcast_to -- confirm that this is
            # the shape broadcast_to expects.
            target_shape = array_utils.broadcast_shape(
                arr_1.grid.grid_shape, arr_2.grid.grid_shape)
            arr_1, arr_2 = (arr_1.broadcast_to(target_shape),
                            arr_2.broadcast_to(target_shape))
        out_dtype = array_utils.get_bop_output_type(op_name, arr_1.dtype,
                                                    arr_2.dtype)
        out_grid = ArrayGrid(arr_1.shape, arr_1.block_shape, out_dtype.__name__)
        out = BlockArray(out_grid, self.cm)
        for entry in out.grid.get_entry_iterator():
            lhs: Block = arr_1.blocks[entry]
            rhs: Block = arr_2.blocks[entry]
            # The whole result block is replaced, not just its object id.
            out.blocks[entry] = lhs.bop(op_name, rhs, {})
        return out
Esempio n. 9
0
def test_computations():
    """Check cyclic and packed cluster-entry computation for many cluster shapes.

    A fixed three-axis grid is mapped onto every cluster shape in
    ``[1, 4] x [1, 6] x [1, 10]``. The cyclic layout is expected to map a grid
    entry to ``grid_entry % cluster_shape``; the packed layout is expected to
    match ``true_packed_entry`` below.
    """
    grid: ArrayGrid = ArrayGrid(shape=(2, 6, 10),
                                block_shape=(1, 2, 5),
                                dtype="float32")
    cluster_shapes = list(
        itertools.product(range(1, 5), range(1, 7), range(1, 11)))

    for cluster_shape in cluster_shapes:
        # np.prod is used instead of np.product, which NumPy 2.0 removed.
        device_ids = mock_device_ids(int(np.prod(cluster_shape)))
        cyclic_grid: CyclicDeviceGrid = CyclicDeviceGrid(
            cluster_shape, "cpu", device_ids)
        for grid_entry in grid.get_entry_iterator():
            cluster_entry = cyclic_grid.get_cluster_entry(
                grid_entry, grid.grid_shape)
            assert cluster_entry == tuple(
                np.array(grid_entry) % np.array(cluster_shape))

    def true_packed_entry(grid_entry, grid_shape, cluster_shape):
        """Reference result: scale the entry by cluster/grid size, then truncate."""
        grid_entry = np.array(grid_entry)
        grid_shape = np.array(grid_shape)
        cluster_shape = np.array(cluster_shape)
        r = grid_entry / grid_shape * cluster_shape
        return tuple(r.astype(int).tolist())

    for cluster_shape in cluster_shapes:
        device_ids = mock_device_ids(int(np.prod(cluster_shape)))
        packed_grid: PackedDeviceGrid = PackedDeviceGrid(
            cluster_shape, "cpu", device_ids)
        for grid_entry in grid.get_entry_iterator():
            cluster_entry = packed_grid.get_cluster_entry(
                grid_entry, grid.grid_shape)
            assert cluster_entry == true_packed_entry(grid_entry,
                                                      grid.grid_shape,
                                                      cluster_shape)
Esempio n. 10
0
def test_device_id():
    """Smoke-test that both device grids use every device of the cluster.

    Every entry of a fixed grid is mapped to a device id with the cyclic and
    then the packed device grid; in both cases the number of distinct device
    ids seen must equal the number of devices supplied.
    """
    cluster_shape = (1, 2, 3)
    grid: ArrayGrid = ArrayGrid(shape=(8, 20, 12),
                                block_shape=(2, 5, 3),
                                dtype="float32")

    # A basic smoke test.
    # np.prod is used instead of np.product, which NumPy 2.0 removed. The
    # device ids are built once; the original built the same list twice.
    device_ids: List[DeviceID] = mock_device_ids(int(np.prod(cluster_shape)))

    cyclic_grid: CyclicDeviceGrid = CyclicDeviceGrid(cluster_shape, "cpu",
                                                     device_ids)
    touched_devices = set()
    for grid_entry in grid.get_entry_iterator():
        touched_devices.add(
            cyclic_grid.get_device_id(grid_entry, grid.grid_shape))
    assert len(touched_devices) == len(device_ids)

    packed_grid: PackedDeviceGrid = PackedDeviceGrid(cluster_shape, "cpu",
                                                     device_ids)
    touched_devices = set()
    for grid_entry in grid.get_entry_iterator():
        touched_devices.add(
            packed_grid.get_device_id(grid_entry, grid.grid_shape))
    assert len(touched_devices) == len(device_ids)
Esempio n. 11
0
 def create_references(self, concrete_cls) -> BlockArrayBase:
     """Build a block array whose blocks reference this view's source blocks.

     Each destination block receives the ``oid`` and ``transposed`` attributes
     of the source block it corresponds to; no block data is copied here.

     Args:
         concrete_cls: Class used to construct the result, called as
             ``concrete_cls(self.grid, self._cm)``. ``None`` selects
             ``BlockArrayBase``.

     Returns:
         The newly constructed block array. When any dimension of
         ``self.shape`` is 0 it is returned without assigning any block.
     """
     # TODO (hme): Double check this.
     array_cls = BlockArrayBase if concrete_cls is None else concrete_cls
     dst_ba: BlockArrayBase = array_cls(self.grid, self._cm)
     if 0 in self.shape:
         return dst_ba
     # Block-grid offset of the selection within the source array. The builtin
     # ``int`` is used because the ``np.int`` alias was removed from NumPy.
     grid_offset = self.sel.position().value // np.array(
         self._source.block_shape, dtype=int)
     dst_inflated_shape = self.sel.get_broadcastable_shape()
     dst_inflated_block_shape = self.sel.get_broadcastable_block_shape(
         self.block_shape)
     dst_inflated_grid: ArrayGrid = ArrayGrid(dst_inflated_shape,
                                              dst_inflated_block_shape,
                                              self.grid.dtype.__name__)
     # The inflated grid and the destination grid are walked in lockstep:
     # the n-th inflated entry is paired with the n-th destination entry.
     dst_grid_entry_iterator = list(dst_ba.grid.get_entry_iterator())
     for dst_index, dst_inflated_grid_entry in enumerate(
             dst_inflated_grid.get_entry_iterator()):
         dst_grid_entry = dst_grid_entry_iterator[dst_index]
         src_grid_entry = tuple(
             (np.array(dst_inflated_grid_entry, dtype=int) +
              grid_offset).tolist())
         src_block = self._source.blocks[src_grid_entry]
         dst_ba.blocks[dst_grid_entry].oid = src_block.oid
         dst_ba.blocks[dst_grid_entry].transposed = src_block.transposed
     return dst_ba
Esempio n. 12
0
    def arange(self,
               start_in,
               shape,
               block_shape,
               step=1,
               dtype=None) -> BlockArray:
        """Create a blocked range starting at ``start_in`` with unit step.

        Args:
            start_in: First value of the range.
            shape: Shape of the result; ``shape[0]`` is the number of values.
            block_shape: Block shape of the result.
            step: Only ``1`` is accepted.
            dtype: NumPy scalar type of the result. When ``None`` it is derived
                from ``np.result_type(start_in, shape[0] + start_in)``.

        Returns:
            A BlockArray in which each block is produced by ``self.cm.arange``
            for that block's own start and stop values.
        """
        assert step == 1
        if dtype is None:
            inferred = np.result_type(start_in, shape[0] + start_in)
            # Turn the dtype's name back into the NumPy scalar type.
            dtype = getattr(np, str(inferred))

        grid = ArrayGrid(shape, block_shape, dtype.__name__)
        out = BlockArray(grid, self.cm)
        values_per_block = block_shape[0]
        # Generate ranges per block.
        for entry in grid.get_entry_iterator():
            placement = {
                "grid_entry": entry,
                "grid_shape": grid.grid_shape
            }
            first = start_in + values_per_block * entry[0]
            # The actual block length comes from the grid, not block_shape.
            last = first + grid.get_block_shape(entry)[0]
            out.blocks[entry].oid = self.cm.arange(first,
                                                   last,
                                                   step,
                                                   dtype,
                                                   syskwargs=placement)
        return out
Esempio n. 13
0
 def from_oid(cls, oid, shape, dtype, cm):
     """Wrap an existing object id in a single-block BlockArray.

     Args:
         oid: Object id to assign to the block.
         shape: Shape of the array; also used as its block shape.
         dtype: Type whose ``__name__`` names the grid's dtype.
         cm: Compute manager passed to the BlockArray.

     Returns:
         A BlockArray whose block carries ``oid``.
     """
     # Block shape equals the array shape, so one block is expected.
     single_block_grid = ArrayGrid(shape, shape, dtype.__name__)
     wrapped = BlockArray(single_block_grid, cm)
     for position, entry in enumerate(single_block_grid.get_entry_iterator()):
         # Fail if the grid ever yields a second entry.
         assert position == 0
         wrapped.blocks[entry].oid = oid
     return wrapped
Esempio n. 14
0
    def triu(self, X: BlockArray):
        """Compute ``triu`` of ``X`` block by block.

        A 1-axis input is first expanded with ``self._stack_copy`` and then
        processed as a matrix. For a matrix, three passes fill the output:
        blocks on the diagonal, blocks below a diagonal block in the same
        block column, and finally every remaining block, which reuses the
        input block's object id unchanged.

        Args:
            X: BlockArray with one or two axes.

        Returns:
            A BlockArray with the same shape and block shape as the matrix
            input, or ``X`` itself when it has a single row.

        Raises:
            NotImplementedError: If ``X`` has more than two axes.
        """
        num_axes = len(X.shape)
        if num_axes == 1:
            return self.triu(self._stack_copy(X))
        if num_axes != 2:
            raise NotImplementedError()
        if X.shape[0] == 1:
            return X

        diag_meta = array_utils.find_diag_output_blocks(X.blocks, min(X.shape))
        out_grid = ArrayGrid(X.shape, X.block_shape, X.dtype.__name__)
        out = BlockArray(out_grid, self.cm)
        num_block_rows, num_block_cols = X.blocks.shape[0], X.blocks.shape[1]
        # Grid positions whose output block has already been assigned.
        done = set()

        # Pass 1: blocks that contain part of the diagonal.
        for block_indices, offset, total_elements in diag_meta:
            out.blocks[block_indices].oid = self.cm.triu(
                X.blocks[block_indices].oid,
                offset,
                False,
                total_elements,
                syskwargs={
                    "grid_entry": block_indices,
                    "grid_shape": out_grid.grid_shape,
                },
            )
            done.add(block_indices)

        # Pass 2: every block below a diagonal block, in the same block column.
        for block_indices, offset, total_elements in diag_meta:
            col = block_indices[1]
            for row in range(block_indices[0] + 1, num_block_rows):
                position = (row, col)
                # Re-process the output block if an earlier step already
                # wrote it; otherwise start from the input block.
                source = out if position in done else X
                out.blocks[position].oid = self.cm.triu(
                    source.blocks[position].oid,
                    offset,
                    True,
                    total_elements,
                    syskwargs={
                        "grid_entry": position,
                        "grid_shape": out_grid.grid_shape,
                    },
                )
                done.add(position)

        # Pass 3: untouched blocks simply share the input block's object id.
        for i in range(num_block_rows):
            for j in range(num_block_cols):
                if (i, j) not in done:
                    out.blocks[(i, j)].oid = X.blocks[(i, j)].oid
        return out
Esempio n. 15
0
def indirect_tsr(app: ArrayApplication, X: BlockArray, reshape_output=True):
    """Compute the R factor of a blocked 2-axis array in two QR stages.

    Stage one calls ``app.cm.qr`` in ``"r"`` mode once per block row of ``X``;
    stage two calls it once more on all stage-one results to produce a single
    R block.

    Args:
        app: Application whose compute manager performs the QR calls.
        X: BlockArray with two axes whose row block size is at least its
            number of columns.
        reshape_output: When true and the column block size of ``X`` differs
            from its column count, the single-block R is reshaped to square
            blocks of that size.

    Returns:
        A BlockArray of shape ``(X.shape[1], X.shape[1])``.
    """
    assert len(X.shape) == 2
    # TODO (hme): This assertion is temporary and ensures returned
    #  shape of qr of block is correct.
    assert X.block_shape[0] >= X.shape[1]

    grid_shape = X.grid.grid_shape
    num_block_rows, num_block_cols = grid_shape[0], grid_shape[1]

    # Stage one: one R per block row, computed from all blocks in that row.
    per_row_R_oids = []
    for i in range(num_block_rows):
        row_oids = [X.blocks[i, j].oid for j in range(num_block_cols)]
        row_placement = {
            "grid_entry": (i, 0),
            "grid_shape": (num_block_rows, 1),
            "options": {
                "num_returns": 1
            },
        }
        per_row_R_oids.append(
            app.cm.qr(*row_oids, mode="r", axis=1, syskwargs=row_placement))

    # Stage two: combine the per-row R factors into one single-block R.
    # TODO (hme): Communication may be inefficient due to redundancy of data.
    num_cols = X.shape[1]
    col_block_size = X.block_shape[1]
    R_shape = (num_cols, num_cols)
    R_block_shape = (col_block_size, col_block_size)
    single_block_R = BlockArray(
        ArrayGrid(shape=R_shape, block_shape=R_shape, dtype=X.dtype.__name__),
        app.cm)
    final_placement = {
        "grid_entry": (0, 0),
        "grid_shape": (1, 1),
        "options": {
            "num_returns": 1
        },
    }
    single_block_R.blocks[0, 0].oid = app.cm.qr(*per_row_R_oids,
                                                mode="r",
                                                axis=0,
                                                syskwargs=final_placement)

    # If blocking is "tall-skinny," then we're done; otherwise optionally
    # re-block R to match the column blocking of X.
    if R_shape != R_block_shape and reshape_output:
        return single_block_R.reshape(R_shape, block_shape=R_block_shape)
    return single_block_R
Esempio n. 16
0
 def diag(self, X: BlockArray) -> BlockArray:
     """Build a diagonal matrix from a vector, or extract a square matrix's diagonal.

     Args:
         X: BlockArray with one axis, or a square BlockArray with square
             blocks.

     Returns:
         For a 1-axis input, a square BlockArray with ``self.cm.diag`` results
         on the diagonal blocks and ``"zeros"`` blocks elsewhere. For a 2-axis
         input, a 1-axis BlockArray built from the diagonal blocks of ``X``.

     Raises:
         ValueError: If ``X`` has neither 1 nor 2 axes.
     """
     num_axes = len(X.shape)
     if num_axes == 1:
         side, block_side = X.shape[0], X.block_shape[0]
         out_grid = ArrayGrid((side, side), (block_side, block_side),
                              X.dtype.__name__)
         out_meta = out_grid.to_meta()
         out = BlockArray(out_grid, self.cm)
         for entry in out_grid.get_entry_iterator():
             placement = {
                 "grid_entry": entry,
                 "grid_shape": out_grid.grid_shape
             }
             # All coordinates equal means the block sits on the diagonal.
             on_diagonal = np.all(np.diff(entry) == 0)
             if on_diagonal:
                 out.blocks[entry].oid = self.cm.diag(
                     X.blocks[entry[0]].oid, syskwargs=placement)
             else:
                 out.blocks[entry].oid = self.cm.new_block(
                     "zeros", entry, out_meta, syskwargs=placement)
     elif num_axes == 2:
         assert X.shape[0] == X.shape[1], "X must be a square array."
         assert X.block_shape[0] == X.block_shape[
             1], "block_shape must be square."
         out_grid = ArrayGrid((X.shape[0], ), (X.block_shape[0], ),
                              X.dtype.__name__)
         out = BlockArray(out_grid, self.cm)
         out_grid_shape = out_grid.grid_shape[:1]
         for entry in X.grid.get_entry_iterator():
             # Off-diagonal blocks contribute nothing to the result.
             if not np.all(np.diff(entry) == 0):
                 continue
             out_entry = entry[:1]
             placement = {
                 "grid_entry": out_entry,
                 "grid_shape": out_grid_shape
             }
             out.blocks[out_entry].oid = self.cm.diag(
                 X.blocks[entry].oid, syskwargs=placement)
     else:
         raise ValueError("X must have 1 or 2 axes.")
     return out
Esempio n. 17
0
 def vec_from_oids(self, oids, shape, block_shape, dtype):
     """Wrap object ids in a BlockArray and re-block it if requested.

     The array is first created with its block shape equal to ``shape``; the
     grid entries are then assigned ``oids`` in iteration order.

     Args:
         oids: Indexable collection of object ids.
         shape: Shape of the array.
         block_shape: Desired block shape of the returned array.
         dtype: Type whose ``__name__`` names the grid's dtype.

     Returns:
         The BlockArray, reshaped to ``block_shape`` when that differs from
         ``shape``.
     """
     initial_grid = ArrayGrid(shape=shape, block_shape=shape,
                              dtype=dtype.__name__)
     vec = BlockArray(initial_grid, self.cm)
     # Make sure resulting grid shape is a vector (1 dimensional): at most one
     # axis of the grid may hold more than a single block.
     grid_shape = vec.grid.grid_shape
     assert np.sum(grid_shape) == (max(grid_shape) + len(grid_shape) - 1)
     for position, entry in enumerate(vec.grid.get_entry_iterator()):
         vec.blocks[entry].oid = oids[position]
     return vec.reshape(block_shape=block_shape) if block_shape != shape else vec
Esempio n. 18
0
 def from_np(cls, arr, block_shape, copy, cm):
     """Create a BlockArray from a NumPy array by storing each block slice.

     Args:
         arr: Source NumPy array.
         block_shape: Block shape of the resulting array.
         copy: When true, each slice is copied with ``np.copy`` before being
             stored.
         cm: Compute manager; ``cm.put`` stores each block.

     Returns:
         A BlockArray whose blocks hold the object ids returned by ``cm.put``.
     """
     dtype_name = str(arr.dtype)
     grid = ArrayGrid(arr.shape, block_shape, dtype_name)
     out = BlockArray(grid, cm)
     for entry in grid.get_entry_iterator():
         chunk = arr[grid.get_slice(entry)]
         payload = np.copy(chunk) if copy else chunk
         out.blocks[entry].oid = cm.put(payload)
         # Record the NumPy scalar type that matches the array's dtype name.
         out.blocks[entry].dtype = getattr(np, dtype_name)
     return out
Esempio n. 19
0
def train(params: Dict, data: NumsDMatrix, *args, evals=(), **kwargs):
    """Launch one remote "xgb_train" call per row block of the training data.

    Args:
        params: Parameters forwarded unchanged to every remote call.
        data: Container exposing the feature matrix as ``data.X`` (two axes)
            and the labels as ``data.y`` (one axis, or two axes with a single
            column). Both must have the same number of rows and the same row
            block size.
        *args: Extra positional arguments, forwarded to the remote call as a
            single tuple.
        evals: Iterable of ``(eval_X, eval_y, eval_method)`` triples. Each
            evaluation array is reshaped to a single block if it is not one
            already, and the triples are flattened into trailing arguments of
            the remote call.
        **kwargs: Extra keyword arguments, forwarded to the remote call as a
            single dict.

    Returns:
        A 1-axis BlockArray with one block per row block of ``X``; each block
        holds the object id returned by its remote call.
    """
    X: BlockArray = data.X
    y: BlockArray = data.y
    assert len(X.shape) == 2
    # Features and labels must agree in row count and row blocking, because
    # blocks are paired by row-block index below. (The row-count check used to
    # compare X.shape[0] with itself and so could never fail.)
    assert X.shape[0] == y.shape[0] and X.block_shape[0] == y.block_shape[0]
    assert len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1)

    app: ArrayApplication = _instance()
    cm: ComputeManager = app.cm
    cm.register("xgb_train", xgb_train_remote, {})

    # Start tracker with one worker per row block of X.
    num_workers = X.grid.grid_shape[0]
    env = _start_rabit_tracker(num_workers)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    evals_flat = []
    for eval_X, eval_y, eval_method in evals:
        # Each evaluation array must be a single block so that .item() works.
        if eval_X.shape != eval_X.block_shape:
            eval_X = eval_X.reshape(shape=eval_X.shape,
                                    block_shape=eval_X.shape)
        if eval_y.shape != eval_y.block_shape:
            eval_y = eval_y.reshape(shape=eval_y.shape,
                                    block_shape=eval_y.shape)
        eval_X_oid = eval_X.blocks.item().oid
        eval_y_oid = eval_y.blocks.item().oid
        evals_flat += [eval_X_oid, eval_y_oid, eval_method]

    # Re-block X so that every block spans all columns.
    X: BlockArray = X.reshape(block_shape=(X.block_shape[0], X.shape[1]))
    result: BlockArray = BlockArray(
        ArrayGrid(shape=(X.grid.grid_shape[0], ),
                  block_shape=(1, ),
                  dtype="dict"), cm)
    for grid_entry in X.grid.get_entry_iterator():
        X_block: Block = X.blocks[grid_entry]
        i = grid_entry[0]
        if len(y.shape) == 1:
            y_block: Block = y.blocks[i]
        else:
            y_block: Block = y.blocks[i, 0]
        syskwargs = {"grid_entry": grid_entry, "grid_shape": X.grid.grid_shape}
        result.blocks[i].oid = cm.call("xgb_train",
                                       X_block.oid,
                                       y_block.oid,
                                       rabit_args,
                                       params,
                                       args,
                                       kwargs,
                                       *evals_flat,
                                       syskwargs=syskwargs)
    return result
Esempio n. 20
0
 def read_csv(self,
              filename,
              dtype=float,
              delimiter=",",
              has_header=False,
              num_workers=4):
     """Read a CSV file in byte-range batches, one remote call per batch.

     Args:
         filename: Path of the file to read.
         dtype: Type forwarded to the remote reader; its ``__name__`` names
             the dtype of the resulting grids.
         delimiter: Field delimiter forwarded to the remote reader.
         has_header: Header flag forwarded to the remote reader.
         num_workers: Number of batches the file is split into.

     Returns:
         A list of single-block BlockArrays, one per batch whose reported
         shape has a non-zero first dimension.
     """
     total_bytes = storage_utils.get_file_size(filename)
     plan: storage_utils.Batch = storage_utils.Batch.from_num_batches(
         total_bytes, num_workers)
     block_oids = []
     shape_oids = []
     for worker, (byte_start, byte_end) in enumerate(plan.batches):
         placement = {
             "grid_entry": (worker, ),
             "grid_shape": (num_workers, ),
             "options": {
                 "num_returns": 2
             },
         }
         block_oid, shape_oid = self.cm.call(
             "read_csv_block",
             filename,
             byte_start,
             byte_end,
             dtype,
             delimiter,
             has_header,
             syskwargs=placement,
         )
         block_oids.append(block_oid)
         shape_oids.append(shape_oid)

     # Fetch the shapes so that empty batches can be dropped.
     shapes = self.cm.get(shape_oids)
     arrays = []
     for index, block_shape in enumerate(shapes):
         if block_shape[0] == 0:
             continue
         grid = ArrayGrid(shape=block_shape,
                          block_shape=block_shape,
                          dtype=dtype.__name__)
         arr = BlockArray(grid, self.cm)
         for position, entry in enumerate(grid.get_entry_iterator()):
             # Fail if the grid ever yields a second entry.
             assert position == 0
             arr.blocks[entry].oid = block_oids[index]
         arrays.append(arr)
     return arrays
Esempio n. 21
0
def test_array_rwd():
    """Round-trip a small random vector through StoredArrayS3.

    Creates the "darrays" bucket, writes the grid and array, checks that the
    array read back is close to the original, then deletes array and grid.
    """
    bucket_name = "darrays"
    s3 = boto3.resource("s3", region_name="us-east-1")
    # The bucket must not exist before the test creates it.
    assert s3.Bucket(bucket_name) not in s3.buckets.all()
    s3.create_bucket(Bucket=bucket_name)

    expected: np.ndarray = np.random.random(3)
    store = StoredArrayS3("darrays/%s_X" % "__test__")
    single_block_grid = ArrayGrid(
        shape=expected.shape,
        block_shape=expected.shape,
        dtype=np.float64.__name__,
    )
    store.put_grid(single_block_grid)
    store.init_grid()
    store.put_array(expected)
    assert np.allclose(expected, store.get_array())
    store.del_array()
    store.delete_grid()
Esempio n. 22
0
 def empty(cls, shape, block_shape, dtype, cm: ComputeManager):
     """Create a BlockArray whose blocks are produced by ``cm.empty``.

     Args:
         shape: Shape of the array.
         block_shape: Block shape of the array.
         dtype: Type whose ``__name__`` names the grid's dtype.
         cm: Compute manager that creates each block.

     Returns:
         A BlockArray with one ``cm.empty`` result per grid entry.
     """
     grid = ArrayGrid(shape=shape,
                      block_shape=block_shape,
                      dtype=dtype.__name__)
     meta = grid.to_meta()
     out = BlockArray(grid, cm)
     for entry in grid.get_entry_iterator():
         placement = {
             "grid_entry": entry,
             "grid_shape": grid.grid_shape
         }
         out.blocks[entry].oid = cm.empty(entry, meta, syskwargs=placement)
     return out
Esempio n. 23
0
def test_bounds():
    """Every grid entry must map to the all-zeros cluster entry.

    The cluster shapes tried here have extent 1 along every axis, for
    ranks 1 through 4, so index 0 is the only valid cluster coordinate.
    """
    array_grid: ArrayGrid = ArrayGrid(
        shape=(2, 6, 10), block_shape=(1, 2, 5), dtype="float32"
    )
    all_ones_cluster_shapes = [(1, ), (1, 1), (1, 1, 1), (1, 1, 1, 1)]
    for cluster_shape in all_ones_cluster_shapes:
        cyclic: CyclicDeviceGrid = CyclicDeviceGrid(
            cluster_shape, "cpu", mock_device_ids(1))
        packed: PackedDeviceGrid = PackedDeviceGrid(
            cluster_shape, "cpu", mock_device_ids(1))
        for entry in array_grid.get_entry_iterator():
            # Check the cyclic layout first, then the packed one, per entry.
            for device_grid in (cyclic, packed):
                mapped = device_grid.get_cluster_entry(
                    entry, array_grid.grid_shape)
                assert mapped == (0, ) * len(device_grid.grid_shape)
Esempio n. 24
0
 def _delete(self, filename, store_cls, remote_func):
     """Call ``remote_func`` once per block of the array stored at ``filename``.

     Args:
         filename: Identifier of the stored array.
         store_cls: Store class forwarded to ``self._get_array_grid`` to
             look up the array's grid.
         remote_func: Callable invoked as
             ``remote_func(filename, grid_entry, grid_meta, syskwargs=...)``;
             its return value is stored as the object id of the matching
             result block.

     Returns:
         A BlockArray holding one result per block of the stored array.
     """
     grid = self._get_array_grid(filename, store_cls)
     # The result array has one element per block of the source grid:
     # its shape is the source grid shape and every block has extent 1.
     # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
     # NumPy 1.24 and was only ever an alias for ``int``.
     result_grid = ArrayGrid(
         grid.grid_shape,
         tuple(np.ones_like(grid.shape, dtype=int)),
         dtype=dict.__name__,
     )
     rarr = BlockArray(result_grid, self.cm)
     for grid_entry in grid.get_entry_iterator():
         rarr.blocks[grid_entry].oid = remote_func(
             filename,
             grid_entry,
             grid.to_meta(),
             syskwargs={"grid_entry": grid_entry, "grid_shape": grid.grid_shape},
         )
     return rarr
Esempio n. 25
0
 def _write(self, ba: BlockArray, filename, remote_func):
     """Call ``remote_func`` once per block of ``ba`` to write it out.

     Args:
         ba: The BlockArray whose blocks are written.
         filename: Destination identifier forwarded to ``remote_func``.
         remote_func: Callable invoked as
             ``remote_func(block_oid, filename, grid_entry, grid_meta,
             syskwargs=...)``; its return value is stored as the object id
             of the matching result block.

     Returns:
         A BlockArray holding one result per block of ``ba``.
     """
     grid = ba.grid
     # The result array has one element per block of ``ba``: its shape is
     # the source grid shape and every block has extent 1.
     # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
     # NumPy 1.24 and was only ever an alias for ``int``.
     result_grid = ArrayGrid(grid.grid_shape,
                             tuple(np.ones_like(grid.shape, dtype=int)),
                             dtype=dict.__name__)
     rarr = BlockArray(result_grid, self.cm)
     for grid_entry in grid.get_entry_iterator():
         rarr.blocks[grid_entry].oid = remote_func(
             ba.blocks[grid_entry].oid,
             filename,
             grid_entry,
             grid.to_meta(),
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             })
     return rarr
Esempio n. 26
0
 def _new_array(
     self, op_name: str, shape: tuple, block_shape: tuple, dtype: np.dtype = None
 ):
     """Create a BlockArray whose blocks come from ``self.cm.new_block``.

     Args:
         op_name: Name of the creation operation, forwarded to ``new_block``.
         shape: Shape of the whole array.
         block_shape: Shape of a single block; must have the same number of
             axes as ``shape``.
         dtype: Element type; ``np.float64`` is used when omitted. Only its
             ``__name__`` is read, so a scalar type such as ``np.float64``
             is presumably expected here -- TODO confirm against callers.

     Returns:
         A BlockArray with one object id assigned per grid entry.
     """
     assert len(shape) == len(block_shape)
     element_type = np.float64 if dtype is None else dtype
     array_grid = ArrayGrid(shape, block_shape, element_type.__name__)
     # The grid metadata is the same for every block, so compute it once.
     meta = array_grid.to_meta()
     result = BlockArray(array_grid, self.cm)
     for entry in array_grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": array_grid.grid_shape}
         result.blocks[entry].oid = self.cm.new_block(
             op_name, entry, meta, syskwargs=placement
         )
     return result
Esempio n. 27
0
def test_split(app_inst: ArrayApplication):
    """Split a single 4-element block in two and reassemble it as two blocks."""
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    x = app_inst.array(np.array([1.0, 2.0, 3.0, 4.0]), block_shape=(4,))
    # Ask for two return values from the split of the only block.
    split_kwargs = {
        "grid_entry": x.blocks[0].grid_entry,
        "grid_shape": x.blocks[0].grid_shape,
        "options": {"num_returns": 2}
    }
    first_half, second_half = x.cm.split(
        x.blocks[0].oid, 2, axis=0, transposed=False, syskwargs=split_kwargs
    )
    # Rebuild the array as two blocks of two elements each.
    halves_grid = ArrayGrid((4,), (2,), x.dtype.__name__)
    rebuilt = BlockArray(halves_grid, x.cm)
    rebuilt.blocks[0].oid = first_half
    rebuilt.blocks[1].oid = second_half
    assert np.allclose([1.0, 2.0, 3.0, 4.0], rebuilt.get())
Esempio n. 28
0
 def permutation(self, size, block_size):
     """Create a 1-D permutation array of length ``size``.

     The array is first built on a grid with a single block spanning the
     whole array, then reshaped to blocks of ``block_size`` before it is
     returned.

     Args:
         size: Length of the array; also forwarded to ``self._cm.permutation``.
         block_size: Block length of the returned array.

     Returns:
         The result of reshaping the single-block array to
         ``block_shape=(block_size,)``.
     """
     full_shape = (size, )
     # Block shape equals the full shape on purpose: exactly one block.
     single_block_grid: ArrayGrid = ArrayGrid(shape=full_shape,
                                              block_shape=full_shape,
                                              dtype=np.int64.__name__)
     result = BlockArray(single_block_grid, self._cm)
     for entry in result.grid.get_entry_iterator():
         # Draw fresh RNG parameters for each block that is generated.
         params = list(self._rng.new_block_rng_params())
         target: Block = result.blocks[entry]
         placement = {
             "grid_entry": entry,
             "grid_shape": single_block_grid.grid_shape
         }
         target.oid = self._cm.permutation(params, size, syskwargs=placement)
     return result.reshape(block_shape=(block_size, ))
Esempio n. 29
0
    def map_uop(
        self,
        op_name: str,
        arr: BlockArray,
        out: BlockArray = None,
        where=True,
        args=None,
        kwargs=None,
    ) -> BlockArray:
        """Apply an element-wise unary operator to every block of an array.

        Args:
            op_name: Name of the element-wise unary operator.
            arr: The input BlockArray.
            out: Optional BlockArray that receives the result blocks. Its
                shape and block shape must match those of ``arr``.
            where: Must be ``True``; any other value is not supported yet.
            args: Positional args forwarded to the operator.
            kwargs: Keyword args forwarded to the operator.

        Returns:
            ``out`` when it was given, otherwise a new BlockArray.

        Raises:
            NotImplementedError: If ``where`` is anything other than ``True``.
        """
        if where is not True:
            raise NotImplementedError("'where' argument is not yet supported.")
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}
        src_shape, src_block_shape = arr.shape, arr.block_shape
        result_dtype = array_utils.get_uop_output_type(op_name, arr.dtype)
        assert len(src_shape) == len(src_block_shape)
        if out is not None:
            # Write into the caller's array, which must line up with the input.
            result = out
            result_grid = result.grid
            assert result.shape == arr.shape and result.block_shape == arr.block_shape
        else:
            result_grid = ArrayGrid(
                src_shape, src_block_shape, result_dtype.__name__
            )
            result = BlockArray(result_grid, self.cm)
        for entry in result_grid.get_entry_iterator():
            # TODO(hme): Faster to create ndarray first,
            #  and instantiate block array on return
            #  to avoid instantiating blocks on BlockArray initialization.
            source_block = arr.blocks[entry]
            result.blocks[entry] = source_block.uop_map(
                op_name, args=args, kwargs=kwargs
            )
        return result
Esempio n. 30
0
 def delete_fs(self, filename: str):
     """Delete every stored block of ``filename``, then its metadata.

     The metadata read through ``self._fs.read_meta_fs`` supplies the grid
     description and, for each grid entry, the address string of the device
     that holds the block.

     Args:
         filename: Identifier of the stored array.

     Returns:
         A BlockArray holding one ``delete_block_fs`` result per addressed
         grid entry.
     """
     meta = self._fs.read_meta_fs(filename)
     addresses = meta["addresses"]
     grid_meta = meta["grid_meta"]
     grid = ArrayGrid.from_meta(grid_meta)
     # The result array has one element per block of the stored grid: its
     # shape is the stored grid shape and every block has extent 1.
     # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
     # NumPy 1.24 and was only ever an alias for ``int``.
     result_grid = ArrayGrid(grid.grid_shape,
                             tuple(np.ones_like(grid.shape, dtype=int)),
                             dtype=dict.__name__)
     rarr = BlockArray(result_grid, self.cm)
     for grid_entry in addresses:
         # Run each deletion on the device recorded for that block.
         device_id: DeviceID = DeviceID.from_str(addresses[grid_entry])
         rarr.blocks[grid_entry].oid = self._fs.delete_block_fs(
             filename,
             grid_entry,
             grid_meta,
             syskwargs={"device_id": device_id})
     # Metadata goes last, after all block deletions have been issued.
     self._fs.delete_meta_fs(filename)
     return rarr