Esempio n. 1
0
 def assign_references(self, dst_sel: BasicSelection, value):
     """Replace blocks of the source array with copies of blocks from ``value``.

     For every grid entry covered by ``dst_sel``, the matching block of
     ``self._source`` is overwritten with a ``copy()`` of the corresponding
     block of ``value``.

     :param dst_sel: Selection describing the destination region. Its
         position is converted to a block offset by integer-dividing it by
         the source block shape.
     :param value: An ``ArrayView`` or a ``BlockArrayBase``. A value matching
         neither ``isinstance`` check below is not assigned.
     """
     # TODO (hme): This seems overly complicated, but correct. Double check it.
     #  Also, revisit some of the variable names. They will likely
     #  be confusing in the future.
     # The destination has same block shape as value,
     # but the destination selection may not have the same shape as value.
     # May need to broadcast value to destination selection output shape.
     # NOTE: the builtin ``int`` is used instead of the ``np.int`` alias,
     # which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
     dst_offset = dst_sel.position().value // np.array(
         self._source.block_shape, dtype=int)
     # Do we need to broadcast?
     if (isinstance(value, ArrayView) and
             (dst_sel.get_output_shape() != value.sel.get_output_shape())):
         value = value.create()
     if isinstance(value, ArrayView):
         # This is the best case.
         # We don't need to create value to perform the reference copy.
         # No broadcasting required, so this should be okay.
         src_offset = value.sel.position().value // np.array(
             value._source.block_shape, dtype=int)
         src_inflated_shape = dst_sel.get_broadcastable_shape()
         src_inflated_block_shape = dst_sel.get_broadcastable_block_shape(
             value.block_shape)
         src_inflated_grid: ArrayGrid = ArrayGrid(src_inflated_shape,
                                                  src_inflated_block_shape,
                                                  self.grid.dtype.__name__)
         for src_grid_entry_inflated in src_inflated_grid.get_entry_iterator(
         ):
             # Num axes in value grid may be too small.
             inflated_entry = np.array(src_grid_entry_inflated, dtype=int)
             # Shift the inflated entry by each array's own block offset.
             dst_grid_entry = tuple((inflated_entry + dst_offset).tolist())
             src_grid_entry = tuple((inflated_entry + src_offset).tolist())
             self._source.blocks[dst_grid_entry] = value._source.blocks[
                 src_grid_entry].copy()
     elif isinstance(value, BlockArrayBase):
         # The value has already been created, so just leverage value's existing grid iterator.
         if value.shape != dst_sel.get_output_shape():
             # Need to broadcast.
             src_ba: BlockArrayBase = broadcast_to(
                 value, dst_sel.get_output_shape())
         else:
             src_ba: BlockArrayBase = value
         src_inflated_shape = dst_sel.get_broadcastable_shape()
         src_inflated_block_shape = dst_sel.get_broadcastable_block_shape(
             src_ba.block_shape)
         src_inflated_grid: ArrayGrid = ArrayGrid(src_inflated_shape,
                                                  src_inflated_block_shape,
                                                  self.grid.dtype.__name__)
         # The inflated grid and value's own grid are walked in lockstep:
         # the n-th inflated entry is paired with the n-th entry of src_ba.
         src_grid_entry_iterator = list(src_ba.grid.get_entry_iterator())
         for src_index, src_grid_entry_inflated in \
                 enumerate(src_inflated_grid.get_entry_iterator()):
             src_grid_entry = src_grid_entry_iterator[src_index]
             dst_grid_entry = tuple(
                 (np.array(src_grid_entry_inflated, dtype=int) +
                  dst_offset).tolist())
             self._source.blocks[dst_grid_entry] = src_ba.blocks[
                 src_grid_entry].copy()
Esempio n. 2
0
 def argop(self, op_name: str, arr: BlockArray, axis=None):
     """Chain ``system.arg_op`` over the blocks of a one-dimensional array.

     Blocks are visited in grid-iterator order. Each call receives the pair
     returned for the previous block (``None, None`` for the first block).
     The first element of the final pair becomes the object id of the single
     block of the returned array.

     :param op_name: Name of the arg-operation, forwarded to the system.
     :param arr: Array to reduce; must not have more than one axis.
     :param axis: ``None`` (treated as ``0``) or ``0``.
     :return: A ``BlockArray`` with shape ``()`` and dtype name ``int64``.
     :raises NotImplementedError: If ``arr`` has more than one axis.
     """
     if len(arr.shape) > 1:
         raise NotImplementedError(
             "%s currently supports one-dimensional arrays." % op_name)
     axis = 0 if axis is None else axis
     assert axis == 0
     scalar_grid = ArrayGrid(shape=(), block_shape=(),
                             dtype=np.int64.__name__)
     out = BlockArray(scalar_grid, self.system)
     # Pair carried from one block's call into the next one.
     carry = (None, None)
     for entry in arr.grid.get_entry_iterator():
         # Only the first (and only) axis of the block's slice is needed.
         span: slice = arr.grid.get_slice(entry)[0]
         current: Block = arr.blocks[entry]
         carry = self.system.arg_op(
             op_name,
             current.oid,
             span,
             *carry,
             syskwargs={
                 "grid_entry": entry,
                 "grid_shape": arr.grid.grid_shape,
                 "options": {
                     "num_returns": 2
                 },
             })
     winner_oid, _ = carry
     out.blocks[()].oid = winner_oid
     return out
Esempio n. 3
0
 def reduce_axis(self, op_name, axis, keepdims=False):
     """Reduce this array along ``axis`` with the NumPy function ``op_name``.

     Each block is first reduced on its own via ``Block.reduce_axis``; the
     resulting array of blocks is then reduced along the same axis by
     calling the NumPy function named ``op_name`` on it.

     :param op_name: Name of an attribute of ``numpy`` (looked up on ``np``).
     :param axis: Axis to reduce.
     :param keepdims: Keep the reduced axis with size 1 instead of dropping it.
     :return: A new ``BlockArray`` holding the reduced blocks.

     NOTE(review): the shape computation only matches ``axis`` against
     non-negative axis indices, so a negative or ``None`` axis does not
     remove any axis from the result shape - confirm callers normalize it.
     """
     per_block = np.empty_like(self.blocks, dtype=Block)
     for entry in self.grid.get_entry_iterator():
         per_block[entry] = self.blocks[entry].reduce_axis(
             op_name, axis, keepdims=keepdims)

     # Drop the reduced axis from shape/block shape, or collapse it to 1.
     out_shape = []
     out_block_shape = []
     for dim, size in enumerate(self.shape):
         block_size = self.block_shape[dim]
         if dim == axis:
             if not keepdims:
                 continue
             size, block_size = 1, 1
         out_shape.append(size)
         out_block_shape.append(block_size)

     out_dtype = array_utils.get_reduce_output_type(op_name, self.dtype)
     out_grid = ArrayGrid(shape=tuple(out_shape),
                          block_shape=tuple(out_block_shape),
                          dtype=out_dtype.__name__)
     result = BlockArray(out_grid, self.system)
     reducer = getattr(np, op_name)
     reduced_blocks = reducer(per_block, axis=axis, keepdims=keepdims)
     if result.shape == ():
         # Scalar result: store the reduced value in the single grid cell.
         result.blocks[()] = reduced_blocks
     else:
         result.blocks = reduced_blocks
     return result
Esempio n. 4
0
 def _tensordot(self, other, axes):
     """Block-wise tensordot contracting the last ``axes`` axes of ``self``
     with the first ``axes`` axes of ``other``.

     For every output grid entry, the per-block ``tensordot`` results over
     all contracted grid positions are accumulated with ``+=``.

     :param other: Right-hand operand; its leading grid axes must equal the
         trailing grid axes of ``self`` (asserted).
     :param axes: Number of axes to contract.
     :return: A new ``BlockArray`` with the contracted result.

     NOTE(review): with ``axes == 0`` the slices ``[:-axes]`` / ``[-axes:]``
     evaluate to ``[:0]`` / ``[0:]``, so that case is not handled here.
     """
     lhs_grid_shape = self.grid.grid_shape
     rhs_grid_shape = other.grid.grid_shape
     this_axes, this_sum_axes = lhs_grid_shape[:-axes], lhs_grid_shape[-axes:]
     other_sum_axes, other_axes = rhs_grid_shape[:axes], rhs_grid_shape[axes:]
     assert this_sum_axes == other_sum_axes

     out_shape = tuple(self.shape[:-axes] + other.shape[axes:])
     out_block_shape = tuple(self.block_shape[:-axes] +
                             other.block_shape[axes:])
     out_dtype = array_utils.get_bop_output_type("tensordot",
                                                 self.dtype,
                                                 other.dtype)
     result_grid = ArrayGrid(shape=out_shape,
                             block_shape=out_block_shape,
                             dtype=out_dtype.__name__)
     assert result_grid.grid_shape == tuple(this_axes + other_axes)
     result = BlockArray(result_grid, self.system)

     # Enumerate grid indices of the free axes of each operand and of the
     # contracted axes.
     free_lhs = list(itertools.product(*map(range, this_axes)))
     free_rhs = list(itertools.product(*map(range, other_axes)))
     contracted = list(itertools.product(*map(range, this_sum_axes)))
     for i, j in itertools.product(free_lhs, free_rhs):
         accumulated = None
         for k in contracted:
             lhs_block: Block = self.blocks[tuple(i + k)]
             rhs_block: Block = other.blocks[tuple(k + j)]
             partial = lhs_block.tensordot(rhs_block, axes=axes)
             if accumulated is None:
                 accumulated = partial
                 continue
             accumulated += partial
         result.blocks[tuple(i + j)] = accumulated
     return result
Esempio n. 5
0
 def _sample_basic_sparse(self, density, format, shape, block_shape,
                          dtype) -> BlockArray:
     """Create a ``SparseBlockArray`` whose blocks come from
     ``system.random_block_sparse``.

     :param density: Forwarded to ``random_block_sparse``.
     :param format: Forwarded to ``random_block_sparse``.
     :param shape: Array shape, or ``None`` (then ``block_shape`` must also
         be ``None`` and both default to ``()``).
     :param block_shape: Block shape; required whenever ``shape`` is given.
     :param dtype: A Python/NumPy type; ``np.float64`` when ``None``.
     :return: The populated sparse block array.

     NOTE(review): each block shape is unpacked into exactly two values
     (``m, n``), so the ``shape=None`` default presumably cannot work with a
     grid that yields an entry - confirm.
     """
     if shape is not None:
         assert block_shape is not None
     else:
         assert block_shape is None
         shape, block_shape = (), ()
     dtype = np.float64 if dtype is None else dtype
     assert isinstance(dtype, type)
     grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype=dtype.__name__)
     ba: SparseBlockArray = SparseBlockArray(grid, self._system)
     for entry in ba.grid.get_entry_iterator():
         # Size and dtype to begin with.
         num_rows, num_cols = grid.get_block_shape(entry)
         placement = {
             "grid_entry": entry,
             "grid_shape": grid.grid_shape
         }
         ba.blocks[entry].oid = self._system.random_block_sparse(
             num_rows,
             num_cols,
             density,
             format,
             dtype,
             syskwargs=placement)
     return ba
Esempio n. 6
0
 def create_references(self, concrete_cls) -> BlockArrayBase:
     """Build a new array whose blocks reference the source blocks this view
     selects.

     No block data is created: each destination block receives the ``oid``
     and ``transposed`` flag of the source block it corresponds to.

     :param concrete_cls: Array class to instantiate; ``BlockArrayBase`` is
         used when ``None``.
     :return: The new array. It is returned with untouched blocks when
         ``self.shape`` contains a zero.
     """
     # TODO (hme): Double check this.
     array_cls = BlockArrayBase if concrete_cls is None else concrete_cls
     dst_ba: BlockArrayBase = array_cls(self.grid, self._system)
     if 0 in self.shape:
         return dst_ba
     # Block offset of the selection inside the source grid.
     # NOTE: the builtin ``int`` is used instead of the ``np.int`` alias,
     # which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
     grid_offset = self.sel.position().value // np.array(
         self._source.block_shape, dtype=int)
     dst_inflated_shape = self.sel.get_broadcastable_shape()
     dst_inflated_block_shape = self.sel.get_broadcastable_block_shape(
         self.block_shape)
     dst_inflated_grid: ArrayGrid = ArrayGrid(dst_inflated_shape,
                                              dst_inflated_block_shape,
                                              self.grid.dtype.__name__)
     # The inflated grid and the destination grid are walked in lockstep:
     # the n-th inflated entry is paired with the n-th destination entry.
     dst_grid_entry_iterator = list(dst_ba.grid.get_entry_iterator())
     for dst_index, dst_inflated_grid_entry in enumerate(
             dst_inflated_grid.get_entry_iterator()):
         dst_grid_entry = dst_grid_entry_iterator[dst_index]
         src_grid_entry = tuple(
             (np.array(dst_inflated_grid_entry, dtype=int) +
              grid_offset).tolist())
         src_block = self._source.blocks[src_grid_entry]
         dst_block = dst_ba.blocks[dst_grid_entry]
         dst_block.oid = src_block.oid
         dst_block.transposed = src_block.transposed
     return dst_ba
Esempio n. 7
0
 def from_oid(cls, oid, shape, dtype, system):
     """Wrap an existing object id in a ``BlockArray`` made of one block.

     The block shape is set equal to ``shape``; the loop asserts that the
     grid yields no entry beyond the first.

     :param oid: Object id to assign to the block.
     :param shape: Shape of the array (also used as its block shape).
     :param dtype: Type whose ``__name__`` names the grid dtype.
     :param system: System the array is bound to.
     :return: The wrapping ``BlockArray``.
     """
     whole_array_grid = ArrayGrid(shape, shape, dtype.__name__)
     wrapped = BlockArray(whole_array_grid, system)
     for position, entry in enumerate(whole_array_grid.get_entry_iterator()):
         # A block shape equal to the shape must not yield a second entry.
         assert position == 0
         wrapped.blocks[entry].oid = oid
     return wrapped
Esempio n. 8
0
 def _vecdot(self, other):
     """Dot product contracting the last axis of ``self`` with the first
     axis of ``other``.

     One ``tensordot`` (``axes=1``) is scheduled per block along the
     contracted axis, and the partial results are combined with
     ``system.sum_reduce`` into the block at the all-zeros grid entry of the
     result. Only blocks at index 0 on every non-contracted grid axis are
     read.

     :param other: Right-hand operand; ``other.shape[0]`` must equal
         ``self.shape[-1]`` (asserted).
     :return: A new ``BlockArray`` with dtype name taken from ``self.dtype``.
     """
     # The message indexes the same (last) axis that the assertion checks.
     # The previous ``self.shape[1]`` raised IndexError for a one-dimensional
     # ``self`` instead of reporting the mismatch.
     assert self.shape[-1] == other.shape[0], str(
         (self.shape[-1], other.shape[0]))
     result_shape = tuple(self.shape[:-1] + other.shape[1:])
     result_block_shape = tuple(self.block_shape[:-1] + other.block_shape[1:])
     result_grid = ArrayGrid(shape=result_shape,
                             block_shape=result_block_shape,
                             dtype=self.dtype.__name__)
     result = BlockArray(result_grid, self.system)
     self_num_axes = len(self.grid.grid_shape)
     other_num_axes = len(other.grid.grid_shape)
     oids = []
     for i in range(self.grid.grid_shape[-1]):
         # Index i on the contracted axis, 0 everywhere else.
         self_grid_entry = tuple(i if axis == self_num_axes-1 else 0
                                 for axis in range(self_num_axes))
         other_grid_entry = tuple(i if axis == 0 else 0 for axis in range(other_num_axes))
         self_block: Block = self.blocks[self_grid_entry]
         other_block: Block = other.blocks[other_grid_entry]
         if self_block.transposed != other_block.transposed:
             # The vectors are aligned if their transpositions satisfy the xor relation.
             if self_block.transposed:
                 # Use other grid entry for dot,
                 # because physically,
                 # other block is located on same node as self block.
                 sch_grid_entry = other_grid_entry
                 sch_grid_shape = other.grid.grid_shape
             elif other_block.transposed:
                 # Use self grid entry for dot.
                 sch_grid_entry = self_grid_entry
                 sch_grid_shape = self.grid.grid_shape
             else:
                 raise Exception("Impossible.")
         else:
             # They're either both transposed or not.
             # Either way, one will need to be transmitted, so transmit other.
             sch_grid_entry = self_grid_entry
             sch_grid_shape = self.grid.grid_shape
         dot_oid = self.system.bop("tensordot",
                                   a1=self_block.oid,
                                   a2=other_block.oid,
                                   a1_shape=self_block.shape,
                                   a2_shape=other_block.shape,
                                   a1_T=self_block.transposed,
                                   a2_T=other_block.transposed,
                                   axes=1,
                                   syskwargs={
                                       "grid_entry": sch_grid_entry,
                                       "grid_shape": sch_grid_shape
                                   })
         oids.append(dot_oid)
     # All partial products are summed into the first block of the result.
     result_grid_entry = tuple(0 for _ in range(len(result.grid.grid_shape)))
     result_oid = self.system.sum_reduce(*oids,
                                         syskwargs={
                                             "grid_entry": result_grid_entry,
                                             "grid_shape": result.grid.grid_shape
                                         })
     result.blocks[result_grid_entry].oid = result_oid
     return result
Esempio n. 9
0
    def indirect_tsr(self, X: BlockArray, reshape_output=True):
        """Compute an R factor of a two-dimensional block array in two stages.

        Stage one calls ``system.qr`` (``mode="r"``, ``axis=1``) once per row
        of blocks. Stage two calls ``system.qr`` (``mode="r"``, ``axis=0``)
        on all stage-one results to produce a single-block R.

        :param X: Two-dimensional array with ``block_shape[0] >= shape[1]``
            (both asserted).
        :param reshape_output: When true and the single-block shape differs
            from ``(block_shape[1], block_shape[1])``, the result is reshaped
            to that block shape.
        :return: The R factor as a ``BlockArray``.
        """
        assert len(X.shape) == 2
        # TODO (hme): This assertion is temporary and ensures returned
        #  shape of qr of block is correct.
        assert X.block_shape[0] >= X.shape[1]
        # Compute R for each block.
        grid_shape = X.grid.grid_shape
        num_block_rows, num_block_cols = grid_shape[0], grid_shape[1]
        shape = X.shape
        block_shape = X.block_shape
        R_oids = []
        # Assume no blocking along second dim.
        for i in range(num_block_rows):
            # Select a row according to block_shape.
            row_oids = [X.blocks[i, j].oid for j in range(num_block_cols)]
            row_R = self._system.qr(*row_oids,
                                    mode="r",
                                    axis=1,
                                    syskwargs={
                                        "grid_entry": (i, 0),
                                        "grid_shape": (num_block_rows, 1),
                                        "options": {
                                            "num_return_vals": 1
                                        }
                                    })
            R_oids.append(row_R)

        # Construct R by summing over R blocks.
        # TODO (hme): Communication may be inefficient due to redundancy of data.
        R_shape = (shape[1], shape[1])
        R_block_shape = (block_shape[1], block_shape[1])
        single_block_grid = ArrayGrid(shape=R_shape,
                                      block_shape=R_shape,
                                      dtype=X.dtype.__name__)
        tsR = BlockArray(single_block_grid, self._system)
        tsR.blocks[0, 0].oid = self._system.qr(*R_oids,
                                               mode="r",
                                               axis=0,
                                               syskwargs={
                                                   "grid_entry": (0, 0),
                                                   "grid_shape": (1, 1),
                                                   "options": {
                                                       "num_return_vals": 1
                                                   }
                                               })
        # If blocking is "tall-skinny," then we're done.
        if R_shape != R_block_shape and reshape_output:
            return tsR.reshape(shape=R_shape, block_shape=R_block_shape)
        return tsR
Esempio n. 10
0
def test_array_rwd():
    """Write a random vector to ``StoredArrayS3``, read it back, delete it."""
    expected: np.ndarray = np.random.random(3)
    store = StoredArrayS3("darrays/%s_X" % "__test__")
    # The whole vector is stored as a single block.
    single_block_grid = ArrayGrid(shape=expected.shape,
                                  block_shape=expected.shape,
                                  dtype=np.float64.__name__)
    store.put_grid(single_block_grid)
    store.init_grid()
    store.put_array(expected)
    fetched = store.get_array()
    assert np.allclose(expected, fetched)
    # Clean up the stored array and its grid.
    store.del_array()
    store.delete_grid()
Esempio n. 11
0
    def create_basic_single_step(self, concrete_cls) -> BlockArrayBase:
        """Materialize this view as a new array, one ``create_block`` call per
        destination block.

        For each non-empty destination block selection, the source blocks
        whose selection intersects it are collected and passed to
        ``System.create_block`` together with the selectors locating the
        overlap inside each source block and inside the destination block.

        :param concrete_cls: Array class to instantiate; ``BlockArrayBase`` is
            used when ``None``.
        :return: The new array. It is returned without creating any block
            when ``self.shape`` contains a zero.
        """
        array_cls = concrete_cls if concrete_cls is not None else BlockArrayBase
        dst_ba: BlockArrayBase = array_cls(self.grid, self._system)
        if 0 in self.shape:
            return dst_ba

        system: System = self._system
        source = self._source

        # One selection per source block, then clipped by this view's selection.
        src_sel_arr: np.ndarray = selection.BasicSelection.block_selection(
            source.shape, source.block_shape)
        # TODO(hme): The following op is very slow for integer subscripts of large arrays.
        src_sel_clipped: np.ndarray = src_sel_arr & self.sel
        assert src_sel_clipped.shape == source.grid.grid_shape

        bc_shape = self.sel.get_broadcastable_shape()
        bc_block_shape = self.sel.get_broadcastable_block_shape(dst_ba.block_shape)
        dst_grid_bc: ArrayGrid = ArrayGrid(bc_shape,
                                           bc_block_shape,
                                           self.grid.dtype.__name__)
        dst_sel_arr: np.ndarray = selection.BasicSelection.block_selection(
            bc_shape, bc_block_shape)
        # Destination block selections shifted by the view selection's position.
        dst_sel_offset: np.ndarray = dst_sel_arr + self.sel.position()
        dst_entries = list(dst_ba.grid.get_entry_iterator())
        for dst_index, bc_entry in enumerate(dst_grid_bc.get_entry_iterator()):
            dst_block_sel: BasicSelection = dst_sel_offset[bc_entry]
            if dst_block_sel.is_empty():
                continue
            overlap_arr = src_sel_clipped & dst_block_sel
            src_oids, src_params, dst_params = [], [], []
            for src_entry in source.grid.get_entry_iterator():
                overlap: BasicSelection = overlap_arr[src_entry]
                if overlap.is_empty():
                    continue
                src_block: Block = source.blocks[src_entry]
                src_oids.append(src_block.oid)
                # Overlap expressed relative to the source block's position.
                src_block_sel: BasicSelection = src_sel_arr[src_entry]
                src_local = overlap - src_block_sel.position()
                src_params.append((src_local.selector(), src_block.transposed))
                # Overlap expressed relative to the destination block's position.
                dst_local = overlap - dst_block_sel.position()
                dst_params.append((dst_local.selector(), False))
            dst_block: Block = dst_ba.blocks.reshape(dst_grid_bc.grid_shape)[bc_entry]
            placement = {
                "grid_entry": dst_entries[dst_index],
                "grid_shape": self.grid.grid_shape
            }
            dst_block.oid = system.create_block(
                *src_oids,
                src_params=src_params,
                dst_params=dst_params,
                dst_shape=dst_block.shape,
                dst_shape_bc=dst_block_sel.get_output_shape(),
                syskwargs=placement)
        return dst_ba
Esempio n. 12
0
 def _vec_from_oids(self, oids, shape, block_shape, dtype):
     """Build a ``BlockArray`` from object ids, then re-block it if needed.

     The array is first created with its block shape equal to ``shape``;
     ``oids[i]`` is assigned to the i-th grid entry. When ``block_shape``
     differs from ``shape`` the array is reshaped to ``block_shape``.

     :param oids: Indexable collection of object ids.
     :param shape: Shape of the array.
     :param block_shape: Desired block shape of the returned array.
     :param dtype: Type whose ``__name__`` names the grid dtype.
     :return: The assembled ``BlockArray``.
     """
     initial_grid = ArrayGrid(shape=shape,
                              block_shape=shape,
                              dtype=dtype.__name__)
     arr = BlockArray(initial_grid, self._system)
     # Make sure resulting grid shape is a vector (1 dimensional).
     assert np.sum(arr.grid.grid_shape) == (max(arr.grid.grid_shape) +
                                            len(arr.grid.grid_shape) - 1)
     for position, entry in enumerate(arr.grid.get_entry_iterator()):
         arr.blocks[entry].oid = oids[position]
     return (arr.reshape(block_shape=block_shape)
             if block_shape != shape else arr)
Esempio n. 13
0
    def from_np(cls, arr, block_shape, copy, system):
        """Build a ``SparseBlockArray`` from a NumPy array.

        Every grid slice of ``arr`` is converted to a
        ``scipy.sparse.csr_matrix`` and stored with ``system.put``.

        :param arr: Source array.
        :param block_shape: Block shape of the resulting array.
        :param copy: Not used by this implementation.
        :param system: System used to store the blocks.
        :return: The populated ``SparseBlockArray``.
        """
        dtype_name = str(arr.dtype)
        grid = ArrayGrid(arr.shape, block_shape, dtype_name)
        sparse_ba = SparseBlockArray(grid, system)
        for entry in grid.get_entry_iterator():
            csr_block = scipy.sparse.csr_matrix(arr[grid.get_slice(entry)])
            target = sparse_ba.blocks[entry]
            target.oid = system.put(csr_block)
            # The block dtype is the NumPy attribute named after arr's dtype.
            target.dtype = getattr(np, dtype_name)
        return sparse_ba
Esempio n. 14
0
 def diag(self, X: BlockArray) -> BlockArray:
     """Build a diagonal matrix from a vector, or extract the diagonal of a
     matrix, block by block.

     * One axis: the result is square. Diagonal grid entries are filled with
       ``system.diag`` of the matching block of ``X``; all other entries
       with ``system.new_block("zeros", ...)``.
     * Two axes: ``X`` must have equal shape and block-shape extents on both
       axes (asserted). Diagonal blocks are passed to ``system.diag``;
       off-diagonal blocks are skipped.

     :param X: Array with one or two axes.
     :return: The resulting ``BlockArray``.
     :raises ValueError: If ``X`` has neither one nor two axes.
     """
     num_axes = len(X.shape)
     if num_axes not in (1, 2):
         raise ValueError("X must have 1 or 2 axes.")

     if num_axes == 1:
         shape = (X.shape[0], X.shape[0])
         block_shape = (X.block_shape[0], X.block_shape[0])
         grid = ArrayGrid(shape, block_shape, X.dtype.__name__)
         grid_meta = grid.to_meta()
         rarr = BlockArray(grid, self.system)
         for grid_entry in grid.get_entry_iterator():
             placement = {
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             }
             row, col = grid_entry
             if row == col:
                 # This is a diagonal block.
                 block_oid = self.system.diag(X.blocks[row].oid,
                                              syskwargs=placement)
             else:
                 block_oid = self.system.new_block("zeros",
                                                   grid_entry,
                                                   grid_meta,
                                                   syskwargs=placement)
             rarr.blocks[grid_entry].oid = block_oid
         return rarr

     assert X.shape[0] == X.shape[1]
     assert X.block_shape[0] == X.block_shape[1]
     shape = (X.shape[0],)
     block_shape = (X.block_shape[0],)
     grid = ArrayGrid(shape, block_shape, X.dtype.__name__)
     rarr = BlockArray(grid, self.system)
     for grid_entry in X.grid.get_entry_iterator():
         row, col = grid_entry
         if row != col:
             # Off-diagonal blocks contribute nothing to the diagonal.
             continue
         # This is a diagonal block.
         out_grid_entry = grid_entry[:1]
         placement = {
             "grid_entry": out_grid_entry,
             "grid_shape": grid.grid_shape[:1]
         }
         rarr.blocks[out_grid_entry].oid = self.system.diag(
             X.blocks[grid_entry].oid, syskwargs=placement)
     return rarr
Esempio n. 15
0
 def _matvec(self, other):
     """Multiply this matrix by ``other``, which has one axis or two axes.

     For every row of blocks, one ``tensordot`` (``axes=1``) is scheduled
     per block of that row and the partial results are combined with
     ``system.sum_reduce`` into the row's block of the result.

     :param other: Right-hand operand; ``other.shape[0]`` must equal
         ``self.shape[1]`` (asserted). When it has two axes, only its blocks
         in grid column 0 are read.
     :return: A new ``BlockArray`` with dtype name taken from ``self.dtype``.
     """
     # Schedule block matmult on existing block nodes of the matrix.
     # This is cheaper than moving matrix and vec blocks to result node.
     assert self.shape[1] == other.shape[0], str(
         (self.shape[1], other.shape[0]))
     result_grid = ArrayGrid(
         shape=tuple(self.shape[:1] + other.shape[1:]),
         block_shape=tuple(self.block_shape[:1] + other.block_shape[1:]),
         dtype=self.dtype.__name__)
     result = BlockArray(result_grid, self.system)
     num_block_rows = self.grid.grid_shape[0]
     for i in range(num_block_rows):
         partials = []
         for j in range(self.grid.grid_shape[1]):
             entry = (i, j)
             lhs_block: Block = self.blocks[entry]
             if len(other.shape) == 2:
                 rhs_block: Block = other.blocks[(j, 0)]
                 result_grid_entry = (i, 0)
             else:
                 rhs_block: Block = other.blocks[j]
                 result_grid_entry = (i, )
             # Reverse grid shape and entry to obtain virtual layout of
             # matrix blocks when the matrix block is transposed.
             flipped = lhs_block.transposed
             sch_grid_shape = (tuple(reversed(self.grid.grid_shape))
                               if flipped else self.grid.grid_shape)
             sch_grid_entry = tuple(reversed(entry)) if flipped else entry
             partials.append(
                 self.system.bop("tensordot",
                                 a1=lhs_block.oid,
                                 a2=rhs_block.oid,
                                 a1_shape=lhs_block.shape,
                                 a2_shape=rhs_block.shape,
                                 a1_T=lhs_block.transposed,
                                 a2_T=rhs_block.transposed,
                                 axes=1,
                                 syskwargs={
                                     "grid_entry": sch_grid_entry,
                                     "grid_shape": sch_grid_shape
                                 }))
         # Sum the row's partial products on the result block's node.
         reduce_placement = {
             "grid_entry": result_grid_entry,
             "grid_shape": result.grid.grid_shape
         }
         result.blocks[result_grid_entry].oid = self.system.sum_reduce(
             *partials, syskwargs=reduce_placement)
     return result
Esempio n. 16
0
 def empty(cls, shape, block_shape, dtype, system):
     """Create a ``BlockArray`` whose blocks are produced by ``system.empty``.

     :param shape: Shape of the array.
     :param block_shape: Block shape of the array.
     :param dtype: Type whose ``__name__`` names the grid dtype.
     :param system: System used to create the blocks.
     :return: The new ``BlockArray``.
     """
     grid = ArrayGrid(shape=shape,
                      block_shape=block_shape,
                      dtype=dtype.__name__)
     grid_meta = grid.to_meta()
     arr = BlockArray(grid, system)
     for entry in grid.get_entry_iterator():
         placement = {
             "grid_entry": entry,
             "grid_shape": grid.grid_shape
         }
         arr.blocks[entry].oid = system.empty(entry,
                                              grid_meta,
                                              syskwargs=placement)
     return arr
Esempio n. 17
0
 def from_np(cls, arr, block_shape, copy, system):
     """Build a ``BlockArray`` from a NumPy array.

     Every grid slice of ``arr`` is stored with ``system.put``.

     :param arr: Source array.
     :param block_shape: Block shape of the resulting array.
     :param copy: When true, each slice is copied with ``np.copy`` before
         being stored; otherwise the slice of ``arr`` is passed as is.
     :param system: System used to store the blocks.
     :return: The populated ``BlockArray``.
     """
     dtype_name = str(arr.dtype)
     grid = ArrayGrid(arr.shape, block_shape, dtype_name)
     dense_ba = BlockArray(grid, system)
     for entry in grid.get_entry_iterator():
         piece = arr[grid.get_slice(entry)]
         payload = np.copy(piece) if copy else piece
         target = dense_ba.blocks[entry]
         target.oid = system.put(payload)
         # The block dtype is the NumPy attribute named after arr's dtype.
         target.dtype = getattr(np, dtype_name)
     return dense_ba
Esempio n. 18
0
def train(params: Dict, data: NumsDMatrix, *args, evals=(), **kwargs):
    """Schedule one ``"xgb_train"`` remote call per row block of ``data.X``.

    ``X`` is re-blocked so that each block spans all columns. Every row
    block is paired with the ``y`` block at the same row index and passed to
    the registered ``xgb_train`` function together with the rabit tracker
    arguments.

    :param params: Forwarded to the remote function.
    :param data: Provides ``X`` (two axes) and ``y`` (one axis, or two axes
        with a single column). ``X`` and ``y`` must have the same number of
        rows and the same row block size.
    :param args: Forwarded to the remote function as one tuple.
    :param evals: Iterable of ``(eval_X, eval_y, eval_method)`` triples. Each
        eval array is reshaped to a single block when it is not one already,
        and its block's object id is forwarded.
    :param kwargs: Forwarded to the remote function as one dict.
    :return: A ``BlockArray`` with dtype name ``"dict"`` holding one block
        per row block of ``X``.
    """
    X: BlockArray = data.X
    y: BlockArray = data.y
    assert len(X.shape) == 2
    # X and y must agree on row count and row blocking; the original
    # compared X.shape[0] with itself, which never failed.
    assert X.shape[0] == y.shape[0] and X.block_shape[0] == y.block_shape[0]
    assert len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1)

    app: ArrayApplication = _instance()
    system: System = app.system
    system.register("xgb_train", xgb_train_remote, {})

    # Start tracker
    num_workers = X.grid.grid_shape[0]
    env = _start_rabit_tracker(num_workers)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    # Flatten the eval triples into positional arguments of the remote call.
    evals_flat = []
    for eval_X, eval_y, eval_method in evals:
        if eval_X.shape != eval_X.block_shape:
            eval_X = eval_X.reshape(shape=eval_X.shape,
                                    block_shape=eval_X.shape)
        if eval_y.shape != eval_y.block_shape:
            eval_y = eval_y.reshape(shape=eval_y.shape,
                                    block_shape=eval_y.shape)
        eval_X_oid = eval_X.blocks.item().oid
        eval_y_oid = eval_y.blocks.item().oid
        evals_flat += [eval_X_oid, eval_y_oid, eval_method]

    # One block per row partition, spanning every column.
    X = X.reshape(block_shape=(X.block_shape[0], X.shape[1]))
    result: BlockArray = BlockArray(
        ArrayGrid(shape=(X.grid.grid_shape[0], ),
                  block_shape=(1, ),
                  dtype="dict"), system)
    y_is_vector = len(y.shape) == 1
    for grid_entry in X.grid.get_entry_iterator():
        X_block: Block = X.blocks[grid_entry]
        i = grid_entry[0]
        y_block: Block = y.blocks[i] if y_is_vector else y.blocks[i, 0]
        syskwargs = {"grid_entry": grid_entry, "grid_shape": X.grid.grid_shape}
        result.blocks[i].oid = system.call("xgb_train",
                                           X_block.oid,
                                           y_block.oid,
                                           rabit_args,
                                           params,
                                           args,
                                           kwargs,
                                           *evals_flat,
                                           syskwargs=syskwargs)
    return result
Esempio n. 19
0
 def tensordot(self, other, axes=2):
     """Build the graph array for a block-wise tensordot of ``self`` and
     ``other``.

     The last ``axes`` grid axes of ``self`` are contracted with the first
     ``axes`` grid axes of ``other``. Each output grid entry holds either a
     single tensordot node (one contracted block position) or a
     ``ReductionOp`` named ``"add"`` whose children are the per-position
     tensordot nodes.

     :param other: Right-hand operand; converted with ``self.other_to_ba``.
     :param axes: Number of axes to contract.
     :return: A new ``GraphArray`` sharing this array's cluster state and
         ``copy_on_op`` setting.
     """
     other = self.other_to_ba(other)
     # TODO: Reuse BlockArrayBase tensordot operator.
     this_axes = self.grid.grid_shape[:-axes]
     this_sum_axes = self.grid.grid_shape[-axes:]
     other_axes = other.grid.grid_shape[axes:]
     other_sum_axes = other.grid.grid_shape[:axes]
     assert this_sum_axes == other_sum_axes
     result_shape = tuple(self.shape[:-axes] + other.shape[axes:])
     result_block_shape = tuple(self.block_shape[:-axes] +
                                other.block_shape[axes:])
     result_grid = ArrayGrid(shape=result_shape,
                             block_shape=result_block_shape,
                             dtype=self.dtype.__name__)
     assert result_grid.grid_shape == tuple(this_axes + other_axes)
     # NOTE: the builtin ``object`` is used instead of the ``np.object``
     # alias, which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
     result_graphs = np.empty(shape=result_grid.grid_shape, dtype=object)
     this_dims = list(itertools.product(*map(range, this_axes)))
     other_dims = list(itertools.product(*map(range, other_axes)))
     sum_dims = list(itertools.product(*map(range, this_sum_axes)))
     for i in this_dims:
         for j in other_dims:
             grid_entry = tuple(i + j)
             if len(sum_dims) == 1:
                 # A single contracted position needs no reduction node.
                 k = sum_dims[0]
                 self_node: TreeNode = self.graphs[tuple(i + k)]
                 other_node: TreeNode = other.graphs[tuple(k + j)]
                 dot_node: TreeNode = self_node.tensordot(other_node,
                                                          axes=axes)
                 result_graphs[grid_entry] = dot_node
             else:
                 add_reduce_op = ReductionOp()
                 add_reduce_op.cluster_state = self.cluster_state
                 add_reduce_op.op_name = "add"
                 add_reduce_op.copy_on_op = self.copy_on_op
                 for k in sum_dims:
                     self_node: TreeNode = self.graphs[tuple(i + k)]
                     other_node: TreeNode = other.graphs[tuple(k + j)]
                     dot_node: TreeNode = self_node.tensordot(other_node,
                                                              axes=axes)
                     # Explicitly add parent here, since sum depends on prod.
                     # Not needed for other ops; make_bop takes care of it.
                     # We don't need to copy the node here since the local
                     # tree structure here is never exposed.
                     dot_node.parent = add_reduce_op
                     add_reduce_op.add_child(dot_node)
                 result_graphs[grid_entry] = add_reduce_op
     return GraphArray(result_grid,
                       self.cluster_state,
                       result_graphs,
                       copy_on_op=self.copy_on_op)
Esempio n. 20
0
def test_split(app_inst: ArrayApplication):
    """Split one four-element block into two halves and verify the contents.

    The two object ids returned by the system-level ``split`` call are
    attached to the blocks of a fresh two-block ``BlockArray``; fetching that
    array must reproduce the original values.
    """
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    expected = [1.0, 2.0, 3.0, 4.0]
    x = app_inst.array(np.array(expected), block_shape=(4, ))
    whole_block = x.blocks[0]
    split_kwargs = whole_block.syskwargs()
    # The split call is unpacked into two results below, so request two returns.
    split_kwargs["options"] = {"num_returns": 2}
    first_half, second_half = x.system.split(
        whole_block.oid,
        2,
        axis=0,
        transposed=False,
        syskwargs=split_kwargs,
    )
    halves_grid = ArrayGrid((4, ), (2, ), x.dtype.__name__)
    ba = BlockArray(halves_grid, x.system)
    ba.blocks[0].oid = first_half
    ba.blocks[1].oid = second_half
    assert np.allclose(expected, ba.get())
Esempio n. 21
0
 def permutation(self, size, block_size):
     """Create a permutation array of length ``size`` blocked by ``block_size``.

     The array is first built on a grid whose block shape equals the full
     shape, every block of that grid is filled by the system-level
     ``permutation`` call, and the result is finally reshaped to the
     requested block shape.
     """
     full_shape = (size,)
     whole_grid: ArrayGrid = ArrayGrid(shape=full_shape,
                                       block_shape=full_shape,
                                       dtype=np.int64.__name__)
     result = BlockArray(whole_grid, self._system)
     for entry in result.grid.get_entry_iterator():
         # Fresh RNG parameters are drawn for every block.
         params = list(self._rng.new_block_rng_params())
         target: Block = result.blocks[entry]
         placement = {
             "grid_entry": entry,
             "grid_shape": whole_grid.grid_shape,
         }
         target.oid = self._system.permutation(params,
                                               size,
                                               syskwargs=placement)
     return result.reshape(block_shape=(block_size,))
Esempio n. 22
0
 def read_csv(self,
              filename,
              dtype=float,
              delimiter=',',
              has_header=False,
              num_workers=4):
     """Read a CSV file in ``num_workers`` batches, one array per non-empty batch.

     The file size is divided into batches; each batch is handed to the
     remote ``"read_csv_block"`` call, which yields a block object id and a
     shape object id. Once the shapes are fetched, every batch whose first
     dimension is non-zero is wrapped in a single-block ``BlockArray``.

     Returns:
         A list of ``BlockArray`` objects, in batch order, skipping batches
         whose shape has a leading dimension of 0.
     """
     total_size = storage_utils.get_file_size(filename)
     byte_ranges: storage_utils.Batch = storage_utils.Batch.from_num_batches(
         total_size, num_workers)
     block_oids = []
     shape_oids = []
     for idx, (range_start, range_end) in enumerate(byte_ranges.batches):
         call_kwargs = {
             "grid_entry": (idx, ),
             "grid_shape": (num_workers, ),
             "options": {
                 "num_returns": 2
             },
         }
         block_oid, shape_oid = self.system.call("read_csv_block",
                                                 filename,
                                                 range_start,
                                                 range_end,
                                                 dtype,
                                                 delimiter,
                                                 has_header,
                                                 syskwargs=call_kwargs)
         block_oids.append(block_oid)
         shape_oids.append(shape_oid)

     # Fetch all block shapes at once before building the arrays.
     block_shapes = self.system.get(shape_oids)
     arrays = []
     for idx, block_shape in enumerate(block_shapes):
         if block_shape[0] == 0:
             # Nothing was read for this batch.
             continue
         oid = block_oids[idx]
         single_grid = ArrayGrid(shape=block_shape,
                                 block_shape=block_shape,
                                 dtype=dtype.__name__)
         arr = BlockArray(single_grid, self.system)
         for position, entry in enumerate(single_grid.get_entry_iterator()):
             # The grid is expected to hold exactly one entry.
             assert position == 0
             arr.blocks[entry].oid = oid
         arrays.append(arr)
     return arrays
Esempio n. 23
0
 def arange(self, shape, block_shape, step=1, dtype=np.int64) -> BlockArray:
     """Build a block array of consecutive values, one range per block.

     Only ``step == 1`` is supported (asserted). The start of each block's
     range is derived from the first axis of ``block_shape`` and of the
     block's grid entry; the stop adds the first dimension of that block's
     actual shape.
     """
     assert step == 1
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     rarr = BlockArray(grid, self.system)
     for entry in grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         # Offset of this block along the first axis.
         first = block_shape[0] * entry[0]
         last = first + grid.get_block_shape(entry)[0]
         rarr.blocks[entry].oid = self.system.arange(first,
                                                     last,
                                                     step,
                                                     dtype,
                                                     syskwargs=placement)
     return rarr
Esempio n. 24
0
def test_array_rwd():
    """Write, read back and delete a small random vector via ``StoredArrayS3``.

    The bucket must not exist beforehand; it is created, a single-block grid
    and the array are stored, the array is read back and compared, and then
    both the array and the grid are deleted.
    """
    bucket_name = 'darrays'
    s3 = boto3.resource('s3', region_name='us-east-1')
    assert s3.Bucket(bucket_name) not in s3.buckets.all()
    s3.create_bucket(Bucket=bucket_name)

    X: np.ndarray = np.random.random(3)
    stored_X = StoredArrayS3("darrays/%s_X" % "__test__")
    # One block covering the whole array.
    whole_grid = ArrayGrid(shape=X.shape,
                           block_shape=X.shape,
                           dtype=np.float64.__name__)
    stored_X.put_grid(whole_grid)
    stored_X.init_grid()
    stored_X.put_array(X)
    round_tripped = stored_X.get_array()
    assert np.allclose(X, round_tripped)
    stored_X.del_array()
    stored_X.delete_grid()
Esempio n. 25
0
 def delete_fs(self, filename: str):
     """Delete every stored block of ``filename`` and then its metadata.

     The file's metadata supplies the grid description and a mapping from
     grid entry to node address. One ``delete_block_fs`` call is issued per
     address entry, and the metadata itself is deleted afterwards.

     Args:
         filename: Name of the stored array to delete.

     Returns:
         A ``BlockArray`` whose shape is the stored grid's ``grid_shape`` with
         a block shape of all ones; each visited entry's block holds the
         object id returned by ``delete_block_fs``.
     """
     meta = self._filesystem.read_meta_fs(filename)
     addresses = meta["addresses"]
     grid_meta = meta["grid_meta"]
     grid = ArrayGrid.from_meta(grid_meta)
     # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
     # NumPy 1.24; the alias referred to the builtin, so the dtype is the same.
     unit_block_shape = tuple(np.ones_like(grid.shape, dtype=int))
     result_grid = ArrayGrid(grid.grid_shape,
                             unit_block_shape,
                             dtype=dict.__name__)
     rarr = BlockArray(result_grid, self._system)
     for grid_entry in addresses:
         node_address = addresses[grid_entry]
         # NOTE(review): presumably this tiny resource request pins the task
         # to the node that holds the block -- confirm against the scheduler.
         options = {"resources": {node_address: 1.0 / 10**4}}
         rarr.blocks[grid_entry].oid = self._filesystem.delete_block_fs(
             filename, grid_entry, grid_meta, options=options)
     self._filesystem.delete_meta_fs(filename)
     return rarr
Esempio n. 26
0
 def broadcast_to(self, shape):
     """Return a ``BlockArrayBase`` whose blocks are this array's blocks broadcast to ``shape``.

     The result grid is built from the broadcast shape and broadcast block
     shape computed by ``array_utils``. The block container itself is
     broadcast with ``np.nditer`` rather than copied.
     """
     target = array_utils.broadcast(self.shape, shape)
     target_block_shape = array_utils.broadcast_block_shape(
         self.shape, shape, self.block_shape)
     target_grid = ArrayGrid(target.shape,
                             target_block_shape,
                             self.grid.dtype.__name__)
     result: BlockArrayBase = BlockArrayBase(target_grid, self.system)
     # Below taken directly from _broadcast_to in numpy's stride_tricks.py.
     block_iter = np.nditer(
         (self.blocks,),
         flags=['multi_index', 'refs_ok', 'zerosize_ok'],
         op_flags=['readonly'],
         itershape=result.grid.grid_shape,
         order='C',
     )
     with block_iter:
         # never really has writebackifcopy semantics
         broadcast_blocks = block_iter.itviews[0]
     result.blocks = broadcast_blocks
     return result
Esempio n. 27
0
 def ga_from_arr(self, arr, result_shape):
     """Wrap ``arr`` in a ``GraphArray`` whose grid has shape ``result_shape``.

     A sample node is taken from ``arr`` (either ``arr`` itself when it is a
     ``TreeNode``, or its element at the all-zero index) and its ``shape()``
     becomes the block shape of the result grid. The dtype is inherited from
     this array's grid.
     """
     # NOTE(review): ``arr.shape`` is read as an attribute here and in the
     # assert below, while ``shape()`` is called as a method on the sample
     # node -- confirm the ``TreeNode`` branch is actually reachable.
     origin = tuple(0 for _ in arr.shape)
     if not isinstance(arr, TreeNode):
         sample_node: TreeNode = arr[origin]
     else:
         sample_node: TreeNode = arr
         assert result_shape == ()
     result_grid = ArrayGrid(shape=result_shape,
                             block_shape=sample_node.shape(),
                             dtype=self.grid.dtype.__name__)
     assert arr.shape == result_grid.grid_shape
     return GraphArray(result_grid,
                       self.cluster_state,
                       arr,
                       copy_on_op=self.copy_on_op)
Esempio n. 28
0
 def _delete(self, filename, store_cls, remote_func):
     """Invoke ``remote_func`` once per grid entry of the stored array ``filename``.

     Args:
         filename: Name of the stored array.
         store_cls: Store class passed through to ``_get_array_grid``.
         remote_func: Callable invoked as
             ``remote_func(filename, grid_entry, grid_meta, syskwargs=...)``.

     Returns:
         A ``BlockArray`` whose shape is the stored grid's ``grid_shape`` with
         a block shape of all ones; each block holds the object id returned by
         ``remote_func`` for the matching grid entry.
     """
     grid = self._get_array_grid(filename, store_cls)
     # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
     # NumPy 1.24; the alias referred to the builtin, so the dtype is the same.
     unit_block_shape = tuple(np.ones_like(grid.shape, dtype=int))
     result_grid = ArrayGrid(grid.grid_shape,
                             unit_block_shape,
                             dtype=dict.__name__)
     rarr = BlockArray(result_grid, self._system)
     for grid_entry in grid.get_entry_iterator():
         rarr.blocks[grid_entry].oid = remote_func(filename,
                                                   grid_entry,
                                                   grid.to_meta(),
                                                   syskwargs={
                                                       "grid_entry":
                                                       grid_entry,
                                                       "grid_shape":
                                                       grid.grid_shape
                                                   })
     return rarr
Esempio n. 29
0
 def _write(self, ba: BlockArray, filename, remote_func):
     """Invoke ``remote_func`` once per block of ``ba`` to write it under ``filename``.

     Args:
         ba: Block array whose blocks are written.
         filename: Destination name passed to ``remote_func``.
         remote_func: Callable invoked as
             ``remote_func(block_oid, filename, grid_entry, grid_meta, syskwargs=...)``.

     Returns:
         A ``BlockArray`` whose shape is ``ba``'s ``grid_shape`` with a block
         shape of all ones; each block holds the object id returned by
         ``remote_func`` for the matching grid entry.
     """
     grid = ba.grid
     # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
     # NumPy 1.24; the alias referred to the builtin, so the dtype is the same.
     unit_block_shape = tuple(np.ones_like(grid.shape, dtype=int))
     result_grid = ArrayGrid(grid.grid_shape,
                             unit_block_shape,
                             dtype=dict.__name__)
     rarr = BlockArray(result_grid, self.system)
     for grid_entry in grid.get_entry_iterator():
         rarr.blocks[grid_entry].oid = remote_func(
             ba.blocks[grid_entry].oid,
             filename,
             grid_entry,
             grid.to_meta(),
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             })
     return rarr
Esempio n. 30
0
    def __init__(self, source, sel: BasicSelection = None, block_shape: tuple = None):
        """Set up a view over ``source`` restricted to the selection ``sel``.

        Args:
            source: The underlying block array; its ``system``, ``shape``,
                ``block_shape`` and ``dtype`` are read here.
            sel: Selection into ``source``. When ``None``, a selection built
                from the full source shape is used.
            block_shape: Block shape of the view. When ``None``, it is derived
                from the selection's selector and the source block shape.
        """
        self._source: BlockArrayBase = source
        self._system: System = self._source.system

        if sel is None:
            # Default to a selection derived from the whole source shape.
            sel = BasicSelection.from_shape(self._source.shape)
        # Currently, this is all we support.
        assert len(sel.axes) == len(self._source.shape)
        self.sel = sel

        # The view's shape is whatever the selection produces.
        self.shape: tuple = self.sel.get_output_shape()
        if block_shape is None:
            block_shape: tuple = array_utils.block_shape_from_subscript(self.sel.selector(),
                                                                        self._source.block_shape)
        self.block_shape = block_shape
        # Block shape and shape must have the same number of axes.
        assert len(self.block_shape) == len(self.shape)
        self.grid: ArrayGrid = ArrayGrid(self.shape, self.block_shape,
                                         dtype=self._source.dtype.__name__)