Esempio n. 1
0
def compute_graph_array(app: ArrayApplication, ga: GraphArray) -> BlockArray:
    """Solve a GraphArray with a RandomTS scheduler and wrap the result.

    Args:
        app: Application whose ``system`` attribute is handed to the
            resulting BlockArray.
        ga: The graph array to solve.

    Returns:
        A BlockArray built from the solved graph's grid and blocks.
    """
    # NOTE(review): random_seed is resolved from the enclosing module scope.
    solver = RandomTS(seed=random_seed,
                      max_samples_per_step=1,
                      max_reduction_pairs=1,
                      force_final_action=True)
    result_ga: GraphArray = solver.solve(ga)
    # Materialize the grid and blocks exactly once instead of evaluating
    # to_blocks() in a discarded expression and again for the constructor.
    grid = result_ga.grid
    blocks = result_ga.to_blocks()
    return BlockArray(grid, app.system, blocks)
Esempio n. 2
0
 def argop(self, op_name: str, arr: BlockArray, axis=None):
     """Apply the arg-reduction named ``op_name`` over a one-dimensional array.

     Blocks are visited in grid-iteration order. Each call to
     ``self.cm.arg_op`` receives the block, its slice along the first axis,
     and the pair returned by the previous call, so the running result is
     threaded through the chain of calls.

     Args:
         op_name: Name of the operation forwarded to ``self.cm.arg_op``.
         arr: The input array; at most one dimension is accepted.
         axis: Only ``None`` (treated as 0) or ``0`` is accepted.

     Returns:
         A BlockArray with empty shape and int64 dtype whose single block
         holds the first element of the final pair.

     Raises:
         NotImplementedError: If ``arr`` has more than one dimension.
     """
     if len(arr.shape) > 1:
         raise NotImplementedError(
             "%s currently supports one-dimensional arrays." % op_name
         )
     axis = 0 if axis is None else axis
     assert axis == 0
     out_grid = ArrayGrid(shape=(), block_shape=(), dtype=np.int64.__name__)
     out = BlockArray(out_grid, self.cm)
     # Seed for the first call: there is no running pair yet.
     running = (None, None)
     for entry in arr.grid.get_entry_iterator():
         first_axis_slice: slice = arr.grid.get_slice(entry)[0]
         source_block: Block = arr.blocks[entry]
         running = self.cm.arg_op(
             op_name,
             source_block.oid,
             first_axis_slice,
             *running,
             syskwargs={
                 "grid_entry": entry,
                 "grid_shape": arr.grid.grid_shape,
                 "options": {"num_returns": 2},
             },
         )
     best_index, _unused = running
     out.blocks[()].oid = best_index
     return out
Esempio n. 3
0
 def norm(self, X: BlockArray, order=2) -> BlockArray:
     """Compute the order-1 or order-2 norm of a vector.

     Args:
         X: A one-dimensional BlockArray.
         order: Norm order; only 1 and 2 are accepted.

     Returns:
         ``sqrt(X^T @ X)`` when ``order`` is 2, otherwise ``sum(abs(X))``.
     """
     assert len(X.shape) == 1, "Only vector norms are supported."
     assert order in (1, 2), "Only order 1 and 2 norms supported."
     if order != 2:
         return self.sum(self.abs(X))
     # Inner product of X with itself, then the square root.
     inner = X.transpose(defer=True) @ X
     return self.sqrt(inner)
Esempio n. 4
0
 def _stack_copy(self, X):
     """Build a square 2D BlockArray from a 1D array via ``self.cm.triu_copy``.

     Output block ``(i, j)`` is produced by calling ``triu_copy`` on the
     ``j``-th block of ``X`` together with the output block's row and
     column counts.

     Args:
         X: A one-dimensional BlockArray.

     Returns:
         A BlockArray whose two dimensions both equal the length of ``X``
         and whose block shape is ``X.block_shape[0]`` on both axes.
     """
     assert len(X.shape) == 1
     side = max(X.shape)
     block_side = X.block_shape[0]
     out_grid = ArrayGrid((side, side), (block_side, block_side), X.dtype.__name__)
     out = BlockArray(out_grid, self.cm)
     num_block_rows = out.blocks.shape[0]
     num_block_cols = out.blocks.shape[1]
     for i in range(num_block_rows):
         # The source block index restarts on every row and tracks the column.
         for j in range(num_block_cols):
             target = out.blocks[(i, j)]
             n_rows, n_cols = target.shape[0], target.shape[1]
             target.oid = self.cm.triu_copy(
                 X.blocks[j].oid,
                 n_rows,
                 n_cols,
                 syskwargs={
                     "grid_entry": (i, j),
                     "grid_shape": out_grid.grid_shape,
                 },
             )
     return out
Esempio n. 5
0
 def reduce(
     self, op_name: str, X: BlockArray, axis=None, keepdims=False, dtype=None
 ):
     """Reduce ``X`` with the named operation, optionally casting the result.

     Args:
         op_name: Name of the reduction, forwarded to ``X.reduce_axis``.
         X: The array to reduce.
         axis: Axis argument forwarded to ``X.reduce_axis``.
         keepdims: Forwarded to ``X.reduce_axis``.
         dtype: When not ``None``, the result is cast with ``astype(dtype)``.

     Returns:
         The reduced array.
     """
     reduced = X.reduce_axis(op_name, axis, keepdims=keepdims)
     return reduced if dtype is None else reduced.astype(dtype)
Esempio n. 6
0
    def arange(self, start_in, shape, block_shape, step=1, dtype=None) -> BlockArray:
        """Create a blocked range of consecutive values starting at ``start_in``.

        Args:
            start_in: First value of the range.
            shape: Shape of the output; ``shape[0]`` is the number of elements.
            block_shape: Block shape of the output.
            step: Must be 1.
            dtype: Output dtype. When ``None`` it is derived with
                ``np.result_type`` from ``start_in`` and ``shape[0] + start_in``.

        Returns:
            A BlockArray in which each block is filled by ``self.cm.arange``
            with that block's own start and stop values.
        """
        assert step == 1
        if dtype is None:
            inferred = np.result_type(start_in, shape[0] + start_in)
            dtype = np.__getattribute__(str(inferred))

        grid = ArrayGrid(shape, block_shape, dtype.__name__)
        out = BlockArray(grid, self.cm)
        for entry in grid.get_entry_iterator():
            # A block's range begins block_shape[0] elements after the previous one.
            block_start = start_in + block_shape[0] * entry[0]
            block_stop = block_start + grid.get_block_shape(entry)[0]
            out.blocks[entry].oid = self.cm.arange(
                block_start,
                block_stop,
                step,
                dtype,
                syskwargs={"grid_entry": entry, "grid_shape": grid.grid_shape},
            )
        return out
Esempio n. 7
0
    def quantile(
        self, arr: BlockArray, q: float, interpolation="linear", method="tdigest"
    ) -> BlockArray:
        """Compute the q-th quantile of a one-dimensional array.

        One t-digest is built per block with ``self.cm.tdigest_chunk``; the
        digests are then passed together to
        ``self.cm.percentiles_from_tdigest``.

        Args:
            arr: A one-dimensional BlockArray.
            q: Quantile to compute; values below 0 or above 1 are rejected.
            interpolation: Only "linear" is accepted.
            method: Only "tdigest" is accepted.

        Returns:
            A float64 BlockArray of shape (1,) wrapping the computed quantile.

        Raises:
            Exception: If the ``crick`` package cannot be imported.
            NotImplementedError: If ``arr`` is not one-dimensional.
            ValueError: If ``q`` is below 0 or above 1.
        """
        # pylint: disable = import-outside-toplevel, unused-import
        try:
            import crick
        except Exception as e:
            raise Exception(
                "Unable to import crick. \
                Install crick with command 'pip install cython; pip install crick'"
            ) from e

        if arr.ndim != 1:
            raise NotImplementedError("Only 1D 'arr' is currently supported.")
        if q < 0.0 or q > 1.0:
            raise ValueError("Quantiles must be in the range [0, 1]")
        assert interpolation == "linear"
        assert method == "tdigest"

        block_oids = arr.flattened_oids()
        num_blocks = len(block_oids)
        quantiles = [q]
        digest_oids = []

        for idx, block_oid in enumerate(block_oids):
            syskwargs = {
                "grid_entry": (idx,),
                "grid_shape": (num_blocks,),
                "options": {"num_returns": 1},
            }
            digest_oids.append(self.cm.tdigest_chunk(block_oid, syskwargs=syskwargs))

        # NOTE(review): the merge call reuses the syskwargs of the last block.
        result_oid = self.cm.percentiles_from_tdigest(
            quantiles, *digest_oids, syskwargs=syskwargs
        )
        return BlockArray.from_oid(result_oid, (1,), np.float64, self.cm)
Esempio n. 8
0
    def map_bop(
        self,
        op_name: str,
        arr_1: BlockArray,
        arr_2: BlockArray,
        out: BlockArray = None,
        where=True,
        args=None,
        kwargs=None,
    ) -> BlockArray:
        # TODO (hme): Move this into BlockArray, and invoke on operator implementations.
        """
        Apply an element-wise binary operator to two BlockArrays.

        The NumPy function named ``op_name`` is applied directly to the two
        arrays of blocks, except for max/min/logical operators, which go
        through ``self._broadcast_bop``. Any exception raised on either path
        also falls back to ``self._broadcast_bop``.

        :param op_name: Name of an element-wise binary operator.
        :param arr_1: Left operand.
        :param arr_2: Right operand.
        :param out: Optional BlockArray that receives the result blocks; its grid
            shape, shape and block shape must match the computed result.
        :param where: Only ``True`` is supported.
        :param args: Must be ``None``.
        :param kwargs: Must be ``None`` or empty.
        :return: ``out`` when given, otherwise a new BlockArray.
        """
        if where is not True:
            raise NotImplementedError("'where' argument is not yet supported.")
        if args is not None:
            raise NotImplementedError("'args' is not yet supported.")
        if kwargs is not None and len(kwargs) != 0:
            raise NotImplementedError("'kwargs' is not yet supported.")

        try:
            np_op = np.__getattribute__(op_name)
            use_broadcast = (
                op_name.endswith(("max", "min"))
                or op_name in ("maximum", "minimum")
                or op_name.startswith("logical")
            )
            if use_broadcast:
                result = self._broadcast_bop(op_name, arr_1, arr_2)
            else:
                combined_blocks: np.ndarray = np_op(arr_1.blocks, arr_2.blocks)
                result = BlockArray.from_blocks(
                    combined_blocks, result_shape=None, system=self.system
                )
        except Exception as _:
            result = self._broadcast_bop(op_name, arr_1, arr_2)

        if out is None:
            return result
        assert out.grid.grid_shape == result.grid.grid_shape
        assert out.shape == result.shape
        assert out.block_shape == result.block_shape
        out.blocks[:] = result.blocks[:]
        return out
Esempio n. 9
0
def from_modin(df):
    """Convert a Ray-backed modin DataFrame into a BlockArray.

    Every partition of the frame is converted to a NumPy array and wrapped as
    a BlockArray. The partitions of each row are concatenated along axis 1,
    and the resulting rows are concatenated along axis 0.

    Args:
        df: A modin ``DataFrame`` whose internal frame is a ``PandasOnRayFrame``.

    Returns:
        A BlockArray holding the frame's data, with axis block sizes taken
        from ``app.get_block_shape``.

    Raises:
        Exception: If modin cannot be imported.
        TypeError: If the first column's dtype is unsupported, or if the
            columns do not all share the same dtype.
    """
    # pylint: disable = import-outside-toplevel, protected-access, unidiomatic-typecheck
    import numpy as np

    try:
        from modin.pandas.dataframe import DataFrame
        from modin.engines.ray.pandas_on_ray.frame.data import PandasOnRayFrame
        from modin.engines.ray.pandas_on_ray.frame.partition import (
            PandasOnRayFramePartition, )
    except Exception as e:
        raise Exception(
            "Unable to import modin. Install modin with command 'pip install modin'"
        ) from e

    assert isinstance(
        df, DataFrame), "Unexpected dataframe type %s" % str(type(df))
    assert isinstance(df._query_compiler._modin_frame,
                      PandasOnRayFrame), "Unexpected dataframe type %s" % str(
                          type(df._query_compiler._modin_frame))
    frame: PandasOnRayFrame = df._query_compiler._modin_frame

    app: ArrayApplication = _instance()
    system = app.cm

    # Make sure the partitions are numeric and share a single dtype.
    dtype = frame.dtypes[0]
    if not array_utils.is_supported(dtype, type_test=True):
        raise TypeError("%s is not supported." % str(dtype))
    for dt in frame.dtypes:
        if dt != dtype:
            # Fill in both placeholders so the message names the offending dtypes.
            raise TypeError("Mixed types are not supported (%s != %s)." %
                            (str(dt), str(dtype)))

    dtype = np.__getattribute__(str(dtype))

    # Convert from Pandas to NumPy.
    pd_parts = frame._partition_mgr_cls.map_partitions(
        frame._partitions, lambda part_df: np.array(part_df))
    grid_shape = len(frame._row_lengths), len(frame._column_widths)

    shape = (np.sum(frame._row_lengths), np.sum(frame._column_widths))
    block_shape = app.get_block_shape(shape, dtype)
    rows = []
    for i in range(grid_shape[0]):
        cols = []
        for j in range(grid_shape[1]):
            curr_block_shape = (frame._row_lengths[i], frame._column_widths[j])
            part: PandasOnRayFramePartition = pd_parts[(i, j)]
            # Flush the partition's queued calls before its object id is used.
            part.drain_call_queue()
            ba: BlockArray = BlockArray.from_oid(part.oid, curr_block_shape,
                                                 dtype, system)
            cols.append(ba)
        if grid_shape[1] == 1:
            row_ba: BlockArray = cols[0]
        else:
            row_ba: BlockArray = app.concatenate(
                cols, axis=1, axis_block_size=block_shape[1])
        rows.append(row_ba)
    result = app.concatenate(rows, axis=0, axis_block_size=block_shape[0])
    return result
Esempio n. 10
0
 def permutation(self, size, block_size):
     """Create a permutation as a single block, then re-block it.

     Args:
         size: Length of the output array; also passed to ``self._cm.permutation``.
         block_size: Block size of the returned array along its only axis.

     Returns:
         An int64 BlockArray of shape ``(size,)`` reshaped to block shape
         ``(block_size,)``.
     """
     full_shape = (size, )
     # The grid deliberately uses the full shape as its block shape,
     # so the whole array starts out as one block.
     grid: ArrayGrid = ArrayGrid(shape=full_shape,
                                 block_shape=full_shape,
                                 dtype=np.int64.__name__)
     single = BlockArray(grid, self._cm)
     for entry in single.grid.get_entry_iterator():
         params = list(self._rng.new_block_rng_params())
         target: Block = single.blocks[entry]
         target.oid = self._cm.permutation(
             params,
             size,
             syskwargs={"grid_entry": entry, "grid_shape": grid.grid_shape},
         )
     return single.reshape(block_shape=(block_size, ))
Esempio n. 11
0
def test_split(app_inst: ArrayApplication):
    """Split a single four-element block in two and check the reassembled values."""
    # TODO (hme): Implement a split leveraging block_shape param in reshape op.
    x = app_inst.array(np.array([1.0, 2.0, 3.0, 4.0]), block_shape=(4,))
    source = x.blocks[0]
    halves = x.cm.split(
        source.oid,
        2,
        axis=0,
        transposed=False,
        syskwargs={
            "grid_entry": source.grid_entry,
            "grid_shape": source.grid_shape,
            "options": {"num_returns": 2},
        },
    )
    first_half, second_half = halves
    # Reassemble the two halves as a two-block array of the same dtype.
    ba = BlockArray(ArrayGrid((4,), (2,), x.dtype.__name__), x.cm)
    ba.blocks[0].oid = first_half
    ba.blocks[1].oid = second_half
    assert np.allclose([1.0, 2.0, 3.0, 4.0], ba.get())
Esempio n. 12
0
 def mean(self, X: BlockArray, axis=None, keepdims=False, dtype=None):
     """Compute the arithmetic mean of ``X``, overall or along one axis.

     Args:
         X: Input array. It is cast to float64 first unless its dtype is
             already float, float32 or float64.
         axis: Axis to average over; ``None`` averages over all elements.
         keepdims: Forwarded to ``self.sum``.
         dtype: When not ``None``, the result is cast with ``astype(dtype)``.

     Returns:
         The sum over ``axis`` divided by the number of summed elements.
     """
     if X.dtype not in (float, np.float32, np.float64):
         X = X.astype(np.float64)
     # np.prod is used because the np.product alias was removed in NumPy 2.0.
     num_summed = np.prod(X.shape) if axis is None else X.shape[axis]
     res = self.sum(X, axis=axis, keepdims=keepdims) / num_summed
     if dtype is not None:
         res = res.astype(dtype)
     return res
Esempio n. 13
0
    def map_uop(
        self,
        op_name: str,
        arr: BlockArray,
        out: BlockArray = None,
        where=True,
        args=None,
        kwargs=None,
    ) -> BlockArray:
        """Apply a unary operator to every block of an array.

        Args:
            op_name: Name of the element-wise unary operator.
            arr: The input BlockArray.
            out: Optional BlockArray that receives the result; it must have
                the same shape and block shape as ``arr``.
            where: Only ``True`` is supported.
            args: Positional arguments forwarded to the operator.
            kwargs: Keyword arguments forwarded to the operator.

        Returns:
            ``out`` when given, otherwise a new BlockArray whose dtype comes
            from ``array_utils.get_uop_output_type``.

        Raises:
            NotImplementedError: If ``where`` is anything other than ``True``.
        """
        if where is not True:
            raise NotImplementedError("'where' argument is not yet supported.")
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}
        shape, block_shape = arr.shape, arr.block_shape
        dtype = array_utils.get_uop_output_type(op_name, arr.dtype)
        assert len(shape) == len(block_shape)
        if out is not None:
            result = out
            grid = result.grid
            assert result.shape == arr.shape and result.block_shape == arr.block_shape
        else:
            grid = ArrayGrid(shape, block_shape, dtype.__name__)
            result = BlockArray(grid, self.cm)
        for entry in grid.get_entry_iterator():
            # TODO(hme): Faster to create ndarray first,
            #  and instantiate block array on return
            #  to avoid instantiating blocks on BlockArray initialization.
            result.blocks[entry] = arr.blocks[entry].uop_map(
                op_name, args=args, kwargs=kwargs
            )
        return result
Esempio n. 14
0
    def triu(self, X: BlockArray):
        """Apply ``self.cm.triu`` block by block to a 1D or 2D array.

        A 1D input is first expanded with ``self._stack_copy`` and then handled
        as 2D. For a 2D input the work happens in three passes:

        1. Each block listed by ``array_utils.find_diag_output_blocks`` is
           processed with the flag argument set to ``False``.
        2. Every block below a listed block, in the same block column, is
           processed with the flag set to ``True``. A block already written
           by an earlier step is read back from the output instead of from ``X``.
        3. Blocks touched by neither pass share the object id of the
           corresponding block of ``X``.

        Args:
            X: A one- or two-dimensional BlockArray.

        Returns:
            ``X`` itself when it is 2D with a single row, otherwise a new
            BlockArray with the shape, block shape and dtype of the 2D input.

        Raises:
            NotImplementedError: If ``X`` has more than two dimensions.
        """
        ndim = len(X.shape)
        if ndim == 1:
            return self.triu(self._stack_copy(X))
        if ndim != 2:
            raise NotImplementedError()
        if X.shape[0] == 1:
            return X

        diag_meta = array_utils.find_diag_output_blocks(X.blocks, min(X.shape))
        out_grid = ArrayGrid(X.shape, X.block_shape, X.dtype.__name__)
        out = BlockArray(out_grid, self.cm)
        handled = set()
        num_block_rows, num_block_cols = X.blocks.shape[0], X.blocks.shape[1]

        # Pass 1: the blocks reported by find_diag_output_blocks.
        for block_indices, offset, total_elements in diag_meta:
            out.blocks[block_indices].oid = self.cm.triu(
                X.blocks[block_indices].oid,
                offset,
                False,
                total_elements,
                syskwargs={
                    "grid_entry": block_indices,
                    "grid_shape": out_grid.grid_shape,
                },
            )
            handled.add(block_indices)

        # Pass 2: every block below a reported block, in the same block column.
        for block_indices, offset, total_elements in diag_meta:
            col = block_indices[1]
            for row in range(block_indices[0] + 1, num_block_rows):
                key = (row, col)
                # Re-process the output block if it was already written.
                source = out if key in handled else X
                out.blocks[key].oid = self.cm.triu(
                    source.blocks[key].oid,
                    offset,
                    True,
                    total_elements,
                    syskwargs={
                        "grid_entry": key,
                        "grid_shape": out_grid.grid_shape,
                    },
                )
                handled.add(key)

        # Pass 3: untouched blocks reuse the input block's object id.
        for i in range(num_block_rows):
            for j in range(num_block_cols):
                if (i, j) not in handled:
                    out.blocks[(i, j)].oid = X.blocks[(i, j)].oid
        return out
0
def inv(app: ArrayApplication, X: BlockArray):
    """Invert a square matrix by running ``app.cm.inv`` on it as a single block.

    A multi-block input is reshaped into one block covering the whole matrix,
    inverted, and reshaped back to its original block shape.

    Args:
        app: Application whose ``cm`` performs the inversion.
        X: A square, two-dimensional BlockArray.

    Returns:
        A BlockArray with the block shape of ``X``.
    """
    # TODO (hme): Implement scalable version.
    original_block_shape = X.block_shape
    assert len(X.shape) == 2
    assert X.shape[0] == X.shape[1]
    is_single_block = (X.shape[0] == X.block_shape[0]
                       and X.shape[1] == X.block_shape[1])
    work = X.copy() if is_single_block else X.reshape(block_shape=X.shape)
    only_block = work.blocks[0, 0]
    only_block.oid = app.cm.inv(
        only_block.oid,
        syskwargs={"grid_entry": (0, 0), "grid_shape": (1, 1)},
    )
    if is_single_block:
        return work
    return work.reshape(block_shape=original_block_shape)
Esempio n. 16
0
def inv(a: BlockArray):
    """Invert ``a`` via ``linalg.inv``, warning first if it spans several blocks.

    Args:
        a: The BlockArray to invert.

    Returns:
        The result of ``linalg.inv`` called with the application instance and ``a``.
    """
    if not a.is_single_block():
        size_note = "Input array is %s bytes. " % a.nbytes
        message = (
            "nums.numpy.linalg.inv is not a scalable implementation. "
            + size_note
            + "Abort this operation if input array is too large to "
            + "execute on a single node."
        )
        warnings.warn(message)
    return linalg.inv(_instance(), a)
Esempio n. 17
0
 def gradient(
     self,
     X: BlockArray,
     y: BlockArray,
     mu: BlockArray = None,
     beta: BlockArray = None,
 ):
     """Compute ``X^T @ (mu - y)``.

     Args:
         X: Input matrix.
         y: Target values.
         mu: Predictions; computed with ``self.forward(X)`` when ``None``.
         beta: Accepted but not used by this method.

     Returns:
         The product of the deferred transpose of ``X`` with ``mu - y``.
     """
     prediction = self.forward(X) if mu is None else mu
     X_t = X.transpose(defer=True)
     residual = prediction - y
     return X_t @ residual
Esempio n. 18
0
 def hessian(self, X: BlockArray, y: BlockArray, mu: BlockArray = None):
     """Compute ``X^T @ (s * X)`` with ``s = mu * (1 - mu)`` as a column.

     Args:
         X: Input matrix.
         y: Accepted but not used by this method.
         mu: Predictions; computed with ``self.forward(X)`` when ``None``.

     Returns:
         The weighted product, with ``self.hessian_penalty()`` added in place
         when ``self._penalty`` is not ``None``.
     """
     if mu is None:
         mu = self.forward(X)
     num_rows = mu.shape[0]
     rows_per_block = mu.block_shape[0]
     weights = mu * (self._app.one - mu)
     # Reshape to a column so the product with X scales each row.
     column = weights.reshape((num_rows, 1), block_shape=(rows_per_block, 1))
     X_t = X.transpose(defer=True)
     result = X_t @ (column * X)
     if self._penalty is not None:
         result += self.hessian_penalty()
     return result
Esempio n. 19
0
 def column_stack(self, tup, axis_block_size=None):
     """Stack the given arrays as columns of a 2D array.

     Args:
         tup: Iterable of objects convertible by ``BlockArray.to_block_array``.
         axis_block_size: Forwarded to ``self.concatenate``.

     Returns:
         The concatenation of the inputs along axis 1.
     """
     # Based on numpy source.
     columns = []
     for obj in tup:
         arr = BlockArray.to_block_array(obj, self.cm)
         as_2d = self.atleast_2d(arr)
         # Inputs with fewer than two dimensions are transposed into columns.
         columns.append(as_2d.T if arr.ndim < 2 else as_2d)
     return self.concatenate(columns, 1, axis_block_size=axis_block_size)
Esempio n. 20
0
def train_test_split(*arrays,
                     test_size: Union[int, float] = None,
                     train_size: Union[int, float] = None,
                     random_state: Optional[Union[NumsRandomState,
                                                  int]] = None,
                     shuffle: bool = True,
                     stratify=None):
    """Split arrays into train and test subsets with one remote call.

    The first object id of each checked input is passed to the remote
    "train_test_split" function, which is asked for two results per input
    array. The shape and dtype of every result are then fetched so each one
    can be wrapped as a BlockArray.

    Args:
        *arrays: Arrays to split; each is passed through ``_check_array``.
        test_size: Forwarded to the remote function.
        train_size: Forwarded to the remote function.
        random_state: ``None``, an int seed, or a NumsRandomState. An int is
            converted to a random state via the application instance.
        shuffle: Forwarded to the remote function.
        stratify: Forwarded to the remote function.

    Returns:
        A list of BlockArrays, one per result of the remote call.
    """
    # pylint: disable = protected-access
    checked = [_check_array(array) for array in arrays]
    syskwargs = {
        "options": {
            "num_returns": 2 * len(checked)
        },
        "grid_entry": (0, ),
        "grid_shape": (1, ),
    }

    rng_params = None
    if random_state is not None:
        if isinstance(random_state, int):
            # It's a seed.
            random_state: NumsRandomState = instance().random_state(
                random_state)
        rng_params = random_state._rng.new_block_rng_params()

    first_oids = [array.flattened_oids()[0] for array in checked]
    result_oids = instance().cm.call("train_test_split",
                                     *first_oids,
                                     rng_params=rng_params,
                                     test_size=test_size,
                                     train_size=train_size,
                                     shuffle=shuffle,
                                     stratify=stratify,
                                     syskwargs=syskwargs)
    # Optimize by computing this directly.
    meta_oids = []
    for r_oid in result_oids:
        meta_oids.append(instance().cm.shape_dtype(
            r_oid,
            syskwargs={"grid_entry": (0, ), "grid_shape": (1, )},
        ))
    shape_dtypes = instance().cm.get(meta_oids)
    results = []
    for idx, r_oid in enumerate(result_oids):
        shape, dtype = shape_dtypes[idx]
        wrapped = BlockArray.from_oid(r_oid,
                                      shape=shape,
                                      dtype=dtype,
                                      cm=instance().cm)
        results.append(wrapped)
    return results
Esempio n. 21
0
 def where(self, condition: BlockArray, x=None, y=None):
     """Run ``self.system.where`` on every block and gather the results per axis.

     Each block call is asked for ``num_axes + 1`` results. All but the last
     are treated as per-axis result object ids, and the last as the object id
     of that block's result shape. Blocks whose result shape is ``(0,)`` are
     skipped, and the remaining per-block results are concatenated along
     axis 0 for each axis.

     Args:
         condition: The BlockArray to evaluate.
         x: Optional array; must be given together with ``y``.
         y: Optional array; must be given together with ``x``.

     Returns:
         A tuple with one int64 BlockArray per axis of ``condition`` (at
         least one). When the fetched shapes sum to zero, a one-element
         tuple holding an empty int64 array is returned.
     """
     num_axes = max(1, len(condition.shape))
     # Stricter than necessary: x and y must match condition exactly
     # in both shape and block shape.
     if x is not None or y is not None:
         assert x is not None and y is not None
         assert condition.shape == x.shape == y.shape
         assert condition.block_shape == x.block_shape == y.block_shape
     result_oids = []
     shape_oids = []
     for grid_entry in condition.grid.get_entry_iterator():
         block: Block = condition.blocks[grid_entry]
         block_slice_tuples = condition.grid.get_slice_tuples(grid_entry)
         roids = self.system.where(block.oid,
                                   x,
                                   y,
                                   block_slice_tuples,
                                   syskwargs={
                                       "grid_entry": grid_entry,
                                       "grid_shape":
                                       condition.grid.grid_shape,
                                       "options": {
                                           "num_returns": num_axes + 1
                                       }
                                   })
         # The last returned oid is the result shape; the rest are per axis.
         result_oids.append(roids[:-1])
         shape_oids.append(roids[-1])
     shapes = self.system.get(shape_oids)
     result_shape = (np.sum(shapes), )
     if result_shape == (0, ):
         return (self.array(np.array([], dtype=np.int64),
                            block_shape=(0, )), )
     result_block_shape = self.compute_block_shape(result_shape, np.int64)
     result_arrays = []
     for axis in range(num_axes):
         # Skip blocks that produced no results.
         block_arrays = [
             BlockArray.from_oid(block_oids[axis], shape, np.int64,
                                 self.system)
             for block_oids, shape in zip(result_oids, shapes)
             if not shape == (0, )
         ]
         if len(block_arrays) == 1:
             axis_result = block_arrays[0]
         else:
             axis_result = self.concatenate(block_arrays, 0,
                                            result_block_shape[0])
         result_arrays.append(axis_result)
     return tuple(result_arrays)
Esempio n. 22
0
 def score(self,
           X: BlockArray,
           y: BlockArray,
           sample_weight: BlockArray = None):
     """Call the actor's "score" method on the first block of each input.

     Args:
         X: Input array, checked with ``_check_array``.
         y: Target array, checked with ``_check_array``.
         sample_weight: Optional weights; when given they are checked, and
             the first object id is passed to the actor instead of ``None``.

     Returns:
         A float BlockArray with empty shape wrapping the returned object id.
     """
     _check_array(X, True)
     _check_array(y, True)
     weight_oid = None
     if sample_weight is not None:
         _check_array(sample_weight, True)
         weight_oid = sample_weight.flattened_oids()[0]
     call_args = (X.flattened_oids()[0], y.flattened_oids()[0], weight_oid)
     r_oid = instance().cm.call_actor_method(self.actor, "score", *call_args)
     return BlockArray.from_oid(r_oid,
                                shape=(),
                                dtype=float,
                                cm=instance().cm)
Esempio n. 23
0
 def _inv(self, remote_func, kwargs, X: BlockArray):
     """Apply ``remote_func`` to a square matrix held as a single block.

     A multi-block input is reshaped into one block covering the whole matrix,
     processed, and reshaped back to its original block shape.

     Args:
         remote_func: Callable invoked with the block's object id, ``kwargs``
             and ``syskwargs``.
         kwargs: Keyword arguments forwarded to ``remote_func``.
         X: A square, two-dimensional BlockArray.

     Returns:
         A BlockArray with the block shape of ``X``.
     """
     # TODO (hme): Implement scalable version.
     original_block_shape = X.block_shape
     assert len(X.shape) == 2
     assert X.shape[0] == X.shape[1]
     is_single_block = (X.shape[0] == X.block_shape[0]
                        and X.shape[1] == X.block_shape[1])
     work = X.copy() if is_single_block else X.reshape(block_shape=X.shape)
     only_block = work.blocks[0, 0]
     only_block.oid = remote_func(
         only_block.oid,
         **kwargs,
         syskwargs={"grid_entry": (0, 0), "grid_shape": (1, 1)},
     )
     if is_single_block:
         return work
     return work.reshape(block_shape=original_block_shape)
Esempio n. 24
0
 def diag(self, X: BlockArray) -> BlockArray:
     """Build a diagonal matrix from a vector, or extract a matrix's diagonal.

     For a 1D input the output is square. Blocks whose grid indices are all
     equal are filled by ``self.cm.diag`` from the matching block of ``X``,
     and all other blocks are created as "zeros" blocks. For a 2D input only
     the blocks whose grid indices are equal are passed to ``self.cm.diag``.

     Args:
         X: A 1D BlockArray, or a square 2D BlockArray with square blocks.

     Returns:
         A 2D BlockArray for a 1D input, or a 1D BlockArray for a 2D input.

     Raises:
         ValueError: If ``X`` has neither one nor two axes.
     """
     ndim = len(X.shape)
     if ndim not in (1, 2):
         raise ValueError("X must have 1 or 2 axes.")

     if ndim == 1:
         n, block_n = X.shape[0], X.block_shape[0]
         grid = ArrayGrid((n, n), (block_n, block_n), X.dtype.__name__)
         grid_meta = grid.to_meta()
         rarr = BlockArray(grid, self.cm)
         for entry in grid.get_entry_iterator():
             syskwargs = {"grid_entry": entry, "grid_shape": grid.grid_shape}
             if np.all(np.diff(entry) == 0):
                 # This is a diagonal block.
                 new_oid = self.cm.diag(X.blocks[entry[0]].oid,
                                        syskwargs=syskwargs)
             else:
                 new_oid = self.cm.new_block("zeros", entry, grid_meta,
                                             syskwargs=syskwargs)
             rarr.blocks[entry].oid = new_oid
         return rarr

     assert X.shape[0] == X.shape[1], "X must be a square array."
     assert X.block_shape[0] == X.block_shape[
         1], "block_shape must be square."
     grid = ArrayGrid((X.shape[0], ), (X.block_shape[0], ), X.dtype.__name__)
     rarr = BlockArray(grid, self.cm)
     for entry in X.grid.get_entry_iterator():
         out_entry = entry[:1]
         syskwargs = {
             "grid_entry": out_entry,
             "grid_shape": grid.grid_shape[:1],
         }
         if np.all(np.diff(entry) == 0):
             # This is a diagonal block.
             rarr.blocks[out_entry].oid = self.cm.diag(X.blocks[entry].oid,
                                                       syskwargs=syskwargs)
     return rarr
Esempio n. 25
0
 def array(self, array: Union[np.ndarray, List[float]], block_shape: tuple = None):
     """Create a BlockArray from a NumPy array or an array-like object.

     Args:
         array: A NumPy array, or any object accepted by
             ``array_utils.is_array_like``, which is converted with ``np.array``.
         block_shape: Block shape of the result. It is required despite the
             ``None`` default and must have one entry per axis of ``array``.

     Returns:
         A BlockArray created by ``BlockArray.from_np`` with ``copy=False``.

     Raises:
         ValueError: If ``array`` is neither a NumPy array nor array-like.
         TypeError: If ``block_shape`` is ``None``.
     """
     if not isinstance(array, np.ndarray):
         if not array_utils.is_array_like(array):
             raise ValueError(
                 "Unable to instantiate array from type %s" % type(array)
             )
         array = np.array(array)
     if block_shape is None:
         # Fail with an explicit message instead of the opaque
         # "object of type 'NoneType' has no len()" produced by len(None).
         raise TypeError("block_shape must be provided; received None.")
     assert len(array.shape) == len(block_shape)
     return BlockArray.from_np(
         array, block_shape=block_shape, copy=False, cm=self.cm
     )
Esempio n. 26
0
 def read_fs(self, filename: str):
     """Load a BlockArray whose blocks were stored under ``filename``.

     Args:
         filename: Name passed to the filesystem's metadata and block readers.

     Returns:
         A BlockArray on the grid described by the stored metadata, with one
         block read per entry of the stored address mapping.
     """
     meta = self._filesystem.read_meta_fs(filename)
     addresses, grid_meta = meta["addresses"], meta["grid_meta"]
     grid = ArrayGrid.from_meta(grid_meta)
     loaded: BlockArray = BlockArray(grid, self._system)
     for entry in addresses:
         # Request a small fraction of the resource named after the block's
         # node address; presumably this places the read on that node.
         placement = {"resources": {addresses[entry]: 1.0 / 10**4}}
         loaded.blocks[entry].oid = self._filesystem.read_block_fs(
             filename, entry, grid_meta, options=placement)
     return loaded
Esempio n. 27
0
    def median(self, arr: BlockArray) -> BlockArray:
        """Compute the median of a one-dimensional BlockArray.

        For an odd number of elements a single ``quickselect`` call picks the
        middle element. For an even number, the two middle elements are
        selected and averaged.

        Args:
            arr: A one-dimensional BlockArray.

        Returns:
            A BlockArray holding the median value.

        Raises:
            NotImplementedError: If ``arr`` is not one-dimensional.
        """
        if arr.ndim != 1:
            raise NotImplementedError("Only 1D 'arr' is currently supported.")

        oids = arr.flattened_oids()
        middle = arr.size // 2
        if arr.size % 2 == 1:
            picked_oid = self.quickselect(oids, middle)
            return BlockArray.from_oid(picked_oid, (1, ), arr.dtype, self.cm)

        lower_oid = self.quickselect(oids, middle - 1)
        lower = BlockArray.from_oid(lower_oid, (1, ), arr.dtype, self.cm)
        upper_oid = self.quickselect(oids, middle)
        upper = BlockArray.from_oid(upper_oid, (1, ), arr.dtype, self.cm)
        return (lower + upper) / 2
Esempio n. 28
0
 def nanvar(self, a: BlockArray, axis=None, ddof=0, keepdims=False, dtype=None):
     """Compute the variance of ``a`` while ignoring NaN entries.

     Args:
         a: Input array.
         axis: Axis along which to compute; forwarded to the reductions.
         ddof: Value subtracted from the count of non-NaN entries.
         keepdims: Forwarded to the reductions.
         dtype: Forwarded to the "nansum" reduction. When not ``None`` the
             result is also cast with ``astype(dtype)``.

     Returns:
         The nan-sum of squared deviations from the nan-mean, divided by the
         non-NaN count minus ``ddof``.
     """
     # keepdims=True so that the mean can be subtracted from a directly.
     center = self.nanmean(a, axis=axis, keepdims=True)
     squared_dev = (a - center) ** self.two
     sum_sq = self.reduce(
         "nansum", squared_dev, axis=axis, dtype=dtype, keepdims=keepdims
     )
     not_nan = ~a.ufunc("isnan")
     valid_count = self.sum(not_nan, axis=axis, dtype=a.dtype, keepdims=keepdims)
     variance = sum_sq / (valid_count - ddof)
     return variance if dtype is None else variance.astype(dtype)
Esempio n. 29
0
 def atleast_1d(self, *arys):
     """Convert the inputs to BlockArrays with at least one dimension.

     Args:
         *arys: Objects convertible by ``BlockArray.to_block_array``.

     Returns:
         A single BlockArray when exactly one input is given, otherwise a
         list of BlockArrays. Zero-dimensional inputs are reshaped with
         ``reshape(1)``.
     """
     converted = []
     for ary in arys:
         block_arr = BlockArray.to_block_array(ary, self.cm)
         converted.append(block_arr.reshape(1) if block_arr.ndim == 0 else block_arr)
     return converted[0] if len(converted) == 1 else converted
Esempio n. 30
0
    def top_k(self,
              arr: BlockArray,
              k: int,
              largest=True) -> Tuple[BlockArray, BlockArray]:
        """Find the `k` largest or smallest elements of a BlockArray.

        A threshold element is selected with ``quickselect``. The indices of
        elements strictly beyond the threshold are gathered first, then
        padded up to `k` with indices of elements equal to the threshold.
        When several elements equal the threshold, no guarantee is made about
        which of them are included.

        Args:
            arr: A one-dimensional BlockArray.
            k: Number of elements to return; at least 1 and at most ``arr.size``.
            largest: Select the largest elements when true, else the smallest.

        Returns:
            A tuple (`values`, `indices`), where `values` is ``arr[indices]``.

        Raises:
            NotImplementedError: If ``arr`` is not one-dimensional.
            IndexError: If `k` is below 1 or greater than ``arr.size``.
        """
        if arr.ndim != 1:
            raise NotImplementedError("Only 1D 'arr' is currently supported.")
        if k <= 0 or arr.size < k:
            raise IndexError(
                "'k' must be at least 1 and at most the size of 'arr'.")
        arr_oids = arr.flattened_oids()
        kth = k - 1 if largest else -k
        k_oid = self.quickselect(arr_oids, kth)
        k_val = BlockArray.from_oid(k_oid, (1, ), arr.dtype, self.cm)
        # Strictly beyond the threshold: above for largest, below for smallest.
        if largest:
            strict_mask = arr > k_val[0]
        else:
            strict_mask = arr < k_val[0]
        ie_indices = self.where(strict_mask)[0]
        eq_indices = self.where(arr == k_val[0])[0]
        num_missing = k - ie_indices.size
        eq_indices_pad = eq_indices[:num_missing]
        axis_block_size = self.compute_block_shape((k, ), int)[0]
        indices = self.concatenate([ie_indices, eq_indices_pad], 0,
                                   axis_block_size)
        return arr[indices], indices