Beispiel #1
0
def right_shift(
    x1: BlockArray,
    x2: BlockArray,
    out: BlockArray = None,
    where=True,
    **kwargs
) -> BlockArray:
    """Dispatch the binary ``right_shift`` operation to the application instance.

    Args:
        x1: Left operand, forwarded as ``arr_1``.
        x2: Right operand, forwarded as ``arr_2``.
        out: Optional output array, forwarded unchanged.
        where: Forwarded unchanged to ``map_bop``.
        **kwargs: Remaining keyword arguments; passed through
            ``numpy_utils.ufunc_kwargs`` before being forwarded.

    Returns:
        The result of the application's ``map_bop`` call.
    """
    binary_map = _instance().map_bop
    ufunc_options = numpy_utils.ufunc_kwargs(kwargs)
    return binary_map(
        op_name="right_shift",
        arr_1=x1,
        arr_2=x2,
        out=out,
        where=where,
        kwargs=ufunc_options,
    )
Beispiel #2
0
def true_divide(x1: BlockArray,
                x2: BlockArray,
                out: BlockArray = None,
                where=True,
                **kwargs) -> BlockArray:
    """Dispatch the binary ``true_divide`` operation to the application instance.

    Args:
        x1: Left operand, forwarded as ``arr_1``.
        x2: Right operand, forwarded as ``arr_2``.
        out: Optional output array, forwarded unchanged.
        where: Forwarded unchanged to ``map_bop``.
        **kwargs: Remaining keyword arguments; passed through
            ``numpy_utils.ufunc_kwargs`` before being forwarded.

    Returns:
        The result of the application's ``map_bop`` call.
    """
    dispatch = _instance().map_bop
    extra = numpy_utils.ufunc_kwargs(kwargs)
    return dispatch(op_name="true_divide",
                    arr_1=x1,
                    arr_2=x2,
                    out=out,
                    where=where,
                    kwargs=extra)
Beispiel #3
0
def exp(
    x: BlockArray, out: BlockArray = None, where=True, **kwargs
) -> BlockArray:
    """Dispatch the unary ``exp`` operation to the application instance.

    Args:
        x: Operand, forwarded as ``arr``.
        out: Optional output array, forwarded unchanged.
        where: Forwarded unchanged to ``map_uop``.
        **kwargs: Remaining keyword arguments; passed through
            ``numpy_utils.ufunc_kwargs`` before being forwarded.

    Returns:
        The result of the application's ``map_uop`` call.
    """
    unary_map = _instance().map_uop
    ufunc_options = numpy_utils.ufunc_kwargs(kwargs)
    return unary_map(op_name="exp", arr=x, out=out, where=where, kwargs=ufunc_options)
Beispiel #4
0
def arange(start=None, stop=None, step=1, dtype=np.int64) -> BlockArray:
    """Build a 1-D BlockArray through the application's ``arange``.

    When only one bound is supplied it is treated as ``stop`` and ``start``
    becomes 0.

    Args:
        start: Lower bound, or the upper bound when ``stop`` is omitted.
        stop: Upper bound.
        step: Step size; only 1 is supported.
        dtype: Element type used to pick the block shape and forwarded to
            the application.

    Returns:
        The BlockArray produced by the application's ``arange`` for a shape
        of ``(stop - start,)``.

    Raises:
        TypeError: If ``start`` is not provided.
        NotImplementedError: If ``step`` is not 1.
    """
    if start is None:
        # Without this guard the subtraction below fails with an opaque
        # "unsupported operand" TypeError.
        raise TypeError("Missing required argument start")
    if stop is None:
        stop = start
        start = 0
    if step != 1:
        raise NotImplementedError(
            "Only step size of 1 is currently supported.")
    shape = (stop - start, )
    app = _instance()
    block_shape = app.get_block_shape(shape, dtype)
    # NOTE(review): `start` is not forwarded to app.arange, only the length
    # is. Confirm that the backend is meant to always count from 0.
    return app.arange(shape, block_shape, step, dtype)
Beispiel #5
0
def greater_equal(
    x1: BlockArray,
    x2: BlockArray,
    out: BlockArray = None,
    where=True,
    **kwargs
) -> BlockArray:
    """Dispatch the binary ``greater_equal`` operation to the application instance.

    Args:
        x1: Left operand, forwarded as ``arr_1``.
        x2: Right operand, forwarded as ``arr_2``.
        out: Optional output array, forwarded unchanged.
        where: Forwarded unchanged to ``map_bop``.
        **kwargs: Remaining keyword arguments; passed through
            ``numpy_utils.ufunc_kwargs`` before being forwarded.

    Returns:
        The result of the application's ``map_bop`` call.
    """
    compare = _instance().map_bop
    options = numpy_utils.ufunc_kwargs(kwargs)
    return compare(
        op_name="greater_equal",
        arr_1=x1,
        arr_2=x2,
        out=out,
        where=where,
        kwargs=options,
    )
Beispiel #6
0
 def _get_shapes(self, size=None, dtype=None):
     """Normalise ``size`` to a shape tuple and look up its block shape.

     Args:
         size: ``None`` (treated as the empty shape), a tuple used as-is,
             or an integer that becomes a one-element shape.
         dtype: Element type; ``_np.float64`` when omitted.

     Returns:
         A ``(shape, block_shape)`` pair, where ``block_shape`` comes from
         the application's ``get_block_shape``.
     """
     dtype = _np.float64 if dtype is None else dtype
     if size is None:
         shape = ()
     elif isinstance(size, tuple):
         shape = size
     else:
         # Anything that is neither None nor a tuple must be a plain integer.
         assert _array_utils.is_int(size)
         shape = (size, )
     return shape, _instance().get_block_shape(shape, dtype)
Beispiel #7
0
 def permutation(self, x):
     """Return a random permutation of a range or of a BlockArray's first axis.

     Args:
         x: Either an integer ``n`` or a BlockArray.

     Returns:
         For an integer, the result of the random state's ``permutation``
         over ``n`` entries. For a BlockArray, ``x`` indexed by a fetched
         permutation of its first axis.
     """
     app = _instance()
     if not _array_utils.is_int(x):
         assert isinstance(x, BlockArray)
         num_entries = x.shape[0]
         # The block size equals the full axis length, so the permutation
         # indices are requested as a single block and fetched locally.
         order = self.rs().permutation(num_entries, num_entries).get()
         return x[order]
     index_shape = (x, )
     index_block_shape = app.compute_block_shape(shape=index_shape,
                                                 dtype=_np.int64)
     return self.rs().permutation(index_shape[0], index_block_shape[0])
Beispiel #8
0
    def update(self):
        """Recompute every column of ``self.U`` from ``self.R`` and ``self.V``.

        For each user index, the columns of ``self.V`` whose entry in that
        user's row of ``self.R`` is positive are selected, a regularised
        system is inverted, and the result is stored in the user's column of
        ``self.U``.
        """
        # NOTE(review): this looks like the user-factor step of alternating
        # least squares with an L2 penalty of lambda_U - confirm.
        for user in range(self.n_users):
            observed = self.R[user, :] > 0
            V_obs = self.V[:, observed]

            gram = nps.matmul(V_obs, V_obs.T)
            system_matrix = gram + self.lambda_U * nps.identity(self.n_dims)
            system_inverse = _instance().inv(system_matrix)

            user_ratings = self.R[:, observed][user, :]
            rhs = nps.matmul(user_ratings, V_obs.T)

            self.U[:, user] = nps.matmul(system_inverse, rhs)
Beispiel #9
0
def loadtxt(
    fname,
    dtype=float,
    comments="# ",
    delimiter=" ",
    converters=None,
    skiprows=0,
    usecols=None,
    unpack=False,
    ndmin=0,
    encoding="bytes",
    max_rows=None,
) -> BlockArray:
    """Load a text file into a BlockArray, in parallel when possible.

    The application's ``loadtxt`` is tried first, with one worker per core
    reported by the compute manager. If that raises any ``Exception``, a
    warning is emitted and the file is read locally with ``np.loadtxt``
    instead.

    Args:
        fname: File to read.
        dtype, comments, delimiter, converters, skiprows, usecols, unpack,
            ndmin, encoding, max_rows: Forwarded unchanged to whichever
            loader is used.

    Returns:
        A BlockArray whose block shape comes from the application's
        ``compute_block_shape``.
    """
    app = _instance()
    num_rows = app.cm.num_cores_total()
    # Both loaders take the same keyword arguments; collect them once.
    loader_options = {
        "dtype": dtype,
        "comments": comments,
        "delimiter": delimiter,
        "converters": converters,
        "skiprows": skiprows,
        "usecols": usecols,
        "unpack": unpack,
        "ndmin": ndmin,
        "encoding": encoding,
        "max_rows": max_rows,
    }
    try:
        loaded: BlockArray = app.loadtxt(fname,
                                         num_workers=num_rows,
                                         **loader_options)
        target_block_shape = app.compute_block_shape(loaded.shape, dtype)
        return loaded.reshape(block_shape=target_block_shape)
    except Exception:
        # Deliberate best-effort: any failure falls back to a local read.
        warnings.warn(
            "Failed to load text data in parallel; using np.loadtxt locally.")
        local_arr = np.loadtxt(fname, **loader_options)
        target_block_shape = app.compute_block_shape(local_arr.shape, dtype)
        return app.array(local_arr, block_shape=target_block_shape)
Beispiel #10
0
def max(
    a: BlockArray, axis=None, out=None, keepdims=False, initial=None, where=None
) -> BlockArray:
    """Compute the maximum of a BlockArray through the application instance.

    Args:
        a: Input BlockArray.
        axis: Forwarded to the application's ``max``.
        out: Unsupported; must be ``None``.
        keepdims: Forwarded to the application's ``max``.
        initial: Unsupported; must be ``None``.
        where: Unsupported; must be ``None``.

    Returns:
        The result of the application's ``max``.

    Raises:
        NotImplementedError: If ``initial``, ``where`` or ``out`` is given
            (checked in that order).
    """
    rejected = (("initial", initial), ("where", where), ("out", out))
    for arg_name, arg_value in rejected:
        if arg_value is not None:
            raise NotImplementedError(
                "'%s' is currently not supported." % arg_name)
    return _instance().max(a, axis=axis, keepdims=keepdims)
Beispiel #11
0
def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0):
    """Create a 1-D BlockArray of ``num`` points via the application's ``linspace``.

    Args:
        start: Forwarded to the application.
        stop: Forwarded to the application.
        num: Length of the resulting shape ``(num,)``.
        endpoint: Forwarded to the application.
        retstep: Forwarded to the application.
        dtype: Element type; ``np.float64`` when omitted.
        axis: Forwarded to the application.

    Returns:
        The result of the application's ``linspace``.
    """
    if dtype is None:
        dtype = np.float64
    length_shape = (num, )
    app = _instance()
    blocks = app.get_block_shape(length_shape, dtype)
    return app.linspace(
        start, stop, length_shape, blocks, endpoint, retstep, dtype, axis
    )
Beispiel #12
0
def read_csv(
    filename, dtype=float, delimiter=",", has_header=False
) -> BlockArray:
    """Load the contents of a csv text file as a BlockArray.

    Args:
        filename: Path of the csv file to read.
        dtype: Data type of the entries in the file.
        delimiter: Separator between values in a row; a comma by default.
        has_header: True if the file starts with a header row, which is
            then discarded.

    Returns:
        A BlockArray instance.
    """
    app = _instance()
    return app.read_csv(filename, dtype, delimiter, has_header)
Beispiel #13
0
def read_csv(filename,
             dtype=float,
             delimiter=',',
             has_header=False) -> BlockArray:
    """
    Read a csv text file.
    :param filename: The filename of the csv.
    :param dtype: The data type of the csv file's entries. Defaults to the
        builtin ``float``, which is what the removed ``np.float`` alias
        referred to.
    :param delimiter: The value delimiter for each row; usually a comma.
    :param has_header: Whether the csv file has a header. The header is discarded.
    :return: A BlockArray instance.
    """
    # `np.float` no longer exists in NumPy >= 1.24; using it as a default
    # made this definition fail at import time.
    return _instance().read_csv(filename, dtype, delimiter, has_header)
Beispiel #14
0
def block_sgd(model: GLM, beta, X: BlockArray, y: BlockArray, tol: BlockArray,
              max_iter: int, lr: BlockArray):
    """Run SGD using the row blocks of ``X`` as mini-batches.

    Args:
        model: Provides ``forward`` and ``gradient``.
        beta: Parameters to update; modified with ``+=`` on every batch.
        X: Features; batches follow ``X.grid.grid_slices[0]``.
        y: Targets, sliced with the same row ranges as ``X``.
        tol: Convergence threshold on the largest absolute gradient entry.
        max_iter: Maximum number of passes over all batches.
        lr: Learning rate.

    Returns:
        The updated ``beta``, returned as soon as a batch gradient satisfies
        the tolerance or after ``max_iter`` passes.
    """
    # SGD with batches equal to block shape along first axis.
    app = _instance()
    for _ in range(max_iter):
        for (start, stop) in X.grid.grid_slices[0]:
            X_batch, y_batch = X[start:stop], y[start:stop]
            mu = model.forward(X_batch, beta)
            g = model.gradient(X_batch, y_batch, mu, beta=beta)
            beta += -lr * g
            if app.max(app.abs(g)) <= tol:
                # Converged: leave both loops. A bare `break` would only end
                # the current pass and the next pass would start regardless.
                return beta
    return beta
Beispiel #15
0
def std(a: BlockArray, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """Compute the standard deviation of a BlockArray via the application.

    Args:
        a: Input BlockArray.
        axis: Forwarded to the application's ``std``.
        dtype: Forwarded to the application's ``std``.
        out: Unsupported; must be ``None``.
        ddof: Forwarded to the application's ``std``.
        keepdims: Forwarded to the application's ``std``.

    Returns:
        The result of the application's ``std``.

    Raises:
        NotImplementedError: If ``out`` is given.
    """
    if out is None:
        return _instance().std(
            a, axis=axis, ddof=ddof, keepdims=keepdims, dtype=dtype
        )
    raise NotImplementedError("'out' is currently not supported.")
Beispiel #16
0
def train(params: Dict, data: NumsDMatrix, *args, evals=(), **kwargs):
    """Launch one remote ``xgb_train`` call per row block of the training data.

    Args:
        params: Forwarded to every remote ``xgb_train`` call.
        data: Provides the feature matrix ``X`` and the labels ``y``.
        *args: Forwarded (as a tuple) to every remote call.
        evals: Iterable of ``(eval_X, eval_y, eval_method)`` triples. Each
            array is reshaped to a single block when necessary, and the
            object ids of those blocks are forwarded to every remote call.
        **kwargs: Forwarded (as a dict) to every remote call.

    Returns:
        A BlockArray with one block per row block of ``X``; each block's
        ``oid`` is the value returned by the corresponding remote call.

    Raises:
        AssertionError: If ``X`` is not 2-D, if ``X`` and ``y`` disagree on
            the number of rows or on the row block size, or if ``y`` is
            neither 1-D nor a single column.
    """
    X: BlockArray = data.X
    y: BlockArray = data.y
    assert len(X.shape) == 2
    # X and y must agree on both the row count and the row block size,
    # because their blocks are paired by row index below.
    assert X.shape[0] == y.shape[0] and X.block_shape[0] == y.block_shape[0]
    assert len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1)

    app: ArrayApplication = _instance()
    cm: ComputeManager = app.cm
    cm.register("xgb_train", xgb_train_remote, {})

    # Start tracker
    num_workers = X.grid.grid_shape[0]
    env = _start_rabit_tracker(num_workers)
    rabit_args = [("%s=%s" % item).encode() for item in env.items()]

    # Flatten the evaluation sets into [X_oid, y_oid, method, ...] so they
    # can be passed as positional arguments to the remote call.
    evals_flat = []
    for eval_X, eval_y, eval_method in evals:
        # `.blocks.item()` below requires exactly one block per array.
        if eval_X.shape != eval_X.block_shape:
            eval_X = eval_X.reshape(shape=eval_X.shape,
                                    block_shape=eval_X.shape)
        if eval_y.shape != eval_y.block_shape:
            eval_y = eval_y.reshape(shape=eval_y.shape,
                                    block_shape=eval_y.shape)
        eval_X_oid = eval_X.blocks.item().oid
        eval_y_oid = eval_y.blocks.item().oid
        evals_flat += [eval_X_oid, eval_y_oid, eval_method]

    # Make every block span all columns, so the grid has a single column.
    X: BlockArray = X.reshape(block_shape=(X.block_shape[0], X.shape[1]))
    result: BlockArray = BlockArray(
        ArrayGrid(shape=(X.grid.grid_shape[0], ),
                  block_shape=(1, ),
                  dtype="dict"), cm)
    for grid_entry in X.grid.get_entry_iterator():
        X_block: Block = X.blocks[grid_entry]
        i = grid_entry[0]
        if len(y.shape) == 1:
            y_block: Block = y.blocks[i]
        else:
            y_block: Block = y.blocks[i, 0]
        syskwargs = {"grid_entry": grid_entry, "grid_shape": X.grid.grid_shape}
        result.blocks[i].oid = cm.call("xgb_train",
                                       X_block.oid,
                                       y_block.oid,
                                       rabit_args,
                                       params,
                                       args,
                                       kwargs,
                                       *evals_flat,
                                       syskwargs=syskwargs)
    return result
Beispiel #17
0
def arange(start=None, stop=None, step=1, dtype=None) -> BlockArray:
    """Build a 1-D BlockArray through the application's ``arange``.

    When only one bound is supplied it is treated as ``stop`` and ``start``
    becomes 0.

    Args:
        start: Lower bound, or the upper bound when ``stop`` is omitted.
        stop: Upper bound.
        step: Step size; only 1 is supported.
        dtype: Element type. When omitted it is derived from
            ``np.result_type(start, stop)``.

    Returns:
        The BlockArray produced by the application's ``arange``. Its length
        is ``ceil(stop - start)``, or 0 when ``stop`` is below ``start``.

    Raises:
        TypeError: If ``start`` is not provided.
        NotImplementedError: If ``step`` is not 1.
    """
    if start is None:
        raise TypeError("Missing required argument start")
    if stop is None:
        stop = start
        start = 0
    if step != 1:
        raise NotImplementedError("Only step size of 1 is currently supported.")
    if dtype is None:
        # Map the inferred dtype to the NumPy scalar type of the same name.
        dtype = getattr(np, str(np.result_type(start, stop)))
    length = int(np.ceil(stop - start))
    if length < 0:
        # An empty range must not turn into a negative dimension. An explicit
        # comparison is used because this library defines its own `max`.
        length = 0
    shape = (length,)
    app = _instance()
    block_shape = app.get_block_shape(shape, dtype)
    return app.arange(start, shape, block_shape, step, dtype)
Beispiel #18
0
    def __init__(
        self,
        penalty="none",
        alpha=1.0,
        l1_ratio=0.5,
        tol=0.0001,
        max_iter=100,
        solver="newton",
        lr=0.01,
        random_state=None,
        fit_intercept=True,
        normalize=False,
    ):
        """Validate the hyper-parameters and initialise the model state.

        Args:
            penalty: ``"none"``, ``"l1"``, ``"l2"`` or ``"elasticnet"``.
            alpha: Regularisation strength; stored as ``self._lambda``.
            l1_ratio: L1/L2 mixing ratio; stored as ``self.alpha``.
            tol: Stored as ``self._tol``.
            max_iter: Stored as ``self._max_iter``.
            solver: Stored as ``self._opt``.
            lr: Stored as ``self._lr``.
            random_state: ``None`` (use the application's random state), an
                integer seed, or a ``NumsRandomState``.
            fit_intercept: Must not be ``False``.
            normalize: Must not be ``True``.

        Raises:
            NotImplementedError: For ``fit_intercept=False``,
                ``normalize=True`` or an unsupported ``penalty``.
            TypeError: If ``random_state`` has an unsupported type.
        """
        if fit_intercept is False:
            raise NotImplementedError(
                "fit_intercept=False currently not supported.")
        if normalize is True:
            raise NotImplementedError(
                "normalize=True currently not supported.")

        self._app = _instance()
        if random_state is None:
            self.rs: NumsRandomState = self._app.random
        elif array_utils.is_int(random_state):
            self.rs: NumsRandomState = NumsRandomState(cm=self._app.cm,
                                                       seed=random_state)
        elif isinstance(random_state, NumsRandomState):
            self.rs: NumsRandomState = random_state
        else:
            # TypeError is still caught by callers using `except Exception`.
            raise TypeError("Unexpected type for random_state %s" %
                            str(type(random_state)))
        self._penalty = None if penalty == "none" else penalty
        if self._penalty not in (None, "l1", "l2", "elasticnet"):
            raise NotImplementedError("%s penalty not supported" %
                                      self._penalty)
        # All sources use lambda as regularization term, and alpha l1/l2 ratio.
        # Hence the public `alpha` argument is stored as `_lambda`, and
        # `l1_ratio` is stored as `alpha`.
        self._lambda = alpha
        self._l1penalty = None
        self._l1penalty_vec = None
        self._l2penalty = None
        self._l2penalty_vec = None
        self._l2penalty_diag = None
        self.alpha = l1_ratio
        self._tol = tol
        self._max_iter = max_iter
        self._opt = solver
        self._lr = lr
        self._beta = None
        self._beta0 = None
Beispiel #19
0
def from_modin(df):
    """Convert a Ray-backed modin DataFrame into a BlockArray.

    Every partition of the frame is converted to a NumPy array, wrapped as a
    BlockArray, and the pieces are concatenated along columns and then rows.

    Args:
        df: A modin ``DataFrame`` whose internal frame is a
            ``PandasOnRayFrame``. All columns must share one numeric dtype.

    Returns:
        A BlockArray whose block sizes along both axes come from the
        application's ``get_block_shape``.

    Raises:
        Exception: If the required modin modules cannot be imported.
        AssertionError: If ``df`` has an unexpected type, a non-numeric
            dtype, or columns with differing dtypes.
    """
    # pylint: disable = import-outside-toplevel, protected-access
    try:
        from modin.pandas.dataframe import DataFrame
        from modin.engines.ray.pandas_on_ray.frame.data import PandasOnRayFrame
        from modin.engines.ray.pandas_on_ray.frame.partition import PandasOnRayFramePartition
    except Exception as e:
        raise Exception("Unable to import modin. Install modin with command 'pip install modin'") \
            from e

    assert isinstance(df, DataFrame), "Unexpected dataframe type %s" % str(type(df))
    assert isinstance(df._query_compiler._modin_frame, PandasOnRayFrame), \
        "Unexpected dataframe type %s" % str(type(df._query_compiler._modin_frame))
    frame: PandasOnRayFrame = df._query_compiler._modin_frame

    app: ArrayApplication = _instance()
    system = app.cm

    # Make sure the partitions are numeric.
    # The removed `np.float` / `np.int` aliases were the builtins `float` and
    # `int`, which are already in this tuple.
    dtype = frame.dtypes[0]
    assert dtype in (float, np.float32, np.float64, int, np.int32, np.int64)
    # Make sure dtypes are equal: every column is compared with the first
    # one, because a single dtype is used for all blocks below.
    for dt in frame.dtypes:
        assert dt == dtype, "Expected a single dtype, got %s and %s" % (dtype, dt)
    dtype = getattr(np, str(dtype))

    # Convert from Pandas to NumPy.
    pd_parts = frame._frame_mgr_cls.map_partitions(frame._partitions,
                                                   lambda part_df: np.array(part_df))
    grid_shape = len(frame._row_lengths), len(frame._column_widths)

    shape = (np.sum(frame._row_lengths), np.sum(frame._column_widths))
    block_shape = app.get_block_shape(shape, dtype)
    rows = []
    for i in range(grid_shape[0]):
        cols = []
        for j in range(grid_shape[1]):
            curr_block_shape = (frame._row_lengths[i], frame._column_widths[j])
            part: PandasOnRayFramePartition = pd_parts[(i, j)]
            # Apply any pending operations before the object id is read.
            part.drain_call_queue()
            ba: BlockArray = BlockArray.from_oid(part.oid, curr_block_shape, dtype, system)
            cols.append(ba)
        if grid_shape[1] == 1:
            row_ba: BlockArray = cols[0]
        else:
            row_ba: BlockArray = app.concatenate(cols, axis=1, axis_block_size=block_shape[1])
        rows.append(row_ba)
    result = app.concatenate(rows, axis=0, axis_block_size=block_shape[0])
    return result
Beispiel #20
0
def sgd(
    model: GLM,
    beta,
    X: BlockArray,
    y: BlockArray,
    tol: BlockArray,
    max_iter: int,
    lr: BlockArray,
):
    """Run single-sample stochastic gradient descent.

    Args:
        model: Provides ``forward``, ``gradient`` and the random state ``rs``.
        beta: Parameters to update; modified with ``+=`` on every step.
        X: Features; one row is drawn per step.
        y: Targets, indexed with the same row as ``X``.
        tol: Convergence threshold on the largest absolute gradient entry.
        max_iter: Maximum number of steps.
        lr: Learning rate.

    Returns:
        The updated ``beta``.
    """
    # Classic SGD.
    app = _instance()
    for _step in range(max_iter):
        # Draw one row index uniformly at random.
        row = model.rs.numpy().integers(X.shape[0])
        X_row = X[row:row + 1]
        y_row = y[row:row + 1]
        prediction = model.forward(X_row, beta)
        grad = model.gradient(X_row, y_row, prediction, beta=beta)
        beta += -lr * grad
        # sklearn uses max instead of l2 norm.
        if app.max(app.abs(grad)) <= tol:
            return beta
    return beta
Beispiel #21
0
 def __init__(self, model: GLM, m=3, max_iter=100, thresh=1e-5, dtype=np.float64):
     """Store the optimiser settings and create its empty working state.

     Args:
         model: Stored as ``self.model`` and passed to
             ``BackTrackingLineSearch``.
         m: Number of slots allocated in ``self.memory``.
         max_iter: Stored as ``self.max_iter``.
         thresh: Stored as ``self.thresh``.
         dtype: Stored as ``self.dtype``.
     """
     self.app: ArrayApplication = _instance()
     self.model: GLM = model
     # Settings, stored verbatim.
     self.m, self.max_iter = m, max_iter
     self.thresh, self.dtype = thresh, dtype
     # Working state, filled in later.
     self.k = 0
     self.identity = None
     empty_slots: Union[List[LBFGSMemory], List[None]] = [None] * m
     self.memory = empty_slots
     self.ls = BackTrackingLineSearch(model)
Beispiel #22
0
def median(a: BlockArray, axis=None, out=None, keepdims=False) -> BlockArray:
    """Return the median of a BlockArray.

    Args:
        a: A BlockArray.
        axis: Unsupported; must be ``None``.
        out: Unsupported; must be ``None``.
        keepdims: Unsupported; must be falsy.

    Returns:
        The median value.

    Raises:
        NotImplementedError: If ``axis``, ``out`` or ``keepdims`` is set
            (checked in that order).
    """
    if axis is not None:
        unsupported = "axis"
    elif out is not None:
        unsupported = "out"
    elif keepdims:
        unsupported = "keepdims"
    else:
        return _instance().median(a)
    raise NotImplementedError("'%s' is currently not supported." % unsupported)
Beispiel #23
0
def gd(model: GLM, beta, X: BlockArray, y: BlockArray, tol: BlockArray,
       max_iter: int, lr: BlockArray):
    """Run full-batch gradient descent.

    Args:
        model: Provides ``forward`` and ``gradient``.
        beta: Parameters to update; modified with ``+=`` on every step.
        X: Features, used in full on every step.
        y: Targets.
        tol: Convergence threshold on the largest absolute gradient entry.
        max_iter: Maximum number of steps.
        lr: Learning rate.

    Returns:
        The updated ``beta``.
    """
    app = _instance()
    for _step in range(max_iter):
        prediction = model.forward(X, beta)
        grad = model.gradient(X, y, prediction, beta=beta)
        beta += -lr * grad
        if app.max(app.abs(grad)) <= tol:
            return beta
    return beta
Beispiel #24
0
    def __init__(self, train_size=0.75, lambda_U=0.3, lambda_V=0.3):
        """Create the model with fixed sizes and randomly generated data.

        Args:
            train_size: Accepted but not used in this constructor.
            lambda_U: Stored as ``self.lambda_U``.
            lambda_V: Stored as ``self.lambda_V``.
        """
        self._app = _instance()

        # Fixed problem dimensions.
        self.n_dims = 5
        self.parameters = {}
        self.lambda_U, self.lambda_V = lambda_U, lambda_V
        self.n_users = self.n_movies = 10

        # Randomly generated sparse train/test data.
        self.train_set = nps.random.randn_sparse(10, 10)
        self.test_set = nps.random.randn_sparse(3, 3)
        self.R = self.train_set

        # Factor matrices: U starts at zero, V starts random.
        self.U = nps.zeros((self.n_dims, self.n_users), dtype=np.float64)
        self.V = nps.random.randn(self.n_dims, self.n_movies)
Beispiel #25
0
def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, axis=0):
    """Raise ``base`` to evenly spaced exponents between ``start`` and ``stop``.

    The exponents are produced by ``linspace`` with its default dtype, and
    the result is cast only when ``dtype`` is given and differs.

    Args:
        start: First exponent, forwarded to ``linspace``.
        stop: Last exponent, forwarded to ``linspace``.
        num: Number of exponents, forwarded to ``linspace``.
        endpoint: Forwarded to ``linspace``.
        base: Base that is raised to each exponent.
        dtype: Optional element type of the result.
        axis: Forwarded to ``linspace``.

    Returns:
        A BlockArray holding ``base`` raised to each exponent.
    """
    app = _instance()
    exponents: BlockArray = linspace(
        start, stop, num, endpoint, dtype=None, axis=axis
    )
    powers = power(app.scalar(base), exponents)
    needs_cast = dtype is not None and dtype != powers.dtype
    return powers.astype(dtype) if needs_cast else powers
Beispiel #26
0
    def __init__(
        self,
        penalty="none",
        C=1.0,
        tol=0.0001,
        max_iter=100,
        solver="newton-cg",
        lr=0.01,
        random_state=None,
        fit_intercept=True,
        normalize=False,
    ):
        """Validate the hyper-parameters and initialise the model state.

        Args:
            penalty: ``"none"`` or ``"l2"``.
            C: Inverse regularisation strength; ``1.0 / C`` is stored as
                ``self._lambda``.
            tol: Stored as ``self._tol``.
            max_iter: Stored as ``self._max_iter``.
            solver: Stored as ``self._opt``.
            lr: Stored as ``self._lr``.
            random_state: ``None`` (use the application's random state), an
                integer seed, or a ``NumsRandomState``.
            fit_intercept: Must not be ``False``.
            normalize: Must not be ``True``.

        Raises:
            NotImplementedError: For ``fit_intercept=False``,
                ``normalize=True`` or an unsupported ``penalty``.
            TypeError: If ``random_state`` has an unsupported type.
        """
        if fit_intercept is False:
            raise NotImplementedError("fit_intercept=False currently not supported.")
        if normalize is True:
            raise NotImplementedError("normalize=True currently not supported.")

        self._app = _instance()
        if random_state is None:
            self.rs: NumsRandomState = self._app.random
        elif array_utils.is_int(random_state):
            self.rs: NumsRandomState = NumsRandomState(
                cm=self._app.cm, seed=random_state
            )
        elif isinstance(random_state, NumsRandomState):
            self.rs: NumsRandomState = random_state
        else:
            # TypeError is still caught by callers using `except Exception`.
            raise TypeError(
                "Unexpected type for random_state %s" % str(type(random_state))
            )
        self._penalty = None if penalty == "none" else penalty
        if not (self._penalty is None or self._penalty == "l2"):
            raise NotImplementedError("%s penalty not supported" % self._penalty)
        self._lambda = 1.0 / C
        self._lambda_vec = None
        self._tol = tol
        self._max_iter = max_iter
        self._opt = solver
        self._lr = lr
        self._beta = None
        self._beta0 = None
Beispiel #27
0
def top_k(a: BlockArray,
          k: int,
          largest=True,
          sorted=False) -> Tuple[BlockArray, BlockArray]:
    """Select the `k` largest or smallest elements of a BlockArray.

    When several elements tie for the kth position, which of them end up in
    the result is not specified.

    Args:
        a: A BlockArray.
        k: Number of top elements to return.
        largest: Whether to return largest or smallest elements.
        sorted: Unsupported; must be falsy.

    Returns:
        A tuple containing two BlockArrays, (`values`, `indices`).
        values: Values of the top k elements, unsorted.
        indices: Indices of the top k elements, ordered by their corresponding values.

    Raises:
        NotImplementedError: If ``sorted`` is truthy.
    """
    if not sorted:
        return _instance().top_k(a, k, largest=largest)
    # The result can be sorted when sorting is implemented.
    raise NotImplementedError("'sorted' is currently not supported.")
Beispiel #28
0
 def predict(self, X: BlockArray):
     """Launch one remote ``xgb_predict`` call per block of ``X``.

     Args:
         X: Input BlockArray; each of its grid entries is sent to a remote
             call together with the first block of ``self.model``.

     Returns:
         A 1-D BlockArray with ``X.shape[0]`` entries and block size
         ``X.block_shape[0]``, whose block ``oid`` values come from the
         remote calls.
     """
     app: ArrayApplication = _instance()
     system: System = app.system
     system.register("xgb_predict", xgb_predict_remote, {})
     trained_block: Block = self.model.blocks[0]
     output_grid = ArrayGrid(shape=(X.shape[0], ),
                             block_shape=(X.block_shape[0], ),
                             dtype=nps.int.__name__)
     predictions: BlockArray = BlockArray(output_grid, system)
     for entry in X.grid.get_entry_iterator():
         # Output blocks are indexed by the row coordinate of the entry.
         row = entry[0]
         input_block: Block = X.blocks[entry]
         output_block: Block = predictions.blocks[row]
         call_options = {"grid_entry": entry, "grid_shape": X.grid.grid_shape}
         output_block.oid = system.call("xgb_predict",
                                        trained_block.oid,
                                        input_block.oid,
                                        syskwargs=call_options)
     return predictions
Beispiel #29
0
def array(object, dtype=None, copy=True, order="K", ndmin=0, subok=False) -> BlockArray:
    """Create a BlockArray from an existing BlockArray or array-like object.

    Args:
        object: A BlockArray or anything accepted by ``np.array``.
        dtype: Optional element type.
        copy: For a BlockArray input, whether to copy it first; otherwise
            forwarded to ``np.array``.
        order: Only ``"K"`` (or ``None``) is supported.
        ndmin: Only 0 is supported.
        subok: Must be falsy.

    Returns:
        For a BlockArray input, that array (copied and/or cast as requested).
        Otherwise a BlockArray built from ``np.array(object, ...)``, with a
        block shape from the application's ``compute_block_shape``.

    Raises:
        NotImplementedError: For an unsupported ``order`` or ``ndmin``.
        ValueError: If ``subok`` is truthy.
    """
    if order is not None and order != "K":
        raise NotImplementedError("Only order='K' is supported.")
    if ndmin != 0:
        raise NotImplementedError("Only ndmin=0 is currently supported.")
    if subok:
        raise ValueError("subok must be False.")
    if isinstance(object, BlockArray):
        if copy:
            object = object.copy()
        # Compare dtypes by value, not identity: equal dtypes can be distinct
        # objects (e.g. `np.dtype("float64")` and `np.float64`), and an
        # identity test would then trigger a redundant cast.
        if dtype is not None and dtype != object.dtype:
            object = object.astype(dtype)
        return object
    result = np.array(
        object, dtype=dtype, copy=copy, order=order, ndmin=ndmin, subok=subok
    )
    # Map the resulting dtype to the NumPy scalar type of the same name.
    dtype = getattr(np, str(result.dtype))
    shape = result.shape
    app = _instance()
    block_shape = app.compute_block_shape(shape, dtype)
    return app.array(result, block_shape)
Beispiel #30
0
def qr(a, mode="reduced"):
    """Compute the reduced QR decomposition of ``a`` via the application.

    Args:
        a: Input passed to the application's ``qr``.
        mode: Only ``"reduced"`` is supported.

    Returns:
        The result of the application's ``qr``.

    Raises:
        NotImplementedError: If ``mode`` is anything other than ``"reduced"``.
    """
    if mode == "reduced":
        return _instance().qr(a)
    raise NotImplementedError("Only reduced QR decomposition is supported.")