def nanmean(self, a: BlockArray, axis=None, keepdims=False, dtype=None):
    if not array_utils.is_float(a):
        a = a.astype(np.float64)
    # Count the non-NaN entries along the reduced axes.
    num_summed = self.sum(
        ~a.ufunc("isnan"), axis=axis, dtype=a.dtype, keepdims=keepdims
    )
    if num_summed.ndim == 0 and num_summed == 0:
        return self.scalar(np.nan)
    if num_summed.ndim > 0:
        # Where a slice is all NaN, replace the zero count with NaN so the
        # division below yields NaN instead of dividing by zero.
        num_summed = self.where(
            num_summed == 0,
            self.empty(num_summed.shape, num_summed.block_shape) * np.nan,
            num_summed,
        )
    res = (
        self.reduce("nansum", a, axis=axis, dtype=dtype, keepdims=keepdims)
        / num_summed
    )
    if dtype is not None:
        res = res.astype(dtype)
    return res
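# A minimal NumPy sketch of the same nanmean logic, for reference only; it is
# not part of the BlockArray API, and _nanmean_sketch is a hypothetical name.
# It counts non-NaN entries per reduced axis and divides the nansum by that
# count, so all-NaN slices yield NaN rather than a division-by-zero error.
import numpy as np

def _nanmean_sketch(a, axis=None):
    counts = np.sum(~np.isnan(a), axis=axis).astype(np.float64)
    counts = np.where(counts == 0, np.nan, counts)  # all-NaN slice -> NaN
    return np.nansum(a, axis=axis) / counts

a = np.array([[1.0, np.nan], [np.nan, np.nan]])
print(_nanmean_sketch(a, axis=1))  # [1.0, nan]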
def mean(self, X: BlockArray, axis=None, keepdims=False, dtype=None):
    if X.dtype not in (float, np.float32, np.float64):
        X = X.astype(np.float64)
    # Total element count when reducing over all axes; otherwise the size of
    # the reduced axis. np.prod replaces the removed np.product alias.
    num_summed = np.prod(X.shape) if axis is None else X.shape[axis]
    res = self.sum(X, axis=axis, keepdims=keepdims) / num_summed
    if dtype is not None:
        res = res.astype(dtype)
    return res
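# Quick sanity check of the sum-divided-by-count formulation against NumPy's
# own mean; an illustrative sketch using plain ndarrays, not BlockArrays.
import numpy as np

X = np.arange(6, dtype=np.float64).reshape(2, 3)
assert np.allclose(X.sum(axis=0) / X.shape[0], X.mean(axis=0))
assert np.isclose(X.sum() / np.prod(X.shape), X.mean())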
def fit(self, X: BlockArray, y: BlockArray):
    # Note: it's critically important from a performance point of view
    # to maintain the original block shape of X below, along axis 1.
    # Otherwise, the concatenation operation will not construct the new X
    # by referencing X's existing blocks.
    # TODO: Option to do concat.
    # TODO: Provide support for batching.
    if np.issubdtype(X.dtype, np.integer):
        X = X.astype(float)
    # Append a column of ones so the last coefficient acts as the intercept.
    X = self._app.concatenate(
        [
            X,
            self._app.ones(
                shape=(X.shape[0], 1),
                block_shape=(X.block_shape[0], 1),
                dtype=X.dtype,
            ),
        ],
        axis=1,
        axis_block_size=X.block_shape[1],
    )
    assert len(X.shape) == 2 and len(y.shape) == 1
    beta: BlockArray = self._app.zeros(
        (X.shape[1],), (X.block_shape[1],), dtype=X.dtype
    )
    tol: BlockArray = self._app.scalar(self._tol)
    max_iter: int = self._max_iter
    if self._penalty == "l2":
        self._lambda_vec = (
            self._app.ones(beta.shape, beta.block_shape, beta.dtype) * self._lambda
        )
    if self._opt in ("gd", "sgd", "block_sgd"):
        lr: BlockArray = self._app.scalar(self._lr)
        if self._opt == "gd":
            beta = gd(self, beta, X, y, tol, max_iter, lr)
        elif self._opt == "sgd":
            beta = sgd(self, beta, X, y, tol, max_iter, lr)
        else:
            beta = block_sgd(self, beta, X, y, tol, max_iter, lr)
    elif self._opt in ("newton", "newton-cg"):
        beta = newton(self._app, self, beta, X, y, tol, max_iter)
    elif self._opt == "irls":
        # TODO (hme): Provide irls for all GLMs.
        assert isinstance(self, LogisticRegression)
        beta = irls(self._app, self, beta, X, y, tol, max_iter)
    elif self._opt == "lbfgs":
        beta = lbfgs(self._app, self, beta, X, y, tol, max_iter)
    else:
        raise Exception("Unsupported optimizer specified: %s." % self._opt)
    # The last coefficient is the intercept; the rest are feature weights.
    self._beta0 = beta[-1]
    self._beta = beta[:-1]
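# Sketch of the intercept trick used above, in plain NumPy (illustrative
# only; shapes and names are assumptions): appending a ones column lets a
# single coefficient vector carry the bias term, so the augmented product
# X_aug @ beta equals X @ weights + intercept.
import numpy as np

X = np.random.randn(4, 3)
beta = np.random.randn(4)  # 3 feature weights + 1 intercept
X_aug = np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)
assert np.allclose(X_aug @ beta, X @ beta[:-1] + beta[-1])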
def xlogy(self, x: BlockArray, y: BlockArray) -> BlockArray:
    if x.dtype not in (float, np.float32, np.float64):
        x = x.astype(np.float64)
    # Check y's dtype (not x's, as the original did) before casting y.
    if y.dtype not in (float, np.float32, np.float64):
        y = y.astype(np.float64)
    return self.map_bop("xlogy", x, y)
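# For reference, the standard xlogy primitive (as in scipy.special.xlogy)
# computes x * log(y) with the convention that the result is 0 wherever
# x == 0, even if y == 0, which makes it safe for entropy-style expressions.
# Minimal check of that convention:
import numpy as np
from scipy.special import xlogy

assert xlogy(0.0, 0.0) == 0.0
assert np.isclose(xlogy(2.0, np.e), 2.0)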
def mean(self, X: BlockArray, axis=0, keepdims=False):
    if X.dtype not in (float, np.float32, np.float64):
        X = X.astype(np.float64)
    return self.sum(X, axis=axis, keepdims=keepdims) / X.shape[axis]
def sqrt(self, X: BlockArray) -> BlockArray:
    if X.dtype not in (float, np.float32, np.float64):
        X = X.astype(np.float64)
    return X.ufunc("sqrt")
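# Hedged note on the float-cast pattern repeated across these helpers:
# promoting integer inputs to float64 up front keeps the subsequent
# reductions and ufuncs in floating point. A plain NumPy illustration of the
# promotion being made explicit rather than implicit:
import numpy as np

X = np.array([1, 2, 3])  # int64
assert X.astype(np.float64).dtype == np.float64
assert np.sqrt(X).dtype == np.float64  # sqrt promotes to float regardless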