def norm(self, X: BlockArray, order=2) -> BlockArray:
    assert len(X.shape) == 1, "Only vector norms are supported."
    assert order in (1, 2), "Only order 1 and 2 norms supported."
    if order == 2:
        # L2 norm: sqrt(X^T X). transpose(defer=True) avoids materializing X^T.
        return self.sqrt(X.transpose(defer=True) @ X)
    else:
        # L1 norm: sum of absolute values.
        return self.sum(self.abs(X))
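# A minimal NumPy reference for the same math, useful as a sanity check. This
# sketch is an illustration only and is not part of the BlockArray API; it
# assumes nothing beyond numpy.
import numpy as np

def norm_reference(x: np.ndarray, order: int = 2) -> float:
    assert x.ndim == 1 and order in (1, 2)
    if order == 2:
        return float(np.sqrt(x.T @ x))  # sqrt(x^T x)
    return float(np.sum(np.abs(x)))  # sum of absolute values

# Example: norm_reference(np.array([3.0, 4.0])) == 5.0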
def gradient(
    self,
    X: BlockArray,
    y: BlockArray,
    mu: BlockArray = None,
    beta: BlockArray = None,
):
    if mu is None:
        mu = self.forward(X)
    # Gradient of the negative log-likelihood: X^T (mu - y).
    return X.transpose(defer=True) @ (mu - y)
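# Hedged NumPy sketch of the same gradient, X^T (mu - y). The sigmoid-based
# computation of mu is an assumption for illustration (logistic regression);
# the model class above obtains mu through its own `forward` / link function.
import numpy as np

def gradient_reference(X: np.ndarray, y: np.ndarray, beta: np.ndarray) -> np.ndarray:
    mu = 1.0 / (1.0 + np.exp(-(X @ beta)))  # inverse logit link (assumed)
    return X.T @ (mu - y)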
def hessian(self, X: BlockArray, y: BlockArray, mu: BlockArray = None):
    if mu is None:
        mu = self.forward(X)
    dim, block_dim = mu.shape[0], mu.block_shape[0]
    # Variance terms mu * (1 - mu), reshaped to a column for broadcasting.
    s = (mu * (self._app.one - mu)).reshape((dim, 1), block_shape=(block_dim, 1))
    # Hessian of the logistic negative log-likelihood: X^T diag(s) X.
    r = X.transpose(defer=True) @ (s * X)
    if self._penalty is not None:
        r += self.hessian_penalty()
    return r
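# NumPy sketch of the unpenalized Hessian X^T diag(s) X with s = mu * (1 - mu).
# Broadcasting s as a column reproduces diag(s) @ X without ever forming the
# diagonal matrix, mirroring the blocked version above.
import numpy as np

def hessian_logistic_reference(X: np.ndarray, mu: np.ndarray) -> np.ndarray:
    s = (mu * (1.0 - mu)).reshape(-1, 1)  # column of variance terms
    return X.T @ (s * X)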
def gradient(
    self,
    X: BlockArray,
    y: BlockArray,
    mu: BlockArray = None,
    beta: BlockArray = None,
):
    if mu is None:
        mu = self.forward(X)
    r = X.transpose(defer=True) @ (mu - y)
    if self._penalty is not None:
        assert beta is not None
        r += self.grad_penalty(beta)
    return r
def ridge_regression(app: ArrayApplication, X: BlockArray, y: BlockArray, lamb: float):
    assert len(X.shape) == 2
    assert len(y.shape) == 1
    assert lamb >= 0
    block_shape = X.block_shape
    shape = X.shape
    R_shape = (shape[1], shape[1])
    R_block_shape = (block_shape[1], block_shape[1])
    # R is the upper-triangular factor of a tall-skinny QR of X, so R^T R = X^T X.
    R = indirect_tsr(app, X)
    lamb_vec = app.array(lamb * np.eye(R_shape[0]), block_shape=R_block_shape)
    # TODO (hme): A better solution exists, which inverts R by augmenting X and y.
    #  See Murphy 7.5.2.
    theta = inv(app, lamb_vec + R.transpose(defer=True) @ R) @ (
        X.transpose(defer=True) @ y
    )
    return theta
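# NumPy sketch of the same ridge solve. Since R^T R = X^T X for the R factor of
# a QR decomposition of X, the blocked solver above is algebraically equivalent
# to the regularized normal equations (lambda*I + X^T X) theta = X^T y. Here
# np.linalg.solve stands in for the explicit inverse used above.
import numpy as np

def ridge_reference(X: np.ndarray, y: np.ndarray, lamb: float) -> np.ndarray:
    d = X.shape[1]
    R = np.linalg.qr(X, mode="r")  # upper-triangular factor; R^T R == X^T X
    return np.linalg.solve(lamb * np.eye(d) + R.T @ R, X.T @ y)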
def irls(
    app: ArrayApplication,
    model: LogisticRegression,
    beta,
    X: BlockArray,
    y: BlockArray,
    tol: BlockArray,
    max_iter: int,
):
    for _ in range(max_iter):
        eta: BlockArray = X @ beta
        mu: BlockArray = model.link_inv(eta)
        # Working weights; the epsilon guards against division by zero.
        s = mu * (1 - mu) + 1e-16
        XT_s = X.transpose(defer=True) * s
        # These are PSD, but inv is faster than psd inv.
        XTsX_inv = linalg.inv(app, XT_s @ X)
        # Working response for the weighted least squares step.
        z = eta + (y - mu) / s
        beta = XTsX_inv @ XT_s @ z
        g = model.gradient(X, y, mu, beta)
        if app.max(app.abs(g)) <= tol:
            break
    return beta
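# Self-contained NumPy sketch of the same IRLS iteration, written with an
# explicit sigmoid link for logistic regression. As above, `tol` bounds the
# max-abs gradient; np.linalg.solve replaces the explicit inverse. This is an
# illustration under those assumptions, not the blocked implementation itself.
import numpy as np

def irls_reference(X, y, beta, tol=1e-8, max_iter=10):
    for _ in range(max_iter):
        eta = X @ beta
        mu = 1.0 / (1.0 + np.exp(-eta))
        s = mu * (1.0 - mu) + 1e-16
        XT_s = X.T * s  # X^T diag(s) via broadcasting
        z = eta + (y - mu) / s  # working response
        beta = np.linalg.solve(XT_s @ X, XT_s @ z)
        if np.max(np.abs(X.T @ (mu - y))) <= tol:
            break
    return beta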
def hessian(self, X: BlockArray, y: BlockArray, mu: BlockArray = None):
    if mu is None:
        mu = self.forward(X)
    # TODO (hme): This is sub-optimal as it forces the computation of X.T.
    # Hessian X^T diag(mu) X, with mu broadcast over the columns of X^T.
    return (X.transpose(defer=True) * mu) @ X
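# NumPy sketch of the Hessian X^T diag(mu) X computed above; broadcasting mu
# against the columns of X^T avoids forming diag(mu). An exponential inverse
# link (mu = exp(X beta), as in Poisson regression) is one setting where this
# Hessian arises; that interpretation is an assumption for illustration.
import numpy as np

def hessian_mu_reference(X: np.ndarray, mu: np.ndarray) -> np.ndarray:
    return (X.T * mu) @ X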
def hessian(self, X: BlockArray, y: BlockArray, mu: BlockArray = None):
    # For squared loss the Hessian X^T X is constant; mu is unused.
    r = X.transpose(defer=True) @ X
    if self._penalty is not None:
        r += self.hessian_penalty()
    return r