def d(h):
    # Central-difference estimate of dL/dz[h]; z, W, y, and eps come from the
    # enclosing scope.
    eps_vec = np.zeros_like(z)
    eps_vec[h] = eps
    yhat_plus = logistic(W @ (z + eps_vec))
    yhat_minus = logistic(W @ (z - eps_vec))
    L_plus = self.loss(yhat_plus, y)
    L_minus = self.loss(yhat_minus, y)
    return (L_plus - L_minus) / (2 * eps)

def d(j):
    # Central-difference estimate of dL/dV[h, j]; V, W, x, y, h, and eps come
    # from the enclosing scope.
    eps_vec = np.zeros_like(V)
    eps_vec[h, j] = eps
    z_plus = tanh((V + eps_vec) @ x)
    z_minus = tanh((V - eps_vec) @ x)
    z_plus[-1] = 1    # keep the bias unit pinned at 1, as in forward()
    z_minus[-1] = 1
    yhat_plus = logistic(W @ z_plus)
    yhat_minus = logistic(W @ z_minus)
    L_plus = self.loss(yhat_plus, y)
    L_minus = self.loss(yhat_minus, y)
    return (L_plus - L_minus) / (2 * eps)

def d(h):
    # Central-difference estimate of d yhat[k] / d z[h].
    eps_vec = np.zeros_like(z)
    eps_vec[h] = eps
    yhat_plus = logistic(W @ (z + eps_vec))
    yhat_minus = logistic(W @ (z - eps_vec))
    return (yhat_plus[k] - yhat_minus[k]) / (2 * eps)

def d(h):
    # Central-difference estimate of d yhat[k] / d W[k, h].
    eps_vec = np.zeros_like(W)
    eps_vec[k, h] = eps
    yhat_plus = logistic((W + eps_vec) @ z)
    yhat_minus = logistic((W - eps_vec) @ z)
    return (yhat_plus[k] - yhat_minus[k]) / (2 * eps)

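# A minimal, self-contained sketch of the same central-difference check for
# d yhat[k] / d W[k, h], verified against the analytic derivative
# sigma'(w_k . z) * z[h]. Everything here (sigmoid, W_demo, z_demo, k, h, eps)
# is a local illustration, not part of the original code.
import numpy as np

def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

rng = np.random.default_rng(0)
W_demo = rng.normal(size=(3, 5))
z_demo = rng.normal(size=5)
k, h, eps = 1, 2, 1e-5

E = np.zeros_like(W_demo)
E[k, h] = eps
numerical = (sigmoid((W_demo + E) @ z_demo)[k]
             - sigmoid((W_demo - E) @ z_demo)[k]) / (2 * eps)

s = sigmoid(W_demo @ z_demo)[k]
analytic = s * (1 - s) * z_demo[h]
assert np.isclose(numerical, analytic, atol=1e-6)
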
def forward(self, X, V, W):
    Z = tanh(V @ X.T)
    # The last row of V is unused; z[-1] must always be 1, just as x[-1].
    Z[-1, :] = 1
    Yhat = logistic(W @ Z).T    # use the W that was passed in, not self.W
    return Z, Yhat

def logistic_regression_newton_update(w, X, y, _lambda):
    s = logistic(X @ w)
    # Gradient of _lambda * ||w||^2 minus the log-likelihood.
    gradient = 2 * _lambda * w - X.T @ (y - s)
    # Hessian: X^T diag(s * (1 - s)) X from the likelihood term, plus
    # 2 * _lambda * I from the regularizer.
    B = np.diag((s * (1 - s)).ravel())
    hessian = X.T @ B @ X + 2 * _lambda * np.eye(len(w))
    # Solve the Newton system rather than forming an explicit inverse.
    return w - np.linalg.solve(hessian, gradient)

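# A hedged usage sketch of the Newton update above on synthetic data. It assumes
# numpy (as np) and the logistic() helper are already in scope; the toy data,
# regularization strength, iteration cap, and tolerance are illustrative only.
rng = np.random.default_rng(1)
X_toy = rng.normal(size=(200, 3))
y_toy = (X_toy @ np.array([[1.0], [-2.0], [0.5]])
         + 0.5 * rng.normal(size=(200, 1)) > 0).astype(float)

w = np.zeros((3, 1))
for _ in range(25):
    w_new = logistic_regression_newton_update(w, X_toy, y_toy, _lambda=0.1)
    if np.linalg.norm(w_new - w) < 1e-8:
        break
    w = w_new
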
def gradient(self, X, y, w):
    return 2 * self._lambda * w - X.T @ (y - logistic(X @ w))

def loglike(self, w, X, y):
    # Bernoulli log-likelihood: sum_i y_i log s_i + (1 - y_i) log(1 - s_i),
    # written here with the labels in the exponents.
    Xw = X @ w
    return (log(logistic(Xw)**y) + log((1 - logistic(Xw))**(1 - y))).sum()

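# A hedged alternative, not part of the original code: the same quantity can be
# computed without overflow/underflow via np.logaddexp, using the identities
# log sigma(t) = -logaddexp(0, -t) and log(1 - sigma(t)) = -logaddexp(0, t).
def loglike_stable(w, X, y):
    Xw = X @ w
    return -(y * np.logaddexp(0, -Xw) + (1 - y) * np.logaddexp(0, Xw)).sum()
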
def predict(self, X):
    # Threshold the predicted probabilities at 0.5 to get hard 0/1 labels.
    prob = logistic(X @ self.w)
    return np.array(prob > 0.5, dtype=int)    # np.int was removed from NumPy