def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None, rho=None,
                            q=None):
    """Computes the noise-corrected logistic loss and its gradient.

    Samples flagged clean (``q == 1``) contribute the ordinary logistic
    loss ``-log_logistic(yz)``.  Samples flagged noisy (``q == 0``)
    contribute the weighted combination::

        ((1 - rho[1 - y01]) * loss(+yz) - rho[y01] * loss(-yz))
            / (1 - rho[0] - rho[1])

    where ``y01`` is 1 where ``y == 1`` and 0 elsewhere.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    rho : array-like of two floats, optional
        Noise rates, indexed by the 0/1 label encoding ``y01``.  If not
        provided, both rates are taken to be zero, which makes the noisy
        term identical to the ordinary logistic loss.
        ``rho[0] + rho[1]`` must differ from 1.

    q : ndarray, shape (n_samples,) optional
        Per-sample flag: 0 = noisy label, 1 = clean label.  If not
        provided, every sample is treated as noisy.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    _, n_features = X.shape
    grad = np.empty_like(w)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])
    # 0: noise, 1: clean
    if q is None:
        q = np.zeros_like(y)
    if rho is None:
        # No noise model supplied: zero flip rates.
        rho = np.zeros(2)

    y01 = np.array(y == 1, dtype=int)

    # NOTE(review): _intercept_dot is defined elsewhere; it is expected to
    # strip the intercept from w and return the margins yz = y * (X.w + c).
    w, c, yz = _intercept_dot(w, X, y)

    loss_yzp = -log_logistic(+yz)
    loss_yzn = -log_logistic(-yz)

    # Signed, normalised weights of the two loss terms.  Using the very same
    # wp / wn in the loss and in the gradient keeps the two consistent.
    norm = 1 - rho[0] - rho[1]
    wp = (1 - np.take(rho, 1 - y01)) / norm
    wn = -np.take(rho, y01) / norm

    noise_loss = np.sum(
        sample_weight * (1 - q) * (wp * loss_yzp + wn * loss_yzn))
    clean_loss = np.sum(sample_weight * q * loss_yzp)
    out = clean_loss + noise_loss + .5 * alpha * np.dot(w, w)

    # d loss(+yz) / d yz = z - 1   and   d loss(-yz) / d yz = z,
    # with z = expit(yz); the factor y comes from d yz / d (X.w + c).
    z = expit(yz)
    z0 = sample_weight * y * (
        q * (z - 1) + (1 - q) * (wp * (z - 1) + wn * z))

    grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum()
    return out, grad
def _logistic_loss_and_grad(w, X, y, alpha, mask, sample_weight=None):
    """Computes the sample-averaged logistic loss and gradient under a mask.

    The data term of the loss and of the gradient is divided by
    ``n_samples``; the L2 penalty ``.5 * alpha * w.w`` is not.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.  When ``mask`` is not None the first
        ``n_features`` entries are multiplied by ``mask`` IN PLACE, so the
        caller's array is modified.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    mask : array-like or None
        Multiplied element-wise into ``w[:n_features]`` before the loss is
        evaluated and into ``grad[:n_features]`` afterwards.  Pass None to
        disable masking.

    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    n_samples, n_features = X.shape
    masked = mask is not None

    if masked:
        # Deliberately in place: the caller's coefficients are masked too.
        w[:n_features] *= mask

    grad = np.empty_like(w)

    # NOTE(review): _intercept_dot is defined elsewhere; it is expected to
    # strip the intercept from w and return the margins yz.
    w, c, yz = _intercept_dot(w, X, y)

    weights = np.ones(n_samples) if sample_weight is None else sample_weight

    # Logistic loss is the negative of the log of the logistic function.
    data_term = -np.sum(weights * log_logistic(yz)) / n_samples
    penalty = .5 * alpha * np.dot(w, w)
    out = data_term + penalty

    probs = expit(yz)
    residual = weights * (probs - 1) * y

    grad[:n_features] = (safe_sparse_dot(X.T, residual) / n_samples) + alpha * w
    if masked:
        grad[:n_features] *= mask

    # Case where we fit the intercept.
    fits_intercept = grad.shape[0] > n_features
    if fits_intercept:
        grad[-1] = residual.sum() / n_samples

    return out, grad
def _logistic_loss(w, X, y, alpha, sample_weight=None, rho=None, q=None):
    """Computes the logistic loss with an extra term for noisy samples.

    The ordinary weighted logistic loss plus L2 penalty is computed over
    all samples.  For every sample flagged noisy (``q == 0``) the weighted
    value of ``log_noise_logistic`` is then added.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    rho : array-like of two floats, optional
        Noise rates, indexed by the 0/1 label encoding (1 where
        ``y == 1``, 0 elsewhere).  Required whenever at least one sample
        is flagged noisy.

    q : ndarray, shape (n_samples,) optional
        Per-sample flag; entries equal to 0 mark noisy samples.  If not
        provided, every sample is treated as noisy.

    Returns
    -------
    out : float
        Logistic loss.
    """
    # NOTE(review): _intercept_dot is defined elsewhere; it is expected to
    # strip the intercept from w and return the margins yz.
    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)

    # add noise term
    if q is None:
        q = np.zeros_like(y)
    # Builtin bool: the np.bool alias was removed in NumPy 1.24.
    qnoise = np.array(q == 0, dtype=bool)

    if np.any(qnoise):
        # Column 0 holds rho of the opposite label, column 1 rho of the
        # sample's own label; only the noisy rows are needed.
        labels = np.array(y == 1, dtype=int)[qnoise]
        rates = np.asarray(rho)
        rho_noisy = np.column_stack((rates[1 - labels], rates[labels]))
        out += np.sum(
            sample_weight[qnoise] * log_noise_logistic(yz[qnoise], rho_noisy))
    return out
def _logistic_loss(w, X, y, alpha, sample_weight=None, rho=None, q=None):
    """Computes the noise-corrected logistic loss.

    Samples flagged clean (``q == 1``) contribute the ordinary logistic
    loss ``-log_logistic(yz)``.  Samples flagged noisy (``q == 0``)
    contribute ``wp * loss(+yz) + wn * loss(-yz)`` with::

        wp = (1 - rho[1 - y01]) / (1 - rho[0] - rho[1])
        wn =      -rho[y01]     / (1 - rho[0] - rho[1])

    where ``y01`` is 1 where ``y == 1`` and 0 elsewhere.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    rho : array-like of two floats, optional
        Noise rates, indexed by the 0/1 label encoding ``y01``.  If not
        provided, both rates are taken to be zero, which makes the noisy
        term identical to the ordinary logistic loss.
        ``rho[0] + rho[1]`` must differ from 1.

    q : ndarray, shape (n_samples,) optional
        Per-sample flag: 0 = noisy label, 1 = clean label.  If not
        provided, every sample is treated as noisy.

    Returns
    -------
    out : float
        Logistic loss.
    """
    # 0: noise, 1: clean
    if q is None:
        q = np.zeros_like(y)
    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])
    if rho is None:
        # No noise model supplied: zero flip rates.
        rho = np.zeros(2)

    y01 = np.array(y == 1, dtype=int)

    # NOTE(review): _intercept_dot is defined elsewhere; it is expected to
    # strip the intercept from w and return the margins yz.
    w, c, yz = _intercept_dot(w, X, y)

    loss_yzp = -log_logistic(+yz)
    loss_yzn = -log_logistic(-yz)

    norm = 1 - rho[0] - rho[1]
    wp = (1 - np.take(rho, 1 - y01)) / norm
    wn = -np.take(rho, y01) / norm

    noise_loss = np.sum(
        sample_weight * (1 - q) * (wp * loss_yzp + wn * loss_yzn))
    clean_loss = np.sum(sample_weight * q * loss_yzp)

    out = clean_loss + noise_loss + .5 * alpha * np.dot(w, w)
    return out
def _logistic_loss_and_grad(w, alpha, X, y, lamda, sample_weight=None):
    """Computes the logistic loss summed over groups and its gradient in w.

    For each index ``i`` the 3-D array ``X[i]`` is contracted with ``w``
    along its first axis, the result is scored with the coefficients
    ``alpha[i]``, and the logistic loss of that group is accumulated.
    The gradient is taken with respect to ``w`` only.

    Parameters
    ----------
    w : ndarray, shape (n_kernels,)
        Weights contracted with the first axis of every ``X[i]``.

    alpha : sequence of ndarray
        ``alpha[i]`` is the coefficient vector handed to ``_intercept_dot``
        for group ``i``.
        NOTE(review): presumably of length n_features or n_features + 1
        (with intercept) -- confirm against ``_intercept_dot``.

    X : sequence of ndarray, each of shape (n_kernels, n_samples, n_features)
        One 3-D array per group.

    y : sequence of ndarray
        ``y[i]`` holds the labels of group ``i``.

    lamda : float
        Strength of the L2 penalty ``.5 * lamda * w.w``.

    sample_weight : array-like, shape (n_samples,) optional
        Array of weights that are assigned to individual samples; the same
        array is used for every group.  If not provided, then each sample
        is given unit weight.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, same shape as w
        Gradient of the loss with respect to ``w``.
    """
    total_loss = 0.
    grad = np.zeros_like(w)

    # Private snapshot of the caller's weights (None when not supplied).
    given_weight = None if sample_weight is None else sample_weight.copy()

    for i in range(len(X)):
        X_i = X[i]
        _, n_samples, _ = X_i.shape

        # Collapse the leading axis with w.
        combined = np.tensordot(w, X_i, axes=1)
        alpha_i, c, yz = _intercept_dot(alpha[i], combined, y[i])

        if given_weight is None:
            weights = np.ones(n_samples)
        else:
            weights = given_weight

        # Logistic loss is the negative of the log of the logistic function.
        total_loss += -np.sum(weights * log_logistic(yz))

        probs = expit(yz)
        residual = weights * (probs - 1) * y[i]
        grad += safe_sparse_dot(X_i.dot(alpha_i), residual)

    total_loss += .5 * lamda * np.dot(w, w)
    grad += lamda * w
    return total_loss, grad
def update_rho(w, X, y, rho, q, beta=np.ones((2, 2))):
    """Re-estimate the noise rates from the current coefficients.

    The rows of ``X`` flagged noisy (``q == 0``) are scored with ``w``,
    passed through the logistic function, and handed to
    ``update_noise_rates`` together with ``y``, ``rho``, ``q`` and ``beta``.

    Parameters
    ----------
    w : ndarray
        Coefficient vector, passed to ``_intercept_dot``.

    X : {ndarray, sparse matrix}, shape (n_samples, n_features)
        Training data; must support boolean row indexing.

    y : ndarray, shape (n_samples,)
        Array of labels (forwarded unfiltered).

    rho : array-like
        Current noise rates.

    q : ndarray, shape (n_samples,)
        Per-sample flag; entries equal to 0 mark noisy samples.

    beta : ndarray, shape (2, 2), optional
        Forwarded unchanged to ``update_noise_rates``.
        NOTE(review): the default array is created once and shared by all
        calls; this is only safe if ``update_noise_rates`` does not modify
        it -- confirm.

    Returns
    -------
    rho
        Whatever ``update_noise_rates`` returns.
    """
    # Builtin bool: the np.bool alias was removed in NumPy 1.24.
    qnoise = np.array(q == 0, dtype=bool)

    # All-ones labels are passed so that the third return value is
    # presumably the raw decision value for each noisy row -- confirm
    # against _intercept_dot.
    _, _, logit = _intercept_dot(w, X[qnoise, :], np.ones(np.sum(qnoise)))
    z = expit(logit)

    # z covers only the noisy rows, while y and q are passed in full.
    return update_noise_rates(z, y, rho, q, beta)
def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None, rho=None,
                            q=None):
    """Computes the logistic loss and gradient with a noisy-sample term.

    The ordinary weighted logistic loss plus L2 penalty is computed over
    all samples.  When at least one sample is flagged noisy (``q == 0``),
    the weighted ``log_noise_logistic`` values of the noisy samples are
    added to the loss and ``expit_noise`` is added to the predicted
    probabilities before the gradient is formed.

    Parameters
    ----------
    w : ndarray, shape (n_features,) or (n_features + 1,)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    y : ndarray, shape (n_samples,)
        Array of labels.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    rho : array-like of two floats, optional
        Noise rates, indexed by the 0/1 label encoding (1 where
        ``y == 1``, 0 elsewhere).  Required whenever at least one sample
        is flagged noisy.

    q : ndarray, shape (n_samples,) optional
        Per-sample flag; entries equal to 0 mark noisy samples.  If not
        provided, every sample is treated as noisy.

    Returns
    -------
    out : float
        Logistic loss.

    grad : ndarray, shape (n_features,) or (n_features + 1,)
        Logistic gradient.
    """
    _, n_features = X.shape
    grad = np.empty_like(w)

    # NOTE(review): _intercept_dot is defined elsewhere; it is expected to
    # strip the intercept from w and return the margins yz.
    w, c, yz = _intercept_dot(w, X, y)

    if sample_weight is None:
        sample_weight = np.ones(y.shape[0])

    # Logistic loss is the negative of the log of the logistic function.
    out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)

    z = expit(yz)

    # add noise term
    if q is None:
        q = np.zeros_like(y)
    # Builtin bool: the np.bool alias was removed in NumPy 1.24.
    qnoise = np.array(q == 0, dtype=bool)

    if np.any(qnoise):
        # Per sample: column 0 holds rho of the opposite label, column 1
        # rho of the sample's own label.
        y01 = np.array(y == 1, dtype=int)
        rates = np.asarray(rho)
        rho_y = np.column_stack((rates[1 - y01], rates[y01]))

        z += expit_noise(yz, qnoise, rho_y)

        out += np.sum(
            sample_weight[qnoise]
            * log_noise_logistic(yz[qnoise], rho_y[qnoise, :]))

    z0 = sample_weight * (z - 1) * y

    grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w

    # Case where we fit the intercept.
    if grad.shape[0] > n_features:
        grad[-1] = z0.sum()
    return out, grad