def _update_beta_jac_bcd(X, y, beta, dbeta, dual_var, ddual_var, alpha, L,
                         compute_jac=True):
    """Run one in-place pass of coordinate updates over every column of X.

    For each feature ``j`` a per-column curvature ``L_temp`` is recomputed
    from ``sigma(dual_var)``, ``beta[j]`` is replaced by
    ``ST(zj, alpha[j] / L_temp)`` and ``dual_var`` is shifted by
    ``y * X[:, j] * (beta[j] - beta_old)``.  When ``compute_jac`` is true the
    matching recursion is applied to ``dbeta`` and ``ddual_var``.

    Columns whose curvature ``L_temp`` is exactly zero (e.g. an all-zero
    column) are skipped, mirroring the ``L_temp != 0`` guard of the sparse
    variant.  Without the guard the step divides by zero and the resulting
    NaN/inf is written to ``beta[j]`` and then spread over ``dual_var``.

    ``sigma`` and ``ST`` are defined elsewhere in the project (presumably the
    sigmoid and the soft-thresholding operator -- confirm).

    Parameters
    ----------
    X : array indexed as ``X[:, j]``; ``X.shape`` gives
        ``(n_samples, n_features)``.
    y : array multiplied element-wise with ``X[:, j]`` and ``dual_var``.
    beta : coefficient vector, updated in place.
    dbeta : derivative of ``beta``, updated in place when ``compute_jac``.
        Assumed 1-D so that ``dbeta[j]`` is a scalar snapshot -- confirm.
    dual_var : updated in place.
    ddual_var : derivative of ``dual_var``, updated in place when
        ``compute_jac``.
    alpha : per-feature regularisation, read as ``alpha[j]``.
    L : not used by this function (the curvature is recomputed per column).
    compute_jac : when false, ``dbeta`` and ``ddual_var`` are left untouched.

    Returns
    -------
    None.  All results are written into the arguments.
    """
    n_samples, n_features = X.shape
    for j in range(n_features):
        Xj = X[:, j]
        # sigma must be re-evaluated for every column: dual_var changes
        # after each coordinate update.
        sigmar = sigma(dual_var)
        L_temp = np.sum(Xj ** 2 * sigmar * (1 - sigmar))
        L_temp /= n_samples
        if L_temp == 0:
            # No curvature along this coordinate: the step below would be a
            # division by zero, so leave beta[j] (and its derivative) as is.
            continue

        beta_old = beta[j]
        if compute_jac:
            dbeta_old = dbeta[j]

        grad_j = Xj @ (y * (sigmar - 1))
        zj = beta[j] - grad_j / (L_temp * n_samples)
        beta[j] = ST(zj, alpha[j] / L_temp)
        dual_var += y * Xj * (beta[j] - beta_old)

        if compute_jac:
            # sigmar is deliberately the value from *before* the dual_var
            # update above, as in the original recursion.
            dsigmar = sigmar * (1 - sigmar) * ddual_var
            hess_fj = Xj @ (y * dsigmar)
            dzj = dbeta[j] - hess_fj / (L_temp * n_samples)
            dbeta[j:j + 1] = np.abs(np.sign(beta[j])) * dzj
            dbeta[j:j + 1] -= alpha[j] * np.sign(beta[j]) / L_temp
            # update residuals
            ddual_var += y * Xj * (dbeta[j] - dbeta_old)
def _update_beta_jac_bcd_sparse(
        data, indptr, indices, y, n_samples, n_features, beta, dbeta,
        dual_var, ddual_var, alphas, L, compute_jac=True):
    """Run one in-place coordinate pass on a column-compressed matrix.

    Only features with ``L[j] != 0`` are visited.  Column ``j`` is read as
    ``data[indptr[j]:indptr[j + 1]]`` with row positions
    ``indices[indptr[j]:indptr[j + 1]]``.  For each visited feature,
    ``beta[j]`` is replaced by ``ST(zj, alphas[j] / L[j])`` and the touched
    entries of ``dual_var`` are corrected.  When ``compute_jac`` is true the
    row ``dbeta[j, :]`` and the touched rows of ``ddual_var`` are updated
    with the matching recursion.

    ``ST`` is defined elsewhere in the project (presumably the
    soft-thresholding operator -- confirm).  ``y`` and ``n_features`` are
    accepted but not used here.  ``dbeta`` and ``ddual_var`` are indexed as
    2-D arrays.

    Returns None; ``beta``, ``dbeta``, ``dual_var`` and ``ddual_var`` are
    modified in place.
    """
    for j in np.where(L != 0)[0]:
        # Slice out the stored values / row positions of column j.
        start, stop = indptr[j], indptr[j + 1]
        col_vals = data[start:stop]
        rows = indices[start:stop]
        scale = L[j] * n_samples

        prev_coef = beta[j]
        if compute_jac:
            # Copy: dbeta[j, :] is a view that is overwritten below.
            prev_jac_row = dbeta[j, :].copy()

        step_point = beta[j] + dual_var[rows] @ col_vals / scale
        beta[j:j + 1] = ST(step_point, alphas[j] / L[j])

        if compute_jac:
            sgn = np.sign(beta[j])
            jac_pre = dbeta[j, :] + col_vals @ ddual_var[rows, :] / scale
            dbeta[j:j + 1, :] = np.abs(sgn) * jac_pre
            # Only the diagonal entry receives the threshold derivative.
            dbeta[j:j + 1, j] -= alphas[j] * sgn / L[j]
            # Propagate the Jacobian change to the residual derivative.
            ddual_var[rows, :] -= np.outer(
                col_vals, dbeta[j, :] - prev_jac_row)
        dual_var[rows] -= col_vals * (beta[j] - prev_coef)
def _update_beta_jac_bcd(X, y, beta, dbeta, dual_var, ddual_var, alpha, L,
                         compute_jac=True):
    """Run one in-place coordinate pass over the columns of a dense ``X``.

    Only features with ``L[j] != 0`` are visited.  For each of them
    ``beta[j]`` is replaced by ``ST(zj, alpha[j] / L[j])`` and ``dual_var``
    is corrected by the change in ``beta[j]`` times column ``j``.  When
    ``compute_jac`` is true, row ``dbeta[j, :]`` and ``ddual_var`` are
    updated with the matching recursion.

    ``ST`` is defined elsewhere in the project (presumably the
    soft-thresholding operator -- confirm).  ``y`` is accepted but not used
    here.  ``dbeta`` is indexed as a 2-D array; ``ddual_var`` receives an
    outer-product update, so it is 2-D as well.

    Returns None; ``beta``, ``dbeta``, ``dual_var`` and ``ddual_var`` are
    modified in place.
    """
    n_samples, _ = X.shape
    for j in np.where(L != 0)[0]:
        col = X[:, j]
        scale = L[j] * n_samples

        prev_coef = beta[j]
        if compute_jac:
            # Copy: dbeta[j, :] is a view that is overwritten below.
            prev_jac_row = dbeta[j, :].copy()

        step_point = beta[j] + dual_var @ col / scale
        beta[j:j + 1] = ST(step_point, alpha[j] / L[j])

        if compute_jac:
            sgn = np.sign(beta[j])
            jac_pre = dbeta[j, :] + col @ ddual_var / scale
            dbeta[j:j + 1, :] = np.abs(sgn) * jac_pre
            # Only the diagonal entry receives the threshold derivative.
            dbeta[j:j + 1, j] -= alpha[j] * sgn / L[j]
            # Propagate the Jacobian change to the residual derivative.
            ddual_var -= np.outer(col, dbeta[j, :] - prev_jac_row)
        dual_var -= col * (beta[j] - prev_coef)
def _get_dobj(dual_var, X, beta, alpha, y=None):
    """Return the dual objective value built from ``dual_var``.

    The dual variable is theta = (y - X beta) / (alpha[0] * n_samples);
    here it is obtained by rescaling ``dual_var``.  The result is

        alpha[0] * y @ theta
        - alpha[0]**2 * n_samples / 2 * theta @ theta
        - alpha[0]**2 / alpha[1] / 2 * sum(ST(X.T @ theta, 1)**2)

    ``ST`` is defined elsewhere in the project (presumably the
    soft-thresholding operator -- confirm).

    Notes
    -----
    * ``y`` is used unconditionally, so the ``None`` default cannot actually
      be relied on; callers must pass ``y``.
    * ``beta`` is accepted but not used here.
    * ``alpha`` is read at positions 0 and 1; the last term divides by
      ``alpha[1]``.
    """
    n_samples = X.shape[0]
    reg_l1 = alpha[0]
    theta = dual_var / (reg_l1 * n_samples)

    linear_term = reg_l1 * y @ theta
    quadratic_term = reg_l1**2 * n_samples / 2 * np.dot(theta, theta)
    thresholded = ST(X.T @ theta, 1)
    penalty_term = reg_l1**2 / alpha[1] / 2 * (thresholded**2).sum()

    return linear_term - quadratic_term - penalty_term
def _update_beta_jac_bcd_sparse(data, indptr, indices, y, n_samples,
                                n_features, beta, dbeta, dual_var, ddual_var,
                                alphas, L, compute_jac=True):
    """Run one in-place coordinate pass on a column-compressed matrix.

    Column ``j`` is read as ``data[indptr[j]:indptr[j + 1]]`` with row
    positions ``indices[indptr[j]:indptr[j + 1]]``.  For every feature a
    curvature ``L_temp`` is recomputed from ``sigma`` of the touched entries
    of ``dual_var``; features with ``L_temp == 0`` are left unchanged.
    Otherwise ``beta[j]`` becomes ``ST(zj, alphas[j] / L_temp)`` and the
    touched entries of ``dual_var`` are shifted accordingly.  When
    ``compute_jac`` is true the same recursion is applied to ``dbeta`` and
    ``ddual_var``.

    ``sigma`` and ``ST`` are defined elsewhere in the project (presumably the
    sigmoid and the soft-thresholding operator -- confirm).  ``L`` is
    accepted but not used here.  ``dbeta`` is assumed 1-D so that
    ``dbeta[j]`` is a scalar snapshot -- confirm.

    Returns None; ``beta``, ``dbeta``, ``dual_var`` and ``ddual_var`` are
    modified in place.
    """
    for j in range(n_features):
        # Stored values and row positions of column j.
        start, stop = indptr[j], indptr[j + 1]
        col_vals = data[start:stop]
        rows = indices[start:stop]
        y_rows = y[rows]

        prev_coef = beta[j]
        if compute_jac:
            prev_jac = dbeta[j]

        sig = sigma(dual_var[rows])
        grad = col_vals @ (y_rows * (sig - 1))
        L_temp = (col_vals**2 * sig * (1 - sig)).sum()
        L_temp /= n_samples
        if L_temp == 0:
            # No curvature along this coordinate: nothing to update.
            continue

        step_point = beta[j] - grad / (L_temp * n_samples)
        beta[j:j + 1] = ST(step_point, alphas[j] / L_temp)

        if compute_jac:
            # sig is the value from before beta[j] changed.
            dsig = sig * (1 - sig) * ddual_var[rows]
            hess = col_vals @ (y_rows * dsig)
            jac_pre = dbeta[j] - hess / (L_temp * n_samples)
            sgn = np.sign(beta[j])
            dbeta[j:j + 1] = np.abs(sgn) * jac_pre
            dbeta[j:j + 1] -= alphas[j] * sgn / L_temp
            # Propagate the Jacobian change to the residual derivative.
            ddual_var[rows] += y_rows * col_vals * (dbeta[j] - prev_jac)
        dual_var[rows] += y_rows * col_vals * (beta[j] - prev_coef)
def _update_bcd_jac(X, beta, dbeta, r, dr, alpha, L, compute_jac=True):
    """Run one in-place coordinate pass over the columns of a dense ``X``.

    Only features with ``L[j] != 0`` are visited.  For each of them
    ``beta[j]`` is replaced by ``ST(zj, alpha[j] / L[j])`` and the residual
    ``r`` is corrected by the change in ``beta[j]`` times column ``j``.  When
    ``compute_jac`` is true, ``dbeta[j]`` and ``dr`` are updated with the
    matching recursion.

    ``ST`` is defined elsewhere in the project (presumably the
    soft-thresholding operator -- confirm).  ``dbeta`` is assumed 1-D so
    that ``dbeta[j]`` is a scalar snapshot -- confirm.

    Returns None; ``beta``, ``dbeta``, ``r`` and ``dr`` are modified in
    place.
    """
    n_samples, _ = X.shape
    for j in np.where(L != 0)[0]:
        col = X[:, j]
        scale = L[j] * n_samples

        prev_coef = beta[j]
        if compute_jac:
            prev_jac = dbeta[j]

        step_point = beta[j] + r @ col / scale
        beta[j:j + 1] = ST(step_point, alpha[j] / L[j])

        if compute_jac:
            sgn = np.sign(beta[j])
            jac_pre = dbeta[j] + col @ dr / scale
            dbeta[j:j + 1] = np.abs(sgn) * jac_pre
            dbeta[j:j + 1] -= alpha[j] * sgn / L[j]
            # Propagate the Jacobian change to the residual derivative.
            dr -= col * (dbeta[j] - prev_jac)
        r -= col * (beta[j] - prev_coef)
def _update_beta_jac_bcd_sparse(
        data, indptr, indices, y, n_samples, n_features, beta, dbeta, r, dr,
        alphas, L, compute_jac=True):
    """Run one in-place coordinate pass on a column-compressed matrix.

    Only features with ``L[j] != 0`` are visited.  Column ``j`` is read as
    ``data[indptr[j]:indptr[j + 1]]`` with row positions
    ``indices[indptr[j]:indptr[j + 1]]``.  For each visited feature,
    ``beta[j]`` is replaced by ``ST(zj, alphas[j] / L[j])`` and the touched
    entries of the residual ``r`` are corrected.  When ``compute_jac`` is
    true, ``dbeta[j]`` and the touched entries of ``dr`` are updated with the
    matching recursion.

    ``ST`` is defined elsewhere in the project (presumably the
    soft-thresholding operator -- confirm).  ``y`` and ``n_features`` are
    accepted but not used here.  ``dbeta`` is assumed 1-D so that
    ``dbeta[j]`` is a scalar snapshot -- confirm.

    Returns None; ``beta``, ``dbeta``, ``r`` and ``dr`` are modified in
    place.
    """
    for j in np.where(L != 0)[0]:
        # Stored values and row positions of column j.
        start, stop = indptr[j], indptr[j + 1]
        col_vals = data[start:stop]
        rows = indices[start:stop]
        scale = L[j] * n_samples

        prev_coef = beta[j]
        if compute_jac:
            prev_jac = dbeta[j]

        step_point = beta[j] + r[rows] @ col_vals / scale
        beta[j:j + 1] = ST(step_point, alphas[j] / L[j])

        if compute_jac:
            sgn = np.sign(beta[j])
            jac_pre = dbeta[j] + col_vals @ dr[rows] / scale
            dbeta[j:j + 1] = np.abs(sgn) * jac_pre
            dbeta[j:j + 1] -= alphas[j] * sgn / L[j]
            # Propagate the Jacobian change to the residual derivative.
            dr[rows] -= col_vals * (dbeta[j] - prev_jac)
        r[rows] -= col_vals * (beta[j] - prev_coef)