def lars_on_lasso():
    ''' LARS path should coincide with the lasso coordinate descent
    solution along the way. '''
    import basis_fnc as basis
    N = 50  # num of samples
    p = 10  # dim of theta
    t = np.linspace(0, 1, N)
    c = np.linspace(t[0], t[-1], p) + 0.01 * np.random.randn(p)
    w = 0.1 * np.ones((p,)) + 0.01 * np.random.rand(p)
    X = basis.create_gauss_regressor(c, w, t, include_intercept=False)
    # _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
    p_hidden = 4  # actual number of active params
    np.random.seed(seed=1)
    beta = np.vstack((np.random.randn(p_hidden, 1),
                      np.zeros((p - p_hidden, 1))))
    y = np.dot(X, beta) + 0.01 * np.random.randn(N, 1)
    y = y.squeeze()
    alphas, _, coefs = lm.lars_path(X, y, method='lasso', verbose=True)

    xx = np.sum(np.abs(coefs.T), axis=1)
    xx /= xx[-1]
    print xx
    print coefs.T
    plt.plot(xx, coefs.T)
    ymin, ymax = plt.ylim()
    plt.vlines(xx, ymin, ymax, linestyle='dashed')
    plt.xlabel('|coef| / max|coef|')
    plt.ylabel('Coefficients')
    plt.title('LASSO Path')
    plt.axis('tight')
    plt.show()
def train_multi_dof_sparse_weights(args, p=10, plot_regr=False, path=False,
                                   ex=0, save=False, verbose=False,
                                   measure_time=False):
    ''' Train for only one demonstration but multiple dofs. '''
    import multi_dof_lasso as lasso
    joint_dict, ball_dict = serve.run_serve_demo(args)
    # train multi-task elastic net
    idx_move = joint_dict['idx_move']
    idx = np.arange(idx_move[0, ex], idx_move[1, ex])
    q = joint_dict['x'][idx, :]
    t = joint_dict['t'][idx]
    t -= t[0]
    c = np.linspace(t[0], t[-1], p)  # centers
    w = 0.1 * np.ones((p,))  # widths
    X = basis.create_gauss_regressor(c, w, time=t)
    C, Xdot2 = basis.create_acc_weight_mat(c, w, t, include_intercept=True)
    # theta, res = l2_pen_regr(X, q, C)
    # theta, res = multi_task_lasso(X, q)
    # theta, res = multi_task_elastic_net(X, q)
    # theta, res = multi_task_weighted_elastic_net(X, q, Xdot2)
    X, theta, c, w, a, r, Xdot2 = iter_multi_dof_lasso(t, q, p, measure_time)

    if verbose:
        print 'Res. norm:', np.linalg.norm(q - np.dot(X, theta), 'fro')
        print 'Acc. norm:', np.linalg.norm(np.dot(Xdot2, theta), 'fro')
        print 'No. params:', theta.size
    if path:
        _, _, theta_path = lasso.multi_task_weighted_elastic_net(
            X, q, Xdot2, alpha=a, rho=r, path=True)
        order = find_ordering_from_path(theta_path)
    else:
        order = None
    if plot_regr:
        plot_regression(X, theta, q)
    # the last row of theta holds the intercepts!
    if save:
        filename = "rbf_" + str(ex) + ".json"
        dump_json_regression_obj(c, w, theta, file_name=filename, order=order)
def elastic_net_cost(c, w, t, q, theta, lamb1, lamb2):
    ''' Weighted elastic net cost:
        ||q - X*theta||_F^2 + lamb1 * sum_j ||theta_j||_2
                            + lamb2 * ||Xdot2*theta||_F^2,
    where the L2,1-norm groups each basis function across the dofs and
    the acceleration penalty replaces the usual ridge term. '''
    import basis_fnc as basis
    X = basis.create_gauss_regressor(c, w, t)
    _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
    res = q - np.dot(X, theta)
    cost = np.linalg.norm(res, 'fro')**2
    theta_21_norm = np.sum(np.sqrt(np.sum(theta*theta, axis=1)))
    l2_acc_pen = np.linalg.norm(np.dot(Xdot2, theta), 'fro')**2
    cost += lamb1 * theta_21_norm
    cost += lamb2 * l2_acc_pen
    return cost
def test_elastic_net_to_lasso_transform():
    ''' The solutions to elastic net and lasso should be identical
    after the standard augmentation/rescaling transform. '''
    import sklearn.linear_model as lm
    import basis_fnc as basis
    N = 50  # num of samples
    p = 10  # dim of theta
    t = np.linspace(0, 1, N)
    c = np.linspace(t[0], t[-1], p) + 0.01 * np.random.randn(p)
    w = 0.1 * np.ones((p,)) + 0.01 * np.random.rand(p)
    X = basis.create_gauss_regressor(c, w, t, include_intercept=False)
    # _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
    p_hidden = 4  # actual number of active params
    np.random.seed(seed=1)
    beta = np.vstack((np.random.randn(p_hidden, 1),
                      np.zeros((p - p_hidden, 1))))
    y = np.dot(X, beta) + 0.01 * np.random.randn(N, 1)

    alpha_elastic = 0.3
    ratio = 0.5
    clf = lm.ElasticNet(alpha=alpha_elastic, l1_ratio=ratio,
                        fit_intercept=False)
    clf.fit(X, y)
    beta_hat_1 = clf.coef_

    lamb1 = 2 * N * alpha_elastic * ratio
    lamb2 = N * alpha_elastic * (1 - ratio)
    y_bar = np.vstack((y, np.zeros((p, 1))))
    # weighted variant: y_bar = np.vstack((y, np.zeros((N, 1))))
    mult = np.sqrt(1.0 / (1 + lamb2))
    X_bar = mult * np.vstack((X, np.sqrt(lamb2) * np.eye(p)))
    # weighted variant: X_bar = mult * np.vstack((X, np.sqrt(lamb2)*Xdot2))
    lamb_bar = lamb1 * mult
    # sklearn's Lasso scales the residual by the number of rows of X_bar,
    # i.e. the augmented sample count N + p
    alpha_lasso = lamb_bar / (2 * X_bar.shape[0])
    clf2 = lm.Lasso(alpha=alpha_lasso, fit_intercept=False)
    clf2.fit(X_bar, y_bar)
    beta_hat_2 = clf2.coef_ * mult  # transform back

    print 'Actual param:', beta.T
    print 'Elastic net est:', beta_hat_1
    print 'Lasso est:', beta_hat_2
    assert np.allclose(beta_hat_1, beta_hat_2, atol=1e-3)
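
# The augmentation in the test above is the classical elastic-net-to-lasso
# reduction; a sketch of the algebra, with beta_bar = sqrt(1 + lamb2) * beta:
#
#   ||y - X*beta||^2 + lamb2*||beta||^2 + lamb1*||beta||_1
#     = ||y_bar - X_bar*beta_bar||^2 + (lamb1 / sqrt(1 + lamb2))*||beta_bar||_1,
#
# where y_bar = [y; 0] and X_bar = [X; sqrt(lamb2)*I] / sqrt(1 + lamb2).
# Replacing I with Xdot2 gives the acceleration-weighted variant noted in
# the comments.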
def elastic_net_cost_der(c, w, t, q, theta, lamb2):
    ''' Gradient of the elastic net cost w.r.t. the RBF centers c and
    widths w (the L2,1 term does not depend on c and w). '''
    import basis_fnc as basis
    X = basis.create_gauss_regressor(c, w, t)
    _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
    res = q - np.dot(X, theta)
    # derivative w.r.t. the centers
    M = basis.create_gauss_regressor_der(
        c, w, t, include_intercept=True, der='c')
    Mdot2 = basis.create_acc_weight_mat_der(
        c, w, t, include_intercept=True, der='c')
    grad_c = -2 * np.diag(np.dot(np.dot(theta, res.T), M))
    grad_c += 2 * lamb2 * \
        np.sum(np.dot(np.dot(Xdot2, theta), theta.T) * Mdot2, axis=0)
    # derivative w.r.t. the widths
    M = basis.create_gauss_regressor_der(
        c, w, t, include_intercept=True, der='w')
    Mdot2 = basis.create_acc_weight_mat_der(
        c, w, t, include_intercept=True, der='w')
    grad_w = -2 * np.diag(np.dot(np.dot(theta, res.T), M))
    grad_w += 2 * lamb2 * \
        np.sum(np.dot(np.dot(Xdot2, theta), theta.T) * Mdot2, axis=0)
    # drop the intercept entries, which have no center/width
    return np.hstack((grad_c[:-1], grad_w[:-1]))
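
# A minimal finite-difference check for the analytic gradient above.
# check_cost_gradient is a hypothetical helper (not part of the original
# pipeline), assuming the basis_fnc API used throughout this file and a
# theta that includes the intercept row.
def check_cost_gradient(c, w, t, q, theta, lamb1, lamb2, eps=1e-6):
    ''' A sketch: compare elastic_net_cost_der against central finite
    differences of elastic_net_cost. The L2,1 term is constant in (c, w),
    so it cancels in the differences. '''
    p = len(c)
    x0 = np.hstack((c, w))
    grad_num = np.zeros(2 * p)
    for i in range(2 * p):
        x_plus, x_minus = x0.copy(), x0.copy()
        x_plus[i] += eps
        x_minus[i] -= eps
        f_plus = elastic_net_cost(
            x_plus[:p], x_plus[p:], t, q, theta, lamb1, lamb2)
        f_minus = elastic_net_cost(
            x_minus[:p], x_minus[p:], t, q, theta, lamb1, lamb2)
        grad_num[i] = (f_plus - f_minus) / (2 * eps)
    grad_ana = elastic_net_cost_der(c, w, t, q, theta, lamb2)
    return np.allclose(grad_num, grad_ana, atol=1e-4)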
def stack_regressors(c, w, t, n_dofs=7, include_intercept=True):
    ''' For multi-demo grouping, form the X matrix and its second
    derivative by stacking the X's corresponding to different dofs. '''
    n_tp = len(t)
    p = len(c) / n_dofs
    if include_intercept:
        X = np.zeros((n_dofs * n_tp, p + 1))
        Xdot2 = np.zeros((n_dofs * n_tp, p + 1))
    else:
        X = np.zeros((n_dofs * n_tp, p))
        Xdot2 = np.zeros((n_dofs * n_tp, p))
    for j in range(n_dofs):
        v = j * n_tp + np.arange(0, n_tp, 1)
        c_dof = c[j * p:(j + 1) * p]
        w_dof = w[j * p:(j + 1) * p]
        X[v, :] = basis.create_gauss_regressor(
            c_dof, w_dof, t, include_intercept)
        _, M = basis.create_acc_weight_mat(
            c_dof, w_dof, t, include_intercept)
        Xdot2[v, :] = M
    return X, Xdot2
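
# A small usage sketch for stack_regressors (hypothetical values): the
# per-dof centers and widths are concatenated into single vectors, here
# 7 dofs with p = 10 RBFs each.
#
#   t = np.linspace(0, 1, 50)
#   c = np.tile(np.linspace(0, 1, 10), 7)
#   w = 0.1 * np.ones(70)
#   X, Xdot2 = stack_regressors(c, w, t, n_dofs=7)
#   # X has shape (7 * 50, 10 + 1): one block of rows per dof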
def train_l2_reg_regr(args, plot_regr=True, ex=18, save=False, p=10,
                      verbose=True):
    ''' Train an L2 (acceleration-)penalized regression on one
    demonstration. '''
    joint_dict, ball_dict = serve.run_serve_demo(args)
    idx_move = joint_dict['idx_move']
    idx = np.arange(idx_move[0, ex], idx_move[1, ex])
    q = joint_dict['x'][idx, :]
    t = joint_dict['t'][idx]
    t -= t[0]
    c = np.linspace(t[0], t[-1], p)  # centers
    w = 0.1 * np.ones((p,))  # widths
    X = basis.create_gauss_regressor(c, w, time=t)
    C, Xdot2 = basis.create_acc_weight_mat(c, w, t, include_intercept=True)
    theta, res = l2_pen_regr(t, X, q, C, 1e-4)
    if verbose:
        print 'Res. norm:', np.linalg.norm(q - np.dot(X, theta), 'fro')
        print 'Acc. norm:', np.linalg.norm(np.dot(Xdot2, theta), 'fro')
        print 'No. params:', theta.size
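
# A sketch of the closed-form solve that l2_pen_regr plausibly performs
# (an assumption -- the helper is defined elsewhere, and its exact use of
# t and C may differ): ridge regression with the acceleration weighting C
# as the penalty matrix,
#
#   theta = (X^T X + lamb * C^T C)^{-1} X^T q,   e.g. lamb = 1e-4:
#
#   A = np.dot(X.T, X) + 1e-4 * np.dot(C.T, C)
#   theta = np.linalg.solve(A, np.dot(X.T, q))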
def iter_multi_dof_lasso(t, q, p, measure_time=False):
    ''' Multi-task grouping is performed across degrees of freedom
    (joints). Alternates MultiTaskElasticNet with nonlinear optimization
    (BFGS) to update BOTH the RBF parameters and the regression
    parameters. '''
    import multi_dof_lasso as lasso
    # initialize the iteration
    c = np.linspace(t[0], t[-1], p) + 0.01 * np.random.randn(p)
    w = 0.1 * np.ones((p,)) + 0.01 * np.random.rand(p)
    X = basis.create_gauss_regressor(c, w, t)
    _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
    iter_max = 3
    a = 0.001  # alpha
    r = 0.99999  # rho
    N = q.shape[0]
    lamb1 = 2 * N * a * r
    lamb2 = N * a * (1 - r)

    def f_opt(x):
        c_opt = x[:p]
        w_opt = x[p:]
        f = lasso.elastic_net_cost(c_opt, w_opt, t, q, theta, lamb1, lamb2)
        df = lasso.elastic_net_cost_der(c_opt, w_opt, t, q, theta, lamb2)
        return f, df

    theta, residual, _ = lasso.multi_task_weighted_elastic_net(
        X, q, Xdot2, alpha=a, rho=r, measure_time=measure_time)
    for i in range(iter_max):
        theta, c, w, p = lasso.prune_params(theta, c, w)
        xopt = np.hstack((c, w))
        # update the RBF centers and widths
        time_init = time.time()
        bfgs_options = {'maxiter': 1000}
        result = opt.minimize(f_opt, xopt, jac=True, method="BFGS",
                              options=bfgs_options)
        if measure_time:
            print 'Elapsed BFGS time:', time.time() - time_init
        xopt = result.x
        c = xopt[:p]
        w = xopt[p:]
        X = basis.create_gauss_regressor(c, w, t)
        _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
        # perform lasso
        res_last = residual
        theta, residual, _ = lasso.multi_task_weighted_elastic_net(
            X, q, Xdot2, alpha=a, rho=r, measure_time=measure_time)
        # shrink the regularizers to keep lasso scaled throughout iterations
        a /= (np.linalg.norm(res_last, 'fro') /
              np.linalg.norm(residual, 'fro'))**2
        lamb1 = 2 * N * a * r
        lamb2 = N * a * (1 - r)
    theta, c, w, p = lasso.prune_params(theta, c, w)
    X = basis.create_gauss_regressor(c, w, t)
    _, Xdot2 = basis.create_acc_weight_mat(c, w, t)
    return X, theta, c, w, a, r, Xdot2
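
# A minimal usage sketch for iter_multi_dof_lasso (hypothetical data,
# assuming q holds one time sample per row and one dof per column):
#
#   t = np.linspace(0, 1, 100)
#   q = np.random.randn(100, 7)  # stand-in for recorded joint angles
#   X, theta, c, w, a, r, Xdot2 = iter_multi_dof_lasso(t, q, p=10)
#   q_hat = np.dot(X, theta)  # reconstructed trajectories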