def optimize(self, opt_data):
    """Select instances from a kernel matrix, optionally within two splits.

    An RBF matrix is built over ``opt_data.X`` and, unless ``self.no_f_x``
    is set, multiplied (``*``) by an RBF matrix over ``opt_data.Y``. For each
    split, ``self.optimize_for_data`` is run on that split's sub-matrix and
    the rows it returns are marked as selected.

    When ``self.num_class_splits`` is set (only the value 2 is accepted),
    the two splits are "Y <= mean(Y)" and its complement, and each split is
    given ``opt_data.subset_size / 2`` as its budget.

    Nothing is returned. The method stores ``self.W``, ``self.selected``,
    ``self.learned_distribution`` (the same array as ``self.selected``) and
    ``self.optimization_value`` (always 0).
    """
    assert opt_data.instances_to_keep is None, 'Not implemented yet!'

    similarity = array_functions.make_rbf(opt_data.X, self.sigma_x)
    if not self.no_f_x:
        label_similarity = array_functions.make_rbf(opt_data.Y, self.sigma_y)
        similarity = similarity * label_similarity

    num_instances = similarity.shape[0]
    chosen = array_functions.false(similarity.shape[0])

    # Default: a single split covering every instance with the full budget.
    masks = [array_functions.true(num_instances)]
    budgets = [opt_data.subset_size]
    if self.num_class_splits is not None:
        assert self.num_class_splits == 2
        at_or_below_mean = opt_data.Y <= opt_data.Y.mean()
        half_budget = opt_data.subset_size / 2
        masks = [at_or_below_mean, ~at_or_below_mean]
        budgets = [half_budget, half_budget]

    for mask, budget in zip(masks, budgets):
        picks = self.optimize_for_data(similarity[np.ix_(mask, mask)], budget)
        # ``picks`` indexes into the split; map it back to full-data rows.
        member_rows = mask.nonzero()[0]
        chosen[member_rows[picks]] = True

    self.W = similarity
    self.selected = chosen
    self.learned_distribution = chosen
    self.optimization_value = 0
def optimize(self, opt_data):
    """Select instances by spectral clustering of a similarity matrix.

    The similarity matrix comes from ``array_functions.make_graph_distance``
    when ``self.no_spectral_kernel`` is set, otherwise from an RBF over
    ``opt_data.X``; unless ``self.no_f_x`` is set it is multiplied (``*``)
    by an RBF over ``opt_data.Y``. The number of clusters is
    ``opt_data.subset_size`` plus the number of ``instances_to_keep``
    (when that attribute exists and is not None).

    One instance set is then picked from the clusters, either with
    ``compute_centroids_for_spectral_clustering`` or with
    ``sample_from_clusters``, depending on ``self.cluster_select_singleton``.
    If there are instances to keep, picks in any cluster that already
    contains a kept instance are cleared, the remaining picks are truncated
    to ``opt_data.subset_size``, and the kept instances are switched on.

    Nothing is returned. The method stores ``self.spectral_cluster``,
    ``self.W``, ``self.cluster_inds``, ``self.selected``,
    ``self.learned_distribution`` (same array as ``self.selected``) and
    ``self.optimization_value`` (always 0).
    """
    must_keep = getattr(opt_data, 'instances_to_keep', None)

    if self.no_spectral_kernel:
        similarity = array_functions.make_graph_distance(opt_data.X)
    else:
        similarity = array_functions.make_rbf(opt_data.X, self.sigma_x)
    if not self.no_f_x:
        label_similarity = array_functions.make_rbf(opt_data.Y, self.sigma_y)
        similarity = similarity * label_similarity

    num_clusters = opt_data.subset_size
    if must_keep is not None:
        num_clusters += must_keep.sum()

    self.spectral_cluster, cluster_inds = self.cluster_spectral(
        similarity, num_clusters, self.spectral_cluster)

    if not self.running_cv:
        # NOTE(review): this second clustering run discards its result and
        # then prints an empty line. It looks like leftover debugging, but it
        # is kept because dropping it could change RNG/model state -- confirm
        # before removing.
        self.cluster_spectral(similarity, num_clusters, self.spectral_cluster)
        print('')

    if self.cluster_select_singleton:
        chosen = self.compute_centroids_for_spectral_clustering(
            similarity, cluster_inds)
    else:
        chosen = self.sample_from_clusters(
            similarity, cluster_inds, num_clusters)

    if must_keep is not None:
        # A cluster that already holds a mandatory instance needs no pick.
        for cluster_id in range(num_clusters):
            in_cluster = cluster_inds == cluster_id
            if (must_keep & in_cluster).any():
                chosen[chosen & in_cluster] = False
        # Trim surplus picks (highest indices first to go), then force the
        # mandatory instances on.
        if chosen.sum() > opt_data.subset_size:
            surplus = chosen.nonzero()[0][opt_data.subset_size:]
            chosen[surplus] = False
        chosen[must_keep] = True

    self.W = similarity
    self.cluster_inds = cluster_inds
    self.selected = chosen
    if chosen.sum() < opt_data.subset_size:
        print('Empty clusters')
    self.learned_distribution = chosen
    self.optimization_value = 0
def optimize(self, opt_data):
    """Greedily select ``opt_data.subset_size`` instances, one per round.

    In every round each not-yet-selected instance is scored with
    ``self.evaluate_selection`` on "current selection plus that instance";
    the instance with the smallest score is added. Already-selected
    instances keep a score of +inf so they cannot win the argmin.

    Only an absent or all-zero ``opt_data.instances_to_keep`` is supported.

    Nothing is returned. The method stores ``self.selected``,
    ``self.learned_distribution`` (same array as ``self.selected``) and
    ``self.optimization_value`` (always 0).
    """
    assert (opt_data.instances_to_keep is None or
            opt_data.instances_to_keep.sum() == 0), 'Not implemented yet!'

    density_kernel = density.compute_kernel(opt_data.X, None, self.sigma_p)
    # Note: this kernel is also built over X, using the sigma_y bandwidth.
    label_kernel = array_functions.make_rbf(opt_data.X, self.sigma_y)

    num_instances = density_kernel.shape[0]
    chosen = array_functions.false(num_instances)
    target_f = self.f_x
    target_p = self.p_x

    for _ in range(opt_data.subset_size):
        scores = np.full(num_instances, np.inf)
        for candidate in range(num_instances):
            if not chosen[candidate]:
                # Fresh mask: the current selection with `candidate` added.
                trial = chosen.copy()
                trial[candidate] = True
                scores[candidate] = self.evaluate_selection(
                    density_kernel, label_kernel, trial, target_f, target_p)
        chosen[scores.argmin()] = True

    self.selected = chosen
    self.learned_distribution = chosen
    self.optimization_value = 0
def train(self, data):
    """Learn non-negative source-instance weights, then train the target learner.

    A weight vector ``w`` (one entry per source instance, constrained to
    ``w >= 0``) is fitted with cvx by minimizing

        sum((S * diag(w) * y_s - y_true) ** 2) + self.C * ||w||^2

    where ``S`` is the smoothing matrix built from the RBF kernel between
    the labeled target rows and the source rows (features are the inputs
    with the label appended as an extra column, then standardized).

    If the solver raises or produces no value, the weights fall back to
    zeros and the hyper-parameters are printed.

    The weights are written into ``instance_weights`` of the transfer subset
    (source entries only; every other entry is 1), which is then passed to
    ``self.target_learner.train_and_test``. Stores ``self.instance_weights``,
    ``self.x`` and ``self.w``. Nothing is returned.

    Fix relative to the previous version: the final assignment used
    ``w.value`` instead of the local ``w_value``, so the zero fallback
    computed in the failure branch was never applied.
    """
    assert data.is_regression
    # The returned predictions are not used below; the call is kept in case
    # get_predictions has side effects (not visible from this method).
    self.get_predictions(data)

    is_source = data.is_source
    I = data.is_target & data.is_labeled
    y_s = data.y[is_source]
    y_true = data.true_y[I]

    x_s = array_functions.append_column(data.x[is_source], data.y[is_source])
    x_s = array_functions.standardize(x_s)
    x_t = array_functions.append_column(data.x[I], data.y[I])
    x_t = array_functions.standardize(x_t)

    Wrbf = array_functions.make_rbf(x_t, self.sigma, self.metric, x2=x_s)
    S = array_functions.make_smoothing_matrix(Wrbf)

    n_source = x_s.shape[0]
    w = cvx.Variable(n_source)
    constraints = [w >= 0]
    reg = cvx.norm(w)**2
    loss = cvx.sum_entries(cvx.power(S*cvx.diag(w)*y_s - y_true, 2))
    obj = cvx.Minimize(loss + self.C*reg)
    prob = cvx.Problem(obj, constraints)
    assert prob.is_dcp()

    solved = None
    try:
        prob.solve()
        solved = w.value
    except Exception:
        # Best effort: any solver failure is handled by the fallback below.
        solved = None

    if solved is None:
        k = 0
        print('CVX problem: setting g = ' + str(k))
        print('\tsigma=' + str(self.sigma))
        print('\tC=' + str(self.C))
        w_value = k*np.ones(n_source)
    else:
        # Flatten the solver output so it fits the 1-D masked assignment below.
        w_value = np.reshape(np.asarray(solved), n_source)

    all_data = data.get_transfer_subset(self.configs.labels_to_keep,
                                        include_unlabeled=True)
    all_data.instance_weights = np.ones(all_data.n)
    all_data.instance_weights[all_data.is_source] = w_value
    self.instance_weights = all_data.instance_weights
    self.target_learner.train_and_test(all_data)
    self.x = all_data.x[all_data.is_source]
    self.w = all_data.instance_weights[all_data.is_source]
def train(self, data):
    """Fit per-instance scale ``w`` and offset ``g`` on the labeled instances.

    With ``R_ll = W_ll (W_ll + C I)^-1`` built from the RBF kernel over the
    labeled inputs, cvx minimizes

        sum((diag(R_ll w) y_s + R_ll g - y) ** 2) + self.C2 * ||R_ll w - 1||^2

    When ``self.include_scale`` is false, ``w`` is constrained to 1.

    If solving (or reading back the solution) raises, ``g`` falls back to
    zeros and ``w`` to ones, and the hyper-parameters are printed.

    Stores ``self.x``, ``self.y``, ``self.R_ll``, ``self.g`` and ``self.w``.
    Nothing is returned. Requires at least one unlabeled instance.

    Changes relative to the previous version:
    * ``R_ll`` is now a matrix product (``.dot``). The former ``*`` is an
      element-wise product on ndarrays, which is not the smoother
      ``W (W + C I)^-1``.
      NOTE(review): this changes numerical results if ``make_rbf`` returns
      an ndarray -- confirm against previously reported experiments.
    * The unused ``R_ul = self.make_R_ul(data.x)`` computation was removed.
    * The bare ``except:`` was narrowed to ``except Exception:``.
    """
    assert data.is_regression
    is_labeled = data.is_labeled
    y_s = data.y_s[is_labeled]
    y = data.y[is_labeled]
    assert not is_labeled.all()
    n_labeled = len(is_labeled.nonzero()[0])

    g = cvx.Variable(n_labeled)
    w = cvx.Variable(n_labeled)

    x_labeled = data.x[is_labeled, :]
    W_ll = array_functions.make_rbf(x_labeled, self.sigma, self.configs.metric)
    self.x = data.x[is_labeled, :]
    self.y = y
    ridge_inverse = np.linalg.inv(W_ll + self.C * np.eye(W_ll.shape[0]))
    self.R_ll = W_ll.dot(ridge_inverse)

    err = cvx.diag(self.R_ll * w) * y_s + self.R_ll * g - y
    err_l2 = cvx.power(err, 2)
    reg = cvx.norm(self.R_ll * w - 1)**2
    loss = cvx.sum_entries(err_l2) + self.C2 * reg

    constraints = []
    if not self.include_scale:
        constraints.append(w == 1)

    obj = cvx.Minimize(loss)
    prob = cvx.Problem(obj, constraints)
    assert prob.is_dcp()
    try:
        prob.solve()
        g_value = np.reshape(np.asarray(g.value), n_labeled)
        w_value = np.reshape(np.asarray(w.value), n_labeled)
    except Exception:
        # Best effort: fall back to "no offset, unit scale".
        k = 0
        print('CVX problem: setting g = ' + str(k))
        print('\tC=' + str(self.C))
        print('\tC2=' + str(self.C2))
        print('\tsigma=' + str(self.sigma))
        g_value = k * np.ones(n_labeled)
        w_value = np.ones(n_labeled)
    self.g = g_value
    self.w = w_value
def predict(self, data):
    """Predict outputs for ``data`` with graph-regularized smoothing.

    Outline of what the code below does:

    1. Optionally subsample ``self.predict_sample`` instances (without
       replacement) when that is smaller than the number of instances.
    2. Build a graph ``W_source_pred`` over the source predictions
       (``data.source_y_pred``): RBF when ``self.use_rbf``, else kNN.
       With ``self.oracle_guidance`` set (RBF mode only), randomly chosen
       entries are overwritten with values derived from ``data.true_y``.
    3. Build ``W`` between the (transformed) prediction inputs and
       ``self.x``, and turn it into a smoothing matrix ``S``.
    4. Optionally multiply ``W_source_pred`` by a radius or kNN graph over
       the inputs (the kNN graph wins when both options are set).
    5. Solve ``(I + C L) f = S y`` for ``f``, exactly via least squares or,
       when ``self.nystrom_percentage > 0``, via
       ``array_functions.nystrom_woodbury_laplacian``.
    6. If ``self.predict_sample`` is not None, train ``self.nw_learner`` on
       the sampled points and ``f`` and use its predictions for all of
       ``data``; otherwise use ``f`` directly.

    ``self.x`` / ``self.y`` are read but not set here (presumably stored by
    training -- verify against the class).

    Returns a ``results.Output`` whose ``y`` and ``fu`` hold the predictions.
    Sets ``self.predict_time`` when a solve is timed.

    Changes relative to the previous version:
    * The least-squares failure fallback now has one entry per solved
      instance (``I.size``). It used ``data.true_y.shape``, which does not
      match ``data.x[I, :]`` when prediction is subsampled.
    * The bare ``except:`` was narrowed to ``except Exception:``.
    * A ``None`` ``nystrom_percentage`` is no longer compared with ``> 0``,
      and any non-positive value now takes the exact path instead of
      leaving ``f`` undefined.
    * The dead ``f = None`` / ``if f is not None`` branch was removed.
    """
    y_pred_source = data.source_y_pred
    I = np.arange(y_pred_source.size)
    if (self.predict_sample is not None and
            self.predict_sample < y_pred_source.size):
        I = np.random.choice(y_pred_source.size, self.predict_sample,
                             replace=False)

    if self.use_rbf:
        W_source_pred = array_functions.make_rbf(y_pred_source[I],
                                                 self.sigma_tr)
        if self.oracle_guidance is not None:
            y = data.true_y[I]
            n_y = y.size
            num_to_sample = math.ceil(self.oracle_guidance * n_y**2)
            rand_index1 = np.random.choice(n_y, int(num_to_sample),
                                           replace=True)
            rand_index2 = np.random.choice(n_y, int(num_to_sample),
                                           replace=True)
            if self.oracle_guidance_binary:
                target_distances = array_functions.make_graph_distance(y)
                distance_threshold = .2 * (y.max() - y.min())
                W_source_pred[rand_index1, rand_index2] = target_distances[
                    rand_index1, rand_index2] <= distance_threshold
                W_source_pred[rand_index2, rand_index1] = target_distances[
                    rand_index2, rand_index1] <= distance_threshold
            else:
                y_scaled = array_functions.normalize(y) * (
                    y_pred_source.max() - y_pred_source.min())
                W_oracle_pred = array_functions.make_rbf(y_scaled,
                                                         self.sigma_tr)
                W_source_pred[rand_index1, rand_index2] = \
                    W_oracle_pred[rand_index1, rand_index2]
                W_source_pred[rand_index2, rand_index1] = \
                    W_oracle_pred[rand_index2, rand_index1]
        W = array_functions.make_rbf(
            self.transform.transform(self.x),
            self.sigma_nw,
            x2=self.transform.transform(data.x[I, :])).T
    else:
        assert self.oracle_guidance is None
        # In kNN mode the sigma_* parameters are fractions of the set sizes.
        k_L = int(self.sigma_tr * I.size)
        W_source_pred = array_functions.make_knn(y_pred_source[I], k_L)
        k_W = int(self.sigma_nw * self.x.shape[0])
        W = array_functions.make_knn(
            self.transform.transform(data.x[I, :]),
            k_W,
            x2=self.transform.transform(self.x))

    sparsify_prediction_graph = False
    if self.use_prediction_graph_radius:
        sparsify_prediction_graph = True
        W_sparse = array_functions.make_graph_radius(
            self.transform.transform(data.x[I, :]),
            radius=self.radius,
        )
    if self.use_prediction_graph_sparsification:
        sparsify_prediction_graph = True
        W_sparse = array_functions.make_knn(
            self.transform.transform(data.x[I, :]),
            self.k_sparsification,
            normalize_entries=False)
    if sparsify_prediction_graph:
        W_source_pred = W_source_pred * W_sparse

    S = array_functions.make_smoothing_matrix(W)
    # Debug switch: when True, both solvers run and their results are compared.
    timing_test = False
    C = self.C * self.x.shape[0] / W_source_pred[:].sum()

    nystrom_percentage = self.nystrom_percentage
    use_nystrom = nystrom_percentage is not None and nystrom_percentage > 0

    if use_nystrom or timing_test:
        if timing_test:
            tic()
        Sy = S.dot(self.y)
        if C != 0:
            lamb = 1 / float(C)
            tic()
            inv_approx, _ = array_functions.nystrom_woodbury_laplacian(
                W_source_pred, lamb, self.nystrom_percentage)
            self.predict_time = toc()
            inv_approx *= lamb
            f = inv_approx.dot(Sy)
        else:
            # With C == 0 there is no graph penalty; f is just the smoothed y.
            f = Sy
        if timing_test:
            toc()

    if not use_nystrom or timing_test:
        if timing_test:
            tic()
        L = array_functions.make_laplacian_with_W(W_source_pred,
                                                  normalized=False)
        A = np.eye(I.size) + C * L
        try:
            tic()
            f = np.linalg.lstsq(A, S.dot(self.y))[0]
            self.predict_time = toc()
        except Exception:
            print('GraphTransferNW:predict failed, returning mean')
            # One value per solved instance, so f lines up with data.x[I, :].
            f = self.y.mean() * np.ones(I.size)
        if timing_test:
            toc()
        if timing_test:
            A_inv = np.linalg.inv(A)
            print('approx error: ' + str(
                norm(inv_approx - A_inv) / norm(A_inv)))

    o = results.Output(data)
    if self.predict_sample is not None:
        # Extend the solution on the sampled points to every instance.
        nw_data = data_lib.Data(data.x[I, :], f)
        self.nw_learner.train_and_test(nw_data)
        nw_output = self.nw_learner.predict(data)
        o.y = nw_output.y
        o.fu = nw_output.y
    else:
        o.y = f
        o.fu = f
    return o
def make_R_ul(self, x):
    """Return the RBF kernel of ``x`` against ``self.x``, times ``self.R_ll``.

    The kernel is built with ``self.sigma`` and ``self.configs.metric``;
    ``self.x`` is passed as the fourth positional argument of
    ``array_functions.make_rbf`` (presumably its ``x2`` parameter -- confirm
    against that helper). The result is ``kernel.dot(self.R_ll)``.
    """
    kernel_to_stored = array_functions.make_rbf(
        x, self.sigma, self.configs.metric, self.x)
    return kernel_to_stored.dot(self.R_ll)
def train(self, data):
    """Fit the offset ``g`` (and intercept ``b`` in linear mode) with cvx.

    Three modes are visible in the code:

    * ``self.constant_b``: ``self.g`` is set to the mean of ``y_t - y_s``
      and the method returns immediately.
    * ``self.linear_b``: ``g`` has ``data.p`` entries and ``b`` is a scalar;
      the offset is ``x * g + b`` with ``x`` the fit-transformed labeled
      inputs, regularized by the squared 2-norm of ``g``. After solving,
      ``self.g``, ``self.b``, ``self.g_min`` and ``self.g_max`` are stored
      and the method returns.
    * otherwise: ``g`` has one entry per labeled instance and is regularized
      by a fused lasso or by a quadratic form with the Laplacian of a
      symmetrized, sum-normalized graph ``W`` (radius graph or RBF). The
      solved ``g`` then replaces the targets of the training subset, which
      is passed to ``self.g_nw.train_and_test``.

    In every cvx mode the loss is the sum of squares of
    ``C3 * y_t + (1 - C3) * (y_s + offset) - y``; when ``self.use_l2`` is
    false the regularizer is replaced by the 1-norm of ``g``. Constraints
    come from the callables in ``self.configs.constraints``.

    If solving raises, ``g`` (and ``b``) fall back to zeros and the
    hyper-parameters are printed. Nothing is returned.
    """
    # Only the first column of the source/target predictions is used.
    y_s = np.squeeze(data.y_s[:, 0])
    y_t = np.squeeze(data.y_t[:, 0])
    y = data.y
    if self.constant_b:
        # Constant offset: mean gap between target and source predictions.
        self.g = (y_t - y_s).mean()
        return
    is_labeled = data.is_labeled
    labeled_inds = is_labeled.nonzero()[0]
    n_labeled = len(labeled_inds)
    # Debug switch: hard-coded off, so the plotting branch below never runs.
    plot_lasso_path = False
    if plot_lasso_path:
        y_tilde = y_s - y
        #x = data.x[is_labeled, :]
        x = self.transform.fit_transform(data.x[is_labeled, :])
        from sklearn import linear_model
        import matplotlib.pyplot as plt
        regs, _, coefs = linear_model.lars_path(
            x,
            y_tilde,
            method='lasso',
        )
        xx = np.sum(np.abs(coefs.T), axis=1)
        xx /= xx[-1]
        plt.plot(xx, coefs.T)
        ymin, ymax = plt.ylim()
        plt.vlines(xx, ymin, ymax, linestyle='dashed')
        plt.xlabel('Normalized Norm of Coefficients')
        plt.ylabel('Coefficients')
        plt.title('LASSO Path for Boston Housing Data')
        plt.legend(data.feature_names)
        plt.axis('tight')
        xmin, xmax = plt.xlim()
        plt.xlim(xmin, xmax + .3)
        plt.show()
    if self.linear_b:
        # Linear offset model: offset = x * g + b.
        g = cvx.Variable(data.p)
        b = cvx.Variable(1)
        x = self.transform.fit_transform(data.x[is_labeled, :])
        err = self.C3 * y_t + (1 - self.C3) * (y_s + x * g + b) - y
        reg = cvx.square(cvx.norm2(g))
    else:
        # Free-form offset: one value of g per labeled instance.
        g = cvx.Variable(n_labeled)
        if self.use_radius:
            W = array_functions.make_graph_radius(data.x[is_labeled, :], self.radius, self.configs.metric)
        else:
            #W = array_functions.make_graph_adjacent(data.x[is_labeled,:], self.configs.metric)
            W = array_functions.make_rbf(data.x[is_labeled, :], self.sigma, self.configs.metric)
        W = array_functions.try_toarray(W)
        # Symmetrize, then scale so the entries sum to one (when non-zero).
        W = .5 * (W + W.T)
        if W.sum() > 0:
            W = W / W.sum()
        # With an all-zero graph there is no smoothness penalty.
        reg = 0
        if W.any():
            if self.use_fused_lasso:
                reg = cvx_functions.create_fused_lasso(W, g)
            else:
                L = array_functions.make_laplacian_with_W(W)
                # Small ridge added to the Laplacian before the quadratic form
                # (presumably to keep it numerically PSD for cvx -- confirm).
                L += 1e-6 * np.eye(L.shape[0])
                reg = cvx.quad_form(g, L)
                #reg = g.T * L * g
        err = self.C3 * y_t + (1 - self.C3) * (y_s + g) - y
    err_l2 = cvx.power(err, 2)
    loss = cvx.sum_entries(err_l2)
    if not self.use_l2:
        # Overrides whichever regularizer was chosen above.
        reg = cvx.norm(g, 1)
    #constraints = [g >= -2, g <= 2]
    #constraints = [g >= -4, g <= 0]
    #constraints = [g >= 4, g <= 4]
    # Constraint factories take (g, b, x) in linear mode and (g) otherwise.
    if self.linear_b:
        constraints = [f(g, b, x) for f in self.configs.constraints]
    else:
        constraints = [f(g) for f in self.configs.constraints]
    obj = cvx.Minimize(loss + self.C * reg)
    #obj = cvx.Minimize(loss + self.C*reg + self.C2*cvx.norm(g))
    prob = cvx.Problem(obj, constraints)
    assert prob.is_dcp()
    try:
        prob.solve()
        if self.linear_b:
            b_value = b.value
            g_value = np.reshape(np.asarray(g.value), data.p)
        else:
            g_value = np.reshape(np.asarray(g.value), n_labeled)
    except:
        # Best-effort fallback: zero offset (and zero intercept in linear mode).
        k = 0
        #assert prob.status is None
        print 'CVX problem: setting g = ' + str(k)
        g_value = k * np.ones(n_labeled)
        if self.linear_b:
            g_value = k * np.ones(data.p)
            b_value = 0
        print '\tC=' + str(self.C)
        print '\tC2=' + str(self.C2)
        print '\tC3=' + str(self.C3)
    if self.linear_b:
        self.g = g_value
        self.b = b_value
        # Range of the fitted linear offset on the labeled training inputs.
        g_pred = x.dot(g_value)
        self.g_min = g_pred.min()
        self.g_max = g_pred.max()
        return
    #labeled_train_data = data.get_subset(labeled_inds)
    training_data = data.get_subset(data.is_train)
    # The solved offsets must line up one-to-one with the training subset.
    assert training_data.y.shape == g_value.shape
    training_data.is_regression = True
    # Train the g_nw learner to reproduce the solved offsets.
    training_data.y = g_value
    training_data.true_y = g_value
    self.g_nw.train_and_test(training_data)
def train(self, data):
    """Fit the ``g`` and ``h`` vectors of the SMS transfer model with L-BFGS-B.

    Inputs are standardized; RBF kernels are built between the labeled
    target training rows (``W_ll``) and between all target rows and the
    labeled ones (``W_ul``). Both are multiplied by
    ``(W_ll + C2 I)^-1`` to give ``R_ll`` and ``R_ul``.

    ``SMSTransfer.eval`` is minimized with ``SMSTransfer.gradient`` as the
    Jacobian, starting from zeros, with extra arguments
    ``(R_ll, R_ul, y_s, y_true, C, reg)``. The solution is split into
    ``g`` and ``h`` by ``SMSTransfer.unpack_gh``.

    Stores ``self.opt_succeeded``, ``self.g``, ``self.h`` and attaches
    ``R_ul`` to ``data``. Prints a message when the optimizer reports
    failure. Nothing is returned.
    """
    assert data.is_regression
    # The returned predictions are overwritten below; the call is kept in
    # case get_predictions has side effects (not visible from this method).
    y_s, y_true = self.get_predictions(data)

    target_mask = data.is_target
    labeled_target_mask = data.is_target & data.is_labeled & data.is_train
    y_s = data.y[labeled_target_mask]
    y_true = data.true_y[labeled_target_mask]

    x_std = array_functions.standardize(data.x)
    x_target = x_std[target_mask]
    x_labeled = x_std[labeled_target_mask]

    C = self.C
    C2 = self.C2
    kernel_ll = array_functions.make_rbf(x_labeled, self.sigma, self.metric)
    ridge_inverse = np.linalg.inv(
        kernel_ll + C2 * np.eye(kernel_ll.shape[0]))
    kernel_ul = array_functions.make_rbf(
        x_target, self.sigma, self.metric, x2=x_labeled)
    R_ll = kernel_ll.dot(ridge_inverse)
    R_ul = kernel_ul.dot(ridge_inverse)
    assert not array_functions.has_invalid(R_ll)
    assert not array_functions.has_invalid(R_ul)

    def reg(gh):
        return SMSTransfer.reg(gh, R_ul)

    n_labeled = R_ll.shape[0]
    # The optimization vector stacks g (first half) and h (second half).
    g0 = np.zeros((n_labeled * 2, 1))
    gh_ids = np.zeros(g0.shape)
    gh_ids[n_labeled:] = 1

    solver_kwargs = dict(
        method='L-BFGS-B',
        options={
            'disp': False,
            'maxiter': np.inf,
            'maxfun': np.inf,
        },
        constraints=[],
        args=(R_ll, R_ul, y_s, y_true, C, reg),
    )
    results = optimize.minimize(
        SMSTransfer.eval, g0, jac=SMSTransfer.gradient, **solver_kwargs)

    # Debug switch: re-solve with a numerical gradient and compare.
    check_results = False
    if check_results:
        results2 = optimize.minimize(
            SMSTransfer.eval, g0, jac=None, **solver_kwargs)
        print(self.params)
        scipy_opt_methods.compare_results(results, results2, gh_ids)
        diff = results.x - results2.x
        print(results.x)
        print(results2.x)

    g, h = SMSTransfer.unpack_gh(results.x, n_labeled)
    self.opt_succeeded = results.success
    if not results.success:
        print('SMS Opt failed')
    data.R_ul = R_ul
    self.g = g
    self.h = h