def predict(self, data):
    """Predict for ``data`` by smoothing training labels over a graph built
    from the source task's predictions.

    Builds a kNN graph Laplacian ``L`` on the source predictions and a kNN
    smoothing matrix ``S`` from the transformed features, then solves
    ``(I + C*L) f = S.dot(self.y)`` in the least-squares sense.

    NOTE(review): assumes ``data.source_y_pred`` is a 1-d array aligned with
    the rows of ``data.x`` -- confirm with caller.
    """
    # d = data_lib.Data(np.expand_dims(data.source_y_pred, 1), data.y)
    y_pred_source = data.source_y_pred
    I = np.arange(y_pred_source.size)
    # Optionally subsample graph nodes to bound the cost of the solve.
    if self.predict_sample is not None and self.predict_sample < y_pred_source.size:
        I = np.random.choice(y_pred_source.size, self.predict_sample, replace=False)
    #L = array_functions.make_laplacian(y_pred_source[I], self.sigma_tr)
    #W = array_functions.make_rbf(self.transform.transform(self.x), self.sigma_nw, x2=self.transform.transform(data.x[I,:])).T
    # sigma_tr / sigma_nw act as fractions here: they scale a node count
    # into a neighbor count for the kNN constructions.
    k_L = int(self.sigma_tr * I.size)
    L = array_functions.make_laplacian_kNN(y_pred_source[I], k_L)
    k_W = int(self.sigma_nw * self.x.shape[0])
    W = array_functions.make_knn(self.transform.transform(data.x[I, :]), k_W,
                                 x2=self.transform.transform(self.x))
    S = array_functions.make_smoothing_matrix(W)
    A = np.eye(I.size) + self.C * L
    try:
        f = np.linalg.lstsq(A, S.dot(self.y))[0]
    except Exception:
        # Fix: narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # propagate; the best-effort fallback (predict the label mean) is kept.
        print('GraphTransferNW:predict failed, returning mean')
        f = self.y.mean() * np.ones(data.true_y.shape)
    o = results.Output(data)
    if self.predict_sample is not None:
        # Only the sampled nodes were solved for: fit a Nadaraya-Watson
        # learner on (x[I], f) and use it to predict for all of ``data``.
        nw_data = data_lib.Data(data.x[I, :], f)
        self.nw_learner.train_and_test(nw_data)
        nw_output = self.nw_learner.predict(data)
        o.y = nw_output.y
        o.fu = nw_output.y
    else:
        o.y = f
        o.fu = f
    return o
def create_reg(self, x):
    """Build and return a regularizer callable for the optimizer.

    The kNN Laplacian of ``x`` is computed once up front and captured by
    the returned closure, which evaluates the class's ``reg`` penalty on a
    candidate weight vector ``g``.
    """
    laplacian = array_functions.make_laplacian_kNN(x, self.k, self.metric)

    def regularizer(g):
        return ScipyOptNonparametricHypothesisTransfer.reg(g, laplacian)

    return regularizer
def train(self, data):
    '''
    Fit the per-instance transfer weights ``g`` (plus a bias term at index
    0 of the optimization variable) by constrained minimization, then
    smooth the learned weights over the input space with a
    Nadaraya-Watson learner (``self.g_nw``).

    Stashed manual tuning values:
    self.C = 1
    self.C2 = 10
    self.k = 1
    '''
    #self.C = 100
    #self.configs.use_fused_lasso = False
    g_max = 2
    # Oracle initialization: start g at g_max where the first feature is
    # below .5 -- presumably the region where source transfer helps;
    # TODO(review) confirm against the experiment setup.
    g0_oracle = np.zeros(data.n)
    g0_oracle[data.x[:, 0] < .5] = g_max
    f = self.create_eval(data, self.C)
    g = self.create_gradient(data, self.C)
    # One (lower, upper) bound per instance weight; the bias bound is
    # prepended below.
    bounds = list((0, g_max) for i in range(data.n))
    #bounds = list((i, i) for i in range(data.n))
    #bounds = list((0, 0) for i in range(data.n))
    #bounds[0] = (0,None)
    #self.include_bias = False
    if self.include_bias:
        bounds = [(None, None)] + bounds
        #bounds = [(10, 10)] + bounds
    else:
        # Bias pinned to exactly 0 when disabled.
        bounds = [(0, 0)] + bounds
    n = data.n + 1
    g0 = np.zeros(n)
    g0[1:] = g0_oracle
    #g0[:] = 1
    x = data.x
    y_s = np.squeeze(data.y_s[:, 0])
    y_t = np.squeeze(data.y_t[:, 0])
    y = data.y
    # Negated Laplacian: off-diagonal entries become nonnegative
    # adjacency-style weights used by the fused-lasso constraint below.
    W = -array_functions.make_laplacian_kNN(data.x, self.k, self.configs.metric)
    #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
    #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
    W = array_functions.try_toarray(W)
    if not data.is_regression:
        # Classification: reduce to a binary 0/1 target for the first class.
        y = array_functions.make_label_matrix(
            data.y)[:, data.classes].toarray()
        y = y[:, 0]
    reg = self.create_reg(data.x)
    reg2 = self.create_reg2(data.x)
    if self.configs.use_fused_lasso:
        # Fused lasso enters as an inequality constraint
        # (fused_lasso(x, W) <= C), so the smoothness term in ``args``
        # gets weight 0; SLSQP is required to handle constraints.
        method = 'SLSQP'
        max_iter = 10000
        maxfun = 10000
        fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
        lasso = lambda x: self.C - fused_lasso(x, W)
        constraints = [{'type': 'ineq', 'fun': lasso}]
        if self.configs.no_reg:
            constraints = ()
        args = (x, y, y_s, y_t, 0, reg, self.C2, reg2)
    else:
        method = 'L-BFGS-B'
        max_iter = np.inf
        maxfun = np.inf
        constraints = ()
        args = (x, y, y_s, y_t, self.C, reg, self.C2, reg2)
    if self.g_supervised:
        # 1-d inputs only: force g >= 1 at the smallest x and g == 0 at
        # the largest x.
        x = np.squeeze(data.x)
        assert x.ndim == 1
        min_i = x.argmin()
        max_i = x.argmax()
        bounds[min_i] = (1, None)
        bounds[max_i] = (0, 0)
    options = {
        'disp': False,
        'maxiter': max_iter,
        'maxfun': maxfun,
        #'pgtol': 1e-8
    }
    results = optimize.minimize(
        f, g0,
        method=method,
        bounds=bounds,
        jac=g,
        options=options,
        constraints=constraints,
        args=args,
    )
    compare_results = False
    if compare_results or not results.success:
        # Retry without the analytic jacobian (numerical gradients).
        # NOTE(review): 'approx_grad' is an fmin_l_bfgs_b-style option;
        # omitting jac= already makes minimize() use numerical gradients
        # -- confirm this option is honored rather than warned about.
        options['disp'] = False
        options['approx_grad'] = True
        results2 = optimize.minimize(f, g0, method=method, bounds=bounds, options=options, constraints=constraints, args=args)
        if compare_results:
            # Debug path: report discrepancy between analytic- and
            # numerical-gradient solutions.
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print 'All zeros - using absolute error'
                print 'Abs Error - g: ' + str(norm(err[1:]))
            else:
                print 'Rel Error - g: ' + str(
                    norm(err[1:]) / norm(results2.x[1:]))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print 'Abs Error - b: ' + str(norm(err[0]))
                else:
                    print 'Rel Error - b: ' + str(
                        norm(err[0]) / norm(results2.x[0]))
            rel_error = norm(results.fun - results2.fun) / norm(results2.fun)
            print 'Rel Error - f(g*): ' + str(rel_error)
            if rel_error > .001 and norm(results2.x) > 0:
                print 'Big error: C=' + str(self.C) + ' C2=' + str(self.C2)
        if not results.success:
            results = results2
    # Optimization variable layout: [bias, g_1 ... g_n].
    self.g = results.x[1:]
    self.bias = results.x[0]
    if not results.success:
        self.g[:] = 0
        self.bias = 0
        #print 'Failed: ' + results.message
    '''
    I = data.arg_sort()
    x = (data.x[I,:])
    g = array_functions.vec_to_2d(results.x[I])
    v = np.hstack((x,g))
    print v
    print ''
    '''
    s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(
        self.k) + '-'
    if not results.success:
        s += 'Opt failed - '
    # Significantly negative weights indicate the bound constraints were
    # violated; treat the solution as unusable.
    has_nonneg = (self.g[1:] < -1e-6).any()
    if has_nonneg:
        s += 'Negative g - min value: ' + str(self.g.min())
    if not results.success or has_nonneg:
        print s + ': ' + results.message
        self.g[:] = 0
    else:
        pass
    # Smooth the learned weights over all of x with a Nadaraya-Watson
    # regression (g becomes the regression target).
    g_data = data_lib.Data()
    g_data.x = data.x
    g_data.y = results.x[1:]
    g_data.is_regression = True
    g_data.set_train()
    g_data.set_target()
    g_data.set_true_y()
    self.g_nw.train_and_test(g_data)
    if results.success:
        pass  # no-op; kept as a breakpoint site
def create_reg(self, x):
    """Create the regularization function used during training.

    Precomputes the kNN Laplacian for ``x`` so each call of the returned
    function only evaluates the static ``reg`` penalty against it.
    """
    graph_lap = array_functions.make_laplacian_kNN(x, self.k, self.metric)
    return lambda weights: ScipyOptNonparametricHypothesisTransfer.reg(
        weights, graph_lap)
def train(self, data):
    '''
    Fit the per-instance transfer weights ``g`` (plus a bias at index 0
    of the optimization variable) via constrained minimization, then
    smooth the learned weights over the inputs with ``self.g_nw``.

    Stashed manual tuning values:
    self.C = 1
    self.C2 = 10
    self.k = 1
    '''
    #self.C = 100
    #self.configs.use_fused_lasso = False
    g_max = 2
    # Oracle initialization: g starts at g_max where the first feature is
    # below .5 -- presumably where transfer should be strong; TODO(review)
    # confirm against the experiment setup.
    g0_oracle = np.zeros(data.n)
    g0_oracle[data.x[:,0] < .5] = g_max
    f = self.create_eval(data, self.C)
    g = self.create_gradient(data, self.C)
    # One (lower, upper) bound per instance weight; bias bound prepended below.
    bounds = list((0, g_max) for i in range(data.n))
    #bounds = list((i, i) for i in range(data.n))
    #bounds = list((0, 0) for i in range(data.n))
    #bounds[0] = (0,None)
    #self.include_bias = False
    if self.include_bias:
        bounds = [(None, None)] + bounds
        #bounds = [(10, 10)] + bounds
    else:
        # Bias pinned to exactly 0 when disabled.
        bounds = [(0, 0)] + bounds
    n = data.n + 1
    g0 = np.zeros(n)
    g0[1:] = g0_oracle
    #g0[:] = 1
    x = data.x
    y_s = np.squeeze(data.y_s[:,0])
    y_t = np.squeeze(data.y_t[:,0])
    y = data.y
    # Negated Laplacian: off-diagonals become nonnegative adjacency-style
    # weights for the fused-lasso constraint below.
    W = -array_functions.make_laplacian_kNN(data.x,self.k,self.configs.metric)
    #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
    #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
    W = array_functions.try_toarray(W)
    if not data.is_regression:
        # Classification: reduce to a binary 0/1 target for the first class.
        y = array_functions.make_label_matrix(data.y)[:,data.classes].toarray()
        y = y[:,0]
    reg = self.create_reg(data.x)
    reg2 = self.create_reg2(data.x)
    if self.configs.use_fused_lasso:
        # Fused lasso enters as an inequality constraint
        # (fused_lasso(x, W) <= C); the smoothness weight in ``args`` is 0,
        # and SLSQP is required to handle constraints.
        method = 'SLSQP'
        max_iter = 10000
        maxfun = 10000
        fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
        lasso = lambda x : self.C - fused_lasso(x,W)
        constraints = [{ 'type': 'ineq', 'fun': lasso }]
        if self.configs.no_reg:
            constraints = ()
        args = (x,y,y_s,y_t,0,reg,self.C2,reg2)
    else:
        method = 'L-BFGS-B'
        max_iter = np.inf
        maxfun = np.inf
        constraints = ()
        args = (x,y,y_s,y_t,self.C,reg,self.C2,reg2)
    if self.g_supervised:
        # 1-d inputs only: force g >= 1 at the smallest x and g == 0 at
        # the largest x.
        x = np.squeeze(data.x)
        assert x.ndim == 1
        min_i = x.argmin()
        max_i = x.argmax()
        bounds[min_i] = (1,None)
        bounds[max_i] = (0,0)
    options = {
        'disp': False,
        'maxiter':max_iter,
        'maxfun': maxfun,
        #'pgtol': 1e-8
    }
    results = optimize.minimize(
        f, g0,
        method=method,
        bounds=bounds,
        jac=g,
        options=options,
        constraints=constraints,
        args=args,
    )
    compare_results = False
    if compare_results or not results.success:
        # Retry with numerical gradients (no jac=).
        # NOTE(review): 'approx_grad' is an fmin_l_bfgs_b-style option;
        # confirm minimize() honors it rather than warning.
        options['disp'] = False
        options['approx_grad'] = True
        results2 = optimize.minimize(
            f, g0,
            method=method,
            bounds=bounds,
            options=options,
            constraints=constraints,
            args=args
        )
        if compare_results:
            # Debug path: report analytic vs numerical gradient discrepancy.
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print 'All zeros - using absolute error'
                print 'Abs Error - g: ' + str(norm(err[1:]))
            else:
                print 'Rel Error - g: ' + str(norm(err[1:])/norm(results2.x[1:]))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print 'Abs Error - b: ' + str(norm(err[0]))
                else:
                    print 'Rel Error - b: ' + str(norm(err[0])/norm(results2.x[0]))
            rel_error = norm(results.fun-results2.fun)/norm(results2.fun)
            print 'Rel Error - f(g*): ' + str(rel_error)
            if rel_error > .001 and norm(results2.x) > 0:
                print 'Big error: C=' + str(self.C) + ' C2=' + str(self.C2)
        if not results.success:
            results = results2
    # Optimization variable layout: [bias, g_1 ... g_n].
    self.g = results.x[1:]
    self.bias = results.x[0]
    if not results.success:
        self.g[:] = 0
        self.bias = 0
        #print 'Failed: ' + results.message
    '''
    I = data.arg_sort()
    x = (data.x[I,:])
    g = array_functions.vec_to_2d(results.x[I])
    v = np.hstack((x,g))
    print v
    print ''
    '''
    s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(self.k) + '-'
    if not results.success:
        s += 'Opt failed - '
    # Significantly negative weights mean the bounds were violated;
    # treat the solution as unusable.
    has_nonneg = (self.g[1:] < -1e-6).any()
    if has_nonneg:
        s += 'Negative g - min value: ' + str(self.g.min())
    if not results.success or has_nonneg:
        print s + ': ' + results.message
        self.g[:] = 0
    else:
        pass
    # Smooth the learned weights over all of x with Nadaraya-Watson
    # regression (g becomes the regression target).
    g_data = data_lib.Data()
    g_data.x = data.x
    g_data.y = results.x[1:]
    g_data.is_regression = True
    g_data.set_train()
    g_data.set_target()
    g_data.set_true_y()
    self.g_nw.train_and_test(g_data)
    if results.success:
        pass  # no-op; kept as a breakpoint site
def train_g_nonparametric(self, target_data):
    """Learn nonparametric mixing weights g on the labeled subset via a
    convex program, then extend them to all instances.

    Minimizes the squared error of the blend ``g*y_s + (1-g)*y_t`` against
    the true labels, with either a fused-lasso or Laplacian quadratic
    penalty, subject to 0 <= g <= .5. The solved values are then smoothed
    over all of ``target_data`` with a Nadaraya-Watson regression and
    stored in ``self.g`` (solved values overwrite the labeled indices).
    """
    y_t, y_s, y_true = self.get_predictions(target_data)
    is_labeled = target_data.is_labeled
    labeled_inds = is_labeled.nonzero()[0]
    n_labeled = len(labeled_inds)
    g = cvx.Variable(n_labeled)
    '''
    L = array_functions.make_laplacian_uniform(target_data.x[labeled_inds,:],self.radius,metric) \
        + .0001*np.identity(n_labeled)
    '''
    # kNN Laplacian over the labeled points; the small ridge keeps the
    # quadratic form well-behaved for the solver.
    L = array_functions.make_laplacian_kNN(target_data.x[labeled_inds, :], self.k, self.metric) \
        + .0001 * np.identity(n_labeled)
    if self.use_fused_lasso:
        reg = cvx_functions.create_fused_lasso(-L, g)
    else:
        reg = cvx.quad_form(g, L)
    # Squared error of the convex combination of source and target
    # predictions against the true labels.
    loss = cvx.sum_entries(
        cvx.power(
            cvx.mul_elemwise(y_s[:, 0], g) +
            cvx.mul_elemwise(y_t[:, 0], (1 - g)) -
            y_true[:, 0],
            2
        )
    )
    constraints = [g >= 0, g <= .5]
    #constraints += [g[0] == .5, g[-1] == 0]
    obj = cvx.Minimize(loss + self.C * reg)
    prob = cvx.Problem(obj, constraints)
    assert prob.is_dcp()
    try:
        prob.solve()
        g_value = np.reshape(np.asarray(g.value), n_labeled)
    except Exception:
        # Fix: narrowed from a bare ``except`` so KeyboardInterrupt/
        # SystemExit propagate; the constant-g fallback is preserved
        # (also triggers when the solver leaves g.value as None).
        k = 0
        #assert prob.status is None
        print('CVX problem: setting g = ' + str(k))
        print('\tsigma=' + str(self.sigma))
        print('\tC=' + str(self.C))
        print('\tradius=' + str(self.radius))
        g_value = k * np.ones(n_labeled)
    if self.should_plot_g and enable_plotting and target_data.x.shape[1] == 1:
        array_functions.plot_2d(target_data.x[labeled_inds, :], g_value)
    labeled_train_data = target_data.get_subset(labeled_inds)
    assert labeled_train_data.y.shape == g_value.shape
    # Fit a Nadaraya-Watson regressor with g as the target so the learned
    # weights can be evaluated at unlabeled instances too.
    g_nw = method.NadarayaWatsonMethod(copy.deepcopy(self.configs))
    labeled_train_data.is_regression = True
    labeled_train_data.y = g_value
    labeled_train_data.true_y = g_value
    g_nw.configs.loss_function = loss_function.MeanSquaredError()
    g_nw.tune_loo(labeled_train_data)
    g_nw.train(labeled_train_data)
    '''
    a = np.hstack((g_value[labeled_train_data.x.argsort(0)], np.sort(labeled_train_data.x,0)))
    print str(a)
    print 'g_nw sigma: ' + str(g_nw.sigma)
    print 'C:' + str(self.C)
    '''
    target_data.is_regression = True
    self.g = g_nw.predict(target_data).fu
    # Keep the exactly-solved values on the labeled indices.
    self.g[labeled_inds] = g_value
    assert not np.any(np.isnan(self.g))