def train_rls():
    #Selects both the gamma parameter for Gaussian kernel, and regparam with loocv
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    best_error = float("inf")
    best_learner = None
    for gamma in gammas:
        #New RLS is initialized for each kernel parameter
        learner = LeaveOneOutRLS(X_train, Y_train, kernel="GaussianKernel", gamma=gamma, regparams=regparams)
        e = np.min(learner.cv_performances)
        if e < best_error:
            best_error = e
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best leave-one-out error %f" % best_error)
    print("test error %f" % sqerror(Y_test, P_test))
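#Note: every train_rls() variant in this section calls a load_housing() helper
#that is assumed to be defined elsewhere, together with the usual imports
#(numpy as np, random, and the relevant learners and measures from the rlscore
#package). The sketch below is one possible implementation of that helper; the
#file name "housing.data" and the 250-example training split are illustrative
#assumptions, not part of the original snippets.
import numpy as np

def load_housing():
    #Load the whitespace-separated housing data, shuffle it, and split it
    #into training and test sets
    np.random.seed(1)
    data = np.loadtxt("housing.data")
    np.random.shuffle(data)
    X = data[:, :-1]
    Y = data[:, -1]
    X_train, Y_train = X[:250], Y[:250]
    X_test, Y_test = X[250:], Y[250:]
    return X_train, Y_train, X_test, Y_test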
def train_rls():
    X_train, Y_train, X_test, Y_test = load_housing()
    #select randomly 100 basis vectors
    indices = range(X_train.shape[0])
    indices = random.sample(indices, 100)
    basis_vectors = X_train[indices]
    kernel = GaussianKernel(basis_vectors, gamma=0.00003)
    K_train = kernel.getKM(X_train)
    K_rr = kernel.getKM(basis_vectors)
    K_test = kernel.getKM(X_test)
    learner = RLS(K_train, Y_train, basis_vectors=K_rr, kernel="PrecomputedKernel", regparam=0.0003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    P_loo = learner.leave_one_out()
    #Test set predictions
    P_test = learner.predict(K_test)
    print("leave-one-out error %f" % sqerror(Y_train, P_loo))
    print("test error %f" % sqerror(Y_test, P_test))
    #Sanity check, can we do better than predicting mean of training labels?
    print("mean predictor %f" % sqerror(Y_test, np.ones(Y_test.shape) * np.mean(Y_train)))
def train_rls():
    #Select regparam with leave-one-out cross-validation
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = RLS(X_train, Y_train)
    best_regparam = None
    best_error = float("inf")
    #exponential grid of possible regparam values
    log_regparams = range(-15, 16)
    for log_regparam in log_regparams:
        regparam = 2.**log_regparam
        #RLS is re-trained with the new regparam, this
        #is very fast due to computational short-cut
        learner.solve(regparam)
        #Leave-one-out cross-validation predictions, this is fast due to
        #computational short-cut
        P_loo = learner.leave_one_out()
        e = sqerror(Y_train, P_loo)
        print("regparam 2**%d, loo-error %f" % (log_regparam, e))
        if e < best_error:
            best_error = e
            best_regparam = regparam
    learner.solve(best_regparam)
    P_test = learner.predict(X_test)
    print("best regparam %f with loo-error %f" % (best_regparam, best_error))
    print("test error %f" % sqerror(Y_test, P_test))
def train_rls():
    #Selects both the gamma parameter for Gaussian kernel, and regparam with loocv
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    best_error = float("inf")
    for gamma in gammas:
        #New RLS is initialized for each kernel parameter
        learner = RLS(X_train, Y_train, kernel="GaussianKernel", gamma=gamma)
        for regparam in regparams:
            #RLS is re-trained with the new regparam, this
            #is very fast due to computational short-cut
            learner.solve(regparam)
            #Leave-one-out cross-validation predictions, this is fast due to
            #computational short-cut
            P_loo = learner.leave_one_out()
            e = sqerror(Y_train, P_loo)
            #print("regparam %f gamma %f loo-error %f" % (regparam, gamma, e))
            if e < best_error:
                best_error = e
                best_regparam = regparam
                best_gamma = gamma
    learner = RLS(X_train, Y_train, regparam=best_regparam, kernel="GaussianKernel", gamma=best_gamma)
    P_test = learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best leave-one-out error %f" % best_error)
    print("test error %f" % sqerror(Y_test, P_test))
def train_rls():
    #Trains GlobalRankRLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = GlobalRankRLS(X_train, Y_train)
    #Test set predictions
    P_test = learner.predict(X_test)
    print("test cindex %f" % cindex(Y_test, P_test))
def train_rls():
    #Selects both the gamma parameter for Gaussian kernel, and regparam with kfoldcv
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    best_perf = 0.
    best_learner = None
    for gamma in gammas:
        #New RLS is initialized for each kernel parameter
        learner = KfoldRankRLS(X_train, Y_train, kernel="GaussianKernel", folds=folds, gamma=gamma, regparams=regparams, measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
def train_rls():
    X_train, Y_train, X_test, Y_test = load_housing()
    cb = Callback(X_test, Y_test)
    #we select 13 features
    learner = GreedyRLS(X_train, Y_train, 13, callbackfun=cb)
    #Test set predictions
    P_test = learner.predict(X_test)
    print("test error %f" % sqerror(Y_test, P_test))
    print("Selected features " + str(learner.selected))
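#Note: the Callback object passed to GreedyRLS above is assumed to be defined
#elsewhere. The sketch below is one possible implementation; it assumes that
#GreedyRLS calls callback(learner) after each selected feature and
#finished(learner) when selection ends, and that sqerror has been imported.
class Callback(object):

    def __init__(self, X_test, Y_test):
        self.X_test = X_test
        self.Y_test = Y_test
        self.iteration = 0

    def callback(self, learner):
        #Report test set error after each newly selected feature
        self.iteration += 1
        P = learner.predict(self.X_test)
        e = sqerror(self.Y_test, P)
        print("features selected %d, test error %f" % (self.iteration, e))

    def finished(self, learner):
        #Called once when feature selection has finished
        pass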
def train_rls():
    X_train, Y_train, X_test, Y_test = load_housing()
    #we select 5 features
    learner = GreedyRLS(X_train, Y_train, 5)
    #Test set predictions
    P_test = learner.predict(X_test)
    print("test error %f" % sqerror(Y_test, P_test))
    print("Selected features " + str(learner.selected))
def train_rls():
    #Trains RLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-15, 16)]
    learner = LeaveOneOutRLS(X_train, Y_train, regparams=regparams)
    loo_errors = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-one-out errors " + str(loo_errors))
    print("chosen regparam %f" % learner.regparam)
    print("test error %f" % sqerror(Y_test, P_test))
def train_rls():
    #Trains RLS with the regularization parameter selected by leave-one-out cindex
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-15, 16)]
    learner = LeaveOneOutRLS(X_train, Y_train, regparams=regparams, measure=cindex)
    loo_performances = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-one-out cindex " + str(loo_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test cindex %f" % cindex(Y_test, P_test))
def train_rls():
    #Trains RankRLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-10, 10)]
    learner = LeavePairOutRankRLS(X_train, Y_train, regparams=regparams)
    lpo_performances = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-pair-out performances " + str(lpo_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
def train_rls():
    #Trains RankRLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    #generate fold partition, arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-10, 10)]
    learner = KfoldRankRLS(X_train, Y_train, folds=folds, regparams=regparams, measure=cindex)
    kfold_perfs = learner.cv_performances
    P_test = learner.predict(X_test)
    print("kfold performances " + str(kfold_perfs))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
def train_rls(): X_train, Y_train, X_test, Y_test = load_housing() learner = RLS(X_train, Y_train, kernel="GaussianKernel", regparam=1, gamma=1) #Leave-one-out cross-validation predictions, this is fast due to #computational short-cut P_loo = learner.leave_one_out() #Test set predictions P_test = learner.predict(X_test) print("leave-one-out error %f" %sqerror(Y_train, P_loo)) print("test error %f" %sqerror(Y_test, P_test)) #Sanity check, can we do better than predicting mean of training labels? print("mean predictor %f" %sqerror(Y_test, np.ones(Y_test.shape)*np.mean(Y_train)))
def train_rls(): X_train, Y_train, X_test, Y_test = load_housing() learner = RLS(X_train, Y_train, kernel="GaussianKernel", regparam=0.0003, gamma=0.00003) #This is how we make predictions P_test = learner.predict(X_test) #We can separate the predictor from learner predictor = learner.predictor #And do the same predictions P_test = predictor.predict(X_test) #Let's get the coefficients of the predictor A = predictor.A print("A-coefficients " +str(A)) print("number of coefficients %d" %len(A))
def train_rls(): X_train, Y_train, X_test, Y_test = load_housing() learner = RLS(X_train, Y_train, kernel="LinearKernel", bias=1, regparam=1) #Leave-one-out cross-validation predictions, this is fast due to #computational short-cut P_loo = learner.leave_one_out() #Test set predictions P_test = learner.predict(X_test) print("leave-one-out error %f" % sqerror(Y_train, P_loo)) print("test error %f" % sqerror(Y_test, P_test)) #Sanity check, can we do better than predicting mean of training labels? print("mean predictor %f" % sqerror(Y_test, np.ones(Y_test.shape) * np.mean(Y_train)))
def train_rls():
    #Trains GlobalRankRLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    #generate fold partition, arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    perfs = []
    for fold in folds:
        #predictions for the hold-out instances in this fold
        P = learner.holdout(fold)
        c = cindex(Y_train[fold], P)
        perfs.append(c)
    perf = np.mean(perfs)
    print("5-fold cross-validation cindex %f" % perf)
    P_test = learner.predict(X_test)
    print("test cindex %f" % cindex(Y_test, P_test))
def train_rls():
    X_train, Y_train, X_test, Y_test = load_housing()
    #select randomly 100 basis vectors
    indices = range(X_train.shape[0])
    indices = random.sample(indices, 100)
    basis_vectors = X_train[indices]
    learner = RLS(X_train, Y_train, basis_vectors=basis_vectors, kernel="GaussianKernel", regparam=0.0003, gamma=0.00003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    P_loo = learner.leave_one_out()
    #Test set predictions
    P_test = learner.predict(X_test)
    print("leave-one-out error %f" % sqerror(Y_train, P_loo))
    print("test error %f" % sqerror(Y_test, P_test))
    #Sanity check, can we do better than predicting mean of training labels?
    print("mean predictor %f" % sqerror(Y_test, np.ones(Y_test.shape) * np.mean(Y_train)))
def train_rls():
    #Trains GreedyRLS with default parameters, selecting 5 features
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = GreedyRLS(X_train, Y_train, 5)
    #This is how we make predictions
    P_test = learner.predict(X_test)
    #We can separate the predictor from the learner
    predictor = learner.predictor
    #And do the same predictions
    P_test = predictor.predict(X_test)
    #Let's get the coefficients of the predictor
    w = predictor.W
    b = predictor.b
    print("number of coefficients %d" % len(w))
    print("w-coefficients " + str(w))
    print("bias term %f" % b)
def train_rls():
    #Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = RLS(X_train, Y_train)
    #This is how we make predictions
    P_test = learner.predict(X_test)
    #We can separate the predictor from the learner
    predictor = learner.predictor
    #And do the same predictions
    P_test = predictor.predict(X_test)
    #Let's get the coefficients of the predictor
    w = predictor.W
    b = predictor.b
    print("number of coefficients %d" % len(w))
    print("w-coefficients " + str(w))
    print("bias term %f" % b)
def train_rls(): X_train, Y_train, X_test, Y_test = load_housing() learner = RLS(X_train, Y_train, kernel="GaussianKernel", regparam=0.0003, gamma=0.00003) #This is how we make predictions P_test = learner.predict(X_test) #We can separate the predictor from learner predictor = learner.predictor #And do the same predictions P_test = predictor.predict(X_test) #Let's get the coefficients of the predictor A = predictor.A print("A-coefficients " + str(A)) print("number of coefficients %d" % len(A))
def train_rls():
    X_train, Y_train, X_test, Y_test = load_housing()
    #select randomly 20 basis vectors
    indices = range(X_train.shape[0])
    indices = random.sample(indices, 20)
    basis_vectors = X_train[indices]
    learner = RLS(X_train, Y_train, basis_vectors=basis_vectors, kernel="GaussianKernel", regparam=0.0003, gamma=0.00003)
    #Test set predictions
    P_test = learner.predict(X_test)
    #We can separate the predictor from the learner
    predictor = learner.predictor
    #And do the same predictions
    P_test = predictor.predict(X_test)
    #Let's get the coefficients of the predictor
    A = predictor.A
    print("A-coefficients " + str(A))
    print("number of coefficients %d" % len(A))
def train_rls():
    #Trains RLS with a precomputed kernel matrix
    X_train, Y_train, X_test, Y_test = load_housing()
    #Minor technical detail: adding 1.0 simulates the effect of adding a
    #constant valued bias feature, as is done by 'LinearKernel' by default
    K_train = np.dot(X_train, X_train.T) + 1.0
    K_test = np.dot(X_test, X_train.T) + 1.0
    learner = RLS(K_train, Y_train, kernel="PrecomputedKernel")
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    P_loo = learner.leave_one_out()
    #Test set predictions
    P_test = learner.predict(K_test)
    print("leave-one-out error %f" % sqerror(Y_train, P_loo))
    print("test error %f" % sqerror(Y_test, P_test))
    #Sanity check, can we do better than predicting mean of training labels?
    print("mean predictor %f" % sqerror(Y_test, np.ones(Y_test.shape) * np.mean(Y_train)))
def train_rls():
    #Trains PPRankRLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    pairs_start = []
    pairs_end = []
    #Sample 1000 pairwise preferences from the data
    trange = range(len(Y_train))
    while len(pairs_start) < 1000:
        ind0 = random.choice(trange)
        ind1 = random.choice(trange)
        if Y_train[ind0] > Y_train[ind1]:
            pairs_start.append(ind0)
            pairs_end.append(ind1)
        elif Y_train[ind0] < Y_train[ind1]:
            pairs_start.append(ind1)
            pairs_end.append(ind0)
    learner = PPRankRLS(X_train, pairs_start, pairs_end)
    #Test set predictions
    P_test = learner.predict(X_test)
    print("test cindex %f" % cindex(Y_test, P_test))
def train_rls():
    X_train, Y_train, X_test, Y_test = load_housing()
    kernel = GaussianKernel(X_train, gamma=0.00003)
    K_train = kernel.getKM(X_train)
    K_test = kernel.getKM(X_test)
    learner = RLS(K_train, Y_train, kernel="PrecomputedKernel", regparam=0.0003)
    #Leave-one-out cross-validation predictions, this is fast due to
    #computational short-cut
    P_loo = learner.leave_one_out()
    #Test set predictions
    P_test = learner.predict(K_test)
    print("leave-one-out error %f" % sqerror(Y_train, P_loo))
    print("test error %f" % sqerror(Y_test, P_test))
    #Sanity check, can we do better than predicting mean of training labels?
    print("mean predictor %f" % sqerror(Y_test, np.ones(Y_test.shape) * np.mean(Y_train)))