def train_rls():
    """Grid-search Gaussian kernel width and regularization parameter for a
    reduced-set (sparse) RLS, selecting by leave-one-out accuracy.

    Trains on "a1a.t", evaluates on "a1a"; 500 randomly chosen training
    examples serve as basis vectors. Prints the best (gamma, regparam) pair,
    its LOO accuracy, and the resulting test-set accuracy.
    """
    X_train, Y_train, foo = read_svmlight("a1a.t")
    # BUGFIX: pass the training dimensionality so the test matrix gets the
    # same number of feature columns (all other examples in this file do
    # this; without it predict() can fail on a column-count mismatch).
    X_test, Y_test, foo = read_svmlight("a1a", X_train.shape[1])
    # select randomly 500 basis vectors
    indices = range(X_train.shape[0])
    indices = random.sample(indices, 500)
    basis_vectors = X_train[indices]
    # exponential grids; the same grid is reused for both hyper-parameters
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    best_acc = 0.
    best_learner = None
    for gamma in gammas:
        # New RLS is initialized for each kernel parameter; LeaveOneOutRLS
        # internally cross-validates over all regparams at once.
        learner = LeaveOneOutRLS(X_train, Y_train, basis_vectors=basis_vectors,
                                 kernel="GaussianKernel", gamma=gamma,
                                 regparams=regparams, measure=accuracy)
        acc = np.max(learner.cv_performances)
        if acc > best_acc:
            best_acc = acc
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best leave-one-out accuracy %f" % best_acc)
    print("test set accuracy %f" % accuracy(Y_test, P_test))
def train_rls():
    """Compare leave-pair-out AUC estimates against true test-set AUC.

    Repeats 1000 times: takes a disjoint 30-example slice of the training
    data, trains RLS on it, estimates AUC via leave-pair-out over all
    (positive, negative) index pairs, and measures AUC on the held-out test
    set. Prints the mean of both estimates.
    """
    X_train, Y_train, foo = read_svmlight("a1a.t")
    X_test, Y_test, foo = read_svmlight("a1a", X_train.shape[1])
    lpo_aucs = []
    test_aucs = []
    # renamed from "i" so the pair-building loops below don't shadow it
    for rep in range(1000):
        X_small = X_train[rep * 30: rep * 30 + 30]
        Y_small = Y_train[rep * 30: rep * 30 + 30]
        # enumerate every (positive, negative) index pair in the subsample
        pairs_start = []
        pairs_end = []
        for i in range(len(Y_small)):
            for j in range(len(Y_small)):
                if Y_small[i] == 1. and Y_small[j] == -1.:
                    pairs_start.append(i)
                    pairs_end.append(j)
        learner = RLS(X_small, Y_small)
        pairs_start = np.array(pairs_start)
        pairs_end = np.array(pairs_end)
        P_start, P_end = learner.leave_pair_out(pairs_start, pairs_end)
        # BUGFIX: the original "P_start > P_end + 0.5 * (P_start == P_end)"
        # parses as P_start > (P_end + 0.5*...), which scores tied
        # predictions as 0. The pairwise AUC statistic counts a correctly
        # ordered pair as 1 and a tie as 0.5:
        lpo_a = np.mean((P_start > P_end) + 0.5 * (P_start == P_end))
        P_test = learner.predict(X_test)
        test_a = auc(Y_test, P_test)
        lpo_aucs.append(lpo_a)
        test_aucs.append(test_a)
    print("mean lpo over auc over 1000 repetitions: %f" % np.mean(lpo_aucs))
    print("mean test auc over 1000 repetitions %f" % np.mean(test_aucs))
def train_rls():
    """Select the RLS regularization parameter by leave-one-out accuracy.

    Sweeps regparam over 2**-15 .. 2**15, re-solving the same RLS learner
    for each value (cheap thanks to the RLS computational short-cut) and
    scoring with fast LOO predictions. Retrains with the winner and prints
    its LOO accuracy and test-set accuracy.
    """
    X_train, Y_train, foo = read_svmlight("a1a.t")
    X_test, Y_test, foo = read_svmlight("a1a", X_train.shape[1])
    learner = RLS(X_train, Y_train)
    top_acc, top_regparam = 0., None
    # exponential grid of candidate regparam values
    for exponent in range(-15, 16):
        candidate = 2. ** exponent
        # re-solving for a new regparam is fast due to the RLS short-cut
        learner.solve(candidate)
        # leave-one-out predictions are likewise computed via a short-cut
        loo_predictions = learner.leave_one_out()
        loo_acc = accuracy(Y_train, loo_predictions)
        print("regparam 2**%d, loo-accuracy %f" % (exponent, loo_acc))
        if loo_acc > top_acc:
            top_acc, top_regparam = loo_acc, candidate
    # final model with the best regularization level
    learner.solve(top_regparam)
    P_test = learner.predict(X_test)
    print("best regparam %f with loo-accuracy %f" % (top_regparam, top_acc))
    print("test set accuracy %f" % accuracy(Y_test, P_test))
def train_rls():
    """Tune RLS regularization by leave-pair-out cross-validation.

    Uses the first 1000 training examples (full-data leave-pair-out would
    be slow), lets LeavePairOutRLS pick the best of three regparams, and
    prints the chosen value, the LPO performances, and the test-set AUC.
    """
    X_train, Y_train, foo = read_svmlight("a1a.t")
    # subsample, leave-pair-out on whole data would take a lot of time
    X_train, Y_train = X_train[:1000], Y_train[:1000]
    X_test, Y_test, foo = read_svmlight("a1a", X_train.shape[1])
    candidate_regparams = [2.**-5, 1., 2.**5]
    learner = LeavePairOutRLS(X_train, Y_train, regparams=candidate_regparams)
    print("best regparam %f" % learner.regparam)
    print("lpo auc " + str(learner.cv_performances))
    predictions = learner.predict(X_test)
    print("test auc %f" % auc(Y_test, predictions))
def train_rls():
    """Compare leave-one-out AUC estimates against true test-set AUC.

    Repeats 1000 times: trains RLS on a disjoint 30-example slice of the
    training data, estimates AUC via leave-one-out on that slice, and
    measures AUC on the held-out test set. Prints the mean of both.
    """
    X_train, Y_train, foo = read_svmlight("a1a.t")
    X_test, Y_test, foo = read_svmlight("a1a", X_train.shape[1])
    loo_aucs = []
    test_aucs = []
    for rep in range(1000):
        start = rep * 30
        X_small = X_train[start:start + 30]
        Y_small = Y_train[start:start + 30]
        learner = RLS(X_small, Y_small)
        loo_aucs.append(auc(Y_small, learner.leave_one_out()))
        test_aucs.append(auc(Y_test, learner.predict(X_test)))
    print("mean loo auc over 1000 repetitions %f" % np.mean(loo_aucs))
    print("mean test auc over 1000 repetitions %f" % np.mean(test_aucs))