Example #1
0
def train_rls():
    """Grid-search the Gaussian kernel gamma and regparam with leave-one-out CV.

    Both parameters are drawn from the grid 2**-15 ... 2**15.  Each pair is
    scored by ``sqerror`` between the training labels and the leave-one-out
    predictions.  A final learner is trained with the best pair and its
    test error is printed together with the chosen parameters.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    grid = [2.**exponent for exponent in range(-15, 16)]
    lowest_error = float("inf")
    chosen_gamma = None
    chosen_regparam = None
    for gamma in grid:
        # A fresh learner is created for every kernel parameter ...
        model = RLS(X_train, Y_train, kernel="GaussianKernel", gamma=gamma)
        for regparam in grid:
            # ... while a change of regparam only needs a re-solve.
            model.solve(regparam)
            loo_predictions = model.leave_one_out()
            loo_error = sqerror(Y_train, loo_predictions)
            if loo_error < lowest_error:
                lowest_error, chosen_gamma, chosen_regparam = loo_error, gamma, regparam
    # Final model trained with the selected parameter pair
    final_model = RLS(X_train, Y_train, kernel="GaussianKernel",
                      gamma=chosen_gamma, regparam=chosen_regparam)
    test_predictions = final_model.predict(X_test)
    print("best parameters gamma %f regparam %f" %(chosen_gamma, chosen_regparam))
    print("best leave-one-out error %f" %lowest_error)
    print("test error %f" %sqerror(Y_test, test_predictions))
Example #2
0
def train_rls():
    """Select regparam by leave-one-out accuracy and report test accuracy.

    Reads training data from "a1a.t" and test data from "a1a", then tries
    regparam = 2**k for k in -15..15 on a single RLS learner.  The
    leave-one-out accuracy of every candidate is printed; the learner is
    finally re-solved with the best candidate and evaluated on the test set.
    """
    X_train, Y_train, _ = read_svmlight("a1a.t")
    X_test, Y_test, _ = read_svmlight("a1a", X_train.shape[1])
    learner = RLS(X_train, Y_train)
    best_regparam = None
    # Start below any attainable accuracy so that a regparam is always
    # selected, even if every candidate scores 0 (otherwise best_regparam
    # would stay None and the solve/print below would fail).
    best_accuracy = float("-inf")
    #exponential grid of possible regparam values
    log_regparams = range(-15, 16)
    for log_regparam in log_regparams:
        regparam = 2.**log_regparam
        #RLS is re-trained with the new regparam, this
        #is very fast due to computational short-cut
        learner.solve(regparam)
        #Leave-one-out cross-validation predictions, this is fast due to
        #computational short-cut
        P_loo = learner.leave_one_out()
        acc = accuracy(Y_train, P_loo)
        print("regparam 2**%d, loo-accuracy %f" %(log_regparam, acc))
        if acc > best_accuracy:
            best_accuracy = acc
            best_regparam = regparam
    learner.solve(best_regparam)
    P_test = learner.predict(X_test)
    print("best regparam %f with loo-accuracy %f" %(best_regparam, best_accuracy))
    print("test set accuracy %f" %accuracy(Y_test, P_test))
Example #3
0
def train_rls():
    """Pick regparam by leave-one-out error and report the test error.

    Tries regparam = 2**k for k in -15..15, re-solving one RLS learner for
    each value.  The ``sqerror`` of the leave-one-out predictions is printed
    per candidate; the learner is then re-solved with the best value and its
    test error is printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    model = RLS(X_train, Y_train)
    chosen_regparam = None
    lowest_error = float("inf")
    for exponent in range(-15, 16):
        candidate = 2.**exponent
        # Changing regparam only requires re-solving the existing learner
        model.solve(candidate)
        loo_error = sqerror(Y_train, model.leave_one_out())
        print("regparam 2**%d, loo-error %f" %(exponent, loo_error))
        if loo_error < lowest_error:
            lowest_error, chosen_regparam = loo_error, candidate
    model.solve(chosen_regparam)
    test_predictions = model.predict(X_test)
    print("best regparam %f with loo-error %f" %(chosen_regparam, lowest_error))
    print("test error %f" %sqerror(Y_test, test_predictions))
Example #4
0
def train_rls():
    """Choose regparam via leave-one-out cross-validation, then test.

    A single RLS learner is re-solved for every regparam on the exponential
    grid 2**-15 ... 2**15.  The candidate with the smallest leave-one-out
    ``sqerror`` is kept and used for the final test-set evaluation.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    rls = RLS(X_train, Y_train)
    # (leave-one-out error, regparam) of the best candidate so far
    best = (float("inf"), None)
    for power in range(-15, 16):
        regparam = 2.**power
        rls.solve(regparam)
        predictions = rls.leave_one_out()
        error = sqerror(Y_train, predictions)
        print("regparam 2**%d, loo-error %f" % (power, error))
        if error < best[0]:
            best = (error, regparam)
    best_error, best_regparam = best
    rls.solve(best_regparam)
    P_test = rls.predict(X_test)
    print("best regparam %f with loo-error %f" % (best_regparam, best_error))
    print("test error %f" % sqerror(Y_test, P_test))
Example #5
0
def train_rls():
    """Jointly select Gaussian kernel gamma and regparam by leave-one-out CV.

    The same exponential grid (2**-15 ... 2**15) is used for both
    parameters.  The pair with the lowest leave-one-out ``sqerror`` is used
    to train the final learner, whose test error is printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    candidates = [2.**k for k in range(-15, 16)]
    winner = {"error": float("inf"), "gamma": None, "regparam": None}
    for gamma in candidates:
        # The kernel parameter cannot be changed in place: build a new learner
        rls = RLS(X_train, Y_train, kernel="GaussianKernel", gamma=gamma)
        for regparam in candidates:
            # regparam, in contrast, is changed by re-solving
            rls.solve(regparam)
            error = sqerror(Y_train, rls.leave_one_out())
            if error < winner["error"]:
                winner["error"] = error
                winner["regparam"] = regparam
                winner["gamma"] = gamma
    best_error = winner["error"]
    best_gamma = winner["gamma"]
    best_regparam = winner["regparam"]
    rls = RLS(X_train,
              Y_train,
              regparam=best_regparam,
              kernel="GaussianKernel",
              gamma=best_gamma)
    P_test = rls.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best leave-one-out error %f" % best_error)
    print("test error %f" % sqerror(Y_test, P_test))
Example #6
0
def train_rls():
    """Compare leave-pair-out AUC estimates with test-set AUC on small samples.

    The training data is cut into 1000 consecutive slices of 30 rows.  For
    each slice an RLS learner is trained, every (positive, negative) pair of
    examples is left out in turn via ``leave_pair_out``, and the fraction of
    correctly ordered pairs (ties counted as one half) is used as the
    leave-pair-out AUC estimate.  The mean estimate and the mean test AUC
    over all slices are printed.
    """
    X_train, Y_train, _ = read_svmlight("a1a.t")
    X_test, Y_test, _ = read_svmlight("a1a", X_train.shape[1])
    lpo_aucs = []
    test_aucs = []
    for rep in range(1000):
        X_small = X_train[rep * 30:rep * 30 + 30]
        Y_small = Y_train[rep * 30:rep * 30 + 30]
        # Indices of positive and negative examples within the slice
        positives = [k for k, label in enumerate(Y_small) if label == 1.]
        negatives = [k for k, label in enumerate(Y_small) if label == -1.]
        # All (positive, negative) index pairs, positives varying slowest
        pairs = [(pos, neg) for pos in positives for neg in negatives]
        pairs_start = np.array([pos for pos, neg in pairs])
        pairs_end = np.array([neg for pos, neg in pairs])
        learner = RLS(X_small, Y_small)
        P_start, P_end = learner.leave_pair_out(pairs_start, pairs_end)
        # Parentheses are required: without them the comparison applies to
        # P_end + 0.5 * (...) and ties are scored 0 instead of 0.5.
        lpo_a = np.mean((P_start > P_end) + 0.5 * (P_start == P_end))
        P_test = learner.predict(X_test)
        test_a = auc(Y_test, P_test)
        lpo_aucs.append(lpo_a)
        test_aucs.append(test_a)
    print("mean lpo over auc over 1000 repetitions: %f" % np.mean(lpo_aucs))
    print("mean test auc over 1000 repetitions %f" % np.mean(test_aucs))
Example #7
0
def train_rls():
    """Select regparam by leave-one-out accuracy and report test accuracy.

    Training data comes from "a1a.t", test data from "a1a".  One RLS learner
    is re-solved for regparam = 2**k, k in -15..15; the leave-one-out
    accuracy of each candidate is printed and the best one is used for the
    final test-set evaluation.
    """
    X_train, Y_train, _ = read_svmlight("a1a.t")
    X_test, Y_test, _ = read_svmlight("a1a", X_train.shape[1])
    learner = RLS(X_train, Y_train)
    best_regparam = None
    # -inf (rather than 0) guarantees that some regparam is selected even
    # when no candidate reaches a positive accuracy; with 0 the strict '>'
    # below would leave best_regparam as None and break the final solve.
    best_accuracy = float("-inf")
    #exponential grid of possible regparam values
    for log_regparam in range(-15, 16):
        regparam = 2.**log_regparam
        #RLS is re-trained with the new regparam, this
        #is very fast due to computational short-cut
        learner.solve(regparam)
        #Leave-one-out cross-validation predictions, this is fast due to
        #computational short-cut
        P_loo = learner.leave_one_out()
        acc = accuracy(Y_train, P_loo)
        print("regparam 2**%d, loo-accuracy %f" %(log_regparam, acc))
        if acc > best_accuracy:
            best_accuracy = acc
            best_regparam = regparam
    learner.solve(best_regparam)
    P_test = learner.predict(X_test)
    print("best regparam %f with loo-accuracy %f" %(best_regparam, best_accuracy))
    print("test set accuracy %f" %accuracy(Y_test, P_test))
Example #8
0
 def test_holdout(self):
     """Check that holdout() agrees with retraining without the held-out rows.

     For every combination of training inputs and labels, the holdout
     predictions of a learner trained on all rows are compared with the
     predictions of a learner trained only on the complement of the holdout
     set.  Covered: default kernel, bias, Gaussian kernel, and re-solving
     over regparams 2**-15 ... 2**14.  Invalid index lists must raise
     IndexError.
     """
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:
             n_rows = X.shape[0]
             held_out = [3, 5, 8, 10, 17, 21]
             kept = list(set(range(n_rows)) - set(held_out))

             def compare(full, reduced):
                 # holdout on the full learner vs. explicit retraining
                 assert_allclose(full.holdout(held_out),
                                 reduced.predict(X[held_out]))

             def compare_over_regparams(full, reduced):
                 # both learners are re-solved with the same regparam
                 for exponent in range(-15, 15):
                     full.solve(2**exponent)
                     reduced.solve(2**exponent)
                     compare(full, reduced)

             #Holdout with linear kernel
             full = RLS(X, Y)
             reduced = RLS(X[kept], Y[kept])
             compare(full, reduced)
             #Holdout with bias, followed by fast regularization
             full = RLS(X, Y, bias=3.0)
             reduced = RLS(X[kept], Y[kept], bias=3.0)
             compare(full, reduced)
             compare_over_regparams(full, reduced)
             #Kernel holdout
             full = RLS(X, Y, kernel="GaussianKernel", gamma=0.01)
             reduced = RLS(X[kept],
                           Y[kept],
                           kernel="GaussianKernel",
                           gamma=0.01)
             compare(full, reduced)
             compare_over_regparams(full, reduced)
             #Incorrect indices: out of range, negative, duplicated
             for bad_indices in ([0, 3, 100], [-1, 0, 2], [1, 1, 2]):
                 self.assertRaises(IndexError, full.holdout, bad_indices)
Example #9
0
 def test_holdout(self):
     """Verify holdout predictions against learners trained on the complement.

     Each learner configuration is built twice: once on all rows (rls1) and
     once on the rows outside the holdout set (rls2).  rls1.holdout() must
     match rls2.predict() on the held-out rows, both right after
     construction and, for the bias and Gaussian kernel setups, after
     re-solving with regparams 2**-15 ... 2**14.  Malformed index lists are
     expected to raise IndexError.
     """
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:
             m = X.shape[0]
             hoindices = [3, 5, 8, 10, 17, 21]
             hocompl = list(set(range(m)) - set(hoindices))
             # (constructor keyword arguments, sweep over regparams afterwards?)
             setups = [
                 ({}, False),
                 ({"bias": 3.0}, True),
                 ({"kernel": "GaussianKernel", "gamma": 0.01}, True),
             ]
             for params, sweep in setups:
                 rls1 = RLS(X, Y, **params)
                 rls2 = RLS(X[hocompl], Y[hocompl], **params)
                 P1 = rls1.holdout(hoindices)
                 P2 = rls2.predict(X[hoindices])
                 assert_allclose(P1, P2)
                 if not sweep:
                     continue
                 for i in range(-15, 15):
                     rls1.solve(2**i)
                     rls2.solve(2**i)
                     P1 = rls1.holdout(hoindices)
                     P2 = rls2.predict(X[hoindices])
                     assert_allclose(P1, P2)
             # rls1 is now the Gaussian kernel learner trained on all rows
             for invalid in ([0, 3, 100], [-1, 0, 2], [1,1,2]):
                 self.assertRaises(IndexError, rls1.holdout, invalid)
Example #10
0
 def test_loo(self):
     """Check leave_one_out() against explicit leave-one-out retraining.

     For every combination of training inputs and labels, the predictions
     returned by leave_one_out() are compared with those obtained by
     deleting one row at a time, retraining, and predicting the deleted
     row.  Covered: regparam/bias set at construction, regparam changed
     via solve(), and the Gaussian kernel.
     """
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:

             def naive_loo(**rls_params):
                 # Reference result: retrain once per row with that row
                 # removed, then predict the removed row.
                 preds = []
                 for i in range(X.shape[0]):
                     X_train = np.delete(X, i, axis=0)
                     Y_train = np.delete(Y, i, axis=0)
                     rls2 = RLS(X_train, Y_train, **rls_params)
                     preds.append(rls2.predict(X[i]))
                 return np.array(preds)

             #LOO with linear kernel
             rls1 = RLS(X, Y, regparam=7.0, bias=3.0)
             P1 = rls1.leave_one_out()
             assert_allclose(P1, naive_loo(regparam=7.0, bias=3.0))
             #Fast regularization
             rls1.solve(1024)
             P1 = rls1.leave_one_out()
             assert_allclose(P1, naive_loo(regparam=1024, bias=3.0))
             #kernels
             rls1 = RLS(X, Y, kernel="GaussianKernel", gamma=0.01)
             P1 = rls1.leave_one_out()
             assert_allclose(P1,
                             naive_loo(kernel="GaussianKernel", gamma=0.01))
Example #11
0
def train_rls():
    """Train a Gaussian kernel RLS and print leave-one-out and test errors.

    Uses regparam=1 and gamma=1.  As a sanity check, the error of always
    predicting the mean of the training labels is printed as well.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    model = RLS(X_train, Y_train, kernel="GaussianKernel", regparam=1, gamma=1)
    # Leave-one-out predictions for the training set
    loo_predictions = model.leave_one_out()
    # Predictions for the separate test set
    test_predictions = model.predict(X_test)
    print("leave-one-out error %f" %sqerror(Y_train, loo_predictions))
    print("test error %f" %sqerror(Y_test, test_predictions))
    # Baseline: constant prediction equal to the mean training label
    baseline = np.ones(Y_test.shape)*np.mean(Y_train)
    print("mean predictor %f" %sqerror(Y_test, baseline))
Example #12
0
 def test_loo(self):
     """Compare leave_one_out() with brute-force leave-one-out retraining.

     The brute-force reference removes one training row at a time, trains
     a new learner with the same parameters on the remaining rows, and
     predicts the removed row.  Three settings are checked per data
     combination: regparam=7.0 with bias, regparam changed to 1024 via
     solve(), and a Gaussian kernel.
     """
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:

             def brute_force(params):
                 # One retraining per left-out row; params are the RLS
                 # keyword arguments matching the learner under test.
                 P2 = []
                 for i in range(X.shape[0]):
                     X_train = np.delete(X, i, axis=0)
                     Y_train = np.delete(Y, i, axis=0)
                     X_test = X[i]
                     rls2 = RLS(X_train, Y_train, **params)
                     P2.append(rls2.predict(X_test))
                 return np.array(P2)

             #LOO with linear kernel
             rls1 = RLS(X, Y, regparam=7.0, bias=3.0)
             P1 = rls1.leave_one_out()
             P2 = brute_force({"regparam": 7.0, "bias": 3.0})
             assert_allclose(P1, P2)
             #Fast regularization
             rls1.solve(1024)
             P1 = rls1.leave_one_out()
             P2 = brute_force({"regparam": 1024, "bias": 3.0})
             assert_allclose(P1, P2)
             #kernels
             rls1 = RLS(X, Y, kernel="GaussianKernel", gamma=0.01)
             P1 = rls1.leave_one_out()
             P2 = brute_force({"kernel": "GaussianKernel", "gamma": 0.01})
             assert_allclose(P1, P2)
Example #13
0
def train_rls():
    """Train a Gaussian kernel RLS and inspect its predictor.

    Shows that predictions can be made through the learner or through the
    predictor object taken from it, then prints the predictor's
    A-coefficients and how many there are.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    model = RLS(X_train, Y_train, kernel="GaussianKernel", regparam=0.0003, gamma=0.00003)
    # Predicting through the learner ...
    P_test = model.predict(X_test)
    # ... or through the predictor detached from it
    detached = model.predictor
    P_test = detached.predict(X_test)
    # Coefficients stored on the predictor
    coefficients = detached.A
    print("A-coefficients " +str(coefficients))
    print("number of coefficients %d" %len(coefficients))
Example #14
0
def train_rls():
    """Train a linear kernel RLS (bias=1, regparam=1) and print its errors.

    Prints the leave-one-out error, the test error, and the error obtained
    by predicting the mean of the training labels for every test example.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    rls = RLS(X_train, Y_train, kernel="LinearKernel", bias=1, regparam=1)
    # Leave-one-out predictions on the training data
    loo_pred = rls.leave_one_out()
    # Predictions on the test data
    test_pred = rls.predict(X_test)
    print("leave-one-out error %f" % sqerror(Y_train, loo_pred))
    print("test error %f" % sqerror(Y_test, test_pred))
    # Sanity check against a constant mean-label prediction
    constant_pred = np.ones(Y_test.shape) * np.mean(Y_train)
    print("mean predictor %f" % sqerror(Y_test, constant_pred))
Example #15
0
def plot_rls():
    """Plot leave-one-out, leave-sentence-out and test error against regparam.

    Training instances are grouped into folds by sentence id, so that all
    instances from one sentence are held out together.  For every regparam
    on the grid 2**-15 ... 2**15 three errors are computed with ``sqerror``:
    the mean over the sentence folds, the leave-one-out error, and the
    test error.  The three curves are drawn on a logarithmic y-axis.
    """
    X_train =  read_sparse("train_2000_x.txt")
    Y_train =  np.loadtxt("train_2000_y.txt")
    X_test =  read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test =  np.loadtxt("test_2000_y.txt")
    #list of sentence ids
    ids =  np.loadtxt("train_2000_qids.txt")
    #mapped to a list of lists, where each list
    #contains indices for one fold
    folds = map_ids(ids)
    learner = RLS(X_train, Y_train)
    #exponential grid of possible regparam values
    log_regparams = range(-15, 16)
    kfold_errors = []
    loo_errors = []
    test_errors = []
    for log_regparam in log_regparams:
        regparam = 2.**log_regparam
        #RLS is re-trained with the new regparam, this
        #is very fast due to computational short-cut
        learner.solve(regparam)
        #K-fold cross-validation
        perfs = []
        for fold in folds:
            #computes holdout predictions, where instances
            #in fold are left out of training set
            P = learner.holdout(fold)
            perfs.append(sqerror(Y_train[fold], P))
        e_kfold = np.mean(perfs)
        kfold_errors.append(e_kfold)
        P_loo = learner.leave_one_out()
        e_loo = sqerror(Y_train, P_loo)
        loo_errors.append(e_loo)
        P_test = learner.predict(X_test)
        e_test = sqerror(Y_test, P_test)
        test_errors.append(e_test)
    plt.semilogy(log_regparams, loo_errors, label = "leave-one-out")
    plt.semilogy(log_regparams, kfold_errors, label = "leave-sentence-out")
    plt.semilogy(log_regparams, test_errors, label = "test error")
    # Raw string: "\l" is not a valid escape sequence in a normal literal
    plt.xlabel(r"$log_2(\lambda)$")
    plt.ylabel("mean squared error")
    plt.legend(loc=3)
    plt.show()
Example #16
0
def plot_rls():
    """Plot three error estimates as a function of log2(regparam).

    Folds are built from sentence ids so that instances of one sentence
    are always held out together.  For each regparam 2**k, k in -15..15,
    the function records the mean ``sqerror`` over the sentence folds, the
    leave-one-out ``sqerror`` and the test ``sqerror``, and finally shows
    the three curves with a logarithmic y-axis.
    """
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    #list of sentence ids
    ids = np.loadtxt("train_2000_qids.txt")
    #mapped to a list of lists, where each list
    #contains indices for one fold
    folds = map_ids(ids)
    learner = RLS(X_train, Y_train)
    #exponential grid of possible regparam values
    log_regparams = range(-15, 16)
    kfold_errors = []
    loo_errors = []
    test_errors = []
    for log_regparam in log_regparams:
        #RLS is re-trained with the new regparam, this
        #is very fast due to computational short-cut
        learner.solve(2.**log_regparam)
        #K-fold cross-validation: holdout predictions are computed with
        #the instances of each fold left out of the training set
        perfs = []
        for fold in folds:
            P = learner.holdout(fold)
            perfs.append(sqerror(Y_train[fold], P))
        kfold_errors.append(np.mean(perfs))
        P_loo = learner.leave_one_out()
        loo_errors.append(sqerror(Y_train, P_loo))
        P_test = learner.predict(X_test)
        test_errors.append(sqerror(Y_test, P_test))
    plt.semilogy(log_regparams, loo_errors, label="leave-one-out")
    plt.semilogy(log_regparams, kfold_errors, label="leave-sentence-out")
    plt.semilogy(log_regparams, test_errors, label="test error")
    # Raw string keeps the backslash of \lambda without relying on an
    # invalid escape sequence.
    plt.xlabel(r"$log_2(\lambda)$")
    plt.ylabel("mean squared error")
    plt.legend(loc=3)
    plt.show()
Example #17
0
def train_rls():
    """Train a Gaussian kernel RLS on 100 randomly chosen basis vectors.

    Prints the leave-one-out error, the test error, and the error of
    predicting the mean training label for every test example.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    # Draw 100 distinct row indices and use those rows as basis vectors
    chosen_rows = random.sample(range(X_train.shape[0]), 100)
    basis = X_train[chosen_rows]
    model = RLS(X_train, Y_train, basis_vectors=basis, kernel="GaussianKernel",
                regparam=0.0003, gamma=0.00003)
    # Leave-one-out predictions on the training data
    loo_predictions = model.leave_one_out()
    # Predictions on the test data
    test_predictions = model.predict(X_test)
    print("leave-one-out error %f" %sqerror(Y_train, loo_predictions))
    print("test error %f" %sqerror(Y_test, test_predictions))
    # Sanity check: is the model better than a constant mean prediction?
    mean_prediction = np.ones(Y_test.shape)*np.mean(Y_train)
    print("mean predictor %f" %sqerror(Y_test, mean_prediction))
Example #18
0
def train_rls():
    """Train RLS with default parameters and print the linear model.

    Per the original note, the defaults are regparam=1.0 and
    kernel='LinearKernel'.  Predictions are made through the learner and
    through its predictor; the predictor's weights and bias are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    model = RLS(X_train, Y_train)
    # Predictions via the learner itself
    P_test = model.predict(X_test)
    # The predictor can be taken out of the learner and used on its own
    linear_model = model.predictor
    P_test = linear_model.predict(X_test)
    # Model parameters held by the predictor
    weights = linear_model.W
    bias = linear_model.b
    print("number of coefficients %d" %len(weights))
    print("w-coefficients " +str(weights))
    print("bias term %f" %bias)
Example #19
0
def train_rls():
    """Fit a Gaussian kernel RLS restricted to 100 random basis vectors.

    The basis vectors are rows of the training data sampled without
    replacement.  Leave-one-out error, test error and the error of a
    constant mean-label predictor are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    n_train = X_train.shape[0]
    # Sample 100 row indices without replacement
    sampled = random.sample(range(n_train), 100)
    basis_vectors = X_train[sampled]
    rls = RLS(X_train,
              Y_train,
              basis_vectors=basis_vectors,
              kernel="GaussianKernel",
              regparam=0.0003,
              gamma=0.00003)
    P_loo = rls.leave_one_out()
    P_test = rls.predict(X_test)
    print("leave-one-out error %f" %sqerror(Y_train, P_loo))
    print("test error %f" %sqerror(Y_test, P_test))
    # Reference point: predict the training-label mean everywhere
    label_mean = np.mean(Y_train)
    print("mean predictor %f" %sqerror(Y_test, np.ones(Y_test.shape)*label_mean))
Example #20
0
def train_rls():
    """Average leave-one-out AUC and test AUC over 1000 small training sets.

    The training data is cut into 1000 consecutive slices of 30 rows.  An
    RLS learner is trained on each slice; the AUC of its leave-one-out
    predictions and the AUC on the test set are collected, and the means
    of both are printed.
    """
    X_train, Y_train, _ = read_svmlight("a1a.t")
    X_test, Y_test, _ = read_svmlight("a1a", X_train.shape[1])
    loo_scores = []
    test_scores = []
    for start in range(0, 30000, 30):
        X_part = X_train[start:start + 30]
        Y_part = Y_train[start:start + 30]
        model = RLS(X_part, Y_part)
        loo_score = auc(Y_part, model.leave_one_out())
        test_score = auc(Y_test, model.predict(X_test))
        loo_scores.append(loo_score)
        test_scores.append(test_score)
    print("mean loo auc over 1000 repetitions %f" % np.mean(loo_scores))
    print("mean test auc over 1000 repetitions %f" % np.mean(test_scores))
Example #21
0
def train_rls():
    """Train RLS from a precomputed linear kernel matrix and print errors.

    The kernel matrices are inner products of the inputs plus 1.0.  Prints
    the leave-one-out error, the test error and the error of predicting
    the mean training label.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    # Technical detail: the added 1.0 simulates a constant-valued bias
    # feature, as 'LinearKernel' does by default.
    train_kernel = np.dot(X_train, X_train.T) + 1.0
    test_kernel = np.dot(X_test, X_train.T) + 1.0
    model = RLS(train_kernel, Y_train, kernel="PrecomputedKernel")
    # Leave-one-out predictions on the training data
    loo_predictions = model.leave_one_out()
    # Test predictions need the kernel between test and training rows
    test_predictions = model.predict(test_kernel)
    print("leave-one-out error %f" % sqerror(Y_train, loo_predictions))
    print("test error %f" % sqerror(Y_test, test_predictions))
    # Sanity check against a constant mean-label prediction
    mean_prediction = np.ones(Y_test.shape) * np.mean(Y_train)
    print("mean predictor %f" % sqerror(Y_test, mean_prediction))
Example #22
0
def train_rls():
    """Report mean leave-one-out AUC and mean test AUC over 1000 repetitions.

    Repetition i trains an RLS learner on training rows 30*i ... 30*i + 29
    and records the AUC of its leave-one-out predictions together with its
    AUC on the full test set.
    """
    X_train, Y_train, _ = read_svmlight("a1a.t")
    X_test, Y_test, _ = read_svmlight("a1a", X_train.shape[1])
    loo_aucs = []
    test_aucs = []
    for repetition in range(1000):
        rows = slice(repetition*30, repetition*30 + 30)
        X_small, Y_small = X_train[rows], Y_train[rows]
        rls = RLS(X_small, Y_small)
        P_loo = rls.leave_one_out()
        loo_aucs.append(auc(Y_small, P_loo))
        P_test = rls.predict(X_test)
        test_aucs.append(auc(Y_test, P_test))
    print("mean loo auc over 1000 repetitions %f" %np.mean(loo_aucs))
    print("mean test auc over 1000 repetitions %f" %np.mean(test_aucs))
Example #23
0
def train_rls():
    """Train a Gaussian kernel RLS and print the predictor's A-coefficients.

    Predictions are made once through the learner and once through the
    predictor object obtained from it; afterwards the coefficient vector
    ``A`` of the predictor and its length are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    settings = {"kernel": "GaussianKernel", "regparam": 0.0003, "gamma": 0.00003}
    rls = RLS(X_train, Y_train, **settings)
    # Predict with the learner
    P_test = rls.predict(X_test)
    # Predict with the predictor separated from the learner
    kernel_predictor = rls.predictor
    P_test = kernel_predictor.predict(X_test)
    # Coefficients of the predictor
    A = kernel_predictor.A
    print("A-coefficients " + str(A))
    print("number of coefficients %d" % len(A))
Example #24
0
def train_rls():
    """Train RLS with a precomputed kernel matrix and print its errors.

    The kernel is the inner product of the inputs plus 1.0.  The function
    prints the leave-one-out error, the test error and, for comparison,
    the error of a constant prediction equal to the mean training label.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    #Minor technical detail: adding 1.0 simulates the effect of adding a
    #constant valued bias feature, as is done by 'LinearKernel' by default
    K_train = X_train.dot(X_train.T) + 1.0
    K_test = X_test.dot(X_train.T) + 1.0
    rls = RLS(K_train, Y_train, kernel="PrecomputedKernel")
    #Leave-one-out cross-validation predictions
    P_loo = rls.leave_one_out()
    #Test set predictions use the test-versus-training kernel matrix
    P_test = rls.predict(K_test)
    print("leave-one-out error %f" % sqerror(Y_train, P_loo))
    print("test error %f" % sqerror(Y_test, P_test))
    #Sanity check, can we do better than predicting mean of training labels?
    train_mean = np.mean(Y_train)
    print("mean predictor %f" %
          sqerror(Y_test, np.ones(Y_test.shape) * train_mean))
Example #25
0
def train_rls():
    """Train RLS on a precomputed Gaussian kernel matrix and print errors.

    A GaussianKernel object built on the training inputs (gamma=0.00003)
    produces the kernel matrices for training and test inputs.  The
    leave-one-out error, test error and mean-predictor error are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    gaussian = GaussianKernel(X_train, gamma=0.00003)
    train_matrix = gaussian.getKM(X_train)
    test_matrix = gaussian.getKM(X_test)
    model = RLS(train_matrix, Y_train, kernel="PrecomputedKernel",
                regparam=0.0003)
    # Leave-one-out predictions on the training data
    loo_predictions = model.leave_one_out()
    # Test predictions from the test kernel matrix
    test_predictions = model.predict(test_matrix)
    print("leave-one-out error %f" % sqerror(Y_train, loo_predictions))
    print("test error %f" % sqerror(Y_test, test_predictions))
    # Sanity check against predicting the mean of the training labels
    mean_prediction = np.ones(Y_test.shape) * np.mean(Y_train)
    print("mean predictor %f" % sqerror(Y_test, mean_prediction))
Example #26
0
def train_rls():
    """Select regparam with leave-sentence-out cross-validation.

    Instances that share a sentence id form one fold.  For each regparam
    2**k, k in -15..15, holdout predictions are computed fold by fold and
    scored with ``sqerror`` over the whole training set.  The best regparam
    is then used for the test-set evaluation.
    """
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # Sentence ids, mapped to a list of index lists (one list per fold)
    sentence_ids = np.loadtxt("train_2000_qids.txt")
    folds = map_ids(sentence_ids)
    model = RLS(X_train, Y_train)
    chosen_regparam = None
    lowest_error = float("inf")
    for exponent in range(-15, 16):
        candidate = 2.**exponent
        # Changing regparam only requires re-solving the learner
        model.solve(candidate)
        # Collect holdout predictions for all folds into one vector
        cv_predictions = np.zeros(Y_train.shape)
        for fold in folds:
            cv_predictions[fold] = model.holdout(fold)
        cv_error = sqerror(Y_train, cv_predictions)
        print("regparam 2**%d, k-fold error %f" %(exponent, cv_error))
        if cv_error < lowest_error:
            lowest_error, chosen_regparam = cv_error, candidate
    model.solve(chosen_regparam)
    test_predictions = model.predict(X_test)
    print("best regparam %f k-fold error %f" %(chosen_regparam, lowest_error))
    print("test error %f" %sqerror(Y_test, test_predictions))
Example #27
0
def train_rls():
    """Choose regparam by k-fold CV with one fold per sentence, then test.

    Folds come from ``map_ids`` applied to the sentence ids.  Every
    regparam on the grid 2**-15 ... 2**15 is scored by the ``sqerror`` of
    the stacked holdout predictions; the learner is re-solved with the
    best one and evaluated on the test data.
    """
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    #each fold is a list of indices of instances from one sentence
    folds = map_ids(np.loadtxt("train_2000_qids.txt"))
    rls = RLS(X_train, Y_train)
    # (k-fold error, regparam) of the best candidate so far
    best = (float("inf"), None)
    for power in range(-15, 16):
        regparam = 2.**power
        rls.solve(regparam)
        #holdout predictions, with the instances of the fold
        #left out of the training set
        P = np.zeros(Y_train.shape)
        for fold in folds:
            P[fold] = rls.holdout(fold)
        error = sqerror(Y_train, P)
        print("regparam 2**%d, k-fold error %f" % (power, error))
        if error < best[0]:
            best = (error, regparam)
    best_error, best_regparam = best
    rls.solve(best_regparam)
    P_test = rls.predict(X_test)
    print("best regparam %f k-fold error %f" % (best_regparam, best_error))
    print("test error %f" % sqerror(Y_test, P_test))
class LooRLS(object):
    """RLS classifier whose regparam is selected by leave-one-out accuracy.

    Attributes:
        learner: the RLS learner trained by ``fit`` (``None`` before that).
        y_src: initialised to ``None`` and not assigned anywhere in this
            class; kept for backward compatibility.
        measure: accuracy function chosen in ``fit`` (``ova_accuracy`` or
            ``accuracy``); ``None`` before ``fit`` is called.
    """

    def __init__(self):
        self.learner = None
        self.y_src = None
        self.measure = None

    @staticmethod
    def _encode(y):
        """Return ``y`` in one-vs-all encoding.

        ``to_one_vs_all`` is told that zero labels are present whenever
        ``y`` contains at least one zero entry.
        """
        zerolabels = np.count_nonzero(y) < len(y)
        return to_one_vs_all(y, zerolabels)

    def fit(self,
            X_src,
            y_src,
            X_tgt_known,
            y_tgt_known,
            X_tgt_unknown,
            y_tgt_unknown,
            verbose=False):
        """Train on the source data, choosing regparam by leave-one-out CV.

        Args:
            X_src: source-domain inputs used for training.
            y_src: source-domain labels; converted to one-vs-all encoding.
            X_tgt_known, y_tgt_known, X_tgt_unknown, y_tgt_unknown: not
                used by this learner; accepted so that the signature stays
                unchanged for existing callers.
            verbose: when true, print the leave-one-out accuracy of every
                candidate and of the selected regparam.

        The trained learner is stored in ``self.learner`` and the accuracy
        function in ``self.measure``.  Nothing is returned.
        """
        # Map labels from set {1,2,3} to one-vs-all encoding
        y_src = self._encode(y_src)

        # ova_accuracy computes one-vs-all classification accuracy directly between transformed
        # class label matrix, and a matrix of predictions, where each column corresponds to a class
        if len(np.unique(y_src)) > 2:
            self.measure = ova_accuracy
        else:
            self.measure = accuracy

        # NOTE: an earlier version additionally trained a LeaveOneOutRLS
        # here and predicted X_tgt_known with it, but both the model and
        # the predictions were discarded, so that step has been removed.
        self.learner = RLS(X_src, y_src)
        best_regparam = None
        # -inf rather than 0 so that a regparam is selected even when no
        # candidate reaches a positive accuracy.
        best_accuracy = float("-inf")
        # exponential grid of possible regparam values
        for log_regparam in range(-15, 16):
            regparam = 2.**log_regparam
            # RLS is re-trained with the new regparam, this
            # is very fast due to computational short-cut
            self.learner.solve(regparam)
            # Leave-one-out cross-validation predictions, this is fast due to
            # computational short-cut
            P_loo = self.learner.leave_one_out()
            acc = self.measure(y_src, P_loo)
            if verbose:
                print("LooRLS regparam 2**%d, loo-accuracy %f" %
                      (log_regparam, acc))
            if acc > best_accuracy:
                best_accuracy = acc
                best_regparam = regparam
        self.learner.solve(best_regparam)
        if verbose:
            print("LooRLS best regparam %f with loo-accuracy %f" %
                  (best_regparam, best_accuracy))

    def predict(self, X, y=None):
        """Predict for ``X``; also score against ``y`` when it is given.

        Returns:
            The predictions of the trained learner, or a tuple
            ``(predictions, score)`` when ``y`` is not ``None``, where the
            score is ``self.measure`` applied to the one-vs-all encoded
            ``y`` and the predictions.
        """
        ypred = self.learner.predict(X)
        if y is None:
            return ypred
        return ypred, self.measure(self._encode(y), ypred)