def test_holdout(self):
    # Test method from the QueryRankRLS test suite; assumes numpy as np,
    # numpy.testing.assert_allclose, QueryRankRLS and the generate_qids
    # test helper are available in the enclosing module.
    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            m = X.shape[0]
            qids, L = generate_qids(m)
            qids = np.array(qids)
            hoindices = np.where(qids == 1)[0]
            hocompl = list(set(range(m)) - set(hoindices))
            # Holdout with linear kernel: predictions for the held-out query
            # must match a model retrained without that query.
            rls1 = QueryRankRLS(X, Y, qids)
            rls2 = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl])
            P1 = rls1.holdout(hoindices)
            P2 = rls2.predict(X[hoindices])
            assert_allclose(P1, P2)
            # Holdout with bias
            rls1 = QueryRankRLS(X, Y, qids, bias=3.0)
            rls2 = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl],
                                bias=3.0)
            P1 = rls1.holdout(hoindices)
            P2 = rls2.predict(X[hoindices])
            assert_allclose(P1, P2)
            # Fast regularization: re-solving for a new regularization
            # parameter must preserve the holdout identity.
            for i in range(-5, 5):
                rls1.solve(2**i)
                rls2.solve(2**i)
                P1 = rls1.holdout(hoindices)
                P2 = rls2.predict(X[hoindices])
                assert_allclose(P1, P2)
            # Kernel holdout
            rls1 = QueryRankRLS(X, Y, qids,
                                kernel="GaussianKernel", gamma=0.01)
            rls2 = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl],
                                kernel="GaussianKernel", gamma=0.01)
            P1 = rls1.holdout(hoindices)
            P2 = rls2.predict(X[hoindices])
            assert_allclose(P1, P2)
            for i in range(-15, 15):
                rls1.solve(2**i)
                rls2.solve(2**i)
                P1 = rls1.holdout(hoindices)
                P2 = rls2.predict(X[hoindices])
                assert_allclose(P1, P2)
            # Hold-out index lists that must raise IndexError
            I = [0, 3, 100]   # index out of range
            self.assertRaises(IndexError, rls1.holdout, I)
            I = [-1, 0, 2]    # negative index
            self.assertRaises(IndexError, rls1.holdout, I)
            I = [1, 1, 2]     # duplicate indices
            self.assertRaises(IndexError, rls1.holdout, I)
            I = [0, 4, 8]     # indices not all from the same query
            self.assertRaises(IndexError, rls1.holdout, I)
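
The property the test above exercises is that holdout(indices) for one query returns the same predictions as retraining the learner without that query and calling predict on it. Below is a minimal, self-contained sketch of that check; the synthetic data, the query layout, and the rlscore.learner import path are illustrative assumptions, not part of the original test.

# Sketch (assumed setup): holdout predictions for one query should equal
# predictions from a model retrained without that query.
import numpy as np
from numpy.testing import assert_allclose
from rlscore.learner import QueryRankRLS  # import path assumed

np.random.seed(0)
X = np.random.randn(30, 5)             # 30 instances, 5 features
Y = np.random.randn(30)                # real-valued scores
qids = np.repeat(np.arange(6), 5)      # 6 queries with 5 instances each

hoindices = np.where(qids == 1)[0]                # hold out query 1
hocompl = np.setdiff1d(np.arange(30), hoindices)  # remaining instances

full = QueryRankRLS(X, Y, qids)
retrained = QueryRankRLS(X[hocompl], Y[hocompl], qids[hocompl])
assert_allclose(full.holdout(hoindices), retrained.predict(X[hoindices]))
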
# Import paths as used in the RLScore tutorials (assumed to be available):
import numpy as np
from rlscore.learner import QueryRankRLS
from rlscore.measure import cindex
from rlscore.utilities.cross_validation import map_ids
from rlscore.utilities.reader import read_sparse


def train_rls():
    # Estimate ranking performance with leave-query-out cross-validation,
    # where the instances related to a single sentence together form a fold
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # sentence ids for the training and test instances
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    folds = map_ids(qids_train)
    perfs = []
    # Leave-query-out cross-validation: each training query forms one fold;
    # folds with constant scores are skipped, since cindex is undefined for them.
    for fold in folds:
        if np.var(Y_train[fold]) != 0:
            P = learner.holdout(fold)
            c = cindex(Y_train[fold], P)
            perfs.append(c)
    perf = np.mean(perfs)
    print("leave-query-out cross-validation cindex %f" % perf)
    partition = map_ids(qids_test)
    test_perfs = []
    # compute the ranking accuracy separately for each test query
    for query in partition:
        # skip queries where all instances have the same score,
        # since cindex is undefined in that case
        if np.var(Y_test[query]) != 0:
            perf = cindex(Y_test[query], P_test[query])
            test_perfs.append(perf)
    test_perf = np.mean(test_perfs)
    print("test cindex %f" % test_perf)