def test_holdout(self):
    """Compare QueryRankRLS.holdout with explicitly retrained models.

    For every pairing of the input matrices and label sets stored on the
    test case, the rows whose query id equals 1 are held out. A model
    trained on all rows must produce holdout predictions that match
    (``assert_allclose``) the predictions of a model trained only on the
    remaining rows. This is checked with default settings, with a bias
    term, with a Gaussian kernel, and after re-solving both models for a
    range of regularization parameters. Finally a few index lists are
    checked to make ``holdout`` raise ``IndexError``.
    """

    def check_match(full_model, reduced_model, features, held_out):
        # Fast holdout on the full model vs. plain prediction by the
        # model that never saw the held-out rows.
        fast = full_model.holdout(held_out)
        explicit = reduced_model.predict(features[held_out])
        assert_allclose(fast, explicit)

    def check_regparams(full_model, reduced_model, features, held_out,
                        exponents):
        # Re-solve both models with regparam 2**e and compare again.
        for exponent in exponents:
            full_model.solve(2**exponent)
            reduced_model.solve(2**exponent)
            check_match(full_model, reduced_model, features, held_out)

    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            m = X.shape[0]
            # Only the query ids are needed; the second return value of
            # generate_qids is not used in this test.
            qids, _ = generate_qids(m)
            qids = np.array(qids)
            held_out = np.where(qids == 1)[0]
            kept = list(set(range(m)) - set(held_out))

            # Default settings (described as "linear kernel" by the
            # original comment -- presumably the library default).
            full = QueryRankRLS(X, Y, qids)
            reduced = QueryRankRLS(X[kept], Y[kept], qids[kept])
            check_match(full, reduced, X, held_out)

            # With a bias term, followed by fast re-regularization.
            full = QueryRankRLS(X, Y, qids, bias=3.0)
            reduced = QueryRankRLS(X[kept], Y[kept], qids[kept], bias=3.0)
            check_match(full, reduced, X, held_out)
            check_regparams(full, reduced, X, held_out, range(-5, 5))

            # Gaussian kernel, again followed by re-regularization.
            full = QueryRankRLS(X, Y, qids,
                                kernel="GaussianKernel", gamma=0.01)
            reduced = QueryRankRLS(X[kept], Y[kept], qids[kept],
                                   kernel="GaussianKernel", gamma=0.01)
            check_match(full, reduced, X, held_out)
            check_regparams(full, reduced, X, held_out, range(-15, 15))

            # Index lists that holdout is expected to reject with
            # IndexError (checked on the Gaussian-kernel model).
            # NOTE(review): presumably out-of-range, negative, duplicated
            # and partial-query indices respectively -- confirm against
            # the holdout implementation.
            for bad_indices in ([0, 3, 100], [-1, 0, 2], [1, 1, 2], [0, 4, 8]):
                self.assertRaises(IndexError, full.holdout, bad_indices)
Exemple #2
0
 def test_holdout(self):
     """Verify that holdout predictions equal those of a retrained model.

     The rows with query id 1 are held out. For each learner
     configuration, a model built on all rows and a model built only on
     the remaining rows are compared: ``holdout`` on the former must agree
     (``assert_allclose``) with ``predict`` on the latter, both right
     after construction and after re-solving with each listed
     regularization parameter. The last model built on all rows is then
     used to check that several index lists raise ``IndexError``.
     """
     # Each entry: (extra constructor keyword arguments,
     #              exponents e for which both models are re-solved
     #              with regparam 2**e).
     configurations = [
         ({}, range(0)),
         ({"bias": 3.0}, range(-5, 5)),
         ({"kernel": "GaussianKernel", "gamma": 0.01}, range(-15, 15)),
     ]
     for X in [self.Xtrain1, self.Xtrain2]:
         for Y in [self.Ytrain1, self.Ytrain2]:
             m = X.shape[0]
             # The second value returned by generate_qids is unused here.
             qids, _ = generate_qids(m)
             qids = np.array(qids)
             ho_rows = np.where(qids == 1)[0]
             rest_rows = list(set(range(m)) - set(ho_rows))
             for extra_args, exponents in configurations:
                 full = QueryRankRLS(X, Y, qids, **extra_args)
                 reduced = QueryRankRLS(X[rest_rows],
                                        Y[rest_rows],
                                        qids[rest_rows],
                                        **extra_args)
                 fast = full.holdout(ho_rows)
                 explicit = reduced.predict(X[ho_rows])
                 assert_allclose(fast, explicit)
                 for exponent in exponents:
                     full.solve(2**exponent)
                     reduced.solve(2**exponent)
                     fast = full.holdout(ho_rows)
                     explicit = reduced.predict(X[ho_rows])
                     assert_allclose(fast, explicit)
             # Index lists expected to be rejected; `full` is the model
             # from the final (Gaussian kernel) configuration.
             # NOTE(review): the reason each list is invalid is not
             # visible here -- confirm against holdout's validation.
             rejected = [[0, 3, 100], [-1, 0, 2], [1, 1, 2], [0, 4, 8]]
             for bad_indices in rejected:
                 self.assertRaises(IndexError, full.holdout, bad_indices)
Exemple #3
0
def train_rls():
    """Train QueryRankRLS and print cross-validation and test cindex.

    Reads features, labels and query (sentence) ids from the
    ``train_2000_*`` and ``test_2000_*`` text files, trains a learner on
    the training data and prints two numbers:

    * the mean cindex over leave-query-out holdout predictions, where the
      instances sharing a query id form one fold, and
    * the mean per-query cindex of the predictions on the test set.

    Groups whose labels have zero variance are skipped, since cindex is
    undefined for them. No regularization parameter is selected here; the
    learner is used as constructed.
    """
    train_inputs = read_sparse("train_2000_x.txt")
    train_labels = np.loadtxt("train_2000_y.txt")
    # The training matrix's column count is passed along when reading the
    # test matrix (presumably to fix the feature dimension -- confirm in
    # read_sparse).
    test_inputs = read_sparse("test_2000_x.txt", train_inputs.shape[1])
    test_labels = np.loadtxt("test_2000_y.txt")
    # Query (sentence) id of every instance.
    train_qids = np.loadtxt("train_2000_qids.txt")
    test_qids = np.loadtxt("test_2000_qids.txt")

    model = QueryRankRLS(train_inputs, train_labels, train_qids)
    test_predictions = model.predict(test_inputs)

    # Leave-query-out cross-validation: one holdout computation per query.
    cv_scores = [
        cindex(train_labels[group], model.holdout(group))
        for group in map_ids(train_qids)
        if np.var(train_labels[group]) != 0
    ]
    print("leave-query-out cross-validation cindex %f" % np.mean(cv_scores))

    # Ranking accuracy is computed separately for every test query.
    test_scores = [
        cindex(test_labels[group], test_predictions[group])
        for group in map_ids(test_qids)
        if np.var(test_labels[group]) != 0
    ]
    print("test cindex %f" % np.mean(test_scores))
Exemple #4
0
def train_rls():
    """Fit QueryRankRLS on the 2000-instance data and report cindex scores.

    The data files (``train_2000_*``, ``test_2000_*``) provide features,
    labels and one query (sentence) id per instance. After training, the
    function prints the mean leave-query-out cross-validation cindex on
    the training data, followed by the mean per-query cindex on the test
    data. Queries whose labels are all equal are ignored because cindex is
    undefined in that case.
    """
    features_train = read_sparse("train_2000_x.txt")
    labels_train = np.loadtxt("train_2000_y.txt")
    n_features = features_train.shape[1]
    features_test = read_sparse("test_2000_x.txt", n_features)
    labels_test = np.loadtxt("test_2000_y.txt")
    # One sentence id per instance.
    sentence_ids_train = np.loadtxt("train_2000_qids.txt")
    sentence_ids_test = np.loadtxt("test_2000_qids.txt")

    ranker = QueryRankRLS(features_train, labels_train, sentence_ids_train)
    predictions_test = ranker.predict(features_test)

    # Cross-validation: each fold holds the instances of one sentence.
    cv_results = []
    for fold in map_ids(sentence_ids_train):
        fold_labels = labels_train[fold]
        if np.var(fold_labels) == 0:
            # cindex is undefined when every label in the fold is equal
            continue
        fold_predictions = ranker.holdout(fold)
        cv_results.append(cindex(fold_labels, fold_predictions))
    cv_mean = np.mean(cv_results)
    print("leave-query-out cross-validation cindex %f" %cv_mean)

    # Test performance, evaluated query by query.
    test_results = []
    for query in map_ids(sentence_ids_test):
        query_labels = labels_test[query]
        if np.var(query_labels) == 0:
            # same restriction as above: skip constant-label queries
            continue
        test_results.append(cindex(query_labels, predictions_test[query]))
    test_mean = np.mean(test_results)
    print("test cindex %f" %test_mean)
Exemple #5
0
    def testLabelRankRLS(self):
        """Check QueryRankRLS holdout against naively retrained models.

        Random data (100 instances, 400 features) is generated with a fixed
        seed; the label of an instance is the sum of its features. The
        instances are split into six queries and the first query (indices
        0-4) is held out. For regularization parameters 2^-5 ... 2^4 the
        following predictions for the held-out instances are printed:

        * "Dumb HO": a closed-form expression evaluated directly on the
          kernel matrix (printed only, not asserted),
        * "Naive HO": models trained without the held-out instances, once
          on the features (primal) and once on a precomputed kernel (dual),
        * "Fast HO": ``holdout`` of models trained on all instances.

        The naive primal prediction is the reference; the other three must
        have the same shape and agree with it to 5 decimal places.
        """
        print("Testing the cross-validation routines of the QueryRankRLS module.\n")

        np.random.seed(100)
        floattype = np.float64

        m, n = 100, 400  # data, features
        # np.asmatrix is used instead of the np.mat alias, which was
        # removed in NumPy 2.0; the two are equivalent.
        Xtrain = np.asmatrix(np.random.rand(m, n))
        K = Xtrain * Xtrain.T  # linear kernel matrix (matrix product)
        ylen = 1
        Y = np.asmatrix(np.zeros((m, ylen), dtype=floattype))
        Y[:, 0] = np.sum(Xtrain, 1)

        labelcount = 5

        # Held-out indices as a concrete list, and their complement in
        # ascending order so that it stays aligned with qidlist_cv below.
        hoindices = list(range(labelcount))
        hocompl = sorted(set(range(m)) - set(hoindices))

        # Query ids: indices 0-4 keep id 0, the remaining ranges get 1..5.
        qidlist = [0] * m
        query_ranges = [(5, 12), (12, 32), (32, 34), (34, 85), (85, m)]
        for qid, (start, stop) in enumerate(query_ranges, start=1):
            for h in range(start, stop):
                qidlist[h] = qid
        # Query ids of the instances that remain after the holdout.
        qidlist_cv = [qidlist[i] for i in hocompl]

        # P: query indicator matrix with each column scaled by
        # 1/sqrt(query size); L = I - P * P.T.
        objcount = max(qidlist) + 1
        P = np.asmatrix(np.zeros((m, objcount), dtype=np.float64))
        for i, qid in enumerate(qidlist):
            P[i, qid] = 1.
        labelcounts = np.sum(P, axis=0)
        P = np.divide(P, np.sqrt(labelcounts))
        D = np.asmatrix(np.ones((1, m), dtype=np.float64))
        L = np.multiply(np.eye(m), D) - P * P.T

        Kcv = K[np.ix_(hocompl, hocompl)]
        Lcv = L[np.ix_(hocompl, hocompl)]

        Xcv = Xtrain[hocompl]
        Xtest = Xtrain[hoindices]
        Yho = Y[hocompl]

        # Models trained on all instances (used for the fast holdout).
        primalrls = QueryRankRLS(X=Xtrain, Y=Y, qids=qidlist)
        dualrls = QueryRankRLS(X=K, kernel='PrecomputedKernel', Y=Y,
                               qids=qidlist)

        # Models trained without the held-out instances (naive holdout).
        primalrls_naive = QueryRankRLS(X=Xcv, Y=Yho, qids=qidlist_cv)
        dualrls_naive = QueryRankRLS(X=Kcv, kernel='PrecomputedKernel',
                                     Y=Yho, qids=qidlist_cv)

        # Kernel evaluations between remaining and held-out instances.
        testkm = K[np.ix_(hocompl, hoindices)]

        for loglambda in range(-5, 5):
            regparam = 2. ** loglambda
            # A bare `print` is a no-op expression in Python 3; call it so
            # the blank separator line is actually emitted.
            print()
            print("Regparam 2^%1d" % loglambda)

            dumbho = testkm.T * la.inv(Lcv * Kcv + regparam * np.eye(Lcv.shape[0])) * Lcv * Yho
            print(str(np.squeeze(np.array(dumbho.T))) + ' Dumb HO')

            predhos = []
            primalrls_naive.solve(regparam)
            predho = primalrls_naive.predictor.predict(Xtest)
            print(str(predho.T) + ' Naive HO (primal)')
            predhos.append(predho)

            dualrls_naive.solve(regparam)
            predho = dualrls_naive.predictor.predict(testkm.T)
            print(str(predho.T) + ' Naive HO (dual)')
            predhos.append(predho)

            primalrls.solve(regparam)
            predho = np.squeeze(primalrls.holdout(hoindices))
            print(str(predho.T) + ' Fast HO (primal)')
            predhos.append(predho)

            dualrls.solve(regparam)
            predho = np.squeeze(dualrls.holdout(hoindices))
            print(str(predho.T) + ' Fast HO (dual)')
            predhos.append(predho)

            # The naive primal prediction serves as the reference.
            predho0 = predhos.pop(0)
            for predho in predhos:
                self.assertEqual(predho0.shape, predho.shape)
                for row in range(predho.shape[0]):
                    self.assertAlmostEqual(predho0[row], predho[row], places=5)
Exemple #6
0
    def testLabelRankRLS(self):
        """Compare fast QueryRankRLS holdout with explicit retraining.

        With a fixed random seed, a 100 x 400 feature matrix is drawn and
        each label is set to the sum of the instance's features. Six
        queries are defined over the instances; the first one (indices
        0-4) is held out. For each regularization parameter 2^-5 ... 2^4:

        * a closed-form prediction is printed as "Dumb HO" (not asserted),
        * models trained on the remaining instances predict the held-out
          ones ("Naive HO", primal on features and dual on a precomputed
          kernel),
        * models trained on all instances compute ``holdout`` for the
          held-out indices ("Fast HO", primal and dual).

        The naive dual and both fast predictions must match the naive
        primal prediction in shape and, element by element, to 5 places.
        """
        print("Testing the cross-validation routines of the QueryRankRLS module.\n")

        np.random.seed(100)
        floattype = np.float64

        m, n = 100, 400  # data, features
        # NumPy 2.0 removed the np.mat alias; np.asmatrix is the
        # equivalent spelling that works on old and new versions alike.
        Xtrain = np.asmatrix(np.random.rand(m, n))
        K = Xtrain * Xtrain.T  # matrix product, i.e. the linear kernel
        ylen = 1
        Y = np.asmatrix(np.zeros((m, ylen), dtype=floattype))
        Y[:, 0] = np.sum(Xtrain, 1)

        labelcount = 5

        # The held-out indices are materialised as a list; the complement
        # is sorted so its order does not depend on set iteration order.
        hoindices = list(range(labelcount))
        hocompl = sorted(set(range(m)) - set(hoindices))

        # Assign query ids: the first `labelcount` instances stay in
        # query 0, the following index ranges form queries 1 to 5.
        qidlist = [0] * m
        boundaries = [(5, 12), (12, 32), (32, 34), (34, 85), (85, m)]
        for qid, (first, last) in enumerate(boundaries, start=1):
            for h in range(first, last):
                qidlist[h] = qid
        # Query ids restricted to the non-held-out instances, in the
        # same order as hocompl.
        qidlist_cv = [qidlist[i] for i in hocompl]

        # Build L = I - P * P.T, where P holds one column per query with
        # entries 1/sqrt(query size) for the members of that query.
        objcount = max(qidlist) + 1
        P = np.asmatrix(np.zeros((m, objcount), dtype=np.float64))
        for i, qid in enumerate(qidlist):
            P[i, qid] = 1.
        labelcounts = np.sum(P, axis=0)
        P = np.divide(P, np.sqrt(labelcounts))
        D = np.asmatrix(np.ones((1, m), dtype=np.float64))
        L = np.multiply(np.eye(m), D) - P * P.T

        Kcv = K[np.ix_(hocompl, hocompl)]
        Lcv = L[np.ix_(hocompl, hocompl)]

        Xcv = Xtrain[hocompl]
        Xtest = Xtrain[hoindices]
        Yho = Y[hocompl]

        # Learners that see every instance; their holdout() is under test.
        primalrls = QueryRankRLS(X=Xtrain, Y=Y, qids=qidlist)
        dualrls = QueryRankRLS(X=K, kernel='PrecomputedKernel', Y=Y,
                               qids=qidlist)

        # Learners that never see the held-out instances.
        primalrls_naive = QueryRankRLS(X=Xcv, Y=Yho, qids=qidlist_cv)
        dualrls_naive = QueryRankRLS(X=Kcv, kernel='PrecomputedKernel',
                                     Y=Yho, qids=qidlist_cv)

        # Kernel block: rows = remaining instances, columns = held-out.
        testkm = K[np.ix_(hocompl, hoindices)]

        for loglambda in range(-5, 5):
            regparam = 2. ** loglambda
            # `print` without parentheses does nothing in Python 3, so the
            # call form is needed to output the blank line.
            print()
            print("Regparam 2^%1d" % loglambda)

            closed_form = testkm.T * la.inv(Lcv * Kcv + regparam * np.eye(Lcv.shape[0])) * Lcv * Yho
            print(str(np.squeeze(np.array(closed_form.T))) + ' Dumb HO')

            predhos = []
            primalrls_naive.solve(regparam)
            predho = primalrls_naive.predictor.predict(Xtest)
            print(str(predho.T) + ' Naive HO (primal)')
            predhos.append(predho)

            dualrls_naive.solve(regparam)
            predho = dualrls_naive.predictor.predict(testkm.T)
            print(str(predho.T) + ' Naive HO (dual)')
            predhos.append(predho)

            primalrls.solve(regparam)
            predho = np.squeeze(primalrls.holdout(hoindices))
            print(str(predho.T) + ' Fast HO (primal)')
            predhos.append(predho)

            dualrls.solve(regparam)
            predho = np.squeeze(dualrls.holdout(hoindices))
            print(str(predho.T) + ' Fast HO (dual)')
            predhos.append(predho)

            # First entry (naive primal) is the reference for the rest.
            predho0 = predhos.pop(0)
            for predho in predhos:
                self.assertEqual(predho0.shape, predho.shape)
                for row in range(predho.shape[0]):
                    self.assertAlmostEqual(predho0[row], predho[row], places=5)