def test_holdout(self):
    """Holdout predictions must match a model explicitly retrained without the held-out query."""
    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            n_samples = X.shape[0]
            qids, L = generate_qids(n_samples)
            qids = np.array(qids)
            # Hold out every instance belonging to query 1
            held_out = np.where(qids == 1)[0]
            kept = list(set(range(n_samples)) - set(held_out))
            # Linear kernel: fast holdout vs. explicit retrain
            full = QueryRankRLS(X, Y, qids)
            reduced = QueryRankRLS(X[kept], Y[kept], qids[kept])
            assert_allclose(full.holdout(held_out), reduced.predict(X[held_out]))
            # Same comparison with a bias term
            full = QueryRankRLS(X, Y, qids, bias=3.0)
            reduced = QueryRankRLS(X[kept], Y[kept], qids[kept], bias=3.0)
            assert_allclose(full.holdout(held_out), reduced.predict(X[held_out]))
            # Fast re-regularization path over a range of regparams
            for exponent in range(-5, 5):
                full.solve(2 ** exponent)
                reduced.solve(2 ** exponent)
                assert_allclose(full.holdout(held_out), reduced.predict(X[held_out]))
            # Gaussian-kernel holdout
            full = QueryRankRLS(X, Y, qids, kernel="GaussianKernel", gamma=0.01)
            reduced = QueryRankRLS(X[kept], Y[kept], qids[kept], kernel="GaussianKernel", gamma=0.01)
            assert_allclose(full.holdout(held_out), reduced.predict(X[held_out]))
            for exponent in range(-15, 15):
                full.solve(2 ** exponent)
                reduced.solve(2 ** exponent)
                assert_allclose(full.holdout(held_out), reduced.predict(X[held_out]))
            # Invalid holdout index sets (out of range, negative, duplicated,
            # or otherwise rejected by the learner) must raise IndexError
            for bad_indices in ([0, 3, 100], [-1, 0, 2], [1, 1, 2], [0, 4, 8]):
                self.assertRaises(IndexError, full.holdout, bad_indices)
def test_holdout(self):
    """Verify that holdout() agrees with retraining on the complement of the held-out query."""

    def _compare(full_model, reduced_model, X, rows):
        # Fast holdout on the full model vs. ordinary prediction by the reduced one.
        assert_allclose(full_model.holdout(rows), reduced_model.predict(X[rows]))

    for X in [self.Xtrain1, self.Xtrain2]:
        for Y in [self.Ytrain1, self.Ytrain2]:
            size = X.shape[0]
            qids, L = generate_qids(size)
            qids = np.array(qids)
            rows = np.where(qids == 1)[0]
            complement = list(set(range(size)) - set(rows))
            # Linear kernel
            rls1 = QueryRankRLS(X, Y, qids)
            rls2 = QueryRankRLS(X[complement], Y[complement], qids[complement])
            _compare(rls1, rls2, X, rows)
            # With a bias term
            rls1 = QueryRankRLS(X, Y, qids, bias=3.0)
            rls2 = QueryRankRLS(X[complement], Y[complement], qids[complement], bias=3.0)
            _compare(rls1, rls2, X, rows)
            # Fast regularization: re-solve both models for several regparams
            for i in range(-5, 5):
                rls1.solve(2 ** i)
                rls2.solve(2 ** i)
                _compare(rls1, rls2, X, rows)
            # Gaussian kernel
            rls1 = QueryRankRLS(X, Y, qids, kernel="GaussianKernel", gamma=0.01)
            rls2 = QueryRankRLS(X[complement], Y[complement], qids[complement],
                                kernel="GaussianKernel", gamma=0.01)
            _compare(rls1, rls2, X, rows)
            for i in range(-15, 15):
                rls1.solve(2 ** i)
                rls2.solve(2 ** i)
                _compare(rls1, rls2, X, rows)
            # Malformed index lists are rejected with IndexError
            for bad in ([0, 3, 100], [-1, 0, 2], [1, 1, 2], [0, 4, 8]):
                self.assertRaises(IndexError, rls1.holdout, bad)
def train_rls():
    """Leave-query-out cross-validation on training data, then per-query test cindex.

    Instances that belong to the same sentence (query) form one fold.
    """
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # Sentence identifiers used to group instances into queries
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    # Leave-query-out CV: hold out one query's fold at a time
    cv_scores = []
    for fold in map_ids(qids_train):
        # cindex is undefined when every label in the fold is identical
        if np.var(Y_train[fold]) != 0:
            holdout_preds = learner.holdout(fold)
            cv_scores.append(cindex(Y_train[fold], holdout_preds))
    print("leave-query-out cross-validation cindex %f" % np.mean(cv_scores))
    # Ranking accuracy computed separately for each test query
    test_scores = []
    for query in map_ids(qids_test):
        # Skip queries where all scores coincide (cindex undefined)
        if np.var(Y_test[query]) != 0:
            test_scores.append(cindex(Y_test[query], P_test[query]))
    print("test cindex %f" % np.mean(test_scores))
def train_rls():
    """Evaluate QueryRankRLS with leave-query-out CV and per-query test performance.

    Each sentence's instances constitute one cross-validation fold.
    """
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # Sentence ids: one query per sentence
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    folds = map_ids(qids_train)
    # Leave-query-out CV; folds with constant labels are skipped
    # because cindex is undefined there.
    perfs = [
        cindex(Y_train[fold], learner.holdout(fold))
        for fold in folds
        if np.var(Y_train[fold]) != 0
    ]
    print("leave-query-out cross-validation cindex %f" % np.mean(perfs))
    partition = map_ids(qids_test)
    # Per-query test accuracy, again skipping constant-score queries
    test_perfs = [
        cindex(Y_test[query], P_test[query])
        for query in partition
        if np.var(Y_test[query]) != 0
    ]
    print("test cindex %f" % np.mean(test_perfs))