# Assumed imports for the evaluation utilities below. The RLScore import paths
# mirror the examples further down and may need adjusting to the local layout;
# sklearn.cross_validation is the pre-0.20 scikit-learn API used throughout.
import numpy as np
from scipy import stats
from scipy.special import comb
from sklearn import cross_validation, linear_model, metrics
from rankit.build.rank_script.rlscore.learner import PPRankRLS, GlobalRankRLS
from rankit.build.rank_script.rlscore.measure import cindex


def score_models(X, y):
    # Compare PPRankRLS, GlobalRankRLS and linear regression over 10 repetitions
    # of 5-fold cross-validation (50 train/test splits in total).
    # cindex accumulators
    pairs_cindex = 0
    reg_cindex = 0
    lr_cindex = 0
    # tau accumulators
    pairs_tau = 0
    reg_tau = 0
    lr_tau = 0
    # r2 accumulators
    pairs_r2 = 0
    reg_r2 = 0
    lr_r2 = 0
    for i in range(10):
        kfold = cross_validation.KFold(len(X), n_folds=5, shuffle=True)
        for train, test in kfold:
            pairs_start, pairs_end = get_pairs(X[train], y[train])
            learner = PPRankRLS(X[train], pairs_start, pairs_end)
            reg = GlobalRankRLS(X[train], y[train])
            lr_estimator = linear_model.LinearRegression(fit_intercept=True)
            lr_estimator.fit(X[train], y[train])
            # Test set predictions
            y_pred = learner.predict(X[test])
            reg_pred = reg.predict(X[test])
            lr_pred = lr_estimator.predict(X[test])
            # cindex (y_compute: ground-truth reference scores, assumed to be
            # defined at module level outside this function)
            pairs_cindex += cindex(y_compute[test], y_pred)
            reg_cindex += cindex(y_compute[test], reg_pred)
            lr_cindex += cindex(y_compute[test], lr_pred)
            # tau
            pairs_tau += stats.kendalltau(y_compute[test], y_pred)[0]
            reg_tau += stats.kendalltau(y_compute[test], reg_pred)[0]
            lr_tau += stats.kendalltau(y_compute[test], lr_pred)[0]
            # r2
            pairs_r2 += metrics.r2_score(y_compute[test], y_pred)
            reg_r2 += metrics.r2_score(y_compute[test], reg_pred)
            lr_r2 += metrics.r2_score(y_compute[test], lr_pred)
    # 10 repetitions x 5 folds = 50 scores per model
    print('Cindex score:')
    print("RLS: %.5f" % (pairs_cindex / 50))
    print("Reg: %.5f" % (reg_cindex / 50))
    print("Lr: %.5f" % (lr_cindex / 50))
    print('\nTau score:')
    print("RLS: %.5f" % (pairs_tau / 50))
    print("Reg: %.5f" % (reg_tau / 50))
    print("Lr: %.5f" % (lr_tau / 50))
    print('\nr2 score:')
    print("RLS: %.5f" % (pairs_r2 / 50))
    print("Reg: %.5f" % (reg_r2 / 50))
    print("Lr: %.5f" % (lr_r2 / 50))
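The get_pairs helper called above is not defined in this section. A minimal sketch of what it is assumed to do, namely build the pairs_start_inds and pairs_end_inds index lists that PPRankRLS expects (each pair meaning the start instance is preferred over the end instance), is given below; the pairing rule and the unused X argument are assumptions, not the original implementation.

def get_pairs(X, y):
    # Hypothetical reconstruction: emit one directed preference pair (i, j)
    # for every pair of instances with distinct labels, oriented so that
    # y[i] > y[j]. X is accepted only to match the call sites above.
    pairs_start = []
    pairs_end = []
    n = len(y)
    for i in range(n):
        for j in range(n):
            if i != j and y[i] > y[j]:
                pairs_start.append(i)
                pairs_end.append(j)
    return pairs_start, pairs_end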
def eval_num_pairs(X, y):
    for i in [24, 16, 12, 8, 6, 4, 3, 2]:
        print("\n FOLDS = %i" % (i))
        print("INSTANCES = %i" % (48 / i))
        print("PAIRS = %i" % (comb(48 / i, 2) * 2))
        # cindex
        pairs_cindex = 0
        lr_cindex = 0
        # tau
        pairs_tau = 0
        lr_tau = 0
        # r2
        pairs_r2 = 0
        lr_r2 = 0
        for j in range(10):
            kfold = cross_validation.KFold(len(X), n_folds=i, shuffle=True)
            for train, test in kfold:
                # train on smaller test set
                pairs_start, pairs_end = get_pairs(X[test], y[test])
                learner = PPRankRLS(X[test], pairs_start, pairs_end)
                lr_estimator = linear_model.LinearRegression(fit_intercept=True)
                lr_estimator.fit(X[test], y[test])
                # predict on larger training set
                y_pred = learner.predict(X[train])
                lr_pred = lr_estimator.predict(X[train])
                # cindex
                pairs_cindex += cindex(y_compute[train], y_pred)
                lr_cindex += cindex(y_compute[train], lr_pred)
                # tau
                pairs_tau += stats.kendalltau(y_compute[train], y_pred)[0]
                lr_tau += stats.kendalltau(y_compute[train], lr_pred)[0]
                # r2
                pairs_r2 += metrics.r2_score(y_compute[train], y_pred)
                lr_r2 += metrics.r2_score(y_compute[train], lr_pred)
        print('Cindex score:')
        print("RLS: %.5f" % (pairs_cindex / (i * 10)))
        print("Lr: %.5f" % (lr_cindex / (i * 10)))
        print('\nTau score:')
        print("RLS: %.5f" % (pairs_tau / (i * 10)))
        print("Lr: %.5f" % (lr_tau / (i * 10)))
        print('\nr2 score:')
        print("RLS: %.5f" % (pairs_r2 / (i * 10)))
        print("Lr: %.5f" % (lr_r2 / (i * 10)))
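For reference, the INSTANCES and PAIRS figures printed above for a 48-sample dataset follow directly from the fold sizes; a quick check, assuming scipy.special.comb:

from scipy.special import comb

# Fold counts used above, the resulting size of the (small) split the ranker
# is trained on, and the corresponding number of ordered preference pairs.
for folds in [24, 16, 12, 8, 6, 4, 3, 2]:
    n = 48 // folds
    pairs = int(comb(n, 2) * 2)
    print("folds=%2d  instances=%2d  pairs=%4d" % (folds, n, pairs))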
def cv_rls(X, y):
    # Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
    kfold = cross_validation.KFold(len(X), n_folds=5, shuffle=True)
    avg_cindex = 0
    avg_tau = 0
    for train, test in kfold:
        pairs_start, pairs_end = get_pairs(X[train], y[train])
        learner = PPRankRLS(X[train], pairs_start, pairs_end)
        # Test set predictions
        y_pred = learner.predict(X[test])
        avg_cindex += cindex(y[test], y_pred)
        avg_tau += stats.kendalltau(y_compute[test], y_pred)[0]
    print("Average cindex score: %f" % (avg_cindex / 5))
    print("Average tau score: %f" % (avg_tau / 5))
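A minimal smoke test for the three routines above, on synthetic data only: 48 instances to match the fold arithmetic in eval_num_pairs, and y_compute set equal to the targets purely for illustration (in the original analysis it is assumed to be a separate, precomputed reference array).

# Illustrative driver with synthetic data: 48 instances, 5 features,
# targets from a noisy linear model.
rng = np.random.RandomState(0)
X_demo = rng.rand(48, 5)
y_demo = X_demo.dot(rng.rand(5)) + 0.05 * rng.randn(48)
y_compute = y_demo  # the evaluation functions read y_compute as a global

score_models(X_demo, y_demo)
eval_num_pairs(X_demo, y_demo)
cv_rls(X_demo, y_demo)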
import numpy as np
from rankit.build.rank_script.rlscore.learner.rls import LeavePairOutRLS
from rankit.build.rank_script.rlscore.utilities.reader import read_sparse
from rankit.build.rank_script.rlscore.measure import cindex

train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_features = read_sparse("./legacy_tests/data/rank_test.features")

kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
learner = LeavePairOutRLS(**kwargs)

grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print("parameter %f cv_performance %f" % (grid[i], perfs[i]))

P = learner.predict(test_features)
test_perf = cindex(test_labels, P)
print("test set performance: %f" % test_perf)
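If the chosen regularization parameter itself is of interest, it can be read off the grid with an argmax over the cross-validation performances; this is a small hypothetical addition, not something the original example prints.

# Report the grid point with the best leave-pair-out cindex.
best_idx = int(np.argmax(perfs))
print("best regparam %f (cv_performance %f)" % (grid[best_idx], perfs[best_idx]))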
# Imports for this standalone query-ranking example; the module paths are
# assumed to mirror the LeavePairOutRLS example above.
import numpy as np
from rankit.build.rank_script.rlscore.learner import LeaveQueryOutRankRLS
from rankit.build.rank_script.rlscore.utilities.reader import read_qids
from rankit.build.rank_script.rlscore.utilities.reader import read_sparse
from rankit.build.rank_script.rlscore.measure import cindex
from rankit.build.rank_script.rlscore.measure.measure_utilities import UndefinedPerformance
from rankit.build.rank_script.rlscore.measure.measure_utilities import qids_to_splits

train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
train_qids = read_qids("./legacy_tests/data/rank_train.qids")
test_qids = read_qids("./legacy_tests/data/rank_test.qids")

kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["qids"] = train_qids
learner = LeaveQueryOutRankRLS(**kwargs)

grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print("parameter %f cv_performance %f" % (grid[i], perfs[i]))

P = learner.predict(test_features)

# Evaluate per query; queries for which cindex is undefined are skipped.
test_qids = qids_to_splits(test_qids)
perfs = []
for query in test_qids:
    try:
        perf = cindex(test_labels[query], P[query])
        perfs.append(perf)
    except UndefinedPerformance:
        pass
test_perf = np.mean(perfs)
print("test set performance: %f" % test_perf)
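The try/except above skips queries for which the concordance index is undefined, which RLScore signals with UndefinedPerformance (for example, a query whose labels are all identical has no comparable pairs). A small illustrative check, reusing the imports above:

# Hypothetical check: constant labels within a query leave cindex undefined,
# so the measure raises UndefinedPerformance and that query would be skipped.
try:
    cindex(np.array([1.0, 1.0, 1.0]), np.array([0.2, 0.5, 0.1]))
except UndefinedPerformance:
    print("cindex undefined for a query with constant labels")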