Example #1
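#Assumed imports for Examples #1-#3 (not shown in the original snippets):
#  from scipy import stats
#  from scipy.special import comb
#  from sklearn import cross_validation, linear_model, metrics
#  from rlscore.learner import PPRankRLS, GlobalRankRLS
#  from rlscore.measure import cindex
#sklearn.cross_validation is the pre-0.20 module; on newer scikit-learn
#use sklearn.model_selection.KFold instead.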
def score_models(X, y):
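    #Compares pairwise RankRLS (PPRankRLS), standard RankRLS (GlobalRankRLS) and
    #ordinary linear regression over 10 repetitions of 5-fold cross-validation,
    #reporting cindex, Kendall's tau and R^2 averaged over the 50 train/test splits.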

    #cindex
    pairs_cindex = 0
    reg_cindex = 0
    lr_cindex = 0
    #tau
    pairs_tau = 0
    reg_tau = 0
    lr_tau = 0
    #r2
    pairs_r2 = 0
    reg_r2 = 0
    lr_r2 = 0

    for i in range(10):

        kfold = cross_validation.KFold(len(X), n_folds=5, shuffle=True)

        for train, test in kfold:
            pairs_start, pairs_end = get_pairs(X[train], y[train])
            learner = PPRankRLS(X[train], pairs_start, pairs_end)
            reg = GlobalRankRLS(X[train], y[train])
            lr_estimator = linear_model.LinearRegression(fit_intercept=True)
            lr_estimator.fit(X[train], y[train])

            #Test set predictions
            y_pred = learner.predict(X[test])
            reg_pred = reg.predict(X[test])
            lr_pred = lr_estimator.predict(X[test])

            #cindex
            pairs_cindex += cindex(y[test], y_pred)
            reg_cindex += cindex(y[test], reg_pred)
            lr_cindex += cindex(y[test], lr_pred)
            #tau
            pairs_tau += stats.kendalltau(y[test], y_pred)[0]
            reg_tau += stats.kendalltau(y[test], reg_pred)[0]
            lr_tau += stats.kendalltau(y[test], lr_pred)[0]
            #r2
            pairs_r2 += metrics.r2_score(y[test], y_pred)
            reg_r2 += metrics.r2_score(y[test], reg_pred)
            lr_r2 += metrics.r2_score(y[test], lr_pred)

    print('Cindex score:')
    print("RLS: %.5f" % (pairs_cindex / 50))
    print("Reg: %.5f" % (reg_cindex / 50))
    print("Lr: %.5f" % (lr_cindex / 50))

    print('\nTau score:')
    print("RLS: %.5f" % (pairs_tau / 50))
    print("Reg: %.5f" % (reg_tau / 50))
    print("Lr: %.5f" % (lr_tau / 50))

    print('\nr2 score:')
    print("RLS: %.5f" % (pairs_r2 / 50))
    print("Reg: %.5f" % (reg_r2 / 50))
    print("Lr: %.5f" % (lr_r2 / 50))
Example #2
def eval_num_pairs(X, y):
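    #Studies how the number of training instances (and hence preference pairs)
    #affects PPRankRLS versus linear regression: for each fold count i the models
    #are trained on a single small fold and evaluated on the remaining data.
    #The hard-coded 48 assumes a 48-instance data set.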

    for i in [24, 16, 12, 8, 6, 4, 3, 2]:

        print("\n FOLDS = %i" % (i))
        print("INSTANCES = %i" % (48 / i))
        print("PAIRS = %i" % (comb(48 / i, 2) * 2))

        #cindex
        pairs_cindex = 0
        lr_cindex = 0
        #tau
        pairs_tau = 0
        lr_tau = 0
        #r2
        pairs_r2 = 0
        lr_r2 = 0

        for j in range(10):
            kfold = cross_validation.KFold(len(X), n_folds=i, shuffle=True)
            for train, test in kfold:
                #train on smaller test set
                pairs_start, pairs_end = get_pairs(X[test], y[test])
                learner = PPRankRLS(X[test], pairs_start, pairs_end)
                lr_estimator = linear_model.LinearRegression(
                    fit_intercept=True)
                lr_estimator.fit(X[test], y[test])

                #predict on larger training set
                y_pred = learner.predict(X[train])
                lr_pred = lr_estimator.predict(X[train])

                #cindex
                pairs_cindex += cindex(y[train], y_pred)
                lr_cindex += cindex(y[train], lr_pred)
                #tau
                pairs_tau += stats.kendalltau(y[train], y_pred)[0]
                lr_tau += stats.kendalltau(y[train], lr_pred)[0]
                #r2
                pairs_r2 += metrics.r2_score(y[train], y_pred)
                lr_r2 += metrics.r2_score(y[train], lr_pred)

        print('Cindex score:')
        print("RLS: %.5f" % (pairs_cindex / (i * 10)))
        print("Lr: %.5f" % (lr_cindex / (i * 10)))

        print('\nTau score:')
        print("RLS: %.5f" % (pairs_tau / (i * 10)))
        print("Lr: %.5f" % (lr_tau / (i * 10)))

        print('\nr2 score:')
        print("RLS: %.5f" % (pairs_r2 / (i * 10)))
        print("Lr: %.5f" % (lr_r2 / (i * 10)))
Example #3
def cv_rls(X, y):
    #Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
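    #Runs 5-fold cross-validation and reports cindex and Kendall's tau
    #averaged over the folds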
    kfold = cross_validation.KFold(len(X), n_folds=5, shuffle=True)

    avg_cindex = 0
    avg_tau = 0
    for train, test in kfold:

        pairs_start, pairs_end = get_pairs(X[train], y[train])
        learner = PPRankRLS(X[train], pairs_start, pairs_end)
        #Test set predictions
        y_pred = learner.predict(X[test])
        avg_cindex += cindex(y[test], y_pred)
        avg_tau += stats.kendalltau(y[test], y_pred)[0]

    print("Average cindex score: %f" % (avg_score / 5))
    print("Average tau score: %f" % (avg_tauS / 5))
Example #4
import numpy as np
from rankit.build.rank_script.rlscore.learner.rls import LeavePairOutRLS
from rankit.build.rank_script.rlscore.utilities.reader import read_sparse
from rankit.build.rank_script.rlscore.measure import cindex
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
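#Select the regularization parameter from a grid of powers of two by
#leave-pair-out cross-validation, using cindex as the selection criterion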
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
learner = LeavePairOutRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print("parameter %f cv_performance %f" % (grid[i], perfs[i]))
P = learner.predict(test_features)
test_perf = cindex(test_labels, P)
print("test set performance: %f" % test_perf)
Example #5
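#Assumed imports (following the vendored paths of Example #4; the exact modules
#for read_qids and LeaveQueryOutRankRLS are guesses, adjust to your installation):
#  import numpy as np
#  from rankit.build.rank_script.rlscore.utilities.reader import read_sparse, read_qids
#  from rankit.build.rank_script.rlscore.measure import cindex
#  from rankit.build.rank_script.rlscore.learner import LeaveQueryOutRankRLS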
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
train_qids = read_qids("./legacy_tests/data/rank_train.qids")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_qids = read_qids("./legacy_tests/data/rank_test.qids")
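#Same setup as Example #4, but the instances are grouped into queries:
#LeaveQueryOutRankRLS selects the regularization parameter by
#leave-query-out cross-validation instead of leave-pair-out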
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["qids"] = train_qids
learner = LeaveQueryOutRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print("parameter %f cv_performance %f" %(grid[i], perfs[i]))
P = learner.predict(test_features)
from rankit.build.rank_script.rlscore.measure.measure_utilities import UndefinedPerformance
from rankit.build.rank_script.rlscore.measure.measure_utilities import qids_to_splits
test_qids = qids_to_splits(test_qids)
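#Evaluate each query separately; cindex is undefined for a query whose labels
#are all identical, so such queries are skipped and the mean is taken over the rest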
perfs = []
for query in test_qids:
    try:
        perf = cindex(test_labels[query], P[query])
        perfs.append(perf)
    except UndefinedPerformance:
        pass
test_perf = np.mean(perfs)
print("test set performance: %f" %test_perf)