Example #1
def train_rls():
    #Evaluate ranking performance with leave-query-out cross-validation,
    #where instances related to a single sentence together form a fold
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    #list of sentence ids
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    folds = map_ids(qids_train)
    perfs = []
    for fold in folds:
        if np.var(Y_train[fold]) != 0:
            P = learner.holdout(fold)
            c = cindex(Y_train[fold], P)
            perfs.append(c)
    perf = np.mean(perfs)
    print("leave-query-out cross-validation cindex %f" % perf)
    partition = map_ids(qids_test)
    test_perfs = []
    #compute the ranking accuracy separately for each test query
    for query in partition:
        #skip such queries, where all instances have the same
        #score, since in this case cindex is undefined
        if np.var(Y_test[query]) != 0:
            perf = cindex(Y_test[query], P_test[query])
            test_perfs.append(perf)
    test_perf = np.mean(test_perfs)
    print("test cindex %f" % test_perf)
Example #2
def train_rls():
    #Evaluate ranking performance with leave-query-out cross-validation,
    #where instances related to a single sentence together form a fold
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    #list of sentence ids
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    folds = map_ids(qids_train)
    perfs = []
    for fold in folds:
        if np.var(Y_train[fold]) != 0:
            P = learner.holdout(fold)
            c = cindex(Y_train[fold], P)
            perfs.append(c)
    perf = np.mean(perfs)
    print("leave-query-out cross-validation cindex %f" %perf)
    partition = map_ids(qids_test)
    test_perfs = []
    #compute the ranking accuracy separately for each test query
    for query in partition:
        #skip such queries, where all instances have the same
        #score, since in this case cindex is undefined
        if np.var(Y_test[query]) != 0:
            perf = cindex(Y_test[query], P_test[query])
            test_perfs.append(perf)
    test_perf = np.mean(test_perfs)
    print("test cindex %f" %test_perf)
Example #3
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting3_split()
    learner = TwoStepRLS(X1 = X1_train, X2 = X2_train, Y = Y_train, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %(log_regparam1, log_regparam2, perf))
            P = learner.leave_x2_out()
            perf = cindex(Y_train, P)
            print("regparam 2**%d 2**%d, leave-column-out cindex %f" %(log_regparam1, log_regparam2, perf))
Example #4
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting4_split()
    learner = TwoStepRLS(X1 = X1_train, X2 = X2_train, Y = Y_train, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %(log_regparam1, log_regparam2, perf))
            P = learner.out_of_sample_loo()
            perf = cindex(Y_train, P)
            print("regparam 2**%d 2**%d, out-of-sample loo cindex %f" %(log_regparam1, log_regparam2, perf))
Example #5
def train_rls():
    #Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    #generate fold partition, arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    perfs = []
    for fold in folds:
        P = learner.holdout(fold)
        c = cindex(Y_train[fold], P)
        perfs.append(c)
    perf = np.mean(perfs)
    print("5-fold cross-validation cindex %f" % perf)
    P_test = learner.predict(X_test)
    print("test cindex %f" % cindex(Y_test, P_test))
Example #6
def train_rls():
    #Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    #generate fold partition, arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    perfs = []
    for fold in folds:
        P = learner.holdout(fold)
        c = cindex(Y_train[fold], P)
        perfs.append(c)
    perf = np.mean(perfs)
    print("5-fold cross-validation cindex %f" %perf)
    P_test = learner.predict(X_test)
    print("test cindex %f" %cindex(Y_test, P_test))
Example #7
def train_rls():
    #Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = GlobalRankRLS(X_train, Y_train)
    #Test set predictions
    P_test = learner.predict(X_test)
    print("test cindex %f" %cindex(Y_test, P_test))
Example #8
def train_rls():
    #Selects both the gamma parameter for Gaussian kernel, and regparam with kfoldcv
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    best_perf = 0.
    best_learner = None
    for gamma in gammas:
        #New RLS is initialized for each kernel parameter
        learner = KfoldRankRLS(X_train,
                               Y_train,
                               kernel="GaussianKernel",
                               folds=folds,
                               gamma=gamma,
                               regparams=regparams,
                               measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
Example #9
def skcv_rls(coordinates, Xdata, Ydata, number_of_folds, dzradii, regparam, visualization):
    print("Starting skcv-rls analysis...")
    # Calculate sorted pairwise distance matrix and indexes
    performanceTable = np.zeros([len(dzradii),2])
    data_distances, data_distance_indexes = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    for rind, dzradius in enumerate(dzradii): 
        print("Analysis ongoing, dead zone radius: " + str(dzradius) + "m / " + str(dzradii[len(dzradii)-1]) + "m")
        # Calculate dead zone folds
        dz_folds = dzfolds(dzradius, folds, data_distances, data_distance_indexes)
        learner = RLS(Xdata, Ydata, regparam=regparam)
        P = np.zeros(Ydata.shape)
        for fold_id, dz_fold in enumerate(dz_folds):
            preds = learner.holdout(dz_fold)
            if preds.ndim == 0:
                P[folds[fold_id]] = preds         
            else:
                P[folds[fold_id]] = preds[0:len(folds[fold_id])]
            if visualization: # Check for visualization 
                testcoords = coordinates[folds[fold_id],:]
                dzcoords = coordinates[dz_fold, :]
                visualize_skcv(coordinates, testcoords, dzcoords, dzradius)
        perf = cindex(Ydata, P)
        performanceTable[rind,0] = dzradius
        performanceTable[rind, 1] = perf
        plotRes_skcv(performanceTable, rind, number_of_folds, "rls")
    print("Analysis done.")
    return performanceTable
Example #10
def skcv_knn(coordinates, Xdata, Ydata, number_of_folds, dzradii, k_neighbors, visualization):
    print("Starting skcv-knn analysis...")
    # Calculate sorted pairwise distance matrix and indexes
    performanceTable = np.zeros([len(dzradii),2])
    data_distances, data_distance_indexes = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    for rind, dzradius in enumerate(dzradii):
        print("Analysis ongoing, dead zone radius: " + str(dzradius) + "m / " + str(dzradii[len(dzradii)-1]) + "m")
        # Calculate dead zone folds
        dz_folds = dzfolds(dzradius, folds, data_distances, data_distance_indexes)
        # Initialize performance variables   
        P = np.zeros(Ydata.shape)
        for fold_id, dz_fold in enumerate(dz_folds):
            X_tr = np.delete(Xdata, dz_fold, axis=0)
            Y_tr = np.delete(Ydata, dz_fold, axis=0)
            learner = KNeighborsRegressor(n_neighbors=k_neighbors)
            learner.fit(X_tr, Y_tr)
            preds = learner.predict(Xdata[dz_fold])
            if preds.ndim == 0:
                P[folds[fold_id]] = preds         
            else:
                P[folds[fold_id]] = preds[0:len(folds[fold_id])]
            if visualization: # Check for visualization
                testcoords = coordinates[folds[fold_id],:]
                dzcoords = coordinates[dz_fold, :]
                visualize_skcv(coordinates, testcoords, dzcoords, dzradius)                
        perf = cindex(Ydata, P)
        performanceTable[rind,0] = dzradius
        performanceTable[rind, 1] = perf
        plotRes_skcv(performanceTable, rind, number_of_folds, "K-nn")
    print("Analysis done.")
    return performanceTable
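In the two spatial cross-validation examples above, dzfolds extends each test fold with a dead zone: points within dzradius of any test point are also held out, and the test points are kept first so their predictions can be sliced back out. A brute-force sketch of that assumed behaviour; dz_fold_sketch is hypothetical, not the project's dzfolds:
import numpy as np

def dz_fold_sketch(test_fold, coordinates, radius):
    #distances from every point to every test point
    test_fold = np.asarray(test_fold)
    d = np.linalg.norm(coordinates[:, None, :] - coordinates[None, test_fold, :], axis=2)
    in_zone = np.where((d <= radius).any(axis=1))[0]
    #test points first, dead-zone neighbors after them
    rest = np.setdiff1d(in_zone, test_fold)
    return np.concatenate([test_fold, rest])

coords = np.array([[0., 0.], [1., 0.], [5., 5.], [5.1, 5.]])
print(dz_fold_sketch([2], coords, radius=1.0))  #[2 3]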
Example #11
def train_rls():
    #Select regparam with k-fold cross-validation,
    #where instances related to a single sentence form
    #together a fold
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    #list of sentence ids
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    regparams = [2.**i for i in range(-10, 10)]
    learner = LeaveQueryOutRankRLS(X_train, Y_train, qids_train, regparams = regparams, measure = cindex)
    lqo_perfs = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-query-out performances " +str(lqo_perfs))
    print("chosen regparam %f" %learner.regparam)
    partition = map_ids(qids_test)
    #compute the ranking accuracy separately for each test query
    test_perfs = []
    for query in partition:
        #skip such queries, where all instances have the same
        #score, since in this case cindex is undefined
        if np.var(Y_test[query]) != 0:
            perf = cindex(Y_test[query], P_test[query])
            test_perfs.append(perf)
    test_perf = np.mean(test_perfs)
    print("test cindex %f" %test_perf)
Example #12
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    m = X2_train.shape[0]
    learner = TwoStepRLS(X1 = X1_train, X2 = X2_train, Y = Y_train, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    #Create random split to 5 folds for the targets
    folds = random_folds(m, 5, seed=12345)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %(log_regparam1, log_regparam2, perf))
            P = learner.x2_kfold_cv(folds)
            perf = cindex(Y_train, P)
            print("regparam 2**%d 2**%d, K-fold cindex %f" %(log_regparam1, log_regparam2, perf))
Example #13
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingB_split()
    learner = KronRLS(X1 = X1_train, X2 = X2_train, Y = Y_train)
    log_regparams = range(15, 35)
    for log_regparam in log_regparams:
        learner.solve(2.**log_regparam)
        P = learner.predict(X1_test, X2_test)
        perf = cindex(Y_test, P)
        print("regparam 2**%d, cindex %f" %(log_regparam, perf))
Example #14
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting2_split()
    learner = KronRLS(X1 = X1_train, X2 = X2_train, Y = Y_train)
    log_regparams = range(15, 35)
    for log_regparam in log_regparams:
        learner.solve(2.**log_regparam)
        P = learner.predict(X1_test, X2_test)
        perf = cindex(Y_test, P)
        print("regparam 2**%d, cindex %f" %(log_regparam, perf))
Example #15
def train_rls():
    #Trains RankRLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-10, 10)]
    learner = LeavePairOutRankRLS(X_train, Y_train, regparams = regparams)
    lpo_perfs = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-pair-out performances " + str(lpo_perfs))
    print("chosen regparam %f" %learner.regparam)
    print("test set cindex %f" %cindex(Y_test, P_test))
Example #16
def main():
    X1, X2, Y = davis_data.load_davis()
    Y = Y.ravel(order='F')
    learner = KronRLS(X1 = X1, X2 = X2, Y = Y)
    log_regparams = range(15, 35)
    for log_regparam in log_regparams:
        learner.solve(2.**log_regparam)
        P = learner.in_sample_loo()
        perf = cindex(Y, P)
        print("regparam 2**%d, cindex %f" %(log_regparam, perf))
Example #17
def train_rls():
    #Trains RLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-15, 16)]
    learner = LeaveOneOutRLS(X_train, Y_train, regparams = regparams, measure=cindex)
    loo_errors = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-one-out cindex " +str(loo_errors))
    print("chosen regparam %f" %learner.regparam)
    print("test cindex %f" %cindex(Y_test, P_test))
Example #18
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingD_split()
    n = X1_train.shape[0]
    m = X2_train.shape[0]
    learner = TwoStepRLS(X1 = X1_train, X2 = X2_train, Y = Y_train, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    #Create random split to 5 folds for both drugs and targets
    drug_folds = random_folds(n, 5, seed=123)
    target_folds = random_folds(m, 5, seed=456)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %(log_regparam1, log_regparam2, perf))
            P = learner.out_of_sample_kfold_cv(drug_folds, target_folds)
            perf = cindex(Y_train, P)
            print("regparam 2**%d 2**%d, out-of-sample loo cindex %f" %(log_regparam1, log_regparam2, perf))
Example #19
def main():
    X1, X2, Y = davis_data.load_davis()
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1 = X1, X2 = X2, Y = Y, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.in_sample_loo()
            perf = cindex(Y, P)
            print("regparam 2**%d 2**%d, cindex %f" %(log_regparam1, log_regparam2, perf))
Example #20
def train_rls():
    #Trains RankRLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    #generate fold partition, arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-10, 10)]
    learner = KfoldRankRLS(X_train, Y_train, folds = folds, regparams = regparams, measure=cindex)
    kfold_perfs = learner.cv_performances
    P_test = learner.predict(X_test)
    print("kfold performances " +str(kfold_perfs))
    print("chosen regparam %f" %learner.regparam)
    print("test set cindex %f" %cindex(Y_test, P_test))
Example #21
def main():
    X = np.loadtxt("drug-drug_similarities_2D.txt")
    Y = np.loadtxt("drug-drug_similarities_ECFP4.txt")
    Y = Y.ravel(order='F')
    K = np.dot(X, X)
    learner = TwoStepRLS(K1 = K, K2 = K, Y = Y, regparam1=1.0, regparam2=1.0)
    log_regparams = range(-10, 0)
    for log_regparam in log_regparams:
        learner.solve(2.**log_regparam, 2.**log_regparam)
        P = learner.out_of_sample_loo_symmetric()
        perf = cindex(Y, P)
        print("regparam 2**%d, cindex %f" %(log_regparam, perf))
Example #22
def main():
    X = np.loadtxt("drug-drug_similarities_2D.txt")
    Y = np.loadtxt("drug-drug_similarities_ECFP4.txt")
    Y = Y.ravel(order='F')
    K = np.dot(X, X)
    learner = TwoStepRLS(K1=K, K2=K, Y=Y, regparam1=1.0, regparam2=1.0)
    log_regparams = range(-10, 0)
    for log_regparam in log_regparams:
        learner.solve(2.**log_regparam, 2.**log_regparam)
        P = learner.out_of_sample_loo_symmetric()
        perf = cindex(Y, P)
        print("regparam 2**%d, cindex %f" % (log_regparam, perf))
Example #23
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingB_split()
    learner = TwoStepRLS(X1=X1_train,
                         X2=X2_train,
                         Y=Y_train,
                         regparam1=1.0,
                         regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20, 25)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %
                  (log_regparam1, log_regparam2, perf))
            P = learner.leave_x1_out()
            perf = cindex(Y_train, P)
            print("regparam 2**%d 2**%d, leave-row-out cindex %f" %
                  (log_regparam1, log_regparam2, perf))
Example #24
def main():
    X1, X2, Y = davis_data.load_davis()
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20, 25)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.in_sample_loo()
            perf = cindex(Y, P)
            print("regparam 2**%d 2**%d, cindex %f" %
                  (log_regparam1, log_regparam2, perf))
Example #25
def train_rls():
    #Trains RLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    regparams = [2.**i for i in range(-15, 16)]
    learner = LeaveOneOutRLS(X_train,
                             Y_train,
                             regparams=regparams,
                             measure=cindex)
    loo_errors = learner.cv_performances
    P_test = learner.predict(X_test)
    print("leave-one-out cindex " + str(loo_errors))
    print("chosen regparam %f" % learner.regparam)
    print("test cindex %f" % cindex(Y_test, P_test))
Example #26
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingD_split()
    kernel1 = GaussianKernel(X1_train, gamma=0.01)
    kernel2 = GaussianKernel(X2_train, gamma=10**-9)
    K1_train = kernel1.getKM(X1_train)
    K1_test = kernel1.getKM(X1_test)
    K2_train = kernel2.getKM(X2_train)
    K2_test = kernel2.getKM(X2_test)
    learner = KronRLS(K1 = K1_train, K2 = K2_train, Y = Y_train)
    log_regparams = range(-15, 15)
    for log_regparam in log_regparams:
        learner.solve(2.**log_regparam)
        P = learner.predict(K1_test, K2_test)
        perf = cindex(Y_test, P)
        print("regparam 2**%d, cindex %f" %(log_regparam, perf))
Example #27
def main():
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    m = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train,
                         X2=X2_train,
                         Y=Y_train,
                         regparam1=1.0,
                         regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20, 25)
    #Create random split to 5 folds for the targets
    folds = random_folds(m, 5, seed=12345)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %
                  (log_regparam1, log_regparam2, perf))
            P = learner.x2_kfold_cv(folds)
            perf = cindex(Y_train, P)
            print("regparam 2**%d 2**%d, K-fold cindex %f" %
                  (log_regparam1, log_regparam2, perf))
Example #28
def train_rls():
    #Trains RankRLS with automatically selected regularization parameter
    X_train, Y_train, X_test, Y_test = load_housing()
    #generate fold partition, arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-10, 10)]
    learner = KfoldRankRLS(X_train,
                           Y_train,
                           folds=folds,
                           regparams=regparams,
                           measure=cindex)
    kfold_perfs = learner.cv_performances
    P_test = learner.predict(X_test)
    print("kfold performances " + str(kfold_perfs))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
Example #29
def main():
    X1, X2, Y = davis_data.load_davis()
    n = X1.shape[0]
    m = X2.shape[0]
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1 = X1, X2 = X2, Y = Y, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    #Create random split to 5 folds for the drug-target pairs
    folds = random_folds(n*m, 5, seed=12345)
    #Map the indices back to (drug_indices, target_indices)
    folds = [np.unravel_index(fold, (n,m)) for fold in folds]
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.in_sample_kfoldcv(folds)
            perf = cindex(Y, P)
            print("regparam 2**%d 2**%d, cindex %f" %(log_regparam1, log_regparam2, perf))
Example #30
def main():
    X1, X2, Y = davis_data.load_davis()
    n = X1.shape[0]
    m = X2.shape[0]
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20, 25)
    #Create random split to 5 folds for the drug-target pairs
    folds = random_folds(n * m, 5, seed=12345)
    #Map the indices back to (drug_indices, target_indices)
    folds = [np.unravel_index(fold, (n, m)) for fold in folds]
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.in_sample_kfoldcv(folds)
            perf = cindex(Y, P)
            print("regparam 2**%d 2**%d, cindex %f" %
                  (log_regparam1, log_regparam2, perf))
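The np.unravel_index call in the two examples above converts flat pair indices into (drug index, target index) coordinate arrays for the n x m grid; a small demonstration of that mapping:
import numpy as np

#flat indices 0..5 laid out on a 2 x 3 grid (row-major, NumPy's default order)
rows, cols = np.unravel_index([0, 1, 2, 3, 4, 5], (2, 3))
print(rows)  #[0 0 0 1 1 1]
print(cols)  #[0 1 2 0 1 2]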
Example #31
def train_rls():
    #Trains RLS with default parameters (regparam=1.0, kernel='LinearKernel')
    X_train, Y_train, X_test, Y_test = load_housing()
    pairs_start = []
    pairs_end = []
    #Sample 1000 pairwise preferences from the data
    trange = range(len(Y_train))
    while len(pairs_start) < 1000:
        ind0 = random.choice(trange)
        ind1 = random.choice(trange)
        if Y_train[ind0] > Y_train[ind1]:
            pairs_start.append(ind0)
            pairs_end.append(ind1)
        elif Y_train[ind0] < Y_train[ind1]:
            pairs_start.append(ind1)
            pairs_end.append(ind0)
    learner = PPRankRLS(X_train, pairs_start, pairs_end)
    #Test set predictions
    P_test = learner.predict(X_test)
    print("test cindex %f" % cindex(Y_test, P_test))
Example #32
def train_rls():
    #Selects both the gamma parameter for Gaussian kernel, and regparam with kfoldcv
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    best_perf = 0.
    best_learner = None
    for gamma in gammas:
        #New RLS is initialized for each kernel parameter
        learner = KfoldRankRLS(X_train, Y_train, kernel = "GaussianKernel", folds = folds, gamma = gamma, regparams = regparams, measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" %(best_gamma, best_regparam))
    print("best kfoldcv cindex %f" %best_perf)
    print("test cindex %f" %cindex(Y_test, P_test))
Example #33
import numpy as np
from rlscore.learner.global_rankrls import KfoldRankRLS
from rlscore.utilities.reader import read_folds
from rlscore.utilities.reader import read_sparse
from rlscore.measure import cindex
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
folds = read_folds("./legacy_tests/data/folds.txt")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
kwargs = {}
kwargs['measure']=cindex
kwargs['regparams'] = [2**i for i in range(-10,11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["folds"] = folds
learner = KfoldRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print "parameter %f cv_performance %f" %(grid[i], perfs[i])
P = learner.predict(test_features)
test_perf = cindex(test_labels, P)
print "test set performance: %f" %test_perf
Example #34
import numpy as np
from rlscore.learner.global_rankrls import LeavePairOutRankRLS
from rlscore.reader import read_sparse
from rlscore.measure import cindex
train_labels = np.loadtxt("./examples/data/rank_train.labels")
test_labels = np.loadtxt("./examples/data/rank_test.labels")
train_features = read_sparse("./examples/data/rank_train.features")
test_features = read_sparse("./examples/data/rank_test.features")
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
learner = LeavePairOutRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print "parameter %f cv_performance %f" % (grid[i], perfs[i])
P = learner.predict(test_features)
test_perf = cindex(test_labels, P)
print "test set performance: %f" % test_perf
Example #35
 def callback(self, learner):
     if self.iter % 10 == 0:
         P = learner.predict(self.X1, self.X2, self.row_inds, self.col_inds)
         perf = cindex(self.Y, P)
         print("iteration %d cindex %f" % (self.iter, perf))
     self.iter += 1
Example #36
import numpy as np

from rlscore.measure import cindex

#Concordance index is a pairwise ranking measure

#Equivalent to AUC for bi-partite ranking problems
Y = [-1, -1, -1, 1, 1]
P = [-5, 2, -1, 1, 3.2]

cind1 = cindex(Y, P)

print("My cindex is %f" %cind1)

#Can handle also real-valued Y-values

Y2 = [-2.2, -1.3, -0.2, 0.5, 1.1]
#Almost correct ranking, but last two inverted
P2 = [-2.7, -1.1, 0.3, 0.6, 0.5]

cind2 = cindex(Y2, P2)

print("My cindex is %f" %cind2)

#Most performance measures take average over the columns for multi-target problems:

Y_big = np.vstack((Y, Y2)).T
P_big = np.vstack((P, P2)).T
print(Y_big)
print(P_big)
print("(cind1+cind2)/2 %f" %((cind1+cind2)/2.))
Example #37
          #fragment of a nested cross-validation loop: the inner loop below selects the
          #KernelRidge regularization parameter by RMSE, the outer fold is then refit and scored
          pred = clf.predict(K_test)
          #print(pred.shape)
          #print(Y_train.shape)
          mse = np.sqrt(sqerror(np.ravel(Y_test, 'F'), np.ravel(pred, 'F')))
          MSE[i, j] = mse
     MSE_m = np.mean(MSE, axis=0)
      opt_regparam = 10. ** (reg_par[np.argmin(MSE_m)])
      clf = KernelRidge(alpha=opt_regparam)
     K_train_o=K[train_out][:,train_out]
     K_test_o = K[test_out][:, train_out]
     Y_train_o = Y[train_out]
     Y_test_o = Y[test_out]
     clf.fit(K_train_o, Y_train_o)
     pred_o = clf.predict(K_test_o)
     mse_o[k] = np.sqrt(sqerror(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F')))
     cind_o[k] = cindex(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))
     pear_o[k] = pearsonr(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))[0]
     spear_o[k] = spearmanr(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))[0]
     #plt.scatter(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))
     #plt.show()
     print("op_reg:%f, fold RMSE:%f, fold Cindex:%f, fold Pearson:%f, fold Spearman:%f" % (opt_regpram,mse_o[k],
                                                                                      cind_o[k],
                                                                                      pear_o[k],
                                                                                      spear_o[k]))
     k+=1
 #MSE_diff_pca_comp[s]=np.mean(mse_o)
 #s+=1
 print("overall RMSE:%f, overall Cindex:%f, overall Pearson:%f, overall Spearman:%f" % (np.mean(mse_o),
                                                                                  np.mean(cind_o),
                                                                                  np.mean(pear_o),
                                                                                np.mean(spear_o)))
Example #38
#imports added for completeness; module paths assumed to match Example #39
import numpy as np
from rlscore.learner.query_rankrls import LeaveQueryOutRankRLS
from rlscore.utilities.reader import read_qids
from rlscore.utilities.reader import read_sparse
from rlscore.measure import cindex
train_labels = np.loadtxt("./examples/data/rank_train.labels")
test_labels = np.loadtxt("./examples/data/rank_test.labels")
train_qids = read_qids("./examples/data/rank_train.qids")
test_features = read_sparse("./examples/data/rank_test.features")
train_features = read_sparse("./examples/data/rank_train.features")
test_qids = read_qids("./examples/data/rank_test.qids")
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["qids"] = train_qids
learner = LeaveQueryOutRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print "parameter %f cv_performance %f" % (grid[i], perfs[i])
P = learner.predict(test_features)
from rlscore.measure.measure_utilities import UndefinedPerformance
from rlscore.measure.measure_utilities import qids_to_splits
test_qids = qids_to_splits(test_qids)
perfs = []
for query in test_qids:
    try:
        perf = cindex(test_labels[query], P[query])
        perfs.append(perf)
    except UndefinedPerformance:
        pass
test_perf = np.mean(perfs)
print "test set performance: %f" % test_perf
Example #39
import numpy as np
from rlscore.learner.query_rankrls import QueryRankRLS
from rlscore.utilities.reader import read_qids
from rlscore.utilities.reader import read_sparse
from rlscore.measure import cindex
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
train_qids = read_qids("./legacy_tests/data/rank_train.qids")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_qids = read_qids("./legacy_tests/data/rank_test.qids")
kwargs = {}
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["qids"] = train_qids
kwargs["regparam"] = 1
learner = QueryRankRLS(**kwargs)
P = learner.predict(test_features)
from rlscore.measure.measure_utilities import UndefinedPerformance
from rlscore.measure.measure_utilities import qids_to_splits
test_qids = qids_to_splits(test_qids)
perfs = []
for query in test_qids:
    try:
        perf = cindex(test_labels[query], P[query])
        perfs.append(perf)
    except UndefinedPerformance:
        pass
test_perf = np.mean(perfs)
print("test set performance: %f" %test_perf)
Example #40
 def callback(self, learner):
     if self.iter%10 == 0:
         P = learner.predict(self.X1, self.X2, self.row_inds, self.col_inds)
         perf = cindex(self.Y, P)
         print("iteration %d cindex %f" %(self.iter, perf))
     self.iter += 1
Example #41
import numpy as np

from rlscore.measure import cindex

#Concordance index is a pairwise ranking measure

#Equivalent to AUC for bi-partite ranking problems
Y = [-1, -1, -1, 1, 1]
P = [-5, 2, -1, 1, 3.2]

cind1 = cindex(Y, P)

print("My cindex is %f" % cind1)

#Can handle also real-valued Y-values

Y2 = [-2.2, -1.3, -0.2, 0.5, 1.1]
#Almost correct ranking, but last two inverted
P2 = [-2.7, -1.1, 0.3, 0.6, 0.5]

cind2 = cindex(Y2, P2)

print("My cindex is %f" % cind2)

#Most performance measures take average over the columns for multi-target problems:

Y_big = np.vstack((Y, Y2)).T
P_big = np.vstack((P, P2)).T
print(Y_big)
print(P_big)
print("(cind1+cind2)/2 %f" % ((cind1 + cind2) / 2.))