# Example #1
def skcv_knn(coordinates, Xdata, Ydata, number_of_folds, dzradii, k_neighbors, visualization):
    """Spatial k-fold cross-validation (skcv) for a k-nearest-neighbor regressor.

    For each dead zone radius in dzradii, all points lying inside the radius
    around a test fold are removed from the training set before fitting, and
    the concordance index of the resulting out-of-fold predictions is stored.

    Parameters
    ----------
    coordinates : array of sample coordinates (rows indexable by fold indices)
    Xdata, Ydata : feature matrix and target values
    number_of_folds : number of random CV folds
    dzradii : sequence of dead zone radii (same length unit as coordinates; meters per the printout)
    k_neighbors : n_neighbors for KNeighborsRegressor
    visualization : if truthy, plot each fold's dead zone via visualize_skcv

    Returns
    -------
    performanceTable : (len(dzradii), 2) array of [dzradius, cindex] rows
    """
    print("Starting skcv-knn analysis...")
    # Sorted pairwise distance matrix and the matching index matrix
    performanceTable = np.zeros([len(dzradii), 2])
    data_distances, data_distance_indexes = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    for rind, dzradius in enumerate(dzradii):
        # Idiom fix: dzradii[-1] instead of dzradii[len(dzradii)-1]
        print("Analysis ongoing, dead zone radius: " + str(dzradius) + "m / " + str(dzradii[-1]) + "m")
        # Each dead zone fold = test fold indices plus all points within dzradius
        dz_folds = dzfolds(dzradius, folds, data_distances, data_distance_indexes)
        # Out-of-fold prediction vector
        P = np.zeros(Ydata.shape)
        for fold_id, dz_fold in enumerate(dz_folds):
            # Train on everything outside the dead zone
            X_tr = np.delete(Xdata, dz_fold, axis=0)
            Y_tr = np.delete(Ydata, dz_fold, axis=0)
            learner = KNeighborsRegressor(n_neighbors=k_neighbors)
            learner.fit(X_tr, Y_tr)
            preds = learner.predict(Xdata[dz_fold])
            if preds.ndim == 0:
                P[folds[fold_id]] = preds
            else:
                # assumes the test-fold indices come first inside dz_fold, so
                # the leading predictions belong to the test fold — TODO confirm
                # against dzfolds' ordering
                P[folds[fold_id]] = preds[0:len(folds[fold_id])]
            if visualization:  # Check for visualization
                testcoords = coordinates[folds[fold_id], :]
                dzcoords = coordinates[dz_fold, :]
                visualize_skcv(coordinates, testcoords, dzcoords, dzradius)
        perf = cindex(Ydata, P)
        performanceTable[rind, 0] = dzradius
        performanceTable[rind, 1] = perf
        plotRes_skcv(performanceTable, rind, number_of_folds, "K-nn")
    print("Analysis done.")
    return performanceTable
# Example #2
def skcv_rls(coordinates, Xdata, Ydata, number_of_folds, dzradii, regparam, visualization):
    """Spatial k-fold cross-validation (skcv) for an RLS regressor.

    Like skcv_knn, but the RLS learner is trained once on all data and its
    holdout() method is used to obtain predictions with each dead zone fold
    left out.

    Parameters
    ----------
    coordinates : array of sample coordinates (rows indexable by fold indices)
    Xdata, Ydata : feature matrix and target values
    number_of_folds : number of random CV folds
    dzradii : sequence of dead zone radii (same length unit as coordinates; meters per the printout)
    regparam : RLS regularization parameter
    visualization : if truthy, plot each fold's dead zone via visualize_skcv

    Returns
    -------
    performanceTable : (len(dzradii), 2) array of [dzradius, cindex] rows
    """
    print("Starting skcv-rls analysis...")
    # Sorted pairwise distance matrix and the matching index matrix
    performanceTable = np.zeros([len(dzradii), 2])
    data_distances, data_distance_indexes = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    for rind, dzradius in enumerate(dzradii):
        # Idiom fix: dzradii[-1] instead of dzradii[len(dzradii)-1]
        print("Analysis ongoing, dead zone radius: " + str(dzradius) + "m / " + str(dzradii[-1]) + "m")
        # Each dead zone fold = test fold indices plus all points within dzradius
        dz_folds = dzfolds(dzradius, folds, data_distances, data_distance_indexes)
        learner = RLS(Xdata, Ydata, regparam=regparam)
        P = np.zeros(Ydata.shape)
        for fold_id, dz_fold in enumerate(dz_folds):
            # holdout() returns predictions for dz_fold with those rows excluded
            preds = learner.holdout(dz_fold)
            if preds.ndim == 0:
                P[folds[fold_id]] = preds
            else:
                # assumes the test-fold indices come first inside dz_fold, so
                # the leading predictions belong to the test fold — TODO confirm
                # against dzfolds' ordering
                P[folds[fold_id]] = preds[0:len(folds[fold_id])]
            if visualization:  # Check for visualization
                testcoords = coordinates[folds[fold_id], :]
                dzcoords = coordinates[dz_fold, :]
                visualize_skcv(coordinates, testcoords, dzcoords, dzradius)
        perf = cindex(Ydata, P)
        performanceTable[rind, 0] = dzradius
        performanceTable[rind, 1] = perf
        plotRes_skcv(performanceTable, rind, number_of_folds, "rls")
    print("Analysis done.")
    return performanceTable
# Example #3
def train_rls():
    """Select both the Gaussian-kernel gamma and the regparam with k-fold CV.

    For each gamma in the grid, KfoldRankRLS internally picks the best
    regparam (by k-fold cindex); the overall best (gamma, regparam) model
    is then evaluated on the test set.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    # Robustness fix: start at -inf so the first candidate always wins.
    # The original 0.0 could leave best_learner as None (and crash on
    # predict) if every CV cindex happened to be 0.
    best_perf = float("-inf")
    best_learner = None
    for gamma in gammas:
        # A new learner is initialized for each kernel parameter
        learner = KfoldRankRLS(X_train,
                               Y_train,
                               kernel="GaussianKernel",
                               folds=folds,
                               gamma=gamma,
                               regparams=regparams,
                               measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
# Example #4
def main():
    """Setting D split (new drugs AND new targets) evaluation for TwoStepRLS.

    Scans a grid of (regparam1, regparam2) pairs, reporting for each pair
    the test set cindex and the out-of-sample k-fold CV cindex on the
    training data (5 random folds over drugs and 5 over targets).
    """
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingD_split()
    n = X1_train.shape[0]
    m = X2_train.shape[0]
    learner = TwoStepRLS(X1 = X1_train, X2 = X2_train, Y = Y_train, regparam1=1.0, regparam2=1.0)
    log_regparams1 = range(-8, -4)
    log_regparams2 = range(20,25)
    #Create random split to 5 folds for both drugs and targets
    drug_folds = random_folds(n, 5, seed=123)
    target_folds = random_folds(m, 5, seed=456)
    for log_regparam1 in log_regparams1:
        for log_regparam2 in log_regparams2:
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f" %(log_regparam1, log_regparam2, perf))
            P = learner.out_of_sample_kfold_cv(drug_folds, target_folds)
            perf = cindex(Y_train, P)
            # Fixed label: this is k-fold CV, not leave-one-out ("loo")
            print("regparam 2**%d 2**%d, out-of-sample kfold cindex %f" %(log_regparam1, log_regparam2, perf))
# Example #5
def train_rls():
    """Train RankRLS, selecting the regularization parameter by k-fold CV."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Fold partition arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    grid = [2.0 ** exponent for exponent in range(-10, 10)]
    learner = KfoldRankRLS(X_train, Y_train, folds=folds, regparams=grid, measure=cindex)
    P_test = learner.predict(X_test)
    print("kfold performances " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
# Example #6
def train_rls():
    """Evaluate GlobalRankRLS (default parameters) with 5-fold holdout CV."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Fold partition arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    # One cindex per fold, computed on the held-out predictions
    fold_scores = [cindex(Y_train[fold], learner.holdout(fold)) for fold in folds]
    print("5-fold cross-validation cindex %f" % np.mean(fold_scores))
    P_test = learner.predict(X_test)
    print("test cindex %f" % cindex(Y_test, P_test))
# Example #7
def train_rls():
    """5-fold cross-validation and test evaluation of a default GlobalRankRLS."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Fold partition arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    scores = []
    for holdout_idx in folds:
        holdout_preds = learner.holdout(holdout_idx)
        scores.append(cindex(Y_train[holdout_idx], holdout_preds))
    print("5-fold cross-validation cindex %f" % np.mean(scores))
    print("test cindex %f" % cindex(Y_test, learner.predict(X_test)))
# Example #8
def train_rls():
    """Train RankRLS; the regularization parameter is chosen by k-fold CV."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Fold partition arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    grid = [2.**exp for exp in range(-10, 10)]
    learner = KfoldRankRLS(X_train, Y_train, folds=folds, regparams=grid, measure=cindex)
    print("kfold performances " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    P_test = learner.predict(X_test)
    print("test set cindex %f" % cindex(Y_test, P_test))
# Example #9
def main():
    """Setting C split: grid search over TwoStepRLS regularization parameters."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    n_targets = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train, regparam1=1.0, regparam2=1.0)
    # Random split of the targets into 5 folds
    folds = random_folds(n_targets, 5, seed=12345)
    for exp1 in range(-8, -4):
        for exp2 in range(20, 25):
            learner.solve(2.**exp1, 2.**exp2)
            test_perf = cindex(Y_test, learner.predict(X1_test, X2_test))
            print("regparam 2**%d 2**%d, test set cindex %f" %(exp1, exp2, test_perf))
            cv_perf = cindex(Y_train, learner.x2_kfold_cv(folds))
            print("regparam 2**%d 2**%d, K-fold cindex %f" %(exp1, exp2, cv_perf))
# Example #10
def main():
    """In-sample k-fold CV over drug-target pairs for TwoStepRLS."""
    X1, X2, Y = davis_data.load_davis()
    n, m = X1.shape[0], X2.shape[0]
    # Flatten the label matrix in Fortran (column-major) order
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    # Random 5-way split of all n*m drug-target pairs...
    pair_folds = random_folds(n * m, 5, seed=12345)
    # ...mapped back to (drug_indices, target_indices) tuples
    pair_folds = [np.unravel_index(fold, (n, m)) for fold in pair_folds]
    for exp1 in range(-8, -4):
        for exp2 in range(20, 25):
            learner.solve(2.**exp1, 2.**exp2)
            P = learner.in_sample_kfoldcv(pair_folds)
            print("regparam 2**%d 2**%d, cindex %f" %(exp1, exp2, cindex(Y, P)))
# Example #11
def main():
    """Grid search for TwoStepRLS with in-sample k-fold CV on drug-target pairs."""
    X1, X2, Y = davis_data.load_davis()
    n = X1.shape[0]
    m = X2.shape[0]
    Y = Y.ravel(order='F')  # column-major flattening
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    # Random 5-fold split of the pairs, first as flat indices, then mapped
    # back to (drug_indices, target_indices) tuples.
    flat_folds = random_folds(n * m, 5, seed=12345)
    index_folds = [np.unravel_index(flat, (n, m)) for flat in flat_folds]
    for e1 in range(-8, -4):
        for e2 in range(20, 25):
            learner.solve(2.**e1, 2.**e2)
            predictions = learner.in_sample_kfoldcv(index_folds)
            score = cindex(Y, predictions)
            print("regparam 2**%d 2**%d, cindex %f" %
                  (e1, e2, score))
# Example #12
def train_rls():
    """Select both the Gaussian-kernel gamma and the regparam with k-fold CV.

    For each gamma in the grid, KfoldRankRLS internally picks the best
    regparam (by k-fold cindex); the overall best (gamma, regparam) model
    is then evaluated on the test set.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams
    best_regparam = None
    best_gamma = None
    # Robustness fix: start at -inf so the first candidate always wins.
    # The original 0.0 could leave best_learner as None (and crash on
    # predict) if every CV cindex happened to be 0.
    best_perf = float("-inf")
    best_learner = None
    for gamma in gammas:
        # A new learner is initialized for each kernel parameter
        learner = KfoldRankRLS(X_train, Y_train, kernel = "GaussianKernel", folds = folds, gamma = gamma, regparams = regparams, measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" %(best_gamma, best_regparam))
    print("best kfoldcv cindex %f" %best_perf)
    print("test cindex %f" %cindex(Y_test, P_test))
# Example #13
def main():
    """Setting C split: evaluate TwoStepRLS over a regularization grid."""
    split = davis_data.settingC_split()
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = split
    target_count = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    # Random split of the targets into 5 folds
    folds = random_folds(target_count, 5, seed=12345)
    for p1 in range(-8, -4):
        for p2 in range(20, 25):
            learner.solve(2.**p1, 2.**p2)
            P = learner.predict(X1_test, X2_test)
            print("regparam 2**%d 2**%d, test set cindex %f" %
                  (p1, p2, cindex(Y_test, P)))
            P = learner.x2_kfold_cv(folds)
            print("regparam 2**%d 2**%d, K-fold cindex %f" %
                  (p1, p2, cindex(Y_train, P)))