def skcv_knn(coordinates, Xdata, Ydata, number_of_folds, dzradii, k_neighbors, visualization):
    """Spatial k-fold cross-validation (SKCV) for a k-nearest-neighbors regressor.

    For each dead zone radius, training points lying within the radius of the
    test fold are removed before fitting, and the concordance index (C-index)
    of the out-of-fold predictions is recorded.

    Parameters
    ----------
    coordinates : array-like, spatial coordinates of the samples
    Xdata : feature matrix, one row per sample
    Ydata : target values, aligned with Xdata
    number_of_folds : int, number of random CV folds
    dzradii : sequence of dead zone radii (meters), processed in order
    k_neighbors : int, n_neighbors for KNeighborsRegressor
    visualization : bool, if truthy plot test/dead-zone points per fold

    Returns
    -------
    numpy.ndarray of shape (len(dzradii), 2): column 0 is the radius,
    column 1 the C-index achieved with that radius.
    """
    print("Starting skcv-knn analysis...")
    performanceTable = np.zeros([len(dzradii), 2])
    # Sorted pairwise distance matrix and its index permutation (computed once).
    data_distances, data_distance_indexes = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    max_radius = dzradii[-1]  # hoisted: used only for the progress message
    for rind, dzradius in enumerate(dzradii):
        print("Analysis ongoing, dead zone radius: " + str(dzradius) + "m / " + str(max_radius) + "m")
        # Each fold is expanded with the indices inside its dead zone.
        dz_folds = dzfolds(dzradius, folds, data_distances, data_distance_indexes)
        P = np.zeros(Ydata.shape)  # out-of-fold predictions
        for fold_id, dz_fold in enumerate(dz_folds):
            # Train on everything outside the fold and its dead zone.
            X_tr = np.delete(Xdata, dz_fold, axis=0)
            Y_tr = np.delete(Ydata, dz_fold, axis=0)
            learner = KNeighborsRegressor(n_neighbors=k_neighbors)
            learner.fit(X_tr, Y_tr)
            preds = learner.predict(Xdata[dz_fold])
            # Keep only the predictions for the original fold members;
            # assumes dzfolds puts the fold's own indices first in dz_fold
            # (TODO confirm against dzfolds implementation).
            if preds.ndim == 0:
                P[folds[fold_id]] = preds
            else:
                P[folds[fold_id]] = preds[0:len(folds[fold_id])]
            if visualization:
                testcoords = coordinates[folds[fold_id], :]
                dzcoords = coordinates[dz_fold, :]
                visualize_skcv(coordinates, testcoords, dzcoords, dzradius)
        perf = cindex(Ydata, P)
        performanceTable[rind, 0] = dzradius
        performanceTable[rind, 1] = perf
        plotRes_skcv(performanceTable, rind, number_of_folds, "K-nn")
    print("Analysis done.")
    return performanceTable
def skcv_rls(coordinates, Xdata, Ydata, number_of_folds, dzradii, regparam, visualization):
    """Spatial k-fold cross-validation (SKCV) for an RLS regressor.

    Like skcv_knn, but the model is trained once per radius and fold
    predictions come from RLS's efficient hold-out computation rather than
    retraining per fold.

    Parameters
    ----------
    coordinates : array-like, spatial coordinates of the samples
    Xdata : feature matrix, one row per sample
    Ydata : target values, aligned with Xdata
    number_of_folds : int, number of random CV folds
    dzradii : sequence of dead zone radii (meters), processed in order
    regparam : regularization parameter passed to RLS
    visualization : bool, if truthy plot test/dead-zone points per fold

    Returns
    -------
    numpy.ndarray of shape (len(dzradii), 2): column 0 is the radius,
    column 1 the C-index achieved with that radius.
    """
    print("Starting skcv-rls analysis...")
    performanceTable = np.zeros([len(dzradii), 2])
    # Sorted pairwise distance matrix and its index permutation (computed once).
    data_distances, data_distance_indexes = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    max_radius = dzradii[-1]  # hoisted: used only for the progress message
    for rind, dzradius in enumerate(dzradii):
        print("Analysis ongoing, dead zone radius: " + str(dzradius) + "m / " + str(max_radius) + "m")
        # Each fold is expanded with the indices inside its dead zone.
        dz_folds = dzfolds(dzradius, folds, data_distances, data_distance_indexes)
        learner = RLS(Xdata, Ydata, regparam=regparam)
        P = np.zeros(Ydata.shape)  # out-of-fold predictions
        for fold_id, dz_fold in enumerate(dz_folds):
            # holdout() yields predictions for dz_fold without retraining.
            preds = learner.holdout(dz_fold)
            # Keep only the predictions for the original fold members;
            # assumes dzfolds puts the fold's own indices first in dz_fold
            # (TODO confirm against dzfolds implementation).
            if preds.ndim == 0:
                P[folds[fold_id]] = preds
            else:
                P[folds[fold_id]] = preds[0:len(folds[fold_id])]
            if visualization:
                testcoords = coordinates[folds[fold_id], :]
                dzcoords = coordinates[dz_fold, :]
                visualize_skcv(coordinates, testcoords, dzcoords, dzradius)
        perf = cindex(Ydata, P)
        performanceTable[rind, 0] = dzradius
        performanceTable[rind, 1] = perf
        plotRes_skcv(performanceTable, rind, number_of_folds, "rls")
    print("Analysis done.")
    return performanceTable
def train_rls():
    """Select both the Gaussian-kernel gamma and the regularization parameter
    with k-fold cross-validation, then report CV and test-set C-index.

    A new KfoldRankRLS learner is fitted for each gamma candidate; the learner
    itself picks the best regparam from the shared grid. The (gamma, regparam)
    pair with the best CV performance is used for the final test prediction.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    # Fold partition: train_size, k, random_seed.
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams  # same log-spaced grid reused for the kernel width
    best_regparam = None
    best_gamma = None
    best_perf = 0.
    best_learner = None
    for gamma in gammas:
        # New RLS is initialized for each kernel parameter.
        learner = KfoldRankRLS(X_train, Y_train, kernel="GaussianKernel",
                               folds=folds, gamma=gamma,
                               regparams=regparams, measure=cindex)
        perf = np.max(learner.cv_performances)
        # "best_learner is None" guard: the original strict comparison against
        # an initial 0. could leave best_learner unset (and crash below) if
        # every CV performance were 0.
        if best_learner is None or perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
def main():
    """Setting D experiment on the Davis data: grid-search two regularization
    parameters, reporting test-set and out-of-sample k-fold CV C-index."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingD_split()
    n_drugs = X1_train.shape[0]
    n_targets = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    # Create random split to 5 folds for both drugs and targets.
    drug_folds = random_folds(n_drugs, 5, seed=123)
    target_folds = random_folds(n_targets, 5, seed=456)
    for lp1 in range(-8, -4):
        for lp2 in range(20, 25):
            learner.solve(2.**lp1, 2.**lp2)
            test_preds = learner.predict(X1_test, X2_test)
            test_perf = cindex(Y_test, test_preds)
            print("regparam 2**%d 2**%d, test set cindex %f" % (lp1, lp2, test_perf))
            cv_preds = learner.out_of_sample_kfold_cv(drug_folds, target_folds)
            cv_perf = cindex(Y_train, cv_preds)
            print("regparam 2**%d 2**%d, out-of-sample loo cindex %f" % (lp1, lp2, cv_perf))
def train_rls():
    """Train RankRLS with the regularization parameter selected automatically
    by k-fold cross-validation, then report CV and test-set performance."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Generate fold partition, arguments: train_size, k, random_seed.
    folds = random_folds(len(Y_train), 5, 10)
    grid = [2.**exponent for exponent in range(-10, 10)]
    learner = KfoldRankRLS(X_train, Y_train, folds=folds,
                           regparams=grid, measure=cindex)
    P_test = learner.predict(X_test)
    print("kfold performances " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
def train_rls():
    """Train GlobalRankRLS with default parameters (regparam=1.0, linear
    kernel) and report 5-fold cross-validated and test-set C-index."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Generate fold partition, arguments: train_size, k, random_seed.
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    # holdout() gives fold predictions without retraining the model.
    fold_scores = [cindex(Y_train[fold], learner.holdout(fold)) for fold in folds]
    print("5-fold cross-validation cindex %f" % np.mean(fold_scores))
    print("test cindex %f" % cindex(Y_test, learner.predict(X_test)))
def train_rls():
    """Train GlobalRankRLS with default parameters (regparam=1.0, linear
    kernel); estimate performance with 5-fold CV and on the test set."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Generate fold partition, arguments: train_size, k, random_seed.
    folds = random_folds(len(Y_train), 5, 10)
    model = GlobalRankRLS(X_train, Y_train)
    scores = []
    for test_idx in folds:
        # Hold-out predictions for this fold, computed without retraining.
        holdout_preds = model.holdout(test_idx)
        scores.append(cindex(Y_train[test_idx], holdout_preds))
    print("5-fold cross-validation cindex %f" % np.mean(scores))
    predictions = model.predict(X_test)
    print("test cindex %f" % cindex(Y_test, predictions))
def train_rls():
    """Train RankRLS, letting k-fold cross-validation pick the regularization
    parameter from a log-spaced grid; print CV and test-set results."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # Generate fold partition, arguments: train_size, k, random_seed.
    folds = random_folds(len(Y_train), 5, 10)
    candidates = [2.**power for power in range(-10, 10)]
    model = KfoldRankRLS(X_train, Y_train, folds=folds,
                         regparams=candidates, measure=cindex)
    cv_scores = model.cv_performances
    test_preds = model.predict(X_test)
    print("kfold performances " + str(cv_scores))
    print("chosen regparam %f" % model.regparam)
    print("test set cindex %f" % cindex(Y_test, test_preds))
def main():
    """Setting C experiment on the Davis data: grid-search both regularization
    parameters, reporting test-set and target-wise k-fold CV C-index."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    target_count = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    # Create random split to 5 folds for the targets.
    folds = random_folds(target_count, 5, seed=12345)
    for lp1 in range(-8, -4):
        for lp2 in range(20, 25):
            learner.solve(2.**lp1, 2.**lp2)
            test_preds = learner.predict(X1_test, X2_test)
            test_perf = cindex(Y_test, test_preds)
            print("regparam 2**%d 2**%d, test set cindex %f" % (lp1, lp2, test_perf))
            cv_preds = learner.x2_kfold_cv(folds)
            cv_perf = cindex(Y_train, cv_preds)
            print("regparam 2**%d 2**%d, K-fold cindex %f" % (lp1, lp2, cv_perf))
def main():
    """In-sample k-fold CV on the full Davis data over drug-target pairs,
    grid-searching both TwoStepRLS regularization parameters."""
    X1, X2, Y = davis_data.load_davis()
    n = X1.shape[0]
    m = X2.shape[0]
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    # Create random split to 5 folds for the drug-target pairs, then map the
    # flat indices back to (drug_indices, target_indices) tuples.
    pair_folds = [np.unravel_index(flat_fold, (n, m))
                  for flat_fold in random_folds(n * m, 5, seed=12345)]
    for lp1 in range(-8, -4):
        for lp2 in range(20, 25):
            learner.solve(2.**lp1, 2.**lp2)
            P = learner.in_sample_kfoldcv(pair_folds)
            print("regparam 2**%d 2**%d, cindex %f" % (lp1, lp2, cindex(Y, P)))
def main():
    """Grid-search both regularization parameters of TwoStepRLS, scoring each
    pair by in-sample k-fold CV over randomly split drug-target pairs."""
    X1, X2, Y = davis_data.load_davis()
    drug_count = X1.shape[0]
    target_count = X2.shape[0]
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    # Create random split to 5 folds for the drug-target pairs.
    flat_folds = random_folds(drug_count * target_count, 5, seed=12345)
    # Map the indices back to (drug_indices, target_indices).
    folds = [np.unravel_index(fold, (drug_count, target_count))
             for fold in flat_folds]
    for exp1 in range(-8, -4):
        for exp2 in range(20, 25):
            learner.solve(2.**exp1, 2.**exp2)
            predictions = learner.in_sample_kfoldcv(folds)
            score = cindex(Y, predictions)
            print("regparam 2**%d 2**%d, cindex %f" % (exp1, exp2, score))
def train_rls():
    """Jointly select the Gaussian-kernel gamma and the regularization
    parameter with k-fold cross-validation; report CV and test C-index.

    One KfoldRankRLS learner is fitted per gamma candidate; each learner picks
    its own best regparam from the shared grid. The overall best (gamma,
    regparam) pair is used for the final test prediction.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    # Fold partition: train_size, k, random_seed.
    folds = random_folds(len(Y_train), 5, 10)
    regparams = [2.**i for i in range(-15, 16)]
    gammas = regparams  # same log-spaced grid reused for the kernel width
    best_regparam = None
    best_gamma = None
    best_perf = 0.
    best_learner = None
    for gamma in gammas:
        # New RLS is initialized for each kernel parameter.
        learner = KfoldRankRLS(X_train, Y_train, kernel="GaussianKernel",
                               folds=folds, gamma=gamma,
                               regparams=regparams, measure=cindex)
        perf = np.max(learner.cv_performances)
        # "best_learner is None" guard: the original strict comparison against
        # an initial 0. could leave best_learner unset (and crash below) if
        # every CV performance were 0.
        if best_learner is None or perf > best_perf:
            best_perf = perf
            best_regparam = learner.regparam
            best_gamma = gamma
            best_learner = learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
def main():
    """Setting C experiment on the Davis data: for each pair of regularization
    parameters, print test-set C-index and target-wise k-fold CV C-index."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    num_targets = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    exponents1 = range(-8, -4)
    exponents2 = range(20, 25)
    # Create random split to 5 folds for the targets.
    target_folds = random_folds(num_targets, 5, seed=12345)
    for e1 in exponents1:
        for e2 in exponents2:
            learner.solve(2.**e1, 2.**e2)
            held_out_preds = learner.predict(X1_test, X2_test)
            held_out_perf = cindex(Y_test, held_out_preds)
            print("regparam 2**%d 2**%d, test set cindex %f" % (e1, e2, held_out_perf))
            fold_preds = learner.x2_kfold_cv(target_folds)
            fold_perf = cindex(Y_train, fold_preds)
            print("regparam 2**%d 2**%d, K-fold cindex %f" % (e1, e2, fold_perf))