def train_rls():
    """Leave-query-out cross-validation for QueryRankRLS, where all
    instances belonging to a single sentence (query) form one fold."""
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # Sentence ids act as query ids.
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    learner = QueryRankRLS(X_train, Y_train, qids_train)
    P_test = learner.predict(X_test)
    # Folds with constant labels are skipped: cindex is undefined there.
    cv_perfs = [
        cindex(Y_train[fold], learner.holdout(fold))
        for fold in map_ids(qids_train)
        if np.var(Y_train[fold]) != 0
    ]
    print("leave-query-out cross-validation cindex %f" % np.mean(cv_perfs))
    # Ranking accuracy is computed separately for each test query,
    # again skipping queries where all instances share the same score.
    query_perfs = [
        cindex(Y_test[query], P_test[query])
        for query in map_ids(qids_test)
        if np.var(Y_test[query]) != 0
    ]
    print("test cindex %f" % np.mean(query_perfs))
def train_rls():
    """Leave-query-out CV for QueryRankRLS; each sentence's instances
    form one fold (query)."""
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # Sentence ids serve as query ids.
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    ranker = QueryRankRLS(X_train, Y_train, qids_train)
    test_preds = ranker.predict(X_test)
    cv_scores = []
    for fold in map_ids(qids_train):
        # cindex is undefined when all labels in the fold are equal.
        if np.var(Y_train[fold]) == 0:
            continue
        cv_scores.append(cindex(Y_train[fold], ranker.holdout(fold)))
    print("leave-query-out cross-validation cindex %f" % np.mean(cv_scores))
    test_scores = []
    # Ranking accuracy is measured per test query; constant-score
    # queries are skipped (cindex undefined).
    for query in map_ids(qids_test):
        if np.var(Y_test[query]) == 0:
            continue
        test_scores.append(cindex(Y_test[query], test_preds[query]))
    print("test cindex %f" % np.mean(test_scores))
def main():
    """Grid search over TwoStepRLS regularization parameters on the
    setting-3 split, comparing test-set cindex with leave-column-out CV."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting3_split()
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    for lr1 in range(-8, -4):
        for lr2 in range(20, 25):
            learner.solve(2. ** lr1, 2. ** lr2)
            test_preds = learner.predict(X1_test, X2_test)
            print("regparam 2**%d 2**%d, test set cindex %f"
                  % (lr1, lr2, cindex(Y_test, test_preds)))
            # Leave-column-out: hold out one X2 instance (column) at a time.
            cv_preds = learner.leave_x2_out()
            print("regparam 2**%d 2**%d, leave-column-out cindex %f"
                  % (lr1, lr2, cindex(Y_train, cv_preds)))
def main():
    """Grid search over TwoStepRLS regularization parameters on the
    setting-4 split, comparing test-set cindex with out-of-sample LOO."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting4_split()
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    for lr1 in range(-8, -4):
        for lr2 in range(20, 25):
            learner.solve(2. ** lr1, 2. ** lr2)
            test_preds = learner.predict(X1_test, X2_test)
            print("regparam 2**%d 2**%d, test set cindex %f"
                  % (lr1, lr2, cindex(Y_test, test_preds)))
            loo_preds = learner.out_of_sample_loo()
            print("regparam 2**%d 2**%d, out-of-sample loo cindex %f"
                  % (lr1, lr2, cindex(Y_train, loo_preds)))
def train_rls():
    """Train GlobalRankRLS with default parameters (regparam=1.0,
    kernel='LinearKernel') and estimate ranking accuracy with 5-fold CV."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # random_folds arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    learner = GlobalRankRLS(X_train, Y_train)
    fold_scores = [cindex(Y_train[fold], learner.holdout(fold)) for fold in folds]
    print("5-fold cross-validation cindex %f" % np.mean(fold_scores))
    print("test cindex %f" % cindex(Y_test, learner.predict(X_test)))
def train_rls():
    """GlobalRankRLS with default parameters (regparam=1.0,
    kernel='LinearKernel'); 5-fold CV estimate followed by test cindex."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # random_folds arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    ranker = GlobalRankRLS(X_train, Y_train)
    scores = []
    for fold in folds:
        holdout_preds = ranker.holdout(fold)
        scores.append(cindex(Y_train[fold], holdout_preds))
    print("5-fold cross-validation cindex %f" % np.mean(scores))
    print("test cindex %f" % cindex(Y_test, ranker.predict(X_test)))
def train_rls():
    """Train GlobalRankRLS with default parameters (regparam=1.0,
    kernel='LinearKernel') and report test-set cindex."""
    X_train, Y_train, X_test, Y_test = load_housing()
    model = GlobalRankRLS(X_train, Y_train)
    predictions = model.predict(X_test)
    print("test cindex %f" % cindex(Y_test, predictions))
def train_rls():
    """Jointly select the Gaussian-kernel gamma and the regularization
    parameter with k-fold cross-validation."""
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    grid = [2. ** i for i in range(-15, 16)]
    # (perf, gamma, regparam, learner); a candidate replaces it only
    # when its CV performance is strictly better.
    best = (0., None, None, None)
    for gamma in grid:  # gammas searched over the same grid as regparams
        # A new learner is initialized for each kernel parameter;
        # regparam is tuned internally by KfoldRankRLS.
        learner = KfoldRankRLS(X_train, Y_train, kernel="GaussianKernel",
                               folds=folds, gamma=gamma,
                               regparams=grid, measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best[0]:
            best = (perf, gamma, learner.regparam, learner)
    best_perf, best_gamma, best_regparam, best_learner = best
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
def skcv_rls(coordinates, Xdata, Ydata, number_of_folds, dzradii, regparam, visualization):
    """Spatial k-fold cross-validation for RLS over increasing dead-zone radii.

    Returns a (len(dzradii), 2) table whose rows are (radius, cindex).
    """
    print("Starting skcv-rls analysis...")
    performanceTable = np.zeros([len(dzradii), 2])
    # Sorted pairwise distance matrix and the corresponding index order.
    dists, dist_order = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    for radius_idx, radius in enumerate(dzradii):
        print("Analysis ongoing, dead zone radius: %sm / %sm" % (radius, dzradii[-1]))
        # Each fold is extended with its dead-zone neighbours.
        dz_folds = dzfolds(radius, folds, dists, dist_order)
        learner = RLS(Xdata, Ydata, regparam=regparam)
        P = np.zeros(Ydata.shape)
        for fold_id, dz_fold in enumerate(dz_folds):
            test_idx = folds[fold_id]
            preds = learner.holdout(dz_fold)
            # A scalar prediction is broadcast over the fold; otherwise only
            # the true test points (the head of the dead-zone fold) are kept.
            P[test_idx] = preds if preds.ndim == 0 else preds[0:len(test_idx)]
            if visualization:
                visualize_skcv(coordinates, coordinates[test_idx, :],
                               coordinates[dz_fold, :], radius)
        performanceTable[radius_idx, 0] = radius
        performanceTable[radius_idx, 1] = cindex(Ydata, P)
        plotRes_skcv(performanceTable, radius_idx, number_of_folds, "rls")
    print("Analysis done.")
    return performanceTable
def skcv_knn(coordinates, Xdata, Ydata, number_of_folds, dzradii, k_neighbors, visualization):
    """Spatial k-fold cross-validation for k-NN regression over increasing
    dead-zone radii.

    Returns a (len(dzradii), 2) table whose rows are (radius, cindex).
    """
    print("Starting skcv-knn analysis...")
    performanceTable = np.zeros([len(dzradii), 2])
    # Sorted pairwise distance matrix and the corresponding index order.
    dists, dist_order = distanceMatrix(coordinates)
    folds = random_folds(len(Ydata), number_of_folds)
    for radius_idx, radius in enumerate(dzradii):
        print("Analysis ongoing, dead zone radius: %sm / %sm" % (radius, dzradii[-1]))
        dz_folds = dzfolds(radius, folds, dists, dist_order)
        P = np.zeros(Ydata.shape)
        for fold_id, dz_fold in enumerate(dz_folds):
            test_idx = folds[fold_id]
            # Retrain without the dead-zone points, then predict on them.
            X_tr = np.delete(Xdata, dz_fold, axis=0)
            Y_tr = np.delete(Ydata, dz_fold, axis=0)
            knn = KNeighborsRegressor(n_neighbors=k_neighbors)
            knn.fit(X_tr, Y_tr)
            preds = knn.predict(Xdata[dz_fold])
            # Scalar predictions are broadcast; otherwise only the true test
            # points (the head of the dead-zone fold) are kept.
            P[test_idx] = preds if preds.ndim == 0 else preds[0:len(test_idx)]
            if visualization:
                visualize_skcv(coordinates, coordinates[test_idx, :],
                               coordinates[dz_fold, :], radius)
        performanceTable[radius_idx, 0] = radius
        performanceTable[radius_idx, 1] = cindex(Ydata, P)
        plotRes_skcv(performanceTable, radius_idx, number_of_folds, "K-nn")
    print("Analysis done.")
    return performanceTable
def train_rls():
    """Select regparam with leave-query-out CV, where the instances of a
    single sentence together form one query/fold."""
    X_train = read_sparse("train_2000_x.txt")
    Y_train = np.loadtxt("train_2000_y.txt")
    X_test = read_sparse("test_2000_x.txt", X_train.shape[1])
    Y_test = np.loadtxt("test_2000_y.txt")
    # Sentence ids serve as query ids.
    qids_train = np.loadtxt("train_2000_qids.txt")
    qids_test = np.loadtxt("test_2000_qids.txt")
    grid = [2. ** i for i in range(-10, 10)]
    learner = LeaveQueryOutRankRLS(X_train, Y_train, qids_train,
                                   regparams=grid, measure=cindex)
    P_test = learner.predict(X_test)
    print("leave-query-out performances " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    # Ranking accuracy is computed separately per test query, skipping
    # queries with constant scores (cindex undefined there).
    query_perfs = [
        cindex(Y_test[query], P_test[query])
        for query in map_ids(qids_test)
        if np.var(Y_test[query]) != 0
    ]
    print("test cindex %f" % np.mean(query_perfs))
def main():
    """settingC split: k-fold CV over the targets (columns) compared with
    test-set cindex for a grid of TwoStepRLS parameters."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    # Random split of the targets into 5 folds.
    target_folds = random_folds(X2_train.shape[0], 5, seed=12345)
    for lr1 in range(-8, -4):
        for lr2 in range(20, 25):
            learner.solve(2. ** lr1, 2. ** lr2)
            test_preds = learner.predict(X1_test, X2_test)
            print("regparam 2**%d 2**%d, test set cindex %f"
                  % (lr1, lr2, cindex(Y_test, test_preds)))
            cv_preds = learner.x2_kfold_cv(target_folds)
            print("regparam 2**%d 2**%d, K-fold cindex %f"
                  % (lr1, lr2, cindex(Y_train, cv_preds)))
def main():
    """settingB split: test-set cindex of KronRLS over a regparam grid."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingB_split()
    learner = KronRLS(X1=X1_train, X2=X2_train, Y=Y_train)
    for log_rp in range(15, 35):
        learner.solve(2. ** log_rp)
        preds = learner.predict(X1_test, X2_test)
        print("regparam 2**%d, cindex %f" % (log_rp, cindex(Y_test, preds)))
def main():
    """setting2 split: test-set cindex of KronRLS over a regparam grid."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.setting2_split()
    learner = KronRLS(X1=X1_train, X2=X2_train, Y=Y_train)
    for log_rp in range(15, 35):
        learner.solve(2. ** log_rp)
        preds = learner.predict(X1_test, X2_test)
        print("regparam 2**%d, cindex %f" % (log_rp, cindex(Y_test, preds)))
def train_rls():
    """Train RankRLS, selecting the regularization parameter automatically
    with leave-pair-out cross-validation."""
    X_train, Y_train, X_test, Y_test = load_housing()
    grid = [2. ** i for i in range(-10, 10)]
    learner = LeavePairOutRankRLS(X_train, Y_train, regparams=grid)
    P_test = learner.predict(X_test)
    print("leave-pair-out performances " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
def main():
    """In-sample leave-one-out for KronRLS over the full Davis data."""
    X1, X2, Y = davis_data.load_davis()
    # Column-major raveling matches the (X1, X2) pair ordering.
    Y = Y.ravel(order='F')
    learner = KronRLS(X1=X1, X2=X2, Y=Y)
    for log_rp in range(15, 35):
        learner.solve(2. ** log_rp)
        loo_preds = learner.in_sample_loo()
        print("regparam 2**%d, cindex %f" % (log_rp, cindex(Y, loo_preds)))
def train_rls():
    """Train RLS with the regularization parameter chosen automatically
    by leave-one-out cross-validation."""
    X_train, Y_train, X_test, Y_test = load_housing()
    grid = [2. ** i for i in range(-15, 16)]
    learner = LeaveOneOutRLS(X_train, Y_train, regparams=grid, measure=cindex)
    P_test = learner.predict(X_test)
    print("leave-one-out cindex " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test cindex %f" % cindex(Y_test, P_test))
def main():
    """settingD split: out-of-sample k-fold CV over both drugs and targets,
    compared against test-set cindex for a TwoStepRLS parameter grid."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingD_split()
    n = X1_train.shape[0]
    m = X2_train.shape[0]
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    #Create random split to 5 folds for both drugs and targets
    drug_folds = random_folds(n, 5, seed=123)
    target_folds = random_folds(m, 5, seed=456)
    for log_regparam1 in range(-8, -4):
        for log_regparam2 in range(20, 25):
            learner.solve(2.**log_regparam1, 2.**log_regparam2)
            P = learner.predict(X1_test, X2_test)
            perf = cindex(Y_test, P)
            print("regparam 2**%d 2**%d, test set cindex %f"
                  % (log_regparam1, log_regparam2, perf))
            P = learner.out_of_sample_kfold_cv(drug_folds, target_folds)
            perf = cindex(Y_train, P)
            # FIX: the message previously claimed "out-of-sample loo", but the
            # estimate computed above is out-of-sample k-fold CV.
            print("regparam 2**%d 2**%d, out-of-sample kfold cindex %f"
                  % (log_regparam1, log_regparam2, perf))
def main():
    """In-sample leave-one-out for TwoStepRLS over the full Davis data,
    evaluated on a 2-D grid of regularization parameters."""
    X1, X2, Y = davis_data.load_davis()
    # Column-major raveling matches the (X1, X2) pair ordering.
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    for lr1 in range(-8, -4):
        for lr2 in range(20, 25):
            learner.solve(2. ** lr1, 2. ** lr2)
            loo_preds = learner.in_sample_loo()
            print("regparam 2**%d 2**%d, cindex %f"
                  % (lr1, lr2, cindex(Y, loo_preds)))
def train_rls():
    """Train RankRLS, selecting the regularization parameter automatically
    with k-fold cross-validation."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # random_folds arguments: train_size, k, random_seed
    folds = random_folds(len(Y_train), 5, 10)
    grid = [2. ** i for i in range(-10, 10)]
    learner = KfoldRankRLS(X_train, Y_train, folds=folds,
                           regparams=grid, measure=cindex)
    P_test = learner.predict(X_test)
    print("kfold performances " + str(learner.cv_performances))
    print("chosen regparam %f" % learner.regparam)
    print("test set cindex %f" % cindex(Y_test, P_test))
def main():
    """Symmetric out-of-sample LOO for TwoStepRLS on drug-drug similarity
    data, using the same kernel on both axes."""
    X = np.loadtxt("drug-drug_similarities_2D.txt")
    Y = np.loadtxt("drug-drug_similarities_ECFP4.txt")
    Y = Y.ravel(order='F')
    # Kernel from the similarity features; presumably X is symmetric,
    # so X.dot(X) equals X.dot(X.T) -- TODO confirm.
    K = np.dot(X, X)
    learner = TwoStepRLS(K1=K, K2=K, Y=Y, regparam1=1.0, regparam2=1.0)
    for log_rp in range(-10, 0):
        learner.solve(2. ** log_rp, 2. ** log_rp)
        loo_preds = learner.out_of_sample_loo_symmetric()
        print("regparam 2**%d, cindex %f" % (log_rp, cindex(Y, loo_preds)))
def main():
    """Out-of-sample symmetric LOO for TwoStepRLS on drug-drug similarities."""
    X = np.loadtxt("drug-drug_similarities_2D.txt")
    Y = np.loadtxt("drug-drug_similarities_ECFP4.txt")
    Y = Y.ravel(order='F')
    # Kernel built from the similarity features; presumably X is symmetric
    # so the product is a valid kernel matrix -- TODO confirm.
    kernel = np.dot(X, X)
    model = TwoStepRLS(K1=kernel, K2=kernel, Y=Y, regparam1=1.0, regparam2=1.0)
    for exponent in range(-10, 0):
        regparam = 2. ** exponent
        model.solve(regparam, regparam)
        preds = model.out_of_sample_loo_symmetric()
        print("regparam 2**%d, cindex %f" % (exponent, cindex(Y, preds)))
def main():
    """settingB split: leave-row-out CV versus test-set cindex for a grid
    of TwoStepRLS regularization parameters."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingB_split()
    learner = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                         regparam1=1.0, regparam2=1.0)
    for lr1 in range(-8, -4):
        for lr2 in range(20, 25):
            learner.solve(2. ** lr1, 2. ** lr2)
            test_preds = learner.predict(X1_test, X2_test)
            print("regparam 2**%d 2**%d, test set cindex %f"
                  % (lr1, lr2, cindex(Y_test, test_preds)))
            # Leave-row-out: hold out one X1 instance (row) at a time.
            cv_preds = learner.leave_x1_out()
            print("regparam 2**%d 2**%d, leave-row-out cindex %f"
                  % (lr1, lr2, cindex(Y_train, cv_preds)))
def main():
    """TwoStepRLS in-sample LOO on the full Davis data over a 2-D grid."""
    X1, X2, Y = davis_data.load_davis()
    Y = Y.ravel(order='F')  # column-major, matching the pair ordering
    model = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    for exp1 in range(-8, -4):
        for exp2 in range(20, 25):
            model.solve(2. ** exp1, 2. ** exp2)
            preds = model.in_sample_loo()
            print("regparam 2**%d 2**%d, cindex %f"
                  % (exp1, exp2, cindex(Y, preds)))
def train_rls():
    """RLS with the regularization parameter selected by leave-one-out CV."""
    X_train, Y_train, X_test, Y_test = load_housing()
    regparam_grid = [2. ** i for i in range(-15, 16)]
    model = LeaveOneOutRLS(X_train, Y_train, regparams=regparam_grid,
                           measure=cindex)
    test_preds = model.predict(X_test)
    print("leave-one-out cindex " + str(model.cv_performances))
    print("chosen regparam %f" % model.regparam)
    print("test cindex %f" % cindex(Y_test, test_preds))
def main():
    """settingD split with precomputed Gaussian kernels for drugs (X1)
    and targets (X2), evaluated over a KronRLS regparam grid."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingD_split()
    drug_kernel = GaussianKernel(X1_train, gamma=0.01)
    target_kernel = GaussianKernel(X2_train, gamma=10**-9)
    K1_train = drug_kernel.getKM(X1_train)
    K1_test = drug_kernel.getKM(X1_test)
    K2_train = target_kernel.getKM(X2_train)
    K2_test = target_kernel.getKM(X2_test)
    learner = KronRLS(K1=K1_train, K2=K2_train, Y=Y_train)
    for log_rp in range(-15, 15):
        learner.solve(2. ** log_rp)
        preds = learner.predict(K1_test, K2_test)
        print("regparam 2**%d, cindex %f" % (log_rp, cindex(Y_test, preds)))
def main():
    """settingC split: target-wise k-fold CV versus test-set cindex for a
    TwoStepRLS parameter grid."""
    X1_train, X2_train, Y_train, X1_test, X2_test, Y_test = davis_data.settingC_split()
    model = TwoStepRLS(X1=X1_train, X2=X2_train, Y=Y_train,
                       regparam1=1.0, regparam2=1.0)
    # Random split of the targets into 5 folds.
    folds = random_folds(X2_train.shape[0], 5, seed=12345)
    for exp1 in range(-8, -4):
        for exp2 in range(20, 25):
            model.solve(2. ** exp1, 2. ** exp2)
            test_preds = model.predict(X1_test, X2_test)
            print("regparam 2**%d 2**%d, test set cindex %f"
                  % (exp1, exp2, cindex(Y_test, test_preds)))
            cv_preds = model.x2_kfold_cv(folds)
            print("regparam 2**%d 2**%d, K-fold cindex %f"
                  % (exp1, exp2, cindex(Y_train, cv_preds)))
def train_rls():
    """RankRLS with regparam selected automatically by k-fold CV."""
    X_train, Y_train, X_test, Y_test = load_housing()
    # random_folds arguments: train_size, k, random_seed
    cv_folds = random_folds(len(Y_train), 5, 10)
    regparam_grid = [2. ** i for i in range(-10, 10)]
    model = KfoldRankRLS(X_train, Y_train, folds=cv_folds,
                         regparams=regparam_grid, measure=cindex)
    test_preds = model.predict(X_test)
    print("kfold performances " + str(model.cv_performances))
    print("chosen regparam %f" % model.regparam)
    print("test set cindex %f" % cindex(Y_test, test_preds))
def main():
    """In-sample k-fold CV over drug-target pairs for TwoStepRLS on the
    full Davis data."""
    X1, X2, Y = davis_data.load_davis()
    n, m = X1.shape[0], X2.shape[0]
    Y = Y.ravel(order='F')
    learner = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    # 5 random folds over the n*m pairs, each mapped back to a
    # (drug_indices, target_indices) tuple.
    pair_folds = [np.unravel_index(fold, (n, m))
                  for fold in random_folds(n * m, 5, seed=12345)]
    for lr1 in range(-8, -4):
        for lr2 in range(20, 25):
            learner.solve(2. ** lr1, 2. ** lr2)
            cv_preds = learner.in_sample_kfoldcv(pair_folds)
            print("regparam 2**%d 2**%d, cindex %f"
                  % (lr1, lr2, cindex(Y, cv_preds)))
def main():
    """TwoStepRLS in-sample k-fold CV over randomly split drug-target pairs."""
    X1, X2, Y = davis_data.load_davis()
    n_drugs = X1.shape[0]
    n_targets = X2.shape[0]
    Y = Y.ravel(order='F')
    model = TwoStepRLS(X1=X1, X2=X2, Y=Y, regparam1=1.0, regparam2=1.0)
    # Split the flat pair indices into 5 folds, then map each back to
    # (drug_indices, target_indices).
    flat_folds = random_folds(n_drugs * n_targets, 5, seed=12345)
    pair_folds = [np.unravel_index(fold, (n_drugs, n_targets))
                  for fold in flat_folds]
    for exp1 in range(-8, -4):
        for exp2 in range(20, 25):
            model.solve(2. ** exp1, 2. ** exp2)
            preds = model.in_sample_kfoldcv(pair_folds)
            print("regparam 2**%d 2**%d, cindex %f"
                  % (exp1, exp2, cindex(Y, preds)))
def train_rls():
    """Train PPRankRLS from 1000 randomly sampled pairwise preferences
    (default parameters: regparam=1.0, kernel='LinearKernel')."""
    X_train, Y_train, X_test, Y_test = load_housing()
    pairs_start = []
    pairs_end = []
    # Sample 1000 preferences; the preferred (higher-scored) instance goes
    # into pairs_start. Ties are discarded and re-sampled.
    candidates = range(len(Y_train))
    while len(pairs_start) < 1000:
        a = random.choice(candidates)
        b = random.choice(candidates)
        if Y_train[a] > Y_train[b]:
            pairs_start.append(a)
            pairs_end.append(b)
        elif Y_train[a] < Y_train[b]:
            pairs_start.append(b)
            pairs_end.append(a)
    learner = PPRankRLS(X_train, pairs_start, pairs_end)
    # Test set predictions
    print("test cindex %f" % cindex(Y_test, learner.predict(X_test)))
def train_rls():
    """Select the Gaussian-kernel gamma and the regularization parameter
    jointly via k-fold cross-validation."""
    X_train, Y_train, X_test, Y_test = load_housing()
    folds = random_folds(len(Y_train), 5, 10)
    grid = [2. ** i for i in range(-15, 16)]
    best_perf, best_gamma, best_regparam, best_learner = 0., None, None, None
    for gamma in grid:  # gamma searched over the same grid as regparam
        # A fresh learner per kernel parameter; KfoldRankRLS tunes
        # regparam internally over the grid.
        learner = KfoldRankRLS(X_train, Y_train, kernel="GaussianKernel",
                               folds=folds, gamma=gamma, regparams=grid,
                               measure=cindex)
        perf = np.max(learner.cv_performances)
        if perf > best_perf:
            best_perf, best_gamma = perf, gamma
            best_regparam, best_learner = learner.regparam, learner
    P_test = best_learner.predict(X_test)
    print("best parameters gamma %f regparam %f" % (best_gamma, best_regparam))
    print("best kfoldcv cindex %f" % best_perf)
    print("test cindex %f" % cindex(Y_test, P_test))
import numpy as np
from rlscore.learner.global_rankrls import KfoldRankRLS
from rlscore.utilities.reader import read_folds
from rlscore.utilities.reader import read_sparse
from rlscore.measure import cindex

# KfoldRankRLS demo: k-fold CV over a regparam grid, then test-set cindex.
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
folds = read_folds("./legacy_tests/data/folds.txt")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["folds"] = folds
learner = KfoldRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    # FIX: the original used Python 2 print statements, which are syntax
    # errors in Python 3 (the rest of this codebase uses print()).
    print("parameter %f cv_performance %f" % (grid[i], perfs[i]))
P = learner.predict(test_features)
test_perf = cindex(test_labels, P)
print("test set performance: %f" % test_perf)
import numpy as np
from rlscore.learner.global_rankrls import LeavePairOutRankRLS
from rlscore.reader import read_sparse
from rlscore.measure import cindex

# LeavePairOutRankRLS demo: LPO CV over a regparam grid, then test cindex.
# FIX: removed a duplicated `read_sparse` import and converted the Python 2
# print statements (syntax errors in Python 3) to print() calls.
train_labels = np.loadtxt("./examples/data/rank_train.labels")
test_labels = np.loadtxt("./examples/data/rank_test.labels")
train_features = read_sparse("./examples/data/rank_train.features")
test_features = read_sparse("./examples/data/rank_test.features")
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
learner = LeavePairOutRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print("parameter %f cv_performance %f" % (grid[i], perfs[i]))
P = learner.predict(test_features)
test_perf = cindex(test_labels, P)
print("test set performance: %f" % test_perf)
def callback(self, learner):
    """Every 10th call, evaluate and report the learner's ranking accuracy
    on the monitored entries."""
    if self.iter % 10 == 0:
        P = learner.predict(self.X1, self.X2, self.row_inds, self.col_inds)
        print("iteration %d cindex %f" % (self.iter, cindex(self.Y, P)))
    self.iter += 1
import numpy as np
from rlscore.measure import cindex

# Concordance index is a pairwise ranking measure, equivalent to AUC
# for bi-partite ranking problems.
Y = [-1, -1, -1, 1, 1]
P = [-5, 2, -1, 1, 3.2]
cind1 = cindex(Y, P)
print("My cindex is %f" % cind1)
# Real-valued labels are handled as well; here the predicted ranking is
# almost correct, with only the last two items inverted.
Y2 = [-2.2, -1.3, -0.2, 0.5, 1.1]
P2 = [-2.7, -1.1, 0.3, 0.6, 0.5]
cind2 = cindex(Y2, P2)
print("My cindex is %f" % cind2)
# Most performance measures average over the columns for multi-target
# problems:
Y_big = np.vstack((Y, Y2)).T
P_big = np.vstack((P, P2)).T
print(Y_big)
print(P_big)
print("(cind1+cind2)/2 %f" % ((cind1 + cind2) / 2.))
pred=clf.predict(K_test) #print(pred.shape) #print(Y_train.shape) mse=np.sqrt(sqerror(np.ravel(Y_test,'F'),np.ravel(pred,'F'))) MSE[i,j]=mse MSE_m = np.mean(MSE, axis=0) opt_regpram = 10. ** (reg_par[np.argmin(MSE_m)]) clf = KernelRidge(alpha=opt_regpram) K_train_o=K[train_out][:,train_out] K_test_o = K[test_out][:, train_out] Y_train_o = Y[train_out] Y_test_o = Y[test_out] clf.fit(K_train_o, Y_train_o) pred_o = clf.predict(K_test_o) mse_o[k] = np.sqrt(sqerror(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))) cind_o[k] = cindex(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F')) pear_o[k] = pearsonr(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))[0] spear_o[k] = spearmanr(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F'))[0] #plt.scatter(np.ravel(Y_test_o, 'F'), np.ravel(pred_o, 'F')) #plt.show() print("op_reg:%f, fold RMSE:%f, fold Cindex:%f, fold Pearson:%f, fold Spearman:%f" % (opt_regpram,mse_o[k], cind_o[k], pear_o[k], spear_o[k])) k+=1 #MSE_diff_pca_comp[s]=np.mean(mse_o) #s+=1 print("overall RMSE:%f, overall Cindex:%f, overall Pearson:%f, overall Spearman:%f" % (np.mean(mse_o), np.mean(cind_o), np.mean(pear_o), np.mean(spear_o)))
# LeaveQueryOutRankRLS demo: query-wise CV over a regparam grid, then
# per-query test cindex averaged over the test queries.
# FIX: the original used Python 2 print statements, which are syntax errors
# in Python 3 (the surrounding codebase uses print()).
train_labels = np.loadtxt("./examples/data/rank_train.labels")
test_labels = np.loadtxt("./examples/data/rank_test.labels")
train_qids = read_qids("./examples/data/rank_train.qids")
test_features = read_sparse("./examples/data/rank_test.features")
train_features = read_sparse("./examples/data/rank_train.features")
test_qids = read_qids("./examples/data/rank_test.qids")
kwargs = {}
kwargs['measure'] = cindex
kwargs['regparams'] = [2**i for i in range(-10, 11)]
kwargs["Y"] = train_labels
kwargs["X"] = train_features
kwargs["qids"] = train_qids
learner = LeaveQueryOutRankRLS(**kwargs)
grid = kwargs['regparams']
perfs = learner.cv_performances
for i in range(len(grid)):
    print("parameter %f cv_performance %f" % (grid[i], perfs[i]))
P = learner.predict(test_features)
from rlscore.measure.measure_utilities import UndefinedPerformance
from rlscore.measure.measure_utilities import qids_to_splits
test_qids = qids_to_splits(test_qids)
perfs = []
# Queries whose labels are all equal raise UndefinedPerformance and are
# skipped when averaging.
for query in test_qids:
    try:
        perf = cindex(test_labels[query], P[query])
        perfs.append(perf)
    except UndefinedPerformance:
        pass
test_perf = np.mean(perfs)
print("test set performance: %f" % test_perf)
import numpy as np
from rlscore.learner.query_rankrls import QueryRankRLS
from rlscore.utilities.reader import read_qids
from rlscore.utilities.reader import read_sparse
from rlscore.measure import cindex

# QueryRankRLS demo: fixed regparam, per-query test cindex averaged over
# the test queries.
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
train_qids = read_qids("./legacy_tests/data/rank_train.qids")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
train_features = read_sparse("./legacy_tests/data/rank_train.features")
test_qids = read_qids("./legacy_tests/data/rank_test.qids")
learner = QueryRankRLS(Y=train_labels, X=train_features,
                       qids=train_qids, regparam=1)
P = learner.predict(test_features)
from rlscore.measure.measure_utilities import UndefinedPerformance
from rlscore.measure.measure_utilities import qids_to_splits
test_qids = qids_to_splits(test_qids)
# Queries with constant labels raise UndefinedPerformance and are skipped.
perfs = []
for query in test_qids:
    try:
        perfs.append(cindex(test_labels[query], P[query]))
    except UndefinedPerformance:
        pass
test_perf = np.mean(perfs)
print("test set performance: %f" % test_perf)
def callback(self, learner):
    """Print the cindex on the monitored entries every 10th iteration."""
    if self.iter % 10 == 0:
        preds = learner.predict(self.X1, self.X2,
                                self.row_inds, self.col_inds)
        score = cindex(self.Y, preds)
        print("iteration %d cindex %f" % (self.iter, score))
    self.iter += 1
import numpy as np
from rlscore.measure import cindex

# Demo of the concordance index: a pairwise ranking measure that reduces
# to AUC for bi-partite ranking problems.
labels = [-1, -1, -1, 1, 1]
scores = [-5, 2, -1, 1, 3.2]
cind1 = cindex(labels, scores)
print("My cindex is %f" % cind1)
# cindex also accepts real-valued labels; this predicted ranking is almost
# correct, with only the last two items swapped.
labels2 = [-2.2, -1.3, -0.2, 0.5, 1.1]
scores2 = [-2.7, -1.1, 0.3, 0.6, 0.5]
cind2 = cindex(labels2, scores2)
print("My cindex is %f" % cind2)
# For multi-target problems, most performance measures average over columns:
Y_big = np.vstack((labels, labels2)).T
P_big = np.vstack((scores, scores2)).T
print(Y_big)
print(P_big)
print("(cind1+cind2)/2 %f" % ((cind1 + cind2) / 2.))