def bayes(self):
    """Bayesian optimization over several continuous hyperparameters."""
    results = {}
    self.current_call = 0

    def target(base_lr, l1_reg, l2_reg):
        config = self.config
        train_loader = self.train_loader
        val_loader = self.val_loader
        psp_model = self.psp_model

        # apply the candidate hyperparameters to the model config
        config.model.use_reg = True
        config.model.learning_rate = base_lr
        config.model.l1_reg = l1_reg
        config.model.l2_reg = l2_reg

        net = psp_model(config)
        best_val_miou = keras_fit(net, train_loader, val_loader)

        # accumulate results and dump them to disk after every call
        cols = ['base_lr', 'l1_reg', 'l2_reg', 'val_miou']
        for col, value in zip(cols, (base_lr, l1_reg, l2_reg, best_val_miou)):
            if col in results:
                results[col].append(value)
            else:
                results[col] = [value]

        tasks = pd.DataFrame(results, columns=cols)
        tasks.to_csv(path_or_buf='output/bayes_%s_%s.tab'
                     % (config.args.note, self.time_str), sep='\t')

        self.current_call += 1
        print('%s/%s calls, score=%0.3f'
              % (self.current_call, self.n_calls, best_val_miou))
        return best_val_miou

    bo = bayesopt(target, {'base_lr': [1e-4, 0.01],
                           'l1_reg': [1e-7, 1e-3],
                           'l2_reg': [1e-7, 1e-3]})
    bo.maximize(init_points=5, n_iter=self.n_calls, kappa=2)
    best = bo.res['max']
    print('*' * 50)
    print(best)
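
# -----------------------------------------------------------------------------
# A minimal standalone sketch of the optimizer API assumed above: `bayesopt`
# is taken to be `bayes_opt.BayesianOptimization` from the pre-1.0 `bayes_opt`
# package, where the best result is read from `bo.res['max']`. The quadratic
# objective here is a toy stand-in for the real training run.
from bayes_opt import BayesianOptimization as bayesopt

def toy_target(base_lr, l1_reg, l2_reg):
    # Any function to maximize; higher is better.
    return -((base_lr - 0.005) ** 2 + l1_reg + l2_reg)

if __name__ == '__main__':
    bo = bayesopt(toy_target, {'base_lr': (1e-4, 0.01),
                               'l1_reg': (1e-7, 1e-3),
                               'l2_reg': (1e-7, 1e-3)})
    bo.maximize(init_points=5, n_iter=10, kappa=2)
    print(bo.res['max'])  # {'max_val': ..., 'max_params': {...}}
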
def get_cv_accuracy(dpath, site, dtype, description,
                    RESULTPATH,
                    k_tune_params={},
                    USE_NCA=False,
                    graphParams={},
                    nca_train_params={},
                    USE_PCA=False,
                    USE_BAGGING=False,
                    bagging_params={},
                    bo_lims={},
                    bo_expl={},
                    bo_params={}):
    """
    Get KNN cross-validation accuracy with or without PCA and NCA.
    """

    # Get a dict of function params and save
    params_all = locals()
    with open(RESULTPATH + description + 'params_all.pkl', 'wb') as f:
        _pickle.dump(params_all, f)

    # result directory structure
    RESULTPATH_NCA = RESULTPATH + "nca/"
    RESULTPATH_KNN = RESULTPATH + "knn/"
    LOADPATH = None
    os.system('mkdir ' + RESULTPATH_NCA)
    os.system('mkdir ' + RESULTPATH_KNN)

    # Instantiate a KNN survival model.
    knnmodel = knn.SurvivalKNN(RESULTPATH_KNN, description=description)

    # instantiate NCA model
    if USE_NCA:
        ncamodel = nca.SurvivalNCA(RESULTPATH_NCA, description=description,
                                   LOADPATH=LOADPATH)

    #%% =======================================================================
    # Define relevant methods
    #==========================================================================

    def _get_numpc_optim(feats_train, feats_valid,
                         T_train, C_train,
                         T_valid, C_valid):
        """
        Given PCA-transformed training and validation sets, find the
        optimal number of principal components to maximize the Ci.
        """
        print("\n\tnumpc\tCi")
        print("\t--------------")
        cis = []
        numpc_max = np.min([feats_train.shape[1], 200])
        for numpc in range(4, numpc_max, 4):
            feats_train_new = feats_train[:, 0:numpc]
            feats_valid_new = feats_valid[:, 0:numpc]

            # get neighbor indices
            neighbor_idxs = knnmodel._get_neighbor_idxs(
                feats_valid_new, feats_train_new,
                norm=k_tune_params['norm'])

            # Predict validation set
            _, Ci = knnmodel.predict(neighbor_idxs,
                                     Survival_train=T_train,
                                     Censored_train=C_train,
                                     Survival_test=T_valid,
                                     Censored_test=C_valid,
                                     K=k_tune_params['K_init'],
                                     Method=k_tune_params['Method'])
            cis.append([numpc, Ci])
            print("\t{}\t{}".format(numpc, Ci))

        # now get optimal number of PC's
        cis = np.array(cis)
        numpc_optim = cis[cis[:, 1].argmax(), 0]
        print("\nnumpc_optim = {}".format(round(numpc_optim, 3)))
        return int(numpc_optim)

    #%% =======================================================================
    # Begin main body
    #==========================================================================

    print("\n--------------------------------------")
    print("Getting cv accuracy: {}, {}".format(site, dtype))
    print("--------------------------------------\n")

    print("Loading data.")
    Data = loadmat(dpath)
    Features = Data[dtype + '_X'].copy()
    N = Features.shape[0]
    Survival = Data['Survival'].reshape([N, ])
    Censored = Data['Censored'].reshape([N, ])
    fnames = Data[dtype + '_Symbs']
    Data = None

    if USE_NCA:
        # build computational graph for NCA model
        graphParams['dim_input'] = Features.shape[1]
        ncamodel.build_computational_graph(COMPUT_GRAPH_PARAMS=graphParams)

    with open(dpath.split('.mat')[0] + '_splitIdxs.pkl', 'rb') as f:
        splitIdxs = _pickle.load(f)

    # Go through folds, optimize and get accuracy
    #==========================================================================

    # initialize
    n_folds = len(splitIdxs['train'])
    CIs = np.zeros([n_folds])

    #
    # iterate through folds
    #

    for fold in range(n_folds):

        print("\nfold {} of {}\n".format(fold, n_folds - 1))

        # Isolate various sets.
        # Note, this is done in each loop iteration since the sets
        # are modified locally within each outer loop.
        X = Features.copy()
        x_train = X[splitIdxs['train'][fold], :]
        x_valid = X[splitIdxs['valid'][fold], :]
        x_test = X[splitIdxs['test'][fold], :]
        X = None

        #%% ===================================================================
        # Unsupervised dimensionality reduction - PCA
        #======================================================================

        if USE_PCA:

            print("\nFinding optimal number of PC's.")

            # Find optimal number of PC's
            pca = PCA()
            x_train = pca.fit_transform(x_train)
            x_valid = pca.transform(x_valid)
            x_test = pca.transform(x_test)

            # keep optimal number of PC's
            numpc_optim = _get_numpc_optim(
                feats_train=x_train,
                feats_valid=x_valid,
                T_train=Survival[splitIdxs['train'][fold]],
                C_train=Censored[splitIdxs['train'][fold]],
                T_valid=Survival[splitIdxs['valid'][fold]],
                C_valid=Censored[splitIdxs['valid'][fold]])
            x_train = x_train[:, 0:numpc_optim]
            x_valid = x_valid[:, 0:numpc_optim]
            x_test = x_test[:, 0:numpc_optim]

        #%% ===================================================================
        # Supervised dimensionality reduction - NCA
        #======================================================================

        if USE_NCA:

            #%% ---------------------------------------------------------------
            # Bayesian optimization of NCA hyperparameters
            #------------------------------------------------------------------

            print("\nBayesian Optimization of NCA hyperparameters.\n")

            nca_train_params['MONITOR'] = True

            def run_nca(ALPHA, LAMBDA, SIGMA, DROPOUT_FRACTION):
                """
                Wrapper to run NCA and fetch validation accuracy
                using the specified tunable hyperparameters.
                """
                graph_hyperparams = {
                    'ALPHA': ALPHA,
                    'LAMBDA': LAMBDA,
                    'SIGMA': SIGMA,
                    'DROPOUT_FRACTION': DROPOUT_FRACTION,
                }
                W = ncamodel.train(
                    features=x_train,
                    survival=Survival[splitIdxs['train'][fold]],
                    censored=Censored[splitIdxs['train'][fold]],
                    features_valid=x_valid,
                    survival_valid=Survival[splitIdxs['valid'][fold]],
                    censored_valid=Censored[splitIdxs['valid'][fold]],
                    graph_hyperparams=graph_hyperparams,
                    **nca_train_params)
                ncamodel.reset_TrainHistory()

                # transform
                x_train_transformed = np.dot(x_train, W)
                x_valid_transformed = np.dot(x_valid, W)

                # get neighbor indices
                neighbor_idxs = knnmodel._get_neighbor_idxs(
                    x_valid_transformed, x_train_transformed,
                    norm=nca_train_params['norm'])

                # Predict validation set
                _, Ci = knnmodel.predict(
                    neighbor_idxs,
                    Survival_train=Survival[splitIdxs['train'][fold]],
                    Censored_train=Censored[splitIdxs['train'][fold]],
                    Survival_test=Survival[splitIdxs['valid'][fold]],
                    Censored_test=Censored[splitIdxs['valid'][fold]],
                    K=nca_train_params['K'],
                    Method=nca_train_params['Method'])
                return Ci

            #
            # Run core bayesopt model
            #

            bo = bayesopt(run_nca, bo_lims)
            bo.explore(bo_expl)
            bo.maximize(init_points=bo_params['init_points'],
                        n_iter=bo_params['n_itir'])

            # fetch optimal params
            Optim_params = bo.res['max']['max_params']
            ALPHA_OPTIM = Optim_params['ALPHA']
            LAMBDA_OPTIM = Optim_params['LAMBDA']
            SIGMA_OPTIM = Optim_params['SIGMA']
            DROPOUT_FRACTION_OPTIM = Optim_params['DROPOUT_FRACTION']

            print("\tOptimal NCA params:")
            print("\t--------------------")
            print("\tALPHA\tLAMBDA\tSIGMA\tDROPOUT_FRACTION")
            print("\t{}\t{}\t{}\t{}".format(
                ALPHA_OPTIM, LAMBDA_OPTIM, SIGMA_OPTIM,
                DROPOUT_FRACTION_OPTIM))

            #%% ---------------------------------------------------------------
            # Learn final NCA matrix
            #------------------------------------------------------------------

            print("\nLearning final NCA matrix\n")

            nca_train_params['MONITOR'] = True

            graph_hyperparams = {
                'ALPHA': ALPHA_OPTIM,
                'LAMBDA': LAMBDA_OPTIM,
                'SIGMA': SIGMA_OPTIM,
                'DROPOUT_FRACTION': DROPOUT_FRACTION_OPTIM,
            }

            # Learn NCA matrix
            W = ncamodel.train(
                features=x_train,
                survival=Survival[splitIdxs['train'][fold]],
                censored=Censored[splitIdxs['train'][fold]],
                features_valid=x_valid,
                survival_valid=Survival[splitIdxs['valid'][fold]],
                censored_valid=Censored[splitIdxs['valid'][fold]],
                graph_hyperparams=graph_hyperparams,
                **nca_train_params)

            # Rank features
            if not USE_PCA:
                ncamodel.rankFeats(W, fnames, rank_type="weights",
                                   PLOT=nca_train_params['PLOT'])

            # Transform features according to learned NCA model
            x_train = np.dot(x_train, W)
            x_valid = np.dot(x_valid, W)
            x_test = np.dot(x_test, W)

        #%% ===================================================================
        # Tune K
        #======================================================================

        # Get neighbor indices
        neighbor_idxs = knnmodel._get_neighbor_idxs(
            x_valid, x_train, norm=k_tune_params['norm'])

        print("\tK \t Ci")
        CIs_k = np.zeros([len(k_tune_params['Ks'])])
        for kidx, K in enumerate(k_tune_params['Ks']):
            # Predict validation set
            _, Ci = knnmodel.predict(
                neighbor_idxs=neighbor_idxs,
                Survival_train=Survival[splitIdxs['train'][fold]],
                Censored_train=Censored[splitIdxs['train'][fold]],
                Survival_test=Survival[splitIdxs['valid'][fold]],
                Censored_test=Censored[splitIdxs['valid'][fold]],
                K=K,
                Method=k_tune_params['Method'])
            CIs_k[kidx] = Ci
            print("\t{} \t {}".format(K, round(Ci, 3)))

        K_optim = k_tune_params['Ks'][np.argmax(CIs_k)]
        print("\nK_optim = {}".format(K_optim))

        #%% ===================================================================
        # Get final accuracy
        #======================================================================

        print("\nGetting accuracy.")

        # combined training and validation sets
        combinedIdxs = splitIdxs['train'][fold] + splitIdxs['valid'][fold]

        if USE_BAGGING:
            _, ci = knnmodel.predict_with_bagging(
                X_test=x_test,
                X_train=np.concatenate((x_train, x_valid), axis=0),
                Survival_train=Survival[combinedIdxs],
                Censored_train=Censored[combinedIdxs],
                Survival_test=Survival[splitIdxs['test'][fold]],
                Censored_test=Censored[splitIdxs['test'][fold]],
                **bagging_params,
                K=K_optim,
                Method=k_tune_params['Method'],
                norm=k_tune_params['norm'])
        else:
            neighbor_idxs = knnmodel._get_neighbor_idxs(
                x_test, np.concatenate((x_train, x_valid), axis=0),
                norm=k_tune_params['norm'])
            _, ci = knnmodel.predict(
                neighbor_idxs=neighbor_idxs,
                Survival_train=Survival[combinedIdxs],
                Censored_train=Censored[combinedIdxs],
                Survival_test=Survival[splitIdxs['test'][fold]],
                Censored_test=Censored[splitIdxs['test'][fold]],
                K=K_optim,
                Method=k_tune_params['Method'])

        # record result
        CIs[fold] = ci
        print("Ci = {}".format(round(ci, 3)))

    #%%
    print("\nAccuracy")
    print("------------------------")
    print("25th percentile = {}".format(np.percentile(CIs, 25)))
    print("50th percentile = {}".format(np.percentile(CIs, 50)))
    print("75th percentile = {}".format(np.percentile(CIs, 75)))

    # Save results
    print("\nSaving final results.")
    with open(RESULTPATH + description + 'testing_Ci.txt', 'wb') as f:
        np.savetxt(f, CIs, fmt='%s', delimiter='\t')
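
# -----------------------------------------------------------------------------
# A minimal sketch of how get_cv_accuracy might be invoked. The paths, site
# name, and parameter values below are hypothetical placeholders, not values
# from the original code; the .mat layout and the split-index pickle are
# assumed to match what the function expects.
if __name__ == '__main__':
    get_cv_accuracy(
        dpath='data/GBMLGG.mat',   # hypothetical .mat file
        site='GBMLGG',             # hypothetical site name
        dtype='Integ',             # expects keys '<dtype>_X' and '<dtype>_Symbs'
        description='Integ_',
        RESULTPATH='results/',
        k_tune_params={'norm': 2, 'K_init': 35,
                       'Ks': list(range(10, 160, 10)),
                       'Method': 'cumulative-time'},
        USE_NCA=True,
        graphParams={},
        nca_train_params={'norm': 2, 'K': 35,
                          'Method': 'cumulative-time', 'PLOT': False},
        bo_lims={'ALPHA': (0, 1), 'LAMBDA': (0, 1),
                 'SIGMA': (0.2, 15), 'DROPOUT_FRACTION': (0, 0.7)},
        bo_expl={'ALPHA': [0, 1], 'LAMBDA': [0, 0],
                 'SIGMA': [1, 1], 'DROPOUT_FRACTION': [0.1, 0.5]},
        bo_params={'init_points': 5, 'n_itir': 15})
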
        'KEEP_PROB': (0.1, 1),
    }

    # initial points to explore
    bo_expl = {
        'LEARN_RATE': [0.001, 0.001, 0.001],
        'DEPTH': [3, 2, 1],
        'MAXWIDTH': [500, 700, 1400],
        'KEEP_PROB': [0.4, 0.4, 0.4],
    }

    INIT_POINTS = 5
    N_ITER = 15
    KAPPA = 2.576

    bo = bayesopt(Run_Training, bo_lims)
    bo.explore(bo_expl)
    bo.maximize(init_points=INIT_POINTS, n_iter=N_ITER, kappa=KAPPA)

    # Fetch the optimal parameters to use for the actual training
    Optim_params = bo.res['max']['max_params']
    # print(dict(Optim_params))

    featrisks, c_index = train(INPUT_ARG)

else:
    featrisks, c_index = train(INPUT_ARG)

# ******************************************************************************
# C-index calculation
    return s.data.cpu().numpy()


def fn_skopt(params):
    # skopt passes a flat list of parameter values; unpack it.
    # Note: z is part of the search space but unused by this toy objective.
    x, y, z = params
    px = torch.tensor(x, device='cpu', requires_grad=True)
    py = torch.tensor(y, device='cpu', requires_grad=True)
    s = torch.tensor(0.5, device='cpu', requires_grad=True)
    for i in trange(10, leave=False):
        s = s + 0.5 * px + py
        sleep(0.1)
    return float(s.data.cpu().numpy())


if args.fn == 'fn_skopt':
    res_gp = gp_minimize(func=fn_skopt,
                         dimensions=[Real(-10, 10, 'uniform', name='x'),
                                     Real(-10, 10, 'uniform', name='y'),
                                     Integer(-10, 10, name='z')],
                         n_calls=15,
                         random_state=0)
    print("Best score=%.4f" % res_gp.fun)
    print('best param', res_gp.x)
    best = res_gp.fun
else:
    bo = bayesopt(fn_bayes, {'x': [-10, 10], 'y': [-10, 10], 'z': [-10, 10]})
    bo.maximize(init_points=5, n_iter=10, kappa=2)
    best = bo.res['max']
    print(bo.res['all'])

print(best)
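
# -----------------------------------------------------------------------------
# A small sketch (assuming scikit-optimize's skopt.utils.use_named_args, which
# this snippet does not use) of wiring the same search space so the objective
# receives named keyword arguments instead of a positional list. The toy
# objective mirrors fn_skopt's arithmetic: 0.5 + 10 * (0.5x + y).
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

space = [Real(-10, 10, 'uniform', name='x'),
         Real(-10, 10, 'uniform', name='y'),
         Integer(-10, 10, name='z')]

@use_named_args(space)
def fn_named(x, y, z):
    # Toy objective; gp_minimize minimizes, so return a plain float.
    return 0.5 + 5.0 * x + 10.0 * y

res = gp_minimize(fn_named, space, n_calls=15, random_state=0)
print("Best score=%.4f" % res.fun)
print("best params", dict(zip(['x', 'y', 'z'], res.x)))
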
def get_cv_accuracy(dpath, site, dtype, description,
                    RESULTPATH,
                    k_tune_params={},
                    knn_params={},
                    USE_NCA=False,
                    graphParams={},
                    nca_train_params={},
                    elastic_net_params={},
                    USE_PCA=False,
                    USE_BAGGING=False,
                    bagging_params={}):
    """
    Get KNN cross-validation accuracy with or without PCA and NCA.
    """

    # NOTE: `norm` and `Method` are referenced below but not defined in this
    # snippet; they are assumed to be available from the enclosing scope
    # (e.g. module-level settings).

    # Get a dict of function params and save
    params_all = locals()
    with open(RESULTPATH + description + 'params_all.pkl', 'wb') as f:
        _pickle.dump(params_all, f)

    #%% =======================================================================
    # Define relevant methods
    #==========================================================================

    def _get_numpc_optim(feats_train, feats_valid,
                         T_train, C_train,
                         T_valid, C_valid):
        """
        Given PCA-transformed training and validation sets, find the
        optimal number of principal components to maximize the Ci.
        """
        print("\nFinding optimal number of PC's.")
        print("\n\tnumpc\tCi")
        print("\t--------------")
        cis = []
        numpc_max = np.min([feats_train.shape[1], 200])
        for numpc in range(4, numpc_max, 4):
            feats_train_new = feats_train[:, 0:numpc]
            feats_valid_new = feats_valid[:, 0:numpc]

            # get neighbor indices
            neighbor_idxs = knnmodel._get_neighbor_idxs(feats_valid_new,
                                                        feats_train_new,
                                                        norm=norm)

            # Predict validation set
            _, Ci = knnmodel.predict(neighbor_idxs,
                                     Survival_train=T_train,
                                     Censored_train=C_train,
                                     Survival_test=T_valid,
                                     Censored_test=C_valid,
                                     K=elastic_net_params['K'],
                                     Method=Method)
            cis.append([numpc, Ci])
            print("\t{}\t{}".format(numpc, Ci))

        # now get optimal number of PC's
        cis = np.array(cis)
        numpc_optim = cis[cis[:, 1].argmax(), 0]
        print("\nnumpc_optim = {}".format(round(numpc_optim, 3)))
        return int(numpc_optim)

    #%% =======================================================================
    # Begin main body
    #==========================================================================

    print("\n--------------------------------------")
    print("Getting cv accuracy: {}, {}".format(site, dtype))
    print("--------------------------------------\n")

    print("Loading data.")

    with open(dpath.split('.mat')[0] + '_splitIdxs.pkl', 'rb') as f:
        splitIdxs = _pickle.load(f)

    #
    # result structure
    #

    RESULTPATH_NCA = RESULTPATH + "nca/"
    RESULTPATH_KNN = RESULTPATH + "knn/"
    LOADPATH = None
    os.system('mkdir ' + RESULTPATH_NCA)
    os.system('mkdir ' + RESULTPATH_KNN)

    # Go through outer folds, optimize and get accuracy
    #==========================================================================

    # Instantiate a KNN survival model.
    knnmodel = knn.SurvivalKNN(RESULTPATH_KNN, description=description)

    #
    # initialize
    #

    n_outer_folds = len(splitIdxs['idx_optim'])
    n_folds = len(splitIdxs['fold_cv_test'][0])
    CIs = np.zeros([n_folds, n_outer_folds])

    #
    # iterate through outer folds
    #

    for outer_fold in range(n_outer_folds):

        print("\nOuter fold {} of {}\n".format(outer_fold, n_outer_folds - 1))

        # Note, this is done for each outer loop
        # since the data are modified locally within each outer loop
        print("Loading data ...")
        Data = loadmat(dpath)
        X = Data[dtype + '_X'].copy()
        N = X.shape[0]
        Survival = Data['Survival'].reshape([N, ])
        Censored = Data['Censored'].reshape([N, ])
        Data = None

        # Isolate optimization set (and divide into training and validation)
        optimIdxs = splitIdxs['idx_optim'][outer_fold]

        if (USE_NCA or USE_PCA):
            stoppoint = int(elastic_net_params['VALID_RATIO'] * len(optimIdxs))
            optimIdxs_valid = optimIdxs[0:stoppoint]
            optimIdxs_train = optimIdxs[stoppoint:]
            x_train = X[optimIdxs_train, :]
            x_valid = X[optimIdxs_valid, :]

        #%% ===================================================================
        # Unsupervised dimensionality reduction - PCA
        #======================================================================

        if USE_PCA:

            # Find optimal number of PC's
            pca = PCA()
            x_train = pca.fit_transform(x_train)
            x_valid = pca.transform(x_valid)

            # keep optimal number of PC's
            numpc_optim = _get_numpc_optim(feats_train=x_train,
                                           feats_valid=x_valid,
                                           T_train=Survival[optimIdxs_train],
                                           C_train=Censored[optimIdxs_train],
                                           T_valid=Survival[optimIdxs_valid],
                                           C_valid=Censored[optimIdxs_valid])
            x_train = x_train[:, 0:numpc_optim]
            x_valid = x_valid[:, 0:numpc_optim]

            # Now learn final PC matrix on full optimization set
            print("\nLearning final PCA matrix.")
            pca = PCA(n_components=numpc_optim)
            pca.fit(X[optimIdxs, :])
            X = pca.transform(X)

        #%% ===================================================================
        # Supervised dimensionality reduction - NCA
        #======================================================================

        if USE_NCA:

            #%% ---------------------------------------------------------------
            # Bayesian optimization of NCA hyperparameters
            #------------------------------------------------------------------

            print("\nBayesian Optimization of NCA hyperparameters.\n")

            # instantiate NCA model
            ncamodel = nca.SurvivalNCA(RESULTPATH_NCA,
                                       description=description,
                                       LOADPATH=LOADPATH)

            def run_nca(ALPHA, LAMBDA, SIGMA):
                """
                Wrapper to run NCA and fetch validation accuracy
                using the specified tunable hyperparameters.
                """
                graphParams['ALPHA'] = ALPHA
                graphParams['LAMBDA'] = LAMBDA
                graphParams['SIGMA'] = SIGMA

                w = ncamodel.train(features=x_train,
                                   survival=Survival[optimIdxs_train],
                                   censored=Censored[optimIdxs_train],
                                   COMPUT_GRAPH_PARAMS=graphParams,
                                   **nca_train_params)
                # NCA learns a per-feature weight vector; place it on the
                # diagonal of a transform matrix
                W = np.zeros([len(w), len(w)])
                np.fill_diagonal(W, w)

                ncamodel.reset_TrainHistory()

                # transform
                x_valid_transformed = np.dot(x_valid, W)
                x_train_transformed = np.dot(x_train, W)

                # get neighbor indices
                neighbor_idxs = knnmodel._get_neighbor_idxs(x_valid_transformed,
                                                            x_train_transformed,
                                                            norm=norm)

                # Predict validation set
                _, Ci = knnmodel.predict(neighbor_idxs,
                                         Survival_train=Survival[optimIdxs_train],
                                         Censored_train=Censored[optimIdxs_train],
                                         Survival_test=Survival[optimIdxs_valid],
                                         Censored_test=Censored[optimIdxs_valid],
                                         K=elastic_net_params['K'],
                                         Method=Method)
                return Ci

            # limits of interval to explore
            bo_lims = {
                'ALPHA': (0, 1),
                'LAMBDA': (0, 1),
                'SIGMA': (0.2, 15),
            }

            # initial points to explore
            bo_expl = {
                'ALPHA': [0, 0, 1, 0, 0],
                'LAMBDA': [0, 1, 0, 0, 0],
                'SIGMA': [1, 1, 1, 5, 0.5],
            }

            bo = bayesopt(run_nca, bo_lims)
            bo.explore(bo_expl)
            bo.maximize(init_points=2, n_iter=20)

            Optim_params = bo.res['max']['max_params']
            ALPHA_OPTIM = Optim_params['ALPHA']
            LAMBDA_OPTIM = Optim_params['LAMBDA']
            SIGMA_OPTIM = Optim_params['SIGMA']

            print("Optimal:\tALPHA\tLAMBDA\tSIGMA")
            print("\t{}\t{}\t{}".format(ALPHA_OPTIM, LAMBDA_OPTIM, SIGMA_OPTIM))

            #%%
            # Learn final NCA matrix on optimization set
            #

            print("\nLearning final NCA matrix\n")

            graphParams['ALPHA'] = ALPHA_OPTIM
            graphParams['LAMBDA'] = LAMBDA_OPTIM
            graphParams['SIGMA'] = SIGMA_OPTIM

            # Learn NCA matrix
            w = ncamodel.train(features=X[optimIdxs, :],
                               survival=Survival[optimIdxs],
                               censored=Censored[optimIdxs],
                               COMPUT_GRAPH_PARAMS=graphParams,
                               **nca_train_params)
            W = np.zeros([len(w), len(w)])
            np.fill_diagonal(W, w)

            # Transform features according to learned NCA model
            X = np.dot(X, W)

        #%% ===================================================================
        # Now get accuracy
        #======================================================================

        print("\nGetting accuracy.")

        ci, _ = knnmodel.cv_accuracy(X, Survival, Censored,
                                     splitIdxs, outer_fold=outer_fold,
                                     k_tune_params=k_tune_params,
                                     USE_BAGGING=USE_BAGGING,
                                     bagging_params=bagging_params)

        # record result
        CIs[:, outer_fold] = ci

    #%%
    print("\nAccuracy")
    print("------------------------")
    print("25th percentile = {}".format(np.percentile(CIs, 25)))
    print("50th percentile = {}".format(np.percentile(CIs, 50)))
    print("75th percentile = {}".format(np.percentile(CIs, 75)))

    # Save results
    print("\nSaving final results.")
    with open(RESULTPATH + description + 'testing_Ci.txt', 'wb') as f:
        np.savetxt(f, CIs, fmt='%s', delimiter='\t')
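
# -----------------------------------------------------------------------------
# A quick illustrative check (not from the original code): since the learned
# NCA weights `w` are placed on the diagonal of W, the transform np.dot(X, W)
# is just per-feature scaling, i.e. X * w.
import numpy as np

X = np.arange(6, dtype=float).reshape(2, 3)  # toy feature matrix
w = np.array([0.5, 2.0, 0.0])                # toy per-feature weights

W = np.zeros([len(w), len(w)])
np.fill_diagonal(W, w)

assert np.allclose(np.dot(X, W), X * w)  # diagonal matmul == elementwise scaling
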