# Fit the survival model for this cross-validation fold, caching the fitted
# net and the elapsed training time on disk so repeated runs can skip
# training.
time_elapsed_filename = model_filename[:-3] + '_time.txt'
if not os.path.isfile(model_filename):
    # print('*** Fitting with hyperparam:', hyperparam,
    #       '-- cross val index:', cross_val_idx, flush=True)
    surv_model.fit(fold_X_train_std,
                   (fold_y_train[:, 0], fold_y_train[:, 1]),
                   batch_size, n_epochs, verbose=False)
    surv_model.compute_baseline_hazards()
    elapsed = time.time() - tic
    print('Time elapsed: %f second(s)' % elapsed)
    np.savetxt(time_elapsed_filename, np.array(elapsed).reshape(1, -1))
    surv_model.save_net(model_filename)
else:
    # print('*** Loading ***', flush=True)
    surv_model.load_net(model_filename)
    elapsed = float(np.loadtxt(time_elapsed_filename))
    print('Time elapsed (from previous fitting): '
          + '%f second(s)' % elapsed)

# Evaluate on the validation fold (durations in column 0, event indicators
# in column 1), with Kaplan-Meier estimates for the censoring distribution.
surv_df = surv_model.predict_surv_df(fold_X_val_std)
ev = EvalSurv(surv_df, fold_y_val[:, 0], fold_y_val[:, 1], censor_surv='km')

sorted_fold_y_val = np.sort(np.unique(fold_y_val[:, 0]))
# The original line is truncated here; the upper bound and the 100-point
# grid size below are assumed.
time_grid = np.linspace(sorted_fold_y_val[0], sorted_fold_y_val[-1], 100)
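# Assumed continuation (sketch): the fragment above ends right after the
# time grid is built. In pycox, such a grid typically feeds EvalSurv's
# integrated metrics; the variable names below are illustrative only.
val_concordance = ev.concordance_td()
val_integrated_brier = ev.integrated_brier_score(time_grid)
val_integrated_nbll = ev.integrated_nbll(time_grid)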
def pycox_deep(filename, Y_train, Y_test, opt, choice):
    # choice = {'lr_rate': l, 'batch': b, 'decay': 0, 'weighted_decay': wd,
    #           'nodes': n, 'net': net, 'index': index}
    X_train, X_test = enc_using_trained_ae(filename, TARGET=opt, ALPHA=0.01,
                                           N_ITER=100, L1R=-9999)

    # Check whether the result sheet already exists so new rows are appended
    # instead of overwriting it.
    path = './models/analysis/'
    check = 0
    savename = 'model_check_autoen_m5_test_batch+dropout+wd.csv'
    # r=root, d=directories, f=files
    for r, d, f in os.walk(path):
        for file in f:
            if savename in file:
                check = 1

    # X_train = X_train.drop('UR_SG3', axis=1)
    # X_test = X_test.drop('UR_SG3', axis=1)

    # Attach the duration (SVDTEPC_G) and event (PC_YN) columns so the
    # targets can be pulled back out after standardization.
    x_train = X_train
    x_test = X_test
    x_train['SVDTEPC_G'] = Y_train['SVDTEPC_G']
    x_train['PC_YN'] = Y_train['PC_YN']
    x_test['SVDTEPC_G'] = Y_test['SVDTEPC_G']
    x_test['PC_YN'] = Y_test['PC_YN']

    ## DataFrameMapper ##
    cols_standardize = list(X_train.columns)
    cols_standardize.remove('SVDTEPC_G')
    cols_standardize.remove('PC_YN')
    standardize = [(col, None) for col in cols_standardize]
    x_mapper = DataFrameMapper(standardize)

    _ = x_mapper.fit_transform(X_train).astype('float32')
    X_train = x_mapper.transform(X_train).astype('float32')
    X_test = x_mapper.transform(X_test).astype('float32')

    get_target = lambda d: (d['SVDTEPC_G'].values, d['PC_YN'].values)
    y_train = get_target(x_train)
    durations_test, events_test = get_target(x_test)

    in_features = X_train.shape[1]
    print(in_features)
    num_nodes = choice['nodes']
    out_features = 1
    batch_norm = True  # set to False to disable batch normalization
    dropout = 0.01
    output_bias = False
    # net = choice['net']
    net = tt.practical.MLPVanilla(in_features, num_nodes, out_features,
                                  batch_norm, dropout,
                                  output_bias=output_bias)

    print("training")
    model = CoxPH(net, tt.optim.Adam)
    # lrfinder = model.lr_finder(X_train, y_train, batch_size)
    # lr_best = lrfinder.get_best_lr()
    lr_best = 0.0001  # unused; the learning rate comes from choice['lr_rate']
    model.optimizer.set_lr(choice['lr_rate'])
    weighted_decay = choice['weighted_decay']
    verbose = True
    batch_size = choice['batch']
    epochs = 100
    if weighted_decay == 0:
        callbacks = [tt.callbacks.EarlyStopping(patience=epochs)]
        # model.fit(X_train, y_train, batch_size, epochs, callbacks,
        #           verbose=verbose)
    else:
        callbacks = [tt.callbacks.DecoupledWeightDecay(
            weight_decay=choice['decay'])]
        # model.fit(X_train, y_train, batch_size, epochs, callbacks, verbose)

    # Build a dataloader by hand so a stratified sampler can spread event
    # cases across batches (the Cox partial likelihood is uninformative for
    # batches without events).
    # dataloader = model.make_dataloader(tt.tuplefy(X_train, y_train),
    #                                    batch_size, True)
    datas = tt.tuplefy(X_train, y_train).to_tensor()
    print(datas)
    make_dataset = tt.data.DatasetTuple
    DataLoader = tt.data.DataLoaderBatch
    dataset = make_dataset(*datas)
    dataloader = DataLoader(dataset, batch_size, False,
                            sampler=StratifiedSampler(datas, batch_size))
    # dataloader = DataLoader(dataset, batch_size, True)
    model.fit_dataloader(dataloader, epochs, callbacks, verbose)
    # model.fit(X_train, y_train, batch_size, epochs, callbacks, verbose)
    # model.partial_log_likelihood(*val).mean()

    print("predicting")
    baseline_hazards = model.compute_baseline_hazards(datas[0], datas[1])
    baseline_hazards = df(baseline_hazards)
    surv = model.predict_surv_df(X_test)
    surv = 1 - surv  # note: columns now hold 1 - S(t)
    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

    print("scoring")
    c_index = ev.concordance_td()
    print("c-index(", opt, "): ", c_index)

    index = choice['index']  # run index used in the saved filenames
    if int(c_index * 10) == 0:
        hazardname = 'pycox_model_hazard_m5_v2_' + opt + '_0'
        netname = 'pycox_model_net_m5_v2_' + opt + '_0'
        weightname = 'pycox_model_weight_m5_v2_' + opt + '_0'
    else:
        hazardname = 'pycox_model_hazard_m5_' + opt + '_'
        netname = 'pycox_model_net_m5_' + opt + '_'
        weightname = 'pycox_model_weight_m5_' + opt + '_'
    baseline_hazards.to_csv('./test/' + hazardname + str(int(c_index * 100))
                            + '_' + str(index) +
                            '.csv', index=False)
    netname = netname + str(int(c_index * 100)) + '_' + str(index) + '.sav'
    weightname = (weightname + str(int(c_index * 100)) + '_' + str(index)
                  + '.sav')
    model.save_net('./test/' + netname)
    model.save_model_weights('./test/' + weightname)

    # Look up each subject's prediction at their observed duration. `pred`
    # holds 1 - S(t) because `surv` was flipped above, so `surv_final`
    # collects 1 - S(t) and `pred_final` recovers S(t).
    pred = df(surv)
    pred = pred.transpose()
    surv_final = []
    pred_final = []
    for i in range(len(pred)):
        pred_final.append(float(1 - pred[Y_test['SVDTEPC_G'][i]][i]))
        surv_final.append(float(pred[Y_test['SVDTEPC_G'][i]][i]))

    Y_test_cox = CoxformY(Y_test)
    # print(surv_final)
    c_cox, concordant, discordant, _, _ = concordance_index_censored(
        Y_test_cox['PC_YN'], Y_test_cox['SVDTEPC_G'], surv_final)
    c_cox_pred = concordance_index_censored(
        Y_test_cox['PC_YN'], Y_test_cox['SVDTEPC_G'], pred_final)[0]
    print("c-index(", opt, ") - sksurv: ", round(c_cox, 4))
    print("cox-concordant(", opt, ") - sksurv: ", concordant)
    print("cox-discordant(", opt, ") - sksurv: ", discordant)
    print("c-index_pred(", opt, ") - sksurv: ", round(c_cox_pred, 4))

    fpr, tpr, _ = metrics.roc_curve(Y_test['PC_YN'], pred_final)
    auc = metrics.auc(fpr, tpr)
    print("auc(", opt, "): ", round(auc, 4))

    if check == 1:
        model_check = pd.read_csv(path + savename)
    else:
        model_check = df(columns=['option', 'gender', 'c-td', 'c-index',
                                  'auc'])
    line_append = {'option': str(choice), 'gender': opt,
                   'c-td': round(c_index, 4),
                   'c-index': round(c_cox_pred, 4),
                   'auc': round(auc, 4)}
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    model_check = pd.concat([model_check, df([line_append])],
                            ignore_index=True)
    model_check.to_csv(path + savename, index=False)

    del X_train
    del X_test
    return surv_final
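# `StratifiedSampler` above is used but not defined in this file; it is
# presumably imported from elsewhere in the repo. Below is a minimal sketch
# of one plausible implementation, under an assumed name so it does not
# shadow the real class: it permutes indices so that event cases land at
# evenly spaced positions, keeping every batch supplied with events.
import numpy as np
import torch.utils.data


class StratifiedSamplerSketch(torch.utils.data.Sampler):
    """Hypothetical stand-in for StratifiedSampler (an assumption, not the
    original implementation)."""

    def __init__(self, datas, batch_size):
        # `datas` is tt.tuplefy(X, (durations, events)): events sit at [1][1].
        self.events = np.asarray(datas[1][1]).astype(bool)
        self.batch_size = batch_size

    def __iter__(self):
        event_idx = np.random.permutation(np.flatnonzero(self.events))
        censor_idx = np.random.permutation(np.flatnonzero(~self.events))
        n = len(self.events)
        order = np.empty(n, dtype=int)
        # Evenly spaced slots for events; censored cases fill the remainder.
        event_pos = np.linspace(0, n - 1, num=len(event_idx), dtype=int)
        order[event_pos] = event_idx
        mask = np.ones(n, dtype=bool)
        mask[event_pos] = False
        order[mask] = censor_idx
        return iter(order.tolist())

    def __len__(self):
        return len(self.events)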
def main():
    parser = setup_parser()
    args = parser.parse_args()
    if args.which_gpu != 'none':
        os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu

    # save setting
    if not os.path.exists(os.path.join(args.save_path, args.model_name)):
        os.mkdir(os.path.join(args.save_path, args.model_name))

    # data reading setting
    signal_data_path = args.signal_dataset_path
    table_path = args.table_path
    time_col = 'SurvivalDays'
    event_col = 'Mortality'

    # dataset: 70/30 train/test split, then 20% of train held out for
    # validation
    data_pathes, times, events = read_dataset(signal_data_path, table_path,
                                              time_col, event_col,
                                              args.sample_ratio)
    (data_pathes_train, data_pathes_test, times_train, times_test,
     events_train, events_test) = train_test_split(
         data_pathes, times, events, test_size=0.3, random_state=369)
    (data_pathes_train, data_pathes_val, times_train, times_val,
     events_train, events_val) = train_test_split(
         data_pathes_train, times_train, events_train, test_size=0.2,
         random_state=369)

    labels_train = label_transfer(times_train, events_train)
    dataset_train = VsDatasetBatch(data_pathes_train, *labels_train)
    dl_train = tt.data.DataLoaderBatch(dataset_train, args.train_batch_size,
                                       shuffle=True)
    labels_val = label_transfer(times_val, events_val)
    dataset_val = VsDatasetBatch(data_pathes_val, *labels_val)
    dl_val = tt.data.DataLoaderBatch(dataset_val, args.train_batch_size,
                                     shuffle=True)
    labels_test = label_transfer(times_test, events_test)
    dataset_test_x = VsTestInput(data_pathes_test)
    dl_test_x = DataLoader(dataset_test_x, args.test_batch_size,
                           shuffle=False)

    net = resnet18(args)
    model = CoxPH(
        net,
        tt.optim.Adam(lr=args.lr, betas=(0.9, 0.999), eps=1e-08,
                      weight_decay=5e-4, amsgrad=False))

    # callbacks = [tt.cb.EarlyStopping(patience=15)]
    callbacks = [
        tt.cb.BestWeights(file_path=os.path.join(
            args.save_path, args.model_name,
            args.model_name + '_bestWeight'),
            rm_file=False)
    ]
    verbose = True
    model_log = model.fit_dataloader(dl_train, args.epochs, callbacks,
                                     verbose, val_dataloader=dl_val)
    save_args(os.path.join(args.save_path, args.model_name), args)
    model_log.to_pandas().to_csv(os.path.join(args.save_path,
                                              args.model_name, 'loss.csv'),
                                 index=False)

    # Baseline hazards must be computed before survival curves can be
    # predicted.
    _ = model.compute_baseline_hazards(
        get_vs_data(dataset_train),
        (dataset_train.time, dataset_train.event))
    model.save_net(path=os.path.join(args.save_path, args.model_name,
                                     args.model_name + '_final'))

    surv = model.predict_surv_df(dl_test_x)
    surv.to_csv(os.path.join(args.save_path, args.model_name,
                             'test_sur_df.csv'), index=False)
    ev = EvalSurv(surv, np.array(labels_test[0]), np.array(labels_test[1]),
                  'km')
    print(ev.concordance_td())
    save_cindex(os.path.join(args.save_path, args.model_name),
                ev.concordance_td())
    print('done')
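# Entry-point guard (assumed: the fragment ends right after main(), with no
# guard shown in the original).
if __name__ == '__main__':
    main()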