def train_deepsurv(x_train, y_train, x_test, y_test, **kwargs):
    # Standardize the datasets
    train_mean = x_train.mean(axis=0)
    train_std = x_train.std(axis=0)
    x_train = (x_train - train_mean) / train_std
    x_test = (x_test - train_mean) / train_std

    train_data = format_to_deepsurv(x_train, y_train)
    valid_data = format_to_deepsurv(x_test, y_test)

    hyperparams = get_hyperparams(kwargs)

    # Set up Tensorboard loggers
    # TODO improve the model_id for Tensorboard to better partition runs
    model_id = str(hash(str(hyperparams)))
    run_id = model_id + '_' + str(uuid.uuid4())
    logger = TensorboardLogger(
        'hyperparam_search',
        os.path.join(logdir, "tensor_logs", model_id, run_id))

    network = deepsurv.DeepSurv(n_in=x_train.shape[1], **hyperparams)
    metrics = network.train(train_data, n_epochs=num_epochs,
                            logger=logger, update_fn=update_fn, verbose=False)

    result = network.get_concordance_index(**valid_data)
    main_logger.info('Run id: %s | %s | C-Index: %f | Train Loss %f' %
                     (run_id, str(hyperparams), result, metrics['loss'][-1][1]))
    return result
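# format_to_deepsurv is not defined in this snippet. A minimal sketch,
# assuming (an assumption) that y carries the survival time and event
# indicator under keys 't' and 'e'; DeepSurv consumes dictionaries with
# keys 'x', 't', 'e', as the other snippets below show:
def format_to_deepsurv(x, y):
    return {
        'x': x.astype('float32'),
        't': y['t'].astype('float32'),
        'e': y['e'].astype('int32'),
    }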
# Leaner variant of train_deepsurv without the TensorBoard logger.
def train_deepsurv(x_train, y_train, x_test, y_test, **kwargs):
    # Standardize the datasets
    train_mean = x_train.mean(axis=0)
    train_std = x_train.std(axis=0)
    x_train = (x_train - train_mean) / train_std
    x_test = (x_test - train_mean) / train_std

    train_data = format_to_deepsurv(x_train, y_train)
    valid_data = format_to_deepsurv(x_test, y_test)

    hyperparams = get_hyperparams(kwargs)

    network = deepsurv.DeepSurv(n_in=train_data['x'].shape[1], **hyperparams)
    metrics = network.train(train_data, n_epochs=num_epochs,
                            update_fn=update_fn, verbose=False)

    result = network.get_concordance_index(**valid_data)
    return result
def ds(typ):
    data_dir = '/home/anton/Documents/anton/ICLR/data/SEER'

    def load_split(split, no):
        # Censored ('c') and uncensored ('uc') patients are stored in
        # separate CSVs; stack them and build the event indicator
        # (0 = censored, 1 = event observed).
        features_c = np.genfromtxt(
            '{}/{}_c_feature_values_{}_{}.csv'.format(data_dir, split, no, typ),
            delimiter=',', dtype=np.float32)
        features_uc = np.genfromtxt(
            '{}/{}_uc_feature_values_{}_{}.csv'.format(data_dir, split, no, typ),
            delimiter=',', dtype=np.float32)
        features = np.append(features_c, features_uc, axis=0)
        labels = np.append(np.zeros(features_c.shape[0], dtype=np.int32),
                           np.ones(features_uc.shape[0], dtype=np.int32),
                           axis=0)
        surv_values = np.genfromtxt(
            '{}/{}_surv_time_x_{}_{}.csv'.format(data_dir, split, no, typ),
            delimiter=',')
        return features, surv_values, labels

    c_idx = np.zeros(5)
    # For each cross-validation fold
    for no in range(5):
        train_features, train_surv_values, train_labels = load_split('train', no)
        val_features, val_surv_values, val_labels = load_split('val', no)
        test_features, test_surv_values, test_labels = load_split('test', no)

        train_data = {'x': train_features, 't': train_surv_values, 'e': train_labels}
        val_data = {'x': val_features, 't': val_surv_values, 'e': val_labels}
        test_data = {'x': test_features, 't': test_surv_values, 'e': test_labels}

        # Hyperparameters
        n_in = train_features.shape[1]
        lr = 1e-10
        l = np.rint((train_features.shape[1] +
                     np.amax(train_surv_values)) / 2).astype(int)
        n_hidden = [l, l]
        dp = 0.3
        bn = True
        l2_reg = 0
        l1_reg = 0

        # Run
        network = deepsurv.DeepSurv(n_in=n_in,
                                    learning_rate=lr,
                                    hidden_layers_sizes=n_hidden,
                                    dropout=dp,
                                    batch_norm=bn,
                                    L2_reg=l2_reg,
                                    L1_reg=l1_reg)
        log = network.train(train_data, val_data, n_epochs=1000)  # 500

        # Concordance index on the test fold: count only pairs whose earlier
        # time belongs to an observed event, and score a pair as concordant
        # when the shorter survival has the higher predicted risk.
        preds = network.predict_risk(test_features)
        patient_no = test_features.shape[0]
        total_pair = 0.0
        cor_pair = 0.0
        for i in range(patient_no):
            if test_labels[i] == 1:
                T1 = test_surv_values[i]
                R1 = preds[i]
                for j in range(patient_no):
                    T2 = test_surv_values[j]
                    R2 = preds[j]
                    if T1 < T2:
                        total_pair += 1.
                        if R1 > R2:
                            cor_pair += 1.
        c_idx[no] = cor_pair / total_pair
        print(c_idx[no])

    # Mean C-index across folds with an approximate 95% interval
    print(np.mean(c_idx))
    print(np.mean(c_idx) + np.std(c_idx) * 1.96)
    print(np.mean(c_idx) - np.std(c_idx) * 1.96)
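# The double loop above is O(n^2) in Python. lifelines (an assumption: it is
# not imported in this script) computes essentially the same censored
# concordance index in one call, up to slightly different tie handling; the
# risk is negated because lifelines expects higher scores to mean longer
# survival. DeepSurv also exposes the metric directly, as the other snippets
# here use it:
from lifelines.utils import concordance_index
ci = concordance_index(test_surv_values, -preds.flatten(), test_labels)
ci = network.get_concordance_index(**test_data)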
if eq_train_ratio:
    train = equalize_num_case_control(train, data['eq_cases_train_cols'])

train_data = {
    "x": train[cols].values.astype("float32"),
    "t": train[time_col_train].values.astype("float32"),
    "e": train.is_diab.values.astype("int32")
}
test_data = {
    "x": test[cols].values.astype("float32"),
    "t": test[time_col_test].values.astype("float32"),
    "e": test.is_diab.values.astype("int32")
}

network = deepsurv.DeepSurv(n_in=train_data['x'].shape[1], **hyperparams)
log = network.train(train_data, n_epochs=1000, update_fn=update_fn)

train_cindex = network.get_concordance_index(**train_data)
test_cindex = network.get_concordance_index(**test_data)

# Get c-index on cases only
train_case_data = {
    "x": train.query('is_diab == 1')[cols].values.astype("float32"),
    "t": train.query('is_diab == 1')[time_col_train].values.astype("float32"),
    "e": train.query('is_diab == 1').is_diab.values.astype("int32")
}
test_case_data = {
    "x": test.query('is_diab == 1')[cols].values.astype("float32"),
    "t": test.query('is_diab == 1')[time_col_test].values.astype("float32"),
    "e": test.query('is_diab == 1').is_diab.values.astype("int32")
}
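# The snippet breaks off after building the case-only datasets; presumably
# (an assumption) the case-only concordance mirrors the train/test calls above:
train_case_cindex = network.get_concordance_index(**train_case_data)
test_case_cindex = network.get_concordance_index(**test_case_data)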
#----------------------------------------------------------------------
# DeepSurv:
#----------------------------------------------------------------------
import lasagne
import deepsurv as DeepSurv

# Simulate linear survival data with a hazard ratio of 2
simulator = DeepSurv.datasets.SimulatedData(hr_ratio=2)
train_set = simulator.generate_data(N=3000, method='linear')
valid_set = simulator.generate_data(N=1000, method='linear')
test_set = simulator.generate_data(N=1000, method='linear')

model = DeepSurv.DeepSurv(n_in=10,
                          learning_rate=0.1,
                          hidden_layers_sizes=[3, 3])
log = model.train(train_set, valid_set, n_epochs=30)

model.get_concordance_index(**test_set)
DeepSurv.plot_log(log)
model.plot_risk_surface(test_set['x'])

#==============================================================================
# Done.
#==============================================================================
file_name = 'Loss.png'
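# The snippet ends right after naming the output file; presumably (an
# assumption) the loss curves drawn by plot_log are saved via matplotlib:
import matplotlib.pyplot as plt
plt.savefig(file_name)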
grid_search_test = np.zeros((len(l2_array), n_folds))
for i in range(len(l2_array)):
    print(str(i + 1) + '/' + str(len(l2_array)))
    j = 0
    # NOTE: folds are regenerated for every L2 value; kf should be built
    # with a fixed random_state so each setting sees the same splits.
    cv_folds = kf.split(x_train)
    for traincv, testcv in cv_folds:
        train_data_deepsurv_cv = {
            'x': x_train[traincv].astype('float32'),
            't': data_train.loc[traincv].time.values.astype('float32'),
            'e': data_train.loc[traincv].dead.values.astype('int32')}
        test_data_deepsurv_cv = {
            'x': x_train[testcv].astype('float32'),
            't': data_train.loc[testcv].time.values.astype('float32'),
            'e': data_train.loc[testcv].dead.values.astype('int32')}
        hyperparams = {'n_in': n_in,
                       'learning_rate': learning_rate,
                       'L2_reg': l2_array[i],
                       'hidden_layers_sizes': [hidden_layers_sizes]}
        network = deepsurv.DeepSurv(**hyperparams)
        log = network.train(train_data_deepsurv_cv, test_data_deepsurv_cv,
                            n_epochs=n_epochs)
        grid_search_test[i, j] = log['best_valid_loss']
        j = j + 1

print(np.average(grid_search_test, axis=1))
# Pick the L2 strength with the lowest average validation loss
# (np.argmax of the negated average is np.argmin of the average).
l2_final = l2_array[np.argmax(-np.average(grid_search_test, axis=1))]
# NOTE: the hard-coded value below overrides the grid-search selection.
l2_final = 1e-3

hyperparams = {
    'n_in': n_in,
    'learning_rate': learning_rate,
    'L2_reg': l2_final,
    'hidden_layers_sizes': [hidden_layers_sizes]
}
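# Presumed continuation (assumption: train_data_deepsurv holds the full
# training set in the same {'x', 't', 'e'} format): refit with the
# selected L2 strength.
network = deepsurv.DeepSurv(**hyperparams)
log = network.train(train_data_deepsurv, n_epochs=n_epochs)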
test_data = dataframe_to_deepsurv_ds(test_features,
                                     event_col='Longterm_TransplantOutcome',
                                     time_col='tenure')

hyperparams = {
    'L2_reg': 1.0,
    'batch_norm': True,
    'dropout': 0.4,
    'hidden_layers_sizes': [100, 50, 20, 5, 2],
    'learning_rate': 1e-01,
    'lr_decay': 0.05,
    'momentum': 0.5,
    'n_in': train_data['x'].shape[1],
    'standardize': True
}

# Create an instance of DeepSurv using the hyperparams defined above
model = deepsurv.DeepSurv(**hyperparams)

# DeepSurv can leverage TensorBoard to monitor training and validation.
# This section is optional: to skip the TensorBoard logger, uncomment the
# line below and comment out the three lines that follow it:
# logger = None
experiment_name = 'DeepSurv model'
logdir = r'T:\tbase\logs\\'
logger = TensorboardLogger(experiment_name, logdir=logdir)

# Now we train the model
update_fn = lasagne.updates.nesterov_momentum
# The type of optimizer to use; see
# http://lasagne.readthedocs.io/en/latest/modules/updates.html
# for other optimizers.
n_epochs = 10
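# dataframe_to_deepsurv_ds is used above but not defined in this snippet.
# A minimal sketch (an assumption), consistent with the {'x', 't', 'e'}
# dictionaries used throughout; column names are whatever the caller passes:
import numpy as np

def dataframe_to_deepsurv_ds(df, event_col='Event', time_col='Time'):
    # Event indicator and observed time as typed arrays
    e = df[event_col].values.astype(np.int32)
    t = df[time_col].values.astype(np.float32)
    # The remaining columns form the covariate matrix
    x = df.drop([event_col, time_col], axis=1).values.astype(np.float32)
    return {'x': x, 't': t, 'e': e}

# With the optimizer and epoch count set, training would presumably proceed
# as in the earlier snippets (assumption: train_data was built the same way
# as test_data above):
metrics = model.train(train_data, n_epochs=n_epochs,
                      logger=logger, update_fn=update_fn)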