def train_deepsurv(x_train, y_train, x_test, y_test, **kwargs):
        # Standardize the datasets
        train_mean = x_train.mean(axis=0)
        train_std = x_train.std(axis=0)

        x_train = (x_train - train_mean) / train_std
        x_test = (x_test - train_mean) / train_std

        train_data = format_to_deepsurv(x_train, y_train)
        valid_data = format_to_deepsurv(x_test, y_test)

        hyperparams = get_hyperparams(kwargs)

        # Set up Tensorboard loggers
        # TODO improve the model_id for Tensorboard to better partition runs
        model_id = str(hash(str(hyperparams)))
        run_id = model_id + '_' + str(uuid.uuid4())
        logger = TensorboardLogger(
            'hyperparam_search',
            os.path.join(logdir, "tensor_logs", model_id, run_id))

        network = deepsurv.DeepSurv(n_in=x_train.shape[1], **hyperparams)
        metrics = network.train(train_data,
                                n_epochs=num_epochs,
                                logger=logger,
                                update_fn=update_fn,
                                verbose=False)

        result = network.get_concordance_index(**valid_data)
        main_logger.info(
            'Run id: %s | %s | C-Index: %f | Train Loss %f' %
            (run_id, str(hyperparams), result, metrics['loss'][-1][1]))
        return result
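The snippet above leans on several module-level names the listing does not show (format_to_deepsurv, get_hyperparams, logdir, num_epochs, update_fn, main_logger). A minimal sketch of what they might look like, purely as an assumption so the example can be read end to end; the real definitions may differ:

# --- Hypothetical supporting definitions (not part of the original snippet) ---
import logging
import os
import uuid

import lasagne
import deepsurv
from deepsurv.deepsurv_logger import TensorboardLogger  # assumed import path

logdir = './logs'                              # root directory for TensorBoard runs
num_epochs = 500                               # epochs used by every search run
update_fn = lasagne.updates.nesterov_momentum  # optimizer handed to network.train
main_logger = logging.getLogger('hyperparam_search')

def format_to_deepsurv(x, y):
    # DeepSurv expects a dict of covariates 'x', event flags 'e' and times 't';
    # the (event, time) column layout assumed for y here is a guess.
    return {'x': x.astype('float32'),
            'e': y[:, 0].astype('int32'),
            't': y[:, 1].astype('float32')}

def get_hyperparams(kwargs):
    # pass the sampled search parameters straight through to DeepSurv
    return kwargs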
Example 2
def train_deepsurv(x_train, y_train, x_test, y_test, **kwargs):
        # Standardize the datasets
        train_mean = x_train.mean(axis=0)
        train_std = x_train.std(axis=0)
        x_train = (x_train - train_mean) / train_std
        x_test = (x_test - train_mean) / train_std

        train_data = format_to_deepsurv(x_train, y_train)
        valid_data = format_to_deepsurv(x_test, y_test)

        hyperparams = get_hyperparams(kwargs)

        network = deepsurv.DeepSurv(n_in=train_data['x'].shape[1],
                                    **hyperparams)
        metrics = network.train(train_data,
                                n_epochs=num_epochs,
                                update_fn=update_fn,
                                verbose=False)

        result = network.get_concordance_index(**valid_data)
        return result
def ds(typ):

    c_idx = np.zeros(5)

    # for each cross validation set
    for no in range(5):
        train_features_c = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/train_c_feature_values_{}_{}.csv'
            .format(no, typ),
            delimiter=',',
            dtype=np.float32)
        train_features_uc = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/train_uc_feature_values_{}_{}.csv'
            .format(no, typ),
            delimiter=',',
            dtype=np.float32)
        train_features = np.append(train_features_c, train_features_uc, axis=0)
        train_labels = np.append(np.zeros(train_features_c.shape[0],
                                          dtype=np.int32),
                                 np.ones(train_features_uc.shape[0],
                                         dtype=np.int32),
                                 axis=0)
        train_surv_values = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/train_surv_time_x_{}_{}.csv'
            .format(no, typ),
            delimiter=',')

        val_features_c = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/val_c_feature_values_{}_{}.csv'
            .format(no, typ),
            delimiter=',',
            dtype=np.float32)
        val_features_uc = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/val_uc_feature_values_{}_{}.csv'
            .format(no, typ),
            delimiter=',',
            dtype=np.float32)
        val_features = np.append(val_features_c, val_features_uc, axis=0)
        val_labels = np.append(np.zeros(val_features_c.shape[0],
                                        dtype=np.int32),
                               np.ones(val_features_uc.shape[0],
                                       dtype=np.int32),
                               axis=0)
        val_surv_values = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/val_surv_time_x_{}_{}.csv'
            .format(no, typ),
            delimiter=',')

        test_features_c = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/test_c_feature_values_{}_{}.csv'
            .format(no, typ),
            delimiter=',',
            dtype=np.float32)
        test_features_uc = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/test_uc_feature_values_{}_{}.csv'
            .format(no, typ),
            delimiter=',',
            dtype=np.float32)
        test_features = np.append(test_features_c, test_features_uc, axis=0)
        test_labels = np.append(np.zeros(test_features_c.shape[0],
                                         dtype=np.int32),
                                np.ones(test_features_uc.shape[0],
                                        dtype=np.int32),
                                axis=0)
        test_surv_values = np.genfromtxt(
            '/home/anton/Documents/anton/ICLR/data/SEER/test_surv_time_x_{}_{}.csv'
            .format(no, typ),
            delimiter=',')

        train_data = {
            'x': train_features,
            't': train_surv_values,
            'e': train_labels
        }
        val_data = {'x': val_features, 't': val_surv_values, 'e': val_labels}
        test_data = {
            'x': test_features,
            't': test_surv_values,
            'e': test_labels
        }

        # hyperparameters
        n_in = train_features.shape[1]
        lr = 1e-10  # learning rate
        # two hidden layers, each sized at the midpoint of the input width
        # and the maximum observed survival time
        layer_size = np.rint(
            (train_features.shape[1] + np.amax(train_surv_values)) / 2).astype(int)
        n_hidden = [layer_size, layer_size]
        dp = 0.3   # dropout rate
        bn = True  # batch normalization
        l2_reg = 0
        l1_reg = 0

        # run
        network = deepsurv.DeepSurv(n_in=n_in,
                                    learning_rate=lr,
                                    hidden_layers_sizes=n_hidden,
                                    dropout=dp,
                                    batch_norm=bn,
                                    L2_reg=l2_reg,
                                    L1_reg=l1_reg)
        log = network.train(train_data, val_data, n_epochs=1000)  #500

        # get results
        preds = network.predict_risk(test_features)
        patient_no = test_features.shape[0]

        total_pair = 0.0
        cor_pair = 0.0

        for i in range(patient_no):
            if (test_labels[i] == 1):
                T1 = test_surv_values[i]
                R1 = preds[i]
                for j in range(patient_no):
                    T2 = test_surv_values[j]
                    R2 = preds[j]
                    if (T1 < T2):
                        total_pair += 1.
                        if (R1 > R2):
                            cor_pair += 1.

        c_idx[no] = cor_pair / total_pair
        print(c_idx[no])
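        # Cross-check (not in the original snippet): DeepSurv's own helper and
        # lifelines should give a comparable number. lifelines scores
        # "higher = longer survival", so the predicted risk is negated; this
        # assumes lifelines is installed (DeepSurv itself depends on it).
        from lifelines.utils import concordance_index
        print(network.get_concordance_index(x=test_features,
                                            t=test_surv_values,
                                            e=test_labels))
        print(concordance_index(test_surv_values, -preds.flatten(), test_labels))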

    print(np.mean(c_idx))
    print(np.mean(c_idx) + np.std(c_idx) * 1.96)
    print(np.mean(c_idx) - np.std(c_idx) * 1.96)
Example 4
    if eq_train_ratio:
        train = equalize_num_case_control(train, data['eq_cases_train_cols'])

    train_data = {
        "x": train[cols].values.astype("float32"),
        "t": train[time_col_train].values.astype("float32"),
        "e": train.is_diab.values.astype("int32")
    }

    test_data = {
        "x": test[cols].values.astype("float32"),
        "t": test[time_col_test].values.astype("float32"),
        "e": test.is_diab.values.astype("int32")
    }

    network = deepsurv.DeepSurv(n_in=train_data['x'].shape[1], **hyperparams)
    log = network.train(train_data, n_epochs=1000, update_fn=update_fn)
    train_cindex = network.get_concordance_index(**train_data)
    test_cindex = network.get_concordance_index(**test_data)

    # Get c-index case
    train_case_data = {
        "x": train.query('is_diab == 1')[cols].values.astype("float32"),
        "t":
        train.query('is_diab == 1')[time_col_train].values.astype("float32"),
        "e": train.query('is_diab == 1').is_diab.values.astype("int32")
    }
    test_case_data = {
        "x": test.query('is_diab == 1')[cols].values.astype("float32"),
        "t":
        test.query('is_diab == 1')[time_col_test].values.astype("float32"),
        "e": test.query('is_diab == 1').is_diab.values.astype("int32")
    }
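    # Hypothetical continuation (the listing cuts the snippet off here): mirror the
    # overall C-index calls above, restricted to the case-only subsets.
    train_case_cindex = network.get_concordance_index(**train_case_data)
    test_case_cindex = network.get_concordance_index(**test_case_data)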
Example 5
#----------------------------------------------------------------------
# DeepSurv:
#----------------------------------------------------------------------
import lasagne
import deepsurv as DeepSurv

simulator = DeepSurv.datasets.SimulatedData(hr_ratio=2)

train_set = simulator.generate_data(N = 3000, method='linear')
valid_set = simulator.generate_data(N = 1000, method='linear')
test_set  = simulator.generate_data(N = 1000, method='linear')


model = DeepSurv.DeepSurv(n_in=10,
                          learning_rate=0.1,
                          hidden_layers_sizes=[3, 3])

log = model.train(train_set, valid_set, n_epochs=30)

model.get_concordance_index(**test_set)
DeepSurv.plot_log(log)

model.plot_risk_surface(test_set['x'])

#==============================================================================
# Done.
#==============================================================================

file_name = 'Loss.png'
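# Hypothetical use of file_name (not in the original): redraw the training log and
# save it, assuming plot_log renders onto the current matplotlib figure.
import matplotlib.pyplot as plt
DeepSurv.plot_log(log)
plt.savefig(file_name)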
Example 6
grid_search_test = np.zeros((len(l2_array),n_folds))
for i in range(len(l2_array)):
	print(str(i+1) + '/' + str(len(l2_array)))
	j=0
	cv_folds = kf.split(x_train)
	for traincv, testcv in cv_folds:
		train_data_deepsurv_cv = {
			'x':x_train[traincv].astype('float32'),
			't':data_train.loc[traincv].time.values.astype('float32'),
			'e':data_train.loc[traincv].dead.values.astype('int32')}
		test_data_deepsurv_cv = {
			'x':x_train[testcv].astype('float32'),
			't':data_train.loc[testcv].time.values.astype('float32'),
			'e':data_train.loc[testcv].dead.values.astype('int32')}
		hyperparams = {'n_in': n_in, 'learning_rate': learning_rate, 'L2_reg': l2_array[i],'hidden_layers_sizes': [hidden_layers_sizes]}
		network = deepsurv.DeepSurv(**hyperparams)
		log = network.train(train_data_deepsurv_cv, test_data_deepsurv_cv, n_epochs=n_epochs)
		grid_search_test[i,j] = log['best_valid_loss']
		j=j+1

print(np.average(grid_search_test, axis=1))
# choose the L2 strength with the lowest average validation loss across folds
l2_final = l2_array[np.argmin(np.average(grid_search_test, axis=1))]
l2_final = 1e-3  # hard-coded override of the grid-search result

hyperparams = {
    'n_in': n_in,
    'learning_rate': learning_rate,
    'L2_reg': l2_final,
    'hidden_layers_sizes': [hidden_layers_sizes]
}
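# Hypothetical refit (not part of the original fragment): train on the full training
# split with the selected L2 strength, mirroring the cross-validation dicts above.
train_data_deepsurv = {
    'x': x_train.astype('float32'),
    't': data_train.time.values.astype('float32'),
    'e': data_train.dead.values.astype('int32')}
network = deepsurv.DeepSurv(**hyperparams)
log = network.train(train_data_deepsurv, n_epochs=n_epochs)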
test_data = dataframe_to_deepsurv_ds(test_features,
                                     event_col='Longterm_TransplantOutcome',
                                     time_col='tenure')

hyperparams = {
    'L2_reg': 1.0,
    'batch_norm': True,
    'dropout': 0.4,
    'hidden_layers_sizes': [100, 50, 20, 5, 2],
    'learning_rate': 1e-01,
    'lr_decay': 0.05,
    'momentum': 0.5,
    'n_in': train_data['x'].shape[1],
    'standardize': True
}

# Create an instance of DeepSurv using the hyperparams defined above
model = deepsurv.DeepSurv(**hyperparams)

# DeepSurv can now leverage TensorBoard to monitor training and validation.
# This section is optional: if you don't want to use the TensorBoard logger,
# uncomment the line below and comment out the other three lines.
# logger = None

experiment_name = 'DeepSurv model'
logdir = r'T:\tbase\logs\\'
logger = TensorboardLogger(experiment_name, logdir=logdir)

# Now we train the model
# The optimizer to use. See http://lasagne.readthedocs.io/en/latest/modules/updates.html
# for other optimizers.
update_fn = lasagne.updates.nesterov_momentum
n_epochs = 10
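# Hypothetical next step (not shown in the original): fit the model with the
# optimizer, epoch count and TensorBoard logger configured above.
metrics = model.train(train_data,
                      n_epochs=n_epochs,
                      logger=logger,
                      update_fn=update_fn)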