Example 1
                    optimizer = tt.optim.Adam(lr=lr)
                    labtrans = DeepHitSingle.label_transform(
                        init_num_durations)
                    y_train_discrete = labtrans.fit_transform(*y_train.T)
                    net = tt.practical.MLPVanilla(
                        X_train_std.shape[1],
                        [n_nodes for layer_idx in range(n_layers)],
                        labtrans.out_features,
                        batch_norm,
                        dropout,
                        output_bias=output_bias)

                    surv_model = DeepHitSingle(net,
                                               optimizer,
                                               alpha=init_alpha,
                                               sigma=init_sigma,
                                               duration_index=labtrans.cuts)

                    model_filename = \
                        os.path.join(output_dir, 'models',
                                     '%s_%s_exp%d_bs%d_nep%d_nla%d_nno%d_'
                                     % (init_survival_estimator_name, dataset,
                                        experiment_idx, init_batch_size,
                                        init_n_epochs, n_layers, n_nodes)
                                     +
                                     'lr%f_a%f_s%f_nd%d_test.pt'
                                     % (init_lr, init_alpha, init_sigma,
                                        init_num_durations))
                    time_elapsed_filename = model_filename[:-3] + '_time.txt'
                    print('*** Pre-training...')
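                    # Hedged sketch (added): the original fragment stops at the print above,
                    # so the lines below are an assumed continuation showing how the
                    # DeepHitSingle model would typically be fit, timed, and saved using the
                    # names already defined in this fragment (assumes `import time`).
                    train_start = time.time()
                    surv_model.fit(X_train_std, y_train_discrete,
                                   batch_size=init_batch_size,
                                   epochs=init_n_epochs,
                                   verbose=False)
                    elapsed = time.time() - train_start
                    surv_model.save_net(model_filename)
                    with open(time_elapsed_filename, 'w') as f:
                        f.write(str(elapsed))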
Example 2
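# Hedged sketch (added; not part of the original snippet): labtrans, x_train,
# y_train and the validation tuple `val` used below are never defined in this
# fragment. A standard pycox-style setup that produces them looks roughly like
# this; the synthetic arrays are placeholders just to keep the sketch
# self-contained.
import numpy as np
import torchtuples as tt
from pycox.models import DeepHitSingle

rng = np.random.default_rng(0)
x = rng.normal(size=(1000, 9)).astype('float32')              # features
durations = rng.uniform(1, 300, size=1000).astype('float32')  # event/censoring times
events = rng.integers(0, 2, size=1000).astype('float32')      # 1 = event, 0 = censored

x_train, x_val = x[:800], x[800:]
labtrans = DeepHitSingle.label_transform(10)  # discretize time into 10 intervals
y_train = labtrans.fit_transform(durations[:800], events[:800])
y_val = labtrans.transform(durations[800:], events[800:])
val = (x_val, y_val)
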
type(labtrans)

# Build the neural network (a torchtuples MLP):
in_features = x_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.1

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout)

model = DeepHitSingle(net,
                      tt.optim.Adam,
                      alpha=0.2,
                      sigma=0.1,
                      duration_index=labtrans.cuts)
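
# Note (added): in pycox's DeepHit loss, alpha weights the likelihood term against
# the ranking term (alpha=1 means likelihood only) and sigma scales the exponential
# in the ranking loss; alpha=0.2 and sigma=0.1 are also pycox's defaults.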

batch_size = 256
lr_finder = model.lr_finder(x_train, y_train, batch_size, tolerance=3)
_ = lr_finder.plot()

lr_finder.get_best_lr()

model.optimizer.set_lr(0.01)

# Training with the chosen learning rate:
epochs = 100
callbacks = [tt.callbacks.EarlyStopping()]
log = model.fit(x_train, y_train, batch_size, epochs, callbacks, val_data=val)
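
# Hedged follow-up (added; not part of the original snippet): after training, the
# discrete-time survival estimates are usually interpolated and scored, e.g. with
# the time-dependent concordance. x_test, durations_test and events_test are
# assumed to exist alongside the training arrays above.
from pycox.evaluation import EvalSurv

surv = model.interpolate(10).predict_surv_df(x_test)  # smoother survival curves
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
print(ev.concordance_td('antolini'))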
Example 3
def main():
    parser = setup_parser()
    args = parser.parse_args()

    if args.which_gpu != 'none':
        os.environ["CUDA_VISIBLE_DEVICES"] = args.which_gpu

    # create the output directory for this model
    os.makedirs(os.path.join(args.save_path, args.model_name), exist_ok=True)

    # label transform
    labtrans = DeepHitSingle.label_transform(args.durations)

    # data reading settings
    signal_data_path = args.signal_dataset_path
    table_path = args.table_path
    time_col = 'SurvivalDays'
    event_col = 'Mortality'

    # dataset
    data_pathes, times, events = read_dataset(signal_data_path, table_path,
                                              time_col, event_col,
                                              args.sample_ratio)

    data_pathes_train, data_pathes_test, times_train, times_test, events_train, events_test = train_test_split(
        data_pathes, times, events, test_size=0.3, random_state=369)
    data_pathes_train, data_pathes_val, times_train, times_val, events_train, events_val = train_test_split(
        data_pathes_train,
        times_train,
        events_train,
        test_size=0.2,
        random_state=369)

    labels_train = label_transfer(times_train, events_train)
    target_train = labtrans.fit_transform(*labels_train)
    dataset_train = VsDatasetBatch(data_pathes_train, *target_train)
    dl_train = tt.data.DataLoaderBatch(dataset_train,
                                       args.train_batch_size,
                                       shuffle=True)

    labels_val = label_transfer(times_val, events_val)
    target_val = labtrans.transform(*labels_val)
    dataset_val = VsDatasetBatch(data_pathes_val, *target_val)
    dl_val = tt.data.DataLoaderBatch(dataset_val,
                                     args.train_batch_size,
                                     shuffle=True)

    labels_test = label_transfer(times_test, events_test)
    dataset_test_x = VsTestInput(data_pathes_test)
    dl_test_x = DataLoader(dataset_test_x, args.test_batch_size, shuffle=False)

    net = resnet18(args)
    model = DeepHitSingle(net,
                          tt.optim.Adam(lr=args.lr,
                                        betas=(0.9, 0.999),
                                        eps=1e-08,
                                        weight_decay=5e-4,
                                        amsgrad=False),
                          duration_index=labtrans.cuts)
    # callbacks = [tt.cb.EarlyStopping(patience=15)]
    callbacks = [
        tt.cb.BestWeights(file_path=os.path.join(
            args.save_path, args.model_name, args.model_name + '_bestWeight'),
                          rm_file=False)
    ]
    verbose = True
    model_log = model.fit_dataloader(dl_train,
                                     args.epochs,
                                     callbacks,
                                     verbose,
                                     val_dataloader=dl_val)

    save_args(os.path.join(args.save_path, args.model_name), args)
    model_log.to_pandas().to_csv(os.path.join(args.save_path, args.model_name,
                                              'loss.csv'),
                                 index=False)
    model.save_net(
        path=os.path.join(args.save_path, args.model_name, args.model_name +
                          '_final'))
    surv = model.predict_surv_df(dl_test_x)
    surv.to_csv(os.path.join(args.save_path, args.model_name,
                             'test_sur_df.csv'),
                index=False)
    ev = EvalSurv(surv, *labels_test, 'km')
    print(ev.concordance_td())
    save_cindex(os.path.join(args.save_path, args.model_name),
                ev.concordance_td())
    print('done')
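
# Hedged notes (added): read_dataset, VsDatasetBatch, VsTestInput, resnet18,
# label_transfer, save_args and save_cindex are project-specific helpers that are
# not shown in this example. label_transfer presumably just packs the raw columns
# into the (durations, events) pair that labtrans expects, roughly:
#
#     def label_transfer(times, events):
#         return (np.asarray(times, dtype='float32'),
#                 np.asarray(events, dtype='float32'))
#
# The script itself presumably ends with the standard entry point:
if __name__ == '__main__':
    main()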
Example 4
def _train_dht(x, t, e, folds, params):
    """Helper Function to train a deep-hit model (van der schaar et. al).

  Args:
    x:
      a numpy array of input features (Training Data).
    t:
      a numpy vector of event times (Training Data).
    e:
      a numpy vector of event indicators (1 if event occured, 0 otherwise)
      (Training Data).
    folds:
       vector of the training cv folds.

  Returns:
    Trained pycox.DeepHitSingle model.

  """
    if params is None:
        num_nodes = [100, 100]
        lr = 1e-3
        bs = 128
    else:
        num_nodes = params['num_nodes']
        lr = params['lr']
        bs = params['bs']

    x = x.astype('float32')
    t = t.astype('float32')
    e = e.astype('int32')

    #   num_durations = int(0.5*max(t))
    #   print ("num_durations:", num_durations)

    num_durations = int(max(t))
    #num_durations = int(30)

    print("num_durations:", num_durations)

    labtrans = DeepHitSingle.label_transform(num_durations, scheme='quantiles')
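    # Note (added): scheme='quantiles' places the discretization cut points by
    # quantiles of the observed time-to-event distribution (pycox derives them
    # from the Kaplan-Meier estimate) instead of on an equidistant grid.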
    #labtrans = DeepHitSingle.label_transform(num_durations,)

    #print (labtrans)

    in_features = x.shape[1]
    batch_norm = False
    dropout = 0.0
    output_bias = False

    fold_model = {}

    for f in set(folds):

        xf = x[folds != f]
        tf = t[folds != f]
        ef = e[folds != f]

        validx = sorted(
            np.random.choice(len(xf),
                             size=(int(0.15 * len(xf))),
                             replace=False))

        vidx = np.array([False] * len(xf))
        vidx[validx] = True

        y_train = labtrans.fit_transform(tf[~vidx], ef[~vidx])
        y_val = labtrans.transform(tf[vidx], ef[vidx])
        out_features = labtrans.out_features

        net = ttup.practical.MLPVanilla(in_features, num_nodes, out_features,
                                        batch_norm, dropout)

        model = DeepHitSingle(net,
                              ttup.optim.Adam,
                              alpha=0.5,
                              sigma=1,
                              duration_index=labtrans.cuts)

        y_train = y_train[0].astype('int64'), y_train[1].astype('float32')
        y_val = y_val[0].astype('int64'), y_val[1].astype('float32')

        val = xf[vidx], y_val
        train = xf[~vidx], y_train

        batch_size = bs
        model.optimizer.set_lr(lr)
        epochs = 10
        callbacks = [ttup.callbacks.EarlyStopping()]

        model.fit(
            xf[~vidx],
            y_train,
            batch_size,
            epochs,
            callbacks,
            True,
            val_data=val,
        )

        fold_model[f] = model

    return fold_model
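
# Hedged usage sketch (added; not part of the original snippet): the returned dict
# maps each fold to the model trained on the remaining folds, so out-of-fold
# predictions can be collected and scored like this (x, t, e, folds are the same
# arrays that were passed to _train_dht):
from pycox.evaluation import EvalSurv

fold_models = _train_dht(x, t, e, folds, params=None)
for f, model_f in fold_models.items():
    x_fold = x[folds == f].astype('float32')
    surv = model_f.predict_surv_df(x_fold)
    ev = EvalSurv(surv, t[folds == f], e[folds == f], censor_surv='km')
    print('fold', f, 'concordance_td:', ev.concordance_td())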