Example #1
    def _model_factory(self,
                       n_trees=None,
                       n_input_features=None,
                       n_neurons=None):
        """Build and return the survival model selected by self.algorithm."""
        if self.algorithm == 'CPH':
            return CoxPHFitter()
        elif self.algorithm == 'RSF':
            return RandomSurvivalForestModel(num_trees=n_trees)
        elif self.algorithm in self._pycox_methods:
            net_args = {
                'in_features': n_input_features,
                'num_nodes': n_neurons,
                'batch_norm': True,
                'dropout': 0.1,
            }

            if self.algorithm == 'DeepSurv':
                net = tt.practical.MLPVanilla(out_features=1,
                                              output_bias=False,
                                              **net_args)
                model = CoxPH(net, tt.optim.Adam)

                return model
            if self.algorithm == 'CoxTime':
                net = MLPVanillaCoxTime(**net_args)
                model = CoxTime(net, tt.optim.Adam)

                return model
            if self.algorithm in self._discrete_time_methods:
                # All discrete-time methods share the same equidistant grid.
                num_durations = 30
                print(f'   {num_durations} equidistant intervals')
            if self.algorithm == 'DeepHit':
                labtrans = DeepHitSingle.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                model = DeepHitSingle(net,
                                      tt.optim.Adam,
                                      alpha=0.2,
                                      sigma=0.1,
                                      duration_index=labtrans.cuts)

                return model
            if self.algorithm == 'MTLR':
                labtrans = MTLR.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                model = MTLR(net, tt.optim.Adam, duration_index=labtrans.cuts)

                return model
            if self.algorithm == 'Nnet-survival':
                labtrans = LogisticHazard.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                model = LogisticHazard(net,
                                       tt.optim.Adam(0.01),
                                       duration_index=labtrans.cuts)

                return model
        else:
            raise ValueError(f'Unrecognized model: {self.algorithm}')
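The discrete-time branches above only work if the training labels are discretized with the matching label_transform. A minimal sketch of that pairing for the Nnet-survival (LogisticHazard) case, using toy data; the array names and network sizes here are illustrative assumptions, not part of the original factory:

import numpy as np
import torchtuples as tt
from pycox.models import LogisticHazard

# Toy float32 data standing in for real features and survival labels.
x_train = np.random.randn(100, 8).astype('float32')
durations = np.random.uniform(0, 1000, 100).astype('float32')
events = np.random.binomial(1, 0.7, 100).astype('float32')

# Discretize event times into 30 equidistant intervals, mirroring the
# num_durations used inside the factory above.
labtrans = LogisticHazard.label_transform(30)
y_train = labtrans.fit_transform(durations, events)

# The net's output size must equal the number of intervals.
net = tt.practical.MLPVanilla(x_train.shape[1], [32, 32],
                              labtrans.out_features,
                              batch_norm=True, dropout=0.1)
model = LogisticHazard(net, tt.optim.Adam(0.01),
                       duration_index=labtrans.cuts)
model.fit(x_train, y_train, batch_size=32, epochs=5)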
Example #2
# Excerpted test: `numpy` and `num_durations` are pytest parameters
# (whether the data stays as numpy arrays, and the discretization grid size).
def test_logistic_hazard_runs(numpy, num_durations):
    data = make_dataset(True)
    input, target = data
    labtrans = LogisticHazard.label_transform(num_durations)
    target = labtrans.fit_transform(*target)
    data = tt.tuplefy(input, target)
    if not numpy:
        data = data.to_tensor()
    net = tt.practical.MLPVanilla(input.shape[1], [4], labtrans.out_features)
    model = LogisticHazard(net)
    fit_model(data, model)
    assert_survs(input, model)
    model.duration_index = labtrans.cuts
    assert_survs(input, model)
    cdi = model.interpolate(3, 'const_pdf')
    assert_survs(input, cdi)
Example #3
    def train_model(self):
        """
        Function responsible for creating the model and training it.
        Also uses test dataset to predict survival after training the model.
        """
        self.logger.info("Creating Model..\n")
        self.logger.info("Number of groups: {}\n".format(self.cuts))
        if self.cuts == 15:
            cuts = np.array([
                0., 48., 82., 126., 184., 231., 279., 330., 383., 436., 507.,
                633., 764., 1044., 1785.
            ])
        elif self.cuts == 10:
            cuts = np.array(
                [0., 70., 134., 213., 290., 365., 450., 623., 829., 1785.])
        else:
            raise ValueError(
                'Unsupported number of groups: {}'.format(self.cuts))

        self.logger.info("Generating model..\n")
        if self.mode == 'ResNet':
            net = resnet.generate_model(model_depth=self.model_depth,
                                        n_classes=self.cuts,
                                        n_input_channels=4,
                                        shortcut_type='B',
                                        conv1_t_size=7,
                                        conv1_t_stride=1,
                                        no_max_pool=False,
                                        widen_factor=1.0)
        else:
            net = CNNModel(self.cuts)

        self.logger.info("Creating DataLoaders..\n")
        train_loader, val_loader, test_loader = self.create_dataloaders()
        self.logger.info("Creating DataLoaders Done\n")
        model = LogisticHazard(net,
                               self.optimizer,
                               duration_index=cuts,
                               device=0)
        num_epochs = 100
        self.logger.info("Number of epochs: {}\n".format(num_epochs))
        self.logger.info("Begin Training..\n")
        log = model.fit_dataloader(train_loader,
                                   num_epochs, [self.callback],
                                   self.verbose,
                                   val_dataloader=val_loader)
        self.logger.info("Training Done\n")
        currentDT = datetime.datetime.now()
        model.save_model_weights('./Results/{}_{}.pt'.format(
            self.mode, currentDT.strftime("%Y-%m-%d_%H-%M-%S")))
        self.logger.info("Predicting with Test Dataset..\n")
        predictions = model.predict_surv_df(test_loader)
        self.logger.info("Predicting with Test Dataset done\n")
        self.logger.info("Predicting with interpolated Test Dataset..\n")
        predictions_interpolated = model.interpolate(
            self.cuts).predict_surv_df(test_loader)
        self.logger.info("Predicting with interpolated Test Dataset.. done\n")
        return log, predictions, predictions_interpolated
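The two DataFrames returned above hold one survival curve per test subject (columns) indexed by the time cuts (rows). A short plotting sketch, assuming a hypothetical `trainer` instance exposing the method above:

import matplotlib.pyplot as plt

# `trainer` is a hypothetical instance of the class defining train_model.
log, predictions, predictions_interpolated = trainer.train_model()

# Each column is one subject's estimated survival function S(t | x).
predictions.iloc[:, :5].plot(drawstyle='steps-post')
plt.xlabel('Time (days)')
plt.ylabel('S(t | x)')
plt.show()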
Example #4
def transform_data(df_train, df_test, df_val, mod, scale, cols_standardize,
                   log_columns, num_durations=100):

    tf_train = df_train.copy()
    tf_test = df_test.copy()
    tf_val = df_val.copy()
    if scale == "minmax":
        standardize = [([col], MinMaxScaler()) for col in cols_standardize]
    elif scale == "standard":
        standardize = [([col], StandardScaler()) for col in cols_standardize]
    elif scale == "robust":
        standardize = [([col], RobustScaler()) for col in cols_standardize]
    elif scale == "power":
        standardize = [([col], PowerTransformer()) for col in cols_standardize]

    if log_columns:
        log_scaler = lambda x: np.log(np.abs(x) + 1e-7)

        for c in log_columns:
            tf_train.loc[:, c] = log_scaler(tf_train.loc[:, c])
            tf_val.loc[:, c] = log_scaler(tf_val.loc[:, c])
            tf_test.loc[:, c] = log_scaler(tf_test.loc[:, c])
   
    x_mapper = DataFrameMapper(standardize)
    
    x_train = x_mapper.fit_transform(tf_train).astype('float32')
    x_val = x_mapper.transform(tf_val).astype('float32')
    x_test = x_mapper.transform(tf_test).astype('float32')
    
    pca = PCA(n_components=10,whiten=True)
    x_train = pca.fit_transform(x_train)
    x_val = pca.transform(x_val)
    x_test = pca.transform(x_test)
    
    if mod == "LogisticHazard":
        labtrans = LogisticHazard.label_transform(num_durations)
    elif mod == "MTLR":
        labtrans = MTLR.label_transform(num_durations)
    elif mod == "DeepHitSingle":
        labtrans = DeepHitSingle.label_transform(num_durations) 
    
    get_target = lambda tf: (tf['duration'].values.astype("float32"), tf['event'].values)
    y_train = labtrans.fit_transform(*get_target(tf_train))
    y_val = labtrans.transform(*get_target(tf_val))
    
    train = (x_train, y_train)
    val = (x_val, y_val)
    
    # We don't need to transform the test labels
    durations_test, events_test = get_target(tf_test)
    
    return x_mapper, labtrans, train, val, x_test, durations_test, events_test, pca
Example #5
def initialize_model(dim, labtrans, in_features):
    num_nodes = [dim,dim]
    out_features = labtrans.out_features
    batch_norm = True
    dropout = 0.1
    
    net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)

    #model = MTLR(net, tt.optim.Adam, duration_index=labtrans.cuts)
    model = LogisticHazard(net, tt.optim.Adam, duration_index=labtrans.cuts)
    #model = DeepHitSingle(net, tt.optim.Adam, alpha=0.2, sigma=0.1, duration_index=labtrans.cuts)

    return model
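Putting Examples #4 and #5 together, the intended call sequence looks roughly like this. A minimal sketch, assuming DataFrames df_train/df_val/df_test with 'duration' and 'event' columns and a hypothetical feature list feature_cols:

import torchtuples as tt

x_mapper, labtrans, train, val, x_test, durations_test, events_test, pca = \
    transform_data(df_train, df_test, df_val, mod='LogisticHazard',
                   scale='standard', cols_standardize=feature_cols,
                   log_columns=[], num_durations=100)

# After the PCA step the input dimension is fixed at 10 components.
model = initialize_model(dim=32, labtrans=labtrans,
                         in_features=train[0].shape[1])
log = model.fit(*train, batch_size=256, epochs=100,
                callbacks=[tt.callbacks.EarlyStopping()], val_data=val)
surv = model.predict_surv_df(x_test)  # one survival curve per test row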
Example #6
print('[Dataset: %s, experiment: %d]' % (dataset, experiment_idx))
print()

X_train, y_train, X_test, y_test, feature_names, \
        compute_features_and_transformer, transform_features = \
    load_dataset(dataset, experiment_idx)

print('Testing...', flush=True)
X_train_std, transformer = \
        compute_features_and_transformer(X_train)
X_test_std = transform_features(X_test, transformer)
X_train_std = X_train_std.astype('float32')
X_test_std = X_test_std.astype('float32')
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')
labtrans = LogisticHazard.label_transform(num_durations)
y_train_discrete = labtrans.fit_transform(*y_train.T)

torch.manual_seed(method_random_seed)
torch.cuda.manual_seed_all(method_random_seed)
np.random.seed(method_random_seed)

batch_norm = True
dropout = 0.0
output_bias = True

net = tt.practical.MLPVanilla(X_train_std.shape[1],
                              [n_nodes for layer_idx in range(n_layers)],
                              labtrans.out_features,
                              batch_norm,
                              dropout,
                              output_bias=output_bias)
Example #7
print('[Dataset: %s, experiment: %d]' % (dataset, experiment_idx))
print()

X_train, y_train, X_test, y_test, feature_names, \
        compute_features_and_transformer, transform_features = \
    load_dataset(dataset, experiment_idx)

print('Testing...', flush=True)
X_train_std, transformer = \
        compute_features_and_transformer(X_train)
X_test_std = transform_features(X_test, transformer)
X_train_std = X_train_std.astype('float32')
X_test_std = X_test_std.astype('float32')
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')
labtrans = LogisticHazard.label_transform(num_durations)
y_train_discrete = labtrans.fit_transform(*y_train.T)

torch.manual_seed(method_random_seed)
np.random.seed(method_random_seed)

batch_norm = True
dropout = 0.0
output_bias = True

net = tt.practical.MLPVanilla(X_train_std.shape[1],
                              [n_nodes for layer_idx in range(n_layers)],
                              labtrans.out_features,
                              batch_norm,
                              dropout,
                              output_bias=output_bias)
Example #8
                    fold_X_val_std = transform_features(
                        fold_X_val, transformer)
                    fold_X_train_std = fold_X_train_std.astype('float32')
                    fold_X_val_std = fold_X_val_std.astype('float32')

                    tic = time.time()
                    torch.manual_seed(method_random_seed)
                    torch.cuda.manual_seed_all(method_random_seed)
                    np.random.seed(method_random_seed)

                    batch_norm = True
                    dropout = 0.
                    output_bias = True

                    optimizer = tt.optim.Adam(lr=lr)
                    labtrans = LogisticHazard.label_transform(num_durations)
                    fold_y_train_discrete \
                        = labtrans.fit_transform(*fold_y_train.T)
                    fold_y_val_discrete \
                        = labtrans.transform(*fold_y_val.T)
                    net = tt.practical.MLPVanilla(
                        fold_X_train_std.shape[1],
                        [n_nodes for layer_idx in range(n_layers)],
                        labtrans.out_features,
                        batch_norm,
                        dropout,
                        output_bias=output_bias)

                    surv_model = LogisticHazard(net,
                                                optimizer,
                                                duration_index=labtrans.cuts)