def _model_factory(self, n_trees=None, n_input_features=None, n_neurons=None):
    if self.algorithm == 'CPH':
        return CoxPHFitter()
    elif self.algorithm == 'RSF':
        return RandomSurvivalForestModel(num_trees=n_trees)
    elif self.algorithm in self._pycox_methods:
        # Shared architecture settings for all pycox networks.
        net_args = {
            'in_features': n_input_features,
            'num_nodes': n_neurons,
            'batch_norm': True,
            'dropout': 0.1,
        }
        if self.algorithm == 'DeepSurv':
            net = tt.practical.MLPVanilla(out_features=1, output_bias=False,
                                          **net_args)
            return CoxPH(net, tt.optim.Adam)
        if self.algorithm == 'CoxTime':
            net = MLPVanillaCoxTime(**net_args)
            return CoxTime(net, tt.optim.Adam)
        if self.algorithm in self._discrete_time_methods:
            # Discrete-time methods share one label-discretization grid.
            num_durations = 30
            print(f' {num_durations} equidistant intervals')
            if self.algorithm == 'DeepHit':
                labtrans = DeepHitSingle.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                return DeepHitSingle(net, tt.optim.Adam, alpha=0.2, sigma=0.1,
                                     duration_index=labtrans.cuts)
            if self.algorithm == 'MTLR':
                labtrans = MTLR.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                return MTLR(net, tt.optim.Adam, duration_index=labtrans.cuts)
            if self.algorithm == 'Nnet-survival':
                labtrans = LogisticHazard.label_transform(num_durations)
                net = self._get_discrete_time_net(labtrans, net_args)
                return LogisticHazard(net, tt.optim.Adam(0.01),
                                      duration_index=labtrans.cuts)
    else:
        raise ValueError('Unrecognized model.')
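# A hedged, standalone sketch that mirrors the 'Nnet-survival' branch above on
# synthetic data. The data, sizes, and hyperparameters are made up; only the
# pycox/torchtuples calls themselves are real API.
import numpy as np
import torchtuples as tt
from pycox.models import LogisticHazard

num_durations = 30
labtrans = LogisticHazard.label_transform(num_durations)

# Toy data: 100 samples, 5 features, exponential durations, ~70% events.
x = np.random.randn(100, 5).astype('float32')
durations = np.random.exponential(10, 100).astype('float32')
events = np.random.binomial(1, 0.7, 100).astype('float32')
y = labtrans.fit_transform(durations, events)

net = tt.practical.MLPVanilla(in_features=5, num_nodes=[32, 32],
                              out_features=labtrans.out_features,
                              batch_norm=True, dropout=0.1)
model = LogisticHazard(net, tt.optim.Adam(0.01), duration_index=labtrans.cuts)
model.fit(x, y, batch_size=32, epochs=5, verbose=False)
surv = model.predict_surv_df(x)  # one survival curve per sample, indexed by cuts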
def test_logistic_hazard_runs(numpy, num_durations):
    data = make_dataset(True)
    input, target = data
    # Discretize the (duration, event) targets onto num_durations bins.
    labtrans = LogisticHazard.label_transform(num_durations)
    target = labtrans.fit_transform(*target)
    data = tt.tuplefy(input, target)
    if not numpy:
        data = data.to_tensor()
    net = tt.practical.MLPVanilla(input.shape[1], [4], labtrans.out_features)
    model = LogisticHazard(net)
    fit_model(data, model)
    assert_survs(input, model)
    # Predictions should also work once a duration index is attached...
    model.duration_index = labtrans.cuts
    assert_survs(input, model)
    # ...and after constant-density interpolation of the discrete curves.
    cdi = model.interpolate(3, 'const_pdf')
    assert_survs(input, cdi)
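# The make_dataset, fit_model, and assert_survs helpers come from the
# surrounding test suite. A minimal, hypothetical stand-in for the latter two,
# to aid reading the test (not the suite's actual definitions):
def fit_model(data, model):
    # One cheap epoch is enough to verify that training runs end to end.
    model.fit(*data, batch_size=32, epochs=1, verbose=False)

def assert_survs(input, model):
    # Survival predictions must be a DataFrame of probabilities in [0, 1].
    surv = model.predict_surv_df(input)
    assert ((surv.values >= 0) & (surv.values <= 1)).all()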
def train_model(self):
    """Create the model and train it; after training, predict survival on the
    test dataset.
    """
    self.logger.info("Creating Model..\n")
    self.logger.info("Number of groups: {}\n".format(self.cuts))
    if self.cuts == 15:
        cuts = np.array([0., 48., 82., 126., 184., 231., 279., 330., 383.,
                         436., 507., 633., 764., 1044., 1785.])
    elif self.cuts == 10:
        cuts = np.array([0., 70., 134., 213., 290., 365., 450., 623., 829.,
                         1785.])
    else:
        raise ValueError("Unsupported number of groups: {}".format(self.cuts))
    self.logger.info("Generating model..\n")
    if self.mode == 'ResNet':
        net = resnet.generate_model(model_depth=self.model_depth,
                                    n_classes=self.cuts,
                                    n_input_channels=4,
                                    shortcut_type='B',
                                    conv1_t_size=7,
                                    conv1_t_stride=1,
                                    no_max_pool=False,
                                    widen_factor=1.0)
    else:
        net = CNNModel(self.cuts)
    self.logger.info("Creating DataLoaders..\n")
    train_loader, val_loader, test_loader = self.create_dataloaders()
    self.logger.info("Creating DataLoaders Done\n")
    model = LogisticHazard(net, self.optimizer, duration_index=cuts, device=0)
    num_epochs = 100
    self.logger.info("Number of epochs: {}\n".format(num_epochs))
    self.logger.info("Begin Training..\n")
    log = model.fit_dataloader(train_loader, num_epochs, [self.callback],
                               self.verbose, val_dataloader=val_loader)
    self.logger.info("Training Done\n")
    currentDT = datetime.datetime.now()
    model.save_model_weights('./Results/{}_{}.pt'.format(
        self.mode, currentDT.strftime("%Y-%m-%d_%H-%M-%S")))
    self.logger.info("Predicting with Test Dataset..\n")
    predictions = model.predict_surv_df(test_loader)
    self.logger.info("Predicting with Test Dataset done\n")
    self.logger.info("Predicting with interpolated Test Dataset..\n")
    predictions_interpolated = model.interpolate(
        self.cuts).predict_surv_df(test_loader)
    self.logger.info("Predicting with interpolated Test Dataset.. done\n")
    return log, predictions, predictions_interpolated
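# The returned curves are step functions over the `cuts` grid. A hedged sketch
# of inspecting them, assuming a trained instance named `trainer` (hypothetical)
# and matplotlib installed:
import matplotlib.pyplot as plt

log, predictions, predictions_interpolated = trainer.train_model()
predictions_interpolated.iloc[:, :5].plot(drawstyle='steps-post')
plt.xlabel('days')
plt.ylabel('S(t | x)')
plt.show()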
def transform_data(df_train, df_test, df_val, mod, scale, cols_standardize,
                   log_columns, num_durations=100):
    tf_train = df_train.copy()
    tf_test = df_test.copy()
    tf_val = df_val.copy()
    # Choose a per-column scaler for the continuous covariates.
    if scale == "minmax":
        standardize = [([col], MinMaxScaler()) for col in cols_standardize]
    elif scale == "standard":
        standardize = [([col], StandardScaler()) for col in cols_standardize]
    elif scale == "robust":
        standardize = [([col], RobustScaler()) for col in cols_standardize]
    elif scale == "power":
        standardize = [([col], PowerTransformer()) for col in cols_standardize]
    else:
        raise ValueError("Unrecognized scaler: {}".format(scale))
    # Optionally log-transform heavy-tailed columns.
    if len(log_columns) != 0:
        log_scaler = lambda x: np.log(np.abs(x) + 1e-7)
        for c in log_columns:
            tf_train.loc[:, c] = log_scaler(tf_train.loc[:, c])
            tf_val.loc[:, c] = log_scaler(tf_val.loc[:, c])
            tf_test.loc[:, c] = log_scaler(tf_test.loc[:, c])
    x_mapper = DataFrameMapper(standardize)
    x_train = x_mapper.fit_transform(tf_train).astype('float32')
    x_val = x_mapper.transform(tf_val).astype('float32')
    x_test = x_mapper.transform(tf_test).astype('float32')
    # Reduce to 10 whitened principal components, fit on training data only.
    pca = PCA(n_components=10, whiten=True)
    x_train = pca.fit_transform(x_train)
    x_val = pca.transform(x_val)
    x_test = pca.transform(x_test)
    # Discretize the durations for the chosen discrete-time model.
    if mod == "LogisticHazard":
        labtrans = LogisticHazard.label_transform(num_durations)
    elif mod == "MTLR":
        labtrans = MTLR.label_transform(num_durations)
    elif mod == "DeepHitSingle":
        labtrans = DeepHitSingle.label_transform(num_durations)
    else:
        raise ValueError("Unrecognized model: {}".format(mod))
    get_target = lambda tf: (tf['duration'].values.astype("float32"),
                             tf['event'].values)
    y_train = labtrans.fit_transform(*get_target(tf_train))
    y_val = labtrans.transform(*get_target(tf_val))
    train = (x_train, y_train)
    val = (x_val, y_val)
    # We don't need to transform the test labels.
    durations_test, events_test = get_target(tf_test)
    return x_mapper, labtrans, train, val, x_test, durations_test, events_test, pca
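# A hedged usage sketch: assumes DataFrames with 'duration' and 'event' columns
# plus at least ten numeric covariates (PCA keeps 10 components), with
# `numeric_cols` as a hypothetical list of their names.
x_mapper, labtrans, train, val, x_test, durations_test, events_test, pca = \
    transform_data(df_train, df_test, df_val,
                   mod="LogisticHazard", scale="standard",
                   cols_standardize=numeric_cols, log_columns=[],
                   num_durations=100)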
def initialize_model(dim, labtrans, in_features):
    num_nodes = [dim, dim]
    out_features = labtrans.out_features
    batch_norm = True
    dropout = 0.1
    net = tt.practical.MLPVanilla(in_features, num_nodes, out_features,
                                  batch_norm, dropout)
    # model = MTLR(net, tt.optim.Adam, duration_index=labtrans.cuts)
    # model = DeepHitSingle(net, tt.optim.Adam, alpha=0.2, sigma=0.1,
    #                       duration_index=labtrans.cuts)
    model = LogisticHazard(net, tt.optim.Adam, duration_index=labtrans.cuts)
    return model
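# Continuing the sketch with the outputs of transform_data above; the learning
# rate, batch size, and epoch count are illustrative, not from the source.
model = initialize_model(dim=32, labtrans=labtrans,
                         in_features=train[0].shape[1])
model.optimizer.set_lr(0.01)
log = model.fit(*train, batch_size=256, epochs=100,
                callbacks=[tt.callbacks.EarlyStopping()], val_data=val)
surv = model.predict_surv_df(x_test)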
print('[Dataset: %s, experiment: %d]' % (dataset, experiment_idx))
print()

X_train, y_train, X_test, y_test, feature_names, \
    compute_features_and_transformer, transform_features = \
    load_dataset(dataset, experiment_idx)

print('Testing...', flush=True)
X_train_std, transformer = compute_features_and_transformer(X_train)
X_test_std = transform_features(X_test, transformer)
X_train_std = X_train_std.astype('float32')
X_test_std = X_test_std.astype('float32')
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

# Discretize training labels onto num_durations equidistant bins.
labtrans = LogisticHazard.label_transform(num_durations)
y_train_discrete = labtrans.fit_transform(*y_train.T)

# Seed all RNGs (including CUDA) for reproducibility.
torch.manual_seed(method_random_seed)
torch.cuda.manual_seed_all(method_random_seed)
np.random.seed(method_random_seed)

batch_norm = True
dropout = 0.0
output_bias = True
net = tt.practical.MLPVanilla(X_train_std.shape[1],
                              [n_nodes for layer_idx in range(n_layers)],
                              labtrans.out_features, batch_norm, dropout,
                              output_bias=output_bias)
print('[Dataset: %s, experiment: %d]' % (dataset, experiment_idx))
print()

X_train, y_train, X_test, y_test, feature_names, \
    compute_features_and_transformer, transform_features = \
    load_dataset(dataset, experiment_idx)

print('Testing...', flush=True)
X_train_std, transformer = compute_features_and_transformer(X_train)
X_test_std = transform_features(X_test, transformer)
X_train_std = X_train_std.astype('float32')
X_test_std = X_test_std.astype('float32')
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

labtrans = LogisticHazard.label_transform(num_durations)
y_train_discrete = labtrans.fit_transform(*y_train.T)

torch.manual_seed(method_random_seed)
np.random.seed(method_random_seed)

batch_norm = True
dropout = 0.0
output_bias = True
net = tt.practical.MLPVanilla(X_train_std.shape[1],
                              [n_nodes for layer_idx in range(n_layers)],
                              labtrans.out_features, batch_norm, dropout,
                              output_bias=output_bias)
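# A hedged sketch of the steps that typically follow such a setup, assuming
# `lr`, `batch_size`, and `num_epochs` are defined elsewhere in the script:
model = LogisticHazard(net, tt.optim.Adam(lr), duration_index=labtrans.cuts)
model.fit(X_train_std, y_train_discrete, batch_size, num_epochs, verbose=False)
surv_df = model.predict_surv_df(X_test_std)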
fold_X_val_std = transform_features(fold_X_val, transformer)
fold_X_train_std = fold_X_train_std.astype('float32')
fold_X_val_std = fold_X_val_std.astype('float32')

tic = time.time()
torch.manual_seed(method_random_seed)
torch.cuda.manual_seed_all(method_random_seed)
np.random.seed(method_random_seed)

batch_norm = True
dropout = 0.
output_bias = True
optimizer = tt.optim.Adam(lr=lr)

# Fit the discretization grid on the training folds only.
labtrans = LogisticHazard.label_transform(num_durations)
fold_y_train_discrete = labtrans.fit_transform(*fold_y_train.T)
fold_y_val_discrete = labtrans.transform(*fold_y_val.T)

net = tt.practical.MLPVanilla(fold_X_train_std.shape[1],
                              [n_nodes for layer_idx in range(n_layers)],
                              labtrans.out_features, batch_norm, dropout,
                              output_bias=output_bias)
surv_model = LogisticHazard(net, optimizer, duration_index=labtrans.cuts)
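# For completeness, a hedged sketch of the fitting and evaluation that would
# follow: `batch_size` and `n_epochs` are assumed to be defined elsewhere, and
# `fold_y_val` is assumed to hold (duration, event) columns, matching the `.T`
# unpacking above.
surv_model.fit(fold_X_train_std, fold_y_train_discrete,
               batch_size=batch_size, epochs=n_epochs, verbose=False,
               val_data=(fold_X_val_std, fold_y_val_discrete))
surv = surv_model.predict_surv_df(fold_X_val_std)

from pycox.evaluation import EvalSurv
ev = EvalSurv(surv, fold_y_val[:, 0], fold_y_val[:, 1], censor_surv='km')
print('validation c-index: %.4f' % ev.concordance_td())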