# Cast both label arrays to float32 before fitting.
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

print('Pre-training...')

# The fitted forest is cached on disk under <output_dir>/models, keyed by the
# estimator name, dataset, experiment index and hyperparameters.
model_basename = '%s_%s_exp%d_mf%d_msl%d_km%d_test.pkl' % (
    init_survival_estimator_name, dataset, experiment_idx,
    max_features, min_samples_leaf, use_km)
model_filename = os.path.join(output_dir, 'models', model_basename)

if os.path.isfile(model_filename):
    # A forest for this configuration was saved by an earlier run; reuse it.
    surv_model = RandomSurvivalForest.load(model_filename)
else:
    # No cached forest yet: fit on the full training set and save it.
    surv_model = RandomSurvivalForest(
        n_estimators=100,
        max_features=max_features,
        max_depth=None,
        oob_score=False,
        feature_importance=False,
        min_samples_leaf=min_samples_leaf,
        random_state=init_random_seed,
        n_jobs=n_jobs)
    surv_model.fit(X_train, y_train)
    surv_model.save(model_filename)

print('*** Extracting proximity matrix...')

# The proximity matrix file sits next to the model file; [:-4] drops the
# '.pkl' suffix before the new suffix is appended.
prox_filename = model_filename[:-4] + '_prox_matrix.txt'
if not os.path.isfile(prox_filename):
    leaf_ids = surv_model.predict_leaf_ids(X_train)
    n = len(X_train)
    # Start from the identity matrix (ones on the diagonal).
    prox_matrix = np.eye(n)
# Start the clock first so that a fresh fit is timed from this point.
tic = time.time()

# On-disk cache for this fold: the fitted forest plus a sibling text file
# holding how long the fit took ([:-4] drops the '.pkl' suffix).
model_basename = '%s_%s_exp%d_%s_mf%d_msl%d_fold%d.pkl' % (
    survival_estimator_name, dataset, experiment_idx, val_string,
    max_features, min_samples_leaf, fold_idx)
model_filename = os.path.join(output_dir, 'models', model_basename)
time_elapsed_filename = model_filename[:-4] + '_time.txt'

if os.path.isfile(model_filename):
    # Reuse the forest from an earlier run and report its recorded fit time.
    # The timing file is read unconditionally, so it must exist whenever the
    # model file does.
    surv_model = RandomSurvivalForest.load(model_filename)
    elapsed = float(np.loadtxt(time_elapsed_filename))
    print('Time elapsed (from previous fitting): '
          + '%f second(s)' % elapsed)
else:
    # Nothing cached: fit on this fold's training split.
    surv_model = RandomSurvivalForest(
        n_estimators=100,
        max_features=max_features,
        max_depth=None,
        oob_score=False,
        feature_importance=False,
        min_samples_leaf=min_samples_leaf,
        random_state=method_random_seed,
        n_jobs=n_jobs)
    surv_model.fit(fold_X_train, fold_y_train)

    elapsed = time.time() - tic
    print('Time elapsed: %f second(s)' % elapsed)

    # Persist the timing (as a 1x1 array) and then the model itself.
    np.savetxt(time_elapsed_filename, np.array(elapsed).reshape(1, -1))
    surv_model.save(model_filename)
# One slot per IPEC horizon, initialised to None.
arg_min_IPEC_scores = [None] * num_IPEC_horizons

for n_estimators, max_depth in hyperparams:
    # Score accumulators are reset for every hyperparameter setting; the
    # IPEC accumulator holds one independent list per horizon.
    cindex_scores = []
    IPEC_scores = [[] for _ in range(num_IPEC_horizons)]

    for train_idx, val_idx in kf.split(X_train):
        fold_X_train = X_train[train_idx]
        fold_y_train = y_train[train_idx]
        fold_X_val = X_train[val_idx]
        fold_y_val = y_train[val_idx]

        surv_model = RandomSurvivalForest(
            n_estimators=n_estimators,
            max_depth=max_depth,
            oob_score=False,
            feature_importance=False,
            min_samples_leaf=3,
            random_state=rng,
            n_jobs=-1)
        surv_model.fit(fold_X_train, fold_y_train)

        # np.unique already returns its result sorted, so no separate
        # np.sort pass is needed over the first column of the validation
        # labels.
        sorted_fold_y_val = np.unique(fold_y_val[:, 0])

        # Make sure the evaluation mesh starts at 0 without duplicating it.
        if sorted_fold_y_val[0] != 0:
            mesh_points = np.concatenate(([0.], sorted_fold_y_val))
        else:
            mesh_points = sorted_fold_y_val

        # mesh_points is ascending by construction, hence presorted_times.
        surv = surv_model.predict_surv(fold_X_val, mesh_points,
                                       presorted_times=True)

        # -------------------------------------------------------------
# NOTE(review): this line is a whitespace-collapsed excerpt. The leading
# sys.exit() presumably closes a guard that starts before this excerpt, and
# the final print( call is cut off mid-argument -- confirm both against the
# full script before editing.
#
# What the visible statements do, in order:
#   1. Print the dataset name and experiment index.
#   2. Unpack seven values from load_dataset(dataset, experiment_idx): the
#      train/test arrays, feature_names, and two feature-transform callables.
#   3. Load the previously saved '*_test.pkl' RandomSurvivalForest from
#      <output_dir>/models and set its n_jobs attribute.
#   4. Predict survival for X_test at the unique values of y_train[:, 0];
#      np.unique returns them sorted, which is presumably why
#      presorted_times=True is passed.
#   5. Print test-set statistics of y_test[:, 0] (labelled "observed time" in
#      the output): min, Q1 (element at index int(0.25 * len) of the sorted
#      times), median, and the start of a Q3 line.
sys.exit() print('[Dataset: %s, experiment: %d]' % (dataset, experiment_idx)) print() X_train, y_train, X_test, y_test, feature_names, \ compute_features_and_transformer, transform_features = \ load_dataset(dataset, experiment_idx) print('Testing...', flush=True) model_filename = \ os.path.join(output_dir, 'models', '%s_%s_exp%d_%s_mf%d_msl%d_test.pkl' % (survival_estimator_name, dataset, experiment_idx, val_string, max_features, min_samples_leaf)) surv_model = RandomSurvivalForest.load(model_filename) surv_model.n_jobs = n_jobs sorted_unique_y_train = np.unique(y_train[:, 0]) surv = surv_model.predict_surv(X_test, sorted_unique_y_train, presorted_times=True) print() print('[Test data statistics]') sorted_y_test_times = np.sort(y_test[:, 0]) print('Quartiles:') print('- Min observed time:', np.min(y_test[:, 0])) print('- Q1 observed time:', sorted_y_test_times[int(0.25 * len(sorted_y_test_times))]) print('- Median observed time:', np.median(y_test[:, 0])) print('- Q3 observed time:',
print('Pre-training...')

# Start the clock before building paths so a fresh fit is timed from here.
tic = time.time()

# On-disk cache for the pre-trained forest and a sibling text file with its
# fit time ([:-4] drops the '.pkl' suffix).
model_basename = '%s_%s_exp%d_%s_mf%d_msl%d_test.pkl' % (
    init_survival_estimator_name, dataset, experiment_idx, init_val_string,
    max_features, min_samples_leaf)
model_filename = os.path.join(output_dir, 'models', model_basename)
time_elapsed_filename = model_filename[:-4] + '_time.txt'

if os.path.isfile(model_filename):
    # Reuse the forest from an earlier run and report its recorded fit time.
    # The timing file is read unconditionally, so it must exist whenever the
    # model file does.
    surv_model = RandomSurvivalForest.load(model_filename)
    elapsed = float(np.loadtxt(time_elapsed_filename))
    print('Time elapsed (from previous fitting): %f second(s)' % elapsed)
else:
    # Nothing cached: fit on the full training set.
    surv_model = RandomSurvivalForest(
        n_estimators=100,
        max_features=max_features,
        max_depth=None,
        oob_score=False,
        feature_importance=False,
        min_samples_leaf=min_samples_leaf,
        random_state=init_random_seed,
        n_jobs=n_jobs)
    surv_model.fit(X_train, y_train)

    elapsed = time.time() - tic
    print('Time elapsed: %f second(s)' % elapsed)

    # Persist the timing (as a 1x1 array) and then the model itself.
    np.savetxt(time_elapsed_filename, np.array(elapsed).reshape(1, -1))
    surv_model.save(model_filename)