Ejemplo n.º 1
0
# Cast both label arrays to 'float32'.
y_train, y_test = y_train.astype('float32'), y_test.astype('float32')

print('Pre-training...')
# The file name is built from the estimator name, dataset, experiment index,
# max_features, min_samples_leaf and use_km, under <output_dir>/models.
model_filename = os.path.join(
    output_dir, 'models',
    '%s_%s_exp%d_mf%d_msl%d_km%d_test.pkl' % (
        init_survival_estimator_name, dataset, experiment_idx,
        max_features, min_samples_leaf, use_km))

if os.path.isfile(model_filename):
    # A forest was already saved under this name: load it instead of fitting.
    surv_model = RandomSurvivalForest.load(model_filename)
else:
    # Nothing saved yet: fit on the training data, then save to that path.
    surv_model = RandomSurvivalForest(
        n_estimators=100,
        max_features=max_features,
        max_depth=None,
        oob_score=False,
        feature_importance=False,
        min_samples_leaf=min_samples_leaf,
        random_state=init_random_seed,
        n_jobs=n_jobs)
    surv_model.fit(X_train, y_train)
    surv_model.save(model_filename)

print('*** Extracting proximity matrix...')
# The proximity-matrix file sits next to the model file: the model path with
# its last four characters (the '.pkl' suffix) dropped, plus '_prox_matrix.txt'.
prox_filename = model_filename[:-4] + '_prox_matrix.txt'
if not os.path.isfile(prox_filename):
    # No file at prox_filename yet, so the matrix is built here.
    # NOTE(review): the remainder of this branch lies outside the visible
    # excerpt; only its first three statements are shown.
    leaf_ids = surv_model.predict_leaf_ids(X_train)
    n = len(X_train)
    # Start from the n-by-n identity matrix.
    prox_matrix = np.eye(n)
Ejemplo n.º 2
0
                    # Start the clock first, so the reported time covers
                    # everything from here up to the end of fit().
                    tic = time.time()

                    # Per-fold file for the fitted forest; the name is built
                    # from the estimator, dataset, experiment, validation
                    # string, the two hyperparameters and the fold index.
                    model_filename = os.path.join(
                        output_dir, 'models',
                        '%s_%s_exp%d_%s_mf%d_msl%d_fold%d.pkl' % (
                            survival_estimator_name, dataset,
                            experiment_idx, val_string, max_features,
                            min_samples_leaf, fold_idx))
                    # Companion text file holding the fitting time: same path
                    # with the '.pkl' extension replaced by '_time.txt'.
                    time_elapsed_filename = \
                        os.path.splitext(model_filename)[0] + '_time.txt'

                    if os.path.isfile(model_filename):
                        # Already fitted earlier: reload the forest and the
                        # timing that was recorded alongside it.
                        surv_model = RandomSurvivalForest.load(model_filename)
                        elapsed = float(np.loadtxt(time_elapsed_filename))
                        print('Time elapsed (from previous fitting): ' +
                              '%f second(s)' % elapsed)
                    else:
                        surv_model = RandomSurvivalForest(
                            n_estimators=100,
                            max_features=max_features,
                            max_depth=None,
                            oob_score=False,
                            feature_importance=False,
                            min_samples_leaf=min_samples_leaf,
                            random_state=method_random_seed,
                            n_jobs=n_jobs)
                        surv_model.fit(fold_X_train, fold_y_train)
                        elapsed = time.time() - tic
                        print('Time elapsed: %f second(s)' % elapsed)
                        # The timing file is written before the model file.
                        np.savetxt(time_elapsed_filename,
                                   np.array(elapsed).reshape(1, -1))
                        surv_model.save(model_filename)
Ejemplo n.º 3
0
        # One placeholder per IPEC horizon.
        # NOTE(review): presumably filled in later with the best
        # hyperparameter setting per horizon; that code is outside this
        # excerpt.
        arg_min_IPEC_scores = [None for _ in range(num_IPEC_horizons)]

        # Try every (n_estimators, max_depth) pair, scoring each one on the
        # train/validation splits produced by kf.
        for n_estimators, max_depth in hyperparams:
            # Score containers for this hyperparameter pair: one flat list,
            # plus one list per IPEC horizon.
            cindex_scores = []
            IPEC_scores = [[] for _ in range(num_IPEC_horizons)]

            for train_idx, val_idx in kf.split(X_train):
                # Both the fitting part and the validation part of a fold are
                # carved out of the training data.
                fold_X_train = X_train[train_idx]
                fold_y_train = y_train[train_idx]
                fold_X_val = X_train[val_idx]
                fold_y_val = y_train[val_idx]

                surv_model = RandomSurvivalForest(n_estimators=n_estimators,
                                                  max_depth=max_depth,
                                                  oob_score=False,
                                                  feature_importance=False,
                                                  min_samples_leaf=3,
                                                  random_state=rng,
                                                  n_jobs=-1)
                surv_model.fit(fold_X_train, fold_y_train)

                # Distinct column-0 values of the validation labels
                # (presumably the observed times; confirm against the data
                # loader). np.unique already returns its result sorted in
                # ascending order, so no separate np.sort pass is needed.
                sorted_fold_y_val = np.unique(fold_y_val[:, 0])
                # Prepend 0 unless the smallest value is already 0, so the
                # evaluation mesh always starts at 0.
                if sorted_fold_y_val[0] != 0:
                    mesh_points = np.concatenate(([0.], sorted_fold_y_val))
                else:
                    mesh_points = sorted_fold_y_val
                # The mesh is ascending, hence presorted_times=True.
                surv = surv_model.predict_surv(
                    fold_X_val, mesh_points, presorted_times=True)

                # -------------------------------------------------------------
Ejemplo n.º 4
0
    sys.exit()

# Announce which dataset / experiment this run works on.
print('[Dataset: %s, experiment: %d]' % (dataset, experiment_idx))
print()

# load_dataset returns the train/test arrays, the feature names and two
# feature-transformation callables, unpacked in that order.
(X_train, y_train, X_test, y_test, feature_names,
 compute_features_and_transformer, transform_features) = \
    load_dataset(dataset, experiment_idx)

print('Testing...', flush=True)
model_filename = os.path.join(
    output_dir, 'models',
    '%s_%s_exp%d_%s_mf%d_msl%d_test.pkl' % (
        survival_estimator_name, dataset, experiment_idx,
        val_string, max_features, min_samples_leaf))
# The forest is only loaded here, never fitted, so the file must exist.
surv_model = RandomSurvivalForest.load(model_filename)
# Override the loaded model's n_jobs with the value used by this script.
surv_model.n_jobs = n_jobs
# np.unique gives the distinct column-0 training values in ascending order,
# which is why the prediction call can pass presorted_times=True.
sorted_unique_y_train = np.unique(y_train[:, 0])
surv = surv_model.predict_surv(
    X_test, sorted_unique_y_train, presorted_times=True)

print()
print('[Test data statistics]')
# Sorted copy of column 0 of y_test, used for the index-based quartile below.
sorted_y_test_times = np.sort(y_test[:, 0])
print('Quartiles:')
print('- Min observed time:', y_test[:, 0].min())
# Q1 is read straight off the sorted values at index floor(n / 4), with no
# interpolation between neighbours.
print('- Q1 observed time:',
      sorted_y_test_times[len(sorted_y_test_times) // 4])
print('- Median observed time:', np.median(y_test[:, 0]))
print('- Q3 observed time:',
Ejemplo n.º 5
0
        print('Pre-training...')
        # Start the clock before the file name is built, so the reported time
        # covers everything from here up to the end of fit().
        tic = time.time()
        model_filename = os.path.join(
            output_dir, 'models',
            '%s_%s_exp%d_%s_mf%d_msl%d_test.pkl' % (
                init_survival_estimator_name, dataset, experiment_idx,
                init_val_string, max_features, min_samples_leaf))
        # Companion text file holding the fitting time: same path with the
        # '.pkl' extension replaced by '_time.txt'.
        time_elapsed_filename = \
            os.path.splitext(model_filename)[0] + '_time.txt'

        if os.path.isfile(model_filename):
            # Already fitted earlier: reload the forest and the timing that
            # was recorded alongside it.
            surv_model = RandomSurvivalForest.load(model_filename)
            elapsed = float(np.loadtxt(time_elapsed_filename))
            print('Time elapsed (from previous fitting): %f second(s)' %
                  elapsed)
        else:
            surv_model = RandomSurvivalForest(
                n_estimators=100,
                max_features=max_features,
                max_depth=None,
                oob_score=False,
                feature_importance=False,
                min_samples_leaf=min_samples_leaf,
                random_state=init_random_seed,
                n_jobs=n_jobs)
            surv_model.fit(X_train, y_train)
            elapsed = time.time() - tic
            print('Time elapsed: %f second(s)' % elapsed)
            # The timing file is written before the model file.
            np.savetxt(time_elapsed_filename, np.array(elapsed).reshape(1, -1))
            surv_model.save(model_filename)