Example #1
import numpy as np

# balanced_accuracy is the function under test, assumed importable from the
# surrounding package
def test_balanced_accuracy():
    """Tests to ensure the correctness of the balanced accuracy calculation."""

    num_trials = 10

    for num_classes in np.random.randint(2, 100, num_trials):
        cm_100 = np.zeros((num_classes, num_classes), int)
        # no errors; class sizes are imbalanced
        np.fill_diagonal(cm_100, np.random.randint(10, 100, num_classes))
        if not np.isclose(balanced_accuracy(cm_100), 1.0):
            raise ArithmeticError('accuracy calculation on a perfect classifier '
                                  'does not return 100% accuracy!')

        cm_100perc_wrong = np.random.randint(10, 100, (num_classes, num_classes))
        # all predictions are errors; class sizes are imbalanced
        np.fill_diagonal(cm_100perc_wrong, 0)
        if not np.isclose(balanced_accuracy(cm_100perc_wrong), 0.0):
            raise ArithmeticError('accuracy calculation on a completely wrong '
                                  'classifier does not return 0% accuracy!')

        cm = np.random.randint(10, 100, (num_classes, num_classes)).astype('float64')
        np.fill_diagonal(cm, 0)
        cls_sizes_wout_diag_elem = cm.sum(axis=1)
        chosen_accuracy = np.random.rand(num_classes)
        factor = chosen_accuracy / (1.0 - chosen_accuracy)
        # fill the diagonal so each class reaches its chosen level of accuracy
        diag_values = np.around(cls_sizes_wout_diag_elem * factor).astype('float64')
        np.fill_diagonal(cm, diag_values)
        computed_acc = balanced_accuracy(cm)
        expected_acc = np.mean(chosen_accuracy)
        if not np.isclose(computed_acc, expected_acc, atol=1e-2):
            raise ArithmeticError(
                'accuracy calculation does not match the expected value!\n'
                ' Expected : {:.8f}\n'
                ' Estimated: {:.8f}\n'
                ' Differ by: {:.8f}\n'
                ''.format(expected_acc, computed_acc,
                          expected_acc - computed_acc))
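
For reference, here is a minimal sketch of the kind of balanced_accuracy implementation this test exercises, assuming rows of the confusion matrix correspond to true classes and the metric is the mean per-class recall; the actual function in the package may differ in its details:

import numpy as np

def balanced_accuracy(confusion_mat):
    """Mean per-class recall: each diagonal element divided by its row sum."""
    confusion_mat = np.asarray(confusion_mat, dtype='float64')
    per_class_recall = np.diag(confusion_mat) / confusion_mat.sum(axis=1)
    return np.mean(per_class_recall)

With this definition, a purely diagonal confusion matrix yields 1.0, a zero diagonal yields 0.0, and the third test case recovers the mean of the per-class accuracies chosen above.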
Example #2
import logging
import os
import pickle
import sys
from os.path import join as pjoin

import numpy as np
from sklearn.metrics import roc_auc_score

# cfg, get_temp_dir, get_pretty_print_options, balanced_accuracy and
# eval_optimized_model_on_testset are assumed to be provided by the
# surrounding package
def holdout_trial_compare_datasets(datasets,
                                   impute_strategy,
                                   train_size_common,
                                   feat_sel_size,
                                   train_perc,
                                   total_test_samples,
                                   num_classes,
                                   num_features_per_dataset,
                                   class_set,
                                   method_names,
                                   pos_class_index,
                                   out_results_dir,
                                   grid_search_level,
                                   classifier_name,
                                   feat_select_method,
                                   rep_id=None):
    """
    Runs a single iteration of optimizing the chosen pipeline on the chosen
    training set and evaluating it on the given test set.

    Parameters
    ----------
    datasets

    impute_strategy : str
        Strategy to handle missing data: either raise an error when data are
        missing, or impute them using the method chosen here.

    train_size_common
    feat_sel_size
    train_perc
    total_test_samples
    num_classes
    num_features_per_dataset
    class_set
    method_names
    pos_class_index
    out_results_dir

    grid_search_level : str
        If 'light', grid search resolution will be reduced to speed up optimization.
        If 'exhaustive', the broadest range of values for most parameters will be
        used for optimization.

    classifier_name
    feat_select_method
    rep_id

    Returns
    -------
    results_list : list
        Results for this repetition: predicted probabilities and labels on the
        test set, true test labels, balanced accuracies, confusion matrices,
        weighted AUCs, feature importances, best parameters, misclassified
        sample IDs and the test set itself. The same list is also pickled to
        the temporary directory under out_results_dir.
    """

    common_ds = datasets[cfg.COMMON_DATASET_INDEX]
    num_datasets = len(datasets)

    # multi-class metrics
    confusion_matrix = np.full([num_classes, num_classes, num_datasets],
                               np.nan)
    accuracy_balanced = np.full(num_datasets, np.nan)
    auc_weighted = np.full(num_datasets, np.nan)
    best_params = [None] * num_datasets
    misclsfd_ids_this_run = [None] * num_datasets

    feature_importances = [None] * num_datasets
    for idx in range(num_datasets):
        feature_importances[idx] = np.full(num_features_per_dataset[idx],
                                           np.nan)

    # set of subjects for training and testing, common for all datasets.
    train_set, test_set = common_ds.train_test_split_ids(
        count_per_class=train_size_common)
    # NOTE test labels are the same for all datasets - each feature/model
    # combination is being evaluated against the same set of test samplets
    true_test_labels = np.array([
        common_ds.targets[sid] for sid in test_set if sid in common_ds.targets
    ])

    pred_prob_per_class = np.full(
        [num_datasets, total_test_samples, num_classes], np.nan)
    pred_labels_per_rep_fs = np.empty([num_datasets, total_test_samples],
                                      dtype=true_test_labels.dtype)

    # to uniquely identify this iteration
    if rep_id is None:
        rep_proc_id = 'process{}'.format(os.getpid())
    else:
        rep_proc_id = str(rep_id)
    print_options = get_pretty_print_options(method_names, num_datasets)

    # evaluating each feature/dataset
    for dd in range(num_datasets):
        print("CV trial {rep:6} "
              "feature {index:{nd}} "
              "{name:>{namewidth}} : "
              "".format(rep=rep_proc_id,
                        index=dd,
                        name=method_names[dd],
                        nd=print_options.num_digits,
                        namewidth=print_options.str_width),
              end='')

        # using the same train/test sets for all feature sets.
        train_fs = datasets[dd].get_subset(train_set)
        test_fs = datasets[dd].get_subset(test_set)

        pred_prob_per_class[dd, :, :], pred_labels_per_rep_fs[dd,:], \
        _ignored_true_test_labels, conf_mat, misclsfd_ids_this_run[dd], \
        feature_importances[dd], best_params[dd] = \
            eval_optimized_model_on_testset(train_fs, test_fs,
                                            impute_strategy=impute_strategy,
                                            train_perc=train_perc,
                                            feat_sel_size=feat_sel_size,
                                            label_order_in_conf_matrix=class_set,
                                            grid_search_level=grid_search_level,
                                            classifier_name=classifier_name,
                                            feat_select_method=feat_select_method)

        # TODO new feature: add additional metrics such as PPV
        accuracy_balanced[dd] = balanced_accuracy(conf_mat)
        confusion_matrix[:, :, dd] = conf_mat
        print('balanced accuracy: {:.4f} '.format(accuracy_balanced[dd]),
              end='')

        if num_classes == 2:
            auc_weighted[dd] = roc_auc_score(
                true_test_labels,
                pred_prob_per_class[dd, :, pos_class_index],
                average='weighted')
            print('\t weighted AUC: {:.4f}'.format(auc_weighted[dd]), end='')

        print('', flush=True)
        sys.stdout.flush()
        sys.stderr.flush()

    results_list = [
        pred_prob_per_class, pred_labels_per_rep_fs, true_test_labels,
        accuracy_balanced, confusion_matrix, auc_weighted, feature_importances,
        best_params, misclsfd_ids_this_run, test_set
    ]

    tmp_dir = get_temp_dir(out_results_dir)
    out_path = pjoin(tmp_dir, '{}_{}.pkl'.format(cfg.temp_prefix_rhst,
                                                 rep_proc_id))
    logging.info('results from rep {} saved to {}'.format(
        rep_proc_id, out_path))
    with open(out_path, 'wb') as of:
        pickle.dump(results_list, of)

    return results_list
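
Downstream, the pickled results of each repetition can be reloaded and unpacked in the same order as results_list above. The helper below is a hypothetical sketch, not part of the package itself; pkl_path is the .pkl file whose location is logged by the function:

import pickle

def load_rep_results(pkl_path):
    """Reload the results pickled by holdout_trial_compare_datasets."""
    with open(pkl_path, 'rb') as rf:
        (pred_prob_per_class, pred_labels_per_rep_fs, true_test_labels,
         accuracy_balanced, confusion_matrix, auc_weighted,
         feature_importances, best_params, misclsfd_ids, test_set) = pickle.load(rf)
    # return the summary metrics most often aggregated across repetitions
    return {'balanced_accuracy': accuracy_balanced,
            'confusion_matrix': confusion_matrix,
            'auc_weighted': auc_weighted}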