예제 #1
0
def test_each_combination_works():
    """Ensure each combination of feature selection and classifier works.

    Every pairing of ``cfg.classifier_choices`` with
    ``cfg.all_dim_red_methods`` is run through the CLI, starting from a
    cleaned output directory. A failing pairing does not abort the loop:
    it is recorded, its traceback is printed, and the full list of failed
    pairings is printed once all of them have been tried.
    """

    nrep = 10
    nproc = 1
    gsl = 'none'  # to speed up the process
    failed_combos = []
    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.all_dim_red_methods:
            # ensure a fresh start
            remove_neuropredict_results(out_dir)
            try:
                cli_str = 'np_classify -y {} -t {} -n {} -c {} -o {} ' \
                          ' -e {} -dr {} -g {} ' \
                          ''.format(out_path1, train_perc, nrep, nproc, out_dir,
                                    clf_name, fs_name, gsl)
                sys.argv = shlex.split(cli_str)
                cli()
            except (Exception, SystemExit):
                # SystemExit stays in the list so that a CLI terminating via
                # sys.exit() is still recorded as a failed combination, while
                # KeyboardInterrupt is no longer swallowed and can stop the
                # whole run.
                failed_combos.append('{:35} {:35}'.format(clf_name, fs_name))
                traceback.print_exc()

    print('\nCombinations failed:\n{}'.format('\n'.join(failed_combos)))
    if len(failed_combos) > 4:
        print(
            '\n  -----> 5 or more combinations of DR and CLF failed! Fix them')
예제 #2
0
def test_separable_100perc():
    """Check that fully separable classes yield close-to-perfect accuracy.

    A fully separable dataset with a random number of dimensions is saved
    to disk, then every classifier / dimensionality-reduction pairing is
    run on it via the CLI and its balanced accuracy is compared against a
    reference level of 1.0.
    """

    num_dims = np.random.randint(2, max_dim)
    separable_ds = make_fully_separable_classes(max_class_size=100,
                                                max_dim=num_dims)
    separable_ds.description = 'fully_separable_dataset'

    out_path_sep = os.path.join(out_dir, 'two_separable_classes.pkl')
    out_dir_sep = os.path.join(out_dir, 'fully_separable_test')
    os.makedirs(out_dir_sep, exist_ok=True)
    separable_ds.save(out_path_sep)

    nrep = 10
    gsl = 'none'  # to speed up the process
    cli_template = ('neuropredict -y {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {}')

    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.all_dim_red_methods:
            command = cli_template.format(out_path_sep, train_perc, nrep, 1,
                                          gsl, out_dir_sep, clf_name, fs_name)
            sys.argv = shlex.split(command)
            cli()

            descr = '{} {}'.format(fs_name, clf_name)
            cv_results = neuropredict.reports.load_results_from_folder(
                out_dir_sep)
            for _, result in cv_results.items():
                # reference_level=1.0 means: compare against perfect accuracy
                raise_if_mean_differs_from(result['accuracy_balanced'],
                                           result['target_sizes'],
                                           reference_level=1.0,
                                           eps_chance_acc=0.5,
                                           method_descr=descr)
예제 #3
0
def test_basic_run():
    """Run ``np_classify`` once with two datasets and the module-level settings."""

    cli_template = ('np_classify -y {} {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {} -k {} --sub_groups {} -p {} -cl {} -cm {}')
    command = cli_template.format(
        out_path1, out_path2, train_perc, num_rep_cv, num_procs, gs_level,
        out_dir, estimator, dr_method, dr_size, sg_list, positive_class,
        covar_arg, deconf_method)

    sys.argv = shlex.split(command)
    cli()
예제 #4
0
def test_arff():
    """Run the CLI on the ``iris.arff`` file passed via the ``-a`` option.

    The file is looked up under ``example_datasets/arff`` in the directory
    three levels above this test file.
    """

    base_dir = dirname(dirname(dirname(__file__)))  # 3 levels up
    arff_path = realpath(
        pjoin(base_dir, 'example_datasets', 'arff', 'iris.arff'))

    cli_template = ('neuropredict -a {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {}')
    command = cli_template.format(arff_path, train_perc, num_repetitions,
                                  num_procs, gs_level, out_dir, classifier,
                                  fs_method)
    sys.argv = shlex.split(command)
    cli()
예제 #5
0
def test_print_options():
    """Exercise the option-printing flags of the CLI.

    If an options file already exists in the output directory,
    ``--print_options`` on that directory must end in ``SystemExit``.
    ``--po`` on a directory that does not exist must raise ``IOError``.
    """

    existing_dir = out_dir
    saved_options = pjoin(out_dir, cfg.file_name_options)

    # Only meaningful when an earlier run left its options file behind.
    if pexists(saved_options):
        sys.argv = shlex.split(
            'neuropredict --print_options {}'.format(existing_dir))
        with raises(SystemExit):
            cli()

    missing_dir = existing_dir + '_43_34563$#*$@)'
    sys.argv = shlex.split('neuropredict --po {}'.format(missing_dir))
    with raises(IOError):
        cli()
예제 #6
0
def test_vis():
    """Ensure ``--make_vis`` works on previously computed results.

    When ``rhst_results.pkl`` exists in the output directory, the CLI is
    invoked with ``--make_vis`` and is expected to end in ``SystemExit``;
    afterwards the expected figure files are checked for existence. When
    no earlier results are found, a message is printed and nothing is
    checked.

    Raises
    ------
    ValueError
        If one of the expected figure files is missing after the CLI run.
    """

    res_path = pjoin(out_dir, 'rhst_results.pkl')
    if not pexists(res_path):
        print('previously computed results not found in \n {}'.format(out_dir))
        return

    with raises(SystemExit):
        sys.argv = shlex.split('neuropredict --make_vis {}'.format(out_dir))
        cli()

    # These checks must live outside the ``raises`` block: once cli() raises
    # the expected SystemExit, no further statement of that block is run.
    expected_results = ['balanced_accuracy.pdf',
                        'compare_misclf_rates.pdf',
                        'feature_importance.pdf']
    for fig_name in expected_results:
        # NOTE(review): assumes the figures are written directly into
        # out_dir (not the current working directory) -- confirm.
        rpath = pjoin(out_dir, fig_name)
        if not pexists(rpath):
            raise ValueError('an expected result {} not produced'
                             ''.format(rpath))
예제 #7
0
def test_each_combination_works():
    """Ensure each combination of feature selection and classifier works.

    The first failing combination is reported by name and its exception
    is re-raised.
    """

    nrep = 10
    gsl = 'none'  # to speed up the process
    cli_template = ('neuropredict -y {} -t {} -n {} -c {} -o {} '
                    ' -e {} -dr {} -g {} ')

    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.all_dim_red_methods:
            try:
                command = cli_template.format(out_path, train_perc, nrep,
                                              num_procs, out_dir, clf_name,
                                              fs_name, gsl)
                sys.argv = shlex.split(command)
                cli()
            except BaseException:
                # Name the offending pair, then let the error propagate.
                print('\n ---> combination failed: {} {}'.format(clf_name, fs_name))
                raise
예제 #8
0
def test_chance_clf_binary_svm():
    """Run the CLI on two datasets and check the balanced accuracy.

    The number of repetitions is ``min_rep_per_class`` times the number of
    targets in ``rand_two_class``. Each loaded result is checked with
    ``raise_if_mean_differs_from`` using ``eps_chance_acc_binary``.
    """

    num_reps = min_rep_per_class * rand_two_class.num_targets
    cli_template = ('neuropredict -y {} {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {}')
    command = cli_template.format(out_path, out_path2, train_perc, num_reps,
                                  num_procs, gs_level, out_dir, classifier,
                                  fs_method)
    sys.argv = shlex.split(command)
    cli()

    cv_results = neuropredict.reports.load_results_from_folder(out_dir)
    for _, result in cv_results.items():
        raise_if_mean_differs_from(result['accuracy_balanced'],
                                   result['target_sizes'],
                                   eps_chance_acc=eps_chance_acc_binary)
예제 #9
0
def test_chance_multiclass():
    """Run the CLI on the multiclass dataset and check the balanced accuracy.

    Uses a random forest classifier with variance-threshold feature
    selection and no grid search. Each loaded result is then checked with
    ``raise_if_mean_differs_from`` using the tolerance ``eps_chance_acc``.
    """

    clf = 'randomforestclassifier'
    fs_method = 'variancethreshold'
    nrep = total_num_classes * min_rep_per_class
    gsl = 'none'  # to speed up the process
    sys.argv = shlex.split('neuropredict -y {} -t {} -n {} -c {} -g {} '
                           '-o {} -e {} -dr {}'
                           ''.format(out_path_multiclass, train_perc, nrep,
                                     num_procs, gsl, out_dir, clf, fs_method))
    cli()

    method_descr = '{} {} gsl {}'.format(fs_method, clf, gsl)
    cv_results = neuropredict.reports.load_results_from_folder(out_dir)
    for _, result in cv_results.items():
        # Pass the tolerance by keyword, as the other callers do, so it
        # cannot be bound to a different positional parameter by mistake.
        raise_if_mean_differs_from(result['accuracy_balanced'],
                                   result['target_sizes'],
                                   eps_chance_acc=eps_chance_acc,
                                   method_descr=method_descr)
예제 #10
0
def test_chance_clf_binary_svm():
    """Run the CLI on two datasets and check the balanced accuracy.

    The CLI returns a mapping to result file paths; each file is unpickled
    and the balanced-accuracy values of all datasets are stacked
    column-wise before being checked with ``raise_if_mean_differs_from``.
    """
    import pickle

    num_reps = min_rep_per_class * ds_two.num_targets
    cli_template = ('neuropredict -y {} {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {}')
    sys.argv = shlex.split(
        cli_template.format(out_path1, out_path2, train_perc, num_reps,
                            num_procs, gs_level, out_dir, estimator,
                            dr_method))
    result_paths = cli()

    for _, res_path in result_paths.items():
        # The pickles read here were just written by the cli() call above.
        with open(res_path, 'rb') as res_fid:
            result = pickle.load(res_fid)

        perf = result['results']
        per_dataset_scores = perf.metric_val['balanced_accuracy_score']
        bal_acc_all_dsets = list(per_dataset_scores.values())

        raise_if_mean_differs_from(np.column_stack(bal_acc_all_dsets),
                                   result['_target_sizes'],
                                   eps_chance_acc=eps_chance_acc_binary)
예제 #11
0
def main():
    """Entry point: delegate to ``classify.cli``."""

    classify.cli()
예제 #12
0
def test_versioning():
    """Check that invoking the CLI with ``-v`` ends in ``SystemExit``."""

    sys.argv = shlex.split('neuropredict -v')
    with raises(SystemExit):
        cli()