Ejemplo n.º 1
0
def test_arff():
    """Run the neuropredict CLI on the iris ARFF file from ``example_datasets``."""

    # the example datasets folder sits three directory levels above this file
    three_levels_up = dirname(dirname(dirname(__file__)))
    arff_path = realpath(
        pjoin(three_levels_up, 'example_datasets', 'arff', 'iris.arff'))

    cmd_template = 'neuropredict -a {} -t {} -n {} -c {} -g {} -o {} -e {} -fs {}'
    cmd = cmd_template.format(arff_path, train_perc, num_repetitions, num_procs,
                              gs_level, out_dir, classifier, fs_method)

    # cli() reads its arguments from sys.argv, so emulate a command-line call
    sys.argv = shlex.split(cmd)
    cli()
Ejemplo n.º 2
0
def test_separable_100perc():
    """Check that fully separable classes are predicted close to perfectly.

    Builds a fully separable dataset, saves it to disk, then runs the CLI on
    it for every classifier / dimensionality-reduction pair listed in ``cfg``.
    After each run the balanced accuracy of every stored result is compared
    against a reference level of 1.0.
    """

    num_dims = np.random.randint(2, max_dim)
    separable_ds = make_fully_separable_classes(max_class_size=100,
                                                max_dim=num_dims)
    separable_ds.description = 'fully_separable_dataset'

    out_path_sep = os.path.join(out_dir, 'two_separable_classes.pkl')
    out_dir_sep = os.path.join(out_dir, 'fully_separable_test')
    os.makedirs(out_dir_sep, exist_ok=True)
    separable_ds.save(out_path_sep)

    nrep = 10
    gsl = 'none'  # to speed up the process
    cmd_template = 'neuropredict -y {} -t {} -n {} -c {} -g {} -o {} -e {} -fs {}'

    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.all_dim_red_methods:

            # cli() takes its arguments from sys.argv
            cmd = cmd_template.format(out_path_sep, train_perc, nrep, 1, gsl,
                                      out_dir_sep, clf_name, fs_name)
            sys.argv = shlex.split(cmd)
            cli()

            combo_descr = '{} {}'.format(fs_name, clf_name)
            cv_results = rhst.load_results_from_folder(out_dir_sep)
            for _, result in cv_results.items():
                raise_if_mean_differs_from(
                    result['accuracy_balanced'],
                    result['class_sizes'],
                    reference_level=1.0,  # comparing to perfect
                    eps_chance_acc=0.5,
                    method_descr=combo_descr)
Ejemplo n.º 3
0
def test_print_options():
    """Ensure ``--print_options`` runs on a previously produced output folder.

    The folder queried is the module-level ``out_dir`` rather than an absolute
    path on one developer's machine, so the test can run anywhere. The CLI is
    only invoked when an options file is already present in ``out_dir``;
    otherwise a message is printed and the test returns without checking
    anything.

    Raises
    ------
    Failed (from ``raises``)
        If the CLI does not terminate with ``SystemExit``.
    """

    # NOTE(review): assumes cfg.file_name_options names the options file that
    # a previous run stores in out_dir -- confirm against the cfg module.
    options_path = pjoin(out_dir, cfg.file_name_options)
    if not pexists(options_path):
        print('previously saved options not found in \n {}'.format(out_dir))
        return

    # cli() reads its arguments from sys.argv, so emulate a command-line call
    sys.argv = shlex.split('neuropredict --print_options {}'.format(out_dir))
    with raises(SystemExit):
        cli()
Ejemplo n.º 4
0
def test_vis():
    """Ensure ``--make_vis`` runs and leaves the expected figures behind.

    The CLI is only invoked when ``rhst_results.pkl`` already exists in
    ``out_dir``; otherwise a message is printed and nothing is checked.

    Raises
    ------
    ValueError
        If one of the expected figure files is missing after the CLI ran.
    """

    res_path = pjoin(out_dir, 'rhst_results.pkl')
    if not pexists(res_path):
        print('previously computed results not found in \n {}'.format(out_dir))
        return

    # cli() reads its arguments from sys.argv, so emulate a command-line call
    sys.argv = shlex.split('neuropredict --make_vis {}'.format(out_dir))
    with raises(SystemExit):
        cli()

    # This verification has to live outside the ``raises`` block: once cli()
    # raises SystemExit, any statement following it inside the block is skipped.
    expected_results = ['balanced_accuracy.pdf',
                        'compare_misclf_rates.pdf',
                        'feature_importance.pdf']
    for file_name in expected_results:
        # NOTE(review): assumes the figures are written directly into out_dir
        # (not a sub-folder, not the working directory) -- confirm.
        rpath = pjoin(out_dir, file_name)
        if not pexists(rpath):
            raise ValueError('an expected result {} not produced'.format(rpath))
Ejemplo n.º 5
0
def test_chance_clf_binary_svm():
    """Run the CLI on two datasets and compare balanced accuracy to chance.

    Every result loaded from ``out_dir`` afterwards is passed to
    ``raise_if_mean_differs_from_chance`` with the binary tolerance
    ``eps_chance_acc_binary``.
    """

    # number of repetitions scales with the number of classes in the dataset
    nrep = min_rep_per_class * rand_two_class.num_classes

    cmd_template = 'neuropredict -y {} {} -t {} -n {} -c {} -g {} -o {} -e {} -fs {}'
    cmd = cmd_template.format(out_path, out_path2, train_perc, nrep, num_procs,
                              gs_level, out_dir, classifier, fs_method)

    # cli() reads its arguments from sys.argv, so emulate a command-line call
    sys.argv = shlex.split(cmd)
    cli()

    cv_results = rhst.load_results_from_folder(out_dir)
    for _, result in cv_results.items():
        balanced_acc = result['accuracy_balanced']
        class_sizes = result['class_sizes']
        raise_if_mean_differs_from_chance(balanced_acc,
                                          class_sizes,
                                          eps_chance_acc_binary)
Ejemplo n.º 6
0
def test_each_combination_works():
    """Ensure every feature-selection / classifier combination runs via the CLI.

    When a combination fails, its names are printed before the original
    exception is re-raised unchanged.
    """

    nrep = 10
    gsl = 'none'  # to speed up the process
    cmd_template = 'neuropredict -y {} -t {} -n {} -c {} -o {}  -e {} -fs {} -g {} '

    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.feature_selection_choices:
            try:
                cli_str = cmd_template.format(out_path, train_perc, nrep,
                                              num_procs, out_dir, clf_name,
                                              fs_name, gsl)
                # cli() reads its arguments from sys.argv
                sys.argv = shlex.split(cli_str)
                cli()
            except BaseException:
                # catch-all on purpose (SystemExit included): only report which
                # pair broke, then let the exception propagate as is
                print(' ---> combination failed: {} {}'.format(clf_name, fs_name))
                raise
Ejemplo n.º 7
0
def test_chance_multiclass():
    """Run the CLI on the multiclass dataset and compare accuracy to chance.

    Uses a fixed classifier and feature-selection method with grid search set
    to ``'none'``. Every result loaded from ``out_dir`` is then passed to
    ``raise_if_mean_differs_from_chance`` with tolerance ``eps_chance_acc``.
    """

    clf = 'randomforestclassifier'
    fs_method = 'variancethreshold'
    nrep = total_num_classes * min_rep_per_class
    gsl = 'none'  # to speed up the process

    cmd_template = 'neuropredict -y {} -t {} -n {} -c {} -g {} -o {} -e {} -fs {}'
    cmd = cmd_template.format(out_path_multiclass, train_perc, nrep, num_procs,
                              gsl, out_dir, clf, fs_method)

    # cli() reads its arguments from sys.argv, so emulate a command-line call
    sys.argv = shlex.split(cmd)
    cli()

    cv_results = rhst.load_results_from_folder(out_dir)
    for _, result in cv_results.items():
        raise_if_mean_differs_from_chance(result['accuracy_balanced'],
                                          result['class_sizes'],
                                          eps_chance_acc)
Ejemplo n.º 8
0
def test_print_options():
    """Exercise the option-printing flags of the CLI.

    Two checks are made:

    1. If an options file exists in ``out_dir``, ``--print_options`` on that
       folder must end with ``SystemExit``.
    2. ``--po`` on a folder name derived from ``out_dir`` plus a junk suffix
       must raise ``IOError``.
    """

    known_out_dir = out_dir
    options_path = pjoin(out_dir, cfg.file_name_options)

    if pexists(options_path):
        # cli() reads its arguments from sys.argv
        sys.argv = shlex.split(
            'neuropredict --print_options {}'.format(known_out_dir))
        with raises(SystemExit):
            cli()

    # junk suffix appended so that the folder is not expected to exist
    known_nonexisting_dir = known_out_dir + '_43_34563$#*$@)'
    sys.argv = shlex.split(
        'neuropredict --po {}'.format(known_nonexisting_dir))
    with raises(IOError):
        cli()
Ejemplo n.º 9
0
def test_versioning():
    """Ensure calling the CLI with ``-v`` terminates with ``SystemExit``."""

    # cli() reads its arguments from sys.argv, so emulate a command-line call
    sys.argv = shlex.split('neuropredict -v')
    with raises(SystemExit):
        cli()