def test_each_combination_works():
    """Run every classifier / dimensionality-reduction pairing through the CLI.

    Each pairing from ``cfg.classifier_choices`` and
    ``cfg.all_dim_red_methods`` is executed after clearing earlier results
    from ``out_dir``. Failures are collected and printed (with traceback)
    instead of being raised, so one broken pairing does not hide the status
    of the remaining ones.
    """

    nrep = 10
    nproc = 1
    gsl = 'none'  # to speed up the process

    failed_combos = []
    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.all_dim_red_methods:
            # ensure a fresh start
            remove_neuropredict_results(out_dir)
            try:
                cli_str = 'np_classify -y {} -t {} -n {} -c {} -o {} ' \
                          ' -e {} -dr {} -g {} ' \
                          ''.format(out_path1, train_perc, nrep, nproc, out_dir,
                                    clf_name, fs_name, gsl)
                sys.argv = shlex.split(cli_str)
                cli()
            except (Exception, SystemExit):
                # SystemExit is listed explicitly so that an early exit of the
                # CLI is still recorded as a failed combination, while
                # KeyboardInterrupt is left alone and can abort the run.
                failed_combos.append('{:35} {:35}'.format(clf_name, fs_name))
                traceback.print_exc()

    print('\nCombinations failed:\n{}'.format('\n'.join(failed_combos)))
    if len(failed_combos) > 4:
        print(
            '\n -----> 5 or more combinations of DR and CLF failed! Fix them')
def test_separable_100perc():
    """Check that fully separable classes are predicted almost perfectly.

    A separable dataset is generated and saved under ``out_dir``; then every
    classifier / dimensionality-reduction pairing is run on it through the
    CLI and the balanced accuracy of each sub-group is compared against a
    reference level of 1.0.
    """

    sep_dataset = make_fully_separable_classes(
        max_class_size=100, max_dim=np.random.randint(2, max_dim))
    sep_dataset.description = 'fully_separable_dataset'

    sep_ds_path = os.path.join(out_dir, 'two_separable_classes.pkl')
    sep_out_dir = os.path.join(out_dir, 'fully_separable_test')
    os.makedirs(sep_out_dir, exist_ok=True)
    sep_dataset.save(sep_ds_path)

    num_reps = 10
    grid_search = 'none'  # to speed up the process
    cli_template = \
        'neuropredict -y {} -t {} -n {} -c {} -g {} -o {} -e {} -dr {}'

    for clf_name in cfg.classifier_choices:
        for fs_name in cfg.all_dim_red_methods:
            command = cli_template.format(sep_ds_path, train_perc, num_reps, 1,
                                          grid_search, sep_out_dir,
                                          clf_name, fs_name)
            sys.argv = shlex.split(command)
            cli()

            combo_descr = '{} {}'.format(fs_name, clf_name)
            loaded = neuropredict.reports.load_results_from_folder(sep_out_dir)
            for _sub_group, sg_result in loaded.items():
                raise_if_mean_differs_from(
                    sg_result['accuracy_balanced'],
                    sg_result['target_sizes'],
                    reference_level=1.0,  # comparing to perfect
                    eps_chance_acc=0.5,
                    method_descr=combo_descr)
def test_basic_run():
    """Run the classification CLI once with the settings defined outside this function."""

    cli_template = ('np_classify -y {} {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {} -k {} --sub_groups {} -p {} -cl {} -cm {}')
    cli_values = (out_path1, out_path2, train_perc, num_rep_cv, num_procs,
                  gs_level, out_dir, estimator, dr_method, dr_size, sg_list,
                  positive_class, covar_arg, deconf_method)

    sys.argv = shlex.split(cli_template.format(*cli_values))
    cli()
def test_arff():
    """Run the CLI on the bundled ``iris.arff`` example via the ``-a`` flag."""

    # the example datasets sit 3 levels up from this file
    three_levels_up = dirname(dirname(dirname(__file__)))
    arff_path = realpath(
        pjoin(three_levels_up, 'example_datasets', 'arff', 'iris.arff'))

    cli_template = ('neuropredict -a {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {}')
    command = cli_template.format(arff_path, train_perc, num_repetitions,
                                  num_procs, gs_level, out_dir,
                                  classifier, fs_method)
    sys.argv = shlex.split(command)
    cli()
def test_print_options():
    """Exercise the option-printing flag on an existing and a missing folder.

    With a saved options file present in ``out_dir`` the CLI is expected to
    end in ``SystemExit``; for a folder that does not exist it is expected
    to raise ``IOError``.
    """

    saved_options = pjoin(out_dir, cfg.file_name_options)
    if pexists(saved_options):
        sys.argv = shlex.split('neuropredict --print_options {}'
                               ''.format(out_dir))
        with raises(SystemExit):
            cli()

    missing_dir = out_dir + '_43_34563$#*$@)'
    sys.argv = shlex.split('neuropredict --po {}'.format(missing_dir))
    with raises(IOError):
        cli()
def test_vis():
    """Check that ``--make_vis`` produces the expected figure files.

    The check only runs when ``rhst_results.pkl`` is already present in
    ``out_dir``; otherwise a notice is printed and nothing is verified.

    Raises
    ------
    ValueError
        If one of the expected figures cannot be found after the CLI run.
    """

    res_path = pjoin(out_dir, 'rhst_results.pkl')
    if not pexists(res_path):
        print('previously computed results not found in \n {}'.format(out_dir))
        return

    sys.argv = shlex.split('neuropredict --make_vis {}'.format(out_dir))
    with raises(SystemExit):
        cli()

    # Verification happens after the ``raises`` block: anything placed after
    # cli() inside it would be skipped once SystemExit is raised.
    expected_results = ['balanced_accuracy.pdf',
                        'compare_misclf_rates.pdf',
                        'feature_importance.pdf']
    for fig_name in expected_results:
        # Bare file names would be resolved against the current working
        # directory, so look inside out_dir instead.
        # NOTE(review): assumes the figures are written directly into
        # out_dir -- confirm against the visualisation code.
        fig_path = pjoin(out_dir, fig_name)
        if not pexists(fig_path):
            raise ValueError('an expected result {} not produced'
                             ''.format(fig_path))
def test_each_combination_works():
    """Run each classifier / dimensionality-reduction pairing through the CLI.

    The first pairing that fails is reported on stdout and its exception is
    re-raised unchanged.
    """

    num_reps = 10
    grid_search = 'none'  # to speed up the process
    cli_template = ('neuropredict -y {} -t {} -n {} -c {} -o {} '
                    ' -e {} -dr {} -g {} ')

    for classifier_name in cfg.classifier_choices:
        for dim_red_name in cfg.all_dim_red_methods:
            try:
                command = cli_template.format(out_path, train_perc, num_reps,
                                              num_procs, out_dir,
                                              classifier_name, dim_red_name,
                                              grid_search)
                sys.argv = shlex.split(command)
                cli()
            except BaseException:
                # announce which pairing broke, then let the error propagate
                print('\n ---> combination failed: {} {}'
                      ''.format(classifier_name, dim_red_name))
                raise
def test_chance_clf_binary_svm():
    """Run the CLI on two feature sets and check the balanced accuracy.

    The results saved in ``out_dir`` are reloaded and, for every sub-group,
    the balanced accuracy is handed to ``raise_if_mean_differs_from`` with
    the binary tolerance ``eps_chance_acc_binary``.
    """

    # No ``global`` statement is needed here: the shared settings are only
    # read, never rebound, inside this function.
    sys.argv = shlex.split('neuropredict -y {} {} -t {} -n {} -c {} -g {} -o {} '
                           '-e {} -dr {}'.format(out_path, out_path2, train_perc,
                                                 min_rep_per_class *
                                                 rand_two_class.num_targets,
                                                 num_procs, gs_level, out_dir,
                                                 classifier, fs_method))
    cli()

    cv_results = neuropredict.reports.load_results_from_folder(out_dir)
    for sg, result in cv_results.items():
        raise_if_mean_differs_from(result['accuracy_balanced'],
                                   result['target_sizes'],
                                   eps_chance_acc=eps_chance_acc_binary)
def test_chance_multiclass():
    """Run the CLI on the multi-class dataset and check the balanced accuracy.

    Uses a random-forest classifier with variance-threshold feature
    selection and grid search disabled. The results saved in ``out_dir`` are
    reloaded and each sub-group's balanced accuracy is handed to
    ``raise_if_mean_differs_from`` with the tolerance ``eps_chance_acc``.
    """

    # No ``global`` statement is needed here: the shared settings are only
    # read, never rebound, inside this function.
    clf = 'randomforestclassifier'
    fs_method = 'variancethreshold'
    nrep = total_num_classes * min_rep_per_class
    gsl = 'none'  # to speed up the process

    sys.argv = shlex.split('neuropredict -y {} -t {} -n {} -c {} -g {} '
                           '-o {} -e {} -dr {}'
                           ''.format(out_path_multiclass, train_perc, nrep,
                                     num_procs, gsl, out_dir, clf, fs_method))
    cli()

    cv_results = neuropredict.reports.load_results_from_folder(out_dir)
    for sg, result in cv_results.items():
        # The tolerance is passed by keyword, like the other callers of
        # raise_if_mean_differs_from in this file, so it cannot be bound to a
        # different positional parameter (e.g. reference_level).
        raise_if_mean_differs_from(result['accuracy_balanced'],
                                   result['target_sizes'],
                                   eps_chance_acc=eps_chance_acc,
                                   method_descr='{} {} gsl {}'
                                                ''.format(fs_method, clf, gsl))
def test_chance_clf_binary_svm():
    """Run the CLI on two feature sets and check the balanced accuracy.

    ``cli()`` returns a mapping to pickled result files; each one is loaded,
    the balanced-accuracy values of all datasets are stacked column-wise and
    handed to ``raise_if_mean_differs_from`` with the binary tolerance
    ``eps_chance_acc_binary``.
    """
    import pickle

    cli_template = ('neuropredict -y {} {} -t {} -n {} -c {} -g {} -o {} '
                    '-e {} -dr {}')
    command = cli_template.format(out_path1, out_path2, train_perc,
                                  min_rep_per_class * ds_two.num_targets,
                                  num_procs, gs_level, out_dir,
                                  estimator, dr_method)
    sys.argv = shlex.split(command)
    saved_results = cli()

    for _sub_group_id, pickle_path in saved_results.items():
        with open(pickle_path, 'rb') as pickle_fid:
            loaded = pickle.load(pickle_fid)

        scores_by_dataset = \
            loaded['results'].metric_val['balanced_accuracy_score']
        stacked_scores = np.column_stack(list(scores_by_dataset.values()))
        raise_if_mean_differs_from(stacked_scores,
                                   loaded['_target_sizes'],
                                   eps_chance_acc=eps_chance_acc_binary)
def main():
    """Entry point: hand control to ``classify.cli``; its result is discarded."""
    classify.cli()
    return None
def test_versioning():
    """Check that invoking the CLI with ``-v`` ends in ``SystemExit``."""
    sys.argv = shlex.split('neuropredict -v')
    with raises(SystemExit):
        cli()