def _prepare(self):
    """Checks in inputs, parameters and their combinations.

    Validates ``num_rep_cv``, ``train_perc``, ``num_procs`` and
    ``grid_search_level``, then derives the sample-id list, the train-set
    size and the results output path before summarizing the experiment.

    Raises
    ------
    ValueError
        If ``num_rep_cv`` is non-finite or <= 1, ``train_perc`` is outside
        (0.0, 1.0), or ``grid_search_level`` is unrecognized.
    """

    # BUGFIX: finiteness must be checked BEFORE int() conversion --
    # int(inf) raises OverflowError and int(nan) raises ValueError,
    # so the original post-conversion isfinite() guard was unreachable.
    if not np.isfinite(self.num_rep_cv):
        raise ValueError(
            "Infinite number of repetitions is not recommended!")
    self.num_rep_cv = int(self.num_rep_cv)

    if self.num_rep_cv <= 1:
        raise ValueError("More than 1 repetition is necessary!")

    if self.train_perc <= 0.0 or self.train_perc >= 1.0:
        raise ValueError('Train perc > 0.0 and < 1.0')

    self.num_procs = check_num_procs(self.num_procs)

    if self.grid_search_level.lower() not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError('Unrecognized level of grid search.'
                         ' Valid choices: {}'.format(cfg.GRIDSEARCH_LEVELS))

    # TODO for API use, pred_model and dim_reducer must be validated here again
    # if not isinstance(self.pred_model, BaseEstimator):

    self._id_list = list(self.datasets.samplet_ids)
    self._num_samples = len(self._id_list)

    # floor of the requested fraction, clamped to [1, num_samples] so a
    # tiny dataset still yields a usable (non-empty) training set
    self._train_set_size = np.int64(
        np.floor(self._num_samples * self.train_perc))
    self._train_set_size = max(
        1, min(self._num_samples, self._train_set_size))

    self._out_results_path = pjoin(self.out_dir, cfg.results_file_name)

    self._summarize_expt()
def parse_args():
    """Parser/validator for the cmd line args.

    Parses and validates all command-line options, organizes the input
    feature/metadata sources, creates the output folder, and persists the
    validated options to disk.

    Returns
    -------
    tuple
        (sample_ids, classes, out_dir, options_path, user_feature_paths,
        user_feature_type, fs_subject_dir, train_perc, num_rep_cv,
        positive_class, subgroups, feature_selection_size, num_procs,
        grid_search_level, classifier, feat_select_method)

    Raises
    ------
    ValueError
        For out-of-range or unrecognized option values.
    IOError
        If the metadata file is missing or the output folder can't be made.
    SystemExit
        When arguments are missing/invalid, or after visualization-only mode.
    """
    parser = get_parser()

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing; argparse reports its own errors via SystemExit, so catch
    # exactly that (a bare except here would also swallow KeyboardInterrupt)
    try:
        user_args = parser.parse_args()
    except SystemExit:
        parser.exit(1)

    # visualization-only mode: regenerate figures from an existing run
    if len(sys.argv) == 3 and not_unspecified(user_args.make_vis):
        out_dir = realpath(user_args.make_vis)
        res_path = pjoin(out_dir, cfg.file_name_results)
        if pexists(out_dir) and pexists(res_path):
            print('\n\nSaving the visualizations to \n{}'.format(out_dir))
            make_visualizations(res_path, out_dir)
            sys.exit(0)
        else:
            raise ValueError('Given folder does not exist, or has no results!')

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, \
        meta_data_format = organize_inputs(user_args)

    if not meta_data_path:
        # metadata file is mandatory when inputs are not pyradigm/ARFF
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError(
                'Metadata file must be provided when not using pyradigm/ARFF inputs.')
        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n{}'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(os.getcwd()), cfg.output_dir_default)
    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as exc:
        # narrow the previously-bare except; chain the cause for debugging
        raise IOError('Output folder could not be created.') from exc

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError(
            "Training percentage {} out of bounds - must be >= 0.01 and <= 0.99"
            .format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        # message typos fixed ("Atleast"/"recommened")
        raise ValueError("At least 10 repetitions of CV is recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    class_set, subgroups, positive_class = validate_class_set(
        classes, user_args.sub_groups, user_args.positive_class)

    feature_selection_size = validate_feature_selection_size(
        user_args.num_features_to_select)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError(
            'Unrecognized level of grid search. Valid choices: {}'.format(
                cfg.GRIDSEARCH_LEVELS))

    classifier = check_classifier(user_args.classifier)
    feat_select_method = user_args.feat_select_method.lower()

    # saving the validated and expanded values to disk for later use.
    options_to_save = [
        sample_ids, classes, out_dir, user_feature_paths, user_feature_type,
        fs_subject_dir, train_perc, num_rep_cv, positive_class, subgroups,
        feature_selection_size, num_procs, grid_search_level, classifier,
        feat_select_method
    ]
    options_path = save_options(options_to_save, out_dir)

    return sample_ids, classes, out_dir, options_path, \
        user_feature_paths, user_feature_type, fs_subject_dir, \
        train_perc, num_rep_cv, \
        positive_class, subgroups, \
        feature_selection_size, num_procs, \
        grid_search_level, classifier, feat_select_method
def parse_common_args(parser):
    """Common utility to parse common CLI args.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Fully-built parser whose arguments are to be parsed and validated.

    Returns
    -------
    tuple
        (user_args, user_feature_paths, user_feature_type, fs_subject_dir,
        meta_data_path, meta_data_format, sample_ids, classes, out_dir,
        train_perc, num_rep_cv, num_procs, reduced_dim_size, impute_strategy,
        covar_list, covar_method, grid_search_level, dim_red_method)

    Raises
    ------
    ValueError
        For out-of-range or unrecognized option values.
    IOError
        If the metadata file is missing or the output folder can't be made.
    SystemExit
        When arguments are missing/invalid, or after an option-only
        invocation (print-options / make-vis).
    """
    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing; argparse reports its own errors via SystemExit, so catch
    # exactly that (a bare except here would also swallow KeyboardInterrupt)
    try:
        user_args = parser.parse_args()
    except SystemExit:
        parser.exit(1)

    if len(sys.argv) == 3:
        # only if no features were specified to be assessed
        if not any(not_unspecified(getattr(user_args, attr))
                   for attr in ('user_feature_paths', 'data_matrix_paths',
                                'pyradigm_paths', 'arff_paths')):

            if not_unspecified(user_args.print_opt_dir) \
                    and user_args.print_opt_dir:
                run_dir = realpath(user_args.print_opt_dir)
                print_options(run_dir)

            if not_unspecified(user_args.make_vis):
                out_dir = realpath(user_args.make_vis)
                res_path = pjoin(out_dir, cfg.file_name_results)
                if pexists(out_dir) and pexists(res_path):
                    if not_unspecified(user_args.make_vis):
                        print('Making vis from existing results is not supported '
                              'yet in the redesigned workflow')
                        # print('\n\nSaving the visualizations to \n{}'
                        #       ''.format(out_dir))
                        # make_visualizations(res_path, out_dir)
                else:
                    raise ValueError('Given folder does not exist, '
                                     'or has no results file!')

            # NOTE(review): placement reconstructed — exit after handling
            # option-only invocations; confirm against original layout.
            sys.exit(0)

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, \
        meta_data_format = organize_inputs(user_args)

    if not meta_data_path:
        # metadata file is mandatory when inputs are not pyradigm/ARFF
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError('Metadata file must be provided '
                             'when not using pyradigm/ARFF inputs.')
        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n\t{}\n'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(getcwd()), cfg.output_dir_default)
    try:
        makedirs(out_dir, exist_ok=True)
    except OSError as exc:
        # narrow the previously-bare except; chain the cause for debugging
        raise IOError('Output folder could not be created.') from exc

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError("Training percentage {} out of bounds "
                         "- must be >= 0.01 and <= 0.99".format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        # message typos fixed ("Atleast"/"recommened")
        raise ValueError("At least 10 repetitions of CV is recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    reduced_dim_size = validate_feature_selection_size(
        user_args.reduced_dim_size)

    impute_strategy = validate_impute_strategy(user_args.impute_strategy)

    covar_list, covar_method = check_covariate_options(user_args.covariates,
                                                       user_args.covar_method)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError('Unrecognized level of grid search. Valid choices: {}'
                         ''.format(cfg.GRIDSEARCH_LEVELS))

    dim_red_method = user_args.dim_red_method.lower()

    return user_args, user_feature_paths, user_feature_type, fs_subject_dir, \
        meta_data_path, meta_data_format, sample_ids, classes, out_dir, \
        train_perc, num_rep_cv, num_procs, reduced_dim_size, impute_strategy, \
        covar_list, covar_method, grid_search_level, dim_red_method