def make_method_list(fs_subject_dir, user_feature_paths,
                     user_feature_type='dir_of_dirs'):
    """
    Returns an organized list of feature paths and methods to read in features.

    Parameters
    ----------
    fs_subject_dir : str
    user_feature_paths : list of str
    user_feature_type : str

    Returns
    -------
    feature_dir : list
    method_list : list

    """

    freesurfer_readers = [aseg_stats_subcortical, aseg_stats_whole_brain]
    userdefined_readers = {'dir_of_dirs': get_dir_of_dirs,
                           'data_matrix': get_data_matrix,
                           'pyradigm': get_pyradigm,
                           'arff': get_arff}

    feature_dir = list()
    method_list = list()
    if not_unspecified(user_feature_paths):
        if user_feature_type not in userdefined_readers:
            raise NotImplementedError("Invalid feature type or "
                                      "its reader is not implemented yet!")

        for upath in user_feature_paths:
            feature_dir.append(upath)
            method_list.append(userdefined_readers[user_feature_type])

    if not_unspecified(fs_subject_dir):
        for fsrdr in freesurfer_readers:
            feature_dir.append(fs_subject_dir)
            method_list.append(fsrdr)

    if len(method_list) != len(feature_dir):
        raise ValueError('Invalid specification for features!')

    if len(method_list) < 1:
        raise ValueError('At least one feature set must be specified.')

    print("\nRequested features for analysis:")
    for mm, method in enumerate(method_list):
        print("{} from {}".format(method.__name__, feature_dir[mm]))

    return feature_dir, method_list
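

# A hypothetical usage sketch, not part of the original module: it shows the
# parallel-list contract of make_method_list(), where method_list[i] is the
# reader callable for the features under feature_dir[i]. All paths below are
# placeholders.
def _demo_make_method_list():
    feature_dir, method_list = make_method_list(
            fs_subject_dir='/data/freesurfer_subjects',
            user_feature_paths=['/data/my_features'],
            user_feature_type='dir_of_dirs')
    # expect three pairs: one for the user path, plus one per FreeSurfer reader
    for path, reader in zip(feature_dir, method_list):
        print('{} reads from {}'.format(reader.__name__, path))

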
def validate_class_set(classes, subgroups, positive_class=None):
    """Ensures class names are valid and sub-groups exist."""

    class_set = list(set(classes.values()))

    sub_group_list = list()
    if subgroups != 'all':
        if isinstance(subgroups, str):
            subgroups = [subgroups, ]

        for comb in subgroups:
            cls_list = comb.split(',')
            # ensuring each subgroup has at least two classes
            if len(set(cls_list)) < 2:
                raise ValueError('This subgroup {} does not contain '
                                 'two unique classes.'.format(comb))

            # verify each of them was defined in meta
            for cls in cls_list:
                if cls not in class_set:
                    raise ValueError("Class {} in combination {} "
                                     "does not exist in meta data."
                                     "".format(cls, comb))

            sub_group_list.append(cls_list)
    else:
        # using all classes
        sub_group_list.append(class_set)

    # the following loop is required to preserve the original order in meta;
    # this does not: class_order_in_meta = list(set(classes.values()))
    class_order_in_meta = list()
    for x in classes.values():
        if x not in class_order_in_meta:
            class_order_in_meta.append(x)

    num_classes = len(class_order_in_meta)
    if num_classes < 2:
        raise ValueError("At least two classes are required for predictive "
                         "analysis! Only one given ({})"
                         "".format(set(classes.values())))

    if num_classes == 2:
        if not_unspecified(positive_class):
            if positive_class not in class_order_in_meta:
                raise ValueError('Positive class specified does not exist '
                                 'in meta data.\n'
                                 'Choose one of {}'.format(class_order_in_meta))
            print('Positive class specified for AUC calculation: {}'
                  ''.format(positive_class))
        else:
            positive_class = class_order_in_meta[-1]
            print('Positive class inferred for AUC calculation: {}'
                  ''.format(positive_class))

    return class_set, sub_group_list, positive_class
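

# A hypothetical sketch, not part of the original module: it illustrates the
# expected input shapes for validate_class_set() -- 'classes' maps sample ids
# to class names, and each subgroup is a comma-separated string of class names.
def _demo_validate_class_set():
    classes = {'s01': 'CN', 's02': 'AD', 's03': 'MCI', 's04': 'AD'}
    class_set, sub_groups, pos_class = validate_class_set(
            classes, subgroups=['CN,AD', 'CN,MCI'], positive_class='AD')
    # sub_groups -> [['CN', 'AD'], ['CN', 'MCI']]; with more than two classes
    # in meta, positive_class is returned unchanged.
    return class_set, sub_groups, pos_class

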
def organize_inputs(user_args):
    """
    Validates the input features specified and
    returns an organized list of paths and readers.

    Parameters
    ----------
    user_args : ArgParse object
        Various options specified by the user.

    Returns
    -------
    user_feature_paths : list
        List of paths to specified input features
    user_feature_type : str
        String identifying the type of user-defined input
    fs_subject_dir : str
        Path to freesurfer subject directory, if supplied.
    meta_data_supplied : str or bool
        Path to the input (pyradigm/ARFF) that also carries meta data;
        False when meta data must be supplied separately.
    meta_data_format : str or None
        Format of the meta data source ('pyradigm' or 'arff'), if any.

    """

    atleast_one_feature_specified = False
    # specifying pyradigm avoids the need for a separate meta data file
    meta_data_supplied = False
    meta_data_format = None

    if not_unspecified(user_args.fs_subject_dir):
        fs_subject_dir = abspath(user_args.fs_subject_dir)
        if not pexists(fs_subject_dir):
            raise IOError("Given Freesurfer directory doesn't exist.")
        atleast_one_feature_specified = True
    else:
        fs_subject_dir = None

    # ensuring only one type is specified
    mutually_excl_formats = ['user_feature_paths',
                             'data_matrix_paths',
                             'pyradigm_paths',
                             'arff_paths']
    not_none_count = 0
    for fmt in mutually_excl_formats:
        if not_unspecified(getattr(user_args, fmt)):
            not_none_count += 1
    if not_none_count > 1:
        raise ValueError('Only one of the following formats '
                         'can be specified:\n{}'.format(mutually_excl_formats))

    if not_unspecified(user_args.user_feature_paths):
        user_feature_paths = check_paths(user_args.user_feature_paths,
                                         path_type='user defined (dir_of_dirs)')
        atleast_one_feature_specified = True
        user_feature_type = 'dir_of_dirs'

    elif not_unspecified(user_args.data_matrix_paths):
        user_feature_paths = check_paths(user_args.data_matrix_paths,
                                         path_type='data matrix')
        atleast_one_feature_specified = True
        user_feature_type = 'data_matrix'

    elif not_unspecified(user_args.pyradigm_paths):
        user_feature_paths = check_paths(user_args.pyradigm_paths,
                                         path_type='pyradigm')
        atleast_one_feature_specified = True
        meta_data_supplied = user_feature_paths[0]
        meta_data_format = 'pyradigm'
        user_feature_type = 'pyradigm'

    elif not_unspecified(user_args.arff_paths):
        user_feature_paths = check_paths(user_args.arff_paths,
                                         path_type='ARFF')
        atleast_one_feature_specified = True
        user_feature_type = 'arff'
        meta_data_supplied = user_feature_paths[0]
        meta_data_format = 'arff'
    else:
        user_feature_paths = None
        user_feature_type = None

    # map in python 3 returns a generator, not a list, so len() wouldn't work;
    # guard against None to avoid a TypeError when no features were specified
    if user_feature_paths is not None and \
            not isinstance(user_feature_paths, list):
        user_feature_paths = list(user_feature_paths)

    if not atleast_one_feature_specified:
        raise ValueError('At least one method specifying features must be '
                         'specified. It can be path(s) to a pyradigm dataset, '
                         'matrix file, user-defined folder or a Freesurfer '
                         'subject directory.')

    return user_feature_paths, user_feature_type, fs_subject_dir, \
           meta_data_supplied, meta_data_format
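

# A hypothetical sketch, not part of the original module: organize_inputs()
# only needs attribute access on user_args, so a bare argparse.Namespace with
# the expected attributes is enough to exercise it outside the CLI. The path
# below is a placeholder and must exist for check_paths() to pass.
def _demo_organize_inputs():
    from argparse import Namespace
    user_args = Namespace(fs_subject_dir=None,
                          user_feature_paths=['/data/my_features'],
                          data_matrix_paths=None,
                          pyradigm_paths=None,
                          arff_paths=None)
    return organize_inputs(user_args)

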
def parse_args():
    """Parser/validator for the cmd line args."""

    parser = get_parser()

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing
    try:
        user_args = parser.parse_args()
    except:
        parser.exit(1)

    if len(sys.argv) == 3 and not_unspecified(user_args.make_vis):
        out_dir = realpath(user_args.make_vis)
        res_path = pjoin(out_dir, cfg.file_name_results)
        if pexists(out_dir) and pexists(res_path):
            print('\n\nSaving the visualizations to \n{}'.format(out_dir))
            make_visualizations(res_path, out_dir)
            sys.exit(0)
        else:
            raise ValueError('Given folder does not exist, or has no results!')

    user_feature_paths, user_feature_type, fs_subject_dir, \
        meta_data_path, meta_data_format = organize_inputs(user_args)

    if not meta_data_path:
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError('Metadata file must be provided '
                             'when not using pyradigm/ARFF inputs.')
        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n{}'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(os.getcwd()), cfg.output_dir_default)

    try:
        os.makedirs(out_dir, exist_ok=True)
    except:
        raise IOError('Output folder could not be created.')

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError("Training percentage {} out of bounds "
                         "- must be >= 0.01 and <= 0.99".format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        raise ValueError("At least 10 repetitions of CV are recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    class_set, subgroups, positive_class = validate_class_set(
            classes, user_args.sub_groups, user_args.positive_class)

    feature_selection_size = validate_feature_selection_size(
            user_args.num_features_to_select)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError('Unrecognized level of grid search. '
                         'Valid choices: {}'.format(cfg.GRIDSEARCH_LEVELS))

    classifier = check_classifier(user_args.classifier)
    feat_select_method = user_args.feat_select_method.lower()

    # saving the validated and expanded values to disk for later use
    options_to_save = [sample_ids, classes, out_dir, user_feature_paths,
                       user_feature_type, fs_subject_dir, train_perc,
                       num_rep_cv, positive_class, subgroups,
                       feature_selection_size, num_procs,
                       grid_search_level, classifier, feat_select_method]
    options_path = save_options(options_to_save, out_dir)

    return sample_ids, classes, out_dir, options_path, \
           user_feature_paths, user_feature_type, fs_subject_dir, \
           train_perc, num_rep_cv, \
           positive_class, subgroups, \
           feature_selection_size, num_procs, \
           grid_search_level, classifier, feat_select_method
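

# An illustrative invocation that parse_args() is designed to validate,
# assuming the installed console entry point is named neuropredict. The long
# option names below are inferred from the user_args attribute names (argparse
# derives attribute names from long flags); consult get_parser() for the
# authoritative flag spellings:
#
#   neuropredict --meta_file meta.csv --out_dir /tmp/np_out \
#                --fs_subject_dir /data/freesurfer_subjects \
#                --train_perc 0.8 --num_rep_cv 50

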
def parse_common_args(parser):
    """Common utility to parse common CLI args"""

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing
    try:
        user_args = parser.parse_args()
    except:
        parser.exit(1)

    if len(sys.argv) == 3:
        # only if no features were specified to be assessed
        if not any(not_unspecified(getattr(user_args, attr))
                   for attr in ('user_feature_paths', 'data_matrix_paths',
                                'pyradigm_paths', 'arff_paths')):

            if not_unspecified(user_args.print_opt_dir) \
                    and user_args.print_opt_dir:
                run_dir = realpath(user_args.print_opt_dir)
                print_options(run_dir)

            if not_unspecified(user_args.make_vis):
                out_dir = realpath(user_args.make_vis)
                res_path = pjoin(out_dir, cfg.file_name_results)
                if pexists(out_dir) and pexists(res_path):
                    print('Making vis from existing results is not supported '
                          'yet in the redesigned workflow')
                    # print('\n\nSaving the visualizations to \n{}'
                    #       ''.format(out_dir))
                    # make_visualizations(res_path, out_dir)
                else:
                    raise ValueError('Given folder does not exist, '
                                     'or has no results file!')

            sys.exit(0)

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, \
        meta_data_format = organize_inputs(user_args)

    if not meta_data_path:
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError('Metadata file must be provided '
                             'when not using pyradigm/ARFF inputs.')
        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n\t{}\n'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(getcwd()), cfg.output_dir_default)

    try:
        makedirs(out_dir, exist_ok=True)
    except:
        raise IOError('Output folder could not be created.')

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError("Training percentage {} out of bounds "
                         "- must be >= 0.01 and <= 0.99".format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        raise ValueError("At least 10 repetitions of CV are recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    reduced_dim_size = validate_feature_selection_size(
            user_args.reduced_dim_size)

    impute_strategy = validate_impute_strategy(user_args.impute_strategy)

    covar_list, covar_method = check_covariate_options(user_args.covariates,
                                                       user_args.covar_method)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError('Unrecognized level of grid search. '
                         'Valid choices: {}'.format(cfg.GRIDSEARCH_LEVELS))

    dim_red_method = user_args.dim_red_method.lower()

    return user_args, user_feature_paths, user_feature_type, fs_subject_dir, \
           meta_data_path, meta_data_format, sample_ids, classes, out_dir, \
           train_perc, num_rep_cv, num_procs, reduced_dim_size, \
           impute_strategy, covar_list, covar_method, \
           grid_search_level, dim_red_method
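

# A hypothetical front-end sketch, not part of the original module: a CLI
# entry point would build its own parser and unpack the eighteen validated
# values returned by parse_common_args(). get_parser() is assumed to be the
# same parser factory used by parse_args() above.
def _demo_parse_common_args():
    parser = get_parser()
    (user_args, user_feature_paths, user_feature_type, fs_subject_dir,
     meta_data_path, meta_data_format, sample_ids, classes, out_dir,
     train_perc, num_rep_cv, num_procs, reduced_dim_size, impute_strategy,
     covar_list, covar_method, grid_search_level,
     dim_red_method) = parse_common_args(parser)
    return user_args, out_dir, grid_search_level, dim_red_method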