Example #1
0
def make_method_list(fs_subject_dir,
                     user_feature_paths,
                     user_feature_type='dir_of_dirs'):
    """
    Returns an organized list of feature paths and methods to read in features.

    Parameters
    ----------
    fs_subject_dir : str
        Path to the Freesurfer subjects directory, or an "unspecified"
        sentinel when no Freesurfer features are requested.
    user_feature_paths : list of str
        Paths to user-defined feature inputs, one per feature set.
    user_feature_type : str
        Key identifying the reader for user-defined features. Must be one
        of 'dir_of_dirs', 'data_matrix', 'pyradigm' or 'arff'.

    Returns
    -------
    feature_dir : list
        Input path for each requested feature set (parallel to method_list).
    method_list : list
        Callable reader for each requested feature set.

    Raises
    ------
    NotImplementedError
        If `user_feature_type` has no registered reader.
    ValueError
        If no feature set could be assembled.
    """

    freesurfer_readers = [aseg_stats_subcortical, aseg_stats_whole_brain]
    userdefined_readers = {
        'dir_of_dirs': get_dir_of_dirs,
        'data_matrix': get_data_matrix,
        'pyradigm': get_pyradigm,
        'arff': get_arff
    }

    feature_dir = list()
    method_list = list()
    if not_unspecified(user_feature_paths):
        if user_feature_type not in userdefined_readers:
            raise NotImplementedError(
                "Invalid feature type or its reader is not implemented yet!")

        # the same reader applies to every user-supplied path
        for upath in user_feature_paths:
            feature_dir.append(upath)
            method_list.append(userdefined_readers[user_feature_type])

    if not_unspecified(fs_subject_dir):
        # each Freesurfer reader yields a separate feature set from the
        # same subjects directory
        for fsrdr in freesurfer_readers:
            feature_dir.append(fs_subject_dir)
            method_list.append(fsrdr)

    # defensive check: the two lists are built in lockstep above
    if len(method_list) != len(feature_dir):
        raise ValueError('Invalid specification for features!')

    if len(method_list) < 1:
        raise ValueError('At least one feature set must be specified.')

    print("\nRequested features for analysis:")
    for method, in_dir in zip(method_list, feature_dir):
        print("{} from {}".format(method.__name__, in_dir))

    return feature_dir, method_list
Example #2
0
def validate_class_set(classes, subgroups, positive_class=None):
    """
    Ensures class names are valid and sub-groups exist.

    Parameters
    ----------
    classes : dict
        Mapping from sample id to class name.
    subgroups : str or list of str
        'all' to analyze all classes together, or one/more comma-separated
        class-name combinations; each combination must name at least two
        distinct classes that exist in `classes`.
    positive_class : str, optional
        Class to treat as positive for AUC (binary problems only). When
        unspecified, the last class in meta-data order is used.

    Returns
    -------
    class_set : list
        Unique class names (order not guaranteed).
    sub_group_list : list of list
        Validated class combinations to analyze.
    positive_class : str or None
        Resolved positive class for binary problems, else input unchanged.

    Raises
    ------
    ValueError
        On an invalid sub-group, fewer than two classes overall, or an
        unknown positive class.
    """

    class_set = list(set(classes.values()))

    sub_group_list = list()
    if subgroups != 'all':
        if isinstance(subgroups, str):
            subgroups = [
                subgroups,
            ]

        for comb in subgroups:
            cls_list = comb.split(',')
            # ensuring each subgroup has at least two classes
            if len(set(cls_list)) < 2:
                raise ValueError(
                    'This subgroup {} does not contain two unique classes.'.
                    format(comb))

            # verify each of them were defined in meta
            for cls in cls_list:
                if cls not in class_set:
                    raise ValueError("Class {} in combination {} "
                                     "does not exist in meta data.".format(
                                         cls, comb))

            sub_group_list.append(cls_list)
    else:
        # using all classes
        sub_group_list.append(class_set)

    # Deduplicate while preserving the order classes appear in the meta
    # data. BUG FIX: this previously iterated `class_set`, which was built
    # via set() and had therefore already lost the original order, making
    # the inferred positive class (last in meta order) nondeterministic.
    class_order_in_meta = list()
    for x in classes.values():
        if x not in class_order_in_meta:
            class_order_in_meta.append(x)

    num_classes = len(class_order_in_meta)
    if num_classes < 2:
        raise ValueError(
            "At least two classes are required for predictive analysis! "
            "Only one given ({})".format(set(classes.values())))

    if num_classes == 2:
        if not_unspecified(positive_class):
            if positive_class not in class_order_in_meta:
                raise ValueError(
                    'Positive class specified does not exist in meta data.\n'
                    'Choose one of {}'.format(class_order_in_meta))
            print('Positive class specified for AUC calculation: {}'.format(
                positive_class))
        else:
            positive_class = class_order_in_meta[-1]
            print('Positive class inferred for AUC calculation: {}'.format(
                positive_class))

    return class_set, sub_group_list, positive_class
Example #3
0
def organize_inputs(user_args):
    """
    Validates the input features specified and returns organized list of paths and readers.

    Parameters
    ----------
    user_args : ArgParse object
        Various options specified by the user.

    Returns
    -------
    user_feature_paths : list or None
        List of paths to specified input features, if any.
    user_feature_type : str or None
        String identifying the type of user-defined input.
    fs_subject_dir : str or None
        Path to freesurfer subject directory, if supplied.
    meta_data_supplied : str or bool
        Path to the dataset carrying its own meta data (pyradigm/ARFF),
        or False when a separate meta data file is required.
    meta_data_format : str or None
        'pyradigm' or 'arff' when meta data is embedded in the input.

    Raises
    ------
    IOError
        If the given Freesurfer directory does not exist.
    ValueError
        If multiple mutually-exclusive formats, or no features at all,
        were specified.
    """

    atleast_one_feature_specified = False
    # specifying pyradigm avoids the need for separate meta data file
    meta_data_supplied = False
    meta_data_format = None

    if not_unspecified(user_args.fs_subject_dir):
        fs_subject_dir = abspath(user_args.fs_subject_dir)
        if not pexists(fs_subject_dir):
            raise IOError("Given Freesurfer directory doesn't exist.")
        atleast_one_feature_specified = True
    else:
        fs_subject_dir = None

    # ensuring only one type is specified
    mutually_excl_formats = [
        'user_feature_paths', 'data_matrix_paths', 'pyradigm_paths',
        'arff_paths'
    ]
    # renamed from `format` to avoid shadowing the builtin
    not_none_count = 0
    for fmt_name in mutually_excl_formats:
        if not_unspecified(getattr(user_args, fmt_name)):
            not_none_count = not_none_count + 1
    if not_none_count > 1:
        raise ValueError(
            'Only one of the following formats can be specified:\n{}'.format(
                mutually_excl_formats))

    if not_unspecified(user_args.user_feature_paths):
        user_feature_paths = check_paths(
            user_args.user_feature_paths,
            path_type='user defined (dir_of_dirs)')
        atleast_one_feature_specified = True
        user_feature_type = 'dir_of_dirs'

    elif not_unspecified(user_args.data_matrix_paths):
        user_feature_paths = check_paths(user_args.data_matrix_paths,
                                         path_type='data matrix')
        atleast_one_feature_specified = True
        user_feature_type = 'data_matrix'

    elif not_unspecified(user_args.pyradigm_paths):
        user_feature_paths = check_paths(user_args.pyradigm_paths,
                                         path_type='pyradigm')
        atleast_one_feature_specified = True
        meta_data_supplied = user_feature_paths[0]
        meta_data_format = 'pyradigm'
        user_feature_type = 'pyradigm'

    elif not_unspecified(user_args.arff_paths):
        user_feature_paths = check_paths(user_args.arff_paths,
                                         path_type='ARFF')
        atleast_one_feature_specified = True
        user_feature_type = 'arff'
        meta_data_supplied = user_feature_paths[0]
        meta_data_format = 'arff'
    else:
        user_feature_paths = None
        user_feature_type = None

    # map in python 3 returns a generator, not a list, so len() wouldn't work
    # BUG FIX: guard against None — when only a Freesurfer dir is supplied,
    # list(None) used to raise TypeError before the helpful error below.
    if user_feature_paths is not None and \
            not isinstance(user_feature_paths, list):
        user_feature_paths = list(user_feature_paths)

    if not atleast_one_feature_specified:
        raise ValueError(
            'At least one method specifying features must be specified. '
            'It can be a path(s) to pyradigm dataset, matrix file, user-defined folder or a Freesurfer subject directory.'
        )

    return user_feature_paths, user_feature_type, fs_subject_dir, meta_data_supplied, meta_data_format
Example #4
0
def parse_args():
    """Parser/validator for the cmd line args.

    Parses sys.argv, validates every option (paths, training percentage,
    CV repetitions, class set, grid-search level, classifier), saves the
    expanded options to disk, and returns the full validated tuple.
    """

    parser = get_parser()

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing; SystemExit (from argparse's own error handling) is allowed
    # to propagate — the previous bare except swallowed it along with
    # KeyboardInterrupt
    try:
        user_args = parser.parse_args()
    except Exception:
        parser.exit(1)

    # visualization-only invocation: regenerate figures from existing
    # results, then quit without running any analysis
    if len(sys.argv) == 3 and not_unspecified(user_args.make_vis):
        out_dir = realpath(user_args.make_vis)
        res_path = pjoin(out_dir, cfg.file_name_results)
        if pexists(out_dir) and pexists(res_path):
            print('\n\nSaving the visualizations to \n{}'.format(out_dir))
            make_visualizations(res_path, out_dir)
            sys.exit(0)
        else:
            raise ValueError('Given folder does not exist, or has no results!')

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, meta_data_format = organize_inputs(
        user_args)

    # pyradigm/ARFF inputs carry their own meta data; otherwise a separate
    # meta file is mandatory
    if not meta_data_path:
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError(
                'Metadata file must be provided when not using pyradigm/ARFF inputs.'
            )

        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n{}'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(os.getcwd()), cfg.output_dir_default)

    # narrowed from a bare except; chain the cause for easier debugging
    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as ose:
        raise IOError('Output folder could not be created.') from ose

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError(
            "Training percentage {} out of bounds - must be >= 0.01 and <= 0.99"
            .format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        raise ValueError("At least 10 repetitions of CV are recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    class_set, subgroups, positive_class = validate_class_set(
        classes, user_args.sub_groups, user_args.positive_class)

    feature_selection_size = validate_feature_selection_size(
        user_args.num_features_to_select)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError(
            'Unrecognized level of grid search. Valid choices: {}'.format(
                cfg.GRIDSEARCH_LEVELS))

    classifier = check_classifier(user_args.classifier)
    feat_select_method = user_args.feat_select_method.lower()

    # saving the validated and expanded values to disk for later use.
    options_to_save = [
        sample_ids, classes, out_dir, user_feature_paths, user_feature_type,
        fs_subject_dir, train_perc, num_rep_cv, positive_class, subgroups,
        feature_selection_size, num_procs, grid_search_level, classifier,
        feat_select_method
    ]
    options_path = save_options(options_to_save, out_dir)

    return sample_ids, classes, out_dir, options_path, \
           user_feature_paths, user_feature_type, fs_subject_dir, \
           train_perc, num_rep_cv, \
           positive_class, subgroups, \
           feature_selection_size, num_procs, \
           grid_search_level, classifier, feat_select_method
Example #5
0
def parse_common_args(parser):
    """Common utility to parse common CLI args.

    Parses sys.argv with the given parser, handles the utility-only
    invocations (print options, make visualizations), organizes and
    validates all inputs, and returns the full validated tuple.
    """

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing; SystemExit (from argparse's own error handling) is allowed
    # to propagate — the previous bare except swallowed it along with
    # KeyboardInterrupt
    try:
        user_args = parser.parse_args()
    except Exception:
        parser.exit(1)

    if len(sys.argv) == 3:
        # only if no features were specified to be assessed
        if not any(
                not_unspecified(getattr(user_args, attr))
                for attr in ('user_feature_paths', 'data_matrix_paths',
                             'pyradigm_paths', 'arff_paths')):

            if not_unspecified(
                    user_args.print_opt_dir) and user_args.print_opt_dir:
                run_dir = realpath(user_args.print_opt_dir)
                print_options(run_dir)

            # the redundant inner re-check of user_args.make_vis
            # (always true inside this branch) was removed
            if not_unspecified(user_args.make_vis):
                out_dir = realpath(user_args.make_vis)
                res_path = pjoin(out_dir, cfg.file_name_results)
                if pexists(out_dir) and pexists(res_path):
                    print(
                        'Making vis from existing results is not supported '
                        'yet in the redesigned workflow')
                    # print('\n\nSaving the visualizations to \n{}'
                    #       ''.format(out_dir))
                    # make_visualizations(res_path, out_dir)
                else:
                    raise ValueError('Given folder does not exist, '
                                     'or has no results file!')

            sys.exit(0)

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, \
    meta_data_format = organize_inputs(user_args)

    # pyradigm/ARFF inputs carry their own meta data; otherwise a separate
    # meta file is mandatory
    if not meta_data_path:
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError('Metadata file must be provided '
                             'when not using pyradigm/ARFF inputs.')

        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n\t{}\n'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(getcwd()), cfg.output_dir_default)

    # narrowed from a bare except; chain the cause for easier debugging
    try:
        makedirs(out_dir, exist_ok=True)
    except OSError as ose:
        raise IOError('Output folder could not be created.') from ose

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError("Training percentage {} out of bounds "
                         "- must be >= 0.01 and <= 0.99".format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        raise ValueError("At least 10 repetitions of CV are recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    reduced_dim_size = validate_feature_selection_size(
        user_args.reduced_dim_size)

    impute_strategy = validate_impute_strategy(user_args.impute_strategy)

    covar_list, covar_method = check_covariate_options(user_args.covariates,
                                                       user_args.covar_method)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError('Unrecognized level of grid search. Valid choices: {}'
                         ''.format(cfg.GRIDSEARCH_LEVELS))

    dim_red_method = user_args.dim_red_method.lower()

    return user_args, user_feature_paths, user_feature_type, fs_subject_dir, \
           meta_data_path, meta_data_format, sample_ids, classes, out_dir, \
           train_perc, num_rep_cv, num_procs, reduced_dim_size, impute_strategy, \
           covar_list, covar_method, grid_search_level, dim_red_method