Example #1
0
    def _prepare(self):
        """Checks in inputs, parameters and their combinations.

        Validates repetition count, training percentage and grid-search
        level, then derives internal sizes/paths used by the experiment.

        Raises
        ------
        ValueError
            If num_rep_cv is infinite or <= 1, train_perc is out of (0, 1),
            or grid_search_level is unrecognized.
        """

        # Check finiteness BEFORE the int() conversion: int(float('inf'))
        # raises OverflowError, which made this isfinite() guard unreachable
        # in the original ordering.
        if not np.isfinite(self.num_rep_cv):
            raise ValueError(
                "Infinite number of repetitions is not recommended!")
        self.num_rep_cv = int(self.num_rep_cv)

        if self.num_rep_cv <= 1:
            raise ValueError("More than 1 repetition is necessary!")

        if self.train_perc <= 0.0 or self.train_perc >= 1.0:
            raise ValueError('Train perc > 0.0 and < 1.0')

        # normalize/cap the requested number of parallel processes
        self.num_procs = check_num_procs(self.num_procs)

        if self.grid_search_level.lower() not in cfg.GRIDSEARCH_LEVELS:
            raise ValueError('Unrecognized level of grid search.'
                             ' Valid choices: {}'.format(
                                 cfg.GRIDSEARCH_LEVELS))

        # TODO for API use, pred_model and dim_reducer must be validated here again
        # if not isinstance(self.pred_model, BaseEstimator):

        self._id_list = list(self.datasets.samplet_ids)
        self._num_samples = len(self._id_list)
        # training set size = floor(fraction of samples), clamped to
        # [1, num_samples] so at least one samplet is always trained on
        self._train_set_size = np.int64(
            np.floor(self._num_samples * self.train_perc))
        self._train_set_size = max(
            1, min(self._num_samples, self._train_set_size))

        self._out_results_path = pjoin(self.out_dir, cfg.results_file_name)

        self._summarize_expt()
Example #2
0
def parse_args():
    """Parser/validator for the cmd line args.

    Parses sys.argv, validates every option (training percentage, CV
    repetitions, processor count, grid-search level, classifier, feature
    selection), saves the validated options to disk, and returns them.

    Returns
    -------
    tuple
        (sample_ids, classes, out_dir, options_path, user_feature_paths,
        user_feature_type, fs_subject_dir, train_perc, num_rep_cv,
        positive_class, subgroups, feature_selection_size, num_procs,
        grid_search_level, classifier, feat_select_method)

    Raises
    ------
    ValueError
        For out-of-range or unrecognized option values.
    IOError
        If the metadata file is missing or the output folder cannot be made.
    """

    parser = get_parser()

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing: argparse reports its own errors via SystemExit; catch only
    # that (a bare `except:` would also swallow KeyboardInterrupt)
    try:
        user_args = parser.parse_args()
    except SystemExit:
        parser.exit(1)

    # shortcut invocation: regenerate visualizations from an existing run
    if len(sys.argv) == 3 and not_unspecified(user_args.make_vis):
        out_dir = realpath(user_args.make_vis)
        res_path = pjoin(out_dir, cfg.file_name_results)
        if pexists(out_dir) and pexists(res_path):
            print('\n\nSaving the visualizations to \n{}'.format(out_dir))
            make_visualizations(res_path, out_dir)
            sys.exit(0)
        else:
            raise ValueError('Given folder does not exist, or has no results!')

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, meta_data_format = organize_inputs(
        user_args)

    if not meta_data_path:
        # no pyradigm/ARFF metadata: a separate meta file is then mandatory
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError(
                'Metadata file must be provided when not using pyradigm/ARFF inputs.'
            )

        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n{}'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(os.getcwd()), cfg.output_dir_default)

    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as exc:
        # chain the cause so the original makedirs failure is not lost
        raise IOError('Output folder could not be created.') from exc

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError(
            "Training percentage {} out of bounds - must be >= 0.01 and <= 0.99"
            .format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        raise ValueError("At least 10 repetitions of CV is recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    class_set, subgroups, positive_class = validate_class_set(
        classes, user_args.sub_groups, user_args.positive_class)

    feature_selection_size = validate_feature_selection_size(
        user_args.num_features_to_select)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError(
            'Unrecognized level of grid search. Valid choices: {}'.format(
                cfg.GRIDSEARCH_LEVELS))

    classifier = check_classifier(user_args.classifier)
    feat_select_method = user_args.feat_select_method.lower()

    # saving the validated and expanded values to disk for later use.
    options_to_save = [
        sample_ids, classes, out_dir, user_feature_paths, user_feature_type,
        fs_subject_dir, train_perc, num_rep_cv, positive_class, subgroups,
        feature_selection_size, num_procs, grid_search_level, classifier,
        feat_select_method
    ]
    options_path = save_options(options_to_save, out_dir)

    return sample_ids, classes, out_dir, options_path, \
           user_feature_paths, user_feature_type, fs_subject_dir, \
           train_perc, num_rep_cv, \
           positive_class, subgroups, \
           feature_selection_size, num_procs, \
           grid_search_level, classifier, feat_select_method
Example #3
0
def parse_common_args(parser):
    """Common utility to parse common CLI args.

    Parses sys.argv via the supplied parser, handles the no-feature
    "utility" invocations (print options / make visualizations), and
    validates the shared options (training percentage, CV repetitions,
    processor count, dimensionality reduction, imputation, covariates,
    grid-search level).

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Fully constructed parser for the calling CLI.

    Returns
    -------
    tuple
        (user_args, user_feature_paths, user_feature_type, fs_subject_dir,
        meta_data_path, meta_data_format, sample_ids, classes, out_dir,
        train_perc, num_rep_cv, num_procs, reduced_dim_size,
        impute_strategy, covar_list, covar_method, grid_search_level,
        dim_red_method)

    Raises
    ------
    ValueError
        For out-of-range or unrecognized option values.
    IOError
        If the metadata file is missing or the output folder cannot be made.
    """

    if len(sys.argv) < 2:
        print('Too few arguments!')
        parser.print_help()
        parser.exit(1)

    # parsing: argparse reports its own errors via SystemExit; catch only
    # that (a bare `except:` would also swallow KeyboardInterrupt)
    try:
        user_args = parser.parse_args()
    except SystemExit:
        parser.exit(1)

    if len(sys.argv) == 3:
        # only if no features were specified to be assessed
        if not any(
                not_unspecified(getattr(user_args, attr))
                for attr in ('user_feature_paths', 'data_matrix_paths',
                             'pyradigm_paths', 'arff_paths')):

            if not_unspecified(
                    user_args.print_opt_dir) and user_args.print_opt_dir:
                run_dir = realpath(user_args.print_opt_dir)
                print_options(run_dir)

            if not_unspecified(user_args.make_vis):
                out_dir = realpath(user_args.make_vis)
                res_path = pjoin(out_dir, cfg.file_name_results)
                if pexists(out_dir) and pexists(res_path):
                    # NOTE: the original re-tested not_unspecified(make_vis)
                    # here; it is guaranteed true by the outer guard, so the
                    # redundant check was removed.
                    print('Making vis from existing results is not supported '
                          'yet in the redesigned workflow')
                    # print('\n\nSaving the visualizations to \n{}'
                    #       ''.format(out_dir))
                    # make_visualizations(res_path, out_dir)
                else:
                    raise ValueError('Given folder does not exist, '
                                     'or has no results file!')

            sys.exit(0)

    user_feature_paths, user_feature_type, fs_subject_dir, meta_data_path, \
    meta_data_format = organize_inputs(user_args)

    if not meta_data_path:
        # no pyradigm/ARFF metadata: a separate meta file is then mandatory
        if user_args.meta_file is not None:
            meta_file = abspath(user_args.meta_file)
            if not pexists(meta_file):
                raise IOError("Meta data file doesn't exist.")
        else:
            raise ValueError('Metadata file must be provided '
                             'when not using pyradigm/ARFF inputs.')

        sample_ids, classes = get_metadata(meta_file)
    else:
        print('Using meta data from:\n\t{}\n'.format(meta_data_path))
        sample_ids, classes = get_metadata_in_pyradigm(meta_data_path,
                                                       meta_data_format)

    if user_args.out_dir is not None:
        out_dir = realpath(user_args.out_dir)
    else:
        out_dir = pjoin(realpath(getcwd()), cfg.output_dir_default)

    try:
        makedirs(out_dir, exist_ok=True)
    except OSError as exc:
        # chain the cause so the original makedirs failure is not lost
        raise IOError('Output folder could not be created.') from exc

    train_perc = np.float32(user_args.train_perc)
    if not (0.01 <= train_perc <= 0.99):
        raise ValueError("Training percentage {} out of bounds "
                         "- must be >= 0.01 and <= 0.99".format(train_perc))

    num_rep_cv = np.int64(user_args.num_rep_cv)
    if num_rep_cv < 10:
        raise ValueError("At least 10 repetitions of CV is recommended.")

    num_procs = check_num_procs(user_args.num_procs)

    reduced_dim_size = validate_feature_selection_size(
        user_args.reduced_dim_size)

    impute_strategy = validate_impute_strategy(user_args.impute_strategy)

    covar_list, covar_method = check_covariate_options(user_args.covariates,
                                                       user_args.covar_method)

    grid_search_level = user_args.gs_level.lower()
    if grid_search_level not in cfg.GRIDSEARCH_LEVELS:
        raise ValueError('Unrecognized level of grid search. Valid choices: {}'
                         ''.format(cfg.GRIDSEARCH_LEVELS))

    dim_red_method = user_args.dim_red_method.lower()

    return user_args, user_feature_paths, user_feature_type, fs_subject_dir, \
           meta_data_path, meta_data_format, sample_ids, classes, out_dir, \
           train_perc, num_rep_cv, num_procs, reduced_dim_size, impute_strategy, \
           covar_list, covar_method, grid_search_level, dim_red_method