Example #1
# Standard-library imports this snippet needs; the QIIME helpers
# (parse_command_line_parameters, parse_mapping_file, run_supervised_learning,
# assemble_results) and script_info are assumed to come from the surrounding
# script/package.
from glob import glob
from os import makedirs
from os.path import isdir, join


def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_data = opts.input_data
    mapping_file = opts.mapping_file
    category = opts.category
    ntree = opts.ntree
    errortype = opts.errortype
    output_dir = opts.output_dir
    verbose = opts.verbose
    force = opts.force
    collate_results_fp = opts.collate_results_fp

    # create the output directories
    try:
        makedirs(opts.output_dir)
    except OSError:
        if force:
            pass
        else:
            # This check helps users avoid overwriting previous output.
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    # verify that category is in mapping file
    map_list = parse_mapping_file(open(mapping_file, 'U').readlines())
    if category not in map_list[1][1:]:
        # option_parser.error() exits immediately, so the available columns
        # are included in the message rather than printed afterwards
        option_parser.error(
            "Category '%s' not found in mapping file columns: %s" %
            (category, ', '.join(map_list[1][1:])))

    # if input is a single otu table
    if not isdir(input_data):

        # run the supervised learning algorithm
        result = run_supervised_learning(input_data, mapping_file, category,
                                         ntree, errortype, output_dir, verbose)

    # if input is a directory of otu tables
    else:
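        # collect every table in the directory; the '*biom' pattern matches
        # any filename ending in "biom", including the expected ".biom" files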
        input_tables = glob('%s/*biom' % input_data)

        coll_est_error = []
        coll_est_error_stdev = []
        baseline_error = []

        for table_fp in input_tables:
            # create output dir on per-table basis with convention:
            # "sl_TABLENAME_CATEGORY/"
            output_basename = table_fp.split('/')[-1]
            output_basename = output_basename.replace('.biom', '')
            output_name = "sl_%s_%s/" % (output_basename, category)
            output_fp = join(output_dir, output_name)
            # create the output directories
            try:
                makedirs(output_fp)
            except OSError:
                if force:
                    pass
                else:
                    # This check helps users avoid overwriting previous output.
                    option_parser.error(
                        "Output directory already exists. Please choose"
                        " a different directory, or force overwrite with -f.")

            result = run_supervised_learning(table_fp, mapping_file, category,
                                             ntree, errortype, output_fp,
                                             verbose)

            # retrieve the estimated error and baseline error
            est_error_line, baseline_error_line = \
                result['summary'].readlines()[2:4]

            est_error_line = est_error_line.split('\t')[1]
            coll_est_error.append(float(est_error_line.split(' ')[0]))

            # only collect standard deviations for cv5 and cv10 errortypes
            if errortype in ['cv5', 'cv10']:
                est_error_stdev = est_error_line.split(' ')[2].strip()
                coll_est_error_stdev.append(float(est_error_stdev))

            # record the baseline error from the first table only; it should
            # be identical across all tables
            if baseline_error == []:
                baseline_error.append(
                    float(baseline_error_line.split('\t')[1].strip()))

        if collate_results_fp:
            # assemble the collated per-table results and write them out
            results = assemble_results(coll_est_error, coll_est_error_stdev,
                                       baseline_error[0], errortype, ntree)
            with open(collate_results_fp, 'w') as output_file:
                output_file.write('\n'.join(results))
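
The per-table loop above assumes each summary file holds the estimated error on its third line and the baseline error on its fourth, tab-separated from their labels, with a "value +/- stdev" form for the cross-validation error types. A minimal sketch of that parsing in isolation, using hypothetical summary contents (the labels and values below are illustrative assumptions, not the actual QIIME output):

# Hypothetical summary contents; only the layout main() relies on matters:
# lines 3-4 carry the estimated and baseline error, tab-separated from their
# labels, with "value +/- stdev" for the cv5/cv10 error types.
summary_lines = [
    "Model\tRandom Forest\n",
    "Category\tTreatment\n",
    "Estimated error (cv10)\t0.125 +/- 0.042\n",
    "Baseline error\t0.500\n",
]

est_error_line, baseline_error_line = summary_lines[2:4]

value_field = est_error_line.split('\t')[1]                   # "0.125 +/- 0.042\n"
est_error = float(value_field.split(' ')[0])                  # 0.125
est_error_stdev = float(value_field.split(' ')[2].strip())    # 0.042
baseline = float(baseline_error_line.split('\t')[1].strip())  # 0.5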