Example #1
0
    def setUp(self):
        """Write the fixture tables to temp files and run the classifier.

        Populates ``self.tmp_otu_filepath``, ``self.tmp_map_filepath``,
        ``self.files_to_remove``, ``self.dirs_to_remove`` and
        ``self.results`` for use by the tests.
        """
        # Temporary OTU table input file. mkstemp hands back an open
        # descriptor; release it and reopen by path for writing.
        fd, self.tmp_otu_filepath = mkstemp(prefix='R_test_otu_table_',
                                            suffix='.txt')
        close(fd)
        # 'with' guarantees the handle is closed even if write() raises.
        with open(self.tmp_otu_filepath, 'w') as otu_file:
            otu_file.write(test_otu_table)

        # Temporary mapping input file
        fd, self.tmp_map_filepath = mkstemp(prefix='R_test_map_',
                                            suffix='.txt')
        close(fd)
        with open(self.tmp_map_filepath, 'w') as map_file:
            map_file.write(test_map)

        self.files_to_remove = \
            [self.tmp_otu_filepath, self.tmp_map_filepath]

        # Scratch directory that receives the classifier output
        output_dir = mkdtemp()
        self.dirs_to_remove = [output_dir]

        # get random forests results
        mkdir(join(output_dir, 'random_forest'))
        self.results = run_supervised_learning(
            self.tmp_otu_filepath, self.tmp_map_filepath, 'Individual',
            ntree=100, errortype='oob',
            output_dir=output_dir)
    def setUp(self):
        """Write the fixture tables to temp files and run the classifier.

        Populates ``self.tmp_otu_filepath``, ``self.tmp_map_filepath``,
        ``self.files_to_remove``, ``self.dirs_to_remove`` and
        ``self.results`` for use by the tests.
        """
        # Temporary OTU table input file
        self.tmp_otu_filepath = get_tmp_filename(
            prefix='R_test_otu_table_',
            suffix='.txt')
        # 'with' guarantees the handle is closed even if write() raises.
        with open(self.tmp_otu_filepath, 'w') as otu_file:
            otu_file.write(test_otu_table)

        # Temporary mapping input file
        self.tmp_map_filepath = get_tmp_filename(
            prefix='R_test_map_',
            suffix='.txt')
        with open(self.tmp_map_filepath, 'w') as map_file:
            map_file.write(test_map)

        self.files_to_remove = \
            [self.tmp_otu_filepath, self.tmp_map_filepath]

        # Scratch directory that receives the classifier output
        output_dir = mkdtemp()
        self.dirs_to_remove = [output_dir]

        # get random forests results
        mkdir(join(output_dir, 'random_forest'))
        self.results = run_supervised_learning(
            self.tmp_otu_filepath, self.tmp_map_filepath, 'Individual',
            ntree=100, errortype='oob',
            output_dir=output_dir)
    def setUp(self):
        """Write the fixture tables to temp files and run the classifier.

        Populates ``self.tmp_otu_filepath``, ``self.tmp_map_filepath``,
        ``self.files_to_remove``, ``self.dirs_to_remove`` and
        ``self.results`` for use by the tests.
        """
        # Temporary OTU table input file
        self.tmp_otu_filepath = get_tmp_filename(prefix="R_test_otu_table_", suffix=".txt")
        # 'with' guarantees the handle is closed even if write() raises.
        with open(self.tmp_otu_filepath, "w") as otu_file:
            otu_file.write(test_otu_table)

        # Temporary mapping input file
        self.tmp_map_filepath = get_tmp_filename(prefix="R_test_map_", suffix=".txt")
        with open(self.tmp_map_filepath, "w") as map_file:
            map_file.write(test_map)

        self.files_to_remove = [self.tmp_otu_filepath, self.tmp_map_filepath]

        # Scratch directory that receives the classifier output
        output_dir = mkdtemp()
        self.dirs_to_remove = [output_dir]

        # get random forests results
        mkdir(join(output_dir, "random_forest"))
        self.results = run_supervised_learning(
            self.tmp_otu_filepath,
            self.tmp_map_filepath,
            "Individual",
            ntree=100,
            errortype="oob",
            output_dir=output_dir,
        )
Example #4
0
def main():
    """Validate the command-line options and run supervised learning.

    Creates the output directory (an existing one is tolerated only when
    the force option is set), checks that the requested category is a
    column of the mapping file, then calls run_supervised_learning.
    Exits with status 1 on either validation failure.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # create the output directories
    try:
        makedirs(opts.output_dir)
    except OSError:
        if not opts.force:
            # This check helps users avoid overwriting previous output.
            print("Output directory already exists. Please choose "
                  "a different directory, or force overwrite with -f.")
            exit(1)

    # verify that category is in mapping file; 'with' makes sure the
    # mapping file handle is closed once it has been parsed
    with open(opts.mapping_file, 'U') as mapping_f:
        map_list = parse_mapping_file(mapping_f.readlines())
    if opts.category not in map_list[1][1:]:
        print("Category '%s' not found in mapping file columns:" % (
            opts.category))
        print(map_list[1][1:])
        exit(1)

    # run the supervised learning algorithm; the return value is not
    # used here
    run_supervised_learning(opts.input_data,
                            opts.mapping_file,
                            opts.category,
                            ntree=opts.ntree,
                            errortype=opts.errortype,
                            output_dir=opts.output_dir,
                            verbose=opts.verbose)
Example #5
0
def main():
    """Validate the command-line options and run supervised learning.

    Creates the output directory (an existing one is tolerated only when
    the force option is set), checks that the requested category is a
    column of the mapping file, then calls run_supervised_learning.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # create the output directories
    try:
        makedirs(opts.output_dir)
    except OSError:
        if not opts.force:
            # This check helps users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    # verify that category is in mapping file; 'with' makes sure the
    # mapping file handle is closed once it has been parsed
    with open(opts.mapping_file, 'U') as mapping_f:
        map_list = parse_mapping_file(mapping_f.readlines())
    if opts.category not in map_list[1][1:]:
        print("Category '%s' not found in mapping file columns:" % (opts.category))
        print(map_list[1][1:])
        exit(1)

    # run the supervised learning algorithm; the return value is not
    # used here
    run_supervised_learning(opts.input_data, opts.mapping_file, opts.category,
            ntree=opts.ntree, errortype=opts.errortype,
            output_dir=opts.output_dir, verbose=opts.verbose)
Example #6
0
def main():
    """Run supervised learning on one OTU table or a directory of tables.

    When the input path is not a directory it is passed straight to
    run_supervised_learning. When it is a directory, every file matching
    '*biom' inside it is processed into its own "sl_TABLENAME_CATEGORY/"
    subdirectory of the output directory and, if a collate path was
    given, the per-table error estimates are assembled and written there.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_data = opts.input_data
    mapping_file = opts.mapping_file
    category = opts.category
    ntree = opts.ntree
    errortype = opts.errortype
    output_dir = opts.output_dir
    verbose = opts.verbose
    force = opts.force
    collate_results_fp = opts.collate_results_fp

    # create the output directories
    try:
        makedirs(output_dir)
    except OSError:
        if not force:
            # This check helps users avoid overwriting previous output.
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    # verify that category is in mapping file
    with open(mapping_file, 'U') as mapping_f:
        map_list = parse_mapping_file(mapping_f.readlines())
    mapping_columns = map_list[1][1:]
    if category not in mapping_columns:
        # option_parser.error is relied on to terminate the script (as
        # optparse's OptionParser.error does), so the available columns
        # must be part of the message rather than printed afterwards.
        option_parser.error(
            "Category '%s' not found in mapping file columns: %s"
            % (category, mapping_columns))

    if not isdir(input_data):
        # input is a single otu table: run the supervised learning algorithm
        run_supervised_learning(input_data, mapping_file, category,
                                ntree, errortype, output_dir, verbose)
    else:
        # input is a directory of otu tables
        input_tables = glob('%s/*biom' % input_data)

        coll_est_error = []
        coll_est_error_stdev = []
        baseline_error = None

        for table_fp in input_tables:
            # create output dir on per-table basis with convention:
            # "sl_TABLENAME_CATEGORY/"
            output_basename = table_fp.split('/')[-1]
            output_basename = output_basename.replace('.biom', '')
            output_name = "sl_%s_%s/" % (output_basename, category)
            output_fp = join(output_dir, output_name)
            # create the output directories
            try:
                makedirs(output_fp)
            except OSError:
                if not force:
                    # This check helps users avoid overwriting previous output.
                    option_parser.error(
                        "Output directory already exists. Please choose"
                        " a different directory, or force overwrite with -f.")

            result = run_supervised_learning(table_fp, mapping_file, category,
                                             ntree, errortype, output_fp,
                                             verbose)

            # retrieve the estimated error and baseline error from the
            # third and fourth lines of the summary
            est_error_line, baseline_error_line = \
                result['summary'].readlines()[2:4]

            est_error_line = est_error_line.split('\t')[1]
            coll_est_error.append(float(est_error_line.split(' ')[0]))

            # only collect standard deviations for cv5 and cv10 errortypes
            if errortype in ['cv5', 'cv10']:
                est_error_stdev = est_error_line.split(' ')[2].strip()
                coll_est_error_stdev.append(float(est_error_stdev))

            # baseline error should be the same across all tables, so
            # only the first table's value is kept
            if baseline_error is None:
                baseline_error = float(
                    baseline_error_line.split('\t')[1].strip())

        if collate_results_fp:
            if baseline_error is None:
                # No table was processed; report it instead of failing
                # on a missing baseline value.
                option_parser.error(
                    "No tables matching '*biom' were found in '%s';"
                    " nothing to collate." % input_data)

            # get assembled results
            results = assemble_results(coll_est_error, coll_est_error_stdev,
                                       baseline_error, errortype, ntree)
            with open(collate_results_fp, 'w') as output_file:
                output_file.write('\n'.join(results))
Example #7
0
def main():
    """Run supervised learning on one OTU table or a directory of tables.

    When the input path is not a directory it is passed straight to
    run_supervised_learning. When it is a directory, every file matching
    '*biom' inside it is processed into its own "sl_TABLENAME_CATEGORY/"
    subdirectory of the output directory and, if a collate path was
    given, the per-table error estimates are assembled and written there.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_data = opts.input_data
    mapping_file = opts.mapping_file
    category = opts.category
    ntree = opts.ntree
    errortype = opts.errortype
    output_dir = opts.output_dir
    verbose = opts.verbose
    force = opts.force
    collate_results_fp = opts.collate_results_fp

    # create the output directories
    try:
        makedirs(output_dir)
    except OSError:
        if not force:
            # This check helps users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    # verify that category is in mapping file
    with open(mapping_file, 'U') as mapping_f:
        map_list = parse_mapping_file(mapping_f.readlines())
    mapping_columns = map_list[1][1:]
    if category not in mapping_columns:
        # option_parser.error is relied on to terminate the script (as
        # optparse's OptionParser.error does), so the available columns
        # must be part of the message rather than printed afterwards.
        option_parser.error(
            "Category '%s' not found in mapping file columns: %s" %
            (category, mapping_columns))

    if not isdir(input_data):
        # input is a single otu table: run the supervised learning algorithm
        run_supervised_learning(input_data, mapping_file, category,
                                ntree, errortype, output_dir, verbose)
    else:
        # input is a directory of otu tables
        input_tables = glob('%s/*biom' % input_data)

        coll_est_error = []
        coll_est_error_stdev = []
        baseline_error = None

        for table_fp in input_tables:
            # create output dir on per-table basis with convention:
            # "sl_TABLENAME_CATEGORY/"
            output_basename = table_fp.split('/')[-1]
            output_basename = output_basename.replace('.biom', '')
            output_name = "sl_%s_%s/" % (output_basename, category)
            output_fp = join(output_dir, output_name)
            # create the output directories
            try:
                makedirs(output_fp)
            except OSError:
                if not force:
                    # This check helps users avoid overwriting previous output.
                    option_parser.error("Output directory already exists. Please choose"
                                        " a different directory, or force overwrite with -f.")

            result = run_supervised_learning(table_fp, mapping_file, category,
                                             ntree, errortype, output_fp, verbose)

            # retrieve the estimated error and baseline error from the
            # third and fourth lines of the summary
            est_error_line, baseline_error_line = \
                result['summary'].readlines()[2:4]

            est_error_line = est_error_line.split('\t')[1]
            coll_est_error.append(float(est_error_line.split(' ')[0]))

            # only collect standard deviations for cv5 and cv10 errortypes
            if errortype in ['cv5', 'cv10']:
                est_error_stdev = est_error_line.split(' ')[2].strip()
                coll_est_error_stdev.append(float(est_error_stdev))

            # baseline error should be the same across all tables, so
            # only the first table's value is kept
            if baseline_error is None:
                baseline_error = float(
                    baseline_error_line.split('\t')[1].strip())

        if collate_results_fp:
            if baseline_error is None:
                # No table was processed; report it instead of failing
                # on a missing baseline value.
                option_parser.error(
                    "No tables matching '*biom' were found in '%s';"
                    " nothing to collate." % input_data)

            # get assembled results
            results = assemble_results(coll_est_error, coll_est_error_stdev,
                                       baseline_error, errortype, ntree)
            with open(collate_results_fp, 'w') as output_file:
                output_file.write('\n'.join(results))