def main():
    """Run supervised learning on a single OTU table or a directory of tables.

    For a single table, results go directly into ``opts.output_dir``.  For a
    directory, every ``*.biom`` table gets its own ``sl_TABLENAME_CATEGORY/``
    subdirectory, and (optionally) the per-table estimated errors are collated
    into ``opts.collate_results_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_data = opts.input_data
    mapping_file = opts.mapping_file
    category = opts.category
    ntree = opts.ntree
    errortype = opts.errortype
    output_dir = opts.output_dir
    verbose = opts.verbose
    force = opts.force
    collate_results_fp = opts.collate_results_fp

    # create the top-level output directory (or bail if it already exists)
    _ensure_output_dir(option_parser, output_dir, force)

    # verify that category is in mapping file
    with open(mapping_file, 'U') as map_f:
        map_list = parse_mapping_file(map_f.readlines())
    if category not in map_list[1][1:]:
        # print the available columns BEFORE calling error():
        # option_parser.error() raises SystemExit, so anything after it
        # (the original print/exit(1)) was unreachable
        print(map_list[1][1:])
        option_parser.error(
            "Category '%s' not found in mapping file columns:" % (category))

    if not isdir(input_data):
        # single OTU table: one run, writing into output_dir directly
        run_supervised_learning(input_data, mapping_file, category,
                                ntree, errortype, output_dir, verbose)
    else:
        # directory of OTU tables: one run per *.biom file
        input_tables = glob('%s/*biom' % input_data)
        coll_est_error = []
        coll_est_error_stdev = []
        baseline_error = []
        for table_fp in input_tables:
            # per-table output dir with convention "sl_TABLENAME_CATEGORY/"
            output_basename = table_fp.split('/')[-1].replace('.biom', '')
            output_name = "sl_%s_%s/" % (output_basename, category)
            output_fp = join(output_dir, output_name)
            _ensure_output_dir(option_parser, output_fp, force)

            result = run_supervised_learning(table_fp, mapping_file, category,
                                             ntree, errortype, output_fp,
                                             verbose)
            # retrieve the estimated error and baseline error from the
            # 3rd and 4th lines of the summary file
            est_error_line, baseline_error_line = \
                result['summary'].readlines()[2:4]
            est_error_line = est_error_line.split('\t')[1]
            coll_est_error.append(float(est_error_line.split(' ')[0]))
            # standard deviations are only reported for cross-validation
            if errortype in ['cv5', 'cv10']:
                est_error_stdev = est_error_line.split(' ')[2].strip()
                coll_est_error_stdev.append(float(est_error_stdev))
            # record the baseline error once; NOTE(review): it is assumed
            # to be identical across all tables but is not actually checked
            if baseline_error == []:
                baseline_error.append(
                    float(baseline_error_line.split('\t')[1].strip()))

        if collate_results_fp:
            results = assemble_results(coll_est_error, coll_est_error_stdev,
                                       baseline_error[0], errortype, ntree)
            # 'with' guarantees the collated-results file is closed
            with open(collate_results_fp, 'w') as output_file:
                output_file.write('\n'.join(results))


def _ensure_output_dir(option_parser, path, force):
    """Create *path*; exit via option_parser.error() unless forced.

    This check helps users avoid overwriting previous output.
    """
    try:
        makedirs(path)
    except OSError:
        if not force:
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")
def main():
    """Drive supervised learning for one OTU table or a directory of tables.

    A plain file input runs once into ``opts.output_dir``; a directory input
    runs each ``*.biom`` table into its own ``sl_TABLENAME_CATEGORY/``
    subdirectory, optionally collating the errors into
    ``opts.collate_results_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_data = opts.input_data
    mapping_file = opts.mapping_file
    category = opts.category
    ntree = opts.ntree
    errortype = opts.errortype
    output_dir = opts.output_dir
    verbose = opts.verbose
    force = opts.force
    collate_results_fp = opts.collate_results_fp

    # create the output directory; this check helps users avoid
    # overwriting previous output
    try:
        makedirs(output_dir)
    except OSError:
        if not force:
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    # verify that category is in mapping file; 'with' closes the handle
    with open(mapping_file, 'U') as map_f:
        map_list = parse_mapping_file(map_f.readlines())
    if category not in map_list[1][1:]:
        # show the valid columns first — option_parser.error() raises
        # SystemExit, so the original print/exit(1) after it never ran
        print(map_list[1][1:])
        option_parser.error(
            "Category '%s' not found in mapping file columns:" % (category))

    if not isdir(input_data):
        # input is a single OTU table: run the supervised learning algorithm
        run_supervised_learning(input_data, mapping_file, category,
                                ntree, errortype, output_dir, verbose)
        return

    # input is a directory of OTU tables
    input_tables = glob('%s/*biom' % input_data)
    coll_est_error = []
    coll_est_error_stdev = []
    baseline_error = []
    for table_fp in input_tables:
        # per-table output dir with convention "sl_TABLENAME_CATEGORY/"
        table_name = table_fp.split('/')[-1].replace('.biom', '')
        output_fp = join(output_dir, "sl_%s_%s/" % (table_name, category))
        try:
            makedirs(output_fp)
        except OSError:
            if not force:
                # same overwrite guard as for the top-level directory
                option_parser.error(
                    "Output directory already exists. Please choose"
                    " a different directory, or force overwrite with -f.")

        result = run_supervised_learning(table_fp, mapping_file, category,
                                         ntree, errortype, output_fp, verbose)
        # estimated and baseline error live on summary lines 3 and 4
        est_error_line, baseline_error_line = \
            result['summary'].readlines()[2:4]
        est_error_line = est_error_line.split('\t')[1]
        coll_est_error.append(float(est_error_line.split(' ')[0]))
        # only cv5/cv10 error types report a standard deviation
        if errortype in ['cv5', 'cv10']:
            est_error_stdev = est_error_line.split(' ')[2].strip()
            coll_est_error_stdev.append(float(est_error_stdev))
        # keep the first baseline error; NOTE(review): assumed equal
        # across tables but never actually verified here
        if baseline_error == []:
            baseline_error.append(
                float(baseline_error_line.split('\t')[1].strip()))

    if collate_results_fp:
        results = assemble_results(coll_est_error, coll_est_error_stdev,
                                   baseline_error[0], errortype, ntree)
        # 'with' ensures the collated output file is always closed
        with open(collate_results_fp, 'w') as output_file:
            output_file.write('\n'.join(results))