def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    rarefaction_lines = open(opts.alpha_diversity_fp, 'U')
    mapping_lines = open(opts.mapping_fp, 'U')
    category = opts.category
    depth = int(opts.depth)
    output_path = opts.output_fp

    result = compare_alpha_diversities(rarefaction_lines, mapping_lines,
                                       category, depth, opts.test_type,
                                       opts.num_permutations)
    rarefaction_lines.close()
    mapping_lines.close()

    corrected_result = _correct_compare_alpha_results(result,
                                                      opts.correction_method)

    # write results
    outfile = open(output_path, 'w')
    header = 'Comparison\ttval\tpval'
    lines = [header]
    for k, v in corrected_result.items():
        lines.append('\t'.join(map(str, [k, v[0], v[1]])))
    outfile.write('\n'.join(lines))
    outfile.close()

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    assignment_method = opts.assignment_method

    if assignment_method == 'blast':
        if not opts.id_to_taxonomy_fp:
            option_parser.error('Option --id_to_taxonomy_fp is required when '
                                'assigning with blast.')
        if not (opts.reference_seqs_fp or opts.blast_db):
            option_parser.error('Either a blast db (via -b) or a collection '
                                'of reference sequences (via -r) must be '
                                'passed to assign taxonomy using blast.')

    if assignment_method == 'rdp':
        try:
            validate_rdp_version()
        except RuntimeError, e:
            option_parser.error(e)
        if opts.id_to_taxonomy_fp is not None:
            if opts.reference_seqs_fp is None:
                option_parser.error(
                    'A filepath for reference sequences must be '
                    'specified (via -r) along with the id_to_taxonomy '
                    'file to train the Rdp Classifier.')
        elif opts.reference_seqs_fp is not None:
            option_parser.error(
                'A filepath for an id to taxonomy map must be '
                'specified (via -t) along with the reference '
                'sequences fp to train the Rdp Classifier.')
        else:
            pass

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # if using collated data, make sure they specify a depth
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            alpha_dict[splitext(basename(single_alpha_fp))[0]] = open(
                single_alpha_fp, "U").readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict,
                                                               depth)
        # see mean_alpha for the possible exceptions
        except ValueError, e:
            option_parser.error(e.message)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_files = map(open, opts.otu_map_fps)
    failures_fp = opts.failures_fp
    output_fp = opts.output_fp

    if failures_fp:
        failures_f = open(failures_fp, 'U')
    else:
        failures_f = None

    try:
        result = map_otu_map_files(otu_files, failures_file=failures_f)
    except KeyError as e:
        print ('Some keys do not map (' + str(e) + ') -- is the order of'
               ' your OTU maps equivalent to the order in which the OTU'
               ' pickers were run? If expanding a failures file, did you'
               ' remember to leave out the otu map from the run which'
               ' generated the failures file?')
        exit(1)

    if failures_fp is not None:
        of = open(output_fp, 'w')
        of.write('\n'.join(result))
        of.close()
    else:
        write_otu_map(result.items(), output_fp)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    sample_id_map_fp = opts.sample_id_map_fp
    if sample_id_map_fp:
        sample_id_map = dict([(k, v[0]) for k, v in fields_to_dict(
            open(sample_id_map_fp, "U")).items()])
    else:
        sample_id_map = None

    input_dm_fps = opts.input_dms.split(',')
    output_f = open(opts.output_fp, 'w')
    output_f.write(comment)
    output_f.write('DM1\tDM2\tNumber of entries\tMantel p-value\n')
    num_iterations = opts.num_iterations
    for i, fp1 in enumerate(input_dm_fps):
        for fp2 in input_dm_fps[i + 1:]:
            (dm1_labels, dm1), (dm2_labels, dm2) =\
                make_compatible_distance_matrices(
                    parse_distmat(open(fp1, 'U')),
                    parse_distmat(open(fp2, 'U')),
                    lookup=sample_id_map)
            if len(dm1_labels) < 2:
                output_f.write('%s\t%s\t%d\tToo few samples\n'
                               % (fp1, fp2, len(dm1_labels)))
                continue
            p = mantel(dm1, dm2, n=num_iterations)
            p_str = format_p_value_for_num_iters(p, num_iterations)
            output_f.write('%s\t%s\t%d\t%s\n'
                           % (fp1, fp2, len(dm1_labels), p_str))
    output_f.close()

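# Illustrative sketch (not part of the original scripts): the core of a Mantel
# permutation test like the mantel() call above, written directly with numpy.
# It correlates the upper triangles of two square distance matrices, then
# repeatedly permutes the rows/columns of one matrix to build a null
# distribution. Function and variable names here are hypothetical.
import numpy as np

def mantel_sketch(dm1, dm2, n_permutations=999):
    dm1, dm2 = np.asarray(dm1, float), np.asarray(dm2, float)
    idx = np.triu_indices_from(dm1, k=1)       # upper-triangle entries only
    observed = np.corrcoef(dm1[idx], dm2[idx])[0, 1]
    count = 0
    for _ in range(n_permutations):
        perm = np.random.permutation(dm1.shape[0])
        permuted = dm1[perm][:, perm]           # permute rows and columns together
        if np.corrcoef(permuted[idx], dm2[idx])[0, 1] >= observed:
            count += 1
    return observed, (count + 1) / float(n_permutations + 1)
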
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    output_dir = opts.output_dir
    mapping_category = opts.mapping_category

    try:
        makedirs(output_dir)
    except OSError:
        pass

    percent_failures_data, percent_failures_plot, num_new_otus_data, \
        num_new_otus_plot = generate_new_diversity_plots(
            [open(otu_table_fp, 'U') for otu_table_fp in opts.otu_table_fps],
            open(opts.gg_fasta_fp, 'U'),
            open(opts.mapping_fp, 'U'),
            mapping_category,
            opts.min_num_samples,
            opts.category_values_to_exclude.split(','),
            opts.verbose)

    # Save plots as PDFs.
    percent_failures_plot.savefig(
        join(output_dir, 'percent_novel_seqs_by_%s.pdf' % mapping_category))
    num_new_otus_plot.savefig(
        join(output_dir, 'num_novel_otus_by_%s.pdf' % mapping_category))

    # Pickle plot raw data in case we need to load up the data again into new
    # plots and interactively tweak them (it'll take too long to rerun the
    # whole script for these tweaks).
    dump(percent_failures_data,
         open(join(output_dir,
                   'percent_novel_seqs_by_%s.p' % mapping_category), 'wb'))
    dump(num_new_otus_data,
         open(join(output_dir,
                   'num_novel_otus_by_%s.p' % mapping_category), 'wb'))

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.attempt_read_reorientation:
        if not opts.mapping_fp:
            option_parser.error("To use --attempt_read_reorientation, one "
                                "must supply a mapping file that contains "
                                "both LinkerPrimerSequence and ReversePrimer "
                                "columns.")
    if opts.input_type == "barcode_paired_end":
        if not opts.fastq2:
            option_parser.error("To use input_type of barcode_paired_end, "
                                "a second fastq file must be specified with "
                                "--fastq2")

    if not opts.fastq2:
        disable_header_match = True
    else:
        disable_header_match = opts.disable_header_match

    fastq1 = qiime_open(opts.fastq1)
    if opts.fastq2:
        fastq2 = qiime_open(opts.fastq2)
    else:
        fastq2 = None
    create_dir(opts.output_dir)
    if opts.mapping_fp:
        map_fp = qiime_open(opts.mapping_fp)
    else:
        map_fp = None

    extract_barcodes(fastq1, fastq2, opts.output_dir, opts.input_type,
                     opts.bc1_len, opts.bc2_len, opts.rev_comp_bc1,
                     opts.rev_comp_bc2, opts.char_delineator,
                     opts.switch_bc_order, map_fp,
                     opts.attempt_read_reorientation, disable_header_match)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    truncate_option = opts.truncate_option
    primer_mismatches = int(opts.primer_mismatches)

    create_dir(output_dir)

    if truncate_option not in ['truncate_only', 'truncate_remove']:
        raise ValueError('-z option must be either truncate_only or '
                         'truncate_remove')

    try:
        fasta_f = open(fasta_fp, "U")
        fasta_f.close()
    except IOError:
        raise IOError("Unable to open fasta file, please check path/"
                      "permissions.")
    try:
        mapping_f = open(mapping_fp, "U")
        mapping_f.close()
    except IOError:
        raise IOError("Unable to open mapping file, please check path/"
                      "permissions.")

    truncate_reverse_primer(fasta_fp, mapping_fp, output_dir,
                            truncate_option, primer_mismatches)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    columns_to_merge = opts.columns_to_merge
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp

    try:
        data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    except:
        option_parser.error('The input does not look like a valid mapping '
                            'file')

    for merging in columns_to_merge:
        retrieve = lambda x: headers.index(x)
        indices = map(retrieve, merging.split('&&'))

        headers.append(''.join([headers[element] for element in indices]))
        for line in data:
            line.append(''.join([line[element] for element in indices]))

    # this should never happen
    assert len(headers) == len(data[0]), \
        "The number of header columns and data columns no longer match"

    lines = format_mapping_file(headers, data, comments)
    fd = open(output_fp, 'w')
    fd.writelines(lines)
    fd.close()

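# Illustrative sketch (not part of the original script): how the '&&'-joined
# column merge above behaves on a toy header/data pair. The column names and
# values here are made-up examples.
toy_headers = ['SampleID', 'Treatment', 'Timepoint']
toy_data = [['S1', 'Control', 'T0'],
            ['S2', 'Fasting', 'T1']]
for merging in ['Treatment&&Timepoint']:
    indices = [toy_headers.index(name) for name in merging.split('&&')]
    toy_headers.append(''.join([toy_headers[i] for i in indices]))
    for row in toy_data:
        row.append(''.join([row[i] for i in indices]))
# toy_headers now ends with 'TreatmentTimepoint' and each row gains a merged
# value such as 'ControlT0'.
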
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    reference_seqs_filepath = opts.reference_seqs_fp
    input_seqs_filepath = opts.fasta_fp
    input_otu_filepath = opts.otu_fp
    result_path = opts.result_fp or\
        '%s_rep_set.fasta' % input_seqs_filepath
    log_path = opts.log_fp

    if reference_seqs_filepath:
        rep_set_picker =\
            reference_rep_set_picking_methods[opts.rep_set_picking_method]
        rep_set_picker(input_seqs_filepath,
                       input_otu_filepath,
                       reference_seqs_filepath,
                       result_path=result_path,
                       log_path=log_path,
                       sort_by=opts.sort_by)
    else:
        if not input_seqs_filepath:
            option_parser.error('--fasta_fp must be provided when not '
                                'picking representative sequences against a '
                                'reference set.')
        rep_set_picker =\
            rep_set_picking_methods[opts.rep_set_picking_method]
        rep_set_picker(input_seqs_filepath,
                       input_otu_filepath,
                       result_path=result_path,
                       log_path=log_path,
                       sort_by=opts.sort_by)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    output_fp = opts.output_fp

    map_data, header, comments = parse_mapping_file(opts.input_fp)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!"
                            % opts.category)

    # use stdout or the user supplied file path
    if output_fp:
        fd = open(output_fp, 'w')
    else:
        fd = stdout

    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    for cat_val in natsort(result):
        if not cat_val:
            fd.write("***UNSPECIFIED***\t%d\n" % result[cat_val])
        else:
            fd.write("%s\t%d\n" % (cat_val, result[cat_val]))

    fd.close()

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(',')
    bases = opts.bases
    read = opts.read

    for lane in lanes:
        read1_fps = glob('%s/s_%s_%d_*qseq.txt' % (input_dir,
                                                   lane.replace(',', ''),
                                                   read))
        # sort so results will be consistent across different runs (important
        # so amplicon and barcode read headers will match)
        read1_fps.sort()
        for read1_fp in read1_fps:
            output_fp = '%s/s_%s_%s_sequences.fastq' % (output_dir, lane,
                                                        read)
            output_f = open(output_fp, 'w')
            for record in iter_split_lines(open(read1_fp, 'U')):
                fastq_s = illumina_data_to_fastq(record,
                                                 number_of_bases=bases)
                output_f.write('%s\n' % fastq_s)
            output_f.close()

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.submit_jobs and not opts.make_jobs:
        option_parser.error('Must pass -m if passing -s. (Sorry about this, '
                            'it\'s for backwards-compatibility.)')

    min_args = 2
    if len(args) != min_args:
        option_parser.error('Program requires <commands file> and '
                            '<job prefix>')

    if (len(args[1]) > 10 or len(args[1]) == 0):
        option_parser.error('job prefix must be 1-10 characters long')

    commands = list(open(args[0]))
    job_prefix = args[1]

    if(not exists(opts.job_dir)):
        try:
            makedirs(opts.job_dir)
        except OSError:
            exit(" Jobs directory can not be created. "
                 "Check for permissions or file with the same name: %s\n"
                 % opts.job_dir)

    if (opts.make_jobs):
        filenames = make_jobs(commands, job_prefix, opts.queue, opts.job_dir)
    else:
        exit("Should we ever get here???")

    if (opts.submit_jobs):
        submit_jobs(filenames, opts.verbose)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    create_dir(output_dir)

    # split mapping file
    mapping_f = open(mapping_fp, 'U')
    for fp_str, sub_mapping_s in split_mapping_file_on_field(mapping_f,
                                                             mapping_field):
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
        open(mapping_output_fp, 'w').write(sub_mapping_s)

    # split otu table
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    mapping_f = open(mapping_fp, 'U')

    otu_table = load_table(otu_table_fp)

    try:
        for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
                otu_table, mapping_f, mapping_field):
            otu_table_output_fp = join(output_dir, '%s_%s.biom' % (
                otu_table_base_name, fp_str))
            write_biom_table(sub_otu_table_s, otu_table_output_fp)
    except OTUTableSplitError as e:
        option_parser.error(e)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_table = parse_biom_table(open(opts.input_otu_table_fp, 'U'))
    output_table_f = open(opts.output_otu_table_fp, 'w')
    metadata_field = opts.metadata_field
    positive_taxa = opts.positive_taxa
    negative_taxa = opts.negative_taxa

    if positive_taxa is not None:
        positive_taxa = positive_taxa.split(',')
    else:
        positive_taxa = None

    if negative_taxa is not None:
        negative_taxa = negative_taxa.split(',')
    else:
        negative_taxa = None

    filter_fn = get_otu_ids_from_taxonomy_f(positive_taxa,
                                            negative_taxa,
                                            metadata_field)
    output_table = input_table.filterObservations(filter_fn)
    output_table_f.write(format_biom_table(output_table))
    output_table_f.close()

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.show_metrics:
        print("Known metrics are: %s\n"
              % (', '.join(list_known_metrics()),))
        print("For more information, see http://scikit-bio.org/docs/latest/ge"
              "nerated/skbio.diversity.alpha.html#module-skbio.diversity.alpha")
        exit(0)

    almost_required_options = ['input_path', 'output_path', 'metrics']
    for option in almost_required_options:
        if getattr(opts, option) is None:
            option_parser.error('Required option --%s omitted.' % option)

    if os.path.isdir(opts.input_path):
        multiple_file_alpha(opts.input_path, opts.output_path, opts.metrics,
                            opts.tree_path)
    elif os.path.isfile(opts.input_path):
        try:
            f = open(opts.output_path, 'w')
            f.close()
        except IOError:
            if os.path.isdir(opts.output_path):
                option_parser.error(
                    "ioerror, couldn't create output file. The output path "
                    "is a directory, but it should be a single file")
            else:
                option_parser.error("ioerror, couldn't create output file")
        single_file_alpha(opts.input_path, opts.metrics, opts.output_path,
                          opts.tree_path)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not (opts.refseqs_path or opts.blast_db):
        option_parser.error('Either a blast db (via -b) or a collection of '
                            'reference sequences (via -r) must be passed')
    if opts.refseqs_path and opts.blast_db:
        option_parser.error('You should provide only a blast db (via -b) or '
                            'a collection of reference sequences (via -r), '
                            'but not both')

    # create dict of command-line options
    params = eval(str(opts))

    parallel_runner = ParallelBlaster(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.infile_path,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=False)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if isdir(opts.otu_table_fp):
        ret_code = create_dir(opts.output_fp, fail_on_exist=False)
        # run on each file in dir
        for fp in glob(opts.otu_table_fp + '/*biom'):
            parent_dir_name, file_name = split(fp)
            basename, extension = splitext(file_name)
            out_fp = opts.output_fp + "/" + basename + "_shared_OTUs.txt"
            with open(out_fp, 'w') as out_fh:
                out_fh.write(
                    calc_shared_phylotypes(load_table(fp),
                                           opts.reference_sample))
    else:
        # run in single file mode
        try:
            out_fh = open(opts.output_fp, "w")
        except IOError as message:
            exit(("Can't open output file %s for writing. Check the "
                  "permissions or existing directory with identical "
                  "name.\n%s") % (opts.output_fp, message))
        out_fh.write(calc_shared_phylotypes(load_table(opts.otu_table_fp),
                                            opts.reference_sample))

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    mapping_fp = opts.mapping_fp
    has_barcodes = not opts.not_barcoded
    variable_len_barcodes = opts.variable_len_barcodes
    output_dir = opts.output_dir + "/"
    char_replace = opts.char_replace
    verbose = opts.verbose
    disable_primer_check = opts.disable_primer_check
    added_demultiplex_field = opts.added_demultiplex_field

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    # Test for valid replacement characters
    valid_replacement_chars = digits + letters + "_" + "."
    if char_replace not in valid_replacement_chars:
        option_parser.error('-c option requires an alphanumeric, period, or '
                            'underscore character.')
    if len(char_replace) != 1:
        option_parser.error('-c parameter must be a single character.')

    check_mapping_file(mapping_fp, output_dir, has_barcodes, char_replace,
                       verbose, variable_len_barcodes,
                       disable_primer_check, added_demultiplex_field)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    negate = opts.negate

    if 1 != sum(map(bool, [opts.otu_map,
                           opts.seq_id_fp,
                           opts.subject_fasta_fp,
                           opts.seq_id_prefix])):
        option_parser.error("Must pass exactly one of -a, -s, -p, or -m.")

    if opts.otu_map:
        seqs_to_keep_lookup =\
            get_seqs_to_keep_lookup_from_otu_map(
                open(opts.otu_map, 'U'))
    elif opts.seq_id_fp:
        seqs_to_keep_lookup =\
            get_seqs_to_keep_lookup_from_seq_id_file(
                open(opts.seq_id_fp, 'U'))
    elif opts.subject_fasta_fp:
        seqs_to_keep_lookup =\
            get_seqs_to_keep_lookup_from_fasta_file(
                open(opts.subject_fasta_fp, 'U'))
    elif opts.seq_id_prefix:
        seqs_to_keep_lookup =\
            get_seqs_to_keep_lookup_from_prefix(
                open(opts.input_fasta_fp), opts.seq_id_prefix)
    else:
        option_parser.error("Must pass exactly one of -a, -s, -p, or -m.")

    filter_fasta_fp(opts.input_fasta_fp,
                    opts.output_fasta_fp,
                    seqs_to_keep_lookup,
                    negate)

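# Illustrative sketch (not part of the original script): one way lookups like
# seqs_to_keep_lookup above can be built and applied in plain Python. The
# assumed formats are a one-id-per-line sequence-id file and a standard FASTA
# file; both helper names are hypothetical.
def seq_ids_from_id_file(lines):
    # the first whitespace-separated field on each non-empty line is an id
    return set(line.split()[0] for line in lines if line.strip())

def filter_fasta_sketch(fasta_lines, ids_to_keep, negate=False):
    keep = False
    for line in fasta_lines:
        if line.startswith('>'):
            seq_id = line[1:].split()[0]
            keep = (seq_id in ids_to_keep) != negate
        if keep:
            yield line
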
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_rf = open(opts.temp_input, 'r+')
    listOfIds = parseRules(input_rf)
    print listOfIds

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.counts_fname:
        option_parser.error("An otu table file must be specified")

    if not opts.map_fname:
        option_parser.error("A mapping file must be specified")

    prefs, data, background_color, label_color, ball_scale, arrow_colors =\
        sample_color_prefs_and_map_data_from_options(opts)

    dir_path = opts.dir_path

    if dir_path is None or dir_path == '':
        dir_path = get_random_directory_name()

    create_dir(dir_path)
    create_dir(os.path.join(dir_path, "otu_network"))
    create_dir(os.path.join(dir_path, "otu_network/props"))
    create_dir(os.path.join(dir_path, "otu_network/stats"))

    map_lines = open(opts.map_fname, 'U').readlines()
    otu_sample_lines = open(opts.counts_fname, 'U').readlines()

    create_network_and_stats(dir_path, map_lines, otu_sample_lines, prefs,
                             data, background_color, label_color)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_f = open(opts.output_distance_matrix, 'w')
    if opts.otu_table_fp:
        otu_table = load_table(opts.otu_table_fp)
        samples_to_keep = otu_table.ids()
        # samples_to_keep = \
        #  sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        samples_to_keep = \
            get_seqs_to_keep_lookup_from_seq_id_file(
                open(opts.sample_id_fp, 'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp, 'U'), opts.valid_states)
        except ValueError as e:
            option_parser.error(e.message)
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                            '-s')
    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep the specified samples. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    d = filter_samples_from_distance_matrix(
        parse_distmat(open(opts.input_distance_matrix, 'U')),
        samples_to_keep,
        negate=not opts.negate)
    output_f.write(d)
    output_f.close()

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    split_fasta_on_sample_ids_to_files(
        MinimalFastaParser(open(opts.input_fasta_fp, 'U')),
        opts.output_dir,
        opts.buffer_size)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not (opts.reference_seqs_fp or opts.blast_db):
        option_parser.error('Either a blast db (via -b) or a collection of '
                            'reference sequences (via -r) must be passed to '
                            'assign taxonomy using blast.')

    # create dict of command-line options
    params = eval(str(opts))

    parallel_runner = ParallelBlastTaxonomyAssigner(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.input_fasta_fp,
                    abspath(opts.output_dir),
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_data = parse_otu_table(open(opts.input_otu_table, 'U'))
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    if sort_field and mapping_fp:
        mapping_data = parse_mapping_file(open(mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 sort_field)
    elif sorted_sample_ids_fp:
        sorted_sample_ids = sample_ids_from_f(open(sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data, sorted_sample_ids)
    else:
        option_parser.error("must provide either --sort_field and "
                            "--mapping_fp OR --sorted_sample_ids_fp")

    # format and write the otu table
    result_str = format_otu_table(result[0], result[1], result[2], result[3])
    of = open(opts.output_fp, 'w')
    of.write(result_str)
    of.close()

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_path = opts.input_path
    output_path = opts.output_path

    if isdir(input_path):
        # Run PCoA on all distance matrices in the input dir
        # Create the output directory if it does not exist
        if not exists(output_path):
            makedirs(output_path)

        # Get all the filenames present in the input directory
        file_names = [fname for fname in listdir(input_path)
                      if not (fname.startswith('.') or isdir(fname))]

        # Loop through all the input files
        for fname in file_names:
            # Get the path to the input distance matrix
            infile = join(input_path, fname)
            # Run PCoA on the input distance matrix
            with open(infile, 'U') as lines:
                pcoa_scores = pcoa(lines)
            # Store the PCoA results in the output directory
            base_fname, ext = splitext(fname)
            out_file = join(output_path, 'pcoa_%s.txt' % base_fname)
            pcoa_scores.write(out_file)
    else:
        # Run PCoA on the input distance matrix
        with open(input_path, 'U') as f:
            pcoa_scores = pcoa(f)
        # Store the results in the output file
        pcoa_scores.write(output_path)

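# Illustrative sketch (not part of the original script): classical principal
# coordinates analysis (metric MDS) on a small distance matrix using only
# numpy. The pcoa() call above delegates to a library implementation; this is
# just the textbook eigendecomposition of the double-centered matrix, and the
# function name is hypothetical.
import numpy as np

def pcoa_sketch(dm):
    dm = np.asarray(dm, float)
    n = dm.shape[0]
    centering = np.eye(n) - np.ones((n, n)) / n
    b = -0.5 * centering.dot(dm ** 2).dot(centering)  # double-centered matrix
    eigvals, eigvecs = np.linalg.eigh(b)
    order = np.argsort(eigvals)[::-1]                 # largest eigenvalues first
    eigvals, eigvecs = eigvals[order], eigvecs[:, order]
    coords = eigvecs * np.sqrt(np.clip(eigvals, 0, None))
    return coords, eigvals
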
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.show_metrics:
        print("Known metrics are: %s\n"
              % (', '.join(list_known_metrics()),))
        print("For more information, see "
              "http://qiime.org/scripts/alpha_diversity_metrics.html")
        exit(0)

    almost_required_options = ['input_path', 'output_path', 'metrics']
    for option in almost_required_options:
        if getattr(opts, option) is None:
            option_parser.error('Required option --%s omitted.' % option)

    if os.path.isdir(opts.input_path):
        multiple_file_alpha(opts.input_path, opts.output_path, opts.metrics,
                            opts.tree_path)
    elif os.path.isfile(opts.input_path):
        try:
            f = open(opts.output_path, 'w')
            f.close()
        except IOError:
            print("ioerror, couldn't create output file")
            exit(1)
        single_file_alpha(opts.input_path, opts.metrics, opts.output_path,
                          opts.tree_path)
    else:
        print("io error, input path not valid. Does it exist?")
        exit(1)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_table_fp = opts.output_otu_table_fp
    metadata_field = opts.metadata_field
    positive_taxa = opts.positive_taxa
    negative_taxa = opts.negative_taxa

    input_table = load_table(opts.input_otu_table_fp)

    if positive_taxa is not None:
        positive_taxa = positive_taxa.split(',')
    else:
        positive_taxa = None

    if negative_taxa is not None:
        negative_taxa = negative_taxa.split(',')
    else:
        negative_taxa = None

    filter_fn = get_otu_ids_from_taxonomy_f(positive_taxa, negative_taxa,
                                            metadata_field)
    input_table.filter(filter_fn, axis='observation')

    try:
        write_biom_table(input_table, output_table_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. "
            "This indicates that no OTUs remained after filtering.")

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    if output_dir:
        create_dir(output_dir)
    else:
        if isfile(opts.input_dir):
            # if output_dir is empty after the split, then a relative path was
            # passed, and the input file is in the current directory
            output_dir = split(opts.input_dir)[0] or '.'
        else:
            # opts.input_dir is a directory
            output_dir = opts.input_dir

    if opts.no_trim and not opts.use_sfftools:
        raise ValueError("When using the --no_trim option you must have the "
                         "sfftools installed and must also pass the "
                         "--use_sfftools option")

    prep_sffs_in_dir(
        opts.input_dir,
        output_dir,
        make_flowgram=opts.make_flowgram,
        convert_to_flx=opts.convert_to_FLX,
        use_sfftools=opts.use_sfftools,
        no_trim=opts.no_trim)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    input_fasta_fp = opts.input_fasta_fp
    output_fp = opts.output_fp
    retain_seq_id = opts.retain_seq_id

    if retain_seq_id:
        seq_desc_mapper = null_seq_desc_mapper
    else:
        seq_desc_mapper = append_rc

    if not output_fp:
        input_file_basename, input_file_ext = \
            splitext(split(input_fasta_fp)[1])
        output_fp = '%s_rc%s' % (input_file_basename, input_file_ext)

    rc_fasta_file(input_fasta_fp,
                  output_fp,
                  seq_desc_mapper)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    params = eval(str(opts))
    params['metrics'] = ','.join(opts.metrics)

    parallel_runner = ParallelAlphaDiversity(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    input_fps = glob(join(opts.input_path, '*'))
    parallel_runner(input_fps,
                    opts.output_path,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.step <= 0:
        option_parser.error("nonpositive step not allowed (%s was supplied)"
                            % (opts.step,))

    create_dir(opts.output_path, fail_on_exist=False)

    maker = RarefactionMaker(opts.input_path, opts.min, opts.max,
                             opts.step, opts.num_reps)

    if opts.subsample_multinomial:
        subsample_f = subsample_multinomial
    else:
        subsample_f = subsample

    maker.rarefy_to_files(opts.output_path,
                          False,
                          include_lineages=opts.lineages_included,
                          empty_otus_removed=(not opts.keep_empty_otus),
                          subsample_f=subsample_f)

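# Illustrative sketch (not part of the original script): what the multinomial
# subsampling mode selected above does conceptually, shown with numpy on a toy
# count vector. The function name and counts are made-up examples.
import numpy as np

def subsample_multinomial_sketch(counts, depth):
    counts = np.asarray(counts, float)
    probabilities = counts / counts.sum()
    # draw `depth` observations with replacement, weighted by the original
    # relative abundances
    return np.random.multinomial(depth, probabilities)

# e.g. subsample_multinomial_sketch([50, 30, 20, 0], 10) might return
# array([6, 3, 1, 0])
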
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    input_fasta_fp = opts.input_fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    tree_subset = opts.tree_subset
    tree_exact_match = opts.tree_exact_match
    same_seq_lens = opts.same_seq_lens
    all_ids_found = opts.all_ids_found

    create_dir(output_dir)

    # Test optional filepaths and requirements
    try:
        test_mapping_fp = open(mapping_fp, "U")
        test_mapping_fp.close()
    except IOError:
        raise IOError("Unable to open mapping file, please check "
                      "filepath and read permissions.")
    if tree_fp:
        try:
            test_tree_fp = open(tree_fp, "U")
            test_tree_fp.close()
        except IOError:
            raise IOError("Unable to open provided tree filepath, please "
                          "check filepath and permissions.")
    if tree_subset or tree_exact_match:
        if not tree_fp:
            raise ValueError('Must provide tree filepath if -s or -e options '
                             'are enabled.')

    validate_fasta(
        input_fasta_fp, mapping_fp, output_dir, tree_fp, tree_subset,
        tree_exact_match, same_seq_lens, all_ids_found,
        opts.suppress_barcode_checks, opts.suppress_primer_checks)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.output_dir:
        # try to make the output directory
        try:
            mkdir(opts.output_dir)
        except OSError:
            pass
    else:
        opts.output_dir = opts.input_dir

    if opts.no_trim and not opts.use_sfftools:
        raise ValueError, ("When using the --no_trim option you must have "
                           "the sfftools installed and must also pass the "
                           "--use_sfftools option")

    prep_sffs_in_dir(opts.input_dir,
                     opts.output_dir,
                     make_flowgram=opts.make_flowgram,
                     convert_to_flx=opts.convert_to_FLX,
                     use_sfftools=opts.use_sfftools,
                     no_trim=opts.no_trim)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_data = load_table(opts.input_otu_table)
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    if sort_field and mapping_fp:
        mapping_data = parse_mapping_file(open(mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 sort_field)
    elif sorted_sample_ids_fp:
        sorted_sample_ids = sample_ids_from_f(open(sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data, sorted_sample_ids)
    else:
        result = sort_otu_table(
            otu_table_data,
            natsort_case_insensitive(otu_table_data.ids()))

    write_biom_table(result, opts.output_fp)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.blast_db is None and opts.refseqs_fp is None:
        option_parser.error('Either blast_db or refseqs_fp must be provided.')

    # create dict of command-line options
    params = eval(str(opts))

    parallel_runner = ParallelPickOtusBlast(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    centroid_seqs = \
        [parse_fasta(open(e, 'U')) for e in opts.centroid_fps]
    singleton_seqs = \
        [parse_fasta(open(e, 'U')) for e in opts.singleton_fps]
    fasta_seqs = \
        [parse_fasta(open(e, 'U')) for e in opts.fasta_fps]
    denoiser_map_fs = \
        [open(e, 'U') for e in opts.denoiser_map_fps]

    output_fasta_fp = opts.output_fasta_fp
    output_f = open(output_fasta_fp, 'w')

    for s in inflate_denoiser_output(chain.from_iterable(centroid_seqs),
                                     chain.from_iterable(singleton_seqs),
                                     chain.from_iterable(denoiser_map_fs),
                                     chain.from_iterable(fasta_seqs)):
        output_f.write('>%s\n%s\n' % s)
    output_f.close()

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_files = map(open, opts.otu_map_fps)
    failures_fp = opts.failures_fp
    output_fp = opts.output_fp

    if failures_fp:
        failures_f = open(failures_fp, 'U')
    else:
        failures_f = None

    try:
        result = map_otu_map_files(otu_files, failures_file=failures_f)
    except KeyError, e:
        print('Some keys do not map (' + str(e) + ') -- is the order of'
              ' your OTU maps equivalent to the order in which the OTU'
              ' pickers were run? If expanding a failures file, did you'
              ' remember to leave out the otu map from the run which'
              ' generated the failures file?')
        exit(1)

    # write the result (expanded failures when a failures file was provided,
    # otherwise the merged OTU map), as in the variant of this function above
    if failures_fp is not None:
        of = open(output_fp, 'w')
        of.write('\n'.join(result))
        of.close()
    else:
        write_otu_map(result.items(), output_fp)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    qual_fp = opts.qual_fp
    output_dir = opts.output_dir
    score_min = int(opts.score_min)
    verbose = opts.verbose

    create_dir(output_dir)

    if qual_fp.endswith('.fastq') or qual_fp.endswith('.fastq.gz'):
        qual_parser = parse_fastq_qual_score
    else:
        qual_parser = parse_qual_score

    generate_histogram(qual_fp, output_dir, score_min, verbose,
                       qual_parser=qual_parser)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_path = opts.input_path
    out_path = opts.out_path
    output_CSS_statistics = opts.output_CSS_statistics
    DESeq_negatives_to_zero = opts.DESeq_negatives_to_zero
    algorithm = opts.algorithm
    list_algorithms = opts.list_algorithms

    if list_algorithms:
        print 'Available normalization algorithms are:\n%s' % ', '.join(
            algorithm_list())
    else:
        almost_required_options = ['input_path', 'out_path']
        for option in almost_required_options:
            if getattr(opts, option) is None:
                option_parser.error('Required option --%s omitted.' % option)

        if algorithm == 'CSS':
            if os.path.isdir(input_path):
                multiple_file_normalize_CSS(input_path, out_path,
                                            output_CSS_statistics)
            elif os.path.isfile(input_path):
                normalize_CSS(input_path, out_path, output_CSS_statistics)
            else:
                # it shouldn't be possible to get here
                option_parser.error("Unknown input type: %s" % input_path)
        elif algorithm == 'DESeq2':
            if os.path.isdir(input_path):
                multiple_file_normalize_DESeq2(input_path, out_path,
                                               DESeq_negatives_to_zero)
            elif os.path.isfile(input_path):
                normalize_DESeq2(input_path, out_path,
                                 DESeq_negatives_to_zero)
            else:
                # it shouldn't be possible to get here
                option_parser.error("Unknown input type: %s" % input_path)
        else:
            # it shouldn't be possible to get here
            option_parser.error("Unknown normalization algorithm: %s"
                                % algorithm)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.submit_jobs and not opts.make_jobs:
        option_parser.error('Must pass -m if passing -s. (Sorry about this, '
                            'it\'s for backwards-compatibility.)')

    min_args = 2
    if len(args) != min_args:
        option_parser.error('Program requires <commands file> and '
                            '<job prefix>')

    if (len(args[1]) > 10 or len(args[1]) == 0):
        option_parser.error('job prefix must be 1-10 characters long')

    commands = list(open(args[0]))
    job_prefix = args[1]

    if(not exists(opts.job_dir)):
        try:
            makedirs(opts.job_dir)
        except OSError:
            exit(" Jobs directory can not be created. "
                 "Check for permissions or file with the same name: %s\n"
                 % opts.job_dir)

    if (opts.make_jobs):
        filenames = make_jobs(
            commands, job_prefix, opts.queue, opts.job_dir,
            (str(opts.max_walltime) + ":00:00"),
            opts.cpus, opts.nodes)
    else:
        exit("Should we ever get here???")

    if (opts.submit_jobs):
        submit_jobs(filenames, opts.verbose)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_dirs = opts.input_dirs.split(',')
    assignment_methods = opts.assignment_methods.split(',')

    confidences = opts.confidences
    if confidences is not None:
        confidences = map(float, opts.confidences.split(','))

    e_values = opts.e_values
    if e_values is not None:
        e_values = map(float, opts.e_values.split(','))

    if opts.print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    assign_taxonomy_multiple_times(
        input_dirs, opts.output_dir,
        assignment_methods,
        opts.reference_seqs_fp,
        opts.input_fasta_filename,
        opts.clean_otu_table_filename,
        id_to_taxonomy_fp=opts.id_to_taxonomy_fp,
        confidences=confidences,
        e_values=e_values,
        read_1_seqs_fp=opts.read_1_seqs_fp,
        read_2_seqs_fp=opts.read_2_seqs_fp,
        rdp_max_memory=opts.rdp_max_memory,
        command_handler=command_handler,
        status_update_callback=status_update_callback,
        force=opts.force)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Individual parameters are separated by commas
    params_list = opts.params.split(",")
    params = dict()
    for pair in params_list:
        # The value is separated from the parameter by an equals sign
        key_value = pair.split("=")
        params[key_value[0]] = key_value[1]

    if os.path.isdir(opts.input_path):
        multiple_file_manifold(opts.input_path, opts.output_path,
                               opts.algorithm, params)
    elif os.path.isfile(opts.input_path):
        manifold_res_string = compute_manifold(opts.input_path,
                                               opts.algorithm, params)
        f = open(opts.output_path, 'w')
        f.write(manifold_res_string)
        f.close()
    else:
        print("io error, check input file path")
        exit(1)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    output_dir = opts.output_dir
    try:
        create_dir(output_dir)
    except:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o/--output_dir option.")

    otu_table_fp = opts.otu_table_fp
    table = load_table(otu_table_fp)

    estimator = ObservationRichnessEstimator(table,
                                             Chao1MultinomialPointEstimator)
    results = estimator(opts.min, opts.max, opts.num_steps,
                        opts.confidence_level)

    out_fp = join(output_dir, 'estimates_table.txt')
    with open(out_fp, 'w') as out_f:
        results.toTable(out_f)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # create dict of command-line options
    params = eval(str(opts))

    if not opts.step > 0:
        option_parser.error("Error: step size must be greater than 0.\n"
                            "If min = max, just leave step size at 1.")

    parallel_runner = ParallelMultipleRarefactions(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.input_path,
                    opts.output_path,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    suppress_errors = opts.suppress_errors
    input_fps = opts.input_fps
    output_fp = opts.output_fp

    count_data, total, inaccessible_filepaths = count_seqs_in_filepaths(
        input_fps)
    r = format_output(count_data, total, inaccessible_filepaths,
                      suppress_errors)

    if output_fp:
        f = open(output_fp, 'w')
        f.write(r)
        f.close()
    else:
        print r

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    options, args = option_parser.parse_args()

    if options.debug:
        print "PRODUCING DEBUG OUTPUT"

    bad_seq_ids = set()
    bad_otu_ids = None
    # if we got a file to screen against, find the relevant ids and delete
    # them
    if options.screened_rep_seqs:
        bad_otu_ids = get_first_id(open(options.screened_rep_seqs, 'U'))
        if not options.otus:
            raise RuntimeError(
                "Must specify an OTU file if performing a screen.")
        for line in open(options.otus, 'U'):
            fields = line.split()
            if fields[0] in bad_otu_ids:
                bad_seq_ids.update(fields[1:])
    if options.debug:
        if bad_otu_ids is not None:
            print "Found %s bad otu ids: %s" % (len(bad_otu_ids), bad_otu_ids)
        print "Found %s bad seq ids: %s" % (len(bad_seq_ids), bad_seq_ids)

    ids = get_ids(open(options.in_fasta, 'U'), options.field, bad_seq_ids,
                  options.debug)
    # add empty unassigned ids for file creation
    if 'Unassigned' not in ids:
        ids['Unassigned'] = []

    if not exists(options.outdir):
        makedirs(options.outdir)
    for k, idlist in ids.items():
        outfile = open(join(options.outdir, k + '.txt'), 'w')
        outfile.write('\n'.join(sorted(idlist)))
        outfile.close()

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    qiime_test_data_dir = opts.qiime_test_data_dir
    qiime_scripts_dir = opts.qiime_scripts_dir
    working_dir = opts.working_dir
    verbose = opts.verbose
    tests = opts.tests
    if tests is not None:
        tests = [e.rstrip('/') for e in tests.split(',')]
    failure_log_fp = opts.failure_log_fp

    result_summary, num_failures = run_script_usage_tests(
        qiime_test_data_dir,
        qiime_scripts_dir,
        working_dir,
        verbose=verbose,
        tests=tests,
        failure_log_fp=failure_log_fp)

    if verbose:
        print result_summary

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if opts.verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    generate_most_wanted_list(
        opts.output_dir, opts.otu_table_fps, opts.rep_set_fp, opts.gg_fp,
        opts.nt_fp, opts.mapping_fp, opts.mapping_category, opts.top_n,
        opts.min_abundance, opts.max_abundance, opts.min_categories,
        opts.num_categories_to_plot, opts.max_gg_similarity,
        opts.max_nt_similarity, opts.e_value, opts.word_size,
        opts.merged_otu_table_fp, opts.suppress_taxonomic_output,
        opts.jobs_to_start, command_handler, status_update_callback,
        opts.force)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    dir_path = opts.output_dir
    create_dir(dir_path)
    create_dir(os.path.join(dir_path, "otu_network"))
    create_dir(os.path.join(dir_path, "otu_network/props"))
    create_dir(os.path.join(dir_path, "otu_network/stats"))

    map_lines = open(opts.map_fname, 'U').readlines()
    create_network_and_stats(dir_path, map_lines, opts.input_fp, prefs, data,
                             background_color, label_color)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_path = opts.input_path
    out_path = opts.out_path
    algorithm = opts.algorithm
    mapping_fp = opts.mapping_file_path
    mapping_category = opts.mapping_file_category
    subcategory_1 = opts.mapping_file_subcategory_1
    subcategory_2 = opts.mapping_file_subcategory_2
    list_algorithms = opts.list_algorithms
    DESeq2_diagnostic_plots = opts.DESeq2_diagnostic_plots

    if list_algorithms:
        print 'Available differential abundance algorithms are:\n%s' \
            % ', '.join(algorithm_list())
    else:
        almost_required_options = ['input_path', 'out_path',
                                   'mapping_file_path',
                                   'mapping_file_category',
                                   'mapping_file_subcategory_1',
                                   'mapping_file_subcategory_2']
        for option in almost_required_options:
            if getattr(opts, option) is None:
                option_parser.error('Required option --%s omitted.' % option)

        if algorithm == 'metagenomeSeq_fitZIG':
            if os.path.isdir(input_path):
                multiple_file_DA_fitZIG(input_path, out_path, mapping_fp,
                                        mapping_category, subcategory_1,
                                        subcategory_2)
            elif os.path.isfile(input_path):
                DA_fitZIG(input_path, out_path, mapping_fp, mapping_category,
                          subcategory_1, subcategory_2)
            else:
                # it shouldn't be possible to get here
                option_parser.error("Unknown input type: %s" % input_path)
        elif algorithm == 'DESeq2_nbinom':
            if os.path.isdir(input_path):
                multiple_file_DA_DESeq2(input_path, out_path, mapping_fp,
                                        mapping_category, subcategory_1,
                                        subcategory_2,
                                        DESeq2_diagnostic_plots)
            elif os.path.isfile(input_path):
                DA_DESeq2(input_path, out_path, mapping_fp, mapping_category,
                          subcategory_1, subcategory_2,
                          DESeq2_diagnostic_plots)
            else:
                # it shouldn't be possible to get here
                option_parser.error("Unknown input type: %s" % input_path)
        else:
            # it shouldn't be possible to get here
            option_parser.error("Unknown differential abundance algorithm: "
                                "%s" % algorithm)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    collapse_fields = opts.collapse_fields.split(',')
    input_biom_fp = opts.input_biom_fp
    collapse_mode = opts.collapse_mode
    output_biom_fp = opts.output_biom_fp
    output_mapping_fp = opts.output_mapping_fp
    normalize = opts.normalize

    collapsed_metadata, collapsed_table = \
        collapse_samples(load_table(input_biom_fp),
                         open(mapping_fp, 'U'),
                         collapse_fields,
                         collapse_mode)

    if normalize:
        collapsed_table.norm(axis='sample', inplace=True)

    write_biom_table(collapsed_table, output_biom_fp)

    output_map_lines = mapping_lines_from_collapsed_df(collapsed_metadata)
    with open(output_mapping_fp, 'w') as output_mapping_f:
        output_mapping_f.write('\n'.join(output_map_lines))

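# Illustrative sketch (not part of the original script): what the
# norm(axis='sample') call above does conceptually -- each sample's counts are
# divided by that sample's total so every column sums to 1. The matrix below
# is made-up example data with samples as columns and OTUs as rows.
import numpy as np

toy_counts = np.array([[10., 0.],
                       [30., 5.],
                       [60., 15.]])
relative_abundance = toy_counts / toy_counts.sum(axis=0)
# each column of relative_abundance now sums to 1.0
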
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.output_path is not None:
        outf = open(opts.output_path, 'w')
    else:
        outf = sys.stdout

    dists = parse_distmat(open(opts.input_path, 'U'))
    map_data = parse_mapping_file_to_dict(open(opts.map, 'U'))
    diff_dists, same_dists = clust_qual_ratio(dists, map_data, opts.category)

    if opts.short:
        print >> outf, numpy.mean(diff_dists) / numpy.mean(same_dists)
    else:
        print >> outf, ("dissimilarity ratio between/within "
                        "(large for clustered data):")
        print >> outf, numpy.mean(diff_dists) / numpy.mean(same_dists)
        print >> outf, "dissimilarities between clusters: mean, std, num:"
        print >> outf, '\t'.join(map(str, [numpy.mean(diff_dists),
                                           numpy.std(diff_dists),
                                           len(diff_dists)]))
        print >> outf, "dissimilarities within clusters: mean, std, num:"
        print >> outf, '\t'.join(map(str, [numpy.mean(same_dists),
                                           numpy.std(same_dists),
                                           len(same_dists)]))

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.attempt_read_reorientation:
        if not opts.mapping_fp:
            option_parser.error(
                "To use --attempt_read_reorientation, one must "
                "supply a mapping file that contains both "
                "LinkerPrimerSequence and ReversePrimer columns.")
    if opts.input_type == "barcode_paired_end":
        if not opts.fastq2:
            option_parser.error(
                "To use input_type of barcode_paired_end, "
                "a second fastq file must be specified with --fastq2")

    if not opts.fastq2:
        disable_header_match = True
    else:
        disable_header_match = opts.disable_header_match

    fastq1 = qiime_open(opts.fastq1)
    if opts.fastq2:
        fastq2 = qiime_open(opts.fastq2)
    else:
        fastq2 = None
    create_dir(opts.output_dir)
    if opts.mapping_fp:
        map_fp = qiime_open(opts.mapping_fp)
    else:
        map_fp = None

    extract_barcodes(fastq1, fastq2, opts.output_dir, opts.input_type,
                     opts.bc1_len, opts.bc2_len, opts.rev_comp_bc1,
                     opts.rev_comp_bc2, opts.char_delineator,
                     opts.switch_bc_order, map_fp,
                     opts.attempt_read_reorientation, disable_header_match)

def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile('OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of FastUnifrac's unit tests, and keep track of any files
    # which fail unit tests.
    unittest_names = []
    for root, dirs, files in walk(test_dir):
        for name in files:
            if name.startswith('test_') and name.endswith('.py'):
                unittest_names.append(join(root, name))

    unittest_names.sort()

    for unittest_name in unittest_names:
        print "Testing %s:\n" % unittest_name
        command = '%s %s -v' % (python_name, unittest_name)
        result = Popen(command, shell=True, universal_newlines=True,
                       stdout=PIPE, stderr=STDOUT).stdout.read()
        print result
        if not unittest_good_pattern.search(result):
            if application_not_found_pattern.search(result):
                missing_application_tests.append(unittest_name)
            else:
                bad_tests.append(unittest_name)

    if bad_tests:
        print "\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_path = opts.input_path
    output_path = opts.output_path

    if isdir(input_path):
        # Run PCoA on all distance matrices in the input dir
        # Create the output directory if it does not exist
        if not exists(output_path):
            makedirs(output_path)

        # Get all the filenames present in the input directory
        file_names = [fname for fname in listdir(input_path)
                      if not (fname.startswith('.') or isdir(fname))]

        # Loop through all the input files
        for fname in file_names:
            # Get the path to the input distance matrix
            infile = join(input_path, fname)
            # Run PCoA on the input distance matrix
            with open(infile, 'U') as lines:
                pcoa_scores = pcoa(lines)
            # Store the PCoA results in the output directory
            base_fname, ext = splitext(fname)
            out_file = join(output_path, 'pcoa_%s.txt' % base_fname)
            with open(out_file, 'w') as f:
                pcoa_scores.to_file(f)
    else:
        # Run PCoA on the input distance matrix
        with open(input_path, 'U') as f:
            pcoa_scores = pcoa(f)
        # Store the results in the output file
        with open(output_path, 'w') as f:
            pcoa_scores.to_file(f)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    input_fasta_fp = opts.input_fasta_fp
    input_tree_fp = opts.input_tree_fp
    output_dir = opts.output_dir
    run_id = opts.run_id
    similarity_thresholds = map(int, opts.similarity_thresholds.split(','))
    print_only = opts.print_only

    try:
        makedirs(output_dir)
    except OSError:
        print ("Output directory already exists. Please choose "
               "a different directory, or force overwrite with -f.")
        exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    pick_nested_reference_otus(
        input_fasta_fp=input_fasta_fp,
        input_tree_fp=input_tree_fp,
        output_dir=output_dir,
        run_id=run_id,
        similarity_thresholds=similarity_thresholds,
        command_handler=command_handler,
        status_update_callback=status_update_callback)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not (opts.reference_seqs_fp or opts.blast_db):
        option_parser.error('Either a blast db (via -b) or a collection of '
                            'reference sequences (via -r) must be passed to '
                            'assign taxonomy using blast.')

    # create dict of command-line options
    params = eval(str(opts))

    parallel_runner = ParallelBlastTaxonomyAssigner(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=False)

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    outfile = open(opts.output_biom_fp, 'w')

    if not opts.taxonomy_fname:
        otu_to_taxonomy = None
    else:
        infile = open(opts.taxonomy_fname, 'U')
        otu_to_taxonomy = parse_taxonomy(infile)

    ids_to_exclude = []
    if exclude_otus_fp:
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            ids_to_exclude = \
                get_seq_ids_from_fasta_file(open(exclude_otus_fp, 'U'))
        else:
            ids_to_exclude = \
                get_seq_ids_from_seq_id_file(open(exclude_otus_fp, 'U'))

    biom_otu_table = make_otu_table(open(opts.otu_map_fp, 'U'),
                                    otu_to_taxonomy,
                                    ids_to_exclude)
    outfile.write(biom_otu_table)

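# Illustrative sketch (not part of the original script): the tab-separated OTU
# map consumed by make_otu_table above lists an OTU id followed by the
# sequence ids assigned to it. Turning it into per-sample counts looks roughly
# like this; the helper name is hypothetical and sequence ids are assumed to
# be of the form '<sample_id>_<number>'.
def otu_map_to_counts(otu_map_lines):
    counts = {}
    for line in otu_map_lines:
        fields = line.strip().split('\t')
        otu_id, seq_ids = fields[0], fields[1:]
        per_sample = counts.setdefault(otu_id, {})
        for seq_id in seq_ids:
            sample_id = seq_id.rsplit('_', 1)[0]
            per_sample[sample_id] = per_sample.get(sample_id, 0) + 1
    return counts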