def main():
    """Filter the input FASTA/FASTQ file using exactly one selection option.

    Parses the command line, checks that exactly one of the selection
    options (OTU map, sequence ID file, subject FASTA, sequence ID prefix,
    BIOM file, sample ID file, or mapping file together with valid states)
    was supplied, builds the matching lookup and/or sequence-ID predicate,
    and hands both to ``filter_fastq`` (input path ends with ``.fastq``) or
    ``filter_fasta`` (anything else).

    Every file opened here is closed before the function returns, including
    when an error (or the parser's usage error) interrupts processing.
    """
    option_parser, opts, _ = parse_command_line_parameters(**script_info)
    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."

    # The mapping file only counts as a selector when valid states accompany it.
    selectors = [
        opts.otu_map,
        opts.seq_id_fp,
        opts.subject_fasta_fp,
        opts.seq_id_prefix,
        opts.biom_fp,
        opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(bool(selector) for selector in selectors) != 1:
        option_parser.error(error_msg)

    # Handles are tracked and closed together at the very end rather than
    # right after each helper call: the lookup helpers are defined elsewhere,
    # so we do not assume they finish reading before they return.
    opened = []

    def _open(path, mode):
        handle = open(path, mode)
        opened.append(handle)
        return handle

    def _has_prefix(seq_label):
        return seq_label.startswith(opts.seq_id_prefix)

    def _sample_id_is_kept(seq_label):
        # NOTE(review): rsplit('_') has no maxsplit, so [0] is the text before
        # the FIRST underscore of the first whitespace-delimited token.
        # Confirm sample IDs never contain underscores.
        return seq_label.split()[0].rsplit('_')[0] in seqs_to_keep_lookup

    seqid_f = None
    try:
        if opts.otu_map:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_otu_map(
                _open(opts.otu_map, 'U'))
        elif opts.seq_id_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_seq_id_file(
                _open(opts.seq_id_fp, 'U'))
        elif opts.subject_fasta_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_fasta_file(
                _open(opts.subject_fasta_fp, 'U'))
        elif opts.seq_id_prefix:
            # Prefix filtering needs no lookup; the predicate decides alone.
            seqs_to_keep_lookup = None
            seqid_f = _has_prefix
        elif opts.mapping_fp and opts.valid_states:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_mapping_file(
                _open(opts.mapping_fp, 'U'), opts.valid_states)
            seqid_f = _sample_id_is_kept
        elif opts.biom_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_biom(
                opts.biom_fp)
        elif opts.sample_id_fp:
            # First whitespace-delimited field of each line is a sample ID;
            # blank lines are skipped instead of raising IndexError.
            sample_ids = set()
            for line in _open(opts.sample_id_fp, 'U'):
                fields = line.split()
                if fields:
                    sample_ids.add(fields[0])
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(
                sample_ids)
            seqid_f = _sample_id_is_kept
        else:
            option_parser.error(error_msg)

        if opts.input_fasta_fp.endswith('.fastq'):
            filter_fp_f = filter_fastq
        else:
            filter_fp_f = filter_fasta

        input_fasta_f = _open(opts.input_fasta_fp, 'U')
        output_fasta_f = _open(opts.output_fasta_fp, 'w')
        filter_fp_f(input_fasta_f,
                    output_fasta_f,
                    seqs_to_keep_lookup,
                    negate,
                    seqid_f=seqid_f)
    finally:
        # Closing an already-closed file object is a no-op, so this is safe
        # even if the filter function closed a handle itself.
        for handle in opened:
            handle.close()
def main():
    """Filter the input FASTA/FASTQ file using exactly one selection option.

    Parses the command line, checks that exactly one of the selection
    options (OTU map, sequence ID file, subject FASTA, sequence ID prefix,
    BIOM file, sample ID file, or mapping file together with valid states)
    was supplied, and builds the lookup of sequences to keep with the
    matching ``get_seqs_to_keep_lookup_from_*`` helper. The prefix, mapping
    file and sample ID helpers are additionally given a handle on the input
    sequence file. The lookup is then passed to ``filter_fastq`` (input path
    ends with ``.fastq``) or ``filter_fasta`` (anything else).

    Every file opened here is closed before the function returns, including
    when an error (or the parser's usage error) interrupts processing.
    """
    option_parser, opts, _ = parse_command_line_parameters(**script_info)
    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."

    # The mapping file only counts as a selector when valid states accompany it.
    selectors = [
        opts.otu_map,
        opts.seq_id_fp,
        opts.subject_fasta_fp,
        opts.seq_id_prefix,
        opts.biom_fp,
        opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(bool(selector) for selector in selectors) != 1:
        option_parser.error(error_msg)

    # Handles are tracked and closed together at the very end rather than
    # right after each helper call: the lookup helpers are defined elsewhere,
    # so we do not assume they finish reading before they return.
    opened = []

    def _open(path, mode):
        handle = open(path, mode)
        opened.append(handle)
        return handle

    try:
        if opts.otu_map:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_otu_map(
                _open(opts.otu_map, 'U'))
        elif opts.seq_id_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_seq_id_file(
                _open(opts.seq_id_fp, 'U'))
        elif opts.subject_fasta_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_fasta_file(
                _open(opts.subject_fasta_fp, 'U'))
        elif opts.seq_id_prefix:
            # Same default read mode as the original bare open() call.
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_prefix(
                _open(opts.input_fasta_fp, 'r'), opts.seq_id_prefix)
        elif opts.mapping_fp and opts.valid_states:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_mapping_file(
                _open(opts.input_fasta_fp, 'U'),
                _open(opts.mapping_fp, 'U'),
                opts.valid_states)
        elif opts.biom_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_biom(
                opts.biom_fp)
        elif opts.sample_id_fp:
            # First whitespace-delimited field of each line is a sample ID;
            # blank lines are skipped instead of raising IndexError.
            sample_ids = set()
            for line in _open(opts.sample_id_fp, 'U'):
                fields = line.split()
                if fields:
                    sample_ids.add(fields[0])
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(
                _open(opts.input_fasta_fp, 'r'), sample_ids)
        else:
            option_parser.error(error_msg)

        if opts.input_fasta_fp.endswith('.fastq'):
            filter_fp_f = filter_fastq
        else:
            filter_fp_f = filter_fasta

        input_fasta_f = _open(opts.input_fasta_fp, 'U')
        output_fasta_f = _open(opts.output_fasta_fp, 'w')
        filter_fp_f(input_fasta_f,
                    output_fasta_f,
                    seqs_to_keep_lookup,
                    negate)
    finally:
        # Closing an already-closed file object is a no-op, so this is safe
        # even if the filter function closed a handle itself.
        for handle in opened:
            handle.close()
def main():
    """Filter the input FASTA/FASTQ file using exactly one selection option.

    Parses the command line, checks that exactly one of the selection
    options (OTU map, sequence ID file, subject FASTA, sequence ID prefix,
    BIOM file, sample ID file, or mapping file together with valid states)
    was supplied, builds the matching lookup and/or sequence-ID predicate,
    and hands both to ``filter_fastq`` (input path ends with ``.fastq``) or
    ``filter_fasta`` (anything else).

    Every file opened here is closed before the function returns, including
    when an error (or the parser's usage error) interrupts processing.
    """
    option_parser, opts, _ = parse_command_line_parameters(**script_info)
    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."

    # The mapping file only counts as a selector when valid states accompany it.
    selectors = [
        opts.otu_map,
        opts.seq_id_fp,
        opts.subject_fasta_fp,
        opts.seq_id_prefix,
        opts.biom_fp,
        opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(bool(selector) for selector in selectors) != 1:
        option_parser.error(error_msg)

    # Handles are tracked and closed together at the very end rather than
    # right after each helper call: the lookup helpers are defined elsewhere,
    # so we do not assume they finish reading before they return.
    opened = []

    def _open(path, mode):
        handle = open(path, mode)
        opened.append(handle)
        return handle

    def _has_prefix(seq_label):
        return seq_label.startswith(opts.seq_id_prefix)

    def _sample_id_is_kept(seq_label):
        # NOTE(review): rsplit("_") has no maxsplit, so [0] is the text before
        # the FIRST underscore of the first whitespace-delimited token.
        # Confirm sample IDs never contain underscores.
        return seq_label.split()[0].rsplit("_")[0] in seqs_to_keep_lookup

    seqid_f = None
    try:
        if opts.otu_map:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_otu_map(
                _open(opts.otu_map, "U")
            )
        elif opts.seq_id_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_seq_id_file(
                _open(opts.seq_id_fp, "U")
            )
        elif opts.subject_fasta_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_fasta_file(
                _open(opts.subject_fasta_fp, "U")
            )
        elif opts.seq_id_prefix:
            # Prefix filtering needs no lookup; the predicate decides alone.
            seqs_to_keep_lookup = None
            seqid_f = _has_prefix
        elif opts.mapping_fp and opts.valid_states:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_mapping_file(
                _open(opts.mapping_fp, "U"), opts.valid_states
            )
            seqid_f = _sample_id_is_kept
        elif opts.biom_fp:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
        elif opts.sample_id_fp:
            # First whitespace-delimited field of each line is a sample ID;
            # blank lines are skipped instead of raising IndexError.
            sample_ids = set()
            for line in _open(opts.sample_id_fp, "U"):
                fields = line.split()
                if fields:
                    sample_ids.add(fields[0])
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(sample_ids)
            seqid_f = _sample_id_is_kept
        else:
            option_parser.error(error_msg)

        if opts.input_fasta_fp.endswith(".fastq"):
            filter_fp_f = filter_fastq
        else:
            filter_fp_f = filter_fasta

        input_fasta_f = _open(opts.input_fasta_fp, "U")
        output_fasta_f = _open(opts.output_fasta_fp, "w")
        filter_fp_f(
            input_fasta_f,
            output_fasta_f,
            seqs_to_keep_lookup,
            negate,
            seqid_f=seqid_f,
        )
    finally:
        # Closing an already-closed file object is a no-op, so this is safe
        # even if the filter function closed a handle itself.
        for handle in opened:
            handle.close()