コード例 #1
0
ファイル: filter_fasta.py プロジェクト: DSWallach/qiime
def main():
    """Filter a FASTA/FASTQ file using one command-line selection method.

    Exactly one selection method must be supplied: an OTU map, a sequence
    ID file, a subject FASTA file, a sequence ID prefix, a BIOM file, a
    sample ID file, or a mapping file together with valid states.  Any
    other combination is reported through ``option_parser.error``.

    The input is processed with ``filter_fastq`` when its path ends in
    ``.fastq`` and with ``filter_fasta`` otherwise.  All files are opened
    in ``with`` blocks so they are closed (and the output flushed) even
    when a helper raises.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."
    # One truthy entry per requested selection method; the mapping-file
    # method only counts when both of its options were given.
    selection_methods = [
        opts.otu_map, opts.seq_id_fp, opts.subject_fasta_fp,
        opts.seq_id_prefix, opts.biom_fp, opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(map(bool, selection_methods)) != 1:
        option_parser.error(error_msg)

    def sample_id_is_kept(seq_label):
        # Takes the first whitespace-delimited token of the label and
        # tests the text before its first underscore against the lookup.
        # NOTE(review): rsplit('_') has no maxsplit, so a sample ID that
        # itself contains '_' is truncated -- confirm IDs never do.
        return seq_label.split()[0].rsplit('_')[0] in seqs_to_keep_lookup

    # NOTE(review): the lookup helpers are assumed to consume their file
    # handle completely before returning, so closing it right after the
    # call is safe -- confirm against their definitions.
    seqid_f = None
    if opts.otu_map:
        with open(opts.otu_map, 'U') as otu_map_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, 'U') as seq_id_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, 'U') as subject_fasta_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        # No lookup is needed; the predicate alone decides what is kept.
        seqs_to_keep_lookup = None
        seqid_f = lambda x: x.startswith(opts.seq_id_prefix)
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.mapping_fp, 'U') as mapping_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_mapping_file(
                    mapping_f, opts.valid_states)
        seqid_f = sample_id_is_kept
    elif opts.biom_fp:
        seqs_to_keep_lookup = \
            get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            # First whitespace-delimited field of each line; blank lines
            # are skipped instead of raising IndexError.
            sample_ids = set(
                [e.split()[0] for e in sample_id_f if e.strip()])
        seqs_to_keep_lookup = \
            get_seqs_to_keep_lookup_from_sample_ids(sample_ids)
        seqid_f = sample_id_is_kept
    else:
        # Defensive: the count check above should already have exited.
        option_parser.error(error_msg)

    if opts.input_fasta_fp.endswith('.fastq'):
        filter_fp_f = filter_fastq
    else:
        filter_fp_f = filter_fasta

    with open(opts.input_fasta_fp, 'U') as input_fasta_f, \
            open(opts.output_fasta_fp, 'w') as output_fasta_f:
        filter_fp_f(input_fasta_f,
                    output_fasta_f,
                    seqs_to_keep_lookup,
                    negate,
                    seqid_f=seqid_f)
コード例 #2
0
ファイル: filter_fasta.py プロジェクト: Honglongwu/qiime
def main():
    """Filter a FASTA/FASTQ file using one command-line selection method.

    Exactly one selection method must be supplied: an OTU map, a sequence
    ID file, a subject FASTA file, a sequence ID prefix, a BIOM file, a
    sample ID file, or a mapping file together with valid states.  Any
    other combination is reported through ``option_parser.error``.

    In this variant the prefix, mapping-file and sample-ID methods build
    their lookup by reading the input sequence file as well.  The input
    is then processed with ``filter_fastq`` when its path ends in
    ``.fastq`` and with ``filter_fasta`` otherwise.  All files are opened
    in ``with`` blocks so they are closed (and the output flushed) even
    when a helper raises.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."
    # One truthy entry per requested selection method; the mapping-file
    # method only counts when both of its options were given.
    selection_methods = [
        opts.otu_map,
        opts.seq_id_fp,
        opts.subject_fasta_fp,
        opts.seq_id_prefix,
        opts.biom_fp,
        opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(map(bool, selection_methods)) != 1:
        option_parser.error(error_msg)

    # NOTE(review): the lookup helpers are assumed to consume their file
    # handles completely before returning, so closing them right after
    # the call is safe -- confirm against their definitions.
    if opts.otu_map:
        with open(opts.otu_map, 'U') as otu_map_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, 'U') as seq_id_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, 'U') as subject_fasta_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        # Opened with the default mode, exactly as the original call did.
        with open(opts.input_fasta_fp) as seqs_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_prefix(
                    seqs_f, opts.seq_id_prefix)
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.input_fasta_fp, 'U') as seqs_f, \
                open(opts.mapping_fp, 'U') as mapping_f:
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_mapping_file(
                    seqs_f, mapping_f, opts.valid_states)
    elif opts.biom_fp:
        seqs_to_keep_lookup = \
            get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            # First whitespace-delimited field of each line; blank lines
            # are skipped instead of raising IndexError.
            sample_ids = set([e.split()[0]
                              for e in sample_id_f if e.strip()])
        # Opened with the default mode, exactly as the original call did.
        with open(opts.input_fasta_fp) as seqs_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(
                seqs_f, sample_ids)
    else:
        # Defensive: the count check above should already have exited.
        option_parser.error(error_msg)

    if opts.input_fasta_fp.endswith('.fastq'):
        filter_fp_f = filter_fastq
    else:
        filter_fp_f = filter_fasta

    with open(opts.input_fasta_fp, 'U') as input_fasta_f, \
            open(opts.output_fasta_fp, 'w') as output_fasta_f:
        filter_fp_f(input_fasta_f,
                    output_fasta_f,
                    seqs_to_keep_lookup,
                    negate)
コード例 #3
0
ファイル: filter_fasta.py プロジェクト: ElDeveloper/qiime
def main():
    """Filter a FASTA/FASTQ file using one command-line selection method.

    Exactly one selection method must be supplied: an OTU map, a sequence
    ID file, a subject FASTA file, a sequence ID prefix, a BIOM file, a
    sample ID file, or a mapping file together with valid states.  Any
    other combination is reported through ``option_parser.error``.

    The input is processed with ``filter_fastq`` when its path ends in
    ``.fastq`` and with ``filter_fasta`` otherwise.  All files are opened
    in ``with`` blocks so they are closed (and the output flushed) even
    when a helper raises.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."
    # One truthy entry per requested selection method; the mapping-file
    # method only counts when both of its options were given.
    selection_methods = [
        opts.otu_map,
        opts.seq_id_fp,
        opts.subject_fasta_fp,
        opts.seq_id_prefix,
        opts.biom_fp,
        opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(map(bool, selection_methods)) != 1:
        option_parser.error(error_msg)

    def sample_id_is_kept(seq_label):
        # Takes the first whitespace-delimited token of the label and
        # tests the text before its first underscore against the lookup.
        # NOTE(review): rsplit("_") has no maxsplit, so a sample ID that
        # itself contains "_" is truncated -- confirm IDs never do.
        return seq_label.split()[0].rsplit("_")[0] in seqs_to_keep_lookup

    # NOTE(review): the lookup helpers are assumed to consume their file
    # handle completely before returning, so closing it right after the
    # call is safe -- confirm against their definitions.
    seqid_f = None
    if opts.otu_map:
        with open(opts.otu_map, "U") as otu_map_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, "U") as seq_id_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, "U") as subject_fasta_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        # No lookup is needed; the predicate alone decides what is kept.
        seqs_to_keep_lookup = None
        seqid_f = lambda x: x.startswith(opts.seq_id_prefix)
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.mapping_fp, "U") as mapping_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_mapping_file(mapping_f, opts.valid_states)
        seqid_f = sample_id_is_kept
    elif opts.biom_fp:
        seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, "U") as sample_id_f:
            # First whitespace-delimited field of each line; blank lines
            # are skipped instead of raising IndexError.
            sample_ids = set([e.split()[0] for e in sample_id_f if e.strip()])
        seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(sample_ids)
        seqid_f = sample_id_is_kept
    else:
        # Defensive: the count check above should already have exited.
        option_parser.error(error_msg)

    if opts.input_fasta_fp.endswith(".fastq"):
        filter_fp_f = filter_fastq
    else:
        filter_fp_f = filter_fasta

    with open(opts.input_fasta_fp, "U") as input_fasta_f, \
            open(opts.output_fasta_fp, "w") as output_fasta_f:
        filter_fp_f(input_fasta_f, output_fasta_f, seqs_to_keep_lookup, negate, seqid_f=seqid_f)