def main():
    """Check the parsed options, then run the selected chimera detector.

    Options are read from parse_command_line_parameters(**script_info).
    For 'blast_fragments' and 'ChimeraSlayer' the method-specific required
    options are verified first, with problems reported through
    option_parser.error. If no output path was supplied, the output file
    is named '<input basename>_chimeric.txt'.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    method = opts.chimera_detection_method

    # Method-specific option checks.
    if method == 'blast_fragments':
        if not opts.blast_db and not opts.reference_seqs_fp:
            option_parser.error(
                'Must provide either --blast_db or'
                ' --reference_seqs_fp and --id_to_taxonomy_fp when'
                ' method is blast_fragments.')
        if not opts.id_to_taxonomy_fp:
            option_parser.error(
                'Must provide --id_to_taxonomy_fp when method'
                ' is blast_fragments.')
        if opts.num_fragments < 2:
            option_parser.error(
                'Invalid number of fragments (-n %d) Must be >= 2.'
                % opts.num_fragments)
    elif method == 'ChimeraSlayer':
        if not opts.aligned_reference_seqs_fp:
            option_parser.error(
                "Must provide --aligned_reference_seqs_fp "
                "when using method ChimeraSlayer")

    # Snapshot the option values used by the detectors below.
    verbose = opts.verbose  # not used yet ...
    fasta_fp = opts.input_fasta_fp
    taxonomy_map_fp = opts.id_to_taxonomy_fp
    ref_fp = opts.reference_seqs_fp
    fragment_count = opts.num_fragments
    result_fp = opts.output_fp
    depth = opts.taxonomy_depth
    e_value_cutoff = opts.max_e_value
    blast_database = opts.blast_db
    retain_intermediates = opts.keep_intermediates

    if not result_fp:
        # Default name: input file name without directory or extension.
        fasta_name = split(fasta_fp)[1]
        result_fp = '%s_chimeric.txt' % splitext(fasta_name)[0]

    if method == 'blast_fragments':
        blast_fragments_identify_chimeras(
            fasta_fp,
            taxonomy_map_fp,
            ref_fp,
            blast_db=blast_database,
            num_fragments=fragment_count,
            max_e_value=e_value_cutoff,
            output_fp=result_fp,
            taxonomy_depth=depth)
    elif method == 'ChimeraSlayer':
        chimeraSlayer_identify_chimeras(
            fasta_fp,
            output_fp=result_fp,
            db_FASTA_fp=ref_fp,
            db_NAST_fp=opts.aligned_reference_seqs_fp,
            min_div_ratio=opts.min_div_ratio,
            keep_intermediates=retain_intermediates)
Beispiel #2
0
def main():
    """Run the chimera checker selected on the command line.

    Parses options via parse_command_line_parameters(**script_info),
    verifies the options required by the chosen method ('blast_fragments'
    or 'ChimeraSlayer') and reports problems through option_parser.error,
    then calls the matching detection function. Without an explicit output
    path the result goes to '<input basename>_chimeric.txt'.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    detection_method = opts.chimera_detection_method
    use_blast_fragments = detection_method == 'blast_fragments'
    use_chimera_slayer = detection_method == 'ChimeraSlayer'

    # additional option checks
    if use_blast_fragments:
        have_reference = opts.blast_db or opts.reference_seqs_fp
        if not have_reference:
            option_parser.error(
                'Must provide either --blast_db or'
                ' --reference_seqs_fp and --id_to_taxonomy_fp when'
                ' method is blast_fragments.')
        if not opts.id_to_taxonomy_fp:
            option_parser.error(
                'Must provide --id_to_taxonomy_fp when method'
                ' is blast_fragments.')
        if opts.num_fragments < 2:
            option_parser.error(
                'Invalid number of fragments (-n %d) Must be >= 2.'
                % opts.num_fragments)
    elif use_chimera_slayer:
        if not opts.aligned_reference_seqs_fp:
            option_parser.error(
                "Must provide --aligned_reference_seqs_fp "
                "when using method ChimeraSlayer")

    verbose = opts.verbose  # not used yet ...
    seqs_fp = opts.input_fasta_fp
    tax_fp = opts.id_to_taxonomy_fp
    refs_fp = opts.reference_seqs_fp
    n_fragments = opts.num_fragments
    tax_depth = opts.taxonomy_depth
    e_value = opts.max_e_value
    db = opts.blast_db
    keep_tmp = opts.keep_intermediates

    # Fall back to a name derived from the input file when no output
    # path was given.
    out_fp = opts.output_fp or (
        '%s_chimeric.txt' % splitext(split(seqs_fp)[1])[0])

    if use_blast_fragments:
        blast_kwargs = dict(
            blast_db=db,
            num_fragments=n_fragments,
            max_e_value=e_value,
            output_fp=out_fp,
            taxonomy_depth=tax_depth)
        blast_fragments_identify_chimeras(
            seqs_fp, tax_fp, refs_fp, **blast_kwargs)
    elif use_chimera_slayer:
        slayer_kwargs = dict(
            output_fp=out_fp,
            db_FASTA_fp=refs_fp,
            db_NAST_fp=opts.aligned_reference_seqs_fp,
            min_div_ratio=opts.min_div_ratio,
            keep_intermediates=keep_tmp)
        chimeraSlayer_identify_chimeras(seqs_fp, **slayer_kwargs)
def main():
    """Validate the command-line options and run the chosen chimera checker.

    Options come from parse_command_line_parameters(**script_info). Each
    detection method (blast_fragments, ChimeraSlayer, usearch61) has its own
    required options and value ranges; violations are reported through
    option_parser.error.

    Output handling:
      * usearch61 is given an output directory (opts.output_fp, or
        'usearch61_chimeras/' when none is supplied), created via
        create_dir(..., fail_on_exist=False).
      * the other methods are given opts.output_fp, defaulting to
        '<input basename>_chimeric.txt'.

    Raises:
        IOError: if --reference_seqs_fp is given with method usearch61 but
            the file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    chimera_detection_method = opts.chimera_detection_method

    # additional option checks
    if chimera_detection_method == 'blast_fragments':
        if not (opts.blast_db or opts.reference_seqs_fp):
            option_parser.error(
                'Must provide either --blast_db or'
                ' --reference_seqs_fp and --id_to_taxonomy_fp when'
                ' method is blast_fragments.')
        if not opts.id_to_taxonomy_fp:
            option_parser.error(
                'Must provide --id_to_taxonomy_fp when method'
                ' is blast_fragments.')
        if opts.num_fragments < 2:
            option_parser.error(
                'Invalid number of fragments (-n %d) Must be >= 2.'
                % opts.num_fragments)
    elif chimera_detection_method == 'ChimeraSlayer':
        if not opts.aligned_reference_seqs_fp:
            option_parser.error("Must provide --aligned_reference_seqs_fp "
                                "when using method ChimeraSlayer")
    elif chimera_detection_method == 'usearch61':
        if opts.suppress_usearch61_ref and opts.suppress_usearch61_denovo:
            option_parser.error("Supressing both de novo and reference "
                                "chimera detection not allowed.")
        if not opts.reference_seqs_fp and not opts.suppress_usearch61_ref:
            option_parser.error(
                "--reference_seqs_fp required for reference "
                "based chimera detection, suppress reference based chimera "
                "detection with --suppress_usearch61_ref")
        if opts.reference_seqs_fp:
            try:
                # Opened only to confirm the file can be read; the default
                # mode is used because the "U" flag is rejected by newer
                # Python versions.
                open(opts.reference_seqs_fp).close()
            except IOError:
                # Call form of raise: same exception and message as before,
                # valid on both Python 2 and Python 3.
                raise IOError("Unable to open --reference_seqs_fp, please "
                              "check filepath and permissions.")
        if opts.non_chimeras_retention not in ['intersection', 'union']:
            option_parser.error("--non_chimeras_retention must be either "
                                "'union' or 'intersection'")
        if opts.usearch61_xn <= 1:
            option_parser.error("--usearch61_xn must be > 1")
        if opts.usearch61_dn <= 0:
            option_parser.error("--usearch61_dn must be > 0")
        if opts.usearch61_mindiffs <= 0:
            option_parser.error("--usearch61_mindiffs must be > 0")
        if opts.usearch61_mindiv <= 0:
            option_parser.error("--usearch61_mindiv must be > 0")
        if opts.usearch61_abundance_skew <= 0:
            option_parser.error("--usearch61_abundance_skew must be > 0")

    input_seqs_fp = opts.input_fasta_fp
    id_to_taxonomy_fp = opts.id_to_taxonomy_fp
    reference_seqs_fp = opts.reference_seqs_fp
    output_fp = opts.output_fp
    taxonomy_depth = opts.taxonomy_depth
    max_e_value = opts.max_e_value
    blast_db = opts.blast_db
    keep_intermediates = opts.keep_intermediates
    threads = opts.threads

    # calculate threads as 1 per CPU, or use float of input value
    if threads == 'one_per_cpu':
        # The float literal forces true division. With Python 2 integer
        # division, 1 / cpu_count() is 0 on any machine with more than one
        # CPU, which silently turned the default into 0.0.
        threads = 1.0 / cpu_count()
    else:
        # Make sure input is a float
        try:
            threads = float(threads)
        except ValueError:
            option_parser.error("--threads must be a float value if "
                                "default 'one_per_cpu' value overridden.")

    if not output_fp:
        if chimera_detection_method == "usearch61":
            output_dir = "usearch61_chimeras/"
            create_dir(output_dir, fail_on_exist=False)
        else:
            input_basename = splitext(split(input_seqs_fp)[1])[0]
            output_fp = '%s_chimeric.txt' % input_basename
    elif chimera_detection_method == "usearch61":
        # For usearch61 the output option names a directory, not a file.
        output_dir = output_fp
        create_dir(output_dir, fail_on_exist=False)

    if chimera_detection_method == 'blast_fragments':
        blast_fragments_identify_chimeras(
            input_seqs_fp,
            id_to_taxonomy_fp,
            reference_seqs_fp,
            blast_db=blast_db,
            num_fragments=opts.num_fragments,
            max_e_value=max_e_value,
            output_fp=output_fp,
            taxonomy_depth=taxonomy_depth)
    elif chimera_detection_method == 'ChimeraSlayer':
        chimeraSlayer_identify_chimeras(
            input_seqs_fp,
            output_fp=output_fp,
            db_FASTA_fp=opts.reference_seqs_fp,
            db_NAST_fp=opts.aligned_reference_seqs_fp,
            min_div_ratio=opts.min_div_ratio,
            keep_intermediates=keep_intermediates)
    elif chimera_detection_method == 'usearch61':
        usearch61_chimera_check(
            input_seqs_fp,
            output_dir=output_dir,
            reference_seqs_fp=reference_seqs_fp,
            suppress_usearch61_intermediates=(
                opts.suppress_usearch61_intermediates),
            suppress_usearch61_ref=opts.suppress_usearch61_ref,
            suppress_usearch61_denovo=opts.suppress_usearch61_denovo,
            split_by_sampleid=opts.split_by_sampleid,
            non_chimeras_retention=opts.non_chimeras_retention,
            usearch61_minh=opts.usearch61_minh,
            usearch61_xn=opts.usearch61_xn,
            usearch61_dn=opts.usearch61_dn,
            usearch61_mindiffs=opts.usearch61_mindiffs,
            usearch61_mindiv=opts.usearch61_mindiv,
            usearch61_abundance_skew=opts.usearch61_abundance_skew,
            percent_id_usearch61=opts.percent_id_usearch61,
            minlen=opts.minlen,
            word_length=opts.word_length,
            max_accepts=opts.max_accepts,
            max_rejects=opts.max_rejects,
            verbose=opts.verbose,
            threads=threads)
Beispiel #4
0
def main():
    """Validate the command-line options and run the chosen chimera checker.

    Options come from parse_command_line_parameters(**script_info). Each
    detection method (blast_fragments, ChimeraSlayer, usearch61) has its own
    required options and value ranges; violations are reported through
    option_parser.error.

    Output handling:
      * usearch61 is given an output directory (opts.output_fp, or
        'usearch61_chimeras/' when none is supplied), created via
        create_dir(..., fail_on_exist=False).
      * the other methods are given opts.output_fp, defaulting to
        '<input basename>_chimeric.txt'.

    Raises:
        IOError: if --reference_seqs_fp is given with method usearch61 but
            the file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    chimera_detection_method = opts.chimera_detection_method

    #additional option checks
    if chimera_detection_method == 'blast_fragments':
        if not (opts.blast_db or opts.reference_seqs_fp):
            option_parser.error(
                'Must provide either --blast_db or'
                ' --reference_seqs_fp and --id_to_taxonomy_fp when'
                ' method is blast_fragments.')
        if not opts.id_to_taxonomy_fp:
            option_parser.error(
                'Must provide --id_to_taxonomy_fp when method'
                ' is blast_fragments.')
        if opts.num_fragments < 2:
            option_parser.error(
                'Invalid number of fragments (-n %d) Must be >= 2.'
                % opts.num_fragments)
    elif chimera_detection_method == 'ChimeraSlayer':
        if not opts.aligned_reference_seqs_fp:
            option_parser.error("Must provide --aligned_reference_seqs_fp "
                                "when using method ChimeraSlayer")
    elif chimera_detection_method == 'usearch61':
        if opts.suppress_usearch61_ref and opts.suppress_usearch61_denovo:
            option_parser.error("Supressing both de novo and reference "
                                "chimera detection not allowed.")
        if not opts.reference_seqs_fp and not opts.suppress_usearch61_ref:
            option_parser.error(
                "--reference_seqs_fp required for reference "
                "based chimera detection, suppress reference based chimera "
                "detection with --suppress_usearch61_ref")
        if opts.reference_seqs_fp:
            try:
                # Readability probe only, so the default mode is enough;
                # the legacy "U" flag is not accepted by newer Pythons.
                probe = open(opts.reference_seqs_fp)
                probe.close()
            except IOError:
                # raise E(msg) is equivalent to the old `raise E, msg`
                # form and parses on both Python 2 and Python 3.
                raise IOError("Unable to open --reference_seqs_fp, please "
                              "check filepath and permissions.")
        if opts.non_chimeras_retention not in ['intersection', 'union']:
            option_parser.error("--non_chimeras_retention must be either "
                                "'union' or 'intersection'")
        if opts.usearch61_xn <= 1:
            option_parser.error("--usearch61_xn must be > 1")
        if opts.usearch61_dn <= 0:
            option_parser.error("--usearch61_dn must be > 0")
        if opts.usearch61_mindiffs <= 0:
            option_parser.error("--usearch61_mindiffs must be > 0")
        if opts.usearch61_mindiv <= 0:
            option_parser.error("--usearch61_mindiv must be > 0")
        if opts.usearch61_abundance_skew <= 0:
            option_parser.error("--usearch61_abundance_skew must be > 0")

    input_seqs_fp = opts.input_fasta_fp
    id_to_taxonomy_fp = opts.id_to_taxonomy_fp
    reference_seqs_fp = opts.reference_seqs_fp
    output_fp = opts.output_fp
    taxonomy_depth = opts.taxonomy_depth
    max_e_value = opts.max_e_value
    blast_db = opts.blast_db
    keep_intermediates = opts.keep_intermediates
    threads = opts.threads

    # calculate threads as 1 per CPU, or use float of input value
    if threads == 'one_per_cpu':
        # Divide with a float numerator: when "/" is integer division
        # (Python 2 without the division future import) 1 / cpu_count()
        # evaluates to 0 for every multi-CPU host.
        threads = 1.0 / cpu_count()
    else:
        # Make sure input is a float
        try:
            threads = float(threads)
        except ValueError:
            option_parser.error("--threads must be a float value if "
                                "default 'one_per_cpu' value overridden.")

    if not output_fp:
        if chimera_detection_method == "usearch61":
            output_dir = "usearch61_chimeras/"
            create_dir(output_dir, fail_on_exist=False)
        else:
            input_basename = splitext(split(input_seqs_fp)[1])[0]
            output_fp = '%s_chimeric.txt' % input_basename
    elif chimera_detection_method == "usearch61":
        # With usearch61 the output option is used as a directory path.
        output_dir = output_fp
        create_dir(output_dir, fail_on_exist=False)

    if chimera_detection_method == 'blast_fragments':
        blast_fragments_identify_chimeras(
            input_seqs_fp,
            id_to_taxonomy_fp,
            reference_seqs_fp,
            blast_db=blast_db,
            num_fragments=opts.num_fragments,
            max_e_value=max_e_value,
            output_fp=output_fp,
            taxonomy_depth=taxonomy_depth)
    elif chimera_detection_method == 'ChimeraSlayer':
        chimeraSlayer_identify_chimeras(
            input_seqs_fp,
            output_fp=output_fp,
            db_FASTA_fp=opts.reference_seqs_fp,
            db_NAST_fp=opts.aligned_reference_seqs_fp,
            min_div_ratio=opts.min_div_ratio,
            keep_intermediates=keep_intermediates)
    elif chimera_detection_method == 'usearch61':
        usearch61_chimera_check(
            input_seqs_fp,
            output_dir=output_dir,
            reference_seqs_fp=reference_seqs_fp,
            suppress_usearch61_intermediates=(
                opts.suppress_usearch61_intermediates),
            suppress_usearch61_ref=opts.suppress_usearch61_ref,
            suppress_usearch61_denovo=opts.suppress_usearch61_denovo,
            split_by_sampleid=opts.split_by_sampleid,
            non_chimeras_retention=opts.non_chimeras_retention,
            usearch61_minh=opts.usearch61_minh,
            usearch61_xn=opts.usearch61_xn,
            usearch61_dn=opts.usearch61_dn,
            usearch61_mindiffs=opts.usearch61_mindiffs,
            usearch61_mindiv=opts.usearch61_mindiv,
            usearch61_abundance_skew=opts.usearch61_abundance_skew,
            percent_id_usearch61=opts.percent_id_usearch61,
            minlen=opts.minlen,
            word_length=opts.word_length,
            max_accepts=opts.max_accepts,
            max_rejects=opts.max_rejects,
            verbose=opts.verbose,
            threads=threads)