Beispiel #1
0
def main():
    """Compare alpha diversities and write the results as a tab-separated file.

    Parses the command line, rejects runs with fewer than 10 permutations,
    calls ``compare_alpha_diversities`` on the rarefaction and mapping files,
    passes the result through ``_correct_compare_alpha_results`` together with
    ``opts.correction_method`` and writes one ``Comparison / tval / pval`` row
    per entry to ``opts.output_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    category = opts.category
    depth = int(opts.depth)
    output_path = opts.output_fp

    # Context managers make sure both inputs are closed even when the
    # comparison raises.
    with open(opts.alpha_diversity_fp, 'U') as rarefaction_lines:
        with open(opts.mapping_fp, 'U') as mapping_lines:
            result = compare_alpha_diversities(
                rarefaction_lines, mapping_lines, category, depth,
                opts.test_type, opts.num_permutations)

    corrected_result = _correct_compare_alpha_results(result,
                                                      opts.correction_method)

    # write results: header first, then one row per comparison
    lines = ['Comparison\ttval\tpval']
    for k, v in corrected_result.items():
        lines.append('\t'.join(map(str, [k, v[0], v[1]])))
    with open(output_path, 'w') as outfile:
        outfile.write('\n'.join(lines))
Beispiel #2
0
def main():
    """Validate the option combination for the selected assignment method.

    For ``blast`` an id-to-taxonomy map is mandatory and either reference
    sequences or a blast db must be given.  For ``rdp`` the RDP version is
    validated and the id-to-taxonomy map and the reference sequences must be
    supplied together or not at all.  Every violation is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    assignment_method = opts.assignment_method

    if assignment_method == 'blast':
        if not opts.id_to_taxonomy_fp:
            option_parser.error('Option --id_to_taxonomy_fp is required when '
                                'assigning with blast.')
        if not (opts.reference_seqs_fp or opts.blast_db):
            option_parser.error('Either a blast db (via -b) or a collection of '
                                'reference sequences (via -r) must be passed to '
                                'assign taxonomy using blast.')

    if assignment_method == 'rdp':
        try:
            validate_rdp_version()
        # "as" form is accepted by Python 2.6+ and Python 3; the comma form
        # is a syntax error on Python 3.
        except RuntimeError as e:
            option_parser.error(e)

        # The two training inputs only make sense as a pair.
        if opts.id_to_taxonomy_fp is not None:
            if opts.reference_seqs_fp is None:
                option_parser.error(
                    'A filepath for reference sequences must be '
                    'specified (via -r) along with the id_to_taxonomy '
                    'file to train the Rdp Classifier.')
        elif opts.reference_seqs_fp is not None:
            option_parser.error(
                'A filepath for an id to taxonomy map must be '
                'specified (via -t) along with the reference '
                'sequences fp to train the Rdp Classifier.')
def main():
    """Read the command-line options and, for collated input, average the alpha data.

    When ``opts.collated_input`` is set, every file in ``opts.alpha_fps`` is
    read into a dict keyed by its base name (extension stripped) and handed to
    ``mean_alpha`` together with ``opts.depth``.  A ``ValueError`` from
    ``mean_alpha`` is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    alpha_fps = opts.alpha_fps
    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    binning_method = opts.binning_method
    missing_value_name = opts.missing_value_name
    depth = opts.depth
    number_of_bins = opts.number_of_bins
    collated_input = opts.collated_input

    # if using collated data, make sure they specify a depth
    if collated_input:
        alpha_dict = {}

        # build up a dictionary with the filenames as keys and lines as values
        for single_alpha_fp in alpha_fps:
            with open(single_alpha_fp, "U") as alpha_f:
                alpha_dict[splitext(basename(single_alpha_fp))[0]] = \
                    alpha_f.readlines()

        # format the collated data
        try:
            metrics, alpha_sample_ids, alpha_data = mean_alpha(alpha_dict,
                                                               depth)
        except ValueError as e:  # see mean_alpha for the possible exceptions
            # str(e) instead of the deprecated (and Python-3-absent) e.message
            option_parser.error(str(e))
Beispiel #4
0
def main():
    """Chain several OTU maps together and write the merged result.

    Opens every path in ``opts.otu_map_fps`` (and the optional failures file),
    passes the handles to ``map_otu_map_files`` and writes the outcome to
    ``opts.output_fp``.  With a failures file the result is written as
    newline-joined entries; otherwise it is written with ``write_otu_map``.
    A ``KeyError`` from the mapping step prints a hint and exits with status 1.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    failures_fp = opts.failures_fp
    output_fp = opts.output_fp

    otu_files = []
    failures_f = None
    try:
        for otu_map_fp in opts.otu_map_fps:
            otu_files.append(open(otu_map_fp))
        if failures_fp:
            failures_f = open(failures_fp, 'U')

        try:
            result = map_otu_map_files(otu_files, failures_file=failures_f)
        except KeyError as e:
            print ('Some keys do not map (' + str(e) + ') -- is the order of'
                   ' your OTU maps equivalent to the order in which the OTU pickers'
                   ' were run? If expanding a failures file, did you remember to leave'
                   ' out the otu map from the run which generated the failures file?')
            exit(1)

        # Same truthiness test as the one that decided whether the failures
        # file was opened, so an empty path cannot select the wrong writer.
        if failures_fp:
            with open(output_fp, 'w') as of:
                of.write('\n'.join(result))
        else:
            write_otu_map(result.items(), output_fp)
    finally:
        # Inputs stay open until the result has been written, in case the
        # result still reads from them.
        for otu_file in otu_files:
            otu_file.close()
        if failures_f is not None:
            failures_f.close()
def main():
    """Run a Mantel comparison for every pair of input distance matrices.

    ``opts.input_dms`` is a comma-separated list of distance matrix paths.
    Each unordered pair is made compatible via
    ``make_compatible_distance_matrices`` (optionally using a sample id map),
    compared with ``mantel`` and reported as one tab-separated row in
    ``opts.output_fp``.  Pairs with fewer than two shared labels are reported
    as "Too few samples".
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    sample_id_map_fp = opts.sample_id_map_fp
    if sample_id_map_fp:
        with open(sample_id_map_fp, "U") as sample_id_map_f:
            # keep only the first field of every entry
            sample_id_map = dict(
                (k, v[0])
                for k, v in fields_to_dict(sample_id_map_f).items())
    else:
        sample_id_map = None

    input_dm_fps = opts.input_dms.split(',')
    num_iterations = opts.num_iterations

    with open(opts.output_fp, 'w') as output_f:
        # NOTE(review): `comment` is not bound in this function; it is
        # expected to exist at module level -- confirm.
        output_f.write(comment)
        output_f.write('DM1\tDM2\tNumber of entries\tMantel p-value\n')
        for i, fp1 in enumerate(input_dm_fps):
            for fp2 in input_dm_fps[i + 1:]:
                with open(fp1, 'U') as dm1_f:
                    with open(fp2, 'U') as dm2_f:
                        (dm1_labels, dm1), (dm2_labels, dm2) =\
                            make_compatible_distance_matrices(
                                parse_distmat(dm1_f),
                                parse_distmat(dm2_f),
                                lookup=sample_id_map)
                if len(dm1_labels) < 2:
                    output_f.write('%s\t%s\t%d\tToo few samples\n' %
                                   (fp1, fp2, len(dm1_labels)))
                    continue
                p = mantel(dm1, dm2, n=num_iterations)
                p_str = format_p_value_for_num_iters(p, num_iterations)
                output_f.write('%s\t%s\t%d\t%s\n' %
                               (fp1, fp2, len(dm1_labels), p_str))
Beispiel #6
0
def main():
    """Generate the novel-diversity plots and store them with their raw data.

    Calls ``generate_new_diversity_plots`` with the OTU tables, the fasta file
    and the mapping file, saves the two returned plots as PDFs in
    ``opts.output_dir`` and pickles the two returned data objects next to
    them.
    """
    import errno

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    mapping_category = opts.mapping_category

    try:
        makedirs(output_dir)
    except OSError as e:
        # An already existing path is fine; any other failure (e.g. missing
        # permissions) must not be swallowed silently.
        if e.errno != errno.EEXIST:
            raise

    otu_table_fs = []
    gg_fasta_f = None
    mapping_f = None
    try:
        for otu_table_fp in opts.otu_table_fps:
            otu_table_fs.append(open(otu_table_fp, 'U'))
        gg_fasta_f = open(opts.gg_fasta_fp, 'U')
        mapping_f = open(opts.mapping_fp, 'U')

        percent_failures_data, percent_failures_plot, num_new_otus_data, \
            num_new_otus_plot = generate_new_diversity_plots(
                otu_table_fs, gg_fasta_f, mapping_f,
                mapping_category, opts.min_num_samples,
                opts.category_values_to_exclude.split(','), opts.verbose)
    finally:
        for otu_table_f in otu_table_fs:
            otu_table_f.close()
        if gg_fasta_f is not None:
            gg_fasta_f.close()
        if mapping_f is not None:
            mapping_f.close()

    # Save plots as PDFs.
    percent_failures_plot.savefig(join(output_dir,
                                  'percent_novel_seqs_by_%s.pdf' %
                                  mapping_category))
    num_new_otus_plot.savefig(join(output_dir,
                              'num_novel_otus_by_%s.pdf' %
                              mapping_category))

    # Pickle plot raw data in case we need to load up the data again into new
    # plots and interactively tweak them (it'll take too long to rerun the
    # whole script for these tweaks).
    with open(join(output_dir, 'percent_novel_seqs_by_%s.p' %
                   mapping_category), 'wb') as percent_failures_f:
        dump(percent_failures_data, percent_failures_f)
    with open(join(output_dir, 'num_novel_otus_by_%s.p' %
                   mapping_category), 'wb') as num_new_otus_f:
        dump(num_new_otus_data, num_new_otus_f)
Beispiel #7
0
def main():
    """Check the option combination, open the inputs and call ``extract_barcodes``.

    A mapping file is required for ``--attempt_read_reorientation`` and a
    second fastq file is required for the ``barcode_paired_end`` input type.
    Header matching is always disabled when no second fastq file is given.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.attempt_read_reorientation and not opts.mapping_fp:
        option_parser.error("To use --attempt_read_reorientation, one must "
                            "supply a mapping file that contains both LinkerPrimerSequence "
                            "and ReversePrimer columns.")
    if opts.input_type == "barcode_paired_end" and not opts.fastq2:
        option_parser.error("To use input_type of barcode_paired_end, "
                            "a second fastq file must be specified with --fastq2")

    # The user's choice only counts when there is a second file to compare
    # headers against.
    disable_header_match = opts.disable_header_match if opts.fastq2 else True

    fastq1 = qiime_open(opts.fastq1)
    fastq2 = qiime_open(opts.fastq2) if opts.fastq2 else None
    create_dir(opts.output_dir)
    map_fp = qiime_open(opts.mapping_fp) if opts.mapping_fp else None

    extract_barcodes(fastq1, fastq2, opts.output_dir, opts.input_type,
                     opts.bc1_len, opts.bc2_len,
                     opts.rev_comp_bc1, opts.rev_comp_bc2,
                     opts.char_delineator, opts.switch_bc_order, map_fp,
                     opts.attempt_read_reorientation, disable_header_match)
def main():
    """Validate the inputs and run ``truncate_reverse_primer``.

    Raises:
        ValueError: if ``opts.truncate_option`` is neither ``truncate_only``
            nor ``truncate_remove``.
        IOError: if the fasta file or the mapping file cannot be opened.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    truncate_option = opts.truncate_option
    primer_mismatches = int(opts.primer_mismatches)

    create_dir(output_dir)

    if truncate_option not in ['truncate_only', 'truncate_remove']:
        raise ValueError('-z option must be either truncate_only or ' +
                         'truncate_remove')

    # Both files are opened only to verify that they are readable.
    try:
        with open(fasta_fp, "U"):
            pass
    except IOError:
        raise IOError("Unable to open fasta file, please check path/" +
                      "permissions.")
    try:
        # The original probed fasta_fp a second time here, so an unreadable
        # mapping file was never detected.
        with open(mapping_fp, "U"):
            pass
    except IOError:
        raise IOError("Unable to open mapping file, please check path/" +
                      "permissions.")

    truncate_reverse_primer(fasta_fp, mapping_fp, output_dir, truncate_option,
                            primer_mismatches)
def main():
    """Append merged columns to a mapping file and write the result.

    Every entry of ``opts.columns_to_merge`` names two or more existing
    columns joined by ``&&``.  For each entry a new column is appended whose
    header and values are the concatenation of the named columns.  The
    extended mapping file is written to ``opts.output_fp``.

    Raises:
        AssertionError: if header and row lengths disagree after merging.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    columns_to_merge = opts.columns_to_merge
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp

    try:
        with open(mapping_fp, 'U') as mapping_f:
            data, headers, comments = parse_mapping_file(mapping_f)
    # `except Exception` keeps the catch-all intent without also trapping
    # KeyboardInterrupt / SystemExit the way a bare `except:` does.
    except Exception:
        option_parser.error('Bro, that doesn\'t look like a mapping file')

    for merging in columns_to_merge:
        try:
            indices = [headers.index(name) for name in merging.split('&&')]
        except ValueError:
            option_parser.error("Cannot merge '%s': not every column name "
                                "exists in the mapping file." % merging)

        headers.append(''.join([headers[element] for element in indices]))

        for line in data:
            line.append(''.join([line[element] for element in indices]))

    # this should never happen; raised explicitly so the check survives -O
    # and an empty mapping file does not fail on data[0]
    if data and len(headers) != len(data[0]):
        raise AssertionError("Something went horribly wrong, "
                             "that's what you get for using non-unit-tested "
                             "software")

    lines = format_mapping_file(headers, data, comments)

    with open(output_fp, 'w') as fd:
        fd.writelines(lines)
Beispiel #10
0
def main():
    """Pick a representative sequence set, with or without a reference set.

    The picker is looked up by ``opts.rep_set_picking_method`` in
    ``reference_rep_set_picking_methods`` when reference sequences are given
    and in ``rep_set_picking_methods`` otherwise.  Without reference
    sequences ``--fasta_fp`` is mandatory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    reference_seqs_filepath = opts.reference_seqs_fp
    input_seqs_filepath = opts.fasta_fp
    input_otu_filepath = opts.otu_fp
    # NOTE(review): when --fasta_fp is omitted (allowed with a reference set)
    # this default becomes 'None_rep_set.fasta' -- confirm that is intended.
    result_path = opts.result_fp or '%s_rep_set.fasta' % input_seqs_filepath

    # keyword arguments shared by both kinds of picker
    output_options = {
        'result_path': result_path,
        'log_path': opts.log_fp,
        'sort_by': opts.sort_by,
    }

    if not reference_seqs_filepath:
        if not input_seqs_filepath:
            option_parser.error('--fasta_fp must be provided when not picking'
                                ' representative against a reference set.')
        picker = rep_set_picking_methods[opts.rep_set_picking_method]
        picker(input_seqs_filepath, input_otu_filepath, **output_options)
        return

    picker = reference_rep_set_picking_methods[opts.rep_set_picking_method]
    picker(input_seqs_filepath,
           input_otu_filepath,
           reference_seqs_filepath,
           **output_options)
def main():
    """Count how often each value of a mapping-file category occurs.

    Writes one ``value<TAB>count`` line per distinct value of
    ``opts.category`` (in ``natsort`` order) to ``opts.output_fp``, or to
    stdout when no output path is given.  Empty values are reported as
    ``***UNSPECIFIED***``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp

    # NOTE(review): a path, not an open handle, is passed here -- confirm
    # that parse_mapping_file accepts a file path.
    map_data, header, comments = parse_mapping_file(opts.input_fp)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!" % opts.category)

    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # use stdout or the user supplied file path
    fd = open(output_fp, 'w') if output_fp else stdout
    try:
        for cat_val in natsort(result):
            if not cat_val:
                fd.write("***UNSPECIFIED***\t%d\n" % result[cat_val])
            else:
                fd.write("%s\t%d\n" % (cat_val, result[cat_val]))
    finally:
        # only close what this function opened; stdout belongs to the process
        if fd is not stdout:
            fd.close()
def main():
    """Convert the qseq files of each requested lane into one fastq file per lane.

    For every lane in the comma-separated ``opts.lanes`` the files matching
    ``s_<lane>_<read>_*qseq.txt`` in ``opts.input_dir`` are processed in
    sorted order and their records are written, converted by
    ``illumina_data_to_fastq``, to ``s_<lane>_<read>_sequences.fastq`` in
    ``opts.output_dir``.  Lanes without matching files produce no output file.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(',')
    bases = opts.bases
    read = opts.read

    for lane in lanes:
        read1_fps = glob('%s/s_%s_%d_*qseq.txt' % (input_dir,
                                                   lane.replace(',', ''),
                                                   read))
        if not read1_fps:
            continue
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps.sort()

        # The output is opened once per lane.  Reopening it in 'w' mode for
        # every input file (as before) discarded all but the last file.
        output_fp = '%s/s_%s_%s_sequences.fastq' % (output_dir, lane, read)
        with open(output_fp, 'w') as output_f:
            for read1_fp in read1_fps:
                with open(read1_fp, 'U') as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s = illumina_data_to_fastq(
                            record, number_of_bases=bases)
                        output_f.write('%s\n' % fastq_s)
def main():
    """Create (and optionally submit) cluster jobs from a commands file.

    Expects exactly two positional arguments: the commands file and a job
    prefix of 1-10 characters.  ``-s`` (submit) is only accepted together
    with ``-m`` (make).  The jobs directory is created when missing.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.submit_jobs and not opts.make_jobs:
        option_parser.error('Must pass -m if passing -s. (Sorry about this, ' +
                            'it\'s for backwards-compatibility.)')

    min_args = 2
    if len(args) != min_args:
        option_parser.error('Program requires <commands file> and  <job prefix>')

    if (len(args[1]) > 10 or len(args[1]) == 0):
        option_parser.error('job prefix must be 1-10 characters long')

    # read all commands up front so the handle can be closed right away
    with open(args[0]) as commands_f:
        commands = list(commands_f)
    job_prefix = args[1]

    if not exists(opts.job_dir):
        try:
            makedirs(opts.job_dir)
        except OSError:
            exit(" Jobs directory can not be created. "
                 + "Check for permissions or file with the same name: %s\n"
                 % opts.job_dir)

    if opts.make_jobs:
        filenames = make_jobs(commands, job_prefix, opts.queue, opts.job_dir)
    else:
        # reached when neither -m nor -s was passed (-s without -m is
        # rejected above)
        exit("Should we ever get here???")
    if opts.submit_jobs:
        submit_jobs(filenames, opts.verbose)
Beispiel #14
0
def main():
    """Split a mapping file and an OTU table on one mapping field.

    Writes one ``mapping_<value>.txt`` per chunk yielded by
    ``split_mapping_file_on_field`` and one ``<otu table name>_<value>.biom``
    per chunk yielded by ``split_otu_table_on_sample_metadata`` into
    ``opts.output_dir``.  An ``OTUTableSplitError`` is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir

    create_dir(output_dir)

    # split mapping file
    with open(mapping_fp, 'U') as mapping_f:
        for fp_str, sub_mapping_s in split_mapping_file_on_field(
                mapping_f, mapping_field):
            mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
            with open(mapping_output_fp, 'w') as mapping_output_f:
                mapping_output_f.write(sub_mapping_s)

    # split otu table
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    otu_table = load_table(otu_table_fp)

    # the mapping file is read a second time, from the start
    with open(mapping_fp, 'U') as mapping_f:
        try:
            for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
                    otu_table,
                    mapping_f,
                    mapping_field):
                otu_table_output_fp = join(output_dir, '%s_%s.biom' % (
                    otu_table_base_name, fp_str))

                write_biom_table(sub_otu_table_s, otu_table_output_fp)
        except OTUTableSplitError as e:
            option_parser.error(e)
def main():
    """Filter the observations of a BIOM table by taxonomy and write the result.

    ``opts.positive_taxa`` and ``opts.negative_taxa`` are optional
    comma-separated lists.  They are turned into a filter function by
    ``get_otu_ids_from_taxonomy_f`` and applied with
    ``filterObservations``.  The formatted table goes to
    ``opts.output_otu_table_fp``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Everything happens while the input is open, so the handle is closed on
    # every exit path without assuming the parser reads eagerly.
    with open(opts.input_otu_table_fp, 'U') as input_table_f:
        input_table = parse_biom_table(input_table_f)

        # Output is still opened before filtering (as before) so an
        # unwritable path fails early.
        with open(opts.output_otu_table_fp, 'w') as output_table_f:
            metadata_field = opts.metadata_field
            positive_taxa = opts.positive_taxa
            negative_taxa = opts.negative_taxa

            # None stays None; only real values are split into lists
            if positive_taxa is not None:
                positive_taxa = positive_taxa.split(',')
            if negative_taxa is not None:
                negative_taxa = negative_taxa.split(',')

            filter_fn = get_otu_ids_from_taxonomy_f(
                positive_taxa,
                negative_taxa,
                metadata_field)
            output_table = input_table.filterObservations(filter_fn)
            output_table_f.write(format_biom_table(output_table))
Beispiel #16
0
def main():
    """Compute alpha diversity for a single file or for every file in a directory.

    With ``--show_metrics`` the known metrics are printed and the script
    exits.  Otherwise ``input_path``, ``output_path`` and ``metrics`` are
    required.  A directory input is handled by ``multiple_file_alpha``, a file
    input by ``single_file_alpha`` after checking that the output file can be
    created.  Any other input path is reported as an error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.show_metrics:
        print("Known metrics are: %s\n"
              % (', '.join(list_known_metrics()),))
        print("For more information, see http://scikit-bio.org/docs/latest/ge"
              "nerated/skbio.diversity.alpha.html#module-skbio.diversity.alpha")
        exit(0)
    almost_required_options = ['input_path', 'output_path', 'metrics']
    for option in almost_required_options:
        if getattr(opts, option) is None:
            option_parser.error('Required option --%s omitted.' % option)

    if os.path.isdir(opts.input_path):
        multiple_file_alpha(opts.input_path, opts.output_path, opts.metrics,
                            opts.tree_path)
    elif os.path.isfile(opts.input_path):
        # probe that the output file can be created before doing any work
        try:
            f = open(opts.output_path, 'w')
            f.close()
        except IOError:
            if os.path.isdir(opts.output_path):
                option_parser.error(
                    "ioerror, couldn't create output file. The output path is a directory, which should be a single file")
            else:
                option_parser.error("ioerror, couldn't create output file")
        single_file_alpha(opts.input_path, opts.metrics,
                          opts.output_path, opts.tree_path)
    else:
        # previously a non-existent input path fell through silently
        option_parser.error(
            "io error, input path not valid. does it exist?")
Beispiel #17
0
def main():
    """Validate the reference options and launch ``ParallelBlaster``.

    Exactly one of a blast db (``-b``) and a reference sequence collection
    (``-r``) must be given.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.refseqs_path and not opts.blast_db:
        option_parser.error('Either a blast db (via -b) or a collection of '
                            'reference sequences (via -r) must be passed')
    if opts.refseqs_path and opts.blast_db:
        option_parser.error('You should provide only a blast db (via -b) '
                            'or a collection of reference sequences (via -r), but not both')

    # create dict of command-line options
    # NOTE(review): eval() of the options' string form evaluates whatever the
    # option values print as; consider a non-eval copy such as
    # dict(vars(opts)) after confirming opts is a plain optparse Values.
    params = eval(str(opts))

    runner_settings = {
        'cluster_jobs_fp': opts.cluster_jobs_fp,
        'jobs_to_start': opts.jobs_to_start,
        'retain_temp_files': opts.retain_temp_files,
        'suppress_polling': opts.suppress_polling,
        'seconds_to_sleep': opts.seconds_to_sleep,
    }
    parallel_runner = ParallelBlaster(**runner_settings)

    parallel_runner(opts.infile_path,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=False)
def main():
    """Write shared-phylotype output for one OTU table or a directory of them.

    When ``opts.otu_table_fp`` is a directory, every ``*biom`` file in it
    gets its own ``<name>_shared_OTUs.txt`` inside ``opts.output_fp`` (created
    when missing).  Otherwise the single table's result is written to
    ``opts.output_fp``; failure to open that file ends the script with an
    explanatory message.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if isdir(opts.otu_table_fp):
        create_dir(opts.output_fp, fail_on_exist=False)
        # run on each file in dir
        for fp in glob(opts.otu_table_fp + '/*biom'):
            file_name = split(fp)[1]
            base_name = splitext(file_name)[0]
            out_fp = opts.output_fp + "/" + base_name + "_shared_OTUs.txt"

            with open(out_fp, 'w') as out_fh:
                out_fh.write(calc_shared_phylotypes(load_table(fp),
                                                    opts.reference_sample))
    else:
        # run in single file mode
        try:
            out_fh = open(opts.output_fp, "w")
        except IOError as message:
            exit(("Can't open output file %s for writing. Check the "
                  "permissions or existing directory with identical "
                  "name.\n%s") % (opts.output_fp, message))
        # close (and flush) the handle even if the computation raises
        with out_fh:
            out_fh.write(calc_shared_phylotypes(load_table(opts.otu_table_fp),
                                                opts.reference_sample))
Beispiel #19
0
def main():
    """Validate the replacement character and run ``check_mapping_file``.

    The output directory is created first.  ``-c`` must be a single
    alphanumeric, period or underscore character.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    mapping_fp = opts.mapping_fp
    barcoded = not opts.not_barcoded
    variable_len_barcodes = opts.variable_len_barcodes
    output_dir = opts.output_dir + "/"
    replacement = opts.char_replace
    verbose = opts.verbose
    disable_primer_check = opts.disable_primer_check
    added_demultiplex_field = opts.added_demultiplex_field

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    # Test for valid replacement characters.  For a string value `in` is a
    # substring test, so the separate length check below is still needed.
    allowed_chars = digits + letters + "_" + "."
    if replacement not in allowed_chars:
        option_parser.error('-c option requires alphanumeric, period, or ' +
                            'underscore character.')
    if len(replacement) != 1:
        option_parser.error('-c parameter must be a single character.')

    check_mapping_file(mapping_fp, output_dir, barcoded, replacement,
                       verbose, variable_len_barcodes,
                       disable_primer_check, added_demultiplex_field)
def main():
    """Filter a fasta file by a set of sequence ids built from one source.

    Exactly one of ``opts.otu_map``, ``opts.seq_id_fp``,
    ``opts.subject_fasta_fp`` and ``opts.seq_id_prefix`` must be set.  It
    selects how the lookup of sequences to keep is built.  The lookup is then
    applied by ``filter_fasta_fp`` (inverted when ``opts.negate`` is set).
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    negate = opts.negate

    if 1 != sum(map(bool, [opts.otu_map,
                           opts.seq_id_fp,
                           opts.subject_fasta_fp,
                           opts.seq_id_prefix])):
        option_parser.error("Must pass exactly one of -a, -s, -p, or -m.")

    source_f = None
    try:
        if opts.otu_map:
            source_f = open(opts.otu_map, 'U')
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_otu_map(source_f)
        elif opts.seq_id_fp:
            source_f = open(opts.seq_id_fp, 'U')
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_seq_id_file(source_f)
        elif opts.subject_fasta_fp:
            source_f = open(opts.subject_fasta_fp, 'U')
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_fasta_file(source_f)
        elif opts.seq_id_prefix:
            source_f = open(opts.input_fasta_fp)
            seqs_to_keep_lookup =\
                get_seqs_to_keep_lookup_from_prefix(source_f,
                                                    opts.seq_id_prefix)
        else:
            # defensive only: the count check above already rules this out
            option_parser.error("Must pass exactly one of -a, -s, -p, or -m.")

        filter_fasta_fp(opts.input_fasta_fp,
                        opts.output_fasta_fp,
                        seqs_to_keep_lookup,
                        negate)
    finally:
        # the source stays open until filtering is done, in case the lookup
        # still reads from it
        if source_f is not None:
            source_f.close()
Beispiel #21
0
def main():
    """Parse the rules file given by ``opts.temp_input`` and print the result.

    The file is opened in ``'r+'`` mode, passed to ``parseRules`` and the
    returned value is printed.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    with open(opts.temp_input, 'r+') as input_rf:
        listOfIds = parseRules(input_rf)
        # print(...) with one argument behaves the same on Python 2 and 3;
        # the bare print statement is a syntax error on Python 3
        print(listOfIds)
def main():
    """Build the OTU network files and statistics in an output directory.

    Requires an OTU table (``opts.counts_fname``) and a mapping file
    (``opts.map_fname``).  The output directory defaults to
    ``get_random_directory_name()`` when ``opts.dir_path`` is unset or empty.
    The ``otu_network``, ``otu_network/props`` and ``otu_network/stats``
    sub-directories are created before ``create_network_and_stats`` runs.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    # Errors go through option_parser, the parser object this function
    # actually receives; the previous `parser` is not bound in this block.
    if not opts.counts_fname:
        option_parser.error("An otu table file must be specified")

    if not opts.map_fname:
        option_parser.error("A Map file must be specified")

    # ball_scale and arrow_colors are part of the returned tuple but are not
    # used below
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    dir_path = opts.dir_path

    if dir_path is None or dir_path == '':
        dir_path = get_random_directory_name()

    create_dir(dir_path)
    create_dir(os.path.join(dir_path, "otu_network"))
    create_dir(os.path.join(dir_path, "otu_network/props"))
    create_dir(os.path.join(dir_path, "otu_network/stats"))

    with open(opts.map_fname, 'U') as map_f:
        map_lines = map_f.readlines()
    with open(opts.counts_fname, 'U') as counts_f:
        otu_sample_lines = counts_f.readlines()
    create_network_and_stats(dir_path, map_lines, otu_sample_lines, prefs,
                             data, background_color, label_color)
def main():
    """Filter a distance matrix down to a chosen set of samples.

    The samples to keep come from an OTU table (``opts.otu_table_fp``), a
    sample id file (``opts.sample_id_fp``) or a mapping file plus a
    valid-states description.  The filtered matrix is written to
    ``opts.output_distance_matrix``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    sample_source_f = None
    try:
        if opts.otu_table_fp:
            otu_table = load_table(opts.otu_table_fp)
            samples_to_keep = otu_table.ids()
        elif opts.sample_id_fp:
            sample_source_f = open(opts.sample_id_fp, 'U')
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_source_f)
        elif opts.mapping_fp and opts.valid_states:
            sample_source_f = open(opts.mapping_fp, 'U')
            try:
                samples_to_keep = sample_ids_from_metadata_description(
                    sample_source_f, opts.valid_states)
            except ValueError as e:
                # str(e) instead of the deprecated e.message
                option_parser.error(str(e))
        else:
            option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                                '-s')
        # note that negate gets a little weird here. The function we're calling
        # removes the specified samples from the distance matrix, but the other
        # QIIME filter scripts keep these samples specified.  So, the interface of
        # this script is designed to keep the specified samples, and therefore
        # negate=True is passed to filter_samples_from_distance_matrix by default.
        with open(opts.input_distance_matrix, 'U') as input_dm_f:
            d = filter_samples_from_distance_matrix(
                parse_distmat(input_dm_f),
                samples_to_keep,
                negate=not opts.negate)
    finally:
        if sample_source_f is not None:
            sample_source_f.close()

    # The output is opened only once there is something to write, so an
    # option error no longer leaves an empty (or truncated) output file.
    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
def main():
    """Split the input fasta file into per-sample files.

    Feeds the records parsed by ``MinimalFastaParser`` from
    ``opts.input_fasta_fp`` to ``split_fasta_on_sample_ids_to_files`` together
    with the output directory and the buffer size.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # the handle stays open for the whole call and is closed afterwards,
    # also when the split raises
    with open(opts.input_fasta_fp, 'U') as input_fasta_f:
        split_fasta_on_sample_ids_to_files(MinimalFastaParser(input_fasta_f),
                                           opts.output_dir,
                                           opts.buffer_size)
def main():
    """Validate the reference options and launch ``ParallelBlastTaxonomyAssigner``.

    Either a blast db (``-b``) or a collection of reference sequences
    (``-r``) must be given.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.reference_seqs_fp and not opts.blast_db:
        option_parser.error(
            "Either a blast db (via -b) or a collection of "
            "reference sequences (via -r) must be passed to "
            "assign taxonomy using blast."
        )

    # create dict of command-line options
    # NOTE(review): eval() of the options' string form evaluates whatever the
    # option values print as; consider a non-eval copy such as
    # dict(vars(opts)) after confirming opts is a plain optparse Values.
    params = eval(str(opts))

    runner_settings = {
        'cluster_jobs_fp': opts.cluster_jobs_fp,
        'jobs_to_start': opts.jobs_to_start,
        'retain_temp_files': opts.retain_temp_files,
        'suppress_polling': opts.suppress_polling,
        'seconds_to_sleep': opts.seconds_to_sleep,
    }
    parallel_runner = ParallelBlastTaxonomyAssigner(**runner_settings)

    parallel_runner(opts.input_fasta_fp,
                    abspath(opts.output_dir),
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)
def main():
    """Sort the samples of an OTU table and write the sorted table.

    The order comes either from a mapping-file field (``--sort_field`` with
    ``--mapping_fp``) or from a file listing the sample ids in order
    (``--sorted_sample_ids_fp``).  The result is formatted with
    ``format_otu_table`` and written to ``opts.output_fp``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    # All processing happens while the table file is open, so the handle is
    # closed on every exit path without assuming the parser reads eagerly.
    with open(opts.input_otu_table, 'U') as otu_table_f:
        otu_table_data = parse_otu_table(otu_table_f)

        if sort_field and mapping_fp:
            with open(mapping_fp, 'U') as mapping_f:
                mapping_data = parse_mapping_file(mapping_f)
                result = sort_otu_table_by_mapping_field(otu_table_data,
                                                         mapping_data,
                                                         sort_field)
        elif sorted_sample_ids_fp:
            with open(sorted_sample_ids_fp, 'U') as sorted_sample_ids_f:
                sorted_sample_ids = sample_ids_from_f(sorted_sample_ids_f)
                result = sort_otu_table(otu_table_data,
                                        sorted_sample_ids)
        else:
            # option_parser is the parser object this function receives; the
            # previous `parser` is not bound in this block
            option_parser.error("must provide either --sort_field and --mapping_fp OR --sorted_sample_ids_fp")

        # format and write the otu table
        result_str = format_otu_table(result[0], result[1], result[2],
                                      result[3])

    with open(opts.output_fp, 'w') as of:
        of.write(result_str)
def main():
    """Run PCoA on one distance matrix or on every matrix in a directory.

    For a directory input, every non-hidden regular entry is processed and
    its result written to ``pcoa_<name>.txt`` inside ``opts.output_path``
    (created when missing).  For a file input the result is written directly
    to ``opts.output_path``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_path = opts.input_path
    output_path = opts.output_path

    if isdir(input_path):
        # Run PCoA on all distance matrices in the input dir
        # Create the output directory if it does not exists
        if not exists(output_path):
            makedirs(output_path)

        # Get all the filenames present in the input directory.  The
        # directory test must use the full path: listdir() yields bare names,
        # which isdir() would otherwise resolve against the cwd.
        file_names = [fname for fname in listdir(input_path)
                      if not (fname.startswith('.') or
                              isdir(join(input_path, fname)))]

        # Loop through all the input files
        for fname in file_names:
            # Get the path to the input distance matrix
            infile = join(input_path, fname)

            # Run PCoA on the input distance matrix
            with open(infile, 'U') as lines:
                pcoa_scores = pcoa(lines)

            # Store the PCoA results on the output directory
            base_fname, ext = splitext(fname)
            out_file = join(output_path, 'pcoa_%s.txt' % base_fname)
            pcoa_scores.write(out_file)

    else:
        # Run PCoA on the input distance matrix
        with open(input_path, 'U') as f:
            pcoa_scores = pcoa(f)
        # Store the results in the output file
        pcoa_scores.write(output_path)
def main():
    """Compute alpha diversity for a single file or for every file in a directory.

    With ``--show_metrics`` the known metrics are printed and the script
    exits.  Otherwise ``input_path``, ``output_path`` and ``metrics`` are
    required.  A directory input is handled by ``multiple_file_alpha``, a file
    input by ``single_file_alpha`` after checking that the output file can be
    created.  Any other input path prints an error and exits with status 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.show_metrics:
        print("Known metrics are: %s\n"
              % (', '.join(list_known_metrics()),))
        print("For more information, see http://qiime.org/scripts/alpha_diversity_metrics.html")
        exit(0)
    almost_required_options = ['input_path', 'output_path', 'metrics']
    for option in almost_required_options:
        # identity test: the idiomatic (and __eq__-proof) way to detect None
        if getattr(opts, option) is None:
            option_parser.error('Required option --%s omitted.' % option)

    if os.path.isdir(opts.input_path):
        multiple_file_alpha(opts.input_path, opts.output_path, opts.metrics,
                            opts.tree_path)
    elif os.path.isfile(opts.input_path):
        # probe that the output file can be created before doing any work
        try:
            f = open(opts.output_path, 'w')
            f.close()
        except IOError:
            print("ioerror, couldn't create output file")
            exit(1)
        single_file_alpha(opts.input_path, opts.metrics,
                          opts.output_path, opts.tree_path)
    else:
        print("io error, input path not valid. does it exist?")
        exit(1)
def main():
    """Filter the observations of a BIOM table by taxonomy and write the result.

    ``opts.positive_taxa`` and ``opts.negative_taxa`` are optional
    comma-separated lists.  The table is filtered along the observation axis
    with the function built by ``get_otu_ids_from_taxonomy_f``.  An
    ``EmptyBIOMTableError`` while writing is reported through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_table_fp = opts.output_otu_table_fp
    metadata_field = opts.metadata_field
    raw_positive = opts.positive_taxa
    raw_negative = opts.negative_taxa

    table = load_table(opts.input_otu_table_fp)

    # None means "no constraint" and is passed through unchanged
    taxa_to_keep = None if raw_positive is None else raw_positive.split(',')
    taxa_to_drop = None if raw_negative is None else raw_negative.split(',')

    keep_observation = get_otu_ids_from_taxonomy_f(taxa_to_keep, taxa_to_drop,
                                                   metadata_field)
    table.filter(keep_observation, axis='observation')

    try:
        write_biom_table(table, output_table_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. "
            "This indicates that no OTUs remained after filtering.")
Beispiel #30
0
def main():
    """Resolve the output directory and run ``prep_sffs_in_dir``.

    An explicit ``opts.output_dir`` is created.  Otherwise the output goes
    next to the input file, or into the input directory itself.

    Raises:
        ValueError: if ``--no_trim`` is passed without ``--use_sfftools``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir

    if output_dir:
        create_dir(output_dir)
    elif isfile(opts.input_dir):
        # if output_dir is empty after the split, then a relative path was
        # passed, and the input file is in the current directory
        output_dir = split(opts.input_dir)[0] or '.'
    else:
        # opts.input_dir is a directory
        output_dir = opts.input_dir

    if opts.no_trim and not opts.use_sfftools:
        raise ValueError(
            "When using the --no_trim option you must have the sfftools installed and must also pass the --use_sfftools option")

    processing_options = {
        'make_flowgram': opts.make_flowgram,
        'convert_to_flx': opts.convert_to_FLX,
        'use_sfftools': opts.use_sfftools,
        'no_trim': opts.no_trim,
    }
    prep_sffs_in_dir(opts.input_dir, output_dir, **processing_options)
Beispiel #31
0
def main():
    """Run ``rc_fasta_file`` on the input FASTA path.

    The sequence-description mapper is ``null_seq_desc_mapper`` when
    ``opts.retain_seq_id`` is set and ``append_rc`` otherwise. When no output
    path is given, one is built from the input file's base name with ``_rc``
    inserted before the extension (no directory component is kept).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Read for parity with the other options; not used further down.
    verbose = opts.verbose

    source_fp = opts.input_fasta_fp
    target_fp = opts.output_fp

    describe_seq = null_seq_desc_mapper if opts.retain_seq_id else append_rc

    if not target_fp:
        stem, extension = splitext(split(source_fp)[1])
        target_fp = '%s_rc%s' % (stem, extension)

    rc_fasta_file(source_fp, target_fp, describe_seq)
Beispiel #32
0
def main():
    """Run ``ParallelAlphaDiversity`` over every entry in ``opts.input_path``.

    The parsed options are handed to the runner as a plain dict, with the
    ``metrics`` entry replaced by the comma-joined form of ``opts.metrics``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Snapshot the parsed options as a plain dict. This used to be done with
    # eval(str(opts)), which re-evaluates the options' printed form and so
    # breaks on any value whose repr is not a valid Python expression.
    # NOTE(review): assumes opts is an optparse.Values-like object whose
    # attributes are exactly the parsed options -- confirm.
    params = dict(vars(opts))

    params['metrics'] = ','.join(opts.metrics)

    parallel_runner = ParallelAlphaDiversity(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)
    input_fps = glob(join(opts.input_path, '*'))
    parallel_runner(input_fps,
                    opts.output_path,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)
Beispiel #33
0
def main():
    """Build a ``RarefactionMaker`` and write its rarefied tables to files.

    A non-positive ``opts.step`` is rejected through ``option_parser.error``.
    The output directory is created with ``fail_on_exist=False``. The
    subsampling function is ``subsample_multinomial`` when
    ``opts.subsample_multinomial`` is set, ``subsample`` otherwise.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.step <= 0:
        option_parser.error(
            "nonpositive step not allowed (%s was supplied)" % (opts.step,))

    create_dir(opts.output_path, fail_on_exist=False)
    rarefier = RarefactionMaker(opts.input_path, opts.min, opts.max,
                                opts.step, opts.num_reps)

    sampler = subsample_multinomial if opts.subsample_multinomial \
        else subsample

    rarefier.rarefy_to_files(
        opts.output_path,
        False,
        include_lineages=opts.lineages_included,
        empty_otus_removed=(not opts.keep_empty_otus),
        subsample_f=sampler)
def main():
    """Check the input file paths and option combination, then validate.

    Creates the output directory, confirms that the mapping file (and the
    tree file, if one was given) can be opened, and then calls
    ``validate_fasta`` with the parsed options.

    Raises:
        IOError: if the mapping file or the tree file cannot be opened.
        ValueError: if ``tree_subset`` or ``tree_exact_match`` is requested
            without a tree filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fasta_fp = opts.input_fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    tree_subset = opts.tree_subset
    tree_exact_match = opts.tree_exact_match
    same_seq_lens = opts.same_seq_lens
    all_ids_found = opts.all_ids_found

    create_dir(output_dir)

    # Open-and-close probes: the files are only tested for readability here,
    # so a clearer message can be raised than the bare IOError.
    try:
        with open(mapping_fp, "U"):
            pass
    except IOError:
        raise IOError("Unable to open mapping file, please check "
                      "filepath and read permissions.")

    if tree_fp:
        try:
            with open(tree_fp, "U"):
                pass
        except IOError:
            raise IOError("Unable to open provided tree filepath, please "
                          "check filepath and permissions.")

    if (tree_subset or tree_exact_match) and not tree_fp:
        raise ValueError('Must provide tree filepath if -s or -e options '
                         'are enabled.')

    validate_fasta(
        input_fasta_fp, mapping_fp, output_dir, tree_fp, tree_subset,
        tree_exact_match, same_seq_lens, all_ids_found,
        opts.suppress_barcode_checks, opts.suppress_primer_checks)
Beispiel #35
0
def main():
    """Pick the output directory and run ``prep_sffs_in_dir``.

    When ``opts.output_dir`` is given, an attempt is made to create it;
    otherwise ``opts.output_dir`` is set to ``opts.input_dir``.

    Raises:
        ValueError: if ``--no_trim`` is passed without ``--use_sfftools``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.output_dir:
        # Best-effort creation: any OSError (including "already exists") is
        # deliberately ignored and the directory is used as-is.
        try:
            mkdir(opts.output_dir)
        except OSError:
            pass
    else:
        opts.output_dir = opts.input_dir

    if opts.no_trim and not opts.use_sfftools:
        # Call form of raise: valid on Python 2 and 3 alike.
        raise ValueError(
            "When using the --no_trim option you must have the sfftools installed and must also pass the --use_sfftools option")

    prep_sffs_in_dir(opts.input_dir,
                     opts.output_dir,
                     make_flowgram=opts.make_flowgram,
                     convert_to_flx=opts.convert_to_FLX,
                     use_sfftools=opts.use_sfftools,
                     no_trim=opts.no_trim)
Beispiel #36
0
def main():
    """Sort an OTU table and write it to ``opts.output_fp``.

    The ordering comes from, in priority order:
      1. a mapping-file field (``sort_field`` together with ``mapping_fp``),
      2. a file of sample ids (``sorted_sample_ids_fp``),
      3. ``natsort_case_insensitive`` applied to the table's own ids.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_data = load_table(opts.input_otu_table)
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    # The helper files are kept open until the sorted table has been built
    # and are then closed deterministically.
    if sort_field and mapping_fp:
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data = parse_mapping_file(mapping_f)
            result = sort_otu_table_by_mapping_field(
                otu_table_data, mapping_data, sort_field)
    elif sorted_sample_ids_fp:
        with open(sorted_sample_ids_fp, 'U') as sample_ids_f:
            sorted_sample_ids = sample_ids_from_f(sample_ids_f)
            result = sort_otu_table(otu_table_data, sorted_sample_ids)
    else:
        result = sort_otu_table(
            otu_table_data,
            natsort_case_insensitive(otu_table_data.ids()))

    write_biom_table(result, opts.output_fp)
def main():
    """Run ``ParallelPickOtusBlast`` on ``opts.input_fasta_fp``.

    At least one of ``blast_db`` / ``refseqs_fp`` must be supplied; otherwise
    the problem is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.blast_db is None and opts.refseqs_fp is None:
        option_parser.error('Either blast_db or refseqs_fp must be provided.')

    # Snapshot the parsed options as a plain dict. This used to be done with
    # eval(str(opts)), which re-evaluates the options' printed form and so
    # breaks on any value whose repr is not a valid Python expression.
    # NOTE(review): assumes opts is an optparse.Values-like object whose
    # attributes are exactly the parsed options -- confirm.
    params = dict(vars(opts))

    parallel_runner = ParallelPickOtusBlast(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)
    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)
Beispiel #38
0
def main():
    """Write the records yielded by ``inflate_denoiser_output`` as FASTA.

    Centroid, singleton and original FASTA files are each passed through
    ``parse_fasta`` and chained together, the denoiser map files are chained
    as-is, and every item yielded by ``inflate_denoiser_output`` is written
    to ``opts.output_fasta_fp`` as a ``>label`` line followed by a sequence
    line. All input handles and the output handle are closed when the
    function finishes, whether or not an error occurred.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    opened_files = []

    def open_tracked(fp):
        # Register each input handle so the finally clause can close it.
        handle = open(fp, 'U')
        opened_files.append(handle)
        return handle

    try:
        centroid_seqs = \
            [parse_fasta(open_tracked(e)) for e in opts.centroid_fps]
        singleton_seqs = \
            [parse_fasta(open_tracked(e)) for e in opts.singleton_fps]
        fasta_seqs = \
            [parse_fasta(open_tracked(e)) for e in opts.fasta_fps]
        denoiser_map_fs = \
            [open_tracked(e) for e in opts.denoiser_map_fps]

        # The inputs must stay open while the output is being produced, so
        # writing happens inside the try block.
        with open(opts.output_fasta_fp, 'w') as output_f:
            for s in inflate_denoiser_output(
                    chain.from_iterable(centroid_seqs),
                    chain.from_iterable(singleton_seqs),
                    chain.from_iterable(denoiser_map_fs),
                    chain.from_iterable(fasta_seqs)):
                output_f.write('>%s\n%s\n' % s)
    finally:
        for handle in opened_files:
            handle.close()
Beispiel #39
0
def main():
    """Call ``map_otu_map_files`` on the OTU map files named in the options.

    A ``KeyError`` from the mapping step is turned into an explanatory
    message on stdout followed by ``exit(1)``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_files = [open(fp) for fp in opts.otu_map_fps]
    failures_fp = opts.failures_fp
    # NOTE(review): output_fp and result are not used after this point in
    # this block -- the write step may be missing from this copy.
    output_fp = opts.output_fp
    failures_f = open(failures_fp, 'U') if failures_fp else None

    try:
        result = map_otu_map_files(otu_files, failures_file=failures_f)
    except KeyError as e:
        print(
            'Some keys do not map (' + str(e) + ') -- is the order of'
            ' your OTU maps equivalent to the order in which the OTU pickers'
            ' were run? If expanding a failures file, did you remember to leave'
            ' out the otu map from the run which generated the failures file?')
        exit(1)
Beispiel #40
0
def main():
    """Choose a quality-score parser by file extension and build a histogram.

    Paths ending in ``.fastq`` or ``.fastq.gz`` use
    ``parse_fastq_qual_score``; everything else uses ``parse_qual_score``.
    The output directory is created before ``generate_histogram`` is called.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    qual_fp = opts.qual_fp
    histogram_dir = opts.output_dir
    minimum_score = int(opts.score_min)
    verbose = opts.verbose

    create_dir(histogram_dir)

    is_fastq = qual_fp.endswith(('.fastq', '.fastq.gz'))
    score_parser = parse_fastq_qual_score if is_fastq else parse_qual_score

    generate_histogram(qual_fp, histogram_dir, minimum_score, verbose,
                       qual_parser=score_parser)
Beispiel #41
0
def main():
    """List the normalization algorithms or run one on the input path.

    With ``list_algorithms`` set, only the algorithm names are printed.
    Otherwise ``input_path`` and ``out_path`` are required, and the chosen
    algorithm ('CSS' or 'DESeq2') is run through its multi-file function when
    the input is a directory, or its single-file function when it is a file.
    Anything else is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_path = opts.input_path
    out_path = opts.out_path
    css_statistics = opts.output_CSS_statistics
    negatives_to_zero = opts.DESeq_negatives_to_zero
    algorithm = opts.algorithm

    if opts.list_algorithms:
        print('Available normalization algorithms are:\n%s' %
              ', '.join(algorithm_list()))
        return

    for option in ('input_path', 'out_path'):
        if getattr(opts, option) is None:
            option_parser.error('Required option --%s omitted.' % option)

    # Pick the directory/file implementations and the algorithm-specific
    # third argument up front; the path dispatch below is shared.
    if algorithm == 'CSS':
        normalize_dir = multiple_file_normalize_CSS
        normalize_file = normalize_CSS
        extra_arg = css_statistics
    elif algorithm == 'DESeq2':
        normalize_dir = multiple_file_normalize_DESeq2
        normalize_file = normalize_DESeq2
        extra_arg = negatives_to_zero
    else:
        # it shouldn't be possible to get here
        option_parser.error("Unknown normalization algorithm: %s" %
                            algorithm)
        return

    if os.path.isdir(input_path):
        normalize_dir(input_path, out_path, extra_arg)
    elif os.path.isfile(input_path):
        normalize_file(input_path, out_path, extra_arg)
    else:
        # it shouldn't be possible to get here
        option_parser.error("Unknown input type: %s" % input_path)
Beispiel #42
0
def main():
    """Build job files from a commands file and optionally submit them.

    Expects exactly two positional arguments: the commands file and a job
    prefix of 1-10 characters. The jobs directory is created when missing.
    Job files are produced by ``make_jobs`` and, when ``submit_jobs`` is set,
    handed to ``submit_jobs``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.submit_jobs and not opts.make_jobs:
        option_parser.error('Must pass -m if passing -s. (Sorry about this, '
                            'it\'s for backwards-compatibility.)')

    min_args = 2
    if len(args) != min_args:
        option_parser.error('Program requires <commands file> and '
                            '<job prefix>')

    if (len(args[1]) > 10 or len(args[1]) == 0):
        option_parser.error('job prefix must be 1-10 characters long')

    # Read every command line up front so the file handle can be closed
    # right away.
    with open(args[0]) as commands_f:
        commands = list(commands_f)
    job_prefix = args[1]

    if(not exists(opts.job_dir)):
        try:
            makedirs(opts.job_dir)
        except OSError:
            exit(" Jobs directory can not be created. "
                 "Check for permissions or file with the same name: %s\n"
                 % opts.job_dir)

    if (opts.make_jobs):
        filenames = make_jobs(
            commands,
            job_prefix,
            opts.queue,
            opts.job_dir,
            (str(opts.max_walltime) + ":00:00"),
            opts.cpus,
            opts.nodes)
    else:
        # Reached whenever -m is not passed; the script exits here, so
        # `filenames` is always bound below.
        exit("Should we ever get here???")
    if (opts.submit_jobs):
        submit_jobs(filenames, opts.verbose)
def main():
    """Parse the list-valued options and call ``assign_taxonomy_multiple_times``.

    ``input_dirs`` and ``assignment_methods`` are split on commas;
    ``confidences`` and ``e_values`` are split on commas and converted to
    floats when supplied, and left as ``None`` otherwise. ``print_only`` and
    ``verbose`` select the command handler and the status callback.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_dirs = opts.input_dirs.split(',')
    assignment_methods = opts.assignment_methods.split(',')

    confidences = opts.confidences
    if confidences is not None:
        confidences = [float(c) for c in opts.confidences.split(',')]

    e_values = opts.e_values
    if e_values is not None:
        e_values = [float(e) for e in opts.e_values.split(',')]

    run_commands = print_commands if opts.print_only \
        else call_commands_serially
    report_status = print_to_stdout if opts.verbose else no_status_updates

    assign_taxonomy_multiple_times(
        input_dirs,
        opts.output_dir,
        assignment_methods,
        opts.reference_seqs_fp,
        opts.input_fasta_filename,
        opts.clean_otu_table_filename,
        id_to_taxonomy_fp=opts.id_to_taxonomy_fp,
        confidences=confidences,
        e_values=e_values,
        read_1_seqs_fp=opts.read_1_seqs_fp,
        read_2_seqs_fp=opts.read_2_seqs_fp,
        rdp_max_memory=opts.rdp_max_memory,
        command_handler=run_commands,
        status_update_callback=report_status,
        force=opts.force)
Beispiel #44
0
def main():
    """Parse ``key=value`` parameters and run the manifold computation.

    ``opts.params`` is a comma-separated list of ``key=value`` pairs. A
    directory input is handled by ``multiple_file_manifold``; a file input is
    handled by ``compute_manifold`` and its result is written to
    ``opts.output_path``. Any other input path prints an error and exits
    with status 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    params = {}
    # An omitted or empty option simply yields no parameters.
    if opts.params:
        # Individual parameters are separated by commas
        for pair in opts.params.split(","):
            # Split on the first equals sign only, so values may themselves
            # contain '='.
            key, separator, value = pair.partition("=")
            if not separator:
                option_parser.error(
                    "Malformed parameter %r: expected key=value." % pair)
            params[key] = value

    if os.path.isdir(opts.input_path):
        multiple_file_manifold(opts.input_path, opts.output_path,
                               opts.algorithm, params)
    elif os.path.isfile(opts.input_path):
        manifold_res_string = compute_manifold(opts.input_path,
                                               opts.algorithm, params)

        with open(opts.output_path, 'w') as f:
            f.write(manifold_res_string)
    else:
        print("io error, check input file path")
        exit(1)
def main():
    """Run ``ObservationRichnessEstimator`` and write its results table.

    The estimator is built from the table at ``opts.otu_table_fp`` with
    ``Chao1MultinomialPointEstimator`` and called with the min, max,
    num_steps and confidence_level options. The results are written to
    ``estimates_table.txt`` inside the output directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    output_dir = opts.output_dir
    try:
        create_dir(output_dir)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not turned into a misleading option error.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o/--output_dir option.")

    otu_table_fp = opts.otu_table_fp
    table = load_table(otu_table_fp)

    estimator = ObservationRichnessEstimator(table,
                                             Chao1MultinomialPointEstimator)
    results = estimator(opts.min, opts.max, opts.num_steps,
                        opts.confidence_level)

    out_fp = join(output_dir, 'estimates_table.txt')
    with open(out_fp, 'w') as out_f:
        results.toTable(out_f)
Beispiel #46
0
def main():
    """Run ``ParallelMultipleRarefactions`` on ``opts.input_path``.

    A step size that is not greater than zero is rejected through
    ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Snapshot the parsed options as a plain dict. This used to be done with
    # eval(str(opts)), which re-evaluates the options' printed form and so
    # breaks on any value whose repr is not a valid Python expression.
    # NOTE(review): assumes opts is an optparse.Values-like object whose
    # attributes are exactly the parsed options -- confirm.
    params = dict(vars(opts))

    if not opts.step > 0:
        option_parser.error(("Error: step size must be greater than 0.\n"
                             "If min = max, just leave step size at 1."))

    parallel_runner = ParallelMultipleRarefactions(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)
    parallel_runner(opts.input_path,
                    opts.output_path,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)
Beispiel #47
0
def main():
    """Count sequences in the input files and emit the formatted report.

    The report built by ``format_output`` is written to ``opts.output_fp``
    when that option is set, and printed to stdout otherwise.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    suppress_errors = opts.suppress_errors
    input_fps = opts.input_fps

    output_fp = opts.output_fp

    count_data, total, inaccessible_filepaths = count_seqs_in_filepaths(
        input_fps)
    r = format_output(
        count_data,
        total,
        inaccessible_filepaths,
        suppress_errors)

    if output_fp:
        # The context manager closes the handle even if the write fails.
        with open(output_fp, 'w') as f:
            f.write(r)
    else:
        print(r)
def main():
    """Group sequence ids from a FASTA file and write one id file per group.

    If a screened representative-sequence file is supplied, the ids returned
    by ``get_first_id`` for it are looked up in the OTU file, and the
    sequence ids listed on matching lines are passed to ``get_ids`` as bad
    ids. Each key returned by ``get_ids`` (plus an always-present
    'Unassigned' key) gets a ``<key>.txt`` file in the output directory
    holding its sorted ids, one per line.

    Raises:
        RuntimeError: if a screen is requested without an OTU file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # NOTE(review): the command line is parsed a second time here and the
    # rest of the function reads this result rather than `opts` -- confirm
    # whether the re-parse is still needed.
    options, args = option_parser.parse_args()
    if options.debug:
        print("PRODUCING DEBUG OUTPUT")

    bad_seq_ids = set()
    bad_otu_ids = None

    # if we got a file to screen against, find the relevant ids and delete them
    if options.screened_rep_seqs:
        # Validate before touching any file.
        if not options.otus:
            raise RuntimeError(
                "Must specify an OTU file if performing a screen.")
        with open(options.screened_rep_seqs, 'U') as screened_f:
            bad_otu_ids = get_first_id(screened_f)
        with open(options.otus, 'U') as otus_f:
            for line in otus_f:
                fields = line.split()
                # Blank lines produce no fields and are skipped.
                if fields and fields[0] in bad_otu_ids:
                    bad_seq_ids.update(fields[1:])

    if options.debug:
        if bad_otu_ids is not None:
            print("Found %s bad otu ids: %s" % (len(bad_otu_ids), bad_otu_ids))
        print("Found %s bad seq ids: %s" % (len(bad_seq_ids), bad_seq_ids))

    with open(options.in_fasta, 'U') as fasta_f:
        ids = get_ids(fasta_f, options.field, bad_seq_ids, options.debug)

    # add empty unassigned ids for file creation
    if 'Unassigned' not in ids:
        ids['Unassigned'] = []

    if not exists(options.outdir):
        makedirs(options.outdir)
    for k, idlist in ids.items():
        with open(join(options.outdir, k + '.txt'), 'w') as outfile:
            outfile.write('\n'.join(sorted(idlist)))
def main():
    """Run the script usage tests and print the summary when verbose.

    ``opts.tests``, when given, is a comma-separated list of test names;
    trailing slashes are stripped from each name before it is passed on.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    qiime_test_data_dir = opts.qiime_test_data_dir
    qiime_scripts_dir = opts.qiime_scripts_dir
    working_dir = opts.working_dir
    verbose = opts.verbose
    tests = opts.tests
    if tests is not None:
        tests = [e.rstrip('/') for e in tests.split(',')]
    failure_log_fp = opts.failure_log_fp

    # NOTE(review): num_failures is returned but not used in this block.
    result_summary, num_failures = run_script_usage_tests(
        qiime_test_data_dir,
        qiime_scripts_dir,
        working_dir,
        verbose=verbose,
        tests=tests,
        failure_log_fp=failure_log_fp)
    if verbose:
        print(result_summary)
Beispiel #50
0
def main():
    """Forward the parsed options to ``generate_most_wanted_list``.

    ``print_only`` selects between printing commands and running them
    serially; ``verbose`` selects between status output on stdout and none.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    run_commands = print_commands if opts.print_only \
        else call_commands_serially
    report_status = print_to_stdout if opts.verbose else no_status_updates

    generate_most_wanted_list(
        opts.output_dir,
        opts.otu_table_fps,
        opts.rep_set_fp,
        opts.gg_fp,
        opts.nt_fp,
        opts.mapping_fp,
        opts.mapping_category,
        opts.top_n,
        opts.min_abundance,
        opts.max_abundance,
        opts.min_categories,
        opts.num_categories_to_plot,
        opts.max_gg_similarity,
        opts.max_nt_similarity,
        opts.e_value,
        opts.word_size,
        opts.merged_otu_table_fp,
        opts.suppress_taxonomic_output,
        opts.jobs_to_start,
        run_commands,
        report_status,
        opts.force)
Beispiel #51
0
def main():
    """Create the network output directories and call ``create_network_and_stats``.

    The output directory gets an ``otu_network`` subdirectory with ``props``
    and ``stats`` below it. The mapping file is read fully into a list of
    lines before the network is built.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # ball_scale and arrow_colors are returned by the helper but are not
    # used in this block.
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    dir_path = opts.output_dir

    create_dir(dir_path)
    create_dir(os.path.join(dir_path, "otu_network"))
    create_dir(os.path.join(dir_path, "otu_network/props"))
    create_dir(os.path.join(dir_path, "otu_network/stats"))

    # readlines() is eager, so the handle can be closed immediately.
    with open(opts.map_fname, 'U') as map_f:
        map_lines = map_f.readlines()

    create_network_and_stats(
        dir_path,
        map_lines,
        opts.input_fp,
        prefs,
        data,
        background_color,
        label_color)
def main():
    """List the differential abundance algorithms or run one.

    With ``list_algorithms`` set, only the algorithm names are printed.
    Otherwise the input/output paths and all mapping-file options are
    required, and the chosen algorithm ('metagenomeSeq_fitZIG' or
    'DESeq2_nbinom') is run through its multi-file function when the input
    is a directory, or its single-file function when it is a file. Anything
    else is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_path = opts.input_path
    out_path = opts.out_path
    algorithm = opts.algorithm
    mapping_fp = opts.mapping_file_path
    mapping_category = opts.mapping_file_category
    subcategory_1 = opts.mapping_file_subcategory_1
    subcategory_2 = opts.mapping_file_subcategory_2
    list_algorithms = opts.list_algorithms
    DESeq2_diagnostic_plots = opts.DESeq2_diagnostic_plots

    if list_algorithms:
        print('Available differential abundance algorithms are:\n%s' %
              ', '.join(algorithm_list()))
    else:
        almost_required_options = ['input_path',
                                   'out_path',
                                   'mapping_file_path',
                                   'mapping_file_category',
                                   'mapping_file_subcategory_1',
                                   'mapping_file_subcategory_2']
        for option in almost_required_options:
            if getattr(opts, option) is None:
                option_parser.error('Required option --%s omitted.' % option)
        if algorithm == 'metagenomeSeq_fitZIG':
            if os.path.isdir(input_path):
                multiple_file_DA_fitZIG(input_path, out_path, mapping_fp,
                                        mapping_category, subcategory_1,
                                        subcategory_2)
            elif os.path.isfile(input_path):
                DA_fitZIG(input_path, out_path, mapping_fp, mapping_category,
                          subcategory_1, subcategory_2)
            else:
                # it shouldn't be possible to get here
                option_parser.error("Unknown input type: %s" % input_path)
        elif algorithm == 'DESeq2_nbinom':
            if os.path.isdir(input_path):
                multiple_file_DA_DESeq2(input_path, out_path, mapping_fp,
                                        mapping_category, subcategory_1,
                                        subcategory_2,
                                        DESeq2_diagnostic_plots)
            elif os.path.isfile(input_path):
                DA_DESeq2(input_path, out_path, mapping_fp, mapping_category,
                          subcategory_1, subcategory_2,
                          DESeq2_diagnostic_plots)
            else:
                # it shouldn't be possible to get here
                option_parser.error("Unknown input type: %s" % input_path)
        else:
            # it shouldn't be possible to get here
            # The message names the algorithm family this function actually
            # dispatches on, matching the listing message above.
            option_parser.error(
                "Unknown differential abundance algorithm: %s" % algorithm)
Beispiel #53
0
def main():
    """Collapse samples of a BIOM table and write the table and mapping file.

    ``collapse_samples`` is called with the loaded table, the open mapping
    file, the comma-split ``collapse_fields`` and the collapse mode. When
    ``normalize`` is set the collapsed table is normalized in place along
    the sample axis. The table is written to ``output_biom_fp`` and the
    lines from ``mapping_lines_from_collapsed_df`` to ``output_mapping_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    mapping_fp = opts.mapping_fp
    collapse_fields = opts.collapse_fields.split(',')
    input_biom_fp = opts.input_biom_fp
    collapse_mode = opts.collapse_mode
    output_biom_fp = opts.output_biom_fp
    output_mapping_fp = opts.output_mapping_fp
    normalize = opts.normalize

    # NOTE(review): the mapping handle is closed once collapse_samples
    # returns; this assumes the returned objects no longer read from it.
    with open(mapping_fp, 'U') as mapping_f:
        collapsed_metadata, collapsed_table = \
            collapse_samples(load_table(input_biom_fp),
                             mapping_f,
                             collapse_fields,
                             collapse_mode)

    if normalize:
        collapsed_table.norm(axis='sample', inplace=True)

    write_biom_table(collapsed_table, output_biom_fp)
    output_map_lines = mapping_lines_from_collapsed_df(collapsed_metadata)
    with open(output_mapping_fp, 'w') as output_mapping_f:
        output_mapping_f.write('\n'.join(output_map_lines))
Beispiel #54
0
def main():
    """Report the between/within dissimilarity ratio for a category.

    The distance matrix and mapping file are parsed and handed to
    ``clust_qual_ratio``. With ``opts.short`` only the ratio of the two
    means is written; otherwise the ratio is followed by the mean, standard
    deviation and count of each group of distances. Output goes to
    ``opts.output_path`` when given, to stdout otherwise.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    write_to_file = opts.output_path is not None
    outf = open(opts.output_path, 'w') if write_to_file else sys.stdout

    def summary_line(values):
        # Tab-separated mean, standard deviation and count.
        return '\t'.join(
            map(str, [numpy.mean(values), numpy.std(values), len(values)]))

    try:
        # NOTE(review): the input handles are closed as soon as each parser
        # returns; this assumes both parsers read their input eagerly.
        with open(opts.input_path, 'U') as dist_f:
            dists = parse_distmat(dist_f)
        with open(opts.map, 'U') as map_f:
            map_data = parse_mapping_file_to_dict(map_f)
        diff_dists, same_dists = clust_qual_ratio(dists, map_data,
                                                  opts.category)

        ratio = numpy.mean(diff_dists) / numpy.mean(same_dists)

        if opts.short:
            outf.write('%s\n' % (ratio,))
        else:
            outf.write("dissimilarity ratio between/within (large for clustered data):\n")
            outf.write('%s\n' % (ratio,))
            outf.write("dissimilarities between clusters: mean, std, num:\n")
            outf.write(summary_line(diff_dists) + '\n')
            outf.write("dissimilarities within clusters: mean, std, num:\n")
            outf.write(summary_line(same_dists) + '\n')
    finally:
        # Only close what this function opened; stdout is left alone.
        if write_to_file:
            outf.close()
Beispiel #55
0
def main():
    """Validate the option combination, open the inputs and extract barcodes.

    ``--attempt_read_reorientation`` requires a mapping file, and an
    ``input_type`` of ``barcode_paired_end`` requires ``--fastq2``; both are
    reported through ``option_parser.error``. Header matching is always
    disabled when no second fastq file is given.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.attempt_read_reorientation and not opts.mapping_fp:
        option_parser.error(
            "To use --attempt_read_reorientation, one must "
            "supply a mapping file that contains both LinkerPrimerSequence "
            "and ReversePrimer columns.")
    if opts.input_type == "barcode_paired_end" and not opts.fastq2:
        option_parser.error(
            "To use input_type of barcode_paired_end, "
            "a second fastq file must be specified with --fastq2")

    # With a single read file there is no second header to compare against.
    skip_header_match = opts.disable_header_match if opts.fastq2 else True

    reads_1 = qiime_open(opts.fastq1)
    reads_2 = qiime_open(opts.fastq2) if opts.fastq2 else None
    create_dir(opts.output_dir)
    mapping_f = qiime_open(opts.mapping_fp) if opts.mapping_fp else None

    extract_barcodes(reads_1, reads_2, opts.output_dir, opts.input_type,
                     opts.bc1_len, opts.bc2_len, opts.rev_comp_bc1,
                     opts.rev_comp_bc2, opts.char_delineator,
                     opts.switch_bc_order, mapping_f,
                     opts.attempt_read_reorientation, skip_header_match)
Beispiel #56
0
def main():
    """Run every ``test_*.py`` file under this script's directory.

    Each test file is run as ``python <file> -v`` with stderr merged into
    stdout, and its output is printed. A run whose output does not end in
    ``OK`` counts as failed, unless the output mentions
    ``ApplicationNotFoundError``, in which case it is recorded separately.
    The failed test files are listed at the end.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    test_dir = abspath(dirname(__file__))

    # Raw string so that \s reaches the regex engine unchanged.
    unittest_good_pattern = re.compile(r'OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    # NOTE(review): collected below but not reported anywhere in this block.
    missing_application_tests = []

    # Collect the test files first so they can be run in sorted order.
    unittest_names = []

    for root, dirs, files in walk(test_dir):
        for name in files:
            if name.startswith('test_') and name.endswith('.py'):
                unittest_names.append(join(root, name))

    unittest_names.sort()

    for unittest_name in unittest_names:
        print("Testing %s:\n" % unittest_name)
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact; communicate() also waits for the child.
        process = Popen([python_name, unittest_name, '-v'],
                        universal_newlines=True,
                        stdout=PIPE, stderr=STDOUT)
        result = process.communicate()[0]
        print(result)
        if not unittest_good_pattern.search(result):
            if application_not_found_pattern.search(result):
                missing_application_tests.append(unittest_name)
            else:
                bad_tests.append(unittest_name)

    if bad_tests:
        print("\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests))
def main():
    """Run ``pcoa`` on one distance matrix or on every file in a directory.

    When ``opts.input_path`` is a directory, each non-hidden regular entry
    in it is processed and its result is written to
    ``pcoa_<basename>.txt`` in ``opts.output_path``, which is created when
    missing. Otherwise the single input file is processed and the result is
    written to ``opts.output_path``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_path = opts.input_path
    output_path = opts.output_path

    if isdir(input_path):
        # Run PCoA on all distance matrices in the input dir
        # Create the output directory if it does not exists
        if not exists(output_path):
            makedirs(output_path)

        # Skip hidden entries and subdirectories. The directory test has to
        # be made on the joined path: listdir() yields bare names, which
        # would otherwise be resolved against the current working directory.
        file_names = [fname for fname in listdir(input_path)
                      if not (fname.startswith('.') or
                              isdir(join(input_path, fname)))]

        # Loop through all the input files
        for fname in file_names:
            # Get the path to the input distance matrix
            infile = join(input_path, fname)

            # Run PCoA on the input distance matrix
            with open(infile, 'U') as lines:
                pcoa_scores = pcoa(lines)

            # Store the PCoA results on the output directory
            base_fname, ext = splitext(fname)
            out_file = join(output_path, 'pcoa_%s.txt' % base_fname)
            with open(out_file, 'w') as f:
                pcoa_scores.to_file(f)

    else:
        # Run PCoA on the input distance matrix
        with open(input_path, 'U') as f:
            pcoa_scores = pcoa(f)
        # Store the results in the output file
        with open(output_path, 'w') as f:
            pcoa_scores.to_file(f)
def main():
    """Create the output directory and call ``pick_nested_reference_otus``.

    ``similarity_thresholds`` is a comma-separated list converted to
    integers. If the output directory cannot be created, a message is
    printed and the script exits with status 1.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    fasta_fp = opts.input_fasta_fp
    tree_fp = opts.input_tree_fp
    destination = opts.output_dir
    run_id = opts.run_id
    thresholds = [int(t) for t in opts.similarity_thresholds.split(',')]
    print_only = opts.print_only

    try:
        makedirs(destination)
    except OSError:
        # Any OSError from makedirs is reported as "already exists".
        print("Output directory already exists. Please choose "
              "a different directory, or force overwrite with -f.")
        exit(1)

    run_commands = print_commands if print_only else call_commands_serially
    report_status = print_to_stdout if verbose else no_status_updates

    pick_nested_reference_otus(input_fasta_fp=fasta_fp,
                               input_tree_fp=tree_fp,
                               output_dir=destination,
                               run_id=run_id,
                               similarity_thresholds=thresholds,
                               command_handler=run_commands,
                               status_update_callback=report_status)
def main():
    """Run ``ParallelBlastTaxonomyAssigner`` on ``opts.input_fasta_fp``.

    Either reference sequences or a blast database must be supplied;
    otherwise the problem is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if not (opts.reference_seqs_fp or opts.blast_db):
        option_parser.error('Either a blast db (via -b) or a collection of '
                            'reference sequences (via -r) must be passed to '
                            'assign taxonomy using blast.')

    # Snapshot the parsed options as a plain dict. This used to be done with
    # eval(str(opts)), which re-evaluates the options' printed form and so
    # breaks on any value whose repr is not a valid Python expression.
    # NOTE(review): assumes opts is an optparse.Values-like object whose
    # attributes are exactly the parsed options -- confirm.
    params = dict(vars(opts))

    parallel_runner = ParallelBlastTaxonomyAssigner(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    # Job submission is never suppressed here: the flag is hard-coded.
    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=False)
Beispiel #60
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    outfile = open(opts.output_biom_fp, 'w')

    if not opts.taxonomy_fname:
        otu_to_taxonomy = None
    else:
        infile = open(opts.taxonomy_fname, 'U')
        otu_to_taxonomy = parse_taxonomy(infile)

    ids_to_exclude = []
    if exclude_otus_fp:
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            ids_to_exclude = \
                get_seq_ids_from_fasta_file(open(exclude_otus_fp, 'U'))
        else:
            ids_to_exclude = \
                get_seq_ids_from_seq_id_file(open(exclude_otus_fp, 'U'))
    biom_otu_table = make_otu_table(open(opts.otu_map_fp, 'U'),
                                    otu_to_taxonomy, ids_to_exclude)
    outfile.write(biom_otu_table)