Beispiel #1
0
def get_sorted_counts_per_sample(biom_table, reverse=False):
    """Return (count, sample id) pairs ordered by per-sample count.

    inputs:
    biom_table: biom table object, handed to compute_seqs_per_library_stats
    reverse: when true, order the pairs from max to min instead of from
    min to max

    outputs:
    list of tuples [(seqs/sample, sampleId)... ] sorted on the tuple itself,
    i.e. by count first with the sample id breaking ties
    """
    # the per-sample count mapping is the fifth element of the stats result
    per_sample = compute_seqs_per_library_stats(biom_table)[4]

    ordered = sorted((count, sample_id)
                     for sample_id, count in per_sample.items())

    return ordered[::-1] if reverse else ordered
def get_sorted_counts_per_sample(biom_table, reverse=False):
    """Build a count-ordered list of (seqs/sample, sampleId) tuples.

    inputs:
    biom_table: biom table object
    reverse: reverse the ordering, i.e. go from max to min rather than
    from min to max

    outputs:
    list of (seqs/sample, sampleId) tuples in ascending order, or in
    descending order when reverse is true
    """
    stats = compute_seqs_per_library_stats(biom_table)
    counts_by_sample = stats[4]

    # put the count first so that tuple comparison orders on it
    ascending = sorted(
        [(n_seqs, sample) for sample, n_seqs in counts_by_sample.items()])

    if not reverse:
        return ascending
    return list(reversed(ascending))
Beispiel #3
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table_fp = opts.otu_table_fp
    otu_table = parse_biom_table(qiime_open(otu_table_fp))
    min_counts, max_counts, median_counts, mean_counts, counts_per_sample = compute_seqs_per_library_stats(
        otu_table, opts.num_otus
    )
    num_otus = len(otu_table.ObservationIds)

    counts_per_sample_values = counts_per_sample.values()
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)

    num_samples = len(counts_per_sample)
    print "Num samples: %s" % str(num_samples)
    print "Num otus: %s" % str(num_otus)
    if not opts.num_otus:
        num_observations = sum(counts_per_sample_values)
        print "Num observations (sequences): %s" % str(num_observations)
        # port denisty functionality to a tested function. the following is broken (should be
        # count of non-zero cells rather than number of observations in the numerator)
        # print 'Table density (fraction of non-zero values): %1.4f' % (num_observations/(num_samples * num_otus))
    print

    if opts.num_otus:
        print "OTUs/sample summary:"
    else:
        print "Seqs/sample summary:"
    print " Min: %s" % str(min_counts)
    print " Max: %s" % str(max_counts)
    print " Median: %s" % str(median_counts)
    print " Mean: %s" % str(mean_counts)
    print " Std. dev.: %s" % (str(std(counts_per_sample_values)))
    print " Median Absolute Deviation: %s" % str(med_abs_dev)
    print " Default even sampling depth in\n  core_qiime_analyses.py (just a suggestion): %s" % str(even_sampling_depth)
    print ""
    if opts.num_otus:
        print "OTUs/sample detail:"
    else:
        print "Seqs/sample detail:"
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    total_count = 0
    for v, k in sorted_counts_per_sample:
        total_count += v
        print " %s: %s" % (k, str(v))

    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError("input mapping file supplied, but no path to" + " output file")
        f = open(opts.mapping_fp, "U")
        mapping_lines, headers, comments = parse_mapping_file(f)
        f.close()
        if len(headers) == 1:
            endoffset = 0  # if we only have the sample id, this data -> last col
        else:
            endoffset = 1  # usually make this data the penultimate column.
        headers.insert(len(headers) - endoffset, "NumIndividuals")
        for map_line in mapping_lines:
            sample_id = map_line
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                depth = "na"
            map_line.insert(len(map_line) - endoffset, depth)

        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        f = open(opts.output_mapping_fp, "w")
        f.write(new_map_str)
        f.close()
Beispiel #4
0
def main():
    option_parser, opts,args = parse_command_line_parameters(**script_info)
    otu_table_fp = opts.otu_table_fp
    otu_table = parse_biom_table(qiime_open(otu_table_fp))
    min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
     compute_seqs_per_library_stats(otu_table, opts.num_otus)
    num_otus = len(otu_table.ObservationIds)
    
    counts_per_sample_values = counts_per_sample.values()
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)
    
    try:
        sample_md_keys = otu_table.SampleMetadata[0].keys()
    except TypeError:
        sample_md_keys = ["None provided"]
    try:
        observation_md_keys = otu_table.ObservationMetadata[0].keys()
    except TypeError:
        observation_md_keys = ["None provided"]
    
    num_samples = len(counts_per_sample)
    print 'Num samples: %s' % str(num_samples)
    print 'Num otus: %s' % str(num_otus)
    if not opts.num_otus:
        num_observations = sum(counts_per_sample_values)
        print 'Num observations (sequences): %s' % str(num_observations)
        print 'Table density (fraction of non-zero values): %1.4f' % \
              otu_table.getTableDensity()
    print

    if opts.num_otus:
        print 'OTUs/sample summary:'
    else:
        print 'Seqs/sample summary:' 
    print ' Min: %s' % str(min_counts)
    print ' Max: %s' % str(max_counts)
    print ' Median: %s' % str(median_counts)
    print ' Mean: %s' % str(mean_counts)
    print ' Std. dev.: %s' % (str(std(counts_per_sample_values)))
    print ' Median Absolute Deviation: %s' % str(med_abs_dev)
    print ' Default even sampling depth in\n  core_qiime_analyses.py (just a suggestion): %s' %\
     str(even_sampling_depth)
    print ' Sample Metadata Categories: %s' % '; '.join(sample_md_keys)
    print ' Observation Metadata Categories: %s' % '; '.join(observation_md_keys)
     
    print ''
    if opts.num_otus:
        print 'OTUs/sample detail:'
    else:
        print 'Seqs/sample detail:'
    sorted_counts_per_sample = [(v,k) for k,v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    total_count = 0
    for v,k in sorted_counts_per_sample:
        total_count += v
        print ' %s: %s' % (k,str(v))

    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError('input mapping file supplied, but no path to'+\
             ' output file')
        f = open(opts.mapping_fp,'U')
        mapping_lines, headers, comments = parse_mapping_file(f)
        f.close()
        if len(headers)==1:
            endoffset = 0 # if we only have the sample id, this data -> last col
        else:
            endoffset = 1 # usually make this data the penultimate column.
        headers.insert(len(headers)-endoffset,'SequenceCount')
        for map_line in mapping_lines:
            sample_id = map_line
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                depth = 'na'
            map_line.insert(len(map_line)-endoffset,depth)

        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        f = open(opts.output_mapping_fp, 'w')
        f.write(new_map_str)
        f.close()
Beispiel #5
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table_fp = opts.otu_table_fp
    otu_table = parse_biom_table(qiime_open(otu_table_fp))
    min_counts, max_counts, median_counts, mean_counts, counts_per_sample =\
     compute_seqs_per_library_stats(otu_table, opts.num_otus)
    num_otus = len(otu_table.ObservationIds)

    counts_per_sample_values = counts_per_sample.values()
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)

    num_samples = len(counts_per_sample)
    print 'Num samples: %s' % str(num_samples)
    print 'Num otus: %s' % str(num_otus)
    if not opts.num_otus:
        num_observations = sum(counts_per_sample_values)
        print 'Num observations (sequences): %s' % str(num_observations)
        # port denisty functionality to a tested function. the following is broken (should be
        # count of non-zero cells rather than number of observations in the numerator)
        #print 'Table density (fraction of non-zero values): %1.4f' % (num_observations/(num_samples * num_otus))
    print

    if opts.num_otus:
        print 'OTUs/sample summary:'
    else:
        print 'Seqs/sample summary:'
    print ' Min: %s' % str(min_counts)
    print ' Max: %s' % str(max_counts)
    print ' Median: %s' % str(median_counts)
    print ' Mean: %s' % str(mean_counts)
    print ' Std. dev.: %s' % (str(std(counts_per_sample_values)))
    print ' Median Absolute Deviation: %s' % str(med_abs_dev)
    print ' Default even sampling depth in\n  core_qiime_analyses.py (just a suggestion): %s' %\
     str(even_sampling_depth)
    print ''
    if opts.num_otus:
        print 'OTUs/sample detail:'
    else:
        print 'Seqs/sample detail:'
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    total_count = 0
    for v, k in sorted_counts_per_sample:
        total_count += v
        print ' %s: %s' % (k, str(v))

    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError('input mapping file supplied, but no path to'+\
             ' output file')
        f = open(opts.mapping_fp, 'U')
        mapping_lines, headers, comments = parse_mapping_file(f)
        f.close()
        if len(headers) == 1:
            endoffset = 0  # if we only have the sample id, this data -> last col
        else:
            endoffset = 1  # usually make this data the penultimate column.
        headers.insert(len(headers) - endoffset, 'NumIndividuals')
        for map_line in mapping_lines:
            sample_id = map_line
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                depth = 'na'
            map_line.insert(len(map_line) - endoffset, depth)

        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        f = open(opts.output_mapping_fp, 'w')
        f.write(new_map_str)
        f.close()
Beispiel #6
0
def run_alpha_rarefaction(otu_table_fp,
                          mapping_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          tree_fp=None,
                          num_steps=10,
                          parallel=False,
                          logger=None,
                          min_rare_depth=10,
                          max_rare_depth=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False):
    """ Run the alpha rarefaction workflow of Qiime

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.

        The function only creates the output directories and builds the
        command lines; the commands are executed by command_handler.

        otu_table_fp: path to the input OTU table
        mapping_fp: path to the mapping file passed to the plotting script
        output_dir: directory under which all results are written
        command_handler: callable invoked as command_handler(commands,
         status_update_callback, logger=..., close_logger_on_success=...)
        params: mapping of script name -> parameters for that script; the
         'multiple_rarefactions', 'alpha_diversity', 'collate_alpha' and
         'make_rarefaction_plots' sections are optional, 'parallel' is
         required when parallel is true
        qiime_config: mapping that must provide 'python_exe_fp'
        tree_fp: optional tree path, passed to alpha diversity via -t
        num_steps: number of rarefaction steps used to derive the step size
        parallel: use the parallel_* versions of the scripts
        logger: logger to use; when None a WorkflowLogger is created in
         output_dir and closed on success
        min_rare_depth: smallest rarefaction depth
        max_rare_depth: largest rarefaction depth; when None the median
         count per sample of the OTU table is used
        suppress_md5: skip logging md5 sums of the input files
        status_update_callback: forwarded to command_handler
        plot_stderr_and_stddev: generate one set of plots with --std_type
         stddev and one with --std_type stderr, unless 'std_type' is already
         set in params['make_rarefaction_plots']

    """
    # Prepare some variables for the later steps
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        # Default to the median count per sample. The table is parsed and
        # summarized inside the with-block so the file handle is released
        # as soon as the statistics are available.
        with open(otu_table_fp, 'U') as otu_table_f:
            min_count, max_count, median_count, mean_count, counts_per_sample =\
             compute_seqs_per_library_stats(parse_biom_table(otu_table_f))
        max_rare_depth = median_count
    # a step of 0 would never advance, so fall back to 1
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    # Prep the rarefaction command
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        rarefaction_script = 'parallel_multiple_rarefactions.py -T'
    else:
        rarefaction_script = 'multiple_rarefactions.py'
    # Build the rarefaction command
    rarefaction_cmd = '%s %s/%s -i %s -m %s -x %s -s %s -o %s %s' %\
     (python_exe_fp, script_dir, rarefaction_script, otu_table_fp,
      min_rare_depth, max_rare_depth, step, rarefaction_dir, params_str)
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Prep the alpha diversity command
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        alpha_diversity_script = 'parallel_alpha_diversity.py -T'
    else:
        alpha_diversity_script = 'alpha_diversity.py'
    # Build the alpha diversity command
    alpha_diversity_cmd = "%s %s/%s -i %s -o %s %s" %\
     (python_exe_fp, script_dir, alpha_diversity_script, rarefaction_dir,
      alpha_diversity_dir, params_str)
    commands.append(
     [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Prep the alpha diversity collation command
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    # Build the alpha diversity collation command
    alpha_collated_cmd = '%s %s/collate_alpha.py -i %s -o %s %s' %\
     (python_exe_fp, script_dir, alpha_diversity_dir,
      alpha_collated_dir, params_str)
    commands.append([('Collate alpha', alpha_collated_cmd)])

    # Prep the make rarefaction plot command(s)
    try:
        params_str = get_params_str(params['make_rarefaction_plots'])
    except KeyError:
        params_str = ''

    # The 'make_rarefaction_plots' section is optional (see the except
    # clause above), so it must not be indexed directly here: on a plain
    # dict without that section the lookup would raise KeyError.
    std_type_in_params = \
     'std_type' in params.get('make_rarefaction_plots', {})

    if std_type_in_params or not plot_stderr_and_stddev:
        # a single set of plots, with std_type left to params_str
        plot_jobs = [('%s/alpha_rarefaction_plots/' % output_dir, '')]
    else:
        # one set of plots per std_type, each in its own directory
        plot_jobs = [
         ('%s/alpha_rarefaction_plots_stddev/' % output_dir,
          ' --std_type stddev'),
         ('%s/alpha_rarefaction_plots_stderr/' % output_dir,
          ' --std_type stderr')]

    for rarefaction_plot_dir, std_type_arg in plot_jobs:
        create_dir(rarefaction_plot_dir)

    # Build the make rarefaction plot command(s)
    for rarefaction_plot_dir, std_type_arg in plot_jobs:
        make_rarefaction_plot_cmd =\
         '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s%s' %\
         (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
          rarefaction_plot_dir, params_str, std_type_arg)
        commands.append(
         [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Beispiel #7
0
def run_alpha_rarefaction(otu_table_fp,
                          mapping_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          tree_fp=None,
                          num_steps=10,
                          parallel=False,
                          logger=None,
                          min_rare_depth=10,
                          max_rare_depth=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False):
    """ Run the alpha rarefaction workflow of Qiime

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.

        The function only creates the output directories and builds the
        command lines; the commands are executed by command_handler.

        otu_table_fp: path to the input OTU table
        mapping_fp: path to the mapping file passed to the plotting script
        output_dir: directory under which all results are written
        command_handler: callable invoked as command_handler(commands,
         status_update_callback, logger=..., close_logger_on_success=...)
        params: mapping of script name -> parameters for that script; the
         'multiple_rarefactions', 'alpha_diversity', 'collate_alpha' and
         'make_rarefaction_plots' sections are optional, 'parallel' is
         required when parallel is true
        qiime_config: mapping that must provide 'python_exe_fp'
        tree_fp: optional tree path, passed to alpha diversity via -t
        num_steps: number of rarefaction steps used to derive the step size
        parallel: use the parallel_* versions of the scripts
        logger: logger to use; when None a WorkflowLogger is created in
         output_dir and closed on success
        min_rare_depth: smallest rarefaction depth
        max_rare_depth: largest rarefaction depth; when None the median
         count per sample of the OTU table is used
        suppress_md5: skip logging md5 sums of the input files
        status_update_callback: forwarded to command_handler
        plot_stderr_and_stddev: generate one set of plots with --std_type
         stddev and one with --std_type stderr, unless 'std_type' is already
         set in params['make_rarefaction_plots']

    """
    # Prepare some variables for the later steps
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        # Default to the median count per sample. The table is parsed and
        # summarized inside the with-block so the file handle is released
        # as soon as the statistics are available.
        with open(otu_table_fp, 'U') as otu_table_f:
            min_count, max_count, median_count, mean_count, counts_per_sample =\
             compute_seqs_per_library_stats(parse_biom_table(otu_table_f))
        max_rare_depth = median_count
    # a step of 0 would never advance, so fall back to 1
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    # Prep the rarefaction command
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        rarefaction_script = 'parallel_multiple_rarefactions.py -T'
    else:
        rarefaction_script = 'multiple_rarefactions.py'
    # Build the rarefaction command
    rarefaction_cmd = '%s %s/%s -i %s -m %s -x %s -s %s -o %s %s' %\
     (python_exe_fp, script_dir, rarefaction_script, otu_table_fp,
      min_rare_depth, max_rare_depth, step, rarefaction_dir, params_str)
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Prep the alpha diversity command
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        alpha_diversity_script = 'parallel_alpha_diversity.py -T'
    else:
        alpha_diversity_script = 'alpha_diversity.py'
    # Build the alpha diversity command
    alpha_diversity_cmd = "%s %s/%s -i %s -o %s %s" %\
     (python_exe_fp, script_dir, alpha_diversity_script, rarefaction_dir,
      alpha_diversity_dir, params_str)
    commands.append(
     [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Prep the alpha diversity collation command
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    # Build the alpha diversity collation command
    alpha_collated_cmd = '%s %s/collate_alpha.py -i %s -o %s %s' %\
     (python_exe_fp, script_dir, alpha_diversity_dir,
      alpha_collated_dir, params_str)
    commands.append([('Collate alpha', alpha_collated_cmd)])

    # Prep the make rarefaction plot command(s)
    try:
        params_str = get_params_str(params['make_rarefaction_plots'])
    except KeyError:
        params_str = ''

    # The 'make_rarefaction_plots' section is optional (see the except
    # clause above), so it must not be indexed directly here: on a plain
    # dict without that section the lookup would raise KeyError.
    std_type_in_params = \
     'std_type' in params.get('make_rarefaction_plots', {})

    if std_type_in_params or not plot_stderr_and_stddev:
        # a single set of plots, with std_type left to params_str
        plot_jobs = [('%s/alpha_rarefaction_plots/' % output_dir, '')]
    else:
        # one set of plots per std_type, each in its own directory
        plot_jobs = [
         ('%s/alpha_rarefaction_plots_stddev/' % output_dir,
          ' --std_type stddev'),
         ('%s/alpha_rarefaction_plots_stderr/' % output_dir,
          ' --std_type stderr')]

    for rarefaction_plot_dir, std_type_arg in plot_jobs:
        create_dir(rarefaction_plot_dir)

    # Build the make rarefaction plot command(s)
    for rarefaction_plot_dir, std_type_arg in plot_jobs:
        make_rarefaction_plot_cmd =\
         '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s%s' %\
         (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
          rarefaction_plot_dir, params_str, std_type_arg)
        commands.append(
         [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)