Example #1
0
def get_coefficients(run, key):
    """Return the Pearson and Spearman coefficients for a paired comparison.

    ``compare_taxa_summaries`` is called twice on ``run`` and ``key`` in
    'paired' mode, once per correlation type. For each call, the third
    element of the result is split on newlines and the first
    whitespace-delimited token of its second-to-last line is extracted.

    Presumably ``run`` and ``key`` are parsed taxa summary tables and the
    third result element is the overall comparison report -- confirm against
    ``compare_taxa_summaries``.

    Returns a ``(pearson_coeff, spearman_coeff)`` tuple. Both values are the
    raw string tokens; no numeric conversion is performed.
    """
    # Run both comparisons before parsing either report.
    comparisons = [compare_taxa_summaries(run, key, 'paired', corr_type)
                   for corr_type in ('pearson', 'spearman')]

    coefficients = []
    for comparison in comparisons:
        report_lines = comparison[2].split('\n')
        # The value sits in the first column of the second-to-last line.
        first_column = report_lines[-2].split()[0]
        coefficients.append(first_column)

    return coefficients[0], coefficients[1]
def main():
    """Compare two taxa summary files and write the results to disk.

    Reads the command-line options described by ``script_info``, parses the
    two taxa summary files (and the optional sample ID map), runs
    ``compare_taxa_summaries`` on them and writes into ``opts.output_dir``:

    * one "sorted and filled" taxa summary per input file, named after the
      input file (``_0``/``_1`` is appended to the suffix when both inputs
      share a basename),
    * ``overall_comparison.txt`` with the overall comparison result,
    * ``detailed_comparisons.txt`` when detailed comparisons were requested.

    Problems with the number of taxa summary files or with the output
    directory are reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if len(opts.taxa_summary_fps) != 2:
        option_parser.error("Exactly two taxa summary files are required. You "
                            "provided %d." % len(opts.taxa_summary_fps))

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not misreported as an output directory problem.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Input files are opened in context managers so the handles are always
    # closed. This assumes the parsers consume the file completely before
    # returning -- TODO confirm.
    # NOTE(review): the 'U' open mode was removed in Python 3.11.
    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, 'U') as sample_id_map_f:
            sample_id_map = parse_sample_id_map(sample_id_map_f)

    taxa_summaries = []
    for taxa_summary_fp in opts.taxa_summary_fps[:2]:
        with open(taxa_summary_fp, 'U') as taxa_summary_f:
            taxa_summaries.append(parse_taxa_summary_table(taxa_summary_f))

    results = compare_taxa_summaries(
        taxa_summaries[0],
        taxa_summaries[1],
        opts.comparison_mode,
        correlation_type=opts.correlation_type,
        tail_type=opts.tail_type,
        num_permutations=opts.num_permutations,
        confidence_level=opts.confidence_level,
        perform_detailed_comparisons=opts.perform_detailed_comparisons,
        sample_id_map=sample_id_map,
        expected_sample_id=opts.expected_sample_id)

    # Write out the sorted and filled taxa summaries, basing their
    # filenames on the original input filenames. If the filenames are the same,
    # append a number to each filename.
    same_filenames = (basename(opts.taxa_summary_fps[0]) ==
                      basename(opts.taxa_summary_fps[1]))

    for file_num, (orig_ts_fp, filled_ts_lines) in enumerate(
            zip(opts.taxa_summary_fps, results[:2])):
        filename_suffix = '_sorted_and_filled'
        if same_filenames:
            filename_suffix += '_%d' % file_num
        filled_ts_fp = add_filename_suffix(orig_ts_fp, filename_suffix)
        with open(join(opts.output_dir, filled_ts_fp), 'w') as filled_ts_f:
            filled_ts_f.write(filled_ts_lines)

    # Write the overall comparison result.
    with open(join(opts.output_dir, 'overall_comparison.txt'),
              'w') as overall_comp_f:
        overall_comp_f.write(results[2])

    # Write the correlation vector containing the pairwise sample comparisons.
    if opts.perform_detailed_comparisons:
        with open(join(opts.output_dir, 'detailed_comparisons.txt'),
                  'w') as corr_vec_f:
            corr_vec_f.write(results[3])
def _write_output_file(output_dir, filename, contents):
    """Write the string ``contents`` to ``filename`` inside ``output_dir``."""
    with open(join(output_dir, filename), 'w') as out_f:
        out_f.write(contents)


def main():
    """Run the taxa summary comparison script from the command line.

    Steps, in order:

    1. Parse the command-line options described by ``script_info`` and
       require exactly two taxa summary filepaths.
    2. Create the output directory.
    3. Parse the optional sample ID map and the two taxa summary files, then
       pass them to ``compare_taxa_summaries`` together with the remaining
       options.
    4. Write the first two result elements as "sorted and filled" taxa
       summaries, the third as ``overall_comparison.txt`` and, when detailed
       comparisons were requested, the fourth as
       ``detailed_comparisons.txt``.

    A wrong number of taxa summary files or a failure to create the output
    directory is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if len(opts.taxa_summary_fps) != 2:
        option_parser.error("Exactly two taxa summary files are required. You "
                            "provided %d." % len(opts.taxa_summary_fps))

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        # A bare except would also trap KeyboardInterrupt/SystemExit and
        # report them as a directory error.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # NOTE(review): the 'U' open mode used below was removed in Python 3.11.
    sample_id_map = None
    if opts.sample_id_map_fp:
        # Assumes parse_sample_id_map reads the whole file before returning
        # -- TODO confirm.
        with open(opts.sample_id_map_fp, 'U') as sample_id_map_f:
            sample_id_map = parse_sample_id_map(sample_id_map_f)

    # Both taxa summary files stay open until the comparison has finished,
    # and are closed even if parsing or the comparison raises.
    with open(opts.taxa_summary_fps[0], 'U') as first_ts_f, \
            open(opts.taxa_summary_fps[1], 'U') as second_ts_f:
        results = compare_taxa_summaries(
            parse_taxa_summary_table(first_ts_f),
            parse_taxa_summary_table(second_ts_f),
            opts.comparison_mode, correlation_type=opts.correlation_type,
            tail_type=opts.tail_type, num_permutations=opts.num_permutations,
            confidence_level=opts.confidence_level,
            perform_detailed_comparisons=opts.perform_detailed_comparisons,
            sample_id_map=sample_id_map,
            expected_sample_id=opts.expected_sample_id)

    # Write out the sorted and filled taxa summaries, basing their
    # filenames on the original input filenames. If the filenames are the same,
    # append a number to each filename.
    same_filenames = (basename(opts.taxa_summary_fps[0]) ==
                      basename(opts.taxa_summary_fps[1]))

    for file_num, (orig_ts_fp, filled_ts_lines) in enumerate(
            zip(opts.taxa_summary_fps, results[:2])):
        filename_suffix = '_sorted_and_filled'
        if same_filenames:
            filename_suffix += '_%d' % file_num
        _write_output_file(opts.output_dir,
                           add_filename_suffix(orig_ts_fp, filename_suffix),
                           filled_ts_lines)

    # Write the overall comparison result.
    _write_output_file(opts.output_dir, 'overall_comparison.txt', results[2])

    # Write the correlation vector containing the pairwise sample comparisons.
    if opts.perform_detailed_comparisons:
        _write_output_file(opts.output_dir, 'detailed_comparisons.txt',
                           results[3])