Ejemplo n.º 1
0
def main():
    """Compare two taxa summary files and write the results to the output dir.

    Options are read from the command line (driven by ``script_info``).
    Exactly two taxa summary filepaths are required. The parsed tables are
    handed to ``compare_taxa_summaries`` and its results are written to
    ``opts.output_dir``:

    * ``results[0]`` and ``results[1]``: the sorted and filled taxa
      summaries, named after the input files with a ``_sorted_and_filled``
      suffix (plus ``_0``/``_1`` when both inputs share a basename),
    * ``results[2]``: ``overall_comparison.txt``,
    * ``results[3]``: ``detailed_comparisons.txt``, only when detailed
      comparisons were requested.

    A wrong number of input files or an unusable output directory is
    reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if len(opts.taxa_summary_fps) != 2:
        option_parser.error("Exactly two taxa summary files are required. You "
                            "provided %d." % len(opts.taxa_summary_fps))

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a directory problem.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse every input inside a context manager so the file handles are
    # closed as soon as parsing is done.
    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, 'U') as sample_id_map_f:
            sample_id_map = parse_sample_id_map(sample_id_map_f)

    with open(opts.taxa_summary_fps[0], 'U') as taxa_summary_f:
        taxa_summary1 = parse_taxa_summary_table(taxa_summary_f)
    with open(opts.taxa_summary_fps[1], 'U') as taxa_summary_f:
        taxa_summary2 = parse_taxa_summary_table(taxa_summary_f)

    results = compare_taxa_summaries(
        taxa_summary1,
        taxa_summary2,
        opts.comparison_mode,
        correlation_type=opts.correlation_type,
        tail_type=opts.tail_type,
        num_permutations=opts.num_permutations,
        confidence_level=opts.confidence_level,
        perform_detailed_comparisons=opts.perform_detailed_comparisons,
        sample_id_map=sample_id_map,
        expected_sample_id=opts.expected_sample_id)

    # Write out the sorted and filled taxa summaries, basing their
    # filenames on the original input filenames. If the filenames are the same,
    # append a number to each filename so the outputs don't overwrite each
    # other.
    same_filenames = (basename(opts.taxa_summary_fps[0]) ==
                      basename(opts.taxa_summary_fps[1]))

    for file_num, (orig_ts_fp, filled_ts_lines) in enumerate(
            zip(opts.taxa_summary_fps, results[:2])):
        filename_suffix = '_sorted_and_filled'
        if same_filenames:
            filename_suffix += '_%d' % file_num
        filled_ts_fp = add_filename_suffix(orig_ts_fp, filename_suffix)
        with open(join(opts.output_dir, filled_ts_fp), 'w') as filled_ts_f:
            filled_ts_f.write(filled_ts_lines)

    # Write the overall comparison result.
    with open(join(opts.output_dir, 'overall_comparison.txt'),
              'w') as overall_comp_f:
        overall_comp_f.write(results[2])

    # Write the correlation vector containing the pairwise sample comparisons.
    if opts.perform_detailed_comparisons:
        with open(join(opts.output_dir, 'detailed_comparisons.txt'),
                  'w') as corr_vec_f:
            corr_vec_f.write(results[3])
Ejemplo n.º 2
0
def main():
    """Run the taxa summary comparison script end to end.

    Parses the command line (driven by ``script_info``), insists on exactly
    two taxa summary filepaths, makes sure the output directory exists, and
    calls ``compare_taxa_summaries`` on the two parsed tables. The four
    results are written into ``opts.output_dir``:

    * the first two as ``<input name>_sorted_and_filled`` files (numbered
      ``_0`` and ``_1`` when both inputs have the same basename),
    * the third as ``overall_comparison.txt``,
    * the fourth as ``detailed_comparisons.txt``, only if
      ``opts.perform_detailed_comparisons`` is set.

    Invalid usage is reported via ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    taxa_summary_fps = opts.taxa_summary_fps
    if len(taxa_summary_fps) != 2:
        option_parser.error("Exactly two taxa summary files are required. You "
                            "provided %d." % len(taxa_summary_fps))

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        # A bare except would also trap KeyboardInterrupt/SystemExit and
        # misreport them as an output directory failure.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, 'U') as map_f:
            sample_id_map = parse_sample_id_map(map_f)

    # Each input is parsed while its file is open and the handle is released
    # right afterwards, rather than being left for the garbage collector.
    with open(taxa_summary_fps[0], 'U') as first_f:
        first_summary = parse_taxa_summary_table(first_f)
    with open(taxa_summary_fps[1], 'U') as second_f:
        second_summary = parse_taxa_summary_table(second_f)

    results = compare_taxa_summaries(
        first_summary, second_summary,
        opts.comparison_mode, correlation_type=opts.correlation_type,
        tail_type=opts.tail_type, num_permutations=opts.num_permutations,
        confidence_level=opts.confidence_level,
        perform_detailed_comparisons=opts.perform_detailed_comparisons,
        sample_id_map=sample_id_map,
        expected_sample_id=opts.expected_sample_id)

    # Write out the sorted and filled taxa summaries, basing their
    # filenames on the original input filenames. If the filenames are the same,
    # append a number to each filename.
    same_filenames = (basename(taxa_summary_fps[0]) ==
                      basename(taxa_summary_fps[1]))

    for file_num, (orig_ts_fp, filled_ts_lines) in enumerate(
            zip(taxa_summary_fps, results[:2])):
        filename_suffix = '_sorted_and_filled'
        if same_filenames:
            filename_suffix += '_%d' % file_num
        filled_ts_fp = add_filename_suffix(orig_ts_fp, filename_suffix)
        with open(join(opts.output_dir, filled_ts_fp), 'w') as filled_ts_f:
            filled_ts_f.write(filled_ts_lines)

    # Write the overall comparison result.
    overall_comp_fp = join(opts.output_dir, 'overall_comparison.txt')
    with open(overall_comp_fp, 'w') as overall_comp_f:
        overall_comp_f.write(results[2])

    # Write the correlation vector containing the pairwise sample comparisons.
    if opts.perform_detailed_comparisons:
        corr_vec_fp = join(opts.output_dir, 'detailed_comparisons.txt')
        with open(corr_vec_fp, 'w') as corr_vec_f:
            corr_vec_f.write(results[3])
    def test_valid_get_coefficients_input(self):
        """Functions correctly using standard valid input data.

        Parses the run table at ``self.L18S_fp`` and the key table at
        ``self.key_fp`` and checks the pair returned by ``get_coefficients``.
        """
        exp = ('-0.2336', '-0.7924')

        # Parse inside context managers so the fixture files are closed
        # before the comparison runs, instead of leaking open handles.
        with open(self.L18S_fp, 'U') as run_f:
            run = parse_taxa_summary_table(run_f)
        with open(self.key_fp, 'U') as key_f:
            key = parse_taxa_summary_table(key_f)

        obs = get_coefficients(run, key)

        self.assertEqual(obs, exp)
    def test_valid_get_coefficients_input(self):
        """Functions correctly using standard valid input data.

        The run table (``self.L18S_fp``) and key table (``self.key_fp``) are
        parsed and passed to ``get_coefficients``; the returned pair must
        equal the expected strings.
        """
        exp = ('-0.2336', '-0.7924')

        # Both fixture files are closed as soon as they have been parsed, so
        # the test does not leave file handles open.
        with open(self.L18S_fp, 'U') as run_file:
            run = parse_taxa_summary_table(run_file)
        with open(self.key_fp, 'U') as key_file:
            key = parse_taxa_summary_table(key_file)

        obs = get_coefficients(run, key)

        self.assertEqual(obs, exp)
Ejemplo n.º 5
0
 def test_parse_taxa_summary_table(self):
     """parse_taxa_summary_table reproduces the expected parsed table."""
     observed = parse_taxa_summary_table(self.taxa_summary1.split('\n'))
     expected = self.taxa_summary1_expected
     # Check the first three components one by one so a failure points at
     # the component that differs, then check the result as a whole.
     for position in range(3):
         self.assertEqual(observed[position], expected[position])
     self.assertEqual(observed, expected)
Ejemplo n.º 6
0
def main():
    """Build a prefs string from the command-line options and write it out.

    Reads the mapping headers via ``get_map``. If ``opts.input_taxa_file`` is
    given, the file is parsed with ``parse_taxa_summary_table`` and the
    second element of the result is used as the taxa ids; otherwise the taxa
    ids are ``None``. The string returned by ``build_prefs_string`` is
    written to ``opts.output_fp``.

    Raises:
        ValueError: if a TypeError or IOError occurs while reading or
            parsing the summarized taxa file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}
    mapping, headers, comments = get_map(opts, data)

    mapping_headers_to_use = opts.mapping_headers_to_use
    background_color = opts.background_color
    monte_carlo_dist = opts.monte_carlo_dist
    ball_scale = opts.ball_scale
    arrow_line_color = opts.arrow_line_color
    arrow_head_color = opts.arrow_head_color

    taxonomy_count_file = opts.input_taxa_file

    if taxonomy_count_file:
        try:
            # Read all lines up front and close the file before parsing.
            with open(taxonomy_count_file, 'U') as counts_f:
                counts_lines = counts_f.readlines()
            _, taxa_ids, _ = parse_taxa_summary_table(counts_lines)
        except (TypeError, IOError):
            # Call-style raise: the "raise ValueError, msg" form is a
            # SyntaxError on Python 3.
            raise ValueError('Summarized taxa file could not be parsed.')
    else:
        taxa_ids = None

    out = build_prefs_string(mapping_headers_to_use, background_color,
                             monte_carlo_dist, headers, taxa_ids,
                             ball_scale, arrow_line_color, arrow_head_color)

    with open(opts.output_fp, 'w') as f:
        f.write(out)
Ejemplo n.º 7
0
def main():
    """Generate a prefs string from the script options and save it.

    The mapping headers come from ``get_map``. When ``opts.input_taxa_file``
    is set, its lines are parsed with ``parse_taxa_summary_table`` and the
    second element of the parsed result supplies the taxa ids; when it is not
    set, ``None`` is used. The output of ``build_prefs_string`` is written to
    ``opts.output_fp``.

    Raises:
        ValueError: if reading or parsing the summarized taxa file fails
            with a TypeError or IOError.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}
    mapping, headers, comments = get_map(opts, data)

    mapping_headers_to_use = opts.mapping_headers_to_use
    background_color = opts.background_color
    monte_carlo_dist = opts.monte_carlo_dist
    ball_scale = opts.ball_scale
    arrow_line_color = opts.arrow_line_color
    arrow_head_color = opts.arrow_head_color

    taxonomy_count_file = opts.input_taxa_file

    if taxonomy_count_file:
        try:
            # The context manager closes the input even if readlines() fails;
            # the original left the handle open.
            with open(taxonomy_count_file, 'U') as taxa_f:
                counts_f = taxa_f.readlines()
            _, taxa_ids, _ = parse_taxa_summary_table(counts_f)
        except (TypeError, IOError):
            raise ValueError('Summarized taxa file could not be parsed.')
    else:
        taxa_ids = None

    out = build_prefs_string(mapping_headers_to_use, background_color,
                             monte_carlo_dist, headers, taxa_ids, ball_scale,
                             arrow_line_color, arrow_head_color)

    # Guarantees the output file is closed even when the write raises.
    with open(opts.output_fp, 'w') as f:
        f.write(out)
    def setUp(self):
        """Create the temp directories and fixture files the tests rely on.

        Everything created here is recorded in ``self.dirs_to_remove`` or
        ``self.files_to_remove``. Also parses the ``ts1`` fixture into
        ``self.ts1`` and starts a 60 second timeout.
        """
        # Base prefix for temporary files and directories. It may be extended,
        # but every temp dir or file made by these tests starts with it.
        self.prefix = 'generate_taxa_compare_table_tests'

        self.start_dir = getcwd()
        self.dirs_to_remove = []
        self.files_to_remove = []

        self.tmp_dir = get_qiime_temp_dir()
        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)
            # The temp dir was created here, so it is scheduled for removal.
            self.dirs_to_remove.append(self.tmp_dir)

        # Temporary root input directory holding one run's otu table.
        self.root_dir = mkdtemp(prefix='%s_root_dir_' % self.prefix,
                                dir=self.tmp_dir)
        self.dirs_to_remove.append(self.root_dir)

        run_subdir = '/L18S-1/blast_1.0/'
        run_dir = self.root_dir + run_subdir
        makedirs(run_dir)
        self.L18S_fp = (run_dir +
                        '/otu_table_mc2_no_pynast_failures_w_taxa_L5.txt')
        with open(self.L18S_fp, 'w') as run_table_f:
            run_table_f.writelines(L18S_L5_blast_one_multiple_assign_output)
        self.files_to_remove.append(self.L18S_fp)

        # Temporary key directory holding the key table.
        self.key_dir = mkdtemp(prefix='%s_key_dir_' % self.prefix,
                               dir=self.tmp_dir)
        self.dirs_to_remove.append(self.key_dir)
        key_path = self.key_dir + '/L18S_key.txt'
        self.key_fp = key_path
        with open(self.key_fp, 'w') as key_table_f:
            key_table_f.writelines(L18S_key)
        self.files_to_remove.append(self.key_fp)
        # NOTE(review): bad_key is the same path as key_fp - confirm that
        # this is intended.
        self.bad_key = key_path

        # Temporary output directory.
        self.output_dir = mkdtemp(prefix='%s_output_dir_' % self.prefix,
                                  dir=self.tmp_dir)
        self.dirs_to_remove.append(self.output_dir)

        self.ts1 = parse_taxa_summary_table(ts1.split('\n'))

        initiate_timeout(60)
Ejemplo n.º 9
0
def generate_taxa_compare_table(root, key_directory, levels=None):
    """Finds otu tables in root and compares them against the keys in key_directory.

    Walks a file tree starting at root and finds the otu tables output by
    multiple_assign_taxonomy.py, looking only in directories whose path
    contains one of the known assignment methods. Each table found is
    compared to the key of its study, which is looked up in the mapping
    returned by get_key_files(key_directory).

    Parameters:
    root: path to root of multiple_assign_taxonomy.py output.
    key_directory: path to directory containing known/expected compositions.
        Each study should be in its own otu table.
    levels: INCOMPLETE. Use other than default will cause unexpected results.
        The multiple_assign_taxonomy.py output levels to be analyzed.
        Defaults to [2, 3, 4, 5, 6].

    Returns:
    A dict containing another dict for every level compared, of the format:
    {level: {name of study: {method_and_params: (pearson, spearman)}}}
    Both coefficients are the string 'X' when the comparison raised a
    ValueError.

    Raises:
    WorkflowError: if more than five levels are given, if a level is outside
        the range 2 to 6, or if the first line of a run or key file does not
        contain the 'Taxon' header column followed by a tab.
    """
    key_fps = get_key_files(key_directory)

    if not levels:
        levels = [2, 3, 4, 5, 6]
    if len(levels) > 5:
        raise WorkflowError('Too many levels.')
    for l in levels:
        if l < 2 or l > 6:
            raise WorkflowError('Level out of range: ' + str(l))

    results = {}
    for l in levels:
        results[l] = dict()

    for (path, dirs, files) in walk(root):
        # Only directories whose path includes a known assignment method
        # contain that method's output. Checking once with any() avoids
        # re-parsing the same tables when several method names match a path.
        if not any(choice in path for choice in assignment_method_choices):
            continue

        path_parts = path.split('/')
        name = path_parts[-2].capitalize()
        # The study name is the parent directory name with any trailing
        # '-', '1', '2' or '3' characters stripped; it selects the key file.
        study = path_parts[-2].rstrip('-123').capitalize()
        method_and_params = path_parts[-1]

        for f in files:
            # Skip anything that is not an otu table, and editor backups.
            if 'otu_table_mc2_w_taxa_L' not in f or f.endswith('~'):
                continue

            # The level is the character right before the four-character
            # extension, e.g. the 5 in '..._L5.txt'.
            level = int(f[-5])
            if level not in levels:
                # If that level wasn't requested, skip it.
                continue

            run = _parse_validated_taxa_table(
                join(path, f),
                'Invalid multiple_assign_taxonomy output file, '
                'check for corrupted file: ' + path)
            key = _parse_validated_taxa_table(
                key_fps[study],
                'Invalid key file in directory: ' + path)

            try:
                pearson_coeff, spearman_coeff = get_coefficients(run, key)
            except ValueError:
                # compare_taxa_summaries couldn't find a match between the 2.
                # Likely due to mismatch between key and input sample names.
                pearson_coeff = 'X'
                spearman_coeff = 'X'

            results[level].setdefault(name, dict())[method_and_params] = \
                (pearson_coeff, spearman_coeff)
    return results


def _parse_validated_taxa_table(fp, error_message):
    """Parse the taxa summary table at fp after checking its header line.

    Raises WorkflowError(error_message) if the first line of the file does
    not contain the 'Taxon' column header followed by a tab.
    """
    with open(fp, 'U') as table_f:
        if 'Taxon\t' not in table_f.readline():
            raise WorkflowError(error_message)
        # Rewind so the parser sees the header line as well.
        table_f.seek(0)
        return parse_taxa_summary_table(table_f)