def main():
    """Compare exactly two taxa summary files and write the results.

    Parses command-line options, runs compare_taxa_summaries() on the two
    input taxa summary files, and writes into the output directory:
    the sorted-and-filled copies of each input, the overall comparison
    result, and (if requested) the detailed per-sample comparisons.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if len(opts.taxa_summary_fps) != 2:
        option_parser.error("Exactly two taxa summary files are required. You "
                            "provided %d." % len(opts.taxa_summary_fps))

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # no longer swallowed; any real failure is still reported via the
        # option parser.
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    sample_id_map = None
    if opts.sample_id_map_fp:
        with open(opts.sample_id_map_fp, 'U') as sample_id_map_f:
            sample_id_map = parse_sample_id_map(sample_id_map_f)

    # Both inputs are parsed inside `with` blocks so the handles are closed
    # deterministically (the original leaked them).
    with open(opts.taxa_summary_fps[0], 'U') as ts1_f, \
            open(opts.taxa_summary_fps[1], 'U') as ts2_f:
        results = compare_taxa_summaries(
            parse_taxa_summary_table(ts1_f),
            parse_taxa_summary_table(ts2_f),
            opts.comparison_mode,
            correlation_type=opts.correlation_type,
            tail_type=opts.tail_type,
            num_permutations=opts.num_permutations,
            confidence_level=opts.confidence_level,
            perform_detailed_comparisons=opts.perform_detailed_comparisons,
            sample_id_map=sample_id_map,
            expected_sample_id=opts.expected_sample_id)

    # Write out the sorted and filled taxa summaries, basing their filenames
    # on the original input filenames. If the two input filenames are the
    # same, append a number to each output filename so they don't collide.
    same_filenames = (basename(opts.taxa_summary_fps[0]) ==
                      basename(opts.taxa_summary_fps[1]))

    for file_num, (orig_ts_fp, filled_ts_lines) in enumerate(
            zip(opts.taxa_summary_fps, results[:2])):
        filename_suffix = '_sorted_and_filled'
        if same_filenames:
            filename_suffix += '_%d' % file_num
        filled_ts_fp = add_filename_suffix(orig_ts_fp, filename_suffix)
        with open(join(opts.output_dir, filled_ts_fp), 'w') as filled_ts_f:
            filled_ts_f.write(filled_ts_lines)

    # Write the overall comparison result.
    with open(join(opts.output_dir, 'overall_comparison.txt'), 'w') as f:
        f.write(results[2])

    # Write the correlation vector containing the pairwise sample comparisons.
    if opts.perform_detailed_comparisons:
        with open(join(opts.output_dir, 'detailed_comparisons.txt'), 'w') as f:
            f.write(results[3])
def test_valid_get_coefficients_input(self):
    """Functions correctly using standard valid input data"""
    exp = ('-0.2336', '-0.7924')
    # Open via `with` so the handles are closed (the original leaked both).
    with open(self.L18S_fp, 'U') as run_f:
        run = parse_taxa_summary_table(run_f)
    with open(self.key_fp, 'U') as key_f:
        key = parse_taxa_summary_table(key_f)
    obs = get_coefficients(run, key)
    self.assertEqual(obs, exp)
def test_valid_get_coefficients_input(self):
    """Functions correctly using standard valid input data"""
    exp = ('-0.2336', '-0.7924')
    # Parse the run and key tables, closing each file deterministically
    # (the original opened both without ever closing them).
    with open(self.L18S_fp, 'U') as run_file:
        run = parse_taxa_summary_table(run_file)
    with open(self.key_fp, 'U') as key_file:
        key = parse_taxa_summary_table(key_file)
    self.assertEqual(get_coefficients(run, key), exp)
def test_parse_taxa_summary_table(self):
    """ parse_taxa_summary_table functions as expected """
    observed = parse_taxa_summary_table(self.taxa_summary1.split('\n'))
    expected = self.taxa_summary1_expected
    # Check each of the three components individually first, so a failure
    # pinpoints which part of the parse result is wrong, then the whole.
    for idx in range(3):
        self.assertEqual(observed[idx], expected[idx])
    self.assertEqual(observed, expected)
def main():
    """Build a prefs string from a mapping file and write it to disk.

    Optionally parses a summarized-taxa file to obtain taxa ids, then passes
    the mapping headers, colors, and scaling options to build_prefs_string()
    and writes the result to the output filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}
    mapping, headers, comments = get_map(opts, data)

    mapping_headers_to_use = opts.mapping_headers_to_use
    background_color = opts.background_color
    monte_carlo_dist = opts.monte_carlo_dist
    ball_scale = opts.ball_scale
    arrow_line_color = opts.arrow_line_color
    arrow_head_color = opts.arrow_head_color
    taxonomy_count_file = opts.input_taxa_file

    if taxonomy_count_file:
        try:
            # `with` closes the handle (the original leaked it).
            with open(taxonomy_count_file, 'U') as counts_f:
                counts_lines = counts_f.readlines()
            _, taxa_ids, _ = parse_taxa_summary_table(counts_lines)
        except (TypeError, IOError):
            # Fixed from the Python-2-only `raise ValueError, '...'`
            # statement form, which is a SyntaxError on Python 3; this now
            # matches the call-style raise used elsewhere in the codebase.
            raise ValueError('Summarized taxa file could not be parsed.')
    else:
        taxa_ids = None

    out = build_prefs_string(mapping_headers_to_use, background_color,
                             monte_carlo_dist, headers, taxa_ids,
                             ball_scale, arrow_line_color, arrow_head_color)

    with open(opts.output_fp, 'w') as out_f:
        out_f.write(out)
def main():
    """Build a prefs string from a mapping file and write it to disk.

    Optionally parses a summarized-taxa file to obtain taxa ids, then hands
    the mapping headers, colors, and scaling options to build_prefs_string()
    and writes the result to the output filepath.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping, headers, comments = get_map(opts, {})

    # Extract taxa ids from the summarized-taxa file, when one was supplied.
    taxa_ids = None
    if opts.input_taxa_file:
        try:
            counts_lines = open(opts.input_taxa_file, 'U').readlines()
            _, taxa_ids, _ = parse_taxa_summary_table(counts_lines)
        except (TypeError, IOError):
            raise ValueError('Summarized taxa file could not be parsed.')

    prefs_string = build_prefs_string(opts.mapping_headers_to_use,
                                      opts.background_color,
                                      opts.monte_carlo_dist, headers,
                                      taxa_ids, opts.ball_scale,
                                      opts.arrow_line_color,
                                      opts.arrow_head_color)

    output_f = open(opts.output_fp, 'w')
    output_f.write(prefs_string)
    output_f.close()
def setUp(self):
    """Set up files/environment that will be used by the tests."""
    # Every temp dir and file created by the tests carries this prefix
    # at a minimum.
    self.prefix = 'generate_taxa_compare_table_tests'
    self.start_dir = getcwd()
    self.dirs_to_remove = []
    self.files_to_remove = []

    self.tmp_dir = get_qiime_temp_dir()
    if not exists(self.tmp_dir):
        makedirs(self.tmp_dir)
        # The test created the temp dir, so it must also remove it.
        self.dirs_to_remove.append(self.tmp_dir)

    # Temporary root input directory, mimicking the directory layout of
    # multiple_assign_taxonomy.py output.
    self.root_dir = mkdtemp(dir=self.tmp_dir,
                            prefix='%s_root_dir_' % self.prefix)
    self.dirs_to_remove.append(self.root_dir)
    L18S_dir = '/L18S-1/blast_1.0/'
    makedirs(self.root_dir + L18S_dir)
    self.L18S_fp = (self.root_dir + L18S_dir +
                    '/otu_table_mc2_no_pynast_failures_w_taxa_L5.txt')
    with open(self.L18S_fp, 'w') as f:
        f.writelines(L18S_L5_blast_one_multiple_assign_output)
    self.files_to_remove.append(self.L18S_fp)

    # Temporary key directory holding the known/expected compositions.
    self.key_dir = mkdtemp(dir=self.tmp_dir,
                           prefix='%s_key_dir_' % self.prefix)
    self.dirs_to_remove.append(self.key_dir)
    self.key_fp = self.key_dir + '/L18S_key.txt'
    with open(self.key_fp, 'w') as f:
        f.writelines(L18S_key)
    self.files_to_remove.append(self.key_fp)
    # NOTE(review): bad_key points at the very same file as key_fp —
    # confirm this is intentional and not meant to reference a separate
    # malformed-key fixture.
    self.bad_key = self.key_dir + '/L18S_key.txt'

    # Temporary output directory.
    self.output_dir = mkdtemp(dir=self.tmp_dir,
                              prefix='%s_output_dir_' % self.prefix)
    self.dirs_to_remove.append(self.output_dir)

    self.ts1 = parse_taxa_summary_table(ts1.split('\n'))

    initiate_timeout(60)
def generate_taxa_compare_table(root, key_directory, levels=None):
    """Finds otu tables in root and compares them against the keys in
    key_directory.

    Walks a file tree starting at root and finds the otu tables output by
    multiple_assign_taxonomy.py. Then compares the found otu tables to their
    corresponding key in key_directory.

    Returns a dict containing another dict for every level of output
    compared. Output is of the format:
    {level: {name of study: {method_and_params: (pearson, spearman)}}}

    Parameters:
    root: path to root of multiple_assign_taxonomy.py output.
    key_directory: path to directory containing known/expected compositions.
    Each study should be in its own otu table.
    levels: INCOMPLETE. Use other than default will cause unexpected results.
    The multiple_assign_taxonomy.py output levels to be analyzed."""
    key_fps = get_key_files(key_directory)
    results = {}
    if not levels:
        levels = [2,3,4,5,6]
    # Validate the requested levels before doing any filesystem work.
    if len(levels) > 5:
        raise WorkflowError('Too many levels.')
    for l in levels:
        if l < 2 or l > 6:
            raise WorkflowError('Level out of range: ' + str(l))
    # One sub-dict per requested level.
    for l in levels:
        results[l] = dict()
    for(path, dirs, files) in walk(root):
        for choice in assignment_method_choices:
            #Checks if this dir's name includes a known assignment method (and therefor contains that output)
            if choice in path:
                # NOTE(review): rstrip('-123') strips a trailing run of the
                # characters '-', '1', '2', '3' (a character set, not a
                # literal suffix) — a study name that itself ends in one of
                # those characters would be over-trimmed. Confirm the study
                # naming convention tolerates this.
                study = path.split('/')[-2].rstrip('-123').capitalize()
                for f in files:
                    if 'otu_table_mc2_w_taxa_L' in f and not f.endswith('~'):
                        name = path.split('/')[-2].capitalize()
                        # f[-5] is the level digit, assuming the filename
                        # ends '_L<digit>.txt' — TODO confirm no other
                        # suffixes occur.
                        level = int(f[-5])
                        if level not in levels:
                            #If that level wasn't requested, skip it.
                            continue
                        with open(join(path,f),'U') as run_file:
                            #Open and parse run file
                            # Peek at the first line to sanity-check the
                            # header, then rewind before the full parse.
                            test = run_file.readline()
                            if('Taxon\t' not in test):
                                raise WorkflowError('Invalid multiple_assign_taxonomy output file, check for corrupted file: '+path)
                            run_file.seek(0)
                            run = parse_taxa_summary_table(run_file)
                        with open(key_fps[study],'U') as key_file:
                            #Open and parse key file
                            # Same header sanity-check / rewind as above.
                            test = key_file.readline()
                            if('Taxon\t' not in test):
                                raise WorkflowError('Invalid key file in directory: '+path)
                            key_file.seek(0)
                            key = parse_taxa_summary_table(key_file)
                        try:
                            pearson_coeff, spearman_coeff = get_coefficients(run, key)
                        except ValueError:
                            #compare_taxa_summaries couldn't find a match between the 2
                            #Likely due to mismatch between key and input sample names.
                            # 'X' is the placeholder for "no comparison
                            # possible" in the result table.
                            pearson_coeff = 'X'
                            spearman_coeff = 'X'
                        # Create the per-study sub-dict on first use
                        # (hand-rolled setdefault).
                        try:
                            results[level][name]
                        except KeyError:
                            results[level][name] = dict()
                        # Keyed by the method-and-params directory name.
                        results[level][name][path.split('/')[-1]] = (pearson_coeff, spearman_coeff)
    return results