def test_run_pick_closed_reference_otus_sortmerna(self):
    """run_pick_closed_reference_otus generates expected results using sortmerna
    """
    self.params['pick_otus']['otu_picking_method'] = "sortmerna"
    run_pick_closed_reference_otus(
        self.test_data['seqs'][0],
        self.test_data['refseqs'][0],
        self.test_out,
        self.test_data['refseqs_tax'][0],
        call_commands_serially,
        self.params,
        self.qiime_config,
        parallel=False,
        status_update_callback=no_status_updates)

    input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
    otu_map_fp = join(self.test_out, 'sortmerna_picked_otus',
                      '%s_otus.txt' % input_file_basename)
    otu_table_fp = join(self.test_out, 'otu_table.biom')

    # Number of OTUs matches manually confirmed result.
    # Use a context manager so the OTU map file handle is closed
    # (the original `list(open(...))` leaked it).
    with open(otu_map_fp) as otu_map_f:
        otu_map_lines = list(otu_map_f)
    num_otus = len(otu_map_lines)
    otu_map_otu_ids = [o.split()[0] for o in otu_map_lines]
    self.assertEqual(num_otus, 3)

    # parse the otu table once (the original loaded it twice and
    # repeated the sample-ID assertion)
    otu_table = load_table(otu_table_fp)
    expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
    # sample IDs are as expected
    self.assertItemsEqual(otu_table.ids(), expected_sample_ids)
    # otu ids are as expected
    self.assertItemsEqual(otu_table.ids(axis='observation'),
                          otu_map_otu_ids)
    # expected number of sequences in OTU table
    number_seqs_in_otu_table = sum(
        [v.sum() for v in otu_table.iter_data()])
    self.assertEqual(number_seqs_in_otu_table, 117)
    # One tax assignment per otu
    self.assertEqual(len(otu_table.metadata(axis='observation')), 3)
    # Check that the log file is created and has size > 0
    log_fp = glob(join(self.test_out, 'log*.txt'))[0]
    self.assertTrue(getsize(log_fp) > 0)
def test_run_pick_closed_reference_otus_sortmerna(self):
    """run_pick_closed_reference_otus generates expected results using sortmerna
    """
    self.params['pick_otus']['otu_picking_method'] = "sortmerna"
    run_pick_closed_reference_otus(
        self.test_data['seqs'][0],
        self.test_data['refseqs'][0],
        self.test_out,
        self.test_data['refseqs_tax'][0],
        call_commands_serially,
        self.params,
        self.qiime_config,
        parallel=False,
        status_update_callback=no_status_updates)

    input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
    otu_map_fp = join(self.test_out, 'sortmerna_picked_otus',
                      '%s_otus.txt' % input_file_basename)
    otu_table_fp = join(self.test_out, 'otu_table.biom')

    # Number of OTUs matches manually confirmed result.
    # Close the OTU map file deterministically; the original
    # `list(open(...))` left the handle open.
    with open(otu_map_fp) as otu_map_f:
        otu_map_lines = list(otu_map_f)
    num_otus = len(otu_map_lines)
    otu_map_otu_ids = [o.split()[0] for o in otu_map_lines]
    self.assertEqual(num_otus, 3)

    # parse the otu table (single load; the original parsed the same
    # table twice and duplicated the sample-ID check)
    otu_table = load_table(otu_table_fp)
    expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
    # sample IDs are as expected
    self.assertItemsEqual(otu_table.ids(), expected_sample_ids)
    # otu ids are as expected
    self.assertItemsEqual(otu_table.ids(axis='observation'),
                          otu_map_otu_ids)
    # expected number of sequences in OTU table
    number_seqs_in_otu_table = sum(
        [v.sum() for v in otu_table.iter_data()])
    self.assertEqual(number_seqs_in_otu_table, 117)
    # One tax assignment per otu
    self.assertEqual(len(otu_table.metadata(axis='observation')), 3)
    # Check that the log file is created and has size > 0
    log_fp = glob(join(self.test_out, 'log*.txt'))[0]
    self.assertTrue(getsize(log_fp) > 0)
def main():
    """Command-line entry point: pick closed-reference OTUs per CLI options."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # NOTE(review): the original assigned `verbose = opts.verbose` twice;
    # a single assignment is kept here.
    verbose = opts.verbose

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    print_only = opts.print_only
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            # Parenthesized raise (not the Python-2-only comma form),
            # consistent with the other main() in this file.
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        # empty list returns empty defaultdict for now
        params = parse_qiime_parameters([])

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)
def test_run_pick_closed_reference_otus_rdp_tax_assign(self):
    """run_pick_closed_reference_otus with RDP tax assignment
    """
    self.params['assign_taxonomy']['assignment_method'] = 'rdp'
    self.params['assign_taxonomy']['id_to_taxonomy_fp'] = \
        self.test_data['refseqs_tax'][0]
    self.params['assign_taxonomy']['reference_seqs_fp'] = \
        self.test_data['refseqs'][0]
    run_pick_closed_reference_otus(
        self.test_data['seqs'][0],
        self.test_data['refseqs'][0],
        self.test_out,
        self.test_data['refseqs_tax'][0],
        call_commands_serially,
        self.params,
        self.qiime_config,
        assign_taxonomy=True,
        parallel=False,
        status_update_callback=no_status_updates)

    input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
    otu_map_fp = join(self.test_out, 'uclust_ref_picked_otus',
                      '%s_otus.txt' % input_file_basename)
    assigned_taxonomy_fp = join(
        self.test_out, 'rdp_assigned_taxonomy',
        '%s_rep_set_tax_assignments.txt' % input_file_basename)
    expected_taxonomies = [
        ['295053',
         'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__;s__',
         '1.000'],
        ['879972',
         'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__',
         '1.000'],
        ['42684',
         'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Neisseriales;f__Neisseriaceae;g__;s__',
         '1.000']]

    # Assigned taxonomies match expected taxonomies
    with open(assigned_taxonomy_fp, 'U') as taxonomy_f:
        for line in taxonomy_f:
            entry = line.strip().split('\t')
            self.assertTrue(entry in expected_taxonomies)

    otu_table_fp = join(self.test_out, 'otu_table.biom')

    # Number of OTUs matches manually confirmed result.
    # Use a context manager so the OTU map handle is closed
    # (the original `list(open(...))` leaked it).
    with open(otu_map_fp) as otu_map_f:
        otu_map_lines = list(otu_map_f)
    num_otus = len(otu_map_lines)
    otu_map_otu_ids = [o.split()[0] for o in otu_map_lines]
    self.assertEqual(num_otus, 3)

    # parse the otu table once (the original loaded it twice and
    # repeated the sample-ID assertion)
    otu_table = load_table(otu_table_fp)
    expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
    # sample IDs are as expected
    self.assertItemsEqual(otu_table.ids(), expected_sample_ids)
    # otu ids are as expected
    self.assertItemsEqual(otu_table.ids(axis='observation'),
                          otu_map_otu_ids)
    # expected number of sequences in OTU table
    number_seqs_in_otu_table = sum(
        [v.sum() for v in otu_table.iter_data()])
    self.assertEqual(number_seqs_in_otu_table, 117)
    # One tax assignment per otu
    self.assertEqual(len(otu_table.metadata(axis='observation')), 3)
    # Check that the log file is created and has size > 0
    log_fp = glob(join(self.test_out, 'log*.txt'))[0]
    self.assertTrue(getsize(log_fp) > 0)
def main():
    """Command-line entry point: pick closed-reference OTUs (optionally
    assigning taxonomy) per CLI options."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # NOTE(review): the original assigned `verbose = opts.verbose` twice;
    # a single assignment is kept here.
    verbose = opts.verbose

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        # empty list returns empty defaultdict for now
        params = parse_qiime_parameters([])

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)
def test_run_pick_closed_reference_otus_rdp_tax_assign(self):
    """run_pick_closed_reference_otus with RDP tax assignment
    """
    self.params['assign_taxonomy']['assignment_method'] = 'rdp'
    self.params['assign_taxonomy']['id_to_taxonomy_fp'] = \
        self.test_data['refseqs_tax'][0]
    self.params['assign_taxonomy']['reference_seqs_fp'] = \
        self.test_data['refseqs'][0]
    run_pick_closed_reference_otus(
        self.test_data['seqs'][0],
        self.test_data['refseqs'][0],
        self.test_out,
        self.test_data['refseqs_tax'][0],
        call_commands_serially,
        self.params,
        self.qiime_config,
        assign_taxonomy=True,
        parallel=False,
        status_update_callback=no_status_updates)

    input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
    otu_map_fp = join(self.test_out, 'uclust_ref_picked_otus',
                      '%s_otus.txt' % input_file_basename)
    assigned_taxonomy_fp = join(
        self.test_out, 'rdp_assigned_taxonomy',
        '%s_rep_set_tax_assignments.txt' % input_file_basename)
    expected_taxonomies = [
        ['295053',
         'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__;s__',
         '1.000'],
        ['879972',
         'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__',
         '1.000'],
        ['42684',
         'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Neisseriales;f__Neisseriaceae;g__;s__',
         '1.000']]

    # Assigned taxonomies match expected taxonomies
    with open(assigned_taxonomy_fp, 'U') as taxonomy_f:
        for line in taxonomy_f:
            entry = line.strip().split('\t')
            self.assertTrue(entry in expected_taxonomies)

    otu_table_fp = join(self.test_out, 'otu_table.biom')

    # Number of OTUs matches manually confirmed result.
    # Close the OTU map file deterministically; the original
    # `list(open(...))` left the handle open.
    with open(otu_map_fp) as otu_map_f:
        otu_map_lines = list(otu_map_f)
    num_otus = len(otu_map_lines)
    otu_map_otu_ids = [o.split()[0] for o in otu_map_lines]
    self.assertEqual(num_otus, 3)

    # parse the otu table (single load; the original parsed the same
    # table twice and duplicated the sample-ID check)
    otu_table = load_table(otu_table_fp)
    expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
    # sample IDs are as expected
    self.assertItemsEqual(otu_table.ids(), expected_sample_ids)
    # otu ids are as expected
    self.assertItemsEqual(otu_table.ids(axis='observation'),
                          otu_map_otu_ids)
    # expected number of sequences in OTU table
    number_seqs_in_otu_table = sum(
        [v.sum() for v in otu_table.iter_data()])
    self.assertEqual(number_seqs_in_otu_table, 117)
    # One tax assignment per otu
    self.assertEqual(len(otu_table.metadata(axis='observation')), 3)
    # Check that the log file is created and has size > 0
    log_fp = glob(join(self.test_out, 'log*.txt'))[0]
    self.assertTrue(getsize(log_fp) > 0)