예제 #1
0
    def test_run_pick_closed_reference_otus_sortmerna(self):
        """run_pick_closed_reference_otus generates expected results
           using sortmerna

        Runs the workflow serially with the OTU picking method set to
        "sortmerna", then checks the OTU map, the BIOM table and the log
        file written under ``self.test_out``.
        """
        # Select sortmerna as the OTU picker for this run.
        self.params['pick_otus']['otu_picking_method'] = "sortmerna"

        run_pick_closed_reference_otus(
            self.test_data['seqs'][0],
            self.test_data['refseqs'][0],
            self.test_out,
            self.test_data['refseqs_tax'][0],
            call_commands_serially,
            self.params,
            self.qiime_config,
            parallel=False,
            status_update_callback=no_status_updates)

        input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
        otu_map_fp = join(self.test_out, 'sortmerna_picked_otus',
                          '%s_otus.txt' % input_file_basename)
        otu_table_fp = join(self.test_out, 'otu_table.biom')

        # Parse the OTU table once; it is reused by every check below.
        otu_table = load_table(otu_table_fp)
        expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
        # sample IDs are as expected
        self.assertItemsEqual(otu_table.ids(), expected_sample_ids)

        # Read the OTU map inside a context manager so the file handle is
        # closed as soon as the ids have been collected. The first
        # whitespace-delimited field of each line is used as the OTU id.
        with open(otu_map_fp) as otu_map_f:
            otu_map_otu_ids = [line.split()[0] for line in otu_map_f]

        # Number of OTUs matches manually confirmed result
        self.assertEqual(len(otu_map_otu_ids), 3)

        # otu ids are as expected
        self.assertItemsEqual(otu_table.ids(axis='observation'),
                              otu_map_otu_ids)

        # expected number of sequences in OTU table
        number_seqs_in_otu_table = sum(
            v.sum() for v in otu_table.iter_data())
        self.assertEqual(number_seqs_in_otu_table, 117)

        # One tax assignment per otu
        self.assertEqual(len(otu_table.metadata(axis='observation')), 3)

        # Check that the log file is created and has size > 0. Assert on the
        # glob result first so a missing log is reported as a test failure
        # rather than as an IndexError.
        log_fps = glob(join(self.test_out, 'log*.txt'))
        self.assertTrue(log_fps, "no log file found in %s" % self.test_out)
        self.assertGreater(getsize(log_fps[0]), 0)
예제 #2
0
    def test_run_pick_closed_reference_otus_sortmerna(self):
        """run_pick_closed_reference_otus generates expected results
           using sortmerna

        The workflow is executed serially with "sortmerna" as the OTU
        picking method; the OTU map, BIOM table and log file produced in
        ``self.test_out`` are then verified.
        """
        # Use sortmerna for the OTU picking step.
        self.params['pick_otus']['otu_picking_method'] = "sortmerna"

        run_pick_closed_reference_otus(
            self.test_data['seqs'][0],
            self.test_data['refseqs'][0],
            self.test_out,
            self.test_data['refseqs_tax'][0],
            call_commands_serially,
            self.params,
            self.qiime_config,
            parallel=False,
            status_update_callback=no_status_updates)

        input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
        otu_map_fp = join(self.test_out, 'sortmerna_picked_otus',
                          '%s_otus.txt' % input_file_basename)
        otu_table_fp = join(self.test_out, 'otu_table.biom')

        # Load the OTU table a single time for all subsequent assertions.
        otu_table = load_table(otu_table_fp)
        expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
        # sample IDs are as expected
        self.assertItemsEqual(otu_table.ids(), expected_sample_ids)

        # Collect OTU ids (first whitespace-delimited field per line) while
        # the map file is open; the ``with`` block closes the handle
        # instead of leaving it to the garbage collector.
        with open(otu_map_fp) as otu_map_f:
            otu_map_otu_ids = [line.split()[0] for line in otu_map_f]

        # Number of OTUs matches manually confirmed result
        self.assertEqual(len(otu_map_otu_ids), 3)

        # otu ids are as expected
        self.assertItemsEqual(otu_table.ids(axis='observation'),
                              otu_map_otu_ids)

        # expected number of sequences in OTU table
        number_seqs_in_otu_table = sum(
            v.sum() for v in otu_table.iter_data())
        self.assertEqual(number_seqs_in_otu_table, 117)

        # One tax assignment per otu
        self.assertEqual(len(otu_table.metadata(axis='observation')), 3)

        # Check that the log file is created and has size > 0. A missing
        # log is surfaced as an assertion failure, not an IndexError.
        log_fps = glob(join(self.test_out, 'log*.txt'))
        self.assertTrue(log_fps, "no log file found in %s" % self.test_out)
        self.assertGreater(getsize(log_fps[0]), 0)
예제 #3
0
def main():
    """Parse command-line options and run closed-reference OTU picking.

    Loads the optional parameters file, validates the number of jobs to
    start, creates the output directory, selects the command handler and
    status callback from the options, and then calls
    ``run_pick_closed_reference_otus``.

    Raises:
        IOError: if the parameters file named by ``opts.parameter_fp``
            cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    #if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        # Only the open() call is guarded, so IOErrors raised while parsing
        # are not mislabelled as "can't open".
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        # Close the file even if parsing raises.
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        # NOTE(review): any OSError is treated as "directory already
        # exists"; other causes (e.g. permissions) get the same message.
        if not opts.force:
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    # With print_only the commands are printed rather than executed.
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)
예제 #4
0
    def test_run_pick_closed_reference_otus_rdp_tax_assign(self):
        """run_pick_closed_reference_otus with RDP tax assignment

        Configures the taxonomy assignment step to use 'rdp' with the test
        reference sequences and taxonomy map, runs the workflow serially
        with ``assign_taxonomy=True``, and checks the assigned taxonomies,
        the OTU map, the BIOM table and the log file in ``self.test_out``.
        """
        self.params['assign_taxonomy']['assignment_method'] = 'rdp'
        self.params['assign_taxonomy']['id_to_taxonomy_fp'] = \
            self.test_data['refseqs_tax'][0]
        self.params['assign_taxonomy']['reference_seqs_fp'] = \
            self.test_data['refseqs'][0]

        run_pick_closed_reference_otus(
            self.test_data['seqs'][0],
            self.test_data['refseqs'][0],
            self.test_out,
            self.test_data['refseqs_tax'][0],
            call_commands_serially,
            self.params,
            self.qiime_config,
            assign_taxonomy=True,
            parallel=False,
            status_update_callback=no_status_updates)

        input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
        otu_map_fp = join(self.test_out, 'uclust_ref_picked_otus',
                          '%s_otus.txt' % input_file_basename)
        # Pass the directory and file name as separate components so that
        # join() supplies the separator.
        assigned_taxonomy_fp = join(
            self.test_out, 'rdp_assigned_taxonomy',
            '%s_rep_set_tax_assignments.txt' % input_file_basename)
        # Each entry is the list of tab-separated fields of one expected
        # line of the assignments file.
        expected_taxonomies = [
            [
                '295053',
                'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__;s__',
                '1.000'
            ],
            [
                '879972',
                'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__',
                '1.000'
            ],
            [
                '42684',
                'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Neisseriales;f__Neisseriaceae;g__;s__',
                '1.000'
            ]
        ]
        # Assigned taxonomies match expected taxonomies
        # NOTE(review): this is a membership check only; an empty
        # assignments file would pass this loop.
        with open(assigned_taxonomy_fp, 'U') as taxonomy_f:
            for line in taxonomy_f:
                entry = line.strip().split('\t')
                # assertIn reports the offending entry on failure.
                self.assertIn(entry, expected_taxonomies)

        # Parse the OTU table once; it is reused by every check below.
        otu_table_fp = join(self.test_out, 'otu_table.biom')
        otu_table = load_table(otu_table_fp)
        expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
        # sample IDs are as expected
        self.assertItemsEqual(otu_table.ids(), expected_sample_ids)

        # Read the OTU map inside a context manager so the file handle is
        # closed as soon as the ids have been collected. The first
        # whitespace-delimited field of each line is used as the OTU id.
        with open(otu_map_fp) as otu_map_f:
            otu_map_otu_ids = [line.split()[0] for line in otu_map_f]

        # Number of OTUs matches manually confirmed result
        self.assertEqual(len(otu_map_otu_ids), 3)

        # otu ids are as expected
        self.assertItemsEqual(otu_table.ids(axis='observation'),
                              otu_map_otu_ids)

        # expected number of sequences in OTU table
        number_seqs_in_otu_table = sum(
            v.sum() for v in otu_table.iter_data())
        self.assertEqual(number_seqs_in_otu_table, 117)

        # One tax assignment per otu
        self.assertEqual(len(otu_table.metadata(axis='observation')), 3)

        # Check that the log file is created and has size > 0. Assert on the
        # glob result first so a missing log is reported as a test failure
        # rather than as an IndexError.
        log_fps = glob(join(self.test_out, 'log*.txt'))
        self.assertTrue(log_fps, "no log file found in %s" % self.test_out)
        self.assertGreater(getsize(log_fps[0]), 0)
예제 #5
0
def main():
    """Parse command-line options and run closed-reference OTU picking.

    Loads the optional parameters file, validates the number of jobs to
    start, selects the command handler (creating the output directory
    only when commands will actually be executed), selects the status
    callback, and then calls ``run_pick_closed_reference_otus``.

    Raises:
        IOError: if the parameters file named by ``opts.parameter_fp``
            cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        # Only the open() call is guarded, so IOErrors raised while parsing
        # are not mislabelled as "can't open".
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        # Close the file even if parsing raises.
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    if print_only:
        # Commands are only printed, so no output directory is created.
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
        try:
            makedirs(output_dir)
        except OSError:
            # NOTE(review): any OSError is treated as "directory already
            # exists"; other causes (e.g. permissions) get the same message.
            if not opts.force:
                option_parser.error(
                    "Output directory already exists. Please choose"
                    " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)
예제 #6
0
    def test_run_pick_closed_reference_otus_rdp_tax_assign(self):
        """run_pick_closed_reference_otus with RDP tax assignment

        Sets the taxonomy assignment method to 'rdp' (using the test
        reference sequences and taxonomy map), runs the workflow serially
        with ``assign_taxonomy=True``, and verifies the assigned
        taxonomies, OTU map, BIOM table and log file in ``self.test_out``.
        """
        self.params['assign_taxonomy']['assignment_method'] = 'rdp'
        self.params['assign_taxonomy']['id_to_taxonomy_fp'] = \
            self.test_data['refseqs_tax'][0]
        self.params['assign_taxonomy']['reference_seqs_fp'] = \
            self.test_data['refseqs'][0]

        run_pick_closed_reference_otus(
            self.test_data['seqs'][0],
            self.test_data['refseqs'][0],
            self.test_out,
            self.test_data['refseqs_tax'][0],
            call_commands_serially,
            self.params,
            self.qiime_config,
            assign_taxonomy=True,
            parallel=False,
            status_update_callback=no_status_updates)

        input_file_basename = splitext(split(self.test_data['seqs'][0])[1])[0]
        otu_map_fp = join(self.test_out, 'uclust_ref_picked_otus',
                          '%s_otus.txt' % input_file_basename)
        # Directory and file name are separate join() components so no
        # path separator is hard-coded.
        assigned_taxonomy_fp = join(
            self.test_out, 'rdp_assigned_taxonomy',
            '%s_rep_set_tax_assignments.txt' % input_file_basename)
        # One list of tab-separated fields per expected assignment line.
        expected_taxonomies = [
            ['295053',
             'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__;s__',
             '1.000'],
            ['879972',
             'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__',
             '1.000'],
            ['42684',
             'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Neisseriales;f__Neisseriaceae;g__;s__',
             '1.000'],
        ]
        # Assigned taxonomies match expected taxonomies
        # NOTE(review): membership check only; an empty assignments file
        # would pass this loop.
        with open(assigned_taxonomy_fp, 'U') as taxonomy_f:
            for line in taxonomy_f:
                entry = line.strip().split('\t')
                # assertIn reports the offending entry on failure.
                self.assertIn(entry, expected_taxonomies)

        # Load the OTU table a single time for all subsequent assertions.
        otu_table_fp = join(self.test_out, 'otu_table.biom')
        otu_table = load_table(otu_table_fp)
        expected_sample_ids = ['f1', 'f2', 'f3', 'f4', 'p1', 'p2', 't1', 't2']
        # sample IDs are as expected
        self.assertItemsEqual(otu_table.ids(), expected_sample_ids)

        # Collect OTU ids (first whitespace-delimited field per line) while
        # the map file is open; the ``with`` block closes the handle
        # instead of leaving it to the garbage collector.
        with open(otu_map_fp) as otu_map_f:
            otu_map_otu_ids = [line.split()[0] for line in otu_map_f]

        # Number of OTUs matches manually confirmed result
        self.assertEqual(len(otu_map_otu_ids), 3)

        # otu ids are as expected
        self.assertItemsEqual(otu_table.ids(axis='observation'),
                              otu_map_otu_ids)

        # expected number of sequences in OTU table
        number_seqs_in_otu_table = sum(
            v.sum() for v in otu_table.iter_data())
        self.assertEqual(number_seqs_in_otu_table, 117)

        # One tax assignment per otu
        self.assertEqual(len(otu_table.metadata(axis='observation')), 3)

        # Check that the log file is created and has size > 0. A missing
        # log is surfaced as an assertion failure, not an IndexError.
        log_fps = glob(join(self.test_out, 'log*.txt'))
        self.assertTrue(log_fps, "no log file found in %s" % self.test_out)
        self.assertGreater(getsize(log_fps[0]), 0)