Beispiel #1
0
    def test_parallel_database_mapper_usearch(self):
        """ parallel_database_mapper_usearch functions as expected

        Runs ParallelDatabaseMapperUsearch on self.inseqs1_fp against
        self.refseqs1_fp, writing into self.test_out, then parses the
        resulting observation_map.txt and checks its contents.
        """

        params = {
            'refseqs_fp': self.refseqs1_fp,
            'min_percent_id': 0.97,
            'evalue': 1e-10,
            'max_accepts': 1,
            'max_rejects': 32,
            'queryalnfract': 0.35,
            'targetalnfract': 0.0,
            'observation_metadata_fp': None
        }

        app = ParallelDatabaseMapperUsearch()
        # The return value is not needed; the test inspects the output
        # file written under self.test_out instead.
        app(self.inseqs1_fp,
            self.test_out,
            params,
            job_prefix='PTEST',
            poll_directly=True,
            suppress_submit_jobs=False)

        # Indexing with [0] raises IndexError if the map was not written.
        observation_map_fp = glob(join(self.test_out,
                                       'observation_map.txt'))[0]
        # Use a context manager so the handle is closed even if parsing
        # fails (the original left the file open).
        with open(observation_map_fp, 'U') as observation_map_f:
            omap = parse_otu_map(observation_map_f)

        # NOTE(review): omap[1] and omap[2] look like the query-side and
        # reference-side identifiers respectively -- confirm against
        # parse_otu_map's return value.
        self.assertEqual(len(omap[0]), 3)
        self.assertItemsEqual(
            omap[1], ['eco:b0015', 'eco:b0122', 'eco:b0015:duplicate'])
        self.assertItemsEqual(omap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
def main():
    """Run the parallel database mapper selected on the command line.

    Parses the command-line options described by ``script_info``, selects
    the parallel mapper class matching ``opts.assignment_method``
    ('usearch', 'blat' or 'bwa-short'), and runs it on
    ``opts.input_seqs_filepath``, writing to ``opts.output_dir``.

    Raises:
        ValueError: if ``opts.assignment_method`` is not one of the
            supported methods.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    # Copy the parsed options into a plain dict. This replaces the former
    # eval(str(opts)) round-trip, which evaluated the options' repr as code
    # and failed on any value whose repr is not a Python literal.
    # NOTE(review): assumes opts keeps its values in its instance __dict__
    # (as optparse.Values does) -- confirm for parse_command_line_parameters.
    params = dict(vars(opts))
    params['save_uc_files'] = True

    method = opts.assignment_method
    if method == 'usearch':
        runner_class = ParallelDatabaseMapperUsearch
    elif method == 'blat':
        runner_class = ParallelDatabaseMapperBlat
    elif method == 'bwa-short':
        # max_diff values greater than 1.0 are cast to int; values at or
        # below 1.0 (and None) are passed through unchanged.
        if params['max_diff'] is not None and params['max_diff'] > 1.0:
            params['max_diff'] = int(params['max_diff'])
        runner_class = ParallelDatabaseMapperBwaShort
    else:
        # other -- shouldn't be able to get here as a KeyError would have
        # been raised earlier
        raise ValueError(
            "Unknown read mapping method: %s" %
            opts.assignment_method)

    # All mappers share the same constructor and call signature, so the
    # runner is built and invoked once for whichever class was selected.
    parallel_runner = runner_class(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)
    parallel_runner(opts.input_seqs_filepath,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)