def test_parallel_database_mapper_usearch(self):
    """parallel_database_mapper_usearch functions as expected

    Runs ParallelDatabaseMapperUsearch on ``self.inseqs1_fp`` against the
    reference sequences in ``self.refseqs1_fp``, writing into
    ``self.test_out``, then parses the resulting ``observation_map.txt``
    and checks its contents.
    """
    params = {
        'refseqs_fp': self.refseqs1_fp,
        'min_percent_id': 0.97,
        'evalue': 1e-10,
        'max_accepts': 1,
        'max_rejects': 32,
        'queryalnfract': 0.35,
        'targetalnfract': 0.0,
        'observation_metadata_fp': None
    }

    app = ParallelDatabaseMapperUsearch()
    # The return value is not inspected; the test checks the file the run
    # leaves behind in self.test_out.
    app(self.inseqs1_fp,
        self.test_out,
        params,
        job_prefix='PTEST',
        poll_directly=True,
        suppress_submit_jobs=False)

    # Indexing the glob result raises IndexError when the observation map
    # was not written, so a missing output file fails the test here.
    observation_map_fp = glob(
        join(self.test_out, 'observation_map.txt'))[0]

    # Parse inside a context manager so the file handle is closed
    # deterministically instead of being left to garbage collection.
    # NOTE(review): assumes parse_otu_map fully consumes the file before
    # returning -- confirm against its implementation.
    with open(observation_map_fp, 'U') as observation_map_f:
        omap = parse_otu_map(observation_map_f)

    # NOTE(review): omap is presumably (mapping, sample ids,
    # observation ids) -- verify against parse_otu_map.
    self.assertEqual(len(omap[0]), 3)
    self.assertItemsEqual(
        omap[1],
        ['eco:b0015', 'eco:b0122', 'eco:b0015:duplicate'])
    self.assertItemsEqual(omap[2], ['eco:b0015-pr', 'eco:b0122-pr'])
def main():
    """Run the parallel read mapper selected on the command line.

    Parses the command line with ``parse_command_line_parameters`` using the
    module-level ``script_info``, picks the parallel mapper class that
    matches ``opts.assignment_method`` ('usearch', 'blat' or 'bwa-short'),
    builds it from the cluster/polling options and calls it on
    ``opts.input_seqs_filepath`` with ``opts.output_dir`` as the output
    location.

    Raises:
        ValueError: if ``opts.assignment_method`` is not one of the
            supported methods.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # NOTE(review): eval() of the stringified options object is used to get
    # a dict of the option values (presumably str(opts) is a dict literal --
    # confirm). Option values come from the command line, so this evaluates
    # text the user controls; consider building the dict without eval().
    params = eval(str(opts))
    params['save_uc_files'] = True

    method = opts.assignment_method
    if method == 'usearch':
        mapper_class = ParallelDatabaseMapperUsearch
    elif method == 'blat':
        mapper_class = ParallelDatabaseMapperBlat
    elif method == 'bwa-short':
        # cast max_diff to an int if it's being passed as an int
        # (values above 1.0 are converted; None and smaller values are
        # left untouched)
        max_diff = params['max_diff']
        if max_diff is not None and max_diff > 1.0:
            params['max_diff'] = int(max_diff)
        mapper_class = ParallelDatabaseMapperBwaShort
    else:
        # other -- shouldn't be able to get here as a KeyError would have
        # been raised earlier
        raise ValueError("Unknown read mapping method: %s" % method)

    # Every mapper is constructed and invoked with the same arguments, so
    # this is done once after the class has been chosen.
    parallel_runner = mapper_class(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)
    parallel_runner(opts.input_seqs_filepath,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)