def test_parallel_rdp_taxonomy_assigner(self):
    """ parallel_rdp_taxonomy_assigner functions as expected

    Runs ParallelRdpTaxonomyAssigner on the fixture sequences with jobs
    actually submitted and polled directly, then reads the first
    ``*_tax_assignments.txt`` file found in the output directory and
    checks its contents.
    """
    params = {
        'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
        'rdp_max_memory': 1500,
        'rdp_classifier_fp': getenv('RDP_JAR_PATH'),
        'confidence': 0.80,
        'reference_seqs_fp': self.reference_seqs_file.name,
    }

    app = ParallelRdpTaxonomyAssigner()
    # The call's return value is not inspected; results are read back
    # from the output directory below.
    app(self.tmp_seq_filepath,
        self.test_out,
        params,
        job_prefix='RDPTEST',
        poll_directly=True,
        suppress_submit_jobs=False)

    assignment_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
    # Fail with a readable message rather than an IndexError when the
    # run produced no assignments file.
    self.assertTrue(
        assignment_fps,
        "No *_tax_assignments.txt file was written to %s" % self.test_out)

    # Use a context manager so the file handle is closed even if parsing
    # raises.
    with open(assignment_fps[0], 'U') as assignments_f:
        results = fields_to_dict(assignments_f)

    # some basic sanity checks: we should get the same number of sequences
    # as our input with the same seq IDs. We should have a taxonomy string
    # and a confidence value for each seq as well.
    self.assertEqual(len(results), 2)
    self.assertEqual(len(results['X67228']), 2)
    self.assertEqual(len(results['EF503697']), 2)
def main():
    """Parse the command line and run the parallel RDP taxonomy assigner.

    Options are parsed with ``parse_command_line_parameters`` using the
    module-level ``script_info``. The full set of parsed options is passed
    to the runner as the ``params`` dict, while job-control options are
    additionally passed as explicit keyword arguments.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # create dict of command-line options
    # Copy the option attributes directly instead of eval(str(opts)):
    # the eval round-trip fails for any value whose repr is not a Python
    # literal (e.g. inf/nan floats) and evaluates text unnecessarily.
    # NOTE(review): assumes opts exposes its options via __dict__, as
    # optparse.Values does -- confirm against parse_command_line_parameters.
    params = dict(vars(opts))

    parallel_runner = ParallelRdpTaxonomyAssigner(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)