예제 #1
0
    def test_parallel_rdp_taxonomy_assigner(self):
        """ parallel_rdp_taxonomy_assigner functions as expected

        Runs ParallelRdpTaxonomyAssigner on self.tmp_seq_filepath, writing
        into self.test_out, then parses the first '*_tax_assignments.txt'
        file found there and checks that both expected sequence IDs are
        present with two fields each.

        The classifier jar location is read from the RDP_JAR_PATH
        environment variable.
        """

        params = {
            'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
            'rdp_max_memory': 1500,
            'rdp_classifier_fp': getenv('RDP_JAR_PATH'),
            'confidence': 0.80,
            'reference_seqs_fp': self.reference_seqs_file.name
        }

        app = ParallelRdpTaxonomyAssigner()
        # The return value is not inspected; the test only looks at the
        # files written to self.test_out.
        app(self.tmp_seq_filepath,
            self.test_out,
            params,
            job_prefix='RDPTEST',
            poll_directly=True,
            suppress_submit_jobs=False)

        # Fail with a readable message (instead of a bare IndexError) when
        # the run produced no assignment file at all.
        assignment_fps = glob(join(self.test_out, '*_tax_assignments.txt'))
        self.assertTrue(
            assignment_fps,
            'no *_tax_assignments.txt file found in %s' % self.test_out)

        # Close the handle deterministically rather than leaking it.
        # NOTE(review): assumes fields_to_dict fully consumes the handle
        # before returning -- confirm against its definition.
        # NOTE(review): the 'U' mode is kept from the original; it was
        # removed in Python 3.11, so revisit if this file is ported.
        with open(assignment_fps[0], 'U') as assignments_f:
            results = fields_to_dict(assignments_f)

        # some basic sanity checks: we should get the same number of sequences
        # as our input with the same seq IDs. We should have a taxonomy string
        # and a confidence value for each seq as well.
        self.assertEqual(len(results), 2)
        self.assertEqual(len(results['X67228']), 2)
        self.assertEqual(len(results['EF503697']), 2)
예제 #2
0
def main():
    """Parse the command line and run the parallel RDP taxonomy assigner.

    Options are parsed with parse_command_line_parameters using the
    module-level script_info. All parsed options are forwarded to the
    assigner as a plain dict, while the cluster/polling options are also
    passed explicitly to the ParallelRdpTaxonomyAssigner constructor and
    call.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create dict of command-line options. The previous implementation used
    # eval(str(opts)), which evaluates text as code and fails on any option
    # value whose repr is not a valid Python literal. Copying the attribute
    # dict gives the same mapping without eval.
    # NOTE(review): assumes opts is an optparse.Values-style object whose
    # options live in its instance __dict__ -- confirm against
    # parse_command_line_parameters. This is a shallow copy, so mutable
    # option values are shared with opts.
    params = dict(vars(opts))

    parallel_runner = ParallelRdpTaxonomyAssigner(
        cluster_jobs_fp=opts.cluster_jobs_fp,
        jobs_to_start=opts.jobs_to_start,
        retain_temp_files=opts.retain_temp_files,
        suppress_polling=opts.suppress_polling,
        seconds_to_sleep=opts.seconds_to_sleep)

    parallel_runner(opts.input_fasta_fp,
                    opts.output_dir,
                    params,
                    job_prefix=opts.job_prefix,
                    poll_directly=opts.poll_directly,
                    suppress_submit_jobs=opts.suppress_submit_jobs)