コード例 #1
0
    def test_save_extract(self):
        gene_name = "gspD"
        gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        shutil.copy(
            self.find_data("hmm", gene_name + self.cfg.res_search_suffix()),
            self.cfg.working_dir())
        report_path = os.path.join(self.cfg.working_dir(),
                                   gene_name + self.cfg.res_search_suffix())
        report = GembaseHMMReport(gene, report_path, self.cfg)
        report.extract()
        report.save_extract()
        extract_filename = gene_name + self.cfg.res_extract_suffix()
        extract_path = os.path.join(self.cfg.working_dir(),
                                    self.cfg.hmmer_dir(), extract_filename)
        self.assertTrue(os.path.exists(extract_path))
        self.assertTrue(os.path.isfile(extract_path))

        hits = [
            CoreHit(gene, "NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
                    float(2e-236), float(779.2), float(1.000000),
                    (741.0 - 104.0 + 1) / 803, 104, 741),
            CoreHit(gene, "PSAE001c01_006940", 803, "PSAE001c01", 68,
                    float(1.2e-234), float(779.2), float(1.000000),
                    (741.0 - 104.0 + 1) / 803, 104, 741),
            CoreHit(gene, "PSAE001c01_013980", 759, "PSAE001c01", 69,
                    float(3.7e-76), float(255.8), float(1.000000),
                    (736.0 - 105.0 + 1) / 759, 105, 736),
            CoreHit(gene, "PSAE001c01_017350", 600, "PSAE001c01", 70,
                    float(3.2e-27), float(94.2), float(0.500000),
                    (506.0 - 226.0 + 1) / 600, 226, 506),
            CoreHit(gene, "PSAE001c01_018920", 776, "PSAE001c01", 71,
                    float(6.1e-183), float(608.4), float(1.000000),
                    (606.0 - 48.0 + 1) / 776, 48, 606),
            CoreHit(gene, "PSAE001c01_031420", 658, "PSAE001c01", 73,
                    float(1.8e-210), float(699.3), float(1.000000),
                    (614.0 - 55.0 + 1) / 658, 55, 614)
        ]

        expected_extract_path = os.path.join(self.cfg.working_dir(),
                                             'expected_extract')
        with open(expected_extract_path, 'w') as expected_extract:
            extract = """# gene: {name} extract from {path} hmm output
# profile length= {len_profile:d}
# i_evalue threshold= {i_evalue:.3f}
# coverage threshold= {cov:.3f}
# hit_id replicon_name position_hit hit_sequence_length gene_name gene_system i_eval score profile_coverage sequence_coverage begin end
""".format(name=gene.name,
            path=report_path,
            len_profile=len(gene.profile),
            i_evalue=self.cfg.i_evalue_sel(),
            cov=self.cfg.coverage_profile())
            expected_extract.write(extract)
            for h in hits:
                expected_extract.write(str(h))

        self.assertFileEqual(extract_path, expected_extract_path)
コード例 #2
0
    def test_GembaseHMMReport_extract_concurent(self):
        """Run ``extract()`` on five reports concurrently and check their hits.

        Five ``GembaseHMMReport`` objects are built on the same copied search
        output file. ``extract()`` is called on each one in its own thread and,
        once every thread has finished, each report must hold exactly the
        expected list of hits.
        """
        system = System(self.cfg, "T2SS", 10)
        gene_name = "gspD"
        gene = Gene(self.cfg, gene_name, system, self.profile_registry)
        shutil.copy(
            os.path.join(self._data_dir, "hmm",
                         gene_name + self.cfg.res_search_suffix),
            self.cfg.working_dir)
        report_path = os.path.join(self.cfg.working_dir,
                                   gene_name + self.cfg.res_search_suffix)
        reports = [GembaseHMMReport(gene, report_path, self.cfg)
                   for _ in range(5)]

        import threading

        def worker(report):
            report.extract()

        # Keep a handle on every worker so that each one can be joined.
        # Joining a single thread is not enough: the assertions below would
        # race with the workers that are still extracting.
        workers = [threading.Thread(target=worker, args=(report, ))
                   for report in reports]
        for t in workers:
            t.start()
        for t in workers:
            t.join()

        # Hit arguments, in order: gene, system, hit_id, hit_seq_length,
        # replicon_name, pos_hit, i_eval, score, profile_coverage,
        # sequence_coverage, begin_match, end_match
        hits = [
            Hit(gene, system, "NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
                float(2e-236), float(779.2), float(1.000000),
                (741.0 - 104.0 + 1) / 803, 104, 741),
            Hit(gene, system, "PSAE001c01_006940", 803, "PSAE001c01", 68,
                float(1.2e-234), float(779.2), float(1.000000),
                (741.0 - 104.0 + 1) / 803, 104, 741),
            Hit(gene, system, "PSAE001c01_013980", 759, "PSAE001c01", 69,
                float(3.7e-76), float(255.8), float(1.000000),
                (736.0 - 105.0 + 1) / 759, 105, 736),
            Hit(gene, system, "PSAE001c01_017350", 600, "PSAE001c01", 70,
                float(3.2e-27), float(94.2), float(0.500000),
                (506.0 - 226.0 + 1) / 600, 226, 506),
            Hit(gene, system, "PSAE001c01_018920", 776, "PSAE001c01", 71,
                float(6.1e-183), float(608.4), float(1.000000),
                (606.0 - 48.0 + 1) / 776, 48, 606),
            Hit(gene, system, "PSAE001c01_031420", 658, "PSAE001c01", 73,
                float(1.8e-210), float(699.3), float(1.000000),
                (614.0 - 55.0 + 1) / 658, 55, 614)
        ]
        for report in reports:
            report.save_extract()
            self.assertEqual(len(report.hits), len(hits))
            self.assertListEqual(report.hits, hits)
コード例 #3
0
ファイル: test_Report.py プロジェクト: fjossandon/macsyfinder
    def test_GembaseHMMReport_extract_concurent(self):
        """Run ``extract()`` on five reports concurrently and check their hits.

        Five ``GembaseHMMReport`` objects are built on the same copied search
        output file. ``extract()`` is called on each one in its own thread and,
        once every thread has finished, each report must hold exactly the
        expected list of hits.
        """
        system = System(self.cfg, "T2SS", 10)
        gene_name = "gspD"
        gene = Gene(self.cfg, gene_name, system, self.profile_registry)
        shutil.copy(os.path.join(self._data_dir, "hmm", gene_name + self.cfg.res_search_suffix), self.cfg.working_dir)
        report_path = os.path.join(self.cfg.working_dir, gene_name + self.cfg.res_search_suffix)
        reports = [GembaseHMMReport(gene, report_path, self.cfg) for _ in range(5)]

        import threading

        def worker(report):
            report.extract()

        # Keep a handle on every worker so that each one can be joined.
        # Joining a single thread is not enough: the assertions below would
        # race with the workers that are still extracting.
        workers = [threading.Thread(target=worker, args=(report,)) for report in reports]
        for t in workers:
            t.start()
        for t in workers:
            t.join()

        # Hit arguments, in order: gene, system, hit_id, hit_seq_length,
        # replicon_name, pos_hit, i_eval, score, profile_coverage,
        # sequence_coverage, begin_match, end_match
        hits = [
            Hit(gene, system, "PSAE001c01_006940", 803, "PSAE001c01", 68, float(1.2e-234), float(779.2), float(1.000000), (741.0 - 104.0 + 1) / 803, 104, 741),
            Hit(gene, system, "PSAE001c01_013980", 759, "PSAE001c01", 69, float(3.7e-76), float(255.8), float(1.000000), (736.0 - 105.0 + 1) / 759, 105, 736),
            Hit(gene, system, "PSAE001c01_017350", 600, "PSAE001c01", 70, float(3.2e-27), float(94.2), float(0.500000), (506.0 - 226.0 + 1) / 600, 226, 506),
            Hit(gene, system, "PSAE001c01_018920", 776, "PSAE001c01", 71, float(6.1e-183), float(608.4), float(1.000000), (606.0 - 48.0 + 1) / 776, 48, 606),
            Hit(gene, system, "PSAE001c01_031420", 658, "PSAE001c01", 73, float(1.8e-210), float(699.3), float(1.000000), (614.0 - 55.0 + 1) / 658, 55, 614)
        ]
        for report in reports:
            report.save_extract()
            self.assertEqual(len(report.hits), len(hits))
            self.assertListEqual(hits, report.hits)