コード例 #1
0
 def test_GembaseHMMReport_extract(self):
     """Check the hits extracted from the gspD search output.

     The gspD search result file is copied from the test data into the
     working directory, a GembaseHMMReport is built on it, and after
     ``extract()`` the report must hold exactly the six expected hits.
     """
     gene_name = "gspD"
     system = System(self.cfg, "T2SS", 10)
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)

     search_file = gene_name + self.cfg.res_search_suffix
     shutil.copy(os.path.join(self._data_dir, "hmm", search_file),
                 self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir, search_file)

     report = GembaseHMMReport(gene, report_path, self.cfg)
     report.extract()
     self.assertEqual(len(report.hits), 6)

     # One row per expected hit:
     # (hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
     #  profile_coverage, begin_match, end_match)
     expected_rows = [
         ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
          2e-236, 779.2, 1.0, 104, 741),
         ("PSAE001c01_006940", 803, "PSAE001c01", 68,
          1.2e-234, 779.2, 1.0, 104, 741),
         ("PSAE001c01_013980", 759, "PSAE001c01", 69,
          3.7e-76, 255.8, 1.0, 105, 736),
         ("PSAE001c01_017350", 600, "PSAE001c01", 70,
          3.2e-27, 94.2, 0.5, 226, 506),
         ("PSAE001c01_018920", 776, "PSAE001c01", 71,
          6.1e-183, 608.4, 1.0, 48, 606),
         ("PSAE001c01_031420", 658, "PSAE001c01", 73,
          1.8e-210, 699.3, 1.0, 55, 614),
     ]
     # sequence_coverage is (end - begin + 1) / hit_seq_length; the float()
     # conversions keep the division a true division on any interpreter.
     hits = [
         Hit(gene, system, hit_id, seq_len, replicon, pos, i_eval, score,
             profile_cov, (float(end) - float(begin) + 1) / seq_len,
             begin, end)
         for (hit_id, seq_len, replicon, pos, i_eval, score, profile_cov,
              begin, end) in expected_rows
     ]
     self.assertListEqual(hits, report.hits)
コード例 #2
0
    def test_save_extract(self):
        gene_name = "gspD"
        gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        shutil.copy(
            self.find_data("hmm", gene_name + self.cfg.res_search_suffix()),
            self.cfg.working_dir())
        report_path = os.path.join(self.cfg.working_dir(),
                                   gene_name + self.cfg.res_search_suffix())
        report = GembaseHMMReport(gene, report_path, self.cfg)
        report.extract()
        report.save_extract()
        extract_filename = gene_name + self.cfg.res_extract_suffix()
        extract_path = os.path.join(self.cfg.working_dir(),
                                    self.cfg.hmmer_dir(), extract_filename)
        self.assertTrue(os.path.exists(extract_path))
        self.assertTrue(os.path.isfile(extract_path))

        hits = [
            CoreHit(gene, "NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
                    float(2e-236), float(779.2), float(1.000000),
                    (741.0 - 104.0 + 1) / 803, 104, 741),
            CoreHit(gene, "PSAE001c01_006940", 803, "PSAE001c01", 68,
                    float(1.2e-234), float(779.2), float(1.000000),
                    (741.0 - 104.0 + 1) / 803, 104, 741),
            CoreHit(gene, "PSAE001c01_013980", 759, "PSAE001c01", 69,
                    float(3.7e-76), float(255.8), float(1.000000),
                    (736.0 - 105.0 + 1) / 759, 105, 736),
            CoreHit(gene, "PSAE001c01_017350", 600, "PSAE001c01", 70,
                    float(3.2e-27), float(94.2), float(0.500000),
                    (506.0 - 226.0 + 1) / 600, 226, 506),
            CoreHit(gene, "PSAE001c01_018920", 776, "PSAE001c01", 71,
                    float(6.1e-183), float(608.4), float(1.000000),
                    (606.0 - 48.0 + 1) / 776, 48, 606),
            CoreHit(gene, "PSAE001c01_031420", 658, "PSAE001c01", 73,
                    float(1.8e-210), float(699.3), float(1.000000),
                    (614.0 - 55.0 + 1) / 658, 55, 614)
        ]

        expected_extract_path = os.path.join(self.cfg.working_dir(),
                                             'expected_extract')
        with open(expected_extract_path, 'w') as expected_extract:
            extract = """# gene: {name} extract from {path} hmm output
# profile length= {len_profile:d}
# i_evalue threshold= {i_evalue:.3f}
# coverage threshold= {cov:.3f}
# hit_id replicon_name position_hit hit_sequence_length gene_name gene_system i_eval score profile_coverage sequence_coverage begin end
""".format(name=gene.name,
            path=report_path,
            len_profile=len(gene.profile),
            i_evalue=self.cfg.i_evalue_sel(),
            cov=self.cfg.coverage_profile())
            expected_extract.write(extract)
            for h in hits:
                expected_extract.write(str(h))

        self.assertFileEqual(extract_path, expected_extract_path)
コード例 #3
0
ファイル: test_Report.py プロジェクト: fjossandon/macsyfinder
 def test_GembaseHMMReport_extract(self):
     """Check the hits extracted from the gspD search output.

     The gspD search result file is copied from the test data into the
     working directory, a GembaseHMMReport is built on it, and after
     ``extract()`` the report must hold exactly the five expected hits.
     """
     gene_name = "gspD"
     system = System(self.cfg, "T2SS", 10)
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)

     search_file = gene_name + self.cfg.res_search_suffix
     shutil.copy(os.path.join(self._data_dir, "hmm", search_file),
                 self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir, search_file)

     report = GembaseHMMReport(gene, report_path, self.cfg)
     report.extract()
     self.assertEqual(len(report.hits), 5)

     # One row per expected hit:
     # (hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
     #  profile_coverage, begin_match, end_match)
     expected_rows = [
         ("PSAE001c01_006940", 803, "PSAE001c01", 68,
          1.2e-234, 779.2, 1.0, 104, 741),
         ("PSAE001c01_013980", 759, "PSAE001c01", 69,
          3.7e-76, 255.8, 1.0, 105, 736),
         ("PSAE001c01_017350", 600, "PSAE001c01", 70,
          3.2e-27, 94.2, 0.5, 226, 506),
         ("PSAE001c01_018920", 776, "PSAE001c01", 71,
          6.1e-183, 608.4, 1.0, 48, 606),
         ("PSAE001c01_031420", 658, "PSAE001c01", 73,
          1.8e-210, 699.3, 1.0, 55, 614),
     ]
     # sequence_coverage is (end - begin + 1) / hit_seq_length; the float()
     # conversions keep the division a true division on any interpreter.
     hits = [
         Hit(gene, system, hit_id, seq_len, replicon, pos, i_eval, score,
             profile_cov, (float(end) - float(begin) + 1) / seq_len,
             begin, end)
         for (hit_id, seq_len, replicon, pos, i_eval, score, profile_cov,
              begin, end) in expected_rows
     ]
     self.assertListEqual(hits, report.hits)
コード例 #4
0
    def test_best_hit(self):
        """Check ``best_hit()`` before and after extraction.

        Before ``extract()`` is called the report has no best hit (None);
        afterwards ``best_hit()`` must equal the expected NC_xxxxx_xx hit.
        """
        gene_name = 'gspD'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)

        work_dir = self.cfg.working_dir()
        search_file = gene_name + self.cfg.res_search_suffix()
        shutil.copy(self.find_data("hmm", search_file), work_dir)
        report_path = os.path.join(work_dir, search_file)

        report = GembaseHMMReport(c_gene, report_path, self.cfg)
        # Nothing has been extracted yet, so there is no best hit.
        self.assertIsNone(report.best_hit())

        report.extract()
        observed = report.best_hit()

        seq_len, begin, end = 803, 104, 741
        # sequence_coverage is (end - begin + 1) / hit_seq_length.
        seq_coverage = (float(end) - float(begin) + 1) / seq_len
        hit_expected = CoreHit(c_gene, "NC_xxxxx_xx_056141", seq_len,
                               "NC_xxxxx_xx", 141, 2e-236, 779.2, 1.0,
                               seq_coverage, begin, end)
        self.assertEqual(hit_expected, observed)
コード例 #5
0
 def test_str(self):
     """Check the string form of a report after extraction.

     ``str(report)`` must equal a five-line header (gene name and report
     path, profile length, i_evalue threshold, coverage threshold, column
     names) followed by the string form of each of the six expected hits.
     """
     gene_name = "gspD"
     system = System(self.cfg, "T2SS", 10)
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)

     search_file = gene_name + self.cfg.res_search_suffix
     shutil.copy(os.path.join(self._data_dir, "hmm", search_file),
                 self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir, search_file)

     report = GembaseHMMReport(gene, report_path, self.cfg)
     report.extract()

     # One row per expected hit:
     # (hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
     #  profile_coverage, begin_match, end_match)
     expected_rows = [
         ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
          2e-236, 779.2, 1.0, 104, 741),
         ("PSAE001c01_006940", 803, "PSAE001c01", 68,
          1.2e-234, 779.2, 1.0, 104, 741),
         ("PSAE001c01_013980", 759, "PSAE001c01", 69,
          3.7e-76, 255.8, 1.0, 105, 736),
         ("PSAE001c01_017350", 600, "PSAE001c01", 70,
          3.2e-27, 94.2, 0.5, 226, 506),
         ("PSAE001c01_018920", 776, "PSAE001c01", 71,
          6.1e-183, 608.4, 1.0, 48, 606),
         ("PSAE001c01_031420", 658, "PSAE001c01", 73,
          1.8e-210, 699.3, 1.0, 55, 614),
     ]
     # sequence_coverage is (end - begin + 1) / hit_seq_length; the float()
     # conversions keep the division a true division on any interpreter.
     hits = [
         Hit(gene, system, hit_id, seq_len, replicon, pos, i_eval, score,
             profile_cov, (float(end) - float(begin) + 1) / seq_len,
             begin, end)
         for (hit_id, seq_len, replicon, pos, i_eval, score, profile_cov,
              begin, end) in expected_rows
     ]

     # Assemble the expected text in one pass instead of growing a string
     # with repeated "+=" (the former dead "s = ''" initialisation is gone).
     header_lines = [
         "# gene: {0} extract from {1} hmm output\n".format(
             gene.name, report_path),
         "# profile length= {0:d}\n".format(len(gene.profile)),
         "# i_evalue threshold= {0:.3f}\n".format(self.cfg.i_evalue_sel),
         "# coverage threshold= {0:.3f}\n".format(
             self.cfg.coverage_profile),
         "# hit_id replicon_name position_hit hit_sequence_length gene_name gene_system i_eval score profile_coverage sequence_coverage begin end\n",
     ]
     expected = "".join(header_lines + [str(hit) for hit in hits])
     self.assertEqual(str(report), expected)
コード例 #6
0
    def test_str(self):
        """Check the string form of a report after extraction.

        ``str(report)`` must equal a five-line header (gene name and report
        path, profile length, i_evalue threshold, coverage threshold, column
        names) followed by the string form of each of the six expected hits.
        """
        gene_name = 'gspD'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)

        work_dir = self.cfg.working_dir()
        search_file = gene_name + self.cfg.res_search_suffix()
        shutil.copy(self.find_data("hmm", search_file), work_dir)
        report_path = os.path.join(work_dir, search_file)

        report = GembaseHMMReport(c_gene, report_path, self.cfg)
        report.extract()

        # One row per expected hit:
        # (hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
        #  profile_coverage, begin_match, end_match)
        expected_rows = [
            ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
             2e-236, 779.2, 1.0, 104, 741),
            ("PSAE001c01_006940", 803, "PSAE001c01", 68,
             1.2e-234, 779.2, 1.0, 104, 741),
            ("PSAE001c01_013980", 759, "PSAE001c01", 69,
             3.7e-76, 255.8, 1.0, 105, 736),
            ("PSAE001c01_017350", 600, "PSAE001c01", 70,
             3.2e-27, 94.2, 0.5, 226, 506),
            ("PSAE001c01_018920", 776, "PSAE001c01", 71,
             6.1e-183, 608.4, 1.0, 48, 606),
            ("PSAE001c01_031420", 658, "PSAE001c01", 73,
             1.8e-210, 699.3, 1.0, 55, 614),
        ]
        # sequence_coverage is (end - begin + 1) / hit_seq_length.
        hits = [
            CoreHit(c_gene, hit_id, seq_len, replicon, pos, i_eval, score,
                    profile_cov, (float(end) - float(begin) + 1) / seq_len,
                    begin, end)
            for (hit_id, seq_len, replicon, pos, i_eval, score, profile_cov,
                 begin, end) in expected_rows
        ]

        header_lines = [
            f"# gene: {c_gene.name} extract from {report_path} hmm output\n",
            f"# profile length= {len(c_gene.profile):d}\n",
            f"# i_evalue threshold= {self.cfg.i_evalue_sel():.3f}\n",
            f"# coverage threshold= {self.cfg.coverage_profile():.3f}\n",
            ("# hit_id replicon_name position_hit hit_sequence_length gene_name gene_system i_eval score "
             "profile_coverage sequence_coverage begin end\n"),
        ]
        expected = "".join(header_lines + [str(hit) for hit in hits])
        self.assertMultiLineEqual(str(report), expected)
コード例 #7
0
    def test_extract(self):
        """Check ``extract()`` on a fresh report and on one that already has hits.

        A fresh report built on the gspD search output must yield the six
        expected hits. A second report whose ``hits`` attribute has been
        assigned beforehand must return None from ``extract()``.
        """
        gene_name = "gspD"
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)

        work_dir = self.cfg.working_dir()
        search_file = gene_name + self.cfg.res_search_suffix()
        shutil.copy(self.find_data("hmm", search_file), work_dir)
        report_path = os.path.join(work_dir, search_file)

        report = GembaseHMMReport(c_gene, report_path, self.cfg)
        report.extract()
        self.assertEqual(len(report.hits), 6)

        # One row per expected hit:
        # (hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
        #  profile_coverage, begin_match, end_match)
        expected_rows = [
            ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
             2e-236, 779.2, 1.0, 104, 741),
            ("PSAE001c01_006940", 803, "PSAE001c01", 68,
             1.2e-234, 779.2, 1.0, 104, 741),
            ("PSAE001c01_013980", 759, "PSAE001c01", 69,
             3.7e-76, 255.8, 1.0, 105, 736),
            ("PSAE001c01_017350", 600, "PSAE001c01", 70,
             3.2e-27, 94.2, 0.5, 226, 506),
            ("PSAE001c01_018920", 776, "PSAE001c01", 71,
             6.1e-183, 608.4, 1.0, 48, 606),
            ("PSAE001c01_031420", 658, "PSAE001c01", 73,
             1.8e-210, 699.3, 1.0, 55, 614),
        ]
        # sequence_coverage is (end - begin + 1) / hit_seq_length.
        hits = [
            CoreHit(c_gene, hit_id, seq_len, replicon, pos, i_eval, score,
                    profile_cov, (float(end) - float(begin) + 1) / seq_len,
                    begin, end)
            for (hit_id, seq_len, replicon, pos, i_eval, score, profile_cov,
                 begin, end) in expected_rows
        ]
        self.assertListEqual(hits, report.hits)

        # Second scenario: the hits are already present on the report.
        prefilled_report = GembaseHMMReport(c_gene, report_path, self.cfg)
        prefilled_report.hits = hits
        self.assertIsNone(prefilled_report.extract())
コード例 #8
0
ファイル: test_Report.py プロジェクト: fjossandon/macsyfinder
 def test_str(self):
     """Check the string form of a report after extraction.

     ``str(report)`` must equal a five-line header (gene name and report
     path, profile length, i_evalue threshold, coverage threshold, column
     names) followed by the string form of each of the five expected hits.
     """
     gene_name = "gspD"
     system = System(self.cfg, "T2SS", 10)
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)

     search_file = gene_name + self.cfg.res_search_suffix
     shutil.copy(os.path.join(self._data_dir, "hmm", search_file),
                 self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir, search_file)

     report = GembaseHMMReport(gene, report_path, self.cfg)
     report.extract()

     # One row per expected hit:
     # (hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
     #  profile_coverage, begin_match, end_match)
     expected_rows = [
         ("PSAE001c01_006940", 803, "PSAE001c01", 68,
          1.2e-234, 779.2, 1.0, 104, 741),
         ("PSAE001c01_013980", 759, "PSAE001c01", 69,
          3.7e-76, 255.8, 1.0, 105, 736),
         ("PSAE001c01_017350", 600, "PSAE001c01", 70,
          3.2e-27, 94.2, 0.5, 226, 506),
         ("PSAE001c01_018920", 776, "PSAE001c01", 71,
          6.1e-183, 608.4, 1.0, 48, 606),
         ("PSAE001c01_031420", 658, "PSAE001c01", 73,
          1.8e-210, 699.3, 1.0, 55, 614),
     ]
     # sequence_coverage is (end - begin + 1) / hit_seq_length; the float()
     # conversions keep the division a true division on any interpreter.
     hits = [
         Hit(gene, system, hit_id, seq_len, replicon, pos, i_eval, score,
             profile_cov, (float(end) - float(begin) + 1) / seq_len,
             begin, end)
         for (hit_id, seq_len, replicon, pos, i_eval, score, profile_cov,
              begin, end) in expected_rows
     ]

     # Assemble the expected text in one pass instead of growing a string
     # with repeated "+=" (the former dead "s = ''" initialisation is gone).
     header_lines = [
         "# gene: %s extract from %s hmm output\n" % (gene.name, report_path),
         "# profile length= %d\n" % len(gene.profile),
         "# i_evalue threshold= %f\n" % self.cfg.i_evalue_sel,
         "# coverage threshold= %f\n" % self.cfg.coverage_profile,
         "# hit_id replicon_name position_hit hit_sequence_length gene_name gene_system i_eval score profile_coverage sequence_coverage begin end\n",
     ]
     expected = "".join(header_lines + [str(hit) for hit in hits])
     self.assertEqual(str(report), expected)