Ejemplo n.º 1
0
    def test_nisin_complete(self):
        """Run a minimal TTA-enabled analysis of nisin, then regenerate its results.

        The stored TTA results are regenerated twice: first with the threshold
        used for the run, where 174 features are expected, then with the
        threshold raised to 0.65, where no features are expected.
        """
        with TemporaryDirectory() as output_dir:
            nisin_path = helpers.get_path_to_nisin_genbank()
            options = build_config(
                [
                    "--minimal", "--enable-tta", "--tta-threshold", "0",
                    "--output-dir", output_dir,
                    nisin_path,
                ],
                isolated=True,
                modules=antismash.get_all_modules(),
            )
            antismash.run_antismash(nisin_path, options)

            # point the config at the results written by the run above
            json_path = os.path.join(output_dir, "nisin.json")
            update_config({"reuse_results": json_path})
            previous = read_data(None, options)
            record = previous.records[0]
            module_results = previous.results[0]

            regenerated = tta.regenerate_previous_results(
                module_results.get("antismash.modules.tta"), record, options)
            assert isinstance(regenerated, tta.TTAResults)
            assert len(regenerated.features) == 174

            # raise the threshold above the gc_content and ensure regenned has no hits
            update_config({"tta_threshold": 0.65})
            regenerated = tta.regenerate_previous_results(
                module_results.get("antismash.modules.tta"), record, options)
            assert isinstance(regenerated, tta.TTAResults)
            assert not regenerated.features
 def test_trim(self):
     """Parse nisin with start=10 and end=5000 and check the trimmed record."""
     records = record_processing.parse_input_sequence(
         helpers.get_path_to_nisin_genbank(), start=10, end=5000)
     assert len(records) == 1
     trimmed = records[0]
     assert isinstance(trimmed, Record)
     # two CDS features and 4990 bases are expected after trimming
     assert len(trimmed.get_cds_features()) == 2
     assert len(trimmed.seq) == 4990
Ejemplo n.º 3
0
    def test_nisin_minimal(self):
        """Check a normal run and a results-reuse run of nisin.

        Also checks that a run with neither a sequence file nor prior results
        raises a ValueError and leaves the output directory empty.
        """
        out_dir = self.default_options.output_dir
        everything = os.path.join(out_dir, "*")

        # the output directory has to start out empty
        assert not glob.glob(everything)

        # with neither input provided, the run has to fail
        with self.assertRaisesRegex(ValueError,
                                    "No sequence file or prior results to read"):
            run_antismash(None, self.default_options)

        # and the failed run must not have created any files
        assert not glob.glob(everything)

        # a normal run from the genbank file
        run_antismash(get_path_to_nisin_genbank(), self.default_options)
        self.check_output_files()

        # remove the html file, rerun from the stored results,
        # and make sure the output files are all present again
        results_dir = self.default_options.output_dir
        os.unlink(os.path.join(results_dir, "index.html"))
        update_config({"reuse_results": os.path.join(results_dir, "nisin.json")})
        run_antismash(None, self.default_options)
        self.check_output_files()
Ejemplo n.º 4
0
    def test_add_to_record(self):
        """Check that pfam2go results annotate a manually added PFAM domain."""
        record = record_processing.parse_input_sequence(
            helpers.get_path_to_nisin_genbank())[0]
        assert not record.get_pfam_domains()

        # build a test PFAM domain and attach it to the record
        location = FeatureLocation(2, 5)
        pfam = PFAMDomain(
            location,
            description="test",
            protein_start=5,
            protein_end=10,
            identifier="PF00005",
            domain="PF00005",
            tool="test",
        )
        pfam.domain_id = "test"
        record.add_pfam_domain(pfam)
        assert len(record.get_pfam_domains()) == 1

        # run pfam2go; the domain must show up in the results
        results = pfam2go.run_on_record(record, None, self.options)
        assert pfam in results.pfam_domains_with_gos

        # the ontologies are only set once the results are added to the record
        assert not pfam.gene_ontologies
        results.add_to_record(record)
        assert pfam.gene_ontologies

        # every domain's annotation has to match what the results report
        for domain in record.get_pfam_domains():
            ontologies = domain.gene_ontologies
            assert ontologies
            assert sorted(ontologies.ids) == sorted(results.get_all_gos(domain))
Ejemplo n.º 5
0
 def test_missing_locations_caught(self):
     """A feature without a location makes the record conversion fail."""
     parsed = list(Bio.SeqIO.parse(get_path_to_nisin_genbank(), "genbank"))
     bio_record = parsed[0]
     # the unmodified record converts without raising
     Record.from_biopython(bio_record, taxon="bacteria")

     # add a feature that has no location at all
     broken = SeqFeature(None, type="broken")
     bio_record.features.append(broken)
     with self.assertRaisesRegex(SecmetInvalidInputError,
                                 "missing or invalid location"):
         Record.from_biopython(bio_record, taxon="bacteria")
Ejemplo n.º 6
0
    def test_record_conversion_from_biopython(self):
        """Check a biopython record survives conversion to a Record and back.

        Also checks that the source record and its features are not reused or
        changed by the conversion, and that the number of features of each
        type matches what the Record reports.
        """
        before = list(Bio.SeqIO.parse(helpers.get_path_to_nisin_genbank(), "genbank"))[0]
        type_counts = defaultdict(int)
        for feature in before.features:
            # sort notes, because direct comparisons otherwise are awful
            qualifiers = feature.qualifiers
            if "note" in qualifiers:
                qualifiers["note"] = sorted(qualifiers["note"])
            type_counts[feature.type] += 1
        before_features = sorted(str(feature) for feature in before.features)

        record = Record.from_biopython(before, taxon="bacteria")
        after = record.to_biopython()

        # the features coming back out have to match those going in
        assert len(before_features) == len(after.features)
        after_features = sorted(str(feature) for feature in after.features)
        for expected, found in zip(before_features, after_features):
            assert expected == found

        # neither the record, the feature list, nor any feature may be shared
        assert before is not after
        assert before.features is not after.features
        for index, original in enumerate(before.features):
            assert original is not after.features[index]
        # and the original features must still read the same as before
        unchanged = sorted(str(feature) for feature in before.features)
        for expected, found in zip(before_features, unchanged):
            assert expected == found

        # the counts of each feature type have to match
        assert type_counts["CDS"] == len(record.get_cds_features())
        assert type_counts["PFAM_domain"] == len(record.get_pfam_domains())
        assert type_counts["cluster"] == len(record.get_clusters())
        assert type_counts["aSDomain"] == len(record.get_antismash_domains())
Ejemplo n.º 7
0
 def test_nisin(self):
     """Parse the nisin genbank file and check the single resulting record."""
     records = record_processing.parse_input_sequence(
         helpers.get_path_to_nisin_genbank())
     assert len(records) == 1
     nisin = records[0]
     assert isinstance(nisin, Record)
     # 11 CDS features and 15016 bases are expected
     assert len(nisin.get_cds_features()) == 11
     assert len(nisin.seq) == 15016
Ejemplo n.º 8
0
 def test_record_to_json_and_back(self):
     """Check that results written to JSON and read back are unchanged.

     The JSON forms of the original and reloaded results must be equal, no
     records may be lost, and the data streams built from both sets of
     records must match line by line.
     """
     filename = get_path_to_nisin_genbank()
     # close the input handle as soon as the records have been read
     with open(filename) as handle:
         bio_records = list(seqio.parse(handle, "genbank"))
     records = [
         Record.from_biopython(rec, taxon="bacteria") for rec in bio_records
     ]
     rec_results = [{}, {}, {}]
     results = serialiser.AntismashResults(filename, records, rec_results,
                                           "dummy")
     json_handle = StringIO()
     results.write_to_file(json_handle)
     json_handle.seek(0)
     new_results = serialiser.AntismashResults.from_file(json_handle,
                                                         taxon="bacteria")
     assert results.to_json() == new_results.to_json()
     # check no records were lost
     assert len(new_results.records) == len(results.records)
     # check that the contents of the records is the same
     #  by converting to biopython and writing to genbanks
     original = self.create_data_stream(results.records)
     new = self.create_data_stream(new_results.records)
     oldvalue = original.getvalue()
     newvalue = new.getvalue()
     with TemporaryDirectory(change=True):
         # write both versions out, closing each file once written
         with open("old.json", "w") as old_file:
             old_file.write(oldvalue)
         with open("new.json", "w") as new_file:
             new_file.write(newvalue)
         for oldline, newline in zip(oldvalue.split('\n'),
                                     newvalue.split('\n')):
             assert oldline == newline
    def test_reuse(self):
        """Check regeneration of cluster_hmmer results under changed thresholds.

        Regeneration is expected to give None when a threshold is loosened and
        to keep only the hits passing the threshold when it is tightened.
        """
        nisin = helpers.get_path_to_nisin_genbank()
        record = record_processing.parse_input_sequence(nisin)[0]

        results = helpers.run_and_regenerate_results_for_module(
            nisin, cluster_hmmer, self.options)
        previous = results.to_json()
        assert len(results.hits) == 24
        self.check_add_to_record(nisin, results)

        def regenerate():
            """Regenerate from the stored JSON with the current options."""
            return cluster_hmmer.regenerate_previous_results(
                previous, record, self.options)

        def evalue_of(hit):
            return hit["evalue"]

        def score_of(hit):
            return hit["score"]

        # less restrictive thresholds: nothing is regenerated
        self.set_min_score(self.original_min_score - .1)
        regenerated = regenerate()
        assert regenerated is None
        self.set_min_score(self.original_min_score)

        self.set_max_evalue(self.original_max_evalue + .1)
        regenerated = regenerate()
        assert regenerated is None
        self.set_max_evalue(self.original_max_evalue)

        # more restrictive evalue threshold: only hits at or below it remain
        stricter_evalue = sorted(map(evalue_of, results.hits))[12]
        assert stricter_evalue < self.original_max_evalue
        expected_hits = sorted(
            (hit for hit in results.hits if evalue_of(hit) <= stricter_evalue),
            key=evalue_of)
        assert len(expected_hits) < 24

        self.set_max_evalue(stricter_evalue)
        regenerated = regenerate()
        self.set_max_evalue(self.original_max_evalue)
        assert sorted(regenerated.hits, key=evalue_of) == expected_hits
        self.check_add_to_record(nisin, results)

        # more restrictive score threshold: only hits at or above it remain
        stricter_score = sorted(map(score_of, results.hits))[12]
        assert stricter_score > cluster_hmmer.MIN_SCORE
        expected_hits = sorted(
            (hit for hit in results.hits if score_of(hit) >= stricter_score),
            key=score_of)
        assert len(expected_hits) < 24

        self.set_min_score(stricter_score)
        regenerated = regenerate()
        self.set_min_score(self.original_min_score)
        assert sorted(regenerated.hits, key=score_of) == expected_hits
        self.check_add_to_record(nisin, results)
Ejemplo n.º 10
0
 def test_read_from_file(self):
     """Read nisin via Record.from_genbank and check some basic counts."""
     # very basic testing to ensure that the file IO itself functions
     records = Record.from_genbank(get_path_to_nisin_genbank())
     assert len(records) == 1
     nisin = records[0]
     assert nisin.get_feature_count() == 24
     assert len(nisin.get_cds_features()) == 11
     nis_b = nisin.get_cds_by_name("nisB")
     assert isinstance(nis_b, CDSFeature)
Ejemplo n.º 11
0
    def test_trees_complete(self):
        """Run nisin with smCoG trees enabled and regenerate the tree results.

        Seven tree images are expected from the run. Once one of the images
        is deleted, regenerating the results is expected to give None.
        """
        with TemporaryDirectory() as output_dir:
            nisin_path = helpers.get_path_to_nisin_genbank()
            options = build_config(
                [
                    "--minimal", "--enable-genefunctions", "--smcog-trees",
                    "--output-dir", output_dir,
                    nisin_path,
                ],
                isolated=True,
                modules=antismash.get_all_modules(),
            )
            antismash.run_antismash(nisin_path, options)

            json_path = os.path.join(output_dir, "nisin.json")
            with open(json_path) as res_file:
                contents = res_file.read()
            assert "antismash.modules.smcog_trees" in contents

            tree_files = glob.glob(os.path.join(output_dir, "smcogs", "*.png"))
            assert len(tree_files) == 7
            sample_tree = tree_files[0]

            # regen the results
            update_config({"reuse_results": json_path})
            prior_results = read_data(None, options)
            record = prior_results.records[0]
            tree_results = prior_results.results[0]["antismash.modules.smcog_trees"]

            smcogs_results = smcog_trees.regenerate_previous_results(
                tree_results, record, options)
            assert len(smcogs_results.tree_images) == 7
            assert os.path.exists(sample_tree)

            os.unlink(sample_tree)
            assert not os.path.exists(sample_tree)

            # attempt to regen the results, the deleted tree image will prevent it
            prior_results = read_data(None, options)
            record = prior_results.records[0]
            # indexed as in the first read; the tree results fetched earlier
            # are the ones passed on below
            results = prior_results.results[0]
            smcogs_results = smcog_trees.regenerate_previous_results(
                tree_results, record, options)
            assert smcogs_results is None
Ejemplo n.º 12
0
    def test_protein_sequences_caught(self):
        """Converting a record with a protein sequence raises a ValueError."""
        parsed = list(Bio.SeqIO.parse(get_path_to_nisin_genbank(), "genbank"))
        bio_record = parsed[0]

        # sanity check: the sequence is a Seq and the record converts as it is
        assert isinstance(bio_record.seq, Seq)
        Record.from_biopython(bio_record, taxon="bacteria")

        # swap in a protein sequence, which has to be rejected
        bio_record.seq = Seq("AAAA", IUPACProtein())
        with self.assertRaisesRegex(ValueError,
                                    "protein records are not supported"):
            Record.from_biopython(bio_record, taxon="bacteria")
Ejemplo n.º 13
0
 def test_nisin_end_to_end(self):
     """Run the lanthipeptides module on nisin and check the predicted motif.

     Reported as skipped, rather than silently passing, when fimo is
     disabled in the options.
     """
     # no need to run without fimo, we already test the computational
     # side elsewhere
     if self.options.without_fimo:
         self.skipTest("fimo is disabled")
     nisin = helpers.get_path_to_nisin_genbank()
     result = helpers.run_and_regenerate_results_for_module(
         nisin, lanthipeptides, self.options)
     # only the nisB locus is expected to have motifs
     assert list(result.motifs_by_locus) == ["nisB"]
     prepeptide = result.motifs_by_locus["nisB"][0]
     self.assertAlmostEqual(3336.0, prepeptide.molecular_weight, delta=0.05)
Ejemplo n.º 14
0
    def test_dna_casing(self):
        """The molecule type "DNA" is accepted in any of the casings tried.

        Each casing with a trailing "x" appended has to be rejected.
        """
        parsed = list(Bio.SeqIO.parse(get_path_to_nisin_genbank(), "genbank"))
        bio_record = parsed[0]
        for molecule in ("DNA", "dna", "Dna"):
            # the valid molecule type converts without raising
            bio_record.annotations["molecule_type"] = molecule
            Record.from_biopython(bio_record, taxon="bacteria")

            # the invalid variant must raise
            invalid = molecule + "x"
            bio_record.annotations["molecule_type"] = invalid
            with self.assertRaisesRegex(ValueError, "records are not supported"):
                Record.from_biopython(bio_record, taxon="bacteria")
Ejemplo n.º 15
0
    def test_nisin_complete(self):
        """Run nisin with TTA detection and regenerate the results per record.

        The regenerated TTA results are expected to hold 174 features.
        """
        with TemporaryDirectory() as output_dir:
            nisin_path = helpers.get_path_to_nisin_genbank()
            options = build_config(
                ["--minimal", "--tta", "--output-dir", output_dir, nisin_path],
                isolated=True,
                modules=antismash.get_all_modules(),
            )
            antismash.run_antismash(nisin_path, options)

            # point the config at the results written by the run above
            json_path = os.path.join(output_dir, "nisin.json")
            update_config({"reuse_results": json_path})
            previous = read_data(None, options)
            record = previous.records[0]
            module_results = previous.results[0]

            regenned = regenerate_results_for_record(
                record, options, [tta], module_results)
            tta_results = regenned["antismash.modules.tta"]
            assert isinstance(tta_results, tta.TTAResults)
            assert len(tta_results.features) == 174
Ejemplo n.º 16
0
    def test_subregions_annotated(self, _patched_rules):
        """Check that CDS features inside a subregion get sec_met annotations.

        With a subregion ending just after nisB, only nisB is expected to be
        annotated, both after a direct run and after regenerating from JSON.
        """
        def fresh_record():
            """Load nisin and strip its existing antiSMASH annotations."""
            loaded = Record.from_genbank(get_path_to_nisin_genbank())[0]
            loaded.strip_antismash_annotations()
            return loaded

        def check_only_nis_b_annotated(target):
            assert target.get_cds_by_name("nisB").sec_met
            assert not target.get_cds_by_name("nisC").sec_met

        record = fresh_record()
        assert not record.get_regions()
        assert not record.get_subregions()

        # without any subregion, nothing is predicted or annotated
        results = hmm_detection.run_on_record(record, None, self.options)
        assert not results.get_predicted_protoclusters()
        for name in ("nisB", "nisC"):
            assert not record.get_cds_by_name(name).sec_met

        # add a subregion ending 10 bases after nisB
        cutoff = record.get_cds_by_name("nisB").location.end + 10
        record.add_subregion(DummySubRegion(end=cutoff))

        results = hmm_detection.run_on_record(record, None, self.options)
        check_only_nis_b_annotated(record)

        # and then a json conversion, even without subregion added
        record = fresh_record()
        raw = json.loads(json.dumps(results.to_json()))
        hmm_detection.regenerate_previous_results(raw, record, self.options)
        check_only_nis_b_annotated(record)
Ejemplo n.º 17
0
    def test_nisin_complete(self):
        """Run nisin with lanthipeptides enabled and check the HTML output."""
        with TemporaryDirectory() as output_dir:
            options = build_config(
                [
                    "run_antismash.py", "--minimal", "--enable-lanthipeptides",
                    "--output-dir", output_dir,
                ],
                isolated=True,
                modules=antismash.get_all_modules(),
            )
            antismash.run_antismash(helpers.get_path_to_nisin_genbank(),
                                    options)

            # make sure the html_output section was tested
            html_path = os.path.join(output_dir, "index.html")
            with open(html_path) as handle:
                content = handle.read()
            assert "nisA leader / core peptide" in content
    def test_minimum_length(self):
        """Check the minimum_length handling of parse_input_sequence on nisin.

        Minimum lengths of -16 and 15016 keep the record, 15017 leaves no
        valid records, and a non-int minimum length raises a TypeError.
        """
        nisin_path = helpers.get_path_to_nisin_genbank()
        records = record_processing.parse_input_sequence(nisin_path,
                                                         minimum_length=-16)
        assert len(records) == 1

        records = record_processing.parse_input_sequence(nisin_path,
                                                         minimum_length=15016)
        assert len(records) == 1

        with self.assertRaisesRegex(AntismashInputError, "no valid records found"):
            record_processing.parse_input_sequence(nisin_path, minimum_length=15017)

        for bad_len in [5.6, None, "5"]:
            with self.assertRaisesRegex(TypeError, "minimum_length must be an int"):
                # use the nisin path here too, so that only the bad
                # minimum_length can be the reason for the error
                record_processing.parse_input_sequence(nisin_path, minimum_length=bad_len)
Ejemplo n.º 19
0
    def test_reuse(self):
        """Check the GO ids that regenerated pfam2go results give for nisin.

        For each sample Pfam the GO ids found must be among the expected ones
        and match them in number, and every sample Pfam must be found.
        """
        nisin = helpers.get_path_to_nisin_genbank()
        results = helpers.run_and_regenerate_results_for_module(
            nisin, pfam2go, self.options)

        # the GO ids (with descriptions) expected for the sample Pfams
        expected = {
            "PF00005": {
                "GO:0005524": "ATP binding",
                "GO:0016887": "ATPase activity"
            },
            "PF00072": {
                "GO:0000160": "phosphorelay signal transduction system"
            },
            "PF00486": {
                "GO:0003677": "DNA binding",
                "GO:0000160": "phosphorelay signal transduction system",
                "GO:0006355": "regulation of transcription, DNA-templated"
            }
        }
        # Pfams without GO terms, which must not appear in the results
        pfams_without_gos = ("PF05147", "PF04738")

        found = set()
        for pfam, all_ontologies in results.pfam_domains_with_gos.items():
            unversioned = [pfam_id.partition(".")[0] for pfam_id in pfam.db_xref]
            for unexpected in pfams_without_gos:
                assert unexpected not in unversioned
            for ontologies in all_ontologies:
                # the ontologies' pfam id has to be one of the domain's own ids
                assert ontologies.pfam in unversioned
                if ontologies.pfam not in expected:
                    continue
                # for the sample Pfams, check both the number of GO ids
                # and that each one is expected
                found.add(ontologies.pfam)
                expected_gos = expected[ontologies.pfam]
                go_ids = [str(entry) for entry in ontologies.go_entries]
                assert len(go_ids) == len(expected_gos)
                for go_id in go_ids:
                    assert go_id in expected_gos

        # make sure all expected pfams have been found
        assert len(found) == len(expected)

        self.check_add_to_record(nisin, results)
Ejemplo n.º 20
0
    def test_nisin(self):
        """Test lanthipeptide prediction for nisin A"""
        rec, _ = self.run_lanthi(helpers.get_path_to_nisin_genbank(),
                                 "nisA leader / core peptide")

        prepeptide = rec.get_cds_motifs()[0]
        tolerance = 0.05
        # real monoisotopic mass is 3351.51, but we overpredict a Dha
        self.assertAlmostEqual(3333.6, prepeptide.monoisotopic_mass, delta=tolerance)
        # real mw is 3354.5, see above
        self.assertAlmostEqual(3336.0, prepeptide.molecular_weight, delta=tolerance)

        expected_alternatives = [3354.0, 3372.1, 3390.1, 3408.1]
        pairs = zip(expected_alternatives, prepeptide.alternative_weights)
        for expected, calculated in pairs:
            self.assertAlmostEqual(expected, calculated, delta=tolerance)

        assert prepeptide.detailed_information.lan_bridges == 5
        self.assertEqual("MSTKDFNLDLVSVSKKDSGASPR", prepeptide.leader)
        self.assertEqual("ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK", prepeptide.core)
        self.assertEqual('Class I', prepeptide.peptide_subclass)
Ejemplo n.º 21
0
    def test_reuse(self):
        """Check regenerated sideloader results against the raw sideloaded JSON."""
        raw_results = sideloader.loader.load_validated_json(
            GOOD_FILE, sideloader.general._SCHEMA_FILE)

        nisin = helpers.get_path_to_nisin_genbank()
        results = helpers.run_and_regenerate_results_for_module(
            nisin, sideloader, self.options)

        record_section = raw_results["records"][0]
        assert results.record_id == record_section["name"]

        # the details every regenerated subregion is expected to carry
        expected_details = {
            "score": ["6.5"],
            "some_option_name": ["yes"],
            "some_other_detail": ["first", "second", "etc"]
        }
        pairs = zip(results.subregions, record_section["subregions"])
        for subregion, raw in pairs:
            assert subregion.tool.name == raw_results["tool"]["name"]
            assert subregion.start == raw["start"]
            assert subregion.end == raw["end"]
            assert subregion.label == raw["label"]
            assert subregion.details == expected_details
Ejemplo n.º 22
0
    def test_add_to_record(self):
        """Check that pfam2go results annotate a dummy PFAM domain on nisin."""
        record = record_processing.parse_input_sequence(
            helpers.get_path_to_nisin_genbank())[0]
        assert not record.get_pfam_domains()

        # attach a dummy PFAM domain to the record
        pfam = helpers.DummyPFAMDomain(identifier="PF00005", domain="PF00005")
        record.add_pfam_domain(pfam)
        assert len(record.get_pfam_domains()) == 1

        # run pfam2go; the domain must show up in the results
        results = pfam2go.run_on_record(record, None, self.options)
        assert pfam in results.pfam_domains_with_gos

        # the ontologies are only set once the results are added to the record
        assert not pfam.gene_ontologies
        results.add_to_record(record)
        assert pfam.gene_ontologies

        # every domain's annotation has to match what the results report
        for domain in record.get_pfam_domains():
            ontologies = domain.gene_ontologies
            assert ontologies
            assert sorted(ontologies.ids) == sorted(results.get_all_gos(domain))
Ejemplo n.º 23
0
 def test_results_from_invalid_file_raises_error(self):
     """Loading results from the nisin genbank path raises a ValueError."""
     loader = serialiser.AntismashResults.from_file
     not_results = get_path_to_nisin_genbank()
     with self.assertRaisesRegex(ValueError, "Cannot load results to reuse"):
         loader(not_results)
Ejemplo n.º 24
0
 def read_nisin(self):
     """Parse the nisin genbank file and return its records.

     Asserts that exactly one record was parsed.
     """
     nisin_path = helpers.get_path_to_nisin_genbank()
     parsed = record_processing.parse_input_sequence(nisin_path)
     assert len(parsed) == 1
     return parsed
Ejemplo n.º 25
0
 def check_nisin(self, expected):
     """Return the result of self.run_antismash on nisin with `expected`."""
     nisin_path = helpers.get_path_to_nisin_genbank()
     return self.run_antismash(nisin_path, expected)
Ejemplo n.º 26
0
 def test_nisin_minimal(self):
     """Run antiSMASH on nisin with the default options and check the output files."""
     nisin_path = get_path_to_nisin_genbank()
     run_antismash(nisin_path, self.default_options)
     self.check_output_files()
Ejemplo n.º 27
0
 def test_nisin_minimal(self):
     """Run antiSMASH on nisin with self.config and check the output files."""
     nisin_path = get_path_to_nisin_genbank()
     run_antismash(nisin_path, self.config)
     self.check_output_files()