def test_nisin_complete(self):
    """Run a minimal TTA analysis on nisin and regenerate its stored results.

    Regeneration is exercised twice from the same JSON section: first with
    the configuration of the run itself, then after ``tta_threshold`` has
    been raised to 0.65, at which point no features are expected.
    """
    with TemporaryDirectory() as output_dir:
        cli_args = [
            "--minimal",
            "--enable-tta",
            "--tta-threshold", "0",
            "--output-dir", output_dir,
            helpers.get_path_to_nisin_genbank(),
        ]
        options = build_config(cli_args, isolated=True,
                               modules=antismash.get_all_modules())
        antismash.run_antismash(helpers.get_path_to_nisin_genbank(), options)

        # point the config at the JSON written by the run above and reload it
        json_path = os.path.join(output_dir, "nisin.json")
        update_config({"reuse_results": json_path})
        previous = read_data(None, options)
        record = previous.records[0]
        module_results = previous.results[0]

        def regenerate():
            # the stored section is looked up afresh for every regeneration
            return tta.regenerate_previous_results(
                module_results.get("antismash.modules.tta"), record, options)

        regenerated = regenerate()
        assert isinstance(regenerated, tta.TTAResults)
        assert len(regenerated.features) == 174

        # raise the threshold above the gc_content; regeneration should
        # then produce a result object without any hits
        update_config({"tta_threshold": 0.65})
        regenerated = regenerate()
        assert isinstance(regenerated, tta.TTAResults)
        assert not regenerated.features
def test_trim(self):
    """Parse nisin with ``start=10`` and ``end=5000`` and check the trimmed record."""
    trimmed = record_processing.parse_input_sequence(
        helpers.get_path_to_nisin_genbank(), start=10, end=5000)
    assert len(trimmed) == 1

    record = trimmed[0]
    assert isinstance(record, Record)
    # only two CDS features are expected in the trimmed record
    assert len(record.get_cds_features()) == 2
    # 5000 - 10 bases are expected after trimming
    assert len(record.seq) == 4990
def test_nisin_minimal(self):
    """Exercise a minimal run: no-input failure, a normal run, then a reuse run.

    The output directory must stay empty when the run is rejected, and the
    expected output files must exist after both the normal run and the run
    that reuses the JSON results.
    """
    # make sure the output directory isn't filled
    out_dir = self.default_options.output_dir
    everything_in_out_dir = os.path.join(out_dir, "*")
    assert not glob.glob(everything_in_out_dir)

    # die with neither inputs provided
    expected_message = "No sequence file or prior results to read"
    with self.assertRaisesRegex(ValueError, expected_message):
        run_antismash(None, self.default_options)

    # make sure no files created
    assert not glob.glob(os.path.join(out_dir, "*"))

    # do a normal run
    run_antismash(get_path_to_nisin_genbank(), self.default_options)
    self.check_output_files()

    # remove html file and make sure it's recreated
    html_path = os.path.join(self.default_options.output_dir, "index.html")
    os.unlink(html_path)
    json_path = os.path.join(self.default_options.output_dir, "nisin.json")
    update_config({"reuse_results": json_path})
    run_antismash(None, self.default_options)
    self.check_output_files()
def test_add_to_record(self):
    """Add a hand-built PFAM domain to nisin and check pfam2go annotates it.

    The domain must have no gene ontologies until the pfam2go results are
    added to the record, and afterwards its ontology ids must match what
    the results report for it.
    """
    nisin_path = helpers.get_path_to_nisin_genbank()
    record = record_processing.parse_input_sequence(nisin_path)[0]
    assert not record.get_pfam_domains()

    # add a test PFAM
    test_domain = PFAMDomain(
        FeatureLocation(2, 5),
        description="test",
        protein_start=5,
        protein_end=10,
        identifier="PF00005",
        domain="PF00005",
        tool="test",
    )
    test_domain.domain_id = "test"
    record.add_pfam_domain(test_domain)
    assert len(record.get_pfam_domains()) == 1

    # run pfam2go and add the results
    results = pfam2go.run_on_record(record, None, self.options)
    assert test_domain in results.pfam_domains_with_gos
    # running alone must not annotate; only add_to_record does
    assert not test_domain.gene_ontologies
    results.add_to_record(record)
    assert test_domain.gene_ontologies

    # check the contents of the annotation
    for annotated in record.get_pfam_domains():
        assert annotated.gene_ontologies
        found_ids = sorted(annotated.gene_ontologies.ids)
        expected_ids = sorted(results.get_all_gos(annotated))
        assert found_ids == expected_ids
def test_missing_locations_caught(self):
    """A feature built with ``None`` as its location must be rejected on conversion."""
    parsed = list(Bio.SeqIO.parse(get_path_to_nisin_genbank(), "genbank"))
    rec = parsed[0]
    # sanity check: the untouched record converts without error
    Record.from_biopython(rec, taxon="bacteria")

    broken_feature = SeqFeature(None, type="broken")
    rec.features.append(broken_feature)
    with self.assertRaisesRegex(SecmetInvalidInputError,
                                "missing or invalid location"):
        Record.from_biopython(rec, taxon="bacteria")
def test_record_conversion_from_biopython(self):
    """Round-trip a biopython record through Record and compare the features.

    Checks that the converted features stringify identically to the
    originals, that the conversion produced new objects rather than
    reusing the originals, and that the per-type feature counts agree
    with the Record accessors.
    """
    before = list(Bio.SeqIO.parse(helpers.get_path_to_nisin_genbank(), "genbank"))[0]

    type_counts = defaultdict(int)
    for feature in before.features:
        # sort notes, because direct comparisons otherwise are awful
        if "note" in feature.qualifiers:
            feature.qualifiers["note"] = sorted(feature.qualifiers["note"])
        type_counts[feature.type] += 1
    before_features = sorted(str(feature) for feature in before.features)

    record = Record.from_biopython(before, taxon="bacteria")
    after = record.to_biopython()

    # ensure new features are correct
    assert len(before_features) == len(after.features)
    after_features = sorted(str(feature) for feature in after.features)
    for expected, converted in zip(before_features, after_features):
        assert expected == converted

    # ensure we haven't changed the original record or feature list
    assert before is not after
    assert before.features is not after.features
    for original, converted in zip(before.features, after.features):
        assert original is not converted
    current_features = sorted(str(feature) for feature in before.features)
    for expected, current in zip(before_features, current_features):
        assert expected == current

    # ensure that the counts of each match
    assert type_counts["CDS"] == len(record.get_cds_features())
    assert type_counts["PFAM_domain"] == len(record.get_pfam_domains())
    assert type_counts["cluster"] == len(record.get_clusters())
    assert type_counts["aSDomain"] == len(record.get_antismash_domains())
def test_nisin(self):
    """Parse the full nisin input and check record, CDS and sequence counts."""
    parsed = record_processing.parse_input_sequence(
        helpers.get_path_to_nisin_genbank())
    assert len(parsed) == 1

    record = parsed[0]
    assert isinstance(record, Record)
    assert len(record.get_cds_features()) == 11
    assert len(record.seq) == 15016
def test_record_to_json_and_back(self):
    """Serialise results to JSON, read them back and compare both forms.

    The comparison is made twice: on the JSON representation itself and
    on the data streams produced by ``create_data_stream`` for the
    original and the reloaded records.

    All file handles are opened via ``with`` so they are closed
    deterministically instead of being left to the garbage collector.
    """
    filename = get_path_to_nisin_genbank()
    # consume the parser fully while the handle is still open
    with open(filename) as handle:
        records = list(seqio.parse(handle, "genbank"))
    records = [
        Record.from_biopython(rec, taxon="bacteria") for rec in records
    ]
    rec_results = [{}, {}, {}]
    results = serialiser.AntismashResults(filename, records, rec_results, "dummy")

    json_handle = StringIO()
    results.write_to_file(json_handle)
    json_handle.seek(0)
    new_results = serialiser.AntismashResults.from_file(json_handle, taxon="bacteria")
    assert results.to_json() == new_results.to_json()

    # check no records were lost
    assert len(new_results.records) == len(results.records)

    # check that the contents of the records is the same
    #  by converting to biopython and writing to genbanks
    original = self.create_data_stream(results.records)
    new = self.create_data_stream(new_results.records)
    oldvalue = original.getvalue()
    newvalue = new.getvalue()
    with TemporaryDirectory(change=True):
        # the dumps are written out so they can be inspected on failure
        with open("old.json", "w") as old_handle:
            old_handle.write(oldvalue)
        with open("new.json", "w") as new_handle:
            new_handle.write(newvalue)
        for oldline, newline in zip(oldvalue.split('\n'), newvalue.split('\n')):
            assert oldline == newline
def test_reuse(self):
    """Check cluster_hmmer result regeneration under changed thresholds.

    Looser thresholds than the original run must make regeneration return
    ``None``; stricter thresholds must return only the hits that still
    satisfy them. Thresholds are restored after every change.
    """
    nisin = helpers.get_path_to_nisin_genbank()
    record = record_processing.parse_input_sequence(nisin)[0]

    results = helpers.run_and_regenerate_results_for_module(
        nisin, cluster_hmmer, self.options)
    results_json = results.to_json()
    assert len(results.hits) == 24
    self.check_add_to_record(nisin, results)

    def regenerate():
        # always regenerate from the JSON of the initial run
        return cluster_hmmer.regenerate_previous_results(
            results_json, record, self.options)

    # test regeneration when thresholds are less restrictive
    self.set_min_score(self.original_min_score - .1)
    assert regenerate() is None
    self.set_min_score(self.original_min_score)

    self.set_max_evalue(self.original_max_evalue + .1)
    assert regenerate() is None
    self.set_max_evalue(self.original_max_evalue)

    # test regeneration when evalue threshold is more restrictive
    evalue_cutoff = sorted(hit["evalue"] for hit in results.hits)[12]
    assert evalue_cutoff < self.original_max_evalue
    expected_hits = sorted(
        [hit for hit in results.hits if hit["evalue"] <= evalue_cutoff],
        key=lambda hit: hit["evalue"])
    assert len(expected_hits) < 24

    self.set_max_evalue(evalue_cutoff)
    regenerated = regenerate()
    self.set_max_evalue(self.original_max_evalue)
    assert sorted(regenerated.hits, key=lambda hit: hit["evalue"]) == expected_hits
    self.check_add_to_record(nisin, results)

    # test regeneration when score threshold is more restrictive
    score_cutoff = sorted(hit["score"] for hit in results.hits)[12]
    assert score_cutoff > cluster_hmmer.MIN_SCORE
    expected_hits = sorted(
        [hit for hit in results.hits if hit["score"] >= score_cutoff],
        key=lambda hit: hit["score"])
    assert len(expected_hits) < 24

    self.set_min_score(score_cutoff)
    regenerated = regenerate()
    self.set_min_score(self.original_min_score)
    assert sorted(regenerated.hits, key=lambda hit: hit["score"]) == expected_hits
    self.check_add_to_record(nisin, results)
def test_read_from_file(self):
    """Read nisin via ``Record.from_genbank`` and spot-check the resulting record."""
    # very basic testing to ensure that the file IO itself functions
    records = Record.from_genbank(get_path_to_nisin_genbank())
    assert len(records) == 1

    record = records[0]
    assert record.get_feature_count() == 24
    assert len(record.get_cds_features()) == 11
    nis_b = record.get_cds_by_name("nisB")
    assert isinstance(nis_b, CDSFeature)
def test_trees_complete(self):
    """Run smCoG tree generation on nisin and test regeneration of its results.

    Regeneration must succeed while all tree images exist and must return
    ``None`` once one of the images has been deleted.
    """
    with TemporaryDirectory() as output_dir:
        cli_args = [
            "--minimal",
            "--enable-genefunctions",
            "--smcog-trees",
            "--output-dir", output_dir,
            helpers.get_path_to_nisin_genbank(),
        ]
        options = build_config(cli_args, isolated=True,
                               modules=antismash.get_all_modules())
        antismash.run_antismash(helpers.get_path_to_nisin_genbank(), options)

        json_path = os.path.join(output_dir, "nisin.json")
        with open(json_path) as res_file:
            json_text = res_file.read()
        assert "antismash.modules.smcog_trees" in json_text

        image_pattern = os.path.join(output_dir, "smcogs", "*.png")
        tree_files = glob.glob(image_pattern)
        assert len(tree_files) == 7
        sample_tree = tree_files[0]

        # regen the results
        update_config({"reuse_results": os.path.join(output_dir, "nisin.json")})
        prior_results = read_data(None, options)
        record = prior_results.records[0]
        results = prior_results.results[0]
        tree_results = results["antismash.modules.smcog_trees"]

        smcogs_results = smcog_trees.regenerate_previous_results(
            tree_results, record, options)
        assert len(smcogs_results.tree_images) == 7
        assert os.path.exists(sample_tree)
        os.unlink(sample_tree)
        assert not os.path.exists(sample_tree)

        # attempt to regen the results, the deleted tree image will prevent it
        # (the module section read before the deletion is passed in again)
        prior_results = read_data(None, options)
        record = prior_results.records[0]
        results = prior_results.results[0]
        smcogs_results = smcog_trees.regenerate_previous_results(
            tree_results, record, options)
        assert smcogs_results is None
def test_protein_sequences_caught(self):
    """Conversion must raise ValueError once the sequence uses a protein alphabet."""
    parsed = list(Bio.SeqIO.parse(get_path_to_nisin_genbank(), "genbank"))
    before = parsed[0]

    # as a sanity check, make sure it's a seq and it functions as expected
    assert isinstance(before.seq, Seq)
    Record.from_biopython(before, taxon="bacteria")

    # swap in a sequence flagged as protein and expect a rejection
    before.seq = Seq("AAAA", IUPACProtein())
    expected_message = "protein records are not supported"
    with self.assertRaisesRegex(ValueError, expected_message):
        Record.from_biopython(before, taxon="bacteria")
def test_nisin_end_to_end(self):
    """Run and regenerate lanthipeptide results for nisin and check the nisB motif.

    Returns early, without asserting anything, when ``without_fimo`` is set
    in the options.
    """
    # skip fimo being disabled for this, we already test the computational
    # side elsewhere
    if not self.options.without_fimo:
        nisin = helpers.get_path_to_nisin_genbank()
        result = helpers.run_and_regenerate_results_for_module(
            nisin, lanthipeptides, self.options)
        assert list(result.motifs_by_locus) == ["nisB"]
        first_motif = result.motifs_by_locus["nisB"][0]
        self.assertAlmostEqual(3336.0, first_motif.molecular_weight, delta=0.05)
def test_dna_casing(self):
    """``molecule_type`` must be accepted as DNA in any casing and rejected otherwise."""
    before = list(Bio.SeqIO.parse(get_path_to_nisin_genbank(), "genbank"))[0]
    for casing in ("DNA", "dna", "Dna"):
        # each casing of "dna" must convert without error
        before.annotations["molecule_type"] = casing
        Record.from_biopython(before, taxon="bacteria")

        # the same value with a trailing "x" must be refused
        invalid_type = casing + "x"
        before.annotations["molecule_type"] = invalid_type
        with self.assertRaisesRegex(ValueError, "records are not supported"):
            Record.from_biopython(before, taxon="bacteria")
def test_nisin_complete(self):
    """Run a minimal ``--tta`` analysis of nisin and regenerate via the generic helper.

    The TTA results are regenerated through ``regenerate_results_for_record``
    rather than by calling the module directly.
    """
    with TemporaryDirectory() as output_dir:
        cli_args = [
            "--minimal",
            "--tta",
            "--output-dir", output_dir,
            helpers.get_path_to_nisin_genbank(),
        ]
        options = build_config(cli_args, isolated=True,
                               modules=antismash.get_all_modules())
        antismash.run_antismash(helpers.get_path_to_nisin_genbank(), options)

        # regen the results
        json_path = os.path.join(output_dir, "nisin.json")
        update_config({"reuse_results": json_path})
        previous = read_data(None, options)
        record = previous.records[0]
        module_results = previous.results[0]

        regenerated = regenerate_results_for_record(
            record, options, [tta], module_results)
        tta_results = regenerated["antismash.modules.tta"]
        assert isinstance(tta_results, tta.TTAResults)
        assert len(tta_results.features) == 174
def test_subregions_annotated(self, _patched_rules):
    """CDS features inside a subregion get ``sec_met`` set, also after a JSON round trip.

    Without any subregion neither nisB nor nisC is annotated; once a
    subregion ending just past nisB is added, only nisB is. The same must
    hold when the results are regenerated from JSON on a fresh record.
    """
    def load_stripped_record():
        # fresh copy of nisin with antiSMASH annotations stripped
        fresh = Record.from_genbank(get_path_to_nisin_genbank())[0]
        fresh.strip_antismash_annotations()
        return fresh

    record = load_stripped_record()
    assert not record.get_regions()
    assert not record.get_subregions()

    results = hmm_detection.run_on_record(record, None, self.options)
    assert not results.get_predicted_protoclusters()
    for name in ("nisB", "nisC"):
        assert not record.get_cds_by_name(name).sec_met

    # add a subregion ending 10 bases after the end of nisB
    cutoff = record.get_cds_by_name("nisB").location.end + 10
    record.add_subregion(DummySubRegion(end=cutoff))
    results = hmm_detection.run_on_record(record, None, self.options)
    assert record.get_cds_by_name("nisB").sec_met
    assert not record.get_cds_by_name("nisC").sec_met

    # and then a json conversion, even without subregion added
    record = load_stripped_record()
    serialised = json.dumps(results.to_json())
    raw = json.loads(serialised)
    hmm_detection.regenerate_previous_results(raw, record, self.options)
    assert record.get_cds_by_name("nisB").sec_met
    assert not record.get_cds_by_name("nisC").sec_met
def test_nisin_complete(self):
    """Run nisin with lanthipeptides enabled and check the generated HTML."""
    with TemporaryDirectory() as output_dir:
        cli_args = [
            "run_antismash.py",
            "--minimal",
            "--enable-lanthipeptides",
            "--output-dir", output_dir,
        ]
        options = build_config(cli_args, isolated=True,
                               modules=antismash.get_all_modules())
        antismash.run_antismash(helpers.get_path_to_nisin_genbank(), options)

        # make sure the html_output section was tested
        html_path = os.path.join(output_dir, "index.html")
        with open(html_path) as handle:
            page = handle.read()
        assert "nisA leader / core peptide" in page
def test_minimum_length(self):
    """Check ``minimum_length`` filtering and type validation when parsing nisin.

    A minimum at or below 15016 keeps the single record, a minimum of
    15017 must raise ``AntismashInputError``, and any non-int minimum must
    raise ``TypeError``.
    """
    nisin_path = helpers.get_path_to_nisin_genbank()

    records = record_processing.parse_input_sequence(nisin_path, minimum_length=-16)
    assert len(records) == 1

    records = record_processing.parse_input_sequence(nisin_path, minimum_length=15016)
    assert len(records) == 1

    with self.assertRaisesRegex(AntismashInputError, "no valid records found"):
        record_processing.parse_input_sequence(nisin_path, minimum_length=15017)

    for bad_len in [5.6, None, "5"]:
        with self.assertRaisesRegex(TypeError, "minimum_length must be an int"):
            # use the local nisin_path: the bare name `path` is not defined
            # in this method, unlike every other call here
            record_processing.parse_input_sequence(nisin_path, minimum_length=bad_len)
def test_reuse(self):
    """Run and regenerate pfam2go results for nisin and verify the GO mappings.

    Every ontology set must belong to one of its domain's Pfam ids, the
    sample Pfams must map to exactly the expected GO ids, and the two
    Pfams without GO terms must not appear at all.
    """
    nisin = helpers.get_path_to_nisin_genbank()
    results = helpers.run_and_regenerate_results_for_module(
        nisin, pfam2go, self.options)

    # are the expected go ids for pfams found/no wrong ids for pfams?
    expected_pfams_and_gos_with_descs = {
        "PF00005": {
            "GO:0005524": "ATP binding",
            "GO:0016887": "ATPase activity",
        },
        "PF00072": {
            "GO:0000160": "phosphorelay signal transduction system",
        },
        "PF00486": {
            "GO:0003677": "DNA binding",
            "GO:0000160": "phosphorelay signal transduction system",
            "GO:0006355": "regulation of transcription, DNA-templated",
        },
    }

    expected_pfams_found = set()
    for domain, ontology_sets in results.pfam_domains_with_gos.items():
        unversioned_ids = [xref.partition(".")[0] for xref in domain.db_xref]
        # make sure the Pfams without gos aren't in the results
        assert "PF05147" not in unversioned_ids
        assert "PF04738" not in unversioned_ids

        for ontologies in ontology_sets:
            # make sure GeneOntologies' pfam id actually is one found in the domain's ids
            assert ontologies.pfam in unversioned_ids
            if ontologies.pfam not in expected_pfams_and_gos_with_descs:
                continue
            # did it find the right amount of GO IDs for the sample Pfams,
            # and did it find the right ones?
            expected_pfams_found.add(ontologies.pfam)
            expected_gos = expected_pfams_and_gos_with_descs[ontologies.pfam]
            go_ids = [str(entry) for entry in ontologies.go_entries]
            assert len(go_ids) == len(expected_gos)
            for go_id in go_ids:
                assert go_id in expected_gos

    # make sure all expected pfams have been found
    assert len(expected_pfams_found) == len(expected_pfams_and_gos_with_descs)
    self.check_add_to_record(nisin, results)
def test_nisin(self):
    """Test lanthipeptide prediction for nisin A

    Checks the masses, bridge count, leader/core sequences and subclass of
    the first CDS motif in the record returned by ``run_lanthi``.
    """
    genbank = helpers.get_path_to_nisin_genbank()
    rec, _ = self.run_lanthi(genbank, "nisA leader / core peptide")
    prepeptide = rec.get_cds_motifs()[0]

    # real monoisotopic mass is 3351.51, but we overpredict a Dha
    self.assertAlmostEqual(3333.6, prepeptide.monoisotopic_mass, delta=0.05)
    # real mw is 3354.5, see above
    self.assertAlmostEqual(3336.0, prepeptide.molecular_weight, delta=0.05)

    expected_alternatives = [3354.0, 3372.1, 3390.1, 3408.1]
    for expected, calculated in zip(expected_alternatives,
                                    prepeptide.alternative_weights):
        self.assertAlmostEqual(expected, calculated, delta=0.05)

    assert prepeptide.detailed_information.lan_bridges == 5
    self.assertEqual("MSTKDFNLDLVSVSKKDSGASPR", prepeptide.leader)
    self.assertEqual("ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK", prepeptide.core)
    self.assertEqual('Class I', prepeptide.peptide_subclass)
def test_reuse(self):
    """Run and regenerate sideloader results and compare them to the raw JSON input."""
    raw_results = sideloader.loader.load_validated_json(
        GOOD_FILE, sideloader.general._SCHEMA_FILE)
    nisin = helpers.get_path_to_nisin_genbank()
    results = helpers.run_and_regenerate_results_for_module(
        nisin, sideloader, self.options)

    assert results.record_id == raw_results["records"][0]["name"]
    raw_record = raw_results["records"][0]
    raw_subregions = raw_record["subregions"]

    # each regenerated subregion is compared with the raw entry at the same position
    for subregion, raw in zip(results.subregions, raw_subregions):
        assert subregion.tool.name == raw_results["tool"]["name"]
        assert subregion.start == raw["start"]
        assert subregion.end == raw["end"]
        assert subregion.label == raw["label"]
        assert subregion.details == {
            "score": ["6.5"],
            "some_option_name": ["yes"],
            "some_other_detail": ["first", "second", "etc"],
        }
def test_add_to_record(self):
    """Add a dummy PFAM domain to nisin and check pfam2go annotates it.

    The domain must carry no gene ontologies until the pfam2go results are
    added to the record, and afterwards its ontology ids must match what
    the results report for it.
    """
    nisin_path = helpers.get_path_to_nisin_genbank()
    record = record_processing.parse_input_sequence(nisin_path)[0]
    assert not record.get_pfam_domains()

    # add a test PFAM
    dummy_domain = helpers.DummyPFAMDomain(identifier="PF00005", domain="PF00005")
    record.add_pfam_domain(dummy_domain)
    assert len(record.get_pfam_domains()) == 1

    # run pfam2go and add the results
    results = pfam2go.run_on_record(record, None, self.options)
    assert dummy_domain in results.pfam_domains_with_gos
    # running alone must not annotate; only add_to_record does
    assert not dummy_domain.gene_ontologies
    results.add_to_record(record)
    assert dummy_domain.gene_ontologies

    # check the contents of the annotation
    for annotated in record.get_pfam_domains():
        assert annotated.gene_ontologies
        found_ids = sorted(annotated.gene_ontologies.ids)
        expected_ids = sorted(results.get_all_gos(annotated))
        assert found_ids == expected_ids
def test_results_from_invalid_file_raises_error(self):
    """Loading results from the nisin GenBank path must raise ValueError."""
    genbank_path = get_path_to_nisin_genbank()
    with self.assertRaisesRegex(ValueError, "Cannot load results to reuse"):
        serialiser.AntismashResults.from_file(genbank_path)
def read_nisin(self):
    """Parse the nisin input and return its records, asserting there is exactly one."""
    nisin_path = helpers.get_path_to_nisin_genbank()
    parsed = record_processing.parse_input_sequence(nisin_path)
    assert len(parsed) == 1
    return parsed
def check_nisin(self, expected):
    """Call ``self.run_antismash`` on the nisin path with ``expected`` and return its result."""
    nisin_path = helpers.get_path_to_nisin_genbank()
    outcome = self.run_antismash(nisin_path, expected)
    return outcome
def test_nisin_minimal(self):
    """Run nisin with ``self.default_options`` and verify the output files."""
    nisin_path = get_path_to_nisin_genbank()
    run_antismash(nisin_path, self.default_options)
    self.check_output_files()
def test_nisin_minimal(self):
    """Run nisin with ``self.config`` and verify the output files."""
    nisin_path = get_path_to_nisin_genbank()
    run_antismash(nisin_path, self.config)
    self.check_output_files()