def test_nisin(self):
    """Check the lantipeptide annotation produced for the nisin A cluster."""
    genbank_path = utils.get_full_path(__file__, 'nisin.gbk')
    rec = seqio.read(genbank_path)
    self.assertEqual(38, len(rec.features))

    # running the analysis should leave two extra features on the record
    specific_analysis(rec, None)
    self.assertEqual(40, len(rec.features))

    core_hits = h._find_core_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    # real monoisotopic mass is 3351.51, but we overpredict a Dha
    mono_mass = h._get_monoisotopic_mass(core)
    self.assertAlmostEqual(3333.6, mono_mass)
    # real mw is 3354.5, see above
    mol_weight = h._get_molecular_weight(core)
    self.assertAlmostEqual(3336.0, mol_weight)

    alternative_weights = h._get_alternative_weights(core)
    self.assertEqual([3354.0, 3372.1, 3390.1, 3408.1], alternative_weights)
    self.assertEqual(5, h._get_number_bridges(core))

    leader_sequence = h._get_leader_peptide_sequence(leader)
    self.assertEqual("MSTKDFNLDLVSVSKKDSGASPR", leader_sequence)
    core_sequence = h._get_core_peptide_sequence(core)
    self.assertEqual("ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK", core_sequence)
    self.assertEqual('Class I', h._get_core_peptide_class(core))
def test_epicidin(self):
    """Check the lantipeptide annotation produced for the epicidin 280 cluster."""
    genbank_path = utils.get_full_path(__file__, 'epicidin_280.gbk')
    rec = seqio.read(genbank_path)
    self.assertEqual(21, len(rec.features))

    # running the analysis should leave two extra features on the record
    specific_analysis(rec, None)
    self.assertEqual(23, len(rec.features))

    core_hits = h._find_core_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    mono_mass = h._get_monoisotopic_mass(core)
    self.assertAlmostEqual(3115.7, mono_mass)
    mol_weight = h._get_molecular_weight(core)
    self.assertAlmostEqual(3117.7, mol_weight)

    alternative_weights = h._get_alternative_weights(core)
    self.assertEqual([3135.7, 3153.7, 3171.7], alternative_weights)
    self.assertEqual(3, h._get_number_bridges(core))

    leader_sequence = h._get_leader_peptide_sequence(leader)
    self.assertEqual("MENKKDLFDLEIKKDNMENNNELEAQ", leader_sequence)
    core_sequence = h._get_core_peptide_sequence(core)
    self.assertEqual("SLGPAIKATRQVCPKATRFVTVSCKKSDCQ", core_sequence)
    self.assertEqual('Class I', h._get_core_peptide_class(core))

    extra_modifications = h._get_core_peptide_extra_modifications(core)
    self.assertEqual(['Lac'], extra_modifications)
def test_microbisporicin(self):
    """Check the lantipeptide annotation produced for the microbisporicin cluster."""
    genbank_path = utils.get_full_path(__file__, 'microbisporicin.gbk')
    rec = seqio.read(genbank_path)
    self.assertEqual(56, len(rec.features))

    # running the analysis should leave two extra features on the record
    specific_analysis(rec, None)
    self.assertEqual(58, len(rec.features))

    core_hits = h._find_core_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    # NOTE: this is not the correct weight for microbisporicin
    # there are some additional modifications we do not predict yet
    mono_mass = h._get_monoisotopic_mass(core)
    self.assertAlmostEqual(2212.9, mono_mass)
    mol_weight = h._get_molecular_weight(core)
    self.assertAlmostEqual(2214.5, mol_weight)
    self.assertEqual(4, h._get_number_bridges(core))

    leader_sequence = h._get_leader_peptide_sequence(leader)
    self.assertEqual("MPADILETRTSETEDLLDLDLSIGVEEITAGPA", leader_sequence)
    core_sequence = h._get_core_peptide_sequence(core)
    self.assertEqual("VTSWSLCTPGCTSPGGGSNCSFCC", core_sequence)
    self.assertEqual('Class I', h._get_core_peptide_class(core))

    extra_modifications = h._get_core_peptide_extra_modifications(core)
    self.assertEqual(['AviCys', 'Cl', 'OH'], extra_modifications)
def test_thiostrepton(self):
    """Run the thiopeptide analysis on thiostrepton and verify the added motif."""
    genbank_path = path.get_full_path(
        __file__, 'data', 'thiostrepton_before_analysis.gbk')
    rec = secmet.Record.from_biopython(seqio.read(genbank_path), "bacteria")
    assert rec.get_feature_count() == 27
    # two existing motifs
    assert len(rec.get_cds_motifs()) == 2

    results = thiopeptides.specific_analysis(rec)
    assert len(results.motifs) == 1
    # ensure record not adjusted yet
    self.assertEqual(27, rec.get_feature_count())

    results.add_to_record(rec)
    # the new motif is added
    assert rec.get_feature_count() == 28
    assert len(rec.get_cds_motifs()) == 3

    # pick the first motif that is a Prepeptide, or None when there is none
    motifs = rec.get_cds_motifs()
    prepeptide = next(
        (motif for motif in motifs
         if isinstance(motif, secmet.feature.Prepeptide)),
        None)

    # and the motif that was added is exactly the one in results
    assert prepeptide is results.motifs[0]
    self.check_thiostrepton_values(prepeptide)
def test_lactazole(self):
    """Run the thiopeptide analysis on lactazole (lazA) and verify the new motif."""
    genbank_path = path.get_full_path(__file__, 'data', 'lac_before_analysis.gbk')
    rec = secmet.Record.from_biopython(seqio.read(genbank_path), "bacteria")
    assert rec.get_feature_count() == 21
    assert not rec.get_cds_motifs()

    results = thiopeptides.specific_analysis(rec)
    assert len(results.motifs) == 1
    # ensure record not adjusted yet
    assert rec.get_feature_count() == 21
    assert not rec.get_cds_motifs()

    # add and check new motif added
    results.add_to_record(rec)
    assert rec.get_feature_count() == 22
    assert len(rec.get_cds_motifs()) == 1

    prepeptide = rec.get_cds_motifs()[0]
    assert prepeptide is results.motifs[0]

    self.assertAlmostEqual(1362.5, prepeptide.monoisotopic_mass, places=1)
    self.assertAlmostEqual(1363.5, prepeptide.molecular_weight, places=1)
    assert prepeptide.leader == "MSDITASRVESLDLQDLDLSELTVTSLRDTVALPENGA"
    assert prepeptide.core == "SWGSCSCQASSSCA"
    assert not prepeptide.macrocycle
    assert prepeptide.peptide_subclass == "Type III"
    assert prepeptide.core_features == 'Central ring: pyridine trisubstituted'
    assert prepeptide.tail == 'QPQDM'

    expected_alternatives = [1381.5, 1399.5, 1417.5, 1435.5, 1453.6, 1471.6]
    weight_pairs = zip(prepeptide.alternative_weights, expected_alternatives)
    for calculated, wanted in weight_pairs:
        self.assertAlmostEqual(calculated, wanted, places=1)

    biopython_features = prepeptide.to_biopython()
    assert len(biopython_features) == 3  # leader, core, tail
def test_nisin(self):
    """Test lanthipeptide prediction for nisin A.

    Runs the analysis on the nisin record, checks that the motif is only
    attached to the record after ``add_to_record``, verifies the predicted
    prepeptide values, and finally checks that the results survive a
    JSON round trip unchanged.
    """
    rec = Record.from_biopython(
        seqio.read(helpers.get_path_to_nisin_with_detection()),
        taxon="bacteria")
    assert not rec.get_cds_motifs()

    result = run_specific_analysis(rec)
    assert len(result.clusters) == 1
    motifs = self.gather_all_motifs(result)
    assert len(motifs) == 1
    # the analysis itself must not modify the record
    assert not rec.get_cds_motifs()
    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 1

    prepeptide = motifs[0]
    # real monoisotopic mass is 3351.51, but we overpredict a Dha
    self.assertAlmostEqual(3333.6, prepeptide.monoisotopic_mass, delta=0.05)
    # real mw is 3354.5, see above
    self.assertAlmostEqual(3336.0, prepeptide.molecular_weight, delta=0.05)

    expected_alternatives = [3354.0, 3372.1, 3390.1, 3408.1]
    # zip() stops at the shorter input, so pin the length first; otherwise a
    # missing or truncated weight list would pass unnoticed
    assert len(prepeptide.alternative_weights) == len(expected_alternatives)
    for expected, calculated in zip(expected_alternatives,
                                    prepeptide.alternative_weights):
        self.assertAlmostEqual(expected, calculated, delta=0.05)

    assert prepeptide.lan_bridges == 5
    self.assertEqual("MSTKDFNLDLVSVSKKDSGASPR", prepeptide.leader)
    self.assertEqual("ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK", prepeptide.core)
    self.assertEqual('Class I', prepeptide.peptide_subclass)

    # results rebuilt from their own JSON must match the originals
    initial_json = result.to_json()
    regenerated = LanthiResults.from_json(initial_json, rec)
    assert list(result.motifs_by_locus) == ["nisB"]
    assert str(result.motifs_by_locus) == str(regenerated.motifs_by_locus)
    assert result.clusters == regenerated.clusters
    assert initial_json == regenerated.to_json()
def test_labyrinthopeptin(self):
    """Check the feature count before and after analysing labyrinthopeptin."""
    genbank_path = utils.get_full_path(__file__, 'labyrinthopeptin.gbk')
    rec = seqio.read(genbank_path)
    features_before = len(rec.features)
    self.assertEqual(7, features_before)

    # the analysis is expected to add four features to the record
    specific_analysis(rec, None)
    features_after = len(rec.features)
    self.assertEqual(11, features_after)
def build_record(self, genbank):
    """Read the GenBank file at `genbank` and return it as a secmet Record.

    Asserts that the record has protoclusters and that protocluster 0
    has CDS children before returning it.
    """
    with open(genbank) as handle:
        biopython_record = seqio.read(handle, "genbank")
    built = secmet.Record.from_biopython(biopython_record, taxon="bacteria")

    # sanity checks: the record is only useful with a populated protocluster
    assert built.get_protoclusters()
    first_protocluster = built.get_protocluster(0)
    assert first_protocluster.cds_children
    return built
def test_read_fasta_no_header(self):
    """Test reading a fasta record without header.

    The plain read is expected to raise ValueError, while the same handle
    read with ``robust=True`` must yield a record with id "DUMMY".
    """
    # plain 'r': the 'U' flag is deprecated and rejected with ValueError on
    # Python 3.11+; text mode already uses universal newlines on Python 3
    with open(get_file_path('no_header.fasta'), 'r') as h:
        # plain BioPython reading should fail
        self.assertRaises(ValueError, seqio.read, h)
        # rewind so the second read sees the file from the start
        h.seek(0)
        # robust reading should work
        record = seqio.read(h, robust=True)
    self.assertEqual("DUMMY", record.id)
def test_labyrinthopeptin(self):
    """Run the lanthipeptide analysis on labyrinthopeptin and expect two motifs."""
    genbank_path = path.get_full_path(__file__, 'data', 'labyrinthopeptin.gbk')
    biopython_record = seqio.read(genbank_path)
    rec = Record.from_biopython(biopython_record, taxon="bacteria")
    assert not rec.get_cds_motifs()

    result = run_specific_analysis(rec)
    found = self.gather_all_motifs(result)
    assert len(found) == 2
    # motifs only reach the record once the results are added to it
    assert not rec.get_cds_motifs()

    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 2
def test_sco_cluster3(self):
    """Check the lantipeptide annotation produced for SCO cluster #3."""
    genbank_path = utils.get_full_path(__file__, 'sco_cluster3.gbk')
    rec = seqio.read(genbank_path)
    self.assertEqual(69, len(rec.features))

    # running the analysis should leave two extra features on the record
    specific_analysis(rec, None)
    self.assertEqual(71, len(rec.features))

    cluster = utils.get_cluster_by_nr(rec, 1)
    core_hits = h._find_core_peptides(cluster, rec)
    self.assertEqual(1, len(core_hits))

    peptide_class = h._get_core_peptide_class(core_hits[0])
    self.assertEqual('Class I', peptide_class)
def test_sco_cluster3(self):
    """Run the lanthipeptide analysis on SCO cluster #3 and expect one Class I motif."""
    genbank_path = path.get_full_path(__file__, 'data', 'sco_cluster3.gbk')
    biopython_record = seqio.read(genbank_path)
    rec = Record.from_biopython(biopython_record, taxon="bacteria")
    assert not rec.get_cds_motifs()

    result = run_specific_analysis(rec)
    found = self.gather_all_motifs(result)
    assert len(found) == 1
    # motifs only reach the record once the results are added to it
    assert not rec.get_cds_motifs()

    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 1
    self.assertEqual('Class I', found[0].peptide_subclass)
def test_lactocin_s(self):
    """Run the lanthipeptide analysis on lactocin S and expect one Class II motif."""
    genbank_path = path.get_full_path(__file__, 'data', 'lactocin_s.gbk')
    biopython_record = seqio.read(genbank_path)
    rec = Record.from_biopython(biopython_record, taxon="bacteria")
    assert not rec.get_cds_motifs()

    result = run_specific_analysis(rec)
    assert len(result.clusters) == 1
    assert result.clusters[1] == {"lasM"}
    assert len(result.motifs_by_locus["lasM"]) == 1

    las_motifs = result.motifs_by_locus["lasM"]
    assert len(las_motifs) == 1
    # motifs only reach the record once the results are added to it
    assert not rec.get_cds_motifs()

    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 1
    self.assertEqual('Class II', las_motifs[0].peptide_subclass)
def integration_run(self):
    """Run a sanity check on the Balhimycin cluster.

    Calls ``main()`` with a command line naming ``Y16952.gbk`` as input and
    a temporary directory as output folder, then checks that the trace
    tracker recorded nothing, that ``Y16952.3.final.gbk`` was written and
    that it can be read back.
    """
    infile = path.join(path.dirname(__file__), 'Y16952.gbk')
    # main() is called without arguments, so the command line is handed over
    # through sys.argv; keep the real value so the override cannot leak into
    # code that runs after this check
    original_argv = sys.argv
    try:
        with TemporaryDirectory() as tempdir:
            sys.argv = [
                'run_antismash.py',
                '--outputfolder',
                tempdir,
                infile,
            ]
            main()
            assert_same_trace(self.trace_tracker, "")
            # the output must be inspected while the temp directory exists
            outfile = path.join(tempdir, 'Y16952.3.final.gbk')
            self.assertTrue(path.exists(outfile), "Failed to create ouput")
            rec = seqio.read(outfile)
            self.assertIsNotNone(rec, "Failed to parse output")
    finally:
        sys.argv = original_argv
def test_epidermin(self):
    """Run the lanthipeptide analysis on epidermin and verify the predicted motif."""
    genbank_path = path.get_full_path(__file__, 'data', 'epidermin.gbk')
    biopython_record = seqio.read(genbank_path)
    rec = Record.from_biopython(biopython_record, taxon="bacteria")
    assert not rec.get_cds_motifs()

    result = run_specific_analysis(rec)
    found = self.gather_all_motifs(result)
    assert len(found) == 1
    # motifs only reach the record once the results are added to it
    assert not rec.get_cds_motifs()

    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 1

    motif = found[0]
    self.assertAlmostEqual(2164, motif.monoisotopic_mass, delta=0.5)
    self.assertAlmostEqual(2165.6, motif.molecular_weight, delta=0.5)
    self.assertEqual(3, motif.lan_bridges)
    self.assertEqual("MEAVKEKNDLFNLDVKVNAKESNDSGAEPR", motif.leader)
    self.assertEqual("IASKFICTPGCAKTGSFNSYCC", motif.core)
    self.assertEqual('Class I', motif.peptide_subclass)

    modifications = motif.get_modifications()
    self.assertEqual(['AviCys'], modifications)
def test_nosiheptide(self):
    """Test thiopeptide prediction for nosiheptide - nosM.

    Checks that the analysis leaves the record untouched until
    ``add_to_record`` is called, that exactly one motif is added, and that
    the motif carries the expected masses, sequences and annotations.
    """
    rec = seqio.read(
        path.get_full_path(__file__, 'data', 'nosi_before_analysis.gbk'))
    rec = secmet.Record.from_biopython(rec, "bacteria")
    rec.get_cluster(1).trim_overlapping()
    assert rec.get_feature_count() == 56
    assert not rec.get_cds_motifs()

    result = thiopeptides.specific_analysis(rec)
    # the analysis itself must not modify the record
    assert rec.get_feature_count() == 56
    assert len(result.motifs) == 1

    result.add_to_record(rec)
    # the assertion message already lists the motifs when the count is wrong,
    # so no separate debug output is needed
    assert len(rec.get_cds_motifs()) == 1, rec.get_cds_motifs()
    assert rec.get_feature_count() == 57

    # check the motif in an existing CDS
    prepeptide = rec.get_cds_motifs()[0]
    assert prepeptide is result.motifs[0]

    self.assertAlmostEqual(1315.3, prepeptide.monoisotopic_mass, places=1)
    self.assertAlmostEqual(1316.5, prepeptide.molecular_weight, places=1)
    assert prepeptide.leader == "MDAAHLSDLDIDALEISEFLDESRLEDSEVVAKVMSA"
    assert prepeptide.core == "SCTTCECCCSCSS"
    assert prepeptide.macrocycle == "26-member"
    assert prepeptide.peptide_subclass == "Type I"

    # the first two mature weights are checked individually, the rest as a series
    self.assertAlmostEqual(1222.4, prepeptide.mature_weights[0], places=1)
    self.assertAlmostEqual(1221.2, prepeptide.mature_weights[1], places=1)
    for calc, expected in zip(
            prepeptide.mature_weights[2:],
            [1240.4, 1258.4, 1276.5, 1294.5, 1312.5, 1330.5]):
        self.assertAlmostEqual(calc, expected, places=1)

    expected_core_features = (
        "Central ring: pyridine tetrasubstituted (hydroxyl group present);"
        " second macrocycle")
    assert prepeptide.core_features == expected_core_features
    assert prepeptide.tail_reaction == 'dealkylation of C-Terminal residue; amidation'
def test_epicidin(self):
    """Test lanthipeptide prediction for epicidin 280.

    The input record already carries one CDS motif; the analysis must
    find one new motif and only attach it to the record when
    ``add_to_record`` is called.
    """
    filename = path.get_full_path(__file__, 'data', 'epicidin_280.gbk')
    rec = Record.from_biopython(seqio.read(filename), taxon="bacteria")
    assert len(rec.get_cds_motifs()) == 1

    result = run_specific_analysis(rec)
    motifs = self.gather_all_motifs(result)
    assert len(motifs) == 1
    # the analysis itself must not modify the record
    assert len(rec.get_cds_motifs()) == 1
    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 2

    prepeptide = motifs[0]
    self.assertAlmostEqual(3115.7, prepeptide.monoisotopic_mass, delta=0.5)
    self.assertAlmostEqual(3117.7, prepeptide.molecular_weight, delta=0.5)

    expected_alternatives = [3135.7, 3153.7, 3171.7]
    # zip() stops at the shorter input, so pin the length first; otherwise a
    # missing or truncated weight list would pass unnoticed
    assert len(prepeptide.alternative_weights) == len(expected_alternatives)
    for expected, calculated in zip(expected_alternatives,
                                    prepeptide.alternative_weights):
        self.assertAlmostEqual(expected, calculated, delta=0.05)

    self.assertEqual(3, prepeptide.lan_bridges)
    self.assertEqual("MENKKDLFDLEIKKDNMENNNELEAQ", prepeptide.leader)
    self.assertEqual("SLGPAIKATRQVCPKATRFVTVSCKKSDCQ", prepeptide.core)
    self.assertEqual('Class I', prepeptide.peptide_subclass)
    self.assertEqual(['Lac'], prepeptide.get_modifications())
def test_epidermin(self):
    """Check the lantipeptide annotation produced for the epidermin cluster."""
    genbank_path = utils.get_full_path(__file__, 'epidermin.gbk')
    rec = seqio.read(genbank_path)
    self.assertEqual(18, len(rec.features))

    # running the analysis should leave two extra features on the record
    specific_analysis(rec, None)
    self.assertEqual(20, len(rec.features))

    core_hits = h._find_core_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(utils.get_cluster_by_nr(rec, 1), rec)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    mono_mass = h._get_monoisotopic_mass(core)
    self.assertAlmostEqual(2164, mono_mass)
    mol_weight = h._get_molecular_weight(core)
    self.assertAlmostEqual(2165.6, mol_weight)
    self.assertEqual(3, h._get_number_bridges(core))

    leader_sequence = h._get_leader_peptide_sequence(leader)
    self.assertEqual("MEAVKEKNDLFNLDVKVNAKESNDSGAEPR", leader_sequence)
    core_sequence = h._get_core_peptide_sequence(core)
    self.assertEqual("IASKFICTPGCAKTGSFNSYCC", core_sequence)
    self.assertEqual('Class I', h._get_core_peptide_class(core))

    extra_modifications = h._get_core_peptide_extra_modifications(core)
    self.assertEqual(['AviCys'], extra_modifications)
def test_microbisporicin(self):
    """Run the lanthipeptide analysis on microbisporicin and verify the predicted motif."""
    genbank_path = path.get_full_path(__file__, 'data', 'microbisporicin.gbk')
    biopython_record = seqio.read(genbank_path)
    rec = Record.from_biopython(biopython_record, taxon="bacteria")
    assert not rec.get_cds_motifs()

    result = run_specific_analysis(rec)
    found = self.gather_all_motifs(result)
    assert len(found) == 1
    # motifs only reach the record once the results are added to it
    assert not rec.get_cds_motifs()

    result.add_to_record(rec)
    assert len(rec.get_cds_motifs()) == 1

    motif = found[0]
    # NOTE: this is not the correct weight for microbisporicin
    # there are some additional modifications we do not predict yet
    self.assertAlmostEqual(2212.9, motif.monoisotopic_mass, delta=0.5)
    self.assertAlmostEqual(2214.5, motif.molecular_weight, delta=0.5)
    self.assertEqual(4, motif.lan_bridges)
    self.assertEqual("MPADILETRTSETEDLLDLDLSIGVEEITAGPA", motif.leader)
    self.assertEqual("VTSWSLCTPGCTSPGGGSNCSFCC", motif.core)
    self.assertEqual('Class I', motif.peptide_subclass)

    modifications = motif.get_modifications()
    self.assertEqual(['AviCys', 'Cl', 'OH'], modifications)
def test_read_genbank_path(self):
    """Read a gzipped GenBank file given as a path instead of a handle."""
    record = seqio.read(get_file_path('melanin.gbk.gz'))
    self.assertEqual("AB070938.1", record.id)
def test_read_embl_valid(self):
    """Test reading a valid embl record from an open text handle."""
    # plain 'r': the 'U' flag is deprecated and rejected with ValueError on
    # Python 3.11+; text mode already uses universal newlines on Python 3
    with open(get_file_path('melanin.embl'), 'r') as h:
        record = seqio.read(h)
    self.assertEqual("AB070938.1", record.id)
def test_read_fasta_valid(self):
    """Test reading a valid fasta record from an open text handle."""
    # plain 'r': the 'U' flag is deprecated and rejected with ValueError on
    # Python 3.11+; text mode already uses universal newlines on Python 3
    with open(get_file_path('melanin.fasta'), 'r') as h:
        record = seqio.read(h)
    self.assertEqual("AB070938", record.id)
def test_read_genbank(self):
    """Read a gzipped GenBank file through a handle opened in binary mode."""
    gzipped_path = get_file_path('melanin.gbk.gz')
    with open(gzipped_path, 'rb') as handle:
        record = seqio.read(handle)
    self.assertEqual("AB070938.1", record.id)
def setUp(self):
    """Load the melanin GenBank record and keep it on ``self.record``."""
    melanin_path = get_file_path("melanin.gbk")
    self.record = seqio.read(melanin_path)
def test_read_calls_biopython(self):
    """Reading without a seqtype should call Bio.SeqIO.read with 'genbank'."""
    mock("Bio.SeqIO.read", tracker=self.tt, returns=[])
    seqio.read(self.handle)
    # compare the calls recorded by the tracker against the expected call
    wanted_trace = " Called Bio.SeqIO.read(DummyHandle('test.gbk'), 'genbank')"
    assert_same_trace(self.tt, wanted_trace)
def test_read_seqtype(self):
    """Reading with an explicit 'embl' seqtype should pass it to Bio.SeqIO.read."""
    mock("Bio.SeqIO.read", tracker=self.tt, returns=[])
    seqio.read(self.handle, 'embl')
    # compare the calls recorded by the tracker against the expected call
    wanted_trace = " Called Bio.SeqIO.read(DummyHandle('test.gbk'), 'embl')"
    assert_same_trace(self.tt, wanted_trace)
def setUp(self):
    """Set up required variables, load the demo sequence and parse the XML.

    After this runs the test case has:
      * ``self.options``: a Namespace whose ``QualifierTags`` holds the three
        aSASF qualifier names
      * ``self.result``: a hand-built hit with a single HSP and its alignment
      * ``self.seq_record``: the record read from ``Y16952.3.final.gbk``
      * ``self.ETObj``: the parsed XML configuration
      * ``self.my_ASF``: the active_site_finder built from record and options
    """
    self.options = Namespace()

    QualifierTags = Namespace()
    QualifierTags.ASF_scaffold = 'aSASF_scaffold'
    QualifierTags.ASF_choice = 'aSASF_choice'
    QualifierTags.ASF_prediction = 'aSASF_prediction'

    # build the single HSP directly instead of seeding the list with a
    # placeholder that is overwritten straight away
    hsp = Namespace()
    hsp.query_start = 1
    hsp.query_end = 99
    hsp.hit_start = 322
    hsp.hit_end = 428
    hsp.aln = [Namespace(), Namespace()]
    hsp.aln[0].seq = "RAVDELIRYLTVPYGPTPRIAKQDVTVGDQVIKAGESVICSLPAANRDPALVPDADRLDVTR--------DPVPHVAFGHGIHHCLGAALARLELRTVFTALWRRF"
    hsp.aln[1].seq = "avikEtLRlhpvvplllpRevtkdvvirgylipkGtevivnlyalhrdpevfpnPeeFdpeRFldekgsrksfaflPFGaGpRnCiGerlArmelklflatlLqnF"

    result = Namespace()
    result.id = "fullhmmer_oxyB_0001"
    result.hsps = [hsp]

    self.result = result
    self.options.QualifierTags = QualifierTags

    self.seq_record = seqio.read(
        utils.get_full_path(__file__, 'Y16952.3.final.gbk'))
    self.assertEqual(361, len(self.seq_record.features))

    xmltext = """<?xml version="1.0" encoding="UTF-8"?> <resource xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <analysis name='ASP_P450Oxy' type="active_site"> <Prerequisite> <primary_tag_type>PFAM_domain</primary_tag_type> <tag>domain</tag> <tag_value>p450</tag_value> </Prerequisite> <Execute program="hmmscan" CaptureConsole="TRUE"> <!-- Currently, the location of the hmmpfam2 binary and the database location is inferred from the antismash configuration file! 
--> <parameters> <!-- If prefixes for parameters are required they can be added as attribute prefix --> <parameter name="evalue" prefix="--domE">0.1</parameter> <parameter name="cpus" prefix="--cpu">1</parameter> </parameters> <database>p450.hmm3</database> <db_source>PFAM24</db_source> <BioPythonParser>hmmer3-text</BioPythonParser> </Execute> <Alignment> <scaffold> <scaffoldOffset>327,330,400,403,409</scaffoldOffset> <scaffoldValue>E,R,F,G,G</scaffoldValue> </scaffold> <choice result="active site cystein present"> <offset>407</offset> <value>C</value> <comment>Cytochrome P450 oxygenase active site cystein; coordinates heme Fe ligand</comment> </choice> </Alignment> <description>Pediction of cytochrome P450 active site cystein</description> <referenceList> <reference>Del Vecchio, F., H. Petkovic, S. G. Kendrew, L. Low, B. Wilkinson, R. Lill, J. Cortes, B. A. Rudd, J. Staunton, and P. F. Leadlay. 2003. Active-site residue, domain and module swaps in modular polyketide synthases. J Ind. Microbiol Biotechnol 30:489-494.</reference> </referenceList> </analysis> </resource> """
    self.ETObj = ET.fromstring(xmltext)

    # now actually generate the ASF object
    myASF = antismash.generic_modules.active_site_finder.active_site_finder(
        self.seq_record, self.options)
    assert_is_instance(
        myASF, antismash.generic_modules.active_site_finder.active_site_finder)
    self.my_ASF = myASF
def test_count_codons_ignore_invalid(self):
    """Counting codons on the invalid-codons record should give 143 for GCC."""
    invalid_path = get_file_path("invalid_codons.gbk")
    record = seqio.read(invalid_path)
    counts = count_codons(record)
    self.assertEqual(143, counts["GCC"])