def test_invalid_vector(self):
    """Check that assembling with a malformed vector is rejected.

    The vector sequence used here carries the same ``ATGC`` overhang on
    both sides, so `assemble` is expected to fail with
    `errors.InvalidSequence`.
    """
    vector = self.MockVector(
        CircularRecord(Seq("CCATGCTTGTCTTCCACAGAAGACTTATGCGG"), "vector")
    )
    module = self.MockModule(
        CircularRecord(Seq("GAAGACTTATGCCACAATGCTTGTCTTC"), "module")
    )

    # both overhangs of the vector are identical, which makes it invalid
    with self.assertRaises(errors.InvalidSequence):
        vector.assemble(module)
def test_missing_module(self):
    """Check the error reported when the assembly lacks a module.

    Only one module is given to `assemble`; the call must raise
    `errors.MissingModule`, and the exception message must name the
    ``ATGC`` start overhang for which no module was found.
    """
    # ATGC ---- CGTA
    vector = self.MockVector(
        CircularRecord(Seq("CCATGCTTGTCTTCCACAGAAGACTTCGTAGG"), "vector")
    )
    # CGTA --- ATGA
    mod1 = self.MockModule(
        CircularRecord(Seq("GAAGACTTATGACACACGTATTGTCTTC"), "mod1")
    )

    with self.assertRaises(errors.MissingModule) as ctx:
        vector.assemble(mod1)

    self.assertEqual(
        str(ctx.exception),
        "no module with 'ATGC' start overhang",
    )
def test_contains(self):
    """Check membership tests (``x in record``) on a `CircularRecord`.

    ``GCAT`` is not a substring of the linear text ``ATGC`` and can only
    be found by wrapping around the end of the record; ``ATGCAT`` is
    longer than the 4-base record and is expected not to match.
    """
    record = CircularRecord(SeqRecord(seq=Seq("ATGC"), id="test_init"))

    for needle in ("ATGC", "GCAT"):
        self.assertIn(needle, record)
    self.assertNotIn("ATGCAT", record)
def test_init(self):
    """Check that a `CircularRecord` can be built from a `SeqRecord`.

    The new record must be a `CircularRecord`, its ``seq`` attribute must
    not itself be a `SeqRecord`, and its sequence and identifier must be
    equal to those of the source record.
    """
    source = SeqRecord(seq=Seq("ATGCATGCATGC"), id="test_init")
    circular = CircularRecord(source)

    self.assertIsInstance(circular, CircularRecord)
    self.assertNotIsInstance(circular.seq, SeqRecord)
    for attribute in ("seq", "id"):
        self.assertEqual(
            getattr(circular, attribute),
            getattr(source, attribute),
        )
def test_valid_rotated(self):
    """Check that a valid plasmid stays valid after being rotated.

    The entry vector is validated as created, then twice more after
    shifting its record right by 10 positions each time (cumulative
    shifts of 10 and 20).
    """
    record = CircularRecord(Seq("CCATGCTTGTCTTCCACAGAAGACTTCGTAGG"), "vector")
    vector = self.MockEntryVector(record)
    self.assertTrue(vector.is_valid())

    for _ in range(2):
        # rebuild the vector from the record of the previous one, rotated
        vector = self.MockEntryVector(vector.record >> 10)
        self.assertTrue(vector.is_valid())
def load_data(self, name):
    """Load the FASTA records of a packaged test case.

    Arguments:
        name: base name of the case; the archive is looked up in
            ``DATAFS`` under ``cases/<name>.tar.xz``.

    Returns:
        A ``(result, vector, modules)`` tuple where ``result`` and
        ``vector`` are `CircularRecord` objects read from ``result.fa``
        and ``vector.fa``, and ``modules`` is a dict mapping the id of
        each record in ``modules.fa`` to its `CircularRecord`.

    Raises:
        unittest.SkipTest: when the archive does not exist in ``DATAFS``.
    """
    archive_path = "cases/{}.tar.xz".format(name)
    if not DATAFS.exists(archive_path):
        raise unittest.SkipTest("no test case found")

    with contexter.Contexter() as stack:
        # every resource is handed to the Contexter with ``<<``
        # (presumably so that it is released when the block exits)
        casefs = stack << fs.archive.open_archive(DATAFS, archive_path)
        result_fa, vector_fa, modules_fa = [
            stack << casefs.open(filename)
            for filename in ("result.fa", "vector.fa", "modules.fa")
        ]

        # parse the records while the handles are still open
        result = CircularRecord(Bio.SeqIO.read(result_fa, "fasta"))
        vector = CircularRecord(Bio.SeqIO.read(vector_fa, "fasta"))
        modules = {}
        for record in Bio.SeqIO.parse(modules_fa, "fasta"):
            modules[record.id] = CircularRecord(record)

    return result, vector, modules
def test_duplicate_modules(self):
    """Check the error reported when two modules compete for one slot.

    Both modules are expected to be reported as duplicates sharing the
    ``ATGC`` start overhang; the exception must expose them through
    ``duplicates`` and ``details``, and render both in its message.
    """
    # ATGC ---- CGTA
    vector = self.MockVector(
        CircularRecord(Seq("CCATGCTTGTCTTCCACAGAAGACTTCGTAGG"), "vector")
    )
    # ATGC --- CGTA
    mod1 = self.MockModule(
        CircularRecord(Seq("GAAGACTTATGCCACACGTATTGTCTTC"), "mod1")
    )
    # same sequence as mod1 except for the inner 4 bases (TATA, not CACA)
    mod2 = self.MockModule(
        CircularRecord(Seq("GAAGACTTATGCTATACGTATTGTCTTC"), "mod2")
    )

    with self.assertRaises(errors.DuplicateModules) as ctx:
        vector.assemble(mod1, mod2)

    error = ctx.exception
    self.assertEqual(set(error.duplicates), {mod1, mod2})
    self.assertEqual(error.details, "same start overhang: 'ATGC'")
    self.assertEqual(
        str(error),
        "duplicate modules: mod1, mod2 (same start overhang: 'ATGC')",
    )
def test_unused_modules(self):
    """Assert a warning is emitted for modules left unused by an assembly.

    The vector is assembled with two modules; exactly one warning is
    expected, an `errors.UnusedModules` instance whose ``remaining``
    attribute holds ``mod2`` and whose message is ``"unused: mod2"``.
    """
    # ATGC ---- CGTA
    seqv = Seq("CCATGCTTGTCTTCCACAGAAGACTTCGTAGG")
    vector = self.MockVector(CircularRecord(seqv, "vector"))
    # CGTA --- ATGC
    seqm1 = Seq("GAAGACTTATGCTATACGTATTGTCTTC")
    mod1 = self.MockModule(CircularRecord(seqm1, "mod1"))
    # AAAA --- CCCC
    seqm2 = Seq("GAAGACTTAAAACACACCCCTTGTCTTC")
    mod2 = self.MockModule(CircularRecord(seqm2, "mod2"))

    with warnings.catch_warnings(record=True) as captured:
        # Without an explicit filter, whether the warning is recorded
        # depends on the filters active in the running interpreter (and,
        # on older Python versions, on whether it was already emitted
        # from the same location). Force this category to always show,
        # leaving the filters for other categories untouched.
        warnings.simplefilter("always", errors.UnusedModules)
        vector.assemble(mod1, mod2)

    self.assertEqual(len(captured), 1)
    self.assertIsInstance(captured[0].message, errors.UnusedModules)
    self.assertEqual(captured[0].message.remaining, (mod2, ))
    self.assertEqual(str(captured[0].message), "unused: mod2")
def test_shift_features(self):
    """Check that feature locations are updated when a record is shifted.

    A 4-base record with two 2-base ``promoter`` features is shifted right
    by one position three times in a row; after each shift the location
    of both features is compared with the expected coordinates.
    """
    def span(start, end):
        # forward-strand location with exact boundaries
        return FeatureLocation(
            ExactPosition(start), ExactPosition(end), strand=+1
        )

    features = [
        SeqFeature(span(0, 2), type="promoter"),
        SeqFeature(span(2, 4), type="promoter"),
    ]
    record = CircularRecord(
        SeqRecord(seq=Seq("ATGC"), id="feats", features=features)
    )

    # expected (start, end) of each feature after 1, 2 and 3 shifts
    expected_spans = [
        ((1, 3), (3, 5)),
        ((2, 4), (0, 2)),
        ((3, 5), (1, 3)),
    ]
    for spans in expected_spans:
        record = record >> 1
        for index, (start, end) in enumerate(spans):
            self.assertEqual(
                record.features[index].location,
                span(start, end),
            )
def test_shift_seq(self):
    """Check the sequence obtained when shifting a `CircularRecord`.

    Covers right (``>>``) and left (``<<``) shifts by amounts smaller
    than, equal to and larger than the record length, by zero, and by
    negative amounts, which must match a shift in the other direction.
    """
    cr = CircularRecord(seq=Seq("ATGCATGCATGC"), id="test_shift_seq")

    # right shifts
    for amount, expected in ((2, "GCATGCATGCAT"), (27, "TGCATGCATGCA")):
        self.assertEqual((cr >> amount).seq, Seq(expected))
    for amount in (len(cr), 0):
        self.assertEqual((cr >> amount).seq, cr.seq)
    self.assertEqual((cr >> -1).seq, (cr << 1).seq)

    # left shifts (compared against plain strings)
    for amount, expected in ((1, "TGCATGCATGCA"), (14, "GCATGCATGCAT")):
        self.assertEqual((cr << amount).seq, expected)
    for amount in (0, len(cr)):
        self.assertEqual((cr << amount).seq, cr.seq)

    # a negative shift is a positive shift in the opposite direction
    for amount in (5, 3):
        self.assertEqual((cr << -amount).seq, (cr >> amount).seq)
    self.assertEqual((cr >> -3).seq, (cr << 3).seq)
def test_radd(self):
    """Check that adding to a `CircularRecord` raises a `TypeError`.

    The addition exercised here is the augmented form, ``record += record``.
    """
    sequence = Seq("ATGCATGCATGC")
    record = CircularRecord(seq=sequence, id="test_shift_seq")

    with self.assertRaises(TypeError):
        record += record
info = { "resistance": resistance, # "name": id_, "id": id_, # "type": type_, "location": row.find("b").text.strip().replace(" / ", ""), "addgene_id": row.find("a").get("href").strip("/"), } # get the ZIP sequence for path in ("{} cor.gbk", "{}.gbk", "{}.gb"): if archive.exists(path.format(id_)): break with archive.open(path.format(id_), encoding="latin-1") as f: rec = f.read().replace("Exported File", "Exported ") gb_archive = CircularRecord(read(six.StringIO(rec), "gb")) # get the AddGene sequences page url = "https://www.addgene.org/{}/sequences/".format(info["addgene_id"]) with session.get(url) as res: soup = bs.BeautifulSoup(res.text, "html.parser") # get the addgene full sequence gb_url = soup.find("a", class_="genbank-file-download").get("href") with requests.get(gb_url) as res: gb = info["gb"] = CircularRecord(read(io.StringIO(res.text), "gb")) if id_ == "pICSL30008": gb = gb.reverse_complement(True, True, True, True, True, True, True) # elif id_ == "pICSL50004": # gb_archive = gb_archive.reverse_complement(True, True, True, True, True, True, True)
def test_invalid(self):
    """Check that an entry vector built from a 3-base record is invalid.
    """
    record = CircularRecord(Seq("ATG"), "vector")
    entry_vector = self.MockEntryVector(record)

    self.assertFalse(entry_vector.is_valid())
# "type": type_, "location": row.find("b").text.strip().replace(" / ", ""), "addgene_id": row.find("a").get("href").strip("/"), } # get the online full sequence if id_ in FULL_SEQUENCES: # Load the AddGene sequences page and get the full sequence with requests.get(FULL_SEQUENCES[id_]) as res: soup = bs.BeautifulSoup(res.text, "html.parser") section = soup.find("section", id="depositor-full") gb_url = soup.find("a", class_="genbank-file-download").get('href') # Get the Genbank file with requests.get(gb_url) as res: gb = CircularRecord(read(io.StringIO(res.text), "gb")) # get the pBP-SJM901 sequence and patch it elif id_.startswith("pBP-SJM"): # get pBP-SJM # Load the AddGene sequences page and get the full sequence with requests.get(FULL_SEQUENCES["pBP-SJM901"]) as res: soup = bs.BeautifulSoup(res.text, "html.parser") section = soup.find("section", id="depositor-full") gb_url = soup.find("a", class_="genbank-file-download").get('href') # Get the Genbank file with requests.get(gb_url) as res: gb = CircularRecord(read(io.StringIO(res.text), "gb")) # replace the target sequence gb.seq = Seq(
"type": type_, "location": row.find("b").text.strip().replace(" / ", ""), "addgene_id": row.find("a").get("href").strip("/"), } # get the AddGene sequences page url = "https://www.addgene.org/{}/sequences/".format( info["addgene_id"]) with session.get(url) as res: soup = bs.BeautifulSoup(res.text, "html.parser") # get the addgene full sequence section = soup.find("section", id="addgene-full") gb_url = section.find("a", class_="genbank-file-download").get("href") with requests.get(gb_url) as res: gbd = info["gb_depositor"] = CircularRecord( read(io.StringIO(res.text), "gb")) # get the AddGene plasmid page url = "https://www.addgene.org/{}/".format(info["addgene_id"]) with session.get(url) as res: soup = bs.BeautifulSoup(res.text, "html.parser") # get the deposited record section = soup.find("ul", class_="addgene-document-list") gb_url = section.find("a").get("href") with requests.get(gb_url) as res: gba = info["gb_addgene"] = CircularRecord( read(io.StringIO(res.text), "gb")) # Sanity check if len(gba) != len(gbd):