def test_empty(self):
    """Rebuild YEp24PGK_XK from the cookbook input files and check it.

    The XKS1 ORF is amplified with primer1/primer3, the PCR product is cut
    with BamHI and the middle fragment is joined to the last fragment of the
    BglII digest of YEp24PGK.  The looped and synced construct must compare
    equal to YEp24PGK_XK_correct.gb, and both must have the expected seguid.
    """
    import os
    import pydna
    from Bio.Restriction import BamHI
    from Bio.Restriction import BglII

    expected_seguid = "HRVpCEKWcFsKhw_W-25ednUfldI"

    cwd = os.getcwd()
    # The reference file is read relative to the starting directory.
    YEp24PGK_XK_correct = pydna.read("YEp24PGK_XK_correct.gb")

    os.chdir("../docs/cookbook/")
    try:
        p1 = pydna.read("primer1.txt", ds=False)
        p3 = pydna.read("primer3.txt", ds=False)
        XKS1 = pydna.read("XKS1_orf.txt")
        YEp24PGK = pydna.read("YEp24PGK.txt")
    finally:
        # Restore the working directory even when a read fails, so that a
        # failure here cannot change the directory seen by later tests.
        os.chdir(cwd)

    PCR_prod = pydna.pcr(p1, p3, XKS1)

    # Unpacking into three names also requires exactly three fragments.
    stuffer1, insert, stuffer2 = PCR_prod.cut(BamHI)

    YEp24PGK_BglII = YEp24PGK.cut(BglII).pop()

    YEp24PGK_XK = YEp24PGK_BglII + insert
    YEp24PGK_XK = YEp24PGK_XK.looped()
    # NOTE(review): the original line carried a trailing "#YEp24PGK)" comment,
    # presumably an earlier argument to synced() -- confirm before removing.
    YEp24PGK_XK = YEp24PGK_XK.synced("gaattctgaaccagtcctaaaacgagtaaataggaccggcaattc")

    self.assertTrue(pydna.eq(YEp24PGK_XK, YEp24PGK_XK_correct))
    self.assertEqual(YEp24PGK_XK_correct.seguid(), expected_seguid)
    self.assertEqual(YEp24PGK_XK.seguid(), expected_seguid)
def test_empty(self):
    """Rebuild YEp24PGK_XK from the cookbook input files and check it.

    The XKS1 ORF is amplified with primer1/primer3, the PCR product is cut
    with BamHI and the middle fragment is joined to the last fragment of the
    BglII digest of YEp24PGK.  The looped and synced construct must compare
    equal to YEp24PGK_XK_correct.gb, and both must have the expected seguid.
    """
    import os
    import pydna
    from Bio.Restriction import BamHI
    from Bio.Restriction import BglII

    expected_seguid = "HRVpCEKWcFsKhw_W-25ednUfldI"

    cwd = os.getcwd()
    # The reference file is read relative to the starting directory.
    YEp24PGK_XK_correct = pydna.read("YEp24PGK_XK_correct.gb")

    os.chdir("../docs/cookbook/")
    try:
        p1 = pydna.read("primer1.txt", ds=False)
        p3 = pydna.read("primer3.txt", ds=False)
        XKS1 = pydna.read("XKS1_orf.txt")
        YEp24PGK = pydna.read("YEp24PGK.txt")
    finally:
        # Restore the working directory even when a read fails, so that a
        # failure here cannot change the directory seen by later tests.
        os.chdir(cwd)

    PCR_prod = pydna.pcr(p1, p3, XKS1)

    # Unpacking into three names also requires exactly three fragments.
    stuffer1, insert, stuffer2 = PCR_prod.cut(BamHI)

    YEp24PGK_BglII = YEp24PGK.cut(BglII).pop()

    YEp24PGK_XK = YEp24PGK_BglII + insert
    YEp24PGK_XK = YEp24PGK_XK.looped()
    # NOTE(review): the original line carried a trailing "# YEp24PGK)" comment,
    # presumably an earlier argument to synced() -- confirm before removing.
    YEp24PGK_XK = YEp24PGK_XK.synced("gaattctgaaccagtcctaaaacgagtaaataggaccggcaattc")

    self.assertTrue(pydna.eq(YEp24PGK_XK, YEp24PGK_XK_correct))
    self.assertEqual(YEp24PGK_XK_correct.seguid(), expected_seguid)
    self.assertEqual(YEp24PGK_XK.seguid(), expected_seguid)
def test_synced3(self):
    """Sync pGUP1_not_synced.gb to pGREG505.gb and check the seguid."""
    expected_seguid = '42wIByERn2kSe_Exn405RYwhffU'

    # Read but not used below; kept so the file is still required to load.
    pGUP1 = read("pGUP1_correct.gb")
    reference = read("pGREG505.gb")
    unsynced = read("pGUP1_not_synced.gb")

    resynced = unsynced.synced(reference)
    self.assertEqual(resynced.seguid(), expected_seguid)
def test_read_from_file():
    """Read four sequence files and check they hold the same sequence.

    Two GenBank and two FASTA files are read, each record is formatted as
    GenBank (the result is discarded), and the sequences are compared
    case-insensitively.
    """
    a = read("./read1.gb")
    b = read("./read2.gb")
    c = read("./read3.fasta")
    d = read("./read4.fasta")

    # Formatting must not raise; the returned text is not inspected.
    for record in (a, b, c, d):
        record.format("gb")

    seq_a = str(a.seq).lower()
    assert seq_a == str(b.seq).lower() == str(c.seq).lower() == str(d.seq).lower()
def test_synced2(self): pUC19 = read("./pUC19.gb") pUC19_small_gene = read("./pUC19_small_gene.gb") correct = str(pUC19_small_gene.seq).upper() for i in range(1, len(pUC19_small_gene), 500): cand = pUC19_small_gene.shifted(i) self.assertEqual(str(cand.synced("tcgcgcgtttcggtgatgacggtga").seq).upper(), correct, str(cand.synced(pUC19).seq).upper()) print i, print
def test_synced2(self): pUC19 = read("./pUC19.gb") pUC19_small_gene = read("./pUC19_small_gene.gb") correct = str(pUC19_small_gene.seq).upper() for i in range(1, len(pUC19_small_gene), 500): cand = pUC19_small_gene.shifted(i) self.assertEqual( str(cand.synced("tcgcgcgtttcggtgatgacggtga").seq).upper(), correct, str(cand.synced(pUC19).seq).upper()) print i, print
def test_synced(self): pUC19 = read("./pUC19.gb") pUC19_LAC4 = read("./pUC_LAC4.gb") pUC19_LAC4_c = read("pUC_LAC4_correct_rotation.gb") correct = str(pUC19_LAC4_c.seq).upper() for i in range(1, len(pUC19_LAC4), 500): cand = pUC19_LAC4.shifted(i) self.assertEqual(str(cand.synced("tcgcgcgtttcggtgatgacggtga").seq).upper(), correct, str(pUC19_LAC4.synced(pUC19).seq).upper()) print i, print
def test_synced(self): pUC19 = read("./pUC19.gb") pUC19_LAC4 = read("./pUC_LAC4.gb") pUC19_LAC4_c = read("pUC_LAC4_correct_rotation.gb") correct = str(pUC19_LAC4_c.seq).upper() for i in range(1, len(pUC19_LAC4), 500): cand = pUC19_LAC4.shifted(i) self.assertEqual( str(cand.synced("tcgcgcgtttcggtgatgacggtga").seq).upper(), correct, str(pUC19_LAC4.synced(pUC19).seq).upper()) print i, print
def test_read_from_file():
    """Read sequence files and check their content.

    Four single-record files must hold the same sequence (compared
    case-insensitively).  The first of the two records parsed from pth1.txt
    must carry the UTF-8 encoded label '2micron 2\\xc2\\xb5' both in its
    formatted text and in feature 13.
    """
    micron_label = '2micron 2\xc2\xb5'

    a = read("./read1.gb")
    b = read("./read2.gb")
    c = read("./read3.fasta")
    d = read("./read4.fasta")
    # Unpacking requires pth1.txt to yield exactly two records.
    x, y = parse("pth1.txt")

    # Formatting must not raise; the returned text is not inspected.
    for record in (a, b, c, d, x, y):
        record.format("gb")

    assert x.format()[3314:3325] == micron_label
    assert x.features[13].qualifiers['label'][0] == micron_label

    seq_a = str(a.seq).lower()
    assert seq_a == str(b.seq).lower() == str(c.seq).lower() == str(d.seq).lower()
def test_map2(self):
    """Map *.ab1 trace files onto a target region of pCR_MCT1_HA46.

    The target is the slice returned by find_aa() for a peptide; the test
    checks which trace files match, which do not, and the slices found for
    two peptides.  assertEqual is used so a failure reports both values.
    """
    peptide = "VFFKE YPYDVPDYA IEG".replace(" ", "")
    expected_matching = set(['28-1rev_D04_026.ab1',
                             '32-3rev_H04_018.ab1',
                             '36-5rev_D05_041.ab1'])

    pCR_MCT1_HA46 = read("pCR_MCT1_HA46.gb")
    slc = pCR_MCT1_HA46.find_aa(peptide)
    pCR_MCT1_HA46.map_target = slc
    map_ = pCR_MCT1_HA46.map_trace_files("*.ab1")

    self.assertEqual(set(map_), expected_matching)
    self.assertEqual(
        set([x.fname for x in pCR_MCT1_HA46.matching_reads]),
        expected_matching)
    self.assertEqual(
        set([x.fname for x in pCR_MCT1_HA46.not_matching_reads]),
        set(['02-G1_B01_013.ab1']))

    self.assertEqual(
        pCR_MCT1_HA46.find_aa("YPYDVPDYA".replace(" ", "")),
        slice(1088, 1115, None))
    self.assertEqual(
        pCR_MCT1_HA46.find_aa(peptide),
        slice(1073, 1124, None))
def test_read_from_file():
    """Read sequence files and check their content.

    Four single-record files must hold the same sequence (compared
    case-insensitively).  The first of the two records parsed from pth1.txt
    must carry the UTF-8 encoded label '2micron 2\\xc2\\xb5' both in its
    formatted text and in feature 13.
    """
    micron_label = '2micron 2\xc2\xb5'

    a = read("./read1.gb")
    b = read("./read2.gb")
    c = read("./read3.fasta")
    d = read("./read4.fasta")
    # Unpacking requires pth1.txt to yield exactly two records.
    x, y = parse("pth1.txt")

    # Formatting must not raise; the returned text is not inspected.
    for record in (a, b, c, d, x, y):
        record.format("gb")

    assert x.format()[3314:3325] == micron_label
    assert x.features[13].qualifiers['label'][0] == micron_label

    seq_a = str(a.seq).lower()
    assert seq_a == str(b.seq).lower() == str(c.seq).lower() == str(d.seq).lower()
def test_empty(self): ''' test mark budde''' import pydna a = pydna.read('pGREG505.gb') self.assertTrue(a.name, "pGREG505") self.assertTrue(a.looped().name, "pGREG505") #self.assertTrue( a.annotations ,"pGREG505") self.assertTrue(a.id, "pGREG505") self.assertTrue(a.looped().id, "pGREG505") """
def test_empty(self): """ test mark budde""" import pydna a = pydna.read("pGREG505.gb") self.assertTrue(a.name, "pGREG505") self.assertTrue(a.looped().name, "pGREG505") # self.assertTrue( a.annotations ,"pGREG505") self.assertTrue(a.id, "pGREG505") self.assertTrue(a.looped().id, "pGREG505") """
def test_shift_origin(self):
    """Check shift_origin() on the circular pCAPs record.

    A shift of 200 must keep length and circularity and compare equal to the
    original, also after tolinear() and against a manually rotated slice
    (both compared with circular=True).  A shift of 20000 must raise
    ValueError with a message matching "shift".
    """
    shift = 200
    pCAPs = read("./pCAPs.gb")
    self.assertTrue(pCAPs.circular)

    shifted = shift_origin(pCAPs, shift)
    self.assertEqual(len(pCAPs), len(shifted))
    self.assertTrue(shifted.circular)
    self.assertTrue(eq(pCAPs, shifted))

    shifted_linear = shifted.tolinear()
    self.assertTrue(eq(pCAPs, shifted_linear, circular=True))

    # Same rotation built by hand from two slices.
    rotated = pCAPs[shift:] + pCAPs[:shift]
    self.assertTrue(eq(pCAPs, rotated, circular=True))

    with self.assertRaisesRegexp(ValueError, "shift"):
        shifted = shift_origin(pCAPs, 20000)
def test_shift_origin(self):
    """Check shift_origin() on the circular pCAPs record.

    A shift of 200 must keep length and circularity and compare equal to the
    original, also after tolinear() and against a manually rotated slice
    (both compared with circular=True).  A shift of 20000 must raise
    ValueError with a message matching "shift".
    """
    shift = 200
    pCAPs = read("./pCAPs.gb")
    self.assertTrue(pCAPs.circular)

    shifted = shift_origin(pCAPs, shift)
    self.assertEqual(len(pCAPs), len(shifted))
    self.assertTrue(shifted.circular)
    self.assertTrue(eq(pCAPs, shifted))

    shifted_linear = shifted.tolinear()
    self.assertTrue(eq(pCAPs, shifted_linear, circular=True))

    # Same rotation built by hand from two slices.
    rotated = pCAPs[shift:] + pCAPs[:shift]
    self.assertTrue(eq(pCAPs, rotated, circular=True))

    with self.assertRaisesRegexp(ValueError, "shift"):
        shifted = shift_origin(pCAPs, 20000)
def test_empty(self):
    """Assemble pGUP1 from the cookbook input files and check the result.

    The GUP1 locus is amplified with the two rec primers, pGREG505 is cut
    with SalI, and the PCR product is assembled with the first SalI fragment
    (limit=28).  The first circular product, synced to the first 50 bases of
    pGREG505, must match pGUP1_correct.gb in length (9981), by eq() and by
    seguid.
    """
    import os
    import pydna
    from Bio.Restriction import SalI

    expected_seguid = "42wIByERn2kSe_Exn405RYwhffU"

    cwd = os.getcwd()
    os.chdir("../docs/cookbook/")
    try:
        GUP1rec1sens = pydna.read("GUP1rec1sens.txt")
        GUP1rec2AS = pydna.read("GUP1rec2AS.txt")
        GUP1_locus = pydna.read("GUP1_locus.gb")
        pGREG505 = pydna.read("pGREG505.gb")
    finally:
        # Restore the working directory even when a read fails, so that a
        # failure here cannot change the directory seen by later tests.
        os.chdir(cwd)

    insert = pydna.pcr(GUP1rec1sens, GUP1rec2AS, GUP1_locus)

    # Unpacking requires the SalI digest to give exactly two fragments.
    lin_vect, his3 = pGREG505.cut(SalI)

    a = pydna.Assembly([insert, lin_vect], limit=28)
    pGUP1 = a.circular_products[0]
    pGUP1 = pGUP1.synced(pGREG505.seq[:50])

    # Read after the working directory has been restored.
    pGUP1_correct = pydna.read("pGUP1_correct.gb")

    self.assertEqual(len(pGUP1_correct), 9981)
    self.assertEqual(len(pGUP1), 9981)
    self.assertTrue(pydna.eq(pGUP1, pGUP1_correct))
    self.assertEqual(pGUP1_correct.seguid(), expected_seguid)
    self.assertEqual(pGUP1.seguid(), expected_seguid)
def test_empty(self):
    """Assemble pGUP1 from the cookbook input files and check the result.

    The GUP1 locus is amplified with the two rec primers, pGREG505 is cut
    with SalI, and the PCR product is assembled with the first SalI fragment
    (limit=28).  The first circular product, synced to the first 50 bases of
    pGREG505, must match pGUP1_correct.gb in length (9981), by eq() and by
    seguid.
    """
    import os
    import pydna
    from Bio.Restriction import SalI

    expected_seguid = "42wIByERn2kSe_Exn405RYwhffU"

    cwd = os.getcwd()
    os.chdir("../docs/cookbook/")
    try:
        GUP1rec1sens = pydna.read("GUP1rec1sens.txt")
        GUP1rec2AS = pydna.read("GUP1rec2AS.txt")
        GUP1_locus = pydna.read("GUP1_locus.gb")
        pGREG505 = pydna.read("pGREG505.gb")
    finally:
        # Restore the working directory even when a read fails, so that a
        # failure here cannot change the directory seen by later tests.
        os.chdir(cwd)

    insert = pydna.pcr(GUP1rec1sens, GUP1rec2AS, GUP1_locus)

    # Unpacking requires the SalI digest to give exactly two fragments.
    lin_vect, his3 = pGREG505.cut(SalI)

    a = pydna.Assembly([insert, lin_vect], limit=28)
    pGUP1 = a.circular_products[0]
    pGUP1 = pGUP1.synced(pGREG505.seq[:50])

    # Read after the working directory has been restored.
    pGUP1_correct = pydna.read("pGUP1_correct.gb")

    self.assertEqual(len(pGUP1_correct), 9981)
    self.assertEqual(len(pGUP1), 9981)
    self.assertTrue(pydna.eq(pGUP1, pGUP1_correct))
    self.assertEqual(pGUP1_correct.seguid(), expected_seguid)
    self.assertEqual(pGUP1.seguid(), expected_seguid)
def test_cut_feat(self):
    """Check feature counts on a PCR product before and after an EcoRI cut.

    The PCR product made with cloning_primers() must carry 23 features, and
    the second EcoRI fragment 17.  The same count is expected when the
    product is first copied into a plain Dseqrecord.
    """
    def as_dseqrecord(amplicon):
        # Copy sequence and features into a new Dseqrecord.
        record = Dseqrecord(amplicon.seq)
        record.features = amplicon.features
        return record

    puc19 = read('PUC19_MarkBudde.gb')
    fwd_primer, rev_primer = cloning_primers(puc19)
    pcrProd = pcr(fwd_primer, rev_primer, puc19)

    self.assertEqual(23, len(pcrProd.features))
    #print len(pcrProd.cut(EcoRI)[1].features)
    second_fragment = pcrProd.cut(EcoRI)[1]
    self.assertEqual(17, len(second_fragment.features))

    pcrProdDseqrecord = as_dseqrecord(pcrProd)
    second_fragment = pcrProdDseqrecord.cut(EcoRI)[1]
    self.assertEqual(17, len(second_fragment.features))
def ape(self, line):
    """Look up a sequence for *line* and hand it to MyMagics._apeloader.

    *line* is first tried as a key in self.shell.user_ns and then passed to
    pydna.read(); a successful read replaces the namespace lookup.  If a
    truthy sequence results, its description is set to *line* and it is
    opened with MyMagics._apeloader.open().  Always returns None.
    """
    import pydna

    seq = ''
    #print self.shell.user_ns[line]
    try:
        seq = self.shell.user_ns[line]
    except KeyError:
        pass
    # NOTE(review): the collapsed source does not show whether this second
    # try was nested under the KeyError handler; it is written as a sibling
    # statement here -- confirm.
    try:
        seq = pydna.read(line)
    except ValueError:
        pass

    if not seq:
        return
    seq.description = line  # new
    MyMagics._apeloader.open(seq)  #(*args,**kwargs)
    return
def ape(self, line):
    """Look up a sequence for *line* and hand it to MyMagics._apeloader.

    *line* is first tried as a key in self.shell.user_ns and then passed to
    pydna.read(); a successful read replaces the namespace lookup.  If a
    truthy sequence results, its description is set to *line* and it is
    opened with MyMagics._apeloader.open().  Always returns None.
    """
    import pydna

    seq = ''
    #print self.shell.user_ns[line]
    try:
        seq = self.shell.user_ns[line]
    except KeyError:
        pass
    # NOTE(review): the collapsed source does not show whether this second
    # try was nested under the KeyError handler; it is written as a sibling
    # statement here -- confirm.
    try:
        seq = pydna.read(line)
    except ValueError:
        pass

    if not seq:
        return
    seq.description = line  # new
    MyMagics._apeloader.open(seq)  #(*args,**kwargs)
    return
def test_map2(self):
    """Map *.ab1 trace files onto a target region of pCR_MCT1_HA46.

    The target is the slice returned by find_aa() for a peptide; the test
    checks which trace files match, which do not, and the slices found for
    two peptides.  assertEqual is used so a failure reports both values.
    """
    peptide = "VFFKE YPYDVPDYA IEG".replace(" ", "")
    expected_matching = set(['28-1rev_D04_026.ab1',
                             '32-3rev_H04_018.ab1',
                             '36-5rev_D05_041.ab1'])

    pCR_MCT1_HA46 = read("pCR_MCT1_HA46.gb")
    slc = pCR_MCT1_HA46.find_aa(peptide)
    pCR_MCT1_HA46.map_target = slc
    map_ = pCR_MCT1_HA46.map_trace_files("*.ab1")

    self.assertEqual(set(map_), expected_matching)
    self.assertEqual(
        set([x.fname for x in pCR_MCT1_HA46.matching_reads]),
        expected_matching)
    self.assertEqual(
        set([x.fname for x in pCR_MCT1_HA46.not_matching_reads]),
        set(['02-G1_B01_013.ab1']))

    self.assertEqual(
        pCR_MCT1_HA46.find_aa("YPYDVPDYA".replace(" ", "")),
        slice(1088, 1115, None))
    self.assertEqual(
        pCR_MCT1_HA46.find_aa(peptide),
        slice(1073, 1124, None))
def test_read_from_string():
    """Read a 4 bp sequence from GenBank and FASTA text, as str and unicode.

    Every input is passed to read() and must give the sequence "ACGT".  The
    original assigned the FASTA inputs but never read them, so those asserts
    only re-checked the previous record.

    NOTE(review): the line breaks inside the string literals were rebuilt in
    standard GenBank/FASTA layout -- confirm against the original file.
    """
    input_ = '''
LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
DEFINITION  .
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    2..3
                     /label=NewFeature
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 acgt
//
'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = '''>hej
acgt'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = u'''
LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
DEFINITION  .
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    2..3
                     /label=NewFeature
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 acgt
//
'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = u'''>hej
acgt'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    # FASTA header with non-ASCII characters.
    input_ = u'''>hej öööh!
acgt'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    # GenBank record with non-ASCII characters in DEFINITION and a label.
    input_ = u'''
LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
DEFINITION  öööh!
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    2..3
                     /label=öööh!
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 acgt
//
'''
    a = read(input_)
    assert str(a.seq) == "ACGT"
def test_Dseqrecord_cutting_adding(self):
    """Cut Dseqrecords with restriction enzymes and join the fragments again.

    Covers: cutting pCAPs with two enzymes and re-looping it; cutting three
    short records with different end structures; and checking which feature
    labels end up on which fragment of two small annotated records.
    Equality checks use assertEqual so a failure reports both values.

    NOTE(review): the line breaks inside the GenBank literals were rebuilt in
    standard GenBank layout -- confirm against the original file.
    """
    from Bio.Restriction import Bsu36I, BstAPI
    from Bio.Restriction import KpnI, Acc65I, NlaIV, EcoRI, EcoRV

    def labels(record):
        # First label qualifier of every feature, in feature order.
        return [x.qualifiers["label"][0] for x in record.features]

    pCAPs = read("./pCAPs.gb")
    a, b = pCAPs.cut(Bsu36I, BstAPI)
    c = (a + b).looped()
    self.assertTrue(eq(c, pCAPs))

    # The same duplex with the third Dseq argument set to -4, 0 and 4.
    a = (
        Dseqrecord(Dseq('AATTCACANGGTACCNGGTACCNGCGGATATC',
                        'GTGTNCCATGGNCCATGGNCGCCTATAG'[::-1], -4)),
        Dseqrecord(Dseq('CACANGGTACCNGGTACCNGCGGATATC',
                        'GTGTNCCATGGNCCATGGNCGCCTATAG'[::-1], 0)),
        Dseqrecord(Dseq('CACANGGTACCNGGTACCNGCGGATATC',
                        'AATTGTGTNCCATGGNCCATGGNCGCCTATAG'[::-1], 4)),
    )

    enzymes = [Acc65I, NlaIV, KpnI]

    for enz in enzymes:
        for f in a:
            # Each enzyme must give three fragments that add up to f again.
            b, c, d = f.cut(enz)
            e = b + c + d
            self.assertEqual(str(e.seq).lower(), str(f.seq).lower())

    a = read('''
LOCUS       New_DNA                   10 bp ds-DNA     linear       02-APR-2013
DEFINITION
ACCESSION   New_DNA
VERSION     New_DNA
KEYWORDS    .
SOURCE
  ORGANISM  .
            .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    1..1
                     /label=1
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    2..2
                     /label=2
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    3..3
                     /label=3
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    4..4
                     /label=4
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    5..5
                     /label=5
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    6..6
                     /label=6
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    7..7
                     /label=7
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    8..8
                     /label=8
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    9..9
                     /label=9
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    10..10
                     /label=10
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 ttGGTACCgg
//''')

    b, c = a.cut(Acc65I)

    # Features 4-7 are expected on both fragments.
    self.assertEqual([f.qualifiers["label"] for f in b.features],
                     [['1'], ['2'], ['3'], ['4'], ['5'], ['6'], ['7']])
    self.assertEqual([f.qualifiers["label"] for f in c.features],
                     [['4'], ['5'], ['6'], ['7'], ['8'], ['9'], ['10']])

    a = read('''
LOCUS       New_DNA                   33 bp ds-DNA     linear       08-NOV-2012
DEFINITION  .
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    1..11
                     /label=Acc65I-1
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    12..18
                     /label=Acc65I-2
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    19..33
                     /label=Acc65I-3
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    1..15
                     /label=KpnI-1
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    16..22
                     /label=KpnI-2
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    23..33
                     /label=KpnI-3
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    1..13
                     /label=NlaIV-1
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    14..20
                     /label=NlaIV-2
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
     misc_feature    21..33
                     /label=NlaIV-3
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 GAATTCacan ggtaccnGGT ACCngcgGAT ATC
//
''')

    self.assertEqual(a.seguid(), "di3hL8t2G4iQQsxlm_CtvnUMBz8")

    self.assertEqual(labels(a),
                     ['Acc65I-1', 'Acc65I-2', 'Acc65I-3',
                      'KpnI-1', 'KpnI-2', 'KpnI-3',
                      'NlaIV-1', 'NlaIV-2', 'NlaIV-3'])

    b, c, d = a.cut(Acc65I)

    self.assertEqual(labels(b), ['Acc65I-1', 'KpnI-1', 'NlaIV-1'])
    self.assertEqual(labels(c), ['Acc65I-2', 'KpnI-2', 'NlaIV-2'])
    self.assertEqual(labels(d), ['Acc65I-3', 'KpnI-3', 'NlaIV-3'])
    e = b + c + d
    # Rejoining must give back every label; order is compared after sorting.
    self.assertEqual(sorted(labels(e)), labels(a))
    self.assertEqual(str(a.seq), str(e.seq))

    b, c, d = a.cut(KpnI)
    self.assertEqual(labels(b), ['Acc65I-1', 'KpnI-1', 'NlaIV-1'])
    self.assertEqual(labels(c), ['Acc65I-2', 'KpnI-2', 'NlaIV-2'])
    self.assertEqual(labels(d), ['Acc65I-3', 'KpnI-3', 'NlaIV-3'])
    e = b + c + d
    self.assertEqual(sorted(labels(e)), labels(a))

    b, c, d = a.cut(NlaIV)
    self.assertEqual(labels(b), ['Acc65I-1', 'NlaIV-1'])
    self.assertEqual(labels(c), ['NlaIV-2'])
    self.assertEqual(labels(d), ['KpnI-3', 'NlaIV-3'])
    e = b + c + d
    self.assertEqual(str(a.seq), str(e.seq))

    b, c = a.cut(EcoRI)
    e = b + c
    self.assertEqual(str(a.seq), str(e.seq))

    b, c = a.cut(EcoRV)
    e = b + c
    self.assertEqual(str(a.seq), str(e.seq))

    b, c, d = a.cut(EcoRI, EcoRV)
    e = b + c + d
    self.assertEqual(str(a.seq), str(e.seq))

    b, c, d, f = a.cut(Acc65I, EcoRI)
    e = b + c + d + f
    self.assertEqual(str(a.seq), str(e.seq))

    # Same enzymes in the other order.
    b, c, d, f = a.cut(EcoRI, Acc65I)
    e = b + c + d + f
    self.assertEqual(str(a.seq), str(e.seq))
def test_synced3(self):
    """Sync pGUP1_not_synced.gb to pGREG505.gb and check the seguid."""
    expected_seguid = '42wIByERn2kSe_Exn405RYwhffU'

    # Read but not used below; kept so the file is still required to load.
    pGUP1 = read("pGUP1_correct.gb")
    reference = read("pGREG505.gb")
    unsynced = read("pGUP1_not_synced.gb")

    resynced = unsynced.synced(reference)
    self.assertEqual(resynced.seguid(), expected_seguid)
def test_read_from_string():
    """Read a 4 bp sequence from GenBank and FASTA text, as str and unicode.

    Every input is passed to read() and must give the sequence "ACGT".  The
    original assigned the FASTA inputs but never read them, so those asserts
    only re-checked the previous record.

    NOTE(review): the line breaks inside the string literals were rebuilt in
    standard GenBank/FASTA layout -- confirm against the original file.
    """
    input_ = '''
LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
DEFINITION  .
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    2..3
                     /label=NewFeature
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 acgt
//
'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = '''>hej
acgt'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = u'''
LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
DEFINITION  .
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    2..3
                     /label=NewFeature
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 acgt
//
'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = u'''>hej
acgt'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    # FASTA header with non-ASCII characters.
    input_ = u'''>hej öööh!
acgt'''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    # GenBank record with non-ASCII characters in DEFINITION and a label.
    input_ = u'''
LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
DEFINITION  öööh!
ACCESSION
VERSION
SOURCE      .
  ORGANISM  .
COMMENT
COMMENT     ApEinfo:methylated:1
FEATURES             Location/Qualifiers
     misc_feature    2..3
                     /label=öööh!
                     /ApEinfo_fwdcolor=cyan
                     /ApEinfo_revcolor=green
                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                     width 5 offset 0
ORIGIN
        1 acgt
//
'''
    a = read(input_)
    assert str(a.seq) == "ACGT"
def test_parse1(self):
    """Parse FASTA and GenBank records out of text and files.

    Checks that parse() finds the three FASTA records embedded in free text,
    that parse() and read() agree on a GenBank record, that a FASTA file
    yields 771 records of length 901, and that three plasmid files are read
    as circular.

    NOTE(review): the line breaks inside the string literals were rebuilt
    (blank line after each FASTA record, standard GenBank layout) -- confirm
    against the original file.
    """
    text = '''
points....: 1

The sequence seq below represents a double stranded linear DNA molecule.

>seq
CTCCCCTATCACCAGGGTACCGATAGCCACGAATCT

Give the sequence(s) of the fragment(s) formed after digesting seq
with the restriction enzyme Acc65I in the order that they appear in seq.

Use FASTA format and give the Watson strand(s) in 5'-3' direction below.
Give the sequences the names frag1,frag2,... etc.

>frag1
CTCCCCTATCACCAGG

>frag2
GTACCGATAGCCACGAATCT

*********** Question 4 ***********

QuestionID:
'''
    result = parse(text)

    correct = ['CTCCCCTATCACCAGGGTACCGATAGCCACGAATCT',
               'CTCCCCTATCACCAGG',
               'GTACCGATAGCCACGAATCT']

    self.assertEqual([str(s.seq) for s in result], correct)
    self.assertEqual([s.linear for s in result], [True, True, True])

    # Named genbank_text rather than input to avoid shadowing the builtin.
    genbank_text = '''
LOCUS       ScCYC1                   330 bp    DNA              UNK 01-JAN-1980
DEFINITION  ScCYC1
ACCESSION   ScCYC1
VERSION     ScCYC1
KEYWORDS    .
SOURCE      .
  ORGANISM  .
            .
FEATURES             Location/Qualifiers
ORIGIN
        1 ATGACTGAAT TCAAGGCCGG TTCTGCTAAG AAAGGTGCTA CACTTTTCAA GACTAGATGT
       61 CTACAATGCC ACACCGTGGA AAAGGGTGGC CCACATAAGG TTGGTCCAAA CTTGCATGGT
      121 ATCTTTGGCA GACACTCTGG TCAAGCTGAA GGGTATTCGT ACACAGATGC CAATATCAAG
      181 AAAAACGTGT TGTGGGACGA AAATAACATG TCAGAGTACT TGACTAACCC AAAGAAATAT
      241 ATTCCTGGTA CCAAGATGGC CTTTGGTGGG TTGAAGAAGG AAAAAGACAG AAACGACTTA
      301 ATTACCTACT TGAAAAAAGC CTGTGAGTAA
//
'''
    result = parse(genbank_text).pop()

    self.assertEqual(str(result.seq), str(read(genbank_text).seq))
    correct = '''ATGACTGAATTCAAGGCCGGTTCTGCTAAGAAAGGTGCTACACTTTTCAAGACTAGATGTCTACAATGCCACACCGTGGAAAAGGGTGGCCCACATAAGGTTGGTCCAAACTTGCATGGTATCTTTGGCAGACACTCTGGTCAAGCTGAAGGGTATTCGTACACAGATGCCAATATCAAGAAAAACGTGTTGTGGGACGAAAATAACATGTCAGAGTACTTGACTAACCCAAAGAAATATATTCCTGGTACCAAGATGGCCTTTGGTGGGTTGAAGAAGGAAAAAGACAGAAACGACTTAATTACCTACTTGAAAAAAGCCTGTGAGTAA'''

    self.assertEqual(str(result.seq), correct)
    self.assertTrue(result.linear)
    self.assertFalse(result.circular)

    seqs = parse('./RefDataBjorn.fas')

    self.assertEqual(len(seqs), 771)
    # All records must share one length.
    self.assertEqual(list(set([len(a) for a in seqs])), [901])

    pAG25 = read("./pAG25.gb")
    self.assertTrue(pAG25.circular)
    self.assertFalse(pAG25.linear)

    pCAPs = read("./pCAPs.gb")
    self.assertTrue(pCAPs.circular)
    self.assertFalse(pCAPs.linear)

    pUC19 = read("./pUC19.gb")
    self.assertTrue(pUC19.circular)
    self.assertFalse(pUC19.linear)
from time import gmtime, strftime import zipfile import cStringIO import sys import os import errno import codecs from docutils.core import publish_string from docutils.writers.html4css1 import Writer as HisWriter from pkg_resources import resource_filename from Bio.Restriction import ZraI, AjiI, EcoRV import pydna pYPKa = pydna.read( resource_filename('ypkpathway', os.path.join('data', 'pYPKa.txt'))) pYPK0 = pydna.read( resource_filename('ypkpathway', os.path.join('data', 'pYPK0.txt'))) pYPKpw = pydna.read( resource_filename('ypkpathway', os.path.join('data', 'pYPKpw.txt'))) (p577, p578, p468, p467, p567, p568, p775, p778, p342) = pydna.parse( u''' >577 gttctgatcctcgagcatcttaagaattc >578
def test_copy_features(self):
    """Copy features from the annotated pCAPs record onto bare sequences.

    The target is the FASTA version of pCAPs: rotated by several offsets
    (looped), reverse-complemented and rotated (not looped), and finally
    looped and synced to different anchor sequences.  The extracted feature
    sequences, or their seguid checksums, must match the expected ones.
    """
    from pydna.utils import seguid
    from pydna import read,copy_features

    offsets = [1,2,3,3127,3128,3129]

    def source_feature_seqs(rec):
        # Lower-cased sequences of features longer than 10, sorted by length.
        return sorted([str(f.extract(rec).seq).lower()
                       for f in rec.features if len(f)>10], key=len)

    def target_feature_seqs(rec):
        # Lower-cased sequences of all features, sorted by length.
        return sorted([str(f.extract(rec).seq).lower()
                       for f in rec.features], key=len)

    def feature_seguids(rec):
        return [seguid(f.extract(rec).seq) for f in rec.features]

    a=read("./pCAPs.gb")
    b=read("./pCAPs_fasta.txt")

    for sh in offsets:
        newb = (b[sh:]+b[:sh]).looped()
        copy_features(a, newb)
        #print "a",[len(str(f.extract(a).seq.lower()) for f in a.features if len(f)>10]
        #print "b",[len(str(f.extract(newb).seq).lower()) for f in newb.features]
        self.assertTrue(source_feature_seqs(a) == target_feature_seqs(newb))

    b=b.rc()

    for sh in offsets:
        # Unlike the loop above, the rotated record is not looped here.
        newb = b[sh:]+b[:sh]
        copy_features(a, newb)
        self.assertTrue(source_feature_seqs(a) == target_feature_seqs(newb))

    seguid_bla = "riT98j2v4NxVS8sbw_Q8epCwQwo"
    seguid_cre = "xLZ2xs2O8CUMmWh2OrhmNFp5ZLg"

    copy_features(a, b)
    assert feature_seguids(b) == [seguid_cre, seguid_cre, seguid_bla, seguid_bla]

    # (anchor passed to synced(), seguids expected after copy_features)
    synced_cases = [
        ("attaacgagtgccgtaaacgacgatggttttacc",
         [seguid_cre, seguid_cre, seguid_bla, seguid_bla]),
        ("ttaacgagtgccgtaaacgacgatggttttacc",
         [seguid_cre, seguid_cre, seguid_bla, seguid_bla]),
        ("taacgagtgccgtaaacgacgatggttttacc",
         [seguid_bla, seguid_bla]),
        ("gttaccaatgcttaatcagtgaggcacctatctcagc",
         [seguid_cre, seguid_cre, seguid_bla, seguid_bla]),
        ("ttaccaatgcttaatcagtgaggcacctatctcagc",
         [seguid_cre, seguid_cre, seguid_bla, seguid_bla]),
        ("taccaatgcttaatcagtgaggcacctatctcagc",
         [seguid_cre, seguid_cre]),
    ]

    for anchor, expected in synced_cases:
        b=read("./pCAPs_fasta.txt").looped()
        b=b.synced(anchor)
        copy_features(a, b)
        assert feature_seguids(b) == expected
pYPKa = pydna.read(''' LOCUS pYPKa 3128 bp DNA circular UNK 08-MAY-2015 DEFINITION Product_568_pCAPsAjiIR (22-mer)_567_pCAPsAjiIF (23-mer) cSEGUID_aV1eIrzOiCjvw01yvKkxDXHKLMk_2015-05-08T16:41:28.034624 ACCESSION 3128bp TmU_6uVdgKq5aQhTpHux7dVk9J8 VERSION 3128bp TmU_6uVdgKq5aQhTpHux7dVk9J8 KEYWORDS . SOURCE . ORGANISM . . FEATURES Location/Qualifiers primer_bind complement(558..578) /note="567_pCAPsAjiIF" /ApEinfo_fwdcolor="green" /ApEinfo_revcolor="red" misc complement(558..580) /label="567_pCAPsAjiIF" misc 581..602 /label="568_pCAPsAjiIR" primer_bind 583..602 /note="568_pCAPsAjiIR" /ApEinfo_fwdcolor="green" /ApEinfo_revcolor="red" rep_origin 1313 /direction=BOTH gene complement(2072..2932) /gene="bla" CDS complement(2072..2932) /product="beta-lactamase" /codon_start=1 /transl_table=11 /db_xref="GI:2769263" /db_xref="GOA:Q79DR3" /db_xref="HSSP:P62593" /db_xref="InterPro:IPR000871" /db_xref="InterPro:IPR001466" /db_xref="InterPro:IPR012338" /db_xref="UniProtKB/TrEMBL:Q79DR3" /translation="MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI ELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRW EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSA LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS LIKHW" /gene="bla" /protein_id="CAA04868.1" ORIGIN 1 tcgcgcgttt cggtgatgac ggtgaaaacc tctgacacat gcagctcccg gagacggtca 61 cagcttgtct gtaagcggat gccgggagca gacaagcccg tcagggcgcg tcagcgggtg 121 ttggcgggtg tcggggctgg cttaactatg cggcatcaga gcagattgta ctgagagtgc 181 accatagatc ctgaggatcg gggtgataaa tcagtctgcg ccacatcggg ggaaacaaaa 241 tggcgcgaga tctaaaaaaa aaggctccaa aaggagcctt tcgcgctacc aggtaacgcg 301 ccactccgac gggattaacg agtgccgtaa acgacgatgg ttttaccgtg tgcggagatc 361 aggttctgat cctcgagcat cttaagaatt cgtcccacgg tttgtctaga gcagccgaca 421 atctggccaa tttcctgacg ggtaattttg atttgcatgc cgtccgggtg agtcatagcg 481 tctggttgtt ttgccagatt cagcagagtc tgtgcaatgc ggccgctgac gtcgaggaac 541 gccaggttgc ccactttctc 
actagtgacc tgcagccgac gtgccatctg tgcagacaaa 601 cgcatcagga tatccggatt tacctgaatc aattggcgaa attttttgta cgaaatttca 661 gccacttcac aggcggtttt cgcacgtacc catgcgctac gttcctggcc ctcttcaaac 721 aggcccagtt cgccaataaa atcaccctga ttcagatagg agaggatcat ttctttaccc 781 tcttcgtctt tgatcagcac tgccacagag cctttaacga tgtagtacag cgtttccgct 841 ttttcaccct ggtgaataag cgtgctcttg gatgggtact tatgaatgtg gcaatgagac 901 aagaaccatt cgagagtagg atccgtttga ggtttaccaa gtaccataag atccttaaat 961 ttttattatc tagctagatg ataatattat atcaagaatt gtacctgaaa gcaaataaat 1021 tttttatctg gcttaactat gcggcatcag agcagattgt actgagagtg caccatatgc 1081 ggtgtgaaat accgcacaga tgcgtaagga gaaaataccg catcaggcgc tcttccgctt 1141 cctcgctcac tgactcgctg cgctcggtcg ttcggctgcg gcgagcggta tcagctcact 1201 caaaggcggt aatacggtta tccacagaat caggggataa cgcaggaaag aacatgtgag 1261 caaaaggcca gcaaaaggcc aggaaccgta aaaaggccgc gttgctggcg tttttccata 1321 ggctccgccc ccctgacgag catcacaaaa atcgacgctc aagtcagagg tggcgaaacc 1381 cgacaggact ataaagatac caggcgtttc cccctggaag ctccctcgtg cgctctcctg 1441 ttccgaccct gccgcttacc ggatacctgt ccgcctttct cccttcggga agcgtggcgc 1501 tttctcatag ctcacgctgt aggtatctca gttcggtgta ggtcgttcgc tccaagctgg 1561 gctgtgtgca cgaacccccc gttcagcccg accgctgcgc cttatccggt aactatcgtc 1621 ttgagtccaa cccggtaaga cacgacttat cgccactggc agcagccact ggtaacagga 1681 ttagcagagc gaggtatgta ggcggtgcta cagagttctt gaagtggtgg cctaactacg 1741 gctacactag aaggacagta tttggtatct gcgctctgct gaagccagtt accttcggaa 1801 aaagagttgg tagctcttga tccggcaaac aaaccaccgc tggtagcggt ggtttttttg 1861 tttgcaagca gcagattacg cgcagaaaaa aaggatctca agaagatcct ttgatctttt 1921 ctacggggtc tgacgctcag tggaacgaaa actcacgtta agggattttg gtcatgagat 1981 tatcaaaaag gatcttcacc tagatccttt taaattaaaa atgaagtttt aaatcaatct 2041 aaagtatata tgagtaaact tggtctgaca gttaccaatg cttaatcagt gaggcaccta 2101 tctcagcgat ctgtctattt cgttcatcca tagttgcctg actccccgtc gtgtagataa 2161 ctacgatacg ggagggctta ccatctggcc ccagtgctgc aatgataccg cgagacccac 2221 gctcaccggc tccagattta tcagcaataa 
accagccagc cggaagggcc gagcgcagaa 2281 gtggtcctgc aactttatcc gcctccatcc agtctattaa ttgttgccgg gaagctagag 2341 taagtagttc gccagttaat agtttgcgca acgttgttgc cattgctaca ggcatcgtgg 2401 tgtcacgctc gtcgtttggt atggcttcat tcagctccgg ttcccaacga tcaaggcgag 2461 ttacatgatc ccccatgttg tgcaaaaaag cggttagctc cttcggtcct ccgatcgttg 2521 tcagaagtaa gttggccgca gtgttatcac tcatggttat ggcagcactg cataattctc 2581 ttactgtcat gccatccgta agatgctttt ctgtgactgg tgagtactca accaagtcat 2641 tctgagaata gtgtatgcgg cgaccgagtt gctcttgccc ggcgtcaata cgggataata 2701 ccgcgccaca tagcagaact ttaaaagtgc tcatcattgg aaaacgttct tcggggcgaa 2761 aactctcaag gatcttaccg ctgttgagat ccagttcgat gtaacccact cgtgcaccca 2821 actgatcttc agcatctttt actttcacca gcgtttctgg gtgagcaaaa acaggaaggc 2881 aaaatgccgc aaaaaaggga ataagggcga cacggaaatg ttgaatactc atactcttcc 2941 tttttcaata ttattgaagc atttatcagg gttattgtct catgagcgga tacatatttg 3001 aatgtattta gaaaaataaa caaatagggg ttccgcgcac atttccccga aaagtgccac 3061 ctgctaagaa accattatta tcatgacatt aacctataaa aataggcgta tcacgaggcc 3121 ctttcgtc // ''')
67 4249–4255. ''' raw_input("press return!\n") gb=Genbank("*****@*****.**") if gb.test(): xks1_gene = gb.nucleotide("Z72979") print "Genbank record Z72979 downloaded from NCBI" YEp24PGK = gb.nucleotide("KC562906") print "Genbank record KC562906 downloaded from NCBI\n" else: xks1_gene = read("Z72979.gb") print "A local copy of Genbank record Z72979 is used" YEp24PGK = read("KC562906.gb") print "A local copy of Genbank record KC562906 is used\n" raw_input("press return!\n") primers=''' >primer1 GCGGATCCTCTAGAATGGTTTGTTCAGTAATTCAG >primer3 AGATCTGGATCCTTAGATGAGAGTCTTTTCCAG ''' primer1, primer2 = parse(primers, ds=False) xks1_pcr_product = pcr(primer1, primer2, xks1_gene)
Open fasta format file and returns the sequence. """ f = SeqIO.parse(open(fasta, 'rU'), 'fasta').next() return f.seq ####################################################### gb = pydna.Genbank("*****@*****.**") # Tell Genbank who you are! gene = gb.nucleotide("X06997") # Kluyveromyces lactis LAC12 gene for lactose permease. ####################################################### genome = pydna.read('FSC237.fasta') #genome = fasta_seq('FSC237.fasta') primer_f,primer_r = pydna.parse(''' >B4_400_1-F AGCAGTGCCTGTTGTACC >B4_400_1-R AGTTTCTCAACATGGAAT ''', ds=False) pcr_prod = pydna.pcr(primer_f,primer_r, genome) #stssearch -seqall genome.fasta -infile primers.txt -stdout --auto primersearch -seqall genome.fasta -infile primers.txt -mismatchpercent 1 -stdout --auto -mismatchpercent 10
def test_Dseq_cutting_adding(self): from Bio.Seq import Seq from Bio.Restriction import BamHI,EcoRI, PstI, EcoRV, SmaI from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA from Bio.SeqUtils.CheckSum import seguid from pydna import Dseq a = Dseq('GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC', 'CCTAGGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCCTAGG'[::-1], linear=True, ovhg=0) b = a.cut(BamHI)[1] self.assertEqual( b.watson , "GATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG") self.assertEqual( b.crick , "GATCCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG") c = Dseq('nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn', 'nGACGTCagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCTTAAGn'[::-1], linear=True, ovhg=0) f,d,l = c.cut((EcoRI, PstI)) self.assertEqual( d.watson , "GtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG") self.assertEqual( d.crick , "AATTCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaCTGCA") e = Dseq("nGAATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCAGn", "nCTTAAGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaGACGTCn"[::-1], linear=True, ovhg=0) f = e.cut((EcoRI,PstI))[1] self.assertEqual( f.watson ,"AATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCA") self.assertEqual( f.crick , "GacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG") ''' blunt cloning ''' pUC19 = read("./pUC19.gb") self.assertFalse( pUC19.linear ) self.assertTrue( len(pUC19) == 2686 ) self.assertTrue( len(pUC19.seq.watson) == 2686 ) self.assertTrue( len(pUC19.seq.crick) == 2686 ) self.assertTrue( pUC19.seq.circular == True) self.assertTrue( 
pUC19.seq.linear == False) pUC19_SmaI = pUC19.cut(SmaI) self.assertTrue( len(pUC19_SmaI) == 1) pUC19_SmaI = pUC19_SmaI.pop() self.assertTrue( pUC19_SmaI.linear ) self.assertTrue( len(pUC19_SmaI) == 2686 ) self.assertTrue( pUC19_SmaI.linear ) pUC19_SmaI_a = pUC19_SmaI.seq + a self.assertTrue( pUC19_SmaI_a.linear ) self.assertFalse( pUC19_SmaI_a.circular ) pUC19_SmaI_a=pUC19_SmaI_a.looped() self.assertTrue( len(pUC19_SmaI_a) == 2778 ) self.assertTrue( pUC19_SmaI_a.circular ) self.assertFalse( pUC19_SmaI_a.linear ) self.assertTrue( eq(pUC19_SmaI_a, read("./pUC19-SmaI-a.gb") )) ''' sticky end cloning ''' pUC19_BamHI = pUC19.cut(BamHI) self.assertTrue( len(pUC19_BamHI) == 1) pUC19_BamHI = pUC19_BamHI.pop().seq self.assertTrue( len(pUC19_BamHI.watson) == len(pUC19_BamHI.crick) == 2686 ) pUC19_BamHI_a = pUC19_BamHI+b self.assertTrue( len(pUC19_BamHI_a.watson) == len(pUC19_BamHI_a.crick) == 2772 ) self.assertTrue( pUC19_BamHI_a.circular == False) self.assertTrue( pUC19_BamHI_a.linear == True) pUC19_BamHI_a = pUC19_BamHI_a.looped() self.assertTrue( pUC19_BamHI_a.circular == True) self.assertTrue( pUC19_BamHI_a.linear == False) self.assertTrue( eq(pUC19_BamHI_a, read("./pUC19-BamHI-a.gb"))) pUC19_BamHI_a_rc = pUC19_BamHI+b.rc() pUC19_BamHI_a_rc = pUC19_BamHI_a_rc.looped() self.assertTrue( pUC19_BamHI_a.circular == True) self.assertTrue( pUC19_BamHI_a.linear == False) self.assertTrue( eq(pUC19_BamHI_a_rc, read("./pUC19-BamHI-a-rc.gb"))) ''' adding (ligating) dsDNA objects ''' with self.assertRaisesRegexp(TypeError, "circular"): pUC19+a with self.assertRaisesRegexp(TypeError, "circular"): a+pUC19 with self.assertRaisesRegexp(TypeError, "compatible"): a+b with self.assertRaisesRegexp(TypeError, "compatible"): b+a with self.assertRaisesRegexp(TypeError, "compatible"): d+d ''' directional cloning ''' pUC19_EcoRI_PstI = pUC19.cut(EcoRI, PstI).pop(0) with self.assertRaisesRegexp(TypeError, "compatible"): pUC19_EcoRI_PstI + d pUC19_EcoRI_PstI_d = pUC19_EcoRI_PstI + d.rc() 
pUC19_EcoRI_PstI_d = pUC19_EcoRI_PstI_d.looped() self.assertTrue( eq(pUC19_EcoRI_PstI_d, read("./pUC19-EcoRI_PstI-d-rc.gb"))) self.assertTrue( eq(pUC19_EcoRI_PstI_d.rc(), read("./pUC19-EcoRI_PstI-d-rc.gb")))
def test_Dseqrecord_cutting_adding(self): from Bio.Restriction import Bsu36I, BstAPI pCAPs = read("./pCAPs.gb") a,b = pCAPs.cut(Bsu36I, BstAPI) c=(a+b).looped() self.assertTrue( eq(c, pCAPs) ) a = (Dseqrecord( Dseq( 'AATTCACANGGTACCNGGTACCNGCGGATATC', 'GTGTNCCATGGNCCATGGNCGCCTATAG'[::-1], -4)), Dseqrecord( Dseq( 'CACANGGTACCNGGTACCNGCGGATATC', 'GTGTNCCATGGNCCATGGNCGCCTATAG'[::-1], 0)), Dseqrecord( Dseq( 'CACANGGTACCNGGTACCNGCGGATATC', 'AATTGTGTNCCATGGNCCATGGNCGCCTATAG'[::-1], 4)),) from Bio.Restriction import KpnI, Acc65I, NlaIV enzymes = [Acc65I, NlaIV, KpnI] for enz in enzymes: for f in a: b,c,d = f.cut(enz) e=b+c+d assert str(e.seq).lower() == str(f.seq).lower() #from pydna import * #from pydna_helper import gb, ape from Bio.Restriction import KpnI, BamHI, Acc65I, NlaIV, EcoRI, EcoRV a=read(''' LOCUS New_DNA 10 bp ds-DNA linear 02-APR-2013 DEFINITION ACCESSION New_DNA VERSION New_DNA KEYWORDS . SOURCE ORGANISM . . COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature 1..1 /label=1 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 2..2 /label=2 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 3..3 /label=3 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 4..4 /label=4 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 5..5 /label=5 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 6..6 /label=6 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 7..7 /label=7 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 
-1} {} 0} width 5 offset 0 misc_feature 8..8 /label=8 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 9..9 /label=9 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 10..10 /label=10 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 ttGGTACCgg //''') b,c = a.cut(Acc65I) self.assertEqual( [f.qualifiers["label"] for f in b.features], [['1'], ['2'], ['3'], ['4'], ['5'], ['6'], ['7']]) self.assertEqual( [f.qualifiers["label"] for f in c.features], [['4'], ['5'], ['6'], ['7'], ['8'], ['9'], ['10']]) a=read(''' LOCUS New_DNA 33 bp ds-DNA linear 08-NOV-2012 DEFINITION . ACCESSION VERSION SOURCE . ORGANISM . COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature 1..11 /label=Acc65I-1 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 12..18 /label=Acc65I-2 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 19..33 /label=Acc65I-3 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 1..15 /label=KpnI-1 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 16..22 /label=KpnI-2 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 23..33 /label=KpnI-3 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 1..13 /label=NlaIV-1 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 
offset 0 misc_feature 14..20 /label=NlaIV-2 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 21..33 /label=NlaIV-3 /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 GAATTCacan ggtaccnGGT ACCngcgGAT ATC // ''') self.assertTrue( a.seguid()=="di3hL8t2G4iQQsxlm_CtvnUMBz8" ) self.assertTrue( ([x.qualifiers["label"][0] for x in a.features] == ['Acc65I-1', 'Acc65I-2', 'Acc65I-3', 'KpnI-1', 'KpnI-2', 'KpnI-3', 'NlaIV-1', 'NlaIV-2', 'NlaIV-3'])) b,c,d = a.cut(Acc65I) self.assertTrue( [x.qualifiers["label"][0] for x in b.features] == ['Acc65I-1', 'KpnI-1', 'NlaIV-1']) self.assertTrue( [x.qualifiers["label"][0] for x in c.features] == ['Acc65I-2', 'KpnI-2', 'NlaIV-2']) self.assertTrue( [x.qualifiers["label"][0] for x in d.features] == ['Acc65I-3', 'KpnI-3', 'NlaIV-3']) e = b+c+d self.assertTrue( sorted([x.qualifiers["label"][0] for x in e.features]) == [x.qualifiers["label"][0] for x in a.features]) self.assertTrue( str(a.seq)==str(e.seq)) b,c,d = a.cut(KpnI) self.assertTrue( [x.qualifiers["label"][0] for x in b.features] == ['Acc65I-1', 'KpnI-1', 'NlaIV-1']) self.assertTrue( [x.qualifiers["label"][0] for x in c.features] == ['Acc65I-2', 'KpnI-2', 'NlaIV-2']) self.assertTrue( [x.qualifiers["label"][0] for x in d.features] == ['Acc65I-3', 'KpnI-3', 'NlaIV-3']) e = b+c+d self.assertTrue( sorted([x.qualifiers["label"][0] for x in e.features]) == [x.qualifiers["label"][0] for x in a.features]) b,c,d = a.cut(NlaIV) self.assertTrue( [x.qualifiers["label"][0] for x in b.features] == ['Acc65I-1', 'NlaIV-1']) self.assertTrue( [x.qualifiers["label"][0] for x in c.features] == ['NlaIV-2']) self.assertTrue( [x.qualifiers["label"][0] for x in d.features] == [ 'KpnI-3', 'NlaIV-3']) e = b+c+d self.assertTrue( str(a.seq)==str(e.seq)) b,c = a.cut(EcoRI) e = b+c self.assertTrue( str(a.seq)==str(e.seq)) b,c = a.cut(EcoRV) e = b+c 
self.assertTrue( str(a.seq)==str(e.seq)) b,c,d = a.cut(EcoRI,EcoRV) e = b+c+d self.assertTrue( str(a.seq)==str(e.seq)) b,c,d, f = a.cut(Acc65I,EcoRI) e = b+c+d+f self.assertTrue( str(a.seq)==str(e.seq)) b,c,d, f = a.cut(EcoRI,Acc65I) e = b+c+d+f self.assertTrue( str(a.seq)==str(e.seq))
def test_initialization(self): a=[] a.append( Dseqrecord("attt") ) a.append( Dseqrecord(Dseq("attt")) ) a.append( Dseqrecord(Seq("attt")) ) a.append( Dseqrecord(Srec(Seq("attt")))) a.append( Dseqrecord(Dseqrecord("attt")) ) for b in a: self.assertTrue( type(b.seq) == Dseq ) self.assertTrue( str(b.seq.watson) == "attt" ) self.assertTrue( str(b.seq.crick) == "aaat" ) self.assertTrue( str(b.seq) == "attt" ) self.assertTrue( str(b.seq) == "attt" ) self.assertTrue(b.linear == b.seq.linear ) self.assertTrue(b.linear == True ) self.assertTrue(b.circular == False ) self.assertTrue(b.seq.linear == True ) self.assertTrue(b.seq.circular == False ) a=[] a.append( Dseqrecord("attt", circular=True) ) a.append( Dseqrecord(Dseq("attt"), circular=True) ) a.append( Dseqrecord(Seq("attt"), circular=True) ) a.append( Dseqrecord(Srec(Seq("attt")), circular=True)) a.append( Dseqrecord(Dseqrecord("attt"), circular=True )) for b in a: self.assertTrue( type(b.seq) == Dseq ) self.assertTrue( str(b.seq.watson) == "attt" ) self.assertTrue( str(b.seq.crick) == "aaat" ) self.assertTrue( str(b.seq) == "attt" ) self.assertTrue( str(b.seq) == "attt" ) self.assertTrue(b.linear == b.seq.linear ) self.assertTrue(b.linear == False ) self.assertTrue(b.circular == True ) self.assertTrue(b.seq.linear == False ) self.assertTrue(b.seq.circular == True ) a=[] a.append(Dseqrecord(Dseq("attt",circular=True), circular=True)) a.append(Dseqrecord(Dseq("attt",circular=False), circular=True)) a.append(Dseqrecord(Dseq("attt",circular=True), circular=False)) a.append(Dseqrecord(Dseq("attt",circular=False), circular=False)) circular = [True,True,False,False] linear = [False,False,True,True] for b,ci,li in zip(a,circular,linear): self.assertTrue( type(b.seq) == Dseq ) self.assertTrue( str(b.seq.watson) == "attt" ) self.assertTrue( str(b.seq.crick) == "aaat" ) self.assertTrue( str(b.seq) == "attt" ) self.assertTrue( str(b.seq) == "attt" ) self.assertTrue(b.linear == b.seq.linear ) self.assertTrue(b.linear == li ) 
self.assertTrue(b.circular == ci ) self.assertTrue(b.seq.linear == li ) self.assertTrue(b.seq.circular == ci ) a=[] ds = Dseq("attt", "taaa") self.assertTrue(ds.linear == True) self.assertTrue(ds.ovhg == -1) self.assertTrue( str(ds.watson) == "attt" ) self.assertTrue( str(ds.crick) == "taaa" ) # attt # aaat a.append(Dseqrecord(ds, circular = False)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, linear = True)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, circular=True)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, linear=False)) self.assertTrue(ds.linear == True) circular = [False,False,True,True] linear = [True,True,False,False] crick = ["taaa","taaa","aaat","aaat"] sek = ["attta","attta","attt", "attt"] for b,ci,li,s,cri in zip(a,circular,linear, sek, crick): self.assertTrue( type(b.seq) == Dseq ) self.assertTrue( str(b.seq.watson) == "attt" ) self.assertTrue( str(b.seq.crick) == cri ) self.assertTrue( str(b.seq) == s ) self.assertTrue(b.linear == b.seq.linear ) self.assertTrue(b.linear == li ) self.assertTrue(b.circular == ci ) self.assertTrue(b.seq.linear == li ) self.assertTrue(b.seq.circular == ci ) a=[] ds = Dseq("attt", "caaa") self.assertTrue(ds.linear == True) self.assertTrue(ds.ovhg == -1) a.append(Dseqrecord(ds, circular=False)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, linear=True)) self.assertTrue(ds.linear == True) with self.assertRaises(TypeError): Dseqrecord(ds, circular=True) self.assertTrue(ds.linear == True) with self.assertRaises(TypeError): Dseqrecord(ds, linear=False) self.assertTrue(ds.linear == True) with self.assertRaises(TypeError): b = Dseqrecord([]) with self.assertRaises(TypeError): b = Dseqrecord(("a",)) with self.assertRaises(TypeError): b = Dseqrecord(0) from pydna import read input = ''' LOCUS New_DNA 4 bp ds-DNA linear 30-MAR-2013 DEFINITION . ACCESSION VERSION SOURCE . ORGANISM . 
COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature 2..3 /label=NewFeature /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 acgt // ''' a = read(input) self.assertEqual( a.features[0].extract(a).seq.watson, "CG") b = a+a for f in b.features: self.assertEqual( b.features[0].extract(a).seq.watson, "CG") feature = a.features[0] s = Dseq("agctt","agcta") #print s.fig() #Dseq(-6) # agctt #atcga b = Dseqrecord(s) b.features.append(feature) cb = Dseqrecord(b,circular=True) self.assertEqual(b.features[0].extract(b).seq.watson.lower(), cb.features[0].extract(b).seq.watson.lower() ) self.assertEqual(b.features[0].extract(b).seq.crick.lower(), cb.features[0].extract(b).seq.crick.lower() ) s = Dseq("aagct","aagct") #print s.fig() #Dseq(-6) #aagct # tcgaa b = Dseqrecord(s) with self.assertRaises(TypeError): cb = Dseqrecord(b, circular=True) s = Dseq("agctt","agcta") #print s.fig() #Dseq(-6) # agcta #ttcga b = Dseqrecord(s) b.features.append(feature) cb = Dseqrecord(b,circular=True) self.assertEqual(b.features[0].extract(b).seq.watson.lower(), cb.features[0].extract(b).seq.watson.lower() ) self.assertEqual(b.features[0].extract(b).seq.crick.lower(), cb.features[0].extract(b).seq.crick.lower() )
def test_Dseq_cutting_adding(self): from Bio.Seq import Seq from Bio.Restriction import BamHI, EcoRI, PstI, EcoRV, SmaI from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA from Bio.SeqUtils.CheckSum import seguid from pydna import Dseq a = Dseq( 'GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC', 'CCTAGGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCCTAGG'[:: -1], linear=True, ovhg=0) b = a.cut(BamHI)[1] self.assertEqual( b.watson, "GATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG" ) self.assertEqual( b.crick, "GATCCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG" ) c = Dseq( 'nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn', 'nGACGTCagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCTTAAGn'[:: -1], linear=True, ovhg=0) f, d, l = c.cut((EcoRI, PstI)) self.assertEqual( d.watson, "GtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG" ) self.assertEqual( d.crick, "AATTCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaCTGCA" ) e = Dseq( "nGAATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCAGn", "nCTTAAGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaGACGTCn"[:: -1], linear=True, ovhg=0) f = e.cut((EcoRI, PstI))[1] self.assertEqual( f.watson, "AATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCA" ) self.assertEqual( f.crick, "GacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG" ) ''' blunt cloning ''' pUC19 = read("./pUC19.gb") self.assertFalse(pUC19.linear) self.assertTrue(len(pUC19) == 2686) self.assertTrue(len(pUC19.seq.watson) == 2686) self.assertTrue(len(pUC19.seq.crick) == 2686) self.assertTrue(pUC19.seq.circular == True) 
self.assertTrue(pUC19.seq.linear == False) pUC19_SmaI = pUC19.cut(SmaI) self.assertTrue(len(pUC19_SmaI) == 1) pUC19_SmaI = pUC19_SmaI.pop() self.assertTrue(pUC19_SmaI.linear) self.assertTrue(len(pUC19_SmaI) == 2686) self.assertTrue(pUC19_SmaI.linear) pUC19_SmaI_a = pUC19_SmaI.seq + a self.assertTrue(pUC19_SmaI_a.linear) self.assertFalse(pUC19_SmaI_a.circular) pUC19_SmaI_a = pUC19_SmaI_a.looped() self.assertTrue(len(pUC19_SmaI_a) == 2778) self.assertTrue(pUC19_SmaI_a.circular) self.assertFalse(pUC19_SmaI_a.linear) self.assertTrue(eq(pUC19_SmaI_a, read("./pUC19-SmaI-a.gb"))) ''' sticky end cloning ''' pUC19_BamHI = pUC19.cut(BamHI) self.assertTrue(len(pUC19_BamHI) == 1) pUC19_BamHI = pUC19_BamHI.pop().seq self.assertTrue( len(pUC19_BamHI.watson) == len(pUC19_BamHI.crick) == 2686) pUC19_BamHI_a = pUC19_BamHI + b self.assertTrue( len(pUC19_BamHI_a.watson) == len(pUC19_BamHI_a.crick) == 2772) self.assertTrue(pUC19_BamHI_a.circular == False) self.assertTrue(pUC19_BamHI_a.linear == True) pUC19_BamHI_a = pUC19_BamHI_a.looped() self.assertTrue(pUC19_BamHI_a.circular == True) self.assertTrue(pUC19_BamHI_a.linear == False) self.assertTrue(eq(pUC19_BamHI_a, read("./pUC19-BamHI-a.gb"))) pUC19_BamHI_a_rc = pUC19_BamHI + b.rc() pUC19_BamHI_a_rc = pUC19_BamHI_a_rc.looped() self.assertTrue(pUC19_BamHI_a.circular == True) self.assertTrue(pUC19_BamHI_a.linear == False) self.assertTrue(eq(pUC19_BamHI_a_rc, read("./pUC19-BamHI-a-rc.gb"))) ''' adding (ligating) dsDNA objects ''' with self.assertRaisesRegexp(TypeError, "circular"): pUC19 + a with self.assertRaisesRegexp(TypeError, "circular"): a + pUC19 with self.assertRaisesRegexp(TypeError, "compatible"): a + b with self.assertRaisesRegexp(TypeError, "compatible"): b + a with self.assertRaisesRegexp(TypeError, "compatible"): d + d ''' directional cloning ''' pUC19_EcoRI_PstI = pUC19.cut(EcoRI, PstI).pop(0) with self.assertRaisesRegexp(TypeError, "compatible"): pUC19_EcoRI_PstI + d pUC19_EcoRI_PstI_d = pUC19_EcoRI_PstI + d.rc() 
pUC19_EcoRI_PstI_d = pUC19_EcoRI_PstI_d.looped() self.assertTrue( eq(pUC19_EcoRI_PstI_d, read("./pUC19-EcoRI_PstI-d-rc.gb"))) self.assertTrue( eq(pUC19_EcoRI_PstI_d.rc(), read("./pUC19-EcoRI_PstI-d-rc.gb")))
def test_parse1(self): ''' test parsing fasta sequences from a text''' text = ''' points....: 1 The sequence seq below represents a double stranded linear DNA molecule. >seq CTCCCCTATCACCAGGGTACCGATAGCCACGAATCT Give the sequence(s) of the fragment(s) formed after digesting seq with the restriction enzyme Acc65I in the order that they appear in seq. Use FASTA format and give the Watson strand(s) in 5'-3' direction below. Give the sequences the names frag1,frag2,... etc. >frag1 CTCCCCTATCACCAGG >frag2 GTACCGATAGCCACGAATCT *********** Question 4 *********** QuestionID: ''' result = parse(text) correct = [ 'CTCCCCTATCACCAGGGTACCGATAGCCACGAATCT', 'CTCCCCTATCACCAGG', 'GTACCGATAGCCACGAATCT' ] self.assertEqual([str(s.seq) for s in result], correct) self.assertEqual([s.linear for s in result], [True, True, True]) input = ''' LOCUS ScCYC1 330 bp DNA UNK 01-JAN-1980 DEFINITION ScCYC1 ACCESSION ScCYC1 VERSION ScCYC1 KEYWORDS . SOURCE . ORGANISM . . FEATURES Location/Qualifiers ORIGIN 1 ATGACTGAAT TCAAGGCCGG TTCTGCTAAG AAAGGTGCTA CACTTTTCAA GACTAGATGT 61 CTACAATGCC ACACCGTGGA AAAGGGTGGC CCACATAAGG TTGGTCCAAA CTTGCATGGT 121 ATCTTTGGCA GACACTCTGG TCAAGCTGAA GGGTATTCGT ACACAGATGC CAATATCAAG 181 AAAAACGTGT TGTGGGACGA AAATAACATG TCAGAGTACT TGACTAACCC AAAGAAATAT 241 ATTCCTGGTA CCAAGATGGC CTTTGGTGGG TTGAAGAAGG AAAAAGACAG AAACGACTTA 301 ATTACCTACT TGAAAAAAGC CTGTGAGTAA // ''' result = parse(input).pop() self.assertEqual(str(result.seq), str(read(input).seq)) correct = '''ATGACTGAATTCAAGGCCGGTTCTGCTAAGAAAGGTGCTACACTTTTCAAGACTAGATGTCTACAATGCCACACCGTGGAAAAGGGTGGCCCACATAAGGTTGGTCCAAACTTGCATGGTATCTTTGGCAGACACTCTGGTCAAGCTGAAGGGTATTCGTACACAGATGCCAATATCAAGAAAAACGTGTTGTGGGACGAAAATAACATGTCAGAGTACTTGACTAACCCAAAGAAATATATTCCTGGTACCAAGATGGCCTTTGGTGGGTTGAAGAAGGAAAAAGACAGAAACGACTTAATTACCTACTTGAAAAAAGCCTGTGAGTAA''' self.assertEqual(str(result.seq), correct) self.assertTrue(result.linear == True) self.assertTrue(result.circular == False) seqs = parse('./RefDataBjorn.fas') self.assertEqual(len(seqs), 
771) self.assertEqual(list(set([len(a) for a in seqs])), [901]) pAG25 = read("./pAG25.gb") self.assertTrue(pAG25.circular == True) self.assertTrue(pAG25.linear == False) pCAPs = read("./pCAPs.gb") self.assertTrue(pCAPs.circular == True) self.assertTrue(pCAPs.linear == False) pUC19 = read("./pUC19.gb") self.assertTrue(pUC19.circular == True) self.assertTrue(pUC19.linear == False)
def test_features_change_ori(self): s = read(''' LOCUS New_DNA 13 bp ds-DNA circular 12-NOV-2013 DEFINITION . ACCESSION VERSION SOURCE . ORGANISM . COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature join(9..10,12..13,1..1,3..6) /label=hej /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 gattttaatc acc //''') #from pydna_helper import ape for i in range(1, len(s)): b = s.shifted(i) self.assertTrue( str(b.features[0].extract(b).seq).lower() == "tcccgtttt") s = read(''' LOCUS New_DNA 21 bp ds-DNA circular 03-APR-2013 DEFINITION a ACCESSION VERSION SOURCE . ORGANISM . COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature join(18..21,1..4) /label=bb /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 5..17 /label=ins /ApEinfo_fwdcolor=#e03c2b /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 aaaGGTACCt ttGGATCCggg // ''') self.assertTrue(str(s.features[0].extract(s).seq) == "CGGGAAAG") self.assertTrue(str(s.features[1].extract(s).seq) == "GTACCTTTGGATC") for i in range(1, len(s)): b = s.shifted(i) self.assertTrue([ str(f.extract(b).seq) for f in b.features if f.qualifiers["label"][0] == 'ins' ][0] == "GTACCTTTGGATC") self.assertTrue([ str(f.extract(b).seq) for f in b.features if f.qualifiers["label"][0] == 'bb' ][0] == "CGGGAAAG") from Bio.Restriction import Acc65I, KpnI, BamHI bb1, ins1 = sorted(s.cut(Acc65I, BamHI), key=len, reverse=True) for i in range(1, len(s)): b = s.shifted(i) bb, ins = sorted(b.cut(Acc65I, BamHI), key=len, reverse=True) self.assertTrue(eq(bb1, bb)) self.assertTrue(eq(ins1, ins)) self.assertTrue( bb.features[0].extract(bb).seq.watson == "CGGGAAAG") self.assertTrue(bb.features[0].extract(bb).seq.crick == "CTTTCCCG") self.assertTrue( eq(bb.features[0].extract(bb), 
s.features[0].extract(s))) self.assertTrue( ins.features[0].extract(ins).seq.watson == "GTACCTTTG") self.assertTrue( ins.features[0].extract(ins).seq.crick == "GATCCAAAG") self.assertTrue( str(ins.features[0].extract(ins).seq) == str( s.features[1].extract(s).seq))
cloning in yeast. Gene, 344: 43–51. http://www.ncbi.nlm.nih.gov/pubmed/15656971 ''' print info raw_input("Press any key and wait for the script to finish!") # Establish the two primers. These sequences can be found in (1) GUP1rec1sens = SeqRecord(Seq("gaattcgatatcaagcttatcgataccgatgtcgctgatcagcatcctgtctcc")) GUP1rec2AS = SeqRecord(Seq("gacataactaattacatgactcgaggtcgactcagcattttaggtaaattccg")) # Read the GUP1 locus sequence into a Dseqrecord object # This sequence was taken from the Saccharomyces genome Database: # http://www.yeastgenome.org/cgi-bin/getSeq?query=YGL084C&flankl=1000&flankr=1000&format=fasta GUP1 = read("GUP1_locus.gb") # The insert is formed by PCR using the two primers and the template sequence insert = pcr(GUP1rec1sens, GUP1rec2AS, GUP1) # The sequence for the plasmid is read into a Dseqrecord object called pGREG505 # this sequence was found at # http://www.euroscarf.de/plasmid_details.php?accno=P30350 # This sequence is circular, this information is parsed from the Genbank file. pGREG505 = read("pGREG505.gb") # Import the SalI restriction enzyme from Biopython from Bio.Restriction import SalI # Cut the circular pGREG505 plasmid with SalI # this enzyme cuts twice, so two fragments are formed
def test_initialization(self): a = [] a.append(Dseqrecord("attt")) a.append(Dseqrecord(Dseq("attt"))) a.append(Dseqrecord(Seq("attt"))) a.append(Dseqrecord(Srec(Seq("attt")))) a.append(Dseqrecord(Dseqrecord("attt"))) for b in a: self.assertTrue(type(b.seq) == Dseq) self.assertTrue(str(b.seq.watson) == "attt") self.assertTrue(str(b.seq.crick) == "aaat") self.assertTrue(str(b.seq) == "attt") self.assertTrue(str(b.seq) == "attt") self.assertTrue(b.linear == b.seq.linear) self.assertTrue(b.linear == True) self.assertTrue(b.circular == False) self.assertTrue(b.seq.linear == True) self.assertTrue(b.seq.circular == False) a = [] a.append(Dseqrecord("attt", circular=True)) a.append(Dseqrecord(Dseq("attt"), circular=True)) a.append(Dseqrecord(Seq("attt"), circular=True)) a.append(Dseqrecord(Srec(Seq("attt")), circular=True)) a.append(Dseqrecord(Dseqrecord("attt"), circular=True)) for b in a: self.assertTrue(type(b.seq) == Dseq) self.assertTrue(str(b.seq.watson) == "attt") self.assertTrue(str(b.seq.crick) == "aaat") self.assertTrue(str(b.seq) == "attt") self.assertTrue(str(b.seq) == "attt") self.assertTrue(b.linear == b.seq.linear) self.assertTrue(b.linear == False) self.assertTrue(b.circular == True) self.assertTrue(b.seq.linear == False) self.assertTrue(b.seq.circular == True) a = [] a.append(Dseqrecord(Dseq("attt", circular=True), circular=True)) a.append(Dseqrecord(Dseq("attt", circular=False), circular=True)) a.append(Dseqrecord(Dseq("attt", circular=True), circular=False)) a.append(Dseqrecord(Dseq("attt", circular=False), circular=False)) circular = [True, True, False, False] linear = [False, False, True, True] for b, ci, li in zip(a, circular, linear): self.assertTrue(type(b.seq) == Dseq) self.assertTrue(str(b.seq.watson) == "attt") self.assertTrue(str(b.seq.crick) == "aaat") self.assertTrue(str(b.seq) == "attt") self.assertTrue(str(b.seq) == "attt") self.assertTrue(b.linear == b.seq.linear) self.assertTrue(b.linear == li) self.assertTrue(b.circular == ci) 
self.assertTrue(b.seq.linear == li) self.assertTrue(b.seq.circular == ci) a = [] ds = Dseq("attt", "taaa") self.assertTrue(ds.linear == True) self.assertTrue(ds.ovhg == -1) self.assertTrue(str(ds.watson) == "attt") self.assertTrue(str(ds.crick) == "taaa") # attt # aaat a.append(Dseqrecord(ds, circular=False)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, linear=True)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, circular=True)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, linear=False)) self.assertTrue(ds.linear == True) circular = [False, False, True, True] linear = [True, True, False, False] crick = ["taaa", "taaa", "aaat", "aaat"] sek = ["attta", "attta", "attt", "attt"] for b, ci, li, s, cri in zip(a, circular, linear, sek, crick): self.assertTrue(type(b.seq) == Dseq) self.assertTrue(str(b.seq.watson) == "attt") self.assertTrue(str(b.seq.crick) == cri) self.assertTrue(str(b.seq) == s) self.assertTrue(b.linear == b.seq.linear) self.assertTrue(b.linear == li) self.assertTrue(b.circular == ci) self.assertTrue(b.seq.linear == li) self.assertTrue(b.seq.circular == ci) a = [] ds = Dseq("attt", "caaa") self.assertTrue(ds.linear == True) self.assertTrue(ds.ovhg == -1) a.append(Dseqrecord(ds, circular=False)) self.assertTrue(ds.linear == True) a.append(Dseqrecord(ds, linear=True)) self.assertTrue(ds.linear == True) with self.assertRaises(TypeError): Dseqrecord(ds, circular=True) self.assertTrue(ds.linear == True) with self.assertRaises(TypeError): Dseqrecord(ds, linear=False) self.assertTrue(ds.linear == True) with self.assertRaises(TypeError): b = Dseqrecord([]) with self.assertRaises(TypeError): b = Dseqrecord(("a", )) with self.assertRaises(TypeError): b = Dseqrecord(0) from pydna import read input = ''' LOCUS New_DNA 4 bp ds-DNA linear 30-MAR-2013 DEFINITION . ACCESSION VERSION SOURCE . ORGANISM . 
COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature 2..3 /label=NewFeature /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 acgt // ''' a = read(input) self.assertEqual(a.features[0].extract(a).seq.watson, "CG") b = a + a for f in b.features: self.assertEqual(b.features[0].extract(a).seq.watson, "CG") feature = a.features[0] s = Dseq("agctt", "agcta") #print s.fig() #Dseq(-6) # agctt #atcga b = Dseqrecord(s) b.features.append(feature) cb = Dseqrecord(b, circular=True) self.assertEqual(b.features[0].extract(b).seq.watson.lower(), cb.features[0].extract(b).seq.watson.lower()) self.assertEqual(b.features[0].extract(b).seq.crick.lower(), cb.features[0].extract(b).seq.crick.lower()) s = Dseq("aagct", "aagct") #print s.fig() #Dseq(-6) #aagct # tcgaa b = Dseqrecord(s) with self.assertRaises(TypeError): cb = Dseqrecord(b, circular=True) s = Dseq("agctt", "agcta") #print s.fig() #Dseq(-6) # agcta #ttcga b = Dseqrecord(s) b.features.append(feature) cb = Dseqrecord(b, circular=True) self.assertEqual(b.features[0].extract(b).seq.watson.lower(), cb.features[0].extract(b).seq.watson.lower()) self.assertEqual(b.features[0].extract(b).seq.crick.lower(), cb.features[0].extract(b).seq.crick.lower())
''' Thanks to Min RK, UC Berkeley for this''' def _repr_pretty_(self, p, cycle): p.text(self) class pretty_unicode(unicode): def _repr_pretty_(self, p, cycle): p.text(self) class pretty_string(str): def _repr_pretty_(self, p, cycle): p.text(self) if __name__=="__main__": import pydna print pydna.read("/home/bjorn/Desktop/python_packages/pydna/pydna/pydna_read_test.txt").format() print pydna.read("/home/bjorn/Desktop/python_packages/pydna/pydna/pydna_read_test2.txt").format()[3270:3281] import sys;sys.exit(42) import StringIO from Bio import SeqIO from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA import textwrap, re raw = open("pydna_read_test.txt", 'rU').read() pattern = r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)" rawseq = re.findall(pattern, textwrap.dedent(raw + "\n\n"), flags=re.MULTILINE).pop(0) handle = StringIO.StringIO(raw)
Lignocellulosic Hydrolysate, Applied and Environmental Microbiology 67 4249–4255. ''' raw_input("press return!\n") gb = Genbank("*****@*****.**") if gb.test(): xks1_gene = gb.nucleotide("Z72979") print "Genbank record Z72979 downloaded from NCBI" YEp24PGK = gb.nucleotide("KC562906") print "Genbank record KC562906 downloaded from NCBI\n" else: xks1_gene = read("Z72979.gb") print "A local copy of Genbank record Z72979 is used" YEp24PGK = read("KC562906.gb") print "A local copy of Genbank record KC562906 is used\n" raw_input("press return!\n") primers = ''' >primer1 GCGGATCCTCTAGAATGGTTTGTTCAGTAATTCAG >primer3 AGATCTGGATCCTTAGATGAGAGTCTTTTCCAG ''' primer1, primer2 = parse(primers, ds=False) xks1_pcr_product = pcr(primer1, primer2, xks1_gene)
def test_copy_features(self):
    """Exercise ``copy_features`` from ``pCAPs.gb`` onto variants of the
    sequence read from ``pCAPs_fasta.txt``.

    Three groups of checks are made:

    1. For several rotation offsets, the lower-cased sequences of the
       features of the source record longer than 10 must equal the
       sequences of all features found on the rotated target afterwards
       (both sorted by length). This is done for looped rotations and then
       for linear rotations of the reverse complement.
    2. After copying onto the reverse complement itself, the seguid
       checksums of the extracted features must match the expected list.
    3. The same seguid check is repeated for looped copies re-synced to a
       series of anchor sequences.

    ``assertEqual`` is used throughout instead of bare ``assert`` so the
    checks are not stripped when Python runs with ``-O`` and so a failure
    reports both compared values.
    """
    from pydna.utils import seguid
    from pydna import read, copy_features

    a = read("./pCAPs.gb")
    b = read("./pCAPs_fasta.txt")

    def lowered_by_length(record, features):
        # Lower-cased feature sequences ordered by length (stable sort).
        return sorted(
            [str(f.extract(record).seq).lower() for f in features],
            key=len)

    def check_long_features_copied(target):
        # Copy first, then compare: the target's feature list is only
        # meaningful after copy_features has run on it.
        copy_features(a, target)
        self.assertEqual(
            lowered_by_length(a, [f for f in a.features if len(f) > 10]),
            lowered_by_length(target, target.features))

    def feature_seguids(record):
        # seguid checksum of every feature on the record, in feature order.
        return [seguid(f.extract(record).seq) for f in record.features]

    shifts = [1, 2, 3, 3127, 3128, 3129]

    # Rotated copies, re-circularised with looped().
    for sh in shifts:
        check_long_features_copied((b[sh:] + b[:sh]).looped())

    # Same rotations on the reverse complement; these targets are left
    # as the plain concatenation (no looped() call), as in the original.
    b = b.rc()
    for sh in shifts:
        check_long_features_copied(b[sh:] + b[:sh])

    seguid_bla = "riT98j2v4NxVS8sbw_Q8epCwQwo"
    seguid_cre = "xLZ2xs2O8CUMmWh2OrhmNFp5ZLg"
    cre_and_bla = [seguid_cre, seguid_cre, seguid_bla, seguid_bla]

    copy_features(a, b)
    self.assertEqual(feature_seguids(b), cre_and_bla)

    # (anchor passed to synced(), expected feature seguids afterwards).
    # Expected values are carried over unchanged from the original
    # assertions; note the third and sixth anchors expect only two features.
    synced_cases = [
        ("attaacgagtgccgtaaacgacgatggttttacc", cre_and_bla),
        ("ttaacgagtgccgtaaacgacgatggttttacc", cre_and_bla),
        ("taacgagtgccgtaaacgacgatggttttacc", [seguid_bla, seguid_bla]),
        ("gttaccaatgcttaatcagtgaggcacctatctcagc", cre_and_bla),
        ("ttaccaatgcttaatcagtgaggcacctatctcagc", cre_and_bla),
        ("taccaatgcttaatcagtgaggcacctatctcagc", [seguid_cre, seguid_cre]),
    ]
    for anchor, expected in synced_cases:
        # A fresh record per case so features from an earlier case cannot
        # leak into the next one.
        b = read("./pCAPs_fasta.txt").looped()
        b = b.synced(anchor)
        copy_features(a, b)
        self.assertEqual(feature_seguids(b), expected)
class pretty_unicode(unicode):
    """``unicode`` subclass that renders as its own text when pretty-printed.

    NOTE(review): ``_repr_pretty_(p, cycle)`` looks like the IPython
    pretty-printer hook -- confirm against the caller.
    """

    def _repr_pretty_(self, p, cycle):
        # Write the string content itself to the printer; ``cycle`` is unused.
        p.text(self)


class pretty_string(str):
    """``str`` subclass that renders as its own text when pretty-printed.

    NOTE(review): ``_repr_pretty_(p, cycle)`` looks like the IPython
    pretty-printer hook -- confirm against the caller.
    """

    def _repr_pretty_(self, p, cycle):
        # Write the string content itself to the printer; ``cycle`` is unused.
        p.text(self)


if __name__ == "__main__":
    # Ad-hoc scratch script: prints two parsed records, then exits.
    import pydna

    first_path = "/home/bjorn/Desktop/python_packages/pydna/pydna/pydna_read_test.txt"
    second_path = "/home/bjorn/Desktop/python_packages/pydna/pydna/pydna_read_test2.txt"

    first_text = pydna.read(first_path).format()
    print(first_text)

    second_text = pydna.read(second_path).format()
    print(second_text[3270:3281])

    import sys
    sys.exit(42)

    # Everything below is unreachable because of the sys.exit(42) call above;
    # it is kept as-is.
    import StringIO
    from Bio import SeqIO
    from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA
    import textwrap
    import re

    raw = open("pydna_read_test.txt", 'rU').read()
    pattern = r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)"
""" print info raw_input("Press any key and wait for the script to finish!") # Establish the two primers. These sequences can be found in (1) GUP1rec1sens = SeqRecord( Seq("gaattcgatatcaagcttatcgataccgatgtcgctgatcagcatcctgtctcc")) GUP1rec2AS = SeqRecord( Seq("gacataactaattacatgactcgaggtcgactcagcattttaggtaaattccg")) # Read the GUP1 locus sequence into a Dseqrecord object # This sequence was taken from the Saccharomyces genome Database: # http://www.yeastgenome.org/cgi-bin/getSeq?query=YGL084C&flankl=1000&flankr=1000&format=fasta GUP1 = read("GUP1_locus.gb") # The insert is formed by PCR using the two primers and the template sequence insert = pcr(GUP1rec1sens, GUP1rec2AS, GUP1) # The sequence for the plasmid is read into a Dseqrecord object called pGREG505 # this sequence was found at # http://www.euroscarf.de/plasmid_details.php?accno=P30350 # This sequence is circular, this information is parsed from the Genbank file. pGREG505 = read("pGREG505.gb") # Import the SalI restriction enzyme from Biopython from Bio.Restriction import SalI # Cut the circular pGREG505 plasmid with SalI # this enzyme cuts twice, so two fragments are formed
def test_features_change_ori(self):
    """Check that features survive a change of origin on circular records.

    For every offset ``i`` in ``1 .. len(s) - 1`` the record is rotated
    with ``shifted(i)`` and the sequences extracted through its features
    must be the same as on the unshifted record. For the second record the
    test additionally cuts with Acc65I and BamHI and checks that the two
    resulting fragments, and the features they carry, do not depend on the
    offset.

    ``assertEqual`` replaces ``assertTrue(x == y)`` so a failure reports
    the two values that differed.
    """
    # NOTE(review): the record literals below are reproduced exactly as they
    # appear in this file, on a single line each. GenBank text is normally
    # line-structured -- confirm against the original fixture.
    s = read(''' LOCUS New_DNA 13 bp ds-DNA circular 12-NOV-2013 DEFINITION . ACCESSION VERSION SOURCE . ORGANISM . COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature join(9..10,12..13,1..1,3..6) /label=hej /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 gattttaatc acc //''')

    for i in range(1, len(s)):
        b = s.shifted(i)
        self.assertEqual(
            str(b.features[0].extract(b).seq).lower(), "tcccgtttt")

    s = read(''' LOCUS New_DNA 21 bp ds-DNA circular 03-APR-2013 DEFINITION a ACCESSION VERSION SOURCE . ORGANISM . COMMENT COMMENT ApEinfo:methylated:1 FEATURES Location/Qualifiers misc_feature join(18..21,1..4) /label=bb /ApEinfo_fwdcolor=cyan /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 misc_feature 5..17 /label=ins /ApEinfo_fwdcolor=#e03c2b /ApEinfo_revcolor=green /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0} width 5 offset 0 ORIGIN 1 aaaGGTACCt ttGGATCCggg // ''')

    self.assertEqual(str(s.features[0].extract(s).seq), "CGGGAAAG")
    self.assertEqual(str(s.features[1].extract(s).seq), "GTACCTTTGGATC")

    def seq_by_label(record, label):
        # Sequence of the first feature whose first "label" qualifier
        # matches; features are looked up by label because their order is
        # not assumed after shifting.
        return [
            str(f.extract(record).seq)
            for f in record.features
            if f.qualifiers["label"][0] == label
        ][0]

    for i in range(1, len(s)):
        b = s.shifted(i)
        self.assertEqual(seq_by_label(b, 'ins'), "GTACCTTTGGATC")
        self.assertEqual(seq_by_label(b, 'bb'), "CGGGAAAG")

    from Bio.Restriction import Acc65I, BamHI

    # Reference fragments from the unshifted record, longest first.
    bb1, ins1 = sorted(s.cut(Acc65I, BamHI), key=len, reverse=True)

    for i in range(1, len(s)):
        b = s.shifted(i)
        bb, ins = sorted(b.cut(Acc65I, BamHI), key=len, reverse=True)

        # The fragments themselves must not depend on the origin.
        self.assertTrue(eq(bb1, bb))
        self.assertTrue(eq(ins1, ins))

        # Feature carried by the longer fragment.
        bb_feature = bb.features[0].extract(bb)
        self.assertEqual(bb_feature.seq.watson, "CGGGAAAG")
        self.assertEqual(bb_feature.seq.crick, "CTTTCCCG")
        self.assertTrue(eq(bb_feature, s.features[0].extract(s)))

        # Feature carried by the shorter fragment.
        ins_feature = ins.features[0].extract(ins)
        self.assertEqual(ins_feature.seq.watson, "GTACCTTTG")
        self.assertEqual(ins_feature.seq.crick, "GATCCAAAG")
        self.assertEqual(
            str(ins_feature.seq), str(s.features[1].extract(s).seq))