def test_cut_circular():
    """Cut every rotation of a circular sequence with several enzymes.

    Each rotation of the same circular molecule must give the same single
    linear fragment for a given enzyme, and an enzyme without a site must
    give an empty tuple.
    """
    from pydna.dseq import Dseq
    from Bio.Restriction import BsaI, KpnI, Acc65I, NotI

    template = "aaaaaaGGTACCggtctcaaaa"

    for offset in range(len(template)):
        rotated = template[offset:] + template[:offset]

        # Acc65I: G^GTACC
        acc65i_frag = Dseq(rotated, circular=True).cut(Acc65I)[0]
        assert acc65i_frag.watson.upper() == "GTACCGGTCTCAAAAAAAAAAG"
        assert acc65i_frag.crick.upper() == "GTACCTTTTTTTTTTGAGACCG"
        assert acc65i_frag.ovhg == -4

        # KpnI: GGTAC^C  (CggtctcaaaaaaaaaaGGTAC)
        kpni_frag = Dseq(rotated, circular=True).cut(KpnI)[0]
        assert kpni_frag.watson.upper() == "CGGTCTCAAAAAAAAAAGGTAC"
        assert kpni_frag.crick.upper() == "CTTTTTTTTTTGAGACCGGTAC"
        assert kpni_frag.ovhg == 4

        # BsaI: ggtctcnnn
        bsai_frag = Dseq(rotated, circular=True).cut(BsaI)[0]
        assert bsai_frag.watson.upper() == "AAAAAAAAAGGTACCGGTCTCA"
        assert bsai_frag.crick.upper() == "TTTTTGAGACCGGTACCTTTTT"
        assert bsai_frag.ovhg == -4

        # NotI has no site in the template.
        uncut = Dseq(rotated, circular=True).cut(NotI)
        assert uncut == ()
def test_lcs():
    """Check SeqRecord.lcs against every supported type of query sequence.

    The feature returned for each query type must have the same ``__dict__``
    as the hand-built expected feature.
    """
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord import SeqRecord
    from pydna.seqfeature import SeqFeature
    from Bio.SeqFeature import FeatureLocation, ExactPosition

    record = SeqRecord(Seq("GGATCC"))

    expected = SeqFeature()
    expected.__dict__ = {
        "location": FeatureLocation(ExactPosition(0), ExactPosition(6), strand=1),
        "type": "read",
        "id": "<unknown id>",
        "qualifiers": {
            "label": ["sequence"],
            "ApEinfo_fwdcolor": ["#DAFFCF"],
            "ApEinfo_revcolor": ["#DFFDFF"],
        },
    }

    # One query per accepted input type.
    queries = (
        "GGATCC",
        Seq("GGATCC"),
        BSeqRecord(Seq("GGATCC"), name="sequence"),
        Dseq("GGATCC"),
        Dseqrecord(Dseq("GGATCC"), name="sequence"),
        Dseqrecord("GGATCC", name="sequence"),
    )
    for query in queries:
        assert record.lcs(query, limit=4).__dict__ == expected.__dict__
def test_Dseq_cutting_adding():
    """Check watson/crick strands of the middle fragment after digestion."""
    from pydna.dseq import Dseq
    from Bio.Restriction import BamHI, PstI, EcoRI

    # Insert flanked by two BamHI sites.
    bamhi_flanked = Dseq(
        "GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC",
        "CCTAGGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCCTAGG"[::-1],
        linear=True,
        ovhg=0,
    )
    bamhi_middle = bamhi_flanked.cut(BamHI)[1]
    assert (
        bamhi_middle.watson
        == "GATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG"
    )
    assert (
        bamhi_middle.crick
        == "GATCCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG"
    )

    # PstI site on the left, EcoRI site on the right.
    pst_eco = Dseq(
        "nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn",
        "nGACGTCagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCTTAAGn"[::-1],
        linear=True,
        ovhg=0,
    )
    # Exactly three fragments are expected; only the middle one is inspected.
    _left, pst_eco_middle, _right = pst_eco.cut((EcoRI, PstI))
    assert (
        pst_eco_middle.watson
        == "GtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG"
    )
    assert (
        pst_eco_middle.crick
        == "AATTCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaCTGCA"
    )

    # EcoRI site on the left, PstI site on the right.
    eco_pst = Dseq(
        "nGAATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCAGn",
        "nCTTAAGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaGACGTCn"[::-1],
        linear=True,
        ovhg=0,
    )
    eco_pst_middle = eco_pst.cut((EcoRI, PstI))[1]
    assert (
        eco_pst_middle.watson
        == "AATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCA"
    )
    assert (
        eco_pst_middle.crick
        == "GacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG"
    )
def test_Dseq_slicing2():
    """Swapping the first two enzymes must reverse the fragment order."""
    from pydna.dseq import Dseq
    from Bio.Restriction import BamHI, EcoRI, KpnI

    plasmid = Dseq("aaGGATCCnnnnnnnnnGAATTCccc", circular=True)

    eco_first = plasmid.cut(EcoRI, BamHI, KpnI)
    bam_first = plasmid.cut(BamHI, EcoRI, KpnI)
    assert eco_first == bam_first[::-1]
def cut_and_religate_Dseq(seq_string, enz, top):
    """Digest a sequence, re-join the fragments and assert nothing changed.

    ``top`` is passed to ``Dseq`` as ``linear``; when it is falsy the
    re-joined molecule is looped before the comparison. Returns ``None``
    without asserting when the enzyme yields no fragments.
    """
    original = Dseq(seq_string, linear=top)
    pieces = list(original.cut(enz))
    if len(pieces) == 0:
        return

    rejoined, *remaining = pieces
    for piece in remaining:
        rejoined += piece

    if not top:
        rejoined = rejoined.looped()

    assert eq(rejoined, original)
def test_misc():
    """Re-join and loop two NotI fragments, then compare after a shift of 5."""
    from pydna.dseq import Dseq
    from Bio.Restriction import NotI

    circle = Dseq("ctcgGCGGCCGCcagcggccg", circular=True)

    # Exactly two fragments are expected from the digest.
    first, second = circle.cut(NotI)
    religated = (first + second).looped()

    assert religated.shifted(5) == circle
def test_olaps():
    """Check SeqRecord.olaps against every supported type of query sequence."""
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord import SeqRecord
    from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA

    record = SeqRecord(Seq("GGATCC", alphabet=IUPACAmbiguousDNA()))

    # One query per accepted input type.
    queries = (
        "GGATCC",
        Seq("GGATCC", alphabet=IUPACAmbiguousDNA()),
        BSeqRecord(Seq("GGATCC", alphabet=IUPACAmbiguousDNA())),
        Dseq("GGATCC", alphabet=IUPACAmbiguousDNA()),
        Dseqrecord(Dseq("GGATCC")),
        Dseqrecord("GGATCC"),
    )
    for query in queries:
        first_overlap = record.olaps(query, limit=4)[0]
        assert "GGATCC" == str(first_overlap.seq)
def test_shifted():
    """shifted() rotates circular sequences and raises TypeError on linear ones."""
    from pydna.dseq import Dseq

    circle = Dseq("gatc", circular=True)
    assert circle.shifted(1) == Dseq("atcg", circular=True)
    # Shifting by the full length gives an equal sequence.
    assert circle.shifted(4) == circle

    linear = Dseq("gatc", circular=False)
    with pytest.raises(TypeError):
        linear.shifted(1)
def from_SeqRecord(cls, record: _SeqRecord, *args, linear=True, circular=False, n=5e-14, **kwargs):
    """Build an instance from an existing SeqRecord without calling ``__init__``.

    The double-stranded sequence is created with ``_Dseq.quick`` from the
    record's sequence string and ``_rc`` of that string, with ``ovhg=0``.
    ``dbxrefs``, ``features`` and ``_per_letter_annotations`` are taken from
    ``record`` by reference (not copied). Extra positional and keyword
    arguments are accepted but not used.
    """
    obj = cls.__new__(cls)  # deliberately bypasses __init__

    watson = str(record.seq)
    obj._seq = _Dseq.quick(
        watson,
        _rc(watson),
        ovhg=0,
        linear=linear,
        circular=circular,
    )

    # Identification fields are copied straight from the record.
    for field in ("id", "name", "description", "dbxrefs"):
        setattr(obj, field, getattr(record, field))

    # Start from the default molecule type; the record's annotations win.
    annotations = {"molecule_type": "DNA"}
    obj.annotations = annotations
    annotations.update(record.annotations)

    obj._per_letter_annotations = record._per_letter_annotations
    obj.features = record.features
    obj.map_target = None
    obj.n = n
    return obj
def from_string(cls, record: str = "", *args, linear=True, circular=False, n=5e-14, **kwargs):
    """Build an instance from a plain string without calling ``__init__``.

    The double-stranded sequence is created with ``_Dseq.quick`` from
    ``record`` and ``_rc(record)`` with ``ovhg=0``. Metadata attributes get
    fixed defaults, after which ``kwargs`` is merged into the instance
    ``__dict__`` and may overwrite any of them. Extra positional arguments
    are accepted but not used.
    """
    obj = cls.__new__(cls)  # deliberately bypasses __init__

    obj._seq = _Dseq.quick(
        record,
        _rc(record),
        ovhg=0,
        linear=linear,
        circular=circular,
    )

    # Placeholder identification fields.
    for field in ("id", "name", "description"):
        setattr(obj, field, _pretty_str(field))

    obj.dbxrefs = []
    obj.annotations = {"molecule_type": "DNA"}
    obj._per_letter_annotations = {}
    obj.features = []
    obj.map_target = None
    obj.n = n

    # Caller-supplied attributes are applied last.
    obj.__dict__.update(kwargs)
    return obj
def test_Dseq_arguments():
    """pcr() must accept plain Dseq objects for both primers and the template."""
    from pydna.dseq import Dseq

    # FASTA needs each header and each sequence on its own line; with both
    # records on a single line they cannot be told apart as two primers.
    f0, r0 = parse_primers(
        ">ForwardPrimer\n"
        "gctactacacacgtactgactg\n"
        ">ReversePrimer\n"
        "tgtggttactgactctatcttg\n"
    )
    t0 = Dseqrecord("gctactacacacgtactgactgcctccaagatagagtcagtaaccaca")

    forward = Dseq(str(f0.seq))
    reverse = Dseq(str(r0.seq))
    template = Dseq(str(t0.seq))

    product = pcr(forward, reverse, template)
    assert str(product.seq) == "gctactacacacgtactgactgcctccaagatagagtcagtaaccaca"
def test_add_feature():
    """Exercise SeqRecord.add_feature and the list_features() table output.

    NOTE(review): the expected table strings are reproduced exactly as they
    appear in this file; runs of spaces may have been collapsed upstream, so
    confirm them against the real list_features() output.
    """
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord import SeqRecord

    # Feature given by coordinates.
    s = SeqRecord("tttGGATCCaaa")
    s.add_feature(3, 9)
    assert s.extract_feature(0).seq == SeqRecord("GGATCC").seq

    # Feature given by a sub-sequence, one case per accepted type.
    subsequences = (
        "GGATCC",
        Seq("GGATCC"),
        Dseq("GGATCC"),
        SeqRecord("GGATCC"),
        BSeqRecord("GGATCC"),
        Dseqrecord("GGATCC"),
    )
    for subsequence in subsequences:
        s = SeqRecord("tttGGATCCaaa")
        s.add_feature(seq=subsequence)
        assert s.extract_feature(0).seq == SeqRecord("GGATCC").seq

    # A sub-sequence that is absent from the record is rejected.
    s = SeqRecord("tttGGATCCaaa")
    with pytest.raises(TypeError):
        s.add_feature(seq=Dseqrecord("GGGGGG"))

    # An open reading frame gets an automatic "orf<length>" label.
    s = SeqRecord("tttATGaaaTAAggg")
    s.add_feature(3, 12)
    assert s.features[0].qualifiers["label"] == ["orf9"]

    from Bio.Seq import Seq
    from pydna.seqrecord import SeqRecord

    a = SeqRecord(Seq("atgtaa"))
    a.add_feature(2, 4)

    narrow_rule = "+-----+---------------+-----+-----+-----+-----+------+------+"
    wide_rule = "+-----+------------------+-----+-----+-----+-----+------+------+"
    header = "| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |"

    # Labelled feature.
    assert a.list_features() == "\n".join(
        [
            narrow_rule,
            header,
            narrow_rule,
            "| 0 | L:ft2 | --> | 2 | 4 | 2 | misc | no |",
            narrow_rule,
        ]
    )

    a.features[0].qualifiers
    del a.features[0].qualifiers["label"]

    # Without label or note the table shows "nd".
    assert a.list_features() == "\n".join(
        [
            narrow_rule,
            header,
            narrow_rule,
            "| 0 | nd | --> | 2 | 4 | 2 | misc | no |",
            narrow_rule,
        ]
    )

    a.features[0].qualifiers["note"] = ["AwesomeFeature"]

    # With only a note, the note is shown with an "N:" prefix.
    assert a.list_features() == "\n".join(
        [
            wide_rule,
            header,
            wide_rule,
            "| 0 | N:AwesomeFeature | --> | 2 | 4 | 2 | misc | no |",
            wide_rule,
        ]
    )
def test_Dseq_slicing():
    """A full slice preserves both strands and the overhang; slices can be re-added."""
    from pydna.dseq import Dseq
    from pydna.readers import read
    from pydna.utils import eq
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as Srec
    from Bio.Restriction import BamHI

    duplex = Dseq("ggatcc", "ggatcc", 0)
    whole = duplex[:]
    assert whole.watson == duplex.watson
    assert duplex[:].crick == duplex.crick
    assert duplex.ovhg == duplex[:].ovhg

    # Exactly two fragments are expected from the digest.
    left, _right = duplex.cut(BamHI)
    inner = left[1:5]
    inner_rc = inner.rc()

    assert inner_rc + inner == Dseq("gatc", "gatc", 0)
def tester(i, fwd, rev, overhang=None):
    """Print a side-by-side report of ``Dseq`` and ``DSeq`` for one strand pair.

    Returns ``i + 1`` so callers can number consecutive test cases.
    """
    print("Test#%d:" % (i))

    # First implementation: Dseq takes the stagger as ``ovhg``.
    first = Dseq(fwd, rev, ovhg=overhang)
    print(first.fig())
    print(first.five_prime_end())
    print(first.three_prime_end())

    # Second implementation: DSeq takes the stagger as ``overhang``.
    second = DSeq(fwd, rev, overhang=overhang)
    alignment = second.alignment
    print(alignment.reference_start, alignment.reference_end)
    print(second.highlight())
    print(second.five_prime_end())
    print(second.three_prime_end())
    print(second.alignment.read_start, second.alignment.read_end)

    print('')
    return i + 1
def get_prop_primers(primers, vector):
    """Locate each primer in a vector and store the result on the primer.

    For every primer the function sets ``position``, ``dir``, ``in_vector``
    and ``length`` on the primer object and calls ``primer.save()``.

    Parameters
    ----------
    primers : iterable
        Objects with a ``sequence`` string attribute and a ``save()`` method.
    vector : object
        Object with ``name`` and ``sequence`` attributes.

    Returns
    -------
    tuple
        ``(vector_name, seq, L, position, dir, in_vector)`` where the last
        three values belong to the LAST primer processed. When ``primers``
        is empty they are ``-1``, ``'none'`` and ``False`` (previously an
        empty input raised because these locals were never bound).
    """
    vector_name = vector.name
    seq = Dseq(str.lower(vector.sequence).replace(' ', ''))
    L = len(str(seq))
    rseq = seq.reverse_complement()

    # Modification codes and how much each one shortens (positive) or
    # lengthens (negative) the nucleotide count computed below.
    length_corrections = (
        (('icy5', 'icy3', '5biosg', '(am)'), 1),
        (('dspacer',), 2),
        (('dbcoteg',), 3),
        (('biotinteg',), 4),
        (('ds', 'idsp'), -1),
    )
    nt = 'atcgn'

    # Defaults so the return statement is valid even without primers.
    position = -1
    direction = 'none'
    in_vector = False

    for primer in primers:
        primer_nor = primer.sequence.replace(' ', '')
        p_seq_s = str(Dseq(primer_nor)).lower()

        Lp_subtract = sum(
            correction
            for codes, correction in length_corrections
            for code in codes
            if code in p_seq_s
        )
        # Count nucleotide letters, then correct for letters that belong
        # to modification codes.
        Lp = sum(p_seq_s.count(base) for base in nt) - Lp_subtract

        forward_hit = seq.find(p_seq_s)
        if forward_hit != -1:
            position = forward_hit + 1
            direction = 'forward'
            in_vector = True
        else:
            reverse_hit = rseq.find(p_seq_s)
            if reverse_hit != -1:
                position = reverse_hit - L - 1
                direction = 'reverse'
                in_vector = True
            else:
                position = -1
                direction = 'none'
                in_vector = False

        primer.position = position
        primer.dir = direction
        primer.in_vector = in_vector
        primer.length = Lp
        primer.save()

    return vector_name, seq, L, position, direction, in_vector
def toDSEQ(self, graph, edges, nodes):
    """Turn every edge into a named Dseqrecord.

    Each edge's sequence is paired with the complement generated for the
    edge's second node, using a fixed overhang of -10. ``graph`` is accepted
    but not used here.
    """
    complements = self.generateComplements(nodes)
    offset = -10

    records = []
    for edge in edges:
        duplex = Dseq(edges[edge], complements[edge[1]], ovhg=offset)
        record = Dseqrecord(duplex)
        record.name = edge[0] + "_" + edge[1]
        record.seq = duplex
        records.append(record)
    return records
def makeDseqFromDF(part, partslist, col="part"):
    """Look up ``part`` in column ``col`` of ``partslist`` and build a pydna Dseq.

    The first matching row supplies ``sequence``, ``circular``, ``5pend``
    and ``3pend``. The end values describe how many bases to trim from one
    strand at that end: a positive ``5pend`` trims the start of the top
    strand, a negative one trims the end of the bottom strand; a negative
    ``3pend`` trims the end of the top strand, a positive one trims the
    start of the bottom strand.

    Returns the Dseq, looped when the row's ``circular`` value is truthy.
    Raises IndexError (from ``.iloc[0]``) when no row matches.
    """
    # Filter once instead of once per column.
    matches = partslist[partslist[col] == part]
    pseq = matches.sequence.iloc[0].lower()
    pcirc = matches.circular.iloc[0]
    p5pover = int(matches["5pend"].iloc[0])
    p3pover = int(matches["3pend"].iloc[0])

    povhg = int(p5pover)
    pseqRC = str(Dseq(pseq).rc()).lower()

    if p5pover > 0:
        pseq = pseq[p5pover:]
    elif p5pover < 0:
        pseqRC = pseqRC[:p5pover]

    if p3pover < 0:
        pseq = pseq[:p3pover]
    elif p3pover > 0:
        # Fixed: this branch used to slice with p5pover, so the 3' overhang
        # length was ignored and the 5' value was applied a second time.
        pseqRC = pseqRC[p3pover:]

    # Linear double-stranded sequence with the requested stagger.
    pDseq = Dseq(pseq, pseqRC, ovhg=povhg)
    if pcirc:
        pDseq = pDseq.looped()
    return pDseq
def test_cut_with_no_enzymes():
    """Cutting with an empty enzyme list gives an empty tuple for both topologies."""
    from pydna.dseq import Dseq

    linear = Dseq("ctcgGCGGCCGCcagcggccg")
    assert linear.cut([]) == ()

    circle = Dseq("ctcgGCGGCCGCcagcggccg", circular=True)
    assert circle.cut([]) == ()
def chewback(seqtochew, chewamt, end="fiveprime"):
    """Remove ``chewamt`` bases from one kind of end on both strands.

    Parameters
    ----------
    seqtochew : Dseq
        Sequence to process. The original overhang is not consulted, so the
        input is presumably blunt -- TODO confirm with callers.
    chewamt : int
        Number of bases removed from each strand.
    end : str
        ``"fiveprime"`` removes the 5' end of each strand; any other value
        removes the 3' end of each strand.

    Returns
    -------
    Dseq or None
        The chewed sequence, or ``None`` when the input is not longer than
        ``chewamt * 2 + 1``.
    """
    if len(seqtochew) <= chewamt * 2 + 1:
        return None

    wat = seqtochew.watson
    cri = seqtochew.crick

    if end == "fiveprime":
        # Watson loses its left end, so crick protrudes on the left:
        # positive overhang.
        cwat = wat[chewamt:]
        ccri = cri[chewamt:]
        ovhg = chewamt
    else:
        # Crick loses its 3' end (the left side of the duplex), so watson
        # protrudes on the left: negative overhang. The old code reused
        # +chewamt here. Slicing by explicit length also keeps the strands
        # intact when chewamt is 0 (``[:-0]`` would empty them).
        cwat = wat[:len(wat) - chewamt]
        ccri = cri[:len(cri) - chewamt]
        ovhg = -chewamt

    return Dseq(cwat, ccri, ovhg=ovhg)
def test_cut_missing_enzyme():
    """An enzyme without a site gives an empty tuple for both topologies."""
    from pydna.dseq import Dseq
    from Bio.Restriction import PstI

    linear = Dseq("ctcgGCGGCCGCcagcggccg")
    assert linear.cut(PstI) == ()

    circle = Dseq("ctcgGCGGCCGCcagcggccg", circular=True)
    assert circle.cut(PstI) == ()
def plotpcr(template, p1, p2):  # p1 is forward, p2 is reverse
    """Render a text view of a template with both primers placed on it.

    The template and primers are lower-cased and stripped of spaces. The
    output is built row by row (``n_seg`` = 100 template characters per
    row): the top strand with an indicator line from
    ``generate_indicator``, then the bottom strand, then a separator of
    ``n_sep`` = 120 dashes. Primer lines are added on the row where the
    primer starts, provided the primer fits inside that row.

    Returns the assembled multi-line string.

    NOTE(review): the indentation of this function was lost; the nesting of
    the statements under the two inner ``if`` tests is reconstructed and
    should be confirmed against the original file.
    """
    template = str.lower(template.replace(' ', ''))
    p1 = str.lower(p1.replace(' ', ''))
    # Reversed so it can be searched in the left-to-right bottom strand.
    p2 = str.lower(p2.replace(' ', ''))[::-1]
    # Reverse complement read backwards, i.e. aligned under the template.
    r_template = str(Dseq(template).reverse_complement())[::-1]
    L = len(template)
    n_seg = 100
    n_sep = 120
    # End index of every row; the last row ends at the template length.
    i_end = np.append(np.arange(n_seg, L, n_seg), L)
    indicator_pos = generate_indicator(template)
    seq_show = ''
    # 1-based start positions; 0 when str.find returns -1 (not found).
    pf = template.find(p1) + 1
    pr = r_template.find(p2) + 1
    # primer_f_seq_show = ''
    # primer_r_seq_show = ''
    for i, n_end in enumerate(i_end):
        if (pf >= i * n_seg) and (pf < n_end):  # forward primer starts in this row
            if (len(p1) <= n_end - pf):  # primer does not run past the row
                # Dots pad the primer to its column; 3 accounts for "5'-".
                primer_f_seq_show = '.' * (
                    pf - 1 - 3 - i * n_seg) + "5'-" + p1 + "-3'-->>dir" + '\n'
                template_seq = primer_f_seq_show + template[i * n_seg:n_end] + '\n'
                seq_show += template_seq + indicator_pos[i] + '\n'
                # seq_show += '-' * n_seg + '\n'
        else:  # forward primer does not start in this row
            template_seq = template[i * n_seg:n_end] + '\n'
            seq_show += template_seq + indicator_pos[i] + '\n'
            # seq_show += '-' * n_seg + '\n'
        if (pr >= i * n_seg) and (pr < n_end):  # reverse primer starts in this row
            if (len(p2) <= n_end - pr):  # primer does not run past the row
                primer_r_seq_show = '.' * (
                    pr - i * n_seg - 10) + "dir<<-3'-" + p2 + "-5'" + '\n'
                r_template_seq = r_template[i * n_seg:n_end] + '\n'
                seq_show += r_template_seq + '\n' + primer_r_seq_show
                seq_show += '-' * n_sep + '\n'
        else:
            seq_show += r_template[i * n_seg:n_end] + '\n' + '-' * n_sep + '\n'
    return seq_show
def test_Dseq___getitem__():
    """Slicing of linear and circular Dseq objects, including wrap-around."""
    from pydna.dseq import Dseq

    linear = Dseq("GGATCC", circular=False)
    assert linear[1:-1] == Dseq("GATC", circular=False)

    circle = Dseq("GGATCC", circular=True)
    inner = Dseq("GATC")
    assert circle[1:5] == inner
    # A slice of a circular sequence has the same attributes as a fresh one.
    assert circle[1:5].__dict__ == Dseq("GATC").__dict__

    assert linear[1:5] == Dseq("GATC")
    assert linear[1:5] == Dseq("GATC", circular=False)
    assert linear[5:1:-1] == Dseq("CCTA")

    # start > stop wraps around the origin on a circular sequence.
    assert circle[5:1] == Dseq("CG")

    # Out-of-range start gives an empty sequence for both topologies.
    assert linear[9:1] == Dseq("")
    assert circle[9:1] == Dseq("")
#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Copyright 2013-2020 by Björn Johansson. All rights reserved. # This code is part of the Python-dna distribution and governed by its # license. Please see the LICENSE.txt file that should have been included # as part of this package. """This module provides most pydna functionality in the local namespace. Example ------- >>> from pydna.all import * >>> Dseq("aaa") Dseq(-3) aaa ttt >>> Dseqrecord("aaa") Dseqrecord(-3) >>> from pydna.all import __all__ >>> __all__ ['Anneal', 'pcr', 'Assembly', 'genbank', 'Genbank', 'download_text\ ', 'Dseqrecord', 'Dseq', 'read', 'read_primer', 'parse', 'parse_primers\ ', 'ape', 'primer_design', 'assembly_fragments', 'circular_assembly_fragments\ ', 'eq', 'gbtext_clean', 'primerlist'] >>> """ __all__ = [ "Anneal", "pcr", "Assembly",
def test_repr():
    """repr() of Dseq objects with different lengths and staggers.

    NOTE(review): the expected strings are reproduced exactly as they appear
    in this file; runs of spaces may have been collapsed upstream, so confirm
    them against the real repr() output.
    """
    from pydna.dseq import Dseq

    # (positional args, keyword args, expected repr)
    cases = [
        (("gattcgtatgctgatcgtacgtactgaaaac",), {},
         'Dseq(-31)\ngatt..aaac\nctaa..tttg'),
        (("gattcgtatgctgatcgtacgtactgaaaac", "gactagcatgcatgacttttc"[::-1]), {},
         'Dseq(-31)\ngattcgtatgctga..aaac\n gact..tttc'),
        (("gattcgtatgctgatcgtacgtactgaaaac", "actagcatgcatgacttttc"[::-1]), {},
         'Dseq(-31)\ngatt..atgctgat..aaac\n acta..tttc'),
        (("gattcgtatgctgatcgtacg", "gactagcatgc"[::-1]), {},
         'Dseq(-21)\ngattcgtatgctgatcgtacg\n gactagcatgc'),
        (("gactagcatgcatgacttttc", "gattcgtatgctgatcgtacgtactgaaaac"[::-1]), {},
         'Dseq(-31)\n gact..tttc\ngattcgtatgctga..aaac'),
        (("Ggactagcatgcatgacttttc", "gattcgtatgctgatcgtacgtactgaaaac"[::-1]), {},
         'Dseq(-31)\n Ggac..tttc\ngattcgtatgctg..aaac'),
        (("gattcgtatgctgatcgtacgtactgaaaac", "ctaagcatacgactagc"[::-1]), {},
         'Dseq(-31)\ngatt..atcgtacg..aaac\nctaa..tagc '),
        (("cgtatgctgatcgtacgtactgaaaac", "gcatacgactagc"[::-1]), {},
         'Dseq(-27)\ncgtatgctgatcgtacgtactgaaaac\ngcatacgactagc'),
        (("cgtatgctgatcgtacgtactgaaaacagact", "gcatacgactagc"[::-1]), {},
         'Dseq(-32)\ncgta..atcgtacg..gact\ngcat..tagc '),
        (("gattcgtatgctgatcgtacgtactgaaaac", "acAAGGAGAGAtg"), {"ovhg": 11},
         'Dseq(-42)\n gattcg..aaac\ngtAG..GGAAca '),
        (("g", "gattcgtatgctgatcgtacgtactgaaaac"), {"ovhg": 0},
         'Dseq(-31)\ng \ncaaaa..ttag'),
        (("gattcgtatgctgatcgtacgtactgaaaa",), {},
         'Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\nctaagcatacgactagcatgcatgactttt'),
        (("gattcgtatgctgatcgtacgtactgaaaa", "gactagcatgcatgactttt"[::-1]), {},
         'Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n gactagcatgcatgactttt'),
        (("gattcgtatgctgatcgtacgtactgaaaa", "actagcatgcatgactttt"[::-1]), {},
         'Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n actagcatgcatgactttt'),
    ]

    # Build and check one sequence at a time, in the listed order.
    for args, kwargs, expected in cases:
        assert repr(Dseq(*args, **kwargs)) == expected
def test_initialization():
    """Constructor argument handling, multiplication and topology flags of Dseq."""
    import pytest
    from pydna.dseq import Dseq

    single = Dseq("a", "t", 0)

    # Multiplication and comparison.
    assert single * 3 == Dseq("aaa", "ttt", 0)
    assert not single == 123
    assert single * 0 == Dseq("")
    with pytest.raises(TypeError):
        single * 2.3

    assert single.seguid() == "bc1M4j2I4u6VaLpUbAB8Y9kTHBs"
    assert single == Dseq("a", "t", circular=False, linear=True)

    # Invalid strand / overhang combinations.
    with pytest.raises(ValueError):
        Dseq("a", ovhg=0)
    with pytest.raises(ValueError):
        Dseq("ttt", "tt")
    with pytest.raises(ValueError):
        Dseq("ttt", "aa")

    # Default topology is linear.
    default_topology = Dseq("gata")
    assert default_topology.linear == True
    assert default_topology.circular == False

    l = Dseq("gt")
    c = l.looped()
    assert l.linear
    assert not l.circular
    assert c.circular
    assert not c.linear

    # Every combination of the linear / circular flags and its result.
    flag_table = (
        (None, None, l),
        (None, False, l),
        (None, True, c),
        (False, None, c),
        (False, False, l),
        (False, True, c),
        (True, None, l),
        (True, False, l),
        (True, True, l),
    )
    for linear_flag, circular_flag, expected in flag_table:
        assert Dseq("gt", linear=linear_flag, circular=circular_flag) == expected

    # from_string must agree with the regular constructor.
    assert Dseq.from_string("A") == Dseq("A") == Dseq("A", linear=True)
    assert (
        Dseq.from_string("A", linear=False, circular=True)
        == Dseq("A", circular=True)
        == Dseq("A", linear=False)
    )
def test_repr():
    """Check the textual representation of Dseq for a range of strand layouts.

    NOTE(review): the expected strings are reproduced exactly as they appear
    in this file; runs of spaces may have been collapsed upstream, so confirm
    them against the real repr() output.
    """
    from pydna.dseq import Dseq

    def check(expected, *args, **kwargs):
        # Build one Dseq and compare its repr with the expected text.
        assert repr(Dseq(*args, **kwargs)) == expected

    long_watson = "gattcgtatgctgatcgtacgtactgaaaac"

    # Sequences long enough to be abbreviated with "..".
    check("Dseq(-31)\ngatt..aaac\nctaa..tttg", long_watson)
    check(
        "Dseq(-31)\ngattcgtatgctga..aaac\n gact..tttc",
        long_watson,
        "gactagcatgcatgacttttc"[::-1],
    )
    check(
        "Dseq(-31)\ngatt..atgctgat..aaac\n acta..tttc",
        long_watson,
        "actagcatgcatgacttttc"[::-1],
    )
    check(
        "Dseq(-21)\ngattcgtatgctgatcgtacg\n gactagcatgc",
        "gattcgtatgctgatcgtacg",
        "gactagcatgc"[::-1],
    )
    check(
        "Dseq(-31)\n gact..tttc\ngattcgtatgctga..aaac",
        "gactagcatgcatgacttttc",
        "gattcgtatgctgatcgtacgtactgaaaac"[::-1],
    )
    check(
        "Dseq(-31)\n Ggac..tttc\ngattcgtatgctg..aaac",
        "Ggactagcatgcatgacttttc",
        "gattcgtatgctgatcgtacgtactgaaaac"[::-1],
    )
    check(
        "Dseq(-31)\ngatt..atcgtacg..aaac\nctaa..tagc ",
        long_watson,
        "ctaagcatacgactagc"[::-1],
    )
    check(
        "Dseq(-27)\ncgtatgctgatcgtacgtactgaaaac\ngcatacgactagc",
        "cgtatgctgatcgtacgtactgaaaac",
        "gcatacgactagc"[::-1],
    )
    check(
        "Dseq(-32)\ncgta..atcgtacg..gact\ngcat..tagc ",
        "cgtatgctgatcgtacgtactgaaaacagact",
        "gcatacgactagc"[::-1],
    )

    # Explicit overhangs.
    check(
        "Dseq(-42)\n gattcg..aaac\ngtAG..GGAAca ",
        long_watson,
        "acAAGGAGAGAtg",
        ovhg=11,
    )
    check(
        "Dseq(-31)\ng \ncaaaa..ttag",
        "g",
        "gattcgtatgctgatcgtacgtactgaaaac",
        ovhg=0,
    )

    # 30 nt sequences are printed in full.
    short_watson = "gattcgtatgctgatcgtacgtactgaaaa"
    check(
        "Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\nctaagcatacgactagcatgcatgactttt",
        short_watson,
    )
    check(
        "Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n gactagcatgcatgactttt",
        short_watson,
        "gactagcatgcatgactttt"[::-1],
    )
    check(
        "Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n actagcatgcatgactttt",
        short_watson,
        "actagcatgcatgactttt"[::-1],
    )
def makeEchoFile(parts,aslist,gga=ggaPD,partsFm=partsFm,source=source,\
            output = "output.csv",selenzyme=selenzyme,fname="recentassembly",\
            protocolsDF=None,sepfiles=True,sepfilename="outputLDV.csv",printstuff=True,progbar=None):
    """Make an Echo transfer CSV from a list of assemblies and a source plate.

    For every row of ``aslist`` the function adds transfer lines for each
    part and for the water that tops up the well, simulates the assembly,
    and writes each simulated product to a ``.gbk`` file.

    Parameters:
        parts: dataframe of what is in the source plate; passed on to
            ``echoSinglePart``.
        aslist: dataframe of assemblies, one row per assembly. Columns
            ``targwell``, ``comment``, ``enzyme`` and ``name`` are treated
            specially; every other column holds a part name, or several
            names joined with ``:``.
        gga: not referenced in the body.
        partsFm: femtomoles of each part; reassigned per row inside the loop.
        source: not referenced in the body.
        output: path of the main CSV that is written.
        selenzyme: overwritten per row from the ``enzyme`` column, falling
            back to "BsaI" on KeyError.
        fname: its base name, cut at the first ".", names the output folder
            under ``dnaPath`` and the product spreadsheet.
        protocolsDF: optional dataframe of protocol descriptors (see the
            review note in the body).
        sepfiles: when true, lines containing "LDV" are collected separately
            and written to ``sepfilename``.
        sepfilename: path of the CSV for LDV lines.
        printstuff: when true, progress messages are printed.
        progbar: optional object whose ``value`` is set to the completed
            fraction after each row.

    Returns nothing. Output is written to files and shown with ``display``.

    NOTE(review): the indentation of this function was lost; the nesting
    below is reconstructed and should be confirmed against the original
    file. The whitespace after the backslash continuations inside the two
    CSV header strings could not be recovered either.
    """
    # Boilerplate column header, shared by both output files.
    outfile = "Source Plate Name,Source Plate Barcode,Source Plate Type,Source Well,\
Sample ID,Sample Name,Sample Group,Sample Comment,Destination Plate Name,\
Destination Well,Transfer Volume\n"
    outfile2 = "Source Plate Name,Source Plate Barcode,Source Plate Type,Source Well,\
Sample ID,Sample Name,Sample Group,Sample Comment,Destination Plate Name,\
Destination Well,Transfer Volume\n"
    # Header length, used at the end to tell whether any LDV line was added.
    f2init = len(outfile2)
    # Iterate through the rows of the assembly list. Each row defines one
    # assembly, with the columns naming the parts that go into it.
    _, fname = os.path.split(fname)
    if ("." in fname):
        fname = fname[:fname.index(".")]
    # Spreadsheet-style list of product sequences, usable for further assemblies.
    prodSeqSpread = "well,part,description,type,left,right,conc (nM),date,numvalue,sequence,circular,5pend,3pend,length\n"
    prevplate = None
    prevtype = None
    maxprog = float(len(aslist))
    for assnum in range(len(aslist)):
        if (progbar != None):
            progbar.value = float(assnum + 1) / maxprog
        assembly = aslist[assnum:assnum + 1]  # one-row slice of the dataframe
        dwell = assembly.targwell[
            assembly.targwell.index[0]]  # well where the assembly will happen
        try:
            selenzyme = assembly.enzyme[assembly.enzyme.index[0]]
            # If no enzyme was defined, assume BsaI.
        except KeyError:
            selenzyme = "BsaI"
        # NOTE(review): this branch looks broken -- `!=` on a dataframe is
        # element-wise, `curprot` is a dict here yet is accessed with
        # `.component`, and `partfm` / `vectorfm` are not defined in this
        # function. Confirm whether it is ever exercised.
        if (protocolsDF != None):
            cprt_temp = "gga"
            if (selenzyme == "gibson"):
                cprt_temp = "gibson"
            curprot = {"dnasln": protocolsDF[(protocolsDF.protocol==cprt_temp)&\
                        (protocolsDF.component == "dnasln")].amount.iloc[0]}
            partsFm = curprot[curprot.component == partfm].amount.iloc[0]
            vectorFm = curprot[curprot.component == vectorfm].amount.iloc[0]
        else:
            curprot = ggaPD
            partsFm = ggaFm
            # NOTE(review): `ggavecGm` may be a typo for a `...Fm` name
            # (compare `gibvecFm` below) -- verify it exists at module level.
            vectorFm = ggavecGm
            if (selenzyme == "gibson"):
                curprot = gibassyPD
                partsFm = gibFm
                vectorFm = gibvecFm
        water = float(curprot[curprot.component == "dnasln"].volume
                      ) * 1000  # total amount of water, to start with
        if (printstuff):
            print("assembling with " + selenzyme)
        aind = assembly.index[
            0]  # index label of the single row in this slice
        frags = []
        if (not selenzyme == "gibson"):
            enzyme = enzymes[selenzyme]
            esite = enzyme.site.lower()
            esiterc = str(Dseq(enzyme.site).rc()).lower()
        for col in assembly:
            if (col == "targwell"
                ):  # every row is terminated by the target well, so this
                # is where the water is added and the assembly is simulated
                if (int(water) < 25):
                    # The Echo rejects transfers well below 25 nl.
                    water = 25
                ewat = int(
                    water)  # the Echo rounds to the nearest 25 on its own
                allprod = []
                nefrags = []
                cutfrags = []  # NOTE(review): never read afterwards
                if (selenzyme != "gibson"):
                    enzyme = enzymes[selenzyme]
                for frag in frags:
                    if (selenzyme == "gibson"):
                        if (len(frag) > chewnt * 2 + 1):
                            nefrags += [chewback(frag, chewnt)]
                        else:
                            raise ValueError("part with sequence "+frag+" is too "+\
                                            "short for gibson! (<= 80 nt)")
                    else:
                        newpcs = frag.cut(enzyme)
                        if (len(newpcs) == 0):
                            # Uncut fragments are kept whole.
                            newpcs += [frag]
                        for pcs in newpcs:
                            # Keep only pieces with no site on either
                            # strand (both finds return -1).
                            if (pcs.find(esite) + pcs.find(esiterc) == -2):
                                nefrags += [pcs]
                allprod = DPallCombDseq(nefrags)
                goodprod = []
                newpath = os.path.join(dnaPath, fname)
                if (printstuff):
                    print("saving in folder {}".format(newpath))
                Cname = ""
                try:
                    # The "name" column, when present, names the output sequence.
                    Cname = assembly.name[assembly.name.index[0]]
                except KeyError:
                    Cname = ""
                if (Cname == "" or str(Cname) == "nan"):
                    Cname = "well" + dwell
                if (printstuff):
                    print("Parts in construct {}".format(Cname))
                if not os.path.exists(newpath):
                    if (printstuff):
                        print("made dirs!")
                    os.makedirs(newpath)
                num = 0
                for prod in allprod:
                    Cnamenum = Cname
                    # With several products each file gets a numeric suffix.
                    if (len(allprod) > 1):
                        wout = open(
                            os.path.join(newpath,
                                         Cname + "_" + str(num) + ".gbk"), "w")
                        Cnamenum = Cname + "_" + str(num)
                    else:
                        wout = open(os.path.join(newpath, Cname + ".gbk"), "w")
                    if (bluntLeft(prod) and bluntRight(prod)):
                        num += 1
                        goodprod += [prod]
                        topo = ["linear", "circular"][int(prod.circular)]
                        booltopo = ["FALSE", "TRUE"][int(prod.circular)]
                        # File names must not contain whitespace.
                        un_prod = "_".join(Cnamenum.split())
                        wout.write(
                            "LOCUS {} {} bp ds-DNA {} SYN 01-JAN-0001\n"
                            .format(un_prod, len(prod), topo))
                        wout.write("ORIGIN\n")
                        wout.write(str(prod) + "\n//")
                        now = datetime.datetime.now()
                        nowdate = "{}/{}/{}".format(now.month, now.day,
                                                    now.year)
                        prodSeqSpread += "{},{},assembled with {},,,,30,{},,{},{},{},{},{}\n".format(\
                                        dwell,un_prod, selenzyme,nowdate,prod,booltopo,0,0,len(prod))
                    wout.close()
                assembend = ["y", "ies"][int(len(goodprod) > 1)]
                if (printstuff):
                    print("Detected {} possible assembl{}".format(
                        len(goodprod), assembend))
                frags = []
                if (water <= 0):
                    print("WARNING!!!! water <=0 in well {}".format(dwell))
                else:
                    if (prevplate == None):
                        outfile += echoline(waterwell, dwell, ewat)
                    else:
                        # Take the water from the same plate as the last part.
                        watline = echoline(waterwell,
                                           dwell,
                                           ewat,
                                           spname=prevplate,
                                           sptype=prevtype,
                                           platebc=prevplate)
                        if ("LDV" in prevtype):
                            outfile2 += watline
                        else:
                            outfile += watline
                    # Water has now been added to the well.
                if (printstuff):
                    print("")
            elif (col == "comment"):  # skip this column
                pass
            elif (col == "enzyme"):
                pass
            elif (col == "name"):
                pass
            else:
                part = assembly[col][aind]
                if (str(part) == 'nan'):
                    # An empty cell means this part is skipped.
                    if (printstuff):
                        print("skip one!")
                else:
                    part = assembly[col][aind]  # the name of the part
                    evol = 0
                    if (':' in str(part)):
                        # Several parts share this slot; split the amount
                        # evenly between them.
                        subparts = part.split(':')
                        t_partsFm = partsFm / len(subparts)
                        t_vecFm = vectorFm / len(subparts)
                        for subpart in subparts:
                            useFm = t_partsFm
                            if (col == "vector"):
                                # The vector uses its own (vectorFm) amount.
                                useFm = t_vecFm
                            e1,e2,pDseq,prevplate,prevtype = echoSinglePart(parts,\
                                        subpart,useFm,dwell,printstuff=printstuff)
                            frags += [pDseq]
                            evol += e2
                            if (sepfiles):
                                if ("LDV" in e1):
                                    outfile2 += e1
                                else:
                                    outfile += e1
                            else:
                                outfile += e1
                    else:
                        useFm = partsFm
                        if (col == "vector"):
                            # The vector uses its own (vectorFm) amount.
                            useFm = vectorFm
                        e1,e2,pDseq,prevplate,prevtype = echoSinglePart(parts,\
                                    part,useFm,dwell,printstuff=printstuff)
                        frags += [pDseq]
                        evol += e2
                        if (sepfiles):
                            if ("LDV" in e1):
                                outfile2 += e1
                            else:
                                outfile += e1
                        else:
                            outfile += e1
                    # Every transferred volume reduces the water still needed.
                    water = water - evol
    # NOTE(review): `newpath` is only assigned while handling a "targwell"
    # column, so it is unbound here if `aslist` has no rows.
    pspread = open(os.path.join(newpath, fname + ".csv"), "w")
    pspread.write(prodSeqSpread)
    pspread.close()
    seqdispDF = pd.read_csv(os.path.join(newpath, fname + ".csv"),
                            usecols=["well", "part", "circular", "length"])
    display(seqdispDF)
    display(FileLink(os.path.join(newpath, fname + ".csv")))
    ofle = open(output, "w")
    ofle.write(outfile)
    ofle.close()
    display(FileLink(output))
    # The LDV file is written only when at least one line was added to it.
    if (sepfiles and (len(outfile2) > f2init)):
        if (printstuff):
            print("wrote LDV steps in {}".format(sepfilename))
        ofle2 = open(sepfilename, "w")
        ofle2.write(outfile2)
        ofle2.close()
        display(FileLink(sepfilename))
def DPallCombDseq(partslist):
    """Assemble every valid product that can be made from ``partslist``.

    A graph is built from the ends of the parts: each end becomes a key
    ("blunt" or the lower-cased overhang sequence, reverse-complemented for
    3' left ends and 5' right ends). ``findDNAPaths`` then lists paths
    through that graph starting from every part. A path is kept when it is
    blunt at both outer ends (linear product) or when its outer ends share
    the same key (circular product, looped). Products are de-duplicated
    with ``isNewDseq``.

    Returns the list of unique assembled products.
    """

    def end_key(end_type, end_seq, flipped_type):
        # Key under which one fragment end is stored in the graph.
        if end_type == "blunt":
            return "blunt"
        if end_type == flipped_type:
            return str(Dseq(end_seq).rc()).lower()
        return str(end_seq).lower()

    ends_to_parts = defaultdict(list)  # end key -> [part index, side] entries
    part_ends = {}  # part index -> (left key, right key)

    for index, fragment in enumerate(partslist):
        left_type, left_seq = fragment.five_prime_end()
        right_type, right_seq = fragment.three_prime_end()

        left_key = end_key(left_type, left_seq, "3'")
        ends_to_parts[left_key].append([index, 0])

        right_key = end_key(right_type, right_seq, "5'")
        ends_to_parts[right_key].append([index, 1])

        part_ends[index] = (left_key, right_key)

    candidate_paths = []
    for start in list(part_ends.keys()):
        candidate_paths += findDNAPaths(start, part_ends, ends_to_parts)

    products = []
    for path in candidate_paths:
        outer_left = part_ends[path[0]][0]
        outer_right = part_ends[path[-1]][1]

        blunt_ended = outer_left == "blunt" and outer_right == "blunt"
        if not blunt_ended and outer_left != outer_right:
            # Neither a blunt linear product nor a closable circle.
            continue

        assembled = partslist[path[0]]
        for index in path[1:]:
            assembled += partslist[index]

        if not blunt_ended:
            # Matching sticky ends: close the molecule into a circle.
            assembled = assembled.looped()

        if isNewDseq(assembled, products):
            products += [assembled]

    return products
def test_dseq():
    """Exercise Dseq construction, addition, slicing, repr, end processing and cutting."""
    import textwrap

    from Bio.Restriction import BamHI, BglII, RestrictionBatch
    from pydna.dseq import Dseq

    # --- addition of incompatible sequences must raise ---
    obj1 = Dseq("a", "t", circular=True)
    obj2 = Dseq("a", "t")

    with pytest.raises(TypeError):
        obj1 + obj2

    with pytest.raises(TypeError):
        obj2 + obj1

    with pytest.raises(TypeError):
        obj1 + ""

    with pytest.raises(AttributeError):
        obj2 + ""

    obj1 = Dseq("at", "t")
    obj2 = Dseq("a", "t")

    with pytest.raises(TypeError):
        obj1 + obj2

    # --- slicing of circular and linear sequences ---
    obj = Dseq("aaa", "ttt", circular=True)
    assert obj[1:2] == Dseq("a", "t", 0)
    assert obj[:] == Dseq("aaa", "ttt", circular=False)

    obj = Dseq("atg", "cat", 0, circular=False)
    assert obj[1:2]._data == "atg"[1:2]
    assert obj[2:1]._data == "atg"[2:1]
    assert obj.reverse_complement() == obj.rc() == Dseq("cat", "atg", 0)

    obj = Dseq("atg", "cat", circular=True)
    assert obj.looped() == obj
    assert obj[:] == Dseq("atg", "cat", 0, circular=False)
    assert obj[1:2]._data == "atg"[1:2]
    assert obj[2:1]._data == "ga"

    # --- single stranded ends ---
    obj = Dseq("G", "", 0)
    assert obj.five_prime_end() == ("5'", "g")
    obj = Dseq("", "C", 0)
    assert obj.five_prime_end() == ("3'", "c")

    # --- staggered sequences: _data, mung and repr ---
    # In the expected repr the shorter strand is indented by the overhang so
    # that paired bases line up in the same column.
    obj = Dseq("ccGGATCC", "aaggatcc", -2)
    assert obj._data == "ccGGATCCtt"
    assert str(obj.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-10)
        ccGGATCC
          cctaggaa
        """
    ).strip()
    assert repr(obj) == rpr
    assert obj[3] == Dseq("G", "c", 0)
    assert obj.fill_in() == Dseq("ccGGATCCtt", "aaggatccgg", 0)
    assert obj + Dseq("") == obj
    assert Dseq("") + obj == obj

    # --- fill_in ---
    obj = Dseq("gatcAAAAAA", "gatcTTTTTT")
    assert obj.fill_in("gatc") == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.fill_in("atc") == obj
    assert obj.fill_in("ac") == obj
    assert obj.fill_in("at") == obj

    obj = Dseq("AAAAAAgatc", "TTTTTTgatc")
    assert obj.fill_in("gatc") == obj
    assert obj.fill_in("atc") == obj
    assert obj.fill_in("ac") == obj
    assert obj.fill_in("at") == obj

    # --- t4 ---
    obj = Dseq("gatcAAAAAA", "gatcTTTTTT")
    assert obj.t4() == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.t4("at") == obj
    assert obj.t4("atg") == Dseq("gatcAAAAAAgat", "gatcTTTTTTgat")
    assert obj.t4("atgc") == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.mung() == Dseq("AAAAAA", "TTTTTT")

    obj = Dseq("AAAAAAgatc", "TTTTTTgatc")
    assert obj.t4() == obj.t4("at") == Dseq("AAAAAA")
    assert obj.t4("atc") == obj.t4("atg") == obj.t4("atcg") == Dseq("AAAAAA")

    assert Dseq("GGATCC", "GGATCC").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("GGATCCa", "GGATCC").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("aGGATCC", "GGATCC").t4() == Dseq("aGGATCC", "GGATCCt")
    assert Dseq("aGGATCCa", "GGATCC").t4() == Dseq("aGGATCC", "GGATCCt")
    assert Dseq("GGATCC", "aGGATCC").t4() == Dseq("GGATCCt", "aGGATCC")
    assert Dseq("GGATCC", "GGATCCa").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("GGATCC", "aGGATCCa").t4() == Dseq("GGATCCt", "aGGATCC")

    assert Dseq("GGATCC", "ATCC").t4("g") == Dseq("gg", "", ovhg=0)
    assert Dseq("GGATCC", "GGATCC").t4("gat") == Dseq("ggat", "ggat", ovhg=-2)

    # --- more staggered layouts ---
    a2 = Dseq("ccGGATCCaa", "ggatcc", -2)
    assert a2._data == "ccGGATCCaa"
    assert str(a2.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-10)
        ccGGATCCaa
          cctagg
        """
    ).strip()
    assert repr(a2) == rpr

    a3 = Dseq("ccGGATCC", "ggatcc", -2)
    assert a3._data == "ccGGATCC"
    assert str(a3.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-8)
        ccGGATCC
          cctagg
        """
    ).strip()
    assert repr(a3) == rpr

    b = Dseq("GGATCC", "aaggatcccc", 2)
    assert b._data == "ggGGATCCtt"
    assert str(b.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-10)
          GGATCC
        cccctaggaa
        """
    ).strip()
    assert repr(b) == rpr

    b2 = Dseq("GGATCCaa", "ggatcccc", 2)
    assert b2._data == "ggGGATCCaa"
    assert str(b2.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-10)
          GGATCCaa
        cccctagg
        """
    ).strip()
    assert repr(b2) == rpr
    assert b2.seguid() == "hPNrcQ0sluXyfu4XuUh1trsnygc"

    b3 = Dseq("GGATCC", "ggatcccc", 2)
    assert b3._data == "ggGGATCC"
    assert str(b3.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-8)
          GGATCC
        cccctagg
        """
    ).strip()
    assert repr(b3) == rpr

    c = Dseq("GGATCCaaa", "ggatcc", 0)
    assert c._data == "GGATCCaaa"
    assert str(c.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-9)
        GGATCCaaa
        cctagg
        """
    ).strip()
    assert repr(c) == rpr

    d = Dseq("GGATCC", "aaaggatcc", 0)
    assert d._data == "GGATCCttt"
    assert str(d.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
        Dseq(-9)
        GGATCC
        cctaggaaa
        """
    ).strip()
    assert repr(d) == rpr

    # --- cutting and re-joining ---
    obj = Dseq("GGATCCaaa", "ggatcc", 0)

    frag1 = Dseq("G", "gatcc", 0)
    frag2 = Dseq("GATCCaaa", "g", -4)

    assert obj.cut(BamHI) == (frag1, frag2)
    assert frag1 + frag2 == obj

    # This comparison used to be a bare expression and so checked nothing.
    assert obj.seguid() == "HtK7-_BmOJw0BmtYE8f1yGdHc0c"
    assert frag1.seguid() == "yJkorWG5V2etvSLp6E6QNK-KMlQ"
    assert frag2.seguid() == "Aw3buI-N85OztBZAzeGJvXGlwO8"

    # --- repr of longer sequences ---
    obj = Dseq("tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta")
    assert (
        repr(obj)
        == "Dseq(-30)\ntagcgtagctgtagtatgtgatctggtcta\natcgcatcgacatcatacactagaccagat"
    )

    obj2 = Dseq("tagcgtagctgtagtatgtgatctggtcta")
    obj3 = obj = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta", 0
    )
    assert obj == obj2 == obj3
    assert obj.find("ggatcc") == -1
    assert obj.find("tgtagta") == 9

    # Constructed only to check that a 31 bp blunt duplex is accepted.
    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")

    # In the truncated repr the shorter strand is padded with one space per
    # overhanging base, so a three base overhang needs three spaces.
    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaa", "CCCttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-34)\ntagc..ctaa   \natcg..gattCCC"

    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaaCCC", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-34)\ntagc..ctaaCCC\natcg..gatt   "

    obj = Dseq("agcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-31)\n agcg..ctaa\natcgc..gatt"

    obj = Dseq("Atagcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-32)\nAtagc..ctaa\n atcg..gatt"

    obj = Dseq(
        "tagcgtagctgtagtatgtgatctggtctaa", "tatcgcatcgacatcatacactagaccagatt"[::-1]
    )
    assert repr(obj) == "Dseq(-32)\n tagc..ctaa\ntatcg..gatt"
    assert round(obj.mw(), 1) == 19535.6

    # --- equivalent ways of requesting a circular sequence ---
    obj1 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta",
        "tagaccagatcacatactacagctacgcta",
        circular=True,
        linear=False,
    )
    obj2 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta",
        "tagaccagatcacatactacagctacgcta",
        circular=True,
    )
    obj3 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta", linear=False
    )

    assert obj1 == obj2 == obj3
    assert obj1.find("ggatcc") == -1
    assert obj1.find("tgtagta") == 9
    assert (
        Dseq(
            "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta"
        ).looped()
        == obj1
    )

    # --- restriction enzyme bookkeeping ---
    obj = Dseq("ggatcc")

    assert BglII in obj.no_cutters()
    assert BamHI not in obj.no_cutters()

    assert BamHI in obj.unique_cutters()

    assert BamHI in obj.once_cutters()

    assert BamHI in (obj + obj).twice_cutters()
    assert BamHI not in obj.twice_cutters()

    assert BamHI in obj.n_cutters(1)
    assert BamHI in obj.cutters()

    rb = RestrictionBatch((BamHI, BglII))

    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    obj = Dseq("ggatccAGATCT")
    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    obj = Dseq("AGATCTggatcc")
    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    # For circular sequences the fragments depend on the enzyme order.
    obj = Dseq("ggatccAGATCT", circular=True)
    assert obj.cut(rb) == obj.cut(BamHI, BglII) != obj.cut(BglII, BamHI)

    obj = Dseq("AGATCTggatcc", circular=True)
    assert obj.cut(rb) == obj.cut(BglII, BamHI) != obj.cut(BamHI, BglII)
def _idt_adjusted_length(primer_text):
    """Return the base count of a lower-cased primer string, corrected for IDT codes.

    Every a/t/c/g/n character is counted first. Modification codes embedded in
    the text contain some of those letters themselves, so each code that occurs
    (once or several times) changes the count by a fixed amount, applied once:
    the first four groups subtract 1, 2, 3 and 4, and the last group adds 1.
    """
    code_adjustments = (
        (('icy5', 'icy3', '5biosg', '(am)'), 1),
        (('dspacer',), 2),
        (('dbcoteg',), 3),
        (('biotinteg',), 4),
        (('ds', 'idsp'), -1),
    )
    base_count = sum(primer_text.count(base) for base in 'atcgn')
    correction = sum(
        amount
        for codes, amount in code_adjustments
        for code in codes
        if code in primer_text
    )
    return base_count - correction


def calpcr(request):
    """Locate every primer on the vector and size the PCR product of a selected pair.

    Each primer is searched for on the vector sequence and on its reverse
    complement; position, direction, in_vector and length are stored on the
    primer and saved. When exactly two primers are ticked ("check_box" in the
    POST data) and they form a forward/reverse pair, the product length and
    the plotpcr() rendering are added to the template context; otherwise a
    message is shown instead.

    NOTE(review): the vector is taken from the first primer, so this still
    fails when no primers exist.
    """
    primers = Primer.objects.all()
    vector = primers[0].vector
    vector_name = vector.name
    seq = Dseq(str.lower(vector.sequence).replace(' ', ''))
    L = len(str(seq))
    rseq = seq.reverse_complement()

    for primer in primers:
        p_seq = Dseq(primer.sequence.replace(' ', ''))
        p_seq_s = str(p_seq).lower()

        forward_hit = seq.find(p_seq_s)
        if forward_hit != -1:
            position = forward_hit + 1
            direction = 'forward'
            in_vector = True
        else:
            reverse_hit = rseq.find(p_seq_s)
            if reverse_hit != -1:
                # Reverse primers are stored with a negative position.
                position = reverse_hit - L - 1
                direction = 'reverse'
                in_vector = True
            else:
                position = -1
                direction = 'none'
                in_vector = False

        primer.position = position
        primer.dir = direction
        primer.in_vector = in_vector
        primer.length = _idt_adjusted_length(p_seq_s)
        primer.save()

    primers = primers.filter(in_vector=True).order_by('position')
    primerFilter = PrimerFilter(queryset=primers)
    if request.method == 'POST' and 'Search' in request.POST:
        primerFilter = PrimerFilter(request.POST, queryset=primers)

    check_box_list = request.POST.getlist("check_box")
    if len(check_box_list) == 2:
        primer_1 = Primer.objects.get(id=check_box_list[0])
        primer_2 = Primer.objects.get(id=check_box_list[1])

        forward = reverse = None
        if primer_1.dir == 'reverse' and primer_2.dir == 'forward':
            reverse, forward = primer_1, primer_2
        elif primer_2.dir == 'reverse' and primer_1.dir == 'forward':
            reverse, forward = primer_2, primer_1

        if forward is not None:
            pf = forward.position
            pr = reverse.position
            if abs(pr) >= abs(pf):
                L_pcr = -pr - pf
            else:
                L_pcr = L - pr - pf
            show_seq = plotpcr(str(seq), forward.sequence, reverse.sequence)
            primer_name = [forward.name, reverse.name]
            primer_position = [pf, pr]
        else:
            # Two primers that are not one forward and one reverse used to
            # crash here because the primer names and sequences were never set.
            L_pcr = 'Select one forward and one reverse primer!!'
            show_seq = "can't pcr"
            primer_name = ['', '']
            primer_position = ['x', 'x']
    else:
        L_pcr = 'You can only select two primers!!'
        show_seq = "can't pcr"
        primer_name = ['', '']
        primer_position = ['x', 'x']

    return render(request, template_name='primer/seq.html',
                  context={'seq': seq,
                           'L': L,
                           'primers': primers,
                           'show_seq': show_seq,
                           'primerFilter': primerFilter,
                           'primer_name': primer_name,
                           'L_pcr': L_pcr,
                           'vector_name': vector_name,
                           'primer_position': primer_position,
                           })