def test_primer_design_four_fragments():
    """Three primer-designed amplicons plus a fourth plain Dseqrecord assemble linearly.

    The expected product is the concatenation of the three template
    fragments followed by the fourth record.
    """
    amplicons = [primer_design(fragment) for fragment in frags]
    fourth = Dseqrecord("TAAAAATAAAATTGTTGACAGCAGAAGTGATATAGAAATTTGTTAATTATTA")
    prepared = assembly_fragments([*amplicons, fourth], 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + frags[1] + frags[2] + fourth
    assert contig.seq == expected.seq
def assemble(self, assemblies):
    """Build one linear contig for every assembly and attach it as ``contig``.

    For each assembly, an amplicon and a sequence record are designed for
    every part with an ``ExtractionPrimerBuilder``; the amplicons are passed
    through ``assembly_fragments``, each resulting fragment gets its ``locus``
    set to the name of the matching part record, and the fragments are
    assembled linearly with ``limit=20``.

    Args:
        assemblies: iterable of objects exposing ``parts`` (each part node
            exposing ``part``); each object receives a ``contig`` attribute.

    Returns:
        The ``assemblies`` argument, after ``contig`` has been set on each item.

    Raises:
        Exception: if an assembly does not yield exactly one linear contig.
    """
    primer_builder = ExtractionPrimerBuilder()
    for assembly in assemblies:
        part_records = []
        part_amplicons = []
        for part_node in assembly.parts:
            part_amplicon, part_seq_record = primer_builder.design_primer_for_part(
                part_node.part
            )
            part_records.append(part_seq_record)
            part_amplicons.append(part_amplicon)

        fragments = assembly_fragments(part_amplicons)
        for idx, fragment in enumerate(fragments):
            fragment.locus = part_records[idx].name

        linear_contigs = Assembly(fragments, limit=20).assemble_linear()
        if len(linear_contigs) != 1:
            # The format string has two placeholders; the original supplied only
            # the count, so the raise itself failed with a TypeError.
            raise Exception(
                '%s resulted in %d contigs. We were hoping for just one.'
                % (assembly, len(linear_contigs))
            )
        assembly.contig = linear_contigs[0]
    return assemblies
def test_circular_assembly_fragments2():
    """A plain first fragment plus two amplicons close into the pinned circular sequence."""
    amplicons = [primer_design(fragment) for fragment in frags]
    prepared = circular_assembly_fragments((frags[0], amplicons[1], amplicons[2]), 20)
    contig = Assembly(prepared, limit=20).assemble_circular()[0]
    expected = (
        "ccaaggacacaatcgagctccgatccgtactgtcgagaaacttgtatccctctaactagtatggatagccgtgtcttcactgtgctgcggctacccatcgtagtgaaacatacacgttgctcgggttcaccccggtccgttctgagtcga"
    )
    assert str(contig.seq) == expected
def AssembleNAnneal(graph, nodes, edges, start, end):
    """Assemble the encoded graph and collect trimmed PCR products of the expected length.

    The fragments produced by ``enc.toDSEQ`` are assembled with terminal
    overlaps only (limit 10). Every linear product is used as a template for
    an ``Anneal`` with primers built from ``nodes[start]`` and the complement
    of ``nodes[end]``. PCR products whose length equals
    ``len(nodes) * enc.SEQ_LEN`` have 10 positions removed from each end and
    are returned.

    Returns:
        list of the trimmed PCR products.
    """
    fragments = enc.toDSEQ(graph, edges, nodes)
    forward = Dseqrecord(nodes[start])
    reverse = Dseqrecord(enc.getSeqComplement(nodes[end]))
    assembly = Assembly(fragments, limit=10, only_terminal_overlaps=True)
    print("\n" + str(assembly) + "\n")

    candidates = []
    for product in assembly.linear_products:
        reaction = Anneal([forward, reverse], Dseqrecord(product), limit=10)
        expected_length = len(nodes) * enc.SEQ_LEN
        if len(reaction.products) == 0:
            continue
        print(product.detailed_figure())
        print(product.figure())
        for amplicon in reaction.products:
            if len(amplicon.seq) != expected_length:
                continue
            # Trim in two steps, exactly as before: head first, then tail.
            amplicon.seq = amplicon.seq[10:]
            amplicon.seq = amplicon.seq[:-10]
            candidates.append(amplicon)
    return candidates
def test_primer_design_same_first_and_third_Dseqrecord():
    """Using the same record as first and last fragment yields the looped two-fragment product.

    Checked once through ``assembly_fragments`` and once with two hand-written
    records ``a`` and ``b`` assembled as ``(a, b, a)``.
    """
    from pydna.dseqrecord import Dseqrecord

    amplicons = [primer_design(fragment) for fragment in frags]
    prepared = assembly_fragments([frags[0], amplicons[1], frags[0]], 20)
    looped_contig = Assembly(prepared, limit=20).assemble_circular()[0]
    assert looped_contig.cseguid() == (frags[0] + frags[1]).looped().cseguid()

    a = Dseqrecord("ccaaggacacaatcgagctccgatccgtactgtcgagaaacttgtatcc", name="a")
    b = Dseqrecord(
        "ctgtcgagaaacttgtatccctctaactagtatggatagccgtgtcttcactgtgctgcggctacccatcccaaggacacaatcgagctc",
        name="b",
    )
    explicit_asm = Assembly((a, b, a), limit=20)

    linear_contig = explicit_asm.assemble_linear()[0]
    expected_linear = "ccaaggacacaatcgagctccgatccgtactgtcgagaaacttgtatccctctaactagtatggatagccgtgtcttcactgtgctgcggctacccatcccaaggacacaatcgagctccgatccgtactgtcgagaaacttgtatcc"
    assert str(linear_contig.seq) == expected_linear

    circular_contig = explicit_asm.assemble_circular()[0]
    assert circular_contig.cseguid() == (frags[0] + frags[1]).looped().cseguid()
def infer_ref(line):
    """Infer a reference sequence for the contig stored in ``line[0]``.

    The contig's row in ``contig_sp_cov`` supplies four statistics and a
    comma-separated list of reference pieces. A single piece is returned
    as-is. Otherwise the pieces are assembled linearly (limit 15, at most
    3 nodes); ``assemDNA`` is used instead when that assembly fails or when
    the third element of ``maxoverlap(contig, refseq)`` is below 15. If that
    value is still below 15 afterwards, the reverse complement is returned.

    Args:
        line: sequence whose first element is the contig.

    Returns:
        tuple ``(refseq, sp_case, cov_case, sp_control, cov_control)``.
    """
    contig = line[0]
    # NOTE(review): the k-mer list is computed but never used below; the call
    # is kept in case contig2kmer has side effects -- confirm and drop.
    kmers = contig2kmer(contig)
    sp_case, cov_case, sp_control, cov_control, refpairs = contig_sp_cov.loc[contig].tolist()
    refpairs = refpairs.split(',')
    if len(refpairs) == 1:
        return (refpairs[0], sp_case, cov_case, sp_control, cov_control)
    try:
        refseq = Assembly(
            [Dseqrecord(i) for i in refpairs], limit=15
        ).assemble_linear(max_nodes=3)[0].seq.watson
        if maxoverlap(contig, refseq)[2] < 15:
            refseq = assemDNA(refpairs)  # for sake of low complexity sequences
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any assembly failure still falls back.
        refseq = assemDNA(refpairs)
    if maxoverlap(contig, refseq)[2] < 15:
        refseq = str(Seq(refseq).reverse_complement())
    return (refseq, sp_case, cov_case, sp_control, cov_control)
def test_primer_Design_with_linker():
    """Primer design with a linker between two fragments, linear and circular.

    Two amplicons separated by a short linker record must assemble into
    ``b + linker + c``; the circular variant closes the construct with a PCR
    product of ``b`` made from the primers designed for the flanking copies.
    """
    b = Dseqrecord(
        "agctactgactattaggggttattctgatcatctgatctactatctgactgtactgatcta")
    linker = Dseqrecord("AAATTTCCCGGG")
    c = Dseqrecord(
        "tctgatctactatctgactgtactgatctattgacactgtgatcattctagtgtattactc")
    expected = b + linker + c

    linear_parts = assembly_fragments((primer_design(b), linker, primer_design(c)))
    asm1 = Assembly(linear_parts)
    # Previously written as ``assert A, B == "..."``: the comparison was only
    # the assertion *message*, so the check could never fail.
    assert (
        asm1.assemble_linear()[0].seguid()
        == expected.seguid()
        == 'l95igKB8iKAKrvvqE9CYksyNx40'
    )

    circular_parts = assembly_fragments(
        (primer_design(b), linker, primer_design(c), primer_design(b)))
    b2 = pcr(circular_parts[-1].forward_primer, circular_parts[0].reverse_primer, b)
    asm2 = Assembly((b2, circular_parts[1], circular_parts[2]))
    assert expected.looped().cseguid() == asm2.assemble_circular()[0].cseguid()
    assert expected.looped().cseguid() == 'jdHXfQI5k4Sk2ESiZYfKv4oP2FI'
def infer_ref_unpair(line, unpair_reads_dict):
    """Infer a reference sequence for a contig, using its unpaired reads if needed.

    The reference starts as ``'NA'``. With more than two reference pieces a
    linear assembly (limit 15, at most 3 nodes) is attempted, falling back to
    ``assemDNA`` on failure. When more than ``lowdepth/2`` related reads exist
    and the reference is shorter than the contig, ``CAP`` is used instead.
    If the third element of ``maxoverlap(contig, refseq)`` is below 15, the
    reverse complement is returned.

    Args:
        line: sequence whose first element is the contig.
        unpair_reads_dict: mapping from contig to its related reads.

    Returns:
        tuple ``(refseq, sp_case, cov_case, sp_control, cov_control)``.
    """
    contig = line[0]
    # NOTE(review): the k-mer list is computed but never used below; the call
    # is kept in case contig2kmer has side effects -- confirm and drop.
    kmers = contig2kmer(contig)
    sp_case, cov_case, sp_control, cov_control, refpairs = contig_sp_cov.loc[contig].tolist()
    refpairs = refpairs.split(',')
    related_reads = unpair_reads_dict[contig]
    refseq = 'NA'
    if len(refpairs) > 2:  # indels should have no more than 2 paired refs (head and tail)
        try:
            refseq = Assembly(
                [Dseqrecord(i) for i in refpairs], limit=15
            ).assemble_linear(max_nodes=3)[0].seq.watson
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are no longer swallowed; any assembly failure still falls back.
            refseq = assemDNA(refpairs)
    if len(related_reads) > lowdepth / 2 and len(refseq) < len(contig):
        refseq = CAP(contig, related_reads)
    if maxoverlap(contig, refseq)[2] < 15:
        refseq = str(Seq(refseq).reverse_complement())
    return (refseq, sp_case, cov_case, sp_control, cov_control)
def golden_gate(*elements, enzyme='BspQI'):
    """Digest every element with ``enzyme`` and assemble the largest digestion products.

    Each element is digested, the largest product of each digest is kept and
    printed, and the kept fragments are assembled with terminal overlaps only
    (limit 4).

    Args:
        *elements: sequences to digest and assemble.
        enzyme: enzyme passed to ``digest`` (default ``'BspQI'``).

    Returns:
        The largest circular product, or ``None`` when no circular product
        is found.
    """
    fragments = [tools.get_largest(digest(element, enzyme)) for element in elements]
    for f in fragments:
        print(f)
        print('\n')

    products = Assembly(fragments, limit=4, only_terminal_overlaps=True)
    circ = products.circular_products
    if len(circ) > 1:
        # Report the number of plasmids; the original formatted the whole list.
        print('WARNING: {} circular plasmids detected, returning largest.'.format(len(circ)))
    if len(circ) == 0:
        print('WARNING: no circular plasmids detected.')
        return None
    return tools.get_largest(circ)
def test_primer_Design():
    """Primer-designed fragments assemble to the pinned linear and circular checksums."""
    templates = (
        Dseqrecord(
            "atgactgctaacccttccttggtgttgaacaagatcgacgacatttcgttcgaaacttacgatg"),
        Dseqrecord(
            "ccaaacccaccaggtaccttatgtaagtacttcaagtcgccagaagacttcttggtcaagttgcc"),
        Dseqrecord(
            "tgtactggtgctgaaccttgtatcaagttgggtgttgacgccattgccccaggtggtcgtttcgtt"),
    )
    first = templates[0]

    # Linear case: three amplicons in a row.
    linear_parts = assembly_fragments([primer_design(t) for t in templates])
    linear_asm = Assembly(linear_parts)
    assert linear_asm.assemble_linear()[0].seguid() == "1eNv3d_1PqDPP8qJZIVoA45Www8"

    # Circular case: the first template is repeated at the end, then replaced
    # by a single PCR product carrying the primers of both flanking copies.
    circular_parts = assembly_fragments(
        [primer_design(t) for t in templates + (first,)])
    closing_amplicon = pcr(
        circular_parts[-1].forward_primer, circular_parts[0].reverse_primer, first)
    circular_asm = Assembly((closing_amplicon, circular_parts[1], circular_parts[2]))
    assert circular_asm.assemble_circular()[0].cseguid() == "V3Mi8zilejgyoH833UbjJOtDMbc"
def test_primer_design_two_fragments_linker_in_between():
    """Two amplicons with the ``bam`` linker between them assemble linearly."""
    amplicons = [primer_design(fragment) for fragment in frags]
    prepared = assembly_fragments([amplicons[0], bam, amplicons[1]], 20, 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + bam + frags[1]
    assert contig.seq == expected.seq
def test_primer_design_linker_first():
    """The ``bam`` linker placed before three amplicons assembles linearly."""
    amplicons = [primer_design(fragment) for fragment in frags]
    ordered_parts = [bam, amplicons[0], amplicons[1], amplicons[2]]
    prepared = assembly_fragments(ordered_parts, 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = bam + frags[0] + frags[1] + frags[2]
    assert contig.seq == expected.seq
print((al1)+'\n'+(al2)) #seqs.append(lines[1].strip('\n')) #seqs.append(lines[3]) hetdif = difflib.ndiff(al1,al2) #check where are the differences in between the heterozygots difs = [(p,n) for n,p in enumerate(hetdif) if p[0] in ('+','-')] print (difs) else: print('genotype '+f+' homozygot') Nhomozygots += 1 seqs.append(lines[1]) print(str(Nheterozygots)+' heterozygous amplicons; '+str(Nhomozygots)+' homozygous amplicons') nseqs = (len(seqs)) if (nseqs == 3 and Nheterozygots == 0): # do assembly for homozygots in all three amplicons print ("processing homozygous only Assembly:"+s+tlr) asem = Assembly((Dseqrecord(seqs[0]),Dseqrecord(seqs[1]),Dseqrecord(seqs[2])), only_terminal_overlaps = True, limit = 14) if asem.linear_products == []: print (s+tlr+' assembly failed\n\n') # if I do it for only homozygous sequences, than no fails else: assembled_N_samam += 1 aswat = asem.linear_products[0].seq.watson aswatlen = len(aswat) print(s+tlr+'assembly successful length: '+str(aswatlen)+'\n\n') with open('../finalassem/asemseq'+s+tlr+'.fasta','w') as asfile: asfile.write(('>conitg1\n%s') % (aswat)) elif Nheterozygots > 0 print("Assembly finished with "+str(assembled_N_samam)+" successfully assembled samam from "+str(total_N_samam))
def test_primer_design_all_pcr_products():
    """Three primer-designed amplicons assemble into the concatenated templates."""
    amplicons = [primer_design(fragment) for fragment in frags]
    prepared = assembly_fragments(amplicons, 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + frags[1] + frags[2]
    assert contig.seq == expected.seq
def test_primer_design_first_and_third_Dseqrecord():
    """Plain records at positions one and three with an amplicon in between assemble linearly."""
    amplicons = [primer_design(fragment) for fragment in frags]
    ordered_parts = [frags[0], amplicons[1], frags[2]]
    prepared = assembly_fragments(ordered_parts, 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + frags[1] + frags[2]
    assert contig.seq == expected.seq
gttctgatcctcgagcatcttaagaattc >468_pCAPs_release_fw (25-mer) gtcgaggaacgccaggttgcccact >467_pCAPs_release_re (31-mer) ATTTAAatcctgatgcgtttgtctgcacaga >568_pCAPsAjiIR (22-mer) GTGCcatctgtgcagacaaacg >578_crp42-70 (29-mer) gttcttgtctcattgccacattcataagt''') p = pcr(p577, p567, pYPKa_Z_prom) g = pcr(p468, p467, pYPKa_A_saat) t = pcr(p568, p578, pYPKa_E_term) pYPKpw = read("pYPKpw.gb") from Bio.Restriction import ZraI pYPKpw_lin = pYPKpw.linearize(ZraI) asm = Assembly((pYPKpw_lin, p, g, t)) candidate = asm.assemble_circular()[0] pYPK0_TDH3_FaPDC_TEF1 = candidate.synced(pYPKa) pYPK0_TDH3_FaPDC_TEF1.write("pYPK0_TDH3_FaPDC_TPI1.gb")
def test_primer_design_linker_third():
    """The ``bam`` linker in third position, between amplicons two and three, assembles linearly."""
    amplicons = [primer_design(fragment) for fragment in frags]
    ordered_parts = [amplicons[0], amplicons[1], bam, amplicons[2]]
    prepared = assembly_fragments(ordered_parts, maxlink=6, overlap=20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + frags[1] + bam + frags[2]
    assert contig.seq == expected.seq
# test_reverse_complement: assembles three named fragments (aaa, bbb, ccc) into a
# circular contig with limit=14, takes its reverse complement twice, and checks that
# figure() and detailed_figure() of the double reverse complement equal those of the
# original contig. Both the contig and its single reverse complement are then compared
# against expected figure literals.
# NOTE(review): this definition appears whitespace-collapsed onto one line; the expected
# figure literals depend on exact spacing and line breaks, so the original layout must be
# restored before the test can run -- code below is left untouched.
def test_reverse_complement(monkeypatch): from pydna._pretty import pretty_str from pydna.assembly import Assembly from pydna.dseqrecord import Dseqrecord a = Dseqrecord("acgatgctatactgtgCCNCCtgtgctgtgctcta") #12345678901234 b = Dseqrecord("tgtgctgtgctctaTTTTTTTtattctggctgtatc") #123456789012345 c = Dseqrecord("tattctggctgtatcGGGGGtacgatgctatactgtg") a.name = "aaa" #1234567890123456 b.name = "bbb" c.name = "ccc" asm = Assembly((a, b, c), limit=14) x = asm.assemble_circular()[0] y = x.rc() z = y.rc() assert x.figure() == z.figure() assert x.detailed_figure() == z.detailed_figure() xfig = '''\ -|aaa|14 | \\/ | /\\ | 14|bbb|15 | \\/ | /\\ | 15|ccc|16 | \\/ | /\\ | 16- | | ----------------------- '''.rstrip() xdfig = pretty_str('''\ |||||||||||||||| acgatgctatactgtgCCNCCtgtgctgtgctcta TGTGCTGTGCTCTA tgtgctgtgctctaTTTTTTTtattctggctgtatc TATTCTGGCTGTATC tattctggctgtatcGGGGGtacgatgctatactgtg ACGATGCTATACTGTG '''.rstrip() + "\n") assert x.figure() == xfig assert x.detailed_figure() == xdfig yfig = '''\ -|ccc_rc|15 | \\/ | /\\ | 15|bbb_rc|14 | \\/ | /\\ | 14|aaa_rc|16 | \\/ | /\\ | 16- | | -------------------------------- '''.rstrip() ydfig = '''\ |||||||||||||||| cacagtatagcatcgtaCCCCCgatacagccagaata GATACAGCCAGAATA gatacagccagaataAAAAAAAtagagcacagcaca TAGAGCACAGCACA tagagcacagcacaGGNGGcacagtatagcatcgt CACAGTATAGCATCGT '''.rstrip() + "\n" assert y.figure() == yfig assert y.detailed_figure() == ydfig
def test_primer_design_linker_second_before_Dseqrecord():
    """The ``bam`` linker followed by a plain record, flanked by amplicons, assembles linearly."""
    amplicons = [primer_design(fragment) for fragment in frags]
    ordered_parts = [amplicons[0], bam, frags[1], amplicons[2]]
    prepared = assembly_fragments(ordered_parts, 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + bam + frags[1] + frags[2]
    assert contig.seq == expected.seq
def test_primer_design_two_fragments():
    """The first two primer-designed amplicons assemble into their concatenated templates."""
    amplicons = [primer_design(fragment) for fragment in frags]
    prepared = assembly_fragments([amplicons[0], amplicons[1]], 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = frags[0] + frags[1]
    assert contig.seq == expected.seq
def gibson(*fragments, limit=18, only_terminal_overlaps=False):
    """Assemble ``fragments`` and return the circular products.

    Args:
        *fragments: sequences handed to ``Assembly``.
        limit: value forwarded as ``Assembly``'s ``limit`` (default 18).
            The original ignored this argument and always used 18.
        only_terminal_overlaps: forwarded unchanged to ``Assembly``.

    Returns:
        The ``circular_products`` of the assembly.
    """
    products = Assembly(
        fragments,
        limit=limit,
        only_terminal_overlaps=only_terminal_overlaps,
    )
    print(products)
    return products.circular_products
def test_primer_design_two_fragments_flanking_linkers():
    """Two amplicons with the ``bam`` linker on both ends assemble linearly."""
    amplicons = [primer_design(fragment) for fragment in frags]
    ordered_parts = [bam, amplicons[0], amplicons[1], bam]
    prepared = assembly_fragments(ordered_parts, 20)
    contig = Assembly(prepared, limit=20).assemble_linear()[0]
    expected = bam + frags[0] + frags[1] + bam
    assert contig.seq == expected.seq
gttctgatcctcgagcatcttaagaattc >468_pCAPs_release_fw (25-mer) gtcgaggaacgccaggttgcccact >467_pCAPs_release_re (31-mer) ATTTAAatcctgatgcgtttgtctgcacaga >568_pCAPsAjiIR (22-mer) GTGCcatctgtgcagacaaacg >578_crp42-70 (29-mer) gttcttgtctcattgccacattcataagt''') p = pcr(p577, p567, pYPKa_Z_prom) g = pcr(p468, p467, pYPKa_A_saat) t = pcr(p568, p578, pYPKa_E_term) pYPKpw = read("pYPKpw.gb") from Bio.Restriction import ZraI pYPKpw_lin = pYPKpw.linearize(ZraI) asm = Assembly( (pYPKpw_lin, p, g, t) ) candidate = asm.circular_products[0] pYPK0_TDH3_FaPDC_TEF1 = candidate.synced(pYPKa) pYPK0_TDH3_FaPDC_TEF1.write("pYPK0_TDH3_FaPDC_TPI1.gb")
# test_contig: sets the environment variable pydna_cached_funcs to "" and assembles three
# named fragments (one, two, three) with limit=14. It checks repr() and detailed_figure()
# of the first circular contig, then figure(), repr(), _repr_html_() and _repr_pretty_()
# of the first linear contig, and finally calls Contig.from_SeqRecord with a SeqRecord and
# an empty networkx MultiDiGraph.
# NOTE(review): `indent` is imported but not used, and the first triple-quoted `fig` value
# is overwritten before it is read.
# NOTE(review): imports Bio.Alphabet, which newer Biopython releases no longer provide --
# confirm the supported Biopython version.
# NOTE(review): this definition appears whitespace-collapsed onto one line; the expected
# figure literals depend on exact spacing, so the original layout must be restored before
# the test can run -- code below is left untouched.
def test_contig(monkeypatch): monkeypatch.setenv("pydna_cached_funcs", "") from pydna import contig from pydna.assembly import Assembly from pydna.dseqrecord import Dseqrecord a = Dseqrecord("acgatgctatactgCCCCCtgtgctgtgctcta", name="one") b = Dseqrecord("tgtgctgtgctctaTTTTTtattctggctgtatc", name="two") c = Dseqrecord("tattctggctgtatcGGGGGtacgatgctatactg", name="three") asm = Assembly((a, b, c), limit=14) cnt = asm.assemble_circular()[0] assert repr(cnt) == "Contig(o59)" assert cnt.detailed_figure() == str( "||||||||||||||\n" "acgatgctatactgCCCCCtgtgctgtgctcta\n" " TGTGCTGTGCTCTA\n" " tgtgctgtgctctaTTTTTtattctggctgtatc\n" " TATTCTGGCTGTATC\n" " tattctggctgtatcGGGGGtacgatgctatactg\n" " ACGATGCTATACTG\n" ) from textwrap import indent fig = """ -|one|14 | \\/ | /\\ | 14|two|15 | \\/ | /\\ | 15|three|14 | \\/ | /\\ | 14- | | -------------------------""" cnt2 = asm.assemble_linear()[0] fig = ('one|14\n' ' \\/\n' ' /\\\n' ' 14|two|15\n' ' \\/\n' ' /\\\n' ' 15|three') assert fig == cnt2.figure() assert repr(cnt2) == 'Contig(-73)' #print(repr(cnt2._repr_html_())) assert cnt2._repr_html_( ) == '<pre>one|14\n \\/\n /\\\n 14|two|15\n \\/\n /\\\n 15|three</pre>' from unittest.mock import MagicMock pp = MagicMock() cnt2._repr_pretty_(pp, None) pp.text.assert_called_with('Contig(-73)') from Bio.Seq import Seq from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA from pydna.seqrecord import SeqRecord arg = SeqRecord(Seq("aaa", IUPACAmbiguousDNA())) import networkx as nx x = contig.Contig.from_SeqRecord(arg, graph=nx.MultiDiGraph())