return _pretty_str("Assembly\n"
                   "fragments..: {sequences}\n"
                   "limit(bp)..: {limit}\n"
                   "G.nodes....: {nodes}\n"
                   "algorithm..: {al}".format(
                       sequences=" ".join(
                           "{}bp".format(len(x["mixed"])) for x in self.fragments),
                       limit=self.limit,
                       nodes=self.G.order(),
                       al=self.algorithm.__name__,
                   ))


# Three short example sequences named "a", "b" and "c".  The end of each
# sequence shares a stretch with the start of the next one (a -> b -> c),
# and the end of "c" shares a stretch with the start of "a".
example_fragments = (
    _Dseqrecord("AacgatCAtgctcc", name="a"),
    _Dseqrecord("TtgctccTAAattctgc", name="b"),
    _Dseqrecord("CattctgcGAGGacgatG", name="c"),
)

# NOTE(review): presumably the linear products expected when
# example_fragments are assembled - confirm against the code that uses them.
linear_results = (
    _Dseqrecord("AacgatCAtgctccTAAattctgcGAGGacgatG", name="abc"),
    _Dseqrecord("ggagcaTGatcgtCCTCgcagaatG", name="ac_rc"),
    _Dseqrecord("AacgatG", name="ac"),
)

# NOTE(review): presumably the circular products expected from the same
# fragments ("abc" and a second record named "abc_rc") - confirm against usage.
circular_results = (
    _Dseqrecord("acgatCAtgctccTAAattctgcGAGG", name="abc", circular=True),
    _Dseqrecord("ggagcaTGatcgtCCTCgcagaatTTA", name="abc_rc", circular=True),
)
def pcr(*args, **kwargs):
    """Simulate a PCR and return the single resulting product.

    Convenience wrapper around the Anneal class, meant to simplify its
    usage, especially from the command line.  The positional arguments are
    greedily flattened, so they may be given one by one, as one iterable,
    or as an iterable of iterables.

    Parameters
    ----------
    args : iterable containing sequence objects
        Several arguments are also accepted.  Each sequence may be a

        * string
        * Seq
        * SeqRecord (or subclass)
        * Dseqrecord (or subclass)

        The last sequence is assumed to be the template, all preceding
        sequences are assumed to be primers.  A single Amplicon may also be
        given; its own primers are then re-annealed to it.
    limit : int = 13, optional
        Limit length of the annealing part of the primers (forwarded to
        Anneal together with any other keyword argument).

    Returns
    -------
    product : Amplicon
        An :class:`pydna.amplicon.Amplicon` object representing the PCR
        product.  The direction of the PCR product will be the same as for
        the template sequence.

    Raises
    ------
    TypeError
        If an argument is not one of the accepted sequence types.
    ValueError
        If no PCR product, or more than one PCR product, is formed.

    Notes
    -----
    This is a powerful function, use with care!

    Examples
    --------
    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.readers import read
    >>> from pydna.amplify import pcr
    >>> from pydna.primer import Primer
    >>> template = Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
    >>> from Bio.SeqRecord import SeqRecord
    >>> p1 = Primer("tacactcaccgtctatcattatc")
    >>> p2 = Primer("cgactgtatcatctgatagcac").reverse_complement()
    >>> pcr(p1, p2, template)
    Amplicon(51)
    >>> pcr([p1, p2], template)
    Amplicon(51)
    >>> pcr((p1,p2,), template)
    Amplicon(51)
    """
    # Normalise every argument to a record-like object.
    sequences = []
    for item in _flatten(args):
        if hasattr(item, "watson"):
            # object exposing a watson strand: wrap that strand only
            record = _SeqRecord(_Seq(item.watson))
        elif hasattr(item, "transcribe"):
            # Seq-like object
            record = _SeqRecord(item)
        elif isinstance(item, str):
            record = _SeqRecord(_Seq(item))
        elif hasattr(item, "features"):
            # already a record, used as it is
            record = item
        else:
            raise TypeError("arguments need to be a string, Bio.Seq, SeqRecord"
                            ", Primer, Dseqrecord or Amplicon object")
        sequences.append(record)

    # A single Amplicon object: reuse its primers with itself as template.
    if len(sequences) == 1 and hasattr(sequences[0], "forward_primer"):
        amplicon = sequences[0]
        sequences = [amplicon.forward_primer, amplicon.reverse_primer, amplicon]

    # The template has to carry a double stranded sequence.
    template = sequences[-1]
    if not hasattr(template.seq, "watson"):
        template = _Dseqrecord(template)

    annealing = Anneal(sequences[:-1], template, **kwargs)
    found = annealing.products

    if len(found) == 1:
        return found[0]
    if not found:
        raise ValueError("No PCR product! {}".format(annealing.report()))
    raise ValueError("PCR not specific! {}".format(annealing.report()))
def products(self):
    """Return the amplicons formed by each forward/reverse primer pair.

    A non-empty result from an earlier call is returned as it is.
    Otherwise one product is built for every combination of a primer in
    ``self.forward_primers`` and a primer in ``self.reverse_primers``:
    the stretch of template between the two primers is cut out and the
    primer tails are added to both ends.  For a circular template the
    template is first shifted to the forward primer and doubled, so that
    a product spanning the origin can be sliced out.

    Name and id of a product are taken from the "label" or "note"
    qualifier of the first feature covering the whole amplified stretch,
    then from ``self.kwargs``, then from a generated default.

    Returns
    -------
    list
        The amplicons, also stored in ``self._products``.
    """
    if self._products:
        return self._products

    self._products = []

    for fwd in self.forward_primers:
        for rev in self.reverse_primers:
            template = self.template

            if template.circular:
                template_len = len(template)
                # start the template at the forward primer and double it
                doubled = template.shifted(fwd.position - fwd._fp)[:] * 2
                # join feature parts that meet exactly one template length
                # apart into one continuous location
                for feature in doubled.features:
                    parts = feature.location.parts
                    for left, right in zip(parts, parts[1:]):
                        if left.end == right.start + template_len:
                            feature.location = _FeatureLocation(
                                left.start,
                                right.end + template_len,
                                strand=feature.location.strand,
                            )
                if fwd.position > rev.position:
                    stop = (template_len - fwd.position
                            + rev.position + rev._fp + fwd._fp)
                else:
                    stop = rev.position + rev._fp - (fwd.position - fwd._fp)
                amplified = doubled[:stop]
            else:
                amplified = template[fwd.position - fwd._fp:rev.position + rev._fp]

            product = (_Dseqrecord(fwd.tail)
                       + amplified
                       + _Dseqrecord(rev.tail).reverse_complement())

            # features covering the amplified stretch from end to end
            amplified_len = len(amplified)
            spanning = [
                feature for feature in amplified.features
                if feature.location.start == 0
                and feature.location.end == amplified_len
            ]

            label_text = ""
            if spanning:
                qualifiers = spanning[0].qualifiers
                for key in ("label", "note"):
                    if key in qualifiers:
                        label_text = " ".join(qualifiers[key])
                        break

            from pydna.utils import (
                identifier_from_string as _identifier_from_string,
            )

            # TODO: clean this up
            short_identifier = _identifier_from_string(label_text)[:16]
            product.name = (short_identifier
                            or self.kwargs.get("name")
                            or "{}bp_PCR_prod".format(len(product))[:16])
            product.id = (short_identifier
                          or self.kwargs.get("id")
                          or "{}bp {}".format(str(len(product))[:14],
                                              product.seguid()))
            product.description = (
                self.kwargs.get("description")
                or "pcr product_{}_{}".format(fwd.description, rev.description)
            )

            self._products.append(
                _Amplicon(product,
                          template=template,
                          forward_primer=fwd,
                          reverse_primer=rev,
                          **self.kwargs)
            )

    return self._products
def assembly_fragments(f, overlap=35, maxlink=40):
    '''Return the fragments in f with primer tails added so that they can be fused.

    This function returns a list of :mod:`pydna.amplicon.Amplicon` objects where
    primers have been modified with tails so that the fragments can be fused in
    the order they appear in the list by for example Gibson assembly or homologous
    recombination.

    Given that we have two linear :mod:`pydna.amplicon.Amplicon` objects a and b
    we can modify the reverse primer of a and forward primer of b with tails to allow
    fusion by fusion PCR, Gibson assembly or in-vivo homologous recombination.
    The basic requirements for the primers for the three techniques are the same.

    ::

                               <-->

      _________ a _________           __________ b ________
     /                     \\         /                     \\
     agcctatcatcttggtctctgca         TTTATATCGCATGACTCTTCTTT
     |||||||||||||||||||||||         |||||||||||||||||||||||
                      <gacgt                          <AGAAA
     agcct>                          TTTAT>
     |||||||||||||||||||||||         |||||||||||||||||||||||
     tcggatagtagaaccagagacgt         AAATATAGCGTACTGAGAAGAAA

          agcctatcatcttggtctctgcaTTTATATCGCATGACTCTTCTTT
          ||||||||||||||||||||||||||||||||||||||||||||||
          tcggatagtagaaccagagacgtAAATATAGCGTACTGAGAAGAAA
          \\___________________ c ______________________/

    Design tailed primers incorporating a part of the next or previous fragment to be assembled.

    ::

     agcctatcatcttggtctctgca
     |||||||||||||||||||||||
                     gagacgtAAATATA

     |||||||||||||||||||||||
     tcggatagtagaaccagagacgt

                            TTTATATCGCATGACTCTTCTTT
                            |||||||||||||||||||||||

                     ctctgcaTTTATAT
                            |||||||||||||||||||||||
                            AAATATAGCGTACTGAGAAGAAA

    PCR products with flanking sequences are formed in the PCR process.

    ::

     agcctatcatcttggtctctgcaTTTATAT
     ||||||||||||||||||||||||||||||
     tcggatagtagaaccagagacgtAAATATA
                     \\____________/

                        identical
                        sequences
                      ____________
                     /            \\
                     ctctgcaTTTATATCGCATGACTCTTCTTT
                     ||||||||||||||||||||||||||||||
                     gagacgtAAATATAGCGTACTGAGAAGAAA

    The fragments can be fused by any of the techniques mentioned earlier to form c:

    ::

     agcctatcatcttggtctctgcaTTTATATCGCATGACTCTTCTTT
     ||||||||||||||||||||||||||||||||||||||||||||||
     tcggatagtagaaccagagacgtAAATATAGCGTACTGAGAAGAAA

    The first argument of this function is a list of sequence objects containing
    Amplicons and other similar objects.

    **At least every second sequence object needs to be an Amplicon**

    This rule exists because if a sequence object that is not a PCR product
    is to be fused with another fragment, that other fragment needs to be an Amplicon
    so that the primer of the other object can be modified to include the whole stretch
    of sequence homology needed for the fusion. See the example below where a is a
    non-amplicon (a linear plasmid vector for instance)

    ::

      _________ a _________           __________ b ________
     /                     \\         /                     \\
     agcctatcatcttggtctctgca   <-->  TTTATATCGCATGACTCTTCTTT
     |||||||||||||||||||||||         |||||||||||||||||||||||
     tcggatagtagaaccagagacgt                          <AGAAA
                                     TTTAT>
                                     |||||||||||||||||||||||
                               <-->  AAATATAGCGTACTGAGAAGAAA

          agcctatcatcttggtctctgcaTTTATATCGCATGACTCTTCTTT
          ||||||||||||||||||||||||||||||||||||||||||||||
          tcggatagtagaaccagagacgtAAATATAGCGTACTGAGAAGAAA
          \\___________________ c ______________________/

    In this case only the forward primer of b is fitted with a tail with a part of a:

    ::

     agcctatcatcttggtctctgca
     |||||||||||||||||||||||
     tcggatagtagaaccagagacgt

                            TTTATATCGCATGACTCTTCTTT
                            |||||||||||||||||||||||
                                             <AGAAA
              tcttggtctctgcaTTTATAT
                            |||||||||||||||||||||||
                            AAATATAGCGTACTGAGAAGAAA

    PCR products with flanking sequences are formed in the PCR process.

    ::

     agcctatcatcttggtctctgcaTTTATAT
     ||||||||||||||||||||||||||||||
     tcggatagtagaaccagagacgtAAATATA
                     \\____________/

                        identical
                        sequences
                      ____________
                     /            \\
                     ctctgcaTTTATATCGCATGACTCTTCTTT
                     ||||||||||||||||||||||||||||||
                     gagacgtAAATATAGCGTACTGAGAAGAAA

    The fragments can be fused by for example Gibson assembly:

    ::

     agcctatcatcttggtctctgcaTTTATAT
     ||||||||||||||||||||||||||||||
     tcggatagtagaacca

                                  TCGCATGACTCTTCTTT
                     ||||||||||||||||||||||||||||||
                     gagacgtAAATATAGCGTACTGAGAAGAAA

    to form c:

    ::

     agcctatcatcttggtctctgcaTTTATATCGCATGACTCTTCTTT
     ||||||||||||||||||||||||||||||||||||||||||||||
     tcggatagtagaaccagagacgtAAATATAGCGTACTGAGAAGAAA

    The first argument of this function is a list of sequence objects containing
    Amplicons and other similar objects.

    The overlap argument controls how many base pairs of overlap are required between
    adjacent sequence fragments. In the junction between Amplicons, tails with the
    length of about half of this value are added to the two primers
    closest to the junction.

    ::

     >       <
     Amplicon1
              Amplicon2
              >       <

              ⇣

     >       <-
     Amplicon1
              Amplicon2
             ->       <

    In the case of an Amplicon adjacent to a Dseqrecord object, the tail will
    be twice as long (1*overlap) since the
    recombining sequence is present entirely on this primer:

    ::

     Dseqrecd1
              Amplicon1
              >       <

              ⇣

     Dseqrecd1
              Amplicon1
            -->       <

    Note that if the sequence of DNA fragments starts or stops with an Amplicon,
    the very first and very last primer will not be modified i.e. assemblies are
    always assumed to be linear. There are simple tricks around that for circular
    assemblies depicted in the last two examples below.

    The maxlink argument controls the cut off length for sequences that will be
    synthesized by adding them to primers for the adjacent fragment(s). The
    argument list may contain short spacers (such as spacers between fusion proteins).

    ::

     Example 1: Linear assembly of PCR products (pydna.amplicon.Amplicon class objects) ------

     >       <         >       <
     Amplicon1         Amplicon3
              Amplicon2         Amplicon4
              >       <         >       <

                           ⇣
                           pydna.design.assembly_fragments
                           ⇣

     >       <-       ->       <-                      pydna.assembly.Assembly
     Amplicon1         Amplicon3
              Amplicon2         Amplicon4     ➤  Amplicon1Amplicon2Amplicon3Amplicon4
             ->       <-       ->       <

     Example 2: Linear assembly of alternating Amplicons and other fragments

     >       <         >       <
     Amplicon1         Amplicon2
              Dseqrecd1         Dseqrecd2

                           ⇣
                           pydna.design.assembly_fragments
                           ⇣

     >      <--       -->     <--                      pydna.assembly.Assembly
     Amplicon1         Amplicon2
              Dseqrecd1         Dseqrecd2     ➤  Amplicon1Dseqrecd1Amplicon2Dseqrecd2

     Example 3: Linear assembly of alternating Amplicons and other fragments

     Dseqrecd1         Dseqrecd2
              Amplicon1         Amplicon2
              >       <       -->       <

                           ⇣
                           pydna.design.assembly_fragments
                           ⇣
                                                       pydna.assembly.Assembly
     Dseqrecd1         Dseqrecd2
              Amplicon1         Amplicon2     ➤  Dseqrecd1Amplicon1Dseqrecd2Amplicon2
            -->       <--     -->       <

     Example 4: Circular assembly of alternating Amplicons and other fragments

                      ->       <==
     Dseqrecd1         Amplicon2
              Amplicon1         Dseqrecd1
            -->       <-
                           ⇣
                           pydna.design.assembly_fragments
                           ⇣
                                                       pydna.assembly.Assembly
                      ->       <==
     Dseqrecd1         Amplicon2                  -Dseqrecd1Amplicon1Amplicon2-
              Amplicon1                   ➤      |                             |
            -->       <-                          -----------------------------

     ------ Example 5: Circular assembly of Amplicons

     >       <         >       <
     Amplicon1         Amplicon3
              Amplicon2         Amplicon1
              >       <         >       <

                           ⇣
                           pydna.design.assembly_fragments
                           ⇣

     >       <=       ->       <-
     Amplicon1         Amplicon3
              Amplicon2         Amplicon1
             ->       <-       +>       <

                           ⇣
                   make new Amplicon using the Amplicon1.template and
                   the last fwd primer and the first rev primer.
                           ⇣
                                                       pydna.assembly.Assembly
     +>       <=       ->       <-
     Amplicon1         Amplicon3                  -Amplicon1Amplicon2Amplicon3-
              Amplicon2                   ➤      |                             |
             ->       <-                          -----------------------------

    Parameters
    ----------

    f : list of :mod:`pydna.amplicon.Amplicon` and other Dseqrecord like objects
        list of Amplicon and Dseqrecord objects for which fusion primers should be constructed.

    overlap : int, optional
        Length of required overlap between fragments.

    maxlink : int, optional
        Maximum length of spacer sequences that may be present in f. These will be included in tails for designed primers.

    Returns
    -------
    seqs : list of :mod:`pydna.amplicon.Amplicon` and other Dseqrecord like objects :mod:`pydna.amplicon.Amplicon` objects

        ::

          [Amplicon1,
           Amplicon2, ...]

    Raises
    ------
    ValueError
        If two fragments longer than maxlink follow each other (ignoring
        shorter fragments) and neither of them has a ``template`` attribute.

    Examples
    --------

    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.design import primer_design
    >>> a=primer_design(Dseqrecord("atgactgctaacccttccttggtgttgaacaagatcgacgacatttcgttcgaaacttacgatg"))
    >>> b=primer_design(Dseqrecord("ccaaacccaccaggtaccttatgtaagtacttcaagtcgccagaagacttcttggtcaagttgcc"))
    >>> c=primer_design(Dseqrecord("tgtactggtgctgaaccttgtatcaagttgggtgttgacgccattgccccaggtggtcgtttcgtt"))
    >>> from pydna.design import assembly_fragments
    >>> # We would like a circular recombination, so the first sequence has to be repeated
    >>> fa1,fb,fc,fa2 = assembly_fragments([a,b,c,a])
    >>> # Since all fragments are Amplicons, we need to extract the rp of the 1st and fp of the last fragments.
    >>> from pydna.amplify import pcr
    >>> fa = pcr(fa2.forward_primer, fa1.reverse_primer, a)
    >>> [fa,fb,fc]
    [Amplicon(100), Amplicon(101), Amplicon(102)]
    >>> from pydna.assembly import Assembly
    >>> assemblyobj = Assembly([fa,fb,fc])
    >>> assemblyobj
    Assembly
    fragments....: 100bp 101bp 102bp
    limit(bp)....: 25
    G.nodes......: 6
    algorithm....: common_sub_strings
    >>> assemblyobj.assemble_linear()
    [Contig(-231), Contig(-166), Contig(-36)]
    >>> assemblyobj.assemble_circular()[0].cseguid()
    'V3Mi8zilejgyoH833UbjJOtDMbc'
    >>> (a+b+c).looped().cseguid()
    'V3Mi8zilejgyoH833UbjJOtDMbc'
    >>> print(assemblyobj.assemble_circular()[0].figure())
     -|100bp_PCR_prod|36
    |                 \\/
    |                 /\\
    |                 36|101bp_PCR_prod|36
    |                                   \\/
    |                                   /\\
    |                                   36|102bp_PCR_prod|36
    |                                                     \\/
    |                                                     /\\
    |                                                     36-
    |                                                        |
     --------------------------------------------------------
    >>>

    '''
    # sanity check for arguments
    # Only fragments longer than maxlink are considered; in every pair of
    # neighbours among those, at least one must have a "template" attribute
    # (the attribute used throughout this function to recognise an Amplicon).
    nf = [item for item in f if len(item)>maxlink]
    if not all(hasattr(i[0],"template") or hasattr(i[1],"template") for i in zip(nf,nf[1:])):
        raise ValueError("Every second fragment larger than maxlink has to be an Amplicon object")

    _module_logger.debug("### assembly fragments ###")
    _module_logger.debug("overlap = %s", overlap)
    _module_logger.debug("max_link = %s", maxlink)

    # Work on copies so that the primer attributes rebound below are set on
    # the copies and not on the objects passed in by the caller.
    f = [_copy.copy(f) for f in f]

    first_fragment_length = len(f[0])
    last_fragment_length = len(f[-1])

    if first_fragment_length<=maxlink:
        # first fragment should be removed and added to second fragment (new first fragment) forward primer
        f[1].forward_primer = f[0].seq._data + f[1].forward_primer
        _module_logger.debug("first fragment removed since len(f[0]) = %s", first_fragment_length)
        f=f[1:]
    else:
        _module_logger.debug("first fragment stays since len(f[0]) = %s", first_fragment_length)

    if last_fragment_length<=maxlink:
        # last fragment is removed; its reverse complement is prepended to the
        # reverse primer of the fragment before it
        f[-2].reverse_primer = f[-1].seq.reverse_complement()._data + f[-2].reverse_primer
        f=f[:-1]
        # note: len(f) is read after the last fragment was dropped
        _module_logger.debug("last fragment removed since len(f[%s]) = %s", len(f), last_fragment_length)
    else:
        _module_logger.debug("last fragment stays since len(f[%s]) = %s", len(f),last_fragment_length)

    # zero length placeholder that replaces linkers absorbed into primers;
    # zero length items are filtered out after the loop
    empty = _Dseqrecord("")

    _module_logger.debug(f)
    _module_logger.debug("loop through fragments in groups of three:")
    # tail added to each of the two primers at a junction between two Amplicons
    tail_length = _math.ceil(overlap/2)

    for i in range(len(f)-1):
        first = f[i]
        secnd = f[i+1]
        secnd_len = len(secnd)
        _module_logger.debug( "first = %s", str(first.seq))
        _module_logger.debug( "secnd = %s", str(secnd.seq))

        if secnd_len <= maxlink:
            _module_logger.debug("secnd is smaller or equal to maxlink; should be added to primer(s)")
            # secnd is a short linker; the fragment after it is needed too
            third = f[i+2]
            _module_logger.debug( "third = %s", str(third.seq))
            if hasattr(f[i], "template") and hasattr(third, "template"):
                _module_logger.debug("secnd is is flanked by amplicons, so half of secnd should be added each flanking primers")
                # linker is split at secnd_len//2 between the two flanking primers
                first.reverse_primer = secnd.seq.reverse_complement()._data[secnd_len//2:] + first.reverse_primer
                third.forward_primer = secnd.seq._data[secnd_len//2:] + third.forward_primer
                # then each primer gets a further tail of tail_length taken
                # across the junction (other linker part plus flanking fragment)
                lnk = (third.seq.reverse_complement()._data+secnd.reverse_complement().seq._data[:secnd_len//2])[-tail_length:]
                _module_logger.debug("1 %s", lnk)
                first.reverse_primer = lnk + first.reverse_primer
                lnk = (first.seq._data + secnd.seq._data[:secnd_len//2])[-tail_length:]
                _module_logger.debug("2 %s", lnk)
                third.forward_primer = lnk + third.forward_primer
            elif hasattr(first , "template"):
                # only first has primers: whole linker plus the first
                # `overlap` positions of third go on first's reverse primer
                first.reverse_primer = secnd.seq.reverse_complement()._data + first.reverse_primer
                lnk = str(third.seq[:overlap].reverse_complement())
                first.reverse_primer = lnk + first.reverse_primer
            elif hasattr(third , "template"):
                # only third has primers: whole linker plus the last
                # `overlap` positions of first go on third's forward primer
                third.forward_primer = secnd.seq._data + third.forward_primer
                lnk = str(first.seq[-overlap:])
                third.forward_primer = lnk + third.forward_primer
            # the linker now lives in the primer tails
            secnd=empty
            f[i+2] = third
        else: # secnd is larger than maxlink
            if hasattr(first, "template") and hasattr(secnd, "template"):
                # both have primers: each primer gets a tail of tail_length
                lnk = str(first.seq[-tail_length:])
                #_module_logger.debug("4 %s", lnk)
                secnd.forward_primer = lnk + secnd.forward_primer
                lnk = str(secnd.seq[:tail_length].reverse_complement())
                #_module_logger.debug("5 %s", lnk)
                first.reverse_primer = lnk + first.reverse_primer
            elif hasattr(first , "template"):
                # only first has primers: full `overlap` tail on its reverse primer
                lnk = str(secnd.seq[:overlap].reverse_complement())
                #_module_logger.debug("6 %s", lnk)
                first.reverse_primer = lnk + first.reverse_primer
            elif hasattr(secnd , "template"):
                # only secnd has primers: full `overlap` tail on its forward primer
                lnk = str(first.seq[-overlap:])
                #_module_logger.debug("7 %s", lnk)
                secnd.forward_primer = lnk + secnd.forward_primer
        f[i] = first
        f[i+1] = secnd

    _module_logger.debug("loop ended")

    # drop zero length items (the placeholders for absorbed linkers)
    f = [item for item in f if len(item)]

    # fragments with a template are re-amplified with their modified primers,
    # everything else is returned as it is
    return [_pcr(p.forward_primer, p.reverse_primer, p.template) if hasattr(p, "template") else p for p in f]
circular=True) for cp in cps.values()), key=len, reverse=True)

    def __repr__(self):
        """Return a short multi-line summary of the assembly.

        The summary lists the length of the "mixed" entry of every item in
        self.fragments, self.limit, the value of self.G.order() and the
        name of self.algorithm, wrapped by _pretty_str.
        """
        # https://pyformat.info
        return _pretty_str( "Assembly\n"
                            "fragments..: {sequences}\n"
                            "limit(bp)..: {limit}\n"
                            "G.nodes....: {nodes}\n"
                            "algorithm..: {al}".format(sequences = " ".join("{}bp".format(len(x["mixed"])) for x in self.fragments),
                                                       limit = self.limit,
                                                       nodes = self.G.order(),
                                                       al = self.algorithm.__name__))


# Three short example sequences named "a", "b" and "c".  The end of each
# sequence shares a stretch with the start of the next one (a -> b -> c),
# and the end of "c" shares a stretch with the start of "a".
example_fragments = ( _Dseqrecord("AacgatCAtgctcc", name ="a"),
                      _Dseqrecord("TtgctccTAAattctgc", name ="b"),
                      _Dseqrecord("CattctgcGAGGacgatG",name ="c") )

# NOTE(review): presumably the linear products expected when
# example_fragments are assembled - confirm against the code that uses them.
linear_results = ( _Dseqrecord("AacgatCAtgctccTAAattctgcGAGGacgatG", name ="abc"),
                   _Dseqrecord("ggagcaTGatcgtCCTCgcagaatG", name ="ac_rc"),
                   _Dseqrecord("AacgatG", name ="ac") )

# NOTE(review): presumably the circular products expected from the same
# fragments ("abc" and a second record named "abc_rc") - confirm against usage.
circular_results = ( _Dseqrecord("acgatCAtgctccTAAattctgcGAGG", name ="abc", circular=True),
                     _Dseqrecord("ggagcaTGatcgtCCTCgcagaatTTA", name ="abc_rc", circular=True))

if __name__=="__main__":
# !!!!!!!!!!!![]!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!![]!!!!!!!!!!!!! # ^^^^^^^^^^^^[]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[]^^^^^^^^^^^^^ # [] [] # [] [] # [] [] # ~~^-~^_~^~/ \~^-~^~_~^-~_^~-^~_^~~-^~_~^~-~_~-^~_^/ \~^-~_~^-~~- # ~ _~~- ~^-^~-^~~- ^~_^-^~~_ -~^_ -~_-~~^- _~~_~-^_ ~^-^~~-_^-~ ~^ """Assembly of sequences by GoldenGate ligation assebmly.""" from pydna.dseqrecord import Dseqrecord as _Dseqrecord from copy import deepcopy as _deepcopy import logging as _logging _module_logger = _logging.getLogger("pydna." + __name__) from Bio.Restriction import BsaI, BsmBI, BbsI, FokI DNA = _Dseqrecord("gatcGAAGACtagagtctgattcg") a,b = DNA.cut(BbsI) assert a+b == DNA # MoClo