def cut(self, *enzymes):
    """Return the linear Dseq fragments produced by a restriction digestion.

    If none of the enzymes cut the sequence, an empty tuple is returned.

    Parameters
    ----------
    enzymes : enzyme object or iterable of such objects
        One or more Bio.Restriction.XXX restriction objects, or iterables
        of such objects. A single argument exposing an ``intersection``
        attribute is treated as a RestrictionBatch.

    Returns
    -------
    frags : tuple
        tuple of Dseq objects formed by the digestion

    Examples
    --------

    >>> from pydna.dseq import Dseq
    >>> seq=Dseq("ggatccnnngaattc")
    >>> seq
    Dseq(-15)
    ggatccnnngaattc
    cctaggnnncttaag
    >>> from Bio.Restriction import BamHI,EcoRI
    >>> type(seq.cut(BamHI))
    <class 'tuple'>
    >>> for frag in seq.cut(BamHI): print(repr(frag))
    Dseq(-5)
    g
    cctag
    Dseq(-14)
    gatccnnngaattc
        gnnncttaag
    >>> seq.cut(EcoRI, BamHI) == seq.cut(BamHI, EcoRI)
    True
    >>> a,b,c = seq.cut(EcoRI, BamHI)
    >>> a+b+c
    Dseq(-15)
    ggatccnnngaattc
    cctaggnnncttaag
    """
    # Flanking "n" padding placed around the searched sequence in the
    # circular case; len(pad) is subtracted again from the reported
    # positions further down.
    pad = "n" * 50

    # Sequence used only to decide which enzymes cut at all.
    # NOTE(review): self.mung() presumably strips single-stranded
    # overhangs so that only double-stranded sites count -- confirm.
    if self.linear:
        dsseq = self.mung()
    else:
        dsseq = Dseq.from_string(self._data, linear=True, circular=False)

    if len(enzymes) == 1 and hasattr(enzymes[0], "intersection"):
        # RestrictionBatch
        enzymecuts = []
        for e in enzymes[0]:
            # For a circular sequence the first e.size - 1 bases are
            # appended to the end, so that a site straddling the origin
            # is also seen by the (linear) search.
            cuts = e.search(
                _Seq(pad + dsseq.watson + dsseq.watson[:e.size - 1] + pad)
                if self.circular else dsseq)
            enzymecuts.append((cuts, e))
        # Tuples sort on the list of cut positions first; ties fall
        # through to comparing the enzymes themselves.
        enzymecuts.sort()
        # Keep only the enzymes that cut at least once.
        enzymes = [e for (c, e) in enzymecuts if c]
    else:
        # Flatten the arguments, drop duplicate enzymes while keeping
        # their order (dict.fromkeys), and keep only those that cut.
        enzymes = [
            e for e in list(dict.fromkeys(_flatten(enzymes)))
            if e.search(
                _Seq(pad + dsseq.watson + dsseq.watson[:e.size - 1] + pad)
                if self.circular else dsseq)
        ]  # flatten

    if not enzymes:
        return ()

    if self.linear:
        frags = [self]
    else:
        # Circular sequence: open the circle at one cut so that the rest
        # of the digestion can operate on a linear fragment.
        l = len(self)
        for e in enzymes:
            # Cut positions on each strand, shifted back by the padding
            # and converted from 1-based to 0-based; highest first.
            wpos = [
                x - len(pad) - 1 for x in e.search(
                    _Seq(pad + self.watson + self.watson[:e.size - 1]) + pad)
            ][::-1]
            cpos = [
                x - len(pad) - 1 for x in e.search(
                    _Seq(pad + self.crick + self.crick[:e.size - 1]) + pad)
            ][::-1]

            for w, c in _itertools.product(wpos, cpos):
                # Accept the first watson/crick position pair that is
                # consistent with this enzyme's overhang (modulo the
                # sequence length).
                if w % len(self) == (self.length - c + e.ovhg) % len(self):
                    # Rotate both strands so they start at the cut.
                    frags = [
                        Dseq(
                            self.watson[w % l:] + self.watson[:w % l],
                            self.crick[c % l:] + self.crick[:c % l],
                            ovhg=e.ovhg,
                            pos=w,
                        )
                    ]
                    break
            else:
                # No matching pair for this enzyme: try the next one.
                continue
            # A linearised fragment was built: stop looking.
            break

    newfrags = []

    # Apply the enzymes one after another; each enzyme digests every
    # fragment left over from the previous round.
    for enz in enzymes:
        for frag in frags:
            # 0-based cut positions on each strand of this fragment.
            ws = [x - 1 for x in enz.search(_Seq(frag.watson) + "N")]
            cs = [x - 1 for x in enz.search(_Seq(frag.crick) + "N")]

            # Pair up watson and crick cuts that belong to the same site,
            # accounting for the overhangs of the fragment and the enzyme.
            sitepairs = [(sw, sc)
                         for sw, sc in _itertools.product(ws, cs[::-1])
                         if (sw + max(0, frag.ovhg) - max(0, enz.ovhg) ==
                             len(frag.crick) - sc - min(0, frag.ovhg) +
                             min(0, enz.ovhg))]

            # Sentinel pair: makes the last slice run to the end of the
            # watson strand and from the start of the crick strand.
            sitepairs.append((self.length, 0))

            # First piece: from the fragment start up to the first cut
            # (or the whole fragment if only the sentinel is present);
            # it keeps the fragment's own overhang and position.
            w2, c1 = sitepairs[0]
            newfrags.append(
                Dseq(frag.watson[:w2], frag.crick[c1:], ovhg=frag.ovhg,
                     pos=frag.pos))

            # Remaining pieces: between consecutive cuts; each starts
            # with the overhang left by the enzyme.
            for (w1, c2), (w2, c1) in zip(sitepairs[:-1], sitepairs[1:]):
                newfrags.append(
                    Dseq(
                        frag.watson[w1:w2],
                        frag.crick[c1:c2],
                        ovhg=enz.ovhg,
                        pos=frag.pos + w1 - max(0, enz.ovhg),
                    ))
        frags = newfrags
        newfrags = []

    return tuple(frags)
def pcr(*args, **kwargs):
    """Simulate a PCR and return its single product.

    pcr is a convenience wrapper around the Anneal class that simplifies
    its usage, especially from the command line. A ValueError is raised
    unless exactly one PCR product is formed.

    args may be sequences given as separate arguments, an iterable of
    sequences, or an iterable of iterables; it is greedily flattened.

    Parameters
    ----------
    args : iterable containing sequence objects
        Several arguments are also accepted.
    limit : int = 13, optional
        limit length of the annealing part of the primers. Passed on to
        Anneal together with any other keyword arguments.

    Notes
    -----
    sequences in args could be of type:

    * string
    * Seq
    * SeqRecord (or subclass)
    * Dseqrecord (or subclass)

    The last sequence will be assumed to be the template while
    all preceding sequences will be assumed to be primers.

    If the only argument is an object with a ``forward_primer``
    attribute (an Amplicon), its own forward and reverse primers are
    annealed to it again.

    This is a powerful function, use with care!

    Returns
    -------
    product : Amplicon
        An :class:`pydna.amplicon.Amplicon` object representing the PCR
        product. The direction of the PCR product will be the same as
        for the template sequence.

    Raises
    ------
    TypeError
        If an argument is not one of the accepted sequence types.
    ValueError
        If no product, or more than one product, is formed.

    Examples
    --------

    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.readers import read
    >>> from pydna.amplify import pcr
    >>> from pydna.primer import Primer
    >>> template = Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
    >>> from Bio.SeqRecord import SeqRecord
    >>> p1 = Primer("tacactcaccgtctatcattatc")
    >>> p2 = Primer("cgactgtatcatctgatagcac").reverse_complement()
    >>> pcr(p1, p2, template)
    Amplicon(51)
    >>> pcr([p1, p2], template)
    Amplicon(51)
    >>> pcr((p1,p2,), template)
    Amplicon(51)
    """
    # Normalise every argument to a record-like object.
    records = []
    for item in _flatten(args):
        if hasattr(item, "watson"):
            # Double-stranded sequence: use its watson strand.
            item = _SeqRecord(_Seq(item.watson))
        elif hasattr(item, "transcribe"):
            # Seq-like object.
            item = _SeqRecord(item)
        elif isinstance(item, str):
            item = _SeqRecord(_Seq(item))
        elif not hasattr(item, "features"):
            # Anything with "features" is already record-like and is
            # used as is; everything else is rejected.
            raise TypeError("arguments need to be a string, Bio.Seq, SeqRecord"
                            ", Primer, Dseqrecord or Amplicon object")
        records.append(item)

    # A single Amplicon object: reuse its own primers, with the amplicon
    # itself as the template.
    if len(records) == 1 and hasattr(records[0], "forward_primer"):
        amplicon = records[0]
        records = [amplicon.forward_primer, amplicon.reverse_primer, amplicon]

    # The last record is the template; make sure it is double stranded.
    template = records[-1]
    if not hasattr(template.seq, "watson"):
        template = _Dseqrecord(template)

    annealing = Anneal(records[:-1], template, **kwargs)
    products = annealing.products
    count = len(products)

    if count == 1:
        return products[0]

    message = "No PCR product! {}" if count == 0 else "PCR not specific! {}"
    raise ValueError(message.format(annealing.report()))
def number_of_cuts(self, *enzymes):
    """Return the total number of cuts made by the given enzymes.

    The arguments are flattened, each enzyme's ``search`` is run on
    ``self.seq``, and the lengths of the results are added up.

    Parameters
    ----------
    enzymes : enzyme object or iterable of such objects
        Restriction enzymes, given individually or in (nested) iterables.

    Returns
    -------
    int
        Sum of the number of sites found for every enzyme.
    """
    total = 0
    for enz in _flatten(enzymes):
        total += len(enz.search(self.seq))
    return total