Ejemplo n.º 1
0
    def cut(self, *enzymes):
        """Digest the sequence with restriction enzymes and return the fragments.

        The fragments are returned as a tuple of linear Dseq objects.
        If there are no cuts, an empty tuple is returned.

        Parameters
        ----------

        enzymes : enzyme object or iterable of such objects
            One or more Bio.Restriction.XXX restriction objects, or iterables
            of them. A single object that has an ``intersection`` attribute
            (a RestrictionBatch) is iterated over directly.

        Returns
        -------
        frags : tuple
            tuple of Dseq objects formed by the digestion


        Examples
        --------

        >>> from pydna.dseq import Dseq
        >>> seq=Dseq("ggatccnnngaattc")
        >>> seq
        Dseq(-15)
        ggatccnnngaattc
        cctaggnnncttaag
        >>> from Bio.Restriction import BamHI,EcoRI
        >>> type(seq.cut(BamHI))
        <class 'tuple'>
        >>> for frag in seq.cut(BamHI): print(repr(frag))
        Dseq(-5)
        g
        cctag
        Dseq(-14)
        gatccnnngaattc
            gnnncttaag
        >>> seq.cut(EcoRI, BamHI) ==  seq.cut(BamHI, EcoRI)
        True
        >>> a,b,c = seq.cut(EcoRI, BamHI)
        >>> a+b+c
        Dseq(-15)
        ggatccnnngaattc
        cctaggnnncttaag
        >>>

        """

        # Run of "n" placed on both sides of the sequence when a circular
        # molecule is searched; len(pad) is subtracted again from the
        # positions reported by the search further down.
        pad = "n" * 50

        # Sequence object used only to decide which enzymes cut at all.
        if self.linear:
            # mung() is defined elsewhere; presumably it returns the sequence
            # without single-stranded overhangs -- TODO confirm.
            dsseq = self.mung()
        else:
            dsseq = Dseq.from_string(self._data, linear=True, circular=False)

        if len(enzymes) == 1 and hasattr(enzymes[0],
                                         "intersection"):  # RestrictionBatch
            enzymecuts = []
            for e in enzymes[0]:
                # Circular case: search the padded watson strand extended by
                # its first e.size - 1 bases (presumably so that sites that
                # span the origin are seen). Linear case: search dsseq.
                cuts = e.search(
                    _Seq(pad + dsseq.watson + dsseq.watson[:e.size - 1] +
                         pad) if self.circular else dsseq)
                enzymecuts.append((cuts, e))
            # Tuples compare by their first element first, so this orders the
            # enzymes by their list of cut positions.
            enzymecuts.sort()
            # Keep only the enzymes that produced at least one cut.
            enzymes = [e for (c, e) in enzymecuts if c]
        else:
            # _flatten is defined elsewhere; dict.fromkeys drops duplicate
            # enzymes while keeping first-seen order. Only enzymes whose
            # search returns something are kept.
            enzymes = [
                e for e in list(dict.fromkeys(_flatten(enzymes))) if e.search(
                    _Seq(pad + dsseq.watson + dsseq.watson[:e.size - 1] +
                         pad) if self.circular else dsseq)
            ]  # flatten

        # None of the enzymes cut: nothing to return.
        if not enzymes:
            return ()

        if self.linear:
            # A linear molecule is itself the single starting fragment.
            frags = [self]
        else:
            # A circular molecule is first opened at one cut so that the
            # generic fragment loop below can work on a linear Dseq.
            l = len(self)
            for e in enzymes:
                # Positions found on each strand, shifted back by the pad
                # length (and by one), in reversed order.
                wpos = [
                    x - len(pad) - 1 for x in e.search(
                        _Seq(pad + self.watson + self.watson[:e.size - 1]) +
                        pad)
                ][::-1]
                cpos = [
                    x - len(pad) - 1 for x in e.search(
                        _Seq(pad + self.crick + self.crick[:e.size - 1]) + pad)
                ][::-1]

                for w, c in _itertools.product(wpos, cpos):
                    # A watson/crick pair is accepted when both positions
                    # agree modulo the sequence length after the crick
                    # position is mirrored and shifted by the enzyme overhang.
                    if w % len(self) == (self.length - c + e.ovhg) % len(self):
                        # Rotate both strands to start at the accepted
                        # positions; the new ends carry the enzyme overhang.
                        frags = [
                            Dseq(
                                self.watson[w % l:] + self.watson[:w % l],
                                self.crick[c % l:] + self.crick[:c % l],
                                ovhg=e.ovhg,
                                pos=w,
                            )
                        ]
                        break
                else:
                    # No pair matched for this enzyme: try the next one.
                    continue
                # A pair matched (inner loop was left with break): stop.
                break
            # NOTE(review): if no enzyme yields a matching pair, frags is
            # never assigned here and the loop below would fail with an
            # unbound local variable -- confirm whether that can happen.

        newfrags = []

        # Digest with one enzyme at a time; the fragments produced by one
        # enzyme are the input for the next.
        for enz in enzymes:
            for frag in frags:

                # Search positions (minus one) on each strand of the fragment.
                # An "N" is appended before searching; NOTE(review): presumably
                # so that a site at the very end is still reported -- confirm.
                ws = [x - 1 for x in enz.search(_Seq(frag.watson) + "N")]
                cs = [x - 1 for x in enz.search(_Seq(frag.crick) + "N")]

                # Keep the (watson, crick) position pairs that satisfy the
                # equality below, which relates the two positions through the
                # overhang of the fragment and the overhang of the enzyme.
                sitepairs = [(sw, sc)
                             for sw, sc in _itertools.product(ws, cs[::-1])
                             if (sw + max(0, frag.ovhg) -
                                 max(0, enz.ovhg) == len(frag.crick) - sc -
                                 min(0, frag.ovhg) + min(0, enz.ovhg))]

                # Sentinel pair: used as the end of the last slice below, it
                # selects the remainder of both strands, so the final piece
                # (or the whole fragment when there are no sites) is emitted.
                sitepairs.append((self.length, 0))

                w2, c1 = sitepairs[0]

                # First piece: from the start of the fragment to the first
                # site; it keeps the fragment's own overhang and position.
                newfrags.append(
                    Dseq(frag.watson[:w2],
                         frag.crick[c1:],
                         ovhg=frag.ovhg,
                         pos=frag.pos))

                # Remaining pieces: between each pair of consecutive sites;
                # their left end was made by the enzyme, hence enz.ovhg.
                for (w1, c2), (w2, c1) in zip(sitepairs[:-1], sitepairs[1:]):
                    newfrags.append(
                        Dseq(
                            frag.watson[w1:w2],
                            frag.crick[c1:c2],
                            ovhg=enz.ovhg,
                            pos=frag.pos + w1 - max(0, enz.ovhg),
                        ))

            frags = newfrags
            newfrags = []

        return tuple(frags)
Ejemplo n.º 2
0
def pcr(*args, **kwargs):
    """Simulate a PCR and return the single product formed.

    pcr is a convenience function for the Anneal class to simplify its
    usage, especially from the command line. If more than one or no PCR
    product is formed, a ValueError is raised.

    args is any iterable of Dseqrecords or an iterable of iterables of
    Dseqrecords. args will be greedily flattened.

    Parameters
    ----------

    args : iterable containing sequence objects
        Several arguments are also accepted.

    limit : int = 13, optional
        limit length of the annealing part of the primers.

    kwargs
        All keyword arguments (such as ``limit``) are passed on unchanged
        to Anneal.

    Notes
    -----

    sequences in args could be of type:

    * string
    * Seq
    * SeqRecord (or subclass)
    * Dseqrecord (or subclass)

    The last sequence will be assumed to be the template while
    all preceding sequences will be assumed to be primers.

    If the only argument is a single object with a ``forward_primer``
    attribute (an Amplicon), its own primers are reused and the object
    itself serves as the template.

    This is a powerful function, use with care!

    Returns
    -------

    product : Amplicon
        An :class:`pydna.amplicon.Amplicon` object representing the PCR
        product. The direction of the PCR product will be the same as for
        the template sequence.

    Raises
    ------

    TypeError
        If an argument is not one of the supported sequence types.

    ValueError
        If no sequence at all is given, if no PCR product is formed or if
        more than one PCR product is formed.

    Examples
    --------

    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.readers import read
    >>> from pydna.amplify import pcr
    >>> from pydna.primer import Primer
    >>> template = Dseqrecord("tacactcaccgtctatcattatctac\
tatcgactgtatcatctgatagcac")
    >>> from Bio.SeqRecord import SeqRecord
    >>> p1 = Primer("tacactcaccgtctatcattatc")
    >>> p2 = Primer("cgactgtatcatctgatagcac").reverse_complement()
    >>> pcr(p1, p2, template)
    Amplicon(51)
    >>> pcr([p1, p2], template)
    Amplicon(51)
    >>> pcr((p1,p2,), template)
    Amplicon(51)
    >>>

    """

    # Normalise every argument to a record-like object. The order of the
    # checks matters: the first matching attribute decides the conversion.
    new = []
    for s in _flatten(args):  # flatten
        if hasattr(s, "watson"):
            # Double-stranded sequence object: use its watson strand.
            s = _SeqRecord(_Seq(s.watson))
        elif hasattr(s, "transcribe"):
            # Seq-like object: wrap it in a record.
            s = _SeqRecord(s)
        elif isinstance(s, str):
            s = _SeqRecord(_Seq(s))
        elif hasattr(s, "features"):
            # Already a record-like object: use as it is.
            pass
        else:
            raise TypeError("arguments need to be a string, Bio.Seq, SeqRecord"
                            ", Primer, Dseqrecord or Amplicon object")
        new.append(s)

    # Fail with a clear message instead of an IndexError on new[-1] below.
    if not new:
        raise ValueError("pcr needs at least one sequence (the template)")

    # A single Amplicon object
    if len(new) == 1 and hasattr(new[0], "forward_primer"):
        new = [new[0].forward_primer, new[0].reverse_primer, new[0]]

    # The last sequence is the template. Convert it explicitly from new[-1]
    # rather than relying on the loop variable still pointing at it.
    template = new[-1]
    if not hasattr(template.seq, "watson"):
        template = _Dseqrecord(template)

    anneal_primers = Anneal(new[:-1], template, **kwargs)
    products = anneal_primers.products

    if len(products) == 1:
        return products[0]
    if len(products) == 0:
        raise ValueError("No PCR product! {}".format(anneal_primers.report()))
    raise ValueError("PCR not specific! {}".format(anneal_primers.report()))
Ejemplo n.º 3
0
 def number_of_cuts(self, *enzymes):
     """Return the total number of sites the given enzymes find in ``self.seq``.

     ``enzymes`` may be restriction enzyme objects or iterables of them; the
     arguments are flattened before each enzyme is searched against
     ``self.seq`` and the number of hits per enzyme is summed.
     """
     total = 0
     for enzyme in _flatten(enzymes):  # flatten
         hits = enzyme.search(self.seq)
         total += len(hits)
     return total