Beispiel #1
0
    def looped(self):
        """
        Returns a circular version of the Dseqrecord object. The
        underlying Dseq object has to have compatible ends.

        The returned record carries its own copies of the features. They
        are shifted by the overhang of the linear sequence, and a feature
        that ends up crossing the new origin is split into a two-part
        compound location. The features of the original record are left
        untouched.

        Returns
        -------
        A copy of this record whose ``_seq`` is ``self.seq.looped()``.

        Examples
        --------
        >>> from pydna.dseqrecord import Dseqrecord
        >>> a=Dseqrecord("aaa")
        >>> a
        Dseqrecord(-3)
        >>> b=a.looped()
        >>> b
        Dseqrecord(o3)
        >>>

        See also
        --------
        pydna.dseq.Dseq.looped
        """
        new = _copy.copy(self)
        for key, value in list(self.__dict__.items()):
            setattr(new, key, value)
        # The attribute copy above leaves new.features pointing at the very
        # same list (and feature objects) as self.features. Without a private
        # copy, the location changes below would silently rewrite the
        # features of the linear record as well.
        new.features = _copy.deepcopy(self.features)
        new._seq = self.seq.looped()
        five_prime = self.seq.five_prime_end()
        for fn, fo in zip(new.features, self.features):
            # Shift the feature by the overhang; the sign depends on which
            # kind of 5' end the linear sequence reports.
            if five_prime[0] == "5'":
                fn.location = fn.location + self.seq.ovhg
            elif five_prime[0] == "3'":
                fn.location = fn.location + (-self.seq.ovhg)

            # Feature starts before position 0: wrap its head to the end.
            if fn.location.start < 0:
                loc1 = _FeatureLocation(len(new) + fn.location.start,
                                        len(new),
                                        strand=fn.strand)
                loc2 = _FeatureLocation(0, fn.location.end, strand=fn.strand)
                fn.location = _CompoundLocation([loc1, loc2])

            # Feature runs past the end: wrap its tail to the start.
            if fn.location.end > len(new):
                loc1 = _FeatureLocation(fn.location.start,
                                        len(new),
                                        strand=fn.strand)
                loc2 = _FeatureLocation(0,
                                        fn.location.end - len(new),
                                        strand=fn.strand)
                fn.location = _CompoundLocation([loc1, loc2])

            # Qualifiers stay shared with the original feature, as before.
            fn.qualifiers = fo.qualifiers
        return new
Beispiel #2
0
    def assemble_circular(self):
        """Build the circular assembly products found in the graph ``self.G``.

        Every simple cycle of the graph is rotated so that it starts at its
        node with the lowest "order" attribute. For each combination of
        parallel edges along a cycle, the edge pieces are joined into one
        sequence and copies of the edge features are moved to their position
        in that sequence. A sequence is skipped when its upper-case form was
        already collected, either directly or under the key produced by
        ``_rc``.

        Returns
        -------
        list
            Objects created with ``_Contig.from_string`` (``linear=False``,
            ``circular=True``), sorted by length, longest first.
        """
        collected = {}      # upper-case sequence -> assembly data
        collected_rc = {}   # same data, keyed by _rc(upper-case sequence)

        # Rotate each cycle to begin at its lowest-"order" node, then sort
        # the cycles by that order.
        rotated_cycles = []
        for cycle in sorted(_nx.simple_cycles(self.G), key=len):
            order, first = min((self.G.nodes[n]["order"], n) for n in cycle)
            pivot = cycle.index(first)
            rotated_cycles.append((order, cycle[pivot:] + cycle[:pivot]))
        rotated_cycles.sort()

        for _order, cp in rotated_cycles:
            cp.extend(cp[:1])  # close the path: last node == first node

            # One list per step of the cycle, holding all parallel edges.
            edge_choices = [
                [(u, v, data) for data in self.G[u][v].values()]
                for u, v in zip(cp, cp[1:])
            ]

            for edges in _itertools.product(*edge_choices):
                # Skip combinations where an edge piece stops exactly where
                # the next piece of the same sequence starts.
                if any(
                    (left["seq"], left["piece"].stop)
                    == (right["seq"], right["piece"].start)
                    for (_a, _b, left), (_c, _d, right) in zip(edges, edges[1:])
                ):
                    continue

                ct = "".join(data["seq"][data["piece"]] for _u, _v, data in edges)
                key = ct.upper()

                # TODO: cpsrc not needed?
                if key in collected or key in collected_rc:
                    continue

                subgraph = _nx.DiGraph()
                subgraph.add_edges_from(edges)
                subgraph.add_nodes_from(
                    (node, attrs)
                    for node, attrs in self.G.nodes(data=True)
                    if node in cp
                )

                total = len(ct)
                placed = []
                offset = 0

                for _u, _v, data in edges:
                    copies = _deepcopy(data["features"])
                    for feature in copies:
                        feature.location += offset
                    placed.extend(copies)
                    offset += data["piece"].stop - data["piece"].start
                    # Bring features that lie beyond the end of the joined
                    # sequence back into range, splitting the ones that
                    # cross the end into two parts.
                    for feature in placed:
                        loc = feature.location
                        if loc.start > total and loc.end > total:
                            feature.location += (-total)
                        elif loc.end > total:
                            feature.location = _CompoundLocation((
                                _FeatureLocation(loc.start,
                                                 _ExactPosition(total)),
                                _FeatureLocation(_ExactPosition(0),
                                                 loc.end - total),
                            ))

                record = (
                    ct,
                    placed,
                    subgraph,
                    {node: self.nodemap[node] for node in cp[:-1]},
                    cp,
                )
                collected[key] = record
                collected_rc[_rc(key)] = record

        contigs = [
            _Contig.from_string(sequence,
                                features=features,
                                graph=graph,
                                nodemap=nodemap,
                                linear=False,
                                circular=True)
            for sequence, features, graph, nodemap, _path in collected.values()
        ]
        contigs.sort(key=len, reverse=True)
        return contigs
Beispiel #3
0
    def __init__(self, primers, template, limit=13, **kwargs):
        r"""Find where the given primers anneal on a template.

        The template is deep-copied. Each annealing site found by
        ``_annealing_positions`` is stored as a ``_Primer`` in
        ``forward_primers`` or ``reverse_primers``, and a "primer_bind"
        feature is added to the copied template for every site.

        Parameters
        ----------
        primers : iterable of :class:`Primer` or Biopython SeqRecord like
                  objects
            Primer sequences 5'-3'.

        template : Dseqrecord
            The template sequence 5'-3'.

        limit : int, optional
            limit length of the annealing part of the primers.

        **kwargs
            Stored unchanged in ``self.kwargs``.

        Attributes
        ----------
        products: list
            A list of Amplicon objects, one for each primer pair that may
            form a PCR product.


        Examples
        --------
        >>> from pydna.readers import read
        >>> from pydna.amplify import Anneal
        >>> from pydna.dseqrecord import Dseqrecord as Ds
        >>> t = Ds("tacactcaccgtctatcattatcta"
        ...        "ctatcgactgtatcatctgatagcac")
        >>> from Bio.SeqRecord import SeqRecord
        >>> p1 = read(">p1\ntacactcaccgtctatcattatc", ds = False)
        >>> p2 = read(">p2\ngtgctatcagatgatacagtcg", ds = False)
        >>> ann = Anneal((p1, p2), t)
        >>> print(ann.report())
        Template name 51 nt linear:
        p1 anneals forward (--->) at 23
        p2 anneals reverse (<---) at 29
        >>> ann.products
        [Amplicon(51)]
        >>> amplicon_list = ann.products
        >>> amplicon = amplicon_list.pop()
        >>> amplicon
        Amplicon(51)
        >>> print(amplicon.figure())
        5tacactcaccgtctatcattatc...cgactgtatcatctgatagcac3
                                   ||||||||||||||||||||||
                                  3gctgacatagtagactatcgtg5
        5tacactcaccgtctatcattatc3
         |||||||||||||||||||||||
        3atgtgagtggcagatagtaatag...gctgacatagtagactatcgtg5
        >>> print(amplicon)
        Dseqrecord
        circular: False
        size: 51
        ID: 51bp U96-TO06Y6pFs74SQx8M1IVTBiY
        Name: 51bp_PCR_prod
        Description: pcr product_p1_p2
        Number of features: 2
        /molecule_type=DNA
        Dseq(-51)
        taca..gcac
        atgt..cgtg
        >>> print(amplicon.program())
        <BLANKLINE>
        |95°C|95°C               |    |tmf:59.5
        |____|_____          72°C|72°C|tmr:59.7
        |5min|30s  \ 47.7°C _____|____|30s/kb
        |    |      \______/ 0: 1|5min|GC 39%
        |    |       30s         |    |51bp
        >>>

        """
        self.primers = primers
        self.template = _copy.deepcopy(template)

        self.limit = limit
        self.kwargs = kwargs

        self._products = None

        self.forward_primers = []
        self.reverse_primers = []

        tseq = self.template.seq
        watson_len = len(tseq.watson)
        crick_len = len(tseq.crick)

        # A circular template is searched on doubled strands, presumably so
        # that annealing sites spanning the origin are found as well.
        if self.template.linear:
            watson, crick = tseq.watson, tseq.crick
        else:
            watson = tseq.watson + tseq.watson
            crick = tseq.crick + tseq.crick

        for primer in self.primers:
            # Forward primers are found on the crick strand.
            for pos, footprint in _annealing_positions(
                    str(primer.seq), crick, self.limit):
                if pos < crick_len:
                    self.forward_primers.append(
                        _Primer(primer,
                                position=crick_len - pos - min(tseq.ovhg, 0),
                                footprint=footprint))
            # Reverse primers are found on the watson strand.
            for pos, footprint in _annealing_positions(
                    str(primer.seq), watson, self.limit):
                if pos < watson_len:
                    self.reverse_primers.append(
                        _Primer(primer,
                                position=pos + max(0, tseq.ovhg),
                                footprint=footprint))

        by_position = _operator.attrgetter("position")
        self.forward_primers.sort(key=by_position)
        self.reverse_primers.sort(key=by_position, reverse=True)

        def colored_label(name):
            # Fresh qualifier dict for every feature (never shared).
            return {
                "label": [name],
                "ApEinfo_fwdcolor": ["#baffa3"],
                "ApEinfo_revcolor": ["#ffbaba"],
            }

        template_len = len(self.template)

        for fwd in self.forward_primers:
            if fwd.position - fwd._fp >= 0:
                feature = _SeqFeature(
                    _FeatureLocation(fwd.position - fwd._fp, fwd.position),
                    type="primer_bind",
                    strand=1,
                    qualifiers=colored_label(fwd.name),
                )
            else:
                # Footprint reaches back across the origin: two-part location.
                # NOTE(review): no strand is set here, unlike the branch above.
                head = template_len - fwd._fp + fwd.position
                tail = head + fwd._fp - template_len
                feature = _SeqFeature(
                    _CompoundLocation([
                        _FeatureLocation(head, template_len),
                        _FeatureLocation(0, tail),
                    ]),
                    type="primer_bind",
                    location_operator="join",
                    qualifiers=colored_label(fwd.name),
                )
            self.template.features.append(feature)

        for rev in self.reverse_primers:
            if rev.position + rev._fp <= template_len:
                feature = _SeqFeature(
                    _FeatureLocation(rev.position, rev.position + rev._fp),
                    type="primer_bind",
                    strand=-1,
                    qualifiers=colored_label(rev.name),
                )
            else:
                # Footprint runs past the end of the template and continues
                # from position 0. Only the label qualifier is set here.
                tail = rev.position + rev._fp - template_len
                feature = _SeqFeature(
                    _CompoundLocation([
                        _FeatureLocation(0, tail),
                        _FeatureLocation(rev.position, template_len),
                    ]),
                    type="primer_bind",
                    location_operator="join",
                    strand=-1,
                    qualifiers={"label": [rev.name]},
                )
            self.template.features.append(feature)
Beispiel #4
0
    def map_trace_files(self, pth, limit=25):
        """Map sequencing trace files onto this record as "trace" features.

        Every file matched by the glob pattern is read with
        ``SeqIO.read(name, "abi")`` and lower-cased. When ``self.map_target``
        is a slice-like object (has ``step``) or a feature-like object (has
        ``extract``), only reads containing that region, or its
        ``rc()``, are considered; ``self.matching_reads`` and
        ``self.not_matching_reads`` record the outcome. Otherwise both
        attributes are set to None and all reads are considered.

        Parameters
        ----------
        pth : str or path-like
            Glob pattern selecting the trace files.
        limit : int, optional
            Passed on to ``_common_sub_strings`` (presumably the minimum
            length of a shared stretch; confirm against that helper).

        Returns
        -------
        list
            File names of the reads for which a feature was added.

        Raises
        ------
        ValueError
            If the pattern does not match any file.
        """
        import glob
        import os

        # glob.glob only accepts str/bytes patterns; os.fspath lets callers
        # pass pathlib.Path objects too.
        pattern = os.fspath(pth)

        traces = []
        for name in glob.glob(pattern):
            trace = SeqIO.read(name, "abi").lower()
            trace.annotations["filename"] = trace.fname = name
            traces.append(trace)
        if not traces:
            raise ValueError("No trace files found in {}".format(pth))
        if hasattr(self.map_target, "step"):
            area = self.map_target
        elif hasattr(self.map_target, "extract"):
            area = slice(self.map_target.location.start,
                         self.map_target.location.end)
        else:
            area = None  # TODO allow other objects as well and do some checks on map target

        if area:
            self.matching_reads = []
            self.not_matching_reads = []
            target = str(self[area].seq).lower()
            target_rc = str(self[area].seq.rc()).lower()
            for trace in traces:
                read_seq = str(trace.seq)
                if target in read_seq or target_rc in read_seq:
                    self.matching_reads.append(trace)
                else:
                    self.not_matching_reads.append(trace)
            reads = self.matching_reads
        else:
            self.matching_reads = None
            self.not_matching_reads = None
            reads = traces

        own_seq = str(self.seq).lower()
        matching_reads = []

        for read_ in reads:

            matches = _common_sub_strings(own_seq, str(read_.seq), limit)

            if not matches:
                continue

            # Always keep the first match; keep a later match only when it
            # starts after the end of the match listed just before it, in
            # both sequences.
            newmatches = [matches[0]]
            newmatches.extend(
                current
                for (g, f, h), current in zip(matches, matches[1:])
                if g + h < current[0] and f + h < current[1]
            )

            matching_reads.append(read_)

            if len(newmatches) > 1:
                loc = _CompoundLocation(
                    [_FeatureLocation(m[0], m[0] + m[2]) for m in newmatches])
            else:
                a, _b, c = newmatches[0]
                loc = _FeatureLocation(a, a + c)

            self.features.append(
                _SeqFeature(
                    loc,
                    qualifiers={"label": [read_.annotations["filename"]]},
                    type="trace",
                ))

        return [x.annotations["filename"] for x in matching_reads]
Beispiel #5
0
    def __init__(
            self,
            primers,
            template,
            limit=13,
            primerc=1000.0,  # nM
            saltc=50,  # mM
            **kwargs):
        r"""Find where the given primers anneal on a template.

        The template is deep-copied. Each annealing site found by
        ``_annealing_positions`` is stored as a ``_Primer`` in
        ``forward_primers`` or ``reverse_primers``, and a "primer_bind"
        feature is added to the copied template for every site.

        Parameters
        ----------
        primers : iterable of :class:`Primer` or Biopython SeqRecord like objects
            Primer sequences 5'-3'.

        template : Dseqrecord
            The template sequence 5'-3'.

        limit : int, optional
            limit length of the annealing part of the primers.

        primerc : float, optional
            Primer concentration in nM, set to 1000.0 nM by default. Stored
            as ``self.primerc``.

        saltc  : float, optional
            Salt concentration (monovalent cations) in mM, set to 50 mM by
            default. Stored as ``self.saltc``.
            NOTE(review): earlier documentation referred to
            :mod:`tmbresluc` here; confirm which Tm function uses this value.

        **kwargs
            Stored in ``self.kwargs`` as a ``defaultdict(str)``, so missing
            keys read as the empty string.

        Attributes
        ----------
        products: list
            A list of Amplicon objects, one for each primer pair that may form a PCR product.


        Examples
        --------
        >>> from pydna.readers import read
        >>> from pydna.amplify import Anneal
        >>> from pydna.dseqrecord import Dseqrecord
        >>> template = Dseqrecord("tacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
        >>> from Bio.SeqRecord import SeqRecord
        >>> p1 = read(">p1\ntacactcaccgtctatcattatc", ds = False)
        >>> p2 = read(">p2\ngtgctatcagatgatacagtcg", ds = False)
        >>> ann = Anneal((p1, p2), template)
        >>> print(ann.report())
        Template name 51 nt linear:
        Primer p1 anneals forward at position 23
        <BLANKLINE>
        Primer p2 anneals reverse at position 29
        >>> ann.products
        [Amplicon(51)]
        >>> amplicon_list = ann.products
        >>> amplicon = amplicon_list.pop()
        >>> amplicon
        Amplicon(51)
        >>> print(amplicon.figure())
        5tacactcaccgtctatcattatc...cgactgtatcatctgatagcac3
                                   |||||||||||||||||||||| tm 55.9 (dbd) 60.5
                                  3gctgacatagtagactatcgtg5
        5tacactcaccgtctatcattatc3
         ||||||||||||||||||||||| tm 54.6 (dbd) 58.8
        3atgtgagtggcagatagtaatag...gctgacatagtagactatcgtg5
        >>> amplicon.annotations['date'] = '02-FEB-2013'   # Set the date for this example to pass the doctest
        >>> print(amplicon)
        Dseqrecord
        circular: False
        size: 51
        ID: 51bp U96-TO06Y6pFs74SQx8M1IVTBiY
        Name: 51bp_PCR_prod
        Description: pcr product_p1_p2
        Number of features: 2
        /date=02-FEB-2013
        Dseq(-51)
        taca..gcac
        atgt..cgtg
        >>> print(amplicon.program())
        <BLANKLINE>
        Taq (rate 30 nt/s) 35 cycles             |51bp
        95.0°C    |95.0°C                 |      |Tm formula: Biopython Tm_NN
        |_________|_____          72.0°C  |72.0°C|SaltC 50mM
        | 03min00s|30s  \         ________|______|Primer1C 1.0µM
        |         |      \ 45.4°C/ 0min 2s| 5min |Primer2C 1.0µM
        |         |       \_____/         |      |GC 39%
        |         |         30s           |      |4-12°C

        >>>

        """
        self.primers = primers
        self.primerc = primerc
        self.saltc = saltc
        self.template = _copy.deepcopy(template)

        self.limit = limit
        self.kwargs = defaultdict(str, kwargs)

        self._products = None

        self.forward_primers = []
        self.reverse_primers = []

        tseq = self.template.seq
        watson_len = len(tseq.watson)
        crick_len = len(tseq.crick)

        # A circular template is searched on doubled strands, presumably so
        # that annealing sites spanning the origin are found as well.
        if self.template.linear:
            watson, crick = tseq.watson, tseq.crick
        else:
            watson = tseq.watson + tseq.watson
            crick = tseq.crick + tseq.crick

        for primer in self.primers:
            # Forward primers are found on the crick strand.
            for pos, footprint in _annealing_positions(
                    str(primer.seq), crick, self.limit):
                if pos < crick_len:
                    self.forward_primers.append(
                        _Primer(primer,
                                position=crick_len - pos - min(tseq.ovhg, 0),
                                footprint=footprint))
            # Reverse primers are found on the watson strand.
            for pos, footprint in _annealing_positions(
                    str(primer.seq), watson, self.limit):
                if pos < watson_len:
                    self.reverse_primers.append(
                        _Primer(primer,
                                position=pos + max(0, tseq.ovhg),
                                footprint=footprint))

        by_position = _operator.attrgetter("position")
        self.forward_primers.sort(key=by_position)
        self.reverse_primers.sort(key=by_position, reverse=True)

        def colored_label(name):
            # Fresh qualifier dict for every feature (never shared).
            return {
                "label": [name],
                "ApEinfo_fwdcolor": ["#baffa3"],
                "ApEinfo_revcolor": ["#ffbaba"],
            }

        template_len = len(self.template)

        for fwd in self.forward_primers:
            if fwd.position - fwd._fp >= 0:
                feature = _SeqFeature(
                    _FeatureLocation(fwd.position - fwd._fp, fwd.position),
                    type="primer_bind",
                    strand=1,
                    qualifiers=colored_label(fwd.name),
                )
            else:
                # Footprint reaches back across the origin: two-part location.
                # NOTE(review): no strand is set here, unlike the branch above.
                head = template_len - fwd._fp + fwd.position
                tail = head + fwd._fp - template_len
                feature = _SeqFeature(
                    _CompoundLocation([
                        _FeatureLocation(head, template_len),
                        _FeatureLocation(0, tail),
                    ]),
                    type="primer_bind",
                    location_operator="join",
                    qualifiers=colored_label(fwd.name),
                )
            self.template.features.append(feature)

        for rev in self.reverse_primers:
            if rev.position + rev._fp <= template_len:
                feature = _SeqFeature(
                    _FeatureLocation(rev.position, rev.position + rev._fp),
                    type="primer_bind",
                    strand=-1,
                    qualifiers=colored_label(rev.name),
                )
            else:
                # Footprint runs past the end of the template and continues
                # from position 0. Only the label qualifier is set here.
                tail = rev.position + rev._fp - template_len
                feature = _SeqFeature(
                    _CompoundLocation([
                        _FeatureLocation(0, tail),
                        _FeatureLocation(rev.position, template_len),
                    ]),
                    type="primer_bind",
                    location_operator="join",
                    strand=-1,
                    qualifiers={"label": [rev.name]},
                )
            self.template.features.append(feature)