예제 #1
0
 def from_SeqRecord(cls,
                    record: _SeqRecord,
                    *args,
                    linear=True,
                    circular=False,
                    n=5e-14,
                    **kwargs):
     """Build an instance from an existing record without running __init__.

     The object is allocated with ``cls.__new__`` and every attribute is
     filled in by hand.

     Parameters
     ----------
     record : _SeqRecord
         Source record. ``str(record.seq)`` and its ``_rc`` counterpart are
         handed to ``_Dseq.quick`` as the two strands (with ``ovhg=0``).
     *args, **kwargs
         Accepted for signature compatibility; not used here.
     linear, circular : bool
         Forwarded unchanged to ``_Dseq.quick``.
     n : float
         Stored as the ``n`` attribute of the new object.

     Returns
     -------
     A new ``cls`` instance. ``dbxrefs``, ``features`` and the per-letter
     annotations are the very same objects as on ``record`` (no copies).
     """
     sequence = str(record.seq)
     new = cls.__new__(cls)  # deliberately bypasses __init__
     new._seq = _Dseq.quick(sequence,
                            _rc(sequence),
                            ovhg=0,
                            linear=linear,
                            circular=circular)
     # Plain attributes are taken over by reference.
     for attribute in ("id", "name", "description", "dbxrefs"):
         setattr(new, attribute, getattr(record, attribute))
     # Start from the DNA default so that the record's own annotations
     # override it when they define "molecule_type" themselves.
     new.annotations = {"molecule_type": "DNA"}
     new.annotations.update(record.annotations)
     new._per_letter_annotations = record._per_letter_annotations
     new.features = record.features
     new.map_target = None
     new.n = n
     return new
예제 #2
0
 def from_string(cls,
                 record: str = "",
                 *args,
                 linear=True,
                 circular=False,
                 n=5e-14,
                 **kwargs):
     """Build an instance directly from a sequence string.

     ``__init__`` is not called; the object is allocated with
     ``cls.__new__`` and populated with placeholder metadata.

     Parameters
     ----------
     record : str
         Sequence text. It and ``_rc(record)`` are handed to
         ``_Dseq.quick`` as the two strands (with ``ovhg=0``).
     *args
         Accepted but not used.
     linear, circular : bool
         Forwarded unchanged to ``_Dseq.quick``.
     n : float
         Stored as the ``n`` attribute of the new object.
     **kwargs
         Written straight into the instance ``__dict__`` as the last step,
         so they can override any attribute set here.

     Returns
     -------
     A new ``cls`` instance with empty ``dbxrefs``/``features`` and
     ``id``, ``name`` and ``description`` set to their own names.
     """
     new = cls.__new__(cls)  # deliberately bypasses __init__
     new._seq = _Dseq.quick(record,
                            _rc(record),
                            ovhg=0,
                            linear=linear,
                            circular=circular)
     # Each text field gets its own name as placeholder value.
     for label in ("id", "name", "description"):
         setattr(new, label, _pretty_str(label))
     new.dbxrefs = []
     new.annotations = {"molecule_type": "DNA"}
     new._per_letter_annotations = {}
     new.features = []
     new.map_target = None
     new.n = n
     new.__dict__.update(kwargs)
     return new
예제 #3
0
 def _fill_in_five_prime(self, nucleotides):
     stuffer = ""
     type, se = self.five_prime_end()
     if type == "5'":
         for n in _rc(se):
             if n in nucleotides:
                 stuffer += n
             else:
                 break
     return self.crick + stuffer, self._ovhg + len(stuffer)
예제 #4
0
 def _fill_in_three_prime(self, nucleotides):
     stuffer = ""
     type, se = self.three_prime_end()
     if type == "5'":
         for n in _rc(se):
             if n in nucleotides:
                 stuffer += n
             else:
                 break
     return self.watson + stuffer
예제 #5
0
    def looped(self):
        """Return a circular version of this molecule.

        An already circular molecule is returned as is. A linear molecule
        is closed end to end, which requires both ends to be of the same
        type and the five prime sticky end to equal ``_rc`` of the three
        prime sticky end.

        Raises
        ------
        TypeError
            If the two ends are not compatible.

        Examples
        --------
        >>> from pydna.dseq import Dseq
        >>> a=Dseq("catcgatc")
        >>> a
        Dseq(-8)
        catcgatc
        gtagctag
        >>> a.looped()
        Dseq(o8)
        catcgatc
        gtagctag
        >>> a.T4("t")
        Dseq(-8)
        catcgat
         tagctag
        >>> a.T4("t").looped()
        Dseq(o7)
        catcgat
        gtagcta
        >>> a.T4("a")
        Dseq(-8)
        catcga
          agctag
        >>> a.T4("a").looped()
        Traceback (most recent call last):
          ...
        TypeError: DNA cannot be circularized.
        5' and 3' sticky ends not compatible!

        """
        if self.circular:
            return self

        kind5, tail5 = self.five_prime_end()
        kind3, tail3 = self.three_prime_end()
        compatible = kind5 == kind3 and str(tail5) == str(_rc(tail3))
        if not compatible:
            raise TypeError("DNA cannot be circularized.\n"
                            "5' and 3' sticky ends not compatible!")

        # Rotate crick by the overhang so that the circular molecule can
        # be stored with ovhg=0.
        shift = -self._ovhg
        rotated_crick = self.crick[shift:] + self.crick[:shift]
        joined = Dseq.quick(
            self.watson,
            rotated_crick,
            ovhg=0,
            linear=False,
            circular=True,
        )
        assert len(joined.crick) == len(joined.watson)
        return joined
예제 #6
0
파일: assembly.py 프로젝트: uswa1/pydna
    def assemble_circular(self):
        """Return all circular assembly products found in the graph ``self.G``.

        Every simple cycle of ``self.G`` is rotated so that it begins at its
        node with the lowest ``"order"`` attribute. For each cycle, every
        combination of edges along it is turned into a sequence by joining
        the ``e["seq"][e["piece"]]`` slices of the chosen edges. A sequence
        is kept only the first time it, or a sequence whose ``_rc`` it
        equals (compared upper-cased), is seen.

        Returns
        -------
        list
            Objects created by ``_Contig.from_string`` with ``linear=False``
            and ``circular=True``, sorted by ``len`` with the longest first.
        """
        cps = {} # circular assembly products, keyed by upper-cased sequence
        cpsrc = {}  # the same products, keyed by _rc() of that sequence
        cpaths = sorted( _nx.simple_cycles(self.G), key=len)
        cpaths_sorted=[]
        for cpath in cpaths:
            # Rotate the cycle so it starts at its lowest-"order" node
            # (ties on "order" are broken by comparing the nodes themselves).
            order, node = min((self.G.nodes[node]["order"],node) for node in cpath)
            i=cpath.index(node)
            cpaths_sorted.append((order, cpath[i:]+cpath[:i]))
        cpaths_sorted.sort()

        for _, cp in cpaths_sorted:        # cp is a list of nodes representing a circular assembly
            edgelol = []                   # edgelol is a list of lists of all edges along cp
            cp+= cp[0:1]                   # repeat the first node at the end to close the cycle
            for u,v in zip(cp, cp[1:]):
                e=[]
                # One entry per edge stored between u and v
                # (presumably self.G is a networkx multigraph -- TODO confirm).
                for d in self.G[u][v].values():
                    e.append((u,v,d))
                edgelol.append(e)

            # Try every way of choosing one edge for each step of the cycle.
            for edges in _itertools.product(*edgelol):
                # Skip combinations where two consecutive edges come from the
                # same "seq" and the first piece stops exactly where the next
                # one starts.
                if [True for ((u,v,e),(x,y,z)) in zip(edges, edges[1:]) if (e["seq"],e["piece"].stop) == (z["seq"],z["piece"].start)]:
                    continue
                ct = "".join(e["seq"][e["piece"]] for u,v,e in edges)
                key=ct.upper()

                # Already produced, either directly or as the _rc() of an
                # earlier product.
                if key in cps or key in cpsrc: continue  # TODO: cpsrc not needed?
                # Sub-graph holding only the chosen edges and the cycle's nodes.
                sg=_nx.DiGraph()
                sg.add_edges_from(edges)
                sg.add_nodes_from( (n,d) for n,d in self.G.nodes(data=True) if n in cp )

                edgefeatures=[]
                offset=0  # position in ct where the current edge's piece begins

                for u,v,e in edges:
                    # Work on copies so the features stored on the edge are
                    # left untouched.
                    feats = _deepcopy(e["features"])
                    for feat in feats:
                        feat.location+=offset
                    edgefeatures.extend(feats)
                    offset+=e["piece"].stop-e["piece"].start
                    # Re-checked for every feature collected so far on each
                    # pass: features lying completely beyond the end of ct are
                    # shifted back by len(ct); features that run over the end
                    # are split into a compound location wrapping to position 0.
                    for f in edgefeatures:
                        if f.location.start>len(ct) and f.location.end>len(ct):
                            f.location+=(-len(ct))
                        elif f.location.end>len(ct):
                            f.location = _CompoundLocation((_FeatureLocation(f.location.start,_ExactPosition(len(ct))),_FeatureLocation(_ExactPosition(0), f.location.end-len(ct))))

                # Register the same tuple under the sequence and under its
                # _rc(); cp[:-1] leaves out the repeated closing node.
                cps[key] = cpsrc[_rc(key)] = ct, edgefeatures, sg, {n:self.nodemap[n] for n in cp[:-1]}, cp

        # Here cp is one stored tuple: (sequence, features, graph, nodemap, path).
        return sorted((_Contig.from_string(cp[0],
                               features = cp[1],
                               graph    = cp[2],
                               nodemap  = cp[3],
                               linear=False,
                               circular=True) for cp in cps.values()), key=len, reverse=True)
예제 #7
0
 def quick(cls,
           watson: str,
           crick: str,
           ovhg=0,
           linear=True,
           circular=False,
           pos=0):
     """Create an instance from ready-made strands without validation.

     ``__init__`` is bypassed; the arguments are stored as given and no
     check is made that ``linear`` and ``circular`` agree with each other.

     Parameters
     ----------
     watson, crick : str
         The two strands.
     ovhg : int
         Stagger between the strands. A positive value places the last
         ``ovhg`` bases of crick (through ``_rc``) in front of watson in
         the combined sequence.
     linear, circular : bool
         Stored as ``_linear`` and ``_circular``.
     pos : int
         Stored as ``pos``.

     Returns
     -------
     A new ``cls`` instance whose ``_data`` is the combined sequence:
     the part of crick sticking out before watson, watson itself, and the
     part of crick sticking out after it.
     """
     before_watson = max(0, ovhg)
     before_crick = max(0, -ovhg)
     # "-0 or len(crick)" yields an empty slice when crick does not stick
     # out in front of watson.
     leading = _rc(crick[-before_watson or len(crick):])
     trailing = _rc(crick[:max(0, len(crick) - ovhg - len(watson))])

     new = cls.__new__(cls)  # deliberately bypasses __init__
     new.watson = _pretty_str(watson)
     new.crick = _pretty_str(crick)
     new._ovhg = ovhg
     new._circular = circular
     new._linear = linear
     new.length = max(len(watson) + before_watson,
                      len(crick) + before_crick)
     new.pos = pos
     new._data = leading + watson + trailing
     return new
예제 #8
0
 def from_string(cls,
                 dna: str,
                 *args,
                 linear=True,
                 circular=False,
                 **kwargs):
     """Create a blunt instance from a single strand.

     ``__init__`` is bypassed. ``dna`` becomes the watson strand and
     ``_rc(dna)`` the crick strand, with no stagger between them.

     Parameters
     ----------
     dna : str
         Sequence text.
     *args, **kwargs
         Accepted but not used.
     linear, circular : bool
         Stored as ``_linear`` and ``_circular`` without further checks.

     Returns
     -------
     A new ``cls`` instance with ``_ovhg`` and ``pos`` set to 0.
     """
     new = cls.__new__(cls)  # deliberately bypasses __init__
     new.watson = _pretty_str(dna)
     new.crick = _pretty_str(_rc(dna))
     new._data = dna
     new.length = len(dna)
     new._ovhg = 0
     new.pos = 0
     new._circular = circular
     new._linear = linear
     return new
예제 #9
0
파일: dseq.py 프로젝트: uswa1/pydna
    def __add__(self, other):
        '''Simulates ligation between two DNA fragments.

        Add other Dseq object at the end of the sequence.
        Type error is raised if any of the points below are fulfilled:

        * one or more objects are circular
        * if three prime sticky end of self is not the same type
          (5' or 3') as the sticky end of other
        * three prime sticky end of self complementary with five
          prime sticky end of other.

        Phosphorylation and dephosphorylation is not considered.
        
        DNA is allways presumed to have the necessary 5' phospate
        group necessary for ligation.

       '''
        # test for circular DNA
        if self.circular:
            raise TypeError("circular DNA cannot be ligated!")
        try:
            if other.circular:
                raise TypeError("circular DNA cannot be ligated!")
        except AttributeError:
            pass

        self_type, self_tail = self.three_prime_end()
        other_type, other_tail = other.five_prime_end()

        if (self_type == other_type
                and str(self_tail) == str(_rc(other_tail))):

            answer = Dseq.quick(self.watson + other.watson,
                                other.crick + self.crick, self._ovhg)
        elif not self:
            answer = _copy.copy(other)
        elif not other:
            answer = _copy.copy(self)
        else:
            raise TypeError("sticky ends not compatible!")
        return answer
예제 #10
0
파일: contig.py 프로젝트: joskid/pydna
 def reverse_complement(self):
     """Return the reverse complement together with a mirrored graph.

     The sequence part comes from the parent class. The graph is rebuilt
     with node names translated through ``self.nodemap``, every edge
     pointing the other way, and each edge's data adjusted:

     * ``name`` loses a trailing ``_rc`` or gains one (cut to 13 chars),
     * ``seq`` is replaced by ``_rc(seq)``,
     * ``piece`` is recomputed from the far end of ``seq`` using the
       ``"length"`` attribute of the edge's two nodes,
     * every feature is replaced by ``feature._flip(len(seq))``.

     The returned object receives the new graph and the inverted nodemap.
     """
     flipped = type(self)(super().reverse_complement())
     relabel = self.nodemap

     mirrored = _nx.DiGraph()
     # Reverse both the direction of each edge and the order of insertion.
     mirrored.add_edges_from([
         (relabel[head], relabel[tail], data)
         for tail, head, data in reversed(list(self.graph.edges(data=True)))
     ])
     mirrored.add_nodes_from(
         (relabel[node], data)
         for node, data in reversed(list(self.graph.nodes(data=True))))

     for src, dst, data in mirrored.edges(data=True):
         name = data["name"]
         if name.endswith("_rc"):
             data["name"] = name[:-3]
         else:
             data["name"] = "{}_rc".format(name)[:13]
         data["seq"] = _rc(data["seq"])
         seq_len = len(data["seq"])
         old_piece = data["piece"]
         data["piece"] = slice(
             seq_len - old_piece.stop - mirrored.nodes[src]["length"],
             seq_len - old_piece.start - mirrored.nodes[dst]["length"])
         data["features"] = [feat._flip(seq_len) for feat in data["features"]]

     flipped.graph = mirrored
     flipped.nodemap = {value: key for key, value in self.nodemap.items()}
     return flipped
예제 #11
0
def _annealing_positions(primer, template, limit=15):
    """Finds the annealing position(s) for a primer on a template where the
    primer anneals perfectly with at least limit nucleotides in the 3' part.
    The primer is the lower strand in the figure below.

    start is a position (integer)

    footprint and tail are strings.

    ::

        <- - - - - - - - - - template - - - - - - - - - - - - - >

        <------- start (int) ------>
     5'-...gctactacacacgtactgactgcctccaagatagagtcagtaaccacactcgat...3'
           ||||||||||||||||||||||||||||||||||||||||||||||||
                                  3'-gttctatctcagtcattggtgtATAGTG-5'

                                     <-footprint length -->

    Parameters
    ----------
    primer : string
        The primer sequence 5'-3'

    template : string
        The template sequence 5'-3'

    limit : int = 15, optional
        footprint needs to be at least of length limit.

    Returns
    -------
    describe : list of tuples (int, int)
        [ (start1, footprint1), (start2, footprint2) ,..., ]
    """

    # return empty list if primer too short
    if len(primer) < limit:
        return []

    prc = _rc(primer)

    # head is minimum part of primer that can anneal
    head = prc[:limit].upper()

    table = {
        "R": "(A|G)",
        "Y": "(C|T)",
        "S": "(G|C)",
        "W": "(A|T)",
        "K": "(G|T)",
        "M": "(A|C)",
        "B": "(C|G|T)",
        "D": "(A|G|T)",
        "H": "(A|C|T)",
        "V": "(A|C|G)",
        "N": "(A|G|C|T)",
    }

    # Make regex pattern that reflects extended IUPAC DNA code
    for key in table:
        head = head.replace(key, table[key])

    positions = [
        m.start() for m in _re.finditer("(?={})".format(head), template, _re.I)
    ]

    if positions:
        tail = prc[limit:]
        length = len(tail)
        results = []
        for match_start in positions:
            tm = template[match_start + limit:match_start + limit + length]
            footprint = len(
                list(
                    _itertools.takewhile(
                        lambda x: x[0].lower() == x[1].lower(), zip(tail,
                                                                    tm))))
            results.append((match_start, footprint + limit))
        return results
    return []
예제 #12
0
    def __init__(self,
                 watson,
                 crick=None,
                 ovhg=None,
                 linear=None,
                 circular=None,
                 pos=0):
        """Create a double stranded sequence from one or two strands.

        Parameters
        ----------
        watson : string-like
            The first strand.
        crick : string-like, optional
            The second strand; ``_rc(crick)`` is what gets lined up with
            watson. Defaults to ``_rc(watson)``.
        ovhg : int, optional
            Stagger between the strands. A positive value places the last
            ``ovhg`` bases of crick (through ``_rc``) in front of watson in
            the combined sequence. When crick is given without ovhg, the
            stagger is worked out from the best common substring of watson
            and ``_rc(crick)``.
        linear, circular : optional
            The object is circular when ``circular`` is truthy and
            ``linear`` is falsy, or when ``linear == False`` and
            ``circular`` is None; otherwise it is linear.
        pos : int
            Stored as ``pos``.

        Raises
        ------
        ValueError
            If ovhg is given without crick, if the strands cannot be
            annealed, or if there is more than one equally good way of
            annealing them. An empty watson also fails in the annealing
            step because ``math.log(0)`` raises ValueError.
        """
        if crick is None:
            if ovhg is not None:
                raise ValueError("ovhg defined without crick strand!")
            # Single strand given: blunt molecule.
            crick = _rc(watson)
            ovhg = 0
            data = watson
        elif ovhg is None:
            # Both strands given, stagger unknown: anneal them.
            upper = str(watson).lower()
            lower = str(_rc(crick).lower())
            # presumably the shortest overlap that is accepted -- the
            # integer part of log4(len(watson)); verify against
            # _common_sub_strings
            shortest = int(_math.log(len(watson)) / _math.log(4))
            olaps = _common_sub_strings(upper, lower, shortest)
            try:
                watson_at, crick_at, best = olaps[0]
            except IndexError:
                raise ValueError(
                    "Could not anneal the two strands. Please provide ovhg value"
                )
            equally_good = [hit[1] - hit[0] for hit in olaps if hit[2] == best]
            if len(equally_good) > 1:
                raise ValueError(
                    "More than one way of annealing the strands. Please provide ovhg value"
                )
            ovhg = crick_at - watson_at

            # Pad whichever strand starts later, then take at every column
            # the watson base when there is one and the crick base if not.
            upper_row = (ovhg * " ") + _pretty_str(watson)
            lower_row = (-ovhg * " ") + _pretty_str(_rc(crick))
            data = "".join(
                a.strip() or b.strip()
                for a, b in _itertools.zip_longest(upper_row, lower_row,
                                                   fillvalue=" "))
        elif ovhg == 0:
            # Strands start together; crick may stick out at the far end.
            if len(watson) >= len(crick):
                data = watson
            else:
                data = watson + _rc(crick[:len(crick) - len(watson)])
        elif ovhg > 0:
            # crick sticks out in front of watson.
            data = _rc(crick[-ovhg:]) + watson
            if ovhg + len(watson) <= len(crick):
                data = data + _rc(crick[:len(crick) - ovhg - len(watson)])
        else:
            # watson sticks out in front of crick.
            if len(crick) - ovhg > len(watson):
                data = watson + _rc(
                    crick[:len(crick) - ovhg - len(watson)])
            else:
                data = watson

        explicitly_circular = bool(circular) and not bool(linear)
        # "== False" is intentional: it also accepts 0, unlike "is False".
        self._circular = (explicitly_circular
                          or linear == False and circular is None)
        self._linear = not self._circular
        self.watson = _pretty_str(watson)
        self.crick = _pretty_str(crick)
        self._data = data
        self.length = len(self._data)
        self._ovhg = ovhg
        self.pos = pos