コード例 #1
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_cut_circular():
    """Cut a circular Dseq with single-site enzymes at every origin rotation.

    The same circular molecule is rebuilt from each rotation of the template
    string.  The first fragment returned for Acc65I, KpnI and BsaI must have
    the same strands and overhang regardless of the rotation, and NotI (no
    site in the template) must return an empty tuple.
    """
    from pydna.dseq import Dseq
    from Bio.Restriction import BsaI, KpnI, Acc65I, NotI

    template = "aaaaaaGGTACCggtctcaaaa"

    for offset, _ in enumerate(template):
        # Same circle, origin moved `offset` positions along the template.
        rotated = template[offset:] + template[:offset]

        # Acc65I: G^GTACC
        acc65i_frag = Dseq(rotated, circular=True).cut(Acc65I)[0]
        assert acc65i_frag.watson.upper() == "GTACCGGTCTCAAAAAAAAAAG"
        assert acc65i_frag.crick.upper() == "GTACCTTTTTTTTTTGAGACCG"
        assert acc65i_frag.ovhg == -4

        # KpnI: GGTAC^C
        kpni_frag = Dseq(rotated, circular=True).cut(KpnI)[0]
        assert kpni_frag.watson.upper() == "CGGTCTCAAAAAAAAAAGGTAC"
        assert kpni_frag.crick.upper() == "CTTTTTTTTTTGAGACCGGTAC"
        assert kpni_frag.ovhg == 4

        # BsaI: ggtctcnnn
        bsai_frag = Dseq(rotated, circular=True).cut(BsaI)[0]
        assert bsai_frag.watson.upper() == "AAAAAAAAAGGTACCGGTCTCA"
        assert bsai_frag.crick.upper() == "TTTTTGAGACCGGTACCTTTTT"
        assert bsai_frag.ovhg == -4

        # NotI has no site here.
        uncut = Dseq(rotated, circular=True).cut(NotI)
        assert uncut == ()
コード例 #2
0
def test_lcs():
    """SeqRecord.lcs must return the same feature for every query type.

    The query "GGATCC" is supplied as a plain string, a Bio ``Seq``, a Bio
    ``SeqRecord``, a ``Dseq`` and two ``Dseqrecord`` variants; the attribute
    dictionary of the returned feature is compared with a hand-built one.
    """
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord import SeqRecord

    from pydna.seqfeature import SeqFeature
    from Bio.SeqFeature import FeatureLocation, ExactPosition

    record = SeqRecord(Seq("GGATCC"))

    whole_sequence = FeatureLocation(ExactPosition(0),
                                     ExactPosition(6),
                                     strand=1)
    expected = SeqFeature()
    expected.__dict__ = {
        "location": whole_sequence,
        "type": "read",
        "id": "<unknown id>",
        "qualifiers": {
            "label": ["sequence"],
            "ApEinfo_fwdcolor": ["#DAFFCF"],
            "ApEinfo_revcolor": ["#DFFDFF"],
        },
    }

    queries = (
        "GGATCC",
        Seq("GGATCC"),
        BSeqRecord(Seq("GGATCC"), name="sequence"),
        Dseq("GGATCC"),
        Dseqrecord(Dseq("GGATCC"), name="sequence"),
        Dseqrecord("GGATCC", name="sequence"),
    )
    for query in queries:
        found = record.lcs(query, limit=4)
        assert found.__dict__ == expected.__dict__
コード例 #3
0
def test_Dseq_cutting_adding():
    """Check the strands of the middle fragment cut out of blunt linear Dseqs.

    Three molecules are built from an explicit top strand and a bottom strand
    (written left-to-right under the top strand, then reversed for Dseq), cut
    with BamHI or with EcoRI + PstI, and the watson/crick strands of the
    fragment at index 1 are compared with literal expectations.
    """
    from pydna.dseq import Dseq
    from Bio.Restriction import BamHI, PstI, EcoRI

    # --- BamHI site at both ends -------------------------------------------
    bam_top = "GGATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGGATCC"
    bam_bottom = "CCTAGGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCCTAGG"
    bam_flanked = Dseq(bam_top, bam_bottom[::-1], linear=True, ovhg=0)

    bam_insert = bam_flanked.cut(BamHI)[1]

    expected_watson = "GATCCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG"
    expected_crick = "GATCCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG"
    assert bam_insert.watson == expected_watson
    assert bam_insert.crick == expected_crick

    # --- PstI on the left, EcoRI on the right ------------------------------
    pst_eco_top = "nCTGCAGtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtGAATTCn"
    pst_eco_bottom = "nGACGTCagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaCTTAAGn"
    pst_eco = Dseq(pst_eco_top, pst_eco_bottom[::-1], linear=True, ovhg=0)

    # Exactly three fragments are expected; only the middle one is inspected.
    _left, middle, _right = pst_eco.cut((EcoRI, PstI))

    expected_watson = "GtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtG"
    expected_crick = "AATTCacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaCTGCA"
    assert middle.watson == expected_watson
    assert middle.crick == expected_crick

    # --- EcoRI on the left, PstI on the right ------------------------------
    eco_pst_top = "nGAATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCAGn"
    eco_pst_bottom = "nCTTAAGagtagatgatagtagcatcgcatgactagataagacgacgagtagtagccatgagagatattaatatatatatacgcgcaGACGTCn"
    eco_pst = Dseq(eco_pst_top, eco_pst_bottom[::-1], linear=True, ovhg=0)

    eco_pst_insert = eco_pst.cut((EcoRI, PstI))[1]

    expected_watson = "AATTCtcatctactatcatcgtagcgtactgatctattctgctgctcatcatcggtactctctataattatatatatatgcgcgtCTGCA"
    expected_crick = "GacgcgcatatatatataattatagagagtaccgatgatgagcagcagaatagatcagtacgctacgatgatagtagatgaG"
    assert eco_pst_insert.watson == expected_watson
    assert eco_pst_insert.crick == expected_crick
コード例 #4
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_Dseq_slicing2():
    """Cutting with (EcoRI, BamHI, KpnI) must mirror cutting with (BamHI, EcoRI, KpnI)."""
    from pydna.dseq import Dseq
    from Bio.Restriction import BamHI, EcoRI, KpnI

    plasmid = Dseq("aaGGATCCnnnnnnnnnGAATTCccc", circular=True)

    eco_first = plasmid.cut(EcoRI, BamHI, KpnI)
    bam_first = plasmid.cut(BamHI, EcoRI, KpnI)

    # The fragment order is expected to be reversed between the two calls.
    assert eco_first == bam_first[::-1]
コード例 #5
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
 def cut_and_religate_Dseq(seq_string, enz, top):
     """Cut a Dseq with ``enz``, rejoin the fragments and compare with the source.

     ``top`` is passed as the ``linear`` argument of ``Dseq``; when it is
     falsy the rejoined molecule is looped before the comparison.  Nothing is
     checked when the enzyme produces no fragments.
     """
     source = Dseq(seq_string, linear=top)
     pieces = list(source.cut(enz))
     if len(pieces) == 0:
         return
     # Rejoin the fragments in the order they were returned.
     religated = pieces[0]
     for piece in pieces[1:]:
         religated += piece
     if not top:
         religated = religated.looped()
     assert eq(religated, source)
コード例 #6
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_misc():
    """Cut a circular Dseq with NotI, rejoin and re-loop it, then compare after a shift of 5."""
    from pydna.dseq import Dseq
    from Bio.Restriction import NotI

    circle = Dseq("ctcgGCGGCCGCcagcggccg", circular=True)

    # Exactly two fragments are expected from this cut.
    first, second = circle.cut(NotI)

    relooped = (first + second).looped()

    assert relooped.shifted(5) == circle
コード例 #7
0
ファイル: test_module_seqrecord.py プロジェクト: uswa1/pydna
def test_olaps():
    """SeqRecord.olaps must find "GGATCC" for every supported query type."""
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord  import SeqRecord
    from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA

    record = SeqRecord(Seq("GGATCC", alphabet=IUPACAmbiguousDNA()))

    queries = [
        "GGATCC",
        Seq("GGATCC", alphabet=IUPACAmbiguousDNA()),
        BSeqRecord(Seq("GGATCC", alphabet=IUPACAmbiguousDNA())),
        Dseq("GGATCC", alphabet=IUPACAmbiguousDNA()),
        Dseqrecord(Dseq("GGATCC")),
        Dseqrecord("GGATCC"),
    ]
    for query in queries:
        first_overlap = record.olaps(query, limit=4)[0]
        assert "GGATCC" == str(first_overlap.seq)
コード例 #8
0
ファイル: test_module_dseq.py プロジェクト: uswa1/pydna
def test_shifted():
    """shifted() rotates a circular Dseq and raises TypeError on a linear one."""
    from pydna.dseq import Dseq

    ring = Dseq("gatc", circular=True)
    assert ring.shifted(1) == Dseq("atcg", circular=True)
    # Shifting by the full length gives back an equal sequence.
    assert ring.shifted(4) == ring

    linear = Dseq("gatc", circular=False)
    with pytest.raises(TypeError):
        linear.shifted(1)
コード例 #9
0
 def from_SeqRecord(cls,
                    record: _SeqRecord,
                    *args,
                    linear=True,
                    circular=False,
                    n=5e-14,
                    **kwargs):
     """Create an instance of ``cls`` from ``record`` without calling ``__init__``.

     The sequence is built with ``_Dseq.quick`` from ``str(record.seq)`` and
     its ``_rc`` counterpart with ``ovhg=0``.  ``id``, ``name``,
     ``description``, ``dbxrefs``, ``_per_letter_annotations`` and
     ``features`` are taken from ``record`` by reference (not copied).
     ``annotations`` starts as ``{"molecule_type": "DNA"}`` and is then
     updated with ``record.annotations``.  ``*args`` and ``**kwargs`` are
     accepted but not used here.
     """
     top_strand = str(record.seq)
     instance = cls.__new__(cls)  # bypass __init__ on purpose
     instance._seq = _Dseq.quick(
         top_strand,
         _rc(top_strand),
         ovhg=0,
         linear=linear,
         circular=circular,
     )
     # Plain metadata shared with the source record.
     for attribute in ("id", "name", "description", "dbxrefs"):
         setattr(instance, attribute, getattr(record, attribute))
     instance.annotations = {"molecule_type": "DNA"}
     instance.annotations.update(record.annotations)
     instance._per_letter_annotations = record._per_letter_annotations
     instance.features = record.features
     instance.map_target = None
     instance.n = n
     return instance
コード例 #10
0
 def from_string(cls,
                 record: str = "",
                 *args,
                 linear=True,
                 circular=False,
                 n=5e-14,
                 **kwargs):
     """Create an instance of ``cls`` from a plain string without calling ``__init__``.

     The sequence is built with ``_Dseq.quick`` from ``record`` and
     ``_rc(record)`` with ``ovhg=0``.  ``id``, ``name`` and ``description``
     get ``_pretty_str`` placeholders, the list/dict attributes start empty,
     and any extra keyword arguments are written straight into the instance
     ``__dict__`` last, so they can override the defaults set here.
     ``*args`` is accepted but not used.
     """
     instance = cls.__new__(cls)  # bypass __init__ on purpose
     instance._seq = _Dseq.quick(
         record,
         _rc(record),
         ovhg=0,
         linear=linear,
         circular=circular,
     )
     # Placeholder text fields: each one holds its own attribute name.
     for attribute in ("id", "name", "description"):
         setattr(instance, attribute, _pretty_str(attribute))
     instance.dbxrefs = []
     instance.annotations = {"molecule_type": "DNA"}
     instance._per_letter_annotations = {}
     instance.features = []
     instance.map_target = None
     instance.n = n
     instance.__dict__.update(kwargs)
     return instance
コード例 #11
0
ファイル: test_module_amplify.py プロジェクト: uswa1/pydna
def test_Dseq_arguments():
    """pcr() is called with bare Dseq objects for both primers and the template.

    The two primers anneal at the ends of the template, so the product
    sequence is expected to equal the whole template.
    """
    from pydna.dseq import Dseq

    fwd_record, rev_record = parse_primers('''>ForwardPrimer
                            gctactacacacgtactgactg
                            
                            >ReversePrimer
                            tgtggttactgactctatcttg''')

    template_record = Dseqrecord("gctactacacacgtactgactgcctccaagatagagtcagtaaccaca")

    # Strip the record wrappers: only the Dseq objects are handed to pcr().
    fwd = Dseq(str(fwd_record.seq))
    rev = Dseq(str(rev_record.seq))
    template = Dseq(str(template_record.seq))

    product = pcr(fwd, rev, template)
    assert str(product.seq) == "gctactacacacgtactgactgcctccaagatagagtcagtaaccaca"
コード例 #12
0
def test_add_feature():
    """Exercise SeqRecord.add_feature, extract_feature and list_features.

    A feature is added by coordinates and by sequence (given as several
    different types); a sequence that is absent must raise TypeError; a
    feature spanning ATG...TAA gets the label "orf9"; and the text table from
    list_features is checked with a label, without one, and with a note.
    """
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as BSeqRecord
    from pydna.dseq import Dseq
    from pydna.dseqrecord import Dseqrecord
    from pydna.seqrecord import SeqRecord

    # Feature given by coordinates.
    host = SeqRecord("tttGGATCCaaa")
    host.add_feature(3, 9)
    assert host.extract_feature(0).seq == SeqRecord("GGATCC").seq

    # Feature given by its sequence, in every accepted form.
    sequence_forms = (
        lambda: "GGATCC",
        lambda: Seq("GGATCC"),
        lambda: Dseq("GGATCC"),
        lambda: SeqRecord("GGATCC"),
        lambda: BSeqRecord("GGATCC"),
        lambda: Dseqrecord("GGATCC"),
    )
    for make_query in sequence_forms:
        host = SeqRecord("tttGGATCCaaa")
        host.add_feature(seq=make_query())
        assert host.extract_feature(0).seq == SeqRecord("GGATCC").seq

    # A sequence that is not part of the record is rejected.
    host = SeqRecord("tttGGATCCaaa")
    with pytest.raises(TypeError):
        host.add_feature(seq=Dseqrecord("GGGGGG"))

    host = SeqRecord("tttATGaaaTAAggg")
    host.add_feature(3, 12)
    assert host.features[0].qualifiers["label"] == ["orf9"]

    small = SeqRecord(Seq("atgtaa"))

    small.add_feature(2, 4)

    table_with_label = "+-----+---------------+-----+-----+-----+-----+------+------+\n| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |\n+-----+---------------+-----+-----+-----+-----+------+------+\n|   0 | L:ft2         | --> | 2   | 4   |   2 | misc |  no  |\n+-----+---------------+-----+-----+-----+-----+------+------+"
    assert small.list_features() == table_with_label

    small.features[0].qualifiers
    del small.features[0].qualifiers["label"]
    table_without_label = "+-----+---------------+-----+-----+-----+-----+------+------+\n| Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |\n+-----+---------------+-----+-----+-----+-----+------+------+\n|   0 | nd            | --> | 2   | 4   |   2 | misc |  no  |\n+-----+---------------+-----+-----+-----+-----+------+------+"
    assert small.list_features() == table_without_label

    small.features[0].qualifiers["note"] = ["AwesomeFeature"]
    table_with_note = "+-----+------------------+-----+-----+-----+-----+------+------+\n| Ft# | Label or Note    | Dir | Sta | End | Len | type | orf? |\n+-----+------------------+-----+-----+-----+-----+------+------+\n|   0 | N:AwesomeFeature | --> | 2   | 4   |   2 | misc |  no  |\n+-----+------------------+-----+-----+-----+-----+------+------+"
    assert small.list_features() == table_with_note
コード例 #13
0
ファイル: test_module_dseq.py プロジェクト: uswa1/pydna
def test_Dseq_slicing():
    """A full slice keeps strands and overhang; sliced BamHI fragments rejoin."""
    from pydna.dseq import Dseq
    from pydna.readers import read
    from pydna.utils import eq

    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord as Srec
    from Bio.Restriction import BamHI

    site = Dseq("ggatcc", "ggatcc", 0)

    # Slicing with [:] must not change either strand or the overhang.
    assert site[:].watson == site.watson
    assert site[:].crick == site.crick
    assert site.ovhg == site[:].ovhg

    left, right = site.cut(BamHI)
    inner = left[1:5]
    inner_rc = inner.rc()
    # Only the inner_rc + inner order is asserted (the reverse-order check
    # was disabled in the original test).
    assert inner_rc + inner == Dseq("gatc", "gatc", 0)
コード例 #14
0
    def tester(i, fwd, rev, overhang=None):
        """Print diagnostics for one strand pair built two ways; return ``i + 1``.

        The pair is first wrapped in ``Dseq`` (keyword ``ovhg``) and then in
        ``DSeq`` (keyword ``overhang``); both names come from the enclosing
        scope.  The printed values allow a side-by-side visual comparison.
        """
        print("Test#%d:" % i)

        # First implementation: figure plus both ends.
        via_dseq = Dseq(fwd, rev, ovhg=overhang)
        print(via_dseq.fig())
        print(via_dseq.five_prime_end())
        print(via_dseq.three_prime_end())

        # Second implementation: alignment coordinates, highlight, both ends.
        via_DSeq = DSeq(fwd, rev, overhang=overhang)
        print(via_DSeq.alignment.reference_start,
              via_DSeq.alignment.reference_end)
        print(via_DSeq.highlight())
        print(via_DSeq.five_prime_end())
        print(via_DSeq.three_prime_end())
        print(via_DSeq.alignment.read_start, via_DSeq.alignment.read_end)
        print("")

        next_index = i + 1
        return next_index
コード例 #15
0
def get_prop_primers(primers, vector):
    """Locate every primer on ``vector`` and store the result on the primer.

    The vector sequence is lower-cased, stripped of spaces and wrapped in a
    ``Dseq``.  For each primer the function sets ``position``, ``dir``,
    ``in_vector`` and ``length`` on the primer object and calls
    ``primer.save()``.

    * forward hit: ``position`` is the 1-based index of the match,
      ``dir == 'forward'``;
    * reverse hit (match on the reverse complement): ``position`` is
      ``index - L - 1`` (a negative number), ``dir == 'reverse'``;
    * no hit: ``position == -1``, ``dir == 'none'``, ``in_vector == False``.

    ``length`` counts the characters ``a/t/c/g/n`` in the lower-cased primer
    and corrects that count for modification codes found in the string.

    Returns:
        ``(vector_name, seq, L, position, dir, in_vector)`` where the last
        three values belong to the last primer processed.  When ``primers``
        is empty they are ``-1``, ``'none'`` and ``False`` (the original code
        raised ``UnboundLocalError`` in that case).
    """
    # Modification codes (named idt_codes_* in the original) mapped to the
    # amount subtracted from the base count when the code occurs in the
    # primer; a negative value adds to the length instead.
    length_correction = {
        'icy5': 1,
        'icy3': 1,
        '5biosg': 1,
        '(am)': 1,
        'dspacer': 2,
        'dbcoteg': 3,
        'biotinteg': 4,
        'ds': -1,
        'idsp': -1,
    }
    counted_bases = 'atcgn'

    vector_name = vector.name
    seq = Dseq(str.lower(vector.sequence).replace(' ', ''))
    L = len(str(seq))
    rseq = seq.reverse_complement()

    # Defaults so the return statement is valid for an empty `primers`.
    position, direction, in_vector = -1, 'none', False

    for primer in primers:
        p_seq = Dseq(primer.sequence.replace(' ', ''))
        p_seq_s = str.lower(str(p_seq))  # all lower case

        # Each code contributes at most once, however often it occurs.
        Lp_subtract = sum(amount
                          for code, amount in length_correction.items()
                          if code in p_seq_s)
        Lp = sum(p_seq_s.count(base) for base in counted_bases) - Lp_subtract

        forward_hit = seq.find(p_seq_s)
        if forward_hit != -1:
            position, direction, in_vector = forward_hit + 1, 'forward', True
        else:
            # Only search the reverse complement when the forward search failed.
            reverse_hit = rseq.find(p_seq_s)
            if reverse_hit != -1:
                position = reverse_hit - L - 1
                direction, in_vector = 'reverse', True
            else:
                position, direction, in_vector = -1, 'none', False

        primer.position = position
        primer.dir = direction
        primer.in_vector = in_vector
        primer.length = Lp
        primer.save()
    return vector_name, seq, L, position, direction, in_vector
コード例 #16
0
    def toDSEQ(self, graph, edges, nodes):
        """Build one named ``Dseqrecord`` per edge and return them as a list.

        For each key ``edge`` of ``edges`` the top strand is ``edges[edge]``
        and the bottom strand is the complement generated for ``edge[1]`` by
        ``self.generateComplements(nodes)``; every duplex uses ``ovhg=-10``.
        The record name is ``edge[0] + "_" + edge[1]``.  ``graph`` is not
        used by this method.
        """
        stagger = -10
        node_complements = self.generateComplements(nodes)
        records = []
        for edge in edges:
            tail, head = edge[0], edge[1]
            duplex = Dseq(edges[edge], node_complements[head], ovhg=stagger)
            record = Dseqrecord(duplex)
            record.name = tail + "_" + head
            record.seq = duplex
            records.append(record)

        return records
コード例 #17
0
ファイル: dseq.py プロジェクト: libnano/libnano
    def tester(i, fwd, rev, overhang=None):
        """Print a report for ``fwd``/``rev`` built as ``Dseq`` and as ``DSeq``.

        Returns ``i + 1`` so the caller can number consecutive reports.  Both
        class names are taken from the enclosing scope; ``Dseq`` receives the
        overhang as ``ovhg`` and ``DSeq`` receives it as ``overhang``.
        """
        print("Test#%d:" % i)

        duplex_a = Dseq(fwd, rev, ovhg=overhang)
        print(duplex_a.fig())
        print(duplex_a.five_prime_end())
        print(duplex_a.three_prime_end())

        duplex_b = DSeq(fwd, rev, overhang=overhang)
        # Reference coordinates first, read coordinates last.
        print(duplex_b.alignment.reference_start,
              duplex_b.alignment.reference_end)
        print(duplex_b.highlight())
        print(duplex_b.five_prime_end())
        print(duplex_b.three_prime_end())
        print(duplex_b.alignment.read_start, duplex_b.alignment.read_end)
        print("")
        return i + 1
コード例 #18
0
def makeDseqFromDF(part, partslist, col="part"):
    """Look up ``part`` in ``partslist`` and convert that row into a ``Dseq``.

    The first row whose ``col`` value equals ``part`` is used.  It must
    provide the columns ``sequence``, ``circular``, ``5pend`` and ``3pend``.

    The end columns describe the single-stranded overhangs as signed
    lengths, and the strands are trimmed accordingly:

    * ``5pend > 0``: remove that many bases from the start of the top strand;
    * ``5pend < 0``: remove that many bases from the end of the
      reverse-complement strand;
    * ``3pend < 0``: remove that many bases from the end of the top strand;
    * ``3pend > 0``: remove that many bases from the start of the
      reverse-complement strand.

    The ``Dseq`` is created with ``ovhg`` equal to the ``5pend`` value and is
    looped when the ``circular`` value is truthy.

    Raises:
        IndexError: if no row matches ``part`` (from ``.iloc[0]``).
    """
    # Filter the frame once instead of once per column.
    matches = partslist[partslist[col] == part]
    pseq = matches.sequence.iloc[0].lower()
    pcirc = matches.circular.iloc[0]
    p5pover = int(matches["5pend"].iloc[0])
    p3pover = int(matches["3pend"].iloc[0])

    pseqRC = str(Dseq(pseq).rc()).lower()
    if p5pover > 0:
        pseq = pseq[p5pover:]
    elif p5pover < 0:
        pseqRC = pseqRC[:p5pover]
    if p3pover < 0:
        pseq = pseq[:p3pover]
    elif p3pover > 0:
        # Fixed: trim by the 3' overhang length.  The original sliced with
        # p5pover here, which left the strand untrimmed when 5pend == 0.
        pseqRC = pseqRC[p3pover:]
    # This defines a linear double-stranded sequence.
    pDseq = Dseq(pseq, pseqRC, ovhg=p5pover)
    if pcirc:
        # Make the sequence circular when the part is flagged as such.
        pDseq = pDseq.looped()
    return pDseq
コード例 #19
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_cut_with_no_enzymes():
    """cut([]) returns an empty tuple for a default and for a circular Dseq."""
    from pydna.dseq import Dseq

    sequence = "ctcgGCGGCCGCcagcggccg"

    # First with the default topology, then explicitly circular.
    for topology in ({}, {"circular": True}):
        molecule = Dseq(sequence, **topology)
        assert molecule.cut([]) == ()
コード例 #20
0
def chewback(seqtochew, chewamt, end="fiveprime"):
    """Remove ``chewamt`` bases from one kind of end on both strands.

    Args:
        seqtochew: object exposing ``watson`` and ``crick`` strands and
            ``len()``.  The result is computed as if it were blunt; its own
            overhang is not consulted.
        chewamt: number of bases to remove from each strand.
        end: ``"fiveprime"`` removes bases from the 5' end of each strand;
            any other value removes them from the 3' end.

    Returns:
        A new ``Dseq``, or ``None`` when the sequence is not longer than
        ``chewamt * 2 + 1`` and would be chewed away.

    The overhang of the result follows the sign convention that a
    protruding watson 5' end is negative: after a 5' chew-back the crick
    3' end protrudes (``ovhg = +chewamt``), after a 3' chew-back the watson
    5' end protrudes (``ovhg = -chewamt``).  The original code used
    ``+chewamt`` in both cases.
    """
    wat = seqtochew.watson
    cri = seqtochew.crick

    if len(seqtochew) <= chewamt * 2 + 1:
        return None

    if end == "fiveprime":
        cwat = wat[chewamt:]
        ccri = cri[chewamt:]
        ovhg = chewamt
    else:
        # Slice by explicit length: wat[:-0] would return an empty strand.
        cwat = wat[:len(wat) - chewamt]
        ccri = cri[:len(cri) - chewamt]
        ovhg = -chewamt
    return Dseq(cwat, ccri, ovhg=ovhg)
コード例 #21
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_cut_missing_enzyme():
    """Cutting with an enzyme that has no site returns an empty tuple."""
    from pydna.dseq import Dseq
    from Bio.Restriction import PstI

    sequence = "ctcgGCGGCCGCcagcggccg"

    # First with the default topology, then explicitly circular.
    for topology in ({}, {"circular": True}):
        molecule = Dseq(sequence, **topology)
        assert molecule.cut(PstI) == ()
コード例 #22
0
def plotpcr(template, p1, p2):  #p1 is forward, p2 is reverse
    """Render a text view of ``template`` with both primers drawn in place.

    All three inputs are strings; spaces are removed and the text is
    lower-cased before use.  The template is printed in rows of ``n_seg``
    characters.  For every row the output contains the top-strand row (with
    the forward primer drawn above it when it starts in that row), the
    matching row of ``generate_indicator(template)``, the second-strand row
    (with the reverse primer drawn below it when it starts in that row) and
    a separator line of ``n_sep`` dashes.

    Returns the assembled multi-line string.
    """
    template = str.lower(template.replace(' ', ''))
    p1 = str.lower(p1.replace(' ', ''))
    # The reverse primer is reversed so it can be searched in r_template,
    # which is itself a reversed string (see next line).
    p2 = str.lower(p2.replace(' ', ''))[::-1]
    # Reverse complement written backwards, i.e. aligned position by position
    # under `template`.
    r_template = str(Dseq(template).reverse_complement())[::-1]
    L = len(template)
    n_seg = 100  # characters of sequence per output row
    n_sep = 120  # length of the dashed separator line
    # End index of every row: multiples of n_seg below L, then L itself.
    i_end = np.append(np.arange(n_seg, L, n_seg), L)
    # One entry per row is read below — presumably a position ruler for the
    # row; TODO confirm against generate_indicator.
    indicator_pos = generate_indicator(template)
    seq_show = ''
    # 1-based start positions; a primer that is not found gives 0.
    # NOTE(review): 0 satisfies the "in that range" test of the first row,
    # so a missing primer is treated like one starting in row 0 — confirm
    # whether that is intended.
    pf = template.find(p1) + 1
    pr = r_template.find(p2) + 1
    # primer_f_seq_show = ''
    # primer_r_seq_show = ''
    for i, n_end in enumerate(i_end):
        if (pf >= i * n_seg) and (pf < n_end):  # in that range
            # NOTE(review): when the primer starts in this row but does not
            # fit, nothing is appended for the top strand of this row.
            if (len(p1) <= n_end - pf):  #not exceed a row
                # Dot padding up to the primer start; the extra 3 matches the
                # length of the "5'-" prefix.  A negative count gives ''.
                primer_f_seq_show = '.' * (
                    pf - 1 - 3 - i * n_seg) + "5'-" + p1 + "-3'-->>dir" + '\n'
                template_seq = primer_f_seq_show + template[i *
                                                            n_seg:n_end] + '\n'
                seq_show += template_seq + indicator_pos[i] + '\n'
                # seq_show += '-' * n_seg + '\n'
        else:  #not in range
            template_seq = template[i * n_seg:n_end] + '\n'
            seq_show += template_seq + indicator_pos[i] + '\n'
            # seq_show += '-' * n_seg + '\n'

        if (pr >= i * n_seg) and (pr < n_end):
            # NOTE(review): as above, a reverse primer that starts in this
            # row but does not fit leaves the row without its second strand
            # and without a separator.
            if (len(p2) <= n_end - pr):
                # Dot padding before the "dir<<-3'-" prefix (9 characters).
                primer_r_seq_show = '.' * (
                    pr - i * n_seg - 10) + "dir<<-3'-" + p2 + "-5'" + '\n'
                r_template_seq = r_template[i * n_seg:n_end] + '\n'
                seq_show += r_template_seq + '\n' + primer_r_seq_show
                seq_show += '-' * n_sep + '\n'
        else:
            seq_show += r_template[i * n_seg:n_end] + '\n' + '-' * n_sep + '\n'

    return seq_show
コード例 #23
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_Dseq___getitem__():
    """Slicing linear and circular Dseq objects gives the expected linear pieces."""
    from pydna.dseq import Dseq

    linear = Dseq("GGATCC", circular=False)
    assert linear[1:-1] == Dseq("GATC", circular=False)

    circular = Dseq("GGATCC", circular=True)
    inner = Dseq("GATC")
    # A slice of a circular sequence equals a plain Dseq, attribute by attribute.
    assert circular[1:5] == inner
    assert circular[1:5].__dict__ == Dseq("GATC").__dict__

    assert linear[1:5] == Dseq("GATC")
    assert linear[1:5] == Dseq("GATC", circular=False)
    # Negative step.
    assert linear[5:1:-1] == Dseq("CCTA")

    # start > stop on a circular sequence wraps around the origin.
    assert circular[5:1] == Dseq("CG")

    # Start index beyond the end gives an empty sequence for both topologies.
    empty = Dseq("")
    assert linear[9:1] == empty
    assert circular[9:1] == Dseq("")
コード例 #24
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2013-2020 by Björn Johansson.  All rights reserved.
# This code is part of the Python-dna distribution and governed by its
# license.  Please see the LICENSE.txt file that should have been included
# as part of this package.
"""This module provides most pydna functionality in the local namespace.

Example
-------

>>> from pydna.all import *
>>> Dseq("aaa")
Dseq(-3)
aaa
ttt
>>> Dseqrecord("aaa")
Dseqrecord(-3)
>>> from pydna.all import __all__
>>> __all__
['Anneal', 'pcr', 'Assembly', 'genbank', 'Genbank', 'download_text\
', 'Dseqrecord', 'Dseq', 'read', 'read_primer', 'parse', 'parse_primers\
', 'ape', 'primer_design', 'assembly_fragments', 'circular_assembly_fragments\
', 'eq', 'gbtext_clean', 'primerlist']
>>>
"""

__all__ = [
    "Anneal",
    "pcr",
    "Assembly",
コード例 #25
0
ファイル: test_module_dseq.py プロジェクト: uswa1/pydna
def test_repr():
    """repr() of a Dseq shows both strands, abbreviated with '..' when long.

    Each case lists the positional arguments for Dseq, its keyword arguments
    and the exact expected repr.  Bottom strands written as ``"..."[::-1]``
    are spelled left-to-right and reversed before use.
    """
    from pydna.dseq import Dseq

    cases = [
        (("gattcgtatgctgatcgtacgtactgaaaac",), {},
         'Dseq(-31)\ngatt..aaac\nctaa..tttg'),
        (("gattcgtatgctgatcgtacgtactgaaaac", "gactagcatgcatgacttttc"[::-1]), {},
         'Dseq(-31)\ngattcgtatgctga..aaac\n          gact..tttc'),
        (("gattcgtatgctgatcgtacgtactgaaaac", "actagcatgcatgacttttc"[::-1]), {},
         'Dseq(-31)\ngatt..atgctgat..aaac\n          acta..tttc'),
        (("gattcgtatgctgatcgtacg", "gactagcatgc"[::-1]), {},
         'Dseq(-21)\ngattcgtatgctgatcgtacg\n          gactagcatgc'),
        (("gactagcatgcatgacttttc", "gattcgtatgctgatcgtacgtactgaaaac"[::-1]), {},
         'Dseq(-31)\n          gact..tttc\ngattcgtatgctga..aaac'),
        (("Ggactagcatgcatgacttttc", "gattcgtatgctgatcgtacgtactgaaaac"[::-1]), {},
         'Dseq(-31)\n         Ggac..tttc\ngattcgtatgctg..aaac'),
        (("gattcgtatgctgatcgtacgtactgaaaac", "ctaagcatacgactagc"[::-1]), {},
         'Dseq(-31)\ngatt..atcgtacg..aaac\nctaa..tagc          '),
        (("cgtatgctgatcgtacgtactgaaaac", "gcatacgactagc"[::-1]), {},
         'Dseq(-27)\ncgtatgctgatcgtacgtactgaaaac\ngcatacgactagc'),
        (("cgtatgctgatcgtacgtactgaaaacagact", "gcatacgactagc"[::-1]), {},
         'Dseq(-32)\ncgta..atcgtacg..gact\ngcat..tagc          '),
        (("gattcgtatgctgatcgtacgtactgaaaac", "acAAGGAGAGAtg"), {"ovhg": 11},
         'Dseq(-42)\n          gattcg..aaac\ngtAG..GGAAca          '),
        (("g", "gattcgtatgctgatcgtacgtactgaaaac"), {"ovhg": 0},
         'Dseq(-31)\ng          \ncaaaa..ttag'),
        (("gattcgtatgctgatcgtacgtactgaaaa",), {},
         'Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\nctaagcatacgactagcatgcatgactttt'),
        (("gattcgtatgctgatcgtacgtactgaaaa", "gactagcatgcatgactttt"[::-1]), {},
         'Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n          gactagcatgcatgactttt'),
        (("gattcgtatgctgatcgtacgtactgaaaa", "actagcatgcatgactttt"[::-1]), {},
         'Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n           actagcatgcatgactttt'),
    ]

    # Build each molecule just before its check, in the listed order.
    for strands, options, expected in cases:
        molecule = Dseq(*strands, **options)
        assert repr(molecule) == expected
コード例 #26
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_initialization():
    """Exercise Dseq construction: repetition, equality, errors and topology flags."""
    import pytest
    from pydna.dseq import Dseq

    base_pair = Dseq("a", "t", 0)
    assert base_pair * 3 == Dseq("aaa", "ttt", 0)
    assert not base_pair == 123
    assert base_pair * 0 == Dseq("")

    # Only integer repetition is supported.
    with pytest.raises(TypeError):
        base_pair * 2.3

    assert base_pair.seguid() == "bc1M4j2I4u6VaLpUbAB8Y9kTHBs"

    assert base_pair == Dseq("a", "t", circular=False, linear=True)

    # Argument combinations that must be rejected with ValueError.
    rejected = (
        (("a",), {"ovhg": 0}),
        (("ttt", "tt"), {}),
        (("ttt", "aa"), {}),
    )
    for strands, options in rejected:
        with pytest.raises(ValueError):
            Dseq(*strands, **options)

    default_topology = Dseq("gata")

    assert default_topology.linear == True
    assert default_topology.circular == False

    l = Dseq("gt")
    c = l.looped()

    assert l.linear
    assert not l.circular
    assert c.circular
    assert not c.linear

    # (linear, circular) -> expected molecule, for all nine combinations.
    topology_matrix = (
        (None, None, l),
        (None, False, l),
        (None, True, c),
        (False, None, c),
        (False, False, l),
        (False, True, c),
        (True, None, l),
        (True, False, l),
        (True, True, l),
    )
    for linear_flag, circular_flag, expected in topology_matrix:
        assert Dseq("gt", linear=linear_flag, circular=circular_flag) == expected

    assert Dseq.from_string("A") == Dseq("A") == Dseq("A", linear=True)
    from_string_circular = Dseq.from_string("A", linear=False, circular=True)
    assert (
        from_string_circular
        == Dseq("A", circular=True)
        == Dseq("A", linear=False)
    )
コード例 #27
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_repr():
    """Compare repr() of differently staggered Dseq objects with literal text.

    Long strands are abbreviated with ".." in the expected output; bottom
    strands given as ``"..."[::-1]`` are spelled left-to-right and reversed
    before being passed to Dseq.
    """
    from pydna.dseq import Dseq

    def check(expected, *strands, **options):
        # Construct the molecule and compare its repr in one step.
        assert repr(Dseq(*strands, **options)) == expected

    long_top = "gattcgtatgctgatcgtacgtactgaaaac"

    check("Dseq(-31)\ngatt..aaac\nctaa..tttg", long_top)

    check("Dseq(-31)\ngattcgtatgctga..aaac\n          gact..tttc",
          long_top, "gactagcatgcatgacttttc"[::-1])

    check("Dseq(-31)\ngatt..atgctgat..aaac\n          acta..tttc",
          long_top, "actagcatgcatgacttttc"[::-1])

    check("Dseq(-21)\ngattcgtatgctgatcgtacg\n          gactagcatgc",
          "gattcgtatgctgatcgtacg", "gactagcatgc"[::-1])

    check("Dseq(-31)\n          gact..tttc\ngattcgtatgctga..aaac",
          "gactagcatgcatgacttttc", long_top[::-1])

    check("Dseq(-31)\n         Ggac..tttc\ngattcgtatgctg..aaac",
          "Ggactagcatgcatgacttttc", long_top[::-1])

    check("Dseq(-31)\ngatt..atcgtacg..aaac\nctaa..tagc          ",
          long_top, "ctaagcatacgactagc"[::-1])

    check("Dseq(-27)\ncgtatgctgatcgtacgtactgaaaac\ngcatacgactagc",
          "cgtatgctgatcgtacgtactgaaaac", "gcatacgactagc"[::-1])

    check("Dseq(-32)\ncgta..atcgtacg..gact\ngcat..tagc          ",
          "cgtatgctgatcgtacgtactgaaaacagact", "gcatacgactagc"[::-1])

    # Explicit overhangs.
    check("Dseq(-42)\n          gattcg..aaac\ngtAG..GGAAca          ",
          long_top, "acAAGGAGAGAtg", ovhg=11)

    check("Dseq(-31)\ng          \ncaaaa..ttag",
          "g", long_top, ovhg=0)

    # 30-mers are printed in full.
    full_top = "gattcgtatgctgatcgtacgtactgaaaa"

    check("Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\nctaagcatacgactagcatgcatgactttt",
          full_top)

    check("Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n          gactagcatgcatgactttt",
          full_top, "gactagcatgcatgactttt"[::-1])

    check("Dseq(-30)\ngattcgtatgctgatcgtacgtactgaaaa\n           actagcatgcatgactttt",
          full_top, "actagcatgcatgactttt"[::-1])
コード例 #28
0
def makeEchoFile(parts,aslist,gga=ggaPD,partsFm=partsFm,source=source,\
            output = "output.csv",selenzyme=selenzyme,fname="recentassembly",\
            protocolsDF=None,sepfiles=True,sepfilename="outputLDV.csv",printstuff=True,progbar=None):
    """Write Echo pick-list CSVs and predicted products for a list of assemblies.

    Every row of ``aslist`` describes one assembly.  For each row the parts are
    looked up in ``parts``, transfer lines are appended to the pick list, the
    remaining reaction volume is topped up with water, and the assembly is
    simulated so the blunt/circular products can be saved as ``.gbk`` files and
    listed in a summary spreadsheet.

    inputs:
        parts: dataframe of what's in the source plate
        aslist: dataframe of what we need to assemble; one row per assembly,
            must contain a "targwell" column, may contain "enzyme", "name" and
            "comment" columns, every other column names a part
        gga: accepted for backward compatibility; not read by this function
        partsFm: accepted for backward compatibility; the value is replaced
            per row by the protocol-specific amount
        source: accepted for backward compatibility; not read by this function
        output: the name of the main output pick list
        selenzyme: accepted for backward compatibility; the enzyme is taken
            from each row of aslist (falling back to "BsaI")
        fname: name of the folder (below dnaPath) the assembled sequences are
            saved into; any directory part and extension are stripped
        protocolsDF: optional dataframe describing the available protocols
        sepfiles: when true, transfers whose pick-list line mentions "LDV" go
            to a second pick list
        sepfilename: the name of that second pick list
        printstuff: print progress information
        progbar: optional object whose ``value`` attribute is set to the
            fraction of rows processed so far
    """
    # Boilerplate column header.  The four spaces in front of "Sample ID" and
    # "Destination Well" are part of the text the original literal produced
    # (indentation after a backslash continuation) and are kept verbatim.
    header = (
        "Source Plate Name,Source Plate Barcode,Source Plate Type,Source Well,"
        "    Sample ID,Sample Name,Sample Group,Sample Comment,Destination Plate Name,"
        "    Destination Well,Transfer Volume\n"
    )
    outfile = header
    outfile2 = header
    f2init = len(outfile2)

    _, fname = os.path.split(fname)
    if "." in fname:
        fname = fname[:fname.index(".")]
    # Loop-invariant, and needed after the loop even when aslist is empty.
    newpath = os.path.join(dnaPath, fname)

    #the following is for making a spreadsheet style sequence list for performing further assemblies
    prodSeqSpread = "well,part,description,type,left,right,conc (nM),date,numvalue,sequence,circular,5pend,3pend,length\n"
    prevplate = None
    prevtype = None
    maxprog = float(len(aslist))

    for assnum in range(len(aslist)):
        if progbar is not None:
            progbar.value = float(assnum + 1) / maxprog
        assembly = aslist[assnum:assnum + 1]  # one-row slice of the dataframe
        dwell = assembly.targwell[assembly.targwell.index[0]]  # destination well

        try:
            selenzyme = assembly.enzyme[assembly.enzyme.index[0]]
        except (KeyError, AttributeError):
            # A missing column surfaces as AttributeError with attribute
            # access; default to the most commonly used enzyme.
            selenzyme = "BsaI"

        # `is not None`: comparing a DataFrame with `!= None` yields a
        # DataFrame whose truth value is ambiguous and raises ValueError.
        if protocolsDF is not None:
            cprt_temp = "gga"
            if selenzyme == "gibson":
                cprt_temp = "gibson"
            # NOTE(review): this branch looks unfinished - curprot is a plain
            # dict but is indexed like a dataframe below, and `partfm` /
            # `vectorfm` are not defined in this function.  Left untouched
            # because the intended protocolsDF schema is not visible here.
            curprot = {"dnasln": protocolsDF[(protocolsDF.protocol==cprt_temp)&\
                            (protocolsDF.component == "dnasln")].amount.iloc[0]}
            partsFm = curprot[curprot.component == partfm].amount.iloc[0]
            vectorFm = curprot[curprot.component == vectorfm].amount.iloc[0]
        else:
            curprot = ggaPD
            partsFm = ggaFm
            # NOTE(review): `ggavecGm` may be a typo for `ggavecFm`
            # (cf. `gibvecFm`); confirm against the module-level constants.
            vectorFm = ggavecGm
            if selenzyme == "gibson":
                curprot = gibassyPD
                partsFm = gibFm
                vectorFm = gibvecFm

        # Total volume available for DNA + water; every part transferred below
        # is subtracted and the remainder is filled with water.
        water = float(curprot[curprot.component == "dnasln"].volume) * 1000
        if printstuff:
            print("assembling with " + selenzyme)
        aind = assembly.index[0]  # label of the single row in this slice
        frags = []
        if selenzyme != "gibson":
            enzyme = enzymes[selenzyme]
            esite = enzyme.site.lower()
            esiterc = str(Dseq(enzyme.site).rc()).lower()

        for col in assembly:
            if col == "targwell":
                # Every row is terminated by the target well, so this is where
                # the water is added and the assembly is simulated.
                # Remember an overdrawn well before clamping; the original
                # check ran after the clamp and could never fire.
                overdrawn = water <= 0
                if int(water) < 25:
                    #echo gets mad if you tell it to pipet significantly less than 25 nl
                    water = 25
                # the echo automatically rounds to the nearest 25,
                # so it's not really necessary to round here.
                ewat = int(water)

                nefrags = []
                for frag in frags:
                    if selenzyme == "gibson":
                        if len(frag) > chewnt * 2 + 1:
                            nefrags += [chewback(frag, chewnt)]
                        else:
                            raise ValueError("part with sequence "+str(frag)+" is too "+\
                                            "short for gibson! (<= 80 nt)")
                    else:
                        # cut() may return a tuple; normalise to a list and
                        # keep the uncut fragment when there is no site.
                        newpcs = list(frag.cut(enzyme)) or [frag]
                        for pcs in newpcs:
                            # keep only pieces that no longer carry the
                            # recognition site in either orientation
                            if pcs.find(esite) == -1 and pcs.find(esiterc) == -1:
                                nefrags += [pcs]
                allprod = DPallCombDseq(nefrags)
                goodprod = []
                if printstuff:
                    print("saving in folder {}".format(newpath))
                try:
                    #this part gathers the "name" column to create the output sequence
                    Cname = assembly.name[assembly.name.index[0]]
                except (KeyError, AttributeError):
                    Cname = ""
                if Cname == "" or str(Cname) == "nan":
                    Cname = "well" + dwell
                if printstuff:
                    print("Parts in construct {}".format(Cname))
                if not os.path.exists(newpath):
                    if printstuff:
                        print("made dirs!")
                    os.makedirs(newpath)

                num = 0
                for prod in allprod:
                    Cnamenum = Cname
                    if len(allprod) > 1:
                        Cnamenum = Cname + "_" + str(num)
                    # The file is created even for products that are skipped
                    # below, exactly as before; `with` guarantees it is closed.
                    with open(os.path.join(newpath, Cnamenum + ".gbk"), "w") as wout:
                        if bluntLeft(prod) and bluntRight(prod):
                            num += 1
                            goodprod += [prod]
                            topo = ["linear", "circular"][int(prod.circular)]
                            booltopo = ["FALSE", "TRUE"][int(prod.circular)]
                            un_prod = "_".join(Cnamenum.split())
                            wout.write(
                                "LOCUS       {}                {} bp ds-DNA     {} SYN 01-JAN-0001\n"
                                .format(un_prod, len(prod), topo))
                            wout.write("ORIGIN\n")
                            wout.write(str(prod) + "\n//")
                            now = datetime.datetime.now()
                            nowdate = "{}/{}/{}".format(now.month, now.day,
                                                        now.year)
                            prodSeqSpread += "{},{},assembled with {},,,,30,{},,{},{},{},{},{}\n".format(\
                                            dwell,un_prod,          selenzyme,nowdate,prod,booltopo,0,0,len(prod))
                assembend = ["y", "ies"][int(len(goodprod) > 1)]
                if printstuff:
                    print("Detected {} possible assembl{}".format(
                        len(goodprod), assembend))
                frags = []
                if overdrawn:
                    print("WARNING!!!! water <=0 in well {}".format(dwell))
                # The (clamped) water volume is always transferred, which is
                # what the original code did in practice.
                if prevplate is None:
                    outfile += echoline(waterwell, dwell, ewat)
                else:
                    watline = echoline(waterwell,
                                       dwell,
                                       ewat,
                                       spname=prevplate,
                                       sptype=prevtype,
                                       platebc=prevplate)
                    # Only divert to the LDV list when it will be written;
                    # otherwise the water transfer would be lost.
                    if sepfiles and "LDV" in prevtype:
                        outfile2 += watline
                    else:
                        outfile += watline
                if printstuff:
                    print("")
            elif col in ("comment", "enzyme", "name"):
                # bookkeeping columns, not parts
                continue
            else:
                part = assembly[col][aind]  # name of the part for this column
                if str(part) == 'nan':
                    #this means we skip this part
                    if printstuff:
                        print("skip one!")
                    continue
                evol = 0
                if ':' in str(part):
                    #this means we have multiple parts to mix!
                    subparts = part.split(':')
                    sub_partsFm = partsFm / len(subparts)
                    sub_vecFm = vectorFm / len(subparts)
                else:
                    subparts = [part]
                    sub_partsFm = partsFm
                    sub_vecFm = vectorFm
                for subpart in subparts:
                    #use the vector at lower concentration!!
                    useFm = sub_vecFm if col == "vector" else sub_partsFm
                    e1,e2,pDseq,prevplate,prevtype = echoSinglePart(parts,\
                            subpart,useFm,dwell,printstuff=printstuff)
                    frags += [pDseq]
                    evol += e2
                    if sepfiles and "LDV" in e1:
                        outfile2 += e1
                    else:
                        outfile += e1
                water = water - evol

    # The folder may not exist yet when aslist had no rows.
    os.makedirs(newpath, exist_ok=True)
    spreadpath = os.path.join(newpath, fname + ".csv")
    with open(spreadpath, "w") as pspread:
        pspread.write(prodSeqSpread)
    seqdispDF = pd.read_csv(spreadpath,
                            usecols=["well", "part", "circular", "length"])
    display(seqdispDF)
    display(FileLink(spreadpath))
    with open(output, "w") as ofle:
        ofle.write(outfile)
    display(FileLink(output))
    if sepfiles and (len(outfile2) > f2init):
        if printstuff:
            print("wrote LDV steps in {}".format(sepfilename))
        with open(sepfilename, "w") as ofle2:
            ofle2.write(outfile2)
        display(FileLink(sepfilename))
コード例 #29
0
def DPallCombDseq(partslist):
    '''Assemble every valid product that can be built from partslist.

    Each part is reduced to a pair of end labels (left, right): "blunt", or
    the lower-cased overhang sequence, reverse-complemented for a 3' overhang
    on the left and for a 5' overhang on the right.  findDNAPaths is then
    asked for the paths starting at every part.  A path is accepted when it
    is blunt at both outer ends (linear product) or when its outer end labels
    are equal (the product is looped).  Products that isNewDseq does not
    report as new are dropped.'''
    ends_to_parts = defaultdict(list)  # end label -> [part index, side]
    part_ends = {}  # part index -> (left label, right label)

    for idx, part in enumerate(partslist):
        left_kind, left_seq = part.five_prime_end()
        right_kind, right_seq = part.three_prime_end()

        if left_kind == "blunt":
            left_label = "blunt"
        elif left_kind == "3'":
            left_label = str(Dseq(left_seq).rc()).lower()
        else:
            left_label = str(left_seq).lower()
        ends_to_parts[left_label].append([idx, 0])

        if right_kind == "blunt":
            right_label = "blunt"
        elif right_kind == "5'":
            right_label = str(Dseq(right_seq).rc()).lower()
        else:
            right_label = str(right_seq).lower()
        ends_to_parts[right_label].append([idx, 1])

        part_ends[idx] = (left_label, right_label)

    candidate_paths = []
    for start in list(part_ends):
        candidate_paths.extend(findDNAPaths(start, part_ends, ends_to_parts))

    products = []
    for path in candidate_paths:
        outer_left = part_ends[path[0]][0]
        outer_right = part_ends[path[-1]][1]
        linear = outer_left == "blunt" and outer_right == "blunt"
        if not linear and outer_left != outer_right:
            # neither blunt-ended nor closable into a circle
            continue

        assembled = partslist[path[0]]
        for idx in path[1:]:
            assembled += partslist[idx]
        if not linear:
            assembled = assembled.looped()

        if isNewDseq(assembled, products):
            products.append(assembled)

    return products
コード例 #30
0
ファイル: test_module_dseq.py プロジェクト: joskid/pydna
def test_dseq():
    """Exercise Dseq: addition, slicing, end handling, repr, cutting, seguid.

    The test rebinds ``obj`` many times, so the statements are order
    dependent.
    """

    import textwrap
    from pydna.dseq import Dseq

    # --- addition: combinations that must be rejected ---
    obj1 = Dseq("a", "t", circular=True)
    obj2 = Dseq("a", "t")

    with pytest.raises(TypeError):
        obj1 + obj2

    with pytest.raises(TypeError):
        obj2 + obj1

    with pytest.raises(TypeError):
        obj1 + ""

    with pytest.raises(AttributeError):
        obj2 + ""

    obj1 = Dseq("at", "t")
    obj2 = Dseq("a", "t")

    with pytest.raises(TypeError):
        obj1 + obj2

    # --- slicing circular and linear sequences ---
    obj = Dseq("aaa", "ttt", circular=True)
    assert obj[1:2] == Dseq("a", "t", 0)

    # a full slice of a circular sequence compares equal to the linear one
    assert obj[:] == Dseq("aaa", "ttt", circular=False)

    obj = Dseq("atg", "cat", 0, circular=False)

    assert obj[1:2]._data == "atg"[1:2]

    assert obj[2:1]._data == "atg"[2:1]

    assert obj.reverse_complement() == obj.rc() == Dseq("cat", "atg", 0)

    obj = Dseq("atg", "cat", circular=True)

    assert obj.looped() == obj

    assert obj[:] == Dseq("atg", "cat", 0, circular=False)

    assert obj[1:2]._data == "atg"[1:2]

    # on a circular sequence a slice with start > stop yields "ga" here
    assert obj[2:1]._data == "ga"

    # --- five_prime_end with one empty strand ---
    obj = Dseq("G", "", 0)
    assert obj.five_prime_end() == ("5'", "g")
    obj = Dseq("", "C", 0)
    assert obj.five_prime_end() == ("3'", "c")

    # --- staggered ends: _data, mung, repr, indexing, fill_in ---
    obj = Dseq("ccGGATCC", "aaggatcc", -2)
    assert obj._data == "ccGGATCCtt"
    assert str(obj.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
    ccGGATCC
      cctaggaa
    """
    ).strip()
    assert repr(obj) == rpr

    assert obj[3] == Dseq("G", "c", 0)

    assert obj.fill_in() == Dseq("ccGGATCCtt", "aaggatccgg", 0)

    # adding an empty Dseq on either side leaves the sequence unchanged
    assert obj + Dseq("") == obj
    assert Dseq("") + obj == obj

    # --- fill_in with a restricted nucleotide set ---
    obj = Dseq("gatcAAAAAA", "gatcTTTTTT")
    assert obj.fill_in("gatc") == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.fill_in("atc") == obj
    assert obj.fill_in("ac") == obj
    assert obj.fill_in("at") == obj

    obj = Dseq("AAAAAAgatc", "TTTTTTgatc")
    assert obj.fill_in("gatc") == obj
    assert obj.fill_in("atc") == obj
    assert obj.fill_in("ac") == obj
    assert obj.fill_in("at") == obj

    # --- t4 with full and restricted nucleotide sets ---
    obj = Dseq("gatcAAAAAA", "gatcTTTTTT")
    assert obj.t4() == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")

    assert obj.t4("at") == obj
    assert obj.t4("atg") == Dseq("gatcAAAAAAgat", "gatcTTTTTTgat")
    assert obj.t4("atgc") == Dseq("gatcAAAAAAgatc", "gatcTTTTTTgatc")
    assert obj.mung() == Dseq("AAAAAA", "TTTTTT")

    obj = Dseq("AAAAAAgatc", "TTTTTTgatc")
    assert obj.t4() == obj.t4("at") == Dseq("AAAAAA")
    assert obj.t4("atc") == obj.t4("atg") == obj.t4("atcg") == Dseq("AAAAAA")

    assert Dseq("GGATCC", "GGATCC").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("GGATCCa", "GGATCC").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("aGGATCC", "GGATCC").t4() == Dseq("aGGATCC", "GGATCCt")
    assert Dseq("aGGATCCa", "GGATCC").t4() == Dseq("aGGATCC", "GGATCCt")
    assert Dseq("GGATCC", "aGGATCC").t4() == Dseq("GGATCCt", "aGGATCC")
    assert Dseq("GGATCC", "GGATCCa").t4() == Dseq("GGATCC", "GGATCC")
    assert Dseq("GGATCC", "aGGATCCa").t4() == Dseq("GGATCCt", "aGGATCC")

    assert Dseq("GGATCC", "ATCC").t4("g") == Dseq("gg", "", ovhg=0)
    assert Dseq("GGATCC", "GGATCC").t4("gat") == Dseq("ggat", "ggat", ovhg=-2)

    # --- _data / mung / repr for each overhang layout ---
    # NOTE(review): in each group below the `_data` assertion appears twice
    # verbatim; the second copy adds no coverage - possibly a leftover from
    # renaming another attribute. Confirm and drop or replace.
    a2 = Dseq("ccGGATCCaa", "ggatcc", -2)
    assert a2._data == "ccGGATCCaa"
    assert a2._data == "ccGGATCCaa"
    assert str(a2.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
    ccGGATCCaa
      cctagg
    """
    ).strip()
    assert repr(a2) == rpr

    a3 = Dseq("ccGGATCC", "ggatcc", -2)
    assert a3._data == "ccGGATCC"
    assert a3._data == "ccGGATCC"
    assert str(a3.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-8)
    ccGGATCC
      cctagg
    """
    ).strip()
    assert repr(a3) == rpr

    b = Dseq("GGATCC", "aaggatcccc", 2)
    assert b._data == "ggGGATCCtt"
    assert b._data == "ggGGATCCtt"
    assert str(b.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
      GGATCC
    cccctaggaa
    """
    ).strip()
    assert repr(b) == rpr

    b2 = Dseq("GGATCCaa", "ggatcccc", 2)
    assert b2._data == "ggGGATCCaa"
    assert b2._data == "ggGGATCCaa"
    assert str(b2.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-10)
      GGATCCaa
    cccctagg
    """
    ).strip()
    assert repr(b2) == rpr

    assert b2.seguid() == "hPNrcQ0sluXyfu4XuUh1trsnygc"

    b3 = Dseq("GGATCC", "ggatcccc", 2)
    assert b3._data == "ggGGATCC"
    assert b3._data == "ggGGATCC"
    assert str(b3.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-8)
      GGATCC
    cccctagg
    """
    ).strip()
    assert repr(b3) == rpr

    c = Dseq("GGATCCaaa", "ggatcc", 0)
    assert c._data == "GGATCCaaa"
    assert c._data == "GGATCCaaa"
    assert str(c.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-9)
    GGATCCaaa
    cctagg
    """
    ).strip()
    assert repr(c) == rpr

    d = Dseq("GGATCC", "aaaggatcc", 0)
    assert d._data == "GGATCCttt"
    assert d._data == "GGATCCttt"
    assert str(d.mung()) == "GGATCC"
    rpr = textwrap.dedent(
        """
    Dseq(-9)
    GGATCC
    cctaggaaa
    """
    ).strip()
    assert repr(d) == rpr

    # --- cutting with BamHI and re-joining the fragments ---
    obj = Dseq("GGATCCaaa", "ggatcc", 0)
    from Bio.Restriction import BamHI

    frag1 = Dseq("G", "gatcc", 0)
    frag2 = Dseq("GATCCaaa", "g", -4)

    assert obj.cut(BamHI) == (frag1, frag2)

    assert frag1 + frag2 == obj

    # NOTE(review): bare comparison - the result is discarded, so this line
    # checks nothing. Probably a missing `assert`; confirm the expected
    # seguid value before turning it into one.
    obj.seguid() == "HtK7-_BmOJw0BmtYE8f1yGdHc0c"

    assert frag1.seguid() == "yJkorWG5V2etvSLp6E6QNK-KMlQ"
    assert frag2.seguid() == "Aw3buI-N85OztBZAzeGJvXGlwO8"

    # --- repr of a 30 bp blunt sequence is printed in full ---
    obj = Dseq("tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta")
    assert (
        repr(obj)
        == "Dseq(-30)\ntagcgtagctgtagtatgtgatctggtcta\natcgcatcgacatcatacactagaccagat"
    )

    # the same sequence built three ways must compare equal
    obj2 = Dseq("tagcgtagctgtagtatgtgatctggtcta")

    obj3 = obj = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta", 0
    )

    assert obj == obj2 == obj3

    assert obj.find("ggatcc") == -1

    assert obj.find("tgtagta") == 9

    # NOTE(review): this assignment is overwritten on the next statement
    # without being used.
    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")

    # --- repr of longer sequences is abbreviated with ".." ---
    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaa", "CCCttagaccagatcacatactacagctacgcta")

    assert repr(obj) == "Dseq(-34)\ntagc..ctaa   \natcg..gattCCC"

    obj = Dseq("tagcgtagctgtagtatgtgatctggtctaaCCC", "ttagaccagatcacatactacagctacgcta")

    assert repr(obj) == "Dseq(-34)\ntagc..ctaaCCC\natcg..gatt   "

    obj = Dseq("agcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-31)\n agcg..ctaa\natcgc..gatt"

    obj = Dseq("Atagcgtagctgtagtatgtgatctggtctaa", "ttagaccagatcacatactacagctacgcta")
    assert repr(obj) == "Dseq(-32)\nAtagc..ctaa\n atcg..gatt"

    obj = Dseq(
        "tagcgtagctgtagtatgtgatctggtctaa", "tatcgcatcgacatcatacactagaccagatt"[::-1]
    )

    assert repr(obj) == "Dseq(-32)\n tagc..ctaa\ntatcg..gatt"

    assert round(obj.mw(), 1) == 19535.6

    # --- circular construction: circular=True and linear=False are equivalent ---
    obj1 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta",
        "tagaccagatcacatactacagctacgcta",
        circular=True,
        linear=False,
    )
    obj2 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta",
        "tagaccagatcacatactacagctacgcta",
        circular=True,
    )
    obj3 = Dseq(
        "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta", linear=False
    )

    assert obj1 == obj2 == obj3

    assert obj1.find("ggatcc") == -1

    assert obj1.find("tgtagta") == 9

    assert (
        Dseq(
            "tagcgtagctgtagtatgtgatctggtcta", "tagaccagatcacatactacagctacgcta"
        ).looped()
        == obj1
    )

    # --- restriction-enzyme classification helpers ---
    from Bio.Restriction import BglII, BamHI

    obj = Dseq("ggatcc")

    assert BglII in obj.no_cutters()
    assert BamHI not in obj.no_cutters()

    assert BamHI in obj.unique_cutters()

    assert BamHI in obj.once_cutters()

    assert BamHI in (obj + obj).twice_cutters()
    assert BamHI not in obj.twice_cutters()

    assert BamHI in obj.n_cutters(1)
    assert BamHI in obj.cutters()

    # --- cutting with several enzymes / a RestrictionBatch ---
    from Bio.Restriction import RestrictionBatch

    rb = RestrictionBatch((BamHI, BglII))

    # for linear sequences the enzyme order does not change the result
    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    obj = Dseq("ggatccAGATCT")

    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    obj = Dseq("AGATCTggatcc")

    assert obj.cut(rb) == obj.cut(BamHI, BglII) == obj.cut(BglII, BamHI)

    # for circular sequences the result differs with the enzyme order
    obj = Dseq("ggatccAGATCT", circular=True)

    assert obj.cut(rb) == obj.cut(BamHI, BglII) != obj.cut(BglII, BamHI)

    obj = Dseq("AGATCTggatcc", circular=True)

    assert obj.cut(rb) == obj.cut(BglII, BamHI) != obj.cut(BamHI, BglII)
コード例 #31
0
def calpcr(request):
    """Locate every primer on its vector and, for two selected primers,
    compute the PCR product length.

    All primers are matched against the sequence of the vector of the first
    primer (and its reverse complement); position, direction, in_vector and
    length are stored on each primer and saved.  When exactly two primers are
    ticked ("check_box" in POST) and they form one forward / one reverse
    pair, the product length and the plotted sequence are added to the
    template context.  Any other selection renders the "can't pcr"
    placeholder instead of failing.
    """
    # (substring looked for in the lower-cased primer text, length correction)
    # Positive values are subtracted from the base count, negative ones added
    # back, mirroring the letters a/t/c/g/n contained in each IDT code.
    idt_corrections = (
        ('icy5', 1), ('icy3', 1), ('5biosg', 1), ('(am)', 1),
        ('dspacer', 2),
        ('dbcoteg', 3),
        ('biotinteg', 4),
        ('ds', -1), ('idsp', -1),
    )
    nt = 'atcgn'

    primers = Primer.objects.all()
    vector = primers[0].vector
    vector_name = vector.name
    seq = Dseq(vector.sequence.lower().replace(' ', ''))
    L = len(str(seq))
    rseq = seq.reverse_complement()

    for primer in primers:
        p_seq = Dseq(primer.sequence.replace(' ', ''))
        p_seq_s = str(p_seq).lower()  # all lower case

        Lp_subtract = sum(delta for code, delta in idt_corrections
                          if code in p_seq_s)
        Lp = sum(p_seq_s.count(base) for base in nt) - Lp_subtract

        fwd_hit = seq.find(p_seq_s)
        rev_hit = rseq.find(p_seq_s) if fwd_hit == -1 else -1
        if fwd_hit != -1:
            position = fwd_hit + 1  # 1-based position on the given strand
            direction = 'forward'
            in_vector = True
        elif rev_hit != -1:
            # reverse primers get a negative position
            position = rev_hit - L - 1
            direction = 'reverse'
            in_vector = True
        else:
            position = -1
            direction = 'none'
            in_vector = False
        primer.position = position
        primer.dir = direction
        primer.in_vector = in_vector
        primer.length = Lp
        primer.save()

    primers = primers.filter(in_vector=True).order_by('position')
    primerFilter = PrimerFilter(queryset=primers)

    if request.method == 'POST' and 'Search' in request.POST:
        primerFilter = PrimerFilter(request.POST, queryset=primers)

    # Defaults used whenever a product cannot be computed.
    L_pcr = 'You can only select two primers!!'
    show_seq = "can't pcr"
    primer_name = ['', '']
    primer_position = ['x', 'x']

    check_box_list = request.POST.getlist("check_box")
    if len(check_box_list) == 2:
        primer_1 = Primer.objects.get(id=check_box_list[0])
        primer_2 = Primer.objects.get(id=check_box_list[1])

        if primer_1.dir == 'reverse' and primer_2.dir == 'forward':
            rev_primer, fwd_primer = primer_1, primer_2
        elif primer_2.dir == 'reverse' and primer_1.dir == 'forward':
            rev_primer, fwd_primer = primer_2, primer_1
        else:
            rev_primer = fwd_primer = None

        if fwd_primer is None:
            # Not a forward/reverse pair.  Previously this path crashed with
            # an unbound-local error when plotting; report a zero-length
            # product at position 0 (the values the old code assigned) and
            # keep the placeholder sequence.
            L_pcr = 0
            primer_position = [0, 0]
        else:
            pr = rev_primer.position
            pf = fwd_primer.position
            if abs(pr) >= abs(pf):
                L_pcr = -pr - pf
            else:
                # the product wraps around the vector origin
                L_pcr = L - pr - pf
            show_seq = plotpcr(str(seq), fwd_primer.sequence,
                               rev_primer.sequence)
            primer_name = [fwd_primer.name, rev_primer.name]
            primer_position = [pf, pr]

    return render(request, template_name='primer/seq.html',
                  context={'seq': seq, 'L': L, 'primers': primers, 'show_seq': show_seq,
                           'primerFilter': primerFilter, 'primer_name': primer_name,
                           'L_pcr': L_pcr, 'vector_name': vector_name, 'primer_position': primer_position,
                           })