Esempio n. 1
0
def test_pydna_gbtext_clean():
    """Clean each broken GenBank file and compare the SEGUID of the result.

    Every entry pairs a file in ``broken_genbank_files/`` with the SEGUID
    expected once ``gbtext_clean`` has processed the text and ``read`` has
    parsed it.  ``hej.txt`` must additionally emit a
    ``BiopythonParserWarning``.
    """
    from pydna.readers import read
    from pydna.genbankfixer import gbtext_clean

    expected_seguids = [
        ("sequence.gb", "j2yAlBCZ-txSTCkakAmykAielRI"),
        ("NCBI_example.gb", "j2yAlBCZ-txSTCkakAmykAielRI"),
        ("YEplac181.txt", "lbnQtxi5LyDONRswRdG88-l8NF0"),
        ("pGADT7-Rec.gb", "rhnYE78wGKdqAZWiyVJzQ7HXXys"),
        ("P30350%20(2013-10-11%2013_49_14).dna.txt",
         "_aEPoGLctHcOZdQdZIh-KyBt5WY"),
        ("ApE_example.gb", "c47i2ifiNZVvvnLQbX5anTVVoPE"),
        ("VectorNTI_example.gb", "bDPbx5P4yigGWh1zK7FiG_SF8qQ"),
        ("hej.txt", "lbnQtxi5LyDONRswRdG88-l8NF0"),
        ("fakeGenBankFile.gb", "ATrCXrjheFhltm8HhLJuFNtWXGw"),
    ]

    def cleaned_seguid(raw_text):
        # Clean the raw text, parse it and return the record's SEGUID.
        return read(gbtext_clean(raw_text).gbtext).seguid()

    for filename, seguid in expected_seguids:
        with open("broken_genbank_files/" + filename, "r") as handle:
            raw_text = handle.read()

        if filename != "hej.txt":
            assert cleaned_seguid(raw_text) == seguid
            continue

        from Bio import BiopythonParserWarning

        # The whole clean/parse chain runs inside the warning check.
        with pytest.warns(BiopythonParserWarning):
            assert cleaned_seguid(raw_text) == seguid
Esempio n. 2
0
def test_pydna_read_test():
    """Read pydna_read_test.txt and check a non-ASCII label in its GenBank text.

    The default and preferred encodings are printed first so they show up in
    the test output.
    """
    import locale
    from pydna.readers import read

    print("sys.getdefaultencoding()", sys.getdefaultencoding())
    print("locale.getpreferredencoding()", locale.getpreferredencoding())

    genbank_text = read("pydna_read_test.txt").format("gb")
    assert genbank_text[349:368] == '/label="2micron 2µ"'
Esempio n. 3
0
def test_read_with_feature_spanning_ori():
    """Read a 10 bp circular record whose only feature crosses the origin.

    The feature location is ``join(9..10,1..2)``; the test checks the
    extracted feature sequence on the record and after reverse complementing
    it with ``rc()``.
    """
    from pydna.readers import read

    # GenBank text is kept verbatim, including its trailing whitespace.
    test = '''
    LOCUS       New_DNA                   10 bp ds-DNA     circular     23-AUG-2018
    DEFINITION  .
    ACCESSION   
    VERSION     
    SOURCE      .
      ORGANISM  .
    COMMENT     
    COMMENT     ApEinfo:methylated:1
    FEATURES             Location/Qualifiers
         misc_feature    join(9..10,1..2)
                         /locus_tag="myfeature"
                         /label="myfeature"
                         /ApEinfo_label="myfeature"
                         /ApEinfo_fwdcolor="cyan"
                         /ApEinfo_revcolor="green"
                         /ApEinfo_graphicformat="arrow_data {{0 1 2 0 0 -1} {} 0}
                         width 5 offset 0"
    ORIGIN
            1 accgggtttt     
    //    
    '''
    a = read(test)
    assert str(a.seq).lower() == "accgggtttt"
    # Positions 9..10 followed by 1..2 of "accgggtttt" give "tt" + "ac".
    assert str(a.features[0].extract(a).seq) == "TTAC"
    assert a.features[0].strand == 1

    b = a.rc()

    assert str(b.seq).lower() == "aaaacccggt"
    # NOTE(review): the feature of ``b`` is extracted from ``a``, not from
    # ``b`` -- confirm this is the intended comparison.
    assert str(b.features[0].extract(a).seq) == "GTAA"
def test_ypk():
    """Run ``pathway`` on every data file and compare the assembled sequence.

    Each whitespace-separated entry of the table below has the form
    ``<input file>|<GenBank file expected in tmp>|<reference file stem>``.
    Every entry is processed once with ``pYPKa_A=True`` and once with
    ``pYPKa_A=False``.
    """
    datafiles = '''pth1.txt|pYPK0_CiGXF1_PsXYL2.gb|iM8oDuvJPMPO995IdW3B0oo0Hkc
                   pth2.txt|pYPK0_SsXYL1_SsXYL2.gb|CB_qLhPgemW0XNLQOQEAdJKFujU
                   pth3.txt|pYPK0_NC_006038_CiGXF1_PsXYL2.gb|48_Bek9U1wxXlq1otmE7YHjYpnk
                   pth4.txt|pYPK0_SsXYL1_SsXYL2_ScXKS1.gb|5OxynmwQA3br0cKAG8It7VVNGrg
                   pth5.txt|pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1.gb|K8z4ijkYa0hA0KEOhv7-6PNJgBM
                   pth6.txt|pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1.gb|K8z4ijkYa0hA0KEOhv7-6PNJgBM
                   pth7.txt|pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1.gb|K8z4ijkYa0hA0KEOhv7-6PNJgBM'''

    entries = textwrap.dedent(datafiles).split()

    for pYPKa_A in (True, False):
        for entry in entries:
            source_file, result_name, reference_stem = entry.split("|")

            print()
            print("############################")
            print("datafile = ", source_file)
            print("pYPKa_A  = ", pYPKa_A)
            print("############################")

            with open(source_file, "r") as handle:
                text = handle.read()

            # Start from a clean output directory; a missing one is fine.
            try:
                shutil.rmtree(tmp)
            except OSError:
                pass

            pw = pathway(parse(text), tmp, pYPKa_A=pYPKa_A)

            assembled = read(os.path.join(tmp, result_name))

            with open(reference_stem + ".txt") as handle:
                reference = handle.read()

            # Compare case-insensitively, ignoring whitespace in the reference.
            expected = "".join(reference.lower().split())
            assert expected == str(assembled.seq).lower()
Esempio n. 5
0
def test_cut_feat():
    """Check feature counts on pUC19 after slicing, PCR and EcoRI digestion."""
    from Bio.Restriction import EcoRI

    from pydna.amplify import pcr
    from pydna.design import primer_design
    from pydna.dseqrecord import Dseqrecord
    from pydna.readers import read

    def as_dseqrecord(amplicon):
        # Build a plain Dseqrecord carrying the amplicon's sequence and features.
        record = Dseqrecord(amplicon.seq)
        record.features = amplicon.features
        return record

    plasmid = read('pUC19_MarkBudde.gb')
    assert len(plasmid.features) == 19

    sliced = plasmid[:]
    assert len(sliced.features) == 19

    designed = primer_design(sliced)
    product = pcr(designed.forward_primer, designed.reverse_primer, plasmid)
    assert len(product.features) == 21
    assert len(product.cut(EcoRI)[1].features) == 16

    # The same digestion on a plain Dseqrecord must keep the same features.
    assert len(as_dseqrecord(product).cut(EcoRI)[1].features) == 16
Esempio n. 6
0
def test_mark_budde():
    """Name and id of pGREG505 must survive reading and looping."""
    expected = "pGREG505"
    plasmid = read("pGREG505.gb")
    assert plasmid.name == expected
    assert plasmid.looped().name == expected
    # assert plasmid.annotations == "pGREG505"
    assert plasmid.id == expected
    assert plasmid.looped().id == expected
    """
Esempio n. 7
0
def test_mark_budde():
    """Reading and looping pGREG505.gb keeps both its name and its id."""
    record = read('pGREG505.gb')
    label = "pGREG505"
    assert record.name == label
    assert record.looped().name == label
    # assert record.annotations == "pGREG505"
    assert record.id == label
    assert record.looped().id == label
    """
Esempio n. 8
0
def test_read_from_file():
    """Read GenBank and FASTA files, format them and compare their sequences."""
    from pydna.parsers import parse
    from pydna.readers import read

    a, b, c, d = (
        read(path)
        for path in ("read1.gb", "read2.gb", "read3.fasta", "read4.fasta")
    )
    x, y = parse("pth1.txt")

    # Formatting must succeed for every record; the output is not inspected.
    for record in (a, b, c, d, x, y):
        record.format("gb")

    assert x.format()[3268:3278] == '2micron 2µ'
    assert x.features[13].qualifiers['label'][0] == u'2micron 2µ'

    # All four files are expected to hold the same sequence (case ignored).
    seq_a, seq_b, seq_c, seq_d = (
        str(record.seq).lower() for record in (a, b, c, d)
    )
    assert seq_a == seq_b == seq_c == seq_d
Esempio n. 9
0
def test_read():
    """``read`` raises ValueError on empty input, with or without ``ds``."""
    from pydna.readers import read

    empty = ""
    for options in ({}, {"ds": False}, {"ds": True}):
        with pytest.raises(ValueError):
            read(empty, **options)
Esempio n. 10
0
from pydna.readers import read
import os
import shutil
from pathlib import Path

# Working directory at start-up.
cwd = os.getcwd()

# Output directory "nb" below the working directory, created if missing.
outpath = Path.cwd() / "nb"  # Path(cwd).parent.joinpath("pYPKa_ZE2")
outpath.mkdir(parents=True, exist_ok=True)

# Copy the files listed here into the output directory.
for resource in ("pYPKa.gb", "standard_primers.txt", "figure_pYPKa_ZE.png"):
    shutil.copy(resource, outpath)

pYPKa = read("pYPKa.gb")

# Notebook template text.
with open("notebook_template_pYPKa_ZE.md", "r", encoding="utf8") as f:
    t = f.read()

# Non-empty lines of tp_list.txt that are not "#" comments.
with open("tp_list.txt", "r") as f:
    tps = [
        line
        for line in f.read().splitlines()
        if line and not line.strip().startswith("#")
    ]

tp_dict = {}

lp = PrimerList()

for insertname, letter, pf, pr, comment in (x.split(maxsplit=4) for x in tps
                                            if x):
Esempio n. 11
0
from pydna.genbank  import genbank
from pydna.design   import primer_design
from pydna.amplify  import pcr
from pydna.readers  import read
from pydna.parsers  import parse_primers
from pydna.assembly import Assembly
from pydna.gel      import Gel

###############################################################################

# Sequence for GenBank accession AF193791, region 78..1895
# (presumably fetched from GenBank by pydna's ``genbank`` helper -- confirm).
saat = genbank("AF193791 REGION: 78..1895")

# Result of primer design on the region (named as a PCR product).
saat_pcr_prod = primer_design(saat)

# Vector read from a local GenBank file.
pYPKa=read("pYPKa.gb")

from Bio.Restriction import AjiI

# Vector linearized with AjiI.
pYPKa_AjiI = pYPKa.linearize(AjiI)

# Linearized vector joined to the PCR product and circularized.
pYPKa_A_saat = ( pYPKa_AjiI + saat_pcr_prod ).looped()

# Further vectors read from local files (names suggest a TEF1 promoter
# clone and a TPI1 terminator clone -- confirm).
pYPKa_Z_prom = read("pYPKa_Z_TEF1.gb")

pYPKa_E_term = read("pYPKa_E_TPI1.gb")

p567,p577,p468,p467,p568,p578  =  parse_primers('''

>567_pCAPsAjiIF (23-mer)
GTcggctgcaggtcactagtgag
>577_crp585-557 (29-mer)
Esempio n. 12
0
def test_parse1():
    """Parse FASTA, GenBank and EMBL records from text and from files."""
    from pydna.parsers import parse
    from pydna.readers import read

    # FASTA records embedded in free text.
    exam_text = '''
            points....: 1

            The sequence seq below represents a double stranded linear DNA molecule.

            >seq
            CTCCCCTATCACCAGGGTACCGATAGCCACGAATCT

            Give the sequence(s) of the fragment(s) formed after digesting seq
            with the restriction enzyme Acc65I in the order that they appear in seq.

            Use FASTA format and give the Watson strand(s) in 5'-3' direction below.
            Give the sequences the names frag1,frag2,... etc.
            >frag1
            CTCCCCTATCACCAGG

            >frag2
            GTACCGATAGCCACGAATCT

            *********** Question 4 ***********

            QuestionID:
            '''
    parsed = parse(exam_text)

    expected_seqs = [
        'CTCCCCTATCACCAGGGTACCGATAGCCACGAATCT',
        'CTCCCCTATCACCAGG',
        'GTACCGATAGCCACGAATCT',
    ]

    assert [str(s.seq) for s in parsed] == expected_seqs
    assert [s.linear for s in parsed] == [True, True, True]

    # A single GenBank record given as text.
    genbank_text = '''
            LOCUS       ScCYC1                   330 bp    DNA              UNK 01-JAN-1980
            DEFINITION  ScCYC1
            ACCESSION   ScCYC1
            VERSION     ScCYC1
            KEYWORDS    .
            SOURCE      .
              ORGANISM  .
                        .
            FEATURES             Location/Qualifiers
            ORIGIN
                    1 ATGACTGAAT TCAAGGCCGG TTCTGCTAAG AAAGGTGCTA CACTTTTCAA GACTAGATGT
                   61 CTACAATGCC ACACCGTGGA AAAGGGTGGC CCACATAAGG TTGGTCCAAA CTTGCATGGT
                  121 ATCTTTGGCA GACACTCTGG TCAAGCTGAA GGGTATTCGT ACACAGATGC CAATATCAAG
                  181 AAAAACGTGT TGTGGGACGA AAATAACATG TCAGAGTACT TGACTAACCC AAAGAAATAT
                  241 ATTCCTGGTA CCAAGATGGC CTTTGGTGGG TTGAAGAAGG AAAAAGACAG AAACGACTTA
                  301 ATTACCTACT TGAAAAAAGC CTGTGAGTAA
            //
            '''
    record = parse(genbank_text).pop()

    # ``parse`` and ``read`` must agree on the sequence.
    assert str(record.seq) == str(read(genbank_text).seq)
    expected_cyc1 = '''ATGACTGAATTCAAGGCCGGTTCTGCTAAGAAAGGTGCTACACTTTTCAAGACTAGATGTCTACAATGCCACACCGTGGAAAAGGGTGGCCCACATAAGGTTGGTCCAAACTTGCATGGTATCTTTGGCAGACACTCTGGTCAAGCTGAAGGGTATTCGTACACAGATGCCAATATCAAGAAAAACGTGTTGTGGGACGAAAATAACATGTCAGAGTACTTGACTAACCCAAAGAAATATATTCCTGGTACCAAGATGGCCTTTGGTGGGTTGAAGAAGGAAAAAGACAGAAACGACTTAATTACCTACTTGAAAAAAGCCTGTGAGTAA'''

    assert str(record.seq) == expected_cyc1
    assert record.linear == True
    assert record.circular == False

    # A multi-record file: 771 sequences, all 901 long.
    seqs = parse('RefDataBjorn.fas')

    assert len(seqs) == 771
    assert list({len(entry) for entry in seqs}) == [901]

    # These plasmid files must all be read as circular.
    for plasmid_file in ("pAG25.gb", "pCAPs.gb", "pUC19.gb"):
        plasmid = read(plasmid_file)

        assert plasmid.circular == True
        assert plasmid.linear == False

    # EMBL records only need to parse without error.
    embl_text = '''
    ID   example    standard; DNA; UNC; 3 BP.
    SQ   Sequence 3 BP;
         aaa                                                                       3
    //
    '''
    record = parse(embl_text).pop()
    embl_text = '''
    ID   name?      standard; circular DNA; UNK; 100 BP.
    XX
    DT   25-DEC-2017
    XX
    DE   description?.
    XX
    AC   id?;
    XX
    SV   id?
    XX
    KW   .
    XX
    OS   .
    OC   .
    OC   .
    XX
    FH   Key             Location/Qualifiers
    SQ   Sequence 100 BP;
         aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa        60
         aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa                             100
    //
    '''
    record = parse(embl_text).pop()
Esempio n. 13
0
    def write(self, filename=None, f="gb"):
        """Write the Dseqrecord to a file using the format f.

        The format must be one supported by Biopython SeqIO for writing [#]_.
        Default is "gb" which is short for Genbank. Note that Biopython SeqIO
        reads more formats than it writes.

        Filename is the path to the file where the sequence is to be
        written. The filename is optional; if it is not given, the locus
        property (string) is used together with the format.

        If obj is the Dseqrecord object, the default file name will be:

        ``<obj.locus>.<f>``

        where <f> is "gb" by default. If the file already exists and the
        sequence it contains is different, the existing file is first
        renamed so that the old sequence is not lost:

        ``<obj.locus>_OLD.<f>``

        and the new sequence is then written under the requested name.

        If the file already exists with the same sequence it is overwritten.
        When the description of the old file contains a SEGUID stamp, that
        stamp is kept: it replaces the stamp in the new description when
        the first 35 characters of both stamps agree, or is appended when
        the new description carries no stamp at all.

        Parameters
        ----------
        filename : str, optional
            Path of the file to write.
        f : str, optional
            Format name passed to ``self.format``.

        Returns
        -------
        The result of ``_display_html(_HTML(msg))``, where ``msg`` is an
        HTML snippet linking to the written file (and to the renamed old
        file when the sequence changed).

        Raises
        ------
        ValueError
            If ``filename`` is given but is not a string.

        References
        ----------
        .. [#] http://biopython.org/wiki/SeqIO

        """
        if not filename:
            # generate a name if no name was given
            filename = "{name}.{type}".format(name=self.locus, type=f)
        if not isinstance(filename, str):
            # One formatted message instead of a (text, type) argument tuple.
            raise ValueError(
                "filename has to be a string, got {}".format(type(filename)))
        name, ext = _os.path.splitext(filename)
        msg = "<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>".format(
            filename=filename)

        # Object that is finally written; replaced below only when an old
        # SEGUID stamp has to be carried over into the description.
        record = self

        if _os.path.isfile(filename):
            from pydna.readers import read

            old_file = read(filename)
            if self.seq != old_file.seq:
                # If new sequence is different, the old file is renamed with "_OLD" suffix:
                # TODO: add this timestamp so that all old versions are stored
                # int(time.time() * 1000000)  = 1512035297658778
                old_filename = "{}_OLD{}".format(name, ext)
                _os.rename(filename, old_filename)
                topology = {True: "-", False: "o"}
                msg = (
                    "<font color='DarkOrange ' face=monospace>"
                    "Sequence changed.<br>"
                    "</font>"
                    "<font color='red' face=monospace>"
                    "new: <a href='{filename}' target='_blank'>{filename}</a> &nbsp&nbsp&nbsp size: {nlen}bp topology: {ntop} SEGUID: {ns}<br>"
                    "</font>"
                    "<font color='green' face=monospace>"
                    "old: <a href='{oldfname}' target='_blank'>{oldfname}</a> size: {olen}bp topology: {otop} SEGUID: {os}<br>"
                    "</font>").format(
                        filename=filename,
                        oldfname=old_filename,
                        nlen=len(self),
                        olen=len(old_file),
                        ns=self.seguid(),
                        os=old_file.seguid(),
                        ntop=topology[self.linear],
                        otop=topology[old_file.linear],
                    )
            elif "SEGUID" in old_file.description:
                pattern = r"(lSEGUID|cSEGUID|SEGUID)_(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
                oldstamp = _re.search(pattern, old_file.description)
                newstamp = _re.search(pattern, self.description)
                newdescription = self.description
                if oldstamp and newstamp:
                    # Same checksum prefix: keep the stamp of the old file.
                    if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
                        newdescription = newdescription.replace(
                            newstamp.group(0), oldstamp.group(0))
                elif oldstamp:
                    newdescription += " " + oldstamp.group(0)
                # Write a shallow copy so self.description is left untouched.
                record = _copy.copy(self)
                record.description = newdescription

        # Every path ends by (over)writing the requested file.
        with open(filename, "w") as fp:
            fp.write(record.format(f))
        return _display_html(_HTML(msg))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pydna.readers import read
from pathlib import Path

# GenBank files in the current directory and in the "old" subdirectory.
new = list(Path(".").glob("*.gb"))
old = list(Path("old").glob("*.gb"))

newset = {path.name for path in new}
oldset = {path.name for path in old}

# For each current file, report whether its cseguid matches the file of the
# same name under "old"; files without a counterpart are flagged.
for n in new:
    new_record = read(n)
    old_path = Path("old") / n
    if not old_path.exists():
        print(n.name, "<<<<<<<<<<<<<<<<<<<<<<")
        continue
    old_record = read(old_path)
    print(n.name, new_record.cseguid() == old_record.cseguid())
Esempio n. 15
0
def pathway(pth, dir_="ypkassembly", pYPKa_A=True, print=print):
    """Generate and execute the notebooks describing a pYPK0 pathway assembly.

    The sequences in ``pth`` are consumed from the front.  A sequence that
    ``cloned`` recognises as a pYPK0 tp-gene-tp vector is taken on its own;
    otherwise three consecutive sequences are taken as promoter, gene and
    terminator.  Sequence files, images and notebooks are written to a
    newly created directory ``dir_``, the notebooks are executed there and
    a final ``pw.ipynb`` pathway notebook is produced.

    Parameters
    ----------
    pth : list
        Sequence records.  The list is emptied by this function.
    dir_ : str
        Directory to create for the output; it must not exist already.
    pYPKa_A : bool
        When true, a pYPKa_A notebook is generated for every gene that is
        not already cloned in pYPKa.  When false, the gap-repair template
        is used for such genes instead.
    print : callable
        Function used to report progress messages.

    Returns
    -------
    tuple
        ``(FileLink to pw.ipynb, log text)``, or ``(None, None)`` when
        ``pth`` is empty or ``dir_`` could not be created.

    Raises
    ------
    Exception
        If fewer than three sequences remain for a promoter/gene/terminator
        triple, or if a recognised vector has a description that does not
        match the expected naming pattern.
    """
    if not pth:  # pth has to contain some sequences
        print("No of sequences found.")
        return None, None

    # NOTE: sequence names are not checked for uniqueness.

    log_parts = []

    def report(message):
        # Echo a progress message and keep it for the returned log.
        print(message)
        log_parts.append(message)

    pYPK0 = read(read_data_file("pYPK0.gb"))
    pYPKa = read(read_data_file("pYPKa.gb"))

    from Bio.Restriction import ZraI, AjiI, EcoRV

    # Every file to write, keyed by file name; ".md" entries become notebooks.
    files = {"standard_primers.txt"     : read_data_file("standard_primers.txt"),
             "pYPKa.gb"                 : read_data_file("pYPKa.gb"),
             "pYPKpw.gb"                : read_data_file("pYPKpw.gb"),
             "tp_g_tp.png"              : read_bin_file("tp_g_tp.png"),
             "pYPK_ZE.png"              : read_bin_file("pYPK_ZE.png"),
             "pYPK_A.png"               : read_bin_file("pYPK_A.png"),
             "pw.png"                   : read_bin_file("pw.png"),
             "start.bat"                : read_data_file("start.bat"),
             "start.sh"                 : read_data_file("start.sh"),}

    cas_vectors = ""
    tp_gene_tp_links = ""
    pYPKa_clones = ""
    pwname = "pYPK0"
    genes = 0
    nbflag = False

    while pth:
        genes += 1
        first = pth.pop(0)
        # is sequence a tp-gene-tp vector?
        if cloned(pYPK0, (ZraI, EcoRV), first):
            m = re_cas.search(first.description)
            if not m:
                # Report the sequence being examined; the previous code
                # formatted ``last``, which is unbound at this point.
                raise Exception("{} is a pYPK0 tp-gene_tp sequence but was not correctly named.".format(first.description))
            fn = first.description + ".gb"
            files[fn] = first.format("gb")
            cas_vectors += fn + "\n"
            tp_gene_tp_links += "\n[{}]({})\n".format(first.description, fn)
            gene_description = m.group(2)
            # NOTE(review): a tp-gene-tp vector is counted twice (here and
            # at the top of the loop) -- confirm this is intended.
            genes += 1
        else:
            prom = first
            try:
                gene = pth.pop(0)
                term = pth.pop(0)
            except IndexError:
                raise Exception("not enough sequences")

            if cloned(pYPKa, ZraI, prom):
                m = re_Z.search(prom.description)
                if not m:
                    # Message now names the promoter vector, not the gene.
                    raise Exception("{} is a pYPKa_Z_prom sequence but was incorrectly named.".format(prom.description))
                prom_description = m.group(1)
                files[m.group(0) + ".gb"] = prom.format("gb")
            else:
                if "pYPKa_ZE_{}.md".format(prom.id) not in files:
                    files[prom.id + ".gb"] = prom.format("gb")
                    nbtemp = read_data_file("nb_template_pYPKa_ZE_insert.md")
                    files["pYPKa_ZE_{}.md".format(prom.id)] = nbtemp.format(tp=prom.id)
                    pYPKa_clones += "[pYPKa_ZE_{n}](pYPKa_ZE_{n}.ipynb)  \n".format(n=prom.id)
                prom_description = prom.id

            if cloned(pYPKa, AjiI, gene):
                m = re_A.search(gene.description)
                if not m:
                    raise Exception("{} is a pYPKa_A_gene sequence but was incorrectly named.".format(gene.description))
                gene_description = m.group(1)
                files[m.group(0) + ".gb"] = gene.format("gb")
                if not pYPKa_A:
                    # Gene is already cloned: use the regular template below.
                    nbflag = True
            else:
                n = "pYPKa_A_{}".format(gene.locus)
                files[gene.locus + ".gb"] = gene.format("gb")
                if pYPKa_A:
                    nbtemp = read_data_file("nb_template_pYPKa_A_insert.md")
                    files[n + ".md"] = nbtemp.format(insert=gene.locus)
                    pYPKa_clones += "[{}]({}.ipynb)  \n".format(n, n)
                gene_description = gene.locus

            if cloned(pYPKa, EcoRV, term):
                m = re_E.search(term.description)
                if not m:
                    # Message now names the terminator vector, not the gene.
                    raise Exception("{} is a pYPKa_E_term sequence but was incorrectly named.".format(term.description))
                term_description = m.group(1)
                files[m.group(0) + ".gb"] = term.format("gb")
            else:
                if "pYPKa_ZE_{}.md".format(term.id) not in files:
                    files[term.id + ".gb"] = term.format("gb")
                    nbtemp = read_data_file("nb_template_pYPKa_ZE_insert.md")
                    files["pYPKa_ZE_{}.md".format(term.id)] = nbtemp.format(tp=term.id)
                    pYPKa_clones += "[pYPKa_ZE_{n}](pYPKa_ZE_{n}.ipynb)  \n".format(n=term.id)
                term_description = term.id

            x = "pYPK0_{}_{}_{}".format(prom_description, gene_description, term_description)

            if pYPKa_A or nbflag:
                nbtemp = read_data_file("nb_template_pYPK0_tp_gene_tp.md")
                files[x + ".md"] = nbtemp.format(tpz=prom_description,
                                                 gene=gene_description,
                                                 tpe=term_description)
            else:
                nbtemp = read_data_file("nb_template_pYPK0_tp_gene_tp_gap_repair.md")
                files[x + ".md"] = nbtemp.format(tpz=prom_description,
                                                 gene=gene.locus,
                                                 tpe=term_description)
            nbflag = False

            cas_vectors += "\n" + x + ".gb\n"
            tp_gene_tp_links += "[{}]({}.ipynb)  \n".format(x, x)

        pwname += "_{}".format(gene_description)

    ###########################################################################

    obj = notedown.MarkdownReader()

    cwd = os.getcwd()

    try:
        os.makedirs(dir_)
    except OSError as exception:
        if exception.errno == errno.EEXIST:
            print("The {} directory already exists! Please delete or choose another name.".format(dir_))
        else:
            print("The {} directory could not be created".format(dir_))
        return None, None

    report("created subdirectory {}\n".format(dir_))

    os.chdir(dir_)
    try:
        # Everything below runs inside dir_; the finally clause restores the
        # original working directory even when a notebook fails.
        report("\nsaving files sequence files and images..\n")

        for name, content in sorted((n, c) for n, c in files.items() if not n.endswith(".md")):
            report("\nsaving: " + name)
            # Content that has a .decode attribute is written in binary mode.
            mode = "wb" if hasattr(content, "decode") else "w"
            with open(name, mode) as f:
                f.write(content)

        report("\n")
        report("\nsaving notebook files ..\n")

        for name, content in sorted((n, c) for n, c in files.items() if n.endswith(".md")):
            newname = os.path.splitext(name)[0] + ".ipynb"
            report("\nsaving: " + newname)
            nbformat.write(obj.to_notebook(content), newname)

        pp = ExecutePreprocessor()
        pp.timeout = 120  # seconds
        pp.interrupt_on_timeout = True

        report("\n")
        report("\nexecuting pYPKa notebooks..\n")

        shell = InteractiveShell.instance()

        pypkanbs = sorted(f for f in os.listdir(".") if re.match(r"pYPKa.+\.ipynb", f))

        if pypkanbs:
            for name in pypkanbs:
                report("\nexecuting: " + name)
                with io.open(name, 'r', encoding='utf-8') as f:
                    nb = nbformat.read(f, 4)
                pp.preprocess(nb, resources={})
                # The code cells are also run in this process, in namespaces
                # that are fresh for each notebook.  The code comes from the
                # templates filled in above.
                g, l = {}, {}
                for cell in nb.cells:
                    if cell.cell_type == 'code':
                        code = shell.input_transformer_manager.transform_cell(cell.source)
                        exec(code, g, l)
                nbformat.write(nb, name)
        else:
            report("\nNo pYPKa notebooks found.\n")

        report("\n")
        report("\nexecuting pYPK0 notebooks..\n")

        pypk0nbs = sorted(f for f in os.listdir(".") if re.match(r"pYPK0.+\.ipynb", f))

        if pypk0nbs:
            for name in pypk0nbs:
                report("\nexecuting: " + name)
                with io.open(name, 'r', encoding='utf-8') as f:
                    nb = nbformat.read(f, 4)
                pp.preprocess(nb, resources={})
                nbformat.write(nb, name)
                g, l = {}, {}
                for cell in nb.cells:
                    if cell.cell_type == 'code':
                        code = shell.input_transformer_manager.transform_cell(cell.source)
                        exec(code, g, l)
        else:
            report("\nNo pYPK0 notebooks found.\n")

        nbtemp = read_data_file("nb_template_pYPK0_pw.md")

        pwnb = nbtemp.format(name=pwname,
                             filename=os.path.basename(dir_),
                             tp_gene_tp_links=tp_gene_tp_links,
                             cas_vectors=add_space(cas_vectors, 17),
                             pYPKa_clones=pYPKa_clones,
                             length=genes)

        nbformat.write(obj.to_notebook(pwnb), "pw.ipynb")

        report("\n\nexecuting final pathway notebook..\n")
        report("\nexecuting: pw.ipynb")
        with io.open("pw.ipynb", 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, 4)
        pp.preprocess(nb, resources={})
        nbformat.write(nb, "pw.ipynb")
    finally:
        os.chdir(cwd)

    fl = FileLink(os.path.join(dir_, "pw.ipynb"))

    return fl, "".join(log_parts)
def test_longername_gives_deseq():
    """Both broken GenBank files must be read as circular records."""
    # Use context managers so the file handles are closed after reading.
    with open("./broken_genbank_files/pGreenLantern1.gb", "r") as handle:
        myseq = read(handle.read())
    assert myseq.circular
    with open("./broken_genbank_files/fakeGenBankFile.gb", "r") as handle:
        myseq = read(handle.read())
    assert myseq.circular
Esempio n. 17
0
File: gojs.py Progetto: joskid/pydna
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 10 08:03:27 2017

@author: bjorn
"""

from pydna.readers import read

# FASTA text needs a real newline between the ">" header line and the
# sequence; the previous "\\n" produced a literal backslash followed by "n".
template = read(">t\ntacactcaccgtctatcattatctactatcgactgtatcatctgatagcac")
# The two primers are read with ds=False.
p1 = read(">p1\ntacactcaccgtctatcattatc", ds=False)
p2 = read(">p2\ngtgctatcagatgatacagtcg", ds=False)
# ann = pydna.Anneal((p1, p2), template)
Esempio n. 18
0
def test_read_from_string():
    """Read GenBank and FASTA records given as strings.

    Every input, including those with non-ASCII characters in the header,
    definition or label, must yield the four-base sequence ACGT.  Each
    input is parsed before its assertion; previously the FASTA inputs were
    assigned but never read, so the stale record was re-checked.
    """
    from pydna.readers import read

    input_ = '''
            LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
            DEFINITION  .
            ACCESSION
            VERSION
            SOURCE      .
              ORGANISM  .
            COMMENT
            COMMENT     ApEinfo:methylated:1
            FEATURES             Location/Qualifiers
                 misc_feature    2..3
                                 /label=NewFeature
                                 /ApEinfo_fwdcolor=cyan
                                 /ApEinfo_revcolor=green
                                 /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                                 width 5 offset 0
            ORIGIN
                    1 acgt
            //
            '''
    a = read(input_)

    assert str(a.seq) == "ACGT"

    input_ = '''>hej
               acgt'''
    a = read(input_)
    # The FASTA text is lower case; compare case-insensitively so the check
    # does not depend on whether the parser normalises case.
    assert str(a.seq).upper() == "ACGT"

    input_ = '''
            LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
            DEFINITION  .
            ACCESSION
            VERSION
            SOURCE      .
              ORGANISM  .
            COMMENT
            COMMENT     ApEinfo:methylated:1
            FEATURES             Location/Qualifiers
                 misc_feature    2..3
                                 /label=NewFeature
                                 /ApEinfo_fwdcolor=cyan
                                 /ApEinfo_revcolor=green
                                 /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                                 width 5 offset 0
            ORIGIN
                    1 acgt
            //
            '''
    a = read(input_)
    assert str(a.seq) == "ACGT"

    input_ = '''>hej
                acgt'''
    a = read(input_)
    assert str(a.seq).upper() == "ACGT"

    # Non-ASCII characters in the FASTA header.
    input_ = '''>hej öööh!
                acgt'''
    a = read(input_)
    assert str(a.seq).upper() == "ACGT"

    # Non-ASCII characters in the GenBank definition and feature label.
    input_ = '''
                LOCUS       New_DNA                    4 bp ds-DNA     linear       30-MAR-2013
                DEFINITION  öööh!
                ACCESSION
                VERSION
                SOURCE      .
                  ORGANISM  .
                COMMENT
                COMMENT     ApEinfo:methylated:1
                FEATURES             Location/Qualifiers
                     misc_feature    2..3
                                     /label=öööh!
                                     /ApEinfo_fwdcolor=cyan
                                     /ApEinfo_revcolor=green
                                     /ApEinfo_graphicformat=arrow_data {{0 1 2 0 0 -1} {} 0}
                                     width 5 offset 0
                ORIGIN
                        1 acgt
                //
            '''
    a = read(input_)
    assert str(a.seq) == "ACGT"