Beispiel #1
0

def seq_header_to_chr(header):
    """Return "segment_1" or "segment_2" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being 1 or 2;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (1|2)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_1 and segment_2.
chrs = ["segment_" + seg for seg in ['1', '2']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/human_picobirnavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #2
0

def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/anhanga.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #3
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` and `join` are used below, but the imports at the top of this
# module only bring in `listdir` and `sys`; import them here so the
# directory scan does not fail with a NameError.
from os.path import dirname, join

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Add every entry of the data/fugong/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/fugong/")):
    ds.add_fasta_path("data/fugong/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #4
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name ("segment_1" .. "segment_12") for a header.

    The header must contain a ``[segment X]`` tag with X being an integer
    from 1 to 12; an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (1|10|11|12|2|3|4|5|6|7|8|9)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)

def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_1 through segment_12.
chrs = ["segment_" + seg for seg in ['1', '10', '11', '12', '2', '3', '4', '5', '6', '7', '8', '9']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/colorado_tick_fever.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #5
0

def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/capim_orthobunyavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #6
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` and `join` are used below, but the imports at the top of this
# module only bring in `listdir` and `sys`; import them here so the
# directory scan does not fail with a NameError.
from os.path import dirname, join

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Add every entry of the data/tete/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/tete/")):
    ds.add_fasta_path("data/tete/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #7
0

def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/bhanja.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)

def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/serra_do_navio_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #9
0

def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/mobala_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #10
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` and `join` are used below, but the imports at the top of this
# module only bring in `listdir` and `sys`; import them here so the
# directory scan does not fail with a NameError.
from os.path import dirname, join

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Add every entry of the data/guaroa/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/guaroa/")):
    ds.add_fasta_path("data/guaroa/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #11
0

def seq_header_to_chr(header):
    """Return the segment name ("segment_1" .. "segment_6") for a header.

    The header must contain a ``[segment X]`` tag with X being an integer
    from 1 to 6; an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (1|2|3|4|5|6)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_1 through segment_6.
chrs = ["segment_" + seg for seg in ['1', '2', '3', '4', '5', '6']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/thogoto_thogotovirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)

def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/flexal_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds

def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/sathuperi_orthobunyavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds

def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/guanarito_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #15
0
from os.path import join
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]

def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L or S, and
    returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` is used below but is not among the imports at the top of this
# module; import it here so the directory scan does not fail with a
# NameError.
from os.path import dirname

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr)

# Add every entry of the data/bruges/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/bruges/")):
    ds.add_fasta_path("data/bruges/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #16
0

def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/thailand_orthohantavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #17
0

def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/california_reptarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #18
0
    c = re.compile(r'\[segment (1|2|3|4|5|6|7|8)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    seg = m.group(1)
    valid_segs = ['1', '2', '3', '4', '5', '6', '7', '8']
    if seg not in valid_segs:
        raise ValueError("Unknown segment %s" % seg)
    return "segment_" + seg


def seq_header_to_genome(header):
    """Return the genome identifier from a sequence header.

    Searches `header` for a ``[genome X]`` tag and returns X. The capture
    is greedy: it extends to the last ``]`` in the header.

    Raises:
        ValueError: if the header has no ``[genome ...]`` tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # Without this check a missing tag surfaced as an AttributeError
        # on None instead of a message naming the offending header.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

# One FASTA path per segment: data/influenza_a_segment<N>.fasta.gz, N = 1..8.
for seg in ['1', '2', '3', '4', '5', '6', '7', '8']:
    ds.add_fasta_path("data/influenza_a_segment" + seg + ".fasta.gz",
                      relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #19
0

def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/ryukyu_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #20
0
    import re
    c = re.compile(r'\[segment (1|10|11|2|3|4|5|6|7|8|9)\]')
    m = c.search(header)
    if not m:
        raise Exception("Unknown or invalid segment in header %s" % header)
    seg = m.group(1)
    return "segment_" + seg


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_1 through segment_11.
chrs = [
    "segment_" + seg
    for seg in ['1', '10', '11', '2', '3', '4', '5', '6', '7', '8', '9']
]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/rotavirus_g.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #21
0
from os.path import join
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]

def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` is used below but is not among the imports at the top of this
# module; import it here so the directory scan does not fail with a
# NameError.
from os.path import dirname

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr)

# Add every entry of the data/thottapalayam/ directory next to this file.
for f in listdir(join(dirname(__file__), "data/thottapalayam/")):
    ds.add_fasta_path("data/thottapalayam/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #22
0
__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]

def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier from a sequence header.

    Searches `header` for a ``[genome X]`` tag and returns X. The capture
    is greedy: it extends to the last ``]`` in the header.

    Raises:
        ValueError: if the header has no ``[genome ...]`` tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # Without this check a missing tag surfaced as an AttributeError
        # on None instead of a message naming the offending header.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

ds.add_fasta_path("data/rift_valley_fever.fasta", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #23
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]


def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L or S, and
    returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` and `join` are used below, but the imports at the top of this
# module only bring in `listdir` and `sys`; import them here so the
# directory scan does not fail with a NameError.
from os.path import dirname, join

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Add every entry of the data/chapare/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/chapare/")):
    ds.add_fasta_path("data/chapare/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #24
0

def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/imjin_orthohantavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #25
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` and `join` are used below, but the imports at the top of this
# module only bring in `listdir` and `sys`; import them here so the
# directory scan does not fail with a NameError.
from os.path import dirname, join

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Add every entry of the data/shamonda/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/shamonda/")):
    ds.add_fasta_path("data/shamonda/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #26
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)

def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/arumowot.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #27
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return "segment_L" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)

def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/whitewater_arroyo_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #28
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return "segment_L", "segment_M" or "segment_S" for a sequence header.

    The header must contain a ``[segment X]`` tag with X being L, M or S;
    an Exception is raised when no such tag is found.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)

def seq_header_to_genome(header):
    """Return the text captured by the ``[genome X]`` tag of a header.

    The capture is greedy: it extends to the last ``]`` in the header.
    An Exception is raised when the header has no such tag.
    """
    import re
    found = re.search(r'\[genome (.+)\]', header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Segment names for this dataset: segment_L, segment_M and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/ambe.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #29
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Segment names for this dataset: segment_L and segment_S.
chrs = ["segment_" + seg for seg in ['L', 'S']]


def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L or S, and
    returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


# `dirname` and `join` are used below, but the imports at the top of this
# module only bring in `listdir` and `sys`; import them here so the
# directory scan does not fail with a NameError.
from os.path import dirname, join

# Build the dataset object from the segment names and the header parser.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Add every entry of the data/pirital/ directory located next to this file.
for f in listdir(join(dirname(__file__), "data/pirital/")):
    ds.add_fasta_path("data/pirital/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Beispiel #30
0
def seq_header_to_chr(header):
    """Return the segment name for a sequence header.

    Searches `header` for a ``[segment X]`` tag, where X is L, M or S,
    and returns ``"segment_" + X``.

    Raises:
        ValueError: if the header contains no such tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured value needs no
    # further validation.
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier from a sequence header.

    Searches `header` for a ``[genome X]`` tag and returns X. The capture
    is greedy: it extends to the last ``]`` in the header.

    Raises:
        ValueError: if the header has no ``[genome ...]`` tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # Without this check a missing tag surfaced as an AttributeError
        # on None instead of a message naming the offending header.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Build the dataset object from the segment names and the two header parsers.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

ds.add_fasta_path("data/seoul.fasta", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds