Esempio n. 1
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            1 or 2.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (1|2)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['1', '2']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/human_picobirnavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 2
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/anhanga.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 3
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'M', 'S')]


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry of the data/fugong/ directory next to this file.
# NOTE(review): join and dirname are not imported in the visible part of
# this file -- confirm they are imported earlier.
for f in listdir(join(dirname(__file__), "data/fugong/")):
    ds.add_fasta_path("data/fugong/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 4
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            an integer from 1 to 12.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (1|10|11|12|2|3|4|5|6|7|8|9)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['1', '10', '11', '12', '2', '3', '4', '5', '6', '7', '8', '9']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/colorado_tick_fever.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 5
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/capim_orthobunyavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 6
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'M', 'S')]


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry of the data/tete/ directory next to this file.
# NOTE(review): join and dirname are not imported in the visible part of
# this file -- confirm they are imported earlier.
for f in listdir(join(dirname(__file__), "data/tete/")):
    ds.add_fasta_path("data/tete/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 7
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/bhanja.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/serra_do_navio_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 9
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/mobala_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 10
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'M', 'S')]


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry of the data/guaroa/ directory next to this file.
# NOTE(review): join and dirname are not imported in the visible part of
# this file -- confirm they are imported earlier.
for f in listdir(join(dirname(__file__), "data/guaroa/")):
    ds.add_fasta_path("data/guaroa/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 11
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            an integer from 1 to 6.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (1|2|3|4|5|6)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['1', '2', '3', '4', '5', '6']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/thogoto_thogotovirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 12
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/flexal_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 13
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/sathuperi_orthobunyavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 14
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/guanarito_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 15
0
from os.path import join
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'S')]

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr)

# Register every entry of the data/bruges/ directory next to this file.
# NOTE(review): dirname is not imported in the visible part of this file
# -- confirm it is imported earlier.
for f in listdir(join(dirname(__file__), "data/bruges/")):
    ds.add_fasta_path("data/bruges/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 16
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/thailand_orthohantavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 17
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/california_reptarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 18
0
    c = re.compile(r'\[segment (1|2|3|4|5|6|7|8)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    seg = m.group(1)
    valid_segs = ['1', '2', '3', '4', '5', '6', '7', '8']
    if seg not in valid_segs:
        raise ValueError("Unknown segment %s" % seg)
    return "segment_" + seg


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # Without this check a missing tag fails with an opaque
        # AttributeError on None; report the offending header instead.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

# Register one FASTA path per segment label.
for seg in ['1', '2', '3', '4', '5', '6', '7', '8']:
    ds.add_fasta_path("data/influenza_a_segment" + seg + ".fasta.gz",
                      relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 19
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/ryukyu_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 20
0
    import re
    c = re.compile(r'\[segment (1|10|11|2|3|4|5|6|7|8|9)\]')
    m = c.search(header)
    if not m:
        raise Exception("Unknown or invalid segment in header %s" % header)
    seg = m.group(1)
    return "segment_" + seg


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = [
    "segment_" + seg
    for seg in ['1', '10', '11', '2', '3', '4', '5', '6', '7', '8', '9']
]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/rotavirus_g.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 21
0
from os.path import join
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'M', 'S')]

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr)

# Register every entry of the data/thottapalayam/ directory next to this
# file.
# NOTE(review): dirname is not imported in the visible part of this file
# -- confirm it is imported earlier.
for f in listdir(join(dirname(__file__), "data/thottapalayam/")):
    ds.add_fasta_path("data/thottapalayam/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 22
0
__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'M', 'S')]

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # Without this check a missing tag fails with an opaque
        # AttributeError on None; report the offending header instead.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

ds.add_fasta_path("data/rift_valley_fever.fasta", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 23
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'S')]


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry of the data/chapare/ directory next to this file.
# NOTE(review): join and dirname are not imported in the visible part of
# this file -- confirm they are imported earlier.
for f in listdir(join(dirname(__file__), "data/chapare/")):
    ds.add_fasta_path("data/chapare/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 24
0

def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/imjin_orthohantavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 25
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'M', 'S')]


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry of the data/shamonda/ directory next to this file.
# NOTE(review): join and dirname are not imported in the visible part of
# this file -- confirm they are imported earlier.
for f in listdir(join(dirname(__file__), "data/shamonda/")):
    ds.add_fasta_path("data/shamonda/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 26
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/arumowot.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 27
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/whitewater_arroyo_mammarenavirus.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 28
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown or invalid segment in header %s" % header)
    return "segment_" + m.group(1)

def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # ValueError subclasses Exception, so callers that caught the
        # generic Exception raised here before are unaffected.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]
# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/ambe.fasta.gz", relative=True)
# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 29
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Names of the form segment_<label>, one per label listed here.
chrs = ["segment_" + label for label in ('L', 'S')]


def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the segment-parsing callback.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry of the data/pirital/ directory next to this file.
# NOTE(review): join and dirname are not imported in the visible part of
# this file -- confirm they are imported earlier.
for f in listdir(join(dirname(__file__), "data/pirital/")):
    ds.add_fasta_path("data/pirital/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Esempio n. 30
0
def seq_header_to_chr(header):
    """Return the segment name encoded in a FASTA sequence header.

    Args:
        header: header text containing a "[segment X]" tag, where X is
            L, M or S.

    Returns:
        "segment_" followed by the captured segment label.

    Raises:
        ValueError: if header has no recognized "[segment X]" tag.
    """
    import re
    # The pattern itself restricts the label to L, M or S, so a successful
    # match needs no further validation.
    m = re.search(r'\[segment (L|M|S)\]', header)
    if m is None:
        raise ValueError("Unknown segment in header %s" % header)
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier encoded in a sequence header.

    Args:
        header: header text containing a "[genome NAME]" tag.

    Returns:
        The text between "[genome " and the last "]" after it on the same
        line (the capture is greedy).

    Raises:
        ValueError: if header has no "[genome NAME]" tag.
    """
    import re
    m = re.search(r'\[genome (.+)\]', header)
    if m is None:
        # Without this check a missing tag fails with an opaque
        # AttributeError on None; report the offending header instead.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


# Build the dataset object from this module's identity, the segment names
# and the two header-parsing callbacks.
ds = GenomesDatasetMultiChrom(__name__,
                              __file__,
                              __spec__,
                              chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

ds.add_fasta_path("data/seoul.fasta", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds