Exemple #1
0
__author__ = 'Hayden Metsky <*****@*****.**>'

# Chromosome names for this dataset: one per genome segment (L, M, S).
chrs = ["segment_L", "segment_M", "segment_S"]

def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain a tag of the form
            '[segment X]' where X is L, M or S

    Returns:
        'segment_' followed by the segment label found in the tag

    Raises:
        ValueError: if the header has no '[segment L|M|S]' tag
    """
    import re
    match = re.search(r'\[segment (L|M|S)\]', header)
    if match is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern only admits L, M or S, so the captured label needs no
    # second validity check.
    return "segment_" + match.group(1)

def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain a tag of the form
            '[genome NAME]'

    Returns:
        the text captured after '[genome '; the capture is greedy, so it
        runs up to the last ']' in the header

    Raises:
        ValueError: if the header has no '[genome ...]' tag
    """
    import re
    match = re.search(r'\[genome (.+)\]', header)
    if match is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("Unknown genome in header %s" % header)
    return match.group(1)


# Build the dataset object from this module's name/file/spec, the chromosome
# list and the two header parsers defined in this module.
# NOTE(review): `GenomesDatasetMultiChrom` and `sys` are not imported in the
# visible part of this snippet -- confirm the imports exist above.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__,
                              chrs, seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)

# relative=True: presumably resolved against this module's location -- TODO
# confirm in GenomesDatasetMultiChrom.add_fasta_path.
ds.add_fasta_path("data/sfnv.fasta", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #2
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Chromosome names for this dataset: one per genome segment (L, M).
chrs = ["segment_L", "segment_M"]


def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment L]' or
            '[segment M]'

    Returns:
        'segment_L' or 'segment_M', according to the tag

    Raises:
        ValueError: if neither tag occurs in the header
    """
    import re
    found = re.compile(r'\[segment (L|M)\]').search(header)
    if found:
        # Only L or M can be captured by the pattern, so the label is
        # necessarily valid here.
        return "segment_" + found.group(1)
    raise ValueError("Unknown segment in header %s" % header)


# Dataset over the segments in `chrs`; only the segment parser is supplied.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry found in the data/cano_delgadito/ directory that sits
# next to this module's file.
# NOTE(review): `join` and `dirname` are not imported in the visible part of
# this snippet (only `listdir` is) -- confirm they come from os.path.
for f in listdir(join(dirname(__file__), "data/cano_delgadito/")):
    ds.add_fasta_path("data/cano_delgadito/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
    import re
    c = re.compile(r'\[segment (L1|L2|L3|M1|M2|M3|S1|S2|S3|S4)\]')
    m = c.search(header)
    if not m:
        raise Exception("Unknown or invalid segment in header %s" % header)
    seg = m.group(1)
    return "segment_" + seg


def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    match = re.search(r'\[genome (.+)\]', header)
    if match is None:
        raise Exception("Unknown genome in header %s" % header)
    return match.group(1)


# Chromosome names: one per segment label (L1-L3, M1-M3, S1-S4).
chrs = [
    "segment_{}".format(label)
    for label in ('L1', 'L2', 'L3', 'M1', 'M2', 'M3', 'S1', 'S2', 'S3', 'S4')
]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/nelson_bay_orthoreovirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #4
0
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment L]' or
            '[segment S]'

    Returns:
        'segment_L' or 'segment_S', according to the tag

    Raises:
        Exception: if neither tag occurs in the header
    """
    import re
    match = re.search(r'\[segment (L|S)\]', header)
    if match is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + match.group(1)

def seq_header_to_genome(header):
    """Return the genome name tagged in a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    found = re.compile(r'\[genome (.+)\]').search(header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Chromosome names: one per genome segment (L, S).
chrs = ["segment_L", "segment_S"]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/brazilian_mammarenavirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #5
0
def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment L]' or
            '[segment S]'

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    segment_tag = re.compile(r'\[segment (L|S)\]')
    hit = segment_tag.search(header)
    if hit is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    (label,) = hit.groups()
    return "segment_" + label


def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    genome_tag = re.compile(r'\[genome (.+)\]')
    hit = genome_tag.search(header)
    if hit is None:
        raise Exception("Unknown genome in header %s" % header)
    (name,) = hit.groups()
    return name


# Chromosome names: one per genome segment (L, S).
chrs = ["segment_%s" % label for label in ('L', 'S')]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path(
    "data/lymphocytic_choriomeningitis_mammarenavirus.fasta.gz",
    relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #6
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Chromosome names for this dataset: one per genome segment (L, M, S).
chrs = ["segment_%s" % label for label in ('L', 'M', 'S')]


def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        ValueError: if the header has no '[segment L|M|S]' tag
    """
    import re
    segment_tag = re.compile(r'\[segment (L|M|S)\]')
    hit = segment_tag.search(header)
    if hit is None:
        raise ValueError("Unknown segment in header %s" % header)
    # No further validation: the alternation in the pattern already limits
    # the capture to L, M or S.
    (label,) = hit.groups()
    return "segment_" + label


# Dataset over the segments in `chrs`; only the segment parser is supplied.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry found in the data/dera_ghazi_khan/ directory that
# sits next to this module's file.
# NOTE(review): `join` and `dirname` are not imported in the visible part of
# this snippet (only `listdir` is) -- confirm they come from os.path.
for f in listdir(join(dirname(__file__), "data/dera_ghazi_khan/")):
    ds.add_fasta_path("data/dera_ghazi_khan/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #7
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Chromosome names for this dataset: one per genome segment (L, S).
chrs = ["segment_L", "segment_S"]


def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment L]' or
            '[segment S]'

    Returns:
        'segment_L' or 'segment_S', according to the tag

    Raises:
        ValueError: if neither tag occurs in the header
    """
    import re
    match = re.search(r'\[segment (L|S)\]', header)
    if match is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture L or S, so the label is valid as-is.
    return "segment_{}".format(match.group(1))


# Dataset over the segments in `chrs`; only the segment parser is supplied.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry found in the data/mopeia_lassa_reassortant_29/
# directory that sits next to this module's file.
# NOTE(review): `join` and `dirname` are not imported in the visible part of
# this snippet (only `listdir` is) -- confirm they come from os.path.
for f in listdir(join(dirname(__file__), "data/mopeia_lassa_reassortant_29/")):
    ds.add_fasta_path("data/mopeia_lassa_reassortant_29/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #8
0
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Chromosome names for this dataset: one per genome segment (L, M, S).
chrs = ["segment_{}".format(label) for label in "LMS"]


def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        ValueError: if the header has no '[segment L|M|S]' tag
    """
    import re
    found = re.compile(r'\[segment (L|M|S)\]').search(header)
    if not found:
        raise ValueError("Unknown segment in header %s" % header)
    # A successful match guarantees the label is one of L, M, S; a separate
    # membership test could never fail.
    return "segment_" + found.group(1)


# Dataset over the segments in `chrs`; only the segment parser is supplied.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry found in the data/leopards_hill/ directory that sits
# next to this module's file.
# NOTE(review): `join` and `dirname` are not imported in the visible part of
# this snippet (only `listdir` is) -- confirm they come from os.path.
for f in listdir(join(dirname(__file__), "data/leopards_hill/")):
    ds.add_fasta_path("data/leopards_hill/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #9
0
from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# Chromosome names: one per numbered segment, kept in the same (string-sorted)
# order as the original list.
chrs = [
    "segment_%s" % label
    for label in ('1', '10', '11', '2', '3', '4', '5', '6', '7', '8', '9')
]


def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment N]' where N
            is an integer from 1 to 11

    Returns:
        'segment_' followed by the captured segment number

    Raises:
        ValueError: if the header has no '[segment 1..11]' tag
    """
    import re
    match = re.search(r'\[segment (1|10|11|2|3|4|5|6|7|8|9)\]', header)
    if match is None:
        raise ValueError("Unknown segment in header %s" % header)
    # The closing bracket in the pattern forces the whole number to match,
    # so the capture is always one of the listed values.
    return "segment_" + match.group(1)


# Dataset over the segments in `chrs`; only the segment parser is supplied.
ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# Register every entry found in the data/rotavirus_g/ directory that sits
# next to this module's file.
# NOTE(review): `listdir`, `join`, `dirname` and `sys` are not imported in
# the visible part of this snippet -- confirm the imports exist above.
for f in listdir(join(dirname(__file__), "data/rotavirus_g/")):
    ds.add_fasta_path("data/rotavirus_g/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #10
0
def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    found = re.compile(r'\[segment (L|M|S)\]').search(header)
    if found:
        return "segment_" + found.group(1)
    raise Exception("Unknown or invalid segment in header %s" % header)


def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    match = re.search(r'\[genome (.+)\]', header)
    if match is None:
        raise Exception("Unknown genome in header %s" % header)
    return match.group(1)


# Chromosome names: one per genome segment (L, M, S).
chrs = ["segment_L", "segment_M", "segment_S"]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path(
    "data/black_creek_canal_orthohantavirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment L]' or
            '[segment S]'

    Returns:
        'segment_L' or 'segment_S', according to the tag

    Raises:
        Exception: if neither tag occurs in the header
    """
    import re
    hit = re.search(r'\[segment (L|S)\]', header)
    if hit is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_{}".format(hit.group(1))


def seq_header_to_genome(header):
    """Return the genome name tagged in a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    found = re.compile(r'\[genome (.+)\]').search(header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Chromosome names: one per genome segment (L, S).
chrs = ["segment_L", "segment_S"]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path(
    "data/mopeia_lassa_virus_reassortant_29.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #12
0
from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment N]' where N
            is a digit from 1 to 8

    Returns:
        'segment_' followed by the captured segment number

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    segment_tag = re.compile(r'\[segment (1|2|3|4|5|6|7|8)\]')
    hit = segment_tag.search(header)
    if hit is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    (number,) = hit.groups()
    return "segment_" + number

def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    genome_tag = re.compile(r'\[genome (.+)\]')
    hit = genome_tag.search(header)
    if hit is None:
        raise Exception("Unknown genome in header %s" % header)
    (name,) = hit.groups()
    return name


# Chromosome names: segment_1 through segment_8.
chrs = ["segment_%d" % number for number in range(1, 9)]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)

# One FASTA file is registered per segment, 1 through 8.
for seg in map(str, range(1, 9)):
    ds.add_fasta_path(
        "data/influenza_b_segment" + seg + ".fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    match = re.search(r'\[segment (L|M|S)\]', header)
    if match is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + match.group(1)

def seq_header_to_genome(header):
    """Return the genome name tagged in a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    match = re.search(r'\[genome (.+)\]', header)
    if match is None:
        raise Exception("Unknown genome in header %s" % header)
    return match.group(1)


# Chromosome names: one per genome segment (L, M, S).
chrs = ["segment_L", "segment_M", "segment_S"]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/bujaru_phlebovirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
Exemple #14
0

def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    segment_tag = re.compile(r'\[segment (L|M|S)\]')
    hit = segment_tag.search(header)
    if hit is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    (label,) = hit.groups()
    return "segment_" + label


def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    found = re.compile(r'\[genome (.+)\]').search(header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Chromosome names: one per genome segment (L, M, S).
chrs = ["segment_%s" % label for label in ('L', 'M', 'S')]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/shamonda_orthobunyavirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    found = re.compile(r'\[segment (L|M|S)\]').search(header)
    if found:
        return "segment_{}".format(found.group(1))
    raise Exception("Unknown or invalid segment in header %s" % header)


def seq_header_to_genome(header):
    """Return the genome name tagged in a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    genome_tag = re.compile(r'\[genome (.+)\]')
    hit = genome_tag.search(header)
    if hit is None:
        raise Exception("Unknown genome in header %s" % header)
    (name,) = hit.groups()
    return name


# Chromosome names: one per genome segment (L, M, S).
chrs = ["segment_L", "segment_M", "segment_S"]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path(
    "data/nairobi_sheep_disease_orthonairovirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Map a sequence header to its chromosome (segment) name.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    hit = re.search(r'\[segment (L|M|S)\]', header)
    if hit is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    label = hit.group(1)
    return "".join(("segment_", label))


def seq_header_to_genome(header):
    """Extract the genome identifier from a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    match = re.search(r'\[genome (.+)\]', header)
    if match is None:
        raise Exception("Unknown genome in header %s" % header)
    return match.group(1)


# Chromosome names: one per genome segment (L, M, S).
chrs = ["segment_{}".format(label) for label in ('L', 'M', 'S')]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path(
    "data/dobrava-belgrade_orthohantavirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds

def seq_header_to_chr(header):
    """Return the chromosome name for the segment tagged in a header.

    Args:
        header: header string expected to contain '[segment X]' where X
            is L, M or S

    Returns:
        'segment_' followed by the captured segment label

    Raises:
        Exception: if no such tag occurs in the header
    """
    import re
    segment_tag = re.compile(r'\[segment (L|M|S)\]')
    found = segment_tag.search(header)
    if found:
        return "segment_" + found.group(1)
    raise Exception("Unknown or invalid segment in header %s" % header)


def seq_header_to_genome(header):
    """Return the genome name tagged in a sequence header.

    Args:
        header: header string expected to contain '[genome NAME]'

    Returns:
        the text captured after '[genome ' (greedy: up to the last ']')

    Raises:
        Exception: if the header has no '[genome ...]' tag
    """
    import re
    genome_tag = re.compile(r'\[genome (.+)\]')
    found = genome_tag.search(header)
    if found:
        return found.group(1)
    raise Exception("Unknown genome in header %s" % header)


# Chromosome names: one per genome segment (L, M, S).
chrs = ["segment_L", "segment_M", "segment_S"]

# Dataset built from this module's identity, the chromosome list and the
# two header parsers.
ds = GenomesDatasetMultiChrom(
    __name__, __file__, __spec__, chrs, seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/sandfly_fever_sicilian.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object.
sys.modules[__name__] = ds