__author__ = 'Hayden Metsky <*****@*****.**>'

# One chromosome label per segment of this dataset.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        ValueError: if the header has no such segment tag.
    """
    import re
    c = re.compile(r'\[segment (L|M|S)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture L, M or S, so a second membership check
    # on the captured value would be unreachable.
    return "segment_" + m.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        ValueError: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    c = re.compile(r'\[genome (.+)\]')
    m = c.search(header)
    if not m:
        # Fail with a descriptive error instead of an AttributeError raised
        # by calling .group() on None.
        raise ValueError("Unknown genome in header %s" % header)
    return m.group(1)


ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr,
                              seq_header_to_genome=seq_header_to_genome)
ds.add_fasta_path("data/sfnv.fasta", relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# One chromosome label per segment of this dataset.
chrs = ["segment_" + seg for seg in ['L', 'M']]


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L or M.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        ValueError: if the header has no such segment tag.
    """
    import re
    c = re.compile(r'\[segment (L|M)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture L or M, so a second membership check on
    # the captured value would be unreachable.
    return "segment_" + m.group(1)


ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# NOTE(review): join/dirname are not imported in the visible part of this
# file -- presumably they come from os.path above this chunk; verify.
# os.listdir returns entries in arbitrary order; sort them so the FASTA
# files are added in the same order on every run.
for f in sorted(listdir(join(dirname(__file__), "data/cano_delgadito/"))):
    ds.add_fasta_path("data/cano_delgadito/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
import re c = re.compile(r'\[segment (L1|L2|L3|M1|M2|M3|S1|S2|S3|S4)\]') m = c.search(header) if not m: raise Exception("Unknown or invalid segment in header %s" % header) seg = m.group(1) return "segment_" + seg def seq_header_to_genome(header): import re c = re.compile(r'\[genome (.+)\]') m = c.search(header) if not m: raise Exception("Unknown genome in header %s" % header) return m.group(1) chrs = [ "segment_" + seg for seg in ['L1', 'L2', 'L3', 'M1', 'M2', 'M3', 'S1', 'S2', 'S3', 'S4'] ] ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs, seq_header_to_chr, seq_header_to_genome=seq_header_to_genome) ds.add_fasta_path("data/nelson_bay_orthoreovirus.fasta.gz", relative=True) sys.modules[__name__] = ds
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/brazilian_mammarenavirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path(
    "data/lymphocytic_choriomeningitis_mammarenavirus.fasta.gz",
    relative=True,
)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# One chromosome label per segment of this dataset.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        ValueError: if the header has no such segment tag.
    """
    import re
    c = re.compile(r'\[segment (L|M|S)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture L, M or S, so a second membership check
    # on the captured value would be unreachable.
    return "segment_" + m.group(1)


ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# NOTE(review): join/dirname are not imported in the visible part of this
# file -- presumably they come from os.path above this chunk; verify.
# os.listdir returns entries in arbitrary order; sort them so the FASTA
# files are added in the same order on every run.
for f in sorted(listdir(join(dirname(__file__), "data/dera_ghazi_khan/"))):
    ds.add_fasta_path("data/dera_ghazi_khan/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# One chromosome label per segment of this dataset.
chrs = ["segment_" + seg for seg in ['L', 'S']]


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        ValueError: if the header has no such segment tag.
    """
    import re
    c = re.compile(r'\[segment (L|S)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture L or S, so a second membership check on
    # the captured value would be unreachable.
    return "segment_" + m.group(1)


ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# NOTE(review): join/dirname are not imported in the visible part of this
# file -- presumably they come from os.path above this chunk; verify.
# os.listdir returns entries in arbitrary order; sort them so the FASTA
# files are added in the same order on every run.
for f in sorted(listdir(join(dirname(__file__),
                             "data/mopeia_lassa_reassortant_29/"))):
    ds.add_fasta_path("data/mopeia_lassa_reassortant_29/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
from os import listdir
import sys

from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# One chromosome label per segment of this dataset.
chrs = ["segment_" + seg for seg in ['L', 'M', 'S']]


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        ValueError: if the header has no such segment tag.
    """
    import re
    c = re.compile(r'\[segment (L|M|S)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture L, M or S, so a second membership check
    # on the captured value would be unreachable.
    return "segment_" + m.group(1)


ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# NOTE(review): join/dirname are not imported in the visible part of this
# file -- presumably they come from os.path above this chunk; verify.
# os.listdir returns entries in arbitrary order; sort them so the FASTA
# files are added in the same order on every run.
for f in sorted(listdir(join(dirname(__file__), "data/leopards_hill/"))):
    ds.add_fasta_path("data/leopards_hill/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
from catch.datasets import GenomesDatasetMultiChrom

__author__ = 'Hayden Metsky <*****@*****.**>'

# One chromosome label per segment of this dataset.
chrs = [
    "segment_" + seg
    for seg in ['1', '10', '11', '2', '3', '4', '5', '6', '7', '8', '9']
]


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<N>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment N]" where N is an integer from 1 to 11.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        ValueError: if the header has no such segment tag.
    """
    import re
    c = re.compile(r'\[segment (1|10|11|2|3|4|5|6|7|8|9)\]')
    m = c.search(header)
    if not m:
        raise ValueError("Unknown segment in header %s" % header)
    # The pattern can only capture one of the listed segment numbers, so a
    # second membership check on the captured value would be unreachable.
    return "segment_" + m.group(1)


ds = GenomesDatasetMultiChrom(__name__, __file__, __spec__, chrs,
                              seq_header_to_chr)

# os.listdir returns entries in arbitrary order; sort them so the FASTA
# files are added in the same order on every run.
for f in sorted(listdir(join(dirname(__file__), "data/rotavirus_g/"))):
    ds.add_fasta_path("data/rotavirus_g/" + f, relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'M', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/black_creek_canal_orthohantavirus.fasta.gz",
                  relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/mopeia_lassa_virus_reassortant_29.fasta.gz",
                  relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<N>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment N]" where N is a digit from 1 to 8.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (1|2|3|4|5|6|7|8)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = [
    "segment_" + name
    for name in ('1', '2', '3', '4', '5', '6', '7', '8')
]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)

# Each segment has its own FASTA file; add them in segment order.
for seg in ('1', '2', '3', '4', '5', '6', '7', '8'):
    fasta_name = "data/influenza_b_segment" + seg + ".fasta.gz"
    ds.add_fasta_path(fasta_name, relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
import sys

from catch.datasets import GenomesDatasetMultiChrom


def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'M', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/bujaru_phlebovirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'M', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/shamonda_orthobunyavirus.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'M', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/nairobi_sheep_disease_orthonairovirus.fasta.gz",
                  relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'M', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/dobrava-belgrade_orthohantavirus.fasta.gz",
                  relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds
def seq_header_to_chr(header):
    """Return the chromosome label ("segment_<X>") named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[segment X]" where X is L, M or S.

    Returns:
        "segment_" followed by the segment captured from the tag.

    Raises:
        Exception: if the header has no such segment tag.
    """
    import re
    found = re.search(r'\[segment (L|M|S)\]', header)
    if found is None:
        raise Exception("Unknown or invalid segment in header %s" % header)
    return "segment_" + found.group(1)


def seq_header_to_genome(header):
    """Return the genome identifier named in a sequence header.

    Args:
        header: sequence header text; it must contain a tag of the form
            "[genome ID]".

    Returns:
        The text captured between "[genome " and the closing bracket.

    Raises:
        Exception: if the header has no genome tag.
    """
    import re
    # NOTE(review): ".+" is greedy, so the capture runs to the last "]" in
    # the header -- confirm that the genome tag is the final bracketed tag.
    found = re.search(r'\[genome (.+)\]', header)
    if found is None:
        raise Exception("Unknown genome in header %s" % header)
    return found.group(1)


# One chromosome label per segment of this dataset.
chrs = ["segment_" + name for name in ('L', 'M', 'S')]

ds = GenomesDatasetMultiChrom(
    __name__,
    __file__,
    __spec__,
    chrs,
    seq_header_to_chr,
    seq_header_to_genome=seq_header_to_genome,
)
ds.add_fasta_path("data/sandfly_fever_sicilian.fasta.gz", relative=True)

# Replace this module's entry in sys.modules with the dataset object, so
# the name under which this module was imported resolves to `ds`.
sys.modules[__name__] = ds