from mrjob.job import MRJob
from odetta.utils import model_parser

# Column layout of one Splat record, listed in the order the fields appear on
# a line. Each entry pairs a field name with a literal value.
# NOTE(review): the second element is presumably the field's default (and
# possibly what model_parser uses to pick str vs. int conversion) -- confirm
# against odetta.utils.model_parser.
splat_model = model_parser([
    ('reference', ''),
    ('flanks', ''),
    ('a_length', 0),
    ('b_length', 0),
    ('intron_length', 0),
    ('a_start', 0),
    ('a_end', 0),
    ('b_start', 0),
    ('b_end', 0),
    ('sequence', ''),
    ('read_count', 0),
    ('read_IDs', ''),
])


class Splat(MRJob):
    """
    Parse a Splat formatted line.
    """

    def parse_line(self, line):
        """Split a tab-separated line into Splat fields and add its span.

        The line is split on tab characters and the pieces are handed to
        ``splat_model``; ``'start'`` and ``'end'`` keys are then added to the
        result.

        Only the beginning of this method is visible in this excerpt: the
        handling of the case where ``b_start`` precedes ``a_start`` and the
        return statement are not shown here.
        """
        data = splat_model(line.split('\t'))
        # Segment a begins at or before segment b: the span runs from the
        # start of a to the end of b.
        # NOTE(review): the original indentation was lost; both assignments
        # are assumed to belong to this branch -- confirm against the full
        # source.
        if data['a_start'] <= data['b_start']:
            data['start'] = data['a_start']
            data['end'] = data['b_end']
from mrjob.job import MRJob from odetta.utils import model_parser sam_model = model_parser([ ('ID', ''), ('flag', 0), ('reference', ''), ('start', 0), ('map_quality', 0), ('CIGAR', ''), ('rnext', ''), ('pnext', ''), ('tlen', ''), ('sequence', ''), ('quality', ''), ]) class SAM(MRJob): """ Parse a SAM formatted file. http://samtools.sourceforge.net/samtools.shtml#5 """ def configure_options(self): """Define command-line options.""" super(SAM, self).configure_options() self.add_passthrough_option('--type', default='SAM',
from mrjob.job import MRJob
from odetta.utils import model_parser

# Column layout of one Splat record, listed in the order the fields appear on
# a line. Each entry pairs a field name with a literal value.
# NOTE(review): the second element is presumably the field's default (and
# possibly what model_parser uses to pick str vs. int conversion) -- confirm
# against odetta.utils.model_parser.
splat_model = model_parser([
    ('reference', ''),
    ('flanks', ''),
    ('a_length', 0),
    ('b_length', 0),
    ('intron_length', 0),
    ('a_start', 0),
    ('a_end', 0),
    ('b_start', 0),
    ('b_end', 0),
    ('sequence', ''),
    ('read_count', 0),
    ('read_IDs', ''),
])


class Splat(MRJob):
    """
    Parse a Splat formatted line.
    """

    def parse_line(self, line):
        """Split a tab-separated line into Splat fields and add its start.

        The line is split on tab characters and the pieces are handed to
        ``splat_model``; a ``'start'`` key is then added to the result.

        Only the beginning of this method is visible in this excerpt: the
        rest of the branch, any alternative branch, and the return statement
        are not shown here.
        """
        data = splat_model(line.split('\t'))
        # Segment a begins at or before segment b, so the record starts where
        # segment a starts.
        if data['a_start'] <= data['b_start']:
            data['start'] = data['a_start']
from mrjob.job import MRJob
from odetta.utils import model_parser

# Column layout of one bowtie record, listed in the order the fields appear
# on a line. Each entry pairs a field name with a literal value.
# NOTE(review): the second element is presumably the field's default (and
# possibly what model_parser uses to pick str vs. int conversion) -- confirm
# against odetta.utils.model_parser.
bowtie_model = model_parser([
    ('ID', ''),
    ('strand', '+'),
    ('reference', ''),
    ('start', 0),
    ('sequence', ''),
])


# TODO: unit test
class Bowtie(MRJob):
    """
    Parse a bowtie-formatted file.

    This is a very specific format, output with --suppress 6,7,8
    http://bowtie-bio.sourceforge.net/manual.shtml#default-bowtie-output

    With those three columns suppressed, the five fields named in
    ``bowtie_model`` are what is left to parse.
    NOTE(review): only ``configure_options`` is visible in this excerpt; the
    code that applies ``bowtie_model`` to input lines is not shown here.
    """

    def configure_options(self):
        """Define command-line options.

        Keeps the options set up by the parent class and adds ``--type``,
        which defaults to ``'bowtie'``.
        """
        super(Bowtie, self).configure_options()
        # Register --type as a passthrough option; per its help text it sets
        # the 'type' attribute.
        self.add_passthrough_option('--type', default='bowtie',
                                    help="set the 'type' attribute")
from mrjob.job import MRJob from odetta.utils import model_parser sam_model = model_parser( [ ("ID", ""), ("flag", 0), ("reference", ""), ("start", 0), ("map_quality", 0), ("CIGAR", ""), ("rnext", ""), ("pnext", ""), ("tlen", ""), ("sequence", ""), ("quality", ""), ] ) class SAM(MRJob): """ Parse a SAM formatted file. http://samtools.sourceforge.net/samtools.shtml#5 """ def configure_options(self):