예제 #1
0
파일: splat.py 프로젝트: buchanae/odetta
from mrjob.job import MRJob

from odetta.utils import model_parser

# Field layout for one Splat record, as (field name, default value) pairs.
# Splat.parse_line calls splat_model with the tab-split columns of a line and
# then reads fields from the result by name (e.g. data['a_start']).
# NOTE(review): model_parser lives in odetta.utils and is not visible here;
# presumably fields are matched to columns by position and each default's
# type ('' vs 0) selects the conversion applied to the raw text -- confirm.
splat_model = model_parser([
    ('reference', ''),
    ('flanks', ''),
    ('a_length', 0),
    ('b_length', 0),
    ('intron_length', 0),
    # parse_line compares a_start with b_start and, when a_start comes first,
    # takes the overall start from a_start and the overall end from b_end.
    ('a_start', 0),
    ('a_end', 0),
    ('b_start', 0),
    ('b_end', 0),
    ('sequence', ''),
    ('read_count', 0),
    # Default is a string, so this is kept as one text value here.
    # NOTE(review): presumably a delimited list of read IDs -- confirm.
    ('read_IDs', ''),
])


class Splat(MRJob):
    """
    Parse a Splat formatted line.
    """
    def parse_line(self, line):

        data = splat_model(line.split('\t'))

        if data['a_start'] <= data['b_start']:
            data['start'] = data['a_start']
            data['end'] = data['b_end']
예제 #2
0
from mrjob.job import MRJob

from odetta.utils import model_parser

# Field layout for one SAM alignment record, as (field name, default value)
# pairs.  The eleven names appear to mirror the mandatory SAM columns in spec
# order: QNAME, FLAG, RNAME, POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL.
# NOTE(review): model_parser lives in odetta.utils and is not visible here;
# presumably fields are matched to columns by position and each default's
# type ('' vs 0) selects the conversion applied to the raw text -- confirm.
sam_model = model_parser([
    ('ID', ''),
    ('flag', 0),
    ('reference', ''),
    ('start', 0),
    ('map_quality', 0),
    ('CIGAR', ''),
    ('rnext', ''),
    # NOTE(review): the SAM spec defines PNEXT and TLEN as integers, but both
    # have string defaults here -- confirm this is intentional.
    ('pnext', ''),
    ('tlen', ''),
    ('sequence', ''),
    ('quality', ''),
])


class SAM(MRJob):
    """
    Parse a SAM formatted file.

    http://samtools.sourceforge.net/samtools.shtml#5
    """
    def configure_options(self):
        """Define command-line options."""

        super(SAM, self).configure_options()
        self.add_passthrough_option('--type',
                                    default='SAM',
예제 #3
0
파일: splat.py 프로젝트: abuchanan/odetta
from mrjob.job import MRJob

from odetta.utils import model_parser

# Field layout for one Splat record, as (field name, default value) pairs.
# Splat.parse_line calls splat_model with the tab-split columns of a line and
# then reads fields from the result by name (e.g. data['a_start']).
# NOTE(review): model_parser lives in odetta.utils and is not visible here;
# presumably fields are matched to columns by position and each default's
# type ('' vs 0) selects the conversion applied to the raw text -- confirm.
splat_model = model_parser([
    ('reference', ''),
    ('flanks', ''),
    ('a_length', 0),
    ('b_length', 0),
    ('intron_length', 0),
    # parse_line compares a_start with b_start to decide which of the two
    # segments supplies the record's overall start.
    ('a_start', 0),
    ('a_end', 0),
    ('b_start', 0),
    ('b_end', 0),
    ('sequence', ''),
    ('read_count', 0),
    # Default is a string, so this is kept as one text value here.
    # NOTE(review): presumably a delimited list of read IDs -- confirm.
    ('read_IDs', ''),
])

class Splat(MRJob):

    """
    Parse a Splat formatted line.
    """

    def parse_line(self, line):

        data = splat_model(line.split('\t'))

        if data['a_start'] <= data['b_start']:
            data['start'] = data['a_start']
예제 #4
0
from mrjob.job import MRJob

from odetta.utils import model_parser


# Field layout for one bowtie alignment record, as (field name, default value)
# pairs.  Five fields are declared, which lines up with the columns left over
# when bowtie's default output is produced with --suppress 6,7,8 (the format
# the Bowtie class docstring says this parser expects).
# NOTE(review): model_parser lives in odetta.utils and is not visible here;
# presumably fields are matched to columns by position and each default's
# type ('' vs 0) selects the conversion applied to the raw text -- confirm.
bowtie_model = model_parser([
    ('ID', ''),
    ('strand', '+'),
    ('reference', ''),
    # NOTE(review): bowtie's manual describes this column as a 0-based
    # offset -- confirm downstream code expects that convention.
    ('start', 0),
    ('sequence', ''),
])


#TODO unit test
class Bowtie(MRJob):

    """
    Parse a bowtie-formatted file.  This is a very specific format,
    output with --suppress 6,7,8

    http://bowtie-bio.sourceforge.net/manual.shtml#default-bowtie-output
    """

    def configure_options(self):
        """Define command-line options."""

        super(Bowtie, self).configure_options()
        self.add_passthrough_option('--type', default='bowtie', 
            help="set the 'type' attribute")
예제 #5
0
파일: sam.py 프로젝트: abuchanan/odetta
from mrjob.job import MRJob

from odetta.utils import model_parser


# Field layout for one SAM alignment record, as (field name, default value)
# pairs.  The eleven names appear to mirror the mandatory SAM columns in spec
# order: QNAME, FLAG, RNAME, POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL.
# NOTE(review): model_parser lives in odetta.utils and is not visible here;
# presumably fields are matched to columns by position and each default's
# type ("" vs 0) selects the conversion applied to the raw text -- confirm.
sam_model = model_parser(
    [
        ("ID", ""),
        ("flag", 0),
        ("reference", ""),
        ("start", 0),
        ("map_quality", 0),
        ("CIGAR", ""),
        ("rnext", ""),
        # NOTE(review): the SAM spec defines PNEXT and TLEN as integers, but
        # both have string defaults here -- confirm this is intentional.
        ("pnext", ""),
        ("tlen", ""),
        ("sequence", ""),
        ("quality", ""),
    ]
)


class SAM(MRJob):

    """
    Parse a SAM formatted file.

    http://samtools.sourceforge.net/samtools.shtml#5
    """

    def configure_options(self):