Ejemplo n.º 1
0
 def setUp(self):
     """Create the aligner and load every mini-reference read pair into it.

     Each record returned by ``test_utils.get_mini_ref_seqs()`` must have
     exactly five fields; they are passed positionally to ``load_pair``.

     Raises:
         RuntimeError: on the first record that does not have five fields.
     """
     aligner = HiRapiAligner('rapi_bwa')
     self.hi = aligner
     self.reads = test_utils.get_mini_ref_seqs()
     for record in self.reads:
         if len(record) == 5:
             aligner.load_pair(*record)
         else:
             raise RuntimeError(
                 "Unexpected number of fields in mini_ref read record")
Ejemplo n.º 2
0
    def test_base_quality(self):
        """Check that quality strings are normalised across encodings.

        The same read is loaded twice into a single-end aligner: once with
        its original quality string under the Sanger encoding, and once with
        the quality string shifted by the Illumina/Sanger offset difference
        under the Illumina encoding.  Both loaded reads must end up with the
        same ``qual`` value.
        """
        hi = HiRapiAligner('rapi_bwa', paired=False)
        # Fields 1 and 2 of the record are used as the read's sequence and
        # quality arguments to load_read (presumably id, seq, qual -- confirm
        # against test_utils.get_mini_ref_seqs).
        one_read = self.reads[0][0:3]
        hi.q_offset = self.hi.Qenc_Sanger
        hi.load_read('sanger_read', one_read[1], one_read[2])

        # 64:  Illumina base quality offset
        # 33:  Sanger base quality offset
        ill_quality = ''.join(chr(ord(c) + (64 - 33)) for c in one_read[2])
        hi.q_offset = self.hi.Qenc_Illumina
        hi.load_read('illumina_read', one_read[1], ill_quality)

        loaded_qualities = [frag[0].qual for frag in hi.ifragments()]
        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists in recent Python versions.
        self.assertEqual(2, len(loaded_qualities))
        self.assertEqual(loaded_qualities[0], loaded_qualities[1])
Ejemplo n.º 3
0
    def __init__(self, ctx):
        """Set up the mapper: configuration, aligner, reference and hit chain.

        Args:
            ctx: task context.  It is handed to the parent constructor, the
                configuration reader, the event monitor and the SAM emitter;
                its ``getJobConf()`` is used to locate the reference archive.

        Raises:
            NotImplementedError: if map-only mode is not enabled.
        """
        super(mapper, self).__init__(ctx)
        self.logger = logging.getLogger("seqal")
        # Presumably populates the settings read below (log_level, nthreads,
        # max_isize, min_isize, format, batch_size, ...) -- confirm.
        self.__get_configuration(ctx)
        logging.basicConfig(level=self.log_level)
        self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS, logging.getLogger("mapper"), ctx)

        pe = True  # single-end sequence alignment not yet supported by Seqal
        self.hi_rapi = HiRapiAligner('rapi_bwa', paired=pe)

        # opts
        self.hi_rapi.opts.n_threads = self.nthreads
        self.hi_rapi.opts.isize_max = self.max_isize
        if self.min_isize is not None:
            self.hi_rapi.opts.isize_min = self.min_isize
        # The aligner's quality-encoding attribute is spelled ``q_offset``.
        # Assigning to a different name would only create a stray attribute
        # on the object, so the requested encoding would never take effect.
        if self.format == "fastq-illumina":
            self.hi_rapi.q_offset = self.hi_rapi.Qenc_Illumina
        else:
            self.hi_rapi.q_offset = self.hi_rapi.Qenc_Sanger
        # end opts

        self.logger.info("Using the %s aligner plugin, aligner version %s, plugin version %s",
                self.hi_rapi.aligner_name, self.hi_rapi.aligner_version, self.hi_rapi.plugin_version)
        self.logger.info("Working in %s mode", 'paired-end' if pe else 'single-end')

        # allocate space for reads
        self.logger.debug("Reserving batch space for %s reads", self.batch_size)
        self.hi_rapi.reserve_space(self.batch_size)

        # load reference
        reference_root = self.get_reference_root_from_archive(utils.get_ref_archive(ctx.getJobConf()))
        self.logger.info("Full reference path (prefix): %s", reference_root)
        with self.event_monitor.time_block("Loading reference %s" % reference_root):
            self.hi_rapi.load_ref(reference_root)

        ######## assemble hit processor chain
        # The filter link comes first; in map-only mode it feeds the SAM emitter.
        chain = RapiFilterLink(self.event_monitor)
        chain.remove_unmapped = self.remove_unmapped
        chain.min_hit_quality = self.min_hit_quality
        if self.__map_only:
            chain.set_next( RapiEmitSamLink(ctx, self.event_monitor, self.hi_rapi) )
        else:
            raise NotImplementedError("Only mapping mode is supported at the moment")
        self.hit_visitor_chain = chain
Ejemplo n.º 4
0
 def setUp(self):
     """Create the 'rapi_bwa' aligner, then run the mini-reference alignment helper."""
     aligner = HiRapiAligner('rapi_bwa')
     self.hi = aligner
     # The helper is invoked only after self.hi has been assigned.
     self._align_mini_ref_seqs()
Ejemplo n.º 5
0
 def setUp(self):
     """Create the 'rapi_bwa' aligner used by the tests and store it on ``self.hi``."""
     aligner = HiRapiAligner('rapi_bwa')
     self.hi = aligner