def __init__(self, ctx):
    """Set up the mapper from the Hadoop task context.

    Reads job configuration, configures logging, builds and configures a
    BwaAligner, assembles the hit-processor chain (filter -> emit/dedup),
    points the aligner at the distributed reference archive, and records
    this task's input-split end offset.

    ctx: Hadoop (Pydoop) task context providing getJobConf()/getInputSplit().
    """
    # NOTE(review): super(type(self), self) recurses infinitely if this class
    # is ever subclassed (type(self) is then the subclass). Prefer naming the
    # enclosing class explicitly, as the RAPI variant does -- confirm the
    # class name before changing.
    super(type(self), self).__init__(ctx)
    self.__get_configuration(ctx)
    # basicConfig must run after __get_configuration, which sets self.log_level
    logging.basicConfig(level=self.log_level)
    self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS, logging.getLogger("mapper"), ctx)
    # Build and configure the aligner from values read off the job conf.
    self.aligner = BwaAligner()
    self.aligner.event_monitor = self.event_monitor
    self.aligner.qformat = self.format
    self.aligner.max_isize = self.max_isize
    self.aligner.nthreads = self.nthreads
    self.aligner.trim_qual = self.trim_qual
    self.aligner.mmap_enabled = True
    ######## assemble hit processor chain
    # Filter first; then either emit SAM directly (map-only) or mark
    # duplicates before emitting.
    chain = FilterLink(self.event_monitor)
    chain.remove_unmapped = self.remove_unmapped
    chain.min_hit_quality = self.min_hit_quality
    if self.__map_only:
        chain.set_next(EmitSamLink(ctx, self.event_monitor))
    else:
        chain.set_next(MarkDuplicatesEmitter(ctx, self.event_monitor))
    self.aligner.hit_visitor = chain
    ######## set the path to the reference index
    self.ref_archive = utils.get_ref_archive(ctx.getJobConf())
    self.aligner.reference = self.get_reference_root(self.ref_archive)
    # part of the code is a workaround for accumulating records, see #331
    isplit = InputSplit(ctx.getInputSplit())
    # Absolute end offset of this task's input split.
    self.split_end = isplit.offset + isplit.length
def __init__(self, ctx):
    """Set up the mapper from the Hadoop task context.

    Reads job configuration, configures logging, builds and configures a
    BwaAligner, assembles the hit-processor chain (filter -> emit/dedup),
    points the aligner at the distributed reference archive, and records
    this task's input-split end offset.

    ctx: Hadoop (Pydoop) task context providing getJobConf()/getInputSplit().
    """
    # NOTE(review): super(type(self), self) recurses infinitely if this class
    # is ever subclassed (type(self) is then the subclass). Prefer naming the
    # enclosing class explicitly, as the RAPI variant does -- confirm the
    # class name before changing.
    super(type(self), self).__init__(ctx)
    self.__get_configuration(ctx)
    # basicConfig must run after __get_configuration, which sets self.log_level
    logging.basicConfig(level=self.log_level)
    self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS, logging.getLogger("mapper"), ctx)
    # Build and configure the aligner from values read off the job conf.
    self.aligner = BwaAligner()
    self.aligner.event_monitor = self.event_monitor
    self.aligner.qformat = self.format
    self.aligner.max_isize = self.max_isize
    self.aligner.nthreads = self.nthreads
    self.aligner.trim_qual = self.trim_qual
    self.aligner.mmap_enabled = True
    ######## assemble hit processor chain
    # Filter first; then either emit SAM directly (map-only) or mark
    # duplicates before emitting.
    chain = FilterLink(self.event_monitor)
    chain.remove_unmapped = self.remove_unmapped
    chain.min_hit_quality = self.min_hit_quality
    if self.__map_only:
        chain.set_next( EmitSamLink(ctx, self.event_monitor) )
    else:
        chain.set_next( MarkDuplicatesEmitter(ctx, self.event_monitor) )
    self.aligner.hit_visitor = chain
    ######## set the path to the reference index
    self.ref_archive = utils.get_ref_archive(ctx.getJobConf())
    self.aligner.reference = self.get_reference_root(self.ref_archive)
    # part of the code is a workaround for accumulating records, see #331
    isplit = InputSplit(ctx.getInputSplit())
    # Absolute end offset of this task's input split.
    self.split_end = isplit.offset + isplit.length
def __init__(self, ctx):
    """Set up the RAPI-based mapper from the Hadoop task context.

    Reads job configuration, configures logging, creates and configures a
    HiRapiAligner plugin instance, reserves batch space for reads, loads the
    reference index, and assembles the hit-processor chain (filter -> emit).

    ctx: Hadoop (Pydoop) task context providing getJobConf().
    Raises NotImplementedError when not running in map-only mode.
    """
    super(mapper, self).__init__(ctx)
    self.logger = logging.getLogger("seqal")
    self.__get_configuration(ctx)
    # basicConfig must run after __get_configuration, which sets self.log_level
    logging.basicConfig(level=self.log_level)
    self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS, logging.getLogger("mapper"), ctx)
    pe = True  # single-end sequence alignment not yet supported by Seqal
    self.hi_rapi = HiRapiAligner('rapi_bwa', paired=pe)
    # opts
    self.hi_rapi.opts.n_threads = self.nthreads
    self.hi_rapi.opts.isize_max = self.max_isize
    if self.min_isize is not None:
        self.hi_rapi.opts.isize_min = self.min_isize
    # Quality-score encoding: Illumina offset only for "fastq-illumina" input,
    # Sanger otherwise.
    self.hi_rapi.qoffset = self.hi_rapi.Qenc_Illumina if self.format == "fastq-illumina" else self.hi_rapi.Qenc_Sanger
    # end opts
    self.logger.info("Using the %s aligner plugin, aligner version %s, plugin version %s",
                     self.hi_rapi.aligner_name, self.hi_rapi.aligner_version, self.hi_rapi.plugin_version)
    self.logger.info("Working in %s mode", 'paired-end' if pe else 'single-end')
    # allocate space for reads
    self.logger.debug("Reserving batch space for %s reads", self.batch_size)
    self.hi_rapi.reserve_space(self.batch_size)
    # load reference
    reference_root = self.get_reference_root_from_archive(utils.get_ref_archive(ctx.getJobConf()))
    self.logger.info("Full reference path (prefix): %s", reference_root)
    # time_block reports how long the reference load takes via the monitor
    with self.event_monitor.time_block("Loading reference %s" % reference_root):
        self.hi_rapi.load_ref(reference_root)
    ######## assemble hit processor chain
    chain = RapiFilterLink(self.event_monitor)
    chain.remove_unmapped = self.remove_unmapped
    chain.min_hit_quality = self.min_hit_quality
    if self.__map_only:
        chain.set_next(RapiEmitSamLink(ctx, self.event_monitor, self.hi_rapi))
    else:
        # Unlike the BWA variant, no duplicate-marking link exists yet.
        raise NotImplementedError("Only mapping mode is supported at the moment")
    self.hit_visitor_chain = chain