def __init__(self, ctx):
    """
    Prepare the aligner and its hit-processing chain for this task.

    Steps, in order:
      1. run the parent initialiser and load this object's configuration;
      2. configure logging and create the event monitor (logger "mapper");
      3. create a ``BwaAligner`` and copy the configured settings onto it;
      4. build the hit-processor chain: a ``FilterLink`` followed by either
         an ``EmitSamLink`` (map-only mode) or a ``MarkDuplicatesEmitter``;
      5. resolve the reference archive and hand its root to the aligner;
      6. remember where the input split ends (``self.split_end``).

    :param ctx: task context; must provide ``getJobConf()`` and
        ``getInputSplit()``.
    """
    # NOTE(review): super(type(self), self) recurses forever if a subclass
    # inherits this __init__, because type(self) is then the subclass.
    # Naming the enclosing class explicitly would avoid that.
    super(type(self), self).__init__(ctx)
    self.__get_configuration(ctx)
    logging.basicConfig(level=self.log_level)
    self.event_monitor = HadoopEventMonitor(
        self.COUNTER_CLASS, logging.getLogger("mapper"), ctx)

    ######## configure the aligner
    self.aligner = BwaAligner()
    bwa = self.aligner
    bwa.event_monitor = self.event_monitor
    bwa.qformat = self.format
    bwa.max_isize = self.max_isize
    bwa.nthreads = self.nthreads
    bwa.trim_qual = self.trim_qual
    bwa.mmap_enabled = True

    ######## assemble hit processor chain
    head = FilterLink(self.event_monitor)
    head.remove_unmapped = self.remove_unmapped
    head.min_hit_quality = self.min_hit_quality
    # The terminal link depends on whether we run in map-only mode.
    tail_cls = EmitSamLink if self.__map_only else MarkDuplicatesEmitter
    head.set_next(tail_cls(ctx, self.event_monitor))
    bwa.hit_visitor = head

    ######## set the path to the reference index
    self.ref_archive = utils.get_ref_archive(ctx.getJobConf())
    bwa.reference = self.get_reference_root(self.ref_archive)

    # part of the code is a workaround for accumulating records, see #331
    split = InputSplit(ctx.getInputSplit())
    self.split_end = split.offset + split.length
def __init__(self, ctx):
    """
    Set up the reducer: configuration, logging, event monitor, output sink.

    The job configuration from ``ctx`` is first passed through
    ``deprecation_utils.convert_job_conf`` together with
    ``self.DeprecationMap`` and the "seqal" logger. Two properties are
    then read from the converted configuration:

      * ``seal.seqal.log.level`` -> ``log_level`` (default ``'INFO'``)
      * ``seal.seqal.discard_duplicates`` -> ``discard_duplicates``
        (default ``False``)

    NOTE(review): ``jc_configure`` / ``jc_configure_bool`` presumably store
    the value on ``self`` under the given attribute name -- confirm against
    their definitions.

    :param ctx: task context; must provide ``getJobConf()``.
    """
    super(reducer, self).__init__(ctx)

    raw_conf = ctx.getJobConf()
    seqal_log = logging.getLogger("seqal")
    jobconf = deprecation_utils.convert_job_conf(
        raw_conf, self.DeprecationMap, seqal_log)

    jc_configure(
        self, jobconf, 'seal.seqal.log.level', 'log_level', 'INFO')
    jc_configure_bool(
        self, jobconf, 'seal.seqal.discard_duplicates',
        'discard_duplicates', False)

    logging.basicConfig(level=self.log_level)

    reducer_log = logging.getLogger("reducer")
    self.event_monitor = HadoopEventMonitor(
        self.COUNTER_CLASS, reducer_log, ctx)
    # Records leaving the reducer are written through this link.
    self.__output_sink = EmitSamLink(ctx, self.event_monitor)
def setUp(self):
    """
    Create the fixtures shared by the tests.

    Builds a map context, a saving logger, an event monitor in the "Test"
    counter group, an ``EmitSamLink`` emitter, and a pair of mappings:

      * ``self.m1``: name "first",  tid "tid1"
      * ``self.m2``: name "second", tid "tid2"

    Both mappings are also available, in that order, as ``self.pair``.
    """
    self.map_ctx = map_context(None, None)
    self.count_group = "Test"
    self.logger = SavingLogger()
    self.monitor = HadoopEventMonitor(
        self.count_group, self.logger, self.map_ctx)
    self.emitter = EmitSamLink(self.map_ctx, self.monitor)

    self.pair = [SimpleMapping(), SimpleMapping()]
    self.m1, self.m2 = self.pair

    # (name, tid) for each mapping, in the same order as self.pair
    identities = (("first", "tid1"), ("second", "tid2"))
    for mapping, (name, tid) in zip(self.pair, identities):
        mapping.set_name(name)
        mapping.tid = tid
def test_constructor_link(self):
    """
    Check how the ``EmitSamLink`` constructor sets ``next_link``.

    With no third argument ``next_link`` must be ``None``; when a link is
    passed as the third argument it must be exposed as ``next_link``.
    """
    # No successor given: the chain ends at this link.
    unlinked = EmitSamLink(self.map_ctx, self.monitor)
    self.assertTrue(unlinked.next_link is None)

    # Successor given: it must be stored as-is.
    successor = HitProcessorChainLink()
    linked = EmitSamLink(self.map_ctx, self.monitor, successor)
    self.assertEqual(successor, linked.next_link)