Beispiel #1
0
    def __init__(self, ctx):
        """Set up logging, the BWA aligner and its hit processor chain.

        ``ctx`` is the task context: it is passed to the base class, the
        event monitor and the emitting link, and it supplies the job
        configuration and the input split.
        """
        # NOTE(review): super(type(self), self) resolves against the runtime
        # class, so this would recurse endlessly if the class were ever
        # subclassed -- consider naming the class explicitly here.
        super(type(self), self).__init__(ctx)
        # Settings read below (log_level, format, max_isize, ...) are
        # presumably populated by this call -- confirm against its definition.
        self.__get_configuration(ctx)
        logging.basicConfig(level=self.log_level)
        mapper_log = logging.getLogger("mapper")
        self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS, mapper_log, ctx)

        ######## configure the aligner
        self.aligner = aligner = BwaAligner()
        aligner.event_monitor = self.event_monitor
        aligner.qformat = self.format
        aligner.max_isize = self.max_isize
        aligner.nthreads = self.nthreads
        aligner.trim_qual = self.trim_qual
        aligner.mmap_enabled = True

        ######## assemble hit processor chain: filter first, then an emitter
        head = FilterLink(self.event_monitor)
        head.remove_unmapped = self.remove_unmapped
        head.min_hit_quality = self.min_hit_quality
        # With map-only set the chain ends in EmitSamLink; otherwise it ends
        # in MarkDuplicatesEmitter.
        emitter_cls = EmitSamLink if self.__map_only else MarkDuplicatesEmitter
        head.set_next(emitter_cls(ctx, self.event_monitor))
        aligner.hit_visitor = head

        ######## set the path to the reference index
        self.ref_archive = utils.get_ref_archive(ctx.getJobConf())
        aligner.reference = self.get_reference_root(self.ref_archive)

        # part of the code is a workaround for accumulating records, see #331
        # split_end is the offset just past the last byte of this input split.
        split = InputSplit(ctx.getInputSplit())
        self.split_end = split.offset + split.length
Beispiel #2
0
    def __init__(self, ctx):
        """Read reducer settings from the job configuration and build the output link.

        ``ctx`` is the task context; it provides the job configuration and is
        passed on to the event monitor and the SAM emitting link.
        """
        super(reducer, self).__init__(ctx)

        # Run the raw job configuration through the deprecation map before
        # reading any settings from it.
        raw_conf = ctx.getJobConf()
        seqal_log = logging.getLogger("seqal")
        conf = deprecation_utils.convert_job_conf(
            raw_conf, self.DeprecationMap, seqal_log)

        # Presumably these store the values on self under the given attribute
        # names (self.log_level is read just below) -- confirm in jc_configure.
        jc_configure(
            self, conf, 'seal.seqal.log.level', 'log_level', 'INFO')
        jc_configure_bool(
            self, conf, 'seal.seqal.discard_duplicates', 'discard_duplicates', False)

        logging.basicConfig(level=self.log_level)

        reducer_log = logging.getLogger("reducer")
        monitor = HadoopEventMonitor(self.COUNTER_CLASS, reducer_log, ctx)
        self.event_monitor = monitor
        self.__output_sink = EmitSamLink(ctx, monitor)
Beispiel #3
0
	def setUp(self):
		# Shared fixtures: a map context, a counter group name, a logger,
		# an event monitor and the EmitSamLink built from them.
		ctx = map_context(None, None)
		group = "Test"
		log = SavingLogger()
		monitor = HadoopEventMonitor(group, log, ctx)
		emitter = EmitSamLink(ctx, monitor)

		self.map_ctx = ctx
		self.count_group = group
		self.logger = log
		self.monitor = monitor
		self.emitter = emitter

		# Two mappings, exposed individually and together as self.pair:
		#   m1: name = first,  tid = tid1
		#   m2: name = second, tid = tid2
		first, second = SimpleMapping(), SimpleMapping()
		first.set_name("first")
		first.tid = "tid1"
		second.set_name("second")
		second.tid = "tid2"

		self.pair = [first, second]
		self.m1, self.m2 = first, second
Beispiel #4
0
	def test_constructor_link(self):
		# Without a third constructor argument the link has no successor.
		unlinked = EmitSamLink(self.map_ctx, self.monitor)
		self.assertTrue(unlinked.next_link is None)

		# A link passed as the third argument is exposed as next_link.
		successor = HitProcessorChainLink()
		linked = EmitSamLink(self.map_ctx, self.monitor, successor)
		self.assertEqual(successor, linked.next_link)