Beispiel #1
0
    def __get_configuration(self, ctx):
        """Load the seqal settings from the job configuration and validate them.

        The job configuration obtained from ``ctx`` is passed through
        ``deprecation_utils.convert_job_conf`` and the ``seal.seqal.*``
        properties are read with the ``jc_configure*`` helpers, after which
        this method expects the attributes ``log_level``, ``format``,
        ``max_isize``, ``min_isize``, ``batch_size``, ``min_hit_quality``,
        ``remove_unmapped``, ``nthreads`` and ``trim_qual`` to be available on
        ``self``.  ``log_level`` is then replaced by the ``logging`` constant
        of the same name, and ``__map_only`` is set to True unless
        'mapred.reduce.tasks' is present and greater than zero.

        An unsupported fastq sub-format is reported through
        ``raise_pydoop_exception``.

        Raises:
            ValueError: if the log level does not name an integer ``logging``
                level, or a numeric setting is outside its valid range.
            NotImplementedError: if remove_unmapped, min_hit_quality or
                trim.qual request a feature that is currently unsupported.
        """
        # TODO:  refactor settings common to mapper and reducer
        jc = ctx.getJobConf()

        jobconf = deprecation_utils.convert_job_conf(jc, self.DeprecationMap, self.logger)

        jc_configure(self, jobconf, 'seal.seqal.log.level', 'log_level', 'INFO')
        jc_configure(self, jobconf, "seal.seqal.fastq-subformat", "format", self.DEFAULT_FORMAT)
        jc_configure_int(self, jobconf, 'seal.seqal.alignment.max.isize', 'max_isize', 1000)
        jc_configure_int(self, jobconf, 'seal.seqal.alignment.min.isize', 'min_isize', None)
        jc_configure_int(self, jobconf, 'seal.seqal.pairing.batch.size', 'batch_size', 10000)
        jc_configure_int(self, jobconf, 'seal.seqal.min_hit_quality', 'min_hit_quality', 0)
        jc_configure_bool(self, jobconf, 'seal.seqal.remove_unmapped', 'remove_unmapped', False)
        jc_configure_int(self, jobconf, 'seal.seqal.nthreads', 'nthreads', 1)
        jc_configure_int(self, jobconf, 'seal.seqal.trim.qual', 'trim_qual', 0)

        # Resolve the level name to its numeric logging constant.  A plain
        # getattr() would also accept any other attribute of the logging
        # module (e.g. "info" resolves to the function logging.info), so the
        # resolved value is additionally required to be an integer.
        level = getattr(logging, self.log_level, None)
        if not isinstance(level, int):
            raise ValueError("Unsupported log level: %r" % self.log_level)
        self.log_level = level

        if self.format not in self.SUPPORTED_FORMATS:
            raise_pydoop_exception(
              "seal.seqal.fastq-subformat must be one of %r" %
              (self.SUPPORTED_FORMATS,)
              )

        # Options that are accepted in the configuration but not implemented.
        if self.remove_unmapped:
            raise NotImplementedError("seal.seqal.remove_unmapped is currently unsupported")
        if self.min_hit_quality > 0:
            raise NotImplementedError("seal.seqal.min_hit_quality is currently unsupported")
        if self.trim_qual > 0:
            # The message names the property exactly as it is read above
            # ('seal.seqal.trim.qual').
            raise NotImplementedError("seal.seqal.trim.qual is currently unsupported")

        if self.max_isize <= 0:
            raise ValueError("'seal.seqal.alignment.max.isize' must be > 0, if specified [1000]")

        if self.batch_size <= 0:
            raise ValueError("'seal.seqal.pairing.batch.size' must be > 0, if specified [10000]")

        # minimum qual value required for a hit to be kept.  By default outputs all the
        # hits BWA returns.
        if self.min_hit_quality < 0:
            raise ValueError("'seal.seqal.min_hit_quality' must be >= 0, if specified [0]")

        # number of concurrent threads for main alignment operation
        if self.nthreads <= 0:
            raise ValueError("'seal.seqal.nthreads' must be > 0, if specified [1]")

        # trim quality parameter used by BWA for read trimming.  Equivalent to
        # the -q parameter for bwa align
        if self.trim_qual < 0:
            raise ValueError("'seal.seqal.trim.qual' must be >= 0, if specified [0]")

        # The job is map-only unless a positive number of reduce tasks is set.
        has_reducers = jc.hasKey('mapred.reduce.tasks') and jc.getInt('mapred.reduce.tasks') > 0
        self.__map_only = not has_reducers
Beispiel #2
0
	def __init__(self, ctx):
		"""Initialise the reducer from the task context ``ctx``.

		Reads 'seal.seqal.log.level' and 'seal.seqal.discard_duplicates' from
		the (deprecation-converted) job configuration, configures the root
		logger with the requested level, and creates the event monitor and
		the SAM output sink.
		"""
		super(reducer, self).__init__(ctx)

		# Run the raw job configuration through the deprecation converter
		# before any property is read from it.
		raw_conf = ctx.getJobConf()
		seqal_log = logging.getLogger("seqal")
		conf = deprecation_utils.convert_job_conf(raw_conf, self.DeprecationMap, seqal_log)

		# The helpers are expected to leave self.log_level and
		# self.discard_duplicates set (defaults: 'INFO' and False).
		jc_configure(self, conf, 'seal.seqal.log.level', 'log_level', 'INFO')
		jc_configure_bool(self, conf, 'seal.seqal.discard_duplicates', 'discard_duplicates', False)

		logging.basicConfig(level=self.log_level)

		# Events are reported through a monitor bound to the "reducer" logger;
		# the output sink shares that monitor.
		reducer_log = logging.getLogger("reducer")
		self.event_monitor = HadoopEventMonitor(self.COUNTER_CLASS, reducer_log, ctx)
		self.__output_sink = EmitSamLink(ctx, self.event_monitor)
Beispiel #3
0
    def __init__(self, ctx):
        """Build a reducer bound to the task context ``ctx``.

        The job configuration is converted with
        ``deprecation_utils.convert_job_conf``; the log level and the
        discard-duplicates flag are read from it, root logging is configured,
        and the event monitor plus the SAM output sink are instantiated.
        """
        super(reducer, self).__init__(ctx)

        converted_conf = deprecation_utils.convert_job_conf(
            ctx.getJobConf(),
            self.DeprecationMap,
            logging.getLogger("seqal"),
        )

        # Read the two reducer settings: (property key, attribute, default).
        jc_configure(
            self, converted_conf, 'seal.seqal.log.level', 'log_level', 'INFO')
        jc_configure_bool(
            self, converted_conf,
            'seal.seqal.discard_duplicates', 'discard_duplicates', False)

        # self.log_level is handed to basicConfig exactly as configured.
        logging.basicConfig(level=self.log_level)

        monitor = HadoopEventMonitor(
            self.COUNTER_CLASS, logging.getLogger("reducer"), ctx)
        self.event_monitor = monitor
        self.__output_sink = EmitSamLink(ctx, self.event_monitor)