def get_handler(self):
    """Resolve this mapper's handler spec into a handler instance.

    The lookup is delegated to util.handler_for_name on every call; nothing
    is cached on this object.

    Returns:
      the handler instance (a callable) that util.handler_for_name produces
      for self.handler_spec.
    """
    spec = self.handler_spec
    handler = util.handler_for_name(spec)
    return handler
def get_handler(self):
    """Resolve this mapper's handler spec into a handler instance.

    The lookup is delegated to util.handler_for_name on every call; nothing
    is cached on this object.

    Returns:
      the handler instance (a callable) that util.handler_for_name produces
      for self.handler_spec.
    """
    spec = self.handler_spec
    handler = util.handler_for_name(spec)
    return handler
def get_handler(self):
    """Return the mapper handler instance, resolving it on first use.

    The handler is looked up from self.handler_spec via
    util.handler_for_name the first time it is needed and stored in the
    private __handler attribute; later calls return the stored object.

    Returns:
      cached handler instance as callable.
    """
    # Fast path: a handler has already been resolved and stored.
    if self.__handler is not None:
        return self.__handler

    self.__handler = util.handler_for_name(self.handler_spec)
    return self.__handler
def get_handler(self):
    """Return the mapper handler instance, resolving it on first use.

    The handler is looked up from self.handler_spec via
    util.handler_for_name the first time it is needed and stored in the
    private __handler attribute; later calls return the stored object.

    Returns:
      cached handler instance as callable.
    """
    # Fast path: a handler has already been resolved and stored.
    if self.__handler is not None:
        return self.__handler

    self.__handler = util.handler_for_name(self.handler_spec)
    return self.__handler
def __iter__(self):
    """Iterate over the parent reader's records, grouping values by key.

    Each binary record from the parent iterator is parsed as a
    file_service_pb.KeyValues message. Values are accumulated in
    self.current_values under self.current_key until a record that is not
    marked partial arrives, at which point the pair is emitted and the
    accumulation state is reset to None.

    If the current context's mapper params carry a "combiner_spec", the
    resolved combiner is called with (key, new values, accumulated values)
    and the non-Operation items it yields replace the accumulated values;
    Operation items are invoked with the context instead of being stored.

    Yields:
      (key, values) tuples once all records for a key have been consumed, or
      input_readers.ALLOW_CHECKPOINT after a partial record.

    Raises:
      AssertionError: a record's key differs from the key being accumulated.
      errors.BadCombinerOutputError: the combiner's result is not a generator
        according to util.is_generator.
    """
    mr_context = context.get()

    # Look up the optional combiner; without a context there is none.
    combine = None
    if mr_context:
        spec = mr_context.mapreduce_spec.mapper.params.get("combiner_spec")
        if spec:
            combine = util.handler_for_name(spec)

    # Start with no key in progress.
    self.current_key = self.current_values = None

    for raw_record in super(_ReducerReader, self).__iter__():
        key_values = file_service_pb.KeyValues()
        key_values.ParseFromString(raw_record)

        if self.current_key is not None:
            # Continuation of a key already in progress: keys must match.
            assert key_values.key() == self.current_key, (
                "inconsistent key sequence. Expected %s but got %s" %
                (self.current_key, key_values.key()))
        else:
            self.current_key = key_values.key()
            self.current_values = []

        if not combine:
            # No combiner: simply append the new values.
            self.current_values.extend(key_values.value_list())
        else:
            combined = combine(
                self.current_key, key_values.value_list(), self.current_values)
            if not util.is_generator(combined):
                raise errors.BadCombinerOutputError(
                    "Combiner %s should yield values instead of returning them (%s)" %
                    (combine, combined))

            # Rebind to a fresh list before draining the generator; the
            # combiner still holds a reference to the previous list.
            self.current_values = []
            for item in combined:
                if isinstance(item, operation.Operation):
                    item(mr_context)
                    continue
                self.current_values.append(item)

        if key_values.partial():
            # More records for this key follow; keep the accumulated state.
            yield input_readers.ALLOW_CHECKPOINT
            continue

        # Last record for this key: clear the state before handing the
        # pair to the consumer.
        finished = (self.current_key, self.current_values)
        self.current_key = None
        self.current_values = None
        yield finished
def __iter__(self):
    """Iterate over the parent reader's records, grouping values by key.

    Each binary record from the parent iterator is parsed as a
    file_service_pb.KeyValues message. Values are accumulated in
    self.current_values under self.current_key until a record that is not
    marked partial arrives, at which point the pair is emitted and the
    accumulation state is reset to None.

    If the current context's mapper params carry a "combiner_spec", the
    resolved combiner is called with (key, new values, accumulated values)
    and the non-Operation items it yields replace the accumulated values;
    Operation items are invoked with the context instead of being stored.

    Yields:
      (key, values) tuples once all records for a key have been consumed, or
      input_readers.ALLOW_CHECKPOINT after a partial record.

    Raises:
      AssertionError: a record's key differs from the key being accumulated.
      errors.BadCombinerOutputError: the combiner's result is not a generator
        according to util.is_generator.
    """
    mr_context = context.get()

    # Look up the optional combiner; without a context there is none.
    combine = None
    if mr_context:
        spec = mr_context.mapreduce_spec.mapper.params.get("combiner_spec")
        if spec:
            combine = util.handler_for_name(spec)

    # Start with no key in progress.
    self.current_key = self.current_values = None

    for raw_record in super(_ReducerReader, self).__iter__():
        key_values = file_service_pb.KeyValues()
        key_values.ParseFromString(raw_record)

        if self.current_key is not None:
            # Continuation of a key already in progress: keys must match.
            assert key_values.key() == self.current_key, (
                "inconsistent key sequence. Expected %s but got %s" %
                (self.current_key, key_values.key()))
        else:
            self.current_key = key_values.key()
            self.current_values = []

        if not combine:
            # No combiner: simply append the new values.
            self.current_values.extend(key_values.value_list())
        else:
            combined = combine(
                self.current_key, key_values.value_list(), self.current_values)
            if not util.is_generator(combined):
                raise errors.BadCombinerOutputError(
                    "Combiner %s should yield values instead of returning them (%s)" %
                    (combine, combined))

            # Rebind to a fresh list before draining the generator; the
            # combiner still holds a reference to the previous list.
            self.current_values = []
            for item in combined:
                if isinstance(item, operation.Operation):
                    item(mr_context)
                    continue
                self.current_values.append(item)

        if key_values.partial():
            # More records for this key follow; keep the accumulated state.
            yield input_readers.ALLOW_CHECKPOINT
            continue

        # Last record for this key: clear the state before handing the
        # pair to the consumer.
        finished = (self.current_key, self.current_values)
        self.current_key = None
        self.current_values = None
        yield finished
def get_handler(self):
    """Build the mapper handler from this spec's handler_spec.

    Nothing is cached here: every call goes through util.handler_for_name
    again, so a new handler instance is created each time. When the handler
    is a callable instance, MR only wants a new one at the start of a shard
    or of a shard retry; in that case the pickled callable instance should be
    read from TransientShardState rather than obtained through this method.

    Returns:
      handler instance as callable.
    """
    spec = self.handler_spec
    handler = util.handler_for_name(spec)
    return handler
def get_handler(self):
    """Build the mapper handler from this spec's handler_spec.

    Nothing is cached here: every call goes through util.handler_for_name
    again, so a new handler instance is created each time. When the handler
    is a callable instance, MR only wants a new one at the start of a shard
    or of a shard retry; in that case the pickled callable instance should be
    read from TransientShardState rather than obtained through this method.

    Returns:
      handler instance as callable.
    """
    spec = self.handler_spec
    handler = util.handler_for_name(spec)
    return handler