Example #1
0
    def process_entity(self, entity, ctx, transient_shard_state):
        """Run the mapper handler on one entity and dispatch what it yields.

        The handler is taken from ``ctx.mapreduce_spec.mapper.handler``. When
        it is a generator function, every yielded ``operation.Operation`` is
        invoked with ``ctx``; any other yielded value is handed to the output
        writer, or logged as an error if no writer is set. A non-generator
        handler is simply called with the entity.

        Args:
          entity: an entity to process.
          ctx: current execution context.
          transient_shard_state: object whose ``output_writer`` attribute
            receives the non-operation values yielded by the handler.

        Returns:
          True if scan should be continued, False if scan should be aborted
          because more than ``_SLICE_DURATION_SEC`` has elapsed since
          ``self._start_time``.
        """
        ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

        mapper_fn = ctx.mapreduce_spec.mapper.handler
        if not util.is_generator_function(mapper_fn):
            mapper_fn(entity)
        else:
            for yielded in mapper_fn(entity):
                if isinstance(yielded, operation.Operation):
                    yielded(ctx)
                    continue
                # Anything that is not an operation is treated as output.
                writer = transient_shard_state.output_writer
                if writer:
                    writer.write(yielded, ctx)
                else:
                    logging.error(
                        "Handler yielded %s, but no output writer is set.",
                        yielded)

        slice_expired = self._time() - self._start_time > _SLICE_DURATION_SEC
        if slice_expired:
            logging.debug("Spent %s seconds. Rescheduling",
                          self._time() - self._start_time)
        return not slice_expired
Example #2
0
  def process_entity(self, entity, ctx, transient_shard_state):
    """Invoke the mapper handler for a single entity.

    A generator handler may yield ``operation.Operation`` instances, which
    are called with ``ctx``, or arbitrary values, which are written through
    the output writer. If no output writer is set, the value is logged as an
    error instead. A plain (non-generator) handler is just called.

    Args:
      entity: an entity to process.
      ctx: current execution context.
      transient_shard_state: object providing the ``output_writer`` used for
        non-operation values yielded by the handler.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because the elapsed time exceeds ``_SLICE_DURATION_SEC``.
    """
    ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

    map_handler = ctx.mapreduce_spec.mapper.handler
    if not util.is_generator_function(map_handler):
      map_handler(entity)
    else:
      for item in map_handler(entity):
        if isinstance(item, operation.Operation):
          item(ctx)
          continue
        # Non-operation values are output records.
        sink = transient_shard_state.output_writer
        if sink:
          sink.write(item, ctx)
        else:
          logging.error(
              "Handler yielded %s, but no output writer is set.", item)

    over_budget = self._time() - self._start_time > _SLICE_DURATION_SEC
    if over_budget:
      logging.debug("Spent %s seconds. Rescheduling",
                    self._time() - self._start_time)
    return not over_budget
Example #3
0
    def process_data(self, data, input_reader, ctx, transient_shard_state):
        """Run the mapper handler on one datum and dispatch what it yields.

        Nothing is processed when ``data`` is the
        ``input_readers.ALLOW_CHECKPOINT`` marker; only the slice time check
        runs. Otherwise the handler is called with ``data`` (unpacked as
        positional arguments when ``input_reader.expand_parameters`` is
        true). If the handler is a generator function, yielded
        ``operation.Operation`` instances are invoked with ``ctx`` and all
        other values go to the output writer, or are logged as an error when
        no writer is set.

        Args:
          data: a datum to process.
          input_reader: input reader.
          ctx: current execution context.
          transient_shard_state: object whose ``output_writer`` attribute
            receives the non-operation values yielded by the handler.

        Returns:
          True if scan should be continued, False if scan should be aborted
          because more than ``_SLICE_DURATION_SEC`` has elapsed since
          ``self._start_time``.
        """
        if data is not input_readers.ALLOW_CHECKPOINT:
            ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

            mapper_fn = ctx.mapreduce_spec.mapper.handler
            produced = (mapper_fn(*data) if input_reader.expand_parameters
                        else mapper_fn(data))

            # The return value is only consumed for generator handlers.
            if util.is_generator_function(mapper_fn):
                for item in produced:
                    if isinstance(item, operation.Operation):
                        item(ctx)
                        continue
                    writer = transient_shard_state.output_writer
                    if writer:
                        writer.write(item, ctx)
                    else:
                        logging.error(
                            "Handler yielded %s, but no output writer is set.",
                            item)

        slice_expired = self._time() - self._start_time > _SLICE_DURATION_SEC
        if slice_expired:
            logging.debug("Spent %s seconds. Rescheduling",
                          self._time() - self._start_time)
        return not slice_expired
Example #4
0
  def process_data(self, data, input_reader, ctx, transient_shard_state):
    """Invoke the mapper handler for a single datum.

    The handler is called with ``data``, unpacked as positional arguments
    when ``input_reader.expand_parameters`` is true. For a generator handler,
    yielded ``operation.Operation`` instances are called with ``ctx``; other
    yielded values are written through the output writer, or logged as an
    error if no writer is set.

    Args:
      data: a datum to process.
      input_reader: input reader.
      ctx: current execution context.
      transient_shard_state: object providing the ``output_writer`` used for
        non-operation values yielded by the handler.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because the elapsed time exceeds ``_SLICE_DURATION_SEC``.
    """
    ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

    map_handler = ctx.mapreduce_spec.mapper.handler
    outcome = (map_handler(*data) if input_reader.expand_parameters
               else map_handler(data))

    # The return value is only consumed for generator handlers.
    if util.is_generator_function(map_handler):
      for item in outcome:
        if isinstance(item, operation.Operation):
          item(ctx)
          continue
        sink = transient_shard_state.output_writer
        if sink:
          sink.write(item, ctx)
        else:
          logging.error(
              "Handler yielded %s, but no output writer is set.", item)

    over_budget = self._time() - self._start_time > _SLICE_DURATION_SEC
    if over_budget:
      logging.debug("Spent %s seconds. Rescheduling",
                    self._time() - self._start_time)
    return not over_budget
    def process_entity(self, entity, ctx):
        """Process a single entity.

        Calls the mapper handler from ``ctx.mapreduce_spec.mapper.handler``
        on the entity. If the handler is a generator function, every yielded
        callable is invoked with ``ctx``. Any other yielded value is only
        reported through ``logging.error``: output collection is not
        implemented, so nothing is written.

        Args:
          entity: an entity to process.
          ctx: current execution context.

        Returns:
          True if scan should be continued, False if scan should be aborted
          because more than ``_SLICE_DURATION_SEC`` has elapsed since
          ``self._start_time``.
        """
        ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

        handler = ctx.mapreduce_spec.mapper.handler
        if util.is_generator_function(handler):
            for result in handler(entity):
                if callable(result):
                    result(ctx)
                    continue
                # Keep the try body down to the len() probe so that an
                # unrelated TypeError is never misreported as a bad type.
                try:
                    result_len = len(result)
                except TypeError:
                    logging.error(
                        "Handler yielded type %s, expected a callable or a tuple",
                        result.__class__.__name__)
                    continue
                if result_len == 2:
                    logging.error("Collectors not implemented yet")
                else:
                    logging.error("Got bad output tuple of length %d",
                                  result_len)
        else:
            handler(entity)

        if self._time() - self._start_time > _SLICE_DURATION_SEC:
            logging.debug("Spent %s seconds. Rescheduling",
                          self._time() - self._start_time)
            return False
        return True
Example #6
0
  def process_entity(self, entity, ctx):
    """Process a single entity.

    Calls the mapper handler from ``ctx.mapreduce_spec.mapper.handler`` on
    the entity. If the handler is a generator function, every yielded
    callable is invoked with ``ctx``. Any other yielded value is only
    reported through ``logging.error``: output collection is not implemented,
    so nothing is written.

    Args:
      entity: an entity to process.
      ctx: current execution context.

    Returns:
      True if scan should be continued, False if scan should be aborted
      because more than ``_SLICE_DURATION_SEC`` has elapsed since
      ``self._start_time``.
    """
    ctx.counters.increment(context.COUNTER_MAPPER_CALLS)

    handler = ctx.mapreduce_spec.mapper.handler
    if util.is_generator_function(handler):
      for result in handler(entity):
        if callable(result):
          result(ctx)
          continue
        # Keep the try body down to the len() probe so that an unrelated
        # TypeError is never misreported as a bad yielded type.
        try:
          result_len = len(result)
        except TypeError:
          logging.error(
              "Handler yielded type %s, expected a callable or a tuple",
              result.__class__.__name__)
          continue
        if result_len == 2:
          logging.error("Collectors not implemented yet")
        else:
          logging.error("Got bad output tuple of length %d", result_len)
    else:
      handler(entity)

    if self._time() - self._start_time > _SLICE_DURATION_SEC:
      logging.debug("Spent %s seconds. Rescheduling",
                    self._time() - self._start_time)
      return False
    return True