Ejemplo n.º 1
0
    def handle(self):
        """Run the worker for the shard named in the request.

        Loads the shard state and the job control record in one datastore
        read, honours a pending abort command, and otherwise feeds items from
        the input reader to ``process_data`` (optionally gated by quota).
        Afterwards the shard state is persisted, and the task is rescheduled
        while the shard is still marked active.
        """
        transient = model.TransientShardState.from_request(self.request)
        job_spec = transient.mapreduce_spec
        self._start_time = self._time()
        shard_key = transient.shard_id

        lookup_keys = [
            model.ShardState.get_key_by_shard_id(shard_key),
            model.MapreduceControl.get_key_by_job_id(job_spec.mapreduce_id),
        ]
        state, command_record = db.get(lookup_keys)
        if not state:
            # Without persisted state there is nothing to work on; return
            # without rescheduling.
            logging.error("State not found for shard ID %r; shutting down",
                          shard_key)
            return

        retries = self.task_retry_count()
        worker_ctx = context.Context(job_spec, state,
                                     task_retry_count=retries)

        abort_requested = (
            command_record
            and command_record.command == model.MapreduceControl.ABORT)
        if abort_requested:
            logging.info("Abort command received by shard %d of job '%s'",
                         state.shard_number, state.mapreduce_id)
            # Let the output writer close out before the shard is recorded
            # as aborted.
            if transient.output_writer:
                transient.output_writer.finalize(worker_ctx,
                                                 state.shard_number)
            state.active = False
            state.result_status = model.ShardState.RESULT_ABORTED
            state.put(config=util.create_datastore_write_config(job_spec))
            model.MapreduceControl.abort(job_spec.mapreduce_id)
            return

        reader = transient.input_reader

        # Quota enforcement is on unless the mapper params switch it off.
        consumer = None
        if job_spec.mapper.params.get("enable_quota", True):
            manager = quota.QuotaManager(memcache.Client())
            consumer = quota.QuotaConsumer(manager, shard_key,
                                           _QUOTA_BATCH_SIZE)

        context.Context._set(worker_ctx)
        try:
            if not consumer or consumer.check():
                item = None
                # Quota for the first item must be obtained before the scan
                # starts; failing that counts as an aborted scan.
                stopped_early = bool(consumer) and not consumer.consume()

                if not stopped_early:
                    for item in reader:
                        # Remember what is being worked on: the key for
                        # datastore models, a truncated repr otherwise.
                        if isinstance(item, db.Model):
                            marker = repr(item.key())
                        else:
                            marker = repr(item)[:100]
                        state.last_work_item = marker

                        if not self.process_data(item, reader, worker_ctx,
                                                 transient):
                            stopped_early = True
                            break
                        # Obtain quota for the next item before fetching it.
                        if consumer and not consumer.consume():
                            stopped_early = True
                            break

                if not stopped_early:
                    # The reader ran dry: the shard is finished.
                    logging.info("Processing done for shard %d of job '%s'",
                                 state.shard_number,
                                 state.mapreduce_id)

                    if consumer:
                        consumer.put(1)
                    state.active = False
                    state.result_status = model.ShardState.RESULT_SUCCESS

            # NOTE(review): the start timestamp comes from self._time() but
            # the end timestamp from time.time() -- confirm this is intended.
            elapsed_ms = int((time.time() - self._start_time) * 1000)
            operation.counters.Increment(
                "mapper-walltime-msec", elapsed_ms)(worker_ctx)

            worker_ctx.flush()

            if not state.active and transient.output_writer:
                transient.output_writer.finalize(worker_ctx,
                                                 state.shard_number)
            state.put(config=util.create_datastore_write_config(job_spec))
        finally:
            # Always uninstall the context and release the quota consumer.
            context.Context._set(None)
            if consumer:
                consumer.dispose()

        if state.active:
            self.reschedule(state, transient)
        gc.collect()
Ejemplo n.º 2
0
  def handle(self):
    """Handle request."""
    # Overview: rebuilds the transient shard state from the request, loads
    # the persisted shard state and job control record, honours abort
    # commands, then feeds input-reader items to process_data under optional
    # quota control.
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    # Start timestamp; used below for the wall-time counter.
    self._start_time = self._time()
    shard_id = tstate.shard_id

    # Fetch the shard state and the job's control record in a single get.
    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # No persisted state for this shard: log and return without doing
      # any work.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    # A shard already marked inactive should not be running; treat the task
    # as spurious and do nothing.
    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # Record the shard as inactive/aborted, persist it, then call
      # MapreduceControl.abort for the job.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    # Quota is enforced unless the mapper params set "enable_quota" to a
    # falsy value (it defaults to True).
    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # When ndb is available, install cache and memcache policies that return
    # False for every key.
    # NOTE(review): presumably this turns off ndb's in-context cache and
    # memcache for this request -- confirm against the ndb documentation.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    # Install ctx via Context._set for the duration of the processing below.
    context.Context._set(ctx)
    try:
      # Only start scanning if quota is disabled or check() passes.
      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        try:
          # Quota for the first item is consumed before the loop starts;
          # quota for each following item is consumed at the end of the
          # previous iteration.
          if not quota_consumer or quota_consumer.consume():
            for entity in input_reader:
              # Record the item being processed: the key for datastore
              # models, otherwise the first 100 characters of its repr.
              if isinstance(entity, db.Model):
                shard_state.last_work_item = repr(entity.key())
              else:
                shard_state.last_work_item = repr(entity)[:100]

              # A falsy return from process_data stops the scan.
              scan_aborted = not self.process_data(
                  entity, input_reader, ctx, tstate)

              # Failing to consume quota for the next item also stops the
              # scan.
              if (quota_consumer and not scan_aborted and
                  not quota_consumer.consume()):
                scan_aborted = True
              if scan_aborted:
                break
          else:
            # Could not consume quota for the first item.
            scan_aborted = True

          if not scan_aborted:
            # The input reader was exhausted: the shard is finished.
            logging.info("Processing done for shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)

            # NOTE(review): put(1) presumably hands back the unit consumed
            # for an item that never arrived -- confirm in quota module.
            if quota_consumer:
              quota_consumer.put(1)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_SUCCESS

          # Add elapsed wall time, in milliseconds, to the walltime counter.
          # NOTE(review): the end time uses time.time() while the start time
          # came from self._time() -- confirm this is intended.
          operation.counters.Increment(
              context.COUNTER_MAPPER_WALLTIME_MS,
              int((time.time() - self._start_time)*1000))(ctx)

          ctx.flush()
        except errors.RetrySliceError, e:
          logging.error("Slice error: %s", e)
          # Retry count is read from the task queue's environment variable;
          # a missing or empty value counts as 0.
          retry_count = int(
              os.environ.get("HTTP_X_APPENGINE_TASKRETRYCOUNT") or 0)
          # Re-raise while still within the retry budget; once it is
          # exceeded, mark the shard as failed instead.
          if retry_count <= _RETRY_SLICE_ERROR_MAX_RETRIES:
            raise
          logging.error("Too many retries: %d, failing the job", retry_count)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED
        except errors.FailJobError, e:
          # The error is logged and the shard is marked failed right away,
          # with no retry check.
          logging.error("Job failed: %s", e)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED
    # NOTE(review): this excerpt ends here; the except/finally clause of the
    # outer try and any code following it are not visible.
Ejemplo n.º 3
0
    def handle(self):
        """Handle request."""
        # Overview: rebuilds the transient shard state from the request,
        # loads the persisted shard state and job control record, honours
        # abort commands, feeds input-reader items to process_data under
        # optional quota control, and writes the shard state back inside a
        # transaction.
        tstate = model.TransientShardState.from_request(self.request)
        spec = tstate.mapreduce_spec
        # Start timestamp; used below for the wall-time counter.
        self._start_time = self._time()
        shard_id = tstate.shard_id

        # Fetch the shard state and the job's control record in a single get.
        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:
            # No persisted state for this shard: log and return without
            # doing any work.
            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        # A shard already marked inactive should not be running; treat the
        # task as spurious and do nothing.
        if not shard_state.active:
            logging.error(
                "Shard is not active. Looks like spurious task execution.")
            return

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            # Finalize the output writer (if any) before recording the abort.
            if tstate.output_writer:
                tstate.output_writer.finalize(ctx, shard_state.shard_number)

            # Record the shard as inactive/aborted, persist it, then call
            # MapreduceControl.abort for the job.
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = tstate.input_reader

        # Quota is enforced unless the mapper params set "enable_quota" to a
        # falsy value (it defaults to True).
        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        # Install ctx via Context._set for the duration of the processing
        # below.
        context.Context._set(ctx)
        try:
            # Only start scanning if quota is disabled or check() passes.
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                try:
                    # Quota for the first item is consumed before the loop
                    # starts; quota for each following item is consumed at
                    # the end of the previous iteration.
                    if not quota_consumer or quota_consumer.consume():
                        for entity in input_reader:
                            # Record the item being processed: the key for
                            # datastore models, otherwise the first 100
                            # characters of its repr.
                            if isinstance(entity, db.Model):
                                shard_state.last_work_item = repr(entity.key())
                            else:
                                shard_state.last_work_item = repr(entity)[:100]

                            # A falsy return from process_data stops the scan.
                            scan_aborted = not self.process_data(
                                entity, input_reader, ctx, tstate)

                            # Failing to consume quota for the next item also
                            # stops the scan.
                            if (quota_consumer and not scan_aborted
                                    and not quota_consumer.consume()):
                                scan_aborted = True
                            if scan_aborted:
                                break
                    else:
                        # Could not consume quota for the first item.
                        scan_aborted = True

                    if not scan_aborted:
                        # The input reader was exhausted: the shard is done.
                        logging.info(
                            "Processing done for shard %d of job '%s'",
                            shard_state.shard_number, shard_state.mapreduce_id)

                        # NOTE(review): put(1) presumably hands back the unit
                        # consumed for an item that never arrived -- confirm
                        # in the quota module.
                        if quota_consumer:
                            quota_consumer.put(1)
                        shard_state.active = False
                        shard_state.result_status = model.ShardState.RESULT_SUCCESS

                    # Add elapsed wall time, in milliseconds, to the walltime
                    # counter.
                    # NOTE(review): the end time uses time.time() while the
                    # start time came from self._time() -- confirm intent.
                    operation.counters.Increment(
                        context.COUNTER_MAPPER_WALLTIME_MS,
                        int((time.time() - self._start_time) * 1000))(ctx)

                    ctx.flush()
                except errors.FailJobError, e:
                    # The error is logged and the shard is marked failed.
                    logging.error("Job failed: %s", e)
                    scan_aborted = True
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_FAILED

            # Once the shard is no longer active (finished or failed), give
            # the output writer a chance to finalize.
            if not shard_state.active:

                if tstate.output_writer:
                    tstate.output_writer.finalize(ctx,
                                                  shard_state.shard_number)

            config = util.create_datastore_write_config(spec)

            # Write the shard state back transactionally (retries=5): the
            # stored state is re-read first, and the write is skipped if it
            # is already inactive.
            @db.transactional(retries=5)
            def tx():
                fresh_shard_state = db.get(
                    model.ShardState.get_key_by_shard_id(shard_id))
                # The second condition is a fault-injection hook keyed on
                # _TEST_INJECTED_FAULTS that forces this branch.
                if (not fresh_shard_state.active
                        or "worker_active_state_collision"
                        in _TEST_INJECTED_FAULTS):
                    # Mark the local state inactive and return without
                    # writing anything.
                    shard_state.active = False
                    logging.error(
                        "Spurious task execution. Aborting the shard.")
                    return
                # Copy this run's results onto the fresh entity and store it.
                fresh_shard_state.copy_from(shard_state)
                fresh_shard_state.put(config=config)

            tx()
            # NOTE(review): this excerpt ends here; the except/finally clause
            # of the outer try and any code following it are not visible.
Ejemplo n.º 4
0
    def handle(self):
        """Run the worker for the shard identified by this request.

        Decodes the mapreduce spec from the request, loads the shard state
        and the job control record, honours a pending abort command, and
        otherwise passes items from the input reader to ``process_entity``
        (optionally gated by quota).  The context is flushed afterwards and
        the task is rescheduled while the shard is still marked active.
        """
        spec_json = self.request.get("mapreduce_spec")
        job_spec = model.MapreduceSpec.from_json_str(spec_json)
        self._start_time = self._time()
        shard_key = self.shard_id()

        logging.debug("post: shard=%s slice=%s headers=%s", shard_key,
                      self.slice_id(), self.request.headers)

        lookup_keys = [
            model.ShardState.get_key_by_shard_id(shard_key),
            model.MapreduceControl.get_key_by_job_id(job_spec.mapreduce_id),
        ]
        state, command_record = db.get(lookup_keys)
        if not state:
            # Without persisted state there is nothing to work on; return
            # without rescheduling.
            logging.error("State not found for shard ID %r; shutting down",
                          shard_key)
            return

        abort_requested = (
            command_record
            and command_record.command == model.MapreduceControl.ABORT)
        if abort_requested:
            logging.info("Abort command received by shard %d of job '%s'",
                         state.shard_number, state.mapreduce_id)
            state.active = False
            state.result_status = model.ShardState.RESULT_ABORTED
            state.put(config=util.create_datastore_write_config(job_spec))
            model.MapreduceControl.abort(job_spec.mapreduce_id)
            return

        reader = self.input_reader(job_spec.mapper)

        # Quota enforcement is on unless the mapper params switch it off.
        consumer = None
        if job_spec.mapper.params.get("enable_quota", True):
            manager = quota.QuotaManager(memcache.Client())
            consumer = quota.QuotaConsumer(manager, shard_key,
                                           _QUOTA_BATCH_SIZE)

        retries = self.task_retry_count()
        worker_ctx = context.Context(job_spec, state,
                                     task_retry_count=retries)
        context.Context._set(worker_ctx)

        try:
            if not consumer or consumer.check():
                item = None
                # Quota for the first item must be obtained before the scan
                # starts; failing that counts as an aborted scan.
                stopped_early = bool(consumer) and not consumer.consume()

                if not stopped_early:
                    for item in reader:
                        # Remember what is being worked on: the key for
                        # datastore models, a truncated repr otherwise.
                        if isinstance(item, db.Model):
                            marker = repr(item.key())
                        else:
                            marker = repr(item)[:100]
                        state.last_work_item = marker

                        if not self.process_entity(item, worker_ctx):
                            stopped_early = True
                            break
                        # Obtain quota for the next item before fetching it.
                        if consumer and not consumer.consume():
                            stopped_early = True
                            break

                if not stopped_early:
                    # The reader ran dry: the shard is finished.
                    logging.info("Processing done for shard %d of job '%s'",
                                 state.shard_number,
                                 state.mapreduce_id)
                    if consumer:
                        consumer.put(1)
                    state.active = False
                    state.result_status = model.ShardState.RESULT_SUCCESS

            worker_ctx.flush()
        finally:
            # Always uninstall the context and release the quota consumer.
            context.Context._set(None)
            if consumer:
                consumer.dispose()

        if state.active:
            self.reschedule(job_spec, reader)