  def handle(self):
    """Handles kick off request."""
    spec = model.MapreduceSpec.from_json_str(
        self._get_required_param("mapreduce_spec"))

    app_id = self.request.get("app", None)
    queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")
    mapper_input_reader_class = spec.mapper.input_reader_class()

    # StartJobHandler might have already saved the state, but it's OK
    # to override it because we're using the same mapreduce id.
    state = model.MapreduceState.create_new(spec.mapreduce_id)
    state.mapreduce_spec = spec
    state.active = True
    # TODO(user): Initialize UI fields correctly.
    state.char_url = ""
    state.sparkline_url = ""
    if app_id:
      state.app_id = app_id

    input_readers = mapper_input_reader_class.split_input(spec.mapper)
    if not input_readers:
      # We don't have any data. Finish map.
      logging.warning("Found no mapper input data to process.")
      state.active = False
      state.active_shards = 0
      state.put(config=util.create_datastore_write_config(spec))
      return

    # Update state and spec with actual shard count.
    spec.mapper.shard_count = len(input_readers)
    state.active_shards = len(input_readers)
    state.mapreduce_spec = spec

    output_writer_class = spec.mapper.output_writer_class()
    if output_writer_class:
      output_writer_class.init_job(state)

    output_writers = []
    if output_writer_class:
      for shard_number in range(len(input_readers)):
        writer = output_writer_class.create(state, shard_number)
        assert isinstance(writer, output_writer_class)
        output_writers.append(writer)
    else:
      output_writers = [None for ir in input_readers]

    state.put(config=util.create_datastore_write_config(spec))

    KickOffJobHandler._schedule_shards(
        spec, input_readers, output_writers, queue_name, self.base_path())

    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, queue_name=queue_name, serial_id=0)
Example #3
  def flush(self):
    """Flush all information recorded in context."""
    for pool in self._pools.values():
      pool.flush()
    if self.shard_state:
      self.shard_state.put(
          config=util.create_datastore_write_config(self.mapreduce_spec))
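The _pools dict above maps pool names to batching pools that buffer work until flush() runs, at which point everything is written with the job's datastore write config. As a minimal sketch of what such a pool can look like (the register_pool wiring and the pool name here are assumptions, not verified against context.py):

import logging

class CountingPool(object):
  """Illustrative pool: counts appended items, reports them on flush()."""

  def __init__(self):
    self.count = 0

  def append(self, item):
    self.count += 1

  def flush(self):
    # Invoked by Context.flush() alongside the built-in mutation pools.
    logging.info("flushed %d items", self.count)
    self.count = 0

# Hypothetical wiring inside a mapper:
#   ctx = context.get()
#   ctx.register_pool("counting_pool", CountingPool())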
Example #4
  def _schedule_shards(cls, spec, input_readers, queue_name, base_path):
    """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
    """
    # Note: it's safe to re-attempt this handler because:
    # - shard state has deterministic and unique key.
    # - schedule_slice will fall back gracefully if a task already exists.
    shard_states = []
    for shard_number, input_reader in enumerate(input_readers):
      shard = model.ShardState.create_new(spec.mapreduce_id, shard_number)
      shard.shard_description = str(input_reader)
      shard_states.append(shard)

    # Retrieves already existing shards.
    existing_shard_states = db.get(shard.key() for shard in shard_states)
    existing_shard_keys = set(shard.key() for shard in existing_shard_states
                              if shard is not None)

    # Puts only non-existing shards.
    db.put((shard for shard in shard_states
            if shard.key() not in existing_shard_keys),
           config=util.create_datastore_write_config(spec))

    for shard_number, input_reader in enumerate(input_readers):
      shard_id = model.ShardState.shard_id_from_number(
          spec.mapreduce_id, shard_number)
      MapperWorkerCallbackHandler.schedule_slice(
          base_path, spec, shard_id, 0, input_reader, queue_name=queue_name)
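The retry-safety comment above holds because ShardState keys are a pure function of the job id and shard number, so a re-attempted kickoff recomputes the same keys and the db.get/db.put pair skips shards that already exist. The id derivation in model.py is essentially the following (a paraphrased sketch, not the verbatim source):

from google.appengine.ext import db

class ShardState(db.Model):
  # ... properties elided ...

  @classmethod
  def shard_id_from_number(cls, mapreduce_id, shard_number):
    """Build the deterministic shard id for a (job, shard) pair."""
    return "%s-%d" % (mapreduce_id, shard_number)

  @classmethod
  def create_new(cls, mapreduce_id, shard_number):
    # key_name equals the shard id, so re-creating the same shard
    # yields an entity with the same datastore key.
    return cls(key_name=cls.shard_id_from_number(mapreduce_id, shard_number))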
Example #7
  def _schedule_shards(cls,
                       spec,
                       input_readers,
                       queue_name,
                       base_path,
                       mr_state):
    """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
      mr_state: The MapReduceState of current job.
    """
    # Note: it's safe to re-attempt this handler because:
    # - shard state has deterministic and unique key.
    # - _schedule_slice will fall back gracefully if a task already exists.
    shard_states = []
    writer_class = spec.mapper.output_writer_class()
    output_writers = [None] * len(input_readers)
    for shard_number, input_reader in enumerate(input_readers):
      shard_state = model.ShardState.create_new(spec.mapreduce_id, shard_number)
      shard_state.shard_description = str(input_reader)
      if writer_class:
        output_writers[shard_number] = writer_class.create(
            mr_state, shard_state)
      shard_states.append(shard_state)

    # Retrieves already existing shards.
    existing_shard_states = db.get(shard.key() for shard in shard_states)
    existing_shard_keys = set(shard.key() for shard in existing_shard_states
                              if shard is not None)

    # Puts only non-existing shards.
    db.put((shard for shard in shard_states
            if shard.key() not in existing_shard_keys),
           config=util.create_datastore_write_config(spec))

    # Give each shard some quota to start with.
    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    quota_refill = processing_rate / len(shard_states)
    quota_manager = quota.QuotaManager(memcache.Client())
    for shard_state in shard_states:
      quota_manager.put(shard_state.shard_id, quota_refill)

    # Schedule shard tasks.
    for shard_number, (input_reader, output_writer) in enumerate(
        zip(input_readers, output_writers)):
      shard_id = model.ShardState.shard_id_from_number(
          spec.mapreduce_id, shard_number)
      MapperWorkerCallbackHandler._schedule_slice(
          shard_states[shard_number],
          model.TransientShardState(
              base_path, spec, shard_id, 0, input_reader, input_reader,
              output_writer=output_writer),
          queue_name=queue_name)
Example #8
    def _schedule_shards(cls, spec, input_readers, output_writers, queue_name,
                         base_path):
        """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      output_writers: list of OutputWriters, one per input reader.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
    """
        assert len(input_readers) == len(output_writers)
        # Note: it's safe to re-attempt this handler because:
        # - shard state has deterministic and unique key.
        # - _schedule_slice will fall back gracefully if a task already exists.
        shard_states = []
        for shard_number, input_reader in enumerate(input_readers):
            shard_state = model.ShardState.create_new(spec.mapreduce_id,
                                                      shard_number)
            shard_state.shard_description = str(input_reader)
            shard_states.append(shard_state)

        # Retrieves already existing shards.
        existing_shard_states = db.get(shard.key() for shard in shard_states)
        existing_shard_keys = set(shard.key()
                                  for shard in existing_shard_states
                                  if shard is not None)

        # Puts only non-existing shards.
        db.put((shard for shard in shard_states
                if shard.key() not in existing_shard_keys),
               config=util.create_datastore_write_config(spec))

        # Give each shard some quota to start with.
        processing_rate = int(
            spec.mapper.params.get("processing_rate")
            or model._DEFAULT_PROCESSING_RATE_PER_SEC)
        quota_refill = processing_rate / len(shard_states)
        quota_manager = quota.QuotaManager(memcache.Client())
        for shard_state in shard_states:
            quota_manager.put(shard_state.shard_id, quota_refill)

        # Schedule shard tasks.
        for shard_number, (input_reader, output_writer) in enumerate(
                zip(input_readers, output_writers)):
            shard_id = model.ShardState.shard_id_from_number(
                spec.mapreduce_id, shard_number)
            MapperWorkerCallbackHandler._schedule_slice(
                shard_states[shard_number],
                model.TransientShardState(base_path,
                                          spec,
                                          shard_id,
                                          0,
                                          input_reader,
                                          output_writer=output_writer),
                queue_name=queue_name)
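Examples #7 and #8 seed a per-shard quota bucket that the worker handlers below (Examples #14, #15, and #17) drain through QuotaConsumer. A rough sketch of the round trip, using only the calls these snippets themselves exercise (bucket name and amounts are illustrative):

from google.appengine.api import memcache
from mapreduce import quota

manager = quota.QuotaManager(memcache.Client())
manager.put("job1-0", 100)      # controller side: refill shard 0's bucket

# Worker side: batch quota reads through a consumer (batch size 20).
consumer = quota.QuotaConsumer(manager, "job1-0", 20)
if consumer.check() and consumer.consume():
  pass                          # process one entity here
  consumer.put(1)               # give back the extra item, as the workers do
consumer.dispose()              # flush unused cached quota back to memcache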
Example #9
  def handle(self):
    mapreduce_id = self.request.get("mapreduce_id")
    mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
    if mapreduce_state:
      config = util.create_datastore_write_config(
          mapreduce_state.mapreduce_spec)
      db.delete(model.MapreduceControl.get_key_by_job_id(mapreduce_id),
                config=config)
      shard_states = model.ShardState.find_by_mapreduce_state(mapreduce_state)
      for shard_state in shard_states:
        db.delete(util._HugeTaskPayload.all().ancestor(shard_state),
                  config=config)
      db.delete(shard_states, config=config)
      db.delete(util._HugeTaskPayload.all().ancestor(mapreduce_state),
                config=config)
Example #11
  def _finalize_job(mapreduce_spec, mapreduce_state, base_path):
    """Finalize job execution.

    Finalizes the output writer, invokes the done callback, and schedules
    the finalize job execution.

    Args:
      mapreduce_spec: an instance of MapreduceSpec
      mapreduce_state: an instance of MapreduceState
      base_path: handler base path.
    """
    config = util.create_datastore_write_config(mapreduce_spec)

    # Only finalize the output writers if the job is successful.
    if (mapreduce_spec.mapper.output_writer_class() and
        mapreduce_state.result_status == model.MapreduceState.RESULT_SUCCESS):
      mapreduce_spec.mapper.output_writer_class().finalize_job(mapreduce_state)

    # Enqueue done_callback if needed.
    def put_state(state):
      state.put(config=config)
      done_callback = mapreduce_spec.params.get(
          model.MapreduceSpec.PARAM_DONE_CALLBACK)
      if done_callback:
        done_task = taskqueue.Task(
            url=done_callback,
            headers={"Mapreduce-Id": mapreduce_spec.mapreduce_id},
            method=mapreduce_spec.params.get("done_callback_method", "POST"))
        queue_name = mapreduce_spec.params.get(
            model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
            "default")

        if not _run_task_hook(mapreduce_spec.get_hooks(),
                              "enqueue_done_task",
                              done_task,
                              queue_name):
          done_task.add(queue_name, transactional=True)
      FinalizeJobHandler.schedule(base_path, mapreduce_spec)

    db.run_in_transaction(put_state, mapreduce_state)
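_finalize_job only enqueues the done task when the job's params carry a callback URL. Assuming the PARAM_DONE_CALLBACK* constants resolve to the string keys shown below (a plausible reading, not verified against model.py), a caller opts in through the mapreduce params, roughly:

mapreduce_params = {
    "done_callback": "/callbacks/job_done",  # hypothetical handler URL
    "done_callback_method": "POST",
    "done_callback_queue": "default",
    "force_writes": "True",  # also consulted by create_datastore_write_config
}
# Passed as the mapreduce params when starting the job; the finalizer above
# then enqueues a task that hits the URL with a Mapreduce-Id header once the
# job completes.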
Example #13
  def testForceWrites(self):
    self.spec.params["force_writes"] = "True"
    config = util.create_datastore_write_config(self.spec)
    self.assertTrue(config)
    self.assertTrue(config.force_writes)
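Together with testDefaultConfig below (Example #16), this pins down the helper's contract: without a "force_writes" param it returns a plain write config, and with it a config whose force_writes flag is set. A sketch consistent with that behavior, assuming util's parse_bool helper (paraphrased, not the verbatim mapreduce/util.py):

from google.appengine.datastore import datastore_rpc

def create_datastore_write_config(mapreduce_spec):
  """Create a datastore config for all writes of this mapreduce."""
  # parse_bool: assumed util helper turning "True"/"false" strings into bools.
  force_writes = parse_bool(mapreduce_spec.params.get("force_writes", "false"))
  if force_writes:
    return datastore_rpc.Configuration(force_writes=force_writes)
  # The dev appserver does not support force_writes.
  return datastore_rpc.Configuration()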
Example #14
    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(self.request.get("mapreduce_spec"))
        self._start_time = self._time()
        shard_id = self.shard_id()

        # TODO(user): Make this prettier
        logging.debug("post: shard=%s slice=%s headers=%s", shard_id, self.slice_id(), self.request.headers)

        shard_state, control = db.get(
            [
                model.ShardState.get_key_by_shard_id(shard_id),
                model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
            ]
        )
        if not shard_state:
            # We're letting this task die. It's up to the controller code to
            # reinitialize and restart the task.
            logging.error("State not found for shard ID %r; shutting down", shard_id)
            return

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info(
                "Abort command received by shard %d of job '%s'", shard_state.shard_number, shard_state.mapreduce_id
            )
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = self.input_reader(spec.mapper)

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(quota.QuotaManager(memcache.Client()), shard_id, _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        ctx = context.Context(spec, shard_state, task_retry_count=self.task_retry_count())
        context.Context._set(ctx)

        try:
            # consume quota ahead, because we do not want to run a datastore
            # query if there's not enough quota for the shard.
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                # We shouldn't fetch an entity from the reader if there's not enough
                # quota to process it. Perform all quota checks proactively.
                if not quota_consumer or quota_consumer.consume():
                    for entity in input_reader:
                        if isinstance(entity, db.Model):
                            shard_state.last_work_item = repr(entity.key())
                        else:
                            shard_state.last_work_item = repr(entity)[:100]

                        scan_aborted = not self.process_entity(entity, ctx)

                        # Check if we've got enough quota for the next entity.
                        if quota_consumer and not scan_aborted and not quota_consumer.consume():
                            scan_aborted = True
                        if scan_aborted:
                            break
                else:
                    scan_aborted = True

                if not scan_aborted:
                    logging.info(
                        "Processing done for shard %d of job '%s'", shard_state.shard_number, shard_state.mapreduce_id
                    )
                    # We consumed an extra quota item at the end of the for loop.
                    # Just be nice here and give it back :)
                    if quota_consumer:
                        quota_consumer.put(1)
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_SUCCESS

            # TODO(user): Mike said we don't want this happen in case of
            # exception while scanning. Figure out when it's appropriate to skip.
            ctx.flush()
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        # Rescheduling work should always be the last statement. It shouldn't happen
        # if there were any exceptions in code before it.
        if shard_state.active:
            self.reschedule(spec, input_reader)
Example #15
    def handle(self):
        """Handle request."""
        tstate = model.TransientShardState.from_request(self.request)
        spec = tstate.mapreduce_spec
        self._start_time = self._time()
        shard_id = tstate.shard_id

        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:
            # We're letting this task die. It's up to the controller code to
            # reinitialize and restart the task.
            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        if not shard_state.active:
            logging.error(
                "Shard is not active. Looks like spurious task execution.")
            return

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            if tstate.output_writer:
                tstate.output_writer.finalize(ctx, shard_state.shard_number)
            # We received a command to abort. We don't care if we override
            # some data.
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = tstate.input_reader

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        context.Context._set(ctx)
        try:
            # consume quota ahead, because we do not want to run a datastore
            # query if there's not enough quota for the shard.
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                # We shouldn't fetch an entity from the reader if there's not enough
                # quota to process it. Perform all quota checks proactively.
                if not quota_consumer or quota_consumer.consume():
                    for entity in input_reader:
                        if isinstance(entity, db.Model):
                            shard_state.last_work_item = repr(entity.key())
                        else:
                            shard_state.last_work_item = repr(entity)[:100]

                        scan_aborted = not self.process_data(
                            entity, input_reader, ctx, tstate)

                        # Check if we've got enough quota for the next entity.
                        if (quota_consumer and not scan_aborted
                                and not quota_consumer.consume()):
                            scan_aborted = True
                        if scan_aborted:
                            break
                else:
                    scan_aborted = True

                if not scan_aborted:
                    logging.info("Processing done for shard %d of job '%s'",
                                 shard_state.shard_number,
                                 shard_state.mapreduce_id)
                    # We consumed an extra quota item at the end of the for loop.
                    # Just be nice here and give it back :)
                    if quota_consumer:
                        quota_consumer.put(1)
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_SUCCESS

            operation.counters.Increment(
                context.COUNTER_MAPPER_WALLTIME_MS,
                int((time.time() - self._start_time) * 1000))(ctx)

            # TODO(user): Mike said we don't want this happen in case of
            # exception while scanning. Figure out when it's appropriate to skip.
            ctx.flush()

            if not shard_state.active:
                # shard is going to stop. Finalize output writer if any.
                if tstate.output_writer:
                    tstate.output_writer.finalize(ctx,
                                                  shard_state.shard_number)

            config = util.create_datastore_write_config(spec)
            # We don't want shard state to override active state, since that
            # may stall job execution (see issue 116). Do a transactional
            # verification for status.
            # TODO(user): this might still result in some data inconsistency
            # which can be avoided. It doesn't seem to be worth it now, because
            # various crashes might result in all sorts of data inconsistencies
            # anyway.
            @db.transactional(retries=5)
            def tx():
                fresh_shard_state = db.get(
                    model.ShardState.get_key_by_shard_id(shard_id))
                if (not fresh_shard_state.active
                        or "worker_active_state_collision"
                        in _TEST_INJECTED_FAULTS):
                    shard_state.active = False
                    logging.error(
                        "Spurious task execution. Aborting the shard.")
                    return
                fresh_shard_state.copy_from(shard_state)
                fresh_shard_state.put(config=config)

            tx()
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        # Rescheduling work should always be the last statement. It shouldn't happen
        # if there were any exceptions in code before it.
        if shard_state.active:
            self.reschedule(shard_state, tstate)
        gc.collect()
Example #16
  def testDefaultConfig(self):
    config = util.create_datastore_write_config(self.spec)
    self.assertTrue(config)
    self.assertFalse(config.force_writes)
Example #17
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to the controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # NOTE: When aborting, specifically do not finalize the output writer
      # because it might be in a bad state.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # Tell NDB to never cache anything in memcache or in-process. This ensures
    # that entities fetched from Datastore input_readers via NDB will not bloat
    # up the request memory size and Datastore Puts will avoid doing calls
    # to memcache. Without this you get soft memory limit exits, which hurts
    # overall throughput.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)

    try:
      self.process_inputs(
          input_reader, shard_state, tstate, quota_consumer, ctx)

      if not shard_state.active:
        # shard is going to stop. Finalize output writer only when shard is
        # successful because writer might be stuck in some bad state otherwise.
        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)
      # We don't want shard state to override active state, since that
      # may stall job execution (see issue 116). Do a transactional
      # verification for status.
      # TODO(user): this might still result in some data inconsistency
      # which can be avoided. It doesn't seem to be worth it now, because
      # various crashes might result in all sorts of data inconsistencies
      # anyway.
      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if not fresh_shard_state:
          raise db.Rollback()
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
          shard_state.active = False
          logging.error("Spurious task execution. Aborting the shard.")
          return
        fresh_shard_state.copy_from(shard_state)
        fresh_shard_state.put(config=config)
      tx()
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()

    # Rescheduling work should always be the last statement. It shouldn't happen
    # if there were any exceptions in code before it.
    if shard_state.active:
      self.reschedule(shard_state, tstate)
    gc.collect()
Example #18
  def handle(self):
    """Handle request."""
    spec = model.MapreduceSpec.from_json_str(
        self.request.get("mapreduce_spec"))

    # TODO(user): Make this logging prettier.
    logging.debug("post: id=%s headers=%s spec=%s",
                  spec.mapreduce_id, self.request.headers,
                  self.request.get("mapreduce_spec"))

    state, control = db.get([
        model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not state:
      logging.error("State not found for mapreduce_id '%s'; skipping",
                    spec.mapreduce_id)
      return

    shard_states = model.ShardState.find_by_mapreduce_id(spec.mapreduce_id)
    if state.active and len(shard_states) != spec.mapper.shard_count:
      # Some shards were lost
      logging.error("Incorrect number of shard states: %d vs %d; "
                    "aborting job '%s'",
                    len(shard_states), spec.mapper.shard_count,
                    spec.mapreduce_id)
      state.active = False
      state.result_status = model.MapreduceState.RESULT_FAILED
      model.MapreduceControl.abort(spec.mapreduce_id)

    active_shards = [s for s in shard_states if s.active]
    failed_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_FAILED]
    aborted_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_ABORTED]
    if state.active:
      state.active = bool(active_shards)
      state.active_shards = len(active_shards)
      state.failed_shards = len(failed_shards)
      state.aborted_shards = len(aborted_shards)

    if (not state.active and control and
        control.command == model.MapreduceControl.ABORT):
      # User-initiated abort *after* all shards have completed.
      logging.info("Abort signal received for job '%s'", spec.mapreduce_id)
      state.result_status = model.MapreduceState.RESULT_ABORTED

    if not state.active:
      state.active_shards = 0
      if not state.result_status:
        # Set final result status derived from shard states.
        if [s for s in shard_states
            if s.result_status != model.ShardState.RESULT_SUCCESS]:
          state.result_status = model.MapreduceState.RESULT_FAILED
        else:
          state.result_status = model.MapreduceState.RESULT_SUCCESS
        logging.info("Final result for job '%s' is '%s'",
                     spec.mapreduce_id, state.result_status)

    # We don't need a transaction here, since we change only statistics data,
    # and we don't care if it gets overwritten/slightly inconsistent.
    self.aggregate_state(state, shard_states)
    poll_time = state.last_poll_time
    state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

    config = util.create_datastore_write_config(spec)

    if not state.active:
      # This is the last execution.
      # Enqueue done_callback if needed.
      if spec.mapper.output_writer_class():
        spec.mapper.output_writer_class().finalize_job(state)
      def put_state(state):
        state.put(config=config)
        done_callback = spec.params.get(
            model.MapreduceSpec.PARAM_DONE_CALLBACK)
        if done_callback:
          done_task = taskqueue.Task(
              url=done_callback,
              headers={"Mapreduce-Id": spec.mapreduce_id},
              method=spec.params.get("done_callback_method", "POST"))
          queue_name = spec.params.get(
              model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
              "default")

          if not _run_task_hook(spec.get_hooks(),
                                "enqueue_done_task",
                                done_task,
                                queue_name):
            done_task.add(queue_name, transactional=True)
        FinalizeJobHandler.schedule(self.base_path(), spec)

      db.run_in_transaction(put_state, state)
      return
    else:
      state.put(config=config)

    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    self.refill_quotas(poll_time, processing_rate, active_shards)
    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, self.serial_id() + 1)
Example #19
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path="/mapreduce",
                   queue_name="default",
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False):
        # Check that handler can be instantiated.
        mapper_spec.get_handler()

        # Check that reader can be instantiated and is configured correctly
        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = taskqueue.Task(url=base_path +
                                             "/kickoffjob_callback",
                                             params=kickoff_params,
                                             eta=eta,
                                             countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            if not transactional:
                # Save state in datastore so that UI can see it.
                # We can't save state in foreign transaction, but conventional UI
                # doesn't ask for transactional starts anyway.
                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:
                    # Use the default task addition implementation.
                    pass
                else:
                    return
            kickoff_worker_task.add(queue_name, transactional=True)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id
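The usual entry point into _start_map is the module-level control.start_map wrapper. A hedged usage sketch (argument names follow the common appengine-mapreduce control API; verify against your copy of control.py, and the handler/reader/kind names are hypothetical):

from mapreduce import control

mapreduce_id = control.start_map(
    name="Word count",
    handler_spec="main.word_count_map",
    reader_spec="mapreduce.input_readers.DatastoreInputReader",
    mapper_parameters={"entity_kind": "main.Document"},
    shard_count=8,
    queue_name="default")
# The returned mapreduce_id keys the MapreduceState / ShardState lookups
# used throughout the handlers above.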
Example #20
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path=None,
                   queue_name=None,
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False,
                   parent_entity=None):
        queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                                  "default")
        if queue_name[0] == "_":
            # We are currently in some special queue. E.g. __cron.
            queue_name = "default"

        if not transactional and parent_entity:
            raise Exception("Parent shouldn't be specfied "
                            "for non-transactional starts.")

        # Check that reader can be instantiated and is configured correctly
        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        mapper_output_writer_class = mapper_spec.output_writer_class()
        if mapper_output_writer_class:
            mapper_output_writer_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        # Check that handler can be instantiated.
        ctx = context.Context(mapreduce_spec, None)
        context.Context._set(ctx)
        try:
            mapper_spec.get_handler()
        finally:
            context.Context._set(None)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = util.HugeTask(url=base_path +
                                            "/kickoffjob_callback",
                                            params=kickoff_params,
                                            eta=eta,
                                            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            parent = parent_entity
            if not transactional:
                # Save state in datastore so that UI can see it.
                # We can't save state in foreign transaction, but conventional UI
                # doesn't ask for transactional starts anyway.
                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)
                parent = state

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:
                    # Use the default task addition implementation.
                    pass
                else:
                    return
            kickoff_worker_task.add(queue_name,
                                    transactional=True,
                                    parent=parent)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id
Example #21
    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(
            self.request.get("mapreduce_spec"))

        # TODO(user): Make this logging prettier.
        logging.debug("post: id=%s headers=%s spec=%s", spec.mapreduce_id,
                      self.request.headers, self.request.get("mapreduce_spec"))

        state, control = db.get([
            model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not state:
            logging.error("State not found for mapreduce_id '%s'; skipping",
                          spec.mapreduce_id)
            return

        shard_states = model.ShardState.find_by_mapreduce_state(state)
        if state.active and len(shard_states) != spec.mapper.shard_count:
            # Some shards were lost
            logging.error(
                "Incorrect number of shard states: %d vs %d; "
                "aborting job '%s'", len(shard_states),
                spec.mapper.shard_count, spec.mapreduce_id)
            state.active = False
            state.result_status = model.MapreduceState.RESULT_FAILED
            model.MapreduceControl.abort(spec.mapreduce_id)

        active_shards = [s for s in shard_states if s.active]
        failed_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_FAILED
        ]
        aborted_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_ABORTED
        ]
        if state.active:
            state.active = bool(active_shards)
            state.active_shards = len(active_shards)
            state.failed_shards = len(failed_shards)
            state.aborted_shards = len(aborted_shards)

        if (not state.active and control
                and control.command == model.MapreduceControl.ABORT):
            # User-initiated abort *after* all shards have completed.
            logging.info("Abort signal received for job '%s'",
                         spec.mapreduce_id)
            state.result_status = model.MapreduceState.RESULT_ABORTED

        if not state.active:
            state.active_shards = 0
            if not state.result_status:
                # Set final result status derived from shard states.
                if [
                        s for s in shard_states
                        if s.result_status != model.ShardState.RESULT_SUCCESS
                ]:
                    state.result_status = model.MapreduceState.RESULT_FAILED
                else:
                    state.result_status = model.MapreduceState.RESULT_SUCCESS
                logging.info("Final result for job '%s' is '%s'",
                             spec.mapreduce_id, state.result_status)

        # We don't need a transaction here, since we change only statistics data,
        # and we don't care if it gets overwritten/slightly inconsistent.
        self.aggregate_state(state, shard_states)
        poll_time = state.last_poll_time
        state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

        config = util.create_datastore_write_config(spec)

        if not state.active:
            # This is the last execution.
            # Enqueue done_callback if needed.
            if spec.mapper.output_writer_class():
                spec.mapper.output_writer_class().finalize_job(state)

            def put_state(state):
                state.put(config=config)
                done_callback = spec.params.get(
                    model.MapreduceSpec.PARAM_DONE_CALLBACK)
                if done_callback:
                    done_task = taskqueue.Task(
                        url=done_callback,
                        headers={"Mapreduce-Id": spec.mapreduce_id},
                        method=spec.params.get("done_callback_method", "POST"))
                    queue_name = spec.params.get(
                        model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
                        "default")

                    if not _run_task_hook(spec.get_hooks(),
                                          "enqueue_done_task", done_task,
                                          queue_name):
                        done_task.add(queue_name, transactional=True)
                FinalizeJobHandler.schedule(self.base_path(), spec)

            db.run_in_transaction(put_state, state)
            return
        else:
            state.put(config=config)

        processing_rate = int(
            spec.mapper.params.get("processing_rate")
            or model._DEFAULT_PROCESSING_RATE_PER_SEC)
        self.refill_quotas(poll_time, processing_rate, active_shards)
        ControllerCallbackHandler.reschedule(state, self.base_path(), spec,
                                             self.serial_id() + 1)
Example #22
    def handle(self):
        """Handle request."""
        tstate = model.TransientShardState.from_request(self.request)
        spec = tstate.mapreduce_spec
        self._start_time = self._time()
        shard_id = tstate.shard_id

        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:
            # We're letting this task die. It's up to the controller code to
            # reinitialize and restart the task.
            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            if tstate.output_writer:
                tstate.output_writer.finalize(ctx, shard_state.shard_number)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = tstate.input_reader

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        context.Context._set(ctx)
        try:
            # consume quota ahead, because we do not want to run a datastore
            # query if there's not enough quota for the shard.
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                # We shouldn't fetch an entity from the reader if there's not enough
                # quota to process it. Perform all quota checks proactively.
                if not quota_consumer or quota_consumer.consume():
                    for entity in input_reader:
                        if isinstance(entity, db.Model):
                            shard_state.last_work_item = repr(entity.key())
                        else:
                            shard_state.last_work_item = repr(entity)[:100]

                        scan_aborted = not self.process_data(
                            entity, input_reader, ctx, tstate)

                        # Check if we've got enough quota for the next entity.
                        if (quota_consumer and not scan_aborted
                                and not quota_consumer.consume()):
                            scan_aborted = True
                        if scan_aborted:
                            break
                else:
                    scan_aborted = True

                if not scan_aborted:
                    logging.info("Processing done for shard %d of job '%s'",
                                 shard_state.shard_number,
                                 shard_state.mapreduce_id)
                    # We consumed an extra quota item at the end of the for loop.
                    # Just be nice here and give it back :)
                    if quota_consumer:
                        quota_consumer.put(1)
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_SUCCESS

            operation.counters.Increment(
                "mapper-walltime-msec",
                int((time.time() - self._start_time) * 1000))(ctx)

            # TODO(user): Mike said we don't want this happen in case of
            # exception while scanning. Figure out when it's appropriate to skip.
            ctx.flush()

            if not shard_state.active:
                # shard is going to stop. Finalize output writer if any.
                if tstate.output_writer:
                    tstate.output_writer.finalize(ctx,
                                                  shard_state.shard_number)
            shard_state.put(config=util.create_datastore_write_config(spec))
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        # Rescheduling work should always be the last statement. It shouldn't happen
        # if there were any exceptions in code before it.
        if shard_state.active:
            self.reschedule(shard_state, tstate)
        gc.collect()
Example #23
  def handle(self):
    """Handle request."""
    spec = model.MapreduceSpec.from_json_str(
        self.request.get("mapreduce_spec"))

    # TODO(user): Make this logging prettier.
    logging.debug("post: id=%s headers=%s spec=%s",
                  spec.mapreduce_id, self.request.headers,
                  self.request.get("mapreduce_spec"))

    state, control = db.get([
        model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not state:
      logging.error("State not found for mapreduce_id '%s'; skipping",
                    spec.mapreduce_id)
      return

    shard_states = model.ShardState.find_by_mapreduce_state(state)
    if state.active and len(shard_states) != spec.mapper.shard_count:
      # Some shards were lost
      logging.error("Incorrect number of shard states: %d vs %d; "
                    "aborting job '%s'",
                    len(shard_states), spec.mapper.shard_count,
                    spec.mapreduce_id)
      state.active = False
      state.result_status = model.MapreduceState.RESULT_FAILED
      model.MapreduceControl.abort(spec.mapreduce_id)

    active_shards = [s for s in shard_states if s.active]
    failed_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_FAILED]
    aborted_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_ABORTED]
    if state.active:
      state.active = bool(active_shards)
      state.active_shards = len(active_shards)
      state.failed_shards = len(failed_shards)
      state.aborted_shards = len(aborted_shards)

    if (not state.active and control and
        control.command == model.MapreduceControl.ABORT):
      # User-initiated abort *after* all shards have completed.
      logging.info("Abort signal received for job '%s'", spec.mapreduce_id)
      state.result_status = model.MapreduceState.RESULT_ABORTED

    if not state.active:
      state.active_shards = 0
      if not state.result_status:
        # Set final result status derived from shard states.
        if [s for s in shard_states
            if s.result_status != model.ShardState.RESULT_SUCCESS]:
          state.result_status = model.MapreduceState.RESULT_FAILED
        else:
          state.result_status = model.MapreduceState.RESULT_SUCCESS
        logging.info("Final result for job '%s' is '%s'",
                     spec.mapreduce_id, state.result_status)

    # We don't need a transaction here, since we change only statistics data,
    # and we don't care if it gets overwritten/slightly inconsistent.
    self.aggregate_state(state, shard_states)
    poll_time = state.last_poll_time
    state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

    if not state.active:
      ControllerCallbackHandler._finalize_job(
          spec, state, self.base_path())
      return
    else:
      config = util.create_datastore_write_config(spec)
      state.put(config=config)

    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    self.refill_quotas(poll_time, processing_rate, active_shards)
    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, self.serial_id() + 1)
Example #24
          shard_state.result_status = model.ShardState.RESULT_FAILED
        except errors.FailJobError, e:
          logging.error("Job failed: %s", e)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED

      if not shard_state.active:
        # shard is going to stop. Don't finalize output writer unless the job is
        # going to be successful, because writer might be stuck in some bad state
        # otherwise.
        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)
      # We don't want shard state to override active state, since that
      # may stuck job execution (see issue 116). Do a transactional
      # verification for status.
      # TODO(user): this might still result in some data inconsistency
      # which can be avoided. It doesn't seem to be worth it now, because
      # various crashes might result in all sort of data consistencies
      # anyway.
      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if not fresh_shard_state:
          raise db.Rollback()
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task to die. It's up to controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # NOTE: When aborting, specifically do not finalize the output writer
      # because it might be in a bad state.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # Tell NDB to never cache anything in memcache or in-process. This ensures
    # that entities fetched from Datastore input_readers via NDB will not bloat
    # up the request memory size and Datastore Puts will avoid doing calls
    # to memcache. Without this you get soft memory limit exits, which hurts
    # overall throughput.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    try:
      # consume quota ahead, because we do not want to run a datastore
      # query if there's not enough quota for the shard.
      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        try:
          # We shouldn't fetch an entity from the reader if there's not enough
          # quota to process it. Perform all quota checks proactively.
          if not quota_consumer or quota_consumer.consume(verbose=True):
            for entity in input_reader:
              if isinstance(entity, db.Model):
                shard_state.last_work_item = repr(entity.key())
              else:
                shard_state.last_work_item = repr(entity)[:100]

              scan_aborted = not self.process_data(
                  entity, input_reader, ctx, tstate)

              # Check if we've got enough quota for the next entity.
              if (quota_consumer and not scan_aborted and
                  not quota_consumer.consume(verbose=True)):
                scan_aborted = True
              if scan_aborted:
                break
          else:
            scan_aborted = True

          if not scan_aborted:
            logging.info("Processing done for shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            # We consumed extra quota item at the end of for loop.
            # Just be nice here and give it back :)
            if quota_consumer:
              quota_consumer.put(1)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_SUCCESS

          operation.counters.Increment(
              context.COUNTER_MAPPER_WALLTIME_MS,
              int((time.time() - self._start_time)*1000))(ctx)

          # TODO(user): Mike said we don't want this happen in case of
          # exception while scanning. Figure out when it's appropriate to skip.
          ctx.flush()
        except errors.RetrySliceError, e:
          logging.error("Slice error: %s", e)
          retry_count = int(
              os.environ.get("HTTP_X_APPENGINE_TASKRETRYCOUNT") or 0)
          if retry_count <= _RETRY_SLICE_ERROR_MAX_RETRIES:
            raise
          logging.error("Too many retries: %d, failing the job", retry_count)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED
        except errors.FailJobError, e:
          logging.error("Job failed: %s", e)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED
Example #26
    def handle(self):
        """Handle request."""
        tstate = model.TransientShardState.from_request(self.request)
        spec = tstate.mapreduce_spec
        self._start_time = self._time()
        shard_id = tstate.shard_id

        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:
            # We're letting this task die. It's up to controller code to
            # reinitialize and restart the task.
            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        if not shard_state.active:
            logging.error(
                "Shard is not active. Looks like spurious task execution.")
            return

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            # NOTE: When aborting, specifically do not finalize the output writer
            # because it might be in a bad state.
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = tstate.input_reader

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        # Tell NDB to never cache anything in memcache or in-process. This ensures
        # that entities fetched from Datastore input_readers via NDB will not bloat
        # up the request memory size and Datastore Puts will avoid doing calls
        # to memcache. Without this you get soft memory limit exits, which hurts
        # overall throughput.
        if ndb is not None:
            ndb_ctx = ndb.get_context()
            ndb_ctx.set_cache_policy(lambda key: False)
            ndb_ctx.set_memcache_policy(lambda key: False)

        context.Context._set(ctx)
        try:
            # consume quota ahead, because we do not want to run a datastore
            # query if there's not enough quota for the shard.
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                try:
                    # We shouldn't fetch an entity from the reader if there's not enough
                    # quota to process it. Perform all quota checks proactively.
                    if not quota_consumer or quota_consumer.consume():
                        for entity in input_reader:
                            if isinstance(entity, db.Model):
                                shard_state.last_work_item = repr(entity.key())
                            else:
                                shard_state.last_work_item = repr(entity)[:100]

                            scan_aborted = not self.process_data(
                                entity, input_reader, ctx, tstate)

                            # Check if we've got enough quota for the next entity.
                            if (quota_consumer and not scan_aborted
                                    and not quota_consumer.consume()):
                                scan_aborted = True
                            if scan_aborted:
                                break
                    else:
                        scan_aborted = True

                    if not scan_aborted:
                        logging.info(
                            "Processing done for shard %d of job '%s'",
                            shard_state.shard_number, shard_state.mapreduce_id)
                        # We consumed an extra quota item at the end of the
                        # for loop. Just be nice here and give it back :)
                        if quota_consumer:
                            quota_consumer.put(1)
                        shard_state.active = False
                        shard_state.result_status = model.ShardState.RESULT_SUCCESS

                    operation.counters.Increment(
                        context.COUNTER_MAPPER_WALLTIME_MS,
                        int((time.time() - self._start_time) * 1000))(ctx)

                    # TODO(user): Mike said we don't want this to happen in case
                    # of an exception while scanning. Figure out when it's
                    # appropriate to skip.
                    ctx.flush()
                except errors.RetrySliceError, e:
                    logging.error("Slice error: %s", e)
                    retry_count = int(
                        os.environ.get("HTTP_X_APPENGINE_TASKRETRYCOUNT") or 0)
                    if retry_count <= _RETRY_SLICE_ERROR_MAX_RETRIES:
                        raise
                    logging.error("Too many retries: %d, failing the job",
                                  retry_count)
                    scan_aborted = True
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_FAILED
                except errors.FailJobError, e:
                    logging.error("Job failed: %s", e)
                    scan_aborted = True
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_FAILED

            if not shard_state.active:
                # The shard is going to stop. Don't finalize the output writer
                # unless the job succeeded, because the writer might be stuck
                # in a bad state otherwise.
                if (shard_state.result_status
                        == model.ShardState.RESULT_SUCCESS
                        and tstate.output_writer):
                    tstate.output_writer.finalize(ctx,
                                                  shard_state.shard_number)

            config = util.create_datastore_write_config(spec)
            # We don't want shard state to override active state, since that
            # may cause job execution to get stuck (see issue 116). Do a
            # transactional verification of the status.
            # TODO(user): this might still result in some data inconsistency
            # which can be avoided. It doesn't seem to be worth it now, because
            # various crashes might result in all sorts of data inconsistencies
            # anyway.
            @db.transactional(retries=5)
            def tx():
                fresh_shard_state = db.get(
                    model.ShardState.get_key_by_shard_id(shard_id))
                if not fresh_shard_state:
                    raise db.Rollback()
                if (not fresh_shard_state.active
                        or "worker_active_state_collision"
                        in _TEST_INJECTED_FAULTS):
                    shard_state.active = False
                    logging.error(
                        "Spurious task execution. Aborting the shard.")
                    return
                fresh_shard_state.copy_from(shard_state)
                fresh_shard_state.put(config=config)
            tx()
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        # Rescheduling work should always be the last statement. It shouldn't
        # happen if there were any exceptions in code before it.
        if shard_state.active:
            self.reschedule(shard_state, tstate)
        gc.collect()
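
Both worker variants funnel each input entity through process_data, whose boolean return value drives scan_aborted: returning False stops the scan so the slice can be rescheduled. A minimal sketch of that contract follows; the callable-operation convention mirrors the operation.counters.Increment(...)(ctx) call above, while the _SLICE_DURATION_SEC deadline constant and the handler plumbing are assumptions.

  # Sketch only -- the deadline constant and handler plumbing are assumptions.
  def process_data(self, data, input_reader, ctx, tstate):
    result = tstate.handler(data)
    if result is not None:
      for output in result:
        # Mapper operations (like the counter Increment above) are callables
        # applied to the context.
        if callable(output):
          output(ctx)
    # Returning False aborts the scan, e.g. when the slice has run too long.
    return self._time() - self._start_time < _SLICE_DURATION_SEC
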
Example #28
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      if tstate.output_writer:
        tstate.output_writer.finalize(ctx, shard_state.shard_number)
      # We received a command to abort. We don't care if we override
      # some data.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    context.Context._set(ctx)
    try:
      # consume quota ahead, because we do not want to run a datastore
      # query if there's not enough quota for the shard.
      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        # We shouldn't fetch an entity from the reader if there's not enough
        # quota to process it. Perform all quota checks proactively.
        if not quota_consumer or quota_consumer.consume():
          for entity in input_reader:
            if isinstance(entity, db.Model):
              shard_state.last_work_item = repr(entity.key())
            else:
              shard_state.last_work_item = repr(entity)[:100]

            scan_aborted = not self.process_data(
                entity, input_reader, ctx, tstate)

            # Check if we've got enough quota for the next entity.
            if (quota_consumer and not scan_aborted and
                not quota_consumer.consume()):
              scan_aborted = True
            if scan_aborted:
              break
        else:
          scan_aborted = True

        if not scan_aborted:
          logging.info("Processing done for shard %d of job '%s'",
                       shard_state.shard_number, shard_state.mapreduce_id)
          # We consumed an extra quota item at the end of the for loop.
          # Just be nice here and give it back :)
          if quota_consumer:
            quota_consumer.put(1)
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_SUCCESS

      operation.counters.Increment(
          context.COUNTER_MAPPER_WALLTIME_MS,
          int((time.time() - self._start_time)*1000))(ctx)

      # TODO(user): Mike said we don't want this to happen in case of an
      # exception while scanning. Figure out when it's appropriate to skip.
      ctx.flush()

      if not shard_state.active:
        # shard is going to stop. Finalize output writer if any.
        if tstate.output_writer:
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)
      # We don't want shard state to override active state, since that may
      # cause job execution to get stuck (see issue 116). Do a transactional
      # verification of the status.
      # TODO(user): this might still result in some data inconsistency
      # which can be avoided. It doesn't seem to be worth it now, because
      # various crashes might result in all sorts of data inconsistencies
      # anyway.
      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if not fresh_shard_state:
          raise db.Rollback()
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
          shard_state.active = False
          logging.error("Spurious task execution. Aborting the shard.")
          return
        fresh_shard_state.copy_from(shard_state)
        fresh_shard_state.put(config=config)
      tx()
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()

    # Rescheduling work should always be the last statement. It shouldn't happen
    # if there were any exceptions in code before it.
    if shard_state.active:
      self.reschedule(shard_state, tstate)
    gc.collect()
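
The workers above lean on quota.QuotaConsumer for rate limiting: check() peeks at availability, consume() takes one item (prefetching in batches of _QUOTA_BATCH_SIZE), put(1) returns the item consumed one step too eagerly, and dispose() flushes the local cache back to the shared pool. A rough sketch of that contract, assuming a QuotaManager exposing get(key, count) and put(key, count); the real manager's API may differ.

# Sketch of the consumer contract; QuotaManager.get/put are assumed APIs.
class QuotaConsumerSketch(object):

  def __init__(self, quota_manager, shard_id, batch_size):
    self.manager = quota_manager
    self.shard_id = shard_id
    self.batch_size = batch_size
    self.quota = 0  # items already reserved locally for this shard

  def check(self):
    # True if at least one quota item is available; doesn't consume it.
    return self.quota > 0 or self._prefetch()

  def consume(self, verbose=False):
    # Take one quota item, prefetching a batch when the local cache is empty.
    if self.quota <= 0 and not self._prefetch():
      return False
    self.quota -= 1
    return True

  def put(self, amount):
    # Return quota items that were reserved but not used.
    self.quota += amount

  def dispose(self):
    # Flush locally reserved quota back to the shared pool.
    if self.quota > 0:
      self.manager.put(self.shard_id, self.quota)
      self.quota = 0

  def _prefetch(self):
    # Reserve up to batch_size items from the shared pool (assumed API).
    acquired = self.manager.get(self.shard_id, self.batch_size)
    self.quota += acquired
    return acquired > 0
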
Example #29
    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(
            self.request.get("mapreduce_spec"))

        state, control = db.get([
            model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not state:
            logging.error("State not found for mapreduce_id '%s'; skipping",
                          spec.mapreduce_id)
            return

        shard_states = model.ShardState.find_by_mapreduce_state(state)
        if state.active and len(shard_states) != spec.mapper.shard_count:
            # Some shards were lost
            logging.error(
                "Incorrect number of shard states: %d vs %d; "
                "aborting job '%s'", len(shard_states),
                spec.mapper.shard_count, spec.mapreduce_id)
            state.active = False
            state.result_status = model.MapreduceState.RESULT_FAILED
            model.MapreduceControl.abort(spec.mapreduce_id)

        active_shards = [s for s in shard_states if s.active]
        failed_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_FAILED
        ]
        aborted_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_ABORTED
        ]
        if state.active:
            state.active = bool(active_shards)
            state.active_shards = len(active_shards)
            state.failed_shards = len(failed_shards)
            state.aborted_shards = len(aborted_shards)
            if not control and failed_shards:
                model.MapreduceControl.abort(spec.mapreduce_id)

        if (not state.active and control
                and control.command == model.MapreduceControl.ABORT):
            # User-initiated abort *after* all shards have completed.
            logging.info("Abort signal received for job '%s'",
                         spec.mapreduce_id)
            state.result_status = model.MapreduceState.RESULT_ABORTED

        if not state.active:
            state.active_shards = 0
            if not state.result_status:
                # Set final result status derived from shard states.
                if [
                        s for s in shard_states
                        if s.result_status != model.ShardState.RESULT_SUCCESS
                ]:
                    state.result_status = model.MapreduceState.RESULT_FAILED
                else:
                    state.result_status = model.MapreduceState.RESULT_SUCCESS
                logging.info("Final result for job '%s' is '%s'",
                             spec.mapreduce_id, state.result_status)

        # We don't need a transaction here, since we change only statistics data,
        # and we don't care if it gets overwritten/slightly inconsistent.
        self.aggregate_state(state, shard_states)
        poll_time = state.last_poll_time
        state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

        if not state.active:
            ControllerCallbackHandler._finalize_job(spec, state,
                                                    self.base_path())
            return
        else:
            config = util.create_datastore_write_config(spec)
            state.put(config=config)

        processing_rate = int(
            spec.mapper.params.get("processing_rate")
            or model._DEFAULT_PROCESSING_RATE_PER_SEC)
        self.refill_quotas(poll_time, processing_rate, active_shards)
        ControllerCallbackHandler.reschedule(state, self.base_path(), spec,
                                             self.serial_id() + 1)
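
On the refill side, the controller's refill_quotas tops the shared pool back up once per poll, using the elapsed time since the previous poll. A minimal sketch under the same assumed QuotaManager.put API; splitting the job-wide processing_rate evenly across active shards is also an assumption.

  # Sketch only -- the even split and QuotaManager.put are assumptions.
  def refill_quotas(self, last_poll_time, processing_rate,
                    active_shard_states):
    if not active_shard_states:
      return
    quota_manager = quota.QuotaManager(memcache.Client())
    now = datetime.datetime.utcfromtimestamp(self._time())
    elapsed_sec = (now - last_poll_time).seconds if last_poll_time else 0
    # Credit each active shard its share of the job-wide processing rate.
    per_shard = int(processing_rate * elapsed_sec / len(active_shard_states))
    for shard_state in active_shard_states:
      quota_manager.put(shard_state.shard_id, per_shard)
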
Example #30
  def _start_map(cls, name, mapper_spec,
                 mapreduce_params,
                 base_path=None,
                 queue_name=None,
                 eta=None,
                 countdown=None,
                 hooks_class_name=None,
                 _app=None,
                 transactional=False):
    queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                              "default")
    if queue_name[0] == "_":
      # We are currently in some special queue. E.g. __cron.
      queue_name = "default"

    # Check that handler can be instantiated.
    mapper_spec.get_handler()

    # Check that reader can be instantiated and is configured correctly
    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)

    mapper_output_writer_class = mapper_spec.output_writer_class()
    if mapper_output_writer_class:
      mapper_output_writer_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
      kickoff_params["app"] = _app
    kickoff_worker_task = util.HugeTask(
        url=base_path + "/kickoffjob_callback",
        params=kickoff_params,
        eta=eta,
        countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def start_mapreduce():
      parent = None
      if not transactional:
        # Save state in datastore so that UI can see it.
        # We can't save state in a foreign transaction, but the conventional UI
        # doesn't ask for transactional starts anyway.
        state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
        state.mapreduce_spec = mapreduce_spec
        state.active = True
        state.active_shards = mapper_spec.shard_count
        if _app:
          state.app_id = _app
        state.put(config=config)
        parent = state

      if hooks is not None:
        try:
          hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
        except NotImplementedError:
          # Use the default task addition implementation.
          pass
        else:
          return
      kickoff_worker_task.add(queue_name, transactional=True, parent=parent)

    if transactional:
      start_mapreduce()
    else:
      db.run_in_transaction(start_mapreduce)

    return mapreduce_id
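
For context, a hedged sketch of how _start_map might be driven by a caller. The enclosing class name StartJobHandler, the MapperSpec constructor arguments, and the handler/reader paths are illustrative assumptions, not taken from these examples.

  # Hypothetical usage -- every name below is an illustrative assumption.
  mapper_spec = model.MapperSpec(
      "myapp.mappers.touch_entity",                    # handler_spec
      "mapreduce.input_readers.DatastoreInputReader",  # input_reader_spec
      {"entity_kind": "myapp.models.MyEntity"},        # mapper params
      8)                                               # shard_count
  mapreduce_id = StartJobHandler._start_map(
      "touch all MyEntity rows",
      mapper_spec,
      {"done_callback": "/mapreduce/done"},
      base_path="/mapreduce",
      queue_name="default")
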
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return
    if shard_state.retries > tstate.retries:
      logging.error(
          "Got shard %s from previous shard retry %s. Dropping task.",
          shard_state.shard_id,
          tstate.retries)
      return
    elif shard_state.retries < tstate.retries:
      # This happens when the transaction that updates shard state and
      # enqueues the task fails after the task has been added. That
      # transaction will be retried. Adding the same task will result in
      # TaskAlreadyExistsError but the error is ignored.
      raise ValueError(
          "ShardState for %s is behind slice. Waiting for it to catch up." %
          shard_state.shard_id)

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # NOTE: When aborting, specifically do not finalize the output writer
      # because it might be in a bad state.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # Tell NDB to never cache anything in memcache or in-process. This ensures
    # that entities fetched from Datastore input_readers via NDB will not bloat
    # up the request memory size and Datastore Puts will avoid doing calls
    # to memcache. Without this you get soft memory limit exits, which hurts
    # overall throughput.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    retry_shard = False

    try:
      self.process_inputs(
          input_reader, shard_state, tstate, quota_consumer, ctx)

      if not shard_state.active:
        # shard is going to stop. Finalize output writer only when shard is
        # successful because writer might be stuck in some bad state otherwise.
        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state)
    # pylint: disable=broad-except
    except Exception, e:
      retry_shard = self._retry_logic(e, shard_state, tstate, spec.mapreduce_id)