Esempio n. 1
0
  def handle(self):
    """Handles kick off request."""
    spec = model.MapreduceSpec.from_json_str(
        self._get_required_param("mapreduce_spec"))
    app_id = self.request.get("app", None)
    queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")
    mapper_input_reader_class = spec.mapper.input_reader_class()

    state = model.MapreduceState.create_new(spec.mapreduce_id)
    state.mapreduce_spec = spec
    state.active = True
    state.char_url = ""
    state.sparkline_url = ""
    if app_id:
      state.app_id = app_id

    input_readers = mapper_input_reader_class.split_input(spec.mapper)
    if not input_readers:
      logging.warning("Found no mapper input data to process.")
      state.active = False
      state.active_shards = 0
      state.put(config=util.create_datastore_write_config(spec))
      return

    spec.mapper.shard_count = len(input_readers)
    state.active_shards = len(input_readers)
    state.mapreduce_spec = spec
    state.put(config=util.create_datastore_write_config(spec))

    KickOffJobHandler._schedule_shards(
        spec, input_readers, queue_name, self.base_path())

    ControllerCallbackHandler.reschedule(
        self.base_path(), spec, queue_name=queue_name, serial_id=0)
Esempio n. 2
0
 def flush(self):
   """Flush all information recorded in context."""
   for pool in self._pools.values():
     pool.flush()
   if self.shard_state:
     self.shard_state.put(
         config=util.create_datastore_write_config(self.mapreduce_spec))
Esempio n. 3
0
  def _schedule_shards(cls, spec, input_readers, queue_name, base_path):
    """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
    """
    shard_states = []
    for shard_number, input_reader in enumerate(input_readers):
      shard = model.ShardState.create_new(spec.mapreduce_id, shard_number)
      shard.shard_description = str(input_reader)
      shard_states.append(shard)

    existing_shard_states = db.get(shard.key() for shard in shard_states)
    existing_shard_keys = set(shard.key() for shard in existing_shard_states
                              if shard is not None)

    db.put((shard for shard in shard_states
            if shard.key() not in existing_shard_keys),
           config=util.create_datastore_write_config(spec))

    for shard_number, input_reader in enumerate(input_readers):
      shard_id = model.ShardState.shard_id_from_number(
          spec.mapreduce_id, shard_number)
      MapperWorkerCallbackHandler.schedule_slice(
          base_path, spec, shard_id, 0, input_reader, queue_name=queue_name)
Esempio n. 4
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(tstate.shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])

    shard_state = self._try_acquire_lease(shard_state, tstate)
    if shard_state == self._TASK_STATE.RETRY_TASK:
      self.retry_task()
      return
    if shard_state == self._TASK_STATE.DROP_TASK:
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)


      shard_state.set_for_abort()
      shard_state.put(config=util.create_datastore_write_config(spec))
      return






    ndb_ctx = ndb.get_context()
    ndb_ctx.set_cache_policy(lambda key: False)
    ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    retry_directive = False

    try:
      self.process_inputs(
          tstate.input_reader, shard_state, tstate, ctx)

      if not shard_state.active:


        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):




          tstate.output_writer.finalize(ctx, shard_state)

    except Exception, e:
      retry_directive = self._retry_logic(
          e, shard_state, tstate, spec.mapreduce_id)
Esempio n. 5
0
  def _start_map(cls,
                 name,
                 mapper_spec,
                 mapreduce_params,
                 base_path=None,
                 queue_name=None,
                 eta=None,
                 countdown=None,
                 hooks_class_name=None,
                 _app=None,
                 transactional=False,
                 parent_entity=None):
    """See control.start_map."""
    if not transactional and parent_entity:
      raise Exception("Parent shouldn't be specfied "
                      "for non-transactional starts.")


    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)

    mapper_output_writer_class = mapper_spec.output_writer_class()
    if mapper_output_writer_class:
      mapper_output_writer_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)


    ctx = context.Context(mapreduce_spec, None)
    context.Context._set(ctx)
    try:

      mapper_spec.handler
    finally:
      context.Context._set(None)

    if not transactional:

      state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
      state.mapreduce_spec = mapreduce_spec
      state.active = True
      state.active_shards = mapper_spec.shard_count
      if _app:
        state.app_id = _app
      config = util.create_datastore_write_config(mapreduce_spec)
      state.put(config=config)
      parent_entity = state

    cls._add_kickoff_task(
        base_path, mapreduce_spec, eta, countdown, parent_entity,
        queue_name, transactional, _app)

    return mapreduce_id
Esempio n. 6
0
  def _start_map(cls, name, mapper_spec,
                 mapreduce_params,
                 base_path="/mapreduce",
                 queue_name="default",
                 eta=None,
                 countdown=None,
                 hooks_class_name=None,
                 _app=None,
                 transactional=False):
    mapper_spec.get_handler()

    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
      kickoff_params["app"] = _app
    kickoff_worker_task = taskqueue.Task(
        url=base_path + "/kickoffjob_callback",
        params=kickoff_params,
        eta=eta, countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def start_mapreduce():
      if not transactional:
        state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
        state.mapreduce_spec = mapreduce_spec
        state.active = True
        state.active_shards = mapper_spec.shard_count
        if _app:
          state.app_id = _app
        state.put(config=config)

      if hooks is not None:
        try:
          hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
        except NotImplementedError:
          pass
        else:
          return
      kickoff_worker_task.add(queue_name, transactional=True)

    if transactional:
      start_mapreduce()
    else:
      db.run_in_transaction(start_mapreduce)

    return mapreduce_id
Esempio n. 7
0
  def _schedule_shards(cls,
                       spec,
                       input_readers,
                       queue_name,
                       base_path,
                       mr_state):
    """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
      mr_state: The MapReduceState of current job.
    """



    shard_states = []
    writer_class = spec.mapper.output_writer_class()
    output_writers = [None] * len(input_readers)
    for shard_number, input_reader in enumerate(input_readers):
      shard_state = model.ShardState.create_new(spec.mapreduce_id, shard_number)
      shard_state.shard_description = str(input_reader)
      if writer_class:
        output_writers[shard_number] = writer_class.create(
            mr_state, shard_state)
      shard_states.append(shard_state)


    existing_shard_states = db.get(shard.key() for shard in shard_states)
    existing_shard_keys = set(shard.key() for shard in existing_shard_states
                              if shard is not None)


    db.put((shard for shard in shard_states
            if shard.key() not in existing_shard_keys),
           config=util.create_datastore_write_config(spec))


    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    quota_refill = processing_rate / len(shard_states)
    quota_manager = quota.QuotaManager(memcache.Client())
    for shard_state in shard_states:
      quota_manager.put(shard_state.shard_id, quota_refill)


    for shard_number, (input_reader, output_writer) in enumerate(
        zip(input_readers, output_writers)):
      shard_id = model.ShardState.shard_id_from_number(
          spec.mapreduce_id, shard_number)
      MapperWorkerCallbackHandler._schedule_slice(
          shard_states[shard_number],
          model.TransientShardState(
              base_path, spec, shard_id, 0, input_reader, input_reader,
              output_writer=output_writer),
          queue_name=queue_name)
Esempio n. 8
0
    def _txn():

      state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
      state.mapreduce_spec = mapreduce_spec
      state.active = True
      state.active_shards = mapper_spec.shard_count
      if _app:
        state.app_id = _app
      state.put(config=util.create_datastore_write_config(mapreduce_spec))
      return state
Esempio n. 9
0
      def _put_state():
        fresh_state = model.MapreduceState.get_by_job_id(spec.mapreduce_id)


        if not fresh_state.active:
          logging.warning(
              "Job %s is not active. Look like spurious task execution. "
              "Dropping controller task.", spec.mapreduce_id)
          return
        config = util.create_datastore_write_config(spec)
        state.put(config=config)
Esempio n. 10
0
  def _schedule_shards(cls,
                       spec,
                       input_readers,
                       queue_name,
                       base_path,
                       mr_state):
    """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
      mr_state: The MapReduceState of current job.
    """



    shard_states = []
    writer_class = spec.mapper.output_writer_class()
    output_writers = [None] * len(input_readers)
    for shard_number, input_reader in enumerate(input_readers):
      shard_state = model.ShardState.create_new(spec.mapreduce_id, shard_number)
      shard_state.shard_description = str(input_reader)
      if writer_class:
        output_writers[shard_number] = writer_class.create(
            mr_state, shard_state)
      shard_states.append(shard_state)


    existing_shard_states = db.get(shard.key() for shard in shard_states)
    existing_shard_keys = set(shard.key() for shard in existing_shard_states
                              if shard is not None)




    db.put((shard for shard in shard_states
            if shard.key() not in existing_shard_keys),
           config=util.create_datastore_write_config(spec))


    for shard_number, (input_reader, output_writer) in enumerate(
        zip(input_readers, output_writers)):
      shard_id = model.ShardState.shard_id_from_number(
          spec.mapreduce_id, shard_number)
      task = MapperWorkerCallbackHandler._state_to_task(
          model.TransientShardState(
              base_path, spec, shard_id, 0, input_reader, input_reader,
              output_writer=output_writer))
      MapperWorkerCallbackHandler._add_task(task,
                                            shard_states[shard_number],
                                            spec,
                                            queue_name)
Esempio n. 11
0
 def handle(self):
   mapreduce_id = self.request.get("mapreduce_id")
   mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
   if mapreduce_state:
     config=util.create_datastore_write_config(mapreduce_state.mapreduce_spec)
     db.delete(model.MapreduceControl.get_key_by_job_id(mapreduce_id),
             config=config)
     shard_states = model.ShardState.find_by_mapreduce_state(mapreduce_state)
     for shard_state in shard_states:
       db.delete(util._HugeTaskPayload.all().ancestor(shard_state),
                 config=config)
     db.delete(util._HugeTaskPayload.all().ancestor(mapreduce_state),
               config=config)
Esempio n. 12
0
  def handle(self):
    """Handles kick off request."""
    spec = model.MapreduceSpec.from_json_str(
        self._get_required_param("mapreduce_spec"))

    app_id = self.request.get("app", None)
    queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")
    mapper_input_reader_class = spec.mapper.input_reader_class()



    state = model.MapreduceState.create_new(spec.mapreduce_id)
    state.mapreduce_spec = spec
    state.active = True
    if app_id:
      state.app_id = app_id

    input_readers = mapper_input_reader_class.split_input(spec.mapper)
    if not input_readers:

      logging.warning("Found no mapper input data to process.")
      state.active = False
      state.active_shards = 0
      ControllerCallbackHandler._finalize_job(spec, state, self.base_path())
      return


    spec.mapper.shard_count = len(input_readers)
    state.active_shards = len(input_readers)
    state.mapreduce_spec = spec

    output_writer_class = spec.mapper.output_writer_class()
    if output_writer_class:
      output_writer_class.init_job(state)

    output_writers = []
    if output_writer_class:
      for shard_number in range(len(input_readers)):
        writer = output_writer_class.create(state, shard_number)
        assert isinstance(writer, output_writer_class)
        output_writers.append(writer)
    else:
      output_writers = [None for ir in input_readers]

    state.put(config=util.create_datastore_write_config(spec))

    KickOffJobHandler._schedule_shards(
        spec, input_readers, output_writers, queue_name, self.base_path())

    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, queue_name=queue_name, serial_id=0)
Esempio n. 13
0
  def _finalize_job(cls, mapreduce_spec, mapreduce_state, base_path):
    """Finalize job execution.

    Invokes done callback and save mapreduce state in a transaction,
    and schedule necessary clean ups.

    Args:
      mapreduce_spec: an instance of MapreduceSpec
      mapreduce_state: an instance of MapreduceState
      base_path: handler_base path.
    """
    config = util.create_datastore_write_config(mapreduce_spec)

    queue_name = mapreduce_spec.params.get(
        model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
        "default")
    done_callback = mapreduce_spec.params.get(
        model.MapreduceSpec.PARAM_DONE_CALLBACK)
    done_task = None
    if done_callback:
      done_task = taskqueue.Task(
          url=done_callback,
          headers={"Mapreduce-Id": mapreduce_spec.mapreduce_id},
          method=mapreduce_spec.params.get("done_callback_method", "POST"))

    @db.transactional(retries=5)
    def _put_state():
      fresh_state = model.MapreduceState.get_by_job_id(
          mapreduce_spec.mapreduce_id)
      if not fresh_state.active:
        logging.warning(
            "Job %s is not active. Look like spurious task execution. "
            "Dropping controller task.", mapreduce_spec.mapreduce_id)
        return
      mapreduce_state.put(config=config)

      if done_task and not _run_task_hook(
          mapreduce_spec.get_hooks(),
          "enqueue_done_task",
          done_task,
          queue_name):
        done_task.add(queue_name, transactional=True)

    _put_state()
    logging.info("Final result for job '%s' is '%s'",
                 mapreduce_spec.mapreduce_id, mapreduce_state.result_status)
    cls._clean_up_mr(mapreduce_spec, base_path)
Esempio n. 14
0
  def _update_state_from_shard_states(self, state, shard_states, control):
    """Update mr state by examing shard states.

    Args:
      state: current mapreduce state as MapreduceState.
      shard_states: all shard states (active and inactive). list of ShardState.
      control: model.MapreduceControl entity.
    """
    active_shards = [s for s in shard_states if s.active]
    failed_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_FAILED]
    aborted_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_ABORTED]
    spec = state.mapreduce_spec



    state.active = bool(active_shards)
    state.active_shards = len(active_shards)
    state.failed_shards = len(failed_shards)
    state.aborted_shards = len(aborted_shards)
    if not control and (failed_shards or aborted_shards):

      model.MapreduceControl.abort(spec.mapreduce_id)

    self._aggregate_stats(state, shard_states)
    state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

    if not state.active:

      if failed_shards or not shard_states:
        state.result_status = model.MapreduceState.RESULT_FAILED


      elif aborted_shards:
        state.result_status = model.MapreduceState.RESULT_ABORTED
      else:
        state.result_status = model.MapreduceState.RESULT_SUCCESS
      self._finalize_job(spec, state, self.base_path())
    else:


      config = util.create_datastore_write_config(spec)
      state.put(config=config)
Esempio n. 15
0
  def _finalize_job(cls, mapreduce_spec, mapreduce_state, base_path):
    """Finalize job execution.

    Finalizes output writer, invokes done callback and save mapreduce state
    in a transaction, and schedule necessary clean ups.

    Args:
      mapreduce_spec: an instance of MapreduceSpec
      mapreduce_state: an instance of MapreduceState
      base_path: handler_base path.
    """
    config = util.create_datastore_write_config(mapreduce_spec)


    if (mapreduce_spec.mapper.output_writer_class() and
        mapreduce_state.result_status == model.MapreduceState.RESULT_SUCCESS):
      mapreduce_spec.mapper.output_writer_class().finalize_job(mapreduce_state)

    queue_name = mapreduce_spec.params.get(
        model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
        "default")
    done_callback = mapreduce_spec.params.get(
        model.MapreduceSpec.PARAM_DONE_CALLBACK)
    done_task = None
    if done_callback:
      done_task = taskqueue.Task(
          url=done_callback,
          headers={"Mapreduce-Id": mapreduce_spec.mapreduce_id},
          method=mapreduce_spec.params.get("done_callback_method", "POST"))

    def put_state(state):
      state.put(config=config)

      if done_task and not _run_task_hook(
          mapreduce_spec.get_hooks(),
          "enqueue_done_task",
          done_task,
          queue_name):
        done_task.add(queue_name, transactional=True)

    logging.info("Final result for job '%s' is '%s'",
                 mapreduce_spec.mapreduce_id, mapreduce_state.result_status)
    db.run_in_transaction_custom_retries(5, put_state, mapreduce_state)
    cls._clean_up_mr(mapreduce_spec, base_path)
Esempio n. 16
0
  def _finalize_job(mapreduce_spec, mapreduce_state, base_path):
    """Finalize job execution.

    Finalizes output writer, invokes done callback an schedules
    finalize job execution.

    Args:
      mapreduce_spec: an instance of MapreduceSpec
      mapreduce_state: an instance of MapreduceState
      base_path: handler base path.
    """
    config = util.create_datastore_write_config(mapreduce_spec)


    if (mapreduce_spec.mapper.output_writer_class() and
        mapreduce_state.result_status == model.MapreduceState.RESULT_SUCCESS):
      mapreduce_spec.mapper.output_writer_class().finalize_job(mapreduce_state)


    def put_state(state):
      state.put(config=config)
      done_callback = mapreduce_spec.params.get(
          model.MapreduceSpec.PARAM_DONE_CALLBACK)
      if done_callback:
        done_task = taskqueue.Task(
            url=done_callback,
            headers={"Mapreduce-Id": mapreduce_spec.mapreduce_id},
            method=mapreduce_spec.params.get("done_callback_method", "POST"))
        queue_name = mapreduce_spec.params.get(
            model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
            "default")

        if not _run_task_hook(mapreduce_spec.get_hooks(),
                              "enqueue_done_task",
                              done_task,
                              queue_name):
          done_task.add(queue_name, transactional=True)
      FinalizeJobHandler.schedule(base_path, mapreduce_spec)

    db.run_in_transaction_custom_retries(5, put_state, mapreduce_state)
Esempio n. 17
0
      if not shard_state.active:


        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state)

    except Exception, e:
      retry_shard = self._retry_logic(e, shard_state, tstate, spec.mapreduce_id)
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()

    config = util.create_datastore_write_config(spec)




    @db.transactional(retries=5)
    def tx():
      fresh_shard_state = db.get(
          model.ShardState.get_key_by_shard_id(shard_id))
      if not fresh_shard_state:
        raise db.Rollback()
      if (not fresh_shard_state.active or
          "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
        shard_state.active = False
        logging.error("Spurious task execution. Aborting the shard.")
        return
Esempio n. 18
0
  def handle(self):
    """Handle request."""
    spec = model.MapreduceSpec.from_json_str(
        self.request.get("mapreduce_spec"))
    self._start_time = self._time()
    shard_id = self.shard_id()

    logging.debug("post: shard=%s slice=%s headers=%s",
                  shard_id, self.slice_id(), self.request.headers)

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = self.input_reader(spec.mapper)

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())
    context.Context._set(ctx)

    try:
      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        if not quota_consumer or quota_consumer.consume():
          for entity in input_reader:
            if isinstance(entity, db.Model):
              shard_state.last_work_item = repr(entity.key())
            else:
              shard_state.last_work_item = repr(entity)[:100]

            scan_aborted = not self.process_entity(entity, ctx)

            if (quota_consumer and not scan_aborted and
                not quota_consumer.consume()):
              scan_aborted = True
            if scan_aborted:
              break
        else:
          scan_aborted = True


        if not scan_aborted:
          logging.info("Processing done for shard %d of job '%s'",
                       shard_state.shard_number, shard_state.mapreduce_id)
          if quota_consumer:
            quota_consumer.put(1)
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_SUCCESS

      ctx.flush()
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()

    if shard_state.active:
      self.reschedule(spec, input_reader)
    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(
            self.request.get("mapreduce_spec"))

        logging.debug("post: id=%s headers=%s", spec.mapreduce_id,
                      self.request.headers)

        state, control = db.get([
            model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not state:
            logging.error("State not found for mapreduce_id '%s'; skipping",
                          spec.mapreduce_id)
            return

        shard_states = model.ShardState.find_by_mapreduce_id(spec.mapreduce_id)
        if state.active and len(shard_states) != spec.mapper.shard_count:
            logging.error(
                "Incorrect number of shard states: %d vs %d; "
                "aborting job '%s'", len(shard_states),
                spec.mapper.shard_count, spec.mapreduce_id)
            state.active = False
            state.result_status = model.MapreduceState.RESULT_FAILED
            model.MapreduceControl.abort(spec.mapreduce_id)

        active_shards = [s for s in shard_states if s.active]
        failed_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_FAILED
        ]
        aborted_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_ABORTED
        ]
        if state.active:
            state.active = bool(active_shards)
            state.active_shards = len(active_shards)
            state.failed_shards = len(failed_shards)
            state.aborted_shards = len(aborted_shards)

        if (not state.active and control
                and control.command == model.MapreduceControl.ABORT):
            logging.info("Abort signal received for job '%s'",
                         spec.mapreduce_id)
            state.result_status = model.MapreduceState.RESULT_ABORTED

        if not state.active:
            state.active_shards = 0
            if not state.result_status:
                if [
                        s for s in shard_states
                        if s.result_status != model.ShardState.RESULT_SUCCESS
                ]:
                    state.result_status = model.MapreduceState.RESULT_FAILED
                else:
                    state.result_status = model.MapreduceState.RESULT_SUCCESS
                logging.info("Final result for job '%s' is '%s'",
                             spec.mapreduce_id, state.result_status)

        self.aggregate_state(state, shard_states)
        poll_time = state.last_poll_time
        state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

        config = util.create_datastore_write_config(spec)

        if not state.active:

            def put_state(state):
                state.put(config=config)
                done_callback = spec.params.get(
                    model.MapreduceSpec.PARAM_DONE_CALLBACK)
                if done_callback:
                    done_task = taskqueue.Task(
                        url=done_callback,
                        headers={"Mapreduce-Id": spec.mapreduce_id})
                    queue_name = spec.params.get(
                        model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
                        "default")

                    if not _run_task_hook(spec.get_hooks(),
                                          "enqueue_done_task", done_task,
                                          queue_name):
                        done_task.add(queue_name, transactional=True)

            db.run_in_transaction(put_state, state)
            return
        else:
            state.put(config=config)

        processing_rate = int(
            spec.mapper.params.get("processing_rate")
            or model._DEFAULT_PROCESSING_RATE_PER_SEC)
        self.refill_quotas(poll_time, processing_rate, active_shards)
        ControllerCallbackHandler.reschedule(self.base_path(), spec,
                                             self.serial_id() + 1)
    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(
            self.request.get("mapreduce_spec"))
        self._start_time = self._time()
        shard_id = self.shard_id()

        logging.debug("post: shard=%s slice=%s headers=%s", shard_id,
                      self.slice_id(), self.request.headers)

        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:
            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = self.input_reader(spec.mapper)

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())
        context.Context._set(ctx)

        try:
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                if not quota_consumer or quota_consumer.consume():
                    for entity in input_reader:
                        if isinstance(entity, db.Model):
                            shard_state.last_work_item = repr(entity.key())
                        else:
                            shard_state.last_work_item = repr(entity)[:100]

                        scan_aborted = not self.process_entity(entity, ctx)

                        if (quota_consumer and not scan_aborted
                                and not quota_consumer.consume()):
                            scan_aborted = True
                        if scan_aborted:
                            break
                else:
                    scan_aborted = True

                if not scan_aborted:
                    logging.info("Processing done for shard %d of job '%s'",
                                 shard_state.shard_number,
                                 shard_state.mapreduce_id)
                    if quota_consumer:
                        quota_consumer.put(1)
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_SUCCESS

            ctx.flush()
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        if shard_state.active:
            self.reschedule(spec, input_reader)
Esempio n. 21
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:


      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)


      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None






    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    try:


      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        try:


          if not quota_consumer or quota_consumer.consume():
            for entity in input_reader:
              if isinstance(entity, db.Model):
                shard_state.last_work_item = repr(entity.key())
              else:
                shard_state.last_work_item = repr(entity)[:100]

              scan_aborted = not self.process_data(
                  entity, input_reader, ctx, tstate)


              if (quota_consumer and not scan_aborted and
                  not quota_consumer.consume()):
                scan_aborted = True
              if scan_aborted:
                break
          else:
            scan_aborted = True

          if not scan_aborted:
            logging.info("Processing done for shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)


            if quota_consumer:
              quota_consumer.put(1)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_SUCCESS

          operation.counters.Increment(
              context.COUNTER_MAPPER_WALLTIME_MS,
              int((time.time() - self._start_time)*1000))(ctx)



          ctx.flush()
        except errors.RetrySliceError, e:
          logging.error("Slice error: %s", e)
          retry_count = int(
              os.environ.get("HTTP_X_APPENGINE_TASKRETRYCOUNT") or 0)
          if retry_count <= _RETRY_SLICE_ERROR_MAX_RETRIES:
            raise
          logging.error("Too many retries: %d, failing the job", retry_count)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED
        except errors.FailJobError, e:
          logging.error("Job failed: %s", e)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED
Esempio n. 22
0
    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(
            self.request.get("mapreduce_spec"))

        logging.debug("post: id=%s headers=%s spec=%s", spec.mapreduce_id,
                      self.request.headers, self.request.get("mapreduce_spec"))

        state, control = db.get([
            model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not state:
            logging.error("State not found for mapreduce_id '%s'; skipping",
                          spec.mapreduce_id)
            return

        shard_states = model.ShardState.find_by_mapreduce_state(state)
        if state.active and len(shard_states) != spec.mapper.shard_count:

            logging.error(
                "Incorrect number of shard states: %d vs %d; "
                "aborting job '%s'", len(shard_states),
                spec.mapper.shard_count, spec.mapreduce_id)
            state.active = False
            state.result_status = model.MapreduceState.RESULT_FAILED
            model.MapreduceControl.abort(spec.mapreduce_id)

        active_shards = [s for s in shard_states if s.active]
        failed_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_FAILED
        ]
        aborted_shards = [
            s for s in shard_states
            if s.result_status == model.ShardState.RESULT_ABORTED
        ]
        if state.active:
            state.active = bool(active_shards)
            state.active_shards = len(active_shards)
            state.failed_shards = len(failed_shards)
            state.aborted_shards = len(aborted_shards)
            if not control and failed_shards:
                model.MapreduceControl.abort(spec.mapreduce_id)

        if (not state.active and control
                and control.command == model.MapreduceControl.ABORT):

            logging.info("Abort signal received for job '%s'",
                         spec.mapreduce_id)
            state.result_status = model.MapreduceState.RESULT_ABORTED

        if not state.active:
            state.active_shards = 0
            if not state.result_status:

                if [
                        s for s in shard_states
                        if s.result_status != model.ShardState.RESULT_SUCCESS
                ]:
                    state.result_status = model.MapreduceState.RESULT_FAILED
                else:
                    state.result_status = model.MapreduceState.RESULT_SUCCESS
                logging.info("Final result for job '%s' is '%s'",
                             spec.mapreduce_id, state.result_status)

        self.aggregate_state(state, shard_states)
        poll_time = state.last_poll_time
        state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

        if not state.active:
            ControllerCallbackHandler._finalize_job(spec, state,
                                                    self.base_path())
            return
        else:
            config = util.create_datastore_write_config(spec)
            state.put(config=config)

        processing_rate = int(
            spec.mapper.params.get("processing_rate")
            or model._DEFAULT_PROCESSING_RATE_PER_SEC)
        self.refill_quotas(poll_time, processing_rate, active_shards)
        ControllerCallbackHandler.reschedule(state, self.base_path(), spec,
                                             self.serial_id() + 1)
Esempio n. 23
0
    def handle(self):
        """Handle request."""
        tstate = model.TransientShardState.from_request(self.request)
        spec = tstate.mapreduce_spec
        self._start_time = self._time()
        shard_id = tstate.shard_id

        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:

            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            if tstate.output_writer:
                tstate.output_writer.finalize(None, shard_state.shard_number)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = tstate.input_reader

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())
        context.Context._set(ctx)

        try:

            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                if not quota_consumer or quota_consumer.consume():
                    for entity in input_reader:
                        if isinstance(entity, db.Model):
                            shard_state.last_work_item = repr(entity.key())
                        else:
                            shard_state.last_work_item = repr(entity)[:100]

                        scan_aborted = not self.process_data(
                            entity, input_reader, ctx, tstate)

                        if (quota_consumer and not scan_aborted
                                and not quota_consumer.consume()):
                            scan_aborted = True
                        if scan_aborted:
                            break
                else:
                    scan_aborted = True

                if not scan_aborted:
                    logging.info("Processing done for shard %d of job '%s'",
                                 shard_state.shard_number,
                                 shard_state.mapreduce_id)

                    if quota_consumer:
                        quota_consumer.put(1)
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_SUCCESS

            operation.counters.Increment(
                "mapper-walltime-msec",
                int((time.time() - self._start_time) * 1000))(ctx)

            ctx.flush()

            if not shard_state.active:

                if tstate.output_writer:
                    tstate.output_writer.finalize(ctx,
                                                  shard_state.shard_number)
            shard_state.put(config=util.create_datastore_write_config(spec))
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        if shard_state.active:
            self.reschedule(shard_state, tstate)
        gc.collect()
Esempio n. 24
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:


      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      if tstate.output_writer:
        tstate.output_writer.finalize(ctx, shard_state.shard_number)


      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    context.Context._set(ctx)
    try:


      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        try:


          if not quota_consumer or quota_consumer.consume():
            for entity in input_reader:
              if isinstance(entity, db.Model):
                shard_state.last_work_item = repr(entity.key())
              else:
                shard_state.last_work_item = repr(entity)[:100]

              scan_aborted = not self.process_data(
                  entity, input_reader, ctx, tstate)


              if (quota_consumer and not scan_aborted and
                  not quota_consumer.consume()):
                scan_aborted = True
              if scan_aborted:
                break
          else:
            scan_aborted = True

          if not scan_aborted:
            logging.info("Processing done for shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)


            if quota_consumer:
              quota_consumer.put(1)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_SUCCESS

          operation.counters.Increment(
              context.COUNTER_MAPPER_WALLTIME_MS,
              int((time.time() - self._start_time)*1000))(ctx)



          ctx.flush()
        except errors.FailJobError, e:
          logging.error("Job failed: %s", e)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED

      if not shard_state.active:

        if tstate.output_writer:
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)







      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
          shard_state.active = False
          logging.error("Spurious task execution. Aborting the shard.")
          return
        fresh_shard_state.copy_from(shard_state)
        fresh_shard_state.put(config=config)
      tx()
Esempio n. 25
0
  def _save_state_and_schedule_next(self, shard_state, tstate, retry_shard):
    """Save state to datastore and schedule next task for this shard.

    Update and save shard state. Schedule next slice if needed.
    This method handles interactions with datastore and taskqueue.

    Args:
      shard_state: model.ShardState for current shard.
      tstate: model.TransientShardState for current shard.
      retry_shard: whether to retry shard.
    """

    spec = tstate.mapreduce_spec
    config = util.create_datastore_write_config(spec)


    task = None
    if retry_shard:


      task = self._state_to_task(tstate)
    elif shard_state.active:
      shard_state.advance_for_next_slice()
      tstate.advance_for_next_slice()
      countdown = self._get_countdown_for_next_slice(spec)
      task = self._state_to_task(tstate, countdown=countdown)
    queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")

    @db.transactional(retries=5)
    def _tx():
      fresh_shard_state = model.ShardState.get_by_shard_id(tstate.shard_id)
      if not fresh_shard_state:
        raise db.Rollback()
      if (not fresh_shard_state.active or
          "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
        logging.error("Shard %s is not active. Possible spurious task "
                      "execution. Dropping this task.", tstate.shard_id)
        logging.error("Datastore's %s", str(fresh_shard_state))
        logging.error("Slice's %s", str(shard_state))
        return
      fresh_shard_state.copy_from(shard_state)





      if fresh_shard_state.active:
        assert task is not None


        self._add_task(task, fresh_shard_state, spec, queue_name)
      fresh_shard_state.put(config=config)

    try:
      _tx()
    except (datastore_errors.Error,
            taskqueue.Error,
            runtime.DeadlineExceededError,
            apiproxy_errors.Error), e:
      logging.error(
          "Can't transactionally continue shard. "
          "Will retry slice %s %s for the %s time.",
          tstate.shard_id,
          tstate.slice_id,
          self.task_retry_count() + 1)
      shard_state.slice_id -= 1
      self._try_free_lease(shard_state)
      raise e
Esempio n. 26
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:


      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)


      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None






    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)

    try:
      self.process_inputs(
          input_reader, shard_state, tstate, quota_consumer, ctx)

      if not shard_state.active:


        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)







      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if not fresh_shard_state:
          raise db.Rollback()
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
          shard_state.active = False
          logging.error("Spurious task execution. Aborting the shard.")
          return
        fresh_shard_state.copy_from(shard_state)
        fresh_shard_state.put(config=config)
      tx()
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()



    if shard_state.active:
      self.reschedule(shard_state, tstate)
    gc.collect()
Esempio n. 27
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:


      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      if tstate.output_writer:
        tstate.output_writer.finalize(None, shard_state.shard_number)
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())
    context.Context._set(ctx)

    try:


      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None



        if not quota_consumer or quota_consumer.consume():
          for entity in input_reader:
            if isinstance(entity, db.Model):
              shard_state.last_work_item = repr(entity.key())
            else:
              shard_state.last_work_item = repr(entity)[:100]

            scan_aborted = not self.process_data(
                entity, input_reader, ctx, tstate)


            if (quota_consumer and not scan_aborted and
                not quota_consumer.consume()):
              scan_aborted = True
            if scan_aborted:
              break
        else:
          scan_aborted = True


        if not scan_aborted:
          logging.info("Processing done for shard %d of job '%s'",
                       shard_state.shard_number, shard_state.mapreduce_id)


          if quota_consumer:
            quota_consumer.put(1)
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_SUCCESS

      operation.counters.Increment(
          "mapper-walltime-msec",
          int((time.time() - self._start_time)*1000))(ctx)



      ctx.flush()

      if not shard_state.active:

        if tstate.output_writer:
          tstate.output_writer.finalize(ctx, shard_state.shard_number)
      shard_state.put(config=util.create_datastore_write_config(spec))
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()



    if shard_state.active:
      self.reschedule(shard_state, tstate)
    gc.collect()
Esempio n. 28
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path=None,
                   queue_name=None,
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False,
                   parent_entity=None):
        queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                                  "default")
        if queue_name[0] == "_":

            queue_name = "default"

        if not transactional and parent_entity:
            raise Exception("Parent shouldn't be specfied "
                            "for non-transactional starts.")

        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        mapper_output_writer_class = mapper_spec.output_writer_class()
        if mapper_output_writer_class:
            mapper_output_writer_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        ctx = context.Context(mapreduce_spec, None)
        context.Context._set(ctx)
        try:
            mapper_spec.get_handler()
        finally:
            context.Context._set(None)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = util.HugeTask(url=base_path +
                                            "/kickoffjob_callback",
                                            params=kickoff_params,
                                            eta=eta,
                                            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            parent = parent_entity
            if not transactional:

                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)
                parent = state

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:

                    pass
                else:
                    return
            kickoff_worker_task.add(queue_name,
                                    transactional=True,
                                    parent=parent)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id
Esempio n. 29
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(tstate.shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])

    lease_acquired = self._try_acquire_lease(shard_state, tstate)
    if lease_acquired is None:
      self.retry_task()
      return
    if not lease_acquired:
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)


      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      return






    ndb_ctx = ndb.get_context()
    ndb_ctx.set_cache_policy(lambda key: False)
    ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    retry_directive = False

    try:
      self.process_inputs(
          tstate.input_reader, shard_state, tstate, ctx)

      if not shard_state.active:


        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):




          tstate.output_writer.finalize(ctx, shard_state)

    except Exception, e:
      retry_directive = self._retry_logic(
          e, shard_state, tstate, spec.mapreduce_id)
Esempio n. 30
0
  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:


      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return
    if shard_state.retries > tstate.retries:
      logging.error(
          "Got shard %s from previous shard retry %s. Drop",
          shard_state.shard_id,
          tstate.retries)
      return
    elif shard_state.retries < tstate.retries:




      raise ValueError(
          "ShardState for %s is behind slice. Waiting for it to catch up",
          shard_state.shard_id)

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)


      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None






    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    retry_shard = False

    try:
      self.process_inputs(
          input_reader, shard_state, tstate, quota_consumer, ctx)

      if not shard_state.active:


        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state)

    except Exception, e:
      retry_shard = self._retry_logic(e, shard_state, tstate, spec.mapreduce_id)
Esempio n. 31
0
  def _try_acquire_lease(self, shard_state, tstate):
    """Validate datastore and the task payload are consistent.

    If so, attempt to get a lease on this slice's execution.
    See model.ShardState doc on slice_start_time.

    Args:
      shard_state: model.ShardState from datastore.
      tstate: model.TransientShardState from taskqueue paylod.

    Returns:
      True if lease is acquired. None if this task should be retried.
    False if this task should be dropped. Only old tasks
    (comparing to datastore state) will be dropped. Future tasks are
    retried until they naturally become old so that we don't ever stuck MR.
    """

    if not shard_state:
      logging.warning("State not found for shard %s; Possible spurious task "
                      "execution. Dropping this task.",
                      tstate.shard_id)
      return False

    if not shard_state.active:
      logging.warning("Shard %s is not active. Possible spurious task "
                      "execution. Dropping this task.", tstate.shard_id)
      logging.warning(str(shard_state))
      return False


    if shard_state.retries > tstate.retries:
      logging.warning(
          "Got shard %s from previous shard retry %s. Possible spurious "
          "task execution. Dropping this task.",
          tstate.shard_id,
          tstate.retries)
      logging.warning(str(shard_state))
      return False
    elif shard_state.retries < tstate.retries:



      logging.warning(
          "ShardState for %s is behind slice. Waiting for it to catch up",
          shard_state.shard_id)
      return



    if shard_state.slice_id > tstate.slice_id:
      logging.warning(
          "Task %s-%s is behind ShardState %s. Dropping task.""",
          tstate.shard_id, tstate.slice_id, shard_state.slice_id)
      return False



    elif shard_state.slice_id < tstate.slice_id:
      logging.warning(
          "Task %s-%s is ahead of ShardState %s. Waiting for it to catch up.",
          tstate.shard_id, tstate.slice_id, shard_state.slice_id)
      return



    if shard_state.slice_start_time:
      countdown = self._wait_time(shard_state,
                                  _LEASE_GRACE_PERIOD + _SLICE_DURATION_SEC)
      if countdown > 0:
        logging.warning(
            "Last retry of slice %s-%s may be still running."
            "Will try again in %s seconds", tstate.shard_id, tstate.slice_id,
            countdown)



        time.sleep(countdown)
        return

      else:
        if (not self._old_request_ended(shard_state) and
            self._wait_time(shard_state, _REQUEST_EVENTUAL_TIMEOUT)):
          logging.warning(
              "Last retry of slice %s-%s is still in flight with request_id "
              "%s. Will try again later.", tstate.shard_id, tstate.slice_id,
              shard_state.slice_request_id)
          return


    config = util.create_datastore_write_config(tstate.mapreduce_spec)
    @db.transactional(retries=5)
    def _tx():
      """Use datastore to set slice_start_time to now.

      If failed for any reason, raise error to retry the task (hence all
      the previous validation code). The task would die naturally eventually.

      Returns:
        True if state commit succeeded. None otherwise.
      """
      fresh_state = model.ShardState.get_by_shard_id(tstate.shard_id)
      if not fresh_state:
        logging.error("ShardState missing.")
        raise db.Rollback()
      if (fresh_state.active and
          fresh_state.slice_id == shard_state.slice_id and
          fresh_state.slice_start_time == shard_state.slice_start_time):
        fresh_state.slice_start_time = datetime.datetime.now()
        fresh_state.slice_request_id = os.environ.get("REQUEST_LOG_ID")
        fresh_state.put(config=config)
        return True
      else:
        logging.warning(
            "Contention on slice %s-%s execution. Will retry again.",
            tstate.shard_id, tstate.slice_id)

        time.sleep(random.randrange(1, 5))
        return

    return _tx()
Esempio n. 32
0
            self.process_inputs(input_reader, shard_state, tstate, ctx)

            if not shard_state.active:

                if (shard_state.result_status
                        == model.ShardState.RESULT_SUCCESS
                        and tstate.output_writer):
                    tstate.output_writer.finalize(ctx, shard_state)

        except Exception, e:
            retry_shard = self._retry_logic(e, shard_state, tstate,
                                            spec.mapreduce_id)
        finally:
            context.Context._set(None)

        config = util.create_datastore_write_config(spec)

        @db.transactional(retries=5)
        def tx():
            fresh_shard_state = db.get(
                model.ShardState.get_key_by_shard_id(shard_id))
            if not fresh_shard_state:
                raise db.Rollback()
            if (not fresh_shard_state.active or "worker_active_state_collision"
                    in _TEST_INJECTED_FAULTS):
                logging.error(
                    "Shard %s is not active. Possible spurious task "
                    "execution. Dropping this task.", shard_id)
                logging.error("Datastore's %s", str(fresh_shard_state))
                logging.error("Slice's %s", str(shard_state))
                return
Esempio n. 33
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path=None,
                   queue_name="default",
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False):

        mapper_spec.get_handler()

        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        mapper_output_writer_class = mapper_spec.output_writer_class()
        if mapper_output_writer_class:
            mapper_output_writer_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = util.HugeTask(url=base_path +
                                            "/kickoffjob_callback",
                                            params=kickoff_params,
                                            eta=eta,
                                            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            parent = None
            if not transactional:

                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)
                parent = state

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:

                    pass
                else:
                    return
            kickoff_worker_task.add(queue_name,
                                    transactional=True,
                                    parent=parent)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id
Esempio n. 34
0
  def _save_state_and_schedule_next(self, shard_state, tstate, retry_shard):
    """Save state to datastore and schedule next task for this shard.

    Update and save shard state. Schedule next slice if needed.
    This method handles interactions with datastore and taskqueue.

    Args:
      shard_state: model.ShardState for current shard.
      tstate: model.TransientShardState for current shard.
      retry_shard: whether to retry shard.
    """

    spec = tstate.mapreduce_spec
    config = util.create_datastore_write_config(spec)


    task = None
    if retry_shard:


      task = self._state_to_task(tstate, shard_state)
    elif shard_state.active:
      shard_state.advance_for_next_slice()
      tstate.advance_for_next_slice()
      countdown = self._get_countdown_for_next_slice(spec)
      task = self._state_to_task(tstate, shard_state, countdown=countdown)
    queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")

    @db.transactional(retries=5)
    def _tx():
      fresh_shard_state = model.ShardState.get_by_shard_id(tstate.shard_id)
      if not fresh_shard_state:
        raise db.Rollback()
      if (not fresh_shard_state.active or
          "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
        logging.error("Shard %s is not active. Possible spurious task "
                      "execution. Dropping this task.", tstate.shard_id)
        logging.error("Datastore's %s", str(fresh_shard_state))
        logging.error("Slice's %s", str(shard_state))
        return
      fresh_shard_state.copy_from(shard_state)
      fresh_shard_state.put(config=config)




      if fresh_shard_state.active:
        assert task is not None


        self._add_task(task, spec, queue_name)

    try:
      _tx()
    except (datastore_errors.Error,
            taskqueue.Error,
            runtime.DeadlineExceededError,
            apiproxy_errors.Error), e:
      logging.error(
          "Can't transactionally continue shard. "
          "Will retry slice %s %s for the %s time.",
          tstate.shard_id,
          tstate.slice_id,
          self.task_retry_count() + 1)
      shard_state.slice_id -= 1
      self._try_free_lease(shard_state)
      raise e
Esempio n. 35
0
  def handle(self):
    """Handle request."""
    spec = model.MapreduceSpec.from_json_str(
        self.request.get("mapreduce_spec"))


    logging.debug("post: id=%s headers=%s spec=%s",
                  spec.mapreduce_id, self.request.headers,
                  self.request.get("mapreduce_spec"))

    state, control = db.get([
        model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not state:
      logging.error("State not found for mapreduce_id '%s'; skipping",
                    spec.mapreduce_id)
      return

    shard_states = model.ShardState.find_by_mapreduce_state(state)
    if state.active and len(shard_states) != spec.mapper.shard_count:

      logging.error("Incorrect number of shard states: %d vs %d; "
                    "aborting job '%s'",
                    len(shard_states), spec.mapper.shard_count,
                    spec.mapreduce_id)
      state.active = False
      state.result_status = model.MapreduceState.RESULT_FAILED
      model.MapreduceControl.abort(spec.mapreduce_id)

    active_shards = [s for s in shard_states if s.active]
    failed_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_FAILED]
    aborted_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_ABORTED]
    if state.active:
      state.active = bool(active_shards)
      state.active_shards = len(active_shards)
      state.failed_shards = len(failed_shards)
      state.aborted_shards = len(aborted_shards)

    if (not state.active and control and
        control.command == model.MapreduceControl.ABORT):

      logging.info("Abort signal received for job '%s'", spec.mapreduce_id)
      state.result_status = model.MapreduceState.RESULT_ABORTED

    if not state.active:
      state.active_shards = 0
      if not state.result_status:

        if [s for s in shard_states
            if s.result_status != model.ShardState.RESULT_SUCCESS]:
          state.result_status = model.MapreduceState.RESULT_FAILED
        else:
          state.result_status = model.MapreduceState.RESULT_SUCCESS
        logging.info("Final result for job '%s' is '%s'",
                     spec.mapreduce_id, state.result_status)



    self.aggregate_state(state, shard_states)
    poll_time = state.last_poll_time
    state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

    if not state.active:
      ControllerCallbackHandler._finalize_job(
          spec, state, self.base_path())
      return
    else:
      config = util.create_datastore_write_config(spec)
      state.put(config=config)

    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    self.refill_quotas(poll_time, processing_rate, active_shards)
    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, self.serial_id() + 1)
Esempio n. 36
0
    def handle(self):
        """Handle request."""
        tstate = model.TransientShardState.from_request(self.request)
        spec = tstate.mapreduce_spec
        self._start_time = self._time()
        shard_id = tstate.shard_id

        shard_state, control = db.get([
            model.ShardState.get_key_by_shard_id(shard_id),
            model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
        ])
        if not shard_state:

            logging.error("State not found for shard ID %r; shutting down",
                          shard_id)
            return

        if not shard_state.active:
            logging.error(
                "Shard is not active. Looks like spurious task execution.")
            return

        ctx = context.Context(spec,
                              shard_state,
                              task_retry_count=self.task_retry_count())

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info("Abort command received by shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            if tstate.output_writer:
                tstate.output_writer.finalize(ctx, shard_state.shard_number)

            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = tstate.input_reader

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(
                quota.QuotaManager(memcache.Client()), shard_id,
                _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        context.Context._set(ctx)
        try:

            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                try:

                    if not quota_consumer or quota_consumer.consume():
                        for entity in input_reader:
                            if isinstance(entity, db.Model):
                                shard_state.last_work_item = repr(entity.key())
                            else:
                                shard_state.last_work_item = repr(entity)[:100]

                            scan_aborted = not self.process_data(
                                entity, input_reader, ctx, tstate)

                            if (quota_consumer and not scan_aborted
                                    and not quota_consumer.consume()):
                                scan_aborted = True
                            if scan_aborted:
                                break
                    else:
                        scan_aborted = True

                    if not scan_aborted:
                        logging.info(
                            "Processing done for shard %d of job '%s'",
                            shard_state.shard_number, shard_state.mapreduce_id)

                        if quota_consumer:
                            quota_consumer.put(1)
                        shard_state.active = False
                        shard_state.result_status = model.ShardState.RESULT_SUCCESS

                    operation.counters.Increment(
                        context.COUNTER_MAPPER_WALLTIME_MS,
                        int((time.time() - self._start_time) * 1000))(ctx)

                    ctx.flush()
                except errors.FailJobError, e:
                    logging.error("Job failed: %s", e)
                    scan_aborted = True
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_FAILED

            if not shard_state.active:

                if tstate.output_writer:
                    tstate.output_writer.finalize(ctx,
                                                  shard_state.shard_number)

            config = util.create_datastore_write_config(spec)

            @db.transactional(retries=5)
            def tx():
                fresh_shard_state = db.get(
                    model.ShardState.get_key_by_shard_id(shard_id))
                if (not fresh_shard_state.active
                        or "worker_active_state_collision"
                        in _TEST_INJECTED_FAULTS):
                    shard_state.active = False
                    logging.error(
                        "Spurious task execution. Aborting the shard.")
                    return
                fresh_shard_state.copy_from(shard_state)
                fresh_shard_state.put(config=config)

            tx()
Esempio n. 37
0
  def _start_map(cls,
                 name,
                 mapper_spec,
                 mapreduce_params,
                 base_path=None,
                 queue_name=None,
                 eta=None,
                 countdown=None,
                 hooks_class_name=None,
                 _app=None,
                 transactional=False,
                 parent_entity=None):
    queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                              "default")
    if queue_name[0] == "_":

      queue_name = "default"

    if not transactional and parent_entity:
      raise Exception("Parent shouldn't be specfied "
                      "for non-transactional starts.")


    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)

    mapper_output_writer_class = mapper_spec.output_writer_class()
    if mapper_output_writer_class:
      mapper_output_writer_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)


    ctx = context.Context(mapreduce_spec, None)
    context.Context._set(ctx)
    try:
      mapper_spec.get_handler()
    finally:
      context.Context._set(None)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
      kickoff_params["app"] = _app
    kickoff_worker_task = util.HugeTask(
        url=base_path + "/kickoffjob_callback",
        params=kickoff_params,
        eta=eta,
        countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def start_mapreduce():
      parent = parent_entity
      if not transactional:



        state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
        state.mapreduce_spec = mapreduce_spec
        state.active = True
        state.active_shards = mapper_spec.shard_count
        if _app:
          state.app_id = _app
        state.put(config=config)
        parent = state

      if hooks is not None:
        try:
          hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
        except NotImplementedError:

          pass
        else:
          return
      kickoff_worker_task.add(queue_name, transactional=True, parent=parent)

    if transactional:
      start_mapreduce()
    else:
      db.run_in_transaction(start_mapreduce)

    return mapreduce_id
Esempio n. 38
0
  def _try_acquire_lease(self, shard_state, tstate):
    """Validate datastore and the task payload are consistent.

    If so, attempt to get a lease on this slice's execution.
    See model.ShardState doc on slice_start_time.

    Args:
      shard_state: model.ShardState from datastore.
      tstate: model.TransientShardState from taskqueue paylod.

    Returns:
      True if lease is acquired. False if this task should be dropped. Only
    old tasks (comparing to datastore state) will be dropped. Future tasks
    are retried until they naturally become old so that we don't ever stuck
    MR.

    Raises:
      Exception: if the task should be retried by taskqueue.
    """

    if not shard_state:
      logging.warning("State not found for shard %s; Possible spurious task "
                      "execution. Dropping this task.",
                      tstate.shard_id)
      return False

    if not shard_state.active:
      logging.warning("Shard %s is not active. Possible spurious task "
                      "execution. Dropping this task.", tstate.shard_id)
      logging.warning(str(shard_state))
      return False


    if shard_state.retries > tstate.retries:
      logging.warning(
          "Got shard %s from previous shard retry %s. Possible spurious "
          "task execution. Dropping this task.",
          tstate.shard_id,
          tstate.retries)
      logging.warning(str(shard_state))
      return False
    elif shard_state.retries < tstate.retries:



      raise ValueError(
          "ShardState for %s is behind slice. Waiting for it to catch up",
          shard_state.shard_id)



    if shard_state.slice_id > tstate.slice_id:
      logging.warning(
          "Task %s-%s is behind ShardState %s. Dropping task.""",
          tstate.shard_id, tstate.slice_id, shard_state.slice_id)
      return False



    elif shard_state.slice_id < tstate.slice_id:
      logging.warning(
          "Task %s-%s is ahead of ShardState %s. Waiting for it to catch up.",
          tstate.shard_id, tstate.slice_id, shard_state.slice_id)
      raise errors.RetrySliceError("Raise an error to trigger retry.")



    if shard_state.slice_start_time:
      countdown = self._lease_countdown(shard_state)
      if countdown > 0:
        logging.warning(
            "Last retry of slice %s-%s may be still running."
            "Will try again in %s seconds", tstate.shard_id, tstate.slice_id,
            countdown)



        time.sleep(countdown)
        raise errors.RetrySliceError("Raise an error to trigger retry")

      else:
        if not self._old_request_ended(shard_state):
          logging.warning(
              "Last retry of slice %s-%s is still in flight with request_id "
              "%s. Will try again later.", tstate.shard_id, tstate.slice_id,
              shard_state.slice_request_id)
          raise errors.RetrySliceError("Raise an error to trigger retry")


    config = util.create_datastore_write_config(tstate.mapreduce_spec)
    @db.transactional(retries=5)
    def _tx():
      """Use datastore to set slice_start_time to now.

      If failed for any reason, raise error to retry the task (hence all
      the previous validation code). The task would die naturally eventually.
      """
      fresh_state = model.ShardState.get_by_shard_id(tstate.shard_id)
      if not fresh_state:
        logging.error("ShardState missing.")
        raise db.Rollback()
      if (fresh_state.active and
          fresh_state.slice_id == shard_state.slice_id and
          fresh_state.slice_start_time == shard_state.slice_start_time):
        fresh_state.slice_start_time = datetime.datetime.now()
        fresh_state.slice_request_id = os.environ.get("REQUEST_LOG_ID")
        fresh_state.put(config=config)
      else:
        logging.warning(
            "Contention on slice %s-%s execution. Will retry again.",
            tstate.shard_id, tstate.slice_id)

        time.sleep(random.randrange(1, 5))

        raise errors.RetrySliceError()

    _tx()
    return True
Esempio n. 39
0
  def handle(self):
    """Handle request."""
    spec = model.MapreduceSpec.from_json_str(
        self.request.get("mapreduce_spec"))


    logging.debug("post: id=%s headers=%s spec=%s",
                  spec.mapreduce_id, self.request.headers,
                  self.request.get("mapreduce_spec"))

    state, control = db.get([
        model.MapreduceState.get_key_by_job_id(spec.mapreduce_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not state:
      logging.error("State not found for mapreduce_id '%s'; skipping",
                    spec.mapreduce_id)
      return

    shard_states = model.ShardState.find_by_mapreduce_id(spec.mapreduce_id)
    if state.active and len(shard_states) != spec.mapper.shard_count:

      logging.error("Incorrect number of shard states: %d vs %d; "
                    "aborting job '%s'",
                    len(shard_states), spec.mapper.shard_count,
                    spec.mapreduce_id)
      state.active = False
      state.result_status = model.MapreduceState.RESULT_FAILED
      model.MapreduceControl.abort(spec.mapreduce_id)

    active_shards = [s for s in shard_states if s.active]
    failed_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_FAILED]
    aborted_shards = [s for s in shard_states
                     if s.result_status == model.ShardState.RESULT_ABORTED]
    if state.active:
      state.active = bool(active_shards)
      state.active_shards = len(active_shards)
      state.failed_shards = len(failed_shards)
      state.aborted_shards = len(aborted_shards)

    if (not state.active and control and
        control.command == model.MapreduceControl.ABORT):

      logging.info("Abort signal received for job '%s'", spec.mapreduce_id)
      state.result_status = model.MapreduceState.RESULT_ABORTED

    if not state.active:
      state.active_shards = 0
      if not state.result_status:

        if [s for s in shard_states
            if s.result_status != model.ShardState.RESULT_SUCCESS]:
          state.result_status = model.MapreduceState.RESULT_FAILED
        else:
          state.result_status = model.MapreduceState.RESULT_SUCCESS
        logging.info("Final result for job '%s' is '%s'",
                     spec.mapreduce_id, state.result_status)



    self.aggregate_state(state, shard_states)
    poll_time = state.last_poll_time
    state.last_poll_time = datetime.datetime.utcfromtimestamp(self._time())

    config = util.create_datastore_write_config(spec)

    if not state.active:


      if spec.mapper.output_writer_class():
        spec.mapper.output_writer_class().finalize_job(state)
      def put_state(state):
        state.put(config=config)
        done_callback = spec.params.get(
            model.MapreduceSpec.PARAM_DONE_CALLBACK)
        if done_callback:
          done_task = taskqueue.Task(
              url=done_callback,
              headers={"Mapreduce-Id": spec.mapreduce_id},
              method=spec.params.get("done_callback_method", "POST"))
          queue_name = spec.params.get(
              model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
              "default")

          if not _run_task_hook(spec.get_hooks(),
                                "enqueue_done_task",
                                done_task,
                                queue_name):
            done_task.add(queue_name, transactional=True)
        FinalizeJobHandler.schedule(self.base_path(), spec)

      db.run_in_transaction(put_state, state)
      return
    else:
      state.put(config=config)

    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    self.refill_quotas(poll_time, processing_rate, active_shards)
    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, self.serial_id() + 1)
Esempio n. 40
0
  def _try_acquire_lease(self, shard_state, tstate):
    """Validate datastore and the task payload are consistent.

    If so, attempt to get a lease on this slice's execution.
    See model.ShardState doc on slice_start_time.

    Args:
      shard_state: model.ShardState from datastore.
      tstate: model.TransientShardState from taskqueue paylod.

    Returns:
      A fresh shard state entity if lease is acquired. A _TASK_STATE
    enum if this task should be retried or dropped. Only old tasks
    (comparing to datastore state) will be dropped. Future tasks are
    retried until they naturally become old so that we don't ever stuck MR.
    """

    if not shard_state:
      logging.warning("State not found for shard %s; Possible spurious task "
                      "execution. Dropping this task.",
                      tstate.shard_id)
      return self._TASK_STATE.DROP_TASK

    if not shard_state.active:
      logging.warning("Shard %s is not active. Possible spurious task "
                      "execution. Dropping this task.", tstate.shard_id)
      logging.warning(str(shard_state))
      return self._TASK_STATE.DROP_TASK


    if shard_state.retries > tstate.retries:
      logging.warning(
          "Got shard %s from previous shard retry %s. Possible spurious "
          "task execution. Dropping this task.",
          tstate.shard_id,
          tstate.retries)
      logging.warning(str(shard_state))
      return self._TASK_STATE.DROP_TASK
    elif shard_state.retries < tstate.retries:



      logging.warning(
          "ShardState for %s is behind slice. Waiting for it to catch up",
          shard_state.shard_id)
      return self._TASK_STATE.RETRY_TASK



    if shard_state.slice_id > tstate.slice_id:
      logging.warning(
          "Task %s-%s is behind ShardState %s. Dropping task.""",
          tstate.shard_id, tstate.slice_id, shard_state.slice_id)
      return self._TASK_STATE.DROP_TASK



    elif shard_state.slice_id < tstate.slice_id:
      logging.warning(
          "Task %s-%s is ahead of ShardState %s. Waiting for it to catch up.",
          tstate.shard_id, tstate.slice_id, shard_state.slice_id)
      return self._TASK_STATE.RETRY_TASK



    if shard_state.slice_start_time:
      countdown = self._wait_time(shard_state,
                                  _LEASE_GRACE_PERIOD + _SLICE_DURATION_SEC)
      if countdown > 0:
        logging.warning(
            "Last retry of slice %s-%s may be still running."
            "Will try again in %s seconds", tstate.shard_id, tstate.slice_id,
            countdown)



        time.sleep(countdown)
        return self._TASK_STATE.RETRY_TASK

      else:
        if self._wait_time(shard_state, _REQUEST_EVENTUAL_TIMEOUT):
          if not self._old_request_ended(shard_state):
            logging.warning(
                "Last retry of slice %s-%s is still in flight with request_id "
                "%s. Will try again later.", tstate.shard_id, tstate.slice_id,
                shard_state.slice_request_id)
            return self._TASK_STATE.RETRY_TASK
        else:
          logging.warning(
              "Last retry of slice %s-%s has no log entry and has"
              "timed out after %s seconds",
              tstate.shard_id, tstate.slice_id, _REQUEST_EVENTUAL_TIMEOUT)


    config = util.create_datastore_write_config(tstate.mapreduce_spec)
    @db.transactional(retries=5)
    def _tx():
      """Use datastore to set slice_start_time to now.

      If failed for any reason, raise error to retry the task (hence all
      the previous validation code). The task would die naturally eventually.

      Returns:
        Fresh shard state if state commit succeeded. None otherwise.
      """
      fresh_state = model.ShardState.get_by_shard_id(tstate.shard_id)
      if not fresh_state:
        logging.error("ShardState missing.")
        raise db.Rollback()
      if (fresh_state.active and
          fresh_state.slice_id == shard_state.slice_id and
          fresh_state.slice_start_time == shard_state.slice_start_time):
        fresh_state.slice_start_time = datetime.datetime.now()
        fresh_state.slice_request_id = os.environ.get("REQUEST_LOG_ID")
        fresh_state.acquired_once = True
        fresh_state.put(config=config)
        return fresh_state
      else:
        logging.warning(
            "Contention on slice %s-%s execution. Will retry again.",
            tstate.shard_id, tstate.slice_id)

        time.sleep(random.randrange(1, 5))
        return self._TASK_STATE.RETRY_TASK

    return _tx()