コード例 #1
0
    def _state_to_task(cls, tstate, eta=None, countdown=None):
        """Generate task for slice according to current states.

    Args:
      tstate: An instance of TransientShardState.
      eta: Absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: Time in seconds into the future that this MR should execute.
        Defaults to zero.

    Returns:
      A util.HugeTask instance for the slice specified by current states.
    """
        base_path = tstate.base_path

        task_name = MapperWorkerCallbackHandler.get_task_name(
            tstate.shard_id, tstate.slice_id, tstate.retries)

        worker_task = util.HugeTask(url=base_path + "/worker_callback",
                                    params=tstate.to_dict(),
                                    name=task_name,
                                    eta=eta,
                                    countdown=countdown)
        return worker_task
コード例 #2
0
  def _schedule_slice(cls,
                      shard_state,
                      transient_shard_state,
                      queue_name=None,
                      eta=None,
                      countdown=None):
    """Schedule slice scanning by adding it to the task queue.

    Args:
      shard_state: An instance of ShardState.
      transient_shard_state: An instance of TransientShardState.
      queue_name: Optional queue to run on; uses the current queue of
        execution or the default queue if unspecified.
      eta: Absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: Time in seconds into the future that this MR should execute.
        Defaults to zero.
    """
    base_path = transient_shard_state.base_path
    mapreduce_spec = transient_shard_state.mapreduce_spec

    task_name = MapperWorkerCallbackHandler.get_task_name(
        transient_shard_state.shard_id,
        transient_shard_state.slice_id)
    queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                              "default")

    worker_task = util.HugeTask(url=base_path + "/worker_callback",
                                params=transient_shard_state.to_dict(),
                                name=task_name,
                                eta=eta,
                                countdown=countdown)

    if not _run_task_hook(mapreduce_spec.get_hooks(),
                          "enqueue_worker_task",
                          worker_task,
                          queue_name):
      try:
        worker_task.add(queue_name, parent=shard_state)
      except (taskqueue.TombstonedTaskError,
              taskqueue.TaskAlreadyExistsError), e:
        logging.warning("Task %r with params %r already exists. %s: %s",
                        task_name,
                        transient_shard_state.to_dict(),
                        e.__class__,
                        e)
コード例 #3
0
    def reschedule(cls,
                   mapreduce_state,
                   base_path,
                   mapreduce_spec,
                   serial_id,
                   queue_name=None):
        """Schedule new update status callback task.

    Args:
      mapreduce_state: mapreduce state as model.MapreduceState
      base_path: mapreduce handlers url base path as string.
      mapreduce_spec: mapreduce specification as MapreduceSpec.
      serial_id: id of the invocation as int.
      queue_name: The queue to schedule this task on. Will use the current
        queue of execution if not supplied.
    """
        task_name = ControllerCallbackHandler.get_task_name(
            mapreduce_spec, serial_id)
        task_params = ControllerCallbackHandler.controller_parameters(
            mapreduce_spec, serial_id)
        if not queue_name:
            queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                        "default")

        controller_callback_task = util.HugeTask(
            url=base_path + "/controller_callback",
            name=task_name,
            params=task_params,
            countdown=_CONTROLLER_PERIOD_SEC)

        if not _run_task_hook(mapreduce_spec.get_hooks(),
                              "enqueue_controller_task",
                              controller_callback_task, queue_name):
            try:
                controller_callback_task.add(queue_name,
                                             parent=mapreduce_state)
            except (taskqueue.TombstonedTaskError,
                    taskqueue.TaskAlreadyExistsError), e:
                logging.warning(
                    "Task %r with params %r already exists. %s: %s", task_name,
                    task_params, e.__class__, e)
コード例 #4
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path=None,
                   queue_name=None,
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False):
        queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                                  "default")

        mapper_spec.get_handler()

        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        mapper_output_writer_class = mapper_spec.output_writer_class()
        if mapper_output_writer_class:
            mapper_output_writer_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = util.HugeTask(url=base_path +
                                            "/kickoffjob_callback",
                                            params=kickoff_params,
                                            eta=eta,
                                            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            parent = None
            if not transactional:

                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)
                parent = state

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:

                    pass
                else:
                    return
            kickoff_worker_task.add(queue_name,
                                    transactional=True,
                                    parent=parent)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id