def _schedule_slice(cls,
                    shard_state,
                    transient_shard_state,
                    queue_name=None,
                    eta=None,
                    countdown=None):
    """Schedule slice scanning by adding it to the task queue.

    A worker task pointing at ``<base_path>/worker_callback`` is built from
    the transient shard state. The task is first offered to the
    "enqueue_worker_task" hook via ``_run_task_hook``; only when that call
    returns a false value is the task added to the queue directly.

    Args:
      shard_state: An instance of ShardState. Used as the parent when the
        task is added directly.
      transient_shard_state: An instance of TransientShardState. Supplies the
        base path, mapreduce spec, shard/slice ids and the task parameters.
      queue_name: Optional queue to run on; uses the current queue of
        execution or the default queue if unspecified.
      eta: Absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: Time in seconds into the future that this MR should execute.
        Defaults to zero.
    """
    base_path = transient_shard_state.base_path
    mapreduce_spec = transient_shard_state.mapreduce_spec

    task_name = MapperWorkerCallbackHandler.get_task_name(
        transient_shard_state.shard_id,
        transient_shard_state.slice_id)
    # Fall back to the queue named in the request environment, then to
    # "default".
    queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                              "default")

    worker_task = util.HugeTask(url=base_path + "/worker_callback",
                                params=transient_shard_state.to_dict(),
                                name=task_name,
                                eta=eta,
                                countdown=countdown)

    if not _run_task_hook(mapreduce_spec.get_hooks(),
                          "enqueue_worker_task",
                          worker_task,
                          queue_name):
        try:
            worker_task.add(queue_name, parent=shard_state)
        # "as" form instead of the Python 2-only "except X, e" spelling.
        except (taskqueue.TombstonedTaskError,
                taskqueue.TaskAlreadyExistsError) as e:
            # The task name is already taken (or tombstoned); log and carry
            # on rather than failing the caller.
            logging.warning("Task %r with params %r already exists. %s: %s",
                            task_name,
                            transient_shard_state.to_dict(),
                            e.__class__,
                            e)
def reschedule(cls,
               mapreduce_state,
               base_path,
               mapreduce_spec,
               serial_id,
               queue_name=None):
    """Schedule new update status callback task.

    A controller task pointing at ``<base_path>/controller_callback`` is
    built with a countdown of ``_CONTROLLER_PERIOD_SEC``. The task is first
    offered to the "enqueue_controller_task" hook via ``_run_task_hook``;
    only when that call returns a false value is the task added to the
    queue directly.

    Args:
      mapreduce_state: mapreduce state as model.MapreduceState. Used as the
        parent when the task is added directly.
      base_path: mapreduce handlers url base path as string.
      mapreduce_spec: mapreduce specification as MapreduceSpec.
      serial_id: id of the invocation as int.
      queue_name: The queue to schedule this task on. Will use the current
        queue of execution if not supplied.
    """
    task_name = ControllerCallbackHandler.get_task_name(
        mapreduce_spec, serial_id)
    task_params = ControllerCallbackHandler.controller_parameters(
        mapreduce_spec, serial_id)
    if not queue_name:
        # Fall back to the queue named in the request environment, then to
        # "default".
        queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")

    controller_callback_task = util.HugeTask(
        url=base_path + "/controller_callback",
        name=task_name,
        params=task_params,
        countdown=_CONTROLLER_PERIOD_SEC)

    if not _run_task_hook(mapreduce_spec.get_hooks(),
                          "enqueue_controller_task",
                          controller_callback_task,
                          queue_name):
        try:
            controller_callback_task.add(queue_name, parent=mapreduce_state)
        # "as" form instead of the Python 2-only "except X, e" spelling.
        except (taskqueue.TombstonedTaskError,
                taskqueue.TaskAlreadyExistsError) as e:
            # The task name is already taken (or tombstoned); log and carry
            # on rather than failing the caller.
            logging.warning(
                "Task %r with params %r already exists. %s: %s",
                task_name, task_params, e.__class__, e)
def _start_map(cls,
               name,
               mapper_spec,
               mapreduce_params,
               base_path=None,
               queue_name=None,
               eta=None,
               countdown=None,
               hooks_class_name=None,
               _app=None,
               transactional=False,
               parent_entity=None):
    """Validate a mapper spec and enqueue the kickoff task for a new job.

    Args:
      name: job name, passed through to model.MapreduceSpec.
      mapper_spec: mapper specification. Its input reader, optional output
        writer and handler are checked before anything is enqueued.
      mapreduce_params: job parameters, passed through to
        model.MapreduceSpec.
      base_path: url prefix for the kickoff callback. Falls back to
        "/mapreduce" when not supplied.
      queue_name: queue to enqueue the kickoff task on. When not supplied,
        the HTTP_X_APPENGINE_QUEUENAME environment value is used, then
        "default". Names starting with "_" are replaced by "default".
      eta: passed to the kickoff task as its eta.
      countdown: passed to the kickoff task as its countdown.
      hooks_class_name: passed through to model.MapreduceSpec.
      _app: optional application id. When set it is added to the kickoff
        parameters under "app" and stored on the saved state.
      transactional: when true, the kickoff task is enqueued directly with
        transactional=True and no MapreduceState is saved here (presumably
        the caller already runs inside a transaction -- confirm against
        callers). When false, state is saved and the task enqueued inside
        db.run_in_transaction.
      parent_entity: parent for the kickoff task in transactional starts.
        Must not be given for non-transactional starts.

    Returns:
      The newly generated mapreduce id.

    Raises:
      Exception: if parent_entity is supplied for a non-transactional start.
    """
    if base_path is None:
        # The signature defaults to None, but the path is concatenated
        # below; use the same default as the sibling _start_map variant
        # rather than failing with a TypeError.
        base_path = "/mapreduce"

    queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                              "default")
    # startswith() instead of indexing so an empty name cannot raise
    # IndexError; an empty name is treated like a missing one.
    if not queue_name or queue_name.startswith("_"):
        # We are currently in some special queue. E.g. __cron.
        queue_name = "default"

    if not transactional and parent_entity:
        raise Exception("Parent shouldn't be specfied "
                        "for non-transactional starts.")

    # Check that reader can be instantiated and is configured correctly
    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)

    mapper_output_writer_class = mapper_spec.output_writer_class()
    if mapper_output_writer_class:
        mapper_output_writer_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(name,
                                         mapreduce_id,
                                         mapper_spec.to_json(),
                                         mapreduce_params,
                                         hooks_class_name)

    # Check that handler can be instantiated. A context is installed for
    # the duration of the check and always cleared afterwards.
    ctx = context.Context(mapreduce_spec, None)
    context.Context._set(ctx)
    try:
        mapper_spec.get_handler()
    finally:
        context.Context._set(None)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
        kickoff_params["app"] = _app
    kickoff_worker_task = util.HugeTask(
        url=base_path + "/kickoffjob_callback",
        params=kickoff_params,
        eta=eta,
        countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def start_mapreduce():
        parent = parent_entity
        if not transactional:
            # Save state in datastore so that UI can see it.
            # We can't save state in foreign transaction, but conventional UI
            # doesn't ask for transactional starts anyway.
            state = model.MapreduceState.create_new(
                mapreduce_spec.mapreduce_id)
            state.mapreduce_spec = mapreduce_spec
            state.active = True
            state.active_shards = mapper_spec.shard_count
            if _app:
                state.app_id = _app
            state.put(config=config)
            parent = state

        if hooks is not None:
            try:
                hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
            except NotImplementedError:
                # Use the default task addition implementation.
                pass
            else:
                # The hook enqueued the task itself.
                return
        kickoff_worker_task.add(queue_name, transactional=True, parent=parent)

    if transactional:
        start_mapreduce()
    else:
        db.run_in_transaction(start_mapreduce)

    return mapreduce_id
def _start_map(cls,
               name,
               mapper_spec,
               mapreduce_params,
               base_path="/mapreduce",
               queue_name="default",
               eta=None,
               countdown=None,
               hooks_class_name=None,
               _app=None,
               transactional=False):
    """Validate a mapper spec and enqueue the kickoff task for a new job.

    Args:
      name: job name, passed through to model.MapreduceSpec.
      mapper_spec: mapper specification. Its handler, input reader and
        optional output writer are checked before anything is enqueued.
      mapreduce_params: job parameters, passed through to
        model.MapreduceSpec.
      base_path: url prefix for the kickoff callback.
      queue_name: queue to enqueue the kickoff task on.
      eta: passed to the kickoff task as its eta.
      countdown: passed to the kickoff task as its countdown.
      hooks_class_name: passed through to model.MapreduceSpec.
      _app: optional application id. When set it is added to the kickoff
        parameters under "app" and stored on the saved state.
      transactional: when true, the kickoff task is enqueued directly with
        transactional=True and no MapreduceState is saved here. When false,
        state is saved and the task enqueued inside db.run_in_transaction.

    Returns:
      The newly generated mapreduce id.
    """
    # Check that handler can be instantiated.
    mapper_spec.get_handler()

    # Check that reader can be instantiated and is configured correctly
    reader_cls = mapper_spec.input_reader_class()
    reader_cls.validate(mapper_spec)

    writer_cls = mapper_spec.output_writer_class()
    if writer_cls:
        writer_cls.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
        kickoff_params["app"] = _app
    kickoff_url = base_path + "/kickoffjob_callback"
    kickoff_task = util.HugeTask(
        url=kickoff_url,
        params=kickoff_params,
        eta=eta,
        countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def save_state():
        # Save state in datastore so that UI can see it.
        # We can't save state in foreign transaction, but conventional UI
        # doesn't ask for transactional starts anyway.
        new_state = model.MapreduceState.create_new(
            mapreduce_spec.mapreduce_id)
        new_state.mapreduce_spec = mapreduce_spec
        new_state.active = True
        new_state.active_shards = mapper_spec.shard_count
        if _app:
            new_state.app_id = _app
        new_state.put(config=config)
        return new_state

    def enqueued_by_hook():
        # True only when a hooks object exists and took the task itself.
        if hooks is None:
            return False
        try:
            hooks.enqueue_kickoff_task(kickoff_task, queue_name)
        except NotImplementedError:
            # Use the default task addition implementation.
            return False
        return True

    def start_mapreduce():
        if transactional:
            parent = None
        else:
            parent = save_state()
        if enqueued_by_hook():
            return
        kickoff_task.add(queue_name, transactional=True, parent=parent)

    if not transactional:
        db.run_in_transaction(start_mapreduce)
    else:
        start_mapreduce()

    return mapreduce_id