def _start_map(cls, name, mapper_spec, mapreduce_params,
               base_path="/mapreduce", queue_name="default",
               eta=None, countdown=None, hooks_class_name=None,
               _app=None, transactional=False):
    """Validate a mapper spec and schedule the kickoff task for a new job.

    Args:
      cls: not referenced by this function's body.
      name: job name, handed to model.MapreduceSpec.
      mapper_spec: mapper specification. Its handler and input reader are
        validated, and its JSON form and shard_count are used.
      mapreduce_params: handed to model.MapreduceSpec unchanged.
      base_path: URL prefix; "/kickoffjob_callback" is appended to it to
        form the kickoff task URL.
      queue_name: queue the kickoff task is enqueued on.
      eta: forwarded to taskqueue.Task.
      countdown: forwarded to taskqueue.Task.
      hooks_class_name: handed to model.MapreduceSpec.
      _app: when truthy, sent as the "app" task parameter and stored on
        the state entity as app_id.
      transactional: when true, no MapreduceState entity is written and
        the kickoff step runs directly instead of through
        db.run_in_transaction (presumably the caller already holds a
        transaction -- confirm against callers).

    Returns:
      The id generated for the new mapreduce job.
    """
    # Validation first: any failure here surfaces before an id is allocated.
    mapper_spec.get_handler()
    reader_cls = mapper_spec.input_reader_class()
    reader_cls.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    task_params = {"mapreduce_spec": spec.to_json_str()}
    if _app:
        task_params["app"] = _app
    kickoff_task = taskqueue.Task(
        url=base_path + "/kickoffjob_callback",
        params=task_params,
        eta=eta,
        countdown=countdown)

    job_hooks = spec.get_hooks()
    write_config = util.create_datastore_write_config(spec)

    def save_initial_state():
        """Create and store the MapreduceState entity for this job."""
        job_state = model.MapreduceState.create_new(spec.mapreduce_id)
        job_state.mapreduce_spec = spec
        job_state.active = True
        job_state.active_shards = mapper_spec.shard_count
        if _app:
            job_state.app_id = _app
        job_state.put(config=write_config)

    def start_mapreduce():
        """Store state (non-transactional starts only), then enqueue."""
        if not transactional:
            save_initial_state()

        if job_hooks is not None:
            try:
                job_hooks.enqueue_kickoff_task(kickoff_task, queue_name)
                # The hooks took care of enqueueing; nothing left to do.
                return
            except NotImplementedError:
                # Hooks do not implement enqueueing: use the default add.
                pass
        kickoff_task.add(queue_name, transactional=True)

    if not transactional:
        db.run_in_transaction(start_mapreduce)
    else:
        start_mapreduce()
    return mapreduce_id
def _start_map(cls, name, mapper_spec, mapreduce_params,
               base_path=None, queue_name=None,
               eta=None, countdown=None, hooks_class_name=None,
               _app=None, transactional=False, parent_entity=None):
    """See control.start_map.

    Validates the mapper's input reader, optional output writer and
    handler, writes the initial MapreduceState for non-transactional
    starts, and delegates enqueueing to cls._add_kickoff_task.

    Args:
      cls: class providing _add_kickoff_task.
      name: job name, handed to model.MapreduceSpec.
      mapper_spec: mapper specification to validate and serialize.
      mapreduce_params: handed to model.MapreduceSpec unchanged.
      base_path: forwarded to cls._add_kickoff_task.
      queue_name: forwarded to cls._add_kickoff_task.
      eta: forwarded to cls._add_kickoff_task.
      countdown: forwarded to cls._add_kickoff_task.
      hooks_class_name: handed to model.MapreduceSpec.
      _app: when truthy, stored on the state entity as app_id; always
        forwarded to cls._add_kickoff_task.
      transactional: when true, no MapreduceState entity is written here.
      parent_entity: only accepted for transactional starts. For
        non-transactional starts the newly stored state entity is used
        as the parent instead.

    Returns:
      The id generated for the new mapreduce job.

    Raises:
      Exception: if parent_entity is given for a non-transactional start.
    """
    if not transactional and parent_entity:
        raise Exception("Parent shouldn't be specfied "
                        "for non-transactional starts.")

    reader_cls = mapper_spec.input_reader_class()
    reader_cls.validate(mapper_spec)
    # An output writer is optional; validate it only when one is configured.
    writer_cls = mapper_spec.output_writer_class()
    if writer_cls:
        writer_cls.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    # Touch the handler attribute while a context is installed; the value
    # is discarded (presumably this only checks that the handler can be
    # resolved -- confirm against MapperSpec). The context is always reset.
    validation_ctx = context.Context(spec, None)
    context.Context._set(validation_ctx)
    try:
        mapper_spec.handler
    finally:
        context.Context._set(None)

    def store_initial_state():
        """Create, populate and store the MapreduceState; return it."""
        job_state = model.MapreduceState.create_new(spec.mapreduce_id)
        job_state.mapreduce_spec = spec
        job_state.active = True
        job_state.active_shards = mapper_spec.shard_count
        if _app:
            job_state.app_id = _app
        write_config = util.create_datastore_write_config(spec)
        job_state.put(config=write_config)
        return job_state

    if not transactional:
        # The freshly stored state becomes the parent for the kickoff task.
        parent_entity = store_initial_state()

    cls._add_kickoff_task(
        base_path, spec, eta, countdown,
        parent_entity, queue_name, transactional, _app)
    return mapreduce_id
def _start_map(cls, name, mapper_spec, mapreduce_params,
               base_path="/mapreduce", queue_name="default",
               eta=None, countdown=None, hooks_class_name=None,
               _app=None):
    """Create the state entity for a new job and schedule its kickoff task.

    Args:
      cls: not referenced by this function's body.
      name: job name, handed to model.MapreduceSpec.
      mapper_spec: mapper specification. Its handler and input reader are
        validated, and its JSON form and shard_count are used.
      mapreduce_params: handed to model.MapreduceSpec unchanged.
      base_path: URL prefix; "/kickoffjob_callback" is appended to it to
        form the kickoff task URL.
      queue_name: queue the kickoff task is enqueued on.
      eta: forwarded to taskqueue.Task.
      countdown: forwarded to taskqueue.Task.
      hooks_class_name: handed to model.MapreduceSpec.
      _app: when truthy, stored on the state entity as app_id.

    Returns:
      The id or name of the state entity's key, which is also the id
      given to the mapreduce spec.
    """
    # Validation first: any failure here surfaces before state is created.
    mapper_spec.get_handler()
    reader_cls = mapper_spec.input_reader_class()
    reader_cls.validate(mapper_spec)

    state = model.MapreduceState.create_new()
    spec = model.MapreduceSpec(
        name,
        state.key().id_or_name(),
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    state.mapreduce_spec = spec
    state.active = True
    state.active_shards = mapper_spec.shard_count
    if _app:
        state.app_id = _app
    # NOTE(review): "char_url" may be a typo for "chart_url" -- confirm
    # against model.MapreduceState before changing it.
    state.char_url = ""
    state.sparkline_url = ""

    def schedule_mapreduce(job_state, task_eta, task_countdown):
        """Store the state, then enqueue the kickoff task."""
        job_state.put()
        task_params = {
            "mapreduce_spec": job_state.mapreduce_spec.to_json_str(),
        }
        kickoff_task = taskqueue.Task(
            url=base_path + "/kickoffjob_callback",
            params=task_params,
            eta=task_eta,
            countdown=task_countdown)

        job_hooks = spec.get_hooks()
        if job_hooks is not None:
            try:
                job_hooks.enqueue_kickoff_task(kickoff_task, queue_name)
                # The hooks took care of enqueueing; nothing left to do.
                return
            except NotImplementedError:
                # Hooks do not implement enqueueing: use the default add.
                pass
        kickoff_task.add(queue_name, transactional=True)

    db.run_in_transaction(schedule_mapreduce, state, eta, countdown)
    return state.key().id_or_name()
def submit(cls, job_config, in_xg_transaction=False):
    """Submit a map job and return a handle to it.

    The job state is saved and the kickoff task is added inside a single
    datastore transaction.

    Args:
      job_config: an instance of map_job.MapJobConfig.
      in_xg_transaction: selects the transaction scope used to start the
        job. If True, an already opened cross-group transaction must
        exist, and the job uses one entity group from it. If False, the
        job is started in its own independent transaction, regardless of
        any transaction scope that is already open.

    Returns:
      A Job instance representing the submitted job.
    """
    cls.__validate_job_config(job_config)

    mapper_spec = job_config._get_mapper_spec()
    mr_params = job_config._get_mr_params()
    spec = model.MapreduceSpec(
        job_config.job_name,
        job_config.job_id,
        mapper_spec.to_json(),
        mr_params,
        util._obj_to_path(job_config._hooks_cls))

    # MANDATORY joins the caller's transaction; INDEPENDENT opens a new one.
    propagation = db.MANDATORY if in_xg_transaction else db.INDEPENDENT

    @db.transactional(propagation=propagation)
    def _txn():
        """Save the job state and add the kickoff task atomically."""
        saved_state = cls.__create_and_save_state(job_config, spec)
        cls.__add_kickoff_task(job_config, spec)
        return saved_state

    return cls(_txn())
def _start_map(cls, name, mapper_spec, mapreduce_params,
               base_path=None, queue_name=None,
               eta=None, countdown=None, hooks_class_name=None,
               _app=None, transactional=False, parent_entity=None):
    """Validate a mapper spec and schedule the kickoff task for a new job.

    Args:
      cls: not referenced by this function's body.
      name: job name, handed to model.MapreduceSpec.
      mapper_spec: mapper specification. Its handler, input reader and
        optional output writer are validated, and its JSON form and
        shard_count are used.
      mapreduce_params: handed to model.MapreduceSpec unchanged.
      base_path: URL prefix; "/kickoffjob_callback" is appended to it to
        form the kickoff task URL. None falls back to "/mapreduce".
      queue_name: queue the kickoff task is enqueued on. When falsy, the
        HTTP_X_APPENGINE_QUEUENAME environment variable is used, and
        "default" if that is unset or empty. Names starting with "_" are
        replaced by "default".
      eta: forwarded to util.HugeTask.
      countdown: forwarded to util.HugeTask.
      hooks_class_name: handed to model.MapreduceSpec.
      _app: when truthy, sent as the "app" task parameter and stored on
        the state entity as app_id.
      transactional: when true, no MapreduceState entity is written and
        the kickoff step runs directly instead of through
        db.run_in_transaction (presumably the caller already holds a
        transaction -- confirm against callers).
      parent_entity: parent passed to the task's add() for transactional
        starts. Must not be given for non-transactional starts, where
        the newly stored state entity is used instead.

    Returns:
      The id generated for the new mapreduce job.

    Raises:
      Exception: if parent_entity is given for a non-transactional start.
    """
    # An unset *or empty* queue header falls back to "default"; indexing
    # the first character of an empty name used to raise IndexError.
    queue_name = (queue_name
                  or os.environ.get("HTTP_X_APPENGINE_QUEUENAME")
                  or "default")
    if queue_name.startswith("_"):
        # Names with a leading underscore are not used for the kickoff.
        queue_name = "default"

    # The declared default is None, which cannot be concatenated with the
    # callback suffix below; fall back to the conventional mount point.
    if base_path is None:
        base_path = "/mapreduce"

    if not transactional and parent_entity:
        raise Exception("Parent shouldn't be specfied "
                        "for non-transactional starts.")

    # Validation first: any failure here surfaces before an id is allocated.
    mapper_spec.get_handler()
    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)
    mapper_output_writer_class = mapper_spec.output_writer_class()
    if mapper_output_writer_class:
        mapper_output_writer_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
        kickoff_params["app"] = _app
    kickoff_worker_task = util.HugeTask(
        url=base_path + "/kickoffjob_callback",
        params=kickoff_params,
        eta=eta,
        countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def start_mapreduce():
        """Store state (non-transactional starts only), then enqueue."""
        parent = parent_entity
        if not transactional:
            state = model.MapreduceState.create_new(
                mapreduce_spec.mapreduce_id)
            state.mapreduce_spec = mapreduce_spec
            state.active = True
            state.active_shards = mapper_spec.shard_count
            if _app:
                state.app_id = _app
            state.put(config=config)
            # The freshly stored state becomes the task's parent.
            parent = state

        if hooks is not None:
            try:
                hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
            except NotImplementedError:
                # Hooks do not implement enqueueing: use the default add.
                pass
            else:
                return
        kickoff_worker_task.add(queue_name, transactional=True,
                                parent=parent)

    if transactional:
        start_mapreduce()
    else:
        db.run_in_transaction(start_mapreduce)
    return mapreduce_id