def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path="/mapreduce",
                   queue_name="default",
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False):
        """Build a mapreduce spec and schedule its kickoff task.

        Args:
          name: job name handed to model.MapreduceSpec.
          mapper_spec: mapper specification; it is asked for its handler and
            input reader class, serialized with to_json(), and its shard_count
            is copied onto the saved state.
          mapreduce_params: passed through to model.MapreduceSpec.
          base_path: URL prefix; "/kickoffjob_callback" is appended to it to
            form the kickoff task url.
          queue_name: queue the kickoff task is added to.
          eta: forwarded to taskqueue.Task.
          countdown: forwarded to taskqueue.Task.
          hooks_class_name: passed through to model.MapreduceSpec.
          _app: when truthy, sent as the "app" task parameter and stored as
            app_id on the saved state.
          transactional: when true, no MapreduceState is written and the start
            routine is called directly instead of through
            db.run_in_transaction (presumably the caller already holds an
            open transaction -- confirm with callers).

        Returns:
          The id generated for the new mapreduce job.
        """
        # Invoked only for its side effects; the result is discarded.
        mapper_spec.get_handler()

        reader_cls = mapper_spec.input_reader_class()
        reader_cls.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(
            name,
            mapreduce_id,
            mapper_spec.to_json(),
            mapreduce_params,
            hooks_class_name)

        task_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            task_params["app"] = _app
        kickoff_task = taskqueue.Task(
            url=base_path + "/kickoffjob_callback",
            params=task_params,
            eta=eta,
            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def _save_state():
            # Persist a fresh, active state record for this job.
            job_state = model.MapreduceState.create_new(
                mapreduce_spec.mapreduce_id)
            job_state.mapreduce_spec = mapreduce_spec
            job_state.active = True
            job_state.active_shards = mapper_spec.shard_count
            if _app:
                job_state.app_id = _app
            job_state.put(config=config)

        def _enqueue_kickoff():
            # Hooks get the first chance to enqueue the task; a hook that
            # raises NotImplementedError defers to the default enqueue below.
            handled_by_hook = False
            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_task, queue_name)
                    handled_by_hook = True
                except NotImplementedError:
                    pass
            if not handled_by_hook:
                kickoff_task.add(queue_name, transactional=True)

        def _start():
            if not transactional:
                _save_state()
            _enqueue_kickoff()

        if transactional:
            _start()
        else:
            db.run_in_transaction(_start)

        return mapreduce_id
Exemple #2
0
  def _start_map(cls,
                 name,
                 mapper_spec,
                 mapreduce_params,
                 base_path=None,
                 queue_name=None,
                 eta=None,
                 countdown=None,
                 hooks_class_name=None,
                 _app=None,
                 transactional=False,
                 parent_entity=None):
    """See control.start_map.

    Validates the mapper's reader (and writer, if any), builds the
    MapreduceSpec, saves a MapreduceState for non-transactional starts and
    delegates task creation to cls._add_kickoff_task.

    Returns:
      The id generated for the new mapreduce job.

    Raises:
      Exception: if parent_entity is given for a non-transactional start.
    """
    if not transactional:
      if parent_entity:
        raise Exception(
            "Parent shouldn't be specfied for non-transactional starts.")

    reader_cls = mapper_spec.input_reader_class()
    reader_cls.validate(mapper_spec)

    # The output writer is optional; validate it only when one is configured.
    writer_cls = mapper_spec.output_writer_class()
    if writer_cls:
      writer_cls.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name, mapreduce_id, mapper_spec.to_json(), mapreduce_params,
        hooks_class_name)

    # Access the handler while a context for this spec is installed, and
    # always clear the context afterwards. The attribute access is the whole
    # point of the statement; its value is not used.
    job_context = context.Context(mapreduce_spec, None)
    context.Context._set(job_context)
    try:
      mapper_spec.handler
    finally:
      context.Context._set(None)

    if not transactional:
      # Save the job state now; it also becomes the parent handed to the
      # kickoff task helper below.
      job_state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
      job_state.mapreduce_spec = mapreduce_spec
      job_state.active = True
      job_state.active_shards = mapper_spec.shard_count
      if _app:
        job_state.app_id = _app
      write_config = util.create_datastore_write_config(mapreduce_spec)
      job_state.put(config=write_config)
      parent_entity = job_state

    cls._add_kickoff_task(
        base_path,
        mapreduce_spec,
        eta,
        countdown,
        parent_entity,
        queue_name,
        transactional,
        _app)

    return mapreduce_id
Exemple #3
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path="/mapreduce",
                   queue_name="default",
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None):
        """Create the job state and schedule the kickoff task in a transaction.

        Args:
          name: job name handed to model.MapreduceSpec.
          mapper_spec: mapper specification; it is asked for its handler and
            input reader class, serialized with to_json(), and its shard_count
            is copied onto the state.
          mapreduce_params: passed through to model.MapreduceSpec.
          base_path: URL prefix; "/kickoffjob_callback" is appended to it to
            form the kickoff task url.
          queue_name: queue the kickoff task is added to.
          eta: forwarded to taskqueue.Task.
          countdown: forwarded to taskqueue.Task.
          hooks_class_name: passed through to model.MapreduceSpec.
          _app: when truthy, stored as app_id on the state.

        Returns:
          The id or name of the new state's key, which is also the id given
          to the MapreduceSpec.
        """
        # Invoked only for its side effects; the result is discarded.
        mapper_spec.get_handler()

        reader_cls = mapper_spec.input_reader_class()
        reader_cls.validate(mapper_spec)

        state = model.MapreduceState.create_new()
        mapreduce_spec = model.MapreduceSpec(
            name,
            state.key().id_or_name(),
            mapper_spec.to_json(),
            mapreduce_params,
            hooks_class_name)
        state.mapreduce_spec = mapreduce_spec
        state.active = True
        state.active_shards = mapper_spec.shard_count
        if _app:
            state.app_id = _app

        # NOTE(review): "char_url" looks like a typo for "chart_url" --
        # confirm against the MapreduceState model before renaming.
        state.char_url = ""
        state.sparkline_url = ""

        def _save_and_enqueue(job_state, task_eta, task_countdown):
            # Runs inside the datastore transaction started below.
            job_state.put()
            kickoff_task = taskqueue.Task(
                url=base_path + "/kickoffjob_callback",
                params={
                    "mapreduce_spec": job_state.mapreduce_spec.to_json_str(),
                },
                eta=task_eta,
                countdown=task_countdown)

            # Hooks get the first chance to enqueue the task; a hook that
            # raises NotImplementedError defers to the default enqueue.
            job_hooks = mapreduce_spec.get_hooks()
            handled_by_hook = False
            if job_hooks is not None:
                try:
                    job_hooks.enqueue_kickoff_task(kickoff_task, queue_name)
                    handled_by_hook = True
                except NotImplementedError:
                    pass

            if not handled_by_hook:
                kickoff_task.add(queue_name, transactional=True)

        db.run_in_transaction(_save_and_enqueue, state, eta, countdown)

        return state.key().id_or_name()
    def submit(cls, job_config, in_xg_transaction=False):
        """Submit the job to run.

        Args:
          job_config: an instance of map_job.MapJobConfig.
          in_xg_transaction: controls what transaction scope to use to start
            this MR job. If True, there has to be an already opened
            cross-group transaction scope. MR will use one entity group from
            it. If False, MR will create an independent transaction to start
            the job regardless of any existing transaction scopes.

        Returns:
          a Job instance representing the submitted job.
        """
        cls.__validate_job_config(job_config)
        mapper_spec = job_config._get_mapper_spec()

        mapreduce_params = job_config._get_mr_params()
        mapreduce_spec = model.MapreduceSpec(
            job_config.job_name, job_config.job_id, mapper_spec.to_json(),
            mapreduce_params, util._obj_to_path(job_config._hooks_cls))

        # MANDATORY joins the caller's transaction; INDEPENDENT starts a
        # separate one.
        propagation = db.MANDATORY if in_xg_transaction else db.INDEPENDENT

        @db.transactional(propagation=propagation)
        def _txn():
            # Save the state and enqueue the kickoff task in one transaction.
            state = cls.__create_and_save_state(job_config, mapreduce_spec)
            cls.__add_kickoff_task(job_config, mapreduce_spec)
            return state

        return cls(_txn())
Exemple #5
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path=None,
                   queue_name=None,
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False,
                   parent_entity=None):
        """Build a mapreduce spec and schedule its kickoff task.

        Args:
          name: job name handed to model.MapreduceSpec.
          mapper_spec: mapper specification; it is asked for its handler,
            input reader class and output writer class, serialized with
            to_json(), and its shard_count is copied onto the saved state.
          mapreduce_params: passed through to model.MapreduceSpec.
          base_path: URL prefix; "/kickoffjob_callback" is appended to it to
            form the kickoff task url. None falls back to "/mapreduce".
          queue_name: queue the kickoff task is added to. When falsy, the
            HTTP_X_APPENGINE_QUEUENAME environment variable is used, then
            "default". Empty names and names starting with "_" are replaced
            by "default".
          eta: forwarded to util.HugeTask.
          countdown: forwarded to util.HugeTask.
          hooks_class_name: passed through to model.MapreduceSpec.
          _app: when truthy, sent as the "app" task parameter and stored as
            app_id on the saved state.
          transactional: when true, no MapreduceState is written and the start
            routine is called directly instead of through
            db.run_in_transaction (presumably the caller already holds an
            open transaction -- confirm with callers).
          parent_entity: passed as parent when adding the kickoff task in a
            transactional start; must not be set otherwise.

        Returns:
          The id generated for the new mapreduce job.

        Raises:
          Exception: if parent_entity is given for a non-transactional start.
        """
        queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                                  "default")
        # startswith() instead of [0] so an empty name (e.g. an empty
        # environment variable) falls back to "default" rather than raising
        # IndexError.
        if not queue_name or queue_name.startswith("_"):
            queue_name = "default"

        if not transactional and parent_entity:
            raise Exception("Parent shouldn't be specfied "
                            "for non-transactional starts.")

        # The declared default is None, which cannot be concatenated with the
        # callback suffix below; use the same prefix the other start routines
        # in this file default to.
        if base_path is None:
            base_path = "/mapreduce"

        # Invoked only for its side effects; the result is discarded.
        mapper_spec.get_handler()

        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        # The output writer is optional; validate it only when configured.
        mapper_output_writer_class = mapper_spec.output_writer_class()
        if mapper_output_writer_class:
            mapper_output_writer_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = util.HugeTask(url=base_path +
                                            "/kickoffjob_callback",
                                            params=kickoff_params,
                                            eta=eta,
                                            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            parent = parent_entity
            if not transactional:
                # Save the job state; it becomes the parent passed when
                # adding the kickoff task.
                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)
                parent = state

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:
                    # The hook does not handle enqueueing; fall through to
                    # the default enqueue below.
                    pass
                else:
                    return
            kickoff_worker_task.add(queue_name,
                                    transactional=True,
                                    parent=parent)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id