def create_mapreduce_state(self, params=None):
     """Build a MapreduceState for the fixed job id "mapreduce0".

     Args:
       params: forwarded to self.create_mapper_spec as its params argument.

     Returns:
       a model.MapreduceState with its mapreduce_spec attached. This method
       itself does not call put() on the state.
     """
     mapper_json = self.create_mapper_spec(params=params).to_json()
     # Job name and job id are both the literal "mapreduce0".
     spec = model.MapreduceSpec("mapreduce0", "mapreduce0", mapper_json)
     state = model.MapreduceState.create_new("mapreduce0")
     state.mapreduce_spec = spec
     return state
Ejemplo n.º 2
0
def start(job_config=None, in_xg_transaction=False):
    """Validate a map job config, then save its state and enqueue kickoff.

    Args:
      job_config: an instance of map_job.MapJobConfig. Despite the None
        default it is dereferenced immediately, so a config is required.
      in_xg_transaction: controls what transaction scope to use to start this
        MR job. If True, db.MANDATORY propagation is used, so there has to be
        an already opened cross-group transaction scope. If False,
        db.INDEPENDENT propagation is used, regardless of any existing
        transaction scopes.
    """
    # The input reader is always validated; the output writer only when the
    # config names one.
    mapper_spec = job_config._get_mapper_spec()
    job_config.input_reader_cls.validate(mapper_spec)
    if job_config.output_writer_cls:
        job_config.output_writer_cls.validate(mapper_spec)

    # Assemble the mapreduce spec from the config.
    mr_params = job_config._get_mr_params()
    mr_spec = model.MapreduceSpec(
        job_config.job_name,
        job_config.job_id,
        mapper_spec.to_json(),
        mr_params,
        util._obj_to_path(job_config._hooks_cls))

    propagation = db.MANDATORY if in_xg_transaction else db.INDEPENDENT

    def _save_and_enqueue():
        # Both steps share one transaction so they commit or fail together.
        _create_and_save_state(job_config, mr_spec)
        _add_kickoff_task(job_config, mr_spec)

    db.transactional(propagation=propagation)(_save_and_enqueue)()
Ejemplo n.º 3
0
 def testGetTaskHeaders(self):
     """Check MR id header, Host header and target of a task."""
     mapper_json = model.MapperSpec("foo", "foo", {}, 8).to_json()
     spec = model.MapreduceSpec(
         name="foo", mapreduce_id="foo_id", mapper_spec=mapper_json)

     # The headers under test are derived from the mapreduce id alone.
     headers = util._get_task_headers(spec.mapreduce_id)
     task = taskqueue.Task(url="/relative_url", headers=headers)

     self.assertEqual("foo_id", task.headers[util._MR_ID_TASK_HEADER])
     self.assertEqual("v7.foo-module.foo.appspot.com", task.headers["Host"])
     self.assertEqual("v7.foo-module", task.target)
Ejemplo n.º 4
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path="/mapreduce",
                   queue_name="default",
                   eta=None,
                   countdown=None,
                   _app=None):
        """Split the input, save a new job state and enqueue the kickoff task.

        Args:
          name: job name passed to model.MapreduceSpec.
          mapper_spec: mapper specification. Its shard_count is overwritten
            with the number of input readers produced by split_input.
          mapreduce_params: extra parameters passed to model.MapreduceSpec.
          base_path: URL prefix; the kickoff task is sent to
            base_path + "/kickoffjob_callback".
          queue_name: name of the queue the kickoff task is added to.
          eta: forwarded to taskqueue.Task.
          countdown: forwarded to taskqueue.Task.
          _app: when truthy, stored on the state as app_id.

        Returns:
          the id or name of the new state's key.

        Raises:
          NoDataError: if splitting the input yields no readers.
        """
        # Instantiating the handler here only checks that it can be done.
        mapper_spec.get_handler()

        reader_cls = mapper_spec.input_reader_class()
        readers = reader_cls.split_input(mapper_spec)
        if not readers:
            raise NoDataError("Found no mapper input readers to process.")
        # One shard per input reader.
        mapper_spec.shard_count = len(readers)

        state = model.MapreduceState.create_new()
        state.mapreduce_spec = model.MapreduceSpec(
            name, state.key().id_or_name(), mapper_spec.to_json(),
            mapreduce_params)
        state.active = True
        state.active_shards = mapper_spec.shard_count
        if _app:
            state.app_id = _app

        # TODO(user): Initialize UI fields correctly.
        # NOTE(review): "char_url" may be a typo for "chart_url" -- confirm
        # against the MapreduceState model before changing it.
        state.char_url = ""
        state.sparkline_url = ""

        def schedule_mapreduce(state, mapper_input_readers, eta, countdown):
            # Save the state and enqueue the kickoff task together.
            state.save()
            serialized_readers = [
                reader.to_json_str() for reader in mapper_input_readers
            ]
            task_params = {
                "mapreduce_spec": state.mapreduce_spec.to_json_str(),
                "input_readers": simplejson.dumps(serialized_readers),
            }
            kickoff_task = taskqueue.Task(
                url=base_path + "/kickoffjob_callback",
                params=task_params,
                eta=eta,
                countdown=countdown)
            kickoff_task.add(queue_name, transactional=True)

        # Point of no return: We're actually going to run this job!
        db.run_in_transaction(schedule_mapreduce, state, readers, eta,
                              countdown)

        return state.key().id_or_name()
  def create_mapreduce_state(self, output_params=None):
    """Create and store a model.MapreduceState with a dummy job name and id.

    Args:
      output_params: parameters for the output writer; forwarded to
        self.create_mapper_spec.

    Returns:
      the model.MapreduceState, after put() has been called on it. Its
      mapreduce_spec wraps the mapper spec built from output_params.
    """
    job_id = "DummyMapReduceJobId"
    mapper_spec = self.create_mapper_spec(output_params=output_params)
    spec = model.MapreduceSpec(
        "DummyMapReduceJobName", job_id, mapper_spec.to_json())

    state = model.MapreduceState.create_new(job_id)
    state.mapreduce_spec = spec
    state.put()
    return state
Ejemplo n.º 6
0
  def submit(cls, job_config, in_xg_transaction=False):
    """Validate, persist and kick off a map job.

    Args:
      job_config: an instance of map_job.MapJobConfig.
      in_xg_transaction: controls what transaction scope to use to start this
        MR job. If True, db.MANDATORY propagation is used, so there has to be
        an already opened cross-group transaction scope. If False,
        db.INDEPENDENT propagation is used, regardless of any existing
        transaction scopes.

    Returns:
      a Job instance wrapping the state saved for the submitted job.
    """
    cls.__validate_job_config(job_config)
    mapper_spec = job_config._get_mapper_spec()

    # Assemble the mapreduce spec from the config.
    mr_params = job_config._get_mr_params()
    mr_spec = model.MapreduceSpec(
        job_config.job_name, job_config.job_id, mapper_spec.to_json(),
        mr_params, util._obj_to_path(job_config._hooks_cls))

    propagation = db.MANDATORY if in_xg_transaction else db.INDEPENDENT

    @db.transactional(propagation=propagation)
    def _txn():
      # Saving the state and enqueueing kickoff share one transaction.
      saved_state = cls.__create_and_save_state(job_config, mr_spec)
      cls.__add_kickoff_task(job_config, mr_spec)
      return saved_state

    return cls(_txn())
Ejemplo n.º 7
0
    def testFindAllByMapreduceState(self):
        """Shard states come back in shard order inside a non-XG txn."""
        shard_count = 304
        mr_id = "mapreduce-id"

        mr_state = model.MapreduceState.create_new(mr_id)
        mapper_json = model.MapperSpec(
            "handler", "input-reader", {}, shard_count=shard_count).to_json()
        mr_state.mapreduce_spec = model.MapreduceSpec(
            "mapreduce", mr_id, mapper_json)
        mr_state.put()
        for shard_number in range(shard_count):
            model.ShardState.create_new(mr_id, shard_number).put()

        def non_xg_tx():
            # Open a single non-related entity group to ensure
            # find_all_by_mapreduce_state does not attempt to use the outer
            # transaction.
            unrelated_state = model.MapreduceState.create_new(
                "unrelated-mapreduce-id")
            unrelated_state.put()
            found = model.ShardState.find_all_by_mapreduce_state(mr_state)
            for expected_number, shard_state in enumerate(found):
                self.assertEqual(expected_number, shard_state.shard_number)

        db.transactional(xg=False)(non_xg_tx)()
Ejemplo n.º 8
0
 def testToJson(self):
     """Test that to_json returns every field given to the constructor."""
     hooks_class_name = __name__ + "." + TestHooks.__name__
     mapper_spec_dict = {
         "mapper_handler_spec": "TestHandler",
         "mapper_input_reader": "TestInputReader",
         "mapper_params": {
             "entity_kind": "bar"
         },
         "mapper_shard_count": 8
     }
     mapreduce_spec = model.MapreduceSpec(
         "my job", "mr0", mapper_spec_dict, {"extra": "value"},
         hooks_class_name)
     # assertEqual, not the deprecated assertEquals alias: the alias was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         {
             "name": "my job",
             "mapreduce_id": "mr0",
             "mapper_spec": mapper_spec_dict,
             "params": {
                 "extra": "value"
             },
             "hooks_class_name": hooks_class_name,
         }, mapreduce_spec.to_json())
Ejemplo n.º 9
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path=None,
                   queue_name=None,
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False,
                   parent_entity=None):
        """Validate the job configuration and enqueue its kickoff task.

        Args:
          name: job name passed to model.MapreduceSpec.
          mapper_spec: mapper specification; its input reader, optional output
            writer and handler are all checked before anything is enqueued.
          mapreduce_params: extra parameters passed to model.MapreduceSpec.
          base_path: URL prefix; the kickoff task is sent to
            base_path + "/kickoffjob_callback". None means "/mapreduce".
          queue_name: queue for the kickoff task. When falsy, the
            HTTP_X_APPENGINE_QUEUENAME environment variable is used, then
            "default". Names starting with "_" are replaced by "default".
          eta: forwarded to the kickoff task.
          countdown: forwarded to the kickoff task.
          hooks_class_name: passed to model.MapreduceSpec.
          _app: when truthy, added to the kickoff params and stored on the
            saved state as app_id.
          transactional: if True, run inside the caller's transaction and do
            not save a MapreduceState; if False, save the state and enqueue
            the task in a new transaction.
          parent_entity: passed as parent when adding the task; only allowed
            for transactional starts.

        Returns:
          the newly generated mapreduce id.

        Raises:
          Exception: if parent_entity is given for a non-transactional start.
        """
        if base_path is None:
            # The kickoff URL below is built by string concatenation, so a
            # None base_path would raise TypeError. Fall back to the
            # library's conventional mount point instead.
            base_path = "/mapreduce"

        # An environment variable that is present but empty must not win over
        # the "default" queue, hence the chained "or".
        queue_name = (queue_name or
                      os.environ.get("HTTP_X_APPENGINE_QUEUENAME") or
                      "default")
        if queue_name.startswith("_"):
            # We are currently in some special queue. E.g. __cron.
            queue_name = "default"

        if not transactional and parent_entity:
            raise Exception("Parent shouldn't be specfied "
                            "for non-transactional starts.")

        # Check that reader can be instantiated and is configured correctly
        mapper_input_reader_class = mapper_spec.input_reader_class()
        mapper_input_reader_class.validate(mapper_spec)

        mapper_output_writer_class = mapper_spec.output_writer_class()
        if mapper_output_writer_class:
            mapper_output_writer_class.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(name, mapreduce_id,
                                             mapper_spec.to_json(),
                                             mapreduce_params,
                                             hooks_class_name)

        # Check that handler can be instantiated. A context is installed for
        # the duration of the check and always cleared afterwards.
        ctx = context.Context(mapreduce_spec, None)
        context.Context._set(ctx)
        try:
            mapper_spec.get_handler()
        finally:
            context.Context._set(None)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = util.HugeTask(url=base_path +
                                            "/kickoffjob_callback",
                                            params=kickoff_params,
                                            eta=eta,
                                            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def start_mapreduce():
            parent = parent_entity
            if not transactional:
                # Save state in datastore so that UI can see it.
                # We can't save state in foreign transaction, but conventional
                # UI doesn't ask for transactional starts anyway.
                state = model.MapreduceState.create_new(
                    mapreduce_spec.mapreduce_id)
                state.mapreduce_spec = mapreduce_spec
                state.active = True
                state.active_shards = mapper_spec.shard_count
                if _app:
                    state.app_id = _app
                state.put(config=config)
                # The freshly saved state becomes the task's parent.
                parent = state

            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:
                    # Use the default task addition implementation.
                    pass
                else:
                    # The hook enqueued the task itself.
                    return
            kickoff_worker_task.add(queue_name,
                                    transactional=True,
                                    parent=parent)

        if transactional:
            # The caller already owns a transaction; run inside it.
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id
Ejemplo n.º 10
0
    def _start_map(cls,
                   name,
                   mapper_spec,
                   mapreduce_params,
                   base_path="/mapreduce",
                   queue_name="default",
                   eta=None,
                   countdown=None,
                   hooks_class_name=None,
                   _app=None,
                   transactional=False):
        """Check the mapper configuration and enqueue the kickoff task.

        Args:
          name: job name passed to model.MapreduceSpec.
          mapper_spec: mapper specification; its handler and input reader are
            checked before anything is enqueued.
          mapreduce_params: extra parameters passed to model.MapreduceSpec.
          base_path: URL prefix; the kickoff task is sent to
            base_path + "/kickoffjob_callback".
          queue_name: name of the queue the kickoff task is added to.
          eta: forwarded to taskqueue.Task.
          countdown: forwarded to taskqueue.Task.
          hooks_class_name: passed to model.MapreduceSpec.
          _app: when truthy, added to the kickoff params and stored on the
            saved state as app_id.
          transactional: if True, run inside the caller's transaction and do
            not save a MapreduceState; if False, save the state and enqueue
            the task in a new transaction.

        Returns:
          the newly generated mapreduce id.
        """
        # Check that the handler can be instantiated, and that the reader can
        # be instantiated and is configured correctly.
        mapper_spec.get_handler()
        reader_cls = mapper_spec.input_reader_class()
        reader_cls.validate(mapper_spec)

        mapreduce_id = model.MapreduceState.new_mapreduce_id()
        mapreduce_spec = model.MapreduceSpec(
            name, mapreduce_id, mapper_spec.to_json(), mapreduce_params,
            hooks_class_name)

        kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
        if _app:
            kickoff_params["app"] = _app
        kickoff_worker_task = taskqueue.Task(
            url=base_path + "/kickoffjob_callback",
            params=kickoff_params,
            eta=eta,
            countdown=countdown)

        hooks = mapreduce_spec.get_hooks()
        config = util.create_datastore_write_config(mapreduce_spec)

        def save_state():
            # Save state in datastore so that UI can see it.
            state = model.MapreduceState.create_new(
                mapreduce_spec.mapreduce_id)
            state.mapreduce_spec = mapreduce_spec
            state.active = True
            state.active_shards = mapper_spec.shard_count
            if _app:
                state.app_id = _app
            state.put(config=config)

        def start_mapreduce():
            # We can't save state in foreign transaction, but conventional UI
            # doesn't ask for transactional starts anyway.
            if not transactional:
                save_state()

            use_default_add = True
            if hooks is not None:
                try:
                    hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
                except NotImplementedError:
                    # Use the default task addition implementation.
                    pass
                else:
                    # The hook enqueued the task itself.
                    use_default_add = False
            if use_default_add:
                kickoff_worker_task.add(queue_name, transactional=True)

        if transactional:
            start_mapreduce()
        else:
            db.run_in_transaction(start_mapreduce)

        return mapreduce_id