# Example #1
    def handle(self):
        """Processes a start request and records the new mapreduce id."""

        name = self._get_required_param("name")
        input_reader_spec = self._get_required_param("mapper_input_reader")
        handler_spec = self._get_required_param("mapper_handler")
        output_writer_spec = self.request.get("mapper_output_writer")
        mapper_params = self._get_params(
            "mapper_params_validator", "mapper_params.")
        mapreduce_params = self._get_params("params_validator", "params.")

        # Normalize rate and queue settings, falling back to library defaults.
        rate = mapper_params.get("processing_rate")
        mapper_params["processing_rate"] = int(
            rate or model._DEFAULT_PROCESSING_RATE_PER_SEC)
        queue_name = mapper_params.get("queue_name", "default")
        mapper_params["queue_name"] = queue_name

        shard_count = int(
            mapper_params.get("shard_count", model._DEFAULT_SHARD_COUNT))
        mapper_spec = model.MapperSpec(
            handler_spec,
            input_reader_spec,
            mapper_params,
            shard_count,
            output_writer_spec=output_writer_spec)

        mapreduce_id = type(self)._start_map(
            name,
            mapper_spec,
            mapreduce_params,
            base_path=self.base_path(),
            queue_name=queue_name,
            _app=mapper_params.get("_app"))
        self.json_response["mapreduce_id"] = mapreduce_id
def start_map(name,
              handler_spec,
              reader_spec,
              mapper_parameters,
              shard_count=_DEFAULT_SHARD_COUNT,
              output_writer_spec=None,
              mapreduce_parameters=None,
              base_path=_DEFAULT_BASE_PATH,
              queue_name="default",
              eta=None,
              countdown=None,
              hooks_class_name=None,
              _app=None,
              transactional=False):
  """Start a new, mapper-only mapreduce.

  Args:
    name: mapreduce name. Used only for display purposes.
    handler_spec: fully qualified name of mapper handler function/class to call.
    reader_spec: fully qualified name of mapper reader to use
    mapper_parameters: dictionary of parameters to pass to mapper. These are
      mapper-specific and also used for reader initialization.
    shard_count: number of shards to create.
    output_writer_spec: fully qualified name of the output writer class to
      use, or None when no output writer is needed.
    mapreduce_parameters: dictionary of mapreduce parameters relevant to the
      whole job.
    base_path: base path of mapreduce library handler specified in app.yaml.
      "/mapreduce" by default.
    queue_name: executor queue name to be used for mapreduce tasks.
    eta: Absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
    countdown: Time in seconds into the future that this MR should execute.
        Defaults to zero.
    hooks_class_name: fully qualified name of a hooks.Hooks subclass.
    _app: app id to run the job under; defaults to the current app.
    transactional: Specifies if job should be started as a part of already
      opened transaction.

  Returns:
    mapreduce id as string.
  """
  # Defensive copies so neither side can mutate the caller's dicts after this
  # call returns; keeps this variant consistent with its sibling start_map
  # implementations, which copy before building the spec.
  if mapper_parameters:
    mapper_parameters = dict(mapper_parameters)
  if mapreduce_parameters:
    mapreduce_parameters = dict(mapreduce_parameters)

  mapper_spec = model.MapperSpec(handler_spec,
                                 reader_spec,
                                 mapper_parameters,
                                 shard_count,
                                 output_writer_spec=output_writer_spec)

  return handlers.StartJobHandler._start_map(
      name,
      mapper_spec,
      mapreduce_parameters or {},
      base_path=base_path,
      queue_name=queue_name,
      eta=eta,
      countdown=countdown,
      hooks_class_name=hooks_class_name,
      _app=_app,
      transactional=transactional)
# Example #3
    def _get_mapper_spec(self):
        """Builds the model.MapperSpec equivalent of this job definition."""

        from google.appengine.ext.mapreduce import model

        handler_path = util._obj_to_path(self.mapper)
        reader_path = util._obj_to_path(self.input_reader_cls)
        writer_path = util._obj_to_path(self.output_writer_cls)
        return model.MapperSpec(
            handler_spec=handler_path,
            input_reader_spec=reader_path,
            params=self._get_mapper_params(),
            shard_count=self.shard_count,
            output_writer_spec=writer_path)
def start_map(name,
              handler_spec,
              reader_spec,
              mapper_parameters,
              shard_count=None,
              output_writer_spec=None,
              mapreduce_parameters=None,
              base_path=None,
              queue_name=None,
              eta=None,
              countdown=None,
              hooks_class_name=None,
              _app=None,
              in_xg_transaction=False):
  """Start a new, mapper-only mapreduce.

  Args:
    name: mapreduce name. Used only for display purposes.
    handler_spec: fully qualified name of mapper handler function/class to call.
    reader_spec: fully qualified name of mapper reader to use
    mapper_parameters: dictionary of parameters to pass to mapper. These are
      mapper-specific and also used for reader initialization.
    shard_count: number of shards to create.
    output_writer_spec: fully qualified name of the output writer class to
      use, or None when no output writer is needed.
    mapreduce_parameters: dictionary of mapreduce parameters relevant to the
      whole job.
    base_path: base path of mapreduce library handler specified in app.yaml.
      "/mapreduce" by default.
    queue_name: taskqueue queue name to be used for mapreduce tasks.
      see util.get_queue_name.
    eta: absolute time when the MR should execute. May not be specified
      if 'countdown' is also supplied. This may be timezone-aware or
      timezone-naive.
    countdown: time in seconds into the future that this MR should execute.
      Defaults to zero.
    hooks_class_name: fully qualified name of a hooks.Hooks subclass.
    _app: app id to run the job under; defaults to the current app.
    in_xg_transaction: controls what transaction scope to use to start this MR
      job. If True, there has to be an already opened cross-group transaction
      scope. MR will use one entity group from it.
      If False, MR will create an independent transaction to start the job
      regardless of any existing transaction scopes.

  Returns:
    mapreduce id as string.
  """
  # Resolve library-level defaults for unspecified knobs.
  if shard_count is None:
    shard_count = parameters.config.SHARD_COUNT
  if base_path is None:
    base_path = parameters.config.BASE_PATH

  # Defensive copies so later mutation cannot leak through to the caller;
  # mapreduce_parameters always ends up carrying the effective base_path.
  if mapper_parameters:
    mapper_parameters = dict(mapper_parameters)
  if mapreduce_parameters:
    mapreduce_parameters = dict(mapreduce_parameters)
    if "base_path" not in mapreduce_parameters:
      mapreduce_parameters["base_path"] = base_path
  else:
    mapreduce_parameters = {"base_path": base_path}

  mapper_spec = model.MapperSpec(handler_spec,
                                 reader_spec,
                                 mapper_parameters,
                                 shard_count,
                                 output_writer_spec=output_writer_spec)

  # Warn (not fail) when the caller requested an xg-transactional start but
  # no transaction is actually open; the start itself would then misbehave.
  if in_xg_transaction and not db.is_in_transaction():
    logging.warning("Expects an opened xg transaction to start mapreduce "
                    "when in_xg_transaction is True.")

  return handlers.StartJobHandler._start_map(
      name,
      mapper_spec,
      mapreduce_parameters or {},
      base_path=base_path,
      queue_name=util.get_queue_name(queue_name),
      eta=eta,
      countdown=countdown,
      hooks_class_name=hooks_class_name,
      _app=_app,
      in_xg_transaction=in_xg_transaction)
# Example #5
# File: control.py  Project: girum11/hang
def start_map(name,
              handler_spec,
              reader_spec,
              mapper_parameters,
              shard_count=_DEFAULT_SHARD_COUNT,
              output_writer_spec=None,
              mapreduce_parameters=None,
              base_path=None,
              queue_name=None,
              eta=None,
              countdown=None,
              hooks_class_name=None,
              _app=None,
              transactional=False,
              transactional_parent=None):
    """Start a new, mapper-only mapreduce.

    Args:
      name: mapreduce name. Used only for display purposes.
      handler_spec: fully qualified name of mapper handler function/class to
        call.
      reader_spec: fully qualified name of mapper reader to use
      mapper_parameters: dictionary of parameters to pass to mapper. These are
        mapper-specific and also used for reader initialization.
      shard_count: number of shards to create.
      output_writer_spec: fully qualified name of the output writer class to
        use, or None when no output writer is needed.
      mapreduce_parameters: dictionary of mapreduce parameters relevant to the
        whole job.
      base_path: base path of mapreduce library handler specified in app.yaml.
        "/mapreduce" by default.
      queue_name: executor queue name to be used for mapreduce tasks. If
        unspecified it will be the "default" queue or inherit the queue of
        the currently running request.
      eta: absolute time when the MR should execute. May not be specified
        if 'countdown' is also supplied. This may be timezone-aware or
        timezone-naive.
      countdown: time in seconds into the future that this MR should execute.
        Defaults to zero.
      hooks_class_name: fully qualified name of a hooks.Hooks subclass.
      _app: app id to run the job under; defaults to the current app.
      transactional: specifies if job should be started as a part of already
        opened transaction.
      transactional_parent: specifies the entity which is already a part of
        transaction. Child entity will be used to store task payload if
        mapreduce specification is too big.

    Returns:
      mapreduce id as string.
    """
    # Fall back to library defaults; note a falsy (0/None) shard_count is
    # replaced by the default as well.
    if not shard_count:
        shard_count = _DEFAULT_SHARD_COUNT
    if base_path is None:
        base_path = base_handler._DEFAULT_BASE_PATH

    # Defensive copies so later mutation cannot leak through to the caller.
    if mapper_parameters:
        mapper_parameters = dict(mapper_parameters)
    if mapreduce_parameters:
        mapreduce_parameters = dict(mapreduce_parameters)

    mapper_spec = model.MapperSpec(handler_spec,
                                   reader_spec,
                                   mapper_parameters,
                                   shard_count,
                                   output_writer_spec=output_writer_spec)

    if transactional and not transactional_parent:
        # Best-effort warning only: the start is still attempted, but large
        # specs have nowhere to store their task payload without a parent.
        logging.error(
            "transactional_parent should be specified for transactional "
            "starts. Your job will fail to start if mapreduce specification "
            "is too big."
        )

    return handlers.StartJobHandler._start_map(
        name,
        mapper_spec,
        mapreduce_parameters or {},
        base_path=base_path,
        queue_name=queue_name,
        eta=eta,
        countdown=countdown,
        hooks_class_name=hooks_class_name,
        _app=_app,
        transactional=transactional,
        parent_entity=transactional_parent)