Example #1
0
    def _to_map_job_config(cls, mr_spec, queue_name):
        """Builds a JobConfig from a model.MapreduceSpec.

        Lets internal code work with JobConfig directly, and exposes
        JobConfig as an API during execution even though it is never
        saved into datastore.

        Args:
          mr_spec: model.MapreduceSpec.
          queue_name: queue name.

        Returns:
          The JobConfig object for this job.
        """
        mapper = mr_spec.mapper
        spec_params = mr_spec.params

        # Jobs that never set "api_version" predate the new API (version 0)
        # and are constructed with _lenient=True.
        api_version = spec_params.get("api_version", 0)

        reader_cls = mapper.input_reader_class()
        reader_params = input_readers._get_params(mapper)
        # Readers from the input_reader module deserialize their params
        # from the JSON form stored in the spec.
        if issubclass(reader_cls, input_reader.InputReader):
            reader_params = reader_cls.params_from_json(reader_params)

        writer_cls = mapper.output_writer_class()
        writer_params = output_writers._get_params(mapper)

        return cls(
            _lenient=(api_version == 0),
            job_name=mr_spec.name,
            job_id=mr_spec.mapreduce_id,
            mapper=util.for_name(mapper.handler_spec),
            input_reader_cls=reader_cls,
            input_reader_params=reader_params,
            output_writer_cls=writer_cls,
            output_writer_params=writer_params,
            shard_count=mapper.shard_count,
            queue_name=queue_name,
            user_params=spec_params.get("user_params"),
            shard_max_attempts=spec_params.get("shard_max_attempts"),
            done_callback_url=spec_params.get("done_callback"),
            _force_writes=spec_params.get("force_writes"),
            _base_path=spec_params["base_path"],
            _task_max_attempts=spec_params.get("task_max_attempts"),
            _task_max_data_processing_attempts=(
                spec_params.get("task_max_data_processing_attempts")),
            _hooks_cls=util.for_name(mr_spec.hooks_class_name),
            _app=spec_params.get("app_id"),
            _api_version=api_version)
    def create(cls, mr_spec, shard_number, shard_attempt, _writer_state=None):
        """Inherit docs.

        Opens one writable GCS file per mapper shard, named
        /<bucket>/<job name>/<job id>/shard-<shard_number>-bucket-<i>,
        and returns a writer wrapping those handles.
        """
        mapper_spec = mr_spec.mapper
        params = output_writers._get_params(mapper_spec)
        bucket_name = params.get(cls.BUCKET_NAME_PARAM)
        shards = mapper_spec.shard_count

        # Namespace filenames by job name and mapreduce id so concurrent
        # jobs cannot collide.
        filename = (mr_spec.name + "/" + mr_spec.mapreduce_id + "/shard-" +
                    str(shard_number) + "-bucket-")
        # Comprehension instead of a manual append loop: one open handle
        # per output bucket.
        filehandles = [
            cloudstorage.open("/%s/%s%d" % (bucket_name, filename, i),
                              mode="w")
            for i in range(shards)]
        return cls(filehandles)
Example #3
0
  def create(cls, mr_spec, shard_number, shard_attempt, _writer_state=None):
    """Inherit docs.

    Opens one writable GCS file per mapper shard and returns a writer
    wrapping all of the open handles.
    """
    writer_params = output_writers._get_params(mr_spec.mapper)
    bucket = writer_params.get(cls.BUCKET_NAME_PARAM)
    num_files = mr_spec.mapper.shard_count

    # Filenames are namespaced by job name, mapreduce id and this shard's
    # number so concurrent jobs and shards never collide.
    prefix = "/%s/%s/%s/shard-%s-bucket-" % (
        bucket, mr_spec.name, mr_spec.mapreduce_id, shard_number)
    handles = []
    for index in range(num_files):
      handles.append(cloudstorage.open(prefix + str(index), mode="w"))
    return cls(handles)
Example #4
0
  def _to_map_job_config(cls, mr_spec, queue_name):
    """Builds a JobConfig from a model.MapreduceSpec.

    Lets internal code work with JobConfig directly, and exposes JobConfig
    as an API during execution even though it is never saved into
    datastore.

    Args:
      mr_spec: model.MapreduceSpec.
      queue_name: queue name.

    Returns:
      The JobConfig object for this job.
    """
    mapper = mr_spec.mapper
    spec_params = mr_spec.params

    # Jobs that never set "api_version" predate the new API (version 0)
    # and are constructed with _lenient=True.
    api_version = spec_params.get("api_version", 0)

    return cls(
        _lenient=(api_version == 0),
        job_name=mr_spec.name,
        job_id=mr_spec.mapreduce_id,
        mapper=util.for_name(mapper.handler_spec),
        input_reader_cls=mapper.input_reader_class(),
        input_reader_params=input_readers._get_params(mapper),
        output_writer_cls=mapper.output_writer_class(),
        output_writer_params=output_writers._get_params(mapper),
        shard_count=mapper.shard_count,
        queue_name=queue_name,
        user_params=spec_params.get("user_params"),
        shard_max_attempts=spec_params.get("shard_max_attempts"),
        done_callback_url=spec_params.get("done_callback"),
        _force_writes=spec_params.get("force_writes"),
        _base_path=spec_params["base_path"],
        _task_max_attempts=spec_params.get("task_max_attempts"),
        _task_max_data_processing_attempts=(
            spec_params.get("task_max_data_processing_attempts")),
        _hooks_cls=util.for_name(mr_spec.hooks_class_name),
        _app=spec_params.get("app_id"),
        _api_version=api_version)
    def validate(cls, mapper_spec):
        """Validates a mapper specification for this writer.

        Args:
          mapper_spec: an instance of model.MapperSpec to validate.

        Raises:
          BadWriterParamsError: when the spec names a different output
            writer class, or the required bucket name parameter is absent.
        """
        # The spec must name exactly this writer class.
        if cls != mapper_spec.output_writer_class():
            raise errors.BadWriterParamsError("Output writer class mismatch")

        # A GCS bucket name is mandatory.
        writer_params = output_writers._get_params(mapper_spec)
        if cls.BUCKET_NAME_PARAM not in writer_params:
            raise errors.BadWriterParamsError(
                "%s is required for the _HashingGCSOutputWriter" %
                cls.BUCKET_NAME_PARAM)
Example #6
0
  def validate(cls, mapper_spec):
    """Validates mapper specification.

    Args:
      mapper_spec: an instance of model.MapperSpec to validate.

    Raises:
      BadWriterParamsError: when the output writer class does not match,
        or the required bucket name parameter is missing.
    """
    # Reject specs configured for a different writer class.
    configured_cls = mapper_spec.output_writer_class()
    if configured_cls != cls:
      raise errors.BadWriterParamsError("Output writer class mismatch")

    # The writer cannot open files without a bucket name.
    writer_params = output_writers._get_params(mapper_spec)
    if cls.BUCKET_NAME_PARAM not in writer_params:
      raise errors.BadWriterParamsError(
          "%s is required for the _HashingGCSOutputWriter" %
          cls.BUCKET_NAME_PARAM)