예제 #1
0
def shuffle(job_name,
            input_file_list,
            output_file_list,
            callback):
  """Submit a shuffle job for mapreduce files to the shuffler service.

  Args:
    job_name: unique shuffle job name as string.
    input_file_list: list of files api file names to shuffle. Files should be
      in records format with serialized KeyValue protocol buffer as record.
    output_file_list: list of files api file names to store shuffle result.
      Files should not be finalized. They will be of records format with
      serialized KeyValues protocol buffer as record.
    callback: shuffle service call back specification. Either a url string
      (the task is enqueued in the default queue with default parameters) or
      a dict with the following keys:
        url: url to call back (required)
        version: app version to call (optional)
        method: HTTP method to use, POST or GET (optional)
        queue: queue name to enqueue a task in (optional)

  Raises:
    ShufflerUnavailableError if shuffler service is not available.
  """
  if not available():
    raise ShufflerUnavailableError()

  request = file_service_pb.ShuffleRequest()
  response = file_service_pb.ShuffleResponse()
  request.set_shuffle_name(job_name)

  # Fill in the callback sub-message; a plain (non-dict) value is the url.
  callback_spec = request.mutable_callback()
  if isinstance(callback, dict):
    callback_spec.set_url(callback["url"])
    # Optional keys are copied over only when the caller supplied them.
    optional_setters = (
        ("version", callback_spec.set_app_version_id),
        ("method", callback_spec.set_method),
        ("queue", callback_spec.set_queue),
    )
    for key, setter in optional_setters:
      if key in callback:
        setter(callback[key])
  else:
    callback_spec.set_url(callback)

  # The shuffle size is always sent as 0 by this function.
  request.set_shuffle_size_bytes(0)

  # Every input is declared as a records file of key/value protos.
  input_format = file_service_pb.ShuffleEnums.RECORDS_KEY_VALUE_PROTO_INPUT
  for input_path in input_file_list:
    input_spec = request.add_input()
    input_spec.set_format(input_format)
    input_spec.set_path(input_path)

  # A single output spec carries the format and all output paths.
  output_spec = request.mutable_output()
  output_spec.set_format(
      file_service_pb.ShuffleEnums.RECORDS_KEY_MULTI_VALUE_PROTO_OUTPUT)
  for output_path in output_file_list:
    output_spec.add_path(output_path)

  files._make_call("Shuffle", request, response)
예제 #2
0
def shuffle(job_name, input_file_list, output_file_list, callback):
    """Ask the shuffler service to shuffle a set of mapreduce files.

    Args:
      job_name: unique shuffle job name as string.
      input_file_list: list of files api file names to shuffle. Files should
        be in records format with serialized KeyValue protocol buffer as
        record.
      output_file_list: list of files api file names to store shuffle result.
        Files should not be finalized. They will be of records format with
        serialized KeyValues protocol buffer as record.
      callback: shuffle service call back specification. Can be either a
        url - the task in default queue with default parameters will be
        enqueued - or a dict with following keys:
          url: url to call back
          version: app version to call
          method: HTTP method to use (POST or GET)
          queue: queue name to enqueue a task in.

    Raises:
      ShufflerUnavailableError if shuffler service is not available.
    """
    if not available():
        raise ShufflerUnavailableError()

    shuffle_request = file_service_pb.ShuffleRequest()
    shuffle_response = file_service_pb.ShuffleResponse()

    shuffle_request.set_shuffle_name(job_name)

    # Callback: a non-dict value is used directly as the url; a dict must
    # have "url" and may additionally carry "version", "method", "queue".
    cb = shuffle_request.mutable_callback()
    if not isinstance(callback, dict):
        cb.set_url(callback)
    else:
        cb.set_url(callback["url"])
        if "version" in callback:
            cb.set_app_version_id(callback["version"])
        if "method" in callback:
            cb.set_method(callback["method"])
        if "queue" in callback:
            cb.set_queue(callback["queue"])

    # The size field is always sent as 0 here.
    shuffle_request.set_shuffle_size_bytes(0)

    # One input entry per source file, all in key/value records format.
    for source_name in input_file_list:
        entry = shuffle_request.add_input()
        entry.set_format(
            file_service_pb.ShuffleEnums.RECORDS_KEY_VALUE_PROTO_INPUT)
        entry.set_path(source_name)

    # One output entry holding the format and every destination path.
    destination = shuffle_request.mutable_output()
    destination.set_format(
        file_service_pb.ShuffleEnums.RECORDS_KEY_MULTI_VALUE_PROTO_OUTPUT)
    for target_name in output_file_list:
        destination.add_path(target_name)

    files._make_call("Shuffle", shuffle_request, shuffle_response)
예제 #3
0
def _listdir_local(path, kwargs):
  """Dev app server version of listdir.

  See listdir for doc.

  Args:
    path: path to list; sent as the path of the ListDir request.
    kwargs: optional dict of listing options, may be None or empty. The keys
      'marker', 'max-keys' and 'prefix' are forwarded to the request when
      present; any other keys are ignored.

  Returns:
    The filenames list taken from the ListDir response.
  """
  request = file_service_pb.ListDirRequest()
  response = file_service_pb.ListDirResponse()
  request.set_path(path)

  # `in` replaces dict.has_key(), which no longer exists in Python 3.
  # A None/empty kwargs means no optional fields are set.
  if kwargs:
    if 'marker' in kwargs:
      request.set_marker(kwargs['marker'])
    if 'max-keys' in kwargs:
      request.set_max_keys(kwargs['max-keys'])
    if 'prefix' in kwargs:
      request.set_prefix(kwargs['prefix'])

  files._make_call('ListDir', request, response)
  return response.filenames_list()
예제 #4
0
def _listdir_local(path, kwargs):
    """Dev app server version of listdir.

    See listdir for doc.

    Args:
      path: path to list; sent as the path of the ListDir request.
      kwargs: optional dict of listing options, may be None or empty. The
        keys 'marker', 'max-keys' and 'prefix' are forwarded to the request
        when present; any other keys are ignored.

    Returns:
      The filenames list taken from the ListDir response.
    """
    request = file_service_pb.ListDirRequest()
    response = file_service_pb.ListDirResponse()
    request.set_path(path)

    # Membership is tested with `in` because dict.has_key() was removed in
    # Python 3. A None/empty kwargs leaves all optional fields unset.
    options = kwargs or {}
    if 'marker' in options:
        request.set_marker(options['marker'])
    if 'max-keys' in options:
        request.set_max_keys(options['max-keys'])
    if 'prefix' in options:
        request.set_prefix(options['prefix'])

    files._make_call('ListDir', request, response)
    return response.filenames_list()