Example #1
0
    def write(self, data, ctx):
        """Write a key/value record to the records pool of one output file.

        Both elements of data are converted with str(). The hash of the key,
        modulo the number of output files, selects the file; the record is
        appended to that file's pool as an encoded KeyValue proto. The pool is
        created and registered on ctx the first time a file is selected.

        Data that is not sized or not indexable is logged and skipped.

        Args:
          data: actual data yielded from handler. Expected to be a
            (key, value) 2-tuple.
          ctx: an instance of context.Context.

        Raises:
          IndexError: if data is a sequence with fewer than two elements.
        """
        try:
            # len() lives inside the try so that unsized data is reported by
            # the handler below instead of escaping as a bare TypeError.
            length = len(data)
            if length != 2:
                # Logged only: a longer sequence still has its first two
                # elements written, a shorter one fails on indexing below.
                logging.error(
                    "Got bad tuple of length %d (2-tuple expected): %s",
                    length, data)
            key = str(data[0])
            value = str(data[1])
        except TypeError:
            logging.error("Expecting a tuple, but got %s: %s",
                          data.__class__.__name__, data)
            # Nothing usable was extracted; falling through would fail on the
            # unbound key/value names, so skip this record.
            return

        # NOTE(review): sharding relies on str hashes being stable between
        # processes -- confirm hash randomization is not enabled.
        file_index = hash(key) % len(self._filenames)
        pool_name = "kv_pool%d" % file_index
        filename = self._filenames[file_index]

        # Lazily create one pool per output file.
        if ctx.get_pool(pool_name) is None:
            ctx.register_pool(
                pool_name,
                output_writers.RecordsPool(filename=filename, ctx=ctx))
        proto = file_service_pb.KeyValue()
        proto.set_key(key)
        proto.set_value(value)
        ctx.get_pool(pool_name).append(proto.Encode())
Example #2
0
def _sort_records(records):
  """Map function sorting records.

  Parses records into KeyValue protos, sorts them with _compare_keys and
  writes them into a new blobstore file. Creates an _OutputFile entity, keyed
  by the finalized file name, to record the resulting file.

  Args:
    records: iterable of records which are serialized KeyValue protos.
  """
  ctx = context.get()

  logging.debug("Parsing")
  proto_records = []
  for record in records:
    proto = file_service_pb.KeyValue()
    proto.ParseFromString(record)
    proto_records.append(proto)

  logging.debug("Sorting")
  # The cmp= keyword of list.sort is a Python 2 API; kept because the
  # comparison function is defined elsewhere in this file.
  proto_records.sort(cmp=_compare_keys)

  logging.debug("Writing")
  blob_file_name = (ctx.mapreduce_spec.name + "-" +
                    ctx.mapreduce_id + "-output")
  output_path = files.blobstore.create(
      _blobinfo_uploaded_filename=blob_file_name)
  with output_writers.RecordsPool(output_path, ctx=ctx) as pool:
    for proto in proto_records:
      pool.append(proto.Encode())

  logging.debug("Finalizing")
  files.finalize(output_path)
  # NOTE(review): presumably gives the finalized blob time to become visible
  # before its blob key is looked up -- confirm whether this is still needed.
  time.sleep(1)
  # Replace the writable path with the file name derived from the blob key.
  output_path = files.blobstore.get_file_name(
      files.blobstore.get_blob_key(output_path))

  entity = _OutputFile(key_name=output_path,
                       parent=_OutputFile.get_root_key(ctx.mapreduce_id))
  entity.put()