Example #1
0
  def finalize(self, ctx, shard_state):
    """Close the main output file and clean up temporary files.

    Must be called only after all buffered data has been flushed; a
    finalize after an unflushed write is a job-level error.

    Args:
      ctx: the mapreduce context.
      shard_state: the shard state to record the final filename into.

    Raises:
      errors.FailJobError: if data was written during the current slice.
    """
    if self._data_written_to_slice:
      raise errors.FailJobError(
          "finalize() called after data was written")

    status = self.status
    # The current tmpfile is empty at this point; close it before removal.
    if status.tmpfile:
      status.tmpfile.close()
    status.mainfile.close()

    # The rewrite and close both happened, so the temp files are now stale.
    for stale_tmpfile in (status.tmpfile_1ago, status.tmpfile):
      if stale_tmpfile:
        self._remove_tmpfile(stale_tmpfile.name, status.writer_spec)

    self._try_to_clean_garbage(status.writer_spec)

    shard_state.writer_state = {"filename": status.mainfile.name}
Example #2
0
 def _get_write_buffer(self):
   """Return the current temporary write buffer file.

   Raises:
     errors.FailJobError: if no buffer exists (begin_slice never ran).
   """
   buffer_file = self.status.tmpfile
   if buffer_file:
     return buffer_file
   raise errors.FailJobError(
       "write buffer called but empty, begin_slice missing?")
def test_fail_map(_):
  """Map handler that unconditionally fails the whole job."""
  failure = errors.FailJobError()
  raise failure
def test_failed_map(_):
    """Map handler that raises immediately, failing the map."""
    failure = errors.FailJobError()
    raise failure
Example #5
0
class RecordsPool(object):
    """Pool of append operations for records files.

    Buffers appended records in memory and flushes them as a single
    records-formatted batch to the target file once the buffer exceeds
    the flush threshold.
    """

    # Approximate number of bytes of overhead for storing one record.
    _RECORD_OVERHEAD_BYTES = 10

    def __init__(self,
                 filename,
                 flush_size_chars=_FILES_API_FLUSH_SIZE,
                 ctx=None,
                 exclusive=False):
        """Constructor.

        Args:
          filename: file name to write data to as string.
          flush_size_chars: buffer flush threshold as int.
          ctx: mapreduce context as context.Context.
          exclusive: a boolean flag indicating if the pool has an exclusive
            access to the file. If it is True, then it's possible to write
            bigger chunks of data.
        """
        self._flush_size = flush_size_chars
        self._buffer = []
        self._size = 0
        self._filename = filename
        self._ctx = ctx
        self._exclusive = exclusive

    def append(self, data):
        """Append one record's data to the pool.

        Flushes first if adding the record would exceed the flush
        threshold, and again afterwards if the buffer is still too large.

        Args:
          data: record payload as a string.

        Raises:
          errors.Error: if the record exceeds the files API single-request
            limit and the pool does not hold exclusive access to the file.
        """
        data_length = len(data)
        if self._size + data_length > self._flush_size:
            self.flush()

        if not self._exclusive and data_length > _FILES_API_MAX_SIZE:
            raise errors.Error("Too big input %s (%s)." %
                               (data_length, _FILES_API_MAX_SIZE))
        # `raise` above exits, so no `else` is needed here.
        self._buffer.append(data)
        self._size += data_length

        if self._size > self._flush_size:
            self.flush()

    def flush(self):
        """Flush pool contents.

        Serializes the buffered records into the records format in memory,
        appends the serialized batch to the file in one request, updates
        I/O counters, and resets the buffer.

        Raises:
          errors.Error: if the serialized batch is too large for a single
            non-exclusive write (writes could interleave).
          errors.RetrySliceError: on a transient files API failure.
          errors.FailJobError: if the target file no longer exists.
        """
        try:
            # Write data to in-memory buffer first.
            buf = _StringWriter()
            with records.RecordsWriter(buf) as w:
                for record in self._buffer:
                    w.write(record)

            str_buf = buf.to_string()
            if not self._exclusive and len(str_buf) > _FILES_API_MAX_SIZE:
                # Shouldn't really happen because of flush size.
                raise errors.Error(
                    "Buffer too big. Can't write more than %s bytes in one request: "
                    "risk of writes interleaving. Got: %s" %
                    (_FILES_API_MAX_SIZE, len(str_buf)))

            # Write data to file.
            start_time = time.time()
            with files.open(self._filename,
                            "a",
                            exclusive_lock=self._exclusive) as f:
                f.write(str_buf)
                if self._ctx:
                    operation.counters.Increment(COUNTER_IO_WRITE_BYTES,
                                                 len(str_buf))(self._ctx)
            if self._ctx:
                operation.counters.Increment(
                    COUNTER_IO_WRITE_MSEC,
                    int((time.time() - start_time) * 1000))(self._ctx)

            # reset buffer
            self._buffer = []
            self._size = 0
            gc.collect()
        # Use `except ... as`, valid on Python 2.6+ and required on Python 3,
        # instead of the deprecated `except X, e` comma form.
        except files.UnknownError as e:
            logging.warning("UnknownError: %s", e)
            raise errors.RetrySliceError()
        except files.ExistenceError as e:
            logging.warning("ExistenceError: %s", e)
            raise errors.FailJobError("Existence error: %s" % (e))