Example no. 1
0
  def _recover(self, mr_spec, shard_number, shard_attempt):
    """Create a new writer to continue output after a failed shard attempt.

    If the current seg contains valid data, that seg file is finalized:
    it is closed (at the offset GCS actually persisted, if that is ahead
    of the local buffer) and annotated with its valid length, and the
    next seg index is used for the new writer. Otherwise the current seg
    index is reused.

    Args:
      mr_spec: the MapReduce spec; provides mapper params, job name and id.
      shard_number: int, the shard this writer belongs to.
      shard_attempt: int, the current attempt number for the shard.

    Returns:
      A new writer instance with its _seg_index set to the seg it
      should write to.
    """
    next_seg_index = self._seg_index




    # Only finalize the current seg if it holds any valid bytes.
    if self._seg_valid_length != 0:
      try:
        # First byte after what GCS reports as persisted for this file.
        gcs_next_offset = self._streaming_buffer._get_offset_from_gcs() + 1

        # GCS persisted more than the local buffer knows about
        # (presumably a prior attempt got further before dying);
        # close the file at the offset GCS reports.
        if gcs_next_offset > self._streaming_buffer.tell():
          self._streaming_buffer._force_close(gcs_next_offset)

        else:
          self._streaming_buffer.close()
      except cloudstorage.FileClosedError:
        # File was already closed (e.g. by a previous attempt);
        # nothing more to flush.
        pass
      # Copy the file onto itself to attach the valid-length metadata,
      # so readers can ignore any trailing bytes beyond it.
      cloudstorage_api.copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH:
                    self._seg_valid_length})
      next_seg_index = self._seg_index + 1

    writer_spec = self.get_params(mr_spec.mapper, allow_old=False)

    # Build the filename for the (possibly incremented) seg and create
    # a fresh writer for it.
    key = self._generate_filename(
        writer_spec, mr_spec.name,
        mr_spec.mapreduce_id,
        shard_number,
        shard_attempt,
        next_seg_index)
    new_writer = self._create(writer_spec, key)
    new_writer._seg_index = next_seg_index
    return new_writer
Example no. 2
0
    def _recover(self, mr_spec, shard_number, shard_attempt):
        """Create a new writer to continue output after a failed shard attempt.

        If the current seg contains valid data, that seg file is
        finalized: it is closed (at the offset GCS actually persisted,
        if that is ahead of the local buffer) and annotated with its
        valid length, and the next seg index is used for the new
        writer. Otherwise the current seg index is reused.

        Args:
          mr_spec: the MapReduce spec; provides mapper params, job name
            and id.
          shard_number: int, the shard this writer belongs to.
          shard_attempt: int, the current attempt number for the shard.

        Returns:
          A new writer instance with its _seg_index set to the seg it
          should write to.
        """
        next_seg_index = self._seg_index

        # Only finalize the current seg if it holds any valid bytes.
        if self._seg_valid_length != 0:
            try:
                # First byte after what GCS reports as persisted.
                gcs_next_offset = self._streaming_buffer._get_offset_from_gcs(
                ) + 1

                # GCS persisted more than the local buffer knows about
                # (presumably a prior attempt got further before dying);
                # close the file at the offset GCS reports.
                if gcs_next_offset > self._streaming_buffer.tell():
                    self._streaming_buffer._force_close(gcs_next_offset)

                else:
                    self._streaming_buffer.close()
            except cloudstorage.FileClosedError:
                # File was already closed (e.g. by a previous attempt);
                # nothing more to flush.
                pass
            # Copy the file onto itself to attach the valid-length
            # metadata, so readers can ignore trailing bytes beyond it.
            cloudstorage_api.copy2(
                self._streaming_buffer.name,
                self._streaming_buffer.name,
                metadata={self._VALID_LENGTH: self._seg_valid_length})
            next_seg_index = self._seg_index + 1

        writer_spec = self.get_params(mr_spec.mapper, allow_old=False)

        # Build the filename for the (possibly incremented) seg and
        # create a fresh writer for it.
        key = self._generate_filename(writer_spec, mr_spec.name,
                                      mr_spec.mapreduce_id, shard_number,
                                      shard_attempt, next_seg_index)
        new_writer = self._create(writer_spec, key)
        new_writer._seg_index = next_seg_index
        return new_writer
Example no. 3
0
  def finalize(self, ctx, shard_state):
    """Close the output file and record output location on shard_state.

    In no-duplicate mode the closed seg is annotated with its valid
    length, and writer_state records the seg filename prefix, the last
    seg index, and the canonical output filename. Otherwise
    writer_state records just the output filename.

    Args:
      ctx: the mapreduce context; provides mapreduce_spec.
      shard_state: shard state object whose writer_state is populated.
    """
    self._streaming_buffer.close()

    if self._no_dup:
      # Copy the file onto itself to record the valid length in its
      # metadata; after a clean close, everything written is valid.
      cloudstorage_api.copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH: self._streaming_buffer.tell()})


      mr_spec = ctx.mapreduce_spec
      writer_spec = self.get_params(mr_spec.mapper, allow_old=False)
      # Canonical per-shard filename (no seg index), used by readers.
      filename = self._generate_filename(writer_spec,
                                         mr_spec.name,
                                         mr_spec.mapreduce_id,
                                         shard_state.shard_number)
      seg_filename = self._streaming_buffer.name
      # Seg filenames end with "-<index>"; split off the final index.
      prefix, last_index = seg_filename.rsplit("-", 1)



      # Record everything needed to enumerate this shard's segs.
      shard_state.writer_state = {self._SEG_PREFIX: prefix + "-",
                                  self._LAST_SEG_INDEX: int(last_index),
                                  "filename": filename}
    else:
      shard_state.writer_state = {"filename": self._streaming_buffer.name}
Example no. 4
0
    def finalize(self, ctx, shard_state):
        """Close the output file and record output location on shard_state.

        In no-duplicate mode the closed seg is annotated with its valid
        length, and writer_state records the seg filename prefix, the
        last seg index, and the canonical output filename. Otherwise
        writer_state records just the output filename.

        Args:
          ctx: the mapreduce context; provides mapreduce_spec.
          shard_state: shard state object whose writer_state is
            populated.
        """
        self._streaming_buffer.close()

        if self._no_dup:
            # Copy the file onto itself to record the valid length in
            # its metadata; after a clean close, everything written is
            # valid.
            cloudstorage_api.copy2(
                self._streaming_buffer.name,
                self._streaming_buffer.name,
                metadata={self._VALID_LENGTH: self._streaming_buffer.tell()})

            mr_spec = ctx.mapreduce_spec
            writer_spec = self.get_params(mr_spec.mapper, allow_old=False)
            # Canonical per-shard filename (no seg index), for readers.
            filename = self._generate_filename(writer_spec, mr_spec.name,
                                               mr_spec.mapreduce_id,
                                               shard_state.shard_number)
            seg_filename = self._streaming_buffer.name
            # Seg filenames end with "-<index>"; split off the index.
            prefix, last_index = seg_filename.rsplit("-", 1)

            # Record everything needed to enumerate this shard's segs.
            shard_state.writer_state = {
                self._SEG_PREFIX: prefix + "-",
                self._LAST_SEG_INDEX: int(last_index),
                "filename": filename
            }
        else:
            shard_state.writer_state = {
                "filename": self._streaming_buffer.name
            }