Example #1
0
  def finalize(self, ctx, shard_state):
    """Finalize this shard's output file and record writer state.

    Closes the streaming buffer and stores into shard_state.writer_state
    whatever an external consumer needs to locate the output.
    """
    self._streaming_buffer.close()

    if not self._no_dup:
      # Simple case: the buffer's GCS object is the final file.
      shard_state.writer_state = {"filename": self._streaming_buffer.name}
      return

    seg_name = self._streaming_buffer.name
    # Stamp the count of valid bytes onto the seg's metadata via a
    # self-copy, so readers can ignore trailing garbage.
    # TODO(user): This doesn't work properly when the filenames have
    # spaces in them. It's not being re-quoted properly. b/12066572
    cloudstorage_api._copy2(
        seg_name,
        seg_name,
        metadata={self._VALID_LENGTH: self._streaming_buffer.tell()})

    # The filename user requested.
    spec = ctx.mapreduce_spec
    params = _get_params(spec.mapper, allow_old=False)
    requested_filename = self._generate_filename(params,
                                                 spec.name,
                                                 spec.mapreduce_id,
                                                 shard_state.shard_number)
    seg_prefix, last_seg_index = seg_name.rsplit("-", 1)
    # This info is enough for any external process to combine
    # all segs into the final file.
    # TODO(user): Create a special input reader to combine segs.
    shard_state.writer_state = {self._SEG_PREFIX: seg_prefix + "-",
                                self._LAST_SEG_INDEX: int(last_seg_index),
                                "filename": requested_filename}
Example #2
0
  def _recover(self, mr_spec, shard_number, shard_attempt):
    """Recover from a slice retry by sealing the current seg.

    If the seg being written before the failure holds any valid data, it is
    closed out (reconciling with whatever GCS already persisted) and its
    valid length is stamped into the object's metadata; a fresh writer for
    the next seg is then returned.

    Args:
      mr_spec: the mapreduce spec; only .mapper, .name and .mapreduce_id
        are read here — presumably a model.MapreduceSpec, confirm at caller.
      shard_number: shard number, forwarded to _generate_filename.
      shard_attempt: shard attempt, forwarded to _generate_filename.

    Returns:
      A new writer instance pointed at the next seg.
    """
    next_seg_index = self._seg_index

    # Save the current seg if it actually has something.
    # Remember self._streaming_buffer is the pickled instance
    # from the previous slice.
    if self._seg_valid_length != 0:
      try:
        # NOTE(review): assumes _get_offset_from_gcs() returns the last
        # offset GCS persisted, so +1 is the next write offset — confirm.
        gcs_next_offset = self._streaming_buffer._get_offset_from_gcs() + 1
        # If GCS is ahead of us, just force close.
        if gcs_next_offset > self._streaming_buffer.tell():
          self._streaming_buffer._force_close(gcs_next_offset)
        # Otherwise flush in memory contents too.
        else:
          self._streaming_buffer.close()
      except cloudstorage.FileClosedError:
        # The buffer was already closed (e.g. by a previous attempt);
        # nothing left to flush.
        pass
      # Self-copy to stamp the valid byte count into metadata so readers
      # can ignore anything written past that point.
      cloudstorage_api._copy2(
          self._streaming_buffer.name,
          self._streaming_buffer.name,
          metadata={self._VALID_LENGTH:
                    self._seg_valid_length})
      next_seg_index = self._seg_index + 1

    writer_spec = _get_params(mr_spec.mapper, allow_old=False)
    # Create name for the new seg.
    key = self._generate_filename(
        writer_spec, mr_spec.name,
        mr_spec.mapreduce_id,
        shard_number,
        shard_attempt,
        next_seg_index)
    new_writer = self._create(writer_spec, key)
    new_writer._seg_index = next_seg_index
    return new_writer
  def testCopy2(self):
    """_copy2 clones content, metadata, and stat fields to a new object."""
    # Write a small source object carrying custom metadata.
    with cloudstorage.open(TESTFILE, 'w',
                           'text/foo', {'x-goog-meta-foo': 'foo'}) as f:
      f.write('abcde')

    dst = TESTFILE + 'copy'
    # The destination must not exist before the copy.
    self.assertRaises(cloudstorage.NotFoundError, cloudstorage.stat, dst)
    cloudstorage_api._copy2(TESTFILE, dst)

    # Every stat field of interest is carried over verbatim.
    src_stat = cloudstorage.stat(TESTFILE)
    dst_stat = cloudstorage.stat(dst)
    for field in ('st_ctime', 'st_size', 'etag', 'content_type', 'metadata'):
      self.assertEqual(getattr(src_stat, field), getattr(dst_stat, field))

    # The copy's payload matches the original's.
    with cloudstorage.open(dst) as f:
      self.assertEqual('abcde', f.read())
    def testCopy2(self):
        """Copying an object duplicates both its payload and its stat."""
        with cloudstorage.open(TESTFILE, 'w', 'text/foo',
                               {'x-goog-meta-foo': 'foo'}) as f:
            f.write('abcde')

        dst = TESTFILE + 'copy'
        # No destination object may exist before the copy runs.
        self.assertRaises(cloudstorage.NotFoundError, cloudstorage.stat, dst)
        cloudstorage_api._copy2(TESTFILE, dst)

        original = cloudstorage.stat(TESTFILE)
        duplicate = cloudstorage.stat(dst)
        # All stat fields are carried over unchanged.
        self.assertEqual(original.st_ctime, duplicate.st_ctime)
        self.assertEqual(original.st_size, duplicate.st_size)
        self.assertEqual(original.etag, duplicate.etag)
        self.assertEqual(original.content_type, duplicate.content_type)
        self.assertEqual(original.metadata, duplicate.metadata)

        # Content round-trips through the copy.
        with cloudstorage.open(dst) as f:
            self.assertEqual('abcde', f.read())
  def testCopy2ReplacesMetadata(self):
    """A self-copy with metadata= rewrites metadata but not the payload."""
    with cloudstorage.open(TESTFILE, 'w',
                           'text/foo', {'x-goog-meta-foo': 'foo'}) as f:
      f.write('abcde')
    src_stat = cloudstorage.stat(TESTFILE)

    # Copying a file onto itself with new metadata acts as a metadata update.
    cloudstorage_api._copy2(TESTFILE, TESTFILE,
                            metadata={'x-goog-meta-foo': 'bar',
                                      'content-type': 'text/bar'})

    dst_stat = cloudstorage.stat(TESTFILE)
    # Size, etag and creation time survive the self-copy untouched...
    for field in ('st_size', 'etag', 'st_ctime'):
      self.assertEqual(getattr(src_stat, field), getattr(dst_stat, field))
    # ...while content type and custom metadata are replaced.
    self.assertEqual('text/foo', src_stat.content_type)
    self.assertEqual('text/bar', dst_stat.content_type)
    self.assertEqual('foo', src_stat.metadata['x-goog-meta-foo'])
    self.assertEqual('bar', dst_stat.metadata['x-goog-meta-foo'])

    # The payload itself is unchanged.
    with cloudstorage.open(TESTFILE) as f:
      self.assertEqual('abcde', f.read())
    def testCopy2ReplacesMetadata(self):
        """Self-copy with a metadata dict swaps metadata, keeps the data."""
        with cloudstorage.open(TESTFILE, 'w', 'text/foo',
                               {'x-goog-meta-foo': 'foo'}) as f:
            f.write('abcde')
        before = cloudstorage.stat(TESTFILE)

        new_metadata = {'x-goog-meta-foo': 'bar', 'content-type': 'text/bar'}
        cloudstorage_api._copy2(TESTFILE, TESTFILE, metadata=new_metadata)

        after = cloudstorage.stat(TESTFILE)
        # Object identity fields survive the self-copy.
        self.assertEqual(before.st_size, after.st_size)
        self.assertEqual(before.etag, after.etag)
        self.assertEqual(before.st_ctime, after.st_ctime)
        # Metadata fields are replaced wholesale.
        self.assertEqual('text/foo', before.content_type)
        self.assertEqual('text/bar', after.content_type)
        self.assertEqual('foo', before.metadata['x-goog-meta-foo'])
        self.assertEqual('bar', after.metadata['x-goog-meta-foo'])

        # Payload is left intact.
        with cloudstorage.open(TESTFILE) as f:
            self.assertEqual('abcde', f.read())
Example #7
0
 def copy_to(path, target_path):
     """Copy the object at path to target_path via the internal copy API."""
     # TODO(jeremydw): Replace
     result = cloudstorage_api._copy2(path, target_path)
     return result
 def _make_api_call(bucket, file_list, destination_file, content_type, retry_params, _account_id):
   """
       Internal Only
       Makes the actual calls.
       Currently stubbed because the dev server cloudstorage_stub.py
         does not handle compose requests.
       TODO: When the dev server gets patch please remove the stub
   Args:
     bucket: Bucket where the files are kept
     file_list: list of dicts with the file name (see compose argument "list_of_files" for format).
     destination_file: Path to the destination file.
     content_type: Content type for the destination file.
     retry_params: An api_utils.RetryParams for this call to GCS. If None,
     the default one is used.
   _account_id: Internal-use only.
   """
   if len(file_list) == 0:
     raise ValueError("Unable to merge 0 files")
   if len(file_list) == 1:
     _copy2(bucket + file_list[0]["file_name"], destination_file)
     return
   '''
   Needed until cloudstorage_stub.py is updated to accept compose requests
   TODO: When patched remove the True flow from this if.
   '''
   if 'development' in os.environ.get('SERVER_SOFTWARE', '').lower():
     '''
     Below is making the call to the Development server
     '''
     with open(destination_file, "w", content_type=content_type) as gcs_merge:
       for source_file in file_list:
         try:
           with open(bucket + source_file['file_name'], "r") as gcs_source:
             gcs_merge.write(gcs_source.read())
         except cloud_errors.NotFoundError:
           logging.warn("File not found %s, skipping", source_file['file_name'])
   else:
     '''
     Below is making the call to the Production server
     '''
     xml = ""
     for item in file_list:
       generation = item.get("Generation", "")
       generation_match = item.get("IfGenerationMatch", "")
       if generation != "":
         generation = "<Generation>%s</Generation>" % generation
       if generation_match != "":
         generation_match = "<IfGenerationMatch>%s</IfGenerationMatch>" % generation_match
       xml += "<Component><Name>%s</Name>%s%s</Component>" % \
                 (item["file_name"], generation, generation_match)
     xml = "<ComposeRequest>%s</ComposeRequest>" % xml
     logging.info(xml)
     # pylint: disable=protected-access
     api = cloudstorage.storage_api._get_storage_api(retry_params=retry_params,
                                  account_id=_account_id)
     headers = {"Content-Type" : content_type}
     # pylint: disable=no-member
     status, resp_headers, content = api.put_object(
               cloudstorage.api_utils._quote_filename(destination_file) + "?compose",
                                         payload=xml,
                                         headers=headers)
     # TODO: confirm whether [200] is sufficient, or if 204 etc. might be returned?
     cloud_errors.check_status(status, [200], destination_file, resp_headers, body=content)
Example #9
0
 def copy_to(path, target_path):
     # Thin wrapper around the internal GCS copy API.
     # TODO(jeremydw): Replace
     return cloudstorage_api._copy2(
         path, target_path)