Beispiel #1
0
    def _rename_gcs_batch(src_dest_pairs):
        """Renames GCS objects in batches: copy each source, then delete it.

        Sources are deleted only for pairs whose copy reported no exception.

        Args:
          src_dest_pairs: iterable of (src, dest) pairs to rename.

        Returns:
          A list of (src, dest, exception) tuples, one for every copy or
          delete that reported an exception. Empty when nothing failed.
        """
        # Prepare batches of at most gcsio.MAX_BATCH_OPERATION_SIZE pairs.
        gcs_batches = []
        gcs_current_batch = []
        for src, dest in src_dest_pairs:
            gcs_current_batch.append((src, dest))
            if len(gcs_current_batch) == gcsio.MAX_BATCH_OPERATION_SIZE:
                gcs_batches.append(gcs_current_batch)
                gcs_current_batch = []
        if gcs_current_batch:
            gcs_batches.append(gcs_current_batch)

        # Execute GCS renames if any and return exceptions.
        exceptions = []
        for batch in gcs_batches:
            copy_statuses = gcsio.GcsIO().copy_batch(batch)
            copy_succeeded = []
            for src, dest, exception in copy_statuses:
                if exception:
                    exceptions.append((src, dest, exception))
                else:
                    copy_succeeded.append((src, dest))
            delete_batch = [src for src, _ in copy_succeeded]
            delete_statuses = gcsio.GcsIO().delete_batch(delete_batch)
            for i, (src, exception) in enumerate(delete_statuses):
                if exception:
                    # copy_succeeded holds (src, dest) pairs; report only the
                    # destination so the tuple has the same shape as the copy
                    # failures above.
                    dest = copy_succeeded[i][1]
                    exceptions.append((src, dest, exception))
        return exceptions
Beispiel #2
0
def open_local_or_gcs(path, mode):
    """Opens `path` with `mode`, via GCS when the path starts with 'gs://'.

    A GCS open that raises is logged and attempted exactly one more time
    after a 10 second pause; an exception from the second attempt propagates
    to the caller. Any other path is opened with the builtin `open`.
    """
    if not path.startswith('gs://'):
        return open(path, mode)

    try:
        return gcsio.GcsIO().open(path, mode)
    except Exception as first_error:  # pylint: disable=broad-except
        # Single retry to work around flaky gcs calls.
        logging.error('Retrying after exception reading gcs file: %s',
                      first_error)
        time.sleep(10)
        return gcsio.GcsIO().open(path, mode)
Beispiel #3
0
    def size_of_files_in_glob(path, file_names=None):
        """Returns a map of file names to sizes.

        Args:
          path: a file path pattern that reads the size of all the files.
          file_names: optional list of file names that sizes are needed for.
            This supports eventually consistent sources, where two
            expansions of the same glob might yield different files.
        """
        if not path.startswith('gs://'):
            names = ChannelFactory.glob(path) if file_names is None else file_names
            return {name: ChannelFactory.size_in_bytes(name) for name in names}

        listed_sizes = gcsio.GcsIO().size_of_files_in_glob(path)
        if file_names is None:
            return listed_sizes

        # The list API in GCS is eventually consistent, so any requested file
        # missing from the listing gets its size fetched directly.
        sizes = {}
        for name in file_names:
            if name in listed_sizes:
                sizes[name] = listed_sizes[name]
            else:
                sizes[name] = ChannelFactory.size_in_bytes(name)
        return sizes
Beispiel #4
0
def glob_files(path):
  """Returns the paths matching the glob pattern `path`.

  Patterns starting with 'gs://' are expanded with `gcsio.GcsIO().glob`;
  every other pattern is expanded with `glob.glob`.
  """
  if not path.startswith('gs://'):
    return glob.glob(path)
  return gcsio.GcsIO().glob(path)
Beispiel #5
0
 def rm(path):
     """Deletes the single file at `path`.

     Paths starting with 'gs://' are deleted through `gcsio.GcsIO().delete`.
     Other paths are removed with `os.remove`, and an OSError from that call
     is re-raised as IOError.
     """
     if not path.startswith('gs://'):
         try:
             os.remove(path)
         except OSError as os_error:
             raise IOError(os_error)
         return
     gcsio.GcsIO().delete(path)
Beispiel #6
0
    def size_in_bytes(path):
        """Returns the size of a file in bytes.

        Args:
          path: a string that gives the path of a single file. Paths starting
            with 'gs://' are sized through `gcsio.GcsIO().size`, all others
            through `os.path.getsize`.
        """
        if not path.startswith('gs://'):
            return os.path.getsize(path)
        return gcsio.GcsIO().size(path)
Beispiel #7
0
 def rename(src, dest):
     """Renames `src` to `dest`, on GCS or on the local filesystem.

     Args:
       src: source path. A 'gs://' prefix selects the GCS code path.
       dest: destination path. Must also start with 'gs://' when `src` does.

     Raises:
       ValueError: if `src` is a GCS path but `dest` is not.
       IOError: if the local `os.rename` fails with an OSError.
     """
     if src.startswith('gs://'):
         if not dest.startswith('gs://'):
             # Interpolate the path into the message; passing it as a second
             # exception argument would leave the '%r' placeholder unformatted.
             raise ValueError('Destination %r must be GCS path.' % (dest,))
         gcsio.GcsIO().rename(src, dest)
     else:
         try:
             os.rename(src, dest)
         except OSError as err:
             raise IOError(err)
 def setUp(self):
     """Loads the 'Y.vcf.bgz' test file and registers it in a fake GCS client.

     The file's bytes are kept in `self._data` and added to the fake client
     as the object named by `self._file_name` ('gs://bucket/test').
     """
     local_path = testdata_util.get_full_file_path('Y.vcf.bgz')
     with open(local_path, mode='rb') as source:
         self._data = source.read()
     self.client = gcsio_test.FakeGcsClient()
     self.gcs = gcsio.GcsIO(self.client)
     self._file_name = 'gs://bucket/test'
     bucket, name = gcsio.parse_gcs_path(self._file_name)
     fake_file = gcsio_test.FakeFile(bucket, name, self._data, 1)
     self.client.objects.add_file(fake_file)
Beispiel #9
0
 def rmdir(path):
     """Removes everything under the directory `path`.

     For a 'gs://' path, a trailing '/' is appended when missing and every
     entry matched by globbing `path + '*'` is deleted one at a time. Any
     other path is removed with `shutil.rmtree`, and an OSError from that
     call is re-raised as IOError.
     """
     if not path.startswith('gs://'):
         try:
             shutil.rmtree(path)
         except OSError as os_error:
             raise IOError(os_error)
         return

     client = gcsio.GcsIO()
     prefix = path if path.endswith('/') else path + '/'
     # TODO: Threadpool?
     for entry in client.glob(prefix + '*'):
         client.delete(entry)
Beispiel #10
0
 def copytree(src, dest):
     """Copies the directory tree `src` to `dest`, on GCS or locally.

     For a local copy, an existing `dest` is removed first so the result
     contains only what is in `src`.

     Args:
       src: source directory. A 'gs://' prefix selects the GCS code path, in
         which case it must end with '/'.
       dest: destination directory. Must start with 'gs://' and end with '/'
         when `src` is a GCS path.

     Raises:
       ValueError: if `src` is a GCS path but `dest` is not.
       AssertionError: if a GCS `src` or `dest` does not end with '/'.
       IOError: if a local filesystem operation fails with an OSError.
     """
     if src.startswith('gs://'):
         if not dest.startswith('gs://'):
             # Interpolate the path into the message; passing it as a second
             # exception argument would leave the '%r' placeholder unformatted.
             raise ValueError('Destination %r must be GCS path.' % (dest,))
         assert src.endswith('/'), src
         assert dest.endswith('/'), dest
         gcsio.GcsIO().copytree(src, dest)
     else:
         try:
             if os.path.exists(dest):
                 shutil.rmtree(dest)
             shutil.copytree(src, dest)
         except OSError as err:
             raise IOError(err)
Beispiel #11
0
  def open(path,
           mode,
           mime_type='application/octet-stream',
           compression_type=CompressionTypes.AUTO):
    """Opens `path` on GCS or locally, wrapping compressed files.

    Args:
      path: file path; a 'gs://' prefix selects GCS.
      mode: mode passed through to the underlying open call.
      mime_type: mime type forwarded (via `CompressionTypes.mime_type`) when
        opening a GCS path.
      compression_type: a compression type; `CompressionTypes.AUTO` detects
        it from `path`.

    Returns:
      The raw file object when the resolved compression type is
      `CompressionTypes.UNCOMPRESSED`, otherwise a `_CompressedFile` wrapping
      the raw file.

    Raises:
      TypeError: if `compression_type` is neither AUTO nor a valid
        compression type.
    """
    if compression_type == CompressionTypes.AUTO:
      compression_type = CompressionTypes.detect_compression_type(path)
    elif not CompressionTypes.is_valid_compression_type(compression_type):
      message = ('compression_type must be CompressionType object but was %s'
                 % type(compression_type))
      raise TypeError(message)

    if path.startswith('gs://'):
      gcs_client = gcsio.GcsIO()
      raw_file = gcs_client.open(
          path,
          mode,
          mime_type=CompressionTypes.mime_type(compression_type, mime_type))
    else:
      raw_file = open(path, mode)

    if compression_type == CompressionTypes.UNCOMPRESSED:
      return raw_file
    return _CompressedFile(raw_file, compression_type=compression_type)
Beispiel #12
0
 def setUp(self):
     """Creates a fake GCS client and a `GcsIO` built on top of it."""
     fake_client = FakeGcsClient()
     self.client = fake_client
     self.gcs = gcsio.GcsIO(fake_client)
Beispiel #13
0
 def glob(path, limit=None):
     """Returns paths matching the pattern `path`, capped at `limit` entries.

     A 'gs://' pattern is passed, together with `limit`, to
     `gcsio.GcsIO().glob`. Any other pattern is expanded with `glob.glob` and
     the result is sliced to the first `limit` entries (all entries when
     `limit` is None).
     """
     if not path.startswith('gs://'):
         return glob.glob(path)[:limit]
     return gcsio.GcsIO().glob(path, limit)
Beispiel #14
0
 def exists(path):
     """Returns whether `path` exists.

     Paths starting with 'gs://' are checked with `gcsio.GcsIO().exists`,
     all others with `os.path.exists`.
     """
     if not path.startswith('gs://'):
         return os.path.exists(path)
     return gcsio.GcsIO().exists(path)
Beispiel #15
0
def glob_files(path):
    """Returns the paths matching the glob pattern `path`.

    'gs://' patterns are expanded with `gcsio.GcsIO().glob`, everything else
    with `glob.glob`.
    """
    is_gcs_pattern = path.startswith('gs://')
    return gcsio.GcsIO().glob(path) if is_gcs_pattern else glob.glob(path)
Beispiel #16
0
def file_exists(path):
    """Returns whether `path` exists, on GCS for 'gs://' paths, else locally."""
    on_gcs = path.startswith('gs://')
    return gcsio.GcsIO().exists(path) if on_gcs else os.path.exists(path)