Ejemplo n.º 1
0
 def _path_open(self, path, mode, mime_type='application/octet-stream',
                compression_type=CompressionTypes.AUTO):
   """Open ``path`` through ``S3IO`` and wrap it when compression applies.

   The effective compression type is resolved with
   ``FileSystem._get_compression_type`` and the effective MIME type with
   ``CompressionTypes.mime_type`` before the underlying file is opened.

   Args:
     path: string path of the file to open.
     mode: mode forwarded unchanged to ``S3IO.open``.
     mime_type: MIME type requested by the caller.
     compression_type: compression type requested by the caller.

   Returns:
     The raw file object returned by ``S3IO.open`` when the resolved
     compression type equals ``CompressionTypes.UNCOMPRESSED``; otherwise a
     ``CompressedFile`` wrapping that raw file.
   """
   resolved_compression = FileSystem._get_compression_type(
       path, compression_type)
   resolved_mime = CompressionTypes.mime_type(resolved_compression, mime_type)
   s3_file = s3io.S3IO().open(path, mode, mime_type=resolved_mime)

   # Only wrap the raw handle when some (de)compression is actually required.
   needs_wrapping = not (resolved_compression == CompressionTypes.UNCOMPRESSED)
   if needs_wrapping:
     return CompressedFile(s3_file, compression_type=resolved_compression)
   return s3_file
Ejemplo n.º 2
0
    def setUp(self):
        """Prepare ``self.aws`` and ``self.client`` for each test.

        Depending on ``self.USE_MOCK`` the tests run either against a fake
        S3 client or against the real client created by ``s3io.S3IO()``.
        """
        # Switch between the local mock client (True) and integration tests
        # against the real S3 client (False).
        self.USE_MOCK = True

        # For integration runs this must be an s3 path you can write test
        # data to. When only the mock is used any s3 path works. It should
        # end with a '/'.
        self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'

        if not self.USE_MOCK:
            # Real client: let S3IO build it and expose it to the tests.
            self.aws = s3io.S3IO()
            self.client = self.aws.client
            return

        # Mock client: register the test bucket before handing the client
        # to S3IO.
        self.client = fake_client.FakeS3Client()
        bucket_name, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
        self.client.known_buckets.add(bucket_name)
        self.aws = s3io.S3IO(self.client)
Ejemplo n.º 3
0
    def exists(self, path):
        """Report whether ``path`` exists, as answered by ``S3IO.exists``.

        Args:
          path: string path that needs to be checked.

        Returns:
          Whatever ``S3IO.exists`` returns for ``path`` (a boolean flag
          according to the original contract).

        Raises:
          ``BeamIOError``: if creating the client or the lookup raises any
            ``Exception``; the original error is attached under ``path``.
        """
        try:
            # Client construction stays inside the try so that its failures
            # are wrapped as well.
            s3_client = s3io.S3IO(options=self._options)
            return s3_client.exists(path)
        except Exception as err:  # pylint: disable=broad-except
            raise BeamIOError("exists() operation failed", {path: err})
Ejemplo n.º 4
0
  def delete(self, paths):
    """Delete the given paths via ``S3IO.delete_paths``.

    The original contract states that directories are deleted recursively;
    that behavior lives in ``delete_paths`` and is not visible here.

    Args:
      paths: list of paths that give the file objects to be deleted.

    Raises:
      ``BeamIOError``: if ``delete_paths`` reports a non-``None`` error for
        at least one path; the errors are attached keyed by path.
    """
    outcome_by_path = s3io.S3IO().delete_paths(paths)

    # Keep only the entries that actually carry an error.
    failures = {}
    for target, problem in outcome_by_path.items():
      if problem is not None:
        failures[target] = problem

    if failures:
      raise BeamIOError("Delete operation failed", failures)
Ejemplo n.º 5
0
    def last_updated(self, path):
        """Return the last-updated time of ``path`` from ``S3IO.last_updated``.

        Args:
          path: string path of file.

        Returns:
          Whatever ``S3IO.last_updated`` returns (per the original contract,
          a float UNIX Epoch time in seconds).

        Raises:
          ``BeamIOError``: if creating the client or the lookup raises any
            ``Exception`` (for example when the path doesn't exist).
        """
        try:
            s3_client = s3io.S3IO(options=self._options)
            return s3_client.last_updated(path)
        except Exception as err:  # pylint: disable=broad-except
            raise BeamIOError("last_updated operation failed", {path: err})
Ejemplo n.º 6
0
    def size(self, path):
        """Return the size of ``path`` as reported by ``S3IO.size``.

        Args:
          path: string path in question.

        Returns:
          Whatever ``S3IO.size`` returns (per the original contract, an int
          size of the path).

        Raises:
          ``BeamIOError``: if creating the client or the lookup raises any
            ``Exception`` (for example when the path doesn't exist).
        """
        try:
            s3_client = s3io.S3IO(options=self._options)
            return s3_client.size(path)
        except Exception as err:  # pylint: disable=broad-except
            raise BeamIOError("size() operation failed", {path: err})
Ejemplo n.º 7
0
    def copy(self, source_file_names, destination_file_names):
        """Copy each source to the destination at the same list position.

        Sources and destinations are paired positionally and handed to
        ``S3IO.copy_paths`` in a single call.

        Args:
          source_file_names: list of source file objects that need to be
            copied.
          destination_file_names: list of destinations for the new objects.

        Returns:
          The result of ``S3IO.copy_paths``, passed through unchanged. Any
          per-pair failures it reports are not inspected here.

        Raises:
          ``BeamIOError``: if the two lists differ in length.
        """
        if len(source_file_names) != len(destination_file_names):
            raise BeamIOError(
                'Unable to copy unequal number of sources and destinations')

        pairs = list(zip(source_file_names, destination_file_names))
        s3_client = s3io.S3IO(options=self._options)
        return s3_client.copy_paths(pairs)
Ejemplo n.º 8
0
    def checksum(self, path):
        """Return the checksum of ``path`` as reported by ``S3IO.checksum``.

        Args:
          path: string path of a file.

        Returns:
          Whatever ``S3IO.checksum`` returns (per the original contract, a
          string containing the checksum).

        Raises:
          ``BeamIOError``: if creating the client or the lookup raises any
            ``Exception`` (for example when the path isn't a file or
            doesn't exist).
        """
        try:
            s3_client = s3io.S3IO(options=self._options)
            return s3_client.checksum(path)
        except Exception as err:  # pylint: disable=broad-except
            raise BeamIOError("Checksum operation failed", {path: err})
Ejemplo n.º 9
0
    def metadata(self, path):
        """Build a ``FileMetadata`` for ``path`` from ``S3IO._status``.

        Args:
          path: string path of a file.

        Returns:
          A ``FileMetadata`` constructed from ``path`` and the ``'size'`` and
          ``'last_updated'`` entries of the status mapping.

        Raises:
          ``BeamIOError``: if creating the client, fetching the status or
            reading its entries raises any ``Exception`` (for example when
            the path isn't a file or doesn't exist).
        """
        try:
            status = s3io.S3IO(options=self._options)._status(path)
            file_size = status['size']
            modified = status['last_updated']
            return FileMetadata(path, file_size, modified)
        except Exception as err:  # pylint: disable=broad-except
            raise BeamIOError("Metadata operation failed", {path: err})
Ejemplo n.º 10
0
    def rename(self, source_file_names, destination_file_names):
        """Rename each source to the destination at the same list position.

        Source and destination lists must be of the same size; they are
        paired positionally and passed to ``S3IO.rename_files``.

        Args:
          source_file_names: list of file paths that need to be moved.
          destination_file_names: list of destination paths for the files.

        Raises:
          ``BeamIOError``: if the lists differ in length, or if
            ``rename_files`` reports a non-``None`` error for any pair; in
            the latter case errors are attached keyed by ``(src, dest)``.
        """
        if len(source_file_names) != len(destination_file_names):
            raise BeamIOError(
                'Unable to rename unequal number of sources and destinations')

        pairs = list(zip(source_file_names, destination_file_names))
        outcomes = s3io.S3IO(options=self._options).rename_files(pairs)

        # Collect the failed pairs; successful ones carry ``None`` as error.
        failures = {}
        for origin, target, problem in outcomes:
            if problem is not None:
                failures[(origin, target)] = problem

        if failures:
            raise BeamIOError("Rename operation failed", failures)
Ejemplo n.º 11
0
  def _list(self, dir_or_prefix):
    """Yield metadata for every entry ``S3IO.list_prefix`` returns.

    Args:
      dir_or_prefix: (string) A directory or location prefix (for filesystems
        that don't have directories).

    Returns:
      Generator of ``FileMetadata`` objects, one per ``(path, size)`` item of
      the mapping returned by ``list_prefix``.

    Raises:
      ``BeamIOError``: if creating the client, listing or building a
        ``FileMetadata`` raises any ``Exception``. Because this is a
        generator, the error surfaces while iterating, not at call time.
    """
    try:
      listing = s3io.S3IO().list_prefix(dir_or_prefix)
      for entry_path, entry_size in iteritems(listing):
        yield FileMetadata(entry_path, entry_size)
    except Exception as err:  # pylint: disable=broad-except
      raise BeamIOError("List operation failed", {dir_or_prefix: err})