Ejemplo n.º 1
0
 def get_filesystem(path):
     # type: (str) -> FileSystem
     """Get the correct filesystem for the specified path.

     The scheme of ``path`` is taken from ``FileSystems.get_scheme`` and
     matched against ``scheme()`` of every class returned by
     ``FileSystem.get_all_subclasses()``.

     Args:
       path: Path whose scheme selects the filesystem class.

     Returns:
       An instance of the single matching class, constructed with
       ``pipeline_options`` set to ``FileSystems._pipeline_options`` or,
       when that is falsy, ``RuntimeValueProvider.runtime_options``.

     Raises:
       ValueError: If no class or more than one class matches the scheme.
         A ``ValueError`` raised anywhere inside the lookup is re-raised
         unchanged.
       BeamIOError: If any other exception is raised during the lookup; the
         exception is passed along in a dict keyed by ``path``.
     """
     try:
         path_scheme = FileSystems.get_scheme(path)
         systems = [
             fs for fs in FileSystem.get_all_subclasses()
             if fs.scheme() == path_scheme
         ]
         if not systems:
             raise ValueError(
                 'Unable to get filesystem from specified path, please use the '
                 'correct path or ensure the required dependency is installed, '
                 'e.g., pip install apache_beam[gcp]. Path specified: %s' %
                 path)
         elif len(systems) == 1:
             # Pipeline options could come either from the Pipeline itself (using
             # direct runner), or via RuntimeValueProvider (other runners).
             options = (FileSystems._pipeline_options
                        or RuntimeValueProvider.runtime_options)
             return systems[0](pipeline_options=options)
         else:
             raise ValueError('Found more than one filesystem for path %s' %
                              path)
     except ValueError:
         # Keep the descriptive lookup errors as-is instead of wrapping them.
         raise
     except Exception as e:
         raise BeamIOError('Unable to get the Filesystem', {path: e})
Ejemplo n.º 2
0
 def get_filesystem(path):
     """Look up and instantiate the filesystem that serves ``path``.

     Every class returned by ``FileSystem.get_all_subclasses()`` whose
     ``scheme()`` equals the scheme of ``path`` is a candidate; exactly one
     candidate must exist.

     Args:
       path: Path whose scheme selects the filesystem class.

     Returns:
       An instance of the only candidate class, built with the pipeline
       options.

     Raises:
       ValueError: If there is no candidate or more than one candidate.
       BeamIOError: If any non-``ValueError`` exception occurs on the way.
     """
     try:
         wanted_scheme = FileSystems.get_scheme(path)
         candidates = []
         for fs_class in FileSystem.get_all_subclasses():
             if fs_class.scheme() == wanted_scheme:
                 candidates.append(fs_class)

         if not candidates:
             raise ValueError('Unable to get the Filesystem for path %s' %
                              path)
         if len(candidates) > 1:
             raise ValueError('Found more than one filesystem for path %s' %
                              path)

         # With the direct runner the options live on the Pipeline itself;
         # other runners hand them over through RuntimeValueProvider.
         options = FileSystems._pipeline_options
         if not options:
             options = RuntimeValueProvider.runtime_options
         return candidates[0](pipeline_options=options)
     except ValueError:
         raise
     except Exception as e:
         raise BeamIOError('Unable to get the Filesystem', {path: e})
Ejemplo n.º 3
0
 def _path_open(self, path, mode, mime_type='application/octet-stream',
                compression_type=CompressionTypes.AUTO):
   """Open ``path`` with the builtin ``open`` and wrap it when compressed.

   Args:
     path: Location of the file to open.
     mode: Mode string handed to ``open``.
     mime_type: Accepted as part of the signature; not used in this body.
     compression_type: Requested compression, resolved through
       ``FileSystem._get_compression_type`` together with ``path``.

   Returns:
     The raw file object when the resolved type is
     ``CompressionTypes.UNCOMPRESSED``, otherwise a ``CompressedFile``
     wrapping it.
   """
   effective_type = FileSystem._get_compression_type(path, compression_type)
   handle = open(path, mode)
   needs_wrapper = not (effective_type == CompressionTypes.UNCOMPRESSED)
   if needs_wrapper:
     return CompressedFile(handle, compression_type=effective_type)
   return handle
Ejemplo n.º 4
0
 def _path_open(self, path, mode, mime_type='application/octet-stream',
                compression_type=CompressionTypes.AUTO):
   """Open ``path`` through ``s3io.S3IO`` and wrap it when compressed.

   Args:
     path: Location of the object to open.
     mode: Mode string handed to ``S3IO.open``.
     mime_type: Requested mime type; the value actually sent is whatever
       ``CompressionTypes.mime_type`` returns for it and the resolved
       compression type.
     compression_type: Requested compression, resolved through
       ``FileSystem._get_compression_type`` together with ``path``.

   Returns:
     The raw file object when the resolved type is
     ``CompressionTypes.UNCOMPRESSED``, otherwise a ``CompressedFile``
     wrapping it.
   """
   codec = FileSystem._get_compression_type(path, compression_type)
   content_type = CompressionTypes.mime_type(codec, mime_type)
   client = s3io.S3IO()
   stream = client.open(path, mode, mime_type=content_type)
   return (
       stream if codec == CompressionTypes.UNCOMPRESSED
       else CompressedFile(stream, compression_type=codec))
Ejemplo n.º 5
0
 def get_filesystem(path):
   """Instantiate the filesystem class registered for the scheme of ``path``.

   Args:
     path: Path whose scheme selects the filesystem class.

   Returns:
     A new instance of the single class whose ``scheme()`` equals the scheme
     of ``path``, created without arguments.

   Raises:
     ValueError: If zero classes or several classes match the scheme.
     BeamIOError: If any non-``ValueError`` exception occurs on the way.
   """
   try:
     wanted = FileSystems.get_scheme(path)
     matching = [
         candidate for candidate in FileSystem.get_all_subclasses()
         if candidate.scheme() == wanted
     ]
     match_count = len(matching)
     if match_count == 1:
       return matching[0]()
     if match_count == 0:
       raise ValueError('Unable to get the Filesystem for path %s' % path)
     raise ValueError('Found more than one filesystem for path %s' % path)
   except ValueError:
     raise
   except Exception as e:
     raise BeamIOError('Unable to get the Filesystem', {path: e})
Ejemplo n.º 6
0
 def get_filesystem(path):
   """Resolve ``path`` to a filesystem instance configured with options.

   Args:
     path: Path whose scheme selects the filesystem class.

   Returns:
     An instance of the single class whose ``scheme()`` equals the scheme of
     ``path``, constructed with ``pipeline_options``.

   Raises:
     ValueError: If zero classes or several classes match the scheme.
     BeamIOError: If any non-``ValueError`` exception occurs on the way.
   """
   try:
     target_scheme = FileSystems.get_scheme(path)
     found = []
     for fs_type in FileSystem.get_all_subclasses():
       if fs_type.scheme() == target_scheme:
         found.append(fs_type)

     how_many = len(found)
     if how_many == 1:
       # The Pipeline supplies the options under the direct runner; other
       # runners provide them through RuntimeValueProvider.
       pipeline_opts = (
           FileSystems._pipeline_options or
           RuntimeValueProvider.runtime_options)
       return found[0](pipeline_options=pipeline_opts)
     if how_many == 0:
       raise ValueError('Unable to get the Filesystem for path %s' % path)
     raise ValueError('Found more than one filesystem for path %s' % path)
   except ValueError:
     raise
   except Exception as e:
     raise BeamIOError('Unable to get the Filesystem', {path: e})
Ejemplo n.º 7
0
 def get_filesystem(path):
     """Return a new filesystem object matching the scheme of ``path``.

     Args:
       path: Path whose scheme selects the filesystem class.

     Returns:
       A new instance of the only class whose ``scheme()`` equals the scheme
       of ``path``, created without arguments.

     Raises:
       ValueError: If no class or more than one class matches the scheme.
       BeamIOError: If any non-``ValueError`` exception occurs on the way.
     """
     try:
         scheme = FileSystems.get_scheme(path)
         handlers = [
             cls for cls in FileSystem.get_all_subclasses()
             if cls.scheme() == scheme
         ]
         if not handlers:
             raise ValueError('Unable to get the Filesystem for path %s' %
                              path)
         if len(handlers) != 1:
             raise ValueError('Found more than one filesystem for path %s' %
                              path)
         return handlers[0]()
     except ValueError:
         raise
     except Exception as e:
         raise BeamIOError('Unable to get the Filesystem', {path: e})