def download_if_needed(uri, download_dir, fs=None):
    """Make a file available locally and return its local path.

    A remote URI is copied into download_dir; a local URI maps to itself,
    so there is nothing to download for it.

    Args:
        uri: (string) URI of file
        download_dir: (string) local directory to download file into
        fs: Optional FileSystem to use.

    Returns:
        (string) path to local file, or None when uri is None

    Raises:
        NotReadableError if URI cannot be read from
    """
    if uri is None:
        return None

    file_system = fs if fs else FileSystem.get_file_system(uri, 'r')

    local_path = get_local_path(uri, download_dir, fs=file_system)
    # Presumably creates the directory that will contain local_path
    # (use_dirname=True) -- confirm against make_dir.
    make_dir(local_path, use_dirname=True)

    needs_download = local_path != uri
    if needs_download:
        log.debug('Downloading {} to {}'.format(uri, local_path))

    # NOTE(review): copy_from is called even when local_path == uri; this
    # looks intentional so an unreadable local file still raises -- confirm.
    file_system.copy_from(uri, local_path)
    return local_path
def list_paths(uri, ext='', fs=None):
    """List paths under a URI using the file system that handles it.

    Args:
        uri: (string) URI to list
        ext: (string) forwarded to the file system's list_paths as ext
        fs: Optional FileSystem to use

    Returns:
        The result of fs.list_paths(uri, ext=ext), or None when uri is None
    """
    if uri is None:
        return None

    file_system = fs or FileSystem.get_file_system(uri, 'r')
    return file_system.list_paths(uri, ext=ext)
def test_sync_from_dir_noop_local(self):
    # Sync a local directory onto itself with delete=True and expect the
    # single file written beforehand to still be listed afterwards.
    file_path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    src_dir = os.path.join(self.temp_dir.name, 'lorem')
    make_dir(src_dir, check_empty=False)

    local_fs = FileSystem.get_file_system(src_dir, 'r')
    payload = bytes([0x00, 0x01])
    local_fs.write_bytes(file_path, payload)

    sync_from_dir(src_dir, src_dir, delete=True)

    listed = list_paths(src_dir)
    self.assertEqual(len(listed), 1)
def test_last_modified(self):
    # Write a text file locally and check that the file system reports its
    # last-modified time as a datetime.datetime.
    path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum1.txt')
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    fs = FileSystem.get_file_system(path, 'r')
    str_to_file(self.lorem, path)
    stamp = fs.last_modified(path)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(stamp, datetime.datetime)
def test_sync_to_dir_local(self):
    # Sync a one-file local source directory into a separate local
    # destination and expect exactly one path to be listed there.
    file_path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    src_dir = os.path.dirname(file_path)
    dst_dir = os.path.join(self.temp_dir.name, 'xxx')
    make_dir(src_dir, check_empty=False)
    make_dir(dst_dir, check_empty=False)

    local_fs = FileSystem.get_file_system(file_path, 'r')
    payload = bytes([0x00, 0x01])
    local_fs.write_bytes(file_path, payload)

    sync_to_dir(src_dir, dst_dir, delete=True)

    listed = list_paths(dst_dir)
    self.assertEqual(len(listed), 1)
def test_bytes_local(self):
    # Round-trip three bytes through write_bytes/read_bytes on a local path.
    file_path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    parent_dir = os.path.dirname(file_path)
    make_dir(parent_dir, check_empty=False)

    expected = bytes([0x00, 0x01, 0x02])
    local_fs = FileSystem.get_file_system(file_path, 'r')
    local_fs.write_bytes(file_path, expected)

    round_tripped = local_fs.read_bytes(file_path)
    self.assertEqual(round_tripped, expected)
def test_last_modified_s3(self):
    # Write a text file locally, upload it to the test bucket, and check
    # that the file system resolved for the s3:// URI reports its
    # last-modified time as a datetime.datetime.
    path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum1.txt')
    s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    fs = FileSystem.get_file_system(s3_path, 'r')
    str_to_file(self.lorem, path)
    upload_or_copy(path, s3_path)
    stamp = fs.last_modified(s3_path)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(stamp, datetime.datetime)
def test_file_exists(self):
    # Check file_exists for a file, for its directory with and without
    # include_dir, and for a path that was never created.
    local_fs = FileSystem.get_file_system(self.temp_dir.name, 'r')

    file_path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    parent_dir = os.path.dirname(file_path)
    make_dir(parent_dir, check_empty=False)
    str_to_file(self.lorem, file_path)

    # A directory counts only when include_dir is True.
    self.assertTrue(local_fs.file_exists(parent_dir, include_dir=True))
    self.assertTrue(local_fs.file_exists(file_path, include_dir=False))
    self.assertFalse(local_fs.file_exists(parent_dir, include_dir=False))

    missing_path = parent_dir + 'NOTPOSSIBLE'
    self.assertFalse(local_fs.file_exists(missing_path, include_dir=False))
def str_to_file(content_str, uri, fs=None):
    """Write a string to a text file at the given URI.

    Args:
        content_str: string to write
        uri: (string) URI of file to write
        fs: Optional FileSystem to use

    Returns:
        Whatever the file system's write_str returns

    Raise:
        NotWritableError if file_uri cannot be written
    """
    # NOTE(review): the file system is resolved with mode 'r' even though
    # this function writes -- confirm this is intended.
    file_system = fs if fs else FileSystem.get_file_system(uri, 'r')
    return file_system.write_str(uri, content_str)
def file_to_str(uri, fs=None):
    """Read the contents of a text file into a string.

    Args:
        uri: (string) URI of file
        fs: Optional FileSystem to use

    Returns:
        (string) with contents of text file

    Raises:
        NotReadableError if URI cannot be read from
    """
    if fs:
        return fs.read_str(uri)
    return FileSystem.get_file_system(uri, 'r').read_str(uri)
def sync_from_dir(src_dir_uri, dest_dir_uri, delete=False, fs=None):
    """Synchronize a local or remote directory to a local directory.

    Files are transferred from the source directory so that the destination
    ends up with all of the source files. With delete=True, files in the
    destination are also deleted to match the source directory.

    Args:
        src_dir_uri: (string) URI of source directory
        dest_dir_uri: (string) URI of local destination directory
        delete: (bool) also delete destination files to match the source
        fs: Optional FileSystem to use; when omitted it is resolved from
            src_dir_uri
    """
    file_system = fs or FileSystem.get_file_system(src_dir_uri, 'r')
    file_system.sync_from_dir(src_dir_uri, dest_dir_uri, delete=delete)
def get_local_path(uri, download_dir, fs=None):
    """Map a URI to its corresponding local path.

    A local uri is returned as is. A remote uri gets a generated path
    inside download_dir; for an S3 path of form s3://<bucket>/<key>, the
    path is <download_dir>/s3/<bucket>/<key>.

    Args:
        uri: (string) URI of file
        download_dir: (string) path to directory
        fs: Optional FileSystem to use

    Returns:
        (string) a local path, or None when uri is None
    """
    if uri is None:
        return None

    if fs:
        return fs.local_path(uri, download_dir)
    return FileSystem.get_file_system(uri, 'r').local_path(uri, download_dir)
def upload_or_copy(src_path, dst_uri, fs=None):
    """Upload a file when the destination is remote, otherwise copy it.

    Nothing is done when dst_uri is None.

    Args:
        src_path: (string) path to source file
        dst_uri: (string) URI of destination for file
        fs: Optional FileSystem to use; when omitted it is resolved from
            dst_uri in write mode

    Raises:
        Exception if src_path is neither an existing file nor directory
        NotWritableError if URI cannot be written to
    """
    if dst_uri is None:
        return

    if not os.path.isfile(src_path) and not os.path.isdir(src_path):
        raise Exception('{} does not exist.'.format(src_path))

    same_location = src_path == dst_uri
    if not same_location:
        log.info('Uploading {} to {}'.format(src_path, dst_uri))

    # NOTE(review): copy_to is called even when source and destination are
    # equal; only the log message is skipped in that case -- confirm.
    file_system = fs if fs else FileSystem.get_file_system(dst_uri, 'w')
    file_system.copy_to(src_path, dst_uri)
def test_write_bytes_http(self):
    # Writing bytes through the file system resolved for an HTTP URI is
    # expected to raise NotWritableError.
    uri = 'http://localhost/'
    fs = FileSystem.get_file_system(uri, 'r')

    # Context-manager form keeps the failing call visible without a lambda.
    with self.assertRaises(NotWritableError):
        fs.write_bytes(uri, bytes([0x00, 0x01]))
def test_last_modified_http(self):
    # last_modified is expected to be None for an HTTP URI.
    uri = 'http://localhost/'
    fs = FileSystem.get_file_system(uri, 'r')

    # assertIsNone checks identity with None rather than equality.
    self.assertIsNone(fs.last_modified(uri))
def test_bytes_local_false(self):
    # Reading bytes from a local path that was never created is expected
    # to raise NotReadableError.
    path = os.path.join(self.temp_dir.name, 'xxx')
    fs = FileSystem.get_file_system(path, 'r')

    # Context-manager form keeps the failing call visible without a lambda.
    with self.assertRaises(NotReadableError):
        fs.read_bytes(path)
def file_exists(uri, fs=None, include_dir=True):
    """Check whether a URI exists according to its file system.

    Args:
        uri: (string) URI to check
        fs: Optional FileSystem to use
        include_dir: (bool) forwarded unchanged to fs.file_exists;
            presumably controls whether a directory counts as existing

    Returns:
        The result of fs.file_exists(uri, include_dir)
    """
    if fs:
        file_system = fs
    else:
        file_system = FileSystem.get_file_system(uri, 'r')
    return file_system.file_exists(uri, include_dir)