def remove_dbfs_file(dbfs_api):
    """Best-effort removal of the example notebook from DBFS.

    Cleanup must never fail the surrounding run, so API errors (most
    commonly "resource does not exist") are swallowed deliberately.
    """
    print("Removing DBFS files")
    target = DbfsPath("dbfs:/example_notebook.py")
    try:
        # get_status raises if the file is absent; delete is then safe to call.
        dbfs_api.get_status(target)
        dbfs_api.delete(target, False)
    except Exception:
        # Intentional best-effort: ignore API failures, but avoid a bare
        # `except:` so KeyboardInterrupt/SystemExit still propagate.
        pass
def test_mkdirs_rate_limited(self, dbfs_api):
    """mkdirs retries through transient rate-limit errors and then succeeds."""
    rate_limit_exception = get_rate_limit_exception()
    # Two rate-limit failures, then one clean call.
    dbfs_api.client.mkdirs = mock.Mock(
        side_effect=[rate_limit_exception, rate_limit_exception, None])
    target = DbfsPath('dbfs:/test/mkdir')
    # Should succeed after retrying.
    dbfs_api.mkdirs(target)
    assert len(dbfs_api.client.list(target)) == 0
def test_partial_delete_exception_message_parse_error(self, dbfs_api):
    """Delete still retries when the partial-delete message cannot be parsed."""
    unparseable = get_partial_delete_exception(
        "unexpected partial delete exception message")
    # One unparseable partial-delete failure, then success.
    dbfs_api.client.delete = mock.Mock(side_effect=[unparseable, None])
    dbfs_api.delete_retry_delay_millis = 1
    # Should succeed on the second attempt.
    dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
def from_json(cls, json):
    """Build an instance from a DBFS API JSON response.

    Args:
        json: dict from the DBFS API containing 'path', 'is_dir' and
            'file_size'; 'modification_time' is optional in older responses.
    """
    dbfs_path = DbfsPath.from_api_path(json['path'])
    # dict.get already returns None when the key is missing — no need for a
    # membership test plus conditional expression.
    modification_time = json.get('modification_time')
    return cls(dbfs_path, json['is_dir'], json['file_size'], modification_time)
def test_cp_recursive(self, local_dir):
    """Round-trip a directory tree: local -> DBFS -> local, checking contents."""
    path = local_dir.strpath
    os.chdir(path)
    # Upload the current directory recursively.
    invoke_cli_runner(cli.cp_cli, ['-r', '.', DBFS_TEST_PATH])
    dbfs_root = DbfsPath(DBFS_TEST_PATH)
    for name in (LOCAL_TEST_FILE, LOCAL_TEST_DIR, LOCAL_TEST_FILE_IN_DIR):
        assert_dbfs_file_exists(dbfs_root.join(name))
    # Copy the data back to `temp-dir` and verify file contents survived.
    local_temp_dir = os.path.join(path, LOCAL_TEMP_DIR)
    invoke_cli_runner(cli.cp_cli, ['-r', DBFS_TEST_PATH, local_temp_dir])
    for name in (LOCAL_TEST_FILE, LOCAL_TEST_FILE_IN_DIR):
        assert_local_file_content(
            os.path.join(local_temp_dir, name), TEST_FILE_CONTENTS)
def test_delete_with_rate_limit(self, dbfs_api):
    """A single rate-limit error is retried and the delete completes."""
    # First call raises, second call succeeds.
    side_effects = [get_rate_limit_exception(), None]
    dbfs_api.client.delete = mock.Mock(side_effect=side_effects)
    dbfs_api.delete_retry_delay_millis = 1
    # Should succeed after one retry.
    dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
def test_partial_delete(self, dbfs_api):
    """Repeated partial deletes are retried until the delete fully succeeds."""
    e_partial_delete = get_partial_delete_exception()
    # Three partial-delete failures followed by one clean delete.
    dbfs_api.client.delete = mock.Mock(
        side_effect=[e_partial_delete] * 3 + [None])
    dbfs_api.delete_retry_delay_millis = 1
    # Should succeed on the fourth attempt.
    dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
def test_mkdirs_stop_retrying(self, dbfs_api):
    """mkdirs gives up after MAX_RETRY_ATTEMPTS consecutive rate limits."""
    # Nine rate-limit errors — more than the retry budget, so mkdirs must fail.
    rate_limit_exception = get_rate_limit_exception()
    dbfs_api.client.mkdirs = mock.Mock(side_effect=[rate_limit_exception] * 9)
    with pytest.raises(RateLimitException):
        dbfs_api.mkdirs(DbfsPath('dbfs:/test/mkdir'))
    # Exactly the retry budget worth of attempts were made.
    assert dbfs_api.client.mkdirs.call_count == MAX_RETRY_ATTEMPTS
def test_partial_delete(self, dbfs_api):
    """Interleaved partial deletes and 503s are retried until success."""
    e_partial_delete = get_partial_delete_exception()
    e_503 = get_temporarily_unavailable_exception()
    # 503s and partial deletes interleaved so the consecutive-503 counter
    # never exceeds its limit, ending with a successful delete.
    failures = ([e_503, e_partial_delete, e_partial_delete]
                + [e_503] * api.DELETE_MAX_CONSECUTIVE_503_RETRIES
                + [e_partial_delete])
    dbfs_api.client.delete = mock.Mock(side_effect=failures + [None])
    dbfs_api.delete_retry_delay_millis = 1
    # Should succeed.
    dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
def ls_cli(l, absolute, dbfs_path):  # NOQA
    """
    List files in DBFS.
    """
    # Default to the DBFS root when no path is given; reject multiple paths.
    if not dbfs_path:
        dbfs_path = DbfsPath('dbfs:/')
    elif len(dbfs_path) == 1:
        dbfs_path = dbfs_path[0]
    else:
        error_and_quit('ls can take a maximum of one path.')
    rows = [f.to_row(is_long_form=l, is_absolute=absolute)
            for f in list_files(dbfs_path)]
    click.echo(tabulate(rows, tablefmt='plain'))
def _upload_local_libraries(self, local_lib_objects):
    """Upload local libraries to their content-hashed DBFS paths.

    Libraries whose hashed path already exists on DBFS are skipped.
    Returns the remote LibraryObjects (with string hashed paths).
    """
    remote_lib_objects = [
        LibraryObject(llo.lib_type, self._get_hashed_path(llo.path))
        for llo in local_lib_objects
    ]
    # Wrap the hashed string paths in DbfsPath for the DBFS client calls.
    dbfs_lib_objects = [
        LibraryObject(rlo.lib_type, DbfsPath(rlo.path))
        for rlo in remote_lib_objects
    ]
    # First decide what needs uploading, then upload — content-hashed names
    # mean an existing remote file is already identical.
    pending = [
        (local, remote)
        for local, remote in zip(local_lib_objects, dbfs_lib_objects)
        if not self.dbfs_client.file_exists(remote.path)
    ]
    for local, remote in pending:
        self.dbfs_client.put_file(local.path, remote.path, False)
    return remote_lib_objects
def test_partial_delete_service_unavailable(self, dbfs_api):
    """Too many consecutive plain 503 errors aborts the delete with that error."""
    e_partial_delete = get_partial_delete_exception()
    e_503 = get_temporarily_unavailable_exception()
    # One more consecutive non-partial-delete 503 than the retry budget allows.
    side_effects = ([e_partial_delete]
                    + [e_503] * (api.DELETE_MAX_CONSECUTIVE_503_RETRIES + 1)
                    + [e_partial_delete, None])
    dbfs_api.client.delete = mock.Mock(side_effect=side_effects)
    dbfs_api.delete_retry_delay_millis = 1
    with pytest.raises(e_503.__class__) as thrown:
        dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
    # The very exception instance that exhausted the budget is re-raised.
    assert thrown.value == e_503
def run(self, inputArgs: Namespace):
    """Upload a local file to DBFS.

    Args:
        inputArgs: parsed CLI arguments with ``sourceFilePath``,
            ``targetFilePath`` and ``overwrite`` attributes.
    """
    # os.path.join returns the second argument unchanged when it is already
    # absolute, so this single call replaces the manual isabs()/os.sep
    # concatenation (which was separator- and platform-fragile).
    sourceFilePath = os.path.join(os.getcwd(), inputArgs.sourceFilePath)
    self.__logger.info(
        f'Uploading {sourceFilePath} to {inputArgs.targetFilePath}')
    self.__dbfsApi.put_file(
        sourceFilePath,
        DbfsPath(inputArgs.targetFilePath),
        inputArgs.overwrite,
    )
    # Plain string: the original f-string had no placeholders.
    self.__logger.info('File successfully uploaded')
def dbfs_file_exists(api_client, dbfs_path):
    """
    Checks to determine whether a file exists.

    Args:
        api_client (ApiClient object): Object used for authenticating to the workspace
        dbfs_path (str): Path to check

    Returns:
        True if file exists on dbfs, False otherwise.
    """
    try:
        DbfsApi(api_client).list_files(dbfs_path=DbfsPath(dbfs_path))
        return True
    except Exception:
        # Narrowed from a bare `except:` (which would also swallow
        # KeyboardInterrupt/SystemExit); any API error — typically
        # RESOURCE_DOES_NOT_EXIST — is treated as "file absent".
        return False
def test_is_valid_false(self):
    """Paths without the dbfs:/ scheme are rejected."""
    for bad_path in ('/test', 'test'):
        assert not DbfsPath.is_valid(bad_path)
def _remove_test_file(self):
    """Delete the init-script test file from DBFS (non-recursively)."""
    target = DbfsPath("dbfs:/databricks/init/random.sh")
    self.dbfs_api_client.delete(dbfs_path=target, recursive=False)
    print("removed test file")
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from base64 import b64encode
import os

import requests
import mock
import pytest

import databricks_cli.dbfs.api as api
from databricks_cli.dbfs.dbfs_path import DbfsPath
from databricks_cli.dbfs.exceptions import LocalFileExistsException

# Shared fixtures used throughout the DBFS API tests.
TEST_DBFS_PATH = DbfsPath('dbfs:/test')
TEST_FILE_JSON = {'path': '/test', 'is_dir': False, 'file_size': 1}
TEST_FILE_INFO = api.FileInfo(TEST_DBFS_PATH, False, 1)


def get_resource_does_not_exist_exception():
    """Build an HTTPError whose body carries the RESOURCE_DOES_NOT_EXIST code."""
    response = requests.Response()
    response._content = '{"error_code": "' + api.DbfsErrorCodes.RESOURCE_DOES_NOT_EXIST + '"}'  # NOQA
    return requests.exceptions.HTTPError(response=response)


class TestFileInfo(object):
    def test_to_row_not_long_form_not_absolute(self):
        """Short-form, relative rows contain a single column."""
        file_info = api.FileInfo(TEST_DBFS_PATH, False, 1)
        row = file_info.to_row(is_long_form=False, is_absolute=False)
        assert len(row) == 1
def test_relpath(self):
    """relpath strips the given ancestor prefix from the path."""
    child = DbfsPath('dbfs:/test/a')
    assert child.relpath(TEST_DBFS_PATH) == 'a'
def _list_init_script_dir(self, srcPath="dbfs:/databricks/init"):
    """Return absolute paths of files under the legacy global init-scripts dir."""
    print("Starting to list the legacy global init scripts folder")
    listing = self.dbfs_api_client.list_files(dbfs_path=DbfsPath(srcPath))
    return [entry.dbfs_path.absolute_path for entry in listing]
def cp_cli(recursive, overwrite, src, dst):
    """
    Copy files to and from DBFS.

    Note that this function will fail if the src and dst are both on the local
    filesystem or if they are both DBFS paths.

    For non-recursive copies, if the dst is a directory, the file will be
    placed inside the directory. For example ``dbfs cp dbfs:/apple.txt .``
    will create a file at `./apple.txt`.

    For recursive copies, files inside of the src directory will be copied
    inside the dst directory with the same name. If the dst path does not
    exist, a directory will be created. For example
    ``dbfs cp -r dbfs:/foo foo`` will create a directory foo and place the
    files ``dbfs:/foo/a`` at ``foo/a``. If ``foo/a`` already exists, the file
    will not be overriden unless the --overwrite flag is provided -- however,
    dbfs cp --recursive will continue to try and copy other files.
    """
    # Copy to DBFS in this case
    if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        if not os.path.exists(src):
            error_and_quit('The local file {} does not exist.'.format(src))
        if not recursive:
            if os.path.isdir(src):
                error_and_quit((
                    'The local file {} is a directory. You must provide --recursive'
                ).format(src))
            copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite)
        else:
            if not os.path.isdir(src):
                copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite)
                return
            copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite)
    # Copy from DBFS in this case
    elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        if not recursive:
            copy_from_dbfs_non_recursive(DbfsPath(src), dst, overwrite)
        else:
            dbfs_path_src = DbfsPath(src)
            if not get_status(dbfs_path_src).is_dir:
                copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite)
                # BUG FIX: mirror the to-DBFS branch above. Without this
                # return, `cp -r` of a single file fell through and also
                # attempted a recursive directory copy of that file.
                return
            copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite)
    elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        error_and_quit('Both paths provided are from your local filesystem. '
                       'To use this utility, one of the src or dst must be prefixed '
                       'with dbfs:/')
    elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        error_and_quit('Both paths provided are from the DBFS filesystem. '
                       'To copy between the DBFS filesystem, you currently must copy the '
                       'file from DBFS to your local filesystem and then back.')
    else:
        assert False, 'not reached'
def test_basename(self):
    """basename is the final component; the root (and trailing '/') handled."""
    cases = {
        'dbfs:/': '',
        'dbfs:/test': 'test',
        'dbfs:/test/': 'test',
    }
    for raw, expected in cases.items():
        assert DbfsPath(raw).basename == expected
def test_is_root(self):
    """Only dbfs:/ itself counts as the root."""
    root = DbfsPath('dbfs:/')
    non_root = DbfsPath('test', validate=False)
    assert root.is_root
    assert not non_root.is_root
def test_is_absolute_path(self):
    """Only dbfs:/-prefixed paths are absolute."""
    absolute = DbfsPath('dbfs:/')
    relative = DbfsPath('test', validate=False)
    assert absolute.is_absolute_path
    assert not relative.is_absolute_path
def from_json(cls, json):
    """Construct an instance from a DBFS API JSON dict ('path', 'is_dir',
    'file_size')."""
    path = DbfsPath.from_api_path(json['path'])
    return cls(path, json['is_dir'], json['file_size'])
def cp(self, recursive, overwrite, src, dst, headers=None):
    """Copy files between the local filesystem and DBFS (either direction).

    DBFS -> DBFS copies are routed through a local temporary directory.

    Args:
        recursive: copy directories recursively.
        overwrite: overwrite existing destination files.
        src: source path (local, or ``dbfs:/``-prefixed).
        dst: destination path (local, or ``dbfs:/``-prefixed).
        headers: optional extra HTTP headers forwarded to the API calls.
    """
    # Copy to DBFS in this case
    if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        if not os.path.exists(src):
            error_and_quit('The local file {} does not exist.'.format(src))
        if not recursive:
            if os.path.isdir(src):
                error_and_quit(
                    ('The local file {} is a directory. You must provide --recursive')
                    .format(src))
            self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
        else:
            if not os.path.isdir(src):
                self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite,
                                                 headers=headers)
                return
            self._copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite, headers=headers)
    # Copy from DBFS in this case
    elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        if not recursive:
            self._copy_from_dbfs_non_recursive(DbfsPath(src), dst, overwrite, headers=headers)
        else:
            dbfs_path_src = DbfsPath(src)
            if not self.get_status(dbfs_path_src, headers=headers).is_dir:
                self._copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite,
                                                   headers=headers)
                # BUG FIX: mirror the to-DBFS branch above. Without this
                # return, a recursive copy of a single file also ran the
                # recursive directory copy below.
                return
            self._copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite, headers=headers)
    elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        error_and_quit('Both paths provided are from your local filesystem. '
                       'To use this utility, one of the src or dst must be prefixed '
                       'with dbfs:/')
    elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        with TempDir() as temp_dir:
            # Always copy to <temp_dir>/temp since this will work no matter if it's a
            # recursive or a non-recursive copy.
            temp_path = temp_dir.path('temp')
            self.cp(recursive, True, src, temp_path)
            self.cp(recursive, overwrite, temp_path, dst)
    else:
        assert False, 'not reached'
def cp(self, recursive, overwrite, src, dst, headers=None):
    """Copy files between the local filesystem and DBFS (either direction).

    Copies with both endpoints on DBFS (or both local) are rejected.

    Args:
        recursive: copy directories recursively.
        overwrite: overwrite existing destination files.
        src: source path (local, or ``dbfs:/``-prefixed).
        dst: destination path (local, or ``dbfs:/``-prefixed).
        headers: optional extra HTTP headers forwarded to the API calls.
    """
    # Copy to DBFS in this case
    if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        if not os.path.exists(src):
            error_and_quit('The local file {} does not exist.'.format(src))
        if not recursive:
            if os.path.isdir(src):
                error_and_quit(
                    ('The local file {} is a directory. You must provide --recursive')
                    .format(src))
            self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
        else:
            if not os.path.isdir(src):
                self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite,
                                                 headers=headers)
                return
            self._copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite, headers=headers)
    # Copy from DBFS in this case
    elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        if not recursive:
            self._copy_from_dbfs_non_recursive(DbfsPath(src), dst, overwrite, headers=headers)
        else:
            dbfs_path_src = DbfsPath(src)
            if not self.get_status(dbfs_path_src, headers=headers).is_dir:
                self._copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite,
                                                   headers=headers)
                # BUG FIX: mirror the to-DBFS branch above. Without this
                # return, a recursive copy of a single file also ran the
                # recursive directory copy below.
                return
            self._copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite, headers=headers)
    elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        error_and_quit('Both paths provided are from your local filesystem. '
                       'To use this utility, one of the src or dst must be prefixed '
                       'with dbfs:/')
    elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        error_and_quit('Both paths provided are from the DBFS filesystem. '
                       'To copy between the DBFS filesystem, you currently must copy the '
                       'file from DBFS to your local filesystem and then back.')
    else:
        assert False, 'not reached'
def test_eq(self):
    """Equality holds for identical paths and fails against other types."""
    root = DbfsPath('dbfs:/')
    assert root == DbfsPath('dbfs:/')
    assert root != 'bad type'
def test_ls(self):
    """The shared DBFS test path exists after setup."""
    test_path = DbfsPath(DBFS_TEST_PATH)
    assert_dbfs_file_exists(test_path)
def test_join(self):
    """join appends a child component to the path."""
    joined = TEST_DBFS_PATH.join('a')
    assert joined == DbfsPath('dbfs:/test/a')
def test_cp_from_local(self, local_dir):
    """Copying a single local file uploads it to the DBFS test path."""
    src = os.path.join(local_dir.strpath, LOCAL_TEST_FILE)
    invoke_cli_runner(cli.cp_cli, [src, DBFS_TEST_PATH])
    assert_dbfs_file_exists(DbfsPath(DBFS_TEST_PATH).join(LOCAL_TEST_FILE))