Example #1
def remove_dbfs_file(dbfs_api):
    print("Removing DBFS files")
    try:
        # get_status raises if the file does not exist, so the delete below
        # only runs for a file that is actually present.
        dbfs_api.get_status(DbfsPath("dbfs:/example_notebook.py"))
        dbfs_api.delete(DbfsPath("dbfs:/example_notebook.py"), False)
    except Exception:
        pass
Example #2
    def test_mkdirs_rate_limited(self, dbfs_api):
        rate_limit_exception = get_rate_limit_exception()
        # Simulate 2 rate limit exceptions followed by a full successful operation
        exception_sequence = [rate_limit_exception, rate_limit_exception, None]
        dbfs_api.client.mkdirs = mock.Mock(side_effect=exception_sequence)
        # Should succeed
        dbfs_api.mkdirs(DbfsPath('dbfs:/test/mkdir'))
        # mkdirs should have been attempted three times before succeeding
        assert dbfs_api.client.mkdirs.call_count == 3
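
Several of these tests rely on a get_rate_limit_exception helper that the
excerpts never show. A minimal sketch of what it might look like, patterned
on the get_resource_does_not_exist_exception helper in Example #17 (the 429
status code and the bare response are assumptions):

import requests

def get_rate_limit_exception():
    # Hypothetical helper: an HTTPError whose response carries the
    # 429 (Too Many Requests) status that the retry logic treats as
    # a rate-limit signal.
    response = requests.Response()
    response.status_code = 429
    return requests.exceptions.HTTPError(response=response)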
Example #3
 def test_partial_delete_exception_message_parse_error(self, dbfs_api):
     message = "unexpected partial delete exception message"
     e_partial_delete = get_partial_delete_exception(message)
     dbfs_api.client.delete = mock.Mock(side_effect=[e_partial_delete, None])
     dbfs_api.delete_retry_delay_millis = 1
     # Should succeed
     dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
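
Examples #3, #7, #9, and #12 likewise assume a get_partial_delete_exception
helper. A plausible sketch, again modeled on
get_resource_does_not_exist_exception from Example #17 (the exact payload
shape is an assumption; PARTIAL_DELETE mirrors the api.DbfsErrorCodes usage
in Example #12's comment):

import json
import requests
import databricks_cli.dbfs.api as api

def get_partial_delete_exception(message=''):
    # Hypothetical helper: an HTTPError whose JSON body carries the
    # PARTIAL_DELETE error code, signalling that only part of a recursive
    # delete completed and the caller should retry.
    response = requests.Response()
    response.status_code = 503
    response._content = json.dumps({
        'error_code': api.DbfsErrorCodes.PARTIAL_DELETE,
        'message': message,
    }).encode()
    return requests.exceptions.HTTPError(response=response)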
Example #4
 def from_json(cls, json):
     dbfs_path = DbfsPath.from_api_path(json['path'])
     # If the JSON doesn't include modification_time data, fall back to None.
     modification_time = json.get('modification_time')
     return cls(dbfs_path, json['is_dir'], json['file_size'],
                modification_time)
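
A quick usage sketch for this classmethod, feeding it a dict shaped like the
TEST_FILE_JSON fixture from Example #17 (locating it on api.FileInfo follows
the usage there and is otherwise an assumption):

file_info = api.FileInfo.from_json({'path': '/test', 'is_dir': False,
                                    'file_size': 1})
assert file_info.dbfs_path == DbfsPath('dbfs:/test')
# No 'modification_time' key in the input, so it was parsed as None.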
Example #5
    def test_cp_recursive(self, local_dir):
        path = local_dir.strpath
        os.chdir(path)
        invoke_cli_runner(cli.cp_cli, ['-r', '.', DBFS_TEST_PATH])
        assert_dbfs_file_exists(DbfsPath(DBFS_TEST_PATH).join(LOCAL_TEST_FILE))
        assert_dbfs_file_exists(DbfsPath(DBFS_TEST_PATH).join(LOCAL_TEST_DIR))
        assert_dbfs_file_exists(
            DbfsPath(DBFS_TEST_PATH).join(LOCAL_TEST_FILE_IN_DIR))

        # Copy the data back to `temp-dir`.
        local_temp_dir = os.path.join(path, LOCAL_TEMP_DIR)
        invoke_cli_runner(cli.cp_cli, ['-r', DBFS_TEST_PATH, local_temp_dir])
        assert_local_file_content(
            os.path.join(local_temp_dir, LOCAL_TEST_FILE), TEST_FILE_CONTENTS)
        assert_local_file_content(
            os.path.join(local_temp_dir, LOCAL_TEST_FILE_IN_DIR),
            TEST_FILE_CONTENTS)
Example #6
 def test_delete_with_rate_limit(self, dbfs_api):
     rate_limit_exception = get_rate_limit_exception()
     # Simulate a rate limit exception followed by a full successful delete
     exception_sequence = [rate_limit_exception, None]
     dbfs_api.client.delete = mock.Mock(side_effect=exception_sequence)
     dbfs_api.delete_retry_delay_millis = 1
     # Should succeed
     dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'),
                     recursive=True)
Example #7
 def test_partial_delete(self, dbfs_api):
     e_partial_delete = get_partial_delete_exception()
     # Simulate 3 partial deletes followed by a full successful delete
     exception_sequence = [
         e_partial_delete, e_partial_delete, e_partial_delete, None
     ]
     dbfs_api.client.delete = mock.Mock(side_effect=exception_sequence)
     dbfs_api.delete_retry_delay_millis = 1
     # Should succeed
     dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'),
                     recursive=True)
Example #8
 def test_mkdirs_stop_retrying(self, dbfs_api):
     rate_limit_exception = get_rate_limit_exception()
     # Simulate 9 rate limit exceptions so that the call eventually gives up
     exception_sequence = [rate_limit_exception] * 9
     dbfs_api.client.mkdirs = mock.Mock(side_effect=exception_sequence)
     with pytest.raises(RateLimitException):
         dbfs_api.mkdirs(DbfsPath('dbfs:/test/mkdir'))
     assert dbfs_api.client.mkdirs.call_count == MAX_RETRY_ATTEMPTS
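
Examples #2, #6, and #8 together pin down the retry contract: rate-limited
calls are retried up to MAX_RETRY_ATTEMPTS times, after which the
RateLimitException is surfaced to the caller. A minimal, generic sketch of
such a loop (hypothetical; the real DbfsApi internals may differ):

import time

def call_with_retries(fn, max_attempts, delay_millis, retryable_exc):
    # Retry fn whenever it raises retryable_exc, sleeping between attempts,
    # and re-raise once the attempt budget is exhausted.
    for attempt in range(max_attempts):
        try:
            return fn()
        except retryable_exc:
            if attempt == max_attempts - 1:
                raise
            time.sleep(delay_millis / 1000.0)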
Example #9
 def test_partial_delete(self, dbfs_api):
     e_partial_delete = get_partial_delete_exception()
     e_temporarily_unavailable = get_temporarily_unavailable_exception()
     # Simulate partial deletes and 503 exceptions followed by a full successful delete
     exception_sequence = \
         [e_temporarily_unavailable, e_partial_delete, e_partial_delete] + \
         [e_temporarily_unavailable] * api.DELETE_MAX_CONSECUTIVE_503_RETRIES + \
         [e_partial_delete, None]
     dbfs_api.client.delete = mock.Mock(side_effect=exception_sequence)
     dbfs_api.delete_retry_delay_millis = 1
     # Should succeed
     dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
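
The 503 helper assumed by Examples #9 and #12 could look like the sketch
below: a plain 503 with no PARTIAL_DELETE error code is exactly what the
"temporarily unavailable" branch distinguishes (the construction itself is
an assumption):

import requests

def get_temporarily_unavailable_exception():
    # Hypothetical helper: a bare 503 HTTPError, i.e. "service temporarily
    # unavailable", with no DBFS error code attached.
    response = requests.Response()
    response.status_code = 503
    return requests.exceptions.HTTPError(response=response)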
Example #10
def ls_cli(l, absolute, dbfs_path): #  NOQA
    """
    List files in DBFS.
    """
    if len(dbfs_path) == 0:
        dbfs_path = DbfsPath('dbfs:/')
    elif len(dbfs_path) == 1:
        dbfs_path = dbfs_path[0]
    else:
        error_and_quit('ls can take a maximum of one path.')
    files = list_files(dbfs_path)
    table = tabulate([f.to_row(is_long_form=l, is_absolute=absolute) for f in files],
                     tablefmt='plain')
    click.echo(table)
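
A hedged sketch of exercising this command in a test, using the
invoke_cli_runner helper that appears in Examples #5 and #30 (the -l and
--absolute flag spellings are assumptions based on the l and absolute
parameters):

invoke_cli_runner(cli.ls_cli, ['-l', '--absolute', 'dbfs:/'])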
Example #11
    def _upload_local_libraries(self, local_lib_objects):
        # Compute a remote path for each local library via _get_hashed_path.
        remote_lib_objects = [LibraryObject(llo.lib_type, self._get_hashed_path(llo.path))
                              for llo in local_lib_objects]

        # Wrap the remote string paths in DbfsPath objects for the DBFS client.
        transformed_remote_lib_objects = [LibraryObject(rlo.lib_type, DbfsPath(rlo.path))
                                          for rlo in remote_lib_objects]

        # Pair each local library with its remote target, keeping only the
        # ones that are not already present on DBFS.
        upload_files = [llo_tuple for llo_tuple in
                        zip(local_lib_objects, transformed_remote_lib_objects)
                        if not self.dbfs_client.file_exists(llo_tuple[1].path)]

        for llo, rlo in upload_files:
            self.dbfs_client.put_file(llo.path, rlo.path, False)

        return remote_lib_objects
Example #12
 def test_partial_delete_service_unavailable(self, dbfs_api):
     e_partial_delete = get_partial_delete_exception()
     e_temporarily_unavailable = get_temporarily_unavailable_exception()
     # Simulate more than api.DELETE_MAX_CONSECUTIVE_503_RETRIES 503 errors that are not
     # partial deletes (error_code != PARTIAL_DELETE)
     exception_sequence = \
         [e_partial_delete] + \
         [e_temporarily_unavailable] * (api.DELETE_MAX_CONSECUTIVE_503_RETRIES + 1) + \
         [e_partial_delete, None]
     dbfs_api.client.delete = mock.Mock(side_effect=exception_sequence)
     dbfs_api.delete_retry_delay_millis = 1
     with pytest.raises(e_temporarily_unavailable.__class__) as thrown:
         dbfs_api.delete(DbfsPath('dbfs:/whatever-doesnt-matter'), recursive=True)
     # Should raise the same e_temporarily_unavailable exception instance
     assert thrown.value == e_temporarily_unavailable
Example #13
    def run(self, inputArgs: Namespace):
        if os.path.isabs(inputArgs.sourceFilePath):
            sourceFilePath = inputArgs.sourceFilePath
        else:
            sourceFilePath = os.path.join(os.getcwd(), inputArgs.sourceFilePath)

        self.__logger.info(
            f'Uploading {sourceFilePath} to {inputArgs.targetFilePath}')

        self.__dbfsApi.put_file(
            sourceFilePath,
            DbfsPath(inputArgs.targetFilePath),
            inputArgs.overwrite,
        )

        self.__logger.info('File successfully uploaded')
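
A hypothetical invocation of this command class, assuming an argparse-style
Namespace and an already-constructed instance (all names here are
illustrative):

from argparse import Namespace

args = Namespace(sourceFilePath='notebook.py',
                 targetFilePath='dbfs:/notebook.py',
                 overwrite=True)
upload_command.run(args)  # upload_command: hypothetical instance of the class above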
Example #14
def dbfs_file_exists(api_client, dbfs_path):
    """
    Checks to determine whether a file exists.

    Args:
        api_client (ApiClient object): Object used for authenticating to the workspace
        dbfs_path (str): Path to check
    
    Returns:
        True if file exists on dbfs, False otherwise.
    """
    try:
        DbfsApi(api_client).list_files(dbfs_path=DbfsPath(dbfs_path))
        file_exists = True
    except Exception:
        file_exists = False
    return file_exists
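
Typical usage, assuming an already-authenticated ApiClient instance:

if dbfs_file_exists(api_client, 'dbfs:/example_notebook.py'):
    print('found it')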
Example #15
 def test_is_valid_false(self):
     assert not DbfsPath.is_valid('/test')
     assert not DbfsPath.is_valid('test')
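
The positive counterpart isn't shown in these excerpts; based on the dbfs:/
prefix rule that cp_cli enforces in Example #20, it would presumably read:

 def test_is_valid_true(self):
     assert DbfsPath.is_valid('dbfs:/')
     assert DbfsPath.is_valid('dbfs:/test')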
Example #16
 def _remove_test_file(self):
     self.dbfs_api_client.delete(
         dbfs_path=DbfsPath("dbfs:/databricks/init/random.sh"),
         recursive=False)
     print("removed test file")
Example #17
from base64 import b64encode

import os
import requests
import mock
import pytest

import databricks_cli.dbfs.api as api
from databricks_cli.dbfs.dbfs_path import DbfsPath
from databricks_cli.dbfs.exceptions import LocalFileExistsException

TEST_DBFS_PATH = DbfsPath('dbfs:/test')
TEST_FILE_JSON = {'path': '/test', 'is_dir': False, 'file_size': 1}
TEST_FILE_INFO = api.FileInfo(TEST_DBFS_PATH, False, 1)


def get_resource_does_not_exist_exception():
    response = requests.Response()
    response._content = '{"error_code": "' + api.DbfsErrorCodes.RESOURCE_DOES_NOT_EXIST + '"}'  #  NOQA
    return requests.exceptions.HTTPError(response=response)


class TestFileInfo(object):
    def test_to_row_not_long_form_not_absolute(self):
        file_info = api.FileInfo(TEST_DBFS_PATH, False, 1)
        row = file_info.to_row(is_long_form=False, is_absolute=False)
        assert len(row) == 1
Example #18
 def test_relpath(self):
     assert DbfsPath('dbfs:/test/a').relpath(TEST_DBFS_PATH) == 'a'
Example #19
 def _list_init_script_dir(self, srcPath="dbfs:/databricks/init"):
     print("Starting to list the legacy global init scripts folder")
     files = self.dbfs_api_client.list_files(dbfs_path=DbfsPath(srcPath))
     file_list = [f.dbfs_path.absolute_path for f in files]
     return file_list
Example #20
def cp_cli(recursive, overwrite, src, dst):
    """
    Copy files to and from DBFS.

    Note that this function will fail if the src and dst are both on the local filesystem
    or if they are both DBFS paths.

    For non-recursive copies, if the dst is a directory, the file will be placed inside the
    directory. For example ``dbfs cp dbfs:/apple.txt .`` will create a file at `./apple.txt`.

    For recursive copies, files inside of the src directory will be copied inside the dst directory
    with the same name. If the dst path does not exist, a directory will be created. For example
    ``dbfs cp -r dbfs:/foo foo`` will create a directory foo and place the files ``dbfs:/foo/a`` at
    ``foo/a``. If ``foo/a`` already exists, the file will not be overridden unless the --overwrite
    flag is provided -- however, dbfs cp --recursive will continue to try and copy other files.
    """
    # Copy to DBFS in this case
    if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        if not os.path.exists(src):
            error_and_quit('The local file {} does not exist.'.format(src))
        if not recursive:
            if os.path.isdir(src):
                error_and_quit((
                    'The local file {} is a directory. You must provide --recursive'
                ).format(src))
            copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite)
        else:
            if not os.path.isdir(src):
                copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite)
                return
            copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite)
    # Copy from DBFS in this case
    elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        if not recursive:
            copy_from_dbfs_non_recursive(DbfsPath(src), dst, overwrite)
        else:
            dbfs_path_src = DbfsPath(src)
            if not get_status(dbfs_path_src).is_dir:
                copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite)
                # A single file has been copied; don't fall through to the
                # recursive directory copy.
                return
            copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite)
    elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
        error_and_quit(
            'Both paths provided are from your local filesystem. '
            'To use this utility, one of the src or dst must be prefixed '
            'with dbfs:/')
    elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
        error_and_quit(
            'Both paths provided are from the DBFS filesystem. '
            'To copy between the DBFS filesystem, you currently must copy the '
            'file from DBFS to your local filesystem and then back.')
    else:
        assert False, 'not reached'
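
The docstring's recursive-copy semantics are what Example #5 exercises end to
end; a one-line sketch of a non-recursive upload through the CLI runner used
there (paths are illustrative):

invoke_cli_runner(cli.cp_cli, ['--overwrite', 'apple.txt', 'dbfs:/apple.txt'])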
Example #21
 def test_basename(self):
     assert DbfsPath('dbfs:/').basename == ''
     assert DbfsPath('dbfs:/test').basename == 'test'
     assert DbfsPath('dbfs:/test/').basename == 'test'
Example #22
 def test_is_root(self):
     assert DbfsPath('dbfs:/').is_root
     assert not DbfsPath('test', validate=False).is_root
Example #23
 def test_is_absolute_path(self):
     assert DbfsPath('dbfs:/').is_absolute_path
     assert not DbfsPath('test', validate=False).is_absolute_path
Example #24
 def from_json(cls, json):
     dbfs_path = DbfsPath.from_api_path(json['path'])
     return cls(dbfs_path, json['is_dir'], json['file_size'])
Example #25
 def cp(self, recursive, overwrite, src, dst, headers=None):
     if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
         if not os.path.exists(src):
             error_and_quit('The local file {} does not exist.'.format(src))
         if not recursive:
             if os.path.isdir(src):
                 error_and_quit(
                     ('The local file {} is a directory. You must provide --recursive')
                     .format(src))
             self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
         else:
             if not os.path.isdir(src):
                 self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
                 return
             self._copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite, headers=headers)
     # Copy from DBFS in this case
     elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
         if not recursive:
             self._copy_from_dbfs_non_recursive(DbfsPath(src), dst, overwrite, headers=headers)
         else:
             dbfs_path_src = DbfsPath(src)
             if not self.get_status(dbfs_path_src, headers=headers).is_dir:
                 self._copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite,
                                                    headers=headers)
                 return
             self._copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite, headers=headers)
     elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
         error_and_quit('Both paths provided are from your local filesystem. '
                        'To use this utility, one of the src or dst must be prefixed '
                        'with dbfs:/')
     elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
         with TempDir() as temp_dir:
             # Always copy to <temp_dir>/temp since this will work no matter if it's a
             # recursive or a non-recursive copy.
             temp_path = temp_dir.path('temp')
             self.cp(recursive, True, src, temp_path)
             self.cp(recursive, overwrite, temp_path, dst)
     else:
         assert False, 'not reached'
Example #26
 def cp(self, recursive, overwrite, src, dst, headers=None):
     if not DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
         if not os.path.exists(src):
             error_and_quit('The local file {} does not exist.'.format(src))
         if not recursive:
             if os.path.isdir(src):
                 error_and_quit(
                     ('The local file {} is a directory. You must provide --recursive')
                     .format(src))
             self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
         else:
             if not os.path.isdir(src):
                 self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
                 return
             self._copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite, headers=headers)
     # Copy from DBFS in this case
     elif DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
         if not recursive:
             self._copy_from_dbfs_non_recursive(DbfsPath(src), dst, overwrite, headers=headers)
         else:
             dbfs_path_src = DbfsPath(src)
             if not self.get_status(dbfs_path_src, headers=headers).is_dir:
                 self._copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite,
                                                    headers=headers)
                 return
             self._copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite, headers=headers)
     elif not DbfsPath.is_valid(src) and not DbfsPath.is_valid(dst):
         error_and_quit('Both paths provided are from your local filesystem. '
                        'To use this utility, one of the src or dst must be prefixed '
                        'with dbfs:/')
     elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
         error_and_quit('Both paths provided are from the DBFS filesystem. '
                        'To copy between the DBFS filesystem, you currently must copy the '
                        'file from DBFS to your local filesystem and then back.')
     else:
         assert False, 'not reached'
Example #27
 def test_eq(self):
     assert DbfsPath('dbfs:/') == DbfsPath('dbfs:/')
     assert DbfsPath('dbfs:/') != 'bad type'
Example #28
 def test_ls(self):
     assert_dbfs_file_exists(DbfsPath(DBFS_TEST_PATH))
Example #29
 def test_join(self):
     assert DbfsPath('dbfs:/test/a') == TEST_DBFS_PATH.join('a')
Example #30
 def test_cp_from_local(self, local_dir):
     path = local_dir.strpath
     invoke_cli_runner(
         cli.cp_cli, [os.path.join(path, LOCAL_TEST_FILE), DBFS_TEST_PATH])
     assert_dbfs_file_exists(DbfsPath(DBFS_TEST_PATH).join(LOCAL_TEST_FILE))