Beispiel #1
0
def export_cli(tag, dry_run, notebook_path, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export workspace notebooks to git as HCL resources plus source files.

    For every notebook found under ``notebook_path`` this writes two files
    through a ``GitExportHandler``: ``<identifier>.tf`` holding the generated
    HCL and ``files/<identifier>`` holding the notebook source. Notebooks
    whose content cannot be fetched are skipped.

    :param tag: forwarded to ``GitExportHandler`` as ``tag``.
    :param dry_run: forwarded to ``GitExportHandler`` as ``dry_run``.
    :param notebook_path: workspace path searched recursively for notebooks.
    :param delete: forwarded to ``GitExportHandler`` as ``delete_not_found``.
    :param git_ssh_url: git remote handed to ``GitExportHandler``.
    :param api_client: client used to build the ``WorkspaceService``; its
        ``url`` attribute is passed to ``create_hcl_file``.
    :param hcl: when falsy the function does nothing.
    :param pattern_matches: not used by this function.
    """
    if not hcl:
        # Only the HCL export path exists; anything else is a no-op.
        return

    log.debug("this if debug")
    workspace = WorkspaceService(api_client)
    notebooks = get_workspace_notebooks_recursive(workspace, notebook_path)
    with GitExportHandler(git_ssh_url, "notebooks", delete_not_found=delete, dry_run=dry_run, tag=tag) as handler:
        for notebook in notebooks:
            identifier = normalize_identifier(f"databricks_notebook-{notebook.path}")
            source_text = get_content(workspace, notebook.path)
            if content_missing(source_text) if False else source_text is None:
                continue

            resource_data = {
                "@expr:content": f'filebase64("{identifier}")',
                "path": notebook.path,
                "overwrite": True,
                "mkdirs": True,
                "language": notebook.language,
                "format": "SOURCE",
            }
            resource_hcl = create_resource_from_dict("databricks_notebook", identifier, resource_data, False)
            rendered_file = create_hcl_file(notebook.path, api_client.url, resource_data, resource_hcl)

            handler.add_file(f"{identifier}.tf", rendered_file)
            handler.add_file(f"files/{identifier}", source_text)

            # Validation happens after the files are queued; problems are
            # only reported, they do not stop the export.
            problems = validate_hcl(resource_hcl)
            if len(problems) == 0:
                continue
            log.error(f"Identified error in the following HCL Config: {resource_hcl}")
            log.error(problems)
class WorkspaceApi(object):
    """Thin convenience layer over ``WorkspaceService``."""

    def __init__(self, api_client):
        """Build the underlying ``WorkspaceService`` from ``api_client``."""
        self.client = WorkspaceService(api_client)

    def get_status(self, workspace_path):
        """Return the status of ``workspace_path`` as a ``WorkspaceFileInfo``."""
        status_json = self.client.get_status(workspace_path)
        return WorkspaceFileInfo.from_json(status_json)

    def list_objects(self, workspace_path):
        """Return the objects under ``workspace_path`` as ``WorkspaceFileInfo`` items.

        An empty list is returned when the response has no 'objects' field.
        """
        listing = self.client.list(workspace_path)
        # Listing an empty workspace directory yields a response without an
        # 'objects' field, so that case has to be handled explicitly.
        # TODO(andrewmchen): We should make our API respond with a json with 'objects' field even
        # in this case.
        if 'objects' in listing:
            return [WorkspaceFileInfo.from_json(entry) for entry in listing['objects']]
        return []

    def mkdirs(self, workspace_path):
        """Ask the service to create ``workspace_path``."""
        self.client.mkdirs(workspace_path)

    def import_workspace(self, source_path, target_path, language, fmt,
                         is_overwrite):
        """Upload the local file ``source_path`` to ``target_path``.

        The file bytes are base64-encoded and sent as text.
        """
        with open(source_path, 'rb') as local_file:
            raw_bytes = local_file.read()
            # The service call requires the payload to be a str, not bytes.
            payload = b64encode(raw_bytes).decode()
            self.client.import_workspace(target_path, fmt, language, payload,
                                         is_overwrite)

    def export_workspace(self, source_path, target_path, fmt, is_overwrite):
        """
        Download ``source_path`` and write it to the local ``target_path``
        exactly as given; the target path is never adjusted, even when it
        names a directory.

        Raises LocalFileExistsException when ``target_path`` already exists
        and ``is_overwrite`` is false.
        """
        if os.path.exists(target_path) and not is_overwrite:
            message = 'Target {} already exists.'.format(target_path)
            raise LocalFileExistsException(message)
        exported = self.client.export_workspace(source_path, fmt)
        encoded = exported['content']
        # Opening in 'wb' mode replaces whatever is at target_path.
        with open(target_path, 'wb') as local_file:
            local_file.write(b64decode(encoded))

    def delete(self, workspace_path, is_recursive):
        """Ask the service to delete ``workspace_path``."""
        self.client.delete(workspace_path, is_recursive)
Beispiel #3
0
class ReposApi(object):
    """Convenience layer over ``ReposService`` with a workspace lookup helper."""

    def __init__(self, api_client):
        """Create the repos service and the workspace service used for lookups."""
        self.client = ReposService(api_client)
        self.ws_client = WorkspaceService(api_client)

    def get_repo_id(self, path):
        """
        Resolve a ``/Repos/{folder}/{repo}`` path to its object ID.

        Raises ValueError when ``path`` does not have that shape, and
        RuntimeError when the status lookup fails with an HTTP error or the
        object found at ``path`` is not of type 'REPO'.
        """
        if not path.startswith("/Repos/"):
            raise ValueError("Path must start with /Repos/ !")

        # Empty segments (leading, trailing or doubled slashes) do not count.
        segments = [part for part in path.split("/") if part]
        if len(segments) != 3:
            raise ValueError("Repos paths must be in /Repos/{folder}/{repo} format!")

        try:
            status = self.ws_client.get_status(path)
            found_repo = status['object_type'] == 'REPO'
            if found_repo:
                return status['object_id']
        except requests.exceptions.HTTPError:
            # A failed lookup is reported through the RuntimeError below.
            pass

        raise RuntimeError("Can't find repo ID for {path}".format(path=path))

    def list(self, path_prefix, next_page_token):
        """
        Return the service's repo listing for ``path_prefix``, continuing
        from ``next_page_token``.
        """
        return self.client.list_repos(path_prefix, next_page_token)

    def create(self, url, provider, path):
        """
        Create a repo at ``path`` linked to the remote Git repo at ``url``.
        """
        return self.client.create_repo(url, provider, path)

    def get(self, repo_id):
        """
        Return the repo identified by ``repo_id``.
        """
        return self.client.get_repo(repo_id)

    def update(self, repo_id, branch, tag):
        """
        Check the repo out to ``branch`` or to ``tag``. Exactly one of the
        two must be non-None; this is enforced with an assertion.
        """
        assert (branch is None) != (tag is None)
        return self.client.update_repo(repo_id, branch, tag)

    def delete(self, repo_id):
        """
        Delete the repo identified by ``repo_id``.
        """
        return self.client.delete_repo(repo_id)
Beispiel #4
0
def _get_notebooks_recrusive(service: WorkspaceService, path):
    """Collect every notebook under ``path``, descending into directories.

    :param service: workspace service whose ``list`` method is used.
    :param path: workspace path to start listing from.
    :return: list of ``WorkspaceFileInfo`` objects for the notebooks found;
        empty when the listing response has no "objects" entry.
    """
    listing = service.list(path)
    if "objects" not in listing:
        return []

    notebooks = []
    for raw_entry in listing["objects"]:
        info = WorkspaceFileInfo.from_json(raw_entry)
        if info.is_notebook is True:
            notebooks.append(info)
        if info.is_dir is True:
            # Results from a sub-directory are appended right after any
            # entries collected so far, preserving listing order.
            notebooks.extend(_get_notebooks_recrusive(service, info.path))
    return notebooks
Beispiel #5
0
 def __init__(self, api_client):
     """Wrap ``api_client`` in a ``WorkspaceService`` and keep it as ``self.client``."""
     self.client = WorkspaceService(api_client)
Beispiel #6
0
class WorkspaceApi(object):
    """Helpers for moving notebooks between the local disk and a workspace.

    Every method forwards its optional ``headers`` argument to the
    underlying ``WorkspaceService`` call.
    """

    def __init__(self, api_client):
        """Build the underlying ``WorkspaceService`` from ``api_client``."""
        self.client = WorkspaceService(api_client)

    def get_status(self, workspace_path, headers=None):
        """Return the status of ``workspace_path`` as a ``WorkspaceFileInfo``."""
        status_json = self.client.get_status(workspace_path, headers=headers)
        return WorkspaceFileInfo.from_json(status_json)

    def list_objects(self, workspace_path, headers=None):
        """Return the objects under ``workspace_path`` as ``WorkspaceFileInfo`` items.

        An empty list is returned when the response has no 'objects' field.
        """
        listing = self.client.list(workspace_path, headers=headers)
        # Listing an empty workspace directory yields a response without an
        # 'objects' field, so that case has to be handled explicitly.
        # TODO(andrewmchen): We should make our API respond with a json with 'objects' field even
        # in this case.
        if 'objects' in listing:
            return [WorkspaceFileInfo.from_json(entry) for entry in listing['objects']]
        return []

    def mkdirs(self, workspace_path, headers=None):
        """Ask the service to create ``workspace_path``."""
        self.client.mkdirs(workspace_path, headers=headers)

    def import_workspace(self, source_path, target_path, language, fmt, is_overwrite, headers=None):
        """Upload the local file ``source_path`` to ``target_path``.

        The file bytes are base64-encoded and sent as text.
        """
        with open(source_path, 'rb') as local_file:
            raw_bytes = local_file.read()
            # The service call requires the payload to be a str, not bytes.
            payload = b64encode(raw_bytes).decode()
            self.client.import_workspace(target_path, fmt, language, payload, is_overwrite,
                                         headers=headers)

    def export_workspace(self, source_path, target_path, fmt, is_overwrite, headers=None):
        """
        Download ``source_path`` and write it to the local ``target_path``
        exactly as given; the target path is never adjusted, even when it
        names a directory.

        Raises LocalFileExistsException when ``target_path`` already exists
        and ``is_overwrite`` is false.
        """
        if os.path.exists(target_path) and not is_overwrite:
            message = 'Target {} already exists.'.format(target_path)
            raise LocalFileExistsException(message)
        exported = self.client.export_workspace(source_path, fmt, headers=headers)
        encoded = exported['content']
        # Opening in 'wb' mode replaces whatever is at target_path.
        with open(target_path, 'wb') as local_file:
            local_file.write(b64decode(encoded))

    def delete(self, workspace_path, is_recursive, headers=None):
        """Ask the service to delete ``workspace_path``."""
        self.client.delete(workspace_path, is_recursive, headers=headers)

    def import_workspace_dir(self, source_path, target_path, overwrite, exclude_hidden_files,
                             headers=None):
        """Recursively upload the local directory ``source_path`` to ``target_path``.

        Files whose extension ``WorkspaceLanguage`` does not recognise are
        reported and skipped. If creating ``target_path`` fails with an
        HTTPError, the error body is echoed and this subtree is abandoned.
        """
        # pylint: disable=too-many-locals
        entries = os.listdir(source_path)
        if exclude_hidden_files:
            # "Hidden" currently just means the name starts with a dot.
            entries = [name for name in entries if not name.startswith('.')]
        try:
            self.mkdirs(target_path, headers=headers)
        except HTTPError as e:
            click.echo(e.response.json())
            return

        # Workspace paths always use '/', so they are joined by hand rather
        # than with os.path.join (which would use '\' on Windows).
        dst_prefix = target_path.rstrip('/') + '/'
        for name in entries:
            local_path = os.path.join(source_path, name)
            remote_path = dst_prefix + name
            if os.path.isdir(local_path):
                self.import_workspace_dir(local_path, remote_path, overwrite,
                                          exclude_hidden_files, headers=headers)
                continue
            if not os.path.isfile(local_path):
                continue

            ext = WorkspaceLanguage.get_extension(local_path)
            if ext == '':
                extensions = ', '.join(WorkspaceLanguage.EXTENSIONS)
                template = ('{} does not have a valid extension of {}. Skip this file and '
                            'continue.')
                click.echo(template.format(local_path, extensions))
                continue

            # The workspace object name drops the local file extension.
            remote_path = remote_path[:-len(ext)]
            language, file_format = WorkspaceLanguage.to_language_and_format(local_path)
            self.import_workspace(local_path, remote_path, language, file_format, overwrite,
                                  headers=headers)
            click.echo('{} -> {}'.format(local_path, remote_path))

    def export_workspace_dir(self, source_path, target_path, overwrite, headers=None):
        """Recursively download ``source_path`` into the local directory ``target_path``.

        The target directory is created when missing. Objects that are
        neither directories nor notebooks are reported and skipped, as are
        notebooks whose export raises LocalFileExistsException.
        """
        if os.path.isfile(target_path):
            click.echo('{} exists as a file. Skipping this subtree {}'
                       .format(target_path, source_path))
            return
        if not os.path.isdir(target_path):
            os.makedirs(target_path)

        for item in self.list_objects(source_path, headers=headers):
            remote_path = item.path
            local_path = os.path.join(target_path, item.basename)
            if item.is_dir:
                self.export_workspace_dir(remote_path, local_path, overwrite, headers=headers)
                continue
            if not item.is_notebook:
                click.echo('{} is neither a dir or a notebook. Skip.'.format(remote_path))
                continue

            # Notebooks are saved with the extension matching their language.
            local_path = local_path + WorkspaceLanguage.to_extension(item.language)
            try:
                self.export_workspace(remote_path, local_path, WorkspaceFormat.SOURCE, overwrite,
                                      headers=headers)
            except LocalFileExistsException:
                click.echo('{} already exists locally as {}. Skip.'.format(remote_path,
                                                                           local_path))
            else:
                click.echo('{} -> {}'.format(remote_path, local_path))
Beispiel #7
0
def get_content(service: WorkspaceService, path):
    """Fetch the SOURCE export of ``path`` and return it as decoded text.

    :param service: workspace service whose ``export_workspace`` is called.
    :param path: workspace path of the object to export.
    :return: the base64-decoded content as a UTF-8 string, or ``None`` (after
        logging an error) when the response has no "content" entry.
    """
    exported = service.export_workspace(path, format="SOURCE")
    if "content" in exported:
        encoded = exported["content"].encode("utf-8")
        return b64decode(encoded).decode("utf-8")

    log.error(f"Unable to find content for file {path}")
    return None
Beispiel #8
0
def get_workspace_client():
    """Return a ``WorkspaceService`` built on the client from ``_get_api_client()``."""
    return WorkspaceService(_get_api_client())