def export_cli(tag, dry_run, notebook_path, delete, git_ssh_url, api_client: ApiClient, hcl, pattern_matches):
    """Export the notebooks under ``notebook_path`` as HCL resource files.

    The function is a no-op unless ``hcl`` is truthy. For each notebook
    returned by ``get_workspace_notebooks_recursive`` it fetches the source
    via ``get_content``, builds a ``databricks_notebook`` resource, and adds
    two files to the ``GitExportHandler``: ``<identifier>.tf`` with the
    processed HCL and ``files/<identifier>`` with the notebook source.
    Notebooks for which ``get_content`` returns ``None`` are skipped.

    ``tag``, ``dry_run``, ``delete`` and ``git_ssh_url`` are forwarded to
    ``GitExportHandler``. ``pattern_matches`` is accepted but not used here.
    """
    if not hcl:
        return

    log.debug("this if debug")
    service = WorkspaceService(api_client)
    notebooks = get_workspace_notebooks_recursive(service, notebook_path)
    resource_type = "databricks_notebook"

    with GitExportHandler(git_ssh_url, "notebooks", delete_not_found=delete, dry_run=dry_run, tag=tag) as gh:
        for notebook in notebooks:
            identifier = normalize_identifier(f"databricks_notebook-{notebook.path}")
            source = get_content(service, notebook.path)
            if source is None:
                # get_content returned nothing for this notebook; skip it.
                continue

            resource_data = {
                "@expr:content": f'filebase64("{identifier}")',
                "path": notebook.path,
                "overwrite": True,
                "mkdirs": True,
                "language": notebook.language,
                "format": "SOURCE",
            }
            resource_hcl = create_resource_from_dict(resource_type, identifier, resource_data, False)
            rendered_file = create_hcl_file(notebook.path, api_client.url, resource_data, resource_hcl)

            gh.add_file(f"{identifier}.tf", rendered_file)
            gh.add_file(f"files/{identifier}", source)

            # Validation happens after the files are added; problems are only logged.
            hcl_errors = validate_hcl(resource_hcl)
            if len(hcl_errors) == 0:
                continue
            log.error(f"Identified error in the following HCL Config: {resource_hcl}")
            log.error(hcl_errors)
class WorkspaceApi(object):
    """Thin wrapper that delegates workspace operations to a ``WorkspaceService``."""

    def __init__(self, api_client):
        """Build the ``WorkspaceService`` used by every method and store it on ``self.client``."""
        self.client = WorkspaceService(api_client)

    def get_status(self, workspace_path):
        """Return the service's status response for ``workspace_path`` as a ``WorkspaceFileInfo``."""
        raw_status = self.client.get_status(workspace_path)
        return WorkspaceFileInfo.from_json(raw_status)

    def list_objects(self, workspace_path):
        """Return a list of ``WorkspaceFileInfo`` for the entries under ``workspace_path``.

        An empty list is returned when the response has no ``'objects'`` field.
        """
        listing = self.client.list(workspace_path)
        # This case is necessary when we list an empty dir in the workspace.
        # TODO(andrewmchen): We should make our API respond with a json with 'objects' field even
        # in this case.
        if 'objects' in listing:
            return [WorkspaceFileInfo.from_json(entry) for entry in listing['objects']]
        return []

    def mkdirs(self, workspace_path):
        """Ask the service to create ``workspace_path``."""
        self.client.mkdirs(workspace_path)

    def import_workspace(self, source_path, target_path, language, fmt, is_overwrite):
        """Read the local file ``source_path`` and import it, base64-encoded, to ``target_path``."""
        with open(source_path, 'rb') as local_file:
            raw_bytes = local_file.read()
        # import_workspace must take content that is typed str.
        encoded = b64encode(raw_bytes).decode()
        self.client.import_workspace(target_path, fmt, language, encoded, is_overwrite)

    def export_workspace(self, source_path, target_path, fmt, is_overwrite):
        """
        Faithfully exports the source_path to the target_path. Does not
        attempt to do any munging of the target_path if it is a directory.

        Raises ``LocalFileExistsException`` when ``target_path`` already exists
        and ``is_overwrite`` is falsy.
        """
        target_taken = os.path.exists(target_path)
        if target_taken and not is_overwrite:
            raise LocalFileExistsException('Target {} already exists.'.format(target_path))
        exported = self.client.export_workspace(source_path, fmt)
        encoded = exported['content']
        # Will overwrite target_path.
        with open(target_path, 'wb') as local_file:
            local_file.write(b64decode(encoded))

    def delete(self, workspace_path, is_recursive):
        """Ask the service to delete ``workspace_path``, forwarding ``is_recursive``."""
        self.client.delete(workspace_path, is_recursive)
class ReposApi(object):
    """Wrapper around ``ReposService`` that also resolves repo paths via ``WorkspaceService``."""

    def __init__(self, api_client):
        """Create the repos client (``self.client``) and the workspace client (``self.ws_client``)."""
        self.client = ReposService(api_client)
        self.ws_client = WorkspaceService(api_client)

    def get_repo_id(self, path):
        """Return the object ID of the repo located at ``path``.

        ``path`` must start with ``/Repos/`` and have exactly three non-empty
        segments, otherwise ``ValueError`` is raised. ``RuntimeError`` is
        raised when the status lookup fails with an HTTP error or the object
        at ``path`` is not of type ``REPO``.
        """
        if not path.startswith("/Repos/"):
            raise ValueError("Path must start with /Repos/ !")
        segments = [part for part in path.split("/") if part]
        if len(segments) != 3:
            raise ValueError("Repos paths must be in /Repos/{folder}/{repo} format!")
        try:
            status = self.ws_client.get_status(path)
            if status['object_type'] == 'REPO':
                return status['object_id']
        except requests.exceptions.HTTPError:
            # A failed lookup is reported through the RuntimeError below.
            pass
        raise RuntimeError("Can't find repo ID for {path}".format(path=path))

    def list(self, path_prefix, next_page_token):
        """
        List repos that the caller has Manage permissions on. Results are
        paginated with each page containing twenty repos.
        """
        return self.client.list_repos(path_prefix, next_page_token)

    def create(self, url, provider, path):
        """
        Creates a repo object and links it to the remote Git repo specified.
        """
        return self.client.create_repo(url, provider, path)

    def get(self, repo_id):
        """
        Gets the repo with the given ID.
        """
        return self.client.get_repo(repo_id)

    def update(self, repo_id, branch, tag):
        """
        Checks out the repo to the given branch or tag. Only one of
        ``branch`` or ``tag`` should be provided.
        """
        # Exactly one of the two must be given.
        assert (branch is None) != (tag is None)
        return self.client.update_repo(repo_id, branch, tag)

    def delete(self, repo_id):
        """
        Deletes the repo with the given ID.
        """
        return self.client.delete_repo(repo_id)
def _get_notebooks_recrusive(service: WorkspaceService, path):
    """Collect every notebook found under ``path``, descending into directories.

    Returns a list of ``WorkspaceFileInfo`` objects; the list is empty when
    the listing response has no ``"objects"`` key.
    """
    listing = service.list(path)
    if "objects" not in listing:
        return []

    notebooks = []
    for raw_entry in listing["objects"]:
        info = WorkspaceFileInfo.from_json(raw_entry)
        # Both flags are compared by identity with True and checked independently.
        if info.is_notebook is True:
            notebooks.append(info)
        if info.is_dir is True:
            notebooks.extend(_get_notebooks_recrusive(service, info.path))
    return notebooks
def __init__(self, api_client):
    """Wrap ``api_client`` in a ``WorkspaceService`` and keep it on ``self.client``."""
    workspace_service = WorkspaceService(api_client)
    self.client = workspace_service
class WorkspaceApi(object):
    """Wrapper around ``WorkspaceService`` with helpers for copying whole directories.

    Every method accepts an optional ``headers`` argument that is forwarded
    unchanged to the underlying service call.
    """

    def __init__(self, api_client):
        """Build the ``WorkspaceService`` used by every method and store it on ``self.client``."""
        self.client = WorkspaceService(api_client)

    def get_status(self, workspace_path, headers=None):
        """Return the service's status response for ``workspace_path`` as a ``WorkspaceFileInfo``."""
        raw_status = self.client.get_status(workspace_path, headers=headers)
        return WorkspaceFileInfo.from_json(raw_status)

    def list_objects(self, workspace_path, headers=None):
        """Return a list of ``WorkspaceFileInfo`` for the entries under ``workspace_path``.

        An empty list is returned when the response has no ``'objects'`` field.
        """
        listing = self.client.list(workspace_path, headers=headers)
        # This case is necessary when we list an empty dir in the workspace.
        # TODO(andrewmchen): We should make our API respond with a json with 'objects' field even
        # in this case.
        if 'objects' in listing:
            return [WorkspaceFileInfo.from_json(entry) for entry in listing['objects']]
        return []

    def mkdirs(self, workspace_path, headers=None):
        """Ask the service to create ``workspace_path``."""
        self.client.mkdirs(workspace_path, headers=headers)

    def import_workspace(self, source_path, target_path, language, fmt, is_overwrite,
                         headers=None):
        """Read the local file ``source_path`` and import it, base64-encoded, to ``target_path``."""
        with open(source_path, 'rb') as local_file:
            raw_bytes = local_file.read()
        # import_workspace must take content that is typed str.
        encoded = b64encode(raw_bytes).decode()
        self.client.import_workspace(
            target_path, fmt, language, encoded, is_overwrite, headers=headers)

    def export_workspace(self, source_path, target_path, fmt, is_overwrite, headers=None):
        """
        Faithfully exports the source_path to the target_path. Does not
        attempt to do any munging of the target_path if it is a directory.

        Raises ``LocalFileExistsException`` when ``target_path`` already exists
        and ``is_overwrite`` is falsy.
        """
        target_taken = os.path.exists(target_path)
        if target_taken and not is_overwrite:
            raise LocalFileExistsException('Target {} already exists.'.format(target_path))
        exported = self.client.export_workspace(source_path, fmt, headers=headers)
        encoded = exported['content']
        # Will overwrite target_path.
        with open(target_path, 'wb') as local_file:
            local_file.write(b64decode(encoded))

    def delete(self, workspace_path, is_recursive, headers=None):
        """Ask the service to delete ``workspace_path``, forwarding ``is_recursive``."""
        self.client.delete(workspace_path, is_recursive, headers=headers)

    def import_workspace_dir(self, source_path, target_path, overwrite, exclude_hidden_files,
                             headers=None):
        """Recursively import the local directory ``source_path`` into ``target_path``.

        ``target_path`` is created first; if that fails with ``HTTPError`` the
        error payload is echoed and the subtree is abandoned. Files for which
        ``WorkspaceLanguage.get_extension`` returns ``''`` are reported and
        skipped; for the others the extension is stripped from the
        destination name before importing.
        """
        # pylint: disable=too-many-locals
        entries = os.listdir(source_path)
        if exclude_hidden_files:
            # for now, just exclude hidden files or directories based on starting '.'
            entries = [name for name in entries if not name.startswith('.')]

        try:
            self.mkdirs(target_path, headers=headers)
        except HTTPError as e:
            click.echo(e.response.json())
            return

        for name in entries:
            local_path = os.path.join(source_path, name)
            # don't use os.path.join here since it will set \ on Windows
            remote_path = target_path.rstrip('/') + '/' + name

            if os.path.isdir(local_path):
                self.import_workspace_dir(local_path, remote_path, overwrite,
                                          exclude_hidden_files, headers=headers)
                continue
            if not os.path.isfile(local_path):
                # Neither a directory nor a regular file: nothing to do.
                continue

            ext = WorkspaceLanguage.get_extension(local_path)
            if ext == '':
                extensions = ', '.join(WorkspaceLanguage.EXTENSIONS)
                click.echo(('{} does not have a valid extension of {}. Skip this file and ' +
                            'continue.').format(local_path, extensions))
                continue

            remote_path = remote_path[:-len(ext)]
            (language, file_format) = WorkspaceLanguage.to_language_and_format(local_path)
            self.import_workspace(local_path, remote_path, language, file_format, overwrite,
                                  headers=headers)
            click.echo('{} -> {}'.format(local_path, remote_path))

    def export_workspace_dir(self, source_path, target_path, overwrite, headers=None):
        """Recursively export the workspace directory ``source_path`` into local ``target_path``.

        The subtree is skipped when ``target_path`` exists as a file; the
        directory is created when missing. Notebooks are written with the
        extension given by ``WorkspaceLanguage.to_extension``; objects that
        are neither directories nor notebooks are reported and skipped.
        """
        if os.path.isfile(target_path):
            click.echo('{} exists as a file. Skipping this subtree {}'
                       .format(target_path, source_path))
            return
        if not os.path.isdir(target_path):
            os.makedirs(target_path)

        for obj in self.list_objects(source_path, headers=headers):
            remote_path = obj.path
            local_path = os.path.join(target_path, obj.basename)

            if obj.is_dir:
                self.export_workspace_dir(remote_path, local_path, overwrite, headers=headers)
                continue
            if not obj.is_notebook:
                click.echo('{} is neither a dir or a notebook. Skip.'.format(remote_path))
                continue

            local_path = local_path + WorkspaceLanguage.to_extension(obj.language)
            try:
                self.export_workspace(remote_path, local_path, WorkspaceFormat.SOURCE, overwrite,
                                      headers=headers)
                click.echo('{} -> {}'.format(remote_path, local_path))
            except LocalFileExistsException:
                click.echo('{} already exists locally as {}. Skip.'.format(remote_path,
                                                                           local_path))
def get_content(service: WorkspaceService, path):
    """Export ``path`` in ``SOURCE`` format and return its decoded text.

    The ``"content"`` field of the export response is base64-decoded and then
    decoded as UTF-8. When the response has no ``"content"`` field an error is
    logged and ``None`` is returned.
    """
    exported = service.export_workspace(path, format="SOURCE")
    if "content" in exported:
        raw_bytes = b64decode(exported["content"].encode("utf-8"))
        return raw_bytes.decode("utf-8")

    log.error(f"Unable to find content for file {path}")
    return None
def get_workspace_client():
    """Return a ``WorkspaceService`` built on the client from ``_get_api_client()``."""
    return WorkspaceService(_get_api_client())