def create(self, notebooks: List[Notebook], packageFilename: PurePosixPath) -> bytes:
    """Pack the given notebooks into a Databricks archive (zip/dbc).

    The archive is assembled in memory, written to ``dist/notebooks.dbc``
    below the working directory, and also returned to the caller.

    :param notebooks: notebooks to pack; a notebook whose source is rejected
        by the converter's ``validateSource`` is logged and skipped
    :param packageFilename: forwarded to ``toDbcNotebook`` for every notebook
    :return: raw bytes of the produced archive
    """
    rootIgnoredPathName = 'root_ignored_path'
    databricksRelativePaths = [notebook.databricksRelativePath for notebook in notebooks]

    inMemoryOutput = BytesIO()

    # The context manager closes the archive even when loading or converting
    # a notebook raises, instead of leaving the ZipFile open.
    with zipfile.ZipFile(inMemoryOutput, 'w', zipfile.ZIP_DEFLATED) as zipFile:
        # directories must be created first, otherwise DataBricks is not able to process that zip/dbc file
        for dirPath in self.__pathsPreparer.prepare(databricksRelativePaths, rootIgnoredPathName):
            zipFile.writestr(dirPath + '/', '')

        for notebook in notebooks:
            source = loadNotebook(notebook.path)

            try:
                self.__databricksNotebookConverter.validateSource(source)
            except UnexpectedSourceException:
                self.__logger.debug(f'Skipping unrecognized file {notebook.relativePath}')
                continue

            notebookSource = self.__databricksNotebookConverter.toDbcNotebook(
                notebook.path.stem, source, packageFilename
            )

            zipPath = (
                PurePosixPath(rootIgnoredPathName)
                .joinpath(notebook.databricksRelativePath)
                .with_suffix('.python')
            )
            zipFile.writestr(str(zipPath), notebookSource)

    # Read the buffer only after the archive is closed: closing is what
    # writes the final zip records. getvalue() ignores the stream position,
    # so no seek(0) is needed.
    zipContent = inMemoryOutput.getvalue()

    with self.__workingDirectory.joinpath('dist/notebooks.dbc').open('wb') as f:
        f.write(zipContent)

    return zipContent
def __readFile(self, zipFile: ZipFile, file: ZipInfo) -> None:
    """Write one archive entry into the local base directory as a ``.py`` file.

    Directory entries (names ending with ``/``) are ignored. The local path
    is the entry name without its first path component, with everything from
    the last ``.`` onwards replaced by ``.py``. If a local file already
    exists at that path and its source is rejected by the converter's
    ``validateSource``, the entry is logged and skipped so the local file is
    not overwritten.

    :param zipFile: archive the entry belongs to; passed to the DBC converter
    :param file: the archive entry to process
    :raises ValueError: if the entry name contains no ``/`` or no ``.``
    """
    archiveName = file.orig_filename

    if archiveName.endswith('/'):
        return

    # strip the leading root directory and the original extension
    rootDirEnd = archiveName.index('/') + 1
    extensionStart = archiveName.rindex('.')
    filePathWithoutRootdir = archiveName[rootDirEnd:extensionStart] + '.py'

    localFilePath = self.__localBaseDir.joinpath(filePathWithoutRootdir)

    if localFilePath.exists():
        localFileSource = loadNotebook(localFilePath)

        try:
            self.__databricksNotebookConverter.validateSource(localFileSource)
        except UnexpectedSourceException:
            self.__logger.error(f'Skipping unrecognized file {localFilePath}')
            return

    # Convert BEFORE opening the target: opening with 'wb' truncates it, so
    # a failing conversion would otherwise leave an existing local notebook
    # empty.
    pyContent = self.__dbcNotebookConverter.convert(zipFile, file)

    # exist_ok avoids the check-then-create race of exists() + mkdir()
    localFilePath.parent.mkdir(parents=True, exist_ok=True)

    with localFilePath.open('wb') as f:
        f.write(pyContent)
def __updateNotebooks(self, currentReleasePath: PurePosixPath, notebooks: List[Notebook], packagePath: PurePosixPath):
    """Overwrite every valid notebook in the workspace under the release path.

    :param currentReleasePath: base path each notebook's Databricks relative
        path is joined onto
    :param notebooks: notebooks to upload; a notebook whose source is
        rejected by the converter's ``validateSource`` is logged and skipped
    :param packagePath: forwarded to ``toWorkspaceImportNotebook``
    """
    converter = self.__databricksNotebookConverter

    for notebook in notebooks:
        destination = currentReleasePath.joinpath(notebook.databricksRelativePath)
        rawSource = loadNotebook(notebook.path)

        try:
            converter.validateSource(rawSource)
        except UnexpectedSourceException:
            self.__logger.debug(f'Skipping unrecognized file {notebook.relativePath}')
        else:
            # only validated sources are converted and uploaded
            importScript = converter.toWorkspaceImportNotebook(rawSource, packagePath)

            self.__logger.info('Updating {}'.format(destination))
            self.__workspaceImporter.overwriteScript(importScript, destination)
def run(self, inputArgs: Namespace):
    """Validate the requested notebook, then deploy and submit it as a job.

    The notebook path from ``inputArgs.notebookPath`` is read relative to
    the current working directory. If its source is rejected by the
    converter's ``validateSource``, an error is logged and the process exits
    with status 1.

    :param inputArgs: parsed arguments; only ``notebookPath`` is read here
    """
    requestedPath = PurePosixPath(inputArgs.notebookPath)
    absoluteNotebookPath = Path.cwd().joinpath(requestedPath)

    notebookSource = loadNotebook(absoluteNotebookPath)

    try:
        self.__databricksNotebookConverter.validateSource(notebookSource)
    except UnexpectedSourceException:
        self.__logger.error('Only valid Databricks notebooks can be submitted as Databricks job')
        sys.exit(1)

    resolvedPath = self.__relativePathResolver.resolve(requestedPath)

    # NOTE(review): asyncio.get_event_loop() without a running loop is
    # deprecated on newer Python versions - consider asyncio.run() once it is
    # confirmed that deployAndSubmitJob returns a coroutine.
    eventLoop = asyncio.get_event_loop()
    submission = self.__deployerJobSubmitter.deployAndSubmitJob(resolvedPath)
    eventLoop.run_until_complete(submission)