Example #1
0
    def create(self, notebooks: List[Notebook], packageFilename: PurePosixPath) -> bytes:
        """Pack the given notebooks into an in-memory zip (DBC) archive.

        Every notebook whose source passes the converter's ``validateSource``
        check is converted with ``toDbcNotebook`` and stored under the
        ``root_ignored_path`` directory, with its suffix replaced by
        ``.python``. Notebooks that fail validation are skipped with a debug
        log message.

        The resulting archive is also written to ``dist/notebooks.dbc`` under
        the working directory.

        :param notebooks: notebooks to include in the archive
        :param packageFilename: passed through to the notebook converter
        :return: raw bytes of the created archive
        """
        databricksRelativePaths = [notebook.databricksRelativePath for notebook in notebooks]
        rootIgnoredPathName = 'root_ignored_path'

        inMemoryOutput = BytesIO()

        # the context manager closes the archive even when loading or converting a notebook fails
        with zipfile.ZipFile(inMemoryOutput, 'w', zipfile.ZIP_DEFLATED) as zipFile:
            # directories must be created first, otherwise DataBricks is not able to process that zip/dbc file
            for dirPath in self.__pathsPreparer.prepare(databricksRelativePaths, rootIgnoredPathName):
                zipFile.writestr(dirPath + '/', '')

            for notebook in notebooks:
                source = loadNotebook(notebook.path)

                try:
                    self.__databricksNotebookConverter.validateSource(source)
                except UnexpectedSourceException:
                    self.__logger.debug(f'Skipping unrecognized file {notebook.relativePath}')
                    continue

                notebookSource = self.__databricksNotebookConverter.toDbcNotebook(notebook.path.stem, source, packageFilename)
                zipPath = PurePosixPath(rootIgnoredPathName).joinpath(notebook.databricksRelativePath).with_suffix('.python')
                zipFile.writestr(str(zipPath), notebookSource)

        # getvalue() returns the whole buffer regardless of the current stream position
        zipContent = inMemoryOutput.getvalue()

        with self.__workingDirectory.joinpath('dist/notebooks.dbc').open('wb') as f:
            f.write(zipContent)

        return zipContent
    def __readFile(self, zipFile: ZipFile, file: ZipInfo) -> None:
        """Convert one archive entry and write it as a ``.py`` file under the local base directory.

        Directory entries (names ending with ``/``) are ignored. The local
        path is the entry name with its first path segment removed and
        everything from the last ``.`` onwards replaced by ``.py``.

        If the local file already exists, it is overwritten only when its
        current content passes the notebook converter's ``validateSource``
        check; otherwise the entry is skipped and an error is logged.

        :param zipFile: archive the entry belongs to
        :param file: entry to convert and store locally
        :raises ValueError: when the entry name contains no ``/`` or no ``.``
        """
        entryName = file.orig_filename

        if entryName[-1:] == '/':
            return

        # drop the archive's root directory and swap the entry's extension for .py
        filePathWithoutRootdir = entryName[entryName.index('/') + 1:entryName.rindex('.')] + '.py'
        localFilePath = self.__localBaseDir.joinpath(filePathWithoutRootdir)

        if localFilePath.exists():
            localFileSource = loadNotebook(localFilePath)

            try:
                self.__databricksNotebookConverter.validateSource(localFileSource)
            except UnexpectedSourceException:
                self.__logger.error(f'Skipping unrecognized file {localFilePath}')
                return

        # Convert before opening the target: opening with 'wb' truncates the file,
        # so a failing conversion must not be able to wipe an existing local notebook.
        pyContent = self.__dbcNotebookConverter.convert(zipFile, file)

        localFilePath.parent.mkdir(parents=True, exist_ok=True)

        with localFilePath.open('wb') as f:
            f.write(pyContent)
Example #3
0
    def __updateNotebooks(self, currentReleasePath: PurePosixPath, notebooks: List[Notebook], packagePath: PurePosixPath):
        """Overwrite the workspace scripts of the given notebooks under the release path.

        Each notebook is loaded and checked with the converter's
        ``validateSource``. Notebooks that fail the check are skipped with a
        debug log message; the others are converted with
        ``toWorkspaceImportNotebook`` and handed to the workspace importer.

        :param currentReleasePath: base path the notebooks' relative paths are joined to
        :param notebooks: notebooks to process
        :param packagePath: passed through to the notebook converter
        """
        for notebook in notebooks:
            destination = currentReleasePath.joinpath(notebook.databricksRelativePath)
            notebookSource = loadNotebook(notebook.path)

            try:
                self.__databricksNotebookConverter.validateSource(notebookSource)
            except UnexpectedSourceException:
                self.__logger.debug(f'Skipping unrecognized file {notebook.relativePath}')
            else:
                importScript = self.__databricksNotebookConverter.toWorkspaceImportNotebook(notebookSource, packagePath)

                self.__logger.info('Updating {}'.format(destination))
                self.__workspaceImporter.overwriteScript(importScript, destination)
Example #4
0
    def run(self, inputArgs: Namespace):
        """Validate the notebook given on the command line and submit it as a job.

        The notebook path from ``inputArgs.notebookPath`` is taken relative to
        the current working directory. When the loaded source fails the
        converter's ``validateSource`` check, an error is logged and the
        process exits with status 1. Otherwise the relative path is resolved
        and ``deployAndSubmitJob`` is run to completion on the event loop.

        :param inputArgs: parsed arguments; only ``notebookPath`` is read here
        """
        requestedPath = PurePosixPath(inputArgs.notebookPath)
        notebookSource = loadNotebook(Path.cwd().joinpath(requestedPath))

        try:
            self.__databricksNotebookConverter.validateSource(notebookSource)
        except UnexpectedSourceException:
            self.__logger.error('Only valid Databricks notebooks can be submitted as Databricks job')
            sys.exit(1)

        resolvedPath = self.__relativePathResolver.resolve(requestedPath)

        # NOTE(review): asyncio.get_event_loop() without a running loop is deprecated
        # in recent Python versions - confirm the supported versions before changing it.
        asyncio.get_event_loop().run_until_complete(
            self.__deployerJobSubmitter.deployAndSubmitJob(resolvedPath)
        )