Beispiel #1
0
def runNotebookJob(notebookId: str,
                   runStatusId: int = None,
                   runType: str = "Scheduled"):
    """
    Celery task to run a zeppelin notebook

    A RunStatus row is created when no runStatusId is given; otherwise the
    existing row is loaded and its startTimestamp is reset to now. Any failure
    is recorded on the RunStatus row as NOTEBOOK_STATUS_ERROR.
    :param notebookId: ID of the zeppelin notebook which to run
    :param runStatusId: ID of genie.runStatus model
    :param runType: Run type stored on a newly created RunStatus
    """
    if not runStatusId:
        runStatus = RunStatus.objects.create(notebookId=notebookId,
                                             status=NOTEBOOK_STATUS_RUNNING,
                                             runType=runType)
    else:
        runStatus = RunStatus.objects.get(id=runStatusId)
        runStatus.startTimestamp = dt.datetime.now()
        runStatus.save()

    # Bound before the try block: if checkIfNotebookRunning itself raises, the
    # outer handler below still needs a name to put in the failure notification.
    notebookName = "Undefined"
    try:
        # Check if notebook is already running
        isRunning, notebookName = checkIfNotebookRunning(notebookId)
        if isRunning:
            runStatus.status = NOTEBOOK_STATUS_ERROR
            runStatus.message = "Notebook already running"
            runStatus.save()
        else:
            # Clear notebook results
            Zeppelin.clearNotebookResults(notebookId)
            response = Zeppelin.runNotebookJob(notebookId)
            if response:
                try:
                    # Poll every 3 seconds (for up to an hour) until the
                    # notebook is no longer reported as running
                    polling.poll(lambda: checkIfNotebookRunningAndStoreLogs(
                        notebookId, runStatus) != True,
                                 step=3,
                                 timeout=3600)
                except Exception as ex:
                    runStatus.status = NOTEBOOK_STATUS_ERROR
                    runStatus.message = str(ex)
                    runStatus.save()
                    NotificationServices.notify(notebookName=notebookName,
                                                isSuccess=False,
                                                message=str(ex))
            else:
                runStatus.status = NOTEBOOK_STATUS_ERROR
                runStatus.message = "Failed running notebook"
                runStatus.save()
    except Exception as ex:
        runStatus.status = NOTEBOOK_STATUS_ERROR
        runStatus.message = str(ex)
        runStatus.save()
        NotificationServices.notify(notebookName=notebookName,
                                    isSuccess=False,
                                    message=str(ex))
 def addNotebook(payload):
     """
     Service to render a notebook template with the payload and add it to Zeppelin

     The payload doubles as the template context and is extended in place with
     connection params, the split S3 path and a temp table name.
     :param payload: Dict containing "notebookTemplateId" and optionally
         "sourceConnection", "targetConnection" and "destinationTableS3Path"
     :return: ApiResponse, marked successful when Zeppelin accepts the notebook
     """
     res = ApiResponse(message="Error adding notebook")
     notebookTemplate = NotebookTemplate.objects.get(
         id=payload.get("notebookTemplateId", 0))
     context = payload  # Storing payload in context variable so that it can be used for rendering
     # Handling connection variables: each connection's params are exposed to
     # the template under its own "<connectionKey>_<paramName>" prefix
     for connectionKey in ("sourceConnection", "targetConnection"):
         if payload.get(connectionKey, False):
             connection = Connection.objects.get(id=payload[connectionKey])
             for cp in connection.cpvc.all():
                 paramName = cp.connectionParam.name
                 context[connectionKey + "_" + paramName] = cp.value
     # Handling S3 path - Splitting it to get the table name
     if payload.get("destinationTableS3Path", False):
         pathParts = payload["destinationTableS3Path"].rsplit('/', 1)
         context["destinationTableName"] = pathParts[1]
         context["warehouseLocation"] = pathParts[0]
     # Adding a temp table name to the context
     context["tempTableName"] = "tempTable_" + str(round(
         time.time() * 1000))
     notebook = Template(notebookTemplate.template).render(Context(context))
     response = Zeppelin.addNotebook(notebook)
     if response:
         res.update(True, "Notebook added successfully")
     return res
Beispiel #3
0
def checkIfNotebookRunningAndStoreLogs(notebookId, runStatus):
    """
    Fetch the notebook details from Zeppelin, store them as JSON in
    runStatus.logs and report whether the notebook is still running

    When the notebook is not running, the details are handed to
    setNotebookStatus together with runStatus.
    :param notebookId: ID of the zeppelin notebook
    :param runStatus: Run status object whose logs are overwritten and saved
    :return: The "isRunning" entry of the details' "info" (False when absent)
    """
    details = Zeppelin.getNotebookDetails(notebookId)
    runStatus.logs = json.dumps(details)
    runStatus.save()
    stillRunning = details.get("info", {}).get("isRunning", False)
    if stillRunning:
        return stillRunning
    setNotebookStatus(details, runStatus)
    return stillRunning
 def deleteNotebook(notebookId: str):
     """
     Service to delete a notebook
     :param notebookId: ID of the zeppelin notebook to delete
     :return: ApiResponse, marked successful when Zeppelin reports the deletion
     """
     # Default message now names the delete operation (was "cloning")
     res = ApiResponse(message="Error in deleting notebook")
     response = Zeppelin.deleteNotebook(notebookId)
     if response:
         res.update(True, "Notebook deleted successfully", None)
     return res
 def clearNotebookResults(notebookId: str):
     """
     Service to clear the results of a notebook
     :param notebookId: ID of the zeppelin notebook whose results are cleared
     :return: ApiResponse, marked successful when the Zeppelin call returns a truthy value
     """
     apiResponse = ApiResponse(message="Error in clearing notebook")
     if Zeppelin.clearNotebookResults(notebookId):
         apiResponse.update(True, "Notebook cleared successfully", None)
     return apiResponse
 def cloneNotebook(notebookId: str, payload: dict):
     """
     Service to clone a notebook
     :param notebookId: ID of the zeppelin notebook to clone
     :param payload: Dict sent to Zeppelin as a JSON string
     :return: ApiResponse, marked successful when the Zeppelin call returns a truthy value
     """
     apiResponse = ApiResponse(message="Error in cloning notebook")
     serializedPayload = json.dumps(payload)
     if Zeppelin.cloneNotebook(notebookId, serializedPayload):
         apiResponse.update(True, "Notebook cloned successfully", None)
     return apiResponse
Beispiel #7
0
 def unarchiveNotebook(notebookId: str, notebookName: str):
     """
     Service to unarchive notebook
     Unarchiving is done by renaming the notebook through Zeppelin.
     :param notebookId: ID of the zeppelin notebook
     :param notebookName: Name the notebook is renamed to
     :return: ApiResponse, marked successful when the rename call returns a truthy value
     """
     # Default message now names the unarchive operation (was "archiving")
     res = ApiResponse(message="Error in unarchiving notebook")
     response = Zeppelin.renameNotebook(notebookId, notebookName)
     if response:
         res.update(True, "Notebook unarchived successfully", None)
     return res
Beispiel #8
0
 def stopNotebookJob(notebookId: str):
     """
     Service to stop a notebook job
     :param notebookId: ID of the zeppelin notebook to stop
     :return: ApiResponse, marked successful when the Zeppelin call returns a truthy value
     """
     apiResponse = ApiResponse(message="Error in stopping notebook")
     # NOTE(review): no run status is updated here; only the Zeppelin stop call is made
     if Zeppelin.stopNotebookJob(notebookId):
         apiResponse.update(True, "Notebook stopped successfully", None)
     return apiResponse
Beispiel #9
0
 def archivedNotebooks():
     """
     Service to list archived notebooks
     Notebooks are requested from Zeppelin with the "~Trash" argument.
     :return: ApiResponse carrying the notebooks when any were returned
     """
     apiResponse = ApiResponse(message="Error retrieving archived notebooks")
     trashed = Zeppelin.getAllNotebooks("~Trash")
     if not trashed:
         return apiResponse
     apiResponse.update(True, "Archived notebooks retrieved successfully",
                        trashed)
     return apiResponse
Beispiel #10
0
def runNotebookJob(notebookId: str, runType: str = "Scheduled"):
    """
    Celery task to run a zeppelin notebook

    A RunStatus row is created with status "RUNNING"; any failure is recorded
    on that row with status "ERROR".
    :param notebookId: ID of the zeppelin notebook which to run
    :param runType: Run type stored on the created RunStatus
    """
    runStatus = RunStatus.objects.create(notebookId=notebookId,
                                         status="RUNNING",
                                         runType=runType)
    # Bound before the try block: if checkIfNotebookRunning itself raises, the
    # outer handler below still needs a name to put in the failure notification.
    notebookName = "Undefined"
    try:
        # Check if notebook is already running
        isRunning, notebookName = checkIfNotebookRunning(notebookId)
        if isRunning:
            runStatus.status = "ERROR"
            runStatus.message = "Notebook already running"
            runStatus.save()
        else:
            # Clear notebook results
            Zeppelin.clearNotebookResults(notebookId)
            response = Zeppelin.runNotebookJob(notebookId)
            if response:
                try:
                    # Poll every 3 seconds (for up to an hour) until the
                    # notebook is no longer reported as running
                    polling.poll(lambda: checkIfNotebookRunningAndStoreLogs(
                        notebookId, runStatus) != True,
                                 step=3,
                                 timeout=3600)
                except Exception as ex:
                    runStatus.status = "ERROR"
                    runStatus.message = str(ex)
                    runStatus.save()
                    NotificationServices.notify(notebookName=notebookName,
                                                isSuccess=False,
                                                message=str(ex))
            else:
                runStatus.status = "ERROR"
                runStatus.message = "Failed running notebook"
                runStatus.save()
    except Exception as ex:
        runStatus.status = "ERROR"
        runStatus.message = str(ex)
        runStatus.save()
        NotificationServices.notify(notebookName=notebookName,
                                    isSuccess=False,
                                    message=str(ex))
Beispiel #11
0
 def deleteNotebook(notebookId: str):
     """
     Service to delete a notebook
     After Zeppelin reports the deletion, the NotebookObject rows whose
     notebookZeppelinId matches are deleted as well.
     :param notebookId: ID of the zeppelin notebook to delete
     :return: ApiResponse, marked successful when the Zeppelin call returns a truthy value
     """
     apiResponse = ApiResponse(message="Error in deleting notebook")
     if not Zeppelin.deleteNotebook(notebookId):
         return apiResponse
     linkedObjects = NotebookObject.objects.filter(
         notebookZeppelinId=notebookId)
     linkedObjects.delete()
     apiResponse.update(True, "Notebook deleted successfully", None)
     return apiResponse
Beispiel #12
0
 async def _fetchNotebookStatuses(notebooks: list):
     """
     Async method to fetch notebook status details for multiple notebooks
     One Zeppelin.getNotebookStatus awaitable is created per notebook and the
     results are collected as they complete.
     :param notebooks: List of notebook describing dicts each containing the 'id' field
     :return: Dict mapping each returned status' "id" to that status
     """
     pendingRequests = [
         Zeppelin.getNotebookStatus(entry["id"]) for entry in notebooks
     ]
     statusesById = {}
     for finished in asyncio.as_completed(pendingRequests):
         result = await finished
         statusesById[result["id"]] = result
     return statusesById
Beispiel #13
0
    def editNotebook(notebookObjId: int, payload: dict):
        """
        Service to update a template based notebook
        The notebook's paragraphs are updated in Zeppelin; on success the
        notebook is renamed when the payload carries a "name", and the
        NotebookObject is saved with the new payload and connection.
        :param notebookObjId: ID of the NotebookObject to be edited
        :param payload: Dict containing notebook template info
        :return: ApiResponse, marked successful when the paragraph update succeeds
        """
        res = ApiResponse(message="Error updating notebook")
        # Copy taken before the payload is handed to _prepareNotebookJson
        payloadSnapshot = payload.copy()
        notebookObject = NotebookObject.objects.get(id=notebookObjId)
        notebook, connection = NotebookJobServices._prepareNotebookJson(
            notebookObject.notebookTemplate, payload)

        zeppelinId = notebookObject.notebookZeppelinId
        if not Zeppelin.updateNotebookParagraphs(zeppelinId, notebook):
            return res

        newName = payloadSnapshot.get("name")
        if newName:
            Zeppelin.renameNotebook(zeppelinId, newName)
        notebookObject.defaultPayload = payloadSnapshot
        notebookObject.connection = connection
        notebookObject.save()
        res.update(True, "Notebook updated successfully")
        return res
Beispiel #14
0
 def addNotebook(payload: dict):
     """
     Service to create and add a template based notebook
     On success a NotebookObject is created linking the Zeppelin notebook to
     its template, connection and a copy of the payload.
     :param payload: Dict containing notebook template info
     :return: ApiResponse, marked successful when Zeppelin returns a notebook id
     """
     res = ApiResponse(message="Error adding notebook")
     # Copy taken before the payload is handed to _prepareNotebookJson
     payloadSnapshot = payload.copy()
     templateId = payload.get("notebookTemplateId", 0)
     notebookTemplate = NotebookTemplate.objects.get(id=templateId)
     notebook, connection = NotebookJobServices._prepareNotebookJson(
         notebookTemplate, payload)
     notebookZeppelinId = Zeppelin.addNotebook(notebook)
     if not notebookZeppelinId:
         return res
     NotebookObject.objects.create(notebookZeppelinId=notebookZeppelinId,
                                   connection=connection,
                                   notebookTemplate=notebookTemplate,
                                   defaultPayload=payloadSnapshot)
     res.update(True, "Notebook added successfully")
     return res
Beispiel #15
0
 def getNotebooks(offset: int = 0):
     """
     Service to fetch notebooks from Zeppelin with schedule and last run info
     One page of GET_NOTEBOOKJOBS_LIMIT notebooks is returned, starting at
     offset; "count" is the number of notebooks before paging.
     :param offset: Offset of the first notebook of the page
     :return: ApiResponse; left in its error state when Zeppelin returns no notebooks
     """
     res = ApiResponse(message="Error retrieving notebooks")
     allNotebooks = Zeppelin.getAllNotebooks()
     if not allNotebooks:
         return res

     totalCount = len(allNotebooks)
     page = allNotebooks[offset:offset + GET_NOTEBOOKJOBS_LIMIT]
     pageIds = [entry["id"] for entry in page]
     jobs = NotebookJob.objects.filter(notebookId__in=pageIds)
     for entry in page:
         entry["name"] = entry["path"]
         # First job whose name equals the notebook id, or False when none
         matchingJob = next(
             (job for job in jobs if job.name == entry["id"]), False)
         if not matchingJob:
             entry["isScheduled"] = False
         else:
             entry["isScheduled"] = True
             entry["schedule"] = str(matchingJob.crontab)
             entry["isActive"] = matchingJob.enabled
             entry["notebookJobId"] = matchingJob.id
         # Most recent run by startTimestamp, if any
         latestRun = RunStatus.objects.filter(
             notebookId=entry["id"]).order_by("-startTimestamp").first()
         if latestRun:
             entry["lastRun"] = RunStatusSerializer(latestRun).data

     result = {"notebooks": page, "count": totalCount}
     res.update(True, "NotebookJobs retrieved successfully", result)
     return res
Beispiel #16
0
 def getNotebooksLight():
     """
     Service to get concise notebook data
     :return: ApiResponse marked successful, carrying whatever Zeppelin.getAllNotebooks returned
     """
     apiResponse = ApiResponse(message="Error retrieving notebooks")
     apiResponse.update(True, "Notebooks retrieved successfully",
                        Zeppelin.getAllNotebooks())
     return apiResponse
Beispiel #17
0
def checkIfNotebookRunning(notebookId: str):
    """
    Look up a notebook in Zeppelin and report whether it is running
    :param notebookId: ID of the zeppelin notebook
    :return: Tuple of the "isRunning" entry of the details' "info" (False when
        absent) and the notebook's "name" ("Undefined" when absent)
    """
    details = Zeppelin.getNotebookDetails(notebookId)
    runningFlag = details.get("info", {}).get("isRunning", False)
    return runningFlag, details.get("name", "Undefined")
Beispiel #18
0
    def getNotebooks(offset: int = 0,
                     limit: int = None,
                     searchQuery: str = None,
                     sorter: dict = None,
                     _filter: dict = None):
        """
        Service to fetch notebooks from Zeppelin and attach their NotebookObject id,
        schedule, assigned workflows and last run info
        Number of notebooks returned per page is stored as the constant GET_NOTEBOOKOJECTS_LIMIT
        :param offset: Offset of the first notebook of the page
        :param limit: Accepted for interface compatibility; not used by this implementation
        :param searchQuery: When truthy, notebooks are narrowed via NotebookJobServices.search on "path"
        :param sorter: Optional dict; sorting is applied only when its 'order' entry is truthy
        :param _filter: Passed through to NotebookJobServices.sortingOnNotebook
        :return: ApiResponse with {"notebooks", "count"}, or an empty list when there are no notebooks
        """
        res = ApiResponse(message="Error retrieving notebooks")
        notebooks = Zeppelin.getAllNotebooks()
        if searchQuery:
            notebooks = NotebookJobServices.search(notebooks, "path",
                                                   searchQuery)
        # sorter defaults to None, so guard before reading its 'order' entry
        if sorter and sorter.get('order', False):
            notebooks = NotebookJobServices.sortingOnNotebook(
                notebooks, sorter, _filter)
        if notebooks:
            notebookCount = len(notebooks)
            notebooks = notebooks[offset:offset + GET_NOTEBOOKOJECTS_LIMIT]
            notebookIds = [notebook["id"] for notebook in notebooks]
            notebookObjects = NotebookObject.objects.filter(
                notebookZeppelinId__in=notebookIds)
            notebookJobs = NotebookJob.objects.filter(
                notebookId__in=notebookIds)
            for notebook in notebooks:
                notebook["name"] = notebook["path"]
                notebookObj = next(
                    (notebookObj for notebookObj in notebookObjects
                     if notebookObj.notebookZeppelinId == notebook["id"]),
                    False)
                if notebookObj:
                    notebook["notebookObjId"] = notebookObj.id
                notebookJob = next(
                    (notebookJob for notebookJob in notebookJobs
                     if notebookJob.notebookId == notebook["id"]), False)
                if notebookJob:
                    notebook["isScheduled"] = True
                    notebook["schedule"] = str(
                        notebookJob.crontab.customschedule.name)
                    notebook["isActive"] = notebookJob.enabled
                    notebook["notebookJobId"] = notebookJob.id
                else:
                    notebook["isScheduled"] = False

                # Names of the workflows this notebook is mapped to
                assignedWorkflowId = WorkflowNotebookMap.objects.filter(
                    notebookId=notebook["id"]).values_list("workflow_id",
                                                           flat=True)
                names = Workflow.objects.filter(
                    id__in=assignedWorkflowId).values_list('name', flat=True)
                notebook["assignedWorkflow"] = list(names)
                # Most recent run log by startTimestamp, if any
                notebookRunLogs = NotebookRunLogs.objects.filter(
                    notebookId=notebook["id"]).order_by(
                        "-startTimestamp").first()
                if notebookRunLogs:
                    # A falsy status is reported as None
                    notebook["notebookStatus"] = notebookRunLogs.status or None
                    notebook["lastRun"] = NotebookRunLogsSerializer(
                        notebookRunLogs).data
            res.update(True, "NotebookObjects retrieved successfully", {
                "notebooks": notebooks,
                "count": notebookCount
            })
        else:
            res.update(True, "NotebookObjects retrieved successfully", [])
        return res