Exemple #1
0
    def archiveTasks(self):
        """
        _archiveTasks_

        Archive the summary of every finished workflow and notify the
        interested services.  For each workflow returned by the
        Workflow.GetFinishedWorkflows DAO this method:

        1. Loads the workflow spec and uploads its summary through
           archiveWorkflowSummary
        2. Notifies the WorkQueue (when one is configured) about the
           finished subscriptions
        3. Marks the request as "completed" (unless ReqMgr is used for the
           completion check) and as "aborted-completed" when central couch
           lists the workflow as "aborted"
        4. Records the workflow in wfsToDelete as a deletion candidate

        A failure while handling one workflow is logged and reported through
        sendAlert; the remaining workflows are still processed.

        NOTE(review): nothing in this block consumes wfsToDelete; the actual
        deletion step presumably follows elsewhere -- confirm.
        """
        # Get the finished workflows
        finishedWorkflowsDAO = self.daoFactory(classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()

        # Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for deletion" % len(finishedwfs))
        abortedWorkflows = self.centralCouchDBWriter.workflowsByStatus(["aborted"], format="dict")
        wfsToDelete = {}
        for workflow in finishedwfs:
            try:
                wfInfo = finishedwfs[workflow]

                # Upload summary to couch
                specURL = wfInfo["spec"]
                spec = retrieveWMSpec(wmWorkloadURL=specURL)
                if not spec:
                    # Exception accepts no keyword arguments, so the message
                    # has to be passed positionally; report the spec location
                    # that failed to load.
                    raise Exception("Couldn't load spec from %s" % specURL)
                self.archiveWorkflowSummary(spec=spec)

                # Notify the WorkQueue, if there is one
                if self.workQueue is not None:
                    subList = []
                    for subscriptions in wfInfo["workflows"].values():
                        subList.extend(subscriptions)
                    self.notifyWorkQueue(subList)

                # Now we know the workflow as a whole is gone, we can update
                # its status in couch
                if not self.useReqMgrForCompletionCheck:
                    self.wmstatsCouchDB.updateRequestStatus(workflow, "completed")
                    logging.info("status updated to completed %s" % workflow)

                if workflow in abortedWorkflows:
                    self.centralCouchDBWriter.updateRequestStatus(workflow, "aborted-completed")
                    logging.info("status updated to aborted-completed %s" % workflow)

                wfsToDelete[workflow] = {"spec": spec, "workflows": wfInfo["workflows"]}

            except TaskArchiverPollerException as ex:
                # Something didn't go well when notifying the workqueue, abort!!!
                logging.error(str(ex))
                self.sendAlert(1, msg=str(ex))
                continue
            except Exception as ex:
                # Something didn't go well on couch, abort!!!
                msg = "Couldn't upload summary for workflow %s, will try again next time\n" % workflow
                msg += "Nothing will be deleted until the summary is in couch\n"
                msg += "Exception message: %s" % str(ex)
                # Keep the traceback in the component log instead of stdout
                logging.error("%s\n%s", msg, traceback.format_exc())
                self.sendAlert(3, msg=msg)
                continue
Exemple #2
0
    def archiveTasks(self):
        """
        _archiveTasks_

        This method will call several auxiliary methods to do the following
        for every workflow returned by Workflow.GetFinishedWorkflows:
        1. Load the workflow spec and upload its summary to couch
           (archiveWorkflowSummary)
        2. Notify the WorkQueue, if one is configured, about the finished
           subscriptions
        3. Update the request status to "completed" (skipped when ReqMgr is
           used for the completion check) and to "aborted-completed" when
           the workflow is among those central couch reports as "aborted"
        4. Remember the workflow in wfsToDelete as a candidate for deletion

        Errors raised while processing a workflow are logged and sent out
        with sendAlert; processing then moves on to the next workflow.

        NOTE(review): wfsToDelete is filled but not used inside this block;
        presumably the deletion happens in code not shown here -- confirm.
        """
        #Get the finished workflows
        finishedWorkflowsDAO = self.daoFactory(
            classname="Workflow.GetFinishedWorkflows")
        finishedwfs = finishedWorkflowsDAO.execute()

        #Only delete those where the upload and notification succeeded
        logging.info("Found %d candidate workflows for deletion" %
                     len(finishedwfs))
        abortedWorkflows = self.centralCouchDBWriter.workflowsByStatus(
            ["aborted"], format="dict")
        wfsToDelete = {}
        for workflow in finishedwfs:
            try:
                #Upload summary to couch
                specLocation = finishedwfs[workflow]["spec"]
                spec = retrieveWMSpec(wmWorkloadURL=specLocation)
                if not spec:
                    #Exception takes no keyword arguments: pass the message
                    #positionally and name the spec location that failed
                    raise Exception("Couldn't load spec from %s" %
                                    specLocation)
                self.archiveWorkflowSummary(spec=spec)

                #Notify the WorkQueue, if there is one
                if self.workQueue is not None:
                    subList = []
                    for subs in finishedwfs[workflow]["workflows"].values():
                        subList.extend(subs)
                    self.notifyWorkQueue(subList)

                #Now we know the workflow as a whole is gone, we can update
                #its status in couch
                if not self.useReqMgrForCompletionCheck:
                    self.wmstatsCouchDB.updateRequestStatus(
                        workflow, "completed")
                    logging.info("status updated to completed %s" % workflow)

                if workflow in abortedWorkflows:
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflow, "aborted-completed")
                    logging.info("status updated to aborted-completed %s" %
                                 workflow)

                wfsToDelete[workflow] = {
                    "spec": spec,
                    "workflows": finishedwfs[workflow]["workflows"]
                }

            except TaskArchiverPollerException as ex:
                #Something didn't go well when notifying the workqueue, abort!!!
                logging.error(str(ex))
                self.sendAlert(1, msg=str(ex))
                continue
            except Exception as ex:
                #Something didn't go well on couch, abort!!!
                msg = "Couldn't upload summary for workflow %s, will try again next time\n" % workflow
                msg += "Nothing will be deleted until the summary is in couch\n"
                msg += "Exception message: %s" % str(ex)
                #Send the traceback to the log rather than to stdout
                logging.error(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(3, msg=msg)
                continue
Exemple #3
0
    def killSubscriptions(self, doneList):
        """
        _killSubscriptions_

        Actually dump the subscriptions in doneList and clean up after them.

        For every subscription this method:
        1. Loads the subscription and its workflow, optionally uploads the
           publish information (when self.uploadPublishInfo is set) and
           calls deleteEverything() on the subscription
        2. If the workflow no longer exists afterwards, removes the task
           work directory located through getMasterName
        3. If countWorkflowsBySpec() reports 0, loads the spec, archives the
           couch summary, deletes the workflow from couch and removes the
           sandbox directory

        Any exception raised while handling a subscription is logged and
        reported through sendAlert; the loop then continues with the next
        subscription.
        """
        for sub in doneList:
            logging.info("Deleting subscription %i" % sub['id'])
            try:
                sub.load()
                sub['workflow'].load()
                wf = sub['workflow']
                if self.uploadPublishInfo:
                    self.createAndUploadPublish(wf)
                sub.deleteEverything()
                workflow = sub['workflow']

                if workflow.exists():
                    # Then there are other subscriptions attached
                    # to the workflow
                    continue

                # If we deleted the workflow, it's time to delete
                # the work directories

                # Now we have to delete the task area.
                _workDir, taskDir = getMasterName(startDir = self.jobCacheDir,
                                                  workflow = workflow)
                logging.info("About to delete work directory %s" % taskDir)
                if os.path.isdir(taskDir):
                    # Remove the taskDir, because we're done
                    shutil.rmtree(taskDir)
                else:
                    msg = "Attempted to delete work directory but it was already gone: %s" % taskDir
                    logging.error(msg)
                    self.sendAlert(1, msg = msg)

                # Now check if the workflow is done
                if workflow.countWorkflowsBySpec() != 0:
                    continue

                # If the WMSpec is done, then we have to delete
                # the sandbox, and send off the couch summary

                # First load the WMSpec
                try:
                    logging.debug("Loading spec to delete sandbox dir for task %s" % workflow.task)
                    spec   = retrieveWMSpec(workflow = workflow)
                    wmTask = spec.getTaskByPath(workflow.task)
                except Exception as ex:
                    # If this happens, we're well and truly screwed.
                    # We've passed the deletion point.  We can't recover
                    # Abort this.  There will be no couch summary
                    # Each part goes on its own line so the alert is readable
                    msg =  "Critical error in opening spec after workflow deletion\n"
                    msg += "Task: %s\n" % workflow.task
                    msg += "%s\n" % str(ex)
                    msg += "There will be NO workflow summary for this task"
                    # Caught by the outer handler below, which logs and alerts
                    raise TaskArchiverPollerException(msg)

                # Then pull its info from couch and archive it
                self.archiveCouchSummary(workflow = workflow, spec = spec)
                # The second '/'-separated element of the task path is used
                # as the workflow name
                self.deleteWorkflowFromCouch(workflowName = workflow.task.split('/')[1])

                # Now take care of the sandbox
                sandbox = getattr(wmTask.data.input, 'sandbox', None)
                if sandbox:
                    sandboxDir = os.path.dirname(sandbox)
                    if os.path.isdir(sandboxDir):
                        shutil.rmtree(sandboxDir)
                        logging.debug("Sandbox dir deleted")
                    else:
                        logging.error("Attempted to delete sandbox dir but it was already gone: %s" % sandboxDir)
            except Exception as ex:
                msg =  "Critical error while deleting subscription %i\n" % sub['id']
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                self.sendAlert(2, msg = msg)
     logging.info("Found %d candidate workflows for deletion" % len(finishedwfs))
     centralCouchAlive = True
     try:
         #TODO: need to enable when reqmgr2 -wmstats is ready
         #abortedWorkflows = self.reqmgrCouchDBWriter.workflowsByStatus(["aborted"], format = "dict");
         abortedWorkflows = self.centralCouchDBWriter.workflowsByStatus(["aborted"], format = "dict");
     except Exception, ex:
        centralCouchAlive = False
        logging.error("we will try again when remote couch server comes back\n%s" % str(ex))
     
     if centralCouchAlive:
         wfsToDelete = {}
         for workflow in finishedwfs:
             try:
                 #Upload summary to couch
                 spec = retrieveWMSpec(wmWorkloadURL = finishedwfs[workflow]["spec"])
                 if not spec:
                     raise Exception(msg = "Couldn't load spec from %s" % workflow[1])
                 self.archiveWorkflowSummary(spec = spec)
 
                 #Notify the WorkQueue, if there is one
                 if self.workQueue != None:
                     subList = []
                     for l in finishedwfs[workflow]["workflows"].values():
                         subList.extend(l)
                     self.notifyWorkQueue(subList)
                 
                 #Now we now the workflow as a whole is gone, we can delete the information from couch
                 if not self.useReqMgrForCompletionCheck:
                     self.wmstatsCouchDB.updateRequestStatus(workflow, "completed")
                     logging.info("status updated to completed %s" % workflow)