def uploadWarning(self, warning, userProxy, taskname):
    """
    Attach a warning message to a task in the Task DB (so that crab status
    can show it) by POSTing it to the '/task' API of self.crabserver.

    If self.crabserver is not set (this can happen when testing) the warning
    is only written to the local logger.

    :param warning: string: text of the warning; it is passed through
        truncateError and base64-encoded before being sent
    :param userProxy: credential historically used for the http POST call.
        This implementation does not use it: the POST goes through
        self.crabserver. Whether the user proxy was ever required (rather
        than the TW credential) is an open question that could be
        investigated and cleaned up.
    :param taskname: name of the task, sent as the 'workflow' field
    :return: None
    """
    if not self.crabserver:
        # When testing, the server can be None
        self.logger.warning(warning)
        return

    shortWarning = truncateError(warning)
    payload = {
        'subresource': 'addwarning',
        'workflow': taskname,
        'warning': b64encode(shortWarning),
    }
    try:
        self.crabserver.post(api='/task', data=urllib.urlencode(payload))
    except HTTPException as httpErr:
        # Best effort: a failed upload is logged, never propagated
        self.logger.error("Error uploading warning: %s", str(httpErr))
        self.logger.warning("Cannot add a warning to REST interface. Warning message: %s", warning)
def uploadWarning(self, warning, userProxy, taskname):
    """
    Upload a warning message for a task to the REST interface.

    The message is passed through truncateError, base64-encoded and POSTed
    to the '/task' resource with subresource 'addwarning', using an
    HTTPRequests client authenticated with the user proxy.

    :param warning: string: text of the warning
    :param userProxy: credential passed to HTTPRequests as both
        certificate and key for the POST
    :param taskname: name of the task, sent as the 'workflow' field
    :return: None. An HTTPException while contacting the REST is logged,
        not raised.
    """
    if not self.server:
        # Without a configured server (e.g. when testing) there is nowhere
        # to upload to: self.server['host'] would fail with an error that
        # the HTTPException handler below does not cover. Just log it.
        self.logger.warning(warning)
        return

    truncWarning = truncateError(warning)
    try:
        userServer = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2, logger=self.logger)
        configreq = {
            'subresource': 'addwarning',
            'workflow': taskname,
            'warning': b64encode(truncWarning),
        }
        userServer.post(self.restURInoAPI + '/task', data=urllib.urlencode(configreq))
    except HTTPException as hte:
        self.logger.error(hte.headers)
        # Pass the warning as a logging argument instead of formatting eagerly
        self.logger.warning("Cannot add a warning to REST interface. Warning message: %s", warning)
def uploadWarning(self, warning, userProxy, taskname):
    """
    Upload a warning message for a task to the REST interface.

    When self.server is not set (this can happen when testing) the warning
    is only written to the local logger. Otherwise it is passed through
    truncateError, base64-encoded and POSTed to the '/task' resource with
    subresource 'addwarning'.

    :param warning: string: text of the warning
    :param userProxy: credential passed to HTTPRequests as both
        certificate and key for the POST
    :param taskname: name of the task, sent as the 'workflow' field
    :return: None. An HTTPException raised by the POST is logged, not raised.
    """
    if not self.server:
        # When testing, the server can be None
        self.logger.warning(warning)
        return

    shortWarning = truncateError(warning)
    restClient = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2, logger=self.logger)
    payload = {
        'subresource': 'addwarning',
        'workflow': taskname,
        'warning': b64encode(shortWarning),
    }
    try:
        restClient.post(self.restURInoAPI + '/task', data=urllib.urlencode(payload))
    except HTTPException as httpErr:
        # Best effort: a failed upload is logged, never propagated
        self.logger.error("Error uploading warning: %s", str(httpErr))
        self.logger.warning("Cannot add a warning to REST interface. Warning message: %s", warning)
def failTask(taskName, crabserver, msg, log, failstatus='FAILED'):
    """
    Set a task to a failure status in the REST and attach a failure message.

    The call never raises for ordinary errors: any Exception while building
    or sending the request is logged through `log` and swallowed.

    :param taskName: name of the task, sent as the 'workflow' field
    :param crabserver: object whose post(api=..., data=...) method performs
        the HTTP POST
    :param msg: string: failure message; it is passed through truncateError
        and base64-encoded before being sent
    :param log: logger used for progress and error reporting
    :param failstatus: status to record for the task, 'FAILED' by default
    :return: None
    """
    try:
        log.info("Uploading failure message to the REST:\n%s", msg)
        # Limit the message to 7500 chars, which means no more than 10000
        # once encoded. That's the limit in the REST
        encodedFailure = b64encode(truncateError(msg))
        payload = {
            'workflow': taskName,
            'status': failstatus,
            'subresource': 'failure',
            'failure': encodedFailure,
        }
        crabserver.post(api='workflowdb', data=urllib.urlencode(payload))
        log.info("Failure message successfully uploaded to the REST")
    except HTTPException as httpErr:
        log.warning("Cannot upload failure message to the REST for task %s. HTTP exception headers follows:", taskName)
        log.error(httpErr.headers)
    except Exception as err:  # pylint: disable=broad-except
        log.warning("Cannot upload failure message to the REST for workflow %s.\nReason: %s", taskName, err)
        log.exception('Traceback follows:')
def failTask(taskName, HTTPServer, resturi, msg, log, failstatus='FAILED'):
    """
    Set a task to a failure status in the REST and attach a failure message.

    Ordinary errors are never propagated: any Exception raised while
    preparing or sending the request is reported through `log` only.

    :param taskName: name of the task, sent as the 'workflow' field
    :param HTTPServer: object whose post(uri, data=...) method performs the
        HTTP POST
    :param resturi: URI the request is POSTed to
    :param msg: string: failure message; it is passed through truncateError
        and base64-encoded before being sent
    :param log: logger used for progress and error reporting
    :param failstatus: status to record for the task, 'FAILED' by default
    :return: None
    """
    try:
        log.info("Uploading failure message to the REST:\n%s", msg)
        shortMsg = truncateError(msg)
        fields = dict(
            workflow=taskName,
            status=failstatus,
            subresource='failure',
            # Limit the message to 7500 chars, which means no more than
            # 10000 once encoded. That's the limit in the REST
            failure=b64encode(shortMsg),
        )
        body = urllib.urlencode(fields)
        HTTPServer.post(resturi, data=body)
        log.info("Failure message successfully uploaded to the REST")
    except HTTPException as httpErr:
        log.warning("Cannot upload failure message to the REST for task %s. HTTP exception headers follows:", taskName)
        log.error(httpErr.headers)
    except Exception as err:  # pylint: disable=broad-except
        log.warning("Cannot upload failure message to the REST for workflow %s.\nReason: %s", taskName, err)
        log.exception('Traceback follows:')
def uploadWarning(self, warning, userProxy, taskname):
    """
    Send a warning message for a task to the REST interface.

    With no server configured (self.server unset, which can happen when
    testing) the warning is written to the local logger and nothing is
    uploaded. Otherwise the text goes through truncateError, is
    base64-encoded and is POSTed to '/task' as an 'addwarning' request.

    :param warning: string: text of the warning
    :param userProxy: credential passed to HTTPRequests as both
        certificate and key for the POST
    :param taskname: name of the task, sent as the 'workflow' field
    :return: None. An HTTPException raised by the POST is logged, not raised.
    """
    log = self.logger
    if not self.server:
        # When testing, the server can be None
        log.warning(warning)
        return

    trimmed = truncateError(warning)
    client = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2, logger=self.logger)
    fields = dict(
        subresource='addwarning',
        workflow=taskname,
        warning=b64encode(trimmed),
    )
    try:
        client.post(self.restURInoAPI + '/task', data=urllib.urlencode(fields))
    except HTTPException as httpErr:
        # Upload is best effort: report the problem and carry on
        log.error("Error uploading warning: %s", str(httpErr))
        log.warning("Cannot add a warning to REST interface. Warning message: %s", warning)
def processWorkerLoop(inputs, results, resthost, resturi, procnum, logger):
    """
    Main loop of a worker: fetch work items, run them, report the outcome.

    Each item obtained from inputs.get() is a 5-tuple
    (workid, work, task, failstatus, inputargs). `work` is called as
    work(resthost, resturi, WORKER_CONFIG, task, procnum, inputargs) and its
    return value is put on `results` as {"workid": ..., "out": ...}.
    If `work` raises, the output is a Result carrying the error text and the
    error message is uploaded to the REST, setting the task to `failstatus`.

    The loop ends when an item with work == "STOP" is received, or when
    inputs.get() raises EOFError/IOError.

    :param inputs: object with a get() method returning the 5-tuples above
        (presumably a multiprocessing queue - confirm against the caller)
    :param results: object with a put() method receiving the result dicts
    :param resthost: host of the REST interface
    :param resturi: URI of the REST resource used to upload failures
    :param procnum: number of this worker, used in log messages and passed
        on to `work`
    :param logger: logger of this worker
    :return: None
    """
    procName = "Process-%s" % procnum
    while True:
        try:
            ## Get (and remove) an item from the input queue. If the queue is empty, wait
            ## until an item is available.
            workid, work, task, failstatus, inputargs = inputs.get()
            # Honour the stop request before looking inside `task`: a stop
            # item need not carry a real task dictionary, and no task log
            # handler should be left attached when the loop is exiting.
            if work == "STOP":
                break
            taskhandler = addTaskLogHandler(logger, task["tm_username"], task["tm_taskname"])
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logger.error(crashMessage)
            break

        outputs = None
        t0 = time.time()
        logger.debug("%s: Starting %s on %s", procName, str(work), task["tm_taskname"])
        try:
            msg = None
            outputs = work(resthost, resturi, WORKER_CONFIG, task, procnum, inputargs)
        except WorkerHandlerException as we:
            outputs = Result(task=task, err=str(we))
            msg = str(we)
        except Exception as exc:  # pylint: disable=broad-except
            outputs = Result(task=task, err=str(exc))
            msg = "%s: I just had a failure for %s" % (procName, str(exc))
            msg += "\n\tworkid=" + str(workid)
            msg += "\n\ttask=" + str(task["tm_taskname"])
            msg += "\n" + str(traceback.format_exc())
        finally:
            # msg is only set when the work failed: upload it to the REST
            if msg:
                try:
                    logger.info("Uploading error message to REST: %s", msg)
                    server = HTTPRequests(
                        resthost,
                        WORKER_CONFIG.TaskWorker.cmscert,
                        WORKER_CONFIG.TaskWorker.cmskey,
                        retry=20,
                        logger=logger,
                    )
                    truncMsg = truncateError(msg)
                    configreq = {
                        "workflow": task["tm_taskname"],
                        "status": failstatus,
                        "subresource": "failure",
                        # limit the message to 7500 chars, which means no more than 10000 once encoded. That's the limit in the REST
                        "failure": b64encode(truncMsg),
                    }
                    server.post(resturi, data=urllib.urlencode(configreq))
                    logger.info("Error message successfully uploaded to the REST")
                except HTTPException as hte:
                    logger.warning(
                        "Cannot upload failure message to the REST for workflow %s. \nHTTP headers follows:",
                        task["tm_taskname"],
                    )
                    logger.error(hte.headers)
                except Exception as exc:  # pylint: disable=broad-except
                    logger.warning(
                        "Cannot upload failure message to the REST for workflow %s.\nReason: %s",
                        task["tm_taskname"],
                        exc,
                    )
                    logger.exception("Traceback follows:")
        t1 = time.time()
        logger.debug("%s: ...work on %s completed in %d seconds: %s", procName, task["tm_taskname"], t1 - t0, outputs)

        removeTaskLogHandler(logger, taskhandler)

        results.put({"workid": workid, "out": outputs})
def processWorkerLoop(inputs, results, resthost, resturi, procnum, logger):
    """
    Main loop of a worker: fetch work items, run them, report the outcome.

    Each item obtained from inputs.get() is a 5-tuple
    (workid, work, task, failstatus, inputargs). `work` is called as
    work(resthost, resturi, WORKER_CONFIG, task, procnum, inputargs) and its
    return value is put on `results` as {'workid': ..., 'out': ...}.

    If `work` raises, the output is a Result carrying the error text. For
    WorkerHandlerException and any other Exception the error message is also
    uploaded to the REST, setting the task to `failstatus`; for
    TapeDatasetException nothing is uploaded.

    The loop ends when an item with work == 'STOP' is received, or when
    inputs.get() raises EOFError/IOError.

    :param inputs: object with a get() method returning the 5-tuples above
        (presumably a multiprocessing queue - confirm against the caller)
    :param results: object with a put() method receiving the result dicts
    :param resthost: host of the REST interface
    :param resturi: URI of the REST resource used to upload failures
    :param procnum: number of this worker, used in log messages and passed
        on to `work`
    :param logger: logger of this worker
    :return: None
    """
    procName = "Process-%s" % procnum
    while True:
        try:
            ## Get (and remove) an item from the input queue. If the queue is empty, wait
            ## until an item is available.
            workid, work, task, failstatus, inputargs = inputs.get()
            if work == 'STOP':
                break
            taskhandler = addTaskLogHandler(logger, task['tm_username'], task['tm_taskname'])
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logger.error(crashMessage)
            break

        outputs = None
        t0 = time.time()
        logger.debug("%s: Starting %s on %s", procName, str(work), task['tm_taskname'])
        try:
            msg = None
            outputs = work(resthost, resturi, WORKER_CONFIG, task, procnum, inputargs)
        except TapeDatasetException as tde:
            # msg stays None here, so no failure is uploaded to the REST
            outputs = Result(task=task, err=str(tde))
        except WorkerHandlerException as we:
            outputs = Result(task=task, err=str(we))
            msg = str(we)
        except Exception as exc:  # pylint: disable=broad-except
            outputs = Result(task=task, err=str(exc))
            msg = "%s: I just had a failure for %s" % (procName, str(exc))
            msg += "\n\tworkid=" + str(workid)
            msg += "\n\ttask=" + str(task['tm_taskname'])
            msg += "\n" + str(traceback.format_exc())
        finally:
            # msg is only set when the failure has to be reported to the REST
            if msg:
                try:
                    logger.info("Uploading error message to REST: %s", msg)
                    server = HTTPRequests(resthost,
                                          WORKER_CONFIG.TaskWorker.cmscert,
                                          WORKER_CONFIG.TaskWorker.cmskey,
                                          retry=20,
                                          logger=logger)
                    truncMsg = truncateError(msg)
                    configreq = {
                        'workflow': task['tm_taskname'],
                        'status': failstatus,
                        'subresource': 'failure',
                        # limit the message to 7500 chars, which means no more than 10000 once encoded. That's the limit in the REST
                        'failure': b64encode(truncMsg),
                    }
                    server.post(resturi, data=urllib.urlencode(configreq))
                    logger.info("Error message successfully uploaded to the REST")
                except HTTPException as hte:
                    logger.warning(
                        "Cannot upload failure message to the REST for workflow %s. \nHTTP headers follows:",
                        task['tm_taskname'])
                    logger.error(hte.headers)
                except Exception as exc:  # pylint: disable=broad-except
                    logger.warning(
                        "Cannot upload failure message to the REST for workflow %s.\nReason: %s",
                        task['tm_taskname'], exc)
                    logger.exception('Traceback follows:')
        t1 = time.time()
        logger.debug("%s: ...work on %s completed in %d seconds: %s", procName, task['tm_taskname'], t1 - t0, outputs)

        # Log the memory used by this process after the task. This is only
        # diagnostics, so ordinary failures are logged and ignored; catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # still stop the worker.
        try:
            out, _, _ = executeCommand(
                "ps u -p %s | awk '{sum=sum+$6}; END {print sum/1024}'" % os.getpid())
            rssMsg = "RSS after finishing %s: %s MB" % (task['tm_taskname'], out.strip())
            logger.debug(rssMsg)
        except Exception:  # pylint: disable=broad-except
            logger.exception("Problem getting worker RSS:")

        removeTaskLogHandler(logger, taskhandler)

        results.put({'workid': workid, 'out': outputs})
def processWorkerLoop(inputs, results, resthost, resturi, procnum, logger):
    """
    Main loop of a worker: fetch work items, run them, report the outcome.

    Each item obtained from inputs.get() is a 5-tuple
    (workid, work, task, failstatus, inputargs). `work` is called as
    work(resthost, resturi, WORKER_CONFIG, task, procnum, inputargs) and its
    return value is put on `results` as {'workid': ..., 'out': ...}.
    If `work` raises, the output is a Result carrying the error text and the
    error message is uploaded to the REST, setting the task to `failstatus`.

    The loop ends when an item with work == 'STOP' is received, or when
    inputs.get() raises EOFError/IOError.

    :param inputs: object with a get() method returning the 5-tuples above
        (presumably a multiprocessing queue - confirm against the caller)
    :param results: object with a put() method receiving the result dicts
    :param resthost: host of the REST interface
    :param resturi: URI of the REST resource used to upload failures
    :param procnum: number of this worker, used in log messages and passed
        on to `work`
    :param logger: logger of this worker
    :return: None
    """
    procName = "Process-%s" % procnum
    while True:
        try:
            ## Get (and remove) an item from the input queue. If the queue is empty, wait
            ## until an item is available.
            workid, work, task, failstatus, inputargs = inputs.get()
            if work == 'STOP':
                break
            taskhandler = addTaskLogHandler(logger, task['tm_username'], task['tm_taskname'])
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logger.error(crashMessage)
            break

        outputs = None
        t0 = time.time()
        logger.debug("%s: Starting %s on %s", procName, str(work), task['tm_taskname'])
        try:
            msg = None
            outputs = work(resthost, resturi, WORKER_CONFIG, task, procnum, inputargs)
        except WorkerHandlerException as we:
            outputs = Result(task=task, err=str(we))
            msg = str(we)
        except Exception as exc:  # pylint: disable=broad-except
            outputs = Result(task=task, err=str(exc))
            msg = "%s: I just had a failure for %s" % (procName, str(exc))
            msg += "\n\tworkid=" + str(workid)
            msg += "\n\ttask=" + str(task['tm_taskname'])
            msg += "\n" + str(traceback.format_exc())
        finally:
            # msg is only set when the work failed: upload it to the REST
            if msg:
                try:
                    logger.info("Uploading error message to REST: %s", msg)
                    server = HTTPRequests(resthost, WORKER_CONFIG.TaskWorker.cmscert,
                                          WORKER_CONFIG.TaskWorker.cmskey, retry=20, logger=logger)
                    truncMsg = truncateError(msg)
                    configreq = {'workflow': task['tm_taskname'],
                                 'status': failstatus,
                                 'subresource': 'failure',
                                 # limit the message to 7500 chars, which means no more than 10000 once encoded. That's the limit in the REST
                                 'failure': b64encode(truncMsg)}
                    server.post(resturi, data=urllib.urlencode(configreq))
                    logger.info("Error message successfully uploaded to the REST")
                except HTTPException as hte:
                    logger.warning("Cannot upload failure message to the REST for workflow %s. \nHTTP headers follows:", task['tm_taskname'])
                    logger.error(hte.headers)
                except Exception as exc:  # pylint: disable=broad-except
                    logger.warning("Cannot upload failure message to the REST for workflow %s.\nReason: %s", task['tm_taskname'], exc)
                    logger.exception('Traceback follows:')
        t1 = time.time()
        logger.debug("%s: ...work on %s completed in %d seconds: %s", procName, task['tm_taskname'], t1 - t0, outputs)

        # Log the memory used by this process after the task. This is only
        # diagnostics, so ordinary failures are logged and ignored; catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # still stop the worker.
        try:
            out, _, _ = executeCommand("ps u -p %s | awk '{sum=sum+$6}; END {print sum/1024}'" % os.getpid())
            rssMsg = "RSS after finishing %s: %s MB" % (task['tm_taskname'], out.strip())
            logger.debug(rssMsg)
        except Exception:  # pylint: disable=broad-except
            logger.exception("Problem getting worker RSS:")

        removeTaskLogHandler(logger, taskhandler)

        results.put({'workid': workid, 'out': outputs})