def _log_task(self, taskManager, commandReplacementDic, taskUUID, arguments):
    """
    Creates a new entry in the Tasks table using the supplied data.

    :param MCPServer.linkTaskManager taskManager: A linkTaskManager subclass instance.
    :param ReplacementDict commandReplacementDic: A ReplacementDict or dict instance. %fileUUID% and %relativeLocation% variables will be looked up from this dict.
    :param str taskUUID: The UUID to be used for this Task in the database.
    :param str arguments: The arguments to be passed to the command when it is executed, as a string. Can contain replacement variables; see ReplacementDict for supported values.
    """
    jobUUID = taskManager.jobChainLink.UUID
    fileUUID = ""
    if "%fileUUID%" in commandReplacementDic:
        fileUUID = commandReplacementDic["%fileUUID%"]
    taskexec = taskManager.execute
    fileName = os.path.basename(
        os.path.abspath(commandReplacementDic["%relativeLocation%"]))

    Task.objects.create(taskuuid=taskUUID,
                        job_id=jobUUID,
                        fileuuid=fileUUID,
                        filename=fileName,
                        execution=taskexec,
                        arguments=arguments,
                        createdtime=getUTCDate())
def debugMonitor():
    """Periodically logs the status of the MCP, including the thread count."""
    while True:
        logger.debug('Debug monitor: datetime: %s', getUTCDate())
        logger.debug('Debug monitor: thread count: %s', threading.activeCount())
        time.sleep(3600)
def write_task_results_callback():
    with transaction.atomic():
        for job in jobs:
            logger.info("\n\n*** Completed job: %s", job.dump())

            kwargs = {
                "exitcode": job.get_exit_code(),
                "endtime": getUTCDate()
            }
            if (django_settings.CAPTURE_CLIENT_SCRIPT_OUTPUT or
                    kwargs["exitcode"] > 0):
                kwargs.update({
                    "stdout": job.get_stdout(),
                    "stderror": job.get_stderr()
                })
            Task.objects.filter(taskuuid=job.UUID).update(**kwargs)

            results[job.UUID] = {"exitCode": job.get_exit_code()}

            if job.caller_wants_output:
                # Send back stdout/stderr so it can be written to files.
                # Most cases don't require this (logging to the database is
                # enough), but the ones that do are coordinated through the
                # MCP Server so that multiple MCP Client instances don't try
                # to write the same file at the same time.
                results[job.UUID]["stdout"] = job.get_stdout()
                results[job.UUID]["stderror"] = job.get_stderr()
def debugMonitor():
    """Periodically logs the status of the MCP, including the thread count
    and the number of job chains created."""
    global countOfCreateUnitAndJobChainThreaded
    while True:
        logger.debug('Debug monitor: datetime: %s', getUTCDate())
        logger.debug('Debug monitor: thread count: %s', threading.activeCount())
        logger.debug('Debug monitor: created job chain threaded: %s',
                     countOfCreateUnitAndJobChainThreaded)
        time.sleep(3600)
def debugMonitor():
    """Periodically logs the status of the MCP, including whether the
    database lock is locked, the thread count, etc."""
    global countOfCreateUnitAndJobChainThreaded
    while True:
        dblockstatus = "SQL Lock: Locked"
        if databaseInterface.sqlLock.acquire(False):
            databaseInterface.sqlLock.release()
            dblockstatus = "SQL Lock: Unlocked"
        logger.debug('Debug monitor: datetime: %s', databaseFunctions.getUTCDate())
        logger.debug('Debug monitor: thread count: %s', threading.activeCount())
        logger.debug('Debug monitor: created job chain threaded: %s',
                     countOfCreateUnitAndJobChainThreaded)
        logger.debug('Debug monitor: DB lock status: %s', dblockstatus)
        time.sleep(3600)
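# A usage sketch, not from the source: a monitor loop like debugMonitor is
# typically started once at process startup on a daemon thread, so that it
# keeps logging in the background without preventing interpreter shutdown.
# The function name start_debug_monitor is illustrative.
def start_debug_monitor():
    t = threading.Thread(target=debugMonitor, name="debug-monitor")
    t.daemon = True  # do not let the hourly loop block process exit
    t.start()
    return t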
def handle_batch_task(gearman_job, supported_modules):
    module_name = supported_modules.get(gearman_job.task)
    gearman_data = cPickle.loads(gearman_job.data)

    utc_date = getUTCDate()
    jobs = []
    for task_uuid in gearman_data["tasks"]:
        task_data = gearman_data["tasks"][task_uuid]
        arguments = task_data["arguments"]
        if isinstance(arguments, six.text_type):
            arguments = arguments.encode("utf-8")

        replacements = (replacement_dict.items() +
                        {
                            "%date%": utc_date.isoformat(),
                            "%taskUUID%": task_uuid,
                            "%jobCreatedDate%": task_data["createdDate"],
                        }.items())

        for var, val in replacements:
            arguments = arguments.replace(var, val)

        job = Job(
            gearman_job.task,
            task_data["uuid"],
            _parse_command_line(arguments),
            caller_wants_output=task_data["wants_output"],
        )
        jobs.append(job)

    # Set their start times. If we collide with the MCP Server inserting new
    # Tasks (which can happen under heavy concurrent load), retry as needed.
    def set_start_times():
        Task.objects.filter(taskuuid__in=[item.UUID for item in jobs]).update(
            starttime=utc_date)

    retryOnFailure("Set task start times", set_start_times)

    module = importlib.import_module("clientScripts." + module_name)

    # Our module can indicate that it should be run concurrently...
    if hasattr(module, "concurrent_instances"):
        fork_runner.call(
            "clientScripts." + module_name,
            jobs,
            task_count=module.concurrent_instances(),
        )
    else:
        module.call(jobs)

    return jobs
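# A minimal sketch of the retry pattern relied on above, assuming
# retryOnFailure simply re-invokes the callback when the database raises a
# transient error (e.g. a deadlock under heavy concurrent load). The name
# retry_on_failure_sketch, the retry count, and the fixed delay are
# illustrative; the real helper's backoff and error handling may differ.
import time

from django.db import OperationalError


def retry_on_failure_sketch(description, callback, retries=5, delay=1):
    for attempt in range(retries):
        try:
            return callback()
        except OperationalError:
            logger.warning("%s failed (attempt %d of %d); retrying",
                           description, attempt + 1, retries)
            time.sleep(delay)
    return callback()  # last try; let any exception propagate to the caller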
def __init__(self, jobChain, jobChainLinkPK, unit, passVar=None, subJobOf=""):
    if jobChainLinkPK is None:
        return
    self.UUID = uuid.uuid4().__str__()
    self.jobChain = jobChain
    self.unit = unit
    self.passVar = passVar
    self.createdDate = getUTCDate()
    self.subJobOf = subJobOf

    # Depending on the path that led to this, jobChainLinkPK may
    # either be a UUID or a MicroServiceChainLink instance
    if isinstance(jobChainLinkPK, basestring):
        try:
            link = MicroServiceChainLink.objects.get(id=str(jobChainLinkPK))
        # This will sometimes return no values
        except MicroServiceChainLink.DoesNotExist:
            return
    else:
        link = jobChainLinkPK

    self.pk = link.id
    self.currentTask = link.currenttask_id
    self.defaultNextChainLink = link.defaultnextchainlink_id
    taskType = link.currenttask.tasktype_id
    taskTypePKReference = link.currenttask.tasktypepkreference
    self.description = link.currenttask.description
    self.reloadFileList = link.reloadfilelist
    self.defaultExitMessage = link.defaultexitmessage
    self.microserviceGroup = link.microservicegroup

    LOGGER.info('Running %s (unit %s)', self.description, self.unit.UUID)

    self.unit.reload()
    logJobCreatedSQL(self)

    if self.createTasks(taskType, taskTypePKReference) is None:
        self.getNextChainLinkPK(None)
def createMetsHdr(sip_uuid):
    header = etree.Element(ns.metsBNS + "metsHdr",
                           CREATEDATE=getUTCDate().strftime("%Y-%m-%dT%H:%M:%S"))
    agent = etree.SubElement(header, ns.metsBNS + "agent",
                             ROLE="CREATOR",
                             TYPE="OTHER",
                             OTHERTYPE="SOFTWARE")
    name = etree.SubElement(agent, ns.metsBNS + "name")
    name.text = get_dashboard_uuid()
    note = etree.SubElement(agent, ns.metsBNS + "note")
    note.text = "Archivematica dashboard UUID"
    accession_number = getAccessionNumberFromTransfer(sip_uuid)
    if accession_number:
        alt_id = etree.SubElement(header, ns.metsBNS + "altRecordID",
                                  TYPE="Accession number")
        alt_id.text = accession_number
    return header
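# A usage sketch (the SIP UUID is a placeholder, and a configured Django
# environment is assumed, since getAccessionNumberFromTransfer queries the
# database): serialize the header with lxml to inspect the generated
# <metsHdr> element.
from lxml import etree

header = createMetsHdr("00000000-0000-0000-0000-000000000000")
print(etree.tostring(header, pretty_print=True))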
def write_identification_event(file_uuid, command, format=None, success=True):
    event_detail_text = 'program="{}"; version="{}"'.format(
        command.tool.description, command.tool.version)
    if success:
        event_outcome_text = "Positive"
    else:
        event_outcome_text = "Not identified"

    if not format:
        format = 'No Matching Format'

    date = getUTCDate()

    insertIntoEvents(fileUUID=file_uuid,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="format identification",
                     eventDateTime=date,
                     eventDetail=event_detail_text,
                     eventOutcome=event_outcome_text,
                     eventOutcomeDetailNote=format)
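# A usage sketch, assuming `command` is an identification command object
# whose related tool exposes `description` and `version` attributes (which
# is all this function requires of it). The file UUID and format note are
# placeholders.
write_identification_event(
    "00000000-0000-0000-0000-000000000000",
    command,
    format="JPEG File Interchange Format",
    success=True,
)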
def __init__(self, jobChain, link, workflow, unit, passVar=None):
    if link is None:
        return
    self.UUID = uuid.uuid4().__str__()
    self.jobChain = jobChain
    self.workflow = workflow
    self.unit = unit
    self.passVar = passVar
    self.pk = link.id
    self.link = link
    self.created_at = getUTCDate()
    self.group = link.get_label("group", "en")
    self.description = link.get_label("description", "en")

    LOGGER.info("Running %s (unit %s)", self.description, self.unit.UUID)

    self.unit.reload()
    self._create_job()
    self._run_task_manager()
def executeCommand(gearman_worker, gearman_job):
    try:
        execute = gearman_job.task
        logger.info('Executing %s (%s)', execute, gearman_job.unique)
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseFunctions.getUTCDate()
        arguments = data["arguments"]
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        sInput = ""
        clientID = gearman_worker.worker_client_id

        task = Task.objects.get(taskuuid=gearman_job.unique)
        if task.starttime is not None:
            exitCode = -1
            stdOut = ""
            stdError = """Detected this task has already started!
Unable to determine if it completed successfully."""
            return cPickle.dumps({"exitCode": exitCode,
                                  "stdOut": stdOut,
                                  "stdError": stdError})
        else:
            task.client = clientID
            task.starttime = utcDate
            task.save()

        if execute not in supportedModules:
            output = ["Error!",
                      "Error! - Tried to run an unsupported command."]
            exitCode = -1
            return cPickle.dumps({"exitCode": exitCode,
                                  "stdOut": output[0],
                                  "stdError": output[1]})
        command = supportedModules[execute]

        replacementDic["%date%"] = utcDate.isoformat()
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        # Replace replacement strings
        for key in replacementDic.keys():
            command = command.replace(key, replacementDic[key])
            arguments = arguments.replace(key, replacementDic[key])

        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)

        # Add useful environment vars for client scripts
        lib_paths = ['/usr/share/archivematica/dashboard/',
                     '/usr/lib/archivematica/archivematicaCommon']
        env_updates = {
            'PYTHONPATH': os.pathsep.join(lib_paths),
            'DJANGO_SETTINGS_MODULE': config.get('MCPClient',
                                                 'django_settings_module')
        }

        # Execute command
        command += " " + arguments
        logger.info('<processingCommand>{%s}%s</processingCommand>',
                    gearman_job.unique, command)
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput,
                                                  printing=False,
                                                  env_updates=env_updates)
        return cPickle.dumps({"exitCode": exitCode,
                              "stdOut": stdOut,
                              "stdError": stdError})
    except OSError:
        logger.exception('Execution failed')
        output = ["Archivematica Client Error!", traceback.format_exc()]
        exitCode = 1
        return cPickle.dumps({"exitCode": exitCode,
                              "stdOut": output[0],
                              "stdError": output[1]})
    except Exception:
        logger.exception('Unexpected error')
        output = ["", traceback.format_exc()]
        return cPickle.dumps({"exitCode": -1,
                              "stdOut": output[0],
                              "stdError": output[1]})
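# A submission sketch for the other side of this worker, an assumption rather
# than source code: a caller submits a pickled payload over Gearman and
# unpickles the reply that executeCommand returns. The task name, host, and
# payload values are placeholders; python-gearman's GearmanClient.submit_job
# blocks until the job completes and exposes the reply on `.result`.
import cPickle

import gearman

client = gearman.GearmanClient(["127.0.0.1:4730"])
payload = cPickle.dumps({
    "arguments": '"%relativeLocation%" "%fileUUID%"',  # placeholder args
    "createdDate": "2019-01-01T00:00:00+00:00",
})
request = client.submit_job("echo_v0.0", payload,
                            unique="00000000-0000-0000-0000-000000000000")
reply = cPickle.loads(request.result)
print(reply["exitCode"], reply["stdOut"], reply["stdError"])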
def fail_all_tasks_callback():
    for task_uuid in gearman_data["tasks"]:
        Task.objects.filter(taskuuid=task_uuid).update(
            stderror=str(reason), exitcode=1, endtime=getUTCDate())

retryOnFailure("Fail all tasks", fail_all_tasks_callback)