def executeInternal(self, *args, **kw):
    """Stage the job wrapper scripts and the user sandbox in the current directory.

    The six CRABServer helper scripts are resolved with ``getLocation`` and
    copied into ``.``. When ``UserFileCache`` is usable and the task's
    ``tm_cache_url`` contains ``/crabcache``, the input sandbox is downloaded
    as ``sandbox.tar.gz`` and ``kw['task']['tm_user_sandbox']`` is rewritten
    to that file name.

    :arg kw: must contain ``task``, a mapping with at least ``tm_cache_url``,
        ``user_proxy`` and ``tm_user_sandbox``.
    :raises TaskWorkerException: when the sandbox download fails.
    """
    # FIXME: In PanDA, we provided the executable as a URL.
    # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis wrapper.
    #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
    transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
    cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
    gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
    dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
    bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
    adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

    shutil.copy(transform_location, '.')
    shutil.copy(cmscp_location, '.')
    shutil.copy(gwms_location, '.')
    shutil.copy(dag_bootstrap_location, '.')
    shutil.copy(bootstrap_location, '.')
    shutil.copy(adjust_location, '.')

    # Bootstrap the ISB if we are using UFC
    if UserFileCache and kw['task']['tm_cache_url'].find('/crabcache') != -1:
        ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'],
                                  'key': kw['task']['user_proxy'],
                                  'endpoint': kw['task']['tm_cache_url']})
        try:
            # The cache key is the sandbox file name up to its first dot.
            ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
        except Exception as ex:  # "as" form: valid on Python 2.6+ and Python 3
            self.logger.exception(ex)
            raise TaskWorkerException(
                ("The CRAB3 server backend could not download the input sandbox with your code "
                 "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later "
                 "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s") % str(ex))  # TODO url!?
        kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'
def testUploadDownload(self): if 'UFCURL' in os.environ: currdir = getTestBase() upfile = path.join( currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz' ) #file to upload upfileLog = path.join( currdir, 'WMCore_t/Services_t/UserFileCache_t/uplog.txt' ) #file to upload ufc = UserFileCache({ 'endpoint': os.environ['UFCURL'], 'pycurl': True }) #hashkey upload/download res = ufc.upload(upfile) ufc.download(res['hashkey'], output='pippo_publish_down.tgz') #hashkey deletion ufc.removeFile(res['hashkey']) #log upload/download res = ufc.uploadLog(upfileLog) ufc.downloadLog(upfileLog, upfileLog + '.downloaded') self.assertTrue(filecmp.cmp(upfileLog, upfileLog + '.downloaded'))
def refreshSandbox(self, task):
    """Download the task's input and debug sandboxes, then discard the copies.

    Both archives are fetched from the cache at ``task['tm_cache_url']`` into
    ``/tmp`` using the task's proxy. Any failure is only logged (best effort);
    whatever was written to ``/tmp`` is removed in every case.

    :arg task: mapping with ``user_proxy``, ``tm_cache_url``, ``tm_user_sandbox``,
        ``tm_debug_files``, ``tm_username``, ``tm_taskname`` and ``tm_DDM_reqid``.
    """
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    cacheConfig = {
        'cert': task['user_proxy'],
        'key': task['user_proxy'],
        'endpoint': task['tm_cache_url'],
        "pycurl": True,
    }
    ufc = UserFileCache(cacheConfig)

    # Cache entries are addressed by the archive name without its extension.
    sandbox = task['tm_user_sandbox'].replace(".tar.gz", "")
    debugFiles = task['tm_debug_files'].replace(".tar.gz", "")
    localCopies = [os.path.join("/tmp", sandbox), os.path.join("/tmp", debugFiles)]

    try:
        for cacheName, localCopy in zip((sandbox, debugFiles), localCopies):
            ufc.download(cacheName, localCopy, task['tm_username'])
        self.logger.info(
            "Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %s).",
            sandbox, debugFiles, task['tm_taskname'], task['tm_cache_url'],
            task['tm_username'], task['tm_DDM_reqid'])
    except Exception as ex:
        pieces = [
            "The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) " % (
                sandbox, debugFiles),
            "of task %s from the frontend (%s) using the '%s' username (request_id = %s). " % (
                task['tm_taskname'], task['tm_cache_url'], task['tm_username'], task['tm_DDM_reqid']),
            "\nThis could be a temporary glitch, will try again in next occurrence of the recurring action.",
            "Error reason:\n%s" % str(ex),
        ]
        self.logger.info("".join(pieces))
    finally:
        for localCopy in localCopies:
            if os.path.exists(localCopy):
                os.remove(localCopy)
def executeInternal(self, *args, **kw):
    """Stage every file a task needs in the current directory and build its DAG.

    Copies the wrapper scripts and runtime tarballs located by ``getLocation``,
    downloads the user sandbox from the crabcache when applicable, records the
    REST coordinates on the task and finally delegates to ``createSubdag``.

    :arg kw: must contain ``task`` (the task mapping); ``args`` and ``kw`` are
        forwarded unchanged to ``createSubdag``.
    :returns: ``(info, params, inputFiles, splitterResult)`` where ``params`` is
        the result of ``sendDashboardTask()`` for non-dry-run tasks, else ``{}``.
    :raises TaskWorkerException: when the sandbox download fails.
    """
    # FIXME: In PanDA, we provided the executable as a URL.
    # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis
    # wrapper instead of resolving kw['task']['tm_transformation'].
    scriptNames = ('CMSRunAnalysis.sh', 'cmscp.py', 'gWMS-CMSRunAnalysis.sh',
                   'dag_bootstrap_startup.sh', 'dag_bootstrap.sh', 'AdjustSites.py')
    # Resolve all locations first, then copy, so nothing is copied if one is missing.
    scriptPaths = [getLocation(scriptName, 'CRABServer/scripts/') for scriptName in scriptNames]
    for scriptPath in scriptPaths:
        shutil.copy(scriptPath, '.')

    # Bootstrap the ISB if we are using UFC
    if UserFileCache and '/crabcache' in kw['task']['tm_cache_url']:
        task = kw['task']
        ufc = UserFileCache(mydict={'cert': task['user_proxy'],
                                    'key': task['user_proxy'],
                                    'endpoint': task['tm_cache_url']})
        try:
            ufc.download(hashkey=task['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
        except Exception as ex:
            self.logger.exception(ex)
            reason = ("The CRAB3 server backend could not download the input sandbox with your code "
                      "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later "
                      "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s") % str(ex)
            raise TaskWorkerException(reason)  # TODO url!?
        task['tm_user_sandbox'] = 'sandbox.tar.gz'

    # Bootstrap the runtime if it is available.
    for tarball in ('CMSRunAnalysis.tar.gz', 'TaskManagerRun.tar.gz'):
        shutil.copy(getLocation(tarball, 'CRABServer/'), '.')

    task = kw['task']
    task['resthost'] = self.server['host']
    task['resturinoapi'] = self.restURInoAPI
    self.task = task

    # The dashboard is only notified for real (non dry-run) submissions.
    params = self.sendDashboardTask() if task['tm_dry_run'] == 'F' else {}

    inputFiles = ['gWMS-CMSRunAnalysis.sh', 'CMSRunAnalysis.sh', 'cmscp.py', 'RunJobs.dag',
                  'Job.submit', 'dag_bootstrap.sh', 'AdjustSites.py', 'site.ad', 'site.ad.json',
                  'run_and_lumis.tar.gz', 'input_files.tar.gz']

    self.extractMonitorFiles(inputFiles, **kw)

    if task.get('tm_user_sandbox') == 'sandbox.tar.gz':
        inputFiles.append('sandbox.tar.gz')
    for tarball in ("CMSRunAnalysis.tar.gz", "TaskManagerRun.tar.gz"):
        if os.path.exists(tarball):
            inputFiles.append(tarball)

    info, splitterResult = self.createSubdag(*args, **kw)

    return info, params, inputFiles, splitterResult
def executeInternal(self, *args, **kw):
    """Create a per-task scratch directory and stage the job files into it.

    Only acts when ``self.config.TaskWorker.scratchDir`` is configured: a
    temporary directory is created there, the process changes into it, the
    wrapper scripts, the user sandbox (when served by the crabcache) and the
    runtime tarballs are placed in it, and its path is stored in
    ``kw['task']['scratch']``.

    NOTE(review): the source had lost its indentation; the nesting below is
    reconstructed from the data flow (``temp_dir`` only exists inside the
    ``scratchDir`` branch) -- confirm against the upstream file.
    NOTE(review): on success the process stays inside the scratch directory;
    nothing visible here restores ``cwd``.

    :arg kw: must contain ``task``, the task mapping.
    :raises TaskWorkerException: when the sandbox download fails.
    """
    cwd = None
    if hasattr(self.config, 'TaskWorker') and hasattr(self.config.TaskWorker, 'scratchDir'):
        temp_dir = tempfile.mkdtemp(prefix='_' + kw['task']['tm_taskname'],
                                    dir=self.config.TaskWorker.scratchDir)

        # FIXME: In PanDA, we provided the executable as a URL.
        # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis wrapper.
        #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
        transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
        cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
        gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
        dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
        bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
        adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

        cwd = os.getcwd()
        os.chdir(temp_dir)
        try:
            shutil.copy(transform_location, '.')
            shutil.copy(cmscp_location, '.')
            shutil.copy(gwms_location, '.')
            shutil.copy(dag_bootstrap_location, '.')
            shutil.copy(bootstrap_location, '.')
            shutil.copy(adjust_location, '.')

            # Bootstrap the ISB if we are using UFC
            if UserFileCache and kw['task']['tm_cache_url'].find('/crabcache') != -1:
                ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'],
                                          'key': kw['task']['user_proxy'],
                                          'endpoint': kw['task']['tm_cache_url']})
                try:
                    ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0],
                                 output="sandbox.tar.gz")
                except Exception as ex:  # "as" form: valid on Python 2.6+ and Python 3
                    self.logger.exception(ex)
                    raise TaskWorker.WorkerExceptions.TaskWorkerException(
                        ("The CRAB3 server backend could not download the input sandbox with your code "
                         "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later "
                         "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s") % str(ex))  # TODO url!?
                kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'

            # Bootstrap the runtime if it is available.
            job_runtime = getLocation('CMSRunAnalysis.tar.gz', 'CRABServer/')
            shutil.copy(job_runtime, '.')
            task_runtime = getLocation('TaskManagerRun.tar.gz', 'CRABServer/')
            shutil.copy(task_runtime, '.')
        except Exception:
            # Do not leave the worker process inside the scratch directory
            # when staging fails; the success path is left untouched.
            os.chdir(cwd)
            raise

        kw['task']['scratch'] = temp_dir
def testUploadDownload(self): if "UFCURL" in os.environ: currdir = getTestBase() upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz") # file to upload ufc = UserFileCache({"endpoint": os.environ["UFCURL"]}) # named upload/download res = ufc.upload(upfile, "name_publish.tgz") ufc.download(name=res["name"], output="name_publish.tgz") # hashkey upload/download res = ufc.upload(upfile) ufc.download(res["hashkey"], output="pippo_publish_down.tgz")
def testUploadDownload(self): if 'UFCURL' in os.environ: currdir = getTestBase() upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload ufc = UserFileCache({'endpoint':os.environ['UFCURL']}) #named upload/download res = ufc.upload(upfile, 'name_publish.tgz') ufc.download(name=res['name'], output='name_publish.tgz') #hashkey upload/download res = ufc.upload(upfile) ufc.download(res['hashkey'], output='pippo_publish_down.tgz')
def testUploadDownload(self): if 'UFCURL' in os.environ: currdir = getTestBase() upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload ufc = UserFileCache({'endpoint':os.environ['UFCURL']}) #named upload/download res = ufc.upload(upfile, 'name_publish.tgz') ufc.download(name=res['name'], output='name_publish.tgz') #hashkey upload/download res = ufc.upload(upfile) ufc.download(res['hashkey'], output='pippo_publish_down.tgz')
def executeInternal(self, *args, **kw):
    """Stage the job files in a scratch directory, build the DAG and return a Result.

    When ``self.config.TaskWorker.scratchDir`` is configured, a temporary
    directory is created there, the process changes into it and the wrapper
    scripts, the user sandbox (when served by the crabcache) and the runtime
    tarballs are placed in it. The REST coordinates are then stored on the task,
    the dashboard is notified and ``createSubdag`` is called.

    The original working directory is restored whatever happens after the
    ``os.chdir`` (the original code only restored it once ``createSubdag`` had
    been reached). ``temp_dir`` is ``None`` in the result when no scratch
    directory is configured (the original raised ``UnboundLocalError`` there).

    NOTE(review): the source had lost its indentation; the nesting below is
    reconstructed from the data flow -- confirm against the upstream file.

    :arg kw: must contain ``task``; ``args`` and ``kw`` are forwarded to
        ``createSubdag``.
    :returns: ``TaskWorker.DataObjects.Result.Result`` whose ``result`` is
        ``(temp_dir, info, params)``.
    """
    cwd = None
    temp_dir = None
    try:
        if hasattr(self.config, 'TaskWorker') and hasattr(self.config.TaskWorker, 'scratchDir'):
            temp_dir = tempfile.mkdtemp(prefix='_' + kw['task']['tm_taskname'],
                                        dir=self.config.TaskWorker.scratchDir)

            # FIXME: In PanDA, we provided the executable as a URL.
            # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis wrapper.
            #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
            transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
            cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
            gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
            dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
            bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
            adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

            cwd = os.getcwd()
            os.chdir(temp_dir)
            shutil.copy(transform_location, '.')
            shutil.copy(cmscp_location, '.')
            shutil.copy(gwms_location, '.')
            shutil.copy(dag_bootstrap_location, '.')
            shutil.copy(bootstrap_location, '.')
            shutil.copy(adjust_location, '.')

            # Bootstrap the ISB if we are using UFC
            if UserFileCache and kw['task']['tm_cache_url'].find('/crabcache') != -1:
                ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'],
                                          'key': kw['task']['user_proxy'],
                                          'endpoint': kw['task']['tm_cache_url']})
                ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0],
                             output="sandbox.tar.gz")
                kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'

            # Bootstrap the runtime if it is available.
            job_runtime = getLocation('CMSRunAnalysis.tar.gz', 'CRABServer/')
            shutil.copy(job_runtime, '.')
            task_runtime = getLocation('TaskManagerRun.tar.gz', 'CRABServer/')
            shutil.copy(task_runtime, '.')

            kw['task']['scratch'] = temp_dir

        kw['task']['restinstance'] = self.server['host']
        kw['task']['resturl'] = self.resturl.replace("/workflowdb", "/filemetadata")
        self.task = kw['task']
        params = self.sendDashboardTask()

        info = self.createSubdag(*args, **kw)
    finally:
        # cwd is only set once we are about to leave the original directory.
        if cwd:
            os.chdir(cwd)

    return TaskWorker.DataObjects.Result.Result(task=kw['task'], result=(temp_dir, info, params))
def testUploadDownload(self): if 'UFCURL' in os.environ: currdir = getTestBase() upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload upfileLog = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/uplog.txt') #file to upload ufc = UserFileCache({'endpoint':os.environ['UFCURL']}) #hashkey upload/download res = ufc.upload(upfile) ufc.download(res['hashkey'], output='pippo_publish_down.tgz') #log upload/download res = ufc.uploadLog(upfileLog) ufc.downloadLog(upfileLog, upfileLog+'.downloaded') self.assertTrue(filecmp.cmp(upfileLog, upfileLog+'.downloaded'))
def testUploadDownload(self): if "UFCURL" in os.environ: currdir = getTestBase() upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz") # file to upload upfileLog = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/uplog.txt") # file to upload ufc = UserFileCache({"endpoint": os.environ["UFCURL"], "pycurl": True}) # hashkey upload/download res = ufc.upload(upfile) ufc.download(res["hashkey"], output="pippo_publish_down.tgz") # hashkey deletion ufc.removeFile(res["hashkey"]) # log upload/download res = ufc.uploadLog(upfileLog) ufc.downloadLog(upfileLog, upfileLog + ".downloaded") self.assertTrue(filecmp.cmp(upfileLog, upfileLog + ".downloaded"))
def _execute(self, resthost, resturi, config, task):
    """Poll DDM for every task in TAPERECALL and release the completed ones.

    For each TAPERECALL task that has a ``tm_DDM_reqid``:

    * the input sandbox is downloaded from the crabcache and discarded, so it
      is not deleted until the tape recall is completed (best effort, failures
      are only logged);
    * the DDM request status is queried; when it is ``completed`` the task is
      set to NEW and its warnings are deleted.

    ``resthost``, ``resturi`` and ``task`` are not used by this implementation.

    :arg config: configuration object providing ``TaskWorker.DDMServer``,
        ``cmscert``, ``cmskey``, ``resturl`` and ``restURInoAPI``.
    """
    mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False)

    tapeRecallStatus = 'TAPERECALL'
    self.logger.info("Retrieving %s tasks", tapeRecallStatus)
    recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus)
    if len(recallingTasks) > 0:
        self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
        self.logger.debug("Retrieved the following %s tasks: \n%s", tapeRecallStatus, str(recallingTasks))
        for recallingTask in recallingTasks:
            if not recallingTask['tm_DDM_reqid']:
                self.logger.debug("tm_DDM_reqid' is not defined for task %s, skipping such task",
                                  recallingTask['tm_taskname'])
                continue

            # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
            from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
            ufc = UserFileCache({'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
            sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz", "")
            try:
                ufc.download(sandbox, sandbox, recallingTask['tm_username'])
            except Exception as ex:
                self.logger.exception(ex)
                self.logger.info("The CRAB3 server backend could not download the input sandbox (%s) from the frontend (%s) using the '%s' username."
                                 " This could be a temporary glitch, will try again in next occurrence of the recurring action."
                                 " Error reason:\n%s",
                                 sandbox, recallingTask['tm_cache_url'], recallingTask['tm_username'], str(ex))
            finally:
                # Remove the local copy even when the download failed part-way;
                # the original only cleaned up after a successful download.
                try:
                    if os.path.exists(sandbox):
                        os.remove(sandbox)
                except OSError as rmError:
                    self.logger.info("Could not remove the local sandbox copy %s: %s", sandbox, str(rmError))

            ddmRequest = statusRequest(recallingTask['tm_DDM_reqid'], config.TaskWorker.DDMServer,
                                       config.TaskWorker.cmscert, config.TaskWorker.cmskey, verbose=False)
            self.logger.info("Contacted %s using %s and %s, got:\n%s", config.TaskWorker.DDMServer,
                             config.TaskWorker.cmscert, config.TaskWorker.cmskey, ddmRequest)
            # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
            requestData = ddmRequest.get("data") or []
            if not requestData:
                # Without this guard one unknown request aborted the whole loop
                # with an IndexError/KeyError and the remaining tasks were skipped.
                self.logger.info("No data returned by DDM for request %s of task %s, skipping such task",
                                 recallingTask['tm_DDM_reqid'], recallingTask['tm_taskname'])
                continue

            if requestData[0]["status"] == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                self.logger.info("Request %d is completed, setting status of task %s to NEW",
                                 recallingTask['tm_DDM_reqid'], recallingTask['tm_taskname'])
                mw.updateWork(recallingTask['tm_taskname'], recallingTask['tm_task_command'], 'NEW')
                # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                server = HTTPRequests(config.TaskWorker.resturl, config.TaskWorker.cmscert,
                                      config.TaskWorker.cmskey, retry=20, logger=self.logger)
                mpl = MyProxyLogon(config=config, server=server, resturi=config.TaskWorker.restURInoAPI,
                                   myproxylen=self.pollingTime)
                mpl.execute(task=recallingTask)  # this adds 'user_proxy' to recallingTask
                mpl.deleteWarnings(recallingTask['user_proxy'], recallingTask['tm_taskname'])
    else:
        self.logger.info("No %s task retrieved.", tapeRecallStatus)
def executeInternal(self, *args, **kw):
    """Collect the files a task needs in the current directory, then create its DAG.

    Steps, in order: copy the wrapper scripts, fetch the user sandbox from the
    crabcache if the task uses it, copy the runtime tarballs, store the REST
    coordinates on the task, notify the dashboard (real submissions only),
    assemble the list of input files and call ``createSubdag``.

    :arg kw: must contain ``task`` (the task mapping); ``args`` and ``kw`` are
        forwarded unchanged to ``createSubdag``.
    :returns: ``(info, params, inputFiles, splitterResult)``.
    :raises TaskWorkerException: when the sandbox download fails.
    """
    # FIXME: In PanDA, we provided the executable as a URL.
    # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis
    # wrapper instead of resolving kw['task']['tm_transformation'].
    wrapperScripts = ['CMSRunAnalysis.sh', 'cmscp.py', 'gWMS-CMSRunAnalysis.sh',
                      'dag_bootstrap_startup.sh', "dag_bootstrap.sh", "AdjustSites.py"]
    # All locations are resolved before the first copy takes place.
    resolved = [getLocation(script, 'CRABServer/scripts/') for script in wrapperScripts]
    for source in resolved:
        shutil.copy(source, '.')

    # Bootstrap the ISB if we are using UFC
    if UserFileCache and '/crabcache' in kw['task']['tm_cache_url']:
        taskInfo = kw['task']
        credentials = {
            'cert': taskInfo['user_proxy'],
            'key': taskInfo['user_proxy'],
            'endpoint': taskInfo['tm_cache_url'],
        }
        ufc = UserFileCache(dict=credentials)
        try:
            ufc.download(hashkey=taskInfo['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
        except Exception as ex:
            self.logger.exception(ex)
            template = ("The CRAB3 server backend could not download the input sandbox with your code "
                        "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later "
                        "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s")
            raise TaskWorkerException(template % str(ex))  # TODO url!?
        taskInfo['tm_user_sandbox'] = 'sandbox.tar.gz'

    # Bootstrap the runtime if it is available.
    runtimeTarballs = ('CMSRunAnalysis.tar.gz', 'TaskManagerRun.tar.gz')
    for runtimeTarball in runtimeTarballs:
        shutil.copy(getLocation(runtimeTarball, 'CRABServer/'), '.')

    taskInfo = kw['task']
    taskInfo['resthost'] = self.server['host']
    taskInfo['resturinoapi'] = self.restURInoAPI
    self.task = taskInfo

    if taskInfo['tm_dry_run'] == 'F':
        params = self.sendDashboardTask()
    else:
        params = {}

    inputFiles = ['gWMS-CMSRunAnalysis.sh', 'CMSRunAnalysis.sh', 'cmscp.py', 'RunJobs.dag',
                  'Job.submit', 'dag_bootstrap.sh', 'AdjustSites.py', 'site.ad', 'site.ad.json',
                  'run_and_lumis.tar.gz', 'input_files.tar.gz']

    self.extractMonitorFiles(inputFiles, **kw)

    # Optional files are only listed when they are actually present.
    if taskInfo.get('tm_user_sandbox') == 'sandbox.tar.gz':
        inputFiles.append('sandbox.tar.gz')
    inputFiles.extend(name for name in runtimeTarballs if os.path.exists(name))

    info, splitterResult = self.createSubdag(*args, **kw)

    return info, params, inputFiles, splitterResult
def _execute(self, resthost, resturi, config, task):
    """Poll DDM for every task in TAPERECALL and move it on when the request settles.

    For each TAPERECALL task that has a ``tm_DDM_reqid``:

    * tasks older than ``MAX_DAYS_FOR_TAPERECALL`` days are set to FAILED;
    * a user proxy is obtained through ``MyProxyLogon``; if that works, the
      input and debug sandboxes are downloaded to ``/tmp`` and removed again so
      they are not deleted from the crabcache while the recall is pending
      (best effort, failures are only logged);
    * the DDM request is queried: ``completed`` sets the task to NEW and
      deletes its warnings, ``rejected`` sets it to FAILED, and an unknown
      request is reported as a task warning.

    ``resthost``, ``resturi`` and ``task`` are not used by this implementation.

    :arg config: configuration object providing ``TaskWorker.resturl``,
        ``restURInoAPI``, ``DDMServer``, ``cmscert`` and ``cmskey``.
    """
    mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False)

    tapeRecallStatus = 'TAPERECALL'
    self.logger.info("Retrieving %s tasks", tapeRecallStatus)
    recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus, ignoreTWName=True)
    if len(recallingTasks) > 0:
        self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
        for recallingTask in recallingTasks:
            taskName = recallingTask['tm_taskname']
            self.logger.info("Working on task %s", taskName)

            reqId = recallingTask['tm_DDM_reqid']
            if not reqId:
                self.logger.debug("tm_DDM_reqid' is not defined for task %s, skipping such task", taskName)
                continue

            server = HTTPRequests(config.TaskWorker.resturl, config.TaskWorker.cmscert,
                                  config.TaskWorker.cmskey, retry=20, logger=self.logger)
            if (time.time() - getTimeFromTaskname(str(taskName)) > MAX_DAYS_FOR_TAPERECALL*24*60*60):
                self.logger.info("Task %s is older than %d days, setting its status to FAILED",
                                 taskName, MAX_DAYS_FOR_TAPERECALL)
                msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (
                    reqId, MAX_DAYS_FOR_TAPERECALL)
                failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')
                continue

            mpl = MyProxyLogon(config=config, server=server, resturi=config.TaskWorker.restURInoAPI,
                               myproxylen=self.pollingTime)
            user_proxy = True
            try:
                mpl.execute(task=recallingTask)  # this adds 'user_proxy' to recallingTask
            except TaskWorkerException as twe:
                user_proxy = False
                self.logger.exception(twe)

            # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
            if user_proxy:
                from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                ufc = UserFileCache({'cert': recallingTask['user_proxy'], 'key': recallingTask['user_proxy'],
                                     'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
                sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz", "")
                debugFiles = recallingTask['tm_debug_files'].replace(".tar.gz", "")
                sandboxPath = os.path.join("/tmp", sandbox)
                debugFilesPath = os.path.join("/tmp", debugFiles)
                try:
                    ufc.download(sandbox, sandboxPath, recallingTask['tm_username'])
                    ufc.download(debugFiles, debugFilesPath, recallingTask['tm_username'])
                    self.logger.info("Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                                     sandbox, debugFiles, taskName, recallingTask['tm_cache_url'],
                                     recallingTask['tm_username'], reqId)
                except Exception as ex:
                    self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."
                                     " This could be a temporary glitch, will try again in next occurrence of the recurring action."
                                     " Error reason:\n%s",
                                     sandbox, debugFiles, taskName, recallingTask['tm_cache_url'],
                                     recallingTask['tm_username'], reqId, str(ex))
                finally:
                    if os.path.exists(sandboxPath):
                        os.remove(sandboxPath)
                    if os.path.exists(debugFilesPath):
                        os.remove(debugFilesPath)

            ddmRequest = statusRequest(reqId, config.TaskWorker.DDMServer, config.TaskWorker.cmscert,
                                       config.TaskWorker.cmskey, verbose=False)
            # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
            self.logger.info("Contacted %s using %s and %s for request_id = %d, got:\n%s",
                             config.TaskWorker.DDMServer, config.TaskWorker.cmscert,
                             config.TaskWorker.cmskey, reqId, ddmRequest)

            if ddmRequest["message"] == "Request found":
                status = ddmRequest["data"][0]["status"]
                if status == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info("Request %d is completed, setting status of task %s to NEW", reqId, taskName)
                    mw.updateWork(taskName, recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    if user_proxy:
                        mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
                elif status == "rejected":
                    msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (
                        reqId, ddmRequest["data"][0]["reason"])
                    # msg embeds text supplied by DDM: pass it as an argument so a
                    # '%' in the reason is not interpreted as a format directive.
                    self.logger.info("%s\nSetting status of task %s to FAILED", msg, taskName)
                    failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')
            else:
                msg = "DDM request_id %d not found. Please report to experts" % reqId
                self.logger.info(msg)
                if user_proxy:
                    mpl.uploadWarning(msg, recallingTask['user_proxy'], taskName)
    else:
        self.logger.info("No %s task retrieved.", tapeRecallStatus)
def _execute(self, resthost, resturi, config, task):
    """Poll DDM for every task in TAPERECALL, logging each run to its own file.

    Logging: without a pre-existing ``self.logger`` a stdout logger is created;
    otherwise the ``'TapeRecallStatus'`` logger is used and a new time-stamped
    file under ``config.TaskWorker.logsDir/tasks/recurring/`` is attached to it.

    For each TAPERECALL task that has a ``tm_DDM_reqid``:

    * tasks older than ``MAX_DAYS_FOR_TAPERECALL`` days are set to FAILED;
    * a user proxy is obtained through ``MyProxyLogon``; if that works, the
      input and debug sandboxes are downloaded to ``/tmp`` and removed again so
      they are not deleted from the crabcache while the recall is pending
      (best effort, failures are only logged);
    * the DDM request is queried: ``completed`` sets the task to NEW and
      deletes its warnings, ``rejected`` sets it to FAILED, and an unknown
      request is reported as a task warning.

    :arg resthost: REST host handed to ``HTTPRequests``.
    :arg resturi: REST URI handed to ``failTask`` and ``MyProxyLogon``.
    :arg config: configuration object providing ``TaskWorker.logsDir``,
        ``DDMServer``, ``cmscert`` and ``cmskey``.
    :arg task: not used by this implementation.
    """
    # setup logger
    if not self.logger:
        self.logger = logging.getLogger(__name__)
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s %(message)s")
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.DEBUG)
    else:
        # do not use BaseRecurringAction logger but create a new logger
        # which writes to config.TaskWorker.logsDir/tasks/recurring/TapeRecallStatus_YYMMDD-HHMM.log
        self.logger = logging.getLogger('TapeRecallStatus')
        # logging.getLogger returns the same object on every call, so the file
        # handlers of earlier runs must be detached and closed; otherwise they
        # pile up, leak file descriptors and duplicate each line into old files.
        for staleHandler in list(self.logger.handlers):
            if isinstance(staleHandler, logging.FileHandler):
                self.logger.removeHandler(staleHandler)
                staleHandler.close()
        logDir = config.TaskWorker.logsDir + '/tasks/recurring/'
        if not os.path.exists(logDir):
            os.makedirs(logDir)
        timeStamp = time.strftime('%y%m%d-%H%M', time.localtime())
        logFile = 'TapeRecallStatus_' + timeStamp + '.log'
        handler = logging.FileHandler(logDir + logFile)
        formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(module)s:%(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

    mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False,
                      name='masterForTapeRecall')

    tapeRecallStatus = 'TAPERECALL'
    self.logger.info("Retrieving %s tasks", tapeRecallStatus)
    recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus, ignoreTWName=True)
    if len(recallingTasks) > 0:
        self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
        for recallingTask in recallingTasks:
            taskName = recallingTask['tm_taskname']
            self.logger.info("Working on task %s", taskName)

            reqId = recallingTask['tm_DDM_reqid']
            if not reqId:
                self.logger.debug("tm_DDM_reqid' is not defined for task %s, skipping such task", taskName)
                continue

            server = HTTPRequests(resthost, config.TaskWorker.cmscert, config.TaskWorker.cmskey,
                                  retry=20, logger=self.logger)
            if (time.time() - getTimeFromTaskname(str(taskName)) > MAX_DAYS_FOR_TAPERECALL * 24 * 60 * 60):
                self.logger.info("Task %s is older than %d days, setting its status to FAILED",
                                 taskName, MAX_DAYS_FOR_TAPERECALL)
                msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (
                    reqId, MAX_DAYS_FOR_TAPERECALL)
                failTask(taskName, server, resturi, msg, self.logger, 'FAILED')
                continue

            mpl = MyProxyLogon(config=config, server=server, resturi=resturi, myproxylen=self.pollingTime)
            user_proxy = True
            try:
                mpl.execute(task=recallingTask)  # this adds 'user_proxy' to recallingTask
            except TaskWorkerException as twe:
                user_proxy = False
                self.logger.exception(twe)

            # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
            if user_proxy:
                from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                ufc = UserFileCache({
                    'cert': recallingTask['user_proxy'],
                    'key': recallingTask['user_proxy'],
                    'endpoint': recallingTask['tm_cache_url'],
                    "pycurl": True
                })
                sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz", "")
                debugFiles = recallingTask['tm_debug_files'].replace(".tar.gz", "")
                sandboxPath = os.path.join("/tmp", sandbox)
                debugFilesPath = os.path.join("/tmp", debugFiles)
                try:
                    ufc.download(sandbox, sandboxPath, recallingTask['tm_username'])
                    ufc.download(debugFiles, debugFilesPath, recallingTask['tm_username'])
                    self.logger.info(
                        "Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                        sandbox, debugFiles, taskName, recallingTask['tm_cache_url'],
                        recallingTask['tm_username'], reqId)
                except Exception as ex:
                    self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."
                                     " This could be a temporary glitch, will try again in next occurrence of the recurring action."
                                     " Error reason:\n%s",
                                     sandbox, debugFiles, taskName, recallingTask['tm_cache_url'],
                                     recallingTask['tm_username'], reqId, str(ex))
                finally:
                    if os.path.exists(sandboxPath):
                        os.remove(sandboxPath)
                    if os.path.exists(debugFilesPath):
                        os.remove(debugFilesPath)

            ddmRequest = statusRequest(reqId, config.TaskWorker.DDMServer, config.TaskWorker.cmscert,
                                       config.TaskWorker.cmskey, verbose=False)
            # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
            self.logger.info("Contacted %s using %s and %s for request_id = %d, got:\n%s",
                             config.TaskWorker.DDMServer, config.TaskWorker.cmscert,
                             config.TaskWorker.cmskey, reqId, ddmRequest)

            if ddmRequest["message"] == "Request found":
                status = ddmRequest["data"][0]["status"]
                if status == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info("Request %d is completed, setting status of task %s to NEW", reqId, taskName)
                    mw.updateWork(taskName, recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    if user_proxy:
                        mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
                elif status == "rejected":
                    msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (
                        reqId, ddmRequest["data"][0]["reason"])
                    # msg embeds text supplied by DDM: pass it as an argument so a
                    # '%' in the reason is not interpreted as a format directive.
                    self.logger.info("%s\nSetting status of task %s to FAILED", msg, taskName)
                    failTask(taskName, server, resturi, msg, self.logger, 'FAILED')
            else:
                msg = "DDM request_id %d not found. Please report to experts" % reqId
                self.logger.info(msg)
                if user_proxy:
                    mpl.uploadWarning(msg, recallingTask['user_proxy'], taskName)
    else:
        self.logger.info("No %s task retrieved.", tapeRecallStatus)