def executeAction(self, nextinput, work):
    """ Execute an action and deal with the error handling and upload of the tasklogfile to the crabcache """
    try:
        output = work.execute(nextinput, task=self._task, tempDir=self.tempDir)
    except TaskWorkerException as twe:
        self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
        raise WorkerHandlerException(str(twe), retry=twe.retry) #TaskWorker error, do not add traceback to the error propagated to the REST
    except Exception as exc:
        msg = "Problem handling %s because of %s failure, traceback follows\n" % (self._task['tm_taskname'], str(exc))
        msg += str(traceback.format_exc())
        self.logger.error(msg)
        raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
    finally:
        #upload logfile of the task to the crabcache
        #TODO: we need to do that also in Worker.py otherwise some messages might only be in the TW file but not in the crabcache.
        logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'], self._task['tm_taskname'])
        if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
            cacheurldict = {'endpoint': self._task['tm_cache_url'], 'cert': self._task['user_proxy'], 'key': self._task['user_proxy']}
            try:
                ufc = UserFileCache(cacheurldict)
                logfilename = self._task['tm_taskname'] + '_TaskWorker.log'
                ufc.uploadLog(logpath, logfilename)
            except HTTPException as hte:
                msg = "Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" % \
                      (self._task['tm_cache_url'], self._task['tm_taskname'], hte.headers, hte.result)
                self.logger.error(msg)
            except Exception: #pylint: disable=broad-except
                msg = "Unknown error while uploading the logfile for task %s" % self._task['tm_taskname']
                self.logger.exception(msg)
    return output
class UserFileCacheTest(unittest.TestCase):
    """
    Unit tests for UserFileCache Service
    """

    def testChecksum(self):
        """
        Tests checksum method
        """
        self.ufc = UserFileCache()
        checksum1 = self.ufc.checksum(fileName=path.join(getTestBase(), 'WMCore_t/Services_t/UserFileCache_t/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz'))
        checksum2 = self.ufc.checksum(fileName=path.join(getTestBase(), 'WMCore_t/Services_t/UserFileCache_t/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz'))
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertFalse(checksum1 == checksum2)
        self.assertRaises(IOError, self.ufc.checksum, **{'fileName': 'does_not_exist'})
        return

    def testUploadDownload(self):
        if 'UFCURL' in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload
            ufc = UserFileCache({'endpoint': os.environ['UFCURL']})
            #named upload/download
            res = ufc.upload(upfile, 'name_publish.tgz')
            ufc.download(name=res['name'], output='name_publish.tgz')
            #hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res['hashkey'], output='pippo_publish_down.tgz')
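# --- Illustration (not from the source): a minimal crabcache round trip using
# the same API the tests above exercise. The endpoint URL is a placeholder;
# upload() returning a dict with 'hashkey'/'name', download(), and removeFile()
# are taken from the calls shown elsewhere in this file.
from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

ufc = UserFileCache({'endpoint': 'https://cmsweb.cern.ch/crabcache', 'pycurl': True})
res = ufc.upload('test_file.tgz')                # returns a dict with 'hashkey' (and 'name')
ufc.download(res['hashkey'], output='copy.tgz')  # fetch the file back by hashkey
ufc.removeFile(res['hashkey'])                   # delete the cached copy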
def executeInternal(self, *args, **kw):
    # FIXME: In PanDA, we provided the executable as a URL.
    # So, the filename becomes http:// -- and doesn't really work. Hardcoding the analysis wrapper.
    #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
    transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
    cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
    gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
    dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
    bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
    adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

    shutil.copy(transform_location, '.')
    shutil.copy(cmscp_location, '.')
    shutil.copy(gwms_location, '.')
    shutil.copy(dag_bootstrap_location, '.')
    shutil.copy(bootstrap_location, '.')
    shutil.copy(adjust_location, '.')

    # Bootstrap the ISB if we are using UFC
    if UserFileCache and kw['task']['tm_cache_url'].find('/crabcache') != -1:
        ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint': kw['task']['tm_cache_url']})
        try:
            ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
        except Exception as ex:
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not download the input sandbox with your code " +
                                      "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later " +
                                      "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO url!?
        kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'
def executeInternal(self, *args, **kw):
    inputFiles = args[0][2]
    splitterResult = args[0][3][0]

    cwd = os.getcwd()
    try:
        os.chdir(kw['tempDir'])
        splittingSummary = SplittingSummary(kw['task']['tm_split_algo'])
        for jobgroup in splitterResult:
            jobs = jobgroup.getJobs()
            splittingSummary.addJobs(jobs)
        splittingSummary.dump('splitting-summary.json')
        inputFiles.append('splitting-summary.json')

        self.packSandbox(inputFiles)

        self.logger.info('Uploading dry run tarball to the user file cache')
        ufc = UserFileCache(mydict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint': kw['task']['tm_cache_url']})
        result = ufc.uploadLog('dry-run-sandbox.tar.gz')
        os.remove('dry-run-sandbox.tar.gz')
        if 'hashkey' not in result:
            raise TaskWorkerException('Failed to upload dry-run-sandbox.tar.gz to the user file cache: ' + str(result))
        else:
            self.logger.info('Uploaded dry run tarball to the user file cache: ' + str(result))
            update = {'workflow': kw['task']['tm_taskname'], 'subresource': 'state', 'status': 'UPLOADED'}
            self.logger.debug('Updating task status: %s' % str(update))
            self.server.post(self.resturi, data=urllib.urlencode(update))
    finally:
        os.chdir(cwd)

    return Result(task=kw['task'], result=args[0])
def executeInternal(self, *args, **kw):
    # FIXME: In PanDA, we provided the executable as a URL.
    # So, the filename becomes http:// -- and doesn't really work. Hardcoding the analysis wrapper.
    #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
    transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
    cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
    gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
    dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
    bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
    adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

    shutil.copy(transform_location, '.')
    shutil.copy(cmscp_location, '.')
    shutil.copy(gwms_location, '.')
    shutil.copy(dag_bootstrap_location, '.')
    shutil.copy(bootstrap_location, '.')
    shutil.copy(adjust_location, '.')

    # Bootstrap the ISB if we are using UFC
    if UserFileCache and kw['task']['tm_cache_url'].find('/crabcache') != -1:
        ufc = UserFileCache(mydict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint': kw['task']['tm_cache_url']})
        try:
            ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
        except Exception as ex:
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not download the input sandbox with your code " +
                                      "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later " +
                                      "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO url!?
        kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'

    # Bootstrap the runtime if it is available.
    job_runtime = getLocation('CMSRunAnalysis.tar.gz', 'CRABServer/')
    shutil.copy(job_runtime, '.')
    task_runtime = getLocation('TaskManagerRun.tar.gz', 'CRABServer/')
    shutil.copy(task_runtime, '.')

    kw['task']['resthost'] = self.server['host']
    kw['task']['resturinoapi'] = self.restURInoAPI
    self.task = kw['task']

    params = {}
    if kw['task']['tm_dry_run'] == 'F':
        params = self.sendDashboardTask()

    inputFiles = ['gWMS-CMSRunAnalysis.sh', 'CMSRunAnalysis.sh', 'cmscp.py', 'RunJobs.dag', 'Job.submit', 'dag_bootstrap.sh',
                  'AdjustSites.py', 'site.ad', 'site.ad.json', 'run_and_lumis.tar.gz', 'input_files.tar.gz']

    self.extractMonitorFiles(inputFiles, **kw)

    if kw['task'].get('tm_user_sandbox') == 'sandbox.tar.gz':
        inputFiles.append('sandbox.tar.gz')
    if os.path.exists("CMSRunAnalysis.tar.gz"):
        inputFiles.append("CMSRunAnalysis.tar.gz")
    if os.path.exists("TaskManagerRun.tar.gz"):
        inputFiles.append("TaskManagerRun.tar.gz")

    info, splitterResult = self.createSubdag(*args, **kw)

    return info, params, inputFiles, splitterResult
def actionWork(self, *args, **kwargs):
    """Performing the set of actions"""
    nextinput = args

    #set the logger to save the tasklog
    formatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s")
    taskdirname = "logs/tasks/%s/" % self._task['tm_username']
    if not os.path.isdir(taskdirname):
        os.mkdir(taskdirname)
    taskhandler = FileHandler(taskdirname + self._task['tm_taskname'] + '.log')
    taskhandler.setLevel(logging.DEBUG)
    self.logger.addHandler(taskhandler)

    for work in self.getWorks():
        self.logger.debug("Starting %s on %s" % (str(work), self._task['tm_taskname']))
        t0 = time.time()
        try:
            output = work.execute(nextinput, task=self._task)
        except StopHandler as sh:
            msg = "Controlled stop of handler for %s on %s " % (self._task, str(sh))
            self.logger.error(msg)
            nextinput = Result(task=self._task, result='StopHandler exception received, controlled stop')
            break #exit normally. Worker will not notice there was an error
        except TaskWorkerException as twe:
            self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
            self.removeTaskLogHandler(taskhandler)
            raise WorkerHandlerException(str(twe)) #TaskWorker error, do not add traceback to the error propagated to the REST
        except Exception as exc:
            msg = "Problem handling %s because of %s failure, traceback follows\n" % (self._task['tm_taskname'], str(exc))
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            self.removeTaskLogHandler(taskhandler)
            raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
        finally:
            #upload logfile of the task to the crabcache
            logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'], self._task['tm_taskname'])
            if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
                cacheurldict = {'endpoint': self._task['tm_cache_url'], 'cert': self._task['user_proxy'], 'key': self._task['user_proxy']}
                try:
                    ufc = UserFileCache(cacheurldict)
                    logfilename = self._task['tm_taskname'] + '_TaskWorker.log'
                    ufc.uploadLog(logpath, logfilename)
                except HTTPException as hte:
                    msg = ("Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" %
                           (self._task['tm_cache_url'], self._task['tm_taskname'], hte.headers, hte.result))
                    self.logger.error(msg)
                except Exception as e:
                    msg = "Unknown error while uploading the logfile for task %s" % self._task['tm_taskname']
                    self.logger.exception(msg)
        t1 = time.time()
        self.logger.info("Finished %s on %s in %d seconds" % (str(work), self._task['tm_taskname'], t1 - t0))
        try:
            nextinput = output.result
        except AttributeError:
            nextinput = output

    self.removeTaskLogHandler(taskhandler)

    return nextinput
def actionWork(self, *args, **kwargs):
    """Performing the set of actions"""
    nextinput = args

    taskhandler = self.addTaskLogHandler()

    # I know it looks like a duplicated printout from the process logs (proc.N.log) perspective.
    # In fact we have a similar printout in the processWorker function of the Worker module, but
    # it does not go to the task logfile and it is useful imho.
    self.logger.debug("Process %s is starting %s on task %s" % (self.procnum, self.workFunction, self._task['tm_taskname']))

    for work in self.getWorks():
        #Loop that iterates over the actions to be performed
        self.logger.debug("Starting %s on %s" % (str(work), self._task['tm_taskname']))
        t0 = time.time()
        try:
            output = work.execute(nextinput, task=self._task)
        except StopHandler as sh:
            msg = "Controlled stop of handler for %s on %s " % (self._task, str(sh))
            self.logger.error(msg)
            nextinput = Result(task=self._task, result='StopHandler exception received, controlled stop')
            break #exit normally. Worker will not notice there was an error
        except TaskWorkerException as twe:
            self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
            self.removeTaskLogHandler(taskhandler)
            raise WorkerHandlerException(str(twe)) #TaskWorker error, do not add traceback to the error propagated to the REST
        except Exception as exc:
            msg = "Problem handling %s because of %s failure, traceback follows\n" % (self._task['tm_taskname'], str(exc))
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            self.removeTaskLogHandler(taskhandler)
            raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
        finally:
            #upload logfile of the task to the crabcache
            logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'], self._task['tm_taskname'])
            if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
                cacheurldict = {'endpoint': self._task['tm_cache_url'], 'cert': self._task['user_proxy'], 'key': self._task['user_proxy']}
                try:
                    ufc = UserFileCache(cacheurldict)
                    logfilename = self._task['tm_taskname'] + '_TaskWorker.log'
                    ufc.uploadLog(logpath, logfilename)
                except HTTPException as hte:
                    msg = ("Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" %
                           (self._task['tm_cache_url'], self._task['tm_taskname'], hte.headers, hte.result))
                    self.logger.error(msg)
                except Exception:
                    msg = "Unknown error while uploading the logfile for task %s" % self._task['tm_taskname']
                    self.logger.exception(msg)
        t1 = time.time()
        self.logger.info("Finished %s on %s in %d seconds" % (str(work), self._task['tm_taskname'], t1 - t0))
        try:
            nextinput = output.result
        except AttributeError:
            nextinput = output

    self.removeTaskLogHandler(taskhandler)

    return nextinput
def __call__(self):
    self.logger.info('Getting the tarball hash key')

    tarballdir = glob.glob(self.requestarea + '/inputs/*.tgz')
    if len(tarballdir) != 1:
        self.logger.info('%sError%s: Could not find tarball or there is more than one tarball' % (colors.RED, colors.NORMAL))
        raise ConfigurationException
    tarballdir = tarballdir[0]

    #checking task status
    self.logger.info('Checking task status')
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

    dictresult = dictresult['result'][0] #take just the significant part

    if status != 200:
        msg = "Problem retrieving task status:\ninput: %s\noutput: %s\nreason: %s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        raise RESTCommunicationException(msg)

    self.logger.info('Task status: %s' % dictresult['status'])
    accepstate = ['KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED']
    if dictresult['status'] not in accepstate:
        msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'.format(accepstate) % (colors.RED, colors.NORMAL))
        raise ConfigurationException(msg)

    #getting the cache url
    cacheresult = {}
    scheddresult = {}
    gsisshdict = {}
    if not self.options.scheddonly:
        baseurl = getUrl(self.instance, resource='info')
        cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

        ufc = UserFileCache(cacheurldict)
        hashkey = ufc.checksum(tarballdir)
        self.logger.info('Tarball hashkey: %s' % hashkey)
        self.logger.info('Attempting to remove task file from crab server cache')

        try:
            ufcresult = ufc.removeFile(hashkey)
        except HTTPException as hte:
            if 'X-Error-Info' in hte.headers and 'Not such file' in hte.headers['X-Error-Info']:
                self.logger.info('%sError%s: Failed to find task file in crab server cache; the file might have been already purged' % (colors.RED, colors.NORMAL))
            raise

        if ufcresult == '':
            self.logger.info('%sSuccess%s: Successfully removed task files from crab server cache' % (colors.GREEN, colors.NORMAL))
            cacheresult = 'SUCCESS'
        else:
            self.logger.info('%sError%s: Failed to remove task files from crab server cache' % (colors.RED, colors.NORMAL))
            cacheresult = 'FAILED'
def uploadPublishWorkflow(config, workflow, ufcEndpoint, workDir):
    """
    Write out and upload to the UFC a JSON file
    with all the info needed to publish this dataset later
    """
    retok, proxyfile = getProxy(config, workflow.dn, workflow.vogroup, workflow.vorole)
    if not retok:
        logging.info("Cannot get the user's proxy")
        return False

    ufc = UserFileCache({'endpoint': ufcEndpoint, 'cert': proxyfile, 'key': proxyfile})

    # Skip tasks ending in LogCollect, they have nothing interesting.
    taskNameParts = workflow.task.split('/')
    if taskNameParts.pop() in ['LogCollect']:
        logging.info('Skipping LogCollect task')
        return False
    logging.info('Generating JSON for publication of %s of type %s' % (workflow.name, workflow.wfType))

    myThread = threading.currentThread()

    dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi)
    findFiles = dbsDaoFactory(classname="LoadFilesByWorkflow")

    # Fetch and filter the files to the ones we actually need
    uploadDatasets = {}
    uploadFiles = findFiles.execute(workflowName=workflow.name)
    for fileInfo in uploadFiles:
        datasetName = fileInfo['datasetPath']
        if datasetName not in uploadDatasets:
            uploadDatasets[datasetName] = []
        uploadDatasets[datasetName].append(fileInfo)

    if not uploadDatasets:
        logging.info('No datasets found to upload.')
        return False

    # Write JSON file and then create tarball with it
    baseName = '%s_publish.tgz' % workflow.name
    jsonName = os.path.join(workDir, '%s_publish.json' % workflow.name)
    tgzName = os.path.join(workDir, baseName)
    with open(jsonName, 'w') as jsonFile:
        json.dump(uploadDatasets, fp=jsonFile, cls=FileEncoder, indent=2)

    # Only in 2.7 does tarfile become usable as context manager
    tgzFile = tarfile.open(name=tgzName, mode='w:gz')
    tgzFile.add(jsonName)
    tgzFile.close()

    result = ufc.upload(fileName=tgzName, name=baseName)
    logging.debug('Upload result %s' % result)
    # If this doesn't work, exception will propagate up and block archiving the task
    logging.info('Uploaded with name %s and hashkey %s' % (result['name'], result['hashkey']))
    return
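# --- Illustration (not from the source): the datasetName grouping loop above
# can be written more compactly with dict.setdefault; dummy file dicts stand in
# for the DAO results.
uploadDatasets = {}
for fileInfo in [{'datasetPath': '/A/B/C'}, {'datasetPath': '/A/B/C'}]:
    uploadDatasets.setdefault(fileInfo['datasetPath'], []).append(fileInfo)
assert len(uploadDatasets['/A/B/C']) == 2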
def executeInternal(self, *args, **kw):
    cwd = None
    if hasattr(self.config, 'TaskWorker') and hasattr(self.config.TaskWorker, 'scratchDir'):
        temp_dir = tempfile.mkdtemp(prefix='_' + kw['task']['tm_taskname'], dir=self.config.TaskWorker.scratchDir)

        # FIXME: In PanDA, we provided the executable as a URL.
        # So, the filename becomes http:// -- and doesn't really work. Hardcoding the analysis wrapper.
        #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
        transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
        cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
        gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
        dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
        bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
        adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

        cwd = os.getcwd()
        os.chdir(temp_dir)

        shutil.copy(transform_location, '.')
        shutil.copy(cmscp_location, '.')
        shutil.copy(gwms_location, '.')
        shutil.copy(dag_bootstrap_location, '.')
        shutil.copy(bootstrap_location, '.')
        shutil.copy(adjust_location, '.')

        # Bootstrap the ISB if we are using UFC
        if UserFileCache and kw['task']['tm_cache_url'].find('/crabcache') != -1:
            ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint': kw['task']['tm_cache_url']})
            ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
            kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'

        # Bootstrap the runtime if it is available.
        job_runtime = getLocation('CMSRunAnalysis.tar.gz', 'CRABServer/')
        shutil.copy(job_runtime, '.')
        task_runtime = getLocation('TaskManagerRun.tar.gz', 'CRABServer/')
        shutil.copy(task_runtime, '.')

        kw['task']['scratch'] = temp_dir

    kw['task']['restinstance'] = self.server['host']
    kw['task']['resturl'] = self.resturl.replace("/workflowdb", "/filemetadata")
    self.task = kw['task']
    params = self.sendDashboardTask()

    try:
        info = self.createSubdag(*args, **kw)
    finally:
        if cwd:
            os.chdir(cwd)

    return TaskWorker.DataObjects.Result.Result(task=kw['task'], result=(temp_dir, info, params))
def upload(self):
    """
    Upload the tarball to the File Cache
    """
    self.close()
    archiveName = self.tarfile.name
    serverUrl = ""
    self.logger.debug(" uploading archive to cache %s " % archiveName)
    ufc = UserFileCache({'endpoint': self.config.JobType.filecacheurl})
    result = ufc.upload(archiveName)
    if 'hashkey' not in result:
        self.logger.error("Failed to upload source files: %s" % str(result))
        raise CachefileNotFoundException
    return self.config.JobType.filecacheurl, str(result['hashkey']) + '.tar.gz', self.checksum
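# --- Illustration (not from the source): what the upload() step above hands
# back, assuming a reachable filecacheurl (the endpoint below is a
# placeholder). The client then derives the sandbox name from the returned
# hashkey, exactly as in the return statement above.
ufc = UserFileCache({'endpoint': 'https://cmsweb.cern.ch/crabcache'})
result = ufc.upload('sandbox.tar.gz')
sandboxName = str(result['hashkey']) + '.tar.gz'  # name under which CRAB refers to the sandbox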
def testUploadDownload(self):
    if "UFCURL" in os.environ:
        currdir = getTestBase()
        upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz")  # file to upload
        upfileLog = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/uplog.txt")  # file to upload
        ufc = UserFileCache({"endpoint": os.environ["UFCURL"], "pycurl": True})

        # hashkey upload/download
        res = ufc.upload(upfile)
        ufc.download(res["hashkey"], output="pippo_publish_down.tgz")

        # hashkey deletion
        ufc.removeFile(res["hashkey"])

        # log upload/download
        res = ufc.uploadLog(upfileLog)
        ufc.downloadLog(upfileLog, upfileLog + ".downloaded")
        self.assertTrue(filecmp.cmp(upfileLog, upfileLog + ".downloaded"))
class UserFileCacheTest(unittest.TestCase):
    """
    Unit tests for UserFileCache Service
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.ufc = UserFileCache()

    def testChecksum(self):
        """
        Tests checksum method
        """
        checksum1 = self.ufc.checksum(fileName=path.join(getTestBase(), '../data/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz'))
        checksum2 = self.ufc.checksum(fileName=path.join(getTestBase(), '../data/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz'))
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertFalse(checksum1 == checksum2)
        self.assertRaises(IOError, self.ufc.checksum, **{'fileName': 'does_not_exist'})
        return
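# --- Illustration (not from the source): checksum() is computed locally and is
# stable for a given file, which is why the purge command in this file can use
# ufc.checksum(tarballdir) as the crabcache hashkey. File names are placeholders.
ufc = UserFileCache()
key1 = ufc.checksum(fileName='sandbox.tgz')
key2 = ufc.checksum(fileName='sandbox.tgz')
assert key1 == key2  # same file -> same hashkey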
class UserFileCacheTest(unittest.TestCase):
    """
    Unit tests for UserFileCache Service
    """

    def testChecksum(self):
        """
        Tests checksum method
        """
        self.ufc = UserFileCache()
        checksum1 = self.ufc.checksum(fileName=path.join(getTestBase(), "../data/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz"))
        checksum2 = self.ufc.checksum(fileName=path.join(getTestBase(), "../data/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz"))
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertFalse(checksum1 == checksum2)
        self.assertRaises(IOError, self.ufc.checksum, **{"fileName": "does_not_exist"})
        return

    def testUploadDownload(self):
        if "UFCURL" in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz")  # file to upload
            ufc = UserFileCache({"endpoint": os.environ["UFCURL"]})

            # named upload/download
            res = ufc.upload(upfile, "name_publish.tgz")
            ufc.download(name=res["name"], output="name_publish.tgz")

            # hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res["hashkey"], output="pippo_publish_down.tgz")
def uploadlogfile(logger, proxyfilename, taskname=None, logfilename=None, logpath=None, instance=None, serverurl=None, username=None):
    ## WMCore dependencies. Moved here to minimize dependencies in the bootstrap script
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    doupload = True

    if logfilename is None:
        logfilename = str(time.strftime("%Y-%m-%d_%H%M%S")) + '_crab.log'

    logger.info('Fetching user environment to log file')

    try:
        logger.debug('Running env command')
        stdout, _, _ = execute_command(command='env')
        logger.debug('\n\n\nUSER ENVIRONMENT\n%s' % stdout)
    except Exception as se:
        logger.debug('Failed to get the user env\nException message: %s' % (se))

    if logpath is not None:
        if not os.path.exists(logpath):
            doupload = False
            logger.debug('%sError%s: %s does not exist' % (colors.RED, colors.NORMAL, logpath))
    else:
        if os.path.exists(str(os.getcwd()) + '/crab.log'):
            logpath = str(os.getcwd()) + '/crab.log'
        else:
            logger.debug('%sError%s: Failed to find crab.log in current directory %s' % (colors.RED, colors.NORMAL, str(os.getcwd())))

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    if doupload:
        # uploadLog is executed directly from crab main script, does not inherit from SubCommand
        # so it needs its own REST server instantiation
        restClass = CRABClient.Emulator.getEmulator('rest')
        crabserver = restClass(hostname=serverurl, localcert=proxyfilename, localkey=proxyfilename,
                               retry=2, logger=logger, verbose=False, version=__version__, userAgent='CRABClient')
        crabserver.setDbInstance(instance)
        cacheurl = server_info(crabserver=crabserver, subresource='backendurls')['cacheSSL']
        logger.info("Uploading log file...")
        if 'S3' in cacheurl.upper():
            objecttype = 'clientlog'
            uploadToS3(crabserver=crabserver, filepath=logpath, objecttype=objecttype, taskname=taskname, logger=logger)
            logfileurl = getDownloadUrlFromS3(crabserver=crabserver, objecttype=objecttype, taskname=taskname, logger=logger)
        else:
            cacheurldict = {'endpoint': cacheurl, "pycurl": True}
            ufc = UserFileCache(cacheurldict)
            logger.debug("cacheURL: %s\nLog file name: %s" % (cacheurl, logfilename))
            ufc.uploadLog(logpath, logfilename)
            logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
            if not username:
                from CRABClient.UserUtilities import getUsername
                username = getUsername(proxyFile=proxyfilename, logger=logger)
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False

    return logfileurl
def uploadSandboxToUFC(self, url, sandbox=None, name="YAATFile"):
    if not sandbox:
        sandbox = self.sandbox
    req = UserFileCache({'endpoint': url})
    return req.upload(sandbox, name)
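# --- Illustration (not from the source): the named upload the helper above
# wraps, shown standalone; endpoint and file names are placeholders.
from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

req = UserFileCache({'endpoint': 'https://cmsweb.cern.ch/crabcache'})
res = req.upload('my_sandbox.tgz', 'YAATFile')        # upload under an explicit name
req.download(name=res['name'], output='fetched.tgz')  # retrieve it by that name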
def executeInternal(self, *args, **kw):
    inputFiles = args[0][2]
    splitterResult = args[0][3][0]

    cwd = os.getcwd()
    try:
        os.chdir(kw['tempDir'])
        splittingSummary = SplittingSummary(kw['task']['tm_split_algo'])
        for jobgroup in splitterResult:
            jobs = jobgroup.getJobs()
            splittingSummary.addJobs(jobs)
        splittingSummary.dump('splitting-summary.json')
        inputFiles.append('splitting-summary.json')

        self.packSandbox(inputFiles)

        self.logger.info('Uploading dry run tarball to the user file cache')
        t0 = time.time()
        if 'S3' in kw['task']['tm_cache_url'].upper():
            uploadToS3(crabserver=self.crabserver, filepath='dry-run-sandbox.tar.gz',
                       objecttype='runtimefiles', taskname=kw['task']['tm_taskname'], logger=self.logger)
            result = {'hashkey': 'ok'}  # a dummy one to keep the same semantics as when using UserFileCache
            os.remove('dry-run-sandbox.tar.gz')
        else:
            ufc = UserFileCache(mydict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint': kw['task']['tm_cache_url']})
            result = ufc.uploadLog('dry-run-sandbox.tar.gz')
            os.remove('dry-run-sandbox.tar.gz')
            if 'hashkey' not in result:
                raise TaskWorkerException('Failed to upload dry-run-sandbox.tar.gz to the user file cache: ' + str(result))
        self.logger.info('Uploaded dry run tarball to the user file cache: %s', str(result))

        # wait until the tarball is available, S3 may take a few seconds for this (ref. issue #6706)
        t1 = time.time()
        lt1 = time.strftime("%H:%M:%S", time.localtime(t1))
        uploadTime = t1 - t0
        self.logger.debug('runtimefiles upload took %s secs and completed at %s', uploadTime, lt1)
        self.logger.debug('check if tarball is available')
        tarballOK = False
        while not tarballOK:
            try:
                self.logger.debug('download tarball to /dev/null')
                downloadFromS3(crabserver=self.crabserver, filepath='/dev/null',
                               objecttype='runtimefiles', taskname=kw['task']['tm_taskname'], logger=self.logger)
                self.logger.debug('OK, it worked')
                tarballOK = True
            except Exception as e:
                self.logger.debug('runtimefiles tarball not ready yet')
                self.logger.debug('Exception was raised: %s', e)
                self.logger.debug('Sleep 5 sec')
                time.sleep(5)

        update = {'workflow': kw['task']['tm_taskname'], 'subresource': 'state', 'status': 'UPLOADED'}
        self.logger.debug('Updating task status: %s', str(update))
        self.crabserver.post(api='workflowdb', data=urllib.urlencode(update))
    finally:
        os.chdir(cwd)

    return Result(task=kw['task'], result=args[0])
    instance = 'private'
elif instance not in SERVICE_INSTANCES.keys() and serverurl is None:
    logger.debug('%sError%s: serverurl is None' % (colors.RED, colors.NORMAL))
    doupload = False

if proxyfilename is None:
    logger.debug('No proxy was given')
    doupload = False

baseurl = getUrl(instance=instance, resource='info')
if doupload:
    cacheurl = server_info('backendurls', serverurl, proxyfilename, baseurl)
    cacheurl = cacheurl['cacheSSL']
    cacheurldict = {'endpoint': cacheurl}

    ufc = UserFileCache(cacheurldict)
    logger.debug("cacheURL: %s\nLog file name: %s" % (cacheurl, logfilename))
    logger.info("Uploading log file...")

    ufc.uploadLog(logpath, logfilename)
    logger.info("%sSuccess%s: Log file uploaded successfully." % (colors.GREEN, colors.NORMAL))
    logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
    if not username:
        username = getUsernameFromSiteDB_wrapped(logger, quiet=True)
    if username:
        logfileurl += '&username=' + str(username)
else:
    logger.info('Failed to upload the log file')
    logfileurl = False
def __call__(self):
    self.logger.info('Getting the tarball hash key')
    tarballdir = glob.glob(self.requestarea + '/inputs/*.tgz')
    if len(tarballdir) != 1:
        self.logger.info('%sError%s: Could not find tarball or there is more than one tarball' % (colors.RED, colors.NORMAL))
        raise ConfigurationException
    tarballdir = tarballdir[0]

    #checking task status
    self.logger.info('Checking task status')
    server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

    dictresult = dictresult['result'][0] #take just the significant part

    if status != 200:
        msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        raise RESTCommunicationException(msg)

    self.logger.info('Task status: %s' % dictresult['status'])
    accepstate = ['KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED']
    if dictresult['status'] not in accepstate:
        msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'.format(accepstate) % (colors.RED, colors.NORMAL))
        raise ConfigurationException(msg)

    #getting the cache url
    if not self.options.scheddonly:
        baseurl = self.getUrl(self.instance, resource='info')
        cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

        ufc = UserFileCache(cacheurldict)
        hashkey = ufc.checksum(tarballdir)
        self.logger.info('Tarball hashkey: %s' % hashkey)
        self.logger.info('Attempting to clean user file cache')

        ufcresult = ufc.removeFile(hashkey)
        if ufcresult == '':
            self.logger.info('%sSuccess%s: Successfully removed file from cache' % (colors.GREEN, colors.NORMAL))
        else:
            self.logger.info('%sError%s: Failed to remove the file from cache' % (colors.RED, colors.NORMAL))

    if not self.options.cacheonly:
        self.logger.info('Getting the schedd address')
        baseurl = self.getUrl(self.instance, resource='info')
        try:
            sceddaddress = server_info('scheddaddress', self.serverurl, self.proxyfilename, baseurl, workflow=self.cachedinfo['RequestName'])
        except HTTPException:
            self.logger.info('%sError%s: Failed to get the schedd address' % (colors.RED, colors.NORMAL))
            raise
        self.logger.debug('%sSuccess%s: Successfully got the schedd address' % (colors.GREEN, colors.NORMAL))
        self.logger.debug('Schedd address: %s' % sceddaddress)

        self.logger.info('Attempting to remove task files from the schedd')
        gssishrm = 'gsissh -o ConnectTimeout=60 -o PasswordAuthentication=no ' + sceddaddress + ' rm -rf ' + self.cachedinfo['RequestName']
        self.logger.debug('gsissh command: %s' % gssishrm)

        delprocess = subprocess.Popen(gssishrm, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        stdout, stderr = delprocess.communicate()
        exitcode = delprocess.returncode
        if exitcode == 0:
            self.logger.info('%sSuccess%s: Successfully removed task from schedd' % (colors.GREEN, colors.NORMAL))
        else:
            self.logger.info('%sError%s: Failed to remove task from schedd' % (colors.RED, colors.NORMAL))
        self.logger.debug('gsissh stdout: %s\ngsissh stderr: %s\ngsissh exitcode: %s' % (stdout, stderr, exitcode))
def __call__(self):
    self.logger.info('Getting the tarball hash key')

    inputlist = {'subresource': 'search', 'workflow': self.cachedinfo['RequestName']}
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    uri = self.getUrl(self.instance, resource='task')
    dictresult, _, _ = server.get(uri, data=inputlist)
    tm_user_sandbox = getColumn(dictresult, 'tm_user_sandbox')
    hashkey = tm_user_sandbox.replace(".tar.gz", "")

    # Get the schedd address from the DB info and strip off the 'crab3@' prefix if it exists
    scheddaddress = getColumn(dictresult, 'tm_schedd')
    scheddaddress = scheddaddress.split('@')[1] if '@' in scheddaddress else scheddaddress

    self.logger.info('Checking task status')
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    dictresult, _, _ = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

    dictresult = dictresult['result'][0] #take just the significant part

    self.logger.info('Task status: %s' % dictresult['status'])
    accepstate = ['SUBMITFAILED', 'KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED']
    if dictresult['status'] not in accepstate:
        msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'.format(accepstate) % (colors.RED, colors.NORMAL))
        raise ConfigurationException(msg)

    #getting the cache url
    cacheresult = {}
    scheddresult = {}
    gsisshdict = {}
    if not self.options.scheddonly:
        baseurl = getUrl(self.instance, resource='info')
        cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

        ufc = UserFileCache(cacheurldict)
        self.logger.info('Tarball hashkey: %s' % hashkey)
        self.logger.info('Attempting to remove task file from crab server cache')

        try:
            ufcresult = ufc.removeFile(hashkey)
        except HTTPException as hte:
            if 'X-Error-Info' in hte.headers and 'Not such file' in hte.headers['X-Error-Info']:
                self.logger.info('%sError%s: Failed to find task file in crab server cache; the file might have been already purged' % (colors.RED, colors.NORMAL))
            raise

        if ufcresult == '':
            self.logger.info('%sSuccess%s: Successfully removed task files from crab server cache' % (colors.GREEN, colors.NORMAL))
            cacheresult = 'SUCCESS'
        else:
            self.logger.info('%sError%s: Failed to remove task files from crab server cache' % (colors.RED, colors.NORMAL))
            cacheresult = 'FAILED'

    if not self.options.cacheonly:
        self.logger.debug('%sSuccess%s: Successfully got schedd address' % (colors.GREEN, colors.NORMAL))
        self.logger.debug('Schedd address: %s' % scheddaddress)
        self.logger.info('Attempting to remove task from schedd')

        gssishrm = 'gsissh -o ConnectTimeout=60 -o PasswordAuthentication=no ' + scheddaddress + ' rm -rf ' + self.cachedinfo['RequestName']
        self.logger.debug('gsissh command: %s' % gssishrm)

        delprocess = subprocess.Popen(gssishrm, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        stdout, stderr = delprocess.communicate()
        exitcode = delprocess.returncode
        if exitcode == 0:
            self.logger.info('%sSuccess%s: Successfully removed task from schedd' % (colors.GREEN, colors.NORMAL))
            scheddresult = 'SUCCESS'
            gsisshdict = {}
        else:
            self.logger.info('%sError%s: Failed to remove task from schedd' % (colors.RED, colors.NORMAL))
            scheddresult = 'FAILED'
            self.logger.debug('gsissh stdout: %s\ngsissh stderr: %s\ngsissh exitcode: %s' % (stdout, stderr, exitcode))
            gsisshdict = {'stdout': stdout, 'stderr': stderr, 'exitcode': exitcode}

    return {'cacheresult': cacheresult, 'scheddresult': scheddresult, 'gsiresult': gsisshdict}
def uploadlogfile(logger, proxyfilename, logfilename=None, logpath=None, instance='prod', serverurl=None, username=None):
    ## WMCore dependencies. Moved here to minimize dependencies in the bootstrap script
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    doupload = True

    if logfilename is None:
        logfilename = str(time.strftime("%Y-%m-%d_%H%M%S")) + '_crab.log'

    logger.info('Fetching user environment to log file')

    try:
        cmd = 'env'
        logger.debug('Running env command')
        pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        stdout, dummyStderr = pipe.communicate()
        logger.debug('\n\n\nUSER ENVIRONMENT\n%s' % stdout)
    except Exception as se:
        logger.debug('Failed to get the user env\nException message: %s' % (se))

    if logpath is not None:
        if not os.path.exists(logpath):
            doupload = False
            logger.debug('%sError%s: %s does not exist' % (colors.RED, colors.NORMAL, logpath))
    else:
        if os.path.exists(str(os.getcwd()) + '/crab.log'):
            logpath = str(os.getcwd()) + '/crab.log'
        else:
            logger.debug('%sError%s: Failed to find crab.log in current directory %s' % (colors.RED, colors.NORMAL, str(os.getcwd())))

    if serverurl is None and instance in SERVICE_INSTANCES.keys():
        serverurl = SERVICE_INSTANCES[instance]
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is not None:
        instance = 'private'
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is None:
        logger.debug('%sError%s: serverurl is None' % (colors.RED, colors.NORMAL))
        doupload = False

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    baseurl = getUrl(instance=instance, resource='info')
    if doupload:
        cacheurl = server_info('backendurls', serverurl, proxyfilename, baseurl)
        # Encode in ascii because old pycurl present in old CMSSW versions
        # doesn't support unicode.
        cacheurl = cacheurl['cacheSSL'].encode('ascii')
        cacheurldict = {'endpoint': cacheurl, "pycurl": True}

        ufc = UserFileCache(cacheurldict)
        logger.debug("cacheURL: %s\nLog file name: %s" % (cacheurl, logfilename))
        logger.info("Uploading log file...")

        ufc.uploadLog(logpath, logfilename)
        logger.info("%sSuccess%s: Log file uploaded successfully." % (colors.GREEN, colors.NORMAL))
        logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
        if not username:
            username = getUserDNandUsername(logger).get('username')
        if username:
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False

    return logfileurl
def _execute(self, resthost, resturi, config, task):
    mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False)

    tapeRecallStatus = 'TAPERECALL'
    self.logger.info("Retrieving %s tasks", tapeRecallStatus)
    recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus, ignoreTWName=True)
    if len(recallingTasks) > 0:
        self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
        for recallingTask in recallingTasks:
            taskName = recallingTask['tm_taskname']
            self.logger.info("Working on task %s", taskName)

            reqId = recallingTask['tm_DDM_reqid']
            if not reqId:
                self.logger.debug("'tm_DDM_reqid' is not defined for task %s, skipping such task", taskName)
                continue

            server = HTTPRequests(config.TaskWorker.resturl, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry=20, logger=self.logger)
            if (time.time() - getTimeFromTaskname(str(taskName)) > MAX_DAYS_FOR_TAPERECALL*24*60*60):
                self.logger.info("Task %s is older than %d days, setting its status to FAILED", taskName, MAX_DAYS_FOR_TAPERECALL)
                msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (reqId, MAX_DAYS_FOR_TAPERECALL)
                failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')
                continue

            mpl = MyProxyLogon(config=config, server=server, resturi=config.TaskWorker.restURInoAPI, myproxylen=self.pollingTime)
            user_proxy = True
            try:
                mpl.execute(task=recallingTask)  # this adds 'user_proxy' to recallingTask
            except TaskWorkerException as twe:
                user_proxy = False
                self.logger.exception(twe)

            # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
            if user_proxy:
                from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                ufc = UserFileCache({'cert': recallingTask['user_proxy'], 'key': recallingTask['user_proxy'],
                                     'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
                sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz", "")
                debugFiles = recallingTask['tm_debug_files'].replace(".tar.gz", "")
                sandboxPath = os.path.join("/tmp", sandbox)
                debugFilesPath = os.path.join("/tmp", debugFiles)
                try:
                    ufc.download(sandbox, sandboxPath, recallingTask['tm_username'])
                    ufc.download(debugFiles, debugFilesPath, recallingTask['tm_username'])
                    self.logger.info("Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                                     sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId)
                except Exception as ex:
                    self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)." +
                                     " This could be a temporary glitch, will try again in next occurrence of the recurring action." +
                                     " Error reason:\n%s", sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId, str(ex))
                finally:
                    if os.path.exists(sandboxPath):
                        os.remove(sandboxPath)
                    if os.path.exists(debugFilesPath):
                        os.remove(debugFilesPath)

            ddmRequest = statusRequest(reqId, config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, verbose=False)
            # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
            self.logger.info("Contacted %s using %s and %s for request_id = %d, got:\n%s",
                             config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, reqId, ddmRequest)

            if ddmRequest["message"] == "Request found":
                status = ddmRequest["data"][0]["status"]
                if status == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info("Request %d is completed, setting status of task %s to NEW", reqId, taskName)
                    mw.updateWork(taskName, recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    if user_proxy:
                        mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
                elif status == "rejected":
                    msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (reqId, ddmRequest["data"][0]["reason"])
                    self.logger.info(msg + "\nSetting status of task %s to FAILED", taskName)
                    failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')
            else:
                msg = "DDM request_id %d not found. Please report to experts" % reqId
                self.logger.info(msg)
                if user_proxy:
                    mpl.uploadWarning(msg, recallingTask['user_proxy'], taskName)
    else:
        self.logger.info("No %s task retrieved.", tapeRecallStatus)
def setUp(self): """ Setup for unit tests """ self.ufc = UserFileCache()
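A possible companion test for the uploadLog() path exercised by the TaskWorker snippets in this document. This is a sketch, not part of the original suite: it mirrors the opt-in UFCURL environment-variable convention used elsewhere in this suite and relies only on the uploadLog(logpath, logfilename) call shown above.

def testUploadLog(self):
    """ Hypothetical test: upload a small log file when a cache endpoint is configured """
    if 'UFCURL' not in os.environ:
        return #no test endpoint available, skip silently
    import tempfile
    ufc = UserFileCache({'endpoint': os.environ['UFCURL']})
    with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as tmp:
        tmp.write('dummy TaskWorker log content\n')
        logpath = tmp.name
    try:
        ufc.uploadLog(logpath, os.path.basename(logpath))
    finally:
        os.remove(logpath)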
def executeAction(self, nextinput, work):
    """ Execute an action and deal with the error handling and upload of the tasklogfile to the crabcache """
    try:
        output = work.execute(nextinput, task=self._task, tempDir=self.tempDir)
    except TapeDatasetException as tde:
        raise TapeDatasetException(str(tde))
    except TaskWorkerException as twe:
        self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
        raise WorkerHandlerException(str(twe), retry=twe.retry) #TaskWorker error, do not add traceback to the error propagated to the REST
    except Exception as exc:
        msg = "Problem handling %s because of %s failure, traceback follows\n" % (self.taskname, str(exc))
        msg += str(traceback.format_exc())
        self.logger.error(msg)
        raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
    finally:
        #upload the logfile of the task to the crabcache or S3
        #TODO: we need to do that also in Worker.py otherwise some messages might only be in the TW file but not in the crabcache.
        logpath = self.config.TaskWorker.logsDir + '/tasks/%s/%s.log' % (self._task['tm_username'], self.taskname)
        if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
            if 'S3' in self._task['tm_cache_url'].upper():
                # use S3
                try:
                    uploadToS3(crabserver=self.crabserver, objecttype='twlog', filepath=logpath,
                               taskname=self.taskname, logger=self.logger)
                except Exception as e:
                    msg = 'Failed to upload logfile to S3 for task %s. ' % self.taskname
                    msg += 'Details:\n%s' % str(e)
                    self.logger.error(msg)
            else:
                # use old crabcache
                cacheurldict = {'endpoint': self._task['tm_cache_url'], 'cert': self._task['user_proxy'], 'key': self._task['user_proxy']}
                try:
                    ufc = UserFileCache(cacheurldict)
                    logfilename = self.taskname + '_TaskWorker.log'
                    ufc.uploadLog(logpath, logfilename)
                except HTTPException as hte:
                    msg = "Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" % (self._task['tm_cache_url'], self.taskname, hte.headers, hte.result)
                    self.logger.error(msg)
                except Exception: #pylint: disable=broad-except
                    msg = "Unknown error while uploading the logfile for task %s" % self.taskname
                    self.logger.exception(msg)
    return output
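The only discriminator between the two upload paths above is the task's cache URL. The same rule restated as a standalone, testable predicate (a sketch; the example URLs are assumptions for illustration, not values from the code):

def usesS3(cache_url):
    """Return True when the cache URL points at the S3-backed storage, as checked in executeAction."""
    return 'S3' in cache_url.upper()

assert usesS3('https://s3.cern.ch/bucket') #hypothetical S3 endpoint
assert not usesS3('https://cmsweb.cern.ch/crabcache') #hypothetical crabcache endpoint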
def _execute(self, resthost, resturi, config, task):
    # setup logger
    if not self.logger:
        self.logger = logging.getLogger(__name__)
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s %(message)s")
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.DEBUG)
    else:
        # do not use the BaseRecurringAction logger but create a new logger
        # which writes to config.TaskWorker.logsDir/tasks/recurring/TapeRecallStatus_YYMMDD-HHMM.log
        self.logger = logging.getLogger('TapeRecallStatus')
        logDir = config.TaskWorker.logsDir + '/tasks/recurring/'
        if not os.path.exists(logDir):
            os.makedirs(logDir)
        timeStamp = time.strftime('%y%m%d-%H%M', time.localtime())
        logFile = 'TapeRecallStatus_' + timeStamp + '.log'
        handler = logging.FileHandler(logDir + logFile)
        formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(module)s:%(message)s')
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

    mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False, name='masterForTapeRecall')

    tapeRecallStatus = 'TAPERECALL'
    self.logger.info("Retrieving %s tasks", tapeRecallStatus)
    recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus, ignoreTWName=True)
    if len(recallingTasks) > 0:
        self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
        for recallingTask in recallingTasks:
            taskName = recallingTask['tm_taskname']
            self.logger.info("Working on task %s", taskName)

            reqId = recallingTask['tm_DDM_reqid']
            if not reqId:
                self.logger.debug("'tm_DDM_reqid' is not defined for task %s, skipping such task", taskName)
                continue

            server = HTTPRequests(resthost, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry=20, logger=self.logger)
            if (time.time() - getTimeFromTaskname(str(taskName)) > MAX_DAYS_FOR_TAPERECALL * 24 * 60 * 60):
                self.logger.info("Task %s is older than %d days, setting its status to FAILED", taskName, MAX_DAYS_FOR_TAPERECALL)
                msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (reqId, MAX_DAYS_FOR_TAPERECALL)
                failTask(taskName, server, resturi, msg, self.logger, 'FAILED')
                continue

            mpl = MyProxyLogon(config=config, server=server, resturi=resturi, myproxylen=self.pollingTime)
            user_proxy = True
            try:
                mpl.execute(task=recallingTask) # this adds 'user_proxy' to recallingTask
            except TaskWorkerException as twe:
                user_proxy = False
                self.logger.exception(twe)

            # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
            if user_proxy:
                from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                ufc = UserFileCache({'cert': recallingTask['user_proxy'], 'key': recallingTask['user_proxy'],
                                     'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
                sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz", "")
                debugFiles = recallingTask['tm_debug_files'].replace(".tar.gz", "")
                sandboxPath = os.path.join("/tmp", sandbox)
                debugFilesPath = os.path.join("/tmp", debugFiles)
                try:
                    ufc.download(sandbox, sandboxPath, recallingTask['tm_username'])
                    ufc.download(debugFiles, debugFilesPath, recallingTask['tm_username'])
                    self.logger.info("Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                                     sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId)
                except Exception as ex:
                    self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."+\
                                     " This could be a temporary glitch, will try again at the next occurrence of the recurring action."+\
                                     " Error reason:\n%s", sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId, str(ex))
                finally:
                    if os.path.exists(sandboxPath):
                        os.remove(sandboxPath)
                    if os.path.exists(debugFilesPath):
                        os.remove(debugFilesPath)

            ddmRequest = statusRequest(reqId, config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, verbose=False)
            # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
            self.logger.info("Contacted %s using %s and %s for request_id = %d, got:\n%s",
                             config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, reqId, ddmRequest)

            if ddmRequest["message"] == "Request found":
                status = ddmRequest["data"][0]["status"]
                if status == "completed": # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info("Request %d is completed, setting status of task %s to NEW", reqId, taskName)
                    mw.updateWork(taskName, recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    if user_proxy:
                        mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
                elif status == "rejected":
                    msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (reqId, ddmRequest["data"][0]["reason"])
                    self.logger.info(msg + "\nSetting status of task %s to FAILED", taskName)
                    failTask(taskName, server, resturi, msg, self.logger, 'FAILED')
            else:
                msg = "DDM request_id %d not found. Please report to experts" % reqId
                self.logger.info(msg)
                if user_proxy:
                    mpl.uploadWarning(msg, recallingTask['user_proxy'], taskName)
    else:
        self.logger.info("No %s task retrieved.", tapeRecallStatus)
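The status handling above reduces to a small decision table over the documented DDM response shape. A compact restatement as a sketch (the function name and return labels are illustrative, not part of the recurring action):

def ddmDecision(ddmRequest):
    """Map a DDM status-poll response onto the action taken by TapeRecallStatus."""
    if ddmRequest.get("message") != "Request found":
        return "warn" #unknown request id: warn the user and keep polling
    status = ddmRequest["data"][0]["status"]
    if status == "completed":
        return "resubmit" #recall finished: task status goes back to NEW
    if status == "rejected":
        return "fail" #recall refused: task status goes to FAILED
    return "wait" #new/activated/updated/cancelled: check again at the next run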
def uploadlogfile(logger, proxyfilename, logfilename=None, logpath=None, instance='prod', serverurl=None, username=None):
    ## WMCore dependencies. Moved here to minimize dependencies in the bootstrap script
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    doupload = True

    if logfilename is None:
        logfilename = str(time.strftime("%Y-%m-%d_%H%M%S")) + '_crab.log'

    logger.info('Fetching user environment to log file')

    try:
        cmd = 'env'
        logger.debug('Running env command')
        pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        stdout, stderr = pipe.communicate()
        logger.debug('\n\n\nUSER ENVIRONMENT\n%s' % stdout)
    except Exception as se:
        logger.debug('Failed to get the user env\nException message: %s' % (se))

    if logpath is not None:
        if not os.path.exists(logpath):
            doupload = False
            logger.debug('%sError%s: %s does not exist' % (colors.RED, colors.NORMAL, logpath))
    else:
        if os.path.exists(str(os.getcwd()) + '/crab.log'):
            logpath = str(os.getcwd()) + '/crab.log'
        else:
            logger.debug('%sError%s: Failed to find crab.log in current directory %s' % (colors.RED, colors.NORMAL, str(os.getcwd())))

    if serverurl is None and instance in SERVICE_INSTANCES.keys():
        serverurl = SERVICE_INSTANCES[instance]
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is not None:
        instance = 'private'
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is None:
        logger.debug('%sError%s: serverurl is None' % (colors.RED, colors.NORMAL))
        doupload = False

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    baseurl = getUrl(instance=instance, resource='info')
    if doupload:
        cacheurl = server_info('backendurls', serverurl, proxyfilename, baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl}

        ufc = UserFileCache(cacheurldict)
        logger.debug("cacheURL: %s\nLog file name: %s" % (cacheurl, logfilename))
        logger.info("Uploading log file...")
        ufc.uploadLog(logpath, logfilename)
        logger.info("%sSuccess%s: Log file uploaded successfully." % (colors.GREEN, colors.NORMAL))
        logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
        if not username:
            username = getUsernameFromSiteDB_wrapped(logger, quiet=True)
        if username:
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False

    return logfileurl
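A hypothetical call site for uploadlogfile(), e.g. when asking a user to share their crab.log with support; the logger setup, proxy path, and log path are placeholder assumptions, not values from the original code:

import logging

logger = logging.getLogger('CRAB3')
logfileurl = uploadlogfile(logger, proxyfilename='/tmp/x509up_u12345',
                           logpath='/path/to/crab.log', instance='prod')
if logfileurl:
    logger.info("Log file available at: %s", logfileurl)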