def getInputFiles(self):
    """ Get the InputFiles.tar.gz and extract the necessary files.
    """
    taskname = self.cachedinfo['RequestName']

    # Get task status from the task DB
    self.logger.debug("Getting status from the DB")
    uri = self.getUrl(self.instance, resource='task')
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    crabDBInfo, _, _ = server.get(uri, data={'subresource': 'search', 'workflow': taskname})
    status = getColumn(crabDBInfo, 'tm_task_status')
    self.destination = getColumn(crabDBInfo, 'tm_asyncdest')

    inputsFilename = os.path.join(os.getcwd(), 'InputFiles.tar.gz')
    if status == 'UPLOADED':
        filecacheurl = getColumn(crabDBInfo, 'tm_cache_url')
        ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint': filecacheurl, "pycurl": True})
        self.logger.debug("Downloading and extracting 'dry-run-sandbox.tar.gz' from %s" % filecacheurl)
        ufc.downloadLog('dry-run-sandbox.tar.gz', output=os.path.join(os.getcwd(), 'dry-run-sandbox.tar.gz'))
        with tarfile.open('dry-run-sandbox.tar.gz') as tf:
            tf.extractall()
    elif status == 'SUBMITTED':
        webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
        if not webdir:
            webdir = getColumn(crabDBInfo, 'tm_user_webdir')
        self.logger.debug("Downloading 'InputFiles.tar.gz' from %s" % webdir)
        getFileFromURL(webdir + '/InputFiles.tar.gz', inputsFilename, self.proxyfilename)
    else:
        raise ClientException('Can only execute jobs from tasks in status SUBMITTED or UPLOADED. Current status is %s' % status)

    for name in [inputsFilename, 'CMSRunAnalysis.tar.gz', 'sandbox.tar.gz']:
        with tarfile.open(name) as tf:
            tf.extractall()
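
# --- Illustrative sketch, not part of the original client ---
# getInputFiles() calls tarfile.extractall() on downloaded archives; extractall()
# will follow "../" members outside the working directory. Below is a hedged guard
# one could wrap around those extractions. 'safeExtract' is a hypothetical helper
# name, not an existing CRABClient function.
import os
import tarfile

def safeExtract(tarPath, destDir='.'):
    """Extract tarPath into destDir, refusing members that escape destDir."""
    base = os.path.realpath(destDir)
    with tarfile.open(tarPath) as tf:
        for member in tf.getmembers():
            target = os.path.realpath(os.path.join(destDir, member.name))
            # Reject any member whose resolved path leaves destDir
            if target != base and not target.startswith(base + os.sep):
                raise RuntimeError("Blocked unsafe path in %s: %s" % (tarPath, member.name))
        tf.extractall(destDir)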
def __call__(self):
    if self.options.short:
        taskname = self.cachedinfo['RequestName']
        inputlist = {'subresource': 'webdir', 'workflow': taskname}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = self.getUrl(self.instance, resource='task')
        webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
        if not webdir:
            dictresult, status, reason = server.get(uri, data=inputlist)
            webdir = dictresult['result'][0]
            self.logger.info('Server result: %s' % webdir)
            if status != 200:
                msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                raise RESTCommunicationException(msg)
        self.setDestination()
        self.logger.info("Setting the destination to %s " % self.dest)
        failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
        if failed:
            msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED, colors.NORMAL, failed)
            self.logger.info(msg)
        else:
            self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN, colors.NORMAL))
        returndict = {'success': success, 'failed': failed}
    else:
        returndict = getcommand.__call__(self, subresource='logs')
        if ('success' in returndict and not returndict['success']) or \
           ('failed' in returndict and returndict['failed']):
            msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
            self.logger.info(msg)

    return returndict
def saveProxiedWebdir(ad):
    """
    The function queries the REST interface to get the proxied webdir and sets a
    classad so that we report this to the dashboard instead of the regular URL.

    The proxied_url (if it exists) is written to a file named 'proxied_webdir' so
    that prejobs can read it and report to the dashboard. If the url does not exist
    (i.e.: schedd not at CERN), the file is not written and we report the usual webdir.

    See https://github.com/dmwm/CRABServer/issues/4883
    """
    # Get the proxied webdir from the REST interface
    task = ad['CRAB_ReqName']
    host = ad['CRAB_RestHost']
    uri = ad['CRAB_RestURInoAPI'] + '/task'
    cert = ad['X509UserProxy']
    res = getProxiedWebDir(task, host, uri, cert, logFunction=printLog)

    # We need to use a file to communicate this to the prejob. I tried things like:
    # htcondor.Schedd().edit([dagJobId], 'CRAB_UserWebDirPrx', ad.lookup('CRAB_UserWebDir'))
    # but the prejob reads the classads from a file rather than querying the schedd,
    # and we can't update the classad file since it's owned by user condor.
    if res:
        with open("proxied_webdir", "w") as fd:
            fd.write(res)
    else:
        printLog("Cannot get proxied webdir from the server. Maybe the schedd does not have one in the REST configuration?")
        return 1
    return 0
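
# --- Illustrative counterpart, hypothetical and not from the repo ---
# saveProxiedWebdir() communicates with the prejob through the 'proxied_webdir'
# file (see the comment above about not being able to edit the classad file).
# A minimal sketch of the consumer side, assuming the prejob runs in the same
# directory; 'readProxiedWebdir' is a made-up helper name.
import os

def readProxiedWebdir(defaultWebdir):
    """Return the proxied webdir if the file was written, else the regular one."""
    if os.path.isfile("proxied_webdir"):
        with open("proxied_webdir") as fd:
            return fd.read().strip()
    return defaultWebdir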
def getTaskDict(self):
    # Getting information about the task
    inputlist = {'subresource': 'search', 'workflow': self.config.JobType.copyCatTaskname}
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    serverhost = SERVICE_INSTANCES.get(self.config.JobType.copyCatInstance)
    server = serverFactory(serverhost, self.proxyfilename, self.proxyfilename, version=__version__)
    uri = getUrl(self.config.JobType.copyCatInstance, resource='task')
    dictresult, dummyStatus, dummyReason = server.get(uri, data=inputlist)

    webdir = getProxiedWebDir(self.config.JobType.copyCatTaskname, serverhost, uri, self.proxyfilename, self.logger.debug)
    if not webdir:
        webdir = getColumn(dictresult, 'tm_user_webdir')
    return dictresult, webdir
def __call__(self):
    if self.options.short:
        taskname = self.cachedinfo['RequestName']
        inputlist = {'subresource': 'webdir', 'workflow': taskname}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = self.getUrl(self.instance, resource='task')
        webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
        if not webdir:
            dictresult, status, reason = server.get(uri, data=inputlist)
            webdir = dictresult['result'][0]
            self.logger.info('Server result: %s' % webdir)
            if status != 200:
                msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                raise RESTCommunicationException(msg)
        self.setDestination()
        self.logger.info("Setting the destination to %s " % self.dest)
        failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
        if failed:
            msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED, colors.NORMAL, failed)
            self.logger.info(msg)
        else:
            self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN, colors.NORMAL))
        returndict = {'success': success, 'failed': failed}
    else:
        # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
        returndict = getcommand.__call__(self, subresource='logs2')
        if ('success' in returndict and not returndict['success']) or \
           ('failed' in returndict and returndict['failed']):
            msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
            self.logger.info(msg)

    return returndict
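
# --- Hypothetical usage sketch ---
# A caller inspecting the dictionary returned by __call__() above; the sample
# value mirrors the {'success': [...], 'failed': [...]} shape built in the
# --short branch, with made-up file names.
result = {'success': ['job_out.1.0.txt'], 'failed': []}
if result['failed']:
    print("Some log files could not be retrieved: %s" % result['failed'])
else:
    print("Retrieved %d log file(s)" % len(result['success']))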
def saveProxiedWebdir(crabserver, ad):
    """
    The function queries the REST interface to get the proxied webdir and sets a
    classad so that we report this to the dashboard instead of the regular URL.

    The webdir (if it exists) is written to a file named 'webdir' so that prejobs
    can read it and report to the dashboard. If the proxied URL does not exist
    (i.e.: schedd not at CERN), we report the usual webdir.

    See https://github.com/dmwm/CRABServer/issues/4883
    """
    # Get the proxied webdir from the REST interface
    task = ad['CRAB_ReqName']
    webDir_adName = 'CRAB_WebDirURL'
    ad[webDir_adName] = ad['CRAB_localWebDirURL']
    proxied_webDir = getProxiedWebDir(crabserver=crabserver, task=task, logFunction=printLog)
    if proxied_webDir:  # Prefer the proxied webDir to the non-proxied one
        ad[webDir_adName] = str(proxied_webDir)

    if ad[webDir_adName]:
        # This condor_edit is required because in the REST interface we look for the webdir
        # if the DB upload failed (or in general if we use the "old logic").
        # See https://github.com/dmwm/CRABServer/blob/3.3.1507.rc8/src/python/CRABInterface/HTCondorDataWorkflow.py#L398
        dagJobId = '%d.%d' % (ad['ClusterId'], ad['ProcId'])
        try:
            htcondor.Schedd().edit([dagJobId], webDir_adName, '{0}'.format(ad.lookup(webDir_adName)))
        except RuntimeError as reerror:
            printLog(str(reerror))

        # We need to use a file to communicate this to the prejob. I tried to read the
        # corresponding ClassAd from the preJob like:
        # htcondor.Schedd().xquery(requirements="ClusterId == %d && ProcId == %d" % (self.task_ad['ClusterId'], self.task_ad['ProcId']), projection=[webDir_adName]).next().get(webDir_adName)
        # but it is too heavy of an operation with HTCondor v8.8.3
        with open("webdir", "w") as fd:
            fd.write(ad[webDir_adName])
    else:
        printLog("Cannot get proxied webdir from the server. Maybe the schedd does not have one in the REST configuration?")
        return 1
    return 0
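
# --- Hypothetical usage sketch ---
# saveProxiedWebdir() expects a REST client plus a job classad carrying
# CRAB_ReqName, CRAB_localWebDirURL, ClusterId and ProcId, and it uses both
# dict-style access and .lookup(), which classad.ClassAd provides. All values
# below are illustrative placeholders, not real task data.
import classad

ad = classad.ClassAd()
ad['CRAB_ReqName'] = '240101_120000:user_mytask'
ad['CRAB_localWebDirURL'] = 'http://schedd.example.cern.ch/~user/240101_120000:user_mytask'
ad['ClusterId'] = 1234
ad['ProcId'] = 0
# saveProxiedWebdir(crabserver, ad) would then prefer the proxied URL if the
# REST interface returns one, and fall back to CRAB_localWebDirURL otherwise.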
def getTaskDict(self):
    # Getting information about the task
    inputlist = {'subresource': 'search', 'workflow': self.config.JobType.copyCatTaskname}
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    serverhost = SERVICE_INSTANCES.get(self.config.JobType.copyCatInstance)
    server = serverFactory(serverhost, self.proxyfilename, self.proxyfilename, version=__version__)
    dictresult, dummyStatus, dummyReason = server.get(api='task', data=inputlist)

    # getProxiedWebDir() still needs the task resource URI, which the api-style
    # server.get() call above no longer builds for us
    uri = getUrl(self.config.JobType.copyCatInstance, resource='task')
    webdir = getProxiedWebDir(self.config.JobType.copyCatTaskname, serverhost, uri, self.proxyfilename, self.logger.debug)
    if not webdir:
        webdir = getColumn(dictresult, 'tm_user_webdir')
    return dictresult, webdir
def __call__(self):
    # Get all of the columns from the database for a certain task
    taskname = self.cachedinfo['RequestName']
    uri = self.getUrl(self.instance, resource='task')
    serverFactory = CRABClient.Emulator.getEmulator('rest')
    server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
    crabDBInfo, _, _ = server.get(uri, data={'subresource': 'search', 'workflow': taskname})
    self.logger.debug("Got information from server oracle database: %s", crabDBInfo)

    # Until the task lands on a schedd we'll show the status from the DB
    combinedStatus = getColumn(crabDBInfo, 'tm_task_status')

    user = getColumn(crabDBInfo, 'tm_username')
    webdir = getColumn(crabDBInfo, 'tm_user_webdir')
    rootDagId = getColumn(crabDBInfo, 'clusterid')  # that's the condor id from the TW
    asourl = getColumn(crabDBInfo, 'tm_asourl')
    asodb = getColumn(crabDBInfo, 'tm_asodb')
    publicationEnabled = True if getColumn(crabDBInfo, 'tm_publication') == 'T' else False

    # Print information from the database
    self.printTaskInfo(crabDBInfo, user)
    if not rootDagId:
        failureMsg = "The task has not been submitted to the Grid scheduler yet. Not printing job information."
        self.logger.debug(failureMsg)
        return self.makeStatusReturnDict(crabDBInfo, combinedStatus, statusFailureMsg=failureMsg)

    self.logger.debug("The CRAB server submitted your task to the Grid scheduler (cluster ID: %s)" % rootDagId)

    if not webdir:
        # Query condor through the server for information about this task
        uri = self.getUrl(self.instance, resource='workflow')
        params = {'subresource': 'taskads', 'workflow': taskname}
        res = server.get(uri, data=params)[0]['result'][0]
        # JobStatus 5 = Held
        if res['JobStatus'] == '5' and 'DagmanHoldReason' in res:
            # If we didn't find a webdir in the DB and the DAG is held,
            # the task bootstrapping failed before or during the webdir
            # upload and the reason should be printed.
            failureMsg = "The task failed to bootstrap on the Grid scheduler."
            failureMsg += " Please send an e-mail to %s." % (FEEDBACKMAIL)
            failureMsg += "\nHold reason: %s" % (res['DagmanHoldReason'])
            self.logger.info(failureMsg)
            combinedStatus = "FAILED"
        else:
            # If the dag is submitted and the webdir is not there, we have to wait for
            # AdjustSites to run and upload the webdir location to the server.
            self.logger.info("Waiting for the Grid scheduler to bootstrap your task")
            failureMsg = "Schedd has not reported back the webdir (yet)"
            self.logger.debug(failureMsg)
            combinedStatus = "UNKNOWN"
        return self.makeStatusReturnDict(crabDBInfo, combinedStatus, statusFailureMsg=failureMsg)

    self.logger.debug("Webdir is located at %s", webdir)

    proxiedWebDir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
    if not proxiedWebDir:
        msg = "Failed to get the proxied webdir from CRABServer. "
        msg += "\nWill fall back to the regular webdir url for file downloads "
        msg += "but will likely fail if the client is located outside CERN."
        self.logger.debug(msg)
        proxiedWebDir = webdir
    self.logger.debug("Proxied webdir is located at %s", proxiedWebDir)

    # Download status_cache file
    url = proxiedWebDir + "/status_cache"
    self.logger.debug("Retrieving 'status_cache' file from %s", url)

    statusCacheInfo = None
    try:
        statusCacheData = getDataFromURL(url, self.proxyfilename)
    except HTTPException as ce:
        self.logger.info("Waiting for the Grid scheduler to report back the status of your task")
        failureMsg = "Cannot retrieve the status_cache file. Maybe the task process has not run yet?"
        failureMsg += " Got:\n%s" % ce
        self.logger.error(failureMsg)
        logging.getLogger("CRAB3").exception(ce)
        combinedStatus = "UNKNOWN"
        return self.makeStatusReturnDict(crabDBInfo, combinedStatus, statusFailureMsg=failureMsg)
    else:
        # We skip the first two lines of the file because they contain the checkpoint locations
        # for the job_log / fjr_parse_results files and are used by the status caching script.
        # Load the job_report summary.
        statusCacheInfo = literal_eval(statusCacheData.split('\n')[2])
        self.logger.debug("Got information from status cache file: %s", statusCacheInfo)

    # If the task is already on the grid, show the dagman status
    combinedStatus = dagStatus = self.printDAGStatus(crabDBInfo, statusCacheInfo)

    shortResult = self.printShort(statusCacheInfo)
    pubStatus = self.printPublication(publicationEnabled, shortResult['jobsPerStatus'], asourl, asodb,
                                      taskname, user, crabDBInfo)
    self.printErrors(statusCacheInfo)

    if self.options.summary:
        self.printSummary(statusCacheInfo)
    if self.options.long or self.options.sort:
        # If the user correctly passed some jobid CSVs to use in the status --long, self.jobids
        # will be a list of strings already parsed from the input by the validateOptions()
        if self.jobids:
            self.checkUserJobids(statusCacheInfo, self.jobids)
        sortdict = self.printLong(statusCacheInfo, self.jobids, quiet=(not self.options.long))
        if self.options.sort:
            self.printSort(sortdict, self.options.sort)
    if self.options.json:
        self.logger.info(json.dumps(statusCacheInfo))

    statusDict = self.makeStatusReturnDict(crabDBInfo, combinedStatus, dagStatus,
                                           '', shortResult, statusCacheInfo,
                                           pubStatus, proxiedWebDir)
    return statusDict
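
# --- Illustrative sketch of the status_cache parsing done above ---
# The first two lines of the file are checkpoint offsets used by the status
# caching script; the third line is a python-literal dict with the per-job
# states. The sample content below is made up for illustration.
from ast import literal_eval

sampleStatusCache = "123\n456\n{'1': {'State': 'finished'}, '2': {'State': 'running'}}\n"
statusCacheInfo = literal_eval(sampleStatusCache.split('\n')[2])
print(statusCacheInfo['1']['State'])  # prints: finished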