def __call__(self):
    """Retrieve the detailed postmortem (failure report) of the cached task and
    print an aggregated summary of job failures (or the verbose details).

    Returns a CommandResult: (2001, ...) if no task was given, (1, ...) on a
    server error, (0, None) on success.
    """
    if self.options.task is None:
        return CommandResult(2001, 'ERROR: Task option is required')

    server = HTTPRequests(self.cachedinfo['Server'] + ':' + str(self.cachedinfo['Port']))

    self.logger.debug('Looking up detailed postmortem of task %s' % self.cachedinfo['RequestName'])
    # Renamed from 'postmortem': this is the HTTP status code of the GET.
    dictresult, status, reason = server.get(self.uri + self.cachedinfo['RequestName'])

    if status != 200:
        msg = "Problem retrieving postmortem:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        return CommandResult(1, msg)

    for workflow in dictresult['errors']:
        self.logger.info("#%i %s" % (workflow['subOrder'], workflow['request']))
        if self.options.verbose or self.options.outputfile:
            # Full per-job details, either to the requested file or to the default log.
            self.printVerbose(workflow['details'], self.options.outputfile, os.path.join(self.requestarea, 'results', 'jobFailures.log'))
        else:
            self.logger.debug(" Aggregating job failures")
            groupederrs = self.aggregateFailures(workflow['details'])
            if not groupederrs:
                self.logger.info(" No failures")
                continue
            # BUGFIX: message typo "then" -> "than".
            self.logger.info(" List of failures and jobs per each failure: (one job could have more than one failure, one per each step)")
            for hkey in groupederrs:
                # removing duplicates and sort
                joberrs = list(set(groupederrs[hkey]['jobs']))
                joberrs.sort()
                self.logger.info('   %s jobs failed with error "%s"' % (len(joberrs), groupederrs[hkey]['error']))
                self.logger.info('     (%s)' % (', '.join([str(jobid[0]) for jobid in joberrs])))

    return CommandResult(0, None)
def __call__(self):
    """Ask the server for the type of the cached task and return it."""
    proxy = self.options.proxyfile if self.options.proxyfile else self.proxyfilename
    server = HTTPRequests(self.serverurl, proxy)
    self.logger.debug('Looking type for task %s' % self.cachedinfo['RequestName'])
    payload = {'workflow': self.cachedinfo['RequestName'], 'subresource': 'type'}
    dictresult, status, reason = server.get(self.uri, data=payload)
    tasktype = dictresult['result'][0]
    self.logger.debug('Task type %s' % tasktype)
    return tasktype
def server_info(subresource, server, proxyfilename):
    """ Get relevant information about the server """
    # Bind the connection to a fresh name instead of rebinding the parameter.
    connection = HTTPRequests(server, proxyfilename)
    dictresult, status, reason = connection.get('/crabserver/dev/info', {'subresource' : subresource})
    return dictresult['result'][0]
def __call__(self):
    """Fetch the run/lumi report of the cached task, print file/event counts,
    and write the analyzed and not-analyzed lumi lists as JSON files.

    Raises RESTCommunicationException if the server query fails.
    """
    server = HTTPRequests(self.serverurl, self.proxyfilename)
    self.logger.debug('Looking up report for task %s' % self.cachedinfo['RequestName'])
    dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'subresource': 'report'})
    self.logger.debug("Result: %s" % dictresult)

    if status != 200:
        msg = "Problem retrieving report:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        raise RESTCommunicationException(msg)

    # Guard against tasks with no finished jobs yet (consistent with the other
    # report-command variant in this file).
    if not dictresult['result'][0]['runsAndLumis']:
        self.logger.info('No jobs finished yet. Report is available when jobs complete')
        return

    runlumiLists = map(lambda x: literal_eval(x['runlumi']), dictresult['result'][0]['runsAndLumis'].values())
    #convert lumi lists from strings to integers
    for runlumi in runlumiLists:
        for run in runlumi:
            runlumi[run] = map(int, runlumi[run])

    analyzed, diff = BasicJobType.mergeLumis(runlumiLists, dictresult['result'][0]['lumiMask'])
    numFiles = len(reduce(set().union, map(lambda x: literal_eval(x['parents']), dictresult['result'][0]['runsAndLumis'].values())))
    self.logger.info("%d files have been read" % numFiles)
    self.logger.info("%d events have been read" % sum(map(lambda x: x['events'], dictresult['result'][0]['runsAndLumis'].values())))

    if self.outdir:
        jsonFileDir = self.outdir
    else:
        jsonFileDir = os.path.join(self.requestarea, 'results')

    if analyzed:
        with open(os.path.join(jsonFileDir, 'analyzed.json'), 'w') as jsonFile:
            # BUGFIX: the original did json.dump(diff, os.path.join(jsonFile)),
            # which both dumps the wrong object and wraps the file object in
            # os.path.join (a TypeError). Dump the analyzed lumis to the file.
            json.dump(analyzed, jsonFile)
            jsonFile.write("\n")
            self.logger.info("Analyzed lumi written to %s/analyzed.json" % jsonFileDir)
    if diff:
        with open(os.path.join(jsonFileDir, 'diff.json'), 'w') as jsonFile:
            json.dump(diff, jsonFile)
            jsonFile.write("\n")
            self.logger.info("Not Analyzed lumi written to %s/diff.json" % jsonFileDir)
def server_info(subresource, server, proxyfilename):
    """ Get relevant information about the server """
    # Use a dedicated name for the HTTP connection; keep the parameter intact.
    requests = HTTPRequests(server, proxyfilename)
    dictresult, status, reason = requests.get('/crabserver/dev/info', {'subresource': subresource})
    return dictresult['result'][0]
def __call__(self, **argv):
    """Retrieve the locations of the task's output files from the server and
    copy them into the destination directory via remote_copy.

    Extra keyword arguments are forwarded to the server query. Raises
    ConfigurationException on a bad destination or a failed server query.
    """
    #Setting default destination if -o is not provided
    if not self.dest:
        self.dest = os.path.join(self.requestarea, 'results')

    #Creating the destination directory if necessary
    if not os.path.exists(self.dest):
        self.logger.debug("Creating directory %s " % self.dest)
        os.makedirs(self.dest)
    elif not os.path.isdir(self.dest):
        raise ConfigurationException('Destination directory is a file')

    self.logger.info("Setting the destination directory to %s " % self.dest)

    #Retrieving output files location from the server
    self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'])
    inputlist = [('workflow', self.cachedinfo['RequestName'])]
    inputlist.extend(list(argv.iteritems()))
    if getattr(self.options, 'quantity', None):
        self.logger.debug('Retrieving %s file locations' % self.options.quantity)
        inputlist.append(('limit', self.options.quantity))
    if getattr(self.options, 'jobids', None):
        self.logger.debug('Retrieving jobs %s' % self.options.jobids)
        inputlist.extend(self.options.jobids)

    server = HTTPRequests(self.serverurl, self.proxyfilename)
    dictresult, status, reason = server.get(self.uri, data=inputlist)
    self.logger.debug('Server result: %s' % dictresult)
    dictresult = self.processServerResult(dictresult)

    if status != 200:
        msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
        raise ConfigurationException(msg)

    totalfiles = len(dictresult['result'])
    # Removed unused local 'cpresults'.
    # for workflow in dictresult['result']: TODO re-enable this when we will have resubmissions
    workflow = dictresult['result']  #TODO assigning workflow to dictresult. for the moment we have only one wf
    arglist = ['-d', self.dest, '-i', workflow]
    if self.options.skipProxy:
        arglist.append('-p')
    if len(workflow) > 0:
        self.logger.info("Retrieving %s files" % totalfiles)
        copyoutput = remote_copy(self.logger, arglist)
        copyoutput()
    if totalfiles == 0:
        self.logger.info("No files to retrieve")
def __call__(self):
    """Query and print the detailed status of the cached task: task name and
    status, the PanDA monitor URL (or the injection error), partial-submission
    failures, and a per-state percentage breakdown of the jobs.

    Raises RESTCommunicationException if the server query fails.
    """
    server = HTTPRequests(self.serverurl, self.proxyfilename)
    self.logger.debug('Looking up detailed status of task %s' % self.cachedinfo['RequestName'])
    dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName']})
    dictresult = dictresult['result'][0]  #take just the significant part

    if status != 200:
        msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        raise RESTCommunicationException(msg)

    self.logger.debug(dictresult)  #should be something like {u'result': [[123, u'ciao'], [456, u'ciao']]}

    self.logger.info("Task name:\t\t\t%s" % self.cachedinfo['RequestName'])
    self.logger.info("Task status:\t\t\t%s" % dictresult['status'])

    #Print the url of the panda monitor
    if dictresult['taskFailureMsg']:
        self.logger.error("%sError during task injection:%s\t%s" % (colors.RED, colors.NORMAL, dictresult['taskFailureMsg']))
    elif dictresult['jobSetID']:
        p = Proxy({'logger': self.logger})
        username = urllib.quote(p.getUserName())
        self.logger.info("Panda url:\t\t\thttp://panda.cern.ch/server/pandamon/query?job=*&jobsetID=%s&user=%s" % (dictresult['jobSetID'], username))

    if dictresult['jobdefErrors']:
        # BUGFIX: message typo "submittet" -> "submitted".
        self.logger.error("%sSubmission partially failed:%s\t%s jobgroup not submitted out of %s:" % (colors.RED, colors.NORMAL,
                          dictresult['failedJobdefs'], dictresult['totalJobdefs']))
        for error in dictresult['jobdefErrors']:
            self.logger.info("\t%s" % error)

    #Print information about jobs
    states = dictresult['jobsPerStatus']
    total = sum(states[st] for st in states)
    frmt = ''
    # BUGFIX: loop variable renamed from 'status', which shadowed the HTTP
    # status code above.
    for state in states:
        frmt += state + ' %s\t' % self._percentageString(states[state], total)
    if frmt:
        self.logger.info('Details:\t\t\t%s' % frmt)
def __call__(self):
    """Query and print the detailed status of the cached task: task name and
    status, the PanDA monitor URL (or the injection error), partial-submission
    failures, and a per-state percentage breakdown of the jobs.

    Raises RESTCommunicationException if the server query fails.
    """
    server = HTTPRequests(self.serverurl, self.proxyfilename)
    self.logger.debug("Looking up detailed status of task %s" % self.cachedinfo["RequestName"])
    dictresult, status, reason = server.get(self.uri, data={"workflow": self.cachedinfo["RequestName"]})
    dictresult = dictresult["result"][0]  # take just the significant part

    if status != 200:
        msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (
            str(self.cachedinfo["RequestName"]),
            str(dictresult),
            str(reason),
        )
        raise RESTCommunicationException(msg)

    self.logger.debug(dictresult)  # should be something like {u'result': [[123, u'ciao'], [456, u'ciao']]}

    self.logger.info("Task name:\t\t\t%s" % self.cachedinfo["RequestName"])
    self.logger.info("Task status:\t\t\t%s" % dictresult["status"])

    # Print the url of the panda monitor
    if dictresult["taskFailureMsg"]:
        self.logger.error(
            "%sError during task injection:%s\t%s" % (colors.RED, colors.NORMAL, dictresult["taskFailureMsg"])
        )
    elif dictresult["jobSetID"]:
        p = Proxy({"logger": self.logger})
        username = urllib.quote(p.getUserName())
        self.logger.info(
            "Panda url:\t\t\thttp://panda.cern.ch/server/pandamon/query?job=*&jobsetID=%s&user=%s"
            % (dictresult["jobSetID"], username)
        )

    if dictresult["jobdefErrors"]:
        # BUGFIX: message typo "submittet" -> "submitted".
        self.logger.error(
            "%sSubmission partially failed:%s\t%s jobgroup not submitted out of %s:"
            % (colors.RED, colors.NORMAL, dictresult["failedJobdefs"], dictresult["totalJobdefs"])
        )
        for error in dictresult["jobdefErrors"]:
            self.logger.info("\t%s" % error)

    # Print information about jobs
    states = dictresult["jobsPerStatus"]
    total = sum(states[st] for st in states)
    frmt = ""
    # BUGFIX: loop variable renamed from 'status', which shadowed the HTTP
    # status code above.
    for state in states:
        frmt += state + " %s\t" % self._percentageString(states[state], total)
    if frmt:
        self.logger.info("Details:\t\t\t%s" % frmt)
def __call__(self):
    """Return the task type of the cached request, as reported by the server."""
    if self.options.proxyfile:
        proxyfile = self.options.proxyfile
    else:
        proxyfile = self.proxyfilename
    server = HTTPRequests(self.serverurl, proxyfile)
    self.logger.debug('Looking type for task %s' % self.cachedinfo['RequestName'])
    dictresult, status, reason = server.get(
        self.uri,
        data={'workflow': self.cachedinfo['RequestName'], 'subresource': 'type'})
    self.logger.debug('Task type %s' % dictresult['result'][0])
    return dictresult['result'][0]
def __call__(self, **argv):
    """Retrieve the locations of the task's output files from the server and
    copy them into the destination directory via remote_copy.

    Extra keyword arguments are forwarded to the server query. Raises
    ConfigurationException on a bad destination or a failed server query.
    """
    #Setting default destination if -o is not provided
    if not self.dest:
        self.dest = os.path.join(self.requestarea, 'results')

    #Creating the destination directory if necessary
    if not os.path.exists(self.dest):
        self.logger.debug("Creating directory %s " % self.dest)
        os.makedirs(self.dest)
    elif not os.path.isdir(self.dest):
        raise ConfigurationException('Destination directory is a file')

    self.logger.info("Setting the destination directory to %s " % self.dest)

    #Retrieving output files location from the server
    self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'])
    inputlist = [('workflow', self.cachedinfo['RequestName'])]
    inputlist.extend(list(argv.iteritems()))
    if getattr(self.options, 'quantity', None):
        self.logger.debug('Retrieving %s file locations' % self.options.quantity)
        inputlist.append(('limit', self.options.quantity))
    if getattr(self.options, 'jobids', None):
        self.logger.debug('Retrieving jobs %s' % self.options.jobids)
        inputlist.extend(self.options.jobids)

    server = HTTPRequests(self.serverurl, self.proxyfilename)
    dictresult, status, reason = server.get(self.uri, data=inputlist)
    self.logger.debug('Server result: %s' % dictresult)
    dictresult = self.processServerResult(dictresult)

    if status != 200:
        msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
        raise ConfigurationException(msg)

    totalfiles = len(dictresult['result'])
    # Removed unused local 'cpresults'.
    # for workflow in dictresult['result']: TODO re-enable this when we will have resubmissions
    workflow = dictresult['result']  #TODO assigning workflow to dictresult. for the moment we have only one wf
    arglist = ['-d', self.dest, '-i', workflow]
    if self.options.skipProxy:
        arglist.append('-p')
    if len(workflow) > 0:
        self.logger.info("Retrieving %s files" % totalfiles)
        copyoutput = remote_copy(self.logger, arglist)
        copyoutput()
    if totalfiles == 0:
        self.logger.info("No files to retrieve")
def __call__(self):
    """Query and print the detailed status of the cached task: task name and
    status, the PanDA monitor URL (or the injection error plus any job-definition
    failures), and a per-state percentage breakdown of the jobs.

    Raises RESTCommunicationException if the server query fails.
    """
    server = HTTPRequests(self.serverurl, self.proxyfilename)
    self.logger.debug('Looking up detailed status of task %s' % self.cachedinfo['RequestName'])
    dictresult, status, reason = server.get(self.uri, data = {'workflow' : self.cachedinfo['RequestName']})
    dictresult = dictresult['result'][0]  #take just the significant part

    if status != 200:
        msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        raise RESTCommunicationException(msg)

    self.logger.debug(dictresult)  #should be something like {u'result': [[123, u'ciao'], [456, u'ciao']]}

    self.logger.info("Task name:\t\t\t%s" % self.cachedinfo['RequestName'])
    self.logger.info("Task status:\t\t\t%s" % dictresult['status'])

    def logJDefErr(jdef):
        """Printing job def failures if any"""
        if jdef['jobdefErrors']:
            self.logger.error("%sFailed to inject %s\t%s out of %s:" % (colors.RED, colors.NORMAL,
                              jdef['failedJobdefs'], jdef['totalJobdefs']))
            for error in jdef['jobdefErrors']:
                self.logger.info("\t%s" % error)

    #Print the url of the panda monitor
    if dictresult['taskFailureMsg']:
        self.logger.error("%sError during task injection:%s\t%s" % (colors.RED, colors.NORMAL, dictresult['taskFailureMsg']))
        # We might also have more information in the job def errors
        logJDefErr(jdef=dictresult)
    elif dictresult['jobSetID']:
        username = urllib.quote(getUserName(self.logger))
        self.logger.info("Panda url:\t\t\thttp://panda.cern.ch/server/pandamon/query?job=*&jobsetID=%s&user=%s" % (dictresult['jobSetID'], username))
        # We have cases where the job def errors are there but we have a job def id
        logJDefErr(jdef=dictresult)

    #Print information about jobs
    states = dictresult['jobsPerStatus']
    total = sum(states[st] for st in states)
    frmt = ''
    # BUGFIX: loop variable renamed from 'status', which shadowed the HTTP
    # status code above.
    for state in states:
        frmt += state + ' %s\t' % self._percentageString(states[state], total)
    if frmt:
        self.logger.info('Details:\t\t\t%s' % frmt)
def __call__(self):
    """Fetch the run/lumi report of the cached task, print file/event counts,
    and write the analyzed and not-analyzed lumi lists as JSON files.

    Returns early (with a message) if no jobs have finished yet. Raises
    RESTCommunicationException if the server query fails.
    """
    server = HTTPRequests(self.serverurl, self.proxyfilename)
    self.logger.debug('Looking up report for task %s' % self.cachedinfo['RequestName'])
    dictresult, status, reason = server.get(self.uri, data = {'workflow': self.cachedinfo['RequestName'], 'subresource': 'report'})
    self.logger.debug("Result: %s" % dictresult)

    if status != 200:
        msg = "Problem retrieving report:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
        raise RESTCommunicationException(msg)

    if not dictresult['result'][0]['runsAndLumis']:
        self.logger.info('No jobs finished yet. Report is available when jobs complete')
        return

    runlumiLists = map(lambda x: literal_eval(x['runlumi']), dictresult['result'][0]['runsAndLumis'].values())
    #convert lumi lists from strings to integers
    for runlumi in runlumiLists:
        for run in runlumi:
            runlumi[run] = map(int, runlumi[run])

    analyzed, diff = BasicJobType.mergeLumis(runlumiLists, dictresult['result'][0]['lumiMask'])
    numFiles = len(reduce(set().union, map(lambda x: literal_eval(x['parents']), dictresult['result'][0]['runsAndLumis'].values())))
    self.logger.info("%d files have been read" % numFiles)
    self.logger.info("%d events have been read" % sum(map(lambda x: x['events'], dictresult['result'][0]['runsAndLumis'].values())))

    if self.outdir:
        jsonFileDir = self.outdir
    else:
        jsonFileDir = os.path.join(self.requestarea, 'results')

    if analyzed:
        with open(os.path.join(jsonFileDir, 'analyzed.json'), 'w') as jsonFile:
            # BUGFIX: the original did json.dump(diff, os.path.join(jsonFile)),
            # which both dumps the wrong object and wraps the file object in
            # os.path.join (a TypeError). Dump the analyzed lumis to the file.
            json.dump(analyzed, jsonFile)
            jsonFile.write("\n")
            self.logger.info("Analyzed lumi written to %s/analyzed.json" % jsonFileDir)
    if diff:
        with open(os.path.join(jsonFileDir, 'diff.json'), 'w') as jsonFile:
            json.dump(diff, jsonFile)
            jsonFile.write("\n")
            self.logger.info("Not Analyzed lumi written to %s/diff.json" % jsonFileDir)