예제 #1
0
    def __call__(self):
        """
        Report the job failures recorded in the postmortem of the cached task.

        Returns a CommandResult: code 2001 when the task option is missing,
        code 1 when the server does not answer with status 200, and code 0
        once every entry of the 'errors' list has been reported.
        """
        if self.options.task is None:
            return CommandResult(2001, 'ERROR: Task option is required')

        endpoint = self.cachedinfo['Server'] + ':' + str(self.cachedinfo['Port'])
        server = HTTPRequests(endpoint)

        taskname = self.cachedinfo['RequestName']
        self.logger.debug('Looking up detailed postmortem of task %s' % taskname)
        # The second element of the reply is compared against 200 below,
        # i.e. it is treated as the HTTP status of the request.
        dictresult, httpstatus, reason = server.get(self.uri + taskname)

        if httpstatus != 200:
            msg = "Problem retrieving postmortem:\ninput:%s\noutput:%s\nreason:%s" % (str(taskname), str(dictresult), str(reason))
            return CommandResult(1, msg)

        for entry in dictresult['errors']:
            self.logger.info("#%i %s" % (entry['subOrder'], entry['request']))

            if self.options.verbose or self.options.outputfile:
                # Detailed output requested: delegate the raw details.
                self.printVerbose(entry['details'],
                                  self.options.outputfile,
                                  os.path.join(self.requestarea, 'results', 'jobFailures.log'))
                continue

            self.logger.debug("   Aggregating job failures")
            grouped = self.aggregateFailures(entry['details'])
            if not grouped:
                self.logger.info("   No failures")
                continue

            self.logger.info("   List of failures and jobs per each failure: (one job could have more then one failure, one per each step)")
            for key in grouped:
                failure = grouped[key]
                # Drop duplicated job entries and put them in order.
                jobs = sorted(set(failure['jobs']))
                self.logger.info('     %s jobs failed with error "%s"' %(len(jobs), failure['error']))
                self.logger.info('       (%s)'  %(', '.join(str(job[0]) for job in jobs)))

        return CommandResult(0, None)
예제 #2
0
    def __call__(self):
        """
        Ask the server for the type of the cached task and return it.

        The proxy file given in the options is used when set, otherwise
        the instance's own proxy file name. The HTTP status and reason
        returned by the server are not inspected here.
        """
        proxy = self.options.proxyfile or self.proxyfilename
        server = HTTPRequests(self.serverurl, proxy)

        taskname = self.cachedinfo['RequestName']
        self.logger.debug('Looking type for task %s' % taskname)

        query = {'workflow': taskname, 'subresource': 'type'}
        dictresult, _status, _reason = server.get(self.uri, data=query)

        tasktype = dictresult['result'][0]
        self.logger.debug('Task type %s' % tasktype)
        return tasktype
예제 #3
0
def server_info(subresource, server, proxyfilename):
    """
    Query the '/crabserver/dev/info' resource and return its first result.

    subresource   -- value sent as the 'subresource' query parameter
    server        -- server address handed to HTTPRequests
    proxyfilename -- proxy file handed to HTTPRequests

    The HTTP status and reason of the reply are not inspected.
    """
    client = HTTPRequests(server, proxyfilename)
    query = {'subresource' : subresource}
    answer, _status, _reason = client.get('/crabserver/dev/info', query)
    return answer['result'][0]
예제 #4
0
    def __call__(self):
        """
        Retrieve the report of the cached task and write the lumi summaries.

        Requests the 'report' subresource for the task, logs how many files
        and events were read, and writes ``analyzed.json`` and ``diff.json``
        (each only when non-empty) into ``self.outdir`` or, when that is
        unset, into ``<requestarea>/results``.

        Raises RESTCommunicationException when the server status is not 200.
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename)

        self.logger.debug('Looking up report for task %s' %
                          self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(
            self.uri,
            data={
                'workflow': self.cachedinfo['RequestName'],
                'subresource': 'report'
            })

        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem retrieving report:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        report = dictresult['result'][0]
        if not report['runsAndLumis']:
            # Without any entry there is nothing to merge or count; the
            # previous reduce() call raised TypeError on an empty sequence.
            self.logger.info('No jobs finished yet. Report is available when jobs complete')
            return
        jobReports = list(report['runsAndLumis'].values())

        runlumiLists = [literal_eval(job['runlumi']) for job in jobReports]
        #convert lumi lists from strings to integers
        for runlumi in runlumiLists:
            for run in runlumi:
                runlumi[run] = [int(lumi) for lumi in runlumi[run]]
        analyzed, diff = BasicJobType.mergeLumis(runlumiLists,
                                                 report['lumiMask'])

        # Count distinct parent files over all entries (also de-duplicates
        # when there is a single entry).
        parentLists = [literal_eval(job['parents']) for job in jobReports]
        numFiles = len(set().union(*parentLists))
        self.logger.info("%d files have been read" % numFiles)
        self.logger.info("%d events have been read" %
                         sum(job['events'] for job in jobReports))

        if self.outdir:
            jsonFileDir = self.outdir
        else:
            jsonFileDir = os.path.join(self.requestarea, 'results')
        if analyzed:
            with open(os.path.join(jsonFileDir, 'analyzed.json'),
                      'w') as jsonFile:
                # Fix: dump the analyzed lumis (not diff) straight to the
                # open file object.
                json.dump(analyzed, jsonFile)
                jsonFile.write("\n")
                self.logger.info("Analyzed lumi written to %s/analyzed.json" %
                                 jsonFileDir)
        if diff:
            with open(os.path.join(jsonFileDir, 'diff.json'), 'w') as jsonFile:
                json.dump(diff, jsonFile)
                jsonFile.write("\n")
                self.logger.info("Not Analyzed lumi written to %s/diff.json" %
                                 jsonFileDir)
예제 #5
0
def server_info(subresource, server, proxyfilename):
    """
    Query the '/crabserver/dev/info' resource and return its first result.

    subresource   -- value sent as the 'subresource' query parameter
    server        -- server address handed to HTTPRequests
    proxyfilename -- proxy file handed to HTTPRequests

    The HTTP status and reason of the reply are not inspected.
    """
    client = HTTPRequests(server, proxyfilename)
    query = {'subresource': subresource}
    answer, _status, _reason = client.get('/crabserver/dev/info', query)
    return answer['result'][0]
예제 #6
0
    def __call__(self, **argv):
        """
        Look up the task's file locations on the server and copy them locally.

        Extra keyword arguments are forwarded to the server as additional
        (name, value) query parameters. Files are copied into ``self.dest``,
        which defaults to ``<requestarea>/results`` and is created if missing.

        Raises ConfigurationException when the destination exists but is not
        a directory, or when the server status is not 200.
        """
        #Setting default destination if -o is not provided
        if not self.dest:
            self.dest = os.path.join(self.requestarea, 'results')

        #Creating the destination directory if necessary
        if not os.path.exists(self.dest):
            self.logger.debug("Creating directory %s " % self.dest)
            os.makedirs(self.dest)
        elif not os.path.isdir(self.dest):
            raise ConfigurationException('Destination directory is a file')

        self.logger.info("Setting the destination directory to %s " %
                         self.dest)

        #Retrieving output files location from the server
        self.logger.debug('Retrieving locations for task %s' %
                          self.cachedinfo['RequestName'])
        inputlist = [('workflow', self.cachedinfo['RequestName'])]
        # items() works on Python 2 and 3 alike, unlike iteritems()
        inputlist.extend(argv.items())
        if getattr(self.options, 'quantity', None):
            self.logger.debug('Retrieving %s file locations' %
                              self.options.quantity)
            inputlist.append(('limit', self.options.quantity))
        if getattr(self.options, 'jobids', None):
            self.logger.debug('Retrieving jobs %s' % self.options.jobids)
            # NOTE(review): presumably jobids is already a list of
            # (name, value) pairs like the other entries - confirm.
            inputlist.extend(self.options.jobids)
        server = HTTPRequests(self.serverurl, self.proxyfilename)
        dictresult, status, reason = server.get(self.uri, data=inputlist)
        self.logger.debug('Server result: %s' % dictresult)

        # Check the HTTP status before post-processing, so a failed request
        # is reported as such instead of failing inside processServerResult.
        if status != 200:
            msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (
                str(inputlist), str(dictresult), str(reason))
            raise ConfigurationException(msg)

        dictresult = self.processServerResult(dictresult)

        # TODO: loop over dictresult['result'] per workflow once
        # resubmissions are supported; for now there is only one workflow.
        workflow = dictresult['result']
        totalfiles = len(workflow)
        if totalfiles == 0:
            self.logger.info("No files to retrieve")
            return

        arglist = ['-d', self.dest, '-i', workflow]
        if self.options.skipProxy:
            arglist.append('-p')
        self.logger.info("Retrieving %s files" % totalfiles)
        copyoutput = remote_copy(self.logger, arglist)
        copyoutput()
예제 #7
0
    def __call__(self):
        """
        Fetch the status of the cached task from the server and log it.

        Logs the task name and status, an injection error or the Panda
        monitor URL, any job definition errors, and the per-status job
        breakdown.

        Raises RESTCommunicationException when the server status is not 200.
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename)

        self.logger.debug('Looking up detailed status of task %s' %
                          self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(
            self.uri, data={'workflow': self.cachedinfo['RequestName']})

        # Check the HTTP status before indexing into the payload: an error
        # response may lack 'result', which would otherwise hide the real
        # failure behind a KeyError/IndexError.
        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        dictresult = dictresult['result'][0]  #take just the significant part

        self.logger.debug(dictresult)

        self.logger.info("Task name:\t\t\t%s" % self.cachedinfo['RequestName'])
        self.logger.info("Task status:\t\t\t%s" % dictresult['status'])

        #Print the injection failure or, failing that, the url of the panda monitor
        if dictresult['taskFailureMsg']:
            self.logger.error(
                "%sError during task injection:%s\t%s" %
                (colors.RED, colors.NORMAL, dictresult['taskFailureMsg']))
        elif dictresult['jobSetID']:
            p = Proxy({'logger': self.logger})
            username = urllib.quote(p.getUserName())
            self.logger.info(
                "Panda url:\t\t\thttp://panda.cern.ch/server/pandamon/query?job=*&jobsetID=%s&user=%s"
                % (dictresult['jobSetID'], username))

        if dictresult['jobdefErrors']:
            self.logger.error(
                "%sSubmission partially failed:%s\t%s jobgroup not submittet out of %s:"
                % (colors.RED, colors.NORMAL, dictresult['failedJobdefs'],
                   dictresult['totalJobdefs']))
            for error in dictresult['jobdefErrors']:
                self.logger.info("\t%s" % error)

        #Print information about jobs
        states = dictresult['jobsPerStatus']
        total = sum(states[st] for st in states)
        # Use a dedicated loop name so the HTTP status is not shadowed.
        frmt = ''.join(
            '%s %s\t' % (state, self._percentageString(states[state], total))
            for state in states)
        if frmt:
            self.logger.info('Details:\t\t\t%s' % frmt)
예제 #8
0
    def __call__(self):
        """
        Fetch the status of the cached task from the server and log it.

        Logs the task name and status, an injection error or the Panda
        monitor URL, any job definition errors, and the per-status job
        breakdown.

        Raises RESTCommunicationException when the server status is not 200.
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename)

        self.logger.debug("Looking up detailed status of task %s" % self.cachedinfo["RequestName"])
        dictresult, status, reason = server.get(self.uri, data={"workflow": self.cachedinfo["RequestName"]})

        # Check the HTTP status before indexing into the payload: an error
        # response may lack "result", which would otherwise hide the real
        # failure behind a KeyError/IndexError.
        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.cachedinfo["RequestName"]),
                str(dictresult),
                str(reason),
            )
            raise RESTCommunicationException(msg)

        dictresult = dictresult["result"][0]  # take just the significant part

        self.logger.debug(dictresult)

        self.logger.info("Task name:\t\t\t%s" % self.cachedinfo["RequestName"])
        self.logger.info("Task status:\t\t\t%s" % dictresult["status"])

        # Print the injection failure or, failing that, the url of the panda monitor
        if dictresult["taskFailureMsg"]:
            self.logger.error(
                "%sError during task injection:%s\t%s" % (colors.RED, colors.NORMAL, dictresult["taskFailureMsg"])
            )
        elif dictresult["jobSetID"]:
            p = Proxy({"logger": self.logger})
            username = urllib.quote(p.getUserName())
            self.logger.info(
                "Panda url:\t\t\thttp://panda.cern.ch/server/pandamon/query?job=*&jobsetID=%s&user=%s"
                % (dictresult["jobSetID"], username)
            )

        if dictresult["jobdefErrors"]:
            self.logger.error(
                "%sSubmission partially failed:%s\t%s jobgroup not submittet out of %s:"
                % (colors.RED, colors.NORMAL, dictresult["failedJobdefs"], dictresult["totalJobdefs"])
            )
            for error in dictresult["jobdefErrors"]:
                self.logger.info("\t%s" % error)

        # Print information about jobs
        states = dictresult["jobsPerStatus"]
        total = sum(states[st] for st in states)
        # Use a dedicated loop name so the HTTP status is not shadowed.
        frmt = "".join(
            "%s %s\t" % (state, self._percentageString(states[state], total))
            for state in states
        )
        if frmt:
            self.logger.info("Details:\t\t\t%s" % frmt)
예제 #9
0
    def __call__(self):
        """
        Ask the server for the type of the cached task and return it.

        The proxy file given in the options is used when set, otherwise
        the instance's own proxy file name. The HTTP status and reason
        returned by the server are not inspected here.
        """
        proxy = self.options.proxyfile or self.proxyfilename
        server = HTTPRequests(self.serverurl, proxy)

        taskname = self.cachedinfo['RequestName']
        self.logger.debug('Looking type for task %s' % taskname)

        query = {'workflow': taskname, 'subresource': 'type'}
        dictresult, _status, _reason = server.get(self.uri, data=query)

        tasktype = dictresult['result'][0]
        self.logger.debug('Task type %s' % tasktype)
        return tasktype
예제 #10
0
    def __call__(self, **argv):
        """
        Look up the task's file locations on the server and copy them locally.

        Extra keyword arguments are forwarded to the server as additional
        (name, value) query parameters. Files are copied into ``self.dest``,
        which defaults to ``<requestarea>/results`` and is created if missing.

        Raises ConfigurationException when the destination exists but is not
        a directory, or when the server status is not 200.
        """
        #Setting default destination if -o is not provided
        if not self.dest:
            self.dest = os.path.join(self.requestarea, 'results')

        #Creating the destination directory if necessary
        if not os.path.exists(self.dest):
            self.logger.debug("Creating directory %s " % self.dest)
            os.makedirs(self.dest)
        elif not os.path.isdir(self.dest):
            raise ConfigurationException('Destination directory is a file')

        self.logger.info("Setting the destination directory to %s " % self.dest )

        #Retrieving output files location from the server
        self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'] )
        inputlist = [('workflow', self.cachedinfo['RequestName'])]
        # items() works on Python 2 and 3 alike, unlike iteritems()
        inputlist.extend(argv.items())
        if getattr(self.options, 'quantity', None):
            self.logger.debug('Retrieving %s file locations' % self.options.quantity )
            inputlist.append(('limit', self.options.quantity))
        if getattr(self.options, 'jobids', None):
            self.logger.debug('Retrieving jobs %s' % self.options.jobids )
            # NOTE(review): presumably jobids is already a list of
            # (name, value) pairs like the other entries - confirm.
            inputlist.extend(self.options.jobids)
        server = HTTPRequests(self.serverurl, self.proxyfilename)
        dictresult, status, reason = server.get(self.uri, data=inputlist)
        self.logger.debug('Server result: %s' % dictresult )

        # Check the HTTP status before post-processing, so a failed request
        # is reported as such instead of failing inside processServerResult.
        if status != 200:
            msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
            raise ConfigurationException(msg)

        dictresult = self.processServerResult(dictresult)

        # TODO: loop over dictresult['result'] per workflow once
        # resubmissions are supported; for now there is only one workflow.
        workflow = dictresult['result']
        totalfiles = len(workflow)
        if totalfiles == 0:
            self.logger.info("No files to retrieve")
            return

        arglist = ['-d', self.dest, '-i', workflow]
        if self.options.skipProxy:
            arglist.append('-p')
        self.logger.info("Retrieving %s files" % totalfiles )
        copyoutput = remote_copy(self.logger, arglist)
        copyoutput()
예제 #11
0
    def __call__(self):
        """
        Fetch the status of the cached task from the server and log it.

        Logs the task name and status, then either the injection error or
        the Panda monitor URL (each followed by any job definition errors),
        and finally the per-status job breakdown.

        Raises RESTCommunicationException when the server status is not 200.
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename)

        self.logger.debug('Looking up detailed status of task %s' % self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(self.uri, data = { 'workflow' : self.cachedinfo['RequestName']})

        # Check the HTTP status before indexing into the payload: an error
        # response may lack 'result', which would otherwise hide the real
        # failure behind a KeyError/IndexError.
        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        dictresult = dictresult['result'][0] #take just the significant part

        self.logger.debug(dictresult)

        self.logger.info("Task name:\t\t\t%s" % self.cachedinfo['RequestName'])
        self.logger.info("Task status:\t\t\t%s" % dictresult['status'])

        def logJDefErr(jdef):
            """Printing job def failures if any"""
            if jdef['jobdefErrors']:
                self.logger.error("%sFailed to inject %s\t%s out of %s:" %(colors.RED, colors.NORMAL,\
                                                                           jdef['failedJobdefs'], jdef['totalJobdefs']))
                for error in jdef['jobdefErrors']:
                    self.logger.info("\t%s" % error)

        #Print the injection failure or, failing that, the url of the panda monitor
        if dictresult['taskFailureMsg']:
            self.logger.error("%sError during task injection:%s\t%s" % (colors.RED,colors.NORMAL,dictresult['taskFailureMsg']))
            # We might also have more information in the job def errors
            logJDefErr(jdef=dictresult)
        elif dictresult['jobSetID']:
            username = urllib.quote(getUserName(self.logger))
            self.logger.info("Panda url:\t\t\thttp://panda.cern.ch/server/pandamon/query?job=*&jobsetID=%s&user=%s" % (dictresult['jobSetID'], username))
            # We have cases where the job def errors are there but we have a job def id
            logJDefErr(jdef=dictresult)

        #Print information about jobs
        states = dictresult['jobsPerStatus']
        total = sum( states[st] for st in states )
        # Use a dedicated loop name so the HTTP status is not shadowed.
        frmt = ''.join('%s %s\t' % (state, self._percentageString(states[state], total))
                       for state in states)
        if frmt:
            self.logger.info('Details:\t\t\t%s' % frmt)
예제 #12
0
    def __call__(self):
        """
        Retrieve the report of the cached task and write the lumi summaries.

        Requests the 'report' subresource for the task, logs how many files
        and events were read, and writes ``analyzed.json`` and ``diff.json``
        (each only when non-empty) into ``self.outdir`` or, when that is
        unset, into ``<requestarea>/results``. Returns early, after logging
        a notice, when the report has no 'runsAndLumis' entries.

        Raises RESTCommunicationException when the server status is not 200.
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename)

        self.logger.debug('Looking up report for task %s' % self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(self.uri, data = {'workflow': self.cachedinfo['RequestName'], 'subresource': 'report'})

        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem retrieving report:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        report = dictresult['result'][0]
        if not report['runsAndLumis'] :
            self.logger.info('No jobs finished yet. Report is available when jobs complete')
            return
        jobReports = list(report['runsAndLumis'].values())

        runlumiLists = [literal_eval(job['runlumi']) for job in jobReports]
        #convert lumi lists from strings to integers
        for runlumi in runlumiLists:
            for run in runlumi:
                runlumi[run] = [int(lumi) for lumi in runlumi[run]]
        analyzed, diff = BasicJobType.mergeLumis(runlumiLists, report['lumiMask'])

        # Count distinct parent files over all entries (also de-duplicates
        # when there is a single entry).
        parentLists = [literal_eval(job['parents']) for job in jobReports]
        numFiles = len(set().union(*parentLists))
        self.logger.info("%d files have been read" % numFiles)
        self.logger.info("%d events have been read" % sum(job['events'] for job in jobReports))

        if self.outdir:
            jsonFileDir = self.outdir
        else:
            jsonFileDir = os.path.join(self.requestarea, 'results')
        if analyzed:
            with open(os.path.join(jsonFileDir, 'analyzed.json'), 'w') as jsonFile:
                # Fix: dump the analyzed lumis (not diff) straight to the
                # open file object.
                json.dump(analyzed, jsonFile)
                jsonFile.write("\n")
                self.logger.info("Analyzed lumi written to %s/analyzed.json" % jsonFileDir)
        if diff:
            with open(os.path.join(jsonFileDir, 'diff.json'), 'w') as jsonFile:
                json.dump(diff, jsonFile)
                jsonFile.write("\n")
                self.logger.info("Not Analyzed lumi written to %s/diff.json" % jsonFileDir)