Example #1
0
    def processAndStoreJobIds(self):
        """
        Call the status command to check that the jobids passed by the user are in a valid
        state to retrieve files. Otherwise, if no jobids are passed by the user, populate the
        list with all possible jobids.

        Also store some information which is used later when deciding the correct pfn.

        Raises ClientException if the status command returns no job list, and
        ConfigurationException if a user-supplied jobid is not retrievable or
        more than 500 jobs were selected.
        """
        statusDict = getMutedStatusInfo(self.logger)
        jobList = statusDict['jobList']
        if not jobList:
            msg = "Cannot retrieve job list from the status command."
            raise ClientException(msg)

        # Jobs still moving their output count as retrievable, alongside terminal states.
        transferringIds = [x[1] for x in jobList if x[0] in ['transferring', 'cooloff', 'held']]
        finishedIds = [x[1] for x in jobList if x[0] in ['finished', 'failed', 'transferred']]
        possibleJobIds = transferringIds + finishedIds

        if self.options.jobids:
            for jobid in self.options.jobids:
                if str(jobid[1]) not in possibleJobIds:
                    raise ConfigurationException("The job with id %s is not in a valid state to retrieve output files" % jobid[1])
        else:
            ## If the user does not give us jobids, set them to all possible ids.
            self.options.jobids = [('jobids', jobid) for jobid in possibleJobIds]

        if len(self.options.jobids) > 500:
            msg = "You requested to process files for %d jobs." % len(self.options.jobids)
            # Bug fix: the original message concatenated "'--jobids'" and "option"
            # without a separating space ("'--jobids'option").
            msg += "\nThe limit is 500. Please use the '--jobids' "
            msg += "option to select up to 500 jobs."
            raise ConfigurationException(msg)

        # Stored for later use when deciding the correct pfn for each job.
        self.transferringIds = transferringIds
Example #2
0
    def __call__(self):
        """Submit a resubmission request for the task to the CRAB server."""

        statusDict = getMutedStatusInfo(self.logger)
        jobList = statusDict['jobList']

        # Automatic-splitting tasks cannot come back from a kill: bail out early.
        if self.splitting == 'Automatic' and statusDict['dbStatus'] == 'KILLED':
            errMsg = "%sError%s:" % (colors.RED, colors.NORMAL)
            errMsg += " Tasks using automatic splitting cannot be resubmitted after a kill."
            self.logger.info(errMsg)
            return None

        # Without a job list there is nothing reliable to resubmit.
        if not jobList:
            errMsg = "%sError%s:" % (colors.RED, colors.NORMAL)
            errMsg += " Status information is unavailable, will not proceed with the resubmission."
            errMsg += " Try again a few minutes later if the task has just been submitted."
            self.logger.info(errMsg)
            return None

        publicationEnabled = statusDict['publicationEnabled']
        jobsPerStatus = statusDict['jobsPerStatus']

        # Publication resubmission needs publication enabled and at least one
        # finished job whose output could have been published.
        if self.options.publication:
            if not publicationEnabled:
                infoMsg = "Publication was disabled for this task. Therefore, "
                infoMsg += "there are no publications to resubmit."
                self.logger.info(infoMsg)
                return None
            if "finished" not in jobsPerStatus:
                self.logger.info("No files found to publish")
                return None

        self.jobids = self.processJobIds(jobList)

        configreq = self.getQueryParams()
        self.logger.info("Sending resubmit request to the server.")
        self.logger.debug("Submitting %s " % str(configreq))
        encodedRequest = self._encodeRequest(configreq)
        self.logger.debug("Encoded resubmit request: %s" % (encodedRequest))

        dictresult, _, _ = self.server.post(self.uri, data=encodedRequest)
        self.logger.debug("Result: %s" % (dictresult))
        self.logger.info("Resubmit request sent to the server.")

        serverResult = dictresult['result'][0]['result']
        if serverResult != 'ok':
            self.logger.info("Server responded with: '%s'" % (serverResult))
            return {'status': 'FAILED'}

        if self.options.wait:
            # Block until the task goes back to the expected status.
            targetTaskStatus = 'SUBMITTED'
            checkStatusLoop(self.logger, self.server, self.uri, self.cachedinfo['RequestName'], targetTaskStatus, self.name)
        else:
            waitMsg  = "Please use 'crab status' to check how the resubmission process proceeds."
            waitMsg += "\nNotice it may take a couple of minutes for the resubmission to get fully processed."
            self.logger.info(waitMsg)

        return {'status': 'SUCCESS'}
Example #3
0
    def collectReportData(self):
        """
        Gather information from the server, status2, DBS and files in the webdir that is needed for the report.
        """
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

        taskName = self.cachedinfo['RequestName']
        self.logger.debug('Looking up report for task %s' % taskName)

        # Query server for information from the taskdb, intput/output file metadata from metadatadb
        dictresult, status, _ = server.get(self.uri, data={'workflow': taskName, 'subresource': 'report2'})

        self.logger.debug("Result: %s" % dictresult)
        self.logger.info("Running crab status first to fetch necessary information.")
        # Get job statuses
        statusDict = getMutedStatusInfo(self.logger)

        jobList = statusDict['jobList']
        if not jobList:
            # No point in continuing if the job list is empty.
            # Can happen when the task is very new / old and the files necessary for status2
            # are unavailable.
            return None

        reportData = {}
        # Drop probe jobs (ids of the form "0-N") from the report.
        reportData['jobList'] = [(state, jid) for (state, jid) in jobList if not jid.startswith('0-')]

        # Transform status joblist (tuples of job status and job id) into a dictionary
        jobStatusDict = dict((jid, state) for (state, jid) in reportData['jobList'])

        # Filter output/input file metadata by finished job state
        runsAndLumis = dictresult['result'][0]['runsAndLumis']
        reportData['runsAndLumis'] = {}
        if runsAndLumis:
            for jid in jobStatusDict:
                if jobStatusDict.get(jid) in ['finished']:
                    reportData['runsAndLumis'][jid] = runsAndLumis[jid]

        reportData['publication'] = statusDict['publicationEnabled']
        userWebDirURL = statusDict['proxiedWebDir']
        allJobIds = [jid for (_, jid) in jobList]

        reportData['lumisToProcess'] = self.getLumisToProcess(userWebDirURL, allJobIds, taskName)
        reportData['inputDataset'] = statusDict['inputDataset']

        inputDatasetInfo = self.getInputDatasetLumis(reportData['inputDataset'], userWebDirURL)['inputDataset']
        reportData['inputDatasetLumis'] = inputDatasetInfo['lumis']
        reportData['inputDatasetDuplicateLumis'] = inputDatasetInfo['duplicateLumis']
        reportData['outputDatasets'] = dictresult['result'][0]['taskDBInfo']['outputDatasets']

        if reportData['publication']:
            reportData['outputDatasetsInfo'] = self.getDBSPublicationInfo(reportData['outputDatasets'])

        return reportData
Example #4
0
File: report.py  Project: ddaina/CRABClient
    def collectReportData(self):
        """
        Gather information from the server, status2, DBS and files in the webdir that is needed for the report.
        """
        taskName = self.cachedinfo['RequestName']
        self.logger.debug('Looking up report for task %s' % taskName)

        # Query server for information from the taskdb, intput/output file metadata from metadatadb
        requestData = {'workflow': taskName, 'subresource': 'report2'}
        dictresult, status, _ = self.crabserver.get(api=self.defaultApi, data=requestData)

        self.logger.debug("Result: %s" % dictresult)
        self.logger.info("Running crab status first to fetch necessary information.")
        # Get job statuses
        statusDict = getMutedStatusInfo(self.logger)

        jobList = statusDict['jobList']
        if not jobList:
            # No point in continuing if the job list is empty.
            # Can happen when the task is very new / old and the files necessary for status2
            # are unavailable.
            return None

        reportData = {}
        # Probe jobs carry ids like "0-N"; exclude them from the report.
        reportData['jobList'] = [(state, jid) for (state, jid) in jobList if not jid.startswith('0-')]

        # Transform status joblist (tuples of job status and job id) into a dictionary
        jobStatusDict = dict((jid, state) for (state, jid) in reportData['jobList'])

        # Filter output/input file metadata by finished job state
        runsAndLumis = dictresult['result'][0]['runsAndLumis']
        reportData['runsAndLumis'] = {}
        if runsAndLumis:
            for jid in jobStatusDict:
                if jobStatusDict.get(jid) in ['finished']:
                    reportData['runsAndLumis'][jid] = runsAndLumis[jid]

        reportData['publication'] = statusDict['publicationEnabled']
        userWebDirURL = statusDict['proxiedWebDir']
        allJobIds = [jid for (_, jid) in jobList]

        reportData['lumisToProcess'] = self.getLumisToProcess(userWebDirURL, allJobIds, taskName)
        reportData['inputDataset'] = statusDict['inputDataset']

        inputDatasetInfo = self.getInputDatasetLumis(reportData['inputDataset'], userWebDirURL)['inputDataset']
        reportData['inputDatasetLumis'] = inputDatasetInfo['lumis']
        reportData['inputDatasetDuplicateLumis'] = inputDatasetInfo['duplicateLumis']
        reportData['outputDatasets'] = dictresult['result'][0]['taskDBInfo']['outputDatasets']

        if reportData['publication']:
            # Publication info is looked up via dasgoclient in this version.
            reportData['outputDatasetsInfo'] = self.getDBSPublicationInfo_viaDasGoclient(reportData['outputDatasets'])

        return reportData
Example #5
0
    def collectReportData(self):
        """
        Gather information from the server, status2, DBS and files in the webdir that is needed for the report.

        Returns a dict with the collected report information, or None when the
        job list cannot be obtained (e.g. the task is very new or very old).
        """
        reportData = {}

        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)

        self.logger.debug('Looking up report for task %s' %
                          self.cachedinfo['RequestName'])

        # Query server for information from the taskdb, intput/output file metadata from metadatadb
        dictresult, status, reason = server.get(
            self.uri,
            data={
                'workflow': self.cachedinfo['RequestName'],
                'subresource': 'report2'
            })

        self.logger.debug("Result: %s" % dictresult)
        self.logger.info(
            "Running crab status2 first to fetch necessary information.")
        # Get job statuses
        crabDBInfo, shortResult = getMutedStatusInfo(self.logger)

        if not shortResult:
            # No point in continuing if the job list is empty.
            # Can happen when the task is very new / old and the files necessary for status2
            # are unavailable.
            return None
        reportData['jobList'] = shortResult['jobList']

        reportData['runsAndLumis'] = {}

        # Transform status joblist (tuples of job status and job id) into a dictionary
        jobStatusDict = {}
        for status, jobId in reportData['jobList']:
            jobStatusDict[jobId] = status

        # Filter output/input file metadata by finished job state.
        # Fix: guard against an empty/None 'runsAndLumis' payload from the
        # server (the sibling versions of this method do this check); without
        # it the subscript below raises on tasks with no file metadata yet.
        if dictresult['result'][0]['runsAndLumis']:
            for jobId in jobStatusDict:
                if jobStatusDict.get(jobId) in ['finished']:
                    reportData['runsAndLumis'][jobId] = dictresult['result'][0][
                        'runsAndLumis'][jobId]

        # tm_publication is stored as "T"/"F" in the task DB.
        reportData['publication'] = getColumn(crabDBInfo, 'tm_publication') == "T"
        userWebDirURL = getColumn(crabDBInfo, 'tm_user_webdir')
        numJobs = len(shortResult['jobList'])

        reportData['lumisToProcess'] = self.getLumisToProcess(
            userWebDirURL, numJobs, self.cachedinfo['RequestName'])
        reportData['inputDataset'] = getColumn(crabDBInfo, 'tm_input_dataset')

        inputDatasetInfo = self.getInputDatasetLumis(
            reportData['inputDataset'], userWebDirURL)['inputDataset']
        reportData['inputDatasetLumis'] = inputDatasetInfo['lumis']
        reportData['inputDatasetDuplicateLumis'] = inputDatasetInfo[
            'duplicateLumis']
        reportData['outputDatasets'] = dictresult['result'][0]['taskDBInfo'][
            'outputDatasets']

        if reportData['publication']:
            reportData['outputDatasetsInfo'] = self.getDBSPublicationInfo(
                reportData['outputDatasets'])

        return reportData