Example 1
    def __call__(self):  # pylint: disable=arguments-differ
        if self.options.short:
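            # With --short, fetch the short version of the log files directly from the task webdir on the scheduler.
            # The webdir is taken from the proxied location when available, otherwise from the task database ('search' subresource).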
            taskname = self.cachedinfo['RequestName']
            inputlist = {'subresource': 'search', 'workflow': taskname}
            server = self.crabserver
            webdir = getProxiedWebDir(crabserver=self.crabserver, task=taskname, logFunction=self.logger.debug)
            dictresult, status, reason = server.get(api='task', data=inputlist)
            if not webdir:
                webdir = dictresult['result'][0]
                self.logger.info('Server result: %s' % webdir)
                if status != 200:
                    msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                    raise RESTCommunicationException(msg)
            splitting = getColumn(dictresult, 'tm_split_algo')
            if getattr(self.options, 'jobids', None):
                self.options.jobids = validateJobids(self.options.jobids, splitting != 'Automatic')
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED, colors.NORMAL, failed)
                self.logger.info(msg)
            else:
                self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN, colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
            returndict = getcommand.__call__(self, subresource='logs2')
            if ('success' in returndict and not returndict['success']) or \
               ('failed' in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)

        return returndict
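The getColumn helper used above is not defined in these snippets. Judging from the manual column lookups in Example 10 (dictresult['desc']['columns'].index(...)), a minimal sketch of what it does could be:

def getColumn(dictresult, columnName):
    # 'desc'/'columns' lists the column names of the task-database row returned by the
    # 'search' subresource, while 'result' holds the values in the same order.
    columnIndex = dictresult['desc']['columns'].index(columnName)
    return dictresult['result'][columnIndex]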
Example 2
    def __call__(self):
        server = self.crabserver

        msg = "Continuing submission of task %s" % (
            self.cachedinfo['RequestName'])
        self.logger.debug(msg)

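        # Build the 'proceed' request that asks the server to continue the submission of the cached task.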
        request = {
            'workflow': self.cachedinfo['RequestName'],
            'subresource': 'proceed'
        }

        self.logger.info("Sending the request to the server")
        self.logger.debug("Submitting %s " % str(request))
        result, status, reason = server.post(api=self.defaultApi,
                                             data=urlencode(request))
        self.logger.debug("Result: %s" % (result))
        if status != 200:
            msg = "Problem continuing task submission:\ninput:%s\noutput:%s\nreason:%s" \
                  % (str(request), str(result), str(reason))
            raise RESTCommunicationException(msg)
        msg = "Task continuation request successfully sent to the CRAB3 server"
        if result['result'][0]['result'] != 'ok':
            msg += "\nServer responded with: '%s'" % (
                result['result'][0]['result'])
            status = {'status': 'FAILED'}
        else:
            status = {'status': 'SUCCESS'}
            self.logger.info("To check task progress, use 'crab status'")
        self.logger.info(msg)

        return status
Example 3
    def __call__(self):
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)

        self.logger.debug('Killing task %s' % self.cachedinfo['RequestName'])
        inputs = {'workflow': self.cachedinfo['RequestName']}
        if self.options.killwarning:
            inputs.update({'killwarning': b64encode(self.options.killwarning)})

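        # Issue the DELETE request; the selected job ids are appended to the encoded payload.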
        dictresult, status, reason = server.delete(
            self.uri, data=urlencode(inputs) + '&' + urlencode(self.jobids))
        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem killing task %s:\ninput:%s\noutput:%s\nreason:%s" % \
                    (self.cachedinfo['RequestName'], str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info("Kill request successfully sent")
        if dictresult['result'][0]['result'] != 'ok':
            resultdict = {'status': 'FAILED'}
            self.logger.info(dictresult['result'][0]['result'])
        else:
            resultdict = {'status': 'SUCCESS'}

        return resultdict
Example 4
    def __call__(self):
        server = self.crabserver

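        # Send the kill request for the cached task; an optional kill warning is attached, base64-encoded.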
        self.logger.debug("Killing task %s" % self.cachedinfo['RequestName'])
        inputs = {'workflow': self.cachedinfo['RequestName']}
        if self.options.killwarning:
            inputs.update({'killwarning': b64encode(self.options.killwarning)})

        dictresult, status, reason = server.delete(api=self.defaultApi,
                                                   data=urlencode(inputs))
        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem killing task %s:\ninput:%s\noutput:%s\nreason:%s" % \
                    (self.cachedinfo['RequestName'], str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info("Kill request successfully sent")
        if dictresult['result'][0]['result'] != 'ok':
            resultdict = {'status': 'FAILED'}
            self.logger.info(dictresult['result'][0]['result'])
        else:
            resultdict = {'status': 'SUCCESS'}

        return resultdict
Example 5
    def __call__(self):
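        # List the tasks submitted since self.date (sent as 'timestamp') and print them as a NAME/STATUS table.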
        server = HTTPRequests(self.serverurl,
                              self.proxyfilename,
                              self.proxyfilename,
                              version=__version__)
        dictresult, status, reason = server.get(self.uri,
                                                data={'timestamp': self.date})
        dictresult = dictresult['result']  #take just the significant part

        if status != 200:
            msg = "Problem retrieving tasks:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.date), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        dictresult.sort()
        dictresult.reverse()

        if self.options.status:
            dictresult = [
                item for item in dictresult if item[1] == self.options.status
            ]

        result = [item[0:2] for item in dictresult]

        today = date.today()

        if not dictresult:
            msg = "No tasks found from %s until %s" % (self.date, today)
            if self.options.status:
                msg += " with status %s" % (self.options.status)
            self.logger.info(msg)
            return result

        msg = "\nList of tasks from %s until %s" % (self.date, today)
        if self.options.status:
            msg += " with status %s" % (self.options.status)
        self.logger.info(msg)
        msg = "Beware that STATUS here does not include information from grid jobs"
        self.logger.info(msg)
        self.logger.info('=' * 80)
        self.logger.info('NAME\t\t\t\t\t\t\t\tSTATUS')
        self.logger.info('=' * 80)
        for item in dictresult:
            name, status = item[0:2]
            self.logger.info('%s\n\t\t\t\t\t\t\t\t%s' % (name, status))
            self.logger.info('-' * 80)
        self.logger.info('\n')

        return result
Example 6
    def __call__(self):
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)

        self.logger.debug('Looking up detailed status of task %s' %
                          self.cachedinfo['RequestName'])
        user = self.cachedinfo['RequestName'].split("_")[2].split(":")[-1]
        verbose = int(self.options.summary or self.options.long
                      or self.options.json)
        if self.options.idle:
            verbose = 2
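        # Ask the server for more job-level detail when --summary/--long/--json (verbose=1) or --idle (verbose=2) is given.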
        dictresult, status, reason = server.get(
            self.uri,
            data={
                'workflow': self.cachedinfo['RequestName'],
                'verbose': verbose
            })
        dictresult = dictresult['result'][0]  #take just the significant part

        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        self.printShort(dictresult, user)

        if 'jobs' in dictresult:
            self.printPublication(dictresult)
            self.printErrors(dictresult)
            # Note several options could be combined
            if self.options.summary:
                self.printSummary(dictresult)
            if self.options.long or self.options.sort:
                sortdict = self.printLong(dictresult,
                                          quiet=(not self.options.long))
                if self.options.sort:
                    self.printSort(sortdict, self.options.sort)
            if self.options.idle:
                self.printIdle(dictresult, user)
            if self.options.json:
                self.logger.info(json.dumps(dictresult['jobs']))

        return dictresult
Example 7
    def __call__(self):
        if self.options.short:
            #Check if splitting is automatic
            try:
                splitting = self.cachedinfo['OriginalConfig'].Data.splitting
            except AttributeError:  # the default setting is 'Automatic'
                splitting = 'Automatic'
            except KeyError:  # a task recreated with 'crab remake' does not have the 'OriginalConfig' key, so it must be fetched from the DB
                splitting = 'Unknown'
            taskname = self.cachedinfo['RequestName']
            inputlist = {'subresource': 'webdir', 'workflow': taskname}
            serverFactory = CRABClient.Emulator.getEmulator('rest')
            server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
            uri = self.getUrl(self.instance, resource = 'task')
            webdir = None
            if splitting != 'Unknown':
                webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)
            if not webdir:
                dictresult, status, reason = server.get(uri, data=inputlist)
                if status != 200:
                    msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                    raise RESTCommunicationException(msg)
                if splitting == 'Unknown':
                    splitting = getColumn(dictresult, 'tm_split_algo')
                webdir = dictresult['result'][0]
                self.logger.info('Server result: %s' % webdir)
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            # check the format of jobids
            self.options.jobids = validateJobids(self.options.jobids, splitting != 'Automatic')
            failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED,colors.NORMAL,failed)
                self.logger.info(msg)
            else:
                self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN,colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
            returndict = getcommand.__call__(self, subresource = 'logs2')
            if ('success' in returndict and not returndict['success']) or \
               ('failed'  in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)

        return returndict
Example 8
    def __call__(self):
        if self.options.short:
            taskname = self.cachedinfo['RequestName']
            inputlist = {'subresource': 'webdir', 'workflow': taskname}
            serverFactory = CRABClient.Emulator.getEmulator('rest')
            server = serverFactory(self.serverurl,
                                   self.proxyfilename,
                                   self.proxyfilename,
                                   version=__version__)
            uri = self.getUrl(self.instance, resource='task')
            webdir = getProxiedWebDir(taskname, self.serverurl, uri,
                                      self.proxyfilename, self.logger.debug)
            if not webdir:
                dictresult, status, reason = server.get(uri, data=inputlist)
                webdir = dictresult['result'][0]
                self.logger.info('Server result: %s' % webdir)
                if status != 200:
                    msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (
                        str(inputlist), str(dictresult), str(reason))
                    raise RESTCommunicationException(msg)
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            failed, success = self.retrieveShortLogs(webdir,
                                                     self.proxyfilename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (
                    colors.RED, colors.NORMAL, failed)
                self.logger.info(msg)
            else:
                self.logger.info(
                    "%sSuccess%s: All files successfully retrieved." %
                    (colors.GREEN, colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
            returndict = getcommand.__call__(self, subresource='logs2')
            if ('success' in returndict and not returndict['success']) or \
               ('failed'  in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)

        return returndict
Example 9
    def __call__(self):

        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version = __version__)

        msg = "Requesting resubmission for failed jobs in task %s" % (self.cachedinfo['RequestName'])
        self.logger.debug(msg)

        configreq = {'workflow': self.cachedinfo['RequestName']}
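        # Copy the optional resubmission parameters (job ids, site lists, runtime, memory, cores, priority) into the request when set.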
        for attr_name in ['jobids', 'sitewhitelist', 'siteblacklist', 'maxjobruntime', 'maxmemory', 'numcores', 'priority']:
            attr_value = getattr(self, attr_name)
            if attr_value:
                configreq[attr_name] = attr_value

        self.logger.info("Sending the request to the server")
        self.logger.debug("Submitting %s " % str(configreq))
        ## TODO: this shouldn't be hard-coded.
        listParams = ['jobids', 'sitewhitelist', 'siteblacklist']
        configreq_encoded = self._encodeRequest(configreq, listParams)
        self.logger.debug("Encoded resubmit request: %s" % (configreq_encoded))

        dictresult, status, reason = server.post(self.uri, data = configreq_encoded)
        self.logger.debug("Result: %s" % (dictresult))
        if status != 200:
            msg = "Problem resubmitting the task to the server:\ninput:%s\noutput:%s\nreason:%s" \
                  % (str(configreq_encoded), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)
        msg = "Resubmit request successfuly sent to the CRAB3 server."
        if dictresult['result'][0]['result'] != 'ok':
            msg += "\nServer responded with: '%s'" % (dictresult['result'][0]['result'])
            returndict = {'status': 'FAILED'}
        else:
            returndict = {'status': 'SUCCESS'}
        self.logger.info(msg)

        return returndict
Example 10
    def __call__(self, **argv):  # pylint: disable=arguments-differ
        ## Retrieve the transferLogs parameter from the task database.
        taskdbparam, configparam = '', ''
        if argv.get('subresource') in ['logs', 'logs2']:
            taskdbparam = 'tm_save_logs'
            configparam = "General.transferLogs"
        elif argv.get('subresource') in ['data', 'data2']:
            taskdbparam = 'tm_transfer_outputs'
            configparam = "General.transferOutputs"

        transferFlag = 'unknown'
        inputlist = {'subresource': 'search', 'workflow': self.cachedinfo['RequestName']}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = getUrl(self.instance, resource = 'task')
        dictresult, status, _ =  server.get(uri, data = inputlist)
        self.logger.debug('Server result: %s' % dictresult)
        splitting = None
        if status == 200:
            if 'desc' in dictresult and 'columns' in dictresult['desc']:
                position = dictresult['desc']['columns'].index(taskdbparam)
                transferFlag = dictresult['result'][position] #= 'T' or 'F'
                position = dictresult['desc']['columns'].index('tm_split_algo')
                splitting = dictresult['result'][position]
            else:
                self.logger.debug("Unable to locate %s in server result." % (taskdbparam))
        ## If transferFlag = False, there is nothing to retrieve.
        if transferFlag == 'F':
            msg = "No files to retrieve. Files not transferred to storage since task configuration parameter %s is False." % (configparam)
            self.logger.info(msg)
            return {'success': {}, 'failed': {}}

        ## Retrieve tm_edm_outfiles, tm_tfile_outfiles and tm_outfiles from the task database and check if they are empty.
        if argv.get('subresource') in ['data', 'data2'] and status == 200:
            if 'desc' in dictresult and 'columns' in dictresult['desc']:
                position = dictresult['desc']['columns'].index('tm_edm_outfiles')
                tm_edm_outfiles = dictresult['result'][position]
                position = dictresult['desc']['columns'].index('tm_tfile_outfiles')
                tm_tfile_outfiles = dictresult['result'][position]
                position = dictresult['desc']['columns'].index('tm_outfiles')
                tm_outfiles = dictresult['result'][position]
            if tm_edm_outfiles == '[]' and tm_tfile_outfiles == '[]' and tm_outfiles == '[]':
                msg  = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                msg += " There are no output files to retrieve, because CRAB could not detect any in the CMSSW configuration"
                msg += " nor was any explicitly specified in the CRAB configuration."
                self.logger.warning(msg)

        #check the format of jobids
        if getattr(self.options, 'jobids', None):
            self.options.jobids = validateJobids(self.options.jobids, splitting != 'Automatic')

        self.processAndStoreJobIds()

        #Retrieving output files location from the server
        self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'])
        inputlist =  [('workflow', self.cachedinfo['RequestName'])]
        inputlist.extend(list(argv.iteritems()))
        if getattr(self.options, 'quantity', None):
            self.logger.debug('Retrieving %s file locations' % self.options.quantity)
            inputlist.append(('limit', self.options.quantity))
        else:
            self.logger.debug('Retrieving all file locations')
            inputlist.append(('limit', -1))
        if getattr(self.options, 'jobids', None):
            self.logger.debug('Retrieving jobs %s' % self.options.jobids)
            inputlist.extend(self.options.jobids)
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data = urllib.urlencode(inputlist))
        self.logger.debug('Server result: %s' % dictresult)

        if status != 200:
            msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

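        # Each entry of the 'result' list describes one output file (jobid, lfn, pfn, ...).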
        totalfiles = len(dictresult['result'])
        fileInfoList = dictresult['result']

        self.insertXrootPfns(fileInfoList)

        if len(fileInfoList) > 0:
            if self.options.dump or self.options.xroot:
                self.logger.debug("Getting url info")
            else:
                self.setDestination()
                self.logger.info("Setting the destination to %s " % self.dest)
            if self.options.xroot:
                self.logger.debug("XRootD urls are requested")
                xrootlfn = ["root://cms-xrd-global.cern.ch/%s" % link['lfn'] for link in fileInfoList]
                self.logger.info("\n".join(xrootlfn))
                returndict = {'xrootd': xrootlfn}
            elif self.options.dump:
                jobid_pfn_lfn_list = sorted(map(lambda x: (x['jobid'], x['pfn'], x['lfn']), fileInfoList)) # pylint: disable=deprecated-lambda
                lastjobid = -1
                filecounter = 1
                msg = ""
                for jobid, pfn, lfn in jobid_pfn_lfn_list:
                    if jobid != lastjobid:
                        msg += "%s=== Files from job %s:" % ('\n' if lastjobid != -1 else '', jobid)
                        lastjobid = jobid
                        filecounter = 1
                    msg += "\n%d) PFN: %s" % (filecounter, pfn)
                    msg += "\n%s  LFN: %s" % (' '*(len(str(filecounter))), lfn)
                    filecounter += 1
                self.logger.info(msg)
                returndict = {'pfn': [pfn for _, pfn, _ in jobid_pfn_lfn_list], 'lfn': [lfn for _, _, lfn in jobid_pfn_lfn_list]}
            else:
                self.logger.info("Retrieving %s files" % (totalfiles))
                arglist = ['--destination', self.dest, '--input', fileInfoList, '--dir', self.options.projdir, \
                           '--proxy', self.proxyfilename, '--parallel', self.options.nparallel, '--wait', self.options.waittime, \
                           '--checksum', self.checksum, '--command', self.command]
                copyoutput = remote_copy(self.logger, arglist)
                successdict, faileddict = copyoutput()
                # Use deepcopy because successdict and faileddict are dicts inside a managed dict that is accessed from multiple threads.
                returndict = {'success': copy.deepcopy(successdict) , 'failed': copy.deepcopy(faileddict)}
        if totalfiles == 0:
            self.logger.info("No files to retrieve.")
            returndict = {'success': {} , 'failed': {}}

        if transferFlag == 'unknown':
            if ('success' in returndict and not returndict['success']) and \
               ('failed'  in returndict and not returndict['failed']):
                msg = "This is normal behavior if %s = False in the task configuration." % (configparam)
                self.logger.info(msg)

        return returndict
Example 11
    def __call__(self, **argv):

        #Retrieving output files location from the server
        self.logger.debug('Retrieving locations for task %s' %
                          self.cachedinfo['RequestName'])
        inputlist = [('workflow', self.cachedinfo['RequestName'])]
        inputlist.extend(list(argv.iteritems()))
        if getattr(self.options, 'quantity', None):
            self.logger.debug('Retrieving %s file locations' %
                              self.options.quantity)
            inputlist.append(('limit', self.options.quantity))
        else:
            self.logger.debug('Retrieving all file locations')
            inputlist.append(('limit', -1))
        if getattr(self.options, 'jobids', None):
            self.logger.debug('Retrieving jobs %s' % self.options.jobids)
            inputlist.extend(self.options.jobids)
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        dictresult, status, reason = server.get(self.uri, data=inputlist)
        self.logger.debug('Server result: %s' % dictresult)
        dictresult = self.processServerResult(dictresult)

        if status != 200:
            msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (
                str(inputlist), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        totalfiles = len(dictresult['result'])
        cpresults = []
        #        for workflow in dictresult['result']:  # TODO: re-enable this when we have resubmissions
        workflow = dictresult['result']  # TODO: for the moment there is only one workflow, so take the whole result
        if len(workflow) > 0:
            if self.options.dump or self.options.xroot:
                self.logger.debug("Getting url info")
            else:
                self.setDestination()
                self.logger.info("Setting the destination to %s " % self.dest)
            if self.options.xroot:
                self.logger.debug("XRootD urls are requested")
                xrootlfn = [
                    "root://cms-xrd-global.cern.ch/%s" % link['lfn']
                    for link in workflow
                ]
                self.logger.info("\n".join(xrootlfn))
                returndict = {'xrootd': xrootlfn}
            elif self.dump:
                jobid_pfn_lfn_list = map(
                    lambda x: (x['jobid'], x['pfn'], x['lfn']), workflow)
                jobid_pfn_lfn_list.sort()
                lastjobid = -1
                filecounter = 1
                msg = ""
                for jobid, pfn, lfn in jobid_pfn_lfn_list:
                    if jobid != lastjobid:
                        msg += "%s=== Files from job %s:" % (
                            '\n' if lastjobid != -1 else '', jobid)
                        lastjobid = jobid
                        filecounter = 1
                    msg += "\n%d) PFN: %s" % (filecounter, pfn)
                    msg += "\n%s  LFN: %s" % (' ' *
                                              (len(str(filecounter))), lfn)
                    filecounter += 1
                self.logger.info(msg)
                returndict = {
                    'pfn': [pfn for _, pfn, _ in jobid_pfn_lfn_list],
                    'lfn': [lfn for _, _, lfn in jobid_pfn_lfn_list]
                }
            else:
                self.logger.info("Retrieving %s files" % (totalfiles))
                arglist = ['--destination', self.dest, '--input', workflow, '--dir', self.options.task, \
                           '--proxy', self.proxyfilename, '--parallel', self.options.nparallel, '--wait', self.options.waittime]
                copyoutput = remote_copy(self.logger, arglist)
                successdict, faileddict = copyoutput()
                # Use deepcopy because successdict and faileddict are dicts inside a managed dict that is accessed from multiple threads.
                returndict = {
                    'success': copy.deepcopy(successdict),
                    'failed': copy.deepcopy(faileddict)
                }
        if totalfiles == 0:
            ## TODO: we should use an API to retrieve from the TaskDB what are the transfer flag values for the task.
            ## If the corresponding transfer flag is False, the user should not expect to be able to retrieve the files.
            self.logger.info("No files to retrieve.")
            returndict = {'success': {}, 'failed': {}}

        return returndict
Example 12
    def __call__(self):
        self.logger.debug("Started submission")
        serverFactory = CRABClient.Emulator.getEmulator('rest')

        uniquerequestname = None

        self.logger.debug("Working on %s" % str(self.requestarea))

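        # Translate the CRAB configuration into the server-side request parameters, applying defaults and type conversions.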
        self.configreq = {'dryrun': 1 if self.options.dryrun else 0}
        for param in parametersMapping['on-server']:
            mustbetype = getattr(types,
                                 parametersMapping['on-server'][param]['type'])
            default = parametersMapping['on-server'][param]['default']
            config_params = parametersMapping['on-server'][param]['config']
            for config_param in config_params:
                attrs = config_param.split('.')
                temp = self.configuration
                for attr in attrs:
                    temp = getattr(temp, attr, None)
                    if temp is None:
                        break
                if temp is not None:
                    self.configreq[param] = temp
                    break
                elif default is not None:
                    self.configreq[param] = default
                    temp = default
                else:
                    ## Parameter not strictly required.
                    pass
            ## Check that the requestname is of the right type.
            ## This is not checked in SubCommand.validateConfig().
            if param == 'workflow':
                if isinstance(self.requestname, mustbetype):
                    self.configreq['workflow'] = self.requestname
            ## Translate boolean flags into integers.
            elif param in ['savelogsflag', 'publication', 'publishgroupname', 'nonprodsw', 'useparent',\
                           'ignorelocality', 'saveoutput', 'oneEventMode', 'nonvaliddata', 'ignoreglobalblacklist']:
                self.configreq[param] = 1 if temp else 0
            ## Translate DBS URL aliases into DBS URLs.
            elif param in ['dbsurl', 'publishdbsurl']:
                if param == 'dbsurl':
                    dbstype = 'reader'
                elif param == 'publishdbsurl':
                    dbstype = 'writer'
                allowed_dbsurls = DBSURLS[dbstype].values()
                allowed_dbsurls_aliases = DBSURLS[dbstype].keys()
                if self.configreq[param] in allowed_dbsurls_aliases:
                    self.configreq[param] = DBSURLS[dbstype][
                        self.configreq[param]]
                elif self.configreq[param].rstrip('/') in allowed_dbsurls:
                    self.configreq[param] = self.configreq[param].rstrip('/')
            elif param == 'scriptexe' and 'scriptexe' in self.configreq:
                self.configreq[param] = os.path.basename(self.configreq[param])

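        # Run the job-type plugin to build the job-specific part of the request and upload the user sandbox to the file cache.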
        jobconfig = {}
        #get the backend URLs from the server external configuration
        serverBackendURLs = server_info('backendurls', self.serverurl,
                                        self.proxyfilename,
                                        getUrl(self.instance, resource='info'))
        #if cacheSSL is specified in the server external configuration we will use it to upload the sandbox
        filecacheurl = serverBackendURLs[
            'cacheSSL'] if 'cacheSSL' in serverBackendURLs else None
        pluginParams = [
            self.configuration, self.proxyfilename, self.logger,
            os.path.join(self.requestarea, 'inputs')
        ]
        crab_job_types = getJobTypes()
        if upper(self.configreq['jobtype']) in crab_job_types:
            plugjobtype = crab_job_types[upper(
                self.configreq['jobtype'])](*pluginParams)
            dummy_inputfiles, jobconfig = plugjobtype.run(filecacheurl)
        else:
            fullname = self.configreq['jobtype']
            basename = os.path.basename(fullname).split('.')[0]
            plugin = addPlugin(fullname)[basename]
            pluginInst = plugin(*pluginParams)
            dummy_inputfiles, jobconfig = pluginInst.run()

        if self.configreq['publication']:
            non_edm_files = jobconfig['tfileoutfiles'] + jobconfig[
                'addoutputfiles']
            if non_edm_files:
                msg = "%sWarning%s: The following output files will not be published, as they are not EDM files: %s" % (
                    colors.RED, colors.NORMAL, non_edm_files)
                self.logger.warning(msg)

        self.configreq.update(jobconfig)
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)

        self.logger.info("Sending the request to the server at %s" %
                         self.serverurl)
        self.logger.debug("Submitting %s " % str(self.configreq))
        ## TODO: this shouldn't be hard-coded.
        listParams = ['addoutputfiles', 'sitewhitelist', 'siteblacklist', 'blockwhitelist', 'blockblacklist', \
                      'tfileoutfiles', 'edmoutfiles', 'runs', 'lumis', 'userfiles', 'scriptargs', 'extrajdl']
        self.configreq_encoded = self._encodeRequest(self.configreq,
                                                     listParams)
        self.logger.debug('Encoded submit request: %s' %
                          (self.configreq_encoded))

        dictresult, status, reason = server.put(self.uri,
                                                data=self.configreq_encoded)
        self.logger.debug("Result: %s" % dictresult)
        if status != 200:
            msg = "Problem sending the request:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.configreq), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)
        elif 'result' in dictresult:
            uniquerequestname = dictresult["result"][0]["RequestName"]
        else:
            msg = "Problem during submission, no request ID returned:\ninput:%s\noutput:%s\nreason:%s" \
                   % (str(self.configreq), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        tmpsplit = self.serverurl.split(':')
        createCache(self.requestarea,
                    tmpsplit[0],
                    tmpsplit[1] if len(tmpsplit) > 1 else '',
                    uniquerequestname,
                    voRole=self.voRole,
                    voGroup=self.voGroup,
                    instance=self.instance,
                    originalConfig=self.configuration)

        self.logger.info(
            "%sSuccess%s: Your task has been delivered to the %s CRAB3 server."
            % (colors.GREEN, colors.NORMAL, self.instance))
        if not (self.options.wait or self.options.dryrun):
            self.logger.info("Task name: %s" % uniquerequestname)
            projDir = os.path.join(
                getattr(self.configuration.General, 'workArea', '.'),
                self.requestname)
            self.logger.info("Project dir: %s" % projDir)
            self.logger.info(
                "Please use 'crab status -d %s' to check how the submission process proceeds.",
                projDir)
        else:
            targetTaskStatus = 'UPLOADED' if self.options.dryrun else 'SUBMITTED'
            checkStatusLoop(self.logger, server, self.uri, uniquerequestname,
                            targetTaskStatus, self.name)

        if self.options.dryrun:
            self.printDryRunResults(*self.executeTestRun(filecacheurl))

        self.logger.debug("About to return")

        return {
            'requestname': self.requestname,
            'uniquerequestname': uniquerequestname
        }
Example 13
    def __call__(self):

        self.logger.info('Getting the tarball hash key')
        inputlist = {
            'subresource': 'search',
            'workflow': self.cachedinfo['RequestName']
        }
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        uri = self.getUrl(self.instance, resource='task')
        dictresult, status, reason = server.get(uri, data=inputlist)
        if status == 200:
            if 'desc' in dictresult and 'columns' in dictresult['desc']:
                position = dictresult['desc']['columns'].index(
                    'tm_user_sandbox')
                tm_user_sandbox = dictresult['result'][position]
                hashkey = tm_user_sandbox.replace(".tar.gz", "")
            else:
                self.logger.info(
                    '%sError%s: Could not find tarball or there is more than one tarball'
                    % (colors.RED, colors.NORMAL))
                raise ConfigurationException

        #checking task status

        self.logger.info('Checking task status')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        dictresult, status, reason = server.get(self.uri,
                                           data={
                                               'workflow':
                                               self.cachedinfo['RequestName'],
                                               'verbose':
                                               0
                                           })

        dictresult = dictresult['result'][0]  #take just the significant part

        if status != 200:
            msg = "Problem retrieving task status:\ninput: %s\noutput: %s\nreason: %s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = [
            'KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED'
        ]
        if dictresult['status'] not in accepstate:
            msg = ('%sError%s: Only tasks in one of these statuses can be purged: {0}'
                   .format(accepstate) % (colors.RED, colors.NORMAL))
            raise ConfigurationException(msg)

        #getting the cache url
        cacheresult = {}
        scheddresult = {}
        gsisshdict = {}
        if not self.options.scheddonly:
            baseurl = getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl,
                                   self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info(
                'Attempting to remove task file from crab server cache')

            try:
                ufcresult = ufc.removeFile(hashkey)
            except HTTPException as re:
                if 'X-Error-Info' in re.headers and 'Not such file' in re.headers[
                        'X-Error-Info']:
                    self.logger.info(
                        '%sError%s: Failed to find task file in crab server cache; the file might have been already purged'
                        % (colors.RED, colors.NORMAL))
                    raise

            if ufcresult == '':
                self.logger.info(
                    '%sSuccess%s: Successfully removed task files from crab server cache'
                    % (colors.GREEN, colors.NORMAL))
                cacheresult = 'SUCCESS'
            else:
                self.logger.info(
                    '%sError%s: Failed to remove task files from crab server cache'
                    % (colors.RED, colors.NORMAL))
                cacheresult = 'FAILED'

        if not self.options.cacheonly:
            self.logger.info('Getting schedd address')
            baseurl = self.getUrl(self.instance, resource='info')
            try:
                scheddaddress = server_info(
                    'scheddaddress',
                    self.serverurl,
                    self.proxyfilename,
                    baseurl,
                    workflow=self.cachedinfo['RequestName'])
            except HTTPException as he:
                self.logger.info('%sError%s: Failed to get schedd address' %
                                 (colors.RED, colors.NORMAL))
                raise HTTPException(he)
            self.logger.debug('%sSuccess%s: Successfully got schedd address' %
                              (colors.GREEN, colors.NORMAL))
            self.logger.debug('Schedd address: %s' % scheddaddress)
            self.logger.info('Attempting to remove task from schedd')

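            # Remove the task directory from the schedd spool via gsissh.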
            gssishrm = 'gsissh -o ConnectTimeout=60 -o PasswordAuthentication=no ' + scheddaddress + ' rm -rf ' + self.cachedinfo[
                'RequestName']
            self.logger.debug('gsissh command: %s' % gssishrm)

            delprocess = subprocess.Popen(gssishrm,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          shell=True)
            stdout, stderr = delprocess.communicate()
            exitcode = delprocess.returncode

            if exitcode == 0:
                self.logger.info(
                    '%sSuccess%s: Successfully removed task from schedd' %
                    (colors.GREEN, colors.NORMAL))
                scheddresult = 'SUCCESS'
                gsisshdict = {}
            else:
                self.logger.info(
                    '%sError%s: Failed to remove task from schedd' %
                    (colors.RED, colors.NORMAL))
                scheddresult = 'FAILED'
                self.logger.debug(
                    'gsissh stdout: %s\ngsissh stderr: %s\ngsissh exitcode: %s'
                    % (stdout, stderr, exitcode))
                gsisshdict = {
                    'stdout': stdout,
                    'stderr': stderr,
                    'exitcode': exitcode
                }

            return {
                'cacheresult': cacheresult,
                'scheddresult': scheddresult,
                'gsiresult': gsisshdict
            }
Example 14
    def checkStatusLoop(self, server, uniquerequestname):
        self.logger.info("Waiting for task to be processed")

        maxwaittime = 900  # in seconds; reduced to a 15-minute maximum wait (the original 1 hour is too long)
        starttime = currenttime = time.time()
        endtime = currenttime + maxwaittime

        startimestring = time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(starttime))
        endtimestring = time.strftime('%Y-%m-%d %H:%M:%S',
                                      time.localtime(endtime))

        self.logger.debug("Start time:%s" % startimestring)
        self.logger.debug("Max wait time: %s s until : %s" %
                          (maxwaittime, endtimestring))

        #self.logger.debug('Looking up detailed status of task %s' % uniquerequestname)

        continuecheck = True
        tmpresult = None
        self.logger.info("Checking task status")

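        # Poll the server until the task reaches FAILED, SUBMITTED/UNKNOWN or another non-waiting state, or the maximum wait time is exceeded.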
        while continuecheck:
            currenttime = time.time()
            querytimestring = time.strftime('%Y-%m-%d %H:%M:%S',
                                            time.localtime(currenttime))

            self.logger.debug('Looking up detailed status of task %s' %
                              uniquerequestname)

            dictresult, status, reason = server.get(
                self.uri, data={'workflow': uniquerequestname})
            dictresult = dictresult['result'][0]

            if status != 200:
                self.logger.info(
                    "The task has been submitted, \nImpossible to check task status now. \nPlease check again later by using: crab status -d <crab project directory>"
                )
                msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (
                    str(uniquerequestname), str(dictresult), str(reason))
                raise RESTCommunicationException(msg)

            self.logger.debug("Query Time:%s Task status:%s" %
                              (querytimestring, dictresult['status']))

            if dictresult['status'] != tmpresult:
                self.logger.info("Task status:%s" % dictresult['status'])
                tmpresult = dictresult['status']

                if dictresult['status'] == 'FAILED':
                    continuecheck = False
                    self.logger.info(
                        "%sError%s: The submission of your task failed. Please use 'crab status -d <crab project directory>' to get the error message"
                        % (colors.RED, colors.NORMAL))
                elif dictresult['status'] == 'SUBMITTED' or dictresult[
                        'status'] == 'UNKNOWN':  # until the node_state file is available the status is unknown
                    continuecheck = False
                    self.logger.info(
                        "%sSuccess%s: Your task has been processed and your jobs have been submitted successfully"
                        % (colors.GREEN, colors.NORMAL))
                elif dictresult['status'] in ['NEW', 'HOLDING', 'QUEUED']:
                    self.logger.info("Please wait...")
                    time.sleep(30)  # the original 60 second query time is too long
                else:
                    continuecheck = False
                    self.logger.info("Please check crab.log ")
                    self.logger.debug(
                        "CRABS Status other than FAILED,SUBMITTED,NEW,HOLDING,QUEUED"
                    )

            if currenttime > endtime:
                continuecheck = False
                self.logger.info(
                    "Exceed maximum query time \n Please check again later by using: crab status -d <crab project directory>"
                )
                waittime = currenttime - starttime
                self.logger.debug("Wait time:%s" % waittime)
                break
        print('\a')  # generate an audio bell
        self.logger.debug("Ended submission process")
Example 15
    def __call__(self):

        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)

        if self.jobids:
            msg = "Requesting resubmission of jobs %s in task %s" % (
                self.jobids, self.cachedinfo['RequestName'])
        else:
            msg = "Requesting resubmission of failed jobs in task %s" % (
                self.cachedinfo['RequestName'])
        self.logger.debug(msg)

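        # Build the resubmit request: job ids and site lists come from the command attributes, the numeric limits from the options.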
        configreq = {
            'workflow': self.cachedinfo['RequestName'],
            'subresource': 'resubmit'
        }
        for attr_name in ['jobids', 'sitewhitelist', 'siteblacklist']:
            attr_value = getattr(self, attr_name)
            ## For 'jobids', 'sitewhitelist' and 'siteblacklist', attr_value is either a list of strings or None.
            if attr_value is not None:
                configreq[attr_name] = attr_value
        for attr_name in [
                'maxjobruntime', 'maxmemory', 'numcores', 'priority'
        ]:
            attr_value = getattr(self.options, attr_name)
            ## For 'maxjobruntime', 'maxmemory', 'numcores', and 'priority', attr_value is either an integer or None.
            if attr_value is not None:
                configreq[attr_name] = attr_value
        configreq['force'] = 1 if self.options.force else 0
        configreq['publication'] = 1 if self.options.publication else 0

        self.logger.info("Sending resubmit request to the server.")
        self.logger.debug("Submitting %s " % str(configreq))
        configreq_encoded = self._encodeRequest(configreq)
        self.logger.debug("Encoded resubmit request: %s" % (configreq_encoded))

        dictresult, status, reason = server.post(self.uri,
                                                 data=configreq_encoded)
        self.logger.debug("Result: %s" % (dictresult))
        if status != 200:
            msg = "Problem resubmitting the task to the server:\ninput:%s\noutput:%s\nreason:%s" \
                  % (str(configreq_encoded), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)
        self.logger.info("Resubmit request sent to the server.")
        if dictresult['result'][0]['result'] != 'ok':
            msg = "Server responded with: '%s'" % (
                dictresult['result'][0]['result'])
            self.logger.info(msg)
            returndict = {'status': 'FAILED'}
        else:
            if not self.options.wait:
                msg = "Please use 'crab status' to check how the resubmission process proceeds."
                msg += "\nNotice it may take a couple of minutes for the resubmission to get fully processed."
                self.logger.info(msg)
            else:
                targetTaskStatus = 'SUBMITTED'
                checkStatusLoop(self.logger, server, self.uri,
                                self.cachedinfo['RequestName'],
                                targetTaskStatus, self.name)
            returndict = {'status': 'SUCCESS'}

        return returndict
Example 16
    def __call__(self):

        self.logger.info('Getting the tarball hash key')

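        # Locate the single sandbox tarball under <requestarea>/inputs; its checksum is used later as the cache hash key.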
        tarballdir = glob.glob(self.requestarea + '/inputs/*.tgz')
        if len(tarballdir) != 1:
            self.logger.info(
                '%sError%s: Could not find tarball or there is more than one tarball'
                % (colors.RED, colors.NORMAL))
            raise ConfigurationException
        tarballdir = tarballdir[0]

        #checking task status

        self.logger.info('Checking task status')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        dictresult, status, reason = server.get(self.uri,
                                           data={
                                               'workflow':
                                               self.cachedinfo['RequestName'],
                                               'verbose':
                                               0
                                           })

        dictresult = dictresult['result'][0]  #take just the significant part

        if status != 200:
            msg = "Problem retrieving task status:\ninput: %s\noutput: %s\nreason: %s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = [
            'KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED'
        ]
        if dictresult['status'] not in accepstate:
            msg = ('%sError%s: Only tasks in one of these statuses can be purged: {0}'
                   .format(accepstate) % (colors.RED, colors.NORMAL))
            raise ConfigurationException(msg)

        #getting the cache url
        cacheresult = {}
        scheddresult = {}
        gsisshdict = {}
        if not self.options.scheddonly:
            baseurl = getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl,
                                   self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            hashkey = ufc.checksum(tarballdir)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info(
                'Attempting to remove task file from crab server cache')

            try:
                ufcresult = ufc.removeFile(hashkey)
            except HTTPException as re:
                if 'X-Error-Info' in re.headers and 'Not such file' in re.headers['X-Error-Info']:
                    self.logger.info(
                        '%sError%s: Failed to find task file in crab server cache; the file might have been already purged'
                        % (colors.RED, colors.NORMAL))
                    raise

            if ufcresult == '':
                self.logger.info(
                    '%sSuccess%s: Successfully removed task files from crab server cache'
                    % (colors.GREEN, colors.NORMAL))
                cacheresult = 'SUCCESS'
            else:
                self.logger.info(
                    '%sError%s: Failed to remove task files from crab server cache'
                    % (colors.RED, colors.NORMAL))
                cacheresult = 'FAILED'
Example 17
    def __call__(self):
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)

        self.logger.debug('Looking up report for task %s' %
                          self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(
            self.uri,
            data={
                'workflow': self.cachedinfo['RequestName'],
                'subresource': 'report'
            })

        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem retrieving report:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        returndict = {}

        publication = dictresult['result'][0]['publication']

        if self.options.recovery == 'notPublished' and not publication:
            msg = "%sError%s:" % (colors.RED, colors.NORMAL)
            msg += " The option --recovery=%s has been specified" % (
                self.options.recovery)
            msg += " (which instructs to determine the not processed lumis based on published datasets),"
            msg += " but publication has been disabled in the CRAB configuration."
            raise ConfigurationException(msg)

        onlyDBSSummary = False
        if not dictresult['result'][0]['lumisToProcess'] or not dictresult[
                'result'][0]['runsAndLumis']:
            msg = "%sError%s:" % (colors.RED, colors.NORMAL)
            msg += " Cannot get all the needed information for the report."
            msg += " Notice, if your task has been submitted more than 30 days ago, then everything has been cleaned."
            self.logger.info(msg)
            if not publication:
                return returndict
            onlyDBSSummary = True

        def _getNumFiles(jobs, fileType):
            files = set()
            for dummy_jobid, reports in jobs.iteritems():
                for rep in reports:
                    if rep['type'] == fileType:
                        # the split is done to remove the jobnumber at the end of the input file lfn
                        files.add('_'.join(rep['lfn'].split('_')[:-1]))
            return len(files)

        def _getNumEvents(jobs, fileType):
            numEvents = 0
            for dummy_jobid, reports in jobs.iteritems():
                for rep in reports:
                    if rep['type'] == fileType:
                        numEvents += rep['events']
            return numEvents

        ## Extract the reports of the input files.
        poolInOnlyRes = {}
        for jobid, reports in dictresult['result'][0][
                'runsAndLumis'].iteritems():
            poolInOnlyRes[jobid] = [
                rep for rep in reports if rep['type'] == 'POOLIN'
            ]

        ## Calculate how many input files have been processed.
        numFilesProcessed = _getNumFiles(
            dictresult['result'][0]['runsAndLumis'], 'POOLIN')
        returndict['numFilesProcessed'] = numFilesProcessed

        ## Calculate how many events have been read.
        numEventsRead = _getNumEvents(dictresult['result'][0]['runsAndLumis'],
                                      'POOLIN')
        returndict['numEventsRead'] = numEventsRead

        ## Calculate how many events have been written.
        numEventsWritten = {}
        for filetype in ['EDM', 'TFile', 'FAKE']:
            numEventsWritten[filetype] = _getNumEvents(
                dictresult['result'][0]['runsAndLumis'], filetype)
        returndict['numEventsWritten'] = numEventsWritten

        ## Get the lumis in the input dataset.
        inputDatasetLumis = dictresult['result'][0]['inputDataset']['lumis']
        returndict['inputDatasetLumis'] = inputDatasetLumis

        ## Get the lumis split across files in the input dataset.
        inputDatasetDuplicateLumis = dictresult['result'][0]['inputDataset'][
            'duplicateLumis']
        returndict['inputDatasetDuplicateLumis'] = inputDatasetDuplicateLumis

        ## Get the lumis that the jobs had to process. This must be a subset of input
        ## dataset lumis & lumi-mask.
        lumisToProcessPerJob = dictresult['result'][0]['lumisToProcess']
        lumisToProcess = {}
        for jobid in lumisToProcessPerJob.keys():
            for run, lumiRanges in lumisToProcessPerJob[jobid].iteritems():
                if run not in lumisToProcess:
                    lumisToProcess[run] = []
                for lumiRange in lumiRanges:
                    lumisToProcess[run].extend(
                        range(lumiRange[0], lumiRange[1] + 1))
        lumisToProcess = LumiList(runsAndLumis=lumisToProcess).getCompactList()
        returndict['lumisToProcess'] = lumisToProcess
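        ## Illustration with hypothetical numbers: a job reporting run '1' with lumi
        ## ranges [[5, 7], [10, 10]] is expanded above to [5, 6, 7, 10], and
        ## getCompactList() folds the merged per-run lists back into [begin, end] ranges.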

        ## Get the lumis that have been processed.
        processedLumis = BasicJobType.mergeLumis(poolInOnlyRes)
        returndict['processedLumis'] = processedLumis

        ## Get the run-lumi and number of events information about the output datasets.
        outputDatasetsInfo = dictresult['result'][0]['outputDatasets']
        outputDatasetsLumis = {}
        outputDatasetsNumEvents = {}
        if publication:
            for dataset, info in outputDatasetsInfo.iteritems():
                if info['lumis']:
                    outputDatasetsLumis[dataset] = info['lumis']
                outputDatasetsNumEvents[dataset] = info['numEvents']
        returndict['outputDatasetsLumis'] = outputDatasetsLumis
        returndict['outputDatasetsNumEvents'] = outputDatasetsNumEvents
        numOutputDatasets = len(outputDatasetsInfo)

        ## Get the duplicate runs-lumis in the output files. For this, use the run-lumi
        ## information of the input files. Why not use the output files directly?
        ## Because not all types of output files have run-lumi information in their
        ## filemetadata (note: the run-lumi information in the filemetadata is a copy
        ## of the corresponding information in the FJR). For example, output files
        ## produced by TFileService do not have run-lumi information in the FJR. On the
        ## other hand, input files always have run-lumi information in the FJR, which
        ## lists the runs-lumis in the input file that have been processed by the
        ## corresponding job. And of course, the run-lumi information of an output file
        ## produced by job X should be the union (as a set) of the run-lumi
        ## information of the input files to job X.
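        ## For example (hypothetical): if the input files of two different jobs both
        ## contain run 1, lumi 7, that lumi appears twice in outputFilesLumis for run
        ## '1' below, so getDuplicateLumis() can flag it as written more than once.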
        outputFilesLumis = {}
        for jobid, reports in poolInOnlyRes.iteritems():
            lumiDict = {}
            for rep in reports:
                for run, lumis in literal_eval(rep['runlumi']).iteritems():
                    lumiDict.setdefault(str(run), []).extend(map(int, lumis))
            for run, lumis in lumiDict.iteritems():
                outputFilesLumis.setdefault(run, []).extend(list(set(lumis)))
        outputFilesDuplicateLumis = BasicJobType.getDuplicateLumis(
            outputFilesLumis)
        returndict['outputFilesDuplicateLumis'] = outputFilesDuplicateLumis

        ## Calculate the not processed runs-lumis in one of three ways:
        ## 1) The lumis that were supposed to be processed by all jobs minus the lumis
        ##    that were processed by finished (but not necessarily published) jobs.
        ## 2) The lumis that were supposed to be processed by all jobs minus the lumis
        ##    published in all the output datasets.
        ## 3) The lumis that were supposed to be processed by jobs whose status is
        ##    'failed'.
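        ## A concrete illustration of method 1 (hypothetical numbers): with lumis to
        ## process {'1': [[1, 10]]} and processed lumis {'1': [[1, 5]]}, the
        ## subtraction leaves {'1': [[6, 10]]} as the 'notFinished' lumis.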
        notProcessedLumis = {}
        notProcLumisCalcMethMsg = "The '%s' lumis were calculated as:" % (
            self.options.recovery)
        if self.options.recovery == 'notFinished':
            notProcessedLumis = BasicJobType.subtractLumis(
                lumisToProcess, processedLumis)
            notProcLumisCalcMethMsg += " the lumis to process minus the processed lumis."
        elif self.options.recovery == 'notPublished':
            publishedLumis = {}
            firstdataset = True
            for dataset in outputDatasetsLumis.keys():
                if firstdataset:
                    publishedLumis = outputDatasetsLumis[dataset]
                    firstdataset = False
                else:
                    publishedLumis = BasicJobType.intersectLumis(
                        publishedLumis, outputDatasetsLumis[dataset])
            notProcessedLumis = BasicJobType.subtractLumis(
                lumisToProcess, publishedLumis)
            notProcLumisCalcMethMsg += " the lumis to process"
            if numOutputDatasets > 1:
                notProcLumisCalcMethMsg += " minus the lumis published in all the output datasets."
            else:
                notProcLumisCalcMethMsg += " minus the lumis published in the output dataset."
        elif self.options.recovery == 'failed':
            for jobid, status in dictresult['result'][0][
                    'statusPerJob'].iteritems():
                if status in ['failed']:
                    for run, lumiRanges in lumisToProcessPerJob[
                            jobid].iteritems():
                        if run not in notProcessedLumis:
                            notProcessedLumis[run] = []
                        for lumiRange in lumiRanges:
                            notProcessedLumis[run].extend(
                                range(lumiRange[0], lumiRange[1] + 1))
            notProcessedLumis = LumiList(
                runsAndLumis=notProcessedLumis).getCompactList()
            notProcLumisCalcMethMsg += " the lumis to process by jobs in status 'failed'."
        returndict['notProcessedLumis'] = notProcessedLumis

        ## Create the output directory if it doesn't exist.
        if self.options.outdir:
            jsonFileDir = self.options.outdir
        else:
            jsonFileDir = os.path.join(self.requestarea, 'results')
        self.logger.info("Will save lumi files into output directory %s" %
                         (jsonFileDir))
        if not os.path.exists(jsonFileDir):
            self.logger.debug("Creating directory %s" % (jsonFileDir))
            os.makedirs(jsonFileDir)

        ## Create the report JSON files and print a report summary:
        ## 1) First the summary that depends solely on successfully finished jobs (and
        ##    other general information about the task, but not on failed/running jobs).
        if not onlyDBSSummary:
            self.logger.info("Summary from jobs in status 'finished':")
            msg = "  Number of files processed: %d" % (numFilesProcessed)
            msg += "\n  Number of events read: %d" % (numEventsRead)
            msg += "\n  Number of events written in EDM files: %d" % (
                numEventsWritten.get('EDM', 0))
            msg += "\n  Number of events written in TFileService files: %d" % (
                numEventsWritten.get('TFile', 0))
            msg += "\n  Number of events written in other type of files: %d" % (
                numEventsWritten.get('FAKE', 0))
            self.logger.info(msg)
            if processedLumis:
                with open(os.path.join(jsonFileDir, 'processedLumis.json'),
                          'w') as jsonFile:
                    json.dump(processedLumis, jsonFile)
                    jsonFile.write("\n")
                    self.logger.info(
                        "  Processed lumis written to processedLumis.json")
            if notProcessedLumis:
                filename = self.options.recovery + "Lumis.json"
                with open(os.path.join(jsonFileDir, filename),
                          'w') as jsonFile:
                    json.dump(notProcessedLumis, jsonFile)
                    jsonFile.write("\n")
                    self.logger.info(
                        "  %sWarning%s: '%s' lumis written to %s" %
                        (colors.RED, colors.NORMAL, self.options.recovery,
                         filename))
                self.logger.info("           %s" % (notProcLumisCalcMethMsg))
            if outputFilesDuplicateLumis:
                with open(
                        os.path.join(jsonFileDir,
                                     'outputFilesDuplicateLumis.json'),
                        'w') as jsonFile:
                    json.dump(outputFilesDuplicateLumis, jsonFile)
                    jsonFile.write("\n")
                    self.logger.info(
                        "  %sWarning%s: Duplicate lumis in output files written to outputFilesDuplicateLumis.json"
                        % (colors.RED, colors.NORMAL))
        ## 2) Then the summary about output datasets in DBS. For this, publication must
        ##    be True and the output files must be publishable.
        if publication and outputDatasetsInfo:
            if onlyDBSSummary:
                self.logger.info(
                    "Will provide a short report with information found in DBS."
                )
            self.logger.info("Summary from output datasets in DBS:")
            if outputDatasetsNumEvents:
                msg = "  Number of events:"
                for dataset, numEvents in outputDatasetsNumEvents.iteritems():
                    msg += "\n    %s: %d" % (dataset, numEvents)
                self.logger.info(msg)
            if outputDatasetsLumis:
                with open(
                        os.path.join(jsonFileDir, 'outputDatasetsLumis.json'),
                        'w') as jsonFile:
                    json.dump(outputDatasetsLumis, jsonFile)
                    jsonFile.write("\n")
                    self.logger.info(
                        "  Output datasets lumis written to outputDatasetsLumis.json"
                    )
        ## 3) Finally additional files that can be useful for debugging.
        if inputDatasetLumis or inputDatasetDuplicateLumis or lumisToProcess:
            self.logger.info("Additional report lumi files:")
        if inputDatasetLumis:
            with open(os.path.join(jsonFileDir, 'inputDatasetLumis.json'),
                      'w') as jsonFile:
                json.dump(inputDatasetLumis, jsonFile)
                jsonFile.write("\n")
                self.logger.info(
                    "  Input dataset lumis (from DBS, at task submission time) written to inputDatasetLumis.json"
                )
        if inputDatasetDuplicateLumis:
            with open(
                    os.path.join(jsonFileDir,
                                 'inputDatasetDuplicateLumis.json'),
                    'w') as jsonFile:
                json.dump(inputDatasetDuplicateLumis, jsonFile)
                jsonFile.write("\n")
                self.logger.info(
                    "  Input dataset duplicate lumis (from DBS, at task submission time) written to inputDatasetDuplicateLumis.json"
                )
        if lumisToProcess:
            with open(os.path.join(jsonFileDir, 'lumisToProcess.json'),
                      'w') as jsonFile:
                json.dump(lumisToProcess, jsonFile)
                jsonFile.write("\n")
                self.logger.info(
                    "  Lumis to process written to lumisToProcess.json")

        return returndict
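
The recovery logic above relies on BasicJobType.subtractLumis() and intersectLumis() to do set arithmetic on compact {run: [[begin, end], ...]} dictionaries. The standalone sketch below reproduces the subtraction on plain (run, lumi) pairs; it is a simplified illustration under that assumption, not the CRAB implementation, and subtract_lumis/_expand are hypothetical names.

def _expand(compact):
    # Turn {'run': [[begin, end], ...]} into a flat set of (run, lumi) pairs.
    pairs = set()
    for run, ranges in compact.items():
        for begin, end in ranges:
            pairs.update((run, lumi) for lumi in range(begin, end + 1))
    return pairs

def subtract_lumis(superset, subset):
    # Return the (run, lumi) pairs present in superset but not in subset.
    return sorted(_expand(superset) - _expand(subset))

if __name__ == '__main__':
    to_process = {'1': [[1, 10]]}
    processed = {'1': [[1, 5]]}
    print(subtract_lumis(to_process, processed))
    # [('1', 6), ('1', 7), ('1', 8), ('1', 9), ('1', 10)]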
Example n. 18
0
def checkStatusLoop(logger, server, uri, uniquerequestname, targetstatus,
                    cmdname):
    logger.info("Waiting for task to be processed")

    maxwaittime = 900  # in seconds; reduced to a 15-minute maximum wait time (the original 1 hour was too long)
    starttime = currenttime = time.time()
    endtime = currenttime + maxwaittime

    starttimestring = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(starttime))
    endtimestring = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(endtime))

    logger.debug("Start time: %s" % (starttimestring))
    logger.debug("Max wait time: %s s until: %s" %
                 (maxwaittime, endtimestring))

    #logger.debug('Looking up detailed status of task %s' % uniquerequestname)

    continuecheck = True
    tmpresult = None
    logger.info("Checking task status")

    while continuecheck:
        currenttime = time.time()
        querytimestring = time.strftime('%Y-%m-%d %H:%M:%S',
                                        time.localtime(currenttime))

        logger.debug("Looking up detailed status of task %s" %
                     (uniquerequestname))

        dictresult, status, reason = server.get(
            uri, data={'workflow': uniquerequestname})
        dictresult = dictresult['result'][0]

        if status != 200:
            msg = "Error when trying to check the task status."
            msg += " Please check the task status later using 'crab status'."
            logger.error(msg)
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (
                str(uniquerequestname), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        logger.debug("Query Time: %s Task status: %s" %
                     (querytimestring, dictresult['status']))

        logger.info("Task status: %s" % (dictresult['status']))
        if dictresult['status'] != tmpresult:
            tmpresult = dictresult['status']
            if dictresult['status'] in ['SUBMITFAILED', 'RESUBMITFAILED']:
                continuecheck = False
                msg = "%sError%s:" % (colors.RED, colors.NORMAL)
                msg += " The %s of your task has failed." % (
                    "resubmission" if cmdname == "resubmit" else "submission")
                logger.error(msg)
                if dictresult['taskFailureMsg']:
                    msg = "%sFailure message%s:" % (colors.RED, colors.NORMAL)
                    msg += "\t%s" % (dictresult['taskFailureMsg'].replace(
                        '\n', '\n\t\t\t'))
                    logger.error(msg)
            elif dictresult['status'] in [
                    'SUBMITTED', 'UPLOADED', 'UNKNOWN'
            ]:  # until the node_state file is available, the status is UNKNOWN
                continuecheck = False
            else:
                logger.info("Please wait...")
                time.sleep(30)
        elif dictresult['status'] in ['NEW', 'HOLDING', 'QUEUED', 'RESUBMIT']:
            logger.info("Please wait...")
            time.sleep(30)
        else:
            continuecheck = False
            logger.info("Please check crab.log")
            logger.debug(
                "Task status other than SUBMITFAILED, RESUBMITFAILED, SUBMITTED, UPLOADED, NEW, HOLDING, QUEUED, RESUBMIT"
            )
        ## Break the loop if we were waiting already too much.
        if currenttime > endtime:
            continuecheck = False
            msg = "Maximum query time exceeded."
            msg += " Please check the status of the %s later using 'crab status'." % (
                "resubmission" if cmdname == "resubmit" else "submission")
            logger.info(msg)
            waittime = currenttime - starttime
            logger.debug("Wait time: %s" % (waittime))

    if targetstatus == 'SUBMITTED':
        if tmpresult == 'SUBMITTED':
            msg = "%sSuccess%s:" % (colors.GREEN, colors.NORMAL)
            msg += " Your task has been processed and your jobs have been %s successfully." % (
                "resubmitted" if cmdname == "resubmit" else "submitted")
            logger.info(msg)
        elif currenttime < endtime and tmpresult not in [
                'SUBMITFAILED', 'RESUBMITFAILED'
        ]:
            msg = "The CRAB3 server finished processing your task."
            msg += " Use 'crab status' to see if your jobs have been %s successfully." % (
                "resubmitted" if cmdname == "resubmit" else "submitted")
            logger.info(msg)

    print('\a')  # Generate an audible bell
    logger.debug("Ended %s process." %
                 ("resubmission" if cmdname == "resubmit" else "submission"))