Beispiel #1
0
    def validateOptions(self):
        """
        Run the base SubCommand validation, then store the validated job ids.

        ``self.jobids`` is left as an empty string when the options carry no
        (or an empty) 'jobids' value; otherwise it holds whatever
        ``validateJobids`` returns for that value.
        """
        SubCommand.validateOptions(self)

        # Set the default first so the attribute always exists.
        self.jobids = ''
        requested = getattr(self.options, 'jobids', None)
        if not requested:
            return
        # Check the format of the job ids.
        self.jobids = validateJobids(requested)
Beispiel #2
0
    def __call__(self):  # pylint: disable=arguments-differ
        """
        Retrieve the log files of the task.

        When ``self.options.short`` is set, the task is looked up on the server,
        the job ids (if given) are validated, the destination is set and the
        short logs are fetched with ``retrieveShortLogs``. Otherwise the work is
        delegated to ``getcommand.__call__`` with the 'logs2' subresource.

        Returns the dictionary produced by the chosen path; in short mode it has
        the keys 'success' and 'failed'.

        Raises RESTCommunicationException when the web directory has to be taken
        from the server answer and the server did not reply with status 200.
        """
        if self.options.short:
            taskname = self.cachedinfo['RequestName']
            inputlist = {'subresource': 'search', 'workflow': taskname}
            server = self.crabserver
            webdir = getProxiedWebDir(crabserver=self.crabserver, task=taskname, logFunction=self.logger.debug)
            dictresult, status, reason = server.get(api='task', data=inputlist)
            if not webdir:
                # Check the status BEFORE reading the payload: a failed request may not
                # carry a usable 'result' entry, and indexing it first would hide the
                # real problem behind a KeyError/IndexError.
                if status != 200:
                    msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
                    raise RESTCommunicationException(msg)
                webdir = dictresult['result'][0]
                self.logger.info('Server result: %s' % webdir)
            splitting = getColumn(dictresult, 'tm_split_algo')
            if getattr(self.options, 'jobids', None):
                # The second argument tells validateJobids whether the splitting is not 'Automatic'.
                self.options.jobids = validateJobids(self.options.jobids, splitting != 'Automatic')
            self.setDestination()
            self.logger.info("Setting the destination to %s " % self.dest)
            failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
            if failed:
                msg = "%sError%s: Failed to retrieve the following files: %s" % (colors.RED, colors.NORMAL, failed)
                self.logger.info(msg)
            else:
                self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN, colors.NORMAL))
            returndict = {'success': success, 'failed': failed}
        else:
            # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
            returndict = getcommand.__call__(self, subresource='logs2')
            # Point the user to --short when nothing succeeded or something failed.
            if ('success' in returndict and not returndict['success']) or \
               ('failed' in returndict and returndict['failed']):
                msg = "You can use the --short option to retrieve a short version of the log files from the Grid scheduler."
                self.logger.info(msg)

        return returndict
Beispiel #3
0
    def validateOptions(self):
        """
        Validate the common options via SubCommand and then the job id list.

        The result of ``validateJobids`` is stored in ``self.jobids``; without a
        'jobids' option value the attribute stays an empty string.
        """
        SubCommand.validateOptions(self)

        # Check the format of the job ids, defaulting to '' when none were given.
        self.jobids = ''
        givenIds = getattr(self.options, 'jobids', None)
        if givenIds:
            self.jobids = validateJobids(givenIds)
Beispiel #4
0
    def validateOptions(self):
        """
        Validate the command line options and copy the accepted values to self.

        Checks the format of the job id list, checks that the site white- and
        blacklist are comma separated lists of CMS site names, and applies
        sanity limits to the maximum job runtime, memory and number of cores.
        Accepted values are stored on self (jobids, sitewhitelist,
        siteblacklist, maxjobruntime, maxmemory, numcores, priority); the
        numeric ones are stored as strings.

        Raises ConfigurationException when a site name does not match the
        expected pattern or a numeric option is out of range.
        """
        SubCommand.validateOptions(self)

        ## Check the format of the jobids option. A default is passed to getattr so
        ## that a missing option is treated like an empty one instead of raising.
        if getattr(self.options, 'jobids', None):
            jobidstuple = validateJobids(self.options.jobids)
            self.jobids = [str(jobid) for (_, jobid) in jobidstuple]

        #Checking if the sites provided by the user are valid cmsnames. Doing this because with only the
        #server error handling we get:
        #    Server answered with: Invalid input parameter
        #    Reason is: Incorrect 'siteblacklist' parameter
        #which is not really user friendly.
        #Moreover, I prefer to be independent from Lexicon, so the regex is kept here.
        sn_re = "^T[1-3]_[A-Z]{2}(_[A-Za-z0-9]+)+$" #sn_re => SiteName_RegularExpression
        sn_rec = re.compile(sn_re) #sn_rec => SiteName_RegularExpressionCompiled
        for sitelist in ['sitewhitelist', 'siteblacklist']:
            sites = getattr(self.options, sitelist)
            if sites is not None:
                # Split once and reuse the list for both the check and the stored value.
                site_names = sites.split(',')
                for site_name in site_names:
                    if not sn_rec.match(site_name):
                        msg  = "The site name %s does not look like a valid CMS site name" % (site_name)
                        msg += " (it is not matching the regular expression %s)." % (sn_re)
                        raise ConfigurationException(msg)
                setattr(self, sitelist, site_names)

        ## Sanity checks for task sizes. Limits are purposely fairly generous to provide
        ## some level of future-proofing. The server may restrict further.
        if self.options.maxjobruntime is not None:
            if self.options.maxjobruntime < 60 or self.options.maxjobruntime > 336*60:
                msg = "The requested maximum job runtime (%d minutes) must be between 60 and 20160 minutes." % (self.options.maxjobruntime)
                raise ConfigurationException(msg)
            self.maxjobruntime = str(self.options.maxjobruntime)

        if self.options.maxmemory is not None:
            if self.options.maxmemory < 30 or self.options.maxmemory > 1024*30:
                msg = "The requested per-job memory (%d MB) must be between 30 and 30720 MB." % (self.options.maxmemory)
                raise ConfigurationException(msg)
            self.maxmemory = str(self.options.maxmemory)

        if self.options.numcores is not None:
            if self.options.numcores < 1 or self.options.numcores > 128:
                msg = "The requested number of cores (%d) must be between 1 and 128." % (self.options.numcores)
                raise ConfigurationException(msg)
            self.numcores = str(self.options.numcores)

        # The priority is stored without a range check here.
        if self.options.priority is not None:
            self.priority = str(self.options.priority)
0
    def __call__(self):
        """
        Retrieve the log files of the task.

        Without ``self.options.short`` the call is delegated to
        ``getcommand.__call__`` using the 'logs2' subresource. With it, the web
        directory of the task is determined (through ``getProxiedWebDir`` or,
        failing that, from the server), the job ids are validated and the short
        logs are fetched with ``retrieveShortLogs``.

        Returns the dictionary of the chosen path; in short mode it has the keys
        'success' and 'failed'.

        Raises RESTCommunicationException when the server has to be queried and
        does not answer with status 200.
        """
        if not self.options.short:
            # Different from the old getlog code: set 'logs2' as subresource so that 'getcommand' uses the new logic.
            returndict = getcommand.__call__(self, subresource='logs2')
            nothingSucceeded = 'success' in returndict and not returndict['success']
            if nothingSucceeded or ('failed' in returndict and returndict['failed']):
                self.logger.info("You can use the --short option to retrieve a short version of the log files from the Grid scheduler.")
            return returndict

        # Find out the splitting algorithm from the cached configuration.
        try:
            splitAlgo = self.cachedinfo['OriginalConfig'].Data.splitting
        except AttributeError:
            # No splitting attribute in the configuration: the default setting is 'Automatic'.
            splitAlgo = 'Automatic'
        except KeyError:
            # A crab remade task does not have the 'OriginalConfig' key, need to fetch from DB.
            splitAlgo = 'Unknown'

        taskname = self.cachedinfo['RequestName']
        query = {'subresource': 'webdir', 'workflow': taskname}
        restFactory = CRABClient.Emulator.getEmulator('rest')
        restClient = restFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = self.getUrl(self.instance, resource='task')

        # With an unknown splitting the server is queried anyway, so the proxied lookup is skipped.
        if splitAlgo == 'Unknown':
            webdir = None
        else:
            webdir = getProxiedWebDir(taskname, self.serverurl, uri, self.proxyfilename, self.logger.debug)

        if not webdir:
            answer, status, reason = restClient.get(uri, data=query)
            if status != 200:
                raise RESTCommunicationException(
                    "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(query), str(answer), str(reason)))
            if splitAlgo == 'Unknown':
                splitAlgo = getColumn(answer, 'tm_split_algo')
            webdir = answer['result'][0]
            self.logger.info('Server result: %s' % webdir)

        self.setDestination()
        self.logger.info("Setting the destination to %s " % self.dest)

        # Check the format of the job ids; the flag tells whether the splitting is not 'Automatic'.
        self.options.jobids = validateJobids(self.options.jobids, splitAlgo != 'Automatic')

        failed, success = self.retrieveShortLogs(webdir, self.proxyfilename)
        if not failed:
            self.logger.info("%sSuccess%s: All files successfully retrieved." % (colors.GREEN, colors.NORMAL))
        else:
            self.logger.info("%sError%s: Failed to retrieve the following files: %s" % (colors.RED, colors.NORMAL, failed))

        return {'success': success, 'failed': failed}
Beispiel #6
0
    def validateOptions(self):
        """
        Validate the options and normalise destination, quantity and job ids.

        - ``self.dest`` becomes None without an output path; a path starting
          with a lowercase ``scheme://`` prefix or an absolute path is kept
          verbatim, anything else is made absolute.
        - A 'quantity' of 'all' is replaced by -1.
        - A non-empty 'jobids' option is replaced by the result of
          ``validateJobids``.
        """
        SubCommand.validateOptions(self)

        # Figure out the destination directory.
        self.dest = None
        outputpath = self.options.outputpath
        if outputpath is not None:
            keepVerbatim = re.match("^[a-z]+://", outputpath) or os.path.isabs(outputpath)
            self.dest = outputpath if keepVerbatim else os.path.abspath(outputpath)

        # 'all' is converted to -1.
        if getattr(self.options, 'quantity', None) == 'all':
            self.options.quantity = -1

        # Check the format of the job ids.
        jobids = getattr(self.options, 'jobids', None)
        if jobids:
            self.options.jobids = validateJobids(jobids)
Beispiel #7
0
    def validateOptions(self):
        """
        Validate the options and derive destination, quantity, job ids and dump flag.

        The destination (``self.dest``) is None when no output path was given;
        otherwise the path is kept as is when it starts with a lowercase
        ``scheme://`` prefix or is absolute, and is made absolute when it is
        relative. A 'quantity' of 'all' becomes -1, a non-empty 'jobids' option
        is replaced by the output of ``validateJobids`` and the 'dump' option is
        copied to ``self.dump``.
        """
        SubCommand.validateOptions(self)

        # Figure out the destination directory.
        self.dest = None
        target = self.options.outputpath
        if target is None:
            pass
        elif re.match("^[a-z]+://", target) or os.path.isabs(target):
            self.dest = target
        else:
            self.dest = os.path.abspath(target)

        # 'all' is converted to -1.
        wantsAll = getattr(self.options, 'quantity', None) == 'all'
        if wantsAll:
            self.options.quantity = -1

        # Check the format of the job ids.
        rawJobids = getattr(self.options, 'jobids', None)
        if rawJobids:
            self.options.jobids = validateJobids(rawJobids)

        self.dump = self.options.dump
Beispiel #8
0
    def validateOptions(self):
        """
        Validate the --sort and --jobids options.

        The sort key, when given, must be one of the accepted values. A given
        job id list is validated and stored on ``self.jobids`` as a list of
        strings, and is only allowed together with --long or --sort.

        Raises ConfigurationException for an unknown sort key or for --jobids
        used without --long or --sort.
        """
        SubCommand.validateOptions(self)

        sortKey = self.options.sort
        if sortKey is not None:
            sortOpts = [
                "state", "site", "runtime", "memory", "cpu", "retries",
                "waste", "exitcode"
            ]
            if sortKey not in sortOpts:
                prefix = "%sError%s:" % (colors.RED, colors.NORMAL)
                detail = " Only the following values are accepted for --sort option: %s" % (
                    sortOpts)
                raise ConfigurationException(prefix + detail)

        # Nothing else to check when no job ids were requested.
        if not self.options.jobids:
            return

        self.jobids = [str(jobid) for (_, jobid) in validateJobids(self.options.jobids)]

        if not self.options.long and not self.options.sort:
            raise ConfigurationException(
                "Parameter --jobids can only be used in combination "
                "with --long or --sort options.")
Beispiel #9
0
    def validateOptions(self):
        """
        Validate the options and derive destination, quantity, job ids, copy
        command and checksum setting.

        - ``self.dest``: None without an output path; a path starting with a
          lowercase ``scheme://`` prefix or an absolute path is kept as is, a
          relative path is made absolute.
        - A 'quantity' of 'all' becomes -1.
        - A non-empty 'jobids' option is replaced by the output of
          ``validateJobids``.
        - ``self.command``: the upper-cased 'command' option, or None when the
          option is absent or None.
        - ``self.checksum``: 'ADLER32' for 'yes', None for 'no'; only set when
          the options have a 'checksum' attribute.

        Raises ConfigurationException for a command other than LCG/GFAL or a
        checksum value other than yes/no.
        """
        #Figuring out the destination directory
        SubCommand.validateOptions(self)
        self.dest = None
        if self.options.outputpath is not None:
            if re.match("^[a-z]+://", self.options.outputpath):
                self.dest = self.options.outputpath
            elif not os.path.isabs(self.options.outputpath):
                self.dest = os.path.abspath(self.options.outputpath)
            else:
                self.dest = self.options.outputpath

        #convert all to -1
        if getattr(self.options, 'quantity', None) == 'all':
            self.options.quantity = -1

        #check the format of jobids
        if getattr(self.options, 'jobids', None):
            self.options.jobids = validateJobids(self.options.jobids)

        # A single getattr with a default covers both "attribute missing" and
        # "attribute is None"; None is tested by identity.
        requestedCommand = getattr(self.options, 'command', None)
        if requestedCommand is not None:
            AvailableCommands = ['LCG', 'GFAL']
            self.command = requestedCommand.upper()
            if self.command not in AvailableCommands:
                msg = "You specified to use %s command and it is not allowed. Available commands are: %s " % (
                    self.command, str(AvailableCommands))
                raise ConfigurationException(msg)
        else:
            self.command = None
        if hasattr(self.options, 'checksum'):
            if re.match('^yes$|^no$', self.options.checksum):
                self.checksum = 'ADLER32' if self.options.checksum == 'yes' else None
            else:
                msg = "You specified to use %s checksum. Only lowercase yes/no is accepted to turn ADLER32 checksum" % self.options.checksum
                raise ConfigurationException(msg)
Beispiel #10
0
    def validateOptions(self):
        """
        Validate the options and set destination, quantity, job ids, copy
        command and checksum on the command object.

        The destination (``self.dest``) stays None without an output path; a
        value starting with a lowercase ``scheme://`` prefix or an absolute path
        is used verbatim and a relative path is converted to an absolute one.
        A 'quantity' of 'all' is turned into -1 and a non-empty 'jobids' option
        is replaced by the result of ``validateJobids``. ``self.command`` is the
        upper-cased 'command' option (None when not provided) and
        ``self.checksum`` is 'ADLER32' or None depending on the yes/no
        'checksum' option, when that option exists.

        Raises ConfigurationException when the command is not LCG or GFAL, or
        when the checksum option is neither 'yes' nor 'no'.
        """
        #Figuring out the destination directory
        SubCommand.validateOptions(self)
        self.dest = None
        if self.options.outputpath is not None:
            if re.match("^[a-z]+://", self.options.outputpath):
                self.dest = self.options.outputpath
            elif not os.path.isabs(self.options.outputpath):
                self.dest = os.path.abspath(self.options.outputpath)
            else:
                self.dest = self.options.outputpath

        #convert all to -1
        if getattr(self.options, 'quantity', None) == 'all':
            self.options.quantity = -1

        #check the format of jobids
        if getattr(self.options, 'jobids', None):
            self.options.jobids = validateJobids(self.options.jobids)

        # Identity test against None (instead of '!= None'); the getattr default
        # also handles options objects without a 'command' attribute.
        chosenCommand = getattr(self.options, 'command', None)
        if chosenCommand is not None:
            AvailableCommands = ['LCG', 'GFAL']
            self.command = chosenCommand.upper()
            if self.command not in AvailableCommands:
                msg = "You specified to use %s command and it is not allowed. Available commands are: %s " % (self.command, str(AvailableCommands))
                raise ConfigurationException(msg)
        else:
            self.command = None
        if hasattr(self.options, 'checksum'):
            if re.match('^yes$|^no$', self.options.checksum):
                self.checksum = 'ADLER32' if self.options.checksum == 'yes' else None
            else:
                msg = "You specified to use %s checksum. Only lowercase yes/no is accepted to turn ADLER32 checksum" % self.options.checksum
                raise ConfigurationException(msg)
Beispiel #11
0
    def validateOptions(self):
        """
        Validate the resubmission options.

        Rejects option combinations that are incompatible with --publication,
        validates the job id list, requires --jobids whenever --force is used,
        checks that the site white- and blacklist contain CMS site names and
        range-checks the maximum job runtime, memory, number of cores and
        priority. Accepted job ids and site lists are stored on self.

        Raises ConfigurationException on any violated rule.
        """
        SubCommand.validateOptions(self)

        if self.options.publication:
            jobTuningOptions = ('sitewhitelist', 'siteblacklist', 'maxjobruntime',
                                'maxmemory', 'numcores', 'priority')
            if any(getattr(self.options, name) is not None for name in jobTuningOptions):
                raise ConfigurationException(
                    "The options --sitewhitelist, --siteblacklist,"
                    " --maxjobruntime, --maxmemory, --numcores and  --priority"
                    " can not be specified together with the option --publication."
                    " The last option is to only resubmit (failed) publications,"
                    " in which case all of the first options make no sense.")
            if self.options.jobids:
                raise ConfigurationException(
                    "The option --jobids"
                    " can not be specified together with the option --publication."
                    " The last option is to only resubmit (failed) publications,"
                    " which does not allow yet filtering on job ids (ALL failed publications will be resubmitted).")
            if self.options.force:
                raise ConfigurationException(
                    "The option --force"
                    " can not be specified together with the option --publication."
                    " The last option is to only resubmit failed publications."
                    " Publications in a status other than 'failed' can not be resubmitted.")

        ## --jobids selects the jobs to resubmit. Without it, every job of the task
        ## that is neither running nor successfully completed is resubmitted. With an
        ## explicit list, successfully completed jobs can be resubmitted as well.

        ## Check the format of the jobids option.
        if self.options.jobids:
            self.jobids = [str(jobid) for (_, jobid) in validateJobids(self.options.jobids)]

        ## --force is only accepted together with a user-given list of job ids.
        if self.options.force and not self.jobids:
            raise ConfigurationException("Option --force can only be used in combination with option --jobids.")

        ## Convention for the job parameters the user can set at resubmission
        ## (siteblacklist, sitewhitelist, maxjobruntime, maxmemory, numcores, priority):
        ## - A parameter the user does not set is not passed to the server; the server
        ##   then copies the value the parameter had at task submission from the Task
        ##   DB. That is why the original values are kept in separate Task DB columns.
        ## - For the site black- and whitelists an empty string (e.g.
        ##   --siteblacklist='') is passed on as an empty list ([]), while a given
        ##   list of sites overwrites the original one.
        ## - The values are used only for the resubmitted jobs (their first
        ##   resubmission and all following automatic resubmissions).

        # The site names are checked here because the server-side error
        #    Server answered with: Invalid input parameter
        #    Reason is: Incorrect 'siteblacklist' parameter
        # is not really user friendly. The regular expression is kept local to stay
        # independent from Lexicon.
        sn_re = "^T[1-3]_[A-Z]{2}(_[A-Za-z0-9]+)+$" #sn_re => SiteName_RegularExpression
        sn_rec = re.compile(sn_re) #sn_rec => SiteName_RegularExpressionCompiled
        for sitelist in ['sitewhitelist', 'siteblacklist']:
            given = getattr(self.options, sitelist)
            if given is None:
                continue
            if given == "":
                setattr(self, sitelist, [])
                continue
            for site_name in given.split(','):
                # Names containing a wildcard are not matched against the pattern.
                if '*' in site_name or sn_rec.match(site_name):
                    continue
                raise ConfigurationException(
                    "The site name '%s' does not look like a valid CMS site name" % (site_name)
                    + " (it is not matching the regular expression '%s')." % (sn_re))
            setattr(self, sitelist, given.split(','))

        ## Sanity checks for task sizes. Limits are purposely fairly generous to provide
        ## some level of future-proofing. The server may restrict further.
        runtime = self.options.maxjobruntime
        if runtime is not None and (runtime < 60 or runtime > 336*60):
            raise ConfigurationException(
                "The requested maximum job runtime (%d minutes) must be between 60 and 20160 minutes." % (runtime))

        memory = self.options.maxmemory
        if memory is not None and (memory < 30 or memory > 1024*30):
            raise ConfigurationException(
                "The requested per-job memory (%d MB) must be between 30 and 30720 MB." % (memory))

        cores = self.options.numcores
        if cores is not None and (cores < 1 or cores > 128):
            raise ConfigurationException(
                "The requested number of cores (%d) must be between 1 and 128." % (cores))

        priority = self.options.priority
        if priority is not None and priority < 1:
            raise ConfigurationException(
                "The requested priority (%d) must be greater than 0." % (priority))
Beispiel #12
0
    def __call__(self, **argv):  # pylint: disable=arguments-differ
        """
        Retrieve (or only list) the files of the task from the storage.

        The task is first looked up in the task database to find out whether the
        requested kind of files was transferred at all and which splitting
        algorithm the task uses. Then the file locations are requested from the
        server and, depending on the options, printed as XRootD URLs (--xroot),
        dumped as PFN/LFN pairs (--dump) or copied to the destination.

        argv: keyword arguments appended to the server request; the value of
              'subresource' ('logs', 'logs2', 'data' or 'data2') selects which
              transfer flag of the task is checked.

        Returns a dictionary with the key 'xrootd', or the keys 'pfn' and 'lfn',
        or the keys 'success' and 'failed', depending on the path taken.

        Raises RESTCommunicationException when the request for the file
        locations is not answered with status 200.
        """
        ## Retrieve the transferLogs parameter from the task database.
        taskdbparam, configparam = '', ''
        if argv.get('subresource') in ['logs', 'logs2']:
            taskdbparam = 'tm_save_logs'
            configparam = "General.transferLogs"
        elif argv.get('subresource') in ['data', 'data2']:
            taskdbparam = 'tm_transfer_outputs'
            configparam = "General.transferOutputs"

        transferFlag = 'unknown'
        inputlist = {'subresource': 'search', 'workflow': self.cachedinfo['RequestName']}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = getUrl(self.instance, resource = 'task')
        dictresult, status, _ =  server.get(uri, data = inputlist)
        self.logger.debug('Server result: %s' % dictresult)
        splitting = None
        if status == 200:
            if 'desc' in dictresult and 'columns' in dictresult['desc']:
                # The values in 'result' are located through the position of the column name.
                position = dictresult['desc']['columns'].index(taskdbparam)
                transferFlag = dictresult['result'][position] #= 'T' or 'F'
                position = dictresult['desc']['columns'].index('tm_split_algo')
                splitting = dictresult['result'][position]
            else:
                self.logger.debug("Unable to locate %s in server result." % (taskdbparam))
        ## If transferFlag = False, there is nothing to retrieve.
        if transferFlag == 'F':
            msg = "No files to retrieve. Files not transferred to storage since task configuration parameter %s is False." % (configparam)
            self.logger.info(msg)
            return {'success': {}, 'failed': {}}

        ## Retrieve tm_edm_outfiles, tm_tfile_outfiles and tm_outfiles from the task database and check if they are empty.
        if argv.get('subresource') in ['data', 'data2'] and status == 200:
            if 'desc' in dictresult and 'columns' in dictresult['desc']:
                position = dictresult['desc']['columns'].index('tm_edm_outfiles')
                tm_edm_outfiles = dictresult['result'][position]
                position = dictresult['desc']['columns'].index('tm_tfile_outfiles')
                tm_tfile_outfiles = dictresult['result'][position]
                position = dictresult['desc']['columns'].index('tm_outfiles')
                tm_outfiles = dictresult['result'][position]
                # The comparison is kept inside the guard: the three names only exist
                # when the column descriptions were found, so checking them outside
                # would raise a NameError for an answer without 'desc'/'columns'.
                if tm_edm_outfiles == '[]' and tm_tfile_outfiles == '[]' and tm_outfiles == '[]':
                    msg  = "%sWarning%s:" % (colors.RED, colors.NORMAL)
                    msg += " There are no output files to retrieve, because CRAB could not detect any in the CMSSW configuration"
                    msg += " nor was any explicitly specified in the CRAB configuration."
                    self.logger.warning(msg)

        #check the format of jobids
        if getattr(self.options, 'jobids', None):
            # splitting stays None when the lookup above failed, which also counts as "not Automatic".
            self.options.jobids = validateJobids(self.options.jobids, splitting != 'Automatic')

        self.processAndStoreJobIds()

        #Retrieving output files location from the server
        self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'])
        inputlist =  [('workflow', self.cachedinfo['RequestName'])]
        inputlist.extend(list(argv.iteritems()))
        if getattr(self.options, 'quantity', None):
            self.logger.debug('Retrieving %s file locations' % self.options.quantity)
            inputlist.append(('limit', self.options.quantity))
        else:
            self.logger.debug('Retrieving all file locations')
            inputlist.append(('limit', -1))
        if getattr(self.options, 'jobids', None):
            self.logger.debug('Retrieving jobs %s' % self.options.jobids)
            inputlist.extend(self.options.jobids)
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data = urllib.urlencode(inputlist))
        self.logger.debug('Server result: %s' % dictresult)

        if status != 200:
            msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        totalfiles = len(dictresult['result'])
        fileInfoList = dictresult['result']

        self.insertXrootPfns(fileInfoList)

        if len(fileInfoList) > 0:
            if self.options.dump or self.options.xroot:
                self.logger.debug("Getting url info")
            else:
                # A destination is only needed when files are actually copied.
                self.setDestination()
                self.logger.info("Setting the destination to %s " % self.dest)
            if self.options.xroot:
                self.logger.debug("XRootD urls are requested")
                xrootlfn = ["root://cms-xrd-global.cern.ch/%s" % link['lfn'] for link in fileInfoList]
                self.logger.info("\n".join(xrootlfn))
                returndict = {'xrootd': xrootlfn}
            elif self.options.dump:
                jobid_pfn_lfn_list = sorted(map(lambda x: (x['jobid'], x['pfn'], x['lfn']), fileInfoList)) # pylint: disable=deprecated-lambda
                lastjobid = -1
                filecounter = 1
                msg = ""
                for jobid, pfn, lfn in jobid_pfn_lfn_list:
                    # Start a new section (and restart the numbering) for every new job id.
                    if jobid != lastjobid:
                        msg += "%s=== Files from job %s:" % ('\n' if lastjobid != -1 else '', jobid)
                        lastjobid = jobid
                        filecounter = 1
                    msg += "\n%d) PFN: %s" % (filecounter, pfn)
                    # Pad the LFN line by the width of the counter so it lines up with the PFN line.
                    msg += "\n%s  LFN: %s" % (' '*(len(str(filecounter))), lfn)
                    filecounter += 1
                self.logger.info(msg)
                returndict = {'pfn': [pfn for _, pfn, _ in jobid_pfn_lfn_list], 'lfn': [lfn for _, _, lfn in jobid_pfn_lfn_list]}
            else:
                self.logger.info("Retrieving %s files" % (totalfiles))
                arglist = ['--destination', self.dest, '--input', fileInfoList, '--dir', self.options.projdir, \
                           '--proxy', self.proxyfilename, '--parallel', self.options.nparallel, '--wait', self.options.waittime, \
                           '--checksum', self.checksum, '--command', self.command]
                copyoutput = remote_copy(self.logger, arglist)
                successdict, faileddict = copyoutput()
                #need to use deepcopy because successdict and faileddict are dicts held by a managed dict that is accessed from several threads
                returndict = {'success': copy.deepcopy(successdict) , 'failed': copy.deepcopy(faileddict)}
        if totalfiles == 0:
            self.logger.info("No files to retrieve.")
            returndict = {'success': {} , 'failed': {}}

        if transferFlag == 'unknown':
            # Without a known transfer flag, hint that an empty result may simply mean the transfer was disabled.
            if ('success' in returndict and not returndict['success']) and \
               ('failed'  in returndict and not returndict['failed']):
                msg = "This is normal behavior if %s = False in the task configuration." % (configparam)
                self.logger.info(msg)

        return returndict
Beispiel #13
0
    def validateOptions(self):
        """
        Check if the sitelist parameter is a comma separater list of cms sitenames,
        and put the strings to be passed to the server to self
        """
        SubCommand.validateOptions(self)

        serverFactory = CRABClient.Emulator.getEmulator('rest')
        self.server = serverFactory(self.serverurl,
                                    self.proxyfilename,
                                    self.proxyfilename,
                                    version=__version__)
        uri = getUrl(self.instance, resource='task')
        crabDBInfo, _, _ = self.server.get(uri,
                                           data={
                                               'subresource':
                                               'search',
                                               'workflow':
                                               self.cachedinfo['RequestName']
                                           })
        self.splitting = getColumn(crabDBInfo, 'tm_split_algo')

        if self.options.publication:
            if self.options.sitewhitelist is not None or self.options.siteblacklist is not None or \
               self.options.maxjobruntime is not None or self.options.maxmemory is not None or \
               self.options.numcores is not None or self.options.priority is not None:
                msg = "The options --sitewhitelist, --siteblacklist,"
                msg += " --maxjobruntime, --maxmemory, --numcores and  --priority"
                msg += " can not be specified together with the option --publication."
                msg += " The last option is to only resubmit (failed) publications,"
                msg += " in which case all of the first options make no sense."
                raise ConfigurationException(msg)
            if self.options.jobids:
                msg = "The option --jobids"
                msg += " can not be specified together with the option --publication."
                msg += " The last option is to only resubmit (failed) publications,"
                msg += " which does not allow yet filtering on job ids (ALL failed publications will be resubmitted)."
                raise ConfigurationException(msg)
            if self.options.force:
                msg = "The option --force"
                msg += " can not be specified together with the option --publication."
                msg += " The last option is to only resubmit failed publications."
                msg += " Publications in a status other than 'failed' can not be resubmitted."
                raise ConfigurationException(msg)

        ## The --jobids option indicates which jobs have to be resubmitted. If it is not
        ## given, then all jobs in the task that are not running or successfully
        ## completed are resubmitted. If the user provides a list of job ids, then also
        ## successfully completed jobs can be resubmitted.

        ## Check the format of the jobids option.
        if self.options.jobids:
            jobidstuple = validateJobids(self.options.jobids,
                                         self.splitting != 'Automatic')
            self.jobids = [str(jobid) for (_, jobid) in jobidstuple]

        ## The --force option should not be accepted unless combined with a user-given
        ## list of job ids via --jobids.
        if self.options.force and not self.jobids:
            msg = "Option --force can only be used in combination with option --jobids."
            raise ConfigurationException(msg)

        ## Covention used for the job parameters that the user can set when doing job
        ## resubmission (i.e. siteblacklist, sitewhitelist, maxjobruntime, maxmemory,
        ## numcores and priority):
        ## - If the user doesn't set a parameter we don't pass it to the server and the
        ##   the server copies the original value the parameter had at task submission.
        ##   It copies it from the Task DB. Therefore we need to keep these parameters
        ##   in separate columns of the Task DB containing their original values.
        ## - For the site black- and whitelists, if the user passes an empty string,
        ##   e.g. --siteblacklist='', we pass to the server siteblacklist=empty and the
        ##   server interprets this as and empty list ([]). If the user passes a given
        ##   list of sites, this new list overwrittes the original one.
        ## - The values of the parameters are used only for the resubmitted jobs (for
        ##   their first resubmission and all next automatic resubmissions).

        #Checking if the sites provided by the user are valid cmsnames. Doing this because with only the
        #server error handling we get:
        #    Server answered with: Invalid input parameter
        #    Reason is: Incorrect 'siteblacklist' parameter
        #which is not really user friendly.
        #Moreover, I prefer to be independent from Lexicon. I'll the regex here.
        sn_re = "^T[1-3]_[A-Z]{2}(_[A-Za-z0-9]+)+$"  #sn_re => SiteName_RegularExpression
        sn_rec = re.compile(
            sn_re)  #sn_rec => SiteName_RegularExpressionCompiled
        for sitelist in ['sitewhitelist', 'siteblacklist']:
            if getattr(self.options, sitelist) is not None:
                if getattr(self.options, sitelist) != "":
                    for site_name in getattr(self.options,
                                             sitelist).split(','):
                        if '*' not in site_name and not sn_rec.match(
                                site_name):
                            msg = "The site name '%s' does not look like a valid CMS site name" % (
                                site_name)
                            msg += " (it is not matching the regular expression '%s')." % (
                                sn_re)
                            raise ConfigurationException(msg)
                    setattr(self, sitelist,
                            getattr(self.options, sitelist).split(','))
                else:
                    setattr(self, sitelist, [])

        ## Sanity checks for task sizes. Limits are purposely fairly generous to provide
        ## some level of future-proofing. The server may restrict further.
        if self.options.maxjobruntime is not None:
            if self.options.maxjobruntime < 60 or self.options.maxjobruntime > 336 * 60:
                msg = "The requested maximum job runtime (%d minutes) must be between 60 and 20160 minutes." % (
                    self.options.maxjobruntime)
                raise ConfigurationException(msg)

        if self.options.maxmemory is not None:
            if self.options.maxmemory < 30 or self.options.maxmemory > 1024 * 30:
                msg = "The requested per-job memory (%d MB) must be between 30 and 30720 MB." % (
                    self.options.maxmemory)
                raise ConfigurationException(msg)

        if self.options.numcores is not None:
            if self.options.numcores < 1 or self.options.numcores > 128:
                msg = "The requested number of cores (%d) must be between 1 and 128." % (
                    self.options.numcores)
                raise ConfigurationException(msg)

        if self.options.priority is not None:
            if self.options.priority < 1:
                msg = "The requested priority (%d) must be greater than 0." % (
                    self.options.priority)
                raise ConfigurationException(msg)
Beispiel #14
0
    def processJobIds(self, jobList):
        """
        Determine which job ids have to be resubmitted.

        If this is a publication resubmission, return None since jobIds are not taken
        into account for publication resubmissions.

        If the user provides a list of jobIds to be resubmitted, validate it and
        return the same list in case of success.

        If no jobIds are provided, create a list of jobs that need resubmitting and
        return it.

        :param jobList: sequence of (jobStatus, jobId) pairs; it is iterated more
            than once and every jobId must be a string.
        :return: None for publication resubmissions, otherwise a list of job id
            strings (self.jobids when the user gave ids, else every job that is in
            a resubmittable status).
        :raises ConfigurationException: if a requested job id is unknown or not in
            a resubmittable status, or if no resubmittable job exists at all.

        Side effect: when any job id in jobList contains a '-', self.jobids is
        rebuilt from self.options.jobids through validateJobids(..., False).
        """

        if self.options.publication:
            return None

        failedJobStatus = 'failed'
        finishedJobStatus = 'finished'

        # 'failed' jobs can always be resubmitted, 'finished' ones only with --force.
        allowedJobStates = [failedJobStatus]
        if self.options.force:
            allowedJobStates.append(finishedJobStatus)

        # Probe- and tail-job ids have a '-' in them, so one such id is enough to
        # treat the whole task as automatically split.
        automatic = any('-' in jobId for _, jobId in jobList)

        def consider(jobId):
            # For automatically split tasks, ids starting with '0-' and ids without
            # a '-' are never resubmitted (presumably probe jobs and their
            # placeholders -- TODO confirm against the server-side numbering).
            return not (automatic and (jobId.startswith('0-') or '-' not in jobId))

        # Single pass: remember the status of every eligible job and collect the
        # ones whose status allows a resubmission (in jobList order).
        jobStatusDict = {}
        possibleToResubmitJobIds = []
        for jobStatus, jobId in jobList:
            if not consider(jobId):
                continue
            jobStatusDict[jobId] = jobStatus
            if jobStatus in allowedJobStates:
                possibleToResubmitJobIds.append(jobId)

        if not self.jobids:
            msg = "Requesting resubmission of failed jobs in task %s" % (
                self.cachedinfo['RequestName'])
            self.logger.debug(msg)

            if not possibleToResubmitJobIds:
                msg = "Found no jobs to resubmit. Only jobs in status %s can be resubmitted. " % failedJobStatus
                msg += "Jobs in status %s can also be resubmitted, but only if the jobids " % finishedJobStatus
                msg += "are specified and the force option is set."
                raise ConfigurationException(msg)

            return possibleToResubmitJobIds

        # Automatic splitting does not work with lists... probe- and
        # tail-job ids have a '-' in them, so re-split the joblist.
        if automatic:
            jobidstuple = validateJobids(self.options.jobids, False)
            self.jobids = [str(jobid) for (_, jobid) in jobidstuple]
        msg = "Requesting resubmission of jobs %s in task %s" % (
            self.jobids, self.cachedinfo['RequestName'])
        self.logger.debug(msg)

        # Collect every requested id that is unknown or in a non-resubmittable
        # status. The same test decides both whether to fail and what to report,
        # and the ids keep the order in which the user gave them so that the error
        # message is reproducible from run to run.
        notPossibleAndWantedJobIds = []
        for jobId in self.jobids:
            if jobStatusDict.get(jobId) in allowedJobStates:
                continue
            if jobId not in notPossibleAndWantedJobIds:
                notPossibleAndWantedJobIds.append(jobId)

        if notPossibleAndWantedJobIds:
            msg = "Not possible to resubmit the following jobs:\n%s\n" % notPossibleAndWantedJobIds
            msg += "Only jobs in status %s can be resubmitted. " % failedJobStatus
            msg += "Jobs in status %s can also be resubmitted, " % finishedJobStatus
            msg += "but only if the jobid is specified and the force option is set."
            raise ConfigurationException(msg)

        return self.jobids
Beispiel #15
0
    def processJobIds(self, jobList):
        """
        Determine which job ids have to be resubmitted.

        If this is a publication resubmission, return None since jobIds are not taken
        into account for publication resubmissions.

        If the user provides a list of jobIds to be resubmitted, validate it and
        return the same list in case of success.

        If no jobIds are provided, create a list of jobs that need resubmitting and
        return it.

        :param jobList: sequence of (jobStatus, jobId) pairs; it is iterated more
            than once and every jobId must be a string.
        :return: None for publication resubmissions, otherwise a list of job id
            strings (self.jobids when the user gave ids, else every job that is in
            a resubmittable status).
        :raises ConfigurationException: if a requested job id is unknown or not in
            a resubmittable status, or if no resubmittable job exists at all.

        Side effect: when any job id in jobList contains a '-', self.jobids is
        rebuilt from self.options.jobids through validateJobids(..., False).
        """

        if self.options.publication:
            return None

        failedJobStatus = 'failed'
        finishedJobStatus = 'finished'

        # 'failed' jobs can always be resubmitted, 'finished' ones only with --force.
        allowedJobStates = [failedJobStatus]
        if self.options.force:
            allowedJobStates.append(finishedJobStatus)

        # Probe- and tail-job ids have a '-' in them, so one such id is enough to
        # treat the whole task as automatically split.
        automatic = any('-' in jobId for _, jobId in jobList)

        def consider(jobId):
            # For automatically split tasks, ids starting with '0-' and ids without
            # a '-' are never resubmitted (presumably probe jobs and their
            # placeholders -- TODO confirm against the server-side numbering).
            return not (automatic and (jobId.startswith('0-') or '-' not in jobId))

        # Single pass: remember the status of every eligible job and collect the
        # ones whose status allows a resubmission (in jobList order).
        jobStatusDict = {}
        possibleToResubmitJobIds = []
        for jobStatus, jobId in jobList:
            if not consider(jobId):
                continue
            jobStatusDict[jobId] = jobStatus
            if jobStatus in allowedJobStates:
                possibleToResubmitJobIds.append(jobId)

        if not self.jobids:
            msg = "Requesting resubmission of failed jobs in task %s" % (self.cachedinfo['RequestName'])
            self.logger.debug(msg)

            if not possibleToResubmitJobIds:
                msg = "Found no jobs to resubmit. Only jobs in status %s can be resubmitted. " % failedJobStatus
                msg += "Jobs in status %s can also be resubmitted, but only if the jobids " % finishedJobStatus
                msg += "are specified and the force option is set."
                raise ConfigurationException(msg)

            return possibleToResubmitJobIds

        # Automatic splitting does not work with lists... probe- and
        # tail-job ids have a '-' in them, so re-split the joblist.
        if automatic:
            jobidstuple = validateJobids(self.options.jobids, False)
            self.jobids = [str(jobid) for (_, jobid) in jobidstuple]
        msg = "Requesting resubmission of jobs %s in task %s" % (self.jobids, self.cachedinfo['RequestName'])
        self.logger.debug(msg)

        # Collect every requested id that is unknown or in a non-resubmittable
        # status. The same test decides both whether to fail and what to report,
        # and the ids keep the order in which the user gave them so that the error
        # message is reproducible from run to run.
        notPossibleAndWantedJobIds = []
        for jobId in self.jobids:
            if jobStatusDict.get(jobId) in allowedJobStates:
                continue
            if jobId not in notPossibleAndWantedJobIds:
                notPossibleAndWantedJobIds.append(jobId)

        if notPossibleAndWantedJobIds:
            msg = "Not possible to resubmit the following jobs:\n%s\n" % notPossibleAndWantedJobIds
            msg += "Only jobs in status %s can be resubmitted. " % failedJobStatus
            msg += "Jobs in status %s can also be resubmitted, " % finishedJobStatus
            msg += "but only if the jobid is specified and the force option is set."
            raise ConfigurationException(msg)

        return self.jobids