Example #1
    def validate(self, apiobj, method, api, param, safe):
        """Validating all the input parameter as enforced by the WMCore.REST module"""

        if method in ['PUT']:
            username = cherrypy.request.user['login'] # username registered in SiteDB
            requestname = param.kwargs['workflow']
            param.kwargs['workflow'] = generateTaskName(username, requestname)
            validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
            validate_str("activity", param, safe, RX_ACTIVITY, optional=True)
            validate_str("jobtype", param, safe, RX_JOBTYPE, optional=False)
            # TODO this should be changed to be non-optional
            validate_str("generator", param, safe, RX_GENERATOR, optional=True)
            validate_str("eventsperlumi", param, safe, RX_LUMIEVENTS, optional=True)
            validate_str("jobsw", param, safe, RX_CMSSW, optional=False)
            validate_num("nonprodsw", param, safe, optional=False)
            validate_str("jobarch", param, safe, RX_ARCH, optional=False)
            if not safe.kwargs["nonprodsw"]: # nonprodsw is set when the user wants to allow non-production releases
                self._checkReleases(safe.kwargs['jobarch'], safe.kwargs['jobsw'])
            validate_num("useparent", param, safe, optional=True)
            validate_str("secondarydata", param, safe, RX_DATASET, optional=True)
            validate_strlist("siteblacklist", param, safe, RX_CMSSITE)
            safe.kwargs['siteblacklist'] = self._expandSites(safe.kwargs['siteblacklist'])
            validate_strlist("sitewhitelist", param, safe, RX_CMSSITE)
            safe.kwargs['sitewhitelist'] = self._expandSites(safe.kwargs['sitewhitelist'])
            validate_str("splitalgo", param, safe, RX_SPLIT, optional=False)
            validate_num("algoargs", param, safe, optional=False)
            validate_num("totalunits", param, safe, optional=True)
            validate_str("cachefilename", param, safe, RX_CACHENAME, optional=False)
            validate_str("debugfilename", param, safe, RX_CACHENAME, optional=True)
            validate_str("cacheurl", param, safe, RX_CACHEURL, optional=False)
            validate_str("lfn", param, safe, RX_LFN, optional=True)
            self._checkOutLFN(safe.kwargs, username)
            validate_strlist("addoutputfiles", param, safe, RX_ADDFILE, custom_err="Incorrect 'JobType.outputFiles' parameter. " \
                    "Allowed regexp: '%s'." % RX_ADDFILE.pattern)
            validate_strlist("userfiles", param, safe, RX_USERFILE)
            validate_num("savelogsflag", param, safe, optional=False)
            validate_num("saveoutput", param, safe, optional=True)
            validate_num("faillimit", param, safe, optional=True)
            validate_num("ignorelocality", param, safe, optional=True)
            if safe.kwargs['ignorelocality'] and self.centralcfg.centralconfig.get('ign-locality-blacklist', []):
                safe.kwargs['siteblacklist'] += self._expandSites(self.centralcfg.centralconfig['ign-locality-blacklist'])
            validate_str("vorole", param, safe, RX_VOPARAMS, optional=True)
            validate_str("vogroup", param, safe, RX_VOPARAMS, optional=True)
            validate_num("publication", param, safe, optional=False)
            validate_str("publishdbsurl", param, safe, RX_DBSURL, optional=(not bool(safe.kwargs['publication'])))

            ## We might want to remove 'publishname' once backward compatibility
            ## is no longer needed; then we can keep just 'publishname2'.
            ## NB: AFAIK the only client not using the CRABLibrary but direct curl is HC,
            ## therefore we will need to make sure we do not break it!
            ## The following two lines can be removed once we no longer
            ## need backward compatibility.
            self._checkPublishDataName(param.kwargs, safe.kwargs['lfn'], requestname, username)
            validate_str('publishname', param, safe, RX_ANYTHING, optional=True)

            ## This 'if' can then be removed as well, leaving just self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username).
            if not safe.kwargs["publishname"]: #new clients won't define this anymore
                ## The (user specified part of the) publication dataset name must be
                ## specified and must pass DBS validation. Since this is the correct
                ## validation function, it must be done before the
                ## validate_str("publishname", ...) we have below.
                self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username)
            else:
                param.kwargs["publishname2"] = safe.kwargs["publishname"]

            ## 'publishname2' was already validated above (by _checkPublishDataName2()
            ## or copied from 'publishname'). Calling validate_str with a catch-all
            ## regexp just moves the param to the list of validated inputs.
            validate_str("publishname2", param, safe, RX_ANYTHING, optional=True)

            validate_num("publishgroupname", param, safe, optional=True)

            if safe.kwargs['jobtype'] == 'PrivateMC':
                if param.kwargs['inputdata']:
                    msg  = "Invalid 'inputdata' parameter."
                    msg += " Job type PrivateMC does not take any input dataset."
                    msg += " If you really intend to run over an input dataset, then you must use job type Analysis."
                    raise InvalidParameter(msg)
                if safe.kwargs['userfiles']:
                    msg  = "Invalid 'userfiles' parameter."
                    msg += " Job type PrivateMC does not take any input files."
                    msg += " If you really intend to run over input files, then you must use job type Analysis."
                    raise InvalidParameter(msg)

            ## Client versions < 3.3.1511 may put in the input dataset something that is not
            ## really an input dataset (for PrivateMC or user input files). So the only case
            ## in which we are sure that we have to validate the input dataset is when the
            ## workflow type is Analysis, the workflow does not run on user input files and
            ## an input dataset is defined (scriptExe may not define an input).
            ## Once we don't care anymore about backward compatibility with client < 3.3.1511,
            ## we can uncomment the 1st line below and delete the next 4 lines.
            #validate_str("inputdata", param, safe, RX_DATASET, optional=True)
            if safe.kwargs['jobtype'] == 'Analysis' and not safe.kwargs['userfiles'] and 'inputdata' in param.kwargs:
                validate_str("inputdata", param, safe, RX_DATASET, optional=True)
            else:
                validate_str("inputdata", param, safe, RX_ANYTHING, optional=True)

            ## The client is not forced to define the primary dataset. So make sure to have
            ## defaults or take it from the input dataset. The primary dataset is needed for
            ## the LFN of the output/log files and for publication. We want to have it well
            ## defined even if publication and/or transfer to storage are off.
            if safe.kwargs['inputdata']:
                param.kwargs['primarydataset'] = safe.kwargs['inputdata'].split('/')[1]
            if not param.kwargs.get('primarydataset', None):
                if safe.kwargs['jobtype'] == 'PrivateMC':
                    param.kwargs['primarydataset'] = "CRAB_PrivateMC"
                elif safe.kwargs['jobtype'] == 'Analysis' and safe.kwargs['userfiles']:
                    param.kwargs['primarydataset'] = "CRAB_UserFiles"
                else:
                    param.kwargs['primarydataset'] = "CRAB_NoInput"
            ## We validate the primary dataset against DBS rules even if publication is
            ## off, because in the future we may want to give users the possibility to
            ## publish a posteriori.
            self._checkPrimaryDataset(param.kwargs, optional=False)
            validate_str("primarydataset", param, safe, RX_LFNPRIMDS, optional=False)

            validate_num("nonvaliddata", param, safe, optional=True)
            # If exactly one of outputDatasetTag and publishDbsUrl is set, raise an error (we need both or neither).
            validate_str("asyncdest", param, safe, RX_CMSSITE, optional=False)
            self._checkASODestination(safe.kwargs['asyncdest'])
            # We no longer use this attribute, but keep it around for older client compatibility
            validate_num("blacklistT1", param, safe, optional=True)
            validate_num("oneEventMode", param, safe, optional=True)
            validate_num("priority", param, safe, optional=True)
            validate_num("maxjobruntime", param, safe, optional=True)
            validate_num("numcores", param, safe, optional=True)
            validate_num("maxmemory", param, safe, optional=True)
            validate_str("dbsurl", param, safe, RX_DBSURL, optional=False)
            validate_strlist("tfileoutfiles", param, safe, RX_OUTFILES, custom_err="Incorrect tfileoutfiles parameter (TFileService). " \
                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
            validate_strlist("edmoutfiles", param, safe, RX_OUTFILES, custom_err="Incorrect edmoutfiles parameter (PoolOutputModule). " \
                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
            validate_strlist("runs", param, safe, RX_RUNS)
            validate_strlist("lumis", param, safe, RX_LUMIRANGE)
            #validate_str("scheduler", param, safe, RX_SCHEDULER)
            if len(safe.kwargs["runs"]) != len(safe.kwargs["lumis"]):
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("adduserfiles", param, safe, RX_ADDFILE)
            validate_str("asourl", param, safe, RX_ASOURL, optional=True)
            validate_str("asodb", param, safe, RX_ASODB, optional=True)
            safe.kwargs["asourl"], safe.kwargs["asodb"] = self._getAsoConfig(safe.kwargs["asourl"], safe.kwargs["asodb"])
            validate_str("scriptexe", param, safe, RX_ADDFILE, optional=True)
            validate_strlist("scriptargs", param, safe, RX_SCRIPTARGS)
            validate_str("scheddname", param, safe, RX_SCHEDD_NAME, optional=True)
            validate_str("collector", param, safe, RX_COLLECTOR, optional=True)
            validate_strlist("extrajdl", param, safe, RX_SCRIPTARGS)
            validate_num("dryrun", param, safe, optional=True)
            validate_num("ignoreglobalblacklist", param, safe, optional=True)

        elif method in ['POST']:
            validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
            validate_str("subresource", param, safe, RX_SUBRESTAT, optional=True)
            validate_numlist('jobids', param, safe)
            ## In a resubmission, the site black- and whitelists need to be interpreted
            ## differently than in an initial task submission. If there is no site black-
            ## or whitelist, set it to None and DataWorkflow will use the corresponding
            ## list defined in the initial task submission. If the site black- or whitelist
            ## is equal to the string 'empty', set it to an empty list and don't call
            ## validate_strlist as it would fail.
            if 'siteblacklist' not in param.kwargs:
                safe.kwargs['siteblacklist'] = None
            elif param.kwargs['siteblacklist'] == 'empty':
                safe.kwargs['siteblacklist'] = []
                del param.kwargs['siteblacklist']
            else:
                validate_strlist("siteblacklist", param, safe, RX_CMSSITE)
                safe.kwargs['siteblacklist'] = self._expandSites(safe.kwargs['siteblacklist'])
            if 'sitewhitelist' not in param.kwargs:
                safe.kwargs['sitewhitelist'] = None
            elif param.kwargs['sitewhitelist'] == 'empty':
                safe.kwargs['sitewhitelist'] = []
                del param.kwargs['sitewhitelist']
            else:
                validate_strlist("sitewhitelist", param, safe, RX_CMSSITE)
                safe.kwargs['sitewhitelist'] = self._expandSites(safe.kwargs['sitewhitelist'])
            validate_num("maxjobruntime", param, safe, optional=True)
            validate_num("maxmemory", param, safe, optional=True)
            validate_num("numcores", param, safe, optional=True)
            validate_num("priority", param, safe, optional=True)
            validate_num("force", param, safe, optional=True)
            validate_num("publication", param, safe, optional=True)

        elif method in ['GET']:
            validate_str("workflow", param, safe, RX_TASKNAME, optional=True)
            validate_str('subresource', param, safe, RX_SUBRESTAT, optional=True)
            validate_str('username', param, safe, RX_USERNAME, optional=True)
            validate_str('timestamp', param, safe, RX_DATE, optional=True) ## inserted by eric

            ## Used to determine how much information to return to the client for status.
            ## Also used by report to determine whether it has to check job states.
            validate_num("verbose", param, safe, optional=True)

            ## used by get log, get data
            validate_num('limit', param, safe, optional=True)
            validate_num('exitcode', param, safe, optional=True)
            validate_numlist('jobids', param, safe)

            ## used by errors and report (short format in report means we do not query DBS)
            validate_num('shortformat', param, safe, optional=True)

            ## validation parameters
            if not safe.kwargs['workflow'] and safe.kwargs['subresource']:
                raise InvalidParameter("Invalid input parameters")
            if safe.kwargs['subresource'] in ['data', 'logs'] and not safe.kwargs['limit'] and not safe.kwargs['jobids']:
                raise InvalidParameter("You need to specify the number of jobs to retrieve or their ids.")

        elif method in ['DELETE']:
            validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
            validate_num("force", param, safe, optional=True)
            validate_numlist('jobids', param, safe)
            validate_str("killwarning", param, safe,  RX_TEXT_FAIL, optional=True)
            #decode killwarning message if present
            if safe.kwargs['killwarning']:
                try:
                    safe.kwargs['killwarning'] = b64decode(safe.kwargs['killwarning'])
                except TypeError:
                    raise InvalidParameter("Failure message is not in the accepted format")
Example #2
    def validate(self, apiobj, method, api, param, safe): #pylint: disable=unused-argument
        """Validating all the input parameter as enforced by the WMCore.REST module"""

        if method in ['PUT']:
            username = cherrypy.request.user['login'] # username registered in CMS WEB frontend
            requestname = param.kwargs['workflow']
            param.kwargs['workflow'] = generateTaskName(username, requestname)
            validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
            validate_str("activity", param, safe, RX_ACTIVITY, optional=True)
            validate_str("jobtype", param, safe, RX_JOBTYPE, optional=False)
            # TODO this should be changed to be non-optional
            validate_str("generator", param, safe, RX_GENERATOR, optional=True)
            validate_str("eventsperlumi", param, safe, RX_LUMIEVENTS, optional=True)
            validate_str("jobsw", param, safe, RX_CMSSW, optional=False)
            validate_num("nonprodsw", param, safe, optional=False)
            validate_str("jobarch", param, safe, RX_ARCH, optional=False)
            if not safe.kwargs["nonprodsw"]: # nonprodsw is set when the user wants to allow non-production releases
                self._checkReleases(safe.kwargs['jobarch'], safe.kwargs['jobsw'])
            validate_num("useparent", param, safe, optional=True)
            validate_str("secondarydata", param, safe, RX_DATASET, optional=True)
            validate_strlist("siteblacklist", param, safe, RX_CMSSITE)
            safe.kwargs['siteblacklist'] = self._expandSites(safe.kwargs['siteblacklist'])
            validate_strlist("sitewhitelist", param, safe, RX_CMSSITE)
            safe.kwargs['sitewhitelist'] = self._expandSites(safe.kwargs['sitewhitelist'])
            validate_str("splitalgo", param, safe, RX_SPLIT, optional=False)
            validate_num("algoargs", param, safe, optional=False)
            try:
                validate_num("totalunits", param, safe, optional=True)
            except InvalidParameter:
                validate_real("totalunits", param, safe, optional=True)
            validate_str("cachefilename", param, safe, RX_CACHENAME, optional=False)
            validate_str("debugfilename", param, safe, RX_CACHENAME, optional=True)
            validate_str("cacheurl", param, safe, RX_CACHEURL, optional=False)
            validate_str("lfn", param, safe, RX_LFN, optional=True)
            self._checkOutLFN(safe.kwargs, username)
            validate_strlist("addoutputfiles", param, safe, RX_ADDFILE, custom_err="Incorrect 'JobType.outputFiles' parameter. " \
                    "Allowed regexp for each filename: '%s'." % RX_ADDFILE.pattern)
            validate_strlist("userfiles", param, safe, RX_USERFILE, custom_err="Incorrect 'Data.userInputFiles' parameter. " \
                    "Allowed regexp for each filename: '%s'." % RX_USERFILE.pattern)
            validate_num("savelogsflag", param, safe, optional=False)
            validate_num("saveoutput", param, safe, optional=True)
            validate_num("faillimit", param, safe, optional=True)
            validate_num("ignorelocality", param, safe, optional=True)
            if safe.kwargs['ignorelocality'] and self.centralcfg.centralconfig.get('ign-locality-blacklist', []):
                safe.kwargs['siteblacklist'] += self._expandSites(self.centralcfg.centralconfig['ign-locality-blacklist'])
            validate_str("vorole", param, safe, RX_VOPARAMS, optional=True)
            validate_str("vogroup", param, safe, RX_VOPARAMS, optional=True)
            validate_num("publication", param, safe, optional=False)
            validate_str("publishdbsurl", param, safe, RX_DBSURL, optional=(not bool(safe.kwargs['publication'])))

            ## We might want to remove 'publishname' once backward compatibility
            ## is no longer needed; then we can keep just 'publishname2'.
            ## NB: AFAIK the only client not using the CRABLibrary but direct curl is HC,
            ## therefore we will need to make sure we do not break it!
            ## The following two lines can be removed once we no longer
            ## need backward compatibility.
            self._checkPublishDataName(param.kwargs, safe.kwargs['lfn'], requestname, username)
            validate_str('publishname', param, safe, RX_ANYTHING, optional=True)

            ## This 'if' can then be removed as well, leaving just self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username).
            if not safe.kwargs["publishname"]: #new clients won't define this anymore
                ## The (user specified part of the) publication dataset name must be
                ## specified and must pass DBS validation. Since this is the correct
                ## validation function, it must be done before the
                ## validate_str("publishname", ...) we have below.
                self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username)
            else:
                param.kwargs["publishname2"] = safe.kwargs["publishname"]

            ## 'publishname2' was already validated above (by _checkPublishDataName2()
            ## or copied from 'publishname'). Calling validate_str with a catch-all
            ## regexp just moves the param to the list of validated inputs.
            validate_str("publishname2", param, safe, RX_ANYTHING, optional=True)

            validate_num("publishgroupname", param, safe, optional=True)

            if safe.kwargs['jobtype'] == 'PrivateMC':
                if param.kwargs['inputdata']:
                    msg = "Invalid 'inputdata' parameter."
                    msg += " Job type PrivateMC does not take any input dataset."
                    msg += " If you really intend to run over an input dataset, then you must use job type Analysis."
                    raise InvalidParameter(msg)
                if safe.kwargs['userfiles']:
                    msg = "Invalid 'userfiles' parameter."
                    msg += " Job type PrivateMC does not take any input files."
                    msg += " If you really intend to run over input files, then you must use job type Analysis."
                    raise InvalidParameter(msg)

            ## Client versions < 3.3.1511 may put in the input dataset something that is not
            ## really an input dataset (for PrivateMC or user input files). So the only case
            ## in which we are sure that we have to validate the input dataset is when the
            ## workflow type is Analysis, the workflow does not run on user input files and
            ## an input dataset is defined (scriptExe may not define an input).
            ## Once we don't care anymore about backward compatibility with client < 3.3.1511,
            ## we can uncomment the 1st line below and delete the next 4 lines.
            #validate_str("inputdata", param, safe, RX_DATASET, optional=True)
            if safe.kwargs['jobtype'] == 'Analysis' and not safe.kwargs['userfiles'] and 'inputdata' in param.kwargs:
                validate_str("inputdata", param, safe, RX_DATASET, optional=True)
            else:
                validate_str("inputdata", param, safe, RX_ANYTHING, optional=True)

            ## The client is not forced to define the primary dataset. So make sure to have
            ## defaults or take it from the input dataset. The primary dataset is needed for
            ## the LFN of the output/log files and for publication. We want to have it well
            ## defined even if publication and/or transfer to storage are off.
            if safe.kwargs['inputdata']:
                param.kwargs['primarydataset'] = safe.kwargs['inputdata'].split('/')[1]
            if not param.kwargs.get('primarydataset', None):
                if safe.kwargs['jobtype'] == 'PrivateMC':
                    param.kwargs['primarydataset'] = "CRAB_PrivateMC"
                elif safe.kwargs['jobtype'] == 'Analysis' and safe.kwargs['userfiles']:
                    param.kwargs['primarydataset'] = "CRAB_UserFiles"
                else:
                    param.kwargs['primarydataset'] = "CRAB_NoInput"
            ## We validate the primary dataset against DBS rules even if publication is
            ## off, because in the future we may want to give users the possibility to
            ## publish a posteriori.
            self._checkPrimaryDataset(param.kwargs, optional=False)
            validate_str("primarydataset", param, safe, RX_LFNPRIMDS, optional=False)

            validate_num("nonvaliddata", param, safe, optional=True)
            # If exactly one of outputDatasetTag and publishDbsUrl is set, raise an error (we need both or neither).
            validate_str("asyncdest", param, safe, RX_CMSSITE, optional=False)
            self._checkASODestination(safe.kwargs['asyncdest'])
            # We no longer use this attribute, but keep it around for older client compatibility
            validate_num("blacklistT1", param, safe, optional=True)
            validate_num("oneEventMode", param, safe, optional=True)
            validate_num("priority", param, safe, optional=True)
            validate_num("maxjobruntime", param, safe, optional=True)
            validate_num("numcores", param, safe, optional=True)
            validate_num("maxmemory", param, safe, optional=True)
            validate_str("dbsurl", param, safe, RX_DBSURL, optional=False)
            validate_strlist("tfileoutfiles", param, safe, RX_OUTFILES, custom_err="Incorrect tfileoutfiles parameter (TFileService). " \
                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
            validate_strlist("edmoutfiles", param, safe, RX_OUTFILES, custom_err="Incorrect edmoutfiles parameter (PoolOutputModule). " \
                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
            validate_strlist("runs", param, safe, RX_RUNS)
            validate_strlist("lumis", param, safe, RX_LUMIRANGE)
            if len(safe.kwargs["runs"]) != len(safe.kwargs["lumis"]):
                raise InvalidParameter("The number of runs and the number of lumis lists are different")
            validate_strlist("adduserfiles", param, safe, RX_ADDFILE)
            validate_str("asourl", param, safe, RX_ASOURL, optional=True)
            validate_str("asodb", param, safe, RX_ASODB, optional=True)
            safe.kwargs["asourl"], safe.kwargs["asodb"] = self._getAsoConfig(safe.kwargs["asourl"], safe.kwargs["asodb"])
            validate_str("scriptexe", param, safe, RX_ADDFILE, optional=True)
            validate_strlist("scriptargs", param, safe, RX_SCRIPTARGS)
            validate_str("scheddname", param, safe, RX_SCHEDD_NAME, optional=True)
            validate_str("collector", param, safe, RX_COLLECTOR, optional=True)
            validate_strlist("extrajdl", param, safe, RX_SCRIPTARGS)
            validate_num("dryrun", param, safe, optional=True)
            validate_num("ignoreglobalblacklist", param, safe, optional=True)

        elif method in ['POST']:
            validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
            validate_str("subresource", param, safe, RX_SUBRESTAT, optional=True)
            validate_strlist('jobids', param, safe, RX_JOBID)
            ## In a resubmission, the site black- and whitelists need to be interpreted
            ## differently than in an initial task submission. If there is no site black-
            ## or whitelist, set it to None and DataWorkflow will use the corresponding
            ## list defined in the initial task submission. If the site black- or whitelist
            ## is equal to the string 'empty', set it to an empty list and don't call
            ## validate_strlist as it would fail.
            if 'siteblacklist' not in param.kwargs:
                safe.kwargs['siteblacklist'] = None
            elif param.kwargs['siteblacklist'] == 'empty':
                safe.kwargs['siteblacklist'] = []
                del param.kwargs['siteblacklist']
            else:
                validate_strlist("siteblacklist", param, safe, RX_CMSSITE)
                safe.kwargs['siteblacklist'] = self._expandSites(safe.kwargs['siteblacklist'])
            if 'sitewhitelist' not in param.kwargs:
                safe.kwargs['sitewhitelist'] = None
            elif param.kwargs['sitewhitelist'] == 'empty':
                safe.kwargs['sitewhitelist'] = []
                del param.kwargs['sitewhitelist']
            else:
                validate_strlist("sitewhitelist", param, safe, RX_CMSSITE)
                safe.kwargs['sitewhitelist'] = self._expandSites(safe.kwargs['sitewhitelist'])
            validate_num("maxjobruntime", param, safe, optional=True)
            validate_num("maxmemory", param, safe, optional=True)
            validate_num("numcores", param, safe, optional=True)
            validate_num("priority", param, safe, optional=True)
            validate_num("force", param, safe, optional=True)
            validate_num("publication", param, safe, optional=True)

        elif method in ['GET']:
            validate_str("workflow", param, safe, RX_TASKNAME, optional=True)
            validate_str('subresource', param, safe, RX_SUBRESTAT, optional=True)
            validate_str('username', param, safe, RX_USERNAME, optional=True)
            validate_str('timestamp', param, safe, RX_DATE, optional=True) ## inserted by eric

            ## Used to determine how much information to return to the client for status.
            ## Also used by report to determine whether it has to check job states.
            validate_num("verbose", param, safe, optional=True)

            ## used by get log, get data
            validate_num('limit', param, safe, optional=True)
            validate_num('exitcode', param, safe, optional=True)
            validate_strlist('jobids', param, safe, RX_JOBID)

            ## used by errors and report (short format in report means we do not query DBS)
            validate_num('shortformat', param, safe, optional=True)

            # used by publicationStatus
            validate_str("asourl", param, safe, RX_ASOURL, optional=True)
            validate_str("asodb", param, safe, RX_ASODB, optional=True)

            ## validation parameters
            if not safe.kwargs['workflow'] and safe.kwargs['subresource']:
                raise InvalidParameter("Invalid input parameters")
            if safe.kwargs['subresource'] in ['data', 'logs'] and not safe.kwargs['limit'] and not safe.kwargs['jobids']:
                raise InvalidParameter("You need to specify the number of jobs to retrieve or their ids.")

        elif method in ['DELETE']:
            validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
            validate_num("force", param, safe, optional=True)
            validate_str("killwarning", param, safe, RX_TEXT_FAIL, optional=True)
            # Decode the killwarning message if present
            if safe.kwargs['killwarning']:
                try:
                    safe.kwargs['killwarning'] = b64decode(safe.kwargs['killwarning'])
                except TypeError:
                    raise InvalidParameter("Failure message is not in the accepted format")
Example #3
    def testFileTransferPUT(self):
        """
        _testFileTransferPUT_

        Simple test of the fileusertransfers PUT API with fake data.
        """
        # We just send fake data, which is not monitored by the dashboard.
        # Whether publication is ON or NOT is decided only the first time we see a user.
        for user in self.users:
            timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
            for i in range(self.totalFiles):
                now = int(time.time())
                # Generate a taskname
                workflowName = ""
                taskname = ""
                if user not in self.tasks:
                    workflowName = "".join([random.choice(string.ascii_lowercase) for _ in range(20)]) + "_" + str(now)
                    publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
                else:
                    workflowName = self.tasks[user]['workflowName']
                    publicationState = self.tasks[user]['publication']
                transferState = random.choice(['NEW', 'DONE'])
                taskname = generateTaskName(user, workflowName, timestamp)
                finalLfn = self.lfnBase % (user, workflowName, i, random.randint(1, 9999))
                idHash = getHashLfn(finalLfn)
                self.fileDoc['id'] = idHash
                self.fileDoc['job_id'] = i
                self.fileDoc['username'] = user
                self.fileDoc['taskname'] = taskname
                self.fileDoc['start_time'] = int(time.time())
                self.fileDoc['source_lfn'] = finalLfn
                self.fileDoc['destination_lfn'] = finalLfn
                self.fileDoc['transfer_state'] = transferState
                self.fileDoc['publication_state'] = publicationState
                print(self.fileDoc)
                self.server.put('/crabserver/dev/fileusertransfers', data=encodeRequest(self.fileDoc))
                # Putting the same doc twice should raise an error.
                # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
                # These task entries are kept for the later calls below.
                if user not in self.tasks:
                    self.tasks[user] = {'workflowName': workflowName, 'taskname': taskname, 'listOfIds': [],
                                        'publication': publicationState, 'toTransfer': 0, 'toPublish': 0, 'total': self.totalFiles}
                if self.tasks[user]['publication'] == 'NEW':
                    self.tasks[user]['toPublish'] += 1
                if transferState == 'NEW':
                    self.tasks[user]['toTransfer'] += 1
                self.tasks[user]['listOfIds'].append(idHash)
        # Now query the transfer and publication status for every task.
        for username in self.tasks:
            taskname = self.tasks[username]['taskname']
            for query in ['getTransferStatus', 'getPublicationStatus']:
                result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': query,
                                                                                    'username': username,
                                                                                    'taskname': taskname}))
                print(result)
                print(result[0]['result'])
                taskInfoDict = oracleOutputMapping(result, 'id')
                print(taskInfoDict)
                for key, docDict in taskInfoDict.items():
                    result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'getById', 'id': key}))

        randomUsers = random.sample(set(self.users), 3)  # Take half of the users and kill their transfers for a specific task
        for username in randomUsers:
            taskname = self.tasks[username]['taskname']
            result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfers',
                                                                                                  'username': username,
                                                                                                  'taskname': taskname}))
            print(result)
        # oneUser is kept for killing a whole list of IDs at once;
        # leftUsers will kill transfers one by one by specific id.
        leftUsers = list(set(self.users) - set(randomUsers))
        oneUser = random.sample(set(leftUsers), 1)
        leftUsers = list(set(leftUsers) - set(oneUser))
        for username in leftUsers:
            # First get all remaining ids for this user
            result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'getTransferStatus',
                                                                                                 'username': username,
                                                                                                 'taskname': self.tasks[username]['taskname']}))
            resultOut = oracleOutputMapping(result, None)
            print("**"*50)
            for outDict in resultOut:
                print(outDict)
                result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfersById',
                                                                                                      'username': username,
                                                                                                      'listOfIds': outDict['id']}))
                print(result)
            print(resultOut)
            print(result)
        for username in oneUser:
            result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfersById',
                                                                                               'username': username,
                                                                                               'listOfIds': self.tasks[username]['listOfIds']}, ['listOfIds']))
            # Since this asks to kill everything still in NEW, double-check against what was submitted above and that the kill output is correct.
            print(result)
            print(self.tasks[username])
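
encodeRequest is called above in two ways: with a plain document, and with a second argument naming list-valued keys ('listOfIds'). A rough stand-in sketch of what such a helper presumably does, form-encoding the dict and repeating list-valued keys once per element (an assumption for local experimentation, not the actual CRAB helper):

from urllib.parse import urlencode  # Python 3; urllib.urlencode in Python 2

def encodeRequest(configreq, listParams=None):
    """Form-encode a request dict; keys named in listParams are repeated
    once per list element, matching the two call styles in the test."""
    listParams = listParams or []
    pairs = [(k, v) for k, v in configreq.items() if k not in listParams]
    for name in listParams:
        pairs.extend((name, value) for value in configreq.get(name, []))
    return urlencode(pairs)

print(encodeRequest({'subresource': 'killTransfersById',
                     'username': 'someuser',
                     'listOfIds': ['id1', 'id2']},
                    ['listOfIds']))
# e.g. subresource=killTransfersById&username=someuser&listOfIds=id1&listOfIds=id2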