def validate(self, apiobj, method, api, param, safe):
    """Validate the request parameters for the given HTTP method.

    Each accepted parameter is checked with one of the ``validate_*`` helpers.
    As the inline comments below put it, a successful validation moves the
    parameter to the list of validated inputs (``safe.kwargs``). The set of
    accepted parameters depends on ``method``; PUT, POST, GET and DELETE are
    handled, any other method is left untouched by this function.

    :arg apiobj: not used by this method.
    :arg str method: HTTP method of the request.
    :arg api: not used by this method.
    :arg param: holder of the raw request arguments; ``param.kwargs`` is read
        and, for some parameters, modified in place.
    :arg safe: holder of the validated arguments; ``safe.kwargs`` is filled here.
    :raises InvalidParameter: when a cross-parameter check in this method fails
        (presumably also raised by the ``validate_*`` helpers themselves --
        confirm against their implementation).
    """
    if method in ['PUT']:
        username = cherrypy.request.user['login'] # username registered in SiteDB
        requestname = param.kwargs['workflow']
        ## The name given by the client is replaced by the generated task name
        ## before it gets validated.
        param.kwargs['workflow'] = generateTaskName(username, requestname)
        validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
        validate_str("activity", param, safe, RX_ACTIVITY, optional=True)
        validate_str("jobtype", param, safe, RX_JOBTYPE, optional=False)
        # TODO this should be changed to be non-optional
        validate_str("generator", param, safe, RX_GENERATOR, optional=True)
        validate_str("eventsperlumi", param, safe, RX_LUMIEVENTS, optional=True)
        validate_str("jobsw", param, safe, RX_CMSSW, optional=False)
        validate_num("nonprodsw", param, safe, optional=False)
        validate_str("jobarch", param, safe, RX_ARCH, optional=False)
        ## The release check is done only when the user did NOT ask to allow
        ## non-production releases.
        if not safe.kwargs["nonprodsw"]:
            self._checkReleases(safe.kwargs['jobarch'], safe.kwargs['jobsw'])
        validate_num("useparent", param, safe, optional=True)
        validate_str("secondarydata", param, safe, RX_DATASET, optional=True)
        validate_strlist("siteblacklist", param, safe, RX_CMSSITE)
        safe.kwargs['siteblacklist'] = self._expandSites(safe.kwargs['siteblacklist'])
        validate_strlist("sitewhitelist", param, safe, RX_CMSSITE)
        safe.kwargs['sitewhitelist'] = self._expandSites(safe.kwargs['sitewhitelist'])
        validate_str("splitalgo", param, safe, RX_SPLIT, optional=False)
        validate_num("algoargs", param, safe, optional=False)
        validate_num("totalunits", param, safe, optional=True)
        validate_str("cachefilename", param, safe, RX_CACHENAME, optional=False)
        validate_str("debugfilename", param, safe, RX_CACHENAME, optional=True)
        validate_str("cacheurl", param, safe, RX_CACHEURL, optional=False)
        validate_str("lfn", param, safe, RX_LFN, optional=True)
        self._checkOutLFN(safe.kwargs, username)
        validate_strlist("addoutputfiles", param, safe, RX_ADDFILE,
                         custom_err="Incorrect 'JobType.outputFiles' parameter. "
                                    "Allowed regexp: '%s'." % RX_ADDFILE.pattern)
        validate_strlist("userfiles", param, safe, RX_USERFILE)
        validate_num("savelogsflag", param, safe, optional=False)
        validate_num("saveoutput", param, safe, optional=True)
        validate_num("faillimit", param, safe, optional=True)
        validate_num("ignorelocality", param, safe, optional=True)
        ## When locality is ignored, the sites listed in the central
        ## configuration under 'ign-locality-blacklist' are added to the blacklist.
        if safe.kwargs['ignorelocality'] and self.centralcfg.centralconfig.get('ign-locality-blacklist', []):
            safe.kwargs['siteblacklist'] += self._expandSites(self.centralcfg.centralconfig['ign-locality-blacklist'])
        validate_str("vorole", param, safe, RX_VOPARAMS, optional=True)
        validate_str("vogroup", param, safe, RX_VOPARAMS, optional=True)
        validate_num("publication", param, safe, optional=False)
        ## The publication DBS URL is mandatory only when publication is requested.
        validate_str("publishdbsurl", param, safe, RX_DBSURL, optional=(not bool(safe.kwargs['publication'])))
        ## We might want to remove publishname once the backward compatibility
        ## won't be needed anymore. Then we can just keep publishname2
        ## NB: AFAIK the only client not using the CRABLibrary but direct curl is HC,
        ## therefore we will need to make sure we do not break it!
        ## The following two lines will be removed in the future once we will
        ## not need backward compatibility anymore
        self._checkPublishDataName(param.kwargs, safe.kwargs['lfn'], requestname, username)
        validate_str('publishname', param, safe, RX_ANYTHING, optional=True)
        ## And this if as well, just do
        ## self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username)
        if not safe.kwargs["publishname"]: #new clients won't define this anymore
            ## The (user specified part of the) publication dataset name must be
            ## specified and must pass DBS validation. Since this is the correct
            ## validation function, it must be done before the
            ## validate_str("publishname", ...) we have below.
            self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username)
        else:
            param.kwargs["publishname2"] = safe.kwargs["publishname"]
        ## 'publishname' was already validated above in _checkPublishDataName().
        ## Calling validate_str with a fake regexp to move the param to the
        ## list of validated inputs
        validate_str("publishname2", param, safe, RX_ANYTHING, optional=True)
        validate_num("publishgroupname", param, safe, optional=True)

        if safe.kwargs['jobtype'] == 'PrivateMC':
            ## 'inputdata' has not been validated yet and may be absent from the
            ## request (see the "'inputdata' in param.kwargs" test further down),
            ## so read it with .get() instead of failing with a KeyError.
            if param.kwargs.get('inputdata'):
                msg = ("Invalid 'inputdata' parameter."
                       " Job type PrivateMC does not take any input dataset."
                       " If you really intend to run over an input dataset, then you must use job type Analysis.")
                raise InvalidParameter(msg)
            if safe.kwargs['userfiles']:
                msg = ("Invalid 'userfiles' parameter."
                       " Job type PrivateMC does not take any input files."
                       " If you really intend to run over input files, then you must use job type Analysis.")
                raise InvalidParameter(msg)

        ## Client versions < 3.3.1511 may put in the input dataset something that is not
        ## really an input dataset (for PrivateMC or user input files). So the only case
        ## in which we are sure that we have to validate the input dataset is when the
        ## workflow type is Analysis, the workflow does not run on user input files and
        ## an input dataset is defined (scriptExe may not define an input).
        ## Once we don't care anymore about backward compatibility with client < 3.3.1511,
        ## we can uncomment the 1st line below and delete the next 4 lines.
        #validate_str("inputdata", param, safe, RX_DATASET, optional=True)
        if safe.kwargs['jobtype'] == 'Analysis' and not safe.kwargs['userfiles'] and 'inputdata' in param.kwargs:
            validate_str("inputdata", param, safe, RX_DATASET, optional=True)
        else:
            validate_str("inputdata", param, safe, RX_ANYTHING, optional=True)

        ## The client is not forced to define the primary dataset. So make sure to have
        ## defaults or take it from the input dataset. The primary dataset is needed for
        ## the LFN of the output/log files and for publication. We want to have it well
        ## defined even if publication and/or transfer to storage are off.
        if safe.kwargs['inputdata']:
            ## NOTE(review): this assumes the input dataset contains a '/'; a value
            ## accepted through RX_ANYTHING above may not -- confirm.
            param.kwargs['primarydataset'] = safe.kwargs['inputdata'].split('/')[1]
        if not param.kwargs.get('primarydataset', None):
            if safe.kwargs['jobtype'] == 'PrivateMC':
                param.kwargs['primarydataset'] = "CRAB_PrivateMC"
            elif safe.kwargs['jobtype'] == 'Analysis' and safe.kwargs['userfiles']:
                param.kwargs['primarydataset'] = "CRAB_UserFiles"
            else:
                param.kwargs['primarydataset'] = "CRAB_NoInput"
        ## We validate the primary dataset against DBS rules even if publication is off,
        ## because in the future we may want to give the possibility to users to publish
        ## a posteriori.
        self._checkPrimaryDataset(param.kwargs, optional=False)
        validate_str("primarydataset", param, safe, RX_LFNPRIMDS, optional=False)

        validate_num("nonvaliddata", param, safe, optional=True)
        #if one and only one between outputDatasetTag and publishDbsUrl is set raise an error (we need both or none of them)
        validate_str("asyncdest", param, safe, RX_CMSSITE, optional=False)
        self._checkASODestination(safe.kwargs['asyncdest'])
        # We no longer use this attribute, but keep it around for older client compatibility
        validate_num("blacklistT1", param, safe, optional=True)
        validate_num("oneEventMode", param, safe, optional=True)
        validate_num("priority", param, safe, optional=True)
        validate_num("maxjobruntime", param, safe, optional=True)
        validate_num("numcores", param, safe, optional=True)
        validate_num("maxmemory", param, safe, optional=True)
        validate_str("dbsurl", param, safe, RX_DBSURL, optional=False)
        validate_strlist("tfileoutfiles", param, safe, RX_OUTFILES,
                         custom_err="Incorrect tfileoutfiles parameter (TFileService). "
                                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
        validate_strlist("edmoutfiles", param, safe, RX_OUTFILES,
                         custom_err="Incorrect edmoutfiles parameter (PoolOutputModule). "
                                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
        validate_strlist("runs", param, safe, RX_RUNS)
        validate_strlist("lumis", param, safe, RX_LUMIRANGE)
        ## The two lists must have the same number of entries.
        if len(safe.kwargs["runs"]) != len(safe.kwargs["lumis"]):
            raise InvalidParameter("The number of runs and the number of lumis lists are different")
        validate_strlist("adduserfiles", param, safe, RX_ADDFILE)
        validate_str("asourl", param, safe, RX_ASOURL, optional=True)
        validate_str("asodb", param, safe, RX_ASODB, optional=True)
        safe.kwargs["asourl"], safe.kwargs["asodb"] = self._getAsoConfig(safe.kwargs["asourl"], safe.kwargs["asodb"])
        validate_str("scriptexe", param, safe, RX_ADDFILE, optional=True)
        validate_strlist("scriptargs", param, safe, RX_SCRIPTARGS)
        validate_str("scheddname", param, safe, RX_SCHEDD_NAME, optional=True)
        validate_str("collector", param, safe, RX_COLLECTOR, optional=True)
        validate_strlist("extrajdl", param, safe, RX_SCRIPTARGS)
        validate_num("dryrun", param, safe, optional=True)
        validate_num("ignoreglobalblacklist", param, safe, optional=True)

    elif method in ['POST']:
        validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
        validate_str("subresource", param, safe, RX_SUBRESTAT, optional=True)
        validate_numlist('jobids', param, safe)
        ## In a resubmission, the site black- and whitelists need to be interpreted
        ## differently than in an initial task submission. If there is no site black-
        ## or whitelist, set it to None and DataWorkflow will use the corresponding
        ## list defined in the initial task submission. If the site black- or whitelist
        ## is equal to the string 'empty', set it to an empty list and don't call
        ## validate_strlist as it would fail.
        for listname in ('siteblacklist', 'sitewhitelist'):
            if listname not in param.kwargs:
                safe.kwargs[listname] = None
            elif param.kwargs[listname] == 'empty':
                safe.kwargs[listname] = []
                del param.kwargs[listname]
            else:
                validate_strlist(listname, param, safe, RX_CMSSITE)
                safe.kwargs[listname] = self._expandSites(safe.kwargs[listname])
        validate_num("maxjobruntime", param, safe, optional=True)
        validate_num("maxmemory", param, safe, optional=True)
        validate_num("numcores", param, safe, optional=True)
        validate_num("priority", param, safe, optional=True)
        validate_num("force", param, safe, optional=True)
        validate_num("publication", param, safe, optional=True)

    elif method in ['GET']:
        validate_str("workflow", param, safe, RX_TASKNAME, optional=True)
        validate_str('subresource', param, safe, RX_SUBRESTAT, optional=True)
        validate_str('username', param, safe, RX_USERNAME, optional=True)
        validate_str('timestamp', param, safe, RX_DATE, optional=True) ## inserted by eric
        ## Used to determine how much information to return to the client for status.
        ## also used by report to determine if it has to check job states
        validate_num("verbose", param, safe, optional=True)
        ## used by get log, get data
        validate_num('limit', param, safe, optional=True)
        validate_num('exitcode', param, safe, optional=True)
        validate_numlist('jobids', param, safe)
        ## used by errors and report (short format in report means we do not query DBS)
        validate_num('shortformat', param, safe, optional=True)

        ## validation parameters
        ## A subresource cannot be requested without a workflow.
        if not safe.kwargs['workflow'] and safe.kwargs['subresource']:
            raise InvalidParameter("Invalid input parameters")
        if safe.kwargs['subresource'] in ['data', 'logs'] and not safe.kwargs['limit'] and not safe.kwargs['jobids']:
            raise InvalidParameter("You need to specify the number of jobs to retrieve or their ids.")

    elif method in ['DELETE']:
        validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
        validate_num("force", param, safe, optional=True)
        validate_numlist('jobids', param, safe)
        validate_str("killwarning", param, safe, RX_TEXT_FAIL, optional=True)
        #decode killwarning message if present
        if safe.kwargs['killwarning']:
            try:
                safe.kwargs['killwarning'] = b64decode(safe.kwargs['killwarning'])
            except (TypeError, ValueError):
                ## Python 2 reports malformed base64 with TypeError, Python 3 with
                ## binascii.Error (a ValueError subclass); handle both.
                raise InvalidParameter("Failure message is not in the accepted format")
def validate(self, apiobj, method, api, param, safe): #pylint: disable=unused-argument
    """Validate the request parameters for the given HTTP method.

    Each accepted parameter is checked with one of the ``validate_*`` helpers.
    As the inline comments below put it, a successful validation moves the
    parameter to the list of validated inputs (``safe.kwargs``). The set of
    accepted parameters depends on ``method``; PUT, POST, GET and DELETE are
    handled, any other method is left untouched by this function.

    :arg apiobj: not used by this method.
    :arg str method: HTTP method of the request.
    :arg api: not used by this method.
    :arg param: holder of the raw request arguments; ``param.kwargs`` is read
        and, for some parameters, modified in place.
    :arg safe: holder of the validated arguments; ``safe.kwargs`` is filled here.
    :raises InvalidParameter: when a cross-parameter check in this method fails
        (presumably also raised by the ``validate_*`` helpers themselves --
        confirm against their implementation).
    """
    if method in ['PUT']:
        username = cherrypy.request.user['login'] # username registered in CMS WEB frontend
        requestname = param.kwargs['workflow']
        ## The name given by the client is replaced by the generated task name
        ## before it gets validated.
        param.kwargs['workflow'] = generateTaskName(username, requestname)
        validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
        validate_str("activity", param, safe, RX_ACTIVITY, optional=True)
        validate_str("jobtype", param, safe, RX_JOBTYPE, optional=False)
        # TODO this should be changed to be non-optional
        validate_str("generator", param, safe, RX_GENERATOR, optional=True)
        validate_str("eventsperlumi", param, safe, RX_LUMIEVENTS, optional=True)
        validate_str("jobsw", param, safe, RX_CMSSW, optional=False)
        validate_num("nonprodsw", param, safe, optional=False)
        validate_str("jobarch", param, safe, RX_ARCH, optional=False)
        ## The release check is done only when the user did NOT ask to allow
        ## non-production releases.
        if not safe.kwargs["nonprodsw"]:
            self._checkReleases(safe.kwargs['jobarch'], safe.kwargs['jobsw'])
        validate_num("useparent", param, safe, optional=True)
        validate_str("secondarydata", param, safe, RX_DATASET, optional=True)
        validate_strlist("siteblacklist", param, safe, RX_CMSSITE)
        safe.kwargs['siteblacklist'] = self._expandSites(safe.kwargs['siteblacklist'])
        validate_strlist("sitewhitelist", param, safe, RX_CMSSITE)
        safe.kwargs['sitewhitelist'] = self._expandSites(safe.kwargs['sitewhitelist'])
        validate_str("splitalgo", param, safe, RX_SPLIT, optional=False)
        validate_num("algoargs", param, safe, optional=False)
        ## 'totalunits' is first tried with validate_num; if that is rejected
        ## it is validated again with validate_real.
        try:
            validate_num("totalunits", param, safe, optional=True)
        except InvalidParameter:
            validate_real("totalunits", param, safe, optional=True)
        validate_str("cachefilename", param, safe, RX_CACHENAME, optional=False)
        validate_str("debugfilename", param, safe, RX_CACHENAME, optional=True)
        validate_str("cacheurl", param, safe, RX_CACHEURL, optional=False)
        validate_str("lfn", param, safe, RX_LFN, optional=True)
        self._checkOutLFN(safe.kwargs, username)
        validate_strlist("addoutputfiles", param, safe, RX_ADDFILE,
                         custom_err="Incorrect 'JobType.outputFiles' parameter. "
                                    "Allowed regexp for each filename: '%s'." % RX_ADDFILE.pattern)
        validate_strlist("userfiles", param, safe, RX_USERFILE,
                         custom_err="Incorrect 'Data.userInputFiles' parameter. "
                                    "Allowed regexp for each filename: '%s'." % RX_USERFILE.pattern)
        validate_num("savelogsflag", param, safe, optional=False)
        validate_num("saveoutput", param, safe, optional=True)
        validate_num("faillimit", param, safe, optional=True)
        validate_num("ignorelocality", param, safe, optional=True)
        ## When locality is ignored, the sites listed in the central
        ## configuration under 'ign-locality-blacklist' are added to the blacklist.
        if safe.kwargs['ignorelocality'] and self.centralcfg.centralconfig.get('ign-locality-blacklist', []):
            safe.kwargs['siteblacklist'] += self._expandSites(self.centralcfg.centralconfig['ign-locality-blacklist'])
        validate_str("vorole", param, safe, RX_VOPARAMS, optional=True)
        validate_str("vogroup", param, safe, RX_VOPARAMS, optional=True)
        validate_num("publication", param, safe, optional=False)
        ## The publication DBS URL is mandatory only when publication is requested.
        validate_str("publishdbsurl", param, safe, RX_DBSURL, optional=(not bool(safe.kwargs['publication'])))
        ## We might want to remove publishname once the backward compatibility
        ## won't be needed anymore. Then we can just keep publishname2
        ## NB: AFAIK the only client not using the CRABLibrary but direct curl is HC,
        ## therefore we will need to make sure we do not break it!
        ## The following two lines will be removed in the future once we will
        ## not need backward compatibility anymore
        self._checkPublishDataName(param.kwargs, safe.kwargs['lfn'], requestname, username)
        validate_str('publishname', param, safe, RX_ANYTHING, optional=True)
        ## And this if as well, just do
        ## self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username)
        if not safe.kwargs["publishname"]: #new clients won't define this anymore
            ## The (user specified part of the) publication dataset name must be
            ## specified and must pass DBS validation. Since this is the correct
            ## validation function, it must be done before the
            ## validate_str("publishname", ...) we have below.
            self._checkPublishDataName2(param.kwargs, safe.kwargs['lfn'], requestname, username)
        else:
            param.kwargs["publishname2"] = safe.kwargs["publishname"]
        ## 'publishname' was already validated above in _checkPublishDataName().
        ## Calling validate_str with a fake regexp to move the param to the
        ## list of validated inputs
        validate_str("publishname2", param, safe, RX_ANYTHING, optional=True)
        validate_num("publishgroupname", param, safe, optional=True)

        if safe.kwargs['jobtype'] == 'PrivateMC':
            ## 'inputdata' has not been validated yet and may be absent from the
            ## request (see the "'inputdata' in param.kwargs" test further down),
            ## so read it with .get() instead of failing with a KeyError.
            if param.kwargs.get('inputdata'):
                msg = ("Invalid 'inputdata' parameter."
                       " Job type PrivateMC does not take any input dataset."
                       " If you really intend to run over an input dataset, then you must use job type Analysis.")
                raise InvalidParameter(msg)
            if safe.kwargs['userfiles']:
                msg = ("Invalid 'userfiles' parameter."
                       " Job type PrivateMC does not take any input files."
                       " If you really intend to run over input files, then you must use job type Analysis.")
                raise InvalidParameter(msg)

        ## Client versions < 3.3.1511 may put in the input dataset something that is not
        ## really an input dataset (for PrivateMC or user input files). So the only case
        ## in which we are sure that we have to validate the input dataset is when the
        ## workflow type is Analysis, the workflow does not run on user input files and
        ## an input dataset is defined (scriptExe may not define an input).
        ## Once we don't care anymore about backward compatibility with client < 3.3.1511,
        ## we can uncomment the 1st line below and delete the next 4 lines.
        #validate_str("inputdata", param, safe, RX_DATASET, optional=True)
        if safe.kwargs['jobtype'] == 'Analysis' and not safe.kwargs['userfiles'] and 'inputdata' in param.kwargs:
            validate_str("inputdata", param, safe, RX_DATASET, optional=True)
        else:
            validate_str("inputdata", param, safe, RX_ANYTHING, optional=True)

        ## The client is not forced to define the primary dataset. So make sure to have
        ## defaults or take it from the input dataset. The primary dataset is needed for
        ## the LFN of the output/log files and for publication. We want to have it well
        ## defined even if publication and/or transfer to storage are off.
        if safe.kwargs['inputdata']:
            ## NOTE(review): this assumes the input dataset contains a '/'; a value
            ## accepted through RX_ANYTHING above may not -- confirm.
            param.kwargs['primarydataset'] = safe.kwargs['inputdata'].split('/')[1]
        if not param.kwargs.get('primarydataset', None):
            if safe.kwargs['jobtype'] == 'PrivateMC':
                param.kwargs['primarydataset'] = "CRAB_PrivateMC"
            elif safe.kwargs['jobtype'] == 'Analysis' and safe.kwargs['userfiles']:
                param.kwargs['primarydataset'] = "CRAB_UserFiles"
            else:
                param.kwargs['primarydataset'] = "CRAB_NoInput"
        ## We validate the primary dataset against DBS rules even if publication is off,
        ## because in the future we may want to give the possibility to users to publish
        ## a posteriori.
        self._checkPrimaryDataset(param.kwargs, optional=False)
        validate_str("primarydataset", param, safe, RX_LFNPRIMDS, optional=False)

        validate_num("nonvaliddata", param, safe, optional=True)
        #if one and only one between outputDatasetTag and publishDbsUrl is set raise an error (we need both or none of them)
        validate_str("asyncdest", param, safe, RX_CMSSITE, optional=False)
        self._checkASODestination(safe.kwargs['asyncdest'])
        # We no longer use this attribute, but keep it around for older client compatibility
        validate_num("blacklistT1", param, safe, optional=True)
        validate_num("oneEventMode", param, safe, optional=True)
        validate_num("priority", param, safe, optional=True)
        validate_num("maxjobruntime", param, safe, optional=True)
        validate_num("numcores", param, safe, optional=True)
        validate_num("maxmemory", param, safe, optional=True)
        validate_str("dbsurl", param, safe, RX_DBSURL, optional=False)
        validate_strlist("tfileoutfiles", param, safe, RX_OUTFILES,
                         custom_err="Incorrect tfileoutfiles parameter (TFileService). "
                                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
        validate_strlist("edmoutfiles", param, safe, RX_OUTFILES,
                         custom_err="Incorrect edmoutfiles parameter (PoolOutputModule). "
                                    "Allowed regexp: '%s'." % RX_OUTFILES.pattern)
        validate_strlist("runs", param, safe, RX_RUNS)
        validate_strlist("lumis", param, safe, RX_LUMIRANGE)
        ## The two lists must have the same number of entries.
        if len(safe.kwargs["runs"]) != len(safe.kwargs["lumis"]):
            raise InvalidParameter("The number of runs and the number of lumis lists are different")
        validate_strlist("adduserfiles", param, safe, RX_ADDFILE)
        validate_str("asourl", param, safe, RX_ASOURL, optional=True)
        validate_str("asodb", param, safe, RX_ASODB, optional=True)
        safe.kwargs["asourl"], safe.kwargs["asodb"] = self._getAsoConfig(safe.kwargs["asourl"], safe.kwargs["asodb"])
        validate_str("scriptexe", param, safe, RX_ADDFILE, optional=True)
        validate_strlist("scriptargs", param, safe, RX_SCRIPTARGS)
        validate_str("scheddname", param, safe, RX_SCHEDD_NAME, optional=True)
        validate_str("collector", param, safe, RX_COLLECTOR, optional=True)
        validate_strlist("extrajdl", param, safe, RX_SCRIPTARGS)
        validate_num("dryrun", param, safe, optional=True)
        validate_num("ignoreglobalblacklist", param, safe, optional=True)

    elif method in ['POST']:
        validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
        validate_str("subresource", param, safe, RX_SUBRESTAT, optional=True)
        validate_strlist('jobids', param, safe, RX_JOBID)
        ## In a resubmission, the site black- and whitelists need to be interpreted
        ## differently than in an initial task submission. If there is no site black-
        ## or whitelist, set it to None and DataWorkflow will use the corresponding
        ## list defined in the initial task submission. If the site black- or whitelist
        ## is equal to the string 'empty', set it to an empty list and don't call
        ## validate_strlist as it would fail.
        for listname in ('siteblacklist', 'sitewhitelist'):
            if listname not in param.kwargs:
                safe.kwargs[listname] = None
            elif param.kwargs[listname] == 'empty':
                safe.kwargs[listname] = []
                del param.kwargs[listname]
            else:
                validate_strlist(listname, param, safe, RX_CMSSITE)
                safe.kwargs[listname] = self._expandSites(safe.kwargs[listname])
        validate_num("maxjobruntime", param, safe, optional=True)
        validate_num("maxmemory", param, safe, optional=True)
        validate_num("numcores", param, safe, optional=True)
        validate_num("priority", param, safe, optional=True)
        validate_num("force", param, safe, optional=True)
        validate_num("publication", param, safe, optional=True)

    elif method in ['GET']:
        validate_str("workflow", param, safe, RX_TASKNAME, optional=True)
        validate_str('subresource', param, safe, RX_SUBRESTAT, optional=True)
        validate_str('username', param, safe, RX_USERNAME, optional=True)
        validate_str('timestamp', param, safe, RX_DATE, optional=True) ## inserted by eric
        ## Used to determine how much information to return to the client for status.
        ## also used by report to determine if it has to check job states
        validate_num("verbose", param, safe, optional=True)
        ## used by get log, get data
        validate_num('limit', param, safe, optional=True)
        validate_num('exitcode', param, safe, optional=True)
        validate_strlist('jobids', param, safe, RX_JOBID)
        ## used by errors and report (short format in report means we do not query DBS)
        validate_num('shortformat', param, safe, optional=True)
        # used by publicationStatus
        validate_str("asourl", param, safe, RX_ASOURL, optional=True)
        validate_str("asodb", param, safe, RX_ASODB, optional=True)

        ## validation parameters
        ## A subresource cannot be requested without a workflow.
        if not safe.kwargs['workflow'] and safe.kwargs['subresource']:
            raise InvalidParameter("Invalid input parameters")
        if safe.kwargs['subresource'] in ['data', 'logs'] and not safe.kwargs['limit'] and not safe.kwargs['jobids']:
            raise InvalidParameter("You need to specify the number of jobs to retrieve or their ids.")

    elif method in ['DELETE']:
        validate_str("workflow", param, safe, RX_TASKNAME, optional=False)
        validate_num("force", param, safe, optional=True)
        validate_str("killwarning", param, safe, RX_TEXT_FAIL, optional=True)
        #decode killwarning message if present
        if safe.kwargs['killwarning']:
            try:
                safe.kwargs['killwarning'] = b64decode(safe.kwargs['killwarning'])
            except (TypeError, ValueError):
                ## Python 2 reports malformed base64 with TypeError, Python 3 with
                ## binascii.Error (a ValueError subclass); handle both.
                raise InvalidParameter("Failure message is not in the accepted format")
def testFileTransferPUT(self):
    """
    _testFileTransferPUT_

    Upload fake transfer documents for every user in self.users, read them
    back through the status subresources, then kill the transfers in three
    different ways (per task, one id at a time, and by list of ids).

    The test only prints the server answers; it contains no assertions.
    """
    endpoint = '/crabserver/dev/fileusertransfers'

    # We just sent fake data which is not monitored by dashboard.
    # Also only the first time to decide is publication ON or NOT
    for user in self.users:
        timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
        for i in range(self.totalFiles):
            # Generate a taskname
            if user not in self.tasks:
                now = int(time.time())
                randomPart = "".join([random.choice(string.ascii_lowercase) for _ in range(20)])
                workflowName = randomPart + "_" + str(now)
                publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
            else:
                # Reuse what was decided for the first file of this user.
                workflowName = self.tasks[user]['workflowName']
                publicationState = self.tasks[user]['publication']
            transferState = random.choice(['NEW', 'DONE'])
            taskname = generateTaskName(user, workflowName, timestamp)
            finalLfn = self.lfnBase % (user, workflowName, i, random.randint(1, 9999))
            idHash = getHashLfn(finalLfn)
            self.fileDoc['id'] = idHash
            self.fileDoc['job_id'] = i
            self.fileDoc['username'] = user
            self.fileDoc['taskname'] = taskname
            self.fileDoc['start_time'] = int(time.time())
            self.fileDoc['source_lfn'] = finalLfn
            self.fileDoc['destination_lfn'] = finalLfn
            self.fileDoc['transfer_state'] = transferState
            self.fileDoc['publication_state'] = publicationState
            print(self.fileDoc)
            self.server.put(endpoint, data=encodeRequest(self.fileDoc))
            # if I will put the same doc twice, it should raise an error.
            # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
            # This tasks are for the future and next calls
            if user not in self.tasks:
                self.tasks[user] = {
                    'workflowName': workflowName,
                    'taskname': taskname,
                    'listOfIds': [],
                    'publication': publicationState,
                    'toTransfer': 0,
                    'toPublish': 0,
                    'total': self.totalFiles,
                }
            if self.tasks[user]['publication'] == 'NEW':
                self.tasks[user]['toPublish'] += 1
            if transferState == 'NEW':
                self.tasks[user]['toTransfer'] += 1
            self.tasks[user]['listOfIds'].append(idHash)

    # NOTE(review): the original comment here read "This should raise an
    # error", but nothing below checks for one -- confirm the intent.
    for username in self.tasks:
        taskname = self.tasks[username]['taskname']
        for query in ['getTransferStatus', 'getPublicationStatus']:
            result = self.server.get(endpoint,
                                     data=encodeRequest({'subresource': query,
                                                         'username': username,
                                                         'taskname': taskname}))
            print(result)
            print(result[0]['result'])
            taskInfoDict = oracleOutputMapping(result, 'id')
            print(taskInfoDict)
            for key, _ in taskInfoDict.items():
                result = self.server.get(endpoint,
                                         data=encodeRequest({'subresource': 'getById',
                                                             'id': key}))

    # random.sample() needs a sequence: passing a set is deprecated since
    # Python 3.9 and raises TypeError from 3.11, so sample from a list.
    uniqueUsers = list(set(self.users))
    # Take half of the users and kill their transfers for specific task
    # (the sample size is fixed to 3; presumably self.users holds 6 users)
    randomUsers = random.sample(uniqueUsers, 3)
    for username in randomUsers:
        taskname = self.tasks[username]['taskname']
        result = self.server.post(endpoint,
                                  data=encodeRequest({'subresource': 'killTransfers',
                                                      'username': username,
                                                      'taskname': taskname}))
        print(result)

    # oneUser is left for killing a list of IDs
    # leftUsers will be killing transfers one by one for specific id.
    leftUsers = list(set(self.users) - set(randomUsers))
    oneUser = random.sample(leftUsers, 1)
    leftUsers = list(set(leftUsers) - set(oneUser))
    for username in leftUsers:
        # First get all left ids for this users
        result = self.server.get(endpoint,
                                 data=encodeRequest({'subresource': 'getTransferStatus',
                                                     'username': username,
                                                     'taskname': self.tasks[username]['taskname']}))
        resultOut = oracleOutputMapping(result, None)
        print("**" * 50)
        for outDict in resultOut:
            print(outDict)
            result = self.server.post(endpoint,
                                      data=encodeRequest({'subresource': 'killTransfersById',
                                                          'username': username,
                                                          'listOfIds': outDict['id']}))
            print(result)
        print(resultOut)
        print(result)

    for username in oneUser:
        result = self.server.post(endpoint,
                                  data=encodeRequest({'subresource': 'killTransfersById',
                                                      'username': username,
                                                      'listOfIds': self.tasks[username]['listOfIds']},
                                                     ['listOfIds']))
        # As it asks to kill all which are in new, need to double check what we submitted before and if the output of killed is correct
        print(result)
        print(self.tasks[username])
def testFileTransferPUT(self):
    """
    _testFileTransferPUT_

    Upload fake transfer documents for every user in self.users, read them
    back through the status subresources, then kill the transfers in three
    different ways (per task, one id at a time, and by list of ids).

    The test only prints the server answers; it contains no assertions.
    """
    # We just sent fake data which is not monitored by dashboard.
    # Also only the first time to decide is publication ON or NOT
    for user in self.users:
        timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
        for i in range(self.totalFiles):
            # Generate a taskname
            if user not in self.tasks:
                now = int(time.time())
                workflowName = "".join([random.choice(string.ascii_lowercase) for _ in range(20)]) + "_" + str(now)
                publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
            else:
                # Reuse what was decided for the first file of this user.
                workflowName = self.tasks[user]['workflowName']
                publicationState = self.tasks[user]['publication']
            transferState = random.choice(['NEW', 'DONE'])
            taskname = generateTaskName(user, workflowName, timestamp)
            finalLfn = self.lfnBase % (user, workflowName, i, random.randint(1, 9999))
            idHash = getHashLfn(finalLfn)
            self.fileDoc['id'] = idHash
            self.fileDoc['job_id'] = i
            self.fileDoc['username'] = user
            self.fileDoc['taskname'] = taskname
            self.fileDoc['start_time'] = int(time.time())
            self.fileDoc['source_lfn'] = finalLfn
            self.fileDoc['destination_lfn'] = finalLfn
            self.fileDoc['transfer_state'] = transferState
            self.fileDoc['publication_state'] = publicationState
            print(self.fileDoc)
            self.server.put('/crabserver/dev/fileusertransfers', data=encodeRequest(self.fileDoc))
            # if I will put the same doc twice, it should raise an error.
            # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
            # This tasks are for the future and next calls
            if user not in self.tasks:
                self.tasks[user] = {'workflowName': workflowName,
                                    'taskname': taskname,
                                    'listOfIds': [],
                                    'publication': publicationState,
                                    'toTransfer': 0,
                                    'toPublish': 0,
                                    'total': self.totalFiles}
            if self.tasks[user]['publication'] == 'NEW':
                self.tasks[user]['toPublish'] += 1
            if transferState == 'NEW':
                self.tasks[user]['toTransfer'] += 1
            self.tasks[user]['listOfIds'].append(idHash)

    # NOTE(review): the original comment here read "This should raise an
    # error", but nothing below checks for one -- confirm the intent.
    for username in self.tasks:
        taskname = self.tasks[username]['taskname']
        for query in ['getTransferStatus', 'getPublicationStatus']:
            result = self.server.get('/crabserver/dev/fileusertransfers',
                                     data=encodeRequest({'subresource': query,
                                                         'username': username,
                                                         'taskname': taskname}))
            print(result)
            print(result[0]['result'])
            taskInfoDict = oracleOutputMapping(result, 'id')
            print(taskInfoDict)
            for key, _ in taskInfoDict.items():
                result = self.server.get('/crabserver/dev/fileusertransfers',
                                         data=encodeRequest({'subresource': 'getById', 'id': key}))

    # random.sample() needs a sequence: passing a set is deprecated since
    # Python 3.9 and raises TypeError from 3.11, so sample from a list.
    # Take half of the users and kill their transfers for specific task
    # (the sample size is fixed to 3; presumably self.users holds 6 users)
    randomUsers = random.sample(list(set(self.users)), 3)
    for username in randomUsers:
        taskname = self.tasks[username]['taskname']
        result = self.server.post('/crabserver/dev/fileusertransfers',
                                  data=encodeRequest({'subresource': 'killTransfers',
                                                      'username': username,
                                                      'taskname': taskname}))
        print(result)

    # oneUser is left for killing a list of IDs
    # leftUsers will be killing transfers one by one for specific id.
    leftUsers = list(set(self.users) - set(randomUsers))
    oneUser = random.sample(leftUsers, 1)
    leftUsers = list(set(leftUsers) - set(oneUser))
    for username in leftUsers:
        # First get all left ids for this users
        result = self.server.get('/crabserver/dev/fileusertransfers',
                                 data=encodeRequest({'subresource': 'getTransferStatus',
                                                     'username': username,
                                                     'taskname': self.tasks[username]['taskname']}))
        resultOut = oracleOutputMapping(result, None)
        print("**"*50)
        for outDict in resultOut:
            print(outDict)
            result = self.server.post('/crabserver/dev/fileusertransfers',
                                      data=encodeRequest({'subresource': 'killTransfersById',
                                                          'username': username,
                                                          'listOfIds': outDict['id']}))
            print(result)
        print(resultOut)
        print(result)

    for username in oneUser:
        result = self.server.post('/crabserver/dev/fileusertransfers',
                                  data=encodeRequest({'subresource': 'killTransfersById',
                                                      'username': username,
                                                      'listOfIds': self.tasks[username]['listOfIds']},
                                                     ['listOfIds']))
        # As it asks to kill all which are in new, need to double check what we submitted before and if the output of killed is correct
        print(result)
        print(self.tasks[username])