def __init__(self, config):
    """
    ___init___

    Initialise class members
    """
    BaseWorkerThread.__init__(self)

    self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
    self.siteDB = SiteDBJSON()
    self.dbsUrl = config.DBSInterface.globalDBSUrl

    # Component options, with defaults applied when absent from the config.
    self.group = getattr(config.PhEDExInjector, "group", "DataOps")
    self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)
    self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)

    # Subscribed state in the DBSBuffer table for datasets:
    # safe mode uses terminal state 2, normal operation uses 1.
    self.terminalSubscriptionState = 2 if self.safeMode else 1

    # We will map node names to CMS names, that what the spec will have.
    # If a CMS name is associated to many PhEDEx node then choose the MSS option
    self.cmsToPhedexMap = {}

    # initialize the alert framework (if available - config.Alert present)
    # self.sendAlert will be then be available
    self.initAlerts(compName="PhEDExInjector")
def setUp(self):
    """
    _setUp_

    Setup couchdb and the test environment
    """
    super(ResubmitBlockTest, self).setUp()

    self.group = 'unknown'
    self.user = '******'

    # Set external test helpers
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")

    # Define test environment
    self.couchUrl = os.environ["COUCHURL"]
    self.acdcDBName = 'resubmitblock_t'
    self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk']
    self.siteWhitelist = ['T2_XX_SiteA']

    # Convert phedex node name to a valid processing site name
    siteDB = SiteDB()
    self.PSNs = siteDB.PNNstoPSNs(self.validLocations)

    self.workflowName = 'dballest_ReReco_workflow'
    couchServer = CouchServer(dburl=self.couchUrl)
    self.acdcDB = couchServer.connectDatabase(self.acdcDBName, create=False)

    # Register the test user in the GroupUser couch app.
    user = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
    user.create()
    return
def testEmulator(self):
    """Check that enabling and resetting emulators swaps the wrapped service classes."""
    EmulatorHelper.setEmulators(True, True, True, True)

    # With all emulators enabled, every service wraps the WMQuality emulator.
    emulated = [
        (lambda: PhEDEx(), 'WMQuality.Emulators.PhEDExClient.PhEDEx', 'PhEDEx'),
        (lambda: DBSReader(self.globalDBS), 'WMQuality.Emulators.DBSClient.DBSReader', 'DBSReader'),
        (lambda: SiteDBJSON(), 'WMQuality.Emulators.SiteDBClient.SiteDB', 'SiteDBJSON'),
        (lambda: RequestManager(), 'WMQuality.Emulators.RequestManagerClient.RequestManager', 'RequestManager'),
    ]
    for factory, module, _ in emulated:
        self.assertEqual(factory().wrapped.__module__, module)
    for factory, _, clsName in emulated:
        self.assertEqual(factory().__class__.__name__, clsName)

    EmulatorHelper.resetEmulators()

    # After the reset, the real WMCore service implementations are wrapped again.
    real = [
        (lambda: PhEDEx(), 'WMCore.Services.PhEDEx.PhEDEx', 'PhEDEx'),
        (lambda: DBSReader(self.globalDBS), 'WMCore.Services.DBS.DBS3Reader', 'DBS3Reader'),
        (lambda: SiteDBJSON(), 'WMCore.Services.SiteDB.SiteDB', 'SiteDBJSON'),
        (lambda: RequestManager(), 'WMCore.Services.RequestManager.RequestManager', 'RequestManager'),
    ]
    for factory, module, _ in real:
        self.assertEqual(factory().wrapped.__module__, module)
    for factory, _, clsName in real:
        self.assertEqual(factory().__class__.__name__, clsName)
def wrapped_func(*args, **kwargs):
    """Refresh the cached service handles on the resource (args[0]) before invoking func."""
    resource = args[0]

    # Rebuild the SiteDB site-name caches when empty or older than 30 minutes.
    if 'sitedb' in services and (not resource.allCMSNames.sites or
                                 (resource.allCMSNames.cachetime + 1800 < mktime(gmtime()))):
        resource.allCMSNames = CMSSitesCache(
            sites=SiteDBJSON(config={'cert': serverCert, 'key': serverKey}).getAllCMSNames(),
            cachetime=mktime(gmtime()))
        resource.allPNNNames = CMSSitesCache(
            sites=SiteDBJSON(config={'cert': serverCert, 'key': serverKey}).getAllPhEDExNodeNames(),
            cachetime=mktime(gmtime()))

    # Instantiate PhEDEx once, merging the server credentials into its arguments.
    if 'phedex' in services and not resource.phedex:
        phdict = resource.phedexargs
        phdict.update({'cert': serverCert, 'key': serverKey})
        resource.phedex = PhEDEx(responseType='xml', dict=phdict)

    # Refresh the central configuration cache on the same 30-minute schedule.
    if 'centralconfig' in services and (not resource.centralcfg.centralconfig or
                                        (resource.centralcfg.cachetime + 1800 < mktime(gmtime()))):
        resource.centralcfg = ConfigCache(
            centralconfig=getCentralConfig(extconfigurl=resource.config.extconfigurl,
                                           mode=resource.config.mode),
            cachetime=mktime(gmtime()))

    # Expose the host certificate/key to handlers that ask for them.
    if 'servercert' in services:
        resource.serverCert = serverCert
        resource.serverKey = serverKey

    return func(*args, **kwargs)
def __init__(self, config, noSiteDB=False):
    """
    _init_

    Initialise the ReqMgr web interface.

    :param config: WebAPI configuration (templates, couch URLs, security roles).
    :param noSiteDB: skip the SiteDB lookup. Note, added for TESTING PURPOSED ONLY!
    :raises: re-raises any failure while fetching the site list from SiteDB.
    """
    WebAPI.__init__(self, config)
    ReqMgrAuth.assign_roles = config.security_roles
    # Take a guess
    self.templatedir = config.templates
    self.couchUrl = config.couchUrl
    # Fixed: configDBName was assigned twice from the same config value.
    self.configDBName = config.configDBName
    self.workloadDBName = config.workloadDBName
    self.wmstatWriteURL = "%s/%s" % (self.couchUrl.rstrip("/"), config.wmstatDBName)
    if not noSiteDB:
        try:
            # Download a list of all the sites from SiteDB, uses v2 API.
            sitedb = SiteDBJSON()
            self.sites = sitedb.getAllCMSNames()
            self.sites.sort()
        # 'except E as ex' (valid since Python 2.6) instead of the
        # Python-2-only 'except E, ex' comma form.
        except Exception as ex:
            msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % ex
            cherrypy.log(msg)
            raise
def execute(self, *args, **kwargs):
    """Build the single fake MC file WMCore's splitting needs, located at every CMS site."""
    totalevents = kwargs['task']['tm_totalunits']
    firstEvent, lastEvent = 1, totalevents
    firstLumi, lastLumi = 1, 10

    # Set a default of 100 events per lumi. This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not kwargs['task']['tm_events_per_lumi']:
        kwargs['task']['tm_events_per_lumi'] = 100

    # MC comes with only one MCFakeFile
    fakeFile = File("MCFakeFile", size=1000, events=totalevents)
    sitedb = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                         "cert": self.config.TaskWorker.cmscert})
    fakeFile.setLocation(sitedb.getAllCMSNames())
    fakeFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    fakeFile["block"] = 'MCFakeBlock'
    fakeFile["first_event"] = firstEvent
    fakeFile["last_event"] = lastEvent

    singleMCFileset = Fileset(name="MCFakeFileSet")
    singleMCFileset.addFile(fakeFile)
    return Result(task=kwargs['task'], result=singleMCFileset)
def execute(self, *args, **kwargs):
    """Produce a one-file fake MC fileset sized by the task's total units."""
    task = kwargs['task']
    totalevents = task['tm_totalunits']

    # Set a default of 100 events per lumi. This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not task['tm_events_per_lumi']:
        task['tm_events_per_lumi'] = 100

    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name="MCFakeFileSet")
    mcFile = File("MCFakeFile", size=1000, events=totalevents)
    sitedb = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                         "cert": self.config.TaskWorker.cmscert})
    mcFile.setLocation(sitedb.getAllCMSNames())
    mcFile.addRun(Run(1, *range(1, 11)))   # lumis 1 through 10
    mcFile["block"] = 'MCFakeBlock'
    mcFile["first_event"] = 1
    mcFile["last_event"] = totalevents
    singleMCFileset.addFile(mcFile)
    return Result(task=task, result=singleMCFileset)
def getUsernameFromSiteDB(self):
    """
    Return a the client hypernews name
    """
    proxy = self.proxy()
    userdn = proxy.getSubjectFromCert(self.certLocation)
    # The proxy file serves as both key and certificate for the SiteDB call.
    proxyfile = proxy.getProxyFilename()
    sitedb = SiteDBJSON({"key": proxyfile, "cert": proxyfile})
    return sitedb.dnUserName(userdn)
def getUsernameFromSiteDB(self):
    """
    Retrieve the user's username as it appears in SiteDB.
    """
    proxy = self.proxy()
    dn = proxy.getSubjectFromCert(self.certLocation)
    # Authenticate the SiteDB query with the proxy file (used as key and cert).
    credentials = {"key": proxy.getProxyFilename(),
                   "cert": proxy.getProxyFilename()}
    username = SiteDBJSON(credentials).dnUserName(dn)
    return username
def setUp(self):
    """
    Setup for unit tests
    """
    super(SiteDBTest, self).setUp()
    # Talk to the real PhEDEx/DBS/SiteDB services; emulate only the request manager.
    EmulatorHelper.setEmulators(phedex=False, dbs=False,
                                siteDB=False, requestMgr=True)
    self.mySiteDB = SiteDBJSON()
def sites():
    "Return known CMS site list from SiteDB"
    try:
        # Download a list of all the sites from SiteDB, uses v2 API.
        site_list = sorted(SiteDBJSON().getAllCMSNames())
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return site_list
def __init__(self, config):
    """Set up the browser, configuration, and SiteDB/DBS connections."""
    self.br = Browser()
    self.config = config

    # Initialise connections
    self.mySiteDB = SiteDBJSON()
    # One DBS reader per physics instance, built from the shared base URL.
    for inst in ("phys01", "phys02", "phys03"):
        setattr(self, "dbs" + inst.capitalize(),
                DbsApi(url=dbs_base_url + inst + "/DBSReader/"))
def __init__(self, config):
    """Set up the browser, PhEDEx data service, and the three physics DBS readers."""
    self.br = Browser()
    self.config = config

    # Initialise connections
    self.mySiteDB = SiteDBJSON()
    self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"},
                         "json")
    base = dbs_base_url
    self.dbsPhys01 = DbsApi(url=base + "phys01/DBSReader/")
    self.dbsPhys02 = DbsApi(url=base + "phys02/DBSReader/")
    self.dbsPhys03 = DbsApi(url=base + "phys03/DBSReader/")
def pnns():
    """
    Returns all PhEDEx node names, excluding Buffer endpoints
    """
    try:
        nodes = SiteDBJSON().getAllPhEDExNodeNames(excludeBuffer=True)
    except Exception as exc:
        msg = "ERROR: Could not retrieve PNNs from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return sorted(nodes)
def gethnName_urlenc(self, dn):
    """
    Map a certificate DN to a hypernews username via SiteDB.

    :param dn: certificate distinguished name to look up.
    :returns: tuple (status, hnUserName) — status 0 on success,
              1 when the lookup failed (hnUserName stays None).
    """
    from WMCore.Services.SiteDB.SiteDB import SiteDBJSON
    hnUserName = None
    mySiteDB = SiteDBJSON()
    status = 0
    try:
        hnUserName = mySiteDB.dnUserName(dn=dn)
    except Exception:
        # Narrowed from a bare 'except:' which would also swallow
        # SystemExit/KeyboardInterrupt; the lookup stays best-effort.
        status = 1
    return status, hnUserName
def sites():
    "Return known CMS site list from SiteDB"
    # Download a list of all the sites from SiteDB, uses v2 API.
    try:
        sitedb = SiteDBJSON()
        all_names = sitedb.getAllCMSNames()
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(
            exc)
        raise Exception(msg)
    return sorted(all_names)
def getUsernameFromSiteDB(self):
    """
    Return a the client hypernews name
    """
    proxy = self.proxy()
    subject = proxy.getSubjectFromCert(self.certLocation)
    # The proxy file doubles as key and certificate for SiteDB authentication.
    pfile = proxy.getProxyFilename()
    return SiteDBJSON({"key": pfile, "cert": pfile}).dnUserName(subject)
def __init__(self, taskid):
    """Resolve the caller's SiteDB username and full name from the VOMS proxy identity."""
    super(Monitor, self).__init__(taskid)

    # Ask voms-proxy-info for the certificate identity (the DN).
    # 'identity' avoids shadowing the builtin 'id' the original used.
    proc = subprocess.Popen(["voms-proxy-info", "-identity"],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    identity, _ = proc.communicate()
    identity = identity.strip()

    sitedb = SiteDBJSON({'cacheduration': 24})
    self.__username = sitedb.dnUserName(dn=identity)
    # Full name is everything after the last '/CN=' component of the DN.
    self.__fullname = identity.rsplit('/CN=', 1)[1]
def execute(self, *args, **kwargs):
    """Build a fake fileset from user-supplied files; mark the task FAILED when unusable."""
    taskname = kwargs['task']['tm_taskname']
    self.logger.info(
        "Data discovery and splitting for %s using user-provided files" % taskname)

    userfiles = kwargs['task']['tm_arguments'].get('userfiles')
    splitting = kwargs['task']['tm_split_algo']
    total_units = kwargs['task']['tm_totalunits']

    # Reject the task unless files were given and splitting is FileBased.
    if not userfiles or splitting != 'FileBased':
        if not userfiles:
            msg = "No files specified to process for task %s." % taskname
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        self.logger.error("Setting %s as failed: %s" % (taskname, msg))
        configreq = {'workflow': taskname,
                     'status': "FAILED",
                     'subresource': 'failure',
                     'failure': b64encode(msg)}
        self.server.post(self.resturi, data=urllib.urlencode(configreq))
        raise StopHandler(msg)

    # Locations: explicit config whitelist wins, otherwise all CMS sites from SiteDB.
    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()

    userFileset = Fileset(name=taskname)
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)

    for idx, userfile in enumerate(userfiles):
        # Stop once the requested number of files has been added.
        if total_units > 0 and idx >= total_units:
            break
        newFile = File(userfile, size=1000, events=1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
    return Result(task=kwargs['task'], result=userFileset)
def pnns():
    """
    Returns all PhEDEx node names, excluding Buffer endpoints
    """
    try:
        sitedb = SiteDBJSON()
        node_names = sitedb.getAllPhEDExNodeNames(excludeBuffer=True)
        node_names.sort()
    except Exception as exc:
        msg = "ERROR: Could not retrieve PNNs from SiteDB, reason: %s" % str(
            exc)
        raise Exception(msg)
    return node_names
def insertAllSEs(self, siteName, pendingSlots=0, runningSlots=0,
                 ceName=None, plugin=None, taskList=None):
    """
    _insertAllSEs_

    Insert all SEs into WMBS ResourceControl
    This uses the Services.SiteDB to insert all SEs under a
    common CE. It is meant to be used with WMS submission.

    Sites will be named siteName_SEName

    It expects a taskList of the following form:
    [{'taskType': taskType, 'priority': priority, 'maxSlots': maxSlots,
      'pendingSlots' : pendingSlots}]

    for each entry in the taskList, a threshold is inserted into the
    database for EVERY SE
    """
    from WMCore.Services.SiteDB.SiteDB import SiteDBJSON

    # Fixed: a mutable default ([]) is shared across calls; default to None.
    if taskList is None:
        taskList = []

    siteDB = SiteDBJSON()
    for cmsName in siteDB.getAllCMSNames():
        for SE in siteDB.cmsNametoSE(cmsName):
            sName = '%s_%s' % (siteName, SE)
            self.insertSite(siteName=sName, pendingSlots=pendingSlots,
                            seName=SE, runningSlots=runningSlots,
                            ceName=ceName, cmsName=cmsName, plugin=plugin)
            for task in taskList:
                # 'in' replaces the deprecated dict.has_key().
                if not all(key in task for key in ('maxSlots', 'taskType', 'priority')):
                    msg = "Incomplete task in taskList for ResourceControl.insertAllSEs\n"
                    # task is a dict: str() it ('msg += task' raised TypeError).
                    msg += str(task)
                    raise ResourceControlException(msg)
                self.insertThreshold(siteName=sName, taskType=task['taskType'],
                                     maxSlots=task['maxSlots'],
                                     pendingSlots=task['pendingSlots'],
                                     priority=task['priority'])
    return
def cmsSiteNames():
    """Get all cms sites"""
    global __cmsSiteNames
    # Serve the cached list when a previous call succeeded.
    if __cmsSiteNames:
        return __cmsSiteNames
    global __sitedb
    if not __sitedb:
        from WMCore.Services.SiteDB.SiteDB import SiteDBJSON as SiteDB
        __sitedb = SiteDB()
    try:
        __cmsSiteNames = __sitedb.getAllCMSNames()
    except Exception:
        # Best-effort: keep the (possibly empty) cached value on failure.
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        pass
    return __cmsSiteNames
def sites():
    "Return known CMS site list from SiteDB"
    try:
        # Download a list of all the sites from SiteDB, uses v2 API.
        # CRIC takes over when either environment flag is set.
        use_cric = (os.getenv("WMAGENT_USE_CRIC", False) or
                    os.getenv("WMCORE_USE_CRIC", False))
        if use_cric:
            site_list = sorted(CRIC().getAllPSNs())
        else:
            site_list = sorted(SiteDBJSON().getAllCMSNames())
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return site_list
def sitesFromStorageEelements(ses):
    """Return Sites given Storage Elements"""
    global __sitedb
    if not __sitedb:
        from WMCore.Services.SiteDB.SiteDB import SiteDBJSON as SiteDB
        __sitedb = SiteDB()
    result = set()
    for se in ses:
        try:
            site = __sitedb.seToCMSName(se)
        except Exception:
            # Narrowed from a bare 'except:': still best-effort per SE, but
            # no longer traps SystemExit/KeyboardInterrupt.
            print("Unable to get site name for %s" % se)
        else:
            result.add(site)
    return list(result)
def getDNFromUserName(username, log, ckey=None, cert=None):
    """
    Parse site string to know the fts server to use
    """
    site_db = SiteDBJSON(config={'key': ckey, 'cert': cert})
    try:
        return site_db.userNameDn(username)
    except IndexError:
        log.error("user does not exist")
    except RuntimeError:
        log.error("SiteDB URL cannot be accessed")
    # Lookup failed: fall back to an empty DN.
    return ''
def setupMCWMSpec(self):
    """Setup MC workflow"""
    self.wmspec = self.createMCWMSpec()
    self.topLevelTask = getFirstTask(self.wmspec)
    self.inputDataset = self.topLevelTask.inputDataset()
    self.dataset = self.topLevelTask.getInputDatasetPath()
    self.dbs = None
    self.siteDB = SiteDBJSON()

    # add sites that would normally be added by operator via resource_control
    locationDAO = self.daoFactory(classname="Locations.New")
    self.pnns = []
    for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
        # Look up the PNN once per site: the original issued the same
        # SiteDB query twice for every site.
        pnn = self.siteDB.cmsNametoPhEDExNode(site)[0]
        locationDAO.execute(siteName=site, pnn=pnn)
        self.pnns.append(pnn)
def getDNFromUserName(username, log, ckey=None, cert=None):
    """
    Parse site string to know the fts server to use
    """
    dn = ''
    site_db = SiteDBJSON(config={'key': ckey, 'cert': cert})
    try:
        dn = site_db.userNameDn(username)
    except IndexError:
        # Unknown user: report and return the empty DN.
        log.error("user does not exist")
    except RuntimeError:
        # Service unreachable: report and return the empty DN.
        log.error("SiteDB URL cannot be accessed")
    return dn
def sitesFromStorageEelements(ses):
    """Return Sites given Storage Elements"""
    global __sitedb
    if not __sitedb:
        from WMCore.Services.SiteDB.SiteDB import SiteDBJSON as SiteDB
        __sitedb = SiteDB()
    result = set()
    for se in ses:
        try:
            sites = __sitedb.seToCMSName(se)
        except Exception:
            # Narrowed from a bare 'except:': keeps the best-effort behaviour
            # per SE without trapping SystemExit/KeyboardInterrupt.
            print("Unable to get site name for %s" % se)
        else:
            # seToCMSName returns a collection here; merge all names.
            result.update(sites)
    return list(result)
def __init__(self, **args):
    """Policy start-up: default to file-count slicing, one file per slice."""
    StartPolicyInterface.__init__(self, **args)
    # Defaults only apply when the caller did not set them.
    self.args.setdefault('SliceType', 'NumberOfFiles')
    self.args.setdefault('SliceSize', 1)
    self.lumiType = "NumberOfLumis"
    self.sites = []
    self.siteDB = SiteDB()
def __init__(self, config):
    """
    ___init___

    Initialise class members
    """
    BaseWorkerThread.__init__(self)
    self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
    self.siteDB = SiteDBJSON()
    self.dbsUrl = config.DBSInterface.globalDBSUrl
    self.group = getattr(config.PhEDExInjector, "group", "DataOps")
    self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)

    # Subscribed state in the DBSBuffer table for datasets:
    # 1 in normal operation, 2 when running in safe mode.
    self.terminalSubscriptionState = 2 if self.safeMode else 1

    # We will map node names to CMS names, that what the spec will have.
    # If a CMS name is associated to many PhEDEx node then choose the MSS option
    self.cmsToPhedexMap = {}

    # initialize the alert framework (if available - config.Alert present)
    # self.sendAlert will be then be available
    self.initAlerts(compName="PhEDExInjector")
def __init__(self, **args):
    """Initialise splitting defaults and the per-algorithm chunking strategies."""
    StartPolicyInterface.__init__(self, **args)
    self.args.setdefault('SliceType', 'NumberOfFiles')
    self.args.setdefault('SliceSize', 1)
    self.args.setdefault('SplittingAlgo', 'LumiBased')
    self.lumiType = "NumberOfLumis"

    # Define how to handle the different splitting algorithms:
    # every supported algorithm currently maps to the single-chunk strategy.
    self.algoMapping = {}
    for algo in ('Harvest', 'ParentlessMergeBySize', 'MinFileBased',
                 'LumiBased', 'EventAwareLumiBased', 'EventBased'):
        self.algoMapping[algo] = self.singleChunk
    self.unsupportedAlgos = ['WMBSMergeBySize', 'SiblingProcessingBased']
    self.defaultAlgo = self.fixedSizeChunk
    self.sites = []

    # Prefer CRIC when either agent/core environment flag is set.
    useCric = (os.getenv("WMAGENT_USE_CRIC", False) or
               os.getenv("WMCORE_USE_CRIC", False))
    self.cric = CRIC() if useCric else None
    self.siteDB = SiteDB()
def formatOutput(self, task, requestname, datasetfiles, locations):
    """
    Receives as input the result of the data location discovery operations and
    fill up the WMCore objects.

    NOTE(review): the visible snippet ends inside the per-file loop — the
    accumulators (wmfiles, counters, uniquelumis) are presumably consumed by
    code past this excerpt; confirm against the full source.
    """
    self.logger.debug(" Formatting data discovery output ")
    # TEMPORARY
    # Cache of SE name -> CMS site name translations, filled lazily below.
    secmsmap = {}
    sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey, "cert": self.config.TaskWorker.cmscert})
    wmfiles = []
    event_counter = 0
    lumi_counter = 0
    file_counter = 0
    uniquelumis = set()
    ## Loop over the sorted list of files.
    for lfn, infos in datasetfiles.iteritems():
        ## Skip the file if the block has not been found or has no locations.
        if not infos['BlockName'] in locations or not locations[infos['BlockName']]:
            self.logger.warning("Skipping %s because its block (%s) has no locations" % (lfn, infos['BlockName']))
            continue
        ## Skip the file if it is not in VALID state.
        if not infos.get('ValidFile', True):
            self.logger.warning("Skipping invalid file %s" % lfn)
            continue
        ## Createa a WMCore File object.
        wmfile = File(lfn = lfn, events = infos['NumberOfEvents'], size = infos['Size'], checksums = infos['Checksums'])
        wmfile['block'] = infos['BlockName']
        wmfile['locations'] = []
        # Translate each SE hosting this block into a CMS site name,
        # caching results (including failures, stored as '') in secmsmap.
        for se in locations[infos['BlockName']]:
            if se and se not in secmsmap:
                self.logger.debug("Translating SE %s" %se)
                try:
                    secmsmap[se] = sbj.seToCMSName(se)
                except KeyError, ke:
                    self.logger.error("Impossible translating %s to a CMS name through SiteDB" %se)
                    secmsmap[se] = ''
                except httplib.HTTPException, ex:
                    self.logger.error("Couldn't map SE to site: %s" % se)
                    print "Couldn't map SE to site: %s" % se
                    print "got problem: %s" % ex
                    print "got another problem: %s" % ex.__dict__
            if se and se in secmsmap:
                # seToCMSName may return one name or a list of names.
                if type(secmsmap[se]) == list:
                    wmfile['locations'].extend(secmsmap[se])
                else:
                    wmfile['locations'].append(secmsmap[se])
def execute(self, *args, **kwargs):
    """Build a fake fileset from user-supplied files; mark the task FAILED when unusable."""
    task = kwargs['task']
    self.logger.info("Data discovery and splitting for %s using user-provided files" % task['tm_taskname'])

    if 'tm_user_files' in task and task['tm_user_files']:
        userfiles = task['tm_user_files']
    else:
        ## For backward compatibility only.
        userfiles = task['tm_arguments'].get('userfiles')
    splitting = task['tm_split_algo']
    total_units = task['tm_totalunits']

    # Reject the task unless files were given and splitting is FileBased.
    if not userfiles or splitting != 'FileBased':
        if not userfiles:
            msg = "No files specified to process for task %s." % task['tm_taskname']
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        self.logger.error("Setting %s as failed: %s" % (task['tm_taskname'], msg))
        configreq = {'workflow': task['tm_taskname'],
                     'status': "FAILED",
                     'subresource': 'failure',
                     'failure': b64encode(msg)}
        self.server.post(self.resturi, data=urllib.urlencode(configreq))
        raise StopHandler(msg)

    # Locations: explicit config whitelist wins, otherwise all CMS sites from SiteDB.
    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()

    userFileset = Fileset(name=task['tm_taskname'])
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)

    for idx, userfile in enumerate(userfiles):
        # Stop once the requested number of files has been added.
        if total_units > 0 and idx >= total_units:
            break
        newFile = File(userfile, size=1000, events=1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
    return Result(task=task, result=userFileset)
def phedexIt():
    """Cross-check the direct SE->node mapping from PhEDEx against one derived via SiteDB."""
    phedex = PhEDEx(responseType="json")
    phedexNodes = phedex.getNodeMap()['phedex']['node']

    # Old mapping: SE -> set of PhEDEx node names, straight from PhEDEx.
    phedexMap = {}
    sePhedexMap = {}
    knownPhedexNodes = set()
    for node in phedexNodes:
        phedexMap[node['name']] = node['kind']
        #print '%s -> %s, %s' % (node['name'], node['kind'], node['se'])
        sePhedexMap.setdefault(node['se'], set()).add(node['name'])
        knownPhedexNodes.add(node['name'])

    # New mapping: SE -> CMS names -> PhEDEx nodes, restricted to known nodes.
    sitedb = SiteDBJSON()
    cmsNamesMap = {}
    for se in sitedb.getAllSENames():
        cmsNamesMap[se] = sitedb.seToCMSName(se)

    seToNodeMap = {}
    for se in cmsNamesMap:
        candidates = set()
        for cmsName in cmsNamesMap[se]:
            candidates.update(set(sitedb.cmsNametoPhEDExNode(cmsName)))
        seToNodeMap[se] = set(c for c in candidates if c in knownPhedexNodes)
        #print '%s to %s' % (se, candidates)

    # Report SEs present in only one of the two mappings.
    for se in sePhedexMap:
        if se not in seToNodeMap:
            print("SE: %s is not in new mapping for sites %s" % (se, list(sePhedexMap[se])))
    for se in seToNodeMap:
        if se not in sePhedexMap:
            print("SE: %s is not in old mapping for sites %s" % (se, list(seToNodeMap[se])))
            continue

    # Diff the node sets for SEs known to both mappings.
    for se in set(seToNodeMap.keys()).intersection(set(sePhedexMap.keys())):
        oldOnly = sePhedexMap[se] - seToNodeMap[se]
        if oldOnly:
            print("%s are in old mapping but not in new for %s" % (str(list(oldOnly)), se))
        newOnly = seToNodeMap[se] - sePhedexMap[se]
        if newOnly:
            print("%s are in new mapping but not in old for %s" % (str(list(newOnly)), se))
def execute(self, *args, **kwargs):
    """Assemble a fake fileset from the user-provided file list for FileBased splitting."""
    task = kwargs['task']
    self.logger.info(
        "Data discovery and splitting for %s using user-provided files" % task['tm_taskname'])

    userfiles = task['tm_user_files']
    splitting = task['tm_split_algo']
    total_units = task['tm_totalunits']

    # Without files, or with any splitting mode other than FileBased, the task cannot run.
    if not userfiles or splitting != 'FileBased':
        if not userfiles:
            msg = "No files specified to process for task %s." % task['tm_taskname']
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        raise TaskWorkerException(msg)

    # Locations: explicit config whitelist wins, otherwise all CMS sites from SiteDB.
    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()

    userFileset = Fileset(name=task['tm_taskname'])
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)

    for idx, userfile in enumerate(userfiles):
        # Stop once the requested number of files has been added.
        if total_units > 0 and idx >= total_units:
            break
        newFile = File(userfile, size=1000, events=1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
    return Result(task=task, result=userFileset)
def insertAllSEs(self, siteName, pendingSlots=0, runningSlots=0,
                 ceName=None, plugin=None, taskList=None):
    """
    _insertAllSEs_

    Insert all SEs into WMBS ResourceControl
    This uses the Services.SiteDB to insert all SEs under a
    common CE. It is meant to be used with WMS submission.

    Sites will be named siteName_SEName

    It expects a taskList of the following form:
    [{'taskType': taskType, 'priority': priority, 'maxSlots': maxSlots}]

    for each entry in the taskList, a threshold is inserted into the
    database for EVERY SE
    """
    from WMCore.Services.SiteDB.SiteDB import SiteDBJSON

    # Fixed: a mutable default ([]) is shared across calls; default to None.
    if taskList is None:
        taskList = []

    siteDB = SiteDBJSON()
    for cmsName in siteDB.getAllCMSNames():
        for SE in siteDB.cmsNametoSE(cmsName):
            sName = '%s_%s' % (siteName, SE)
            self.insertSite(siteName=sName, pendingSlots=pendingSlots,
                            seName=SE, runningSlots=runningSlots,
                            ceName=ceName, cmsName=cmsName, plugin=plugin)
            for task in taskList:
                # 'in' replaces the deprecated dict.has_key().
                if not all(k in task for k in ('maxSlots', 'taskType', 'priority')):
                    msg = "Incomplete task in taskList for ResourceControl.insertAllSEs\n"
                    # task is a dict: str() it ('msg += task' raised TypeError).
                    msg += str(task)
                    raise ResourceControlException(msg)
                self.insertThreshold(siteName=sName, taskType=task['taskType'],
                                     maxSlots=task['maxSlots'],
                                     priority=task['priority'])
    return
def __init__(self, config):
    """
    ___init___

    Initialise class members
    """
    BaseWorkerThread.__init__(self)

    self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
    self.siteDB = SiteDBJSON()
    self.dbsUrl = config.DBSInterface.globalDBSUrl
    self.group = getattr(config.PhEDExInjector, "group", "DataOps")

    # We will map node names to CMS names, that what the spec will have.
    # If a CMS name is associated to many PhEDEx node then choose the MSS option
    self.cmsToPhedexMap = {}
    # Known PhEDEx endpoints, bucketed by storage kind.
    self.phedexNodes = {'MSS': [], 'Disk': []}

    # initialize the alert framework (if available - config.Alert present)
    # self.sendAlert will be then be available
    self.initAlerts(compName="PhEDExInjector")
def __init__(self, **args):
    """Policy start-up: slice by number of runs, one run per slice."""
    StartPolicyInterface.__init__(self, **args)
    self.args.setdefault('SliceType', 'NumberOfRuns')
    self.args.setdefault('SliceSize', 1)
    self.lumiType = "NumberOfLumis"
    self.sites = []
    # Prefer CRIC when either agent/core environment flag is set,
    # otherwise fall back to SiteDB alone.
    useCric = (os.getenv("WMAGENT_USE_CRIC", False) or
               os.getenv("WMCORE_USE_CRIC", False))
    self.cric = CRIC() if useCric else None
    self.siteDB = SiteDB()
def getSiteInfo(config):
    """Return the sorted CMS site list plus any wildcard keys that matched a site."""
    sites = sorted(SiteDBJSON().getAllCMSNames())

    # Wildcard keys map a display key (e.g. 'T1*') to a regex over site names.
    wildcardKeys = getattr(config, 'wildcardKeys',
                           {'T1*': 'T1_*', 'T2*': 'T2_*', 'T3*': 'T3_*'})
    wildcardSites = {}
    for key, pattern in wildcardKeys.items():
        matched = [s for s in sites if re.search(pattern, s)]
        if matched:
            wildcardSites[key] = matched
            # Expose the wildcard itself as a selectable "site".
            sites.append(key)
    return sites
class MakeFakeFileSet(TaskAction):
    """This is needed to make WMCore.JobSplitting lib working...
    do not like very much. Given that all is fake here I am quite sure we only need total number of events
    said that I set all the other parmas to dummy values. We may want to set them in the future"""

    def __init__(self, *args, **kwargs):
        TaskAction.__init__(self, *args, **kwargs)
        # SiteDB handle authenticated with the TaskWorker service credentials.
        self.sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                               "cert": self.config.TaskWorker.cmscert})

    def getListOfSites(self):
        """ Get the list of sites to use for PrivateMC workflows.
            For the moment we are filtering out T1_ since they are precious resources
            and don't want to overtake production (WMAgent) jobs there. In the future
            we would like to take this list from the SSB.
        """
        return [site for site in self.sbj.getAllCMSNames()
                if not site.startswith("T1_")]

    #even though args is not used we call "handler.actionWork(args, kwargs)" in Handler
    def execute(self, *args, **kwargs): #pylint: disable=unused-argument
        # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
        # but that would confuse WMCore, therefore cast to int
        totalevents = int(kwargs['task']['tm_totalunits'])

        # Set a default of 100 events per lumi. This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        fakeFile = File("MCFakeFile", size=1000, events=totalevents)
        fakeFile.setLocation(self.getListOfSites())
        fakeFile.addRun(Run(1, *range(1, 11)))   # lumis 1 through 10
        fakeFile["block"] = 'MCFakeBlock'
        fakeFile["first_event"] = 1
        fakeFile["last_event"] = totalevents

        singleMCFileset = Fileset(name="MCFakeFileSet")
        singleMCFileset.addFile(fakeFile)
        return Result(task=kwargs['task'], result=singleMCFileset)
def __init__(self, url, **contact):
    """Create the DBS api handle plus PhEDEx and SiteDB service connections."""
    # instantiate dbs api object
    try:
        self.dbs = DbsApi(url, **contact)
    except dbsClientException as ex:
        raise DBSReaderError("Error in DBSReader with DbsApi\n%s\n" % formatEx3(ex))

    # connection to PhEDEx (Use default endpoint url)
    self.phedex = PhEDEx(responseType="json")
    self.siteDB = SiteDB()
def __init__(self, **args):
    """Block-level policy start-up: file-count slicing with empty site/blacklist state."""
    StartPolicyInterface.__init__(self, **args)
    self.args.setdefault('SliceType', 'NumberOfFiles')
    self.args.setdefault('SliceSize', 1)
    self.lumiType = "NumberOfLumis"
    # Initialize a list of sites where the data is
    self.sites = []
    # Initialize modifiers of the policy
    self.blockBlackListModifier = []
    self.siteDB = SiteDB()
def __init__(self, url, **contact):
    """Initialise the DBS reader: DbsApi handle plus PhEDEx/SiteDB services."""
    # instantiate dbs api object
    try:
        self.dbs = DbsApi(url, **contact)
    except dbsClientException as ex:
        msg = "Error in DBSReader with DbsApi\n"
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg)

    # connection to PhEDEx (Use default endpoint url)
    self.phedex = PhEDEx(responseType="json")
    self.siteDB = SiteDB()
class MakeFakeFileSet(TaskAction):
    """This is needed to make WMCore.JobSplitting lib working...
    do not like very much. Given that all is fake here I am quite sure we only need total number of events
    said that I set all the other parmas to dummy values. We may want to set them in the future"""

    def __init__(self, *args, **kwargs):
        TaskAction.__init__(self, *args, **kwargs)
        # SiteDB handle authenticated with the TaskWorker service credentials.
        self.sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                               "cert": self.config.TaskWorker.cmscert})

    def getListOfSites(self):
        """ Get the list of sites to use for PrivateMC workflows.
            For the moment we are filtering out T1_ since they are precious resources
            and don't want to overtake production (WMAgent) jobs there. In the future
            we would like to take this list from the SSB.
        """
        sites = self.sbj.getAllCMSNames()
        filteredSites = [site for site in sites if not site.startswith("T1_")]
        return filteredSites

    #even though args is not used we call "handler.actionWork(args, kwargs)" in Handler
    def execute(self, *args, **kwargs): #pylint: disable=unused-argument
        totalevents = kwargs['task']['tm_totalunits']
        firstEvent, lastEvent = 1, totalevents
        firstLumi, lastLumi = 1, 10

        # Set a default of 100 events per lumi. This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name="MCFakeFileSet")
        newFile = File("MCFakeFile", size=1000, events=totalevents)
        newFile.setLocation(self.getListOfSites())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)
        return Result(task=kwargs['task'], result=singleMCFileset)
def execute(self, *args, **kwargs):
    """
    _execute_

    Perform "data discovery" for a task that runs over a user-supplied
    list of files: validate the task parameters, pick the locations the
    fake files are assigned to, and build the Fileset consumed by the
    splitting step.

    Raises TaskWorkerException when no files were given or the task is
    not configured for FileBased splitting.
    """
    self.logger.info("Data discovery and splitting for %s using user-provided files" % kwargs['task']['tm_taskname'])

    userfiles = kwargs['task']['tm_user_files']
    splitting = kwargs['task']['tm_split_algo']
    total_units = kwargs['task']['tm_totalunits']
    if not userfiles or splitting != 'FileBased':
        # The splitting message takes precedence when both problems are
        # present (preserves the historical behaviour).
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        else:
            msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
        raise TaskWorkerException(msg)

    # Locations: explicit config override, otherwise all CMS site names.
    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()

    userFileset = Fileset(name=kwargs['task']['tm_taskname'])
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)
    for idx, userfile in enumerate(userfiles):
        # Size/event counts are fake; splitting only needs file identity.
        newFile = File(userfile, size=1000, events=1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
        # Stop once total_units files have been added (0 means "all").
        if total_units > 0 and idx + 1 >= total_units:
            break
    return Result(task=kwargs['task'], result=userFileset)
def wrapped_func(*args, **kwargs):
    # Lazily (re)build the cached SiteDB site list on first use, or once
    # the 30-minute (1800 s) cache window has expired.
    # NOTE(review): args[0] is presumably the decorated method's `self`,
    # which carries the caches -- confirm against the enclosing decorator.
    if 'sitedb' in services and (
            not args[0].allCMSNames.sites or
            (args[0].allCMSNames.cachetime + 1800 < mktime(gmtime()))):
        args[0].allCMSNames = CMSSitesCache(
            sites=SiteDBJSON(config={
                'cert': serverCert,
                'key': serverKey
            }).getAllCMSNames(),
            cachetime=mktime(gmtime()))
    # Create the PhEDEx client once and keep it on the instance.
    if 'phedex' in services and not args[0].phedex:
        args[0].phedex = PhEDEx(responseType='xml', dict=args[0].phedexargs)
    return func(*args, **kwargs)
def getSiteInfo(config):
    """
    Return the sorted list of CMS site names known to SiteDB, extended
    with any configured wildcard label (e.g. 'T1*') whose regex matches
    at least one real site.

    :param config: object that may carry a ``wildcardKeys`` attribute
        mapping a wildcard label to the regex pattern it stands for.
    :return: list of site names plus the matching wildcard labels.
    """
    sitedb = SiteDBJSON()
    sites = sitedb.getAllCMSNames()
    sites.sort()
    wildcardKeys = getattr(config, 'wildcardKeys', {'T1*': 'T1_*',
                                                    'T2*': 'T2_*',
                                                    'T3*': 'T3_*'})
    # Match against a snapshot so wildcard labels appended below can never
    # themselves be matched by a later pattern (the original iterated the
    # list it was appending to).
    realSites = list(sites)
    # NOTE(review): wildcardSites is built but never returned or stored;
    # kept for parity with the original -- confirm whether callers need it.
    wildcardSites = {}
    for label, pattern in wildcardKeys.items():
        matched = [site for site in realSites if re.search(pattern, site)]
        if matched:
            wildcardSites[label] = matched
            sites.append(label)
    return sites
def formatOutput(self, task, requestname, datasetfiles, locations):
    """Receives as input the result of the data location discovery operations
    and fill up the WMCore objects."""
    self.logger.debug(" Formatting data discovery output ")
    # TEMPORARY
    # Lazy cache of SE name -> CMS site name translations built below.
    secmsmap = {}
    sbj = SiteDBJSON({"key": self.config.MyProxy.serverhostkey, "cert": self.config.MyProxy.serverhostcert})
    wmfiles = []
    # Running totals over all files/lumis seen (diagnostic counters).
    lumicounter = evecounter = 0
    for lfn, infos in datasetfiles.iteritems():
        wmfile = File(lfn=lfn, events=infos['NumberOfEvents'], size=infos['Size'], checksums=infos['Checksums'])
        wmfile['block'] = infos['BlockName']
        wmfile['locations'] = []
        if locations.has_key(infos['BlockName']):
            for se in locations[infos['BlockName']]:
                # Translate each SE once; failures are cached as '' so we
                # do not retry (and the empty entry is skipped below).
                if se not in secmsmap:
                    self.logger.debug("Translating SE %s" %se)
                    try:
                        secmsmap[se] = sbj.seToCMSName(se)
                    except KeyError, ke:
                        self.logger.error("Impossible translating %s to a CMS name through SiteDB" %se)
                        secmsmap[se] = ''
                if se in secmsmap:
                    # seToCMSName may return a single name or a list.
                    if type(secmsmap[se]) == list:
                        wmfile['locations'].extend(secmsmap[se])
                    else:
                        wmfile['locations'].append(secmsmap[se])
        wmfile['workflow'] = requestname
        evecounter += infos['NumberOfEvents']
        for run, lumis in infos['Lumis'].iteritems():
            #self.logger.debug(' - adding run %d and lumis %s' %(run, lumis))
            wmfile.addRun(Run(run, *lumis))
            lumicounter += len(lumis)
        wmfiles.append(wmfile)
def setupMCWMSpec(self):
    """Setup MC workflow"""
    self.wmspec = self.createMCWMSpec()
    self.topLevelTask = getFirstTask(self.wmspec)
    self.inputDataset = self.topLevelTask.inputDataset()
    self.dataset = self.topLevelTask.getInputDatasetPath()
    self.dbs = None
    self.siteDB = SiteDBJSON()

    # add sites that would normally be added by operator via resource_control,
    # remembering each site's PhEDEx node name (PNN) as we go.
    locationDAO = self.daoFactory(classname="Locations.New")
    self.pnns = []
    for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
        # Look the PNN up once per site (the original queried SiteDB twice).
        pnn = self.siteDB.cmsNametoPhEDExNode(site)[0]
        locationDAO.execute(siteName=site, pnn=pnn)
        self.pnns.append(pnn)
def __init__(self, **args):
    """
    _init_

    Seed the policy with its default slice settings and record how each
    known splitting algorithm should be chunked.
    """
    StartPolicyInterface.__init__(self, **args)
    self.args.setdefault('SliceType', 'NumberOfFiles')
    self.args.setdefault('SliceSize', 1)
    self.args.setdefault('SplittingAlgo', 'LumiBased')
    self.lumiType = "NumberOfLumis"

    # Algorithms that must be emitted as a single chunk of work; anything
    # not listed (and not unsupported) falls back to fixed-size chunking.
    singleChunkAlgos = ('Harvest', 'ParentlessMergeBySize', 'MinFileBased',
                        'LumiBased', 'EventAwareLumiBased', 'EventBased')
    self.algoMapping = dict((algo, self.singleChunk) for algo in singleChunkAlgos)
    self.unsupportedAlgos = ['WMBSMergeBySize', 'SiblingProcessingBased']
    self.defaultAlgo = self.fixedSizeChunk
    self.sites = []
    self.siteDB = SiteDB()
def __init__(self, config):
    """
    ___init___

    Set up the PhEDEx and SiteDB clients, the DBS endpoint and injection
    group, plus the node-name caches used when mapping CMS names to
    PhEDEx nodes.
    """
    BaseWorkerThread.__init__(self)

    self.dbsUrl = config.DBSInterface.globalDBSUrl
    self.group = getattr(config.PhEDExInjector, "group", "DataOps")
    self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
    self.siteDB = SiteDBJSON()

    # Cache mapping node names to CMS names (that is what the spec will
    # have); if a CMS name maps to several PhEDEx nodes, prefer MSS.
    self.cmsToPhedexMap = {}
    self.phedexNodes = {"MSS": [], "Disk": []}

    # initialize the alert framework (if available - config.Alert present)
    # self.sendAlert will then be available
    self.initAlerts(compName="PhEDExInjector")
def __init__(self, config, noSiteDB=False):
    """
    _init_

    Set up the web API: couch endpoints, the site and PhEDEx-node lists
    pulled from SiteDB, and the LFN bases offered in the request UI.

    Note, noSiteDB added for TESTING PURPOSED ONLY!
    """
    WebAPI.__init__(self, config)
    ReqMgrAuth.assign_roles = config.security_roles
    # Take a guess
    self.templatedir = config.templates
    self.couchUrl = config.couchUrl
    # (the original assigned configDBName twice; once is enough)
    self.configDBName = config.configDBName
    self.workloadDBName = config.workloadDBName
    self.wmstatWriteURL = "%s/%s" % (self.couchUrl.rstrip("/"), config.wmstatDBName)
    if not noSiteDB:
        try:
            # Download a list of all the sites from SiteDB, uses v2 API.
            sitedb = SiteDBJSON()
            self.sites = sitedb.getAllCMSNames()
            self.sites.sort()
            self.phedexNodes = sitedb.getAllPhEDExNodeNames(excludeBuffer=True)
            self.phedexNodes.sort()
        except Exception as ex:
            msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % ex
            cherrypy.log(msg)
            raise
    else:
        self.sites = []

    # store result lfn base with all Physics group
    storeResultLFNBase = [
        "/store/results/analysisops",
        "/store/results/b_physics",
        "/store/results/b_tagging",
        "/store/results/b2g",
        "/store/results/e_gamma_ecal",
        "/store/results/ewk",
        "/store/results/exotica",
        "/store/results/forward",
        "/store/results/heavy_ions",
        "/store/results/higgs",
        "/store/results/jets_met_hcal",
        "/store/results/muon",
        "/store/results/qcd",
        "/store/results/susy",
        "/store/results/tau_pflow",
        "/store/results/top",
        "/store/results/tracker_dpg",
        "/store/results/tracker_pog",
        "/store/results/trigger",
    ]
    # yet 0.9.40 had also another self.mergedLFNBases which was differentiating
    # list of mergedLFNBases based on type of request, removed and all bases
    # will be displayed regardless of the request type (discussion with Edgar)
    self.allMergedLFNBases = [
        "/store/backfill/1",
        "/store/backfill/2",
        "/store/data",
        "/store/mc",
        "/store/generator",
        "/store/relval",
        "/store/hidata",
        "/store/himc",
    ]
    self.allMergedLFNBases.extend(storeResultLFNBase)
    self.allUnmergedLFNBases = ["/store/unmerged", "/store/temp"]
    self.yuiroot = config.yuiroot
    # Run per-thread initialisation whenever cherrypy starts a worker thread.
    cherrypy.engine.subscribe("start_thread", self.initThread)
    # Wildcard labels (e.g. "T1*") mapped to the site-name patterns they
    # expand to; addSiteWildcards fills self.wildcardSites from self.sites.
    self.wildcardKeys = getattr(config, "wildcardKeys", {"T1*": "T1_*", "T2*": "T2_*", "T3*": "T3_*"})
    self.wildcardSites = {}
    Utilities.addSiteWildcards(self.wildcardKeys, self.sites, self.wildcardSites)
def setUp(self):
    """
    Setup for unit tests

    Create the SiteDB client shared by every test in this case.
    """
    self.mySiteDB = SiteDBJSON()
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()

    @attr("integration")
    def testCmsNametoPhEDExNode(self):
        """
        Tests CmsNametoSE
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        # assertEqual replaces the deprecated failUnless(a == b) and gives
        # a useful diff on failure.
        self.assertEqual(sorted(results), sorted(target))

    @attr("integration")
    def testPhEDExNodetocmsName(self):
        """
        Tests PhEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.assertEqual(result, 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    @attr("integration")
    def testCmsNametoSE(self):
        """
        Tests CmsNametoSE
        """
        target = ['srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    @attr("integration")
    def testSEtoCmsName(self):
        """
        Tests CmsNametoSE
        """
        target = 'T1_US_FNAL'
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)

    @attr("integration")
    def testCmsNametoCE(self):
        """
        Tests CmsNametoCE
        """
        target = ['lcgce06.gridpp.rl.ac.uk', 'lcgce07.gridpp.rl.ac.uk',
                  'lcgce09.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoCE("T1_UK_RAL")
        self.assertEqual(sorted(results), target)

    @attr("integration")
    def testJSONParser(self):
        """
        Tests the JSON parser directly
        """
        cmsName = "cmsgrid02.hep.wisc.edu"
        results = self.mySiteDB.getJSON("CEtoCMSName", file="CEtoCMSName", name=cmsName)
        self.assertEqual(results['0']['name'], "T2_US_Wisconsin")

    @attr("integration")
    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        # Reassembled string literal that was broken across lines in the
        # original source.
        testDn = "/C=UK/O=eScience/OU=Bristol/L=IS/CN=simon metson"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    @attr("integration")
    def testParsingJsonWithApostrophe(self):
        """
        Tests parsing a DN json with an apostrophe in
        """
        json = """{"dn": "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", "user": "******"}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])

    @attr("integration")
    def testParsingInvalidJsonWithApostrophe(self):
        """
        Tests parsing a DN invalid json (from sitedb v1) with an apostrophe in
        """
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio' Fano", d['dn'])
        json = """{'dn': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'', 'user': '******'}"""
        d = self.mySiteDB.parser.dictParser(json)
        self.assertEqual("/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'", d['dn'])
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist, blockWhitelist, dbsUrl, fakeLocation=False): """ _getFiles_ Get the full information of a dataset including files, blocks, runs and lumis. Filter it using run and block white/black lists. It can receive and optional DBSUrl. """ dbsReader = DBSReader(endpoint = dbsUrl) phedexReader = PhEDEx() siteDB = SiteDBJSON() files = {} outputDatasetParts = datasetName.split("/") print "dataset",datasetName,"parts",outputDatasetParts try: #retrieve list of blocks from dataset blockNames = dbsReader.listFileBlocks(datasetName) except: raise RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName) has_parent = False try: parents = dbsReader.listDatasetParents( datasetName ) if parents: has_parent=True except: print "Dataset with no parent" pass #traverse each block for blockName in blockNames: #deal with white and black list. if blockBlacklist and blockName in blockBlacklist: continue if blockWhitelist and blockName not in blockWhitelist: continue #existing blocks in phedex replicaInfo = phedexReader.getReplicaInfoForBlocks(block = blockName, subscribed = 'y') blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True) #has_parent = dbsReader.listBlockParents(blockName) if has_parent: try: blockFileParents = dbsReader.listFilesInBlockWithParents(blockName) except: print blockName,"does not appear to have a parent, even though it should. 
Very suspicious" blockFileParents = dbsReader.listFilesInBlock(blockName) else: blockFileParents = dbsReader.listFilesInBlock(blockName) blockLocations = set() #load block locations if len(replicaInfo["phedex"]["block"]) > 0: for replica in replicaInfo["phedex"]["block"][0]["replica"]: node = replica["node"] cmsSites = siteDB.PNNtoPSN(node) if type(cmsSites) != list: cmsSites = [cmsSites] for cmsName in cmsSites: se = siteDB.cmsNametoSE(cmsName) blockLocations.update(se) logging.debug("cmsName %s mapped to se %s", cmsName, se) logging.debug("PhEDEx node %s, cmsSites %s, blockLocations %s", node, cmsSites, blockLocations) # We cannot upload docs without location, so force it in case it's empty if not blockLocations: if fakeLocation: logging.info("\t\t %s\tno location", blockName) blockLocations.update([u'cmssrmdisk.fnal.gov', u'srm-eoscms.cern.ch']) elif not has_parent: ## this should be the source logging.info("Blockname: %s\tno location, ABORT", blockName) sys.exit(1) logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations) #for each file on the block for blockFile in blockFiles: parentLFNs = [] #populate parent information if blockFileParents and "ParentList" in blockFileParents[0]: for fileParent in blockFileParents[0]["ParentList"]: parentLFNs.append(fileParent["LogicalFileName"]) ## remove when https://github.com/dmwm/WMCore/issues/7128 gets fixed #elif not 'RAW' in blockName: # print "no parent info" runInfo = {} #Lumis not included in file for lumiSection in blockFile["LumiList"]: if runBlacklist and lumiSection["RunNumber"] in runBlacklist: continue if runWhitelist and lumiSection["RunNumber"] not in runWhitelist: continue if lumiSection["RunNumber"] not in runInfo.keys(): runInfo[lumiSection["RunNumber"]] = [] runInfo[lumiSection["RunNumber"]].append(lumiSection["LumiSectionNumber"]) if len(runInfo.keys()) > 0: files[blockFile["LogicalFileName"]] = {"runs": runInfo, "events": blockFile["NumberOfEvents"], "size": blockFile["FileSize"], 
"locations": list(blockLocations), "parents": parentLFNs} return files
def __get_user(self):
    """Resolve the SiteDB username matching this credential's DN."""
    # Cache SiteDB responses for a day and route logging through WMCore.
    siteDb = SiteDBJSON({'cacheduration': 24,
                         'logger': logging.getLogger("WMCore")})
    return siteDb.dnUserName(dn=self.__get_distinguished_name())