def wrapped_func(*args, **kwargs):
    """Refresh the cached service handles held by the request handler
    (``args[0]``) and then delegate to the wrapped ``func``."""
    handler = args[0]
    # Re-download the SiteDB site/node lists when the cache is empty or
    # older than 30 minutes (1800 s).
    if 'sitedb' in services and (not handler.allCMSNames.sites or
                                 (handler.allCMSNames.cachetime + 1800 < mktime(gmtime()))):
        handler.allCMSNames = CMSSitesCache(
            sites=SiteDBJSON(config={'cert': serverCert, 'key': serverKey}).getAllCMSNames(),
            cachetime=mktime(gmtime()))
        handler.allPNNNames = CMSSitesCache(
            sites=SiteDBJSON(config={'cert': serverCert, 'key': serverKey}).getAllPhEDExNodeNames(),
            cachetime=mktime(gmtime()))
    # Lazily build the PhEDEx client with the server credentials.
    if 'phedex' in services and not handler.phedex:
        phdict = handler.phedexargs
        phdict.update({'cert': serverCert, 'key': serverKey})
        handler.phedex = PhEDEx(responseType='xml', dict=phdict)
    # Refresh the central configuration cache with the same 30-minute policy.
    if 'centralconfig' in services and (not handler.centralcfg.centralconfig or
                                        (handler.centralcfg.cachetime + 1800 < mktime(gmtime()))):
        handler.centralcfg = ConfigCache(
            centralconfig=getCentralConfig(extconfigurl=handler.config.extconfigurl,
                                           mode=handler.config.mode),
            cachetime=mktime(gmtime()))
    # Expose the raw server credentials when requested.
    if 'servercert' in services:
        handler.serverCert = serverCert
        handler.serverKey = serverKey
    return func(*args, **kwargs)
def testEmulator(self):
    """Verify that setEmulators swaps in the emulator backends and that
    resetEmulators restores the real service implementations."""
    EmulatorHelper.setEmulators(True, True, True, True)
    for obj, module, clsname in (
            (PhEDEx(), 'WMQuality.Emulators.PhEDExClient.PhEDEx', 'PhEDEx'),
            (DBSReader(self.globalDBS), 'WMQuality.Emulators.DBSClient.DBSReader', 'DBSReader'),
            (SiteDBJSON(), 'WMQuality.Emulators.SiteDBClient.SiteDB', 'SiteDBJSON'),
            (RequestManager(), 'WMQuality.Emulators.RequestManagerClient.RequestManager', 'RequestManager')):
        self.assertEqual(obj.wrapped.__module__, module)
        self.assertEqual(obj.__class__.__name__, clsname)
    EmulatorHelper.resetEmulators()
    for obj, module, clsname in (
            (PhEDEx(), 'WMCore.Services.PhEDEx.PhEDEx', 'PhEDEx'),
            (DBSReader(self.globalDBS), 'WMCore.Services.DBS.DBS2Reader', 'DBS2Reader'),
            (SiteDBJSON(), 'WMCore.Services.SiteDB.SiteDB', 'SiteDBJSON'),
            (RequestManager(), 'WMCore.Services.RequestManager.RequestManager', 'RequestManager')):
        self.assertEqual(obj.wrapped.__module__, module)
        self.assertEqual(obj.__class__.__name__, clsname)
def execute(self, *args, **kwargs):
    """Build the single fake MC fileset (one MCFakeFile) used to split
    MC-generation tasks, and return it wrapped in a Result."""
    totalevents = kwargs['task']['tm_totalunits']
    firstEvent = 1
    lastEvent = totalevents
    firstLumi = 1
    lastLumi = 10
    # Set a default of 100 events per lumi.  This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not kwargs['task']['tm_events_per_lumi']:
        kwargs['task']['tm_events_per_lumi'] = 100
    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name="MCFakeFileSet")
    newFile = File("MCFakeFile", size=1000, events=totalevents)
    if hasattr(self.config.Sites, 'available'):
        # Sites are explicitly configured; use them as the file location.
        newFile.setLocation(self.config.Sites.available)
    else:
        # Otherwise allow the file to run at every CMS site known to SiteDB.
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        newFile.setLocation(sbj.getAllCMSNames())
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["block"] = 'MCFackBlock'
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    singleMCFileset.addFile(newFile)
    return Result(task=kwargs['task'], result=singleMCFileset)
def __init__(self, config, noSiteDB=False):
    """
    _init_

    Set up the web page: template dir, CouchDB URLs/names and the sorted
    list of CMS site names downloaded from SiteDB.

    Note, noSiteDB added for TESTING PURPOSED ONLY!
    """
    WebAPI.__init__(self, config)
    ReqMgrAuth.assign_roles = config.security_roles
    # Take a guess
    self.templatedir = config.templates
    self.couchUrl = config.couchUrl
    # bugfix: configDBName was assigned twice; keep a single assignment
    self.configDBName = config.configDBName
    self.workloadDBName = config.workloadDBName
    self.wmstatWriteURL = "%s/%s" % (self.couchUrl.rstrip('/'), config.wmstatDBName)
    if not noSiteDB:
        try:
            # Download a list of all the sites from SiteDB, uses v2 API.
            sitedb = SiteDBJSON()
            self.sites = sitedb.getAllCMSNames()
            self.sites.sort()
        except Exception as ex:  # 'except E, e' is Py2-only; 'as' works on 2.6+
            msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % ex
            cherrypy.log(msg)
            raise
def __init__(self, config):
    """
    ___init___

    Initialise class members
    """
    BaseWorkerThread.__init__(self)
    self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
    self.siteDB = SiteDBJSON()
    self.dbsUrl = config.DBSInterface.globalDBSUrl
    self.group = getattr(config.PhEDExInjector, "group", "DataOps")
    self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)
    self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)
    # Subscribed state in the DBSBuffer table for datasets:
    # 1 normally, 2 when running in safe operation mode.
    self.terminalSubscriptionState = 2 if self.safeMode else 1
    # We will map node names to CMS names, that is what the spec will have.
    # If a CMS name is associated to many PhEDEx nodes then choose the MSS option.
    self.cmsToPhedexMap = {}
    # initialize the alert framework (if available - config.Alert present);
    # self.sendAlert will then be available
    self.initAlerts(compName="PhEDExInjector")
def setUp(self):
    """
    Setup for unit tests: only the RequestManager is emulated; the SiteDB
    client under test talks to the real service.
    """
    super(SiteDBTest, self).setUp()
    EmulatorHelper.setEmulators(phedex=False, dbs=False,
                                siteDB=False, requestMgr=True)
    self.mySiteDB = SiteDBJSON()
def getUsernameFromSiteDB(self):
    """
    Return the user's username as recorded in SiteDB, resolved from the
    DN of the user's proxy certificate.
    """
    userproxy = self.proxy()
    dn = userproxy.getSubjectFromCert(self.certLocation)
    proxyfile = userproxy.getProxyFilename()
    sitedb = SiteDBJSON({"key": proxyfile, "cert": proxyfile})
    return sitedb.dnUserName(dn)
def __init__(self, config):
    """Create the browser handle and the SiteDB/DBS service connections."""
    self.br = Browser()
    self.config = config
    # Initialise connections
    self.mySiteDB = SiteDBJSON()
    # One DBS reader per physics instance.
    for instance in ('phys01', 'phys02', 'phys03'):
        setattr(self, 'dbs' + instance.capitalize(),
                DbsApi(url=dbs_base_url + instance + '/DBSReader/'))
def sites():
    "Return known CMS site list from SiteDB"
    try:
        # Download a list of all the sites from SiteDB, uses v2 API.
        site_names = SiteDBJSON().getAllCMSNames()
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return sorted(site_names)
def __init__(self, config):
    """Create the browser handle plus SiteDB, PhEDEx and DBS connections."""
    self.br = Browser()
    self.config = config
    # Initialise connections
    self.mySiteDB = SiteDBJSON()
    self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"},
                         "json")
    # One DBS reader per physics instance.
    for instance in ('phys01', 'phys02', 'phys03'):
        setattr(self, 'dbs' + instance.capitalize(),
                DbsApi(url=dbs_base_url + instance + '/DBSReader/'))
def gethnName_urlenc(self, dn):
    """
    Map a certificate DN to the matching hypernews (SiteDB) user name.

    Returns a (status, hnUserName) tuple: status is 0 on success and 1 on
    failure (hnUserName is then None).
    """
    from WMCore.Services.SiteDB.SiteDB import SiteDBJSON
    hnUserName = None
    status = 0
    try:
        hnUserName = SiteDBJSON().dnUserName(dn=dn)
    except Exception:
        # bugfix: the previous bare 'except:' also swallowed SystemExit
        # and KeyboardInterrupt; only service/lookup failures belong here.
        status = 1
    return status, hnUserName
def execute(self, *args, **kwargs):
    """Data discovery and splitting for a task that runs over a
    user-provided list of files instead of a published dataset.  Marks the
    task FAILED through the REST interface when the input is unusable."""
    task = kwargs['task']
    taskname = task['tm_taskname']
    self.logger.info("Data discovery and splitting for %s using user-provided files" % taskname)
    userfiles = task['tm_arguments'].get('userfiles')
    splitting = task['tm_split_algo']
    total_units = task['tm_totalunits']
    if not userfiles or splitting != 'FileBased':
        if not userfiles:
            msg = "No files specified to process for task %s." % taskname
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        self.logger.error("Setting %s as failed: %s" % (taskname, msg))
        configreq = {'workflow': taskname,
                     'status': "FAILED",
                     'subresource': 'failure',
                     'failure': b64encode(msg)}
        self.server.post(self.resturi, data=urllib.urlencode(configreq))
        raise StopHandler(msg)
    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()
    userFileset = Fileset(name=taskname)
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)
    file_counter = 0
    for idx, userfile in enumerate(userfiles):
        newFile = File(userfile, size=1000, events=1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
        file_counter += 1
        if total_units > 0 and file_counter >= total_units:
            break
    return Result(task=task, result=userFileset)
def getUsernameFromSiteDB(self):
    """
    Return the client's hypernews (SiteDB) user name, looked up from the
    DN of the client's proxy certificate.
    """
    userproxy = self.proxy()
    dn = userproxy.getSubjectFromCert(self.certLocation)
    proxyfile = userproxy.getProxyFilename()
    return SiteDBJSON({"key": proxyfile, "cert": proxyfile}).dnUserName(dn)
def pnns():
    """
    Returns all PhEDEx node names, excluding Buffer endpoints
    """
    try:
        node_names = SiteDBJSON().getAllPhEDExNodeNames(excludeBuffer=True)
    except Exception as exc:
        msg = "ERROR: Could not retrieve PNNs from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return sorted(node_names)
def insertAllSEs(self, siteName, pendingSlots=0, runningSlots=0,
                 ceName=None, plugin=None, taskList=None):
    """
    _insertAllSEs_

    Insert all SEs into WMBS ResourceControl.
    This uses Services.SiteDB to insert all SEs under a common CE.
    It is meant to be used with WMS submission.

    Sites will be named siteName_SEName.

    It expects a taskList of the following form:
    [{'taskType': taskType, 'priority': priority,
      'maxSlots': maxSlots, 'pendingSlots': pendingSlots}]
    For each entry in the taskList, a threshold is inserted into the
    database for EVERY SE.

    Raises ResourceControlException when a taskList entry is missing
    'maxSlots', 'taskType' or 'priority'.
    """
    from WMCore.Services.SiteDB.SiteDB import SiteDBJSON
    # bugfix: taskList previously defaulted to a shared mutable [].
    taskList = taskList if taskList is not None else []
    siteDB = SiteDBJSON()
    for cmsName in siteDB.getAllCMSNames():
        for SE in siteDB.cmsNametoSE(cmsName):
            sName = '%s_%s' % (siteName, SE)
            self.insertSite(siteName=sName, pendingSlots=pendingSlots,
                            seName=SE, runningSlots=runningSlots,
                            ceName=ceName, cmsName=cmsName, plugin=plugin)
            for task in taskList:
                # 'has_key' is Py2-only; 'in' works everywhere.
                if 'maxSlots' not in task or 'taskType' not in task \
                       or 'priority' not in task:
                    msg = "Incomplete task in taskList for ResourceControl.insertAllSEs\n"
                    # bugfix: 'msg += task' concatenated str + dict -> TypeError
                    msg += str(task)
                    raise ResourceControlException(msg)
                self.insertThreshold(siteName=sName, taskType=task['taskType'],
                                     maxSlots=task['maxSlots'],
                                     pendingSlots=task['pendingSlots'],
                                     priority=task['priority'])
    return
def sites():
    "Return known CMS site list from SiteDB"
    use_cric = os.getenv("WMAGENT_USE_CRIC", False) or os.getenv("WMCORE_USE_CRIC", False)
    try:
        if use_cric:
            site_list = sorted(CRIC().getAllPSNs())
        else:
            # Download a list of all the sites from SiteDB, uses v2 API.
            site_list = sorted(SiteDBJSON().getAllCMSNames())
    except Exception as exc:
        msg = "ERROR: Could not retrieve sites from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return site_list
def wrapped_func(*args, **kwargs):
    """Refresh the handler's cached SiteDB site list and lazily create its
    PhEDEx client, then delegate to the wrapped ``func``."""
    handler = args[0]
    # Re-download the CMS site list when the cache is empty or older
    # than 30 minutes (1800 s).
    if 'sitedb' in services and (not handler.allCMSNames.sites or
                                 (handler.allCMSNames.cachetime + 1800 < mktime(gmtime()))):
        handler.allCMSNames = CMSSitesCache(
            sites=SiteDBJSON(config={'cert': serverCert, 'key': serverKey}).getAllCMSNames(),
            cachetime=mktime(gmtime()))
    if 'phedex' in services and not handler.phedex:
        handler.phedex = PhEDEx(responseType='xml', dict=handler.phedexargs)
    return func(*args, **kwargs)
def pnns():
    """
    Returns all PhEDEx node names, excluding Buffer endpoints
    """
    if os.getenv("WMAGENT_USE_CRIC", False) or os.getenv("WMCORE_USE_CRIC", False):
        node_source = CRIC()  # FIXME: rename it to cric
    else:
        node_source = SiteDBJSON()
    try:
        pnn_list = sorted(node_source.getAllPhEDExNodeNames(excludeBuffer=True))
    except Exception as exc:
        msg = "ERROR: Could not retrieve PNNs from SiteDB, reason: %s" % str(exc)
        raise Exception(msg)
    return pnn_list
def getDNFromUserName(username, log, ckey=None, cert=None):
    """
    Resolve *username* to the user's DN as registered in SiteDB.

    Fixes a copy-pasted docstring that wrongly talked about parsing a
    site string for an FTS server.

    Returns an empty string when the user does not exist or SiteDB
    cannot be reached; failures are reported through *log*.
    """
    dn = ''
    site_db = SiteDBJSON(config={'key': ckey, 'cert': cert})
    try:
        dn = site_db.userNameDn(username)
    except IndexError:
        # SiteDB returns an empty result set for unknown users.
        log.error("user does not exist")
    except RuntimeError:
        log.error("SiteDB URL cannot be accessed")
    return dn
def setupMCWMSpec(self):
    """
    Setup MC workflow.

    Builds the MC spec, records its top-level task/dataset and registers
    the two test sites in resource control (normally an operator task).
    """
    self.wmspec = self.createMCWMSpec()
    self.topLevelTask = getFirstTask(self.wmspec)
    self.inputDataset = self.topLevelTask.inputDataset()
    self.dataset = self.topLevelTask.getInputDatasetPath()
    self.dbs = None
    self.siteDB = SiteDBJSON()
    # add sites that would normally be added by operator via resource_control
    locationDAO = self.daoFactory(classname="Locations.New")
    self.pnns = []
    for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
        # improvement: look the PNN up once per site instead of issuing
        # the same SiteDB query twice.
        pnn = self.siteDB.cmsNametoPhEDExNode(site)[0]
        locationDAO.execute(siteName=site, pnn=pnn)
        self.pnns.append(pnn)
def execute(self, *args, **kwargs):
    """Data discovery and splitting for a task running over a
    user-supplied list of files (tm_user_files) rather than a dataset."""
    task = kwargs['task']
    taskname = task['tm_taskname']
    self.logger.info("Data discovery and splitting for %s using user-provided files" % taskname)
    userfiles = task['tm_user_files']
    splitting = task['tm_split_algo']
    total_units = task['tm_totalunits']
    if not userfiles or splitting != 'FileBased':
        if not userfiles:
            msg = "No files specified to process for task %s." % taskname
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        raise TaskWorkerException(msg)
    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()
    userFileset = Fileset(name=taskname)
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)
    file_counter = 0
    for idx, userfile in enumerate(userfiles):
        newFile = File(userfile, size=1000, events=1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
        file_counter += 1
        if total_units > 0 and file_counter >= total_units:
            break
    return Result(task=task, result=userFileset)
def __init__(self, config):
    """
    ___init___

    Initialise class members
    """
    BaseWorkerThread.__init__(self)
    self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
    self.siteDB = SiteDBJSON()
    self.dbsUrl = config.DBSInterface.globalDBSUrl
    self.group = getattr(config.PhEDExInjector, "group", "DataOps")
    # We will map node names to CMS names, that is what the spec will have.
    # If a CMS name is associated to many PhEDEx nodes then choose the MSS option.
    self.cmsToPhedexMap = {}
    self.phedexNodes = {'MSS': [], 'Disk': []}
    # initialize the alert framework (if available - config.Alert present);
    # self.sendAlert will then be available
    self.initAlerts(compName="PhEDExInjector")
def getSiteInfo(config):
    """
    Return the sorted list of CMS site names from SiteDB, extended with
    every configured wildcard key (e.g. 'T1*') whose pattern matched at
    least one site.
    """
    sitedb = SiteDBJSON()
    sites = sitedb.getAllCMSNames()
    sites.sort()
    wildcardKeys = getattr(config, 'wildcardKeys', {'T1*': 'T1_*',
                                                    'T2*': 'T2_*',
                                                    'T3*': 'T3_*'})
    # Map wildcard key -> list of concrete sites it covers.  Kept (even
    # though unused here) to mirror the original bookkeeping.
    wildcardSites = {}
    for key, pattern in wildcardKeys.items():
        # Single scan per key replaces the old found-flag + 'not k in'
        # + repeated .keys() membership tests.
        matched = [s for s in sites if re.search(pattern, s)]
        if matched:
            wildcardSites[key] = matched
            sites.append(key)
    return sites
# NOTE(review): extraction has collapsed this method onto one line and no
# 'return' statement is visible, so the tail is presumably truncated --
# left byte-identical on purpose.  What is visible: it builds WMCore File
# objects from DBS file info, translates SE names to CMS site names via
# SiteDB (caching results in secmsmap), and accumulates run/lumi counters.
# Py2-isms present: iteritems(), has_key(), 'except KeyError, ke'.
def formatOutput(self, task, requestname, datasetfiles, locations): """Receives as input the result of the data location discovery operations and fill up the WMCore objects.""" self.logger.debug(" Formatting data discovery output ") # TEMPORARY secmsmap = {} sbj = SiteDBJSON({"key":self.config.MyProxy.serverhostkey, "cert":self.config.MyProxy.serverhostcert}) wmfiles = [] lumicounter = evecounter = 0 for lfn, infos in datasetfiles.iteritems(): wmfile = File(lfn=lfn, events=infos['NumberOfEvents'], size=infos['Size'], checksums=infos['Checksums']) wmfile['block'] = infos['BlockName'] wmfile['locations'] = [] if locations.has_key(infos['BlockName']): for se in locations[infos['BlockName']]: if se not in secmsmap: self.logger.debug("Translating SE %s" %se) try: secmsmap[se] = sbj.seToCMSName(se) except KeyError, ke: self.logger.error("Impossible translating %s to a CMS name through SiteDB" %se) secmsmap[se] = '' if se in secmsmap: if type(secmsmap[se]) == list: wmfile['locations'].extend(secmsmap[se]) else: wmfile['locations'].append(secmsmap[se]) wmfile['workflow'] = requestname evecounter += infos['NumberOfEvents'] for run, lumis in infos['Lumis'].iteritems(): #self.logger.debug(' - adding run %d and lumis %s' %(run, lumis)) wmfile.addRun(Run(run, *lumis)) lumicounter += len(lumis) wmfiles.append(wmfile)
def __init__(self, whiteList=None, blackList=None, logger=None, mapper=None, dict=None):
    """
    Initialise the SE black/white list parser.

    whiteList/blackList may be a comma-separated string, a list, or None.
    'dict' (name kept for interface compatibility although it shadows the
    builtin) is the configuration handed to SiteDBJSON.
    """
    def normalize(lst):
        # Accept a comma-separated string, a list, or anything else (-> []).
        if isinstance(lst, str):
            return lst.split(',') if lst else []
        if isinstance(lst, list):
            return lst
        return []

    self.logger = logger
    self.kind = 'se'
    self.mapper = mapper
    # bugfix: the default was a shared mutable {} reused across every call;
    # build a fresh dict per instantiation instead.
    self.siteDBAPI = SiteDBJSON(dict if dict is not None else {})
    whiteList = normalize(whiteList)
    blackList = normalize(blackList)
    logger.debug('Input whitelist: %s' % ', '.join(whiteList))
    logger.debug('Input blacklist: %s' % ', '.join(blackList))
    self.blacklist = set(self.expandList(blackList))
    self.whitelist = set(self.expandList(whiteList))
    logger.debug('Converted whitelist: %s' % ', '.join(self.whitelist))
    logger.debug('Converted blacklist: %s' % ', '.join(self.blacklist))
def __init__(self, whiteList=None, blackList=None, logger=None, dict=None):
    """
    Initialise the CE black/white list parser; entries are expanded
    through SiteDB's CMS-name -> CE mapping.

    'dict' (name kept for interface compatibility although it shadows the
    builtin) is the configuration handed to SiteDBJSON.
    """
    # bugfix: the default {} was shared across calls, and the caller's
    # dict was mutated in place; copy before injecting the logger.
    cfg = {} if dict is None else dict.copy()
    if logger:
        cfg['logger'] = logger
    self.siteDBAPI = SiteDBJSON(cfg)
    super(CEBlackWhiteListParser, self).__init__(whiteList, blackList, logger,
                                                 self.siteDBAPI.cmsNametoCE, cfg)
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist, blockWhitelist, dbsUrl, fakeLocation=False): """ _getFiles_ Get the full information of a dataset including files, blocks, runs and lumis. Filter it using run and block white/black lists. It can receive and optional DBSUrl. """ dbsReader = DBSReader(endpoint=dbsUrl) phedexReader = PhEDEx() siteDB = SiteDBJSON() class BlockBuster(threading.Thread): def __init__(self, **args): threading.Thread.__init__(self) for k, v in args.items(): setattr(self, k, v) self.major_failure = False def run(self): self.files = {} logging = self.l has_parent = self.hp fakeLocation = self.fl blockName = self.bn blockBlacklist = self.bbl blockWhitelist = self.bwl if blockBlacklist and blockName in blockBlacklist: return if blockWhitelist and blockName not in blockWhitelist: return phedexReader = PhEDEx() siteDB = SiteDBJSON() dbsReader = DBSReader(endpoint=self.dbs) replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName, subscribed='y') blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True) if has_parent: try: blockFileParents = dbsReader.listFilesInBlockWithParents( blockName) except: print blockName, "does not appear to have a parent, even though it should. 
Very suspicious" blockFileParents = dbsReader.listFilesInBlock(blockName) else: blockFileParents = dbsReader.listFilesInBlock(blockName) blockLocations = set() # load block locations if len(replicaInfo["phedex"]["block"]) > 0: for replica in replicaInfo["phedex"]["block"][0]["replica"]: PNN = replica["node"] PSNs = siteDB.PNNtoPSN(PNN) blockLocations.add(PNN) #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs) # We cannot upload docs without location, so force it in case it's empty if not blockLocations: if fakeLocation: #logging.info("\t\t %s\tno location", blockName) blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN']) elif not has_parent: ## this should be the source logging.info("Blockname: %s\tno location, ABORT", blockName) self.major_failure = True #sys.exit(1) #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations) # for each file on the block for blockFile in blockFiles: parentLFNs = [] # populate parent information if blockFileParents and "ParentList" in blockFileParents[0]: for fileParent in blockFileParents[0]["ParentList"]: parentLFNs.append(fileParent["LogicalFileName"]) runInfo = {} # Lumis not included in file for lumiSection in blockFile["LumiList"]: if runBlacklist and lumiSection[ "RunNumber"] in runBlacklist: continue if runWhitelist and lumiSection[ "RunNumber"] not in runWhitelist: continue if lumiSection["RunNumber"] not in runInfo.keys(): runInfo[lumiSection["RunNumber"]] = [] runInfo[lumiSection["RunNumber"]].append( lumiSection["LumiSectionNumber"]) if len(runInfo.keys()) > 0: self.files[blockFile["LogicalFileName"]] = { "runs": runInfo, "events": blockFile["NumberOfEvents"], "size": blockFile["FileSize"], "locations": list(blockLocations), "parents": parentLFNs } return files = {} outputDatasetParts = datasetName.split("/") print "dataset", datasetName, "parts", outputDatasetParts try: # retrieve list of blocks from dataset blockNames = dbsReader.listFileBlocks(datasetName) except: raise 
RuntimeError("Dataset %s doesn't exist in given DBS instance" % datasetName) has_parent = False try: parents = dbsReader.listDatasetParents(datasetName) if parents: has_parent = True except: print "Dataset with no parent" pass bthreads = [] # traverse each block for blockName in blockNames: bthreads.append( BlockBuster(bn=blockName, hp=has_parent, fl=fakeLocation, bbl=blockBlacklist, bwl=blockWhitelist, l=logging, dbs=dbsUrl)) print len(bthreads), "block query created" bthreads = ThreadBuster(bthreads, 40, 2., verbose=False) for t in bthreads: if t.major_failure: print "There was a major failure in processing block files" sys.exit(1) files.update(t.files) print len(files) return files
def run(self): self.files = {} logging = self.l has_parent = self.hp fakeLocation = self.fl blockName = self.bn blockBlacklist = self.bbl blockWhitelist = self.bwl if blockBlacklist and blockName in blockBlacklist: return if blockWhitelist and blockName not in blockWhitelist: return phedexReader = PhEDEx() siteDB = SiteDBJSON() dbsReader = DBSReader(endpoint=self.dbs) replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName, subscribed='y') blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True) if has_parent: try: blockFileParents = dbsReader.listFilesInBlockWithParents( blockName) except: print blockName, "does not appear to have a parent, even though it should. Very suspicious" blockFileParents = dbsReader.listFilesInBlock(blockName) else: blockFileParents = dbsReader.listFilesInBlock(blockName) blockLocations = set() # load block locations if len(replicaInfo["phedex"]["block"]) > 0: for replica in replicaInfo["phedex"]["block"][0]["replica"]: PNN = replica["node"] PSNs = siteDB.PNNtoPSN(PNN) blockLocations.add(PNN) #logging.debug("PhEDEx Node Name: %s\tPSNs: %s", PNN, PSNs) # We cannot upload docs without location, so force it in case it's empty if not blockLocations: if fakeLocation: #logging.info("\t\t %s\tno location", blockName) blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN']) elif not has_parent: ## this should be the source logging.info("Blockname: %s\tno location, ABORT", blockName) self.major_failure = True #sys.exit(1) #logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations) # for each file on the block for blockFile in blockFiles: parentLFNs = [] # populate parent information if blockFileParents and "ParentList" in blockFileParents[0]: for fileParent in blockFileParents[0]["ParentList"]: parentLFNs.append(fileParent["LogicalFileName"]) runInfo = {} # Lumis not included in file for lumiSection in blockFile["LumiList"]: if runBlacklist and lumiSection[ "RunNumber"] in runBlacklist: continue if runWhitelist and 
lumiSection[ "RunNumber"] not in runWhitelist: continue if lumiSection["RunNumber"] not in runInfo.keys(): runInfo[lumiSection["RunNumber"]] = [] runInfo[lumiSection["RunNumber"]].append( lumiSection["LumiSectionNumber"]) if len(runInfo.keys()) > 0: self.files[blockFile["LogicalFileName"]] = { "runs": runInfo, "events": blockFile["NumberOfEvents"], "size": blockFile["FileSize"], "locations": list(blockLocations), "parents": parentLFNs } return
def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
    """
    Receives as input the result of the data location discovery operations
    and fill up the WMCore objects.
    """
    self.logger.debug(" Formatting data discovery output ")
    # TEMPORARY
    pnn_psn_map = {}
    sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                      "cert": self.config.TaskWorker.cmscert})
    wmfiles = []
    event_counter = 0
    lumi_counter = 0
    uniquelumis = set()
    datasetLumis = {}
    ## Loop over the sorted list of files.
    for lfn, infos in datasetfiles.iteritems():
        ## Skip the file if the block has not been found or has no locations.
        if not infos['BlockName'] in locations or not locations[infos['BlockName']]:
            self.logger.warning("Skipping %s because its block (%s) has no locations" %
                                (lfn, infos['BlockName']))
            continue
        ## Skip the file if it is not in VALID state.
        if not infos.get('ValidFile', True):
            self.logger.warning("Skipping invalid file %s" % lfn)
            continue
        if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
            raise TaskWorkerException(
                "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n" +
                "because you specified useParents=True but some your files have no" +
                "parents.\nExample: " + lfn)
        ## Create a WMCore File object.
        try:
            size = infos['FileSize']
            checksums = {'Checksum': infos['Checksum'],
                         'Adler32': infos['Adler32'],
                         'Md5': infos['Md5']}
        except:
            # This is so that the task worker does not crash if an old version of
            # WMCore is used (the interface of an API suddenly changed).  We may
            # want to remove the try/except and the two fallback lines eventually,
            # but keeping them for the moment so other devels won't be affected.
            # See WMCore commit 2afc01ae571390f5fa009dd258be757adac89c28.
            size = infos['Size']
            checksums = infos['Checksums']
        wmfile = File(lfn=lfn, events=infos['NumberOfEvents'], size=size,
                      checksums=checksums, parents=infos['Parents'])
        wmfile['block'] = infos['BlockName']
        wmfile['locations'] = []
        for pnn in locations[infos['BlockName']]:
            if pnn and pnn not in pnn_psn_map:
                self.logger.debug("Translating PNN %s" % pnn)
                try:
                    pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                except KeyError:
                    self.logger.error("Impossible translating %s to a CMS name through SiteDB" % pnn)
                    pnn_psn_map[pnn] = ''
                except httplib.HTTPException as ex:
                    self.logger.error("Couldn't map SE to site: %s" % pnn)
                    print("Couldn't map SE to site: %s" % pnn)
                    print("got problem: %s" % ex)
                    print("got another problem: %s" % ex.__dict__)
            if pnn and pnn in pnn_psn_map:
                if isinstance(pnn_psn_map[pnn], list):
                    wmfile['locations'].extend(pnn_psn_map[pnn])
                else:
                    wmfile['locations'].append(pnn_psn_map[pnn])
        wmfile['workflow'] = requestname
        event_counter += infos['NumberOfEvents']
        for run, lumis in infos['Lumis'].iteritems():
            datasetLumis.setdefault(run, []).extend(lumis)
            wmfile.addRun(Run(run, *lumis))
            for lumi in lumis:
                uniquelumis.add((run, lumi))
            lumi_counter += len(lumis)
        wmfiles.append(wmfile)
    uniquelumis = len(uniquelumis)
    self.logger.debug('Tot events found: %d' % event_counter)
    self.logger.debug('Tot lumis found: %d' % uniquelumis)
    self.logger.debug('Duplicate lumis found: %d' % (lumi_counter - uniquelumis))
    self.logger.debug('Tot files found: %d' % len(wmfiles))
    self.logger.debug("Starting to create compact lumilists for input dataset")
    datasetLumiList = LumiList(runsAndLumis=datasetLumis)
    datasetLumis = datasetLumiList.getCompactList()
    datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
    self.logger.debug("Finished to create compact lumilists for input dataset")
    with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
        json.dump(datasetLumis, fd)
    with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
        json.dump(datasetDuplicateLumis, fd)
    return Result(task=task, result=Fileset(name='FilesToSplit', files=set(wmfiles)))
def __init__(self, *args, **kwargs):
    """Initialise the action and build the SiteDB client authenticated
    with the TaskWorker host credentials."""
    TaskAction.__init__(self, *args, **kwargs)
    credentials = {"key": self.config.TaskWorker.cmskey,
                   "cert": self.config.TaskWorker.cmscert}
    self.sbj = SiteDBJSON(credentials)