def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
    """
    Receives as input the result of the data location
    discovery operations and fill up the WMCore objects.

    :param task: task dictionary; ``task['tm_use_parent']`` is read here.
    :param requestname: value stored under ``'workflow'`` on every file.
    :param datasetfiles: mapping of LFN -> file information dictionary.
    :param locations: mapping of block name -> iterable of PNNs.
    :param tempDir: directory in which ``input_dataset_lumis.json`` and
        ``input_dataset_duplicate_lumis.json`` are written.
    :return: a ``Result`` wrapping a ``Fileset`` named 'FilesToSplit'.
    :raises TaskWorkerException: if ``task['tm_use_parent'] == 1`` and a
        file has an empty ``'Parents'`` list.
    """
    self.logger.debug(" Formatting data discovery output ")
    # TEMPORARY
    pnn_psn_map = {}
    sbj = SiteDBJSON({
        "key": self.config.TaskWorker.cmskey,
        "cert": self.config.TaskWorker.cmscert
    })

    wmfiles = []
    event_counter = 0
    lumi_counter = 0
    uniquelumis = set()
    datasetLumis = {}
    ## Loop over the files.
    for lfn, infos in datasetfiles.iteritems():
        block = infos['BlockName']
        ## Skip the file if the block has not been found or has no locations.
        if block not in locations or not locations[block]:
            self.logger.warning(
                "Skipping %s because its block (%s) has no locations",
                lfn, block)
            continue
        ## Skip the file if it is not in VALID state.
        if not infos.get('ValidFile', True):
            self.logger.warning("Skipping invalid file %s", lfn)
            continue

        if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
            raise TaskWorkerException(
                "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                + "because you specified useParents=True but some your files have no "
                + "parents.\nExample: " + lfn)
        ## Create a WMCore File object.
        try:
            size = infos['FileSize']
            checksums = {
                'Checksum': infos['Checksum'],
                'Adler32': infos['Adler32'],
                'Md5': infos['Md5']
            }
        except KeyError:
            # This is so that the task worker does not crash if an old version of
            # WMCore is used (the interface of an API suddenly changed).
            # We may want to remove the try/except and the following two lines
            # eventually, but keeping them for the moment so other devels won't
            # be affected.
            # See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
            size = infos['Size']
            checksums = infos['Checksums']
        wmfile = File(lfn=lfn,
                      events=infos['NumberOfEvents'],
                      size=size,
                      checksums=checksums,
                      parents=infos['Parents'])
        wmfile['block'] = block
        wmfile['locations'] = []
        for pnn in locations[block]:
            if pnn and pnn not in pnn_psn_map:
                self.logger.debug("Translating PNN %s", pnn)
                try:
                    pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                except KeyError:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through SiteDB",
                        pnn)
                    # Cache the failure so the lookup is not retried.
                    pnn_psn_map[pnn] = ''
                except httplib.HTTPException as ex:
                    # Nothing is cached here, so the translation is attempted
                    # again the next time this PNN is seen.
                    self.logger.error("Couldn't map SE to site: %s", pnn)
                    print("Couldn't map SE to site: %s" % pnn)
                    print("got problem: %s" % ex)
                    print("got another problem: %s" % ex.__dict__)
            if pnn and pnn in pnn_psn_map:
                # The cached value is either a list of PSNs or a single value.
                if isinstance(pnn_psn_map[pnn], list):
                    wmfile['locations'].extend(pnn_psn_map[pnn])
                else:
                    wmfile['locations'].append(pnn_psn_map[pnn])
        wmfile['workflow'] = requestname
        event_counter += infos['NumberOfEvents']
        for run, lumis in infos['Lumis'].iteritems():
            datasetLumis.setdefault(run, []).extend(lumis)
            wmfile.addRun(Run(run, *lumis))
            for lumi in lumis:
                uniquelumis.add((run, lumi))
            lumi_counter += len(lumis)
        wmfiles.append(wmfile)

    unique_lumi_count = len(uniquelumis)
    self.logger.debug('Tot events found: %d', event_counter)
    self.logger.debug('Tot lumis found: %d', unique_lumi_count)
    self.logger.debug('Duplicate lumis found: %d',
                      lumi_counter - unique_lumi_count)
    self.logger.debug('Tot files found: %d', len(wmfiles))

    self.logger.debug("Starting to create compact lumilists for input dataset")
    datasetLumiList = LumiList(runsAndLumis=datasetLumis)
    datasetLumis = datasetLumiList.getCompactList()
    datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
    self.logger.debug("Finished to create compact lumilists for input dataset")
    with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
        json.dump(datasetLumis, fd)
    with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
        json.dump(datasetDuplicateLumis, fd)

    return Result(task=task,
                  result=Fileset(name='FilesToSplit', files=set(wmfiles)))
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for SiteScreening module
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        self.assertEqual(sorted(results), sorted(target))

        target = ['T1_US_FNAL_Disk']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL_Disk")
        self.assertEqual(sorted(results), sorted(target))

    def testCmsNametoSE(self):
        """
        Tests cmsNametoSE
        """
        target = [u'srm-cms-disk.gridpp.rl.ac.uk', u'srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testCmsNamePatterntoSE(self):
        """
        Tests cmsNametoSE with a site name pattern
        """
        # The duplicated entry is part of the expected result.
        target = [u'srm-eoscms.cern.ch', u'srm-eoscms.cern.ch',
                  u'storage01.lcg.cscs.ch', u'eoscmsftp.cern.ch']
        results = self.mySiteDB.cmsNametoSE("%T2_CH")
        self.assertEqual(sorted(results), sorted(target))

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        # First lookup is compared as-is, i.e. order matters.
        target = [u'T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)

        target = [u'T2_CH_CERN', u'T2_CH_CERN_HLT']
        results = self.mySiteDB.seToCMSName("srm-eoscms.cern.ch")
        self.assertEqual(sorted(results), sorted(target))

        target = [u'T0_CH_CERN', u'T1_CH_CERN']
        results = self.mySiteDB.seToCMSName("srm-cms.cern.ch")
        self.assertEqual(sorted(results), sorted(target))

        target = [u'T2_CH_CERN_AI']
        results = self.mySiteDB.seToCMSName("eoscmsftp.cern.ch")
        self.assertEqual(sorted(results), sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmssrm.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertEqual(result, [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertEqual(result, ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Tests cmsNametoList for the "SE" kind
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertEqual(result, [u'cmssrm.fnal.gov', u'cmssrmdisk.fnal.gov'])
def run(self):
    """
    Collect per-file information for the block ``self.bn`` into ``self.files``.

    Reads ``self.l`` (logger), ``self.hp`` (has-parent flag), ``self.fl``
    (fake-location flag), ``self.bn`` (block name), ``self.bbl`` /
    ``self.bwl`` (block black/white lists) and ``self.dbs`` (DBS endpoint).

    ``self.files`` is reset to an empty dict and then filled with one entry
    per file that has at least one accepted lumi section, keyed by the
    file's ``LogicalFileName``. ``self.major_failure`` is set to True when
    the block has no location, no fake location is requested and the block
    has no parent.

    Returns None. Returns early, leaving ``self.files`` empty, when the
    block is blacklisted or is missing from a non-empty whitelist.
    """
    self.files = {}
    logger = self.l
    has_parent = self.hp
    fakeLocation = self.fl
    blockName = self.bn
    blockBlacklist = self.bbl
    blockWhitelist = self.bwl

    if blockBlacklist and blockName in blockBlacklist:
        return
    if blockWhitelist and blockName not in blockWhitelist:
        return

    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()
    dbsReader = DBSReader(endpoint=self.dbs)

    replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                       subscribed='y')
    blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)
    if has_parent:
        try:
            blockFileParents = dbsReader.listFilesInBlockWithParents(blockName)
        except Exception:
            # Best effort: fall back to the plain file listing.
            print("%s does not appear to have a parent, even though it should. Very suspicious"
                  % (blockName,))
            blockFileParents = dbsReader.listFilesInBlock(blockName)
    else:
        blockFileParents = dbsReader.listFilesInBlock(blockName)

    blockLocations = set()
    # load block locations
    replicaBlocks = replicaInfo["phedex"]["block"]
    if replicaBlocks:
        for replica in replicaBlocks[0]["replica"]:
            PNN = replica["node"]
            # NOTE(review): the PSN list is not used in the visible code; the
            # call is kept so that lookup errors still surface - confirm
            # whether it is still needed.
            siteDB.PNNtoPSN(PNN)
            blockLocations.add(PNN)

    # We cannot upload docs without location, so force it in case it's empty
    if not blockLocations:
        if fakeLocation:
            blockLocations.update([u'T1_US_FNAL_Disk', u'T2_CH_CERN'])
        elif not has_parent:  ## this should be the source
            logger.info("Blockname: %s\tno location, ABORT", blockName)
            self.major_failure = True

    # for each file on the block
    for blockFile in blockFiles:
        # populate parent information
        # NOTE(review): this always reads the ParentList of the FIRST entry of
        # blockFileParents, whichever file is being processed - confirm that
        # this is intended.
        if blockFileParents and "ParentList" in blockFileParents[0]:
            parentLFNs = [fileParent["LogicalFileName"]
                          for fileParent in blockFileParents[0]["ParentList"]]
        else:
            parentLFNs = []

        runInfo = {}
        # NOTE(review): runBlacklist / runWhitelist are not defined in this
        # function; presumably module-level names - verify.
        for lumiSection in blockFile["LumiList"]:
            runNumber = lumiSection["RunNumber"]
            if runBlacklist and runNumber in runBlacklist:
                continue
            if runWhitelist and runNumber not in runWhitelist:
                continue
            runInfo.setdefault(runNumber, []).append(
                lumiSection["LumiSectionNumber"])

        # Files without any accepted lumi section are not recorded.
        if runInfo:
            self.files[blockFile["LogicalFileName"]] = {
                "runs": runInfo,
                "events": blockFile["NumberOfEvents"],
                "size": blockFile["FileSize"],
                "locations": list(blockLocations),
                "parents": parentLFNs
            }
    return
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        EmulatorHelper.setEmulators(phedex=False, dbs=False, siteDB=False,
                                    requestMgr=True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        """
        _tearDown_

        Run the base class tear-down, then reset the emulators.
        """
        super(SiteDBTest, self).tearDown()
        EmulatorHelper.resetEmulators()
        return

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        # First lookup is compared as-is, i.e. order matters.
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertEqual(results, target)

        # assertItemsEqual ignores order, so no sorting is needed.
        self.assertItemsEqual(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"),
                              [u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        self.assertItemsEqual(self.mySiteDB.seToCMSName("srm-cms.cern.ch"),
                              [u'T0_CH_CERN', u'T1_CH_CERN'])
        self.assertItemsEqual(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"),
                              [u'T2_CH_CERN_AI'])

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertGreater(len(seNames), 1)
        self.assertIn('cmsdcadisk01.fnal.gov', seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertEqual(result, [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertEqual(result, ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testCheckAndConvertSENameToPNN(self):
        """
        Test the conversion of SE name to PNN for single
        and multiple sites/PNNs using checkAndConvertSENameToPNN
        """
        fnalSE = u'cmsdcadisk01.fnal.gov'
        purdueSE = u'srm.rcac.purdue.edu'
        fnalPNNs = [u'T1_US_FNAL_Buffer', u'T1_US_FNAL_MSS', u'T1_US_FNAL_Disk']
        purduePNN = [u'T2_US_Purdue']

        pnnList = fnalPNNs + purduePNN
        seList = [fnalSE, purdueSE]

        convert = self.mySiteDB.checkAndConvertSENameToPNN
        # A single SE may be given either bare or wrapped in a list.
        self.assertItemsEqual(convert(fnalSE), fnalPNNs)
        self.assertItemsEqual(convert([fnalSE]), fnalPNNs)
        self.assertItemsEqual(convert(purdueSE), purduePNN)
        self.assertItemsEqual(convert([purdueSE]), purduePNN)
        self.assertItemsEqual(convert(seList), purduePNN + fnalPNNs)
        # Input that is already a list of PNNs is expected back unchanged.
        self.assertItemsEqual(convert(pnnList), pnnList)
        return

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(
            ['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
class RequestQuery:
    """
    Gathers the information needed for a StoreResults request (release
    architecture, dataset origin sites, global tag) and writes it to a
    per-ticket JSON file.
    """

    def __init__(self, config):
        """
        :param config: mapping; ``config["ComponentDir"]`` is the directory
            in which the ``Ticket_<task>.json`` files are written.
        """
        self.br = Browser()
        self.config = config

        # Initialise connections
        # NOTE(review): createRequestJSON reads self.phedex, which is never
        # set in the visible code - confirm where it is initialised.
        self.mySiteDB = SiteDBJSON()
        self.dbsPhys01 = DbsApi(url=dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url=dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url=dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        # self.br is missing if __init__ failed before creating the browser.
        br = getattr(self, 'br', None)
        if br is not None:
            br.close()

    def _dbsApiFor(self, dbs_url):
        """
        Return the DBS API object matching the instance name found in
        dbs_url (6th '/'-separated field: phys01, phys02 or phys03).

        :raises ValueError: if the instance name is not one of those three.
        """
        local_dbs = dbs_url.split('/')[5]
        attr_by_instance = {
            'phys01': 'dbsPhys01',
            'phys02': 'dbsPhys02',
            'phys03': 'dbsPhys03',
        }
        if local_dbs not in attr_by_instance:
            raise ValueError("Unsupported DBS instance '%s' in url %s"
                             % (local_dbs, dbs_url))
        return getattr(self, attr_by_instance[local_dbs])

    def _psnsForNodes(self, nodes):
        """
        Return the set of processing site names for the given node names.
        """
        psns = set()
        for node in nodes:
            result = self.mySiteDB.PNNtoPSN(node)
            # PNNtoPSN returns a list of names; lists are not hashable, so
            # they are merged in instead of being added as one element.
            if isinstance(result, (list, tuple, set)):
                psns.update(result)
            else:
                psns.add(result)
        return psns

    def getScramArchByCMSSW(self):
        """
        Get from the list of available CMSSW releases
        return a dictionary of ScramArchitecture by CMSSW
        """
        # Set temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        try:
            br.set_handle_robots(False)
            response = br.open(url)
            soup = BeautifulSoup(response.read())
        finally:
            # The temporary browser is not kept, so release it here.
            br.close()

        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all('architecture'):
            for cmssw in arch.find_all('project'):
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw.setdefault(cmsswLabel, []).append(archName)

        return archByCmssw

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.

        Reads ``self.nodeMappings``, which createRequestJSON sets before
        calling this method.

        :return: tuple ``(siteNames, seList)``: the names of the nodes whose
            'se' is an origin site, and the distinct origin site names in
            first-seen order.
        :raises ValueError: if dbs_url names an unsupported DBS instance.
        """
        response = self._dbsApiFor(dbs_url).listBlocks(detail=True, dataset=data)

        seList = []
        for block in response:
            if block['origin_site_name'] not in seList:
                seList.append(block['origin_site_name'])

        siteNames = [node['name']
                     for node in self.nodeMappings['phedex']['node']
                     if node['se'] in seList]

        return siteNames, seList

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source dbs url. If it is not set,
        then set global tag to 'UNKNOWN'

        :raises ValueError: if dbs_url names an unsupported DBS instance.
        """
        response = self._dbsApiFor(dbs_url).listOutputConfigs(dataset=input_dataset)
        # No output config at all is treated like an empty global tag.
        globalTag = response[0]['global_tag'] if response else ''
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'
        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False

        :raises ValueError: if dbs_url names an unsupported DBS instance.
        """
        response = self._dbsApiFor(dbs_url).listDatasets(dataset=input_dataset)
        # An empty response means that the dataset is not at the url
        return bool(response)

    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary by values
        """
        return dict((item.attrs['value'], item.attrs['label'])
                    for item in control.items)

    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary by labels
        """
        return dict((item.attrs['label'], item.attrs['value'])
                    for item in control.items)

    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version=1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the needed
        information for creating a request on ReqMgr.
        Input:
            - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, For example: "phys01" for
                 https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        Raises Exception when the release has no ScramArch or the dataset
        is not available at the given DBS url.
        """
        scramArchByCMSSW = self.getScramArchByCMSSW()
        self.nodeMappings = self.phedex.getNodeMap()

        task = ticket
        print("Processing ticket: %s" % task)

        # splitting input dataset
        dataset_fields = input_dataset.split('/')
        input_processed_dataset = dataset_fields[2].replace(' ', '')
        data_tier = dataset_fields[3].replace(' ', '')

        # Transform input value to a valid DBS url
        dbs_url = dbs_base_url + dbs_url + "/DBSReader"

        release = cmssw_release
        # check if release has not ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        # The last architecture listed for the release is used.
        scram_arch = scramArchByCMSSW[release][-1]

        # check if dataset is not at dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
        except Exception:
            raise Exception('Error on ticket %s, dataset %s not available at %s' % (task, input_dataset, dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' % (task, input_dataset, dbs_url))

        ## Get Dataset Version
        dataset_version = str(version)

        # Set default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name) == 0:
            new_dataset = input_processed_dataset.replace(group_name, "", 1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)

        # Get dataset site info:
        phedex_map, se_names = self.getDatasetOriginSites(dbs_url, input_dataset)
        sites = self._psnsForNodes(phedex_map)

        infoDict = {}
        # Build store results json
        # First add all the defaults values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged"
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-", "_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url

        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version
        infoDict["SiteWhitelist"] = list(sites)

        # Create report for Migration2Global
        report = {}

        # Fill json file, if status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'

        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = list(sites)
        report["se_names"] = list(se_names)

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir.
        An already existing file is left untouched.
        """
        ## check if file already exists
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if not os.access(filename, os.F_OK):
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            # 'with' guarantees the file is flushed and closed.
            with open(filename, 'w') as jsonfile:
                jsonfile.write(json.dumps(request, sort_keys=True, indent=4))

        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'

        if os.access(filename, os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report
        """
        rowFormat = "%20s %5s %10s %50s %50s"
        print(rowFormat % ('Ticket', 'json', 'local DBS', 'Sites', 'se_names'))
        print(rowFormat % ('-' * 20, '-' * 5, '-' * 10, '-' * 50, '-' * 50))

        jsonFlag = report["json"]
        ticket = report["task"]
        # Only the DBS instance name (6th '/'-separated field) is shown.
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        se_names = ', '.join(report["se_names"])
        print(rowFormat % (ticket, jsonFlag, localUrl, site, se_names))
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        # First lookup is compared as-is, i.e. order matters.
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertEqual(results, target)

        # assertItemsEqual ignores order, so no sorting is needed.
        self.assertItemsEqual(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"),
                              [u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        self.assertItemsEqual(self.mySiteDB.seToCMSName("srm-cms.cern.ch"),
                              [u'T0_CH_CERN', u'T1_CH_CERN'])
        self.assertItemsEqual(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"),
                              [u'T2_CH_CERN_AI'])

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertGreater(len(seNames), 1)
        self.assertIn('cmsdcadisk01.fnal.gov', seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertEqual(result, [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertEqual(result, ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(
            ['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
def formatOutput(self, task, requestname, datasetfiles, locations):
    """
    Receives as input the result of the data location
    discovery operations and fill up the WMCore objects.

    :param task: task dictionary; ``task['tm_use_parent']`` is read here.
    :param requestname: not used in the visible part of the function.
    :param datasetfiles: mapping of LFN -> file information dictionary.
    :param locations: mapping of block name -> iterable of PNNs.
    :raises TaskWorkerException: if ``task['tm_use_parent'] == 1`` and a
        file has an empty ``'Parents'`` list.

    NOTE(review): the visible body builds one File object per input file
    but never stores it, never updates the counters and returns nothing -
    confirm whether the remainder of the function is missing.
    """
    self.logger.debug(" Formatting data discovery output ")
    # TEMPORARY
    pnn_psn_map = {}
    sbj = SiteDBJSON({
        "key": self.config.TaskWorker.cmskey,
        "cert": self.config.TaskWorker.cmscert
    })

    wmfiles = []
    event_counter = 0
    lumi_counter = 0
    file_counter = 0
    uniquelumis = set()
    ## Loop over the files.
    for lfn, infos in datasetfiles.iteritems():
        block = infos['BlockName']
        ## Skip the file if the block has not been found or has no locations.
        if block not in locations or not locations[block]:
            self.logger.warning(
                "Skipping %s because its block (%s) has no locations",
                lfn, block)
            continue
        ## Skip the file if it is not in VALID state.
        if not infos.get('ValidFile', True):
            self.logger.warning("Skipping invalid file %s", lfn)
            continue

        if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
            raise TaskWorkerException(
                "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                + "because you specified useParents=True but some your files have no "
                + "parents.\nExample: " + lfn)
        ## Create a WMCore File object.
        wmfile = File(lfn=lfn,
                      events=infos['NumberOfEvents'],
                      size=infos['Size'],
                      checksums=infos['Checksums'],
                      parents=infos['Parents'])
        wmfile['block'] = block
        wmfile['locations'] = []
        for pnn in locations[block]:
            if pnn and pnn not in pnn_psn_map:
                self.logger.debug("Translating PNN %s", pnn)
                try:
                    pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                except KeyError:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through SiteDB",
                        pnn)
                    # Cache the failure so the lookup is not retried.
                    pnn_psn_map[pnn] = ''
                except httplib.HTTPException as ex:
                    # Nothing is cached here, so the translation is attempted
                    # again the next time this PNN is seen.
                    self.logger.error("Couldn't map SE to site: %s", pnn)
                    print("Couldn't map SE to site: %s" % pnn)
                    print("got problem: %s" % ex)
                    print("got another problem: %s" % ex.__dict__)
            if pnn and pnn in pnn_psn_map:
                # The cached value is either a list of PSNs or a single value.
                if isinstance(pnn_psn_map[pnn], list):
                    wmfile['locations'].extend(pnn_psn_map[pnn])
                else:
                    wmfile['locations'].append(pnn_psn_map[pnn])
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CMS Name to PhEDEx Node Name
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophy in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertGreater(len(seNames), 1)
        self.assertIn('cmsdcadisk01.fnal.gov', seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertEqual(result, [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertEqual(result, ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(
            ['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertEqual(result, ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return

    def testPSNtoPNNMap(self):
        """
        _PSNtoPNNMap_

        Test API to get a map of PSNs and PNNs
        """
        # Unfiltered map: at least one site of every tier is present.
        result = self.mySiteDB.PSNtoPNNMap()
        self.assertTrue(any(psn.startswith('T1_') for psn in result.keys()))
        self.assertTrue(any(psn.startswith('T2_') for psn in result.keys()))
        self.assertTrue(any(psn.startswith('T3_') for psn in result.keys()))
        self.assertGreater(len(result), 50)

        # Filtered maps: every key must belong to the requested tier.
        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T1.*')
        self.assertTrue(all(psn.startswith('T1_') for psn in result.keys()))
        self.assertLess(len(result), 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T2.*')
        self.assertTrue(all(psn.startswith('T2_') for psn in result.keys()))
        self.assertGreater(len(result), 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T3.*')
        self.assertTrue(all(psn.startswith('T3_') for psn in result.keys()))
        self.assertGreater(len(result), 10)

        return

    def testGetAllPhEDExNodeNames(self):
        """
        _testGetAllPhEDExNodeNames_

        Test API to get all PhEDEx Node Names
        """
        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=True)
        self.assertFalse(any(pnn.endswith('_Buffer') for pnn in result))

        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=False)
        bufferCount = sum(1 for pnn in result if pnn.endswith('_Buffer'))
        self.assertGreater(bufferCount, 5)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='T1.*',
                                                     excludeBuffer=True)
        self.assertTrue(all(pnn.startswith('T1_') for pnn in result))
        self.assertGreater(len(result), 10)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='.*',
                                                     excludeBuffer=True)
        self.assertTrue(any(pnn.startswith('T1_') for pnn in result))
        self.assertTrue(any(pnn.startswith('T2_') for pnn in result))
        self.assertTrue(any(pnn.startswith('T3_') for pnn in result))
        self.assertGreater(len(result), 60)

        return
class SiteDBTest(unittest.TestCase):
    """
    Unit tests for the SiteDBJSON service wrapper.

    Every test runs with the SiteDB emulator switched on (see setUp).
    """

    def setUp(self):
        """
        Enable the SiteDB emulator and create the client under test.
        """
        EmulatorHelper.setEmulators(siteDB=True)
        self.mySiteDB = SiteDBJSON()

    def tearDown(self):
        """
        Reset the emulators enabled in setUp.
        """
        EmulatorHelper.resetEmulators()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_MSS', 'T1_US_FNAL_Buffer']
        results = self.mySiteDB.cmsNametoPhEDExNode("T1_US_FNAL")
        # Order of the returned nodes is not part of the contract checked here.
        self.assertEqual(sorted(results), sorted(target))

    def testPhEDExNodetocmsName(self):
        """
        Tests phEDExNodetocmsName
        """
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_MSS')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T1_US_FNAL_Buffer')
        self.assertEqual(result, 'T1_US_FNAL')
        result = self.mySiteDB.phEDExNodetocmsName('T2_UK_London_IC')
        self.assertEqual(result, 'T2_UK_London_IC')
        # don't check this anymore, see comment in phEDExNodetocmsName function
        #self.assertRaises(ValueError, self.mySiteDB.phEDExNodetocmsName,
        #                  'T9_DOESNT_EXIST_Buffer')

    def testCmsNametoSE(self):
        """
        Tests cmsNametoSE
        """
        target = [u'srm-cms-disk.gridpp.rl.ac.uk', u'srm-cms.gridpp.rl.ac.uk']
        results = self.mySiteDB.cmsNametoSE("T1_UK_RAL")
        self.assertEqual(sorted(results), sorted(target))

    def testCmsNamePatterntoSE(self):
        """
        Tests cmsNametoSE when called with a '%'-prefixed name pattern
        """
        target = [u'T2_XX_SiteA', u'T2_XX_SiteB', u'T2_XX_SiteC']
        results = self.mySiteDB.cmsNametoSE("%T2_XX")
        self.assertEqual(sorted(results), sorted(target))

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = [u'T1_US_FNAL']
        results = self.mySiteDB.seToCMSName("cmssrm.fnal.gov")
        self.assertEqual(results, target)

        # Several CMS names share this SE, so compare order-independently.
        target = [
            u'T2_CH_CERN', u'T2_CH_CERN_AI', u'T2_CH_CERN_HLT',
            u'T2_CH_CERN_T0'
        ]
        results = self.mySiteDB.seToCMSName("srm-eoscms.cern.ch")
        self.assertEqual(sorted(results), sorted(target))

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gutsche/CN=582680/CN=Oliver Gutsche"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertEqual(testUserName, userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """
        seNames = self.mySiteDB.getAllSENames()
        self.assertGreater(len(seNames), 1)
        self.assertIn('cmssrm.fnal.gov', seNames)

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertEqual(result, ['T1_US_FNAL'])
        # This node is expected to map to no processing site at all.
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertEqual(result, [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertEqual(result, ['T2_UK_London_IC'])
def getFiles(datasetName, runBlacklist, runWhitelist, blockBlacklist,
             blockWhitelist, dbsUrl, fakeLocation=False):
    """
    _getFiles_

    Get the full information of a dataset including files, blocks, runs and
    lumis. Filter it using run and block white/black lists.
    It can receive an optional DBSUrl.

    Arguments:
      datasetName    -- dataset whose blocks are listed through DBS
      runBlacklist   -- lumis of these runs are dropped (ignored if empty)
      runWhitelist   -- if non-empty, only lumis of these runs are kept
      blockBlacklist -- blocks to skip (ignored if empty)
      blockWhitelist -- if non-empty, only these blocks are processed
      dbsUrl         -- endpoint handed to DBSReader
      fakeLocation   -- if True, a block that resolved to no location gets a
                        fixed pair of storage elements instead

    Returns a dict keyed by LFN; each value holds "runs" (run -> lumi list),
    "events", "size", "locations" and "parents". Files left without any lumi
    after the run filtering are not included.

    Raises RuntimeError if the block listing for the dataset fails.
    """
    dbsReader = DBSReader(endpoint=dbsUrl)
    phedexReader = PhEDEx()
    siteDB = SiteDBJSON()

    files = {}
    outputDatasetParts = datasetName.split("/")
    # Single-argument form: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("dataset %s parts %s" % (datasetName, outputDatasetParts))
    try:
        # retrieve list of blocks from dataset
        blockNames = dbsReader.listFileBlocks(datasetName)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must not be
        # reported as a missing dataset.
        raise RuntimeError("Dataset %s doesn't exist in given DBS instance" %
                           datasetName)

    # traverse each block
    for blockName in blockNames:
        # deal with white and black list.
        if blockBlacklist and blockName in blockBlacklist:
            continue
        if blockWhitelist and blockName not in blockWhitelist:
            continue

        # existing blocks in phedex
        replicaInfo = phedexReader.getReplicaInfoForBlocks(block=blockName,
                                                           subscribed='y')
        blockFiles = dbsReader.listFilesInBlock(blockName, lumis=True)

        # load block locations
        blockLocations = set()
        phedexBlocks = replicaInfo["phedex"]["block"]
        if phedexBlocks:
            for replica in phedexBlocks[0]["replica"]:
                node = replica["node"]
                cmsSites = siteDB.PNNtoPSN(node)
                # PNNtoPSN may hand back a single name instead of a list.
                if not isinstance(cmsSites, list):
                    cmsSites = [cmsSites]
                for cmsName in cmsSites:
                    se = siteDB.cmsNametoSE(cmsName)
                    blockLocations.update(se)
                    logging.debug("cmsName %s mapped to se %s", cmsName, se)
                logging.debug("PhEDEx node %s, cmsSites %s, blockLocations %s",
                              node, cmsSites, blockLocations)

        # We cannot upload docs without location, so force it in case it's empty
        if fakeLocation and not blockLocations:
            blockLocations.update(
                [u'cmssrmdisk.fnal.gov', u'srm-eoscms.cern.ch'])

        logging.info("Blockname: %s\tLocations: %s", blockName, blockLocations)

        # Parent information depends only on the block, so query DBS once per
        # block rather than once per file (and not at all for an empty block).
        # NOTE(review): only the first entry's "ParentList" is read, so every
        # file of the block gets the same parents - confirm this is intended.
        parentLFNs = []
        if blockFiles:
            blockFileParents = dbsReader.listFilesInBlock(blockName)
            if blockFileParents and "ParentList" in blockFileParents[0]:
                parentLFNs = [
                    fileParent["LogicalFileName"]
                    for fileParent in blockFileParents[0]["ParentList"]
                ]

        # for each file on the block
        for blockFile in blockFiles:
            runInfo = {}
            for lumiSection in blockFile["LumiList"]:
                runNumber = lumiSection["RunNumber"]
                if runBlacklist and runNumber in runBlacklist:
                    continue
                if runWhitelist and runNumber not in runWhitelist:
                    continue
                runInfo.setdefault(runNumber, []).append(
                    lumiSection["LumiSectionNumber"])

            # A file whose lumis were all filtered out is dropped.
            if runInfo:
                files[blockFile["LogicalFileName"]] = {
                    "runs": runInfo,
                    "events": blockFile["NumberOfEvents"],
                    "size": blockFile["FileSize"],
                    "locations": list(blockLocations),
                    # fresh list per file so entries never share state
                    "parents": list(parentLFNs)
                }
    return files