class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files into PhEDEx as they are
    created. On every polling cycle uninjected files are injected and
    migrated blocks are closed; every ``subFrequency`` cycles (when dataset
    subscription is enabled in the configuration) deletable blocks are
    removed and unsubscribed datasets are subscribed.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members: read the DBS url and the polling
        configuration, create the PhEDEx client and build the SE name to
        PhEDEx node name lookup tables from the PhEDEx node map.
        """
        BaseWorkerThread.__init__(self)

        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            # Force true division: with two integers Python 2 would truncate
            # the ratio before round() ever sees it.
            self.subFrequency = max(
                1, int(round(float(subInterval) / pollInterval)))
            logging.info(
                "SubscribeDataset and deleteBlocks will run every %d polling cycles",
                self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        nodeMappings = self._getNodeMapWithRetries()

        # seMap maps SE names, which are what is stored in the DBSBuffer, to
        # PhEDEx node names. The first key is the "kind" (one of MSS, Disk,
        # Buffer), the second key is the SE name.
        self.seMap = {}
        self.nodeNames = []
        # phedexNodes lists the node names of the MSS and Disk kinds only
        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            kind = node["kind"]
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap.setdefault(kind, {})[node["se"]] = node["name"]
            self.nodeNames.append(node["name"])
            if kind in self.phedexNodes:
                self.phedexNodes[kind].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

        # blocks whose injection call failed with HTTP 400, to be checked
        # against PhEDEx at the start of the next cycle
        self.blocksToRecover = []

        return

    def _getNodeMapWithRetries(self, retryDelays=(2, 4)):
        """
        __getNodeMapWithRetries_

        Retrieve the PhEDEx node map. Retrieving the node mappings is fickle
        and can fail quite often, so after each failure sleep for the next
        delay (in seconds) from retryDelays and try again. The final attempt
        is not guarded: its exception propagates to the caller.
        """
        for delay in retryDelays:
            try:
                return self.phedex.getNodeMap()
            except Exception as ex:
                logging.warning(
                    "Failed to retrieve the PhEDEx node map (%s), retrying in %d seconds",
                    str(ex), delay)
                time.sleep(delay)
        return self.phedex.getNodeMap()

    def _mapSiteToNode(self, siteName):
        """
        __mapSiteToNode_

        Map a site name stored in DBSBuffer to a PhEDEx node name.

        SE names can be stored in DBSBuffer as that is what is returned in
        the framework job report. A name that already is a PhEDEx node name
        is returned unchanged, otherwise it is looked up as an SE name in the
        Buffer, MSS and Disk mappings, in that order.

        Returns the node name, or None if the name could not be mapped.
        """
        if siteName in self.nodeNames:
            return siteName
        for kind in ("Buffer", "MSS", "Disk"):
            if siteName in self.seMap.get(kind, {}):
                return self.seMap[kind][siteName]
        return None

    def setup(self, parameters):
        """
        _setup_

        Create the DAO factories and instantiate the DAOs used by the poller.
        The parameters argument is not used.
        """
        myThread = threading.currentThread()

        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")
        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them
        into PhEDEx, then close migrated blocks. When the poll counter
        reaches subFrequency, also delete blocks and subscribe datasets.
        The parameters argument is not used.
        """
        logging.info("Running PhEDEx injector poller algorithm...")

        self.pollCounter += 1

        if self.blocksToRecover:
            logging.info("PhEDExInjector Recovery: "
                         "previous injection call failed, "
                         "check if files were injected to PhEDEx anyway")
            self.recoverInjectedFiles()

        self.injectFiles()
        self.closeBlocks()

        # subFrequency is None when dataset subscription is disabled, in
        # which case this comparison never succeeds
        if self.pollCounter == self.subFrequency:
            self.pollCounter = 0
            self.deleteBlocks()
            self.subscribeDatasets()

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service. The injectionData parameter must
        be a dictionary keyed by dataset path. Each dataset path maps to a
        dictionary keyed by block name, each block being a dict with the
        keys is-open and files. The files key is a list of dicts, each of
        which has the keys lfn, size and checksum.

        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath, fileBlocks in injectionData.items():
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in fileBlocks.items():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from the database into the
        per-block format used by the injection recovery. The unInjectedData
        parameter must be a dictionary keyed by dataset path.

        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])}]
        """
        blocks = []
        for fileBlocks in unInjectedData.values():
            for blockName, fileBlock in fileBlocks.items():
                lfns = set(fileDict["lfn"] for fileDict in fileBlock["files"])
                blocks.append({blockName: lfns})
        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.

        Files are injected per site in chunks. The chunk limits are checked
        after each whole dataset has been added, so a chunk is sent once it
        holds at least 20 datasets, 50 blocks or 5000 files and may exceed
        those numbers.
        """
        logging.info("Starting injectFiles method")

        maxDataset = 20
        maxBlocks = 50
        maxFiles = 5000

        uninjectedFiles = self.getUninjected.execute()

        for siteName in uninjectedFiles:
            location = self._mapSiteToNode(siteName)
            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset, blocks in uninjectedFiles[siteName].items():
                numberDatasets += 1
                injectData[dataset] = blocks

                for block in blocks:
                    files = blocks[block]['files']
                    numberBlocks += 1
                    numberFiles += len(files)
                    lfnList.extend(fileInfo['lfn'] for fileInfo in files)

                if (numberDatasets >= maxDataset or numberBlocks >= maxBlocks
                        or numberFiles >= maxFiles):
                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []

            # send whatever is left over after the last full chunk
            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        Actual PhEDEx call for file injection.

        location is the PhEDEx node name, injectData the structure accepted
        by createInjectionSpec and lfnList the LFNs contained in injectData.
        On success the files in lfnList are marked as injected (status 1).
        Failures of the PhEDEx call are logged, not raised.
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investigation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(
                    self.createRecoveryFileFormat(injectData))
            logging.error(
                "PhEDEx file injection failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s",
                          str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData,
                                                       injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception:
                    # possible deadlock with DBS3Upload, retry once after 5s;
                    # a failure of the retry is raised to the caller
                    logging.warning(
                        "Oracle exception during file status update, possible deadlock due to race condition, retry after 5s sleep"
                    )
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS: send them to
        PhEDEx per site and, if PhEDEx reports no error, mark each block as
        closed in the database.
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks:
            location = self._mapSiteToNode(siteName)
            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            siteBlocks = migratedBlocks[siteName]
            xmlData = self.createInjectionSpec(siteBlocks)
            logging.debug("closeBlocks XMLData: %s", xmlData)

            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx block close failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s",
                              str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)

                if "error" in injectRes:
                    msg = "Error injecting data %s: %s" % (siteBlocks,
                                                           injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)
                else:
                    for datasetName in siteBlocks:
                        for blockName in siteBlocks[datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)

        return

    def recoverInjectedFiles(self):
        """
        _recoverInjectedFiles_

        Run when a previous PhEDEx injection call failed.

        Because of the 3 minute response timeout in cmsweb, a PhEDEx
        injection call sometimes times out even though the injection itself
        succeeded. In that case:

        1. check whether the files of the affected blocks are in PhEDEx
        2. if they are, set their in_phedex status to 1
        3. set self.blocksToRecover = []

        The recovery runs one block at a time; with too many blocks the call
        to the PhEDEx data service on cmsweb can time out.
        """
        # recover one block at a time
        for block in self.blocksToRecover:
            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_

        Find deletable blocks, then decide whether to delete based on:

        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check

        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict)
        except Exception:
            logging.error(
                "Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        # location -> dataset -> set of block names
        deletableEntries = {}
        for blockName, blockDetails in blockDict.items():
            location = blockDetails['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDetails['dataset']
            sites = blockDetails['sites']

            if location in subscriptions.get(blockName, ()):
                logging.debug("Block %s subscribed to %s, skip deletion",
                              blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
                continue

            try:
                blockInfo = self.phedex.getReplicaInfoForBlocks(
                    block=blockName, complete='y')['phedex']['block']
            except Exception:
                logging.error(
                    "Couldn't get block info from PhEDEx, retry next cycle")
                continue

            for entry in blockInfo:
                if entry['name'] != blockName:
                    continue
                nodes = set(x['node'] for x in entry['replica'])
                if location not in nodes:
                    logging.debug(
                        "Block %s not present on %s, mark as deleted",
                        blockName, location)
                    binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                    self.markBlocksDeleted.execute(binds)
                elif sites.issubset(nodes):
                    logging.debug(
                        "Deleting block %s from %s since it is fully transfered",
                        blockName, location)
                    locationEntries = deletableEntries.setdefault(location, {})
                    locationEntries.setdefault(dataset, set()).add(blockName)

        binds = [{'DELETED': 2, 'BLOCKNAME': blockName}
                 for blockName in skippableBlocks]
        if binds:
            self.markBlocksDeleted.execute(binds)

        chunkSize = 100
        for location in deletableEntries:
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:
                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            # Skip the trailing call when the last dataset just flushed the
            # chunk, otherwise an empty deletion request would be sent.
            if blocksToDelete:
                self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_

        Actual PhEDEx calls for block deletion.

        location is the PhEDEx node name and blocksToDelete a dictionary
        mapping dataset names to the block names to delete. The deletion
        request is created and then approved; if both calls succeed the
        blocks are marked as deleted (deleted=1). Failures are logged, not
        raised.
        """
        deletion = PhEDExDeletion(
            list(blocksToDelete),
            location,
            level='block',
            comments="WMAgent blocks auto-delete from %s" % location,
            blocks=blocksToDelete)

        xmlData = XMLDrop.makePhEDExXMLForBlocks(
            self.dbsUrl, deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)

        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error(
                "PhEDEx block delete/approval failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error(
                "PhEDEx block delete/approval failed with Exception: %s",
                str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = [{'DELETED': 1, 'BLOCKNAME': blockName}
                     for dataset in blocksToDelete
                     for blockName in blocksToDelete[dataset]]
            self.markBlocksDeleted.execute(binds)

        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for unsubscribed datasets and subscribe them in
        PhEDEx. Datasets whose subscription was made, or for which a matching
        subscription or transfer request already exists, are marked as
        subscribed in the database.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']
            isMSS = site in self.phedexNodes['MSS']

            if not isMSS and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            if not isMSS:
                # Avoid custodial subscriptions to disk nodes
                subInfo['custodial'] = 'n'
            elif site.startswith("T1"):
                # Avoid auto approval in T1 sites
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(
                subInfo['path'],
                site,
                subInfo['phedex_group'],
                priority=subInfo['priority'],
                move=subInfo['move'],
                custodial=subInfo['custodial'],
                request_only=subInfo['request_only'],
                subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if (phedexSub.matchesExistingSubscription(self.phedex)
                    or phedexSub.matchesExistingTransferRequest(self.phedex)):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            xmlData = XMLDrop.makePhEDExXMLForDatasets(
                self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s", xmlData)

            logging.info(
                "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                subscription.getDatasetPaths(), subscription.getNodes(),
                subscription.move, subscription.custodial,
                subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with Exception: %s",
                    str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return
class PhEDExTest(unittest.TestCase):
    """
    _PhEDExTest_

    Tests of the PhEDEx service API run against the PhEDEx test data
    service.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS,
                                 "method": "POST"})
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0,
                                    "closed_blocks": 0,
                                    "new_blocks": 0,
                                    "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        # "in" instead of dict.has_key(), which does not exist in Python 3
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return

    @attr("integration")
    def testNodeMap(self):
        """
        _testNodeMap_

        Verify that the node map can be retrieved from PhEDEx and that the
        getNodeSE() and getNodeNames() methods work correctly.
        """
        self.assertEqual(self.phedexApi.getNodeSE("T2_FR_GRIF_LLR"),
                         "polgrid4.in2p3.fr")
        self.assertEqual(self.phedexApi.getNodeNames("cmssrm.fnal.gov"),
                         ["T1_US_FNAL_Buffer", "T1_US_FNAL_MSS"])
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", requestOnly="n")
        datasetSpec = XMLDrop.makePhEDExXMLForDatasets(
            self.dbsTestUrl, testDatasetSub.getDatasetPaths())
        self.phedexApi.subscribe(testDatasetSub, datasetSpec)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS",
                                          "Saturn", level="block",
                                          requestOnly="n")
        self.phedexApi.subscribe(testBlockSub, blockSpec)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertEqual(len(call1), 1)
        # list() first: dict.keys() cannot be indexed in Python 3
        call1_key = list(call1)[0]

        call2 = self.phedexApi.getPFN(
            ['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
            ['/store/user/metson/file'])

        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2)

        # and one of the mappings should be the same as from the previous call
        self.assertEqual(call1[call1_key], call2[call1_key])
        return

    @attr('integration')
    def testXMLJSON(self):
        """
        _testXMLJSON_

        Test XML and JSON in the same scope
        """
        site = 'T1_US_FNAL_Buffer'
        # The same configuration object is deliberately reused and modified
        # for both clients; only the local name changed so that it no longer
        # shadows the dict builtin (the keyword argument is still "dict").
        phedexConfig = {}
        phedexConfig['endpoint'] = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        phedexJSON = PhEDEx(responseType='json', dict=phedexConfig)
        phedexConfig['endpoint'] = "https://cmsweb.cern.ch/phedex/datasvc/xml/test"
        phedexXML = PhEDEx(responseType='xml', dict=phedexConfig)

        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node': site})
        tfc_map = {}
        tfc_map[site] = readTFC(tfc_file)
        pfn = tfc_map[site].matchLFN('srmv2',
                                     '/store/user/jblow/dir/test.root')

        self.assertEqual(
            pfn,
            'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root')

        self.assertEqual(phedexJSON.getNodeSE('T1_US_FNAL_Buffer'),
                         'cmssrm.fnal.gov')

    @attr('integration')
    def testAuth(self):
        """
        _testAuth_

        Verify that the auth method works correctly.
        """
        self.assertFalse(self.phedexApi.getAuth("datasvc_whatever"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_subscribe"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_inject"))

        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members: the PhEDEx and SiteDB clients, the
        subscription settings read from the configuration and the (still
        empty) node lookup tables that setup() fills.
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.tier0Mode = getattr(config.PhEDExInjector, "tier0Mode", False)

        # We will map node names to CMS names, that is what the spec will
        # have. The map is keyed by CMS name, then by node kind; setup()
        # keeps the first node seen for each (CMS name, kind) pair.
        self.cmsToPhedexMap = {}
        self.phedexNodes = {'MSS': [], 'Disk': []}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector. Also load the PhEDEx
        node names from the data service and map them to CMS site names.
        The parameters argument is not used.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.getUnsubscribedBlocks = daofactory(
            classname="GetUnsubscribedBlocks")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            nodeName = node["name"]
            kind = node["kind"]
            cmsName = self.siteDB.phEDExNodetocmsName(nodeName)

            logging.info("Loaded PhEDEx node %s for site %s",
                         nodeName, cmsName)

            # keep only the first node seen for each kind at a site
            siteNodes = self.cmsToPhedexMap.setdefault(cmsName, {})
            if kind not in siteNodes:
                siteNodes[kind] = nodeName

            if kind in self.phedexNodes:
                self.phedexNodes[kind].append(nodeName)
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured: subscribe Tier-0
        blocks first when tier0Mode is set, then subscribe datasets.
        The parameters argument is not used.
        """
        if self.tier0Mode:
            self.subscribeTier0Blocks()
        # NOTE(review): subscribeDatasets is not defined in the visible part
        # of this class - confirm it is provided elsewhere.
        self.subscribeDatasets()

        return

    def subscribeTier0Blocks(self):
        """
        _subscribeTier0Blocks_

        Subscribe blocks to the Tier-0 where a replica subscription already
        exists. All Tier-0 subscriptions are move, custodial and
        autoapproved with high priority. Errors raised while building the
        request or talking to PhEDEx are logged, not raised.
        """
        myThread = threading.currentThread()
        # NOTE(review): the transaction opened here is neither committed nor
        # rolled back in this method, including on the early return below -
        # confirm that the caller takes care of it.
        myThread.transaction.begin()

        # Check for candidate blocks for subscription
        blocksToSubscribe = self.getUnsubscribedBlocks.execute(
            node='T0_CH_CERN',
            conn=myThread.transaction.conn,
            transaction=True)

        if not blocksToSubscribe:
            return

        # For the blocks we don't really care about the subscription options
        # We are subscribing all blocks with the same recipe.
        # subscriptionMap: dataset path -> list of block names
        subscriptionMap = {}
        for subInfo in blocksToSubscribe:
            subscriptionMap.setdefault(subInfo['path'], []).append(
                subInfo['blockname'])

        site = 'T0_CH_CERN'
        custodial = 'y'
        request_only = 'n'
        move = 'y'
        priority = 'High'

        # Get the phedex node
        phedexNode = self.cmsToPhedexMap[site]["MSS"]

        # Blocks first, datasets second, matching the placeholders of the
        # message. This is an informational message, hence the info level.
        numberOfBlocks = sum(len(x) for x in subscriptionMap.values())
        logging.info("Subscribing %d blocks, from %d datasets to the Tier-0",
                     numberOfBlocks, len(subscriptionMap))

        newSubscription = PhEDExSubscription(list(subscriptionMap),
                                             phedexNode,
                                             self.group,
                                             custodial=custodial,
                                             request_only=request_only,
                                             move=move,
                                             priority=priority,
                                             level='block',
                                             blocks=subscriptionMap)

        # TODO: Check for blocks already subscribed

        try:
            xmlData = XMLDrop.makePhEDExXMLForBlocks(
                self.dbsUrl, newSubscription.getDatasetsAndBlocks())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)
        except Exception as ex:
            logging.error(
                "Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s", str(ex))
class PhEDExTest(unittest.TestCase):
    """
    _PhEDExTest_

    Tests of the PhEDEx service API run against the PhEDEx test data
    service.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS,
                                 "method": "POST"})
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        expectedStats = {
            "stats": {
                "closed_datasets": 0,
                "closed_blocks": 0,
                "new_blocks": 0,
                "new_datasets": 1,
                "new_files": 0
            }
        }
        self.assertEqual(result["phedex"]["injected"], expectedStats)
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0], "Error: Missing request ID")
        return

    @attr("integration")
    def testBestNodeName(self):
        """
        _testBestNodeName_

        Verify that the node name is Buffer first
        """
        self.assertEqual(self.phedexApi.getBestNodeName("cmssrm.fnal.gov"),
                         "T1_US_FNAL_Buffer")
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        # The injection spec has to be created before it is used; without
        # this line the test stops with a NameError before checking anything.
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS",
                                          "Saturn", level="block",
                                          request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertEqual(len(call1), 1)
        # list() first: dict.keys() cannot be indexed in Python 3
        call1_key = list(call1)[0]

        call2 = self.phedexApi.getPFN(
            ['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
            ['/store/user/metson/file'])

        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2)

        # and one of the mappings should be the same as from the previous call
        self.assertEqual(call1[call1_key], call2[call1_key])
        return
class PhEDExTest(EmulatedUnitTestCase):
    """
    _PhEDExTest_

    Tests for the PhEDEx data service client.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API with its default settings.
        """
        self.dbsTestUrl = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"
        self.phedexApi = PhEDEx()
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        # An empty drop only registers the (new) dataset itself
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0,
                                    "closed_blocks": 0,
                                    "new_blocks": 0,
                                    "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

        # Both datasets have to be injected before they can be subscribed
        for datasetPath in (datasetA, datasetB):
            xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetPath)
            self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS", "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.

        This test is intentionally left non-functional (see the NOTE in
        the body): `injectionSpec` is never created, so it fails before
        any data is injected or subscribed.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        # NOTE: leaving it broken on purpose, we do NOT want to subscribe
        # data via unit tests :-)
        # injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", request_only="y")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level="block", request_only="y")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertEqual(len(call1), 1)
        # Do not rely on keys() returning an indexable list
        call1_key = next(iter(call1))

        call2 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
                                      ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertIn(call1_key, call2)

        # and one of the mappings should be the same as from the previous call
        self.assertEqual(call1[call1_key], call2[call1_key])
        return

    def testGetReplicaInfoForBlocks(self):
        """
        Test `getReplicaInfoForBlocks` method, the ability to retrieve replica
        locations provided a (or a list of) datasets and blocks
        """

        def _checkOutcome(numFiles, replica):
            "Run the consistency checks on a single replica record"
            # The checks must look at the record handed in, not at whatever
            # loop variable happens to be visible from the enclosing scope
            if replica['complete'] == 'y':
                self.assertEqual(replica['files'], numFiles)
            if replica['custodial'] == 'y':
                self.assertTrue(replica['node'].endswith("_MSS"))
                # assertTrue(value, 'y') would use 'y' as the failure message
                # and never compare anything
                self.assertEqual(replica['subscribed'], 'y')

        replicaDict = {'bytes', 'complete', 'custodial', 'files', 'group',
                       'node', 'node_id', 'se', 'subscribed',
                       'time_create', 'time_update'}

        res = self.phedexApi.getReplicaInfoForBlocks(block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertGreater(len(res['block'][0]['replica']), 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(), replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # same checks, but providing a dataset as input; the dataset is
        # expected to hold 4 blocks, one of them being BLOCK
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET)['phedex']
        self.assertEqual(len(res['block']), 4)
        self.assertIn(BLOCK, [blk['name'] for blk in res['block']])
        for block in res['block']:
            numFiles = block['files']
            for rep in block['replica']:
                self.assertGreater(len(block['replica']), 1)
                _checkOutcome(numFiles, rep)

        # same test again, but providing both block and dataset
        # NOTE the PhEDEx service only process the block input, the
        # dataset argument is completely ignored
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET, block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertGreater(len(res['block'][0]['replica']), 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(), replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # provide a block that does not exist
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET,
                                                     block=BLOCK + "BLAH")['phedex']
        self.assertEqual(res['block'], [])

    def testGroupUsage(self):
        """
        _testGroupUsage_

        Verify that the `getGroupUsage` API works correctly.
        """
        node = "T2_DE_DESY"
        group = "DataOps"

        # Restricting to one node and one group yields exactly one record
        res = self.phedexApi.getGroupUsage(group=group, node=node)['phedex']
        self.assertEqual(len(res['node']), 1)
        self.assertEqual(len(res['node'][0]['group']), 1)
        self.assertEqual(res['node'][0]['group'][0]['name'], group)
        self.assertEqual(res['node'][0]['name'], node)
        self.assertGreater(res['node'][0]['group'][0]['dest_bytes'], 100)

        # Without a node restriction many nodes are expected to report usage
        res = self.phedexApi.getGroupUsage(group=group)['phedex']
        self.assertGreater(len(res['node']), 50)
        self.assertEqual(len(res['node'][10]['group']), 1)
        self.assertEqual(res['node'][10]['group'][0]['name'], group)
        return
class PhEDExTest(unittest.TestCase):
    """
    _PhEDExTest_

    Tests for the PhEDEx data service client.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API with its default settings.
        """
        self.dbsTestUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.phedexApi = PhEDEx()
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        # An empty drop only registers the (new) dataset itself
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0,
                                    "closed_blocks": 0,
                                    "new_blocks": 0,
                                    "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

        # Both datasets have to be injected before they can be subscribed
        for datasetPath in (datasetA, datasetB):
            xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetPath)
            self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS", "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        # The spec object was never created before being used, which made
        # this test die with a NameError before talking to PhEDEx at all
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level="block", request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertEqual(len(call1), 1)
        # Do not rely on keys() returning an indexable list
        call1_key = next(iter(call1))

        call2 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
                                      ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertIn(call1_key, call2)

        # and one of the mappings should be the same as from the previous call
        self.assertEqual(call1[call1_key], call2[call1_key])
        return

    def testGetReplicaInfoForBlocks(self):
        """
        Test `getReplicaInfoForBlocks` method, the ability to retrieve replica
        locations provided a (or a list of) datasets and blocks
        """

        def _checkOutcome(numFiles, replica):
            "Run the consistency checks on a single replica record"
            # The checks must look at the record handed in, not at whatever
            # loop variable happens to be visible from the enclosing scope
            if replica['complete'] == 'y':
                self.assertEqual(replica['files'], numFiles)
            if replica['custodial'] == 'y':
                self.assertTrue(replica['node'].endswith("_MSS"))
                # assertTrue(value, 'y') would use 'y' as the failure message
                # and never compare anything
                self.assertEqual(replica['subscribed'], 'y')

        replicaDict = {'bytes', 'complete', 'custodial', 'files', 'group',
                       'node', 'node_id', 'se', 'subscribed',
                       'time_create', 'time_update'}

        res = self.phedexApi.getReplicaInfoForBlocks(block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertGreater(len(res['block'][0]['replica']), 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(), replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # same checks, but providing a dataset as input; the dataset is
        # expected to hold 4 blocks, one of them being BLOCK
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET)['phedex']
        self.assertEqual(len(res['block']), 4)
        self.assertIn(BLOCK, [blk['name'] for blk in res['block']])
        for block in res['block']:
            numFiles = block['files']
            for rep in block['replica']:
                self.assertGreater(len(block['replica']), 1)
                _checkOutcome(numFiles, rep)

        # same test again, but providing both block and dataset
        # NOTE the PhEDEx service only process the block input, the
        # dataset argument is completely ignored
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET, block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertGreater(len(res['block'][0]['replica']), 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(), replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # provide a block that does not exist
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET,
                                                     block=BLOCK + "BLAH")['phedex']
        self.assertEqual(res['block'], [])
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles",
                         self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        nodeMappings = self._fetchNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        # Node names usable as subscription destinations, grouped by kind
        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

        # Blocks whose injection call failed with HTTP 400, to be re-checked
        # against PhEDEx at the start of the next cycle
        self.blocksToRecover = []

        return

    def _fetchNodeMap(self):
        """
        __fetchNodeMap_

        Retrieve the node mappings from PhEDEx.  This is fickle and can fail
        quite often, so retry after 2 and then 4 seconds; a failure of the
        third attempt is raised to the caller.
        """
        for delay in (2, 4):
            try:
                return self.phedex.getNodeMap()
            except Exception:
                # Exception rather than a bare except, so that an interrupt
                # or interpreter shutdown is not retried
                time.sleep(delay)
        return self.phedex.getNodeMap()

    def _mapSiteToNode(self, siteName):
        """
        __mapSiteToNode_

        SE names can be stored in DBSBuffer as that is what is returned in
        the framework job report.  Map such a name to a PhEDEx node name,
        preferring Buffer over MSS over Disk nodes.  A name that already is
        a PhEDEx node name is returned unchanged.  Returns None if no
        mapping exists.
        """
        if siteName in self.nodeNames:
            return siteName
        for kind in ("Buffer", "MSS", "Disk"):
            if kind in self.seMap and siteName in self.seMap[kind]:
                return self.seMap[kind][siteName]
        return None

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        logging.info("Running PhEDEx injector poller algorithm...")

        self.pollCounter += 1

        if self.blocksToRecover:
            logging.info("PhEDExInjector Recovery: previous injection call failed, "
                         "check if files were injected to PhEDEx anyway")
            self.recoverInjectedFiles()

        self.injectFiles()
        self.closeBlocks()

        # Deletion and subscription only run every subFrequency cycles
        # (never, if subFrequency is None)
        if self.pollCounter == self.subFrequency:
            self.pollCounter = 0
            self.deleteBlocks()
            self.subscribeDatasets()

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].items():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The unInjectedData parameter must be
        a dictionary keyed by dataset path.

        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])}]
        """
        blocks = []
        for datasetPath in unInjectedData:
            for blockName, fileBlock in unInjectedData[datasetPath].items():
                lfns = set(fileDict["lfn"] for fileDict in fileBlock["files"])
                blocks.append({blockName: lfns})

        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        # Upper limits for a single injection call
        maxDataset = 20
        maxBlocks = 50
        maxFiles = 5000

        for siteName in uninjectedFiles:
            location = self._mapSiteToNode(siteName)
            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset in uninjectedFiles[siteName]:
                numberDatasets += 1
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    numberBlocks += 1
                    numberFiles += len(injectData[dataset][block]['files'])
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])

                # Datasets are never split, the limits are checked only
                # once a complete dataset has been added to the batch
                if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles:
                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []

            # Inject whatever is left over from the last (partial) batch
            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investigation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(self.createRecoveryFileFormat(injectData))
            logging.error("PhEDEx file injection failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData, injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception:
                    # possible deadlock with DBS3Upload, retry once after 5s
                    logging.warning("Oracle exception during file status update, "
                                    "possible deadlock due to race condition, "
                                    "retry after 5s sleep")
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks:
            location = self._mapSiteToNode(siteName)
            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            logging.debug("closeBlocks XMLData: %s", xmlData)

            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx block close failed with HTTPException: %s %s",
                              ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)

                if "error" not in injectRes:
                    for datasetName in migratedBlocks[siteName]:
                        for blockName in migratedBlocks[siteName][datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)
                else:
                    msg = "Error injecting data %s: %s" % (migratedBlocks[siteName],
                                                           injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min response time out in cmsweb, some times
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:
            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_

        Find deletable blocks, then decide if to delete based on:

        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check

        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys())
        except Exception:
            logging.error("Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:
            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion", blockName, location)
                binds = {'DELETED': 2,
                         'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(block=blockName,
                                                                    complete='y')['phedex']['block']
                except Exception:
                    logging.error("Couldn't get block info from PhEDEx, retry next cycle")
                else:
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set(x['node'] for x in entry['replica'])
                            if location not in nodes:
                                logging.debug("Block %s not present on %s, mark as deleted",
                                              blockName, location)
                                binds = {'DELETED': 1,
                                         'BLOCKNAME': blockName}
                                self.markBlocksDeleted.execute(binds)
                            elif sites.issubset(nodes):
                                logging.debug("Deleting block %s from %s since it is fully transfered",
                                              blockName, location)
                                if location not in deletableEntries:
                                    deletableEntries[location] = {}
                                if dataset not in deletableEntries[location]:
                                    deletableEntries[location][dataset] = set()
                                deletableEntries[location][dataset].add(blockName)

        binds = [{'DELETED': 2, 'BLOCKNAME': blockName} for blockName in skippableBlocks]
        if binds:
            self.markBlocksDeleted.execute(binds)

        chunkSize = 100
        for location in deletableEntries:
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:
                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            # Nothing is left over if the last dataset just flushed a chunk;
            # do not send an empty deletion request to PhEDEx in that case
            if blocksToDelete:
                self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_

        actual PhEDEx calls for block deletion

        blocksToDelete maps dataset paths to the set of block names to
        delete from location.
        """
        deletion = PhEDExDeletion(list(blocksToDelete), location,
                                  level='block',
                                  comments="WMAgent blocks auto-delete from %s" % location,
                                  blocks=blocksToDelete)

        xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                 deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)

        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            # Only flag the blocks as deleted once the request was approved
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1,
                                  'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)

        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'],
                                           priority=subInfo['priority'],
                                           move=subInfo['move'],
                                           custodial=subInfo['custodial'],
                                           request_only=subInfo['request_only'],
                                           subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                    phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                       subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s", xmlData)

            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(),
                         subscription.getNodes(),
                         subscription.move,
                         subscription.custodial,
                         subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s",
                              ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return
class PhEDExTest(unittest.TestCase):
    """
    _PhEDExTest_

    Tests for the PhEDEx data service client, pointed at the PhEDEx test
    instance configured in setUp.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS, "method": "POST"})
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        # An empty drop only registers the (new) dataset itself
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0,
                                    "closed_blocks": 0,
                                    "new_blocks": 0,
                                    "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

        # Both datasets have to be injected before they can be subscribed
        for datasetPath in (datasetA, datasetB):
            xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetPath)
            self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS", "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return

    @attr("integration")
    def testBestNodeName(self):
        """
        _testBestNodeName_

        Verify that the node name is Buffer first
        """
        self.assertEqual(self.phedexApi.getBestNodeName("cmssrm.fnal.gov"),
                         "T1_US_FNAL_Buffer")
        return

    @attr("integration")
    def testNodeMap(self):
        """
        _testNodeMap_

        Verify that the node map can be retrieve from PhEDEx and that the
        getNodeSE() and getNodeNames() methods work correctly.
        """
        self.assertEqual(self.phedexApi.getNodeSE("T2_FR_GRIF_LLR"),
                         "polgrid4.in2p3.fr")
        self.assertEqual(self.phedexApi.getNodeNames("cmssrm.fnal.gov"),
                         ["T1_US_FNAL_Buffer", "T1_US_FNAL_MSS"])
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        # The spec object was never created before being used, which made
        # this test die with a NameError before talking to PhEDEx at all
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscription to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level="block", request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertEqual(len(call1), 1)
        # Do not rely on keys() returning an indexable list
        call1_key = next(iter(call1))

        call2 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
                                      ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertIn(call1_key, call2)

        # and one of the mappings should be the same as from the previous call
        self.assertEqual(call1[call1_key], call2[call1_key])
        return

    @attr('integration')
    def testXMLJSON(self):
        """
        Test XML and JSON in the same scope
        """
        site = 'T1_US_FNAL_Buffer'
        httpDict = {'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/json/test"}
        phedexJSON = PhEDEx(responseType='json', httpDict=httpDict)
        httpDict = {'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/xml/test"}
        phedexXML = PhEDEx(responseType='xml', httpDict=httpDict)

        # Fetch the TFC through the XML client, then read it back from the
        # cache file the client reports for that request
        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node': site})
        tfc_map = {site: readTFC(tfc_file)}
        pfn = tfc_map[site].matchLFN('srmv2', '/store/user/jblow/dir/test.root')

        self.assertEqual(pfn,
                         'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root')

        self.assertEqual(phedexJSON.getNodeSE('T1_US_FNAL_Buffer'),
                         'cmssrm.fnal.gov')

    @attr('integration')
    def testAuth(self):
        """
        _testAuth_

        Verify that the auth method works correctly.
        """
        self.assertFalse(self.phedexApi.getAuth("datasvc_whatever"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_subscribe"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_inject"))
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.

    Subscriptions are grouped per site and per "flavour" so that as few
    PhEDEx requests as possible are made. A flavour is always the 4-tuple
    (Prio, Custodial, AutoApprove, Move).
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members

        :param config: component configuration. Reads
            PhEDExInjector.phedexurl, DBSInterface.globalDBSUrl and the
            optional PhEDExInjector.group / PhEDExInjector.safeOperationMode
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector. Also load the SE names to
        PhEDEx node name mappings from the data service.

        :param parameters: unused by this method
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(classname="GetPartiallySubscribedDatasets")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s", node["name"], cmsName)
            # Only the first node seen for a given kind is kept
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.

        The whole cycle runs inside one database transaction, which is
        committed only after every subscription request has been sent.

        :param parameters: unused by this method
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn=myThread.transaction.conn,
                                                            transaction=True)

        # Always bound, so the membership tests below never depend on safeMode
        partiallySubscribedSet = set()
        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(
                conn=myThread.transaction.conn, transaction=True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({"workflow": workflow, "spec": spec})

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples
        # (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"],
        # AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue

                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        # Couldn't load it, alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg=msg)
                        continue

                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)

                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"],
                                                                  datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"],
                                                                     datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"],
                                                                    datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"],
                                                             datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                # Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(dataset, subscribed=self.terminalSubscriptionState,
                                            conn=myThread.transaction.conn,
                                            transaction=True)
                continue

            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) -
                                             set(subInfo["NonCustodialSites"]))

            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                # In safe mode the first pass over a dataset must not move data
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, False, False)
                else:
                    tupleKey = (subInfo["Priority"], True, False, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(dataset, subscribed=self.terminalSubscriptionState,
                                            conn=myThread.transaction.conn,
                                            transaction=True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = site in subInfo["AutoApproveSites"]
                # Keep the documented 4-field shape; non-custodial
                # subscriptions are never moves.
                tupleKey = (subInfo["Priority"], False, autoApprove, False)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            self.markSubscribed.execute(dataset, subscribed=1,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                priority, custodial, autoApprove, move = subscriptionFlavor

                # Prefer the MSS node of the site, fall back to its Disk node
                if "MSS" in self.cmsToPhedexMap[site]:
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s", datasets, site)

                options = {"custodial": "n", "requestOnly": "y",
                           "priority": priority.lower(),
                           "move": "n"}
                if custodial:
                    options["custodial"] = "y"
                if move:
                    options["move"] = "y"
                if autoApprove:
                    options["requestOnly"] = "n"

                newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                     **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Worker thread that reads the not-yet-subscribed datasets from the
    DBSBuffer database and requests the matching PhEDEx subscriptions.
    """

    def __init__(self, config):
        """
        ___init___

        Build the PhEDEx and SiteDB clients and the empty node lookup
        tables; the tables are filled in by setup().
        """
        BaseWorkerThread.__init__(self)

        endpoint = {"endpoint": config.PhEDExInjector.phedexurl}
        self.phedex = PhEDEx(endpoint, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # CMS site name -> {node kind -> PhEDEx node name}. The spec carries
        # CMS names; when a CMS name has several nodes the MSS one is used.
        self.cmsToPhedexMap = {}
        # PhEDEx node names grouped by kind, for the kinds we subscribe to
        self.phedexNodes = {"MSS": [], "Disk": []}

        # Alert framework hook-up (provides self.sendAlert when configured)
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Instantiate the DAOs used by this thread and load the PhEDEx node
        map from the data service into the lookup tables.
        """
        currentThread = threading.currentThread()
        factory = DAOFactory(
            package="WMComponent.PhEDExInjector.Database",
            logger=self.logger,
            dbinterface=currentThread.dbi,
        )
        self.getUnsubscribed = factory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = factory(classname="MarkDatasetSubscribed")

        for node in self.phedex.getNodeMap()["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            nodesByKind = self.cmsToPhedexMap.setdefault(cmsName, {})
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))

            # First node of each kind wins for a given CMS site
            nodesByKind.setdefault(node["kind"], node["name"])

            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Entry point of each polling cycle: delegates to subscribeDatasets.
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Read the unsubscribed datasets, turn them into PhEDEx subscription
        requests and record the ones that went through (or already existed)
        in DBSBuffer. Everything runs inside one database transaction.
        """
        currentThread = threading.currentThread()
        currentThread.transaction.begin()

        pending = self.getUnsubscribed.execute(conn=currentThread.transaction.conn, transaction=True)

        # Ids of the subscriptions to flag as done in the database
        completedIds = []
        # Container that sorts and merges the PhEDEx subscription objects
        subscriptionList = SubscriptionList()

        for subInfo in pending:
            node = subInfo["site"]
            knownNode = node in self.phedexNodes["MSS"] or node in self.phedexNodes["Disk"]

            if not knownNode:
                if node not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % node
                    msg += "skipping subscription: %s" % subInfo["id"]
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue
                # A CMS site name was given: translate it to a PhEDEx node
                nodesByKind = self.cmsToPhedexMap[node]
                node = nodesByKind.get("MSS") or nodesByKind["Disk"]

            if node not in self.phedexNodes["MSS"]:
                # Disk nodes never get custodial subscriptions
                subInfo["custodial"] = "n"
            elif node.startswith("T1"):
                # No auto approval at T1 sites
                subInfo["request_only"] = "y"

            candidate = PhEDExSubscription(
                subInfo["path"],
                node,
                self.group,
                priority=subInfo["priority"],
                move=subInfo["move"],
                custodial=subInfo["custodial"],
                request_only=subInfo["request_only"],
                subscriptionId=subInfo["id"],
            )

            # Something equivalent already exists in PhEDEx: just tick it off
            alreadyThere = candidate.matchesExistingSubscription(self.phedex) \
                or candidate.matchesExistingTransferRequest(self.phedex)
            if alreadyThere:
                completedIds.append(subInfo["id"])
                continue

            subscriptionList.addSubscription(candidate)

        # Merge compatible subscriptions before talking to PhEDEx
        subscriptionList.compact()

        for subscription in subscriptionList.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths())
                logging.debug(str(xmlData))

                targets = (subscription.getDatasetPaths(), subscription.getNodes())
                flags = (subscription.move, subscription.custodial, subscription.request_only)
                msg = "Subscribing: %s to %s, with options: " % targets
                msg += "Move: %s, Custodial: %s, Request Only: %s" % flags
                logging.info(msg)

                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                completedIds.extend(subscription.getSubscriptionIds())

        # Persist the outcome in DBSBuffer
        if completedIds:
            self.markSubscribed.execute(completedIds, conn=currentThread.transaction.conn, transaction=True)

        currentThread.transaction.commit()
        return
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.

    Every cycle injects files and closes migrated blocks. Every
    ``subFrequency`` cycles it also deletes blocks and subscribes
    datasets and blocks.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members

        :param config: component configuration. Reads the PhEDExInjector,
            DBSInterface and RucioInjector sections.
        """
        BaseWorkerThread.__init__(self)

        self.enabled = getattr(config.PhEDExInjector, "enabled", True)
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.phedexGroup = config.PhEDExInjector.phedexGroup

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles",
                         self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often,
        # so try up to three times, sleeping 2s and then 4s in between.
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json",
                             dbsUrl=self.dbsUrl)
        for delay in (2, 4):
            try:
                nodeMappings = self.phedex.getNodeMap()
                break
            except Exception:
                time.sleep(delay)
        else:
            # last attempt: let the exception propagate to the caller
            nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names. The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer. The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        self.blocksToRecover = []

        # X-component configuration is BAD! But it will only be here during the
        # Rucio commissioning within WM
        self.listTiersToSkip = config.RucioInjector.listTiersToInject
        logging.info("Component configured to skip data injection for data tiers: %s",
                     self.listTiersToSkip)

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.

        :param parameters: unused by this method
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.RucioInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.getUnsubscribedBlocks = daofactory(classname="GetUnsubscribedBlocks")
        self.setBlockRules = daofactory(classname="SetBlocksRule")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    @timeFunction
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.

        HTTP 502/503 errors are logged and retried on the next cycle. Any
        other HTTPException is re-raised, and any other exception is
        wrapped in PhEDExInjectorException.

        :param parameters: unused by this method
        """
        if not self.enabled:
            logging.info("PhEDExInjector component is disabled in the configuration, exiting.")
            return

        logging.info("Running PhEDEx injector poller algorithm...")
        self.pollCounter += 1

        try:
            if self.blocksToRecover:
                logging.info("PhEDExInjector Recovery: previous injection call failed, "
                             "checking if files were injected to PhEDEx anyway")
                self.recoverInjectedFiles()

            self.injectFiles()
            self.closeBlocks()

            # subFrequency is None when dataset subscription is disabled,
            # in which case this branch is never taken
            if self.pollCounter == self.subFrequency:
                self.pollCounter = 0
                self.deleteBlocks()
                self.subscribeDatasets()
                self.subscribeBlocks()
        except HTTPException as ex:
            if hasattr(ex, "status") and ex.status in [502, 503]:
                # then either proxy error or service is unavailable
                msg = "Caught HTTPException in PhEDExInjector. Retrying in the next cycle.\n"
                msg += str(ex)
                logging.error(msg)
            else:
                msg = "Caught unexpected HTTPException in PhEDExInjector.\n%s" % str(ex)
                logging.exception(msg)
                raise
        except Exception as ex:
            msg = "Caught unexpected exception in PhEDExInjector. Details:\n%s" % str(ex)
            logging.exception(msg)
            raise PhEDExInjectorException(msg)

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service. The injectionData parameter must be
        a dictionary keyed by dataset path. Each dataset path will map to a
        list of blocks, each block being a dict. The block dicts will have
        three keys: name, is-open and files. The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        :param injectionData: dictionary as described above
        :return: whatever XMLInjectionSpec.save() produces
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            # .items() (not the Python-2-only .iteritems()), matching
            # createRecoveryFileFormat
            for fileBlockName, fileBlock in injectionData[datasetPath].items():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service. The injectionData parameter must be
        a dictionary keyed by dataset path.

        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:
            for blockName, fileBlock in unInjectedData[datasetPath].items():
                lfns = set(fileDict["lfn"] for fileDict in fileBlock["files"])
                blocks.append({blockName: lfns})

        return blocks

    def _mapSiteToNode(self, siteName):
        """
        Map a site or SE name stored in DBSBuffer to a PhEDEx node name.

        SE names can be stored in DBSBuffer as that is what is returned in
        the framework job report. A name that is already a PhEDEx node is
        returned unchanged; otherwise the SE map is searched in the order
        Buffer, MSS, Disk.

        :param siteName: PhEDEx node name or SE name
        :return: PhEDEx node name, or None when no mapping exists
        """
        if siteName in self.nodeNames:
            return siteName

        for kind in ("Buffer", "MSS", "Disk"):
            if kind in self.seMap and siteName in self.seMap[kind]:
                return self.seMap[kind][siteName]

        return None

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        # filter out datatiers to be processed by RucioInjector
        uninjectedFiles = filterDataByTier(uninjectedFiles, self.listTiersToSkip)

        for siteName in uninjectedFiles.keys():
            location = self._mapSiteToNode(siteName)

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            # one PhEDEx call per dataset
            for dataset in uninjectedFiles[siteName]:
                injectData = {}
                lfnList = []
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])
                    logging.info("About to inject %d files for block %s",
                                 len(injectData[dataset][block]['files']), block)

                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection

        :param location: PhEDEx node name to inject into
        :param injectData: dictionary in the createInjectionSpec format
        :param lfnList: LFNs contained in injectData, marked as injected
            in the database when the call succeeds
        :raises PhEDExInjectorException: when the database status update
            fails for a reason other than a deadlock
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investigation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(self.createRecoveryFileFormat(injectData))
            logging.error("PhEDEx file injection failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            msg = "PhEDEx file injection failed with Exception: %s" % str(ex)
            logging.exception(msg)
        else:
            logging.debug("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData, injectRes["error"])
                logging.error(msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception as ex:
                    if 'Deadlock found' in str(ex) or 'deadlock detected' in str(ex):
                        logging.error("Database deadlock during file status update. "
                                      "Retrying again in the next cycle.")
                        self.blocksToRecover.extend(self.createRecoveryFileFormat(injectData))
                    else:
                        msg = "Failed to update file status in the database, reason: %s" % str(ex)
                        logging.error(msg)
                        raise PhEDExInjectorException(msg)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        # filter out datatiers to be processed by RucioInjector
        migratedBlocks = filterDataByTier(migratedBlocks, self.listTiersToSkip)

        for siteName in migratedBlocks:
            location = self._mapSiteToNode(siteName)

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dset, blocks in migratedBlocks[siteName].items():
                xmlData = self.createInjectionSpec({dset: blocks})
                logging.debug("closeBlocks XMLData: %s", xmlData)

                try:
                    injectRes = self.phedex.injectBlocks(location, xmlData)
                except HTTPException as ex:
                    logging.error("PhEDEx block close failed with HTTPException: %s %s",
                                  ex.status, ex.result)
                except Exception as ex:
                    msg = "PhEDEx block close failed with Exception: %s" % str(ex)
                    logging.exception(msg)
                else:
                    logging.debug("Block closing result: %s", injectRes)

                    if "error" in injectRes:
                        logging.error("Failed to close blocks due to: %s, for data: %s",
                                      injectRes["error"], migratedBlocks[siteName][dset])
                    else:
                        for blockName in blocks:
                            logging.info("Block closed in PhEDEx: %s", blockName)
                            self.setBlockClosed.execute(blockName)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:
            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_

        Find deletable blocks, then decide if to delete based on:

        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check

        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        ### logic to stop doing things to be done by RucioInjector or by DM team
        for block in list(blockDict):
            if not self._isDataTierAllowed(block):
                blockDict.pop(block)

        # the tier filter may have removed everything; nothing to ask PhEDEx
        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys())
        except Exception:
            logging.error("Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion", blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(
                        block=blockName, complete='y')['phedex']['block']
                except Exception:
                    logging.error("Couldn't get block info from PhEDEx, retry next cycle")
                else:
                    # nodes holding a complete replica of this block
                    nodes = set()
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])

                    if location not in nodes:
                        logging.debug("Block %s not present on %s, mark as deleted",
                                      blockName, location)
                        binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                        self.markBlocksDeleted.execute(binds)
                    elif sites.issubset(nodes):
                        logging.debug("Deleting block %s from %s since it is fully transfered",
                                      blockName, location)
                        if location not in deletableEntries:
                            deletableEntries[location] = {}
                        if dataset not in deletableEntries[location]:
                            deletableEntries[location][dataset] = set()
                        deletableEntries[location][dataset].add(blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            # issue a deletion request once more than chunkSize blocks piled up
            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            # flush the remainder; skip it when the last chunk was just sent,
            # otherwise an empty deletion request would be issued
            if blocksToDelete:
                self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_

        actual PhEDEx calls for block deletion

        :param location: PhEDEx node the blocks are deleted from
        :param blocksToDelete: dictionary of dataset name to block names
        """
        deletion = PhEDExDeletion(blocksToDelete.keys(), location,
                                  level='block',
                                  comments="WMAgent blocks auto-delete from %s" % location,
                                  blocks=blocksToDelete)

        try:
            response = self.phedex.delete(deletion)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)

        return

    def _isDataTierAllowed(self, dataName):
        """
        Check whether data belongs to an allowed datatier to
        be handled by this component (either to inject or to
        subscribe into PhEDEx)
        :param dataName: string with the block or the dataset name
        :return: boolean, True if the tier is allowed, False otherwise
        """
        # last path element is the tier, optionally followed by "#<block id>"
        endTier = dataName.rsplit('/', 1)[1]
        endTier = endTier.split('#')[0]
        if endTier in self.listTiersToSkip:
            logging.debug("Skipping data: %s because it's listed in the tiers to skip", dataName)
            return False
        return True

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            ### logic to stop doing things to be done by RucioInjector or by DM team
            if not self._isDataTierAllowed(subInfo['path']):
                continue

            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'],
                                           priority=subInfo['priority'],
                                           move=subInfo['move'],
                                           custodial=subInfo['custodial'],
                                           request_only=subInfo['request_only'],
                                           subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                    phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(),
                         subscription.getNodes(),
                         subscription.move,
                         subscription.custodial,
                         subscription.request_only)

            try:
                self.phedex.subscribe(subscription)
            except HTTPException as ex:
                logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s",
                              ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return

    def subscribeBlocks(self):
        """
        _subscribeBlocks_
        Poll the database and subscribe blocks not yet subscribed.
        """
        logging.info("Starting subscribeBlocks method")

        unsubBlocks = self.getUnsubscribedBlocks.execute()
        # now organize those by location in order to minimize phedex requests
        # also remove blocks that this component is meant to skip
        unsubBlocks = self.organizeBlocksByLocation(unsubBlocks)

        for location, blockDict in unsubBlocks.items():
            phedexSub = PhEDExSubscription(blockDict.keys(), location, self.phedexGroup,
                                           blocks=blockDict, level="block",
                                           priority="normal", move="n",
                                           custodial="n", request_only="n",
                                           comments="WMAgent production site")
            try:
                res = self.phedex.subscribe(phedexSub)
                transferId = res['phedex']['request_created'][0]['id']
                logging.info("Subscribed %d blocks for %d datasets, to location: %s, under request ID: %s",
                             len(phedexSub.getBlocks()),
                             len(phedexSub.getDatasetPaths()),
                             phedexSub.getNodes(),
                             transferId)
            except HTTPException as ex:
                logging.error("PhEDEx block subscription failed with HTTPException: %s %s",
                              ex.status, ex.result)
                logging.error("The subscription object was: %s", str(phedexSub))
            except Exception as ex:
                logging.exception("PhEDEx block subscription failed with Exception: %s", str(ex))
            else:
                # record the transfer request id against every subscribed block
                binds = []
                for blockname in phedexSub.getBlocks():
                    binds.append({'RULE_ID': str(transferId), 'BLOCKNAME': blockname})
                self.setBlockRules.execute(binds)

        return

    def organizeBlocksByLocation(self, blocksLocation):
        """
        Given a list of dictionaries (with block name and location). Organize
        those blocks per location to make phedex subscription calls more
        efficient.
        Also drops blocks that we cannot subscribe, and check for valid
        phedex node names.
        :param blocksLocation: list of dictionaries
        :return: a dict of dictionaries, such as:
          {"locationA": {"datasetA": ["blockA", "blockB", ...],
                         "datasetB": ["blockA", "blockB", ...]
                        },
           "locationB": {"datasetA": ["blockA"],
                         ...
        """
        dictByLocation = {}
        for item in blocksLocation:
            ### logic to stop doing things to be done by RucioInjector or by DM team
            if not self._isDataTierAllowed(item['blockname']):
                continue

            site = item['pnn']
            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping block subscription for: %s" % item['blockname']
                logging.error(msg)
                continue

            dictByLocation.setdefault(site, {})

            # the dataset name is everything before the '#' of the block name
            dsetName = item['blockname'].split("#")[0]
            dictByLocation[site].setdefault(dsetName, [])
            dictByLocation[site][dsetName].append(item['blockname'])
        return dictByLocation
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets to MSS as they are
    created.

    For every unsubscribed dataset the SE name stored in DBSBuffer is
    translated to a PhEDEx MSS node name and one custodial, request-only
    subscription is made per MSS node.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members.

        :param config: component configuration; reads
            ``PhEDExInjector.phedexurl``, ``DBSInterface.globalDBSUrl`` and
            the optional ``PhEDExInjector.group`` (default "DataOps").
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer
        # to PhEDEx node names.  The first key will be the "kind" which
        # consists of one of the following: MSS, Disk, Buffer.  The next key
        # will be the SE name.
        self.seMap = {}
        self.nodeNames = []

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names
        to PhEDEx node name mappings from the data service.

        :param parameters: unused; part of the worker thread interface.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            # `in` replaces dict.has_key(), which no longer exists in Python 3
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them to MSS.

        Datasets are marked as subscribed inside the same transaction that
        read them; the transaction is committed only after every
        ``phedex.subscribe`` call has returned.

        :param parameters: unused; part of the worker thread interface.
        """
        # Without any MSS node there is nothing to subscribe to.  Bail out
        # before opening the transaction so that it is never left dangling.
        if "MSS" not in self.seMap:
            return

        myThread = threading.currentThread()
        myThread.transaction.begin()

        unsubscribedDatasets = self.getUnsubscribed.execute(
            conn=myThread.transaction.conn, transaction=True)

        mssNodes = self.seMap["MSS"]

        # MSS node name -> list of dataset paths to subscribe there
        datasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            seName = unsubscribedDataset["se_name"]

            if seName not in mssNodes:
                msg = "No MSS node for SE: %s" % seName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            datasetMap.setdefault(mssNodes[seName], []).append(datasetPath)

            self.markSubscribed.execute(datasetPath,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        for siteMSS in datasetMap:
            logging.info("Subscribing %s to %s", datasetMap[siteMSS], siteMSS)
            newSubscription = PhEDExSubscription(datasetMap[siteMSS], siteMSS,
                                                 self.group,
                                                 custodial="y",
                                                 requestOnly="y")

            xmlData = XMLDrop.makePhEDExXMLForDatasets(
                self.dbsUrl, newSubscription.getDatasetPaths())
            # Goes to the component log rather than stdout
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.

    The subscription settings (custodial / non-custodial sites, auto-approve
    sites, priority) are read from the workload spec of every workflow that
    produced a dataset, merged per dataset, grouped by site and option set,
    and finally sent to PhEDEx as one subscription per (site, option set).
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members.

        Reads ``PhEDExInjector.phedexurl`` and ``DBSInterface.globalDBSUrl``
        from `config`, plus the optional ``PhEDExInjector`` attributes
        ``group`` (default "DataOps"), ``safeOperationMode`` (default False)
        and ``replicaOnly`` (default False).
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode",
                                False)
        self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)

        # Subscribed state in the DBSBuffer table for datasets.
        # 1 in normal mode; 2 in safe mode, where 1 is used by algorithm()
        # as the intermediate state written after the first pass.
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map CMS site names (that is what the spec will have) to
        # PhEDEx node names: {cmsName: {kind: nodeName}}.
        # If a CMS name is associated to many PhEDEx nodes, algorithm()
        # chooses the MSS one.
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the PhEDEx
        node name to CMS site name mappings from the data services.

        `parameters` is not used by this method.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(
            classname="GetPartiallySubscribedDatasets")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s" %
                         (node["name"], cmsName))
            # Only the first node seen for a given (site, kind) is kept
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.

        Everything between transaction.begin() and transaction.commit() runs
        in one database transaction; datasets are marked in DBSBuffer while
        the site map is being built, before the PhEDEx calls are made.

        In safe mode a dataset goes through two polling cycles: first its
        custodial subscription is requested as a replica (Move = False)
        together with the non-custodial ones and the dataset is marked with
        state 1; once it is returned by GetPartiallySubscribedDatasets the
        custodial subscription is requested again with Move = True and the
        dataset is marked with the terminal state.

        `parameters` is not used by this method.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(
            conn=myThread.transaction.conn, transaction=True)

        # partiallySubscribedSet only exists in safe mode; every later use
        # is guarded by `self.safeMode and ...` so it is never read otherwise
        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(
                conn=myThread.transaction.conn, transaction=True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs:
        # {datasetPath: [{"workflow": ..., "spec": ...}, ...]}
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({
                "workflow": workflow,
                "spec": spec
            })

        # spec -> loaded WMWorkloadHelper, so each spec is loaded only once
        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples
        # (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"],
        # AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        # Couldn't load it, alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg=msg)
                        continue
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                # Merge with what previous workflows requested for the same
                # dataset; the first non-empty configuration is taken as is
                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(
                        subInfo["CustodialSites"],
                        datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(
                        subInfo["NonCustodialSites"],
                        datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(
                        subInfo["AutoApproveSites"],
                        datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(
                        subInfo["Priority"], datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this
            # dataset in subInfo.  Distribute it by site
            if not subInfo:
                # Nothing to do: log, mark the dataset as done and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(
                    dataset,
                    subscribed=self.terminalSubscriptionState,
                    conn=myThread.transaction.conn,
                    transaction=True)
                continue
            # Make sure that a site is not configured both as non custodial
            # and custodial.  Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(
                set(subInfo["CustodialSites"]) -
                set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # First safe-mode pass asks for a replica (Move = False);
                # every other case asks for a move
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, autoApprove, False)
                else:
                    tupleKey = (subInfo["Priority"], True, autoApprove, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                # Subscriptions are sorted by options, defined by tupleKey
                # The tuple key has 3 or 4 entries in this order
                # Priority, Custodial, Auto approve, Move (True) or Replica (False)
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed
            # dataset, then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(
                    dataset,
                    subscribed=self.terminalSubscriptionState,
                    conn=myThread.transaction.conn,
                    transaction=True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # Non-custodial is never move, so this tuple has only 3 entries
                # TODO: Change tuples to frozensets for clarity
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # State 1 is terminal in normal mode and "partially subscribed"
            # in safe mode (see terminalSubscriptionState in __init__)
            self.markSubscribed.execute(dataset,
                                        subscribed=1,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Pick the PhEDEx node for this site, preferring MSS
                isMSS = False
                if "MSS" in self.cmsToPhedexMap[site]:
                    isMSS = True
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    # NOTE(review): raises KeyError if the site has neither
                    # an MSS nor a Disk node (e.g. Buffer only) - confirm
                    # that this cannot happen
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {
                    "custodial": "n",
                    "requestOnly": "y",
                    "priority": subscriptionFlavor[0].lower(),
                    "move": "n"
                }
                if subscriptionFlavor[1] and isMSS:
                    # Custodial subscriptions are only allowed in MSS nodes
                    # If custodial is requested on Non-MSS it falls back to
                    # a non-custodial subscription
                    options["custodial"] = "y"
                    # Only custodial keys carry the fourth (Move) entry, so
                    # index 3 must not be read outside this branch
                    if subscriptionFlavor[3] and not self.replicaOnly:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    # Auto-approved: make the subscription, not just a request
                    options["requestOnly"] = "n"
                logging.info(
                    "Request options: Custodial - %s, Move - %s, Request Only - %s"
                    % (options["custodial"].upper(), options["move"].upper(),
                       options["requestOnly"].upper()))
                newSubscription = PhEDExSubscription(datasets, phedexNode,
                                                     self.group, **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(
                    self.dbsUrl, newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                # NOTE(review): an exception here propagates with the
                # transaction still open - presumably the caller rolls it
                # back; verify
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are
    created.

    Every unsubscribed record already carries its own options (site,
    priority, move, custodial, request_only); this class only resolves the
    site to a PhEDEx node, drops duplicates, compacts the requests and sends
    them to PhEDEx.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members.

        :param config: component configuration; reads
            ``PhEDExInjector.phedexurl``, ``DBSInterface.globalDBSUrl`` and
            the optional ``PhEDExInjector.group`` (default "DataOps").
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map CMS site names (that is what the spec will have) to
        # PhEDEx node names: {cmsName: {kind: nodeName}}.
        # If a CMS name is associated to many PhEDEx nodes then the MSS one
        # is chosen.
        self.cmsToPhedexMap = {}
        # PhEDEx node names by kind, used to recognise sites that are
        # already given as PhEDEx node names
        self.phedexNodes = {'MSS': [], 'Disk': []}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the PhEDEx
        node name to CMS site name mappings from the data services.

        :param parameters: unused; part of the worker thread interface.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:

            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])

            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}

            logging.info("Loaded PhEDEx node %s for site %s",
                         node["name"], cmsName)
            # Only the first node seen for a given (site, kind) is kept
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]

            if node["kind"] in ("MSS", "Disk"):
                self.phedexNodes[node["kind"]].append(node["name"])

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured.

        :param parameters: unused; part of the worker thread interface.
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.

        Records whose site cannot be resolved to an MSS or Disk PhEDEx node
        are logged, alerted and skipped; they stay unsubscribed in DBSBuffer.
        Records that match an existing subscription or transfer request are
        marked as subscribed without contacting PhEDEx again.  A failed
        PhEDEx call is logged and the affected records are retried in a
        later cycle.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(
            conn=myThread.transaction.conn, transaction=True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data
        # structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and \
               site not in self.phedexNodes['Disk']:
                # Not a PhEDEx node name: treat it as a CMS site name
                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo['id']
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue

                # Get the phedex node from CMS site, preferring MSS.  A site
                # known only through another node kind (e.g. Buffer) has
                # neither entry; skip it instead of letting a KeyError abort
                # the whole polling cycle.
                siteNodes = self.cmsToPhedexMap[site]
                phedexNode = siteNodes.get("MSS") or siteNodes.get("Disk")
                if not phedexNode:
                    msg = "Site %s has no MSS or Disk PhEDEx node, " % site
                    msg += "skipping subscription: %s" % subInfo['id']
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue
                site = phedexNode

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, self.group,
                                           priority=subInfo['priority'],
                                           move=subInfo['move'],
                                           custodial=subInfo['custodial'],
                                           request_only=subInfo['request_only'],
                                           subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
               phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(
                    self.dbsUrl, subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (
                    subscription.getDatasetPaths(), subscription.getNodes())
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (
                    subscription.move, subscription.custodial,
                    subscription.request_only)
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                # Best effort: leave the records unsubscribed so that the
                # next polling cycle picks them up again
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s", str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        myThread.transaction.commit()
        return