def testSubscription(self):
    """
    _testSubscription_

    Inject two freshly named datasets at the T1_US_FNAL_MSS node, request
    one subscription covering both at T1_UK_RAL_MSS and check that exactly
    one request carrying an "id" entry is reported back.
    """
    rawPath = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
    recoPath = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

    # Inject both datasets before subscribing them
    for path in (rawPath, recoPath):
        dropXml = XMLDrop.makePhEDExDrop(self.dbsTestUrl, path)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", dropXml)

    subscription = PhEDExSubscription([rawPath, recoPath], "T1_UK_RAL_MSS", "Saturn")
    subscriptionXml = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                       subscription.getDatasetPaths())
    response = self.phedexApi.subscribe(subscription, subscriptionXml)

    created = response["phedex"]["request_created"]
    self.assertEqual(len(created), 1, "Error: Wrong number of request IDs")
    self.assertTrue("id" in created[0], "Error: Missing request ID")
    return
def subscribe(self, subscription):
    """
    _subscribe_

    POST a 'subscribe' call built from a PhEDEx subscription structure.

    The request options are copied from the subscription attributes. The
    XML payload is generated here: dataset-level XML when the subscription
    level is 'dataset', block-level XML for any other level.
    Returns whatever self._getResult returns for the call.
    """
    args = {
        'node': list(subscription.nodes),
        'comments': subscription.comments,
        'level': subscription.level,
        'priority': subscription.priority,
        'move': subscription.move,
        'static': subscription.static,
        'custodial': subscription.custodial,
        'group': subscription.group,
        'request_only': subscription.request_only,
    }

    if args['level'] != 'dataset':
        # Anything that is not dataset level is treated as block level
        args['data'] = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                      subscription.getDatasetsAndBlocks())
    else:
        args['data'] = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                        list(subscription.datasetPaths))

    return self._getResult('subscribe', args=args, verb="POST")
def testSubscription(self):
    """
    _testSubscription_

    Verify that the subscription API works: inject two new datasets,
    subscribe both with a single request and check that one request with
    an "id" entry comes back.
    """
    datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
    datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

    xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
    self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

    xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
    self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

    testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS", "Saturn")
    xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                               testSub.getDatasetPaths())
    result = self.phedexApi.subscribe(testSub, xmlData)
    requestIDs = result["phedex"]["request_created"]

    self.assertEqual(len(requestIDs), 1,
                     "Error: Wrong number of request IDs")
    # dict.has_key() no longer exists in Python 3; the "in" operator works
    # on both Python 2 and 3.
    self.assertTrue("id" in requestIDs[0],
                    "Error: Missing request ID")
    return
def testA_XMLDrop(self):
    """
    _XMLDrop_

    Build an injection spec holding one dataset, one file block and one
    file, serialize it and check that the XML contains the expected data,
    dbs, dataset, block and file elements.
    """
    datasetPath = "/Cosmics/CRUZET09-PromptReco-v1/RECO"
    fileBlockName = "/Cosmics/CRUZET09-PromptReco-v1/RAW#1"

    spec = XMLDrop.XMLInjectionSpec(self.dbsURL)
    datasetSpec = spec.getDataset(datasetPath)
    fileBlock = datasetSpec.getFileblock(fileBlockName)
    fileBlock.addFile("lfn", {'adler32': '201', 'cksum': '101'}, '100')
    output = spec.save()

    # Interpolated values are escaped so that regex metacharacters in the
    # URL or the paths are matched literally.
    expectedPatterns = [
        '<data version="2">',
        '<dbs dls="dbs" name="%s">' % re.escape(self.dbsURL),
        '<dataset is-open="y" is-transient="n" name="%s">' % re.escape(datasetPath),
        '<block is-open="y" name="%s">' % re.escape(fileBlockName),
        '<file bytes="100" checksum="adler32:201,cksum:101" name="lfn"/>',
        '</block>',
        '</dataset>',
    ]

    # re.search returns a match object or None; comparing that with an
    # integer raises TypeError on Python 3, so test for None explicitly.
    for pattern in expectedPatterns:
        self.assertTrue(re.search(pattern, output) is not None,
                        "Pattern not found in XML: %s" % pattern)
    return
def subscribe(self, subscription):
    """
    _subscribe_

    POST a 'subscribe' call for a PhEDEx subscription structure.

    The dataset-level XML payload is generated here from the subscription's
    dataset paths; the remaining request options are copied from the
    subscription attributes of the same name.
    Returns whatever self._getResult returns for the call.
    """
    nodes = [node for node in subscription.nodes]
    datasetXml = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                  list(subscription.datasetPaths))

    args = {'node': nodes, 'data': datasetXml}
    optionNames = ('level', 'priority', 'move', 'static',
                   'custodial', 'group', 'request_only')
    for optionName in optionNames:
        args[optionName] = getattr(subscription, optionName)

    return self._getResult('subscribe', args=args, verb="POST")
def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
    """
    _deleteBlocksPhEDExCalls_

    Issue the PhEDEx calls that delete blocks at one location.

    blocksToDelete maps dataset paths to the block names to remove. A
    block-level deletion request is posted and approved, then every block
    is flagged as deleted through self.markBlocksDeleted. Any exception
    raised along the way is logged and swallowed.
    """
    request = PhEDExDeletion(blocksToDelete.keys(), location,
                             level = 'block',
                             comments = "WMAgent blocks auto-delete from %s" % location,
                             blocks = blocksToDelete)

    try:
        payload = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                 request.getDatasetsAndBlocks())
        logging.debug(str(payload))
        reply = self.phedex.delete(request, payload)
        createdId = reply['phedex']['request_created'][0]['id']

        # auto-approve deletion request
        self.phedex.updateRequest(createdId, 'approve', location)

        deletedBinds = [{'DELETED' : 1, 'BLOCKNAME' : blockName}
                        for dataset in blocksToDelete
                        for blockName in blocksToDelete[dataset]]
        self.markBlocksDeleted.execute(deletedBinds, transaction = False)
    except Exception as ex:
        logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
        logging.error("Exception: %s", str(ex))

    return
def createInjectionSpec(self, injectionData):
    """
    _createInjectionSpec_

    Transform the data structure returned from the database into an XML
    string for the PhEDEx Data Service.

    The injectionData parameter must be a dictionary keyed by dataset path.
    Each dataset path maps to a dictionary keyed by block name, and each
    block is a dict with two keys: is-open and files. The files key is a
    list of dicts, each of which has the following keys: lfn, size and
    checksum. The following is an example object:

    {"dataset1":
      {"block1": {"is-open": "y",
                  "files": [{"lfn": "lfn1", "size": 10,
                             "checksum": {"cksum": "1234"}},
                            {"lfn": "lfn2", "size": 20,
                             "checksum": {"cksum": "4321"}}]}}}

    Returns the value of XMLInjectionSpec.save() for the populated spec.
    """
    injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

    for datasetPath in injectionData:
        datasetSpec = injectionSpec.getDataset(datasetPath)

        # items() instead of iteritems(): the latter is Python 2 only
        for fileBlockName, fileBlock in injectionData[datasetPath].items():
            blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                 fileBlock["is-open"])

            for f in fileBlock["files"]:
                blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

    return injectionSpec.save()
def phedex():
    """
    _phedex_

    Ad-hoc exercise of the PhEDEx deletion calls with hard-coded values:
    build a block-level deletion for one block, print the XML payload,
    post the deletion, print the response and approve the request that
    was created.
    """
    phedexIn = PhEDEx(dict = {'endpoint' : 'https://cmsweb.cern.ch/phedex/datasvc/json/dev/',
                              'logger' : logging}, responseType = "json")

    # Earlier experiments, kept for reference:
    # requests = phedex.getRequestList(dataset = ['/TauParked/Run2012C-LogError-22Jan2013-v1/RAW-RECO'],
    #                                  node = 'T2_RU_ITEP')['phedex']['request']
    # for request in requests:
    #     requestId = request['id']
    #     request = phedex.getTransferRequests(request = requestId)['phedex']['request']
    #     if request:
    #         request = request[0]
    #         print(request)
    # x = PhEDExSubscription('/TauParked/Run2012C-22Jan2013-v1/AOD',
    #                        'T1_US_FNAL_MSS', 'DataOps', 'dataset', 'low', 'n', 'n', 'n', 'y', subscriptionId = 1)
    # print(x.matchesExistingTransferRequest(phedex))
    # print(x.matchesExistingSubscription(phedex))

    deletion = PhEDExDeletion('/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO',
                              'T1_CH_CERN_Buffer', level = 'block',
                              comments = 'Blocks automatically deleted from T2_CH_CERN as it has already been processed and transferred to a custodial location',
                              blocks = {'/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO' : ['/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO#075ea9e8-7d80-11e0-90de-00163e010039']})
    xmlData = XMLDrop.makePhEDExXMLForBlocks('http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet',
                                             deletion.getDatasetsAndBlocks())

    # Call form of print: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print(str(xmlData))
    response = phedexIn.delete(deletion, xmlData)
    print(response)

    requestId = response['phedex']['request_created'][0]['id']
    phedexIn.updateRequest(requestId, 'approve', 'T1_CH_CERN_Buffer')
def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
    """
    _deleteBlocksPhEDExCalls_

    Issue the PhEDEx calls that delete blocks at one location.

    blocksToDelete maps dataset paths to the block names to remove. The
    deletion request is posted and approved; only when both calls succeed
    are the blocks flagged as deleted through self.markBlocksDeleted.
    Failures of the PhEDEx calls are logged and not re-raised.
    """
    deletion = PhEDExDeletion(blocksToDelete.keys(), location,
                              level = 'block',
                              comments = "WMAgent blocks auto-delete from %s" % location,
                              blocks = blocksToDelete)
    payload = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                             deletion.getDatasetsAndBlocks())
    logging.debug("deleteBlocks XMLData: %s", payload)

    try:
        reply = self.phedex.delete(deletion, payload)
        createdId = reply['phedex']['request_created'][0]['id']
        # auto-approve deletion request
        self.phedex.updateRequest(createdId, 'approve', location)
    except HTTPException as ex:
        logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s", ex.status, ex.result)
        return
    except Exception as ex:
        logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
        logging.debug("Traceback: %s", str(traceback.format_exc()))
        return

    # Reached only when deletion and approval both went through
    deletedBinds = [{'DELETED': 1, 'BLOCKNAME': blockName}
                    for dataset in blocksToDelete
                    for blockName in blocksToDelete[dataset]]
    self.markBlocksDeleted.execute(deletedBinds)
    return
def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
    """
    _deleteBlocksPhEDExCalls_

    Post and approve a block-level PhEDEx deletion for one location, then
    record the deleted blocks.

    blocksToDelete maps dataset paths to lists of block names. Errors from
    the PhEDEx calls are logged and swallowed, and in that case no block is
    marked as deleted.
    """
    autoComment = "WMAgent blocks auto-delete from %s" % location
    deletion = PhEDExDeletion(blocksToDelete.keys(), location, level='block',
                              comments=autoComment, blocks=blocksToDelete)
    xmlPayload = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                deletion.getDatasetsAndBlocks())
    logging.debug("deleteBlocks XMLData: %s", xmlPayload)

    try:
        response = self.phedex.delete(deletion, xmlPayload)
        newRequestId = response['phedex']['request_created'][0]['id']
        # auto-approve deletion request
        self.phedex.updateRequest(newRequestId, 'approve', location)
    except HTTPException as ex:
        logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s",
                      ex.status, ex.result)
    except Exception as ex:
        logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
        logging.debug("Traceback: %s", str(traceback.format_exc()))
    else:
        binds = []
        for dataset in blocksToDelete:
            binds.extend({'DELETED': 1, 'BLOCKNAME': name}
                         for name in blocksToDelete[dataset])
        self.markBlocksDeleted.execute(binds)

    return
def testGetSubscriptionMapping(self):
    """
    _testGetSubscriptionMapping_

    Inject a dataset with two blocks, subscribe it at dataset level to one
    node and at block level to another, then check what
    getSubscriptionMapping reports for the dataset and for one block.
    """
    datasetPath = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
    firstBlock = "%s#%s" % (datasetPath, makeUUID())
    secondBlock = "%s#%s" % (datasetPath, makeUUID())

    spec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
    specDataset = spec.getDataset(datasetPath)
    for blockName in (firstBlock, secondBlock):
        specDataset.getFileblock(blockName, 'y')
    injectionXml = spec.save()
    self.phedexApi.injectBlocks("T1_US_FNAL_MSS", injectionXml)

    # Dataset-level subscription at one node
    datasetSub = PhEDExSubscription([datasetPath], "T1_UK_RAL_MSS",
                                    "Saturn", requestOnly="n")
    datasetXml = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                  datasetSub.getDatasetPaths())
    self.phedexApi.subscribe(datasetSub, datasetXml)

    # Block-level subscription at a different node, sent with the
    # injection XML
    blockSub = PhEDExSubscription([datasetPath], "T1_DE_KIT_MSS",
                                  "Saturn", level="block", requestOnly="n")
    self.phedexApi.subscribe(blockSub, injectionXml)

    datasetMapping = self.phedexApi.getSubscriptionMapping(datasetPath)
    self.assertEqual(datasetMapping[datasetPath], {"T1_UK_RAL_MSS"},
                     "Error: Dataset subscription is wrong.")

    blockMapping = self.phedexApi.getSubscriptionMapping(firstBlock)
    self.assertEqual(len(blockMapping[firstBlock]), 2,
                     "Error: Wrong number of nodes in block subscription.")
    expectedNodes = (("T1_UK_RAL_MSS", "Error: RAL missing from block sub."),
                     ("T1_DE_KIT_MSS", "Error: KIT missing from block sub."))
    for nodeName, failureMessage in expectedNodes:
        self.assertTrue(nodeName in blockMapping[firstBlock], failureMessage)
    return
def subscribeTier0Blocks(self):
    """
    _subscribeTier0Blocks_

    Subscribe blocks to the Tier-0 where a replica subscription already
    exists. All Tier-0 subscriptions are move, custodial and autoapproved
    with high priority.

    Returns None. Errors raised while building the XML or calling PhEDEx
    are logged and swallowed so that the blocks are retried later.
    """
    site = 'T0_CH_CERN'

    myThread = threading.current_thread()
    myThread.transaction.begin()

    # Check for candidate blocks for subscription
    blocksToSubscribe = self.getUnsubscribedBlocks.execute(node = site,
                                                           conn = myThread.transaction.conn,
                                                           transaction = True)

    # NOTE(review): nothing in this method commits or rolls back the
    # transaction begun above - confirm that the caller takes care of it.
    if not blocksToSubscribe:
        return

    # For the blocks we don't really care about the subscription options
    # We are subscribing all blocks with the same recipe.
    subscriptionMap = {}
    for subInfo in blocksToSubscribe:
        subscriptionMap.setdefault(subInfo['path'], []).append(subInfo['blockname'])

    custodial = 'y'
    request_only = 'n'
    move = 'y'
    priority = 'High'

    # Get the phedex node
    phedexNode = self.cmsToPhedexMap[site]["MSS"]

    # subscriptionMap is keyed by dataset, so its length is the number of
    # datasets and the summed list lengths give the number of blocks.
    # NOTE(review): informational message logged at error level, kept as
    # found - confirm whether info was intended.
    blockCount = sum(len(blockNames) for blockNames in subscriptionMap.values())
    logging.error("Subscribing %d blocks, from %d datasets to the Tier-0",
                  blockCount, len(subscriptionMap))

    newSubscription = PhEDExSubscription(subscriptionMap.keys(), phedexNode, self.group,
                                         custodial = custodial, request_only = request_only,
                                         move = move, priority = priority,
                                         level = 'block', blocks = subscriptionMap)

    # TODO: Check for blocks already subscribed

    try:
        xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                 newSubscription.getDatasetsAndBlocks())
        logging.debug(str(xmlData))
        self.phedex.subscribe(newSubscription, xmlData)
    except Exception as ex:
        logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
        logging.error("Exception: %s", str(ex))
def testInjection(self):
    """
    _testInjection_

    Inject a drop built from a fresh UUID and check the statistics block
    of the response: one new dataset and nothing else.
    """
    expectedStats = {"closed_datasets": 0,
                     "closed_blocks": 0,
                     "new_blocks": 0,
                     "new_datasets": 1,
                     "new_files": 0}

    dropXml = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
    response = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", dropXml)

    self.assertEqual(response["phedex"]["injected"], {"stats": expectedStats})
    return
def algorithm(self, parameters):
    """
    _algorithm_

    Poll the database for datasets and subscribe them to MSS.

    Unsubscribed datasets are grouped by the MSS node that self.seMap
    associates with their storage element and marked as subscribed; one
    custodial, request-only subscription is then made per node. Datasets
    whose storage element has no MSS node are reported and skipped.
    The parameters argument is not used. Returns None.
    """
    myThread = threading.current_thread()
    myThread.transaction.begin()

    # NOTE(review): this early return leaves the transaction begun above
    # without a commit - confirm that this is intended.
    if "MSS" not in self.seMap:
        return

    unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                        transaction = True)

    mssNodes = self.seMap["MSS"]
    datasetMap = {}
    for unsubscribedDataset in unsubscribedDatasets:
        datasetPath = unsubscribedDataset["path"]
        seName = unsubscribedDataset["se_name"]

        if seName not in mssNodes:
            msg = "No MSS node for SE: %s" % seName
            logging.error(msg)
            self.sendAlert(7, msg = msg)
            continue

        datasetMap.setdefault(mssNodes[seName], []).append(datasetPath)
        self.markSubscribed.execute(datasetPath,
                                    conn = myThread.transaction.conn,
                                    transaction = True)

    for siteMSS in datasetMap:
        logging.info("Subscribing %s to %s", datasetMap[siteMSS], siteMSS)
        newSubscription = PhEDExSubscription(datasetMap[siteMSS], siteMSS, self.group,
                                             custodial = "y", requestOnly = "y")

        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                   newSubscription.getDatasetPaths())
        # Payload goes to the debug log instead of stdout
        logging.debug("%s", xmlData)
        self.phedex.subscribe(newSubscription, xmlData)

    myThread.transaction.commit()
    return
def testInjection(self):
    """
    _testInjection_

    Check that injecting a drop named after a new UUID is reported by
    PhEDEx as exactly one new dataset, with every other counter at zero.
    """
    payload = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
    reply = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", payload)
    injected = reply["phedex"]["injected"]

    expected = {"stats": dict(closed_datasets=0,
                              closed_blocks=0,
                              new_blocks=0,
                              new_datasets=1,
                              new_files=0)}
    self.assertEqual(injected, expected)
    return
def delete(self, deletion):
    """
    _delete_

    POST a 'delete' call for a PhEDEx deletion structure.

    The block-level XML payload is generated here from the deletion's
    datasets and blocks; node list, level, subscription-removal flag and
    comments are copied from the deletion object.
    Returns whatever self._getResult returns for the call.
    """
    targetNodes = []
    targetNodes.extend(deletion.nodes)

    blockXml = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                              deletion.getDatasetsAndBlocks())

    args = {
        'node': targetNodes,
        'data': blockXml,
        'level': deletion.level,
        'rm_subscriptions': deletion.subscriptions,
        'comments': deletion.comments,
    }
    return self._getResult('delete', args=args, verb="POST")
def testGetSubscriptionMapping(self):
    """
    _testGetSubscriptionMapping_

    Check getSubscriptionMapping for a dataset that has a dataset-level
    subscription at one node and a block-level subscription at another.
    """
    dataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
    blockOne = "%s#%s" % (dataset, makeUUID())
    blockTwo = "%s#%s" % (dataset, makeUUID())

    # Register the dataset and both of its blocks with PhEDEx
    injection = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
    injectedDataset = injection.getDataset(dataset)
    injectedDataset.getFileblock(blockOne, 'y')
    injectedDataset.getFileblock(blockTwo, 'y')
    blockXml = injection.save()
    self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockXml)

    # Create a dataset level subscription to a node
    subForDataset = PhEDExSubscription([dataset], "T1_UK_RAL_MSS", "Saturn",
                                       requestOnly = "n")
    xmlForDataset = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                     subForDataset.getDatasetPaths())
    self.phedexApi.subscribe(subForDataset, xmlForDataset)

    # Create a block level subscription to a different node
    subForBlocks = PhEDExSubscription([dataset], "T1_DE_KIT_MSS", "Saturn",
                                      level = "block", requestOnly = "n")
    self.phedexApi.subscribe(subForBlocks, blockXml)

    mapping = self.phedexApi.getSubscriptionMapping(dataset)
    expectedDatasetNodes = set()
    expectedDatasetNodes.add("T1_UK_RAL_MSS")
    self.assertEqual(mapping[dataset], expectedDatasetNodes,
                     "Error: Dataset subscription is wrong.")

    mapping = self.phedexApi.getSubscriptionMapping(blockOne)
    nodeCount = len(mapping[blockOne])
    self.assertEqual(nodeCount, 2,
                     "Error: Wrong number of nodes in block subscription.")
    hasRAL = "T1_UK_RAL_MSS" in mapping[blockOne]
    self.assertTrue(hasRAL, "Error: RAL missing from block sub.")
    hasKIT = "T1_DE_KIT_MSS" in mapping[blockOne]
    self.assertTrue(hasKIT, "Error: KIT missing from block sub.")
    return
def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
    """
    _deleteBlocksPhEDExCalls_

    Post and approve a block-level PhEDEx deletion for one location and
    flag the affected blocks as deleted.

    blocksToDelete maps dataset paths to lists of block names. Every step,
    including the final bookkeeping call, runs under one exception handler
    that logs the failure and returns normally.
    """
    autoComment = "WMAgent blocks auto-delete from %s" % location
    deletion = PhEDExDeletion(blocksToDelete.keys(), location, level='block',
                              comments=autoComment, blocks=blocksToDelete)

    try:
        datasetsAndBlocks = deletion.getDatasetsAndBlocks()
        xmlPayload = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl, datasetsAndBlocks)
        logging.debug(str(xmlPayload))

        created = self.phedex.delete(deletion, xmlPayload)['phedex']['request_created']
        # auto-approve deletion request
        self.phedex.updateRequest(created[0]['id'], 'approve', location)

        binds = []
        for dataset in blocksToDelete:
            binds.extend({'DELETED': 1, 'BLOCKNAME': name}
                         for name in blocksToDelete[dataset])
        self.markBlocksDeleted.execute(binds, transaction=False)
    except Exception as ex:
        logging.error(
            "Something went wrong when communicating with PhEDEx, will try again later."
        )
        logging.error("Exception: %s", str(ex))

    return
def algorithm(self, parameters):
    """
    _algorithm_

    Poll the database for datasets and subscribe them.

    The work is done in three passes:
      1. collect the unsubscribed datasets (plus the partially subscribed
         ones when self.safeMode is set) and group their workflow specs by
         dataset path;
      2. for every dataset, merge the subscription information of all its
         loadable specs and sort the dataset into siteMap by site and by
         subscription options, marking the dataset in the database;
      3. make one PhEDEx subscription per site and option combination.

    The parameters argument is not used. Returns None.
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()

    # Check for completely unsubscribed datasets
    unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                        transaction = True)

    if self.safeMode:
        # In safe mode the partially subscribed datasets are handled in the
        # same pass; their paths are remembered so that they can be told
        # apart from the completely unsubscribed ones below.
        partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn,
                                                                          transaction = True)
        unsubscribedDatasets.extend(partiallySubscribedDatasets)
        partiallySubscribedSet = set()
        for entry in partiallySubscribedDatasets:
            partiallySubscribedSet.add(entry["path"])

    # Map the datasets to their specs
    specDatasetMap = {}
    for unsubscribedDataset in unsubscribedDatasets:
        datasetPath = unsubscribedDataset["path"]
        workflow = unsubscribedDataset["workflow"]
        spec = unsubscribedDataset["spec"]

        if datasetPath not in specDatasetMap:
            specDatasetMap[datasetPath] = []
        specDatasetMap[datasetPath].append({"workflow" : workflow, "spec" : spec})

    # specCache: spec location -> loaded WMWorkloadHelper
    specCache = {}
    # siteMap: site -> {option tuple -> list of dataset paths}
    siteMap = {}
    # Distribute the subscriptions by site, type and priority
    # This is to make as few subscriptions as possible
    # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
    # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
    # Note that the keys built for non-custodial sites below have only the
    # first three entries.
    for dataset in specDatasetMap:
        # Aggregate all the different subscription configurations
        subInfo = {}
        for entry in specDatasetMap[dataset]:
            if not entry["spec"]:
                # Can't use this spec, there isn't one
                continue
            # Load spec if not in the cache
            if entry["spec"] not in specCache:
                helper = WMWorkloadHelper()
                try:
                    helper.load(entry["spec"])
                    specCache[entry["spec"]] = helper
                except Exception:
                    #Couldn't load it , alert and carry on
                    msg = "Couldn't load spec: %s" % entry["spec"]
                    logging.error(msg)
                    self.sendAlert(7, msg = msg)
                    continue
            #If we are running in safe mode, we need to know if the workflow is ready

            # We have the spec, get the info
            helper = specCache[entry["spec"]]
            workflowSubInfo = helper.getSubscriptionInformation()
            datasetSubInfo = workflowSubInfo.get(dataset, None)
            if datasetSubInfo and subInfo:
                # Second or later configuration for this dataset: merge the
                # site lists and settle the priority
                subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"], datasetSubInfo["CustodialSites"])
                subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"], datasetSubInfo["NonCustodialSites"])
                subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"], datasetSubInfo["AutoApproveSites"])
                subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"], datasetSubInfo["Priority"])
            elif datasetSubInfo:
                # First configuration found: used as is (same object, not a
                # copy)
                subInfo = datasetSubInfo

        # We now have aggregated subscription information for this dataset in subInfo
        # Distribute it by site
        if not subInfo:
            #Nothing to do, log and continue
            msg = "No subscriptions configured for dataset %s" % dataset
            logging.warning(msg)
            self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                        conn = myThread.transaction.conn,
                                        transaction = True)
            continue
        # Make sure that a site is not configured both as non custodial and custodial
        # Non-custodial is believed to be the right choice
        subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"]))
        for site in subInfo["CustodialSites"]:
            if site not in siteMap:
                siteMap[site] = {}
            # Custodial keys have four entries; the last one (Move) is False
            # only for a safe-mode dataset that is not partially subscribed
            if self.safeMode and dataset not in partiallySubscribedSet:
                tupleKey = (subInfo["Priority"], True, False, False)
            else:
                tupleKey = (subInfo["Priority"], True, False, True)
            if tupleKey not in siteMap[site]:
                siteMap[site][tupleKey] = []
            siteMap[site][tupleKey].append(dataset)

        # If we are in safe mode and this is a partially subscribed dataset,
        # then the non-custodial were done in a previous cycle
        if self.safeMode and dataset in partiallySubscribedSet:
            self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                        conn = myThread.transaction.conn,
                                        transaction = True)
            continue

        for site in subInfo["NonCustodialSites"]:
            if site not in siteMap:
                siteMap[site] = {}
            autoApprove = False
            if site in subInfo["AutoApproveSites"]:
                autoApprove = True
            # Non-custodial keys have three entries: there is no Move flag
            tupleKey = (subInfo["Priority"], False, autoApprove)
            if tupleKey not in siteMap[site]:
                siteMap[site][tupleKey] = []
            siteMap[site][tupleKey].append(dataset)

        self.markSubscribed.execute(dataset, subscribed = 1,
                                    conn = myThread.transaction.conn,
                                    transaction = True)

    # Actually request the subscriptions
    for site in siteMap:
        # Check that the site is valid
        if site not in self.cmsToPhedexMap:
            msg = "Site %s doesn't appear to be valid to PhEDEx" % site
            logging.error(msg)
            self.sendAlert(7, msg = msg)
            continue
        for subscriptionFlavor in siteMap[site]:
            datasets = siteMap[site][subscriptionFlavor]
            # Use the MSS node of the site when it has one, the Disk node
            # otherwise
            if "MSS" in self.cmsToPhedexMap[site]:
                phedexNode = self.cmsToPhedexMap[site]["MSS"]
            else:
                phedexNode = self.cmsToPhedexMap[site]["Disk"]
            logging.info("Subscribing %s to %s" % (datasets, site))
            options = {"custodial" : "n", "requestOnly" : "y",
                       "priority" : subscriptionFlavor[0].lower(),
                       "move" : "n"}
            if subscriptionFlavor[1]:
                options["custodial"] = "y"
                # Index 3 exists only on custodial keys, so it is read only
                # inside this branch
                if subscriptionFlavor[3]:
                    options["move"] = "y"
            if subscriptionFlavor[2]:
                # Auto-approved sites get a real subscription instead of a
                # request-only one
                options["requestOnly"] = "n"

            newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                 **options)

            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                       newSubscription.getDatasetPaths())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)

    myThread.transaction.commit()
    return
def subscribeDatasets(self):
    """
    _subscribeDatasets_

    Turn the unsubscribed dataset rows from the database into PhEDEx
    subscriptions.

    Rows for sites unknown to PhEDEx are reported and skipped. Rows that
    match an existing subscription or transfer request are only ticked as
    done. Everything else is compacted into as few subscriptions as
    possible and requested; the ids of the successful ones are recorded
    through self.markSubscribed.
    """
    logging.info("Starting subscribeDatasets method")

    # Rows describing the datasets that still lack a subscription
    rows = self.getUnsubscribed.execute()

    # Subscription ids to tick as subscribed in the database
    doneIds = []

    # Container that sorts and merges the PhEDEx subscription objects
    pending = SubscriptionList()

    for row in rows:
        site = row['site']
        knownNode = site in self.phedexNodes['MSS'] or site in self.phedexNodes['Disk']
        if not knownNode:
            msg = ("Site %s doesn't appear to be valid to PhEDEx, " % site) + \
                  ("skipping subscription: %s" % row['id'])
            logging.error(msg)
            self.sendAlert(7, msg=msg)
            continue

        if site not in self.phedexNodes['MSS']:
            # Avoid custodial subscriptions to disk nodes
            row['custodial'] = 'n'
        elif site.startswith("T1"):
            # Avoid auto approval in T1 sites
            row['request_only'] = 'y'

        candidate = PhEDExSubscription(row['path'], site, row['phedex_group'],
                                       priority=row['priority'],
                                       move=row['move'],
                                       custodial=row['custodial'],
                                       request_only=row['request_only'],
                                       subscriptionId=row['id'])

        # Duplicates are not requested again, only ticked as done
        isDuplicate = (candidate.matchesExistingSubscription(self.phedex) or
                       candidate.matchesExistingTransferRequest(self.phedex))
        if isDuplicate:
            doneIds.append(row['id'])
        else:
            pending.addSubscription(candidate)

    # Compact the subscriptions
    pending.compact()

    for subscription in pending.getSubscriptionList():
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                   subscription.getDatasetPaths())
        logging.debug("subscribeDatasets XMLData: %s", xmlData)
        logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                     subscription.getDatasetPaths(), subscription.getNodes(),
                     subscription.move, subscription.custodial,
                     subscription.request_only)
        try:
            self.phedex.subscribe(subscription, xmlData)
        except HTTPException as ex:
            logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s",
                          ex.status, ex.result)
            continue
        except Exception as ex:
            logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
            continue
        doneIds.extend(subscription.getSubscriptionIds())

    # Register the result in DBSBuffer
    if subscriptionsMadeCount(doneIds) if False else doneIds:
        self.markSubscribed.execute(doneIds)

    return
def subscribeDatasets(self):
    """
    _subscribeDatasets_

    Request PhEDEx subscriptions for the datasets the database reports as
    unsubscribed and record which subscription ids were taken care of.

    A row is skipped with an alert when its site is neither an MSS nor a
    Disk node. A row whose subscription already exists, or is already
    requested, counts as done without a new request.
    """
    logging.info("Starting subscribeDatasets method")

    unsubscribedRows = self.getUnsubscribed.execute()
    handledIds = []
    subscriptionList = SubscriptionList()

    for info in unsubscribedRows:
        site = info['site']

        if not (site in self.phedexNodes['MSS'] or site in self.phedexNodes['Disk']):
            msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
            msg = msg + "skipping subscription: %s" % info['id']
            logging.error(msg)
            self.sendAlert(7, msg = msg)
            continue

        onTape = site in self.phedexNodes['MSS']
        if not onTape:
            # Avoid custodial subscriptions to disk nodes
            info['custodial'] = 'n'
        elif site.startswith("T1"):
            # Avoid auto approval in T1 sites
            info['request_only'] = 'y'

        options = {'priority': info['priority'],
                   'move': info['move'],
                   'custodial': info['custodial'],
                   'request_only': info['request_only'],
                   'subscriptionId': info['id']}
        phedexSub = PhEDExSubscription(info['path'], site, info['phedex_group'], **options)

        # A subscription that already exists, or is already requested,
        # only needs to be ticked off
        if phedexSub.matchesExistingSubscription(self.phedex):
            handledIds.append(info['id'])
            continue
        if phedexSub.matchesExistingTransferRequest(self.phedex):
            handledIds.append(info['id'])
            continue

        subscriptionList.addSubscription(phedexSub)

    # Merge compatible subscriptions before talking to PhEDEx
    subscriptionList.compact()

    for subscription in subscriptionList.getSubscriptionList():
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                   subscription.getDatasetPaths())
        logging.debug("subscribeDatasets XMLData: %s" , xmlData)
        logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                     subscription.getDatasetPaths(), subscription.getNodes(),
                     subscription.move, subscription.custodial, subscription.request_only)

        try:
            self.phedex.subscribe(subscription, xmlData)
        except HTTPException as ex:
            logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            handledIds.extend(subscription.getSubscriptionIds())

    # Register the result in DBSBuffer
    if len(handledIds) > 0:
        self.markSubscribed.execute(handledIds)

    return
def subscribeDatasets(self):
    """
    _subscribeDatasets_

    Poll the database for datasets and subscribe them.

    Sites given as CMS site names are translated to a PhEDEx node through
    self.cmsToPhedexMap (MSS node when available, Disk node otherwise);
    unknown sites are reported and skipped. Subscriptions to non-MSS nodes
    are forced to be non-custodial, and non-custodial subscriptions are
    never moves. Errors raised while subscribing are logged and swallowed
    so that the subscription is retried later.
    """
    myThread = threading.current_thread()
    myThread.transaction.begin()

    # Check for completely unsubscribed datasets
    unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                        transaction = True)

    # Keep a list of subscriptions to tick as subscribed in the database
    subscriptionsMade = []

    # Create a list of subscriptions as defined by the PhEDEx data structures
    subs = SubscriptionList()

    # Create the subscription objects and add them to the list
    # The list takes care of the sorting internally
    for subInfo in unsubscribedDatasets:
        site = subInfo['site']

        if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            # Get the phedex node from CMS site
            site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

        # Avoid custodial subscriptions to disk nodes
        if site not in self.phedexNodes['MSS']:
            subInfo['custodial'] = 'n'
        # Avoid move subscriptions and replica
        if subInfo['custodial'] == 'n':
            subInfo['move'] = 'n'

        phedexSub = PhEDExSubscription(subInfo['path'], site,
                                       self.group, priority = subInfo['priority'],
                                       move = subInfo['move'], custodial = subInfo['custodial'],
                                       request_only = subInfo['request_only'],
                                       subscriptionId = subInfo['id'])

        # Check if the subscription is a duplicate
        if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
            subscriptionsMade.append(subInfo['id'])
            continue

        # Add it to the list
        subs.addSubscription(phedexSub)

    # Compact the subscriptions
    subs.compact()

    for subscription in subs.getSubscriptionList():
        try:
            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                       subscription.getDatasetPaths())
            logging.debug(str(xmlData))
            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(), subscription.getNodes(),
                         subscription.move, subscription.custodial,
                         subscription.request_only)
            self.phedex.subscribe(subscription, xmlData)
        except Exception as ex:
            # "except Exception, ex" is Python 2 only syntax; the "as" form
            # is valid on Python 2.6+ and Python 3
            logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s", str(ex))
        else:
            subscriptionsMade.extend(subscription.getSubscriptionIds())

    # NOTE(review): as visible here, subscriptionsMade is never written back
    # to the database and the transaction begun above is never committed -
    # confirm whether the tail of this method is missing.
def algorithm(self, parameters):
    """
    _algorithm_

    Poll the database for datasets and subscribe them.

    Works in three steps inside one transaction: group the pending rows by
    dataset path, merge the subscription settings found in each dataset's
    workload specs, then request one PhEDEx subscription per site and
    option set so that as few requests as possible are made.
    """
    transaction = threading.current_thread().transaction
    transaction.begin()

    def flagDataset(datasetPath, state):
        # Record the subscription state of one dataset in the open transaction
        self.markSubscribed.execute(datasetPath,
                                    subscribed=state,
                                    conn=transaction.conn,
                                    transaction=True)

    # Datasets that have no subscription at all
    pendingDatasets = self.getUnsubscribed.execute(conn=transaction.conn,
                                                   transaction=True)

    if self.safeMode:
        # In safe mode the partially subscribed datasets are revisited too
        partialDatasets = self.getPartiallySubscribed.execute(conn=transaction.conn,
                                                              transaction=True)
        pendingDatasets.extend(partialDatasets)
        partiallySubscribedSet = set(row["path"] for row in partialDatasets)

    # Group the workflow/spec pairs by dataset path
    specsByDataset = {}
    for row in pendingDatasets:
        path = row["path"]
        pair = {"workflow": row["workflow"], "spec": row["spec"]}
        specsByDataset.setdefault(path, []).append(pair)

    loadedSpecs = {}

    # siteMap[site] maps an option tuple to the datasets sharing those options.
    # Custodial keys are (Priority, True, AutoApprove, Move); non-custodial
    # keys are (Priority, False, AutoApprove) and carry no Move entry.
    siteMap = {}

    for dataset, specEntries in specsByDataset.items():
        # Merge the subscription settings of every usable spec
        subInfo = {}
        for entry in specEntries:
            specPath = entry["spec"]
            if not specPath:
                # No spec recorded for this entry, nothing to read
                continue

            if specPath not in loadedSpecs:
                helper = WMWorkloadHelper()
                try:
                    helper.load(specPath)
                    loadedSpecs[specPath] = helper
                except Exception:
                    # Could not load it: alert and move on to the next entry
                    msg = "Couldn't load spec: %s" % specPath
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue

            workflowSubInfo = loadedSpecs[specPath].getSubscriptionInformation()
            datasetSubInfo = workflowSubInfo.get(dataset, None)
            if not datasetSubInfo:
                continue
            if not subInfo:
                # First configuration seen for this dataset
                subInfo = datasetSubInfo
                continue

            for siteListKey in ("CustodialSites", "NonCustodialSites", "AutoApproveSites"):
                subInfo[siteListKey] = extendWithoutDups(subInfo[siteListKey],
                                                         datasetSubInfo[siteListKey])
            subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"],
                                                     datasetSubInfo["Priority"])

        if not subInfo:
            # Nothing configured: log it and close the dataset
            msg = "No subscriptions configured for dataset %s" % dataset
            logging.warning(msg)
            flagDataset(dataset, self.terminalSubscriptionState)
            continue

        # A site listed as both custodial and non-custodial is kept as
        # non-custodial only
        subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) -
                                         set(subInfo["NonCustodialSites"]))

        # Custodial subscriptions are replicas only for safe-mode datasets
        # that are not yet partially subscribed; otherwise they are moves
        custodialMove = not (self.safeMode and dataset not in partiallySubscribedSet)

        for site in subInfo["CustodialSites"]:
            autoApprove = site in subInfo["AutoApproveSites"]
            tupleKey = (subInfo["Priority"], True, autoApprove, custodialMove)
            siteMap.setdefault(site, {}).setdefault(tupleKey, []).append(dataset)

        # For a partially subscribed dataset in safe mode the non-custodial
        # subscriptions were made in a previous cycle
        if self.safeMode and dataset in partiallySubscribedSet:
            flagDataset(dataset, self.terminalSubscriptionState)
            continue

        for site in subInfo["NonCustodialSites"]:
            autoApprove = site in subInfo["AutoApproveSites"]
            # Non-custodial is never a move, hence the 3-entry key
            tupleKey = (subInfo["Priority"], False, autoApprove)
            siteMap.setdefault(site, {}).setdefault(tupleKey, []).append(dataset)

        flagDataset(dataset, 1)

    # Actually request the subscriptions
    for site, flavors in siteMap.items():
        if site not in self.cmsToPhedexMap:
            msg = "Site %s doesn't appear to be valid to PhEDEx" % site
            logging.error(msg)
            self.sendAlert(7, msg=msg)
            continue

        for subscriptionFlavor, datasets in flavors.items():
            # Use the MSS node when the site has one, the Disk node otherwise
            nodeInfo = self.cmsToPhedexMap[site]
            isMSS = "MSS" in nodeInfo
            phedexNode = nodeInfo["MSS"] if isMSS else nodeInfo["Disk"]

            subscribingMsg = "Subscribing %s to %s" % (datasets, site)
            logging.info(subscribingMsg)

            priority, custodial, autoApprove = subscriptionFlavor[:3]
            # Custodial is only honoured on MSS nodes; elsewhere it falls back
            # to non-custodial. The Move entry exists only on custodial keys,
            # so it is read only after the custodial check has passed.
            asCustodial = custodial and isMSS
            asMove = asCustodial and subscriptionFlavor[3] and not self.replicaOnly

            options = {
                "custodial": "y" if asCustodial else "n",
                "requestOnly": "n" if autoApprove else "y",
                "priority": priority.lower(),
                "move": "y" if asMove else "n",
            }

            optionsMsg = "Request options: Custodial - %s, Move - %s, Request Only - %s" % (
                options["custodial"].upper(),
                options["move"].upper(),
                options["requestOnly"].upper())
            logging.info(optionsMsg)

            newSubscription = PhEDExSubscription(datasets, phedexNode,
                                                 self.group, **options)
            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                       newSubscription.getDatasetPaths())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)

    transaction.commit()
    return