Ejemplo n.º 1
0
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue(requestIDs[0].has_key("id"),
                        "Error: Missing request ID")
        return
Ejemplo n.º 2
0
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        
        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                      "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue(requestIDs[0].has_key("id"),
                        "Error: Missing request ID")
        return
Ejemplo n.º 3
0
    def subscribeTier0Blocks(self):
        """
        _subscribeTier0Blocks_

        Subscribe blocks to the Tier-0 where a replica subscription
        already exists. All Tier-0 subscriptions are move, custodial
        and autoapproved with high priority.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for candidate blocks for subscription
        blocksToSubscribe = self.getUnsubscribedBlocks.execute(node = 'T0_CH_CERN',
                                                               conn = myThread.transaction.conn,
                                                               transaction = True)

        if not blocksToSubscribe:
            return

        # For the blocks we don't really care about the subscription options
        # We are subscribing all blocks with the same recipe.
        subscriptionMap = {}
        for subInfo in blocksToSubscribe:
            dataset = subInfo['path']
            if dataset not in subscriptionMap:
                subscriptionMap[dataset] = []
            subscriptionMap[dataset].append(subInfo['blockname'])

        site = 'T0_CH_CERN'
        custodial = 'y'
        request_only = 'n'
        move = 'y'
        priority = 'High'

        # Get the phedex node
        phedexNode = self.cmsToPhedexMap[site]["MSS"]

        logging.error("Subscribing %d blocks, from %d datasets to the Tier-0" % (len(subscriptionMap), sum([len(x) for x in subscriptionMap.values()])))

        newSubscription = PhEDExSubscription(subscriptionMap.keys(),
                                             phedexNode, self.group,
                                             custodial = custodial,
                                             request_only = request_only,
                                             move = move,
                                             priority = priority,
                                             level = 'block',
                                             blocks = subscriptionMap)

        # TODO: Check for blocks already subscribed

        try:
            xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                     newSubscription.getDatasetsAndBlocks())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)
        except Exception, ex:
            logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s" % str(ex))
Ejemplo n.º 4
0
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        # NOTE: leaving it broken on purpose, we do NOT want to subscribe
        # data via unit tests :-)
        #injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset],
                                            "T1_UK_RAL_MSS",
                                            "Saturn",
                                            request_only="y")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset],
                                          "T1_DE_KIT_MSS",
                                          "Saturn",
                                          level="block",
                                          request_only="y")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return
Ejemplo n.º 5
0
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset],
                                            "T1_UK_RAL_MSS",
                                            "Saturn",
                                            requestOnly="n")
        datasetSpec = XMLDrop.makePhEDExXMLForDatasets(
            self.dbsTestUrl, testDatasetSub.getDatasetPaths())
        self.phedexApi.subscribe(testDatasetSub, datasetSpec)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset],
                                          "T1_DE_KIT_MSS",
                                          "Saturn",
                                          level="block",
                                          requestOnly="n")
        self.phedexApi.subscribe(testBlockSub, blockSpec)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], set(["T1_UK_RAL_MSS"]),
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return
Ejemplo n.º 6
0
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them to MSS.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        if not self.seMap.has_key("MSS"):
            return

        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        datasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            seName = unsubscribedDataset["se_name"]

            if not self.seMap["MSS"].has_key(seName):
                msg = "No MSS node for SE: %s" % seName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            if not datasetMap.has_key(self.seMap["MSS"][seName]):
                datasetMap[self.seMap["MSS"][seName]] = []
            datasetMap[self.seMap["MSS"][seName]].append(datasetPath)

            self.markSubscribed.execute(datasetPath, conn = myThread.transaction.conn,
                                        transaction = True)

        for siteMSS in datasetMap.keys():
            logging.info("Subscribing %s to %s" % (datasetMap[siteMSS],
                                                   siteMSS))
            newSubscription = PhEDExSubscription(datasetMap[siteMSS], siteMSS, self.group,
                                                 custodial = "y", requestOnly = "y")
            
            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, 
                                                       newSubscription.getDatasetPaths())
            print xmlData
            self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
Ejemplo n.º 7
0
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", requestOnly = "n")
        datasetSpec = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                       testDatasetSub.getDatasetPaths())
        self.phedexApi.subscribe(testDatasetSub, datasetSpec)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level = "block", requestOnly = "n")
        self.phedexApi.subscribe(testBlockSub, blockSpec)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], set(["T1_UK_RAL_MSS"]),
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return
Ejemplo n.º 8
0
 def transferRequest(self, req):
     "Send request to Phedex and return status of request subscription"
     datasets = req.get('datasets', [])
     sites = req.get('sites', [])
     if datasets and sites:
         self.logger.debug("### creating subscription for: %s",
                           pformat(req))
         subscription = PhEDExSubscription(datasets, sites,
                                           self.msConfig['group'])
         # TODO: implement how to get transfer id
         tid = hashlib.md5(str(subscription)).hexdigest()
         # TODO: when ready enable submit subscription step
         # self.phedex.subscribe(subscription)
         return tid
Ejemplo n.º 9
0
 def add(self, requests):
     "Add requests to task manager"
     # loop over requests: for non-existing pid submit phedex subscription for existing one check their status
     for request, rdict in requests.items():
         if self.store.exists(request):
             self.checkStatus(request)
         else:
             # if request does not exist in backend submit its subscription and add it to backend
             rdict = requests[request]
             self.store.add(request, rdict)
             datasets = rdict.get('datasets')
             sites = rdict.get('sites')
             subscription = PhEDExSubscription(datasets, sites, self.group)
             if self.verbose:
                 print("### add subscription", subscription)
Ejemplo n.º 10
0
    def subscribeBlocks(self):
        """
        _subscribeBlocks_
        Poll the database and subscribe blocks not yet subscribed.
        """
        logging.info("Starting subscribeBlocks method")

        unsubBlocks = self.getUnsubscribedBlocks.execute()
        # now organize those by location in order to minimize phedex requests
        # also remove blocks that this component is meant to skip
        unsubBlocks = self.organizeBlocksByLocation(unsubBlocks)

        for location, blockDict in unsubBlocks.items():
            phedexSub = PhEDExSubscription(blockDict.keys(),
                                           location,
                                           self.phedexGroup,
                                           blocks=blockDict,
                                           level="block",
                                           priority="normal",
                                           move="n",
                                           custodial="n",
                                           request_only="n",
                                           comments="WMAgent production site")
            try:
                res = self.phedex.subscribe(phedexSub)
                transferId = res['phedex']['request_created'][0]['id']
                logging.info(
                    "Subscribed %d blocks for %d datasets, to location: %s, under request ID: %s",
                    len(phedexSub.getBlocks()),
                    len(phedexSub.getDatasetPaths()), phedexSub.getNodes(),
                    transferId)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx block subscription failed with HTTPException: %s %s",
                    ex.status, ex.result)
                logging.error("The subscription object was: %s",
                              str(phedexSub))
            except Exception as ex:
                logging.exception(
                    "PhEDEx block subscription failed with Exception: %s",
                    str(ex))
            else:
                binds = []
                for blockname in phedexSub.getBlocks():
                    binds.append({
                        'RULE_ID': str(transferId),
                        'BLOCKNAME': blockname
                    })
                self.setBlockRules.execute(binds)

        return
Ejemplo n.º 11
0
    def test_SubscriptionList(self):
        """
        _SubscriptionList_

        Whatever
        """

        policy = SubscriptionList()
        # what will you do with run ID.
        row = [6, "/Cosmics/Test-CRAFTv8/RAW", 1, "T2_CH_CAF", 'high', 'y']
        results = []
        for i in range(6):
            results.append(row)

        results.append(
            [7, "/Cosmics/Test-CRAFTv8/ALCARECO", 2, "FNAL", 'normal', 'y'])
        results.append(
            [8, "/Cosmics/Test-CRAFTv8/RECO", 1, "T2_CH_CAF", 'high', 'y'])
        results.append(
            [8, "/Cosmics/Test-CRAFTv8/RECO", 2, "FNAL", 'high', 'y'])

        print policy.getSubscriptionList()

        for row in results:
            # make a tuple (dataset_path_id, dataset_path_name)
            # make a tuple (node_id, node_name)
            subscription = PhEDExSubscription((row[0], row[1]),
                                              (row[2], row[3]), row[4], row[5])
            policy.addSubscription(subscription)

        i = 0
        for sub in policy.getSubscriptionList():
            i += 1
            print "Subscription %s" % i
            print sub.getDatasetPaths()
            print sub.getNodes()

        return
Ejemplo n.º 12
0
    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(
                subInfo['path'],
                site,
                subInfo['phedex_group'],
                priority=subInfo['priority'],
                move=subInfo['move'],
                custodial=subInfo['custodial'],
                request_only=subInfo['request_only'],
                subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            xmlData = XMLDrop.makePhEDExXMLForDatasets(
                self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s", xmlData)

            logging.info(
                "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                subscription.getDatasetPaths(), subscription.getNodes(),
                subscription.move, subscription.custodial,
                subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with Exception: %s",
                    str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return
Ejemplo n.º 13
0
    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:

                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo['id']
                    logging.error(msg)
                    self.sendAlert(7, msg = msg)
                    continue

                # Get the phedex node from CMS site
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"] 

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']: subInfo['custodial'] = 'n'
            # Avoid move subscriptions and replica
            if subInfo['custodial'] == 'n': subInfo['move'] = 'n'
           
            phedexSub = PhEDExSubscription(subInfo['path'], site,
                                           self.group, priority = subInfo['priority'],
                                           move = subInfo['move'], custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'], subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (subscription.getDatasetPaths(), subscription.getNodes())
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (subscription.move, subscription.custodial, subscription.request_only)
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception, ex:
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())
Ejemplo n.º 14
0
    def testSubscriptionList(self):
        """
        _SubscriptionList_

        Check that we can organize and agreggate correctly a bunch of different subscriptions
        in standard scenarios
        """
        subList = SubscriptionList()
        # Two GEN datasets subscribed to many sites non-custodially and custodially to one
        subs = []
        genDatasetA = "/DeadlyNeurotoxinOnTestSubjectSim/Run1970-Test-v2/GEN"
        subs.append({
            "datasetPathList": genDatasetA,
            "nodeList": "T1_US_FNAL",
            "group": "dataops",
            "custodial": "y",
            "move": "y"
        })
        subs.append({
            "datasetPathList": genDatasetA,
            "nodeList": "T2_IT_Bari",
            "group": "dataops",
            "request_only": "n"
        })
        subs.append({
            "datasetPathList": genDatasetA,
            "nodeList": "T2_CH_CERN",
            "group": "dataops",
            "request_only": "n"
        })
        subs.append({
            "datasetPathList": genDatasetA,
            "nodeList": "T2_US_Wisconsin",
            "group": "dataops"
        })
        genDatasetB = "/NotEnoughEnergyToLieIn1.1V/Run1970-Potato-v2/GEN"
        subs.append({
            "datasetPathList": genDatasetB,
            "nodeList": "T1_IT_CNAF",
            "group": "dataops",
            "custodial": "y",
            "move": "y"
        })
        subs.append({
            "datasetPathList": genDatasetB,
            "nodeList": "T2_IT_Bari",
            "group": "dataops",
            "request_only": "n"
        })
        subs.append({
            "datasetPathList": genDatasetB,
            "nodeList": "T2_CH_CERN",
            "group": "dataops",
            "request_only": "n"
        })
        subs.append({
            "datasetPathList": genDatasetB,
            "nodeList": "T2_US_Wisconsin",
            "group": "dataops"
        })
        # RECO,DQM,AOD datasets subscribed custodially to 2 sites
        recoDatasetA = '/TestWeightedCubes/Run1970-Test-v2/%s'
        recoDatasetB = '/RepulsiveGel/Run1970-Test-v2/%s'
        subs.append({
            "datasetPathList": recoDatasetA % 'AOD',
            "nodeList": "T1_US_FNAL",
            "group": "dataops",
            "custodial": "y",
            "move": "y"
        })
        subs.append({
            "datasetPathList": recoDatasetA % 'DQM',
            "nodeList": "T1_US_FNAL",
            "group": "dataops",
            "custodial": "y",
            "move": "y"
        })
        subs.append({
            "datasetPathList": recoDatasetB % 'AOD',
            "nodeList": "T1_DE_KIT",
            "group": "dataops",
            "custodial": "y",
            "move": "y"
        })
        subs.append({
            "datasetPathList": recoDatasetB % 'DQM',
            "nodeList": "T1_DE_KIT",
            "group": "dataops",
            "custodial": "y",
            "move": "y"
        })

        for sub in subs:
            phedexSub = PhEDExSubscription(**sub)
            subList.addSubscription(phedexSub)

        # One subscription per node
        self.assertEqual(len(subList.getSubscriptionList()), 6)
        goldenDatasetLists = [
            set([genDatasetA, genDatasetB]),
            set([genDatasetA, recoDatasetA % 'AOD', recoDatasetA % 'DQM']),
            set([genDatasetB]),
            set([recoDatasetB % 'AOD', recoDatasetB % 'DQM'])
        ]
        for sub in subList.getSubscriptionList():
            self.assertTrue(set(sub.getDatasetPaths()) in goldenDatasetLists)

        subList.compact()

        # Compact should have reduced 2 of them to 1
        goldenNodeLists = [
            set(["T1_US_FNAL"]),
            set(["T2_IT_Bari", "T2_CH_CERN"]),
            set(["T1_IT_CNAF"]),
            set(["T1_DE_KIT"]),
            set(["T2_US_Wisconsin"])
        ]
        self.assertEqual(len(subList.getSubscriptionList()), 5)
        for sub in subList.getSubscriptionList():
            self.assertTrue(set(sub.getNodes()) in goldenNodeLists)
        return
Ejemplo n.º 15
0
    def makeTransferRequest(self, wflow):
        """
        Send request to PhEDEx and return status of request subscription
        This method does the following:
          1. return if there is no workflow data to be transferred
          2. check if the data input campaign is in the database, skip if not
          3. _getValidSites: using the workflow site lists and the campaign configuration,
             find a common list of sites (converted to PNNs). If the PNN is out of quota,
             it's also removed from this list
          4. create the transfer record dictionary
          5. for every final node
             5.1. if it's a pileup dataset, pick a random node and subscribe the whole dataset
             5.2. else, retrieve chunks of blocks to be subscribed (evenly distributed)
             5.3. update node usage with the amount of data subscribed
          6. re-evaluate nodes with quota exceeded
          7. return the transfer record, with a list of transfer IDs
        :param wflow: workflow object
        :return: boolean whether it succeeded or not, and a subscription dictionary {"dataset":transferIDs}
        """
        response = []
        success = True
        if not (wflow.getParentBlocks() or wflow.getPrimaryBlocks() or wflow.getSecondarySummary()):
            self.logger.info("Request %s does not have any further data to transfer", wflow.getName())
            return success, response

        self.logger.info("Handling data subscriptions for request: %s", wflow.getName())

        for dataIn in wflow.getDataCampaignMap():
            if dataIn["type"] == "parent":
                msg = "Skipping 'parent' data subscription (done with the 'primary' data), for: %s" % dataIn
                self.logger.info(msg)
                continue
            elif dataIn["type"] == "secondary" and dataIn['name'] not in wflow.getSecondarySummary():
                # secondary already in place
                continue
            if dataIn['campaign'] not in self.campaigns:
                msg = "Data placement can't proceed because campaign '%s' was not found." % dataIn["campaign"]
                msg += " Skipping this workflow until the campaign gets created."
                self.logger.warning(msg)
                return False, response

            nodes = self._getValidSites(wflow, dataIn)
            if not nodes:
                msg = "There are no RSEs with available space for %s. " % wflow.getName()
                msg += "Skipping this workflow until RSEs get enough free space"
                self.logger.warning(msg)
                return False, response

            transRec = newTransferRec(dataIn)
            for blocks, dataSize, idx in self._decideDataDestination(wflow, dataIn, len(nodes)):
                if not blocks and dataIn["type"] == "primary":
                    # no valid files in any blocks, it will likely fail in global workqueue
                    return success, response
                if blocks:
                    subLevel = "block"
                    data = {dataIn['name']: blocks}
                else:
                    # then it's a dataset level subscription
                    subLevel = "dataset"
                    data = None

                subscription = PhEDExSubscription(datasetPathList=dataIn['name'],
                                                  nodeList=nodes[idx],
                                                  group=self.msConfig['quotaAccount'],
                                                  level=subLevel,
                                                  priority="low",
                                                  request_only=self.msConfig["phedexRequestOnly"],
                                                  blocks=data,
                                                  comments="WMCore MicroService automated subscription")
                msg = "Creating '%s' level subscription for %s dataset: %s" % (subscription.level,
                                                                               dataIn['type'],
                                                                               dataIn['name'])
                if wflow.getParentDataset():
                    msg += ", where parent blocks have also been added for dataset: %s" % wflow.getParentDataset()
                self.logger.info(msg)

                if self.msConfig.get('enableDataTransfer', True):
                    # Force request-only subscription
                    # to any data transfer going above some threshold (do not auto-approve)
                    aboveWarningThreshold = self.msConfig.get('warningTransferThreshold') > 0. and \
                        dataSize > self.msConfig.get('warningTransferThreshold')
                    if aboveWarningThreshold and subscription.request_only != "y":
                        subscription.request_only = "y"

                    # Then make the data subscription, for real!!!
                    success, transferId = self._subscribeData(subscription, wflow.getName(), dataIn['name'])
                    if not success:
                        break
                    if transferId:
                        transRec['transferIDs'].add(transferId)

                    # Warn about data transfer subscriptions going above some treshold
                    if aboveWarningThreshold:
                        emailSubject = "[MS] Large pending data transfer under request id: {transferid}".format(
                            transferid=transferId)
                        emailMsg = "Workflow: {}\nhas a large amount of ".format(wflow.getName())
                        emailMsg += "data subscribed: {} TB,\n".format(teraBytes(dataSize))
                        emailMsg += "for {} data: {}.""".format(dataIn['type'], dataIn['name'])
                        self.emailAlert.send(emailSubject, emailMsg)
                        self.logger.info(emailMsg)

                    # and update some instance caches
                    self.rseQuotas.updateNodeUsage(nodes[idx], dataSize)
                    if subLevel == 'dataset':
                        self.dsetCounter += 1
                    else:
                        self.blockCounter += len(blocks)
                else:
                    self.logger.info("DRY-RUN: making subscription: %s", subscription)

            transRec['transferIDs'] = list(transRec['transferIDs'])
            response.append(transRec)

        # once the workflow has been completely processed, update the node usage
        self.rseQuotas.evaluateQuotaExceeded()
        return success, response
Ejemplo n.º 16
0
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn,
                                                                              transaction = True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({"workflow" : workflow, "spec" : spec})

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        #Couldn't load it , alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg = msg)
                        continue
                #If we are running in safe mode, we need to know if the workflow is ready
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"], datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"], datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"], datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"], datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                #Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue
            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, False, False)
                else:
                    tupleKey = (subInfo["Priority"], True, False, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            self.markSubscribed.execute(dataset, subscribed = 1,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Check that the site is valid
                if "MSS" in self.cmsToPhedexMap[site]:
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {"custodial" : "n", "requestOnly" : "y",
                           "priority" : subscriptionFlavor[0].lower(),
                           "move" : "n"}
                if subscriptionFlavor[1]:
                    options["custodial"] = "y"
                    if subscriptionFlavor[3]:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"

                newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                     **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']: 
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'],
                                           priority = subInfo['priority'],
                                           move = subInfo['move'],
                                           custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'],
                                           subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s" , xmlData)

            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(),
                         subscription.getNodes(),
                         subscription.move,
                         subscription.custodial,
                         subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s", ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return
Ejemplo n.º 18
0
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(
            conn=myThread.transaction.conn, transaction=True)

        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(
                conn=myThread.transaction.conn, transaction=True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]

            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({
                "workflow": workflow,
                "spec": spec
            })

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        #Couldn't load it , alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg=msg)
                        continue
                #If we are running in safe mode, we need to know if the workflow is ready
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(
                        subInfo["CustodialSites"],
                        datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(
                        subInfo["NonCustodialSites"],
                        datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(
                        subInfo["AutoApproveSites"],
                        datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(
                        subInfo["Priority"], datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                #Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(
                    dataset,
                    subscribed=self.terminalSubscriptionState,
                    conn=myThread.transaction.conn,
                    transaction=True)
                continue
            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(
                set(subInfo["CustodialSites"]) -
                set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, autoApprove, False)
                else:
                    tupleKey = (subInfo["Priority"], True, autoApprove, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                # Subscriptions are sorted by options, defined by tupleKey
                # The tuple key has 3 or 4 entries in this order
                # Priority, Custodial, Auto approve, Move (True) or Replica (False)
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(
                    dataset,
                    subscribed=self.terminalSubscriptionState,
                    conn=myThread.transaction.conn,
                    transaction=True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # Non-custodial is never move, so this tuple has only 3 entries
                # TODO: Change tuples to frozensets for clarity
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            self.markSubscribed.execute(dataset,
                                        subscribed=1,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]
                # Check that the site is valid
                isMSS = False
                if "MSS" in self.cmsToPhedexMap[site]:
                    isMSS = True
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]
                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {
                    "custodial": "n",
                    "requestOnly": "y",
                    "priority": subscriptionFlavor[0].lower(),
                    "move": "n"
                }
                if subscriptionFlavor[1] and isMSS:
                    # Custodial subscriptions are only allowed in MSS nodes
                    # If custodial is requested on Non-MSS it fallsback to a non-custodial subscription
                    options["custodial"] = "y"
                    if subscriptionFlavor[3] and not self.replicaOnly:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"
                logging.info(
                    "Request options: Custodial - %s, Move - %s, Request Only - %s"
                    % (options["custodial"].upper(), options["move"].upper(),
                       options["requestOnly"].upper()))
                newSubscription = PhEDExSubscription(datasets, phedexNode,
                                                     self.group, **options)

                xmlData = XMLDrop.makePhEDExXMLForDatasets(
                    self.dbsUrl, newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return