Python PhEDEx.delete Examples

Programming Language: Python

Namespace/Package Name: WMCore.Services.PhEDEx.PhEDEx

Class/Type: PhEDEx

Method/Function: delete

Examples at hotexamples.com: 4

Python PhEDEx.delete - 4 examples found. These are the top rated real world Python examples of WMCore.Services.PhEDEx.PhEDEx.PhEDEx.delete extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

PhEDEx(30)

getNodeMap(19)

getPFN(10)

subscribe(8)

injectBlocks(7)

getReplicaPhEDExNodesForBlocks(7)

getReplicaInfoForBlocks(6)

getSubscriptionMapping(5)

getNodeSE(4)

getNodeTFC(4)

cacheFileName(4)

getInjectedFiles(3)

getBestNodeName(3)

getNodeNames(2)

_getResult(2)

delete(2)

subscriptions(2)

updateRequest(2)

getGroupUsage(1)

getReplicaInfoForFiles(1)

getReplicaSEForBlocks(1)

getAuth(1)

Example #1

Show file

File: commandRunner.py Project: dballesteros7/dev-scripts

def phedex():
    phedexIn = PhEDEx(dict = {'endpoint' : 'https://cmsweb.cern.ch/phedex/datasvc/json/dev/',
                              'logger' : logging},
                    responseType = "json")
#     requests =  phedex.getRequestList(dataset = ['/TauParked/Run2012C-LogError-22Jan2013-v1/RAW-RECO'],
#                           node = 'T2_RU_ITEP')['phedex']['request']
#     for request in requests:
#         requestId = request['id']
#         request =  phedex.getTransferRequests(request = requestId)['phedex']['request']
#         if request:
#             request = request[0]
#             print request
#    x = PhEDExSubscription('/TauParked/Run2012C-22Jan2013-v1/AOD',
#                           'T1_US_FNAL_MSS', 'DataOps', 'dataset', 'low', 'n', 'n', 'n', 'y', subscriptionId = 1)
#   print x.matchesExistingTransferRequest(phedex)
#    print x.matchesExistingSubscription(phedex)

    deletion = PhEDExDeletion('/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO',
                              'T1_CH_CERN_Buffer',
                              level = 'block',
                              comments = 'Blocks automatically deleted from T2_CH_CERN as it has already been processed and transferred to a custodial location',
                              blocks = {'/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO' : 
                                        ['/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO#075ea9e8-7d80-11e0-90de-00163e010039']})
    xmlData = XMLDrop.makePhEDExXMLForBlocks('http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet',
                                             deletion.getDatasetsAndBlocks())
    print str(xmlData)
    response = phedexIn.delete(deletion, xmlData)
    print response
    requestId = response['phedex']['request_created'][0]['id']
    phedexIn.updateRequest(requestId, 'approve', 'T1_CH_CERN_Buffer')

Example #2

Show file

File: PhEDExInjectorPoller.py Project: prozober/WMCore

class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info(
                "SubscribeDataset and deleteBlocks will run every %d polling cycles",
                self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        try:
            nodeMappings = self.phedex.getNodeMap()
        except:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

        self.blocksToRecover = []

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        logging.info("Running PhEDEx injector poller algorithm...")

        self.pollCounter += 1

        if self.blocksToRecover:
            logging.info("""PhEDExInjector Recovery:
                            previous injection call failed,
                            check if files were injected to PhEDEx anyway""")
            self.recoverInjectedFiles()

        self.injectFiles()
        self.closeBlocks()

        if self.pollCounter == self.subFrequency:
            self.pollCounter = 0
            self.deleteBlocks()
            self.subscribeDatasets()

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[
                    datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = {blockName: set()}

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            maxDataset = 20
            maxBlocks = 50
            maxFiles = 5000
            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset in uninjectedFiles[siteName]:

                numberDatasets += 1
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    numberBlocks += 1
                    numberFiles += len(injectData[dataset][block]['files'])
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])

                if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles:

                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []

            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investgation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(
                    self.createRecoveryFileFormat(injectData))
            logging.error(
                "PhEDEx file injection failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s",
                          str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData,
                                                       injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except:
                    # possible deadlock with DBS3Upload, retry once after 5s
                    logging.warning(
                        "Oracle exception during file status update, possible deadlock due to race condition, retry after 5s sleep"
                    )
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            logging.debug("closeBlocks XMLData: %s", xmlData)

            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx block close failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s",
                              str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)

                if "error" not in injectRes:
                    for datasetName in migratedBlocks[siteName]:
                        for blockName in migratedBlocks[siteName][datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)
                else:
                    msg = "Error injecting data %s: %s" % (
                        migratedBlocks[siteName], injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times 
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_
        Find deletable blocks, then decide if to delete based on:
        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check
        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(
                *blockDict.keys())
        except:
            logging.error(
                "Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[
                    blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",
                              blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(
                        block=blockName, complete='y')['phedex']['block']
                except:
                    logging.error(
                        "Couldn't get block info from PhEDEx, retry next cycle"
                    )
                else:
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                            if location not in nodes:
                                logging.debug(
                                    "Block %s not present on %s, mark as deleted",
                                    blockName, location)
                                binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                                self.markBlocksDeleted.execute(binds)
                            elif sites.issubset(nodes):
                                logging.debug(
                                    "Deleting block %s from %s since it is fully transfered",
                                    blockName, location)
                                if location not in deletableEntries:
                                    deletableEntries[location] = {}
                                if dataset not in deletableEntries[location]:
                                    deletableEntries[location][dataset] = set()
                                    deletableEntries[location][dataset].add(
                                        blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:

                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_
        actual PhEDEx calls for block deletion
        """
        deletion = PhEDExDeletion(
            blocksToDelete.keys(),
            location,
            level='block',
            comments="WMAgent blocks auto-delete from %s" % location,
            blocks=blocksToDelete)

        xmlData = XMLDrop.makePhEDExXMLForBlocks(
            self.dbsUrl, deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)

        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error(
                "PhEDEx block delete/approval failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error(
                "PhEDEx block delete/approval failed with Exception: %s",
                str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)

        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(
                subInfo['path'],
                site,
                subInfo['phedex_group'],
                priority=subInfo['priority'],
                move=subInfo['move'],
                custodial=subInfo['custodial'],
                request_only=subInfo['request_only'],
                subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            xmlData = XMLDrop.makePhEDExXMLForDatasets(
                self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s", xmlData)

            logging.info(
                "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                subscription.getDatasetPaths(), subscription.getNodes(),
                subscription.move, subscription.custodial,
                subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with Exception: %s",
                    str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return

Example #3

Show file

File: PhEDExInjectorPoller.py Project: AndresTanasijczuk/WMCore

class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval/pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles", self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        try:
            nodeMappings = self.phedex.getNodeMap()
        except:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

        self.blocksToRecover = []

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUninjected = daofactory(classname = "GetUninjectedFiles")
        self.getMigrated = daofactory(classname = "GetMigratedBlocks")

        self.findDeletableBlocks = daofactory(classname = "GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname = "MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname = "SetBlockClosed")

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        logging.info("Running PhEDEx injector poller algorithm...")

        self.pollCounter += 1

        if self.blocksToRecover:
            logging.info("""PhEDExInjector Recovery:
                            previous injection call failed,
                            check if files were injected to PhEDEx anyway""")
            self.recoverInjectedFiles()

        self.injectFiles()
        self.closeBlocks()

        if self.pollCounter == self.subFrequency:
            self.pollCounter = 0
            self.deleteBlocks()
            self.subscribeDatasets()

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = { blockName: set() }

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks
    
    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            maxDataset = 20
            maxBlocks = 50
            maxFiles = 5000
            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset in uninjectedFiles[siteName]:

                numberDatasets += 1
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    numberBlocks += 1
                    numberFiles += len(injectData[dataset][block]['files'])
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])

                if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles:

                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []

            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investgation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend( self.createRecoveryFileFormat(injectData) )
            logging.error("PhEDEx file injection failed with HTTPException: %s %s", ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData, injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg = msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except:
                    # possible deadlock with DBS3Upload, retry once after 5s
                    logging.warning("Oracle exception during file status update, possible deadlock due to race condition, retry after 5s sleep")
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            logging.debug("closeBlocks XMLData: %s", xmlData)

            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx block close failed with HTTPException: %s %s", ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)

                if "error" not in injectRes:
                    for datasetName in migratedBlocks[siteName]:
                        for blockName in migratedBlocks[siteName][datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)
                else:
                    msg = "Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg = msg)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times 
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return
        
    def deleteBlocks(self):
        """
        _deleteBlocks_
        Find deletable blocks, then decide if to delete based on:
        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check
        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction = False)

        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys())
        except:
            logging.error("Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion", location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",  blockName, location)
                binds = { 'DELETED': 2,
                          'BLOCKNAME': blockName }
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(block = blockName, complete = 'y')['phedex']['block']
                except:
                    logging.error("Couldn't get block info from PhEDEx, retry next cycle")
                else:
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                            if location not in nodes:
                                logging.debug("Block %s not present on %s, mark as deleted", blockName, location)
                                binds = { 'DELETED': 1,
                                          'BLOCKNAME': blockName }
                                self.markBlocksDeleted.execute(binds)
                            elif sites.issubset(nodes):
                                logging.debug("Deleting block %s from %s since it is fully transfered", blockName, location)
                                if location not in deletableEntries:
                                    deletableEntries[location] = {}
                                if dataset not in deletableEntries[location]:
                                    deletableEntries[location][dataset] = set()
                                    deletableEntries[location][dataset].add(blockName)


        binds = []
        for blockName in skippableBlocks:
            binds.append( { 'DELETED': 2,
                            'BLOCKNAME': blockName } )
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:

                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_
        actual PhEDEx calls for block deletion
        """
        deletion = PhEDExDeletion(blocksToDelete.keys(), location,
                                  level = 'block',
                                  comments = "WMAgent blocks auto-delete from %s" % location,
                                  blocks = blocksToDelete)

        xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                 deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)

        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s", ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append( { 'DELETED': 1,
                                    'BLOCKNAME': blockName } )
            self.markBlocksDeleted.execute(binds)

        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']: 
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'],
                                           priority = subInfo['priority'],
                                           move = subInfo['move'],
                                           custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'],
                                           subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s" , xmlData)

            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(),
                         subscription.getNodes(),
                         subscription.move,
                         subscription.custodial,
                         subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s", ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return

Example #4

Show file

File: PhEDExInjectorPoller.py Project: scarletnorberg/WMCore

class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        self.enabled = getattr(config.PhEDExInjector, "enabled", True)
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.phedexGroup = config.PhEDExInjector.phedexGroup

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info(
                "SubscribeDataset and deleteBlocks will run every %d polling cycles",
                self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json",
                             dbsUrl=self.dbsUrl)
        try:
            nodeMappings = self.phedex.getNodeMap()
        except:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        self.blocksToRecover = []

        # X-component configuration is BAD! But it will only be here during the
        # Rucio commissioning within WM
        self.listTiersToSkip = config.RucioInjector.listTiersToInject
        logging.info(
            "Component configured to skip data injection for data tiers: %s",
            self.listTiersToSkip)

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.RucioInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.getUnsubscribedBlocks = daofactory(
            classname="GetUnsubscribedBlocks")
        self.setBlockRules = daofactory(classname="SetBlocksRule")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    @timeFunction
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        if not self.enabled:
            logging.info(
                "PhEDExInjector component is disabled in the configuration, exiting."
            )
            return

        logging.info("Running PhEDEx injector poller algorithm...")
        self.pollCounter += 1

        try:
            if self.blocksToRecover:
                logging.info("""PhEDExInjector Recovery:
                                previous injection call failed,
                                checking if files were injected to PhEDEx anyway"""
                             )
                self.recoverInjectedFiles()

            self.injectFiles()
            self.closeBlocks()

            if self.pollCounter == self.subFrequency:
                self.pollCounter = 0
                self.deleteBlocks()
                self.subscribeDatasets()
                self.subscribeBlocks()
        except HTTPException as ex:
            if hasattr(ex, "status") and ex.status in [502, 503]:
                # then either proxy error or service is unavailable
                msg = "Caught HTTPException in PhEDExInjector. Retrying in the next cycle.\n"
                msg += str(ex)
                logging.error(msg)
            else:
                msg = "Caught unexpected HTTPException in PhEDExInjector.\n%s" % str(
                    ex)
                logging.exception(msg)
                raise
        except Exception as ex:
            msg = "Caught unexpected exception in PhEDExInjector. Details:\n%s" % str(
                ex)
            logging.exception(msg)
            raise PhEDExInjectorException(msg)

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[
                    datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.

        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = {blockName: set()}

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        # filter out datatiers to be processed by RucioInjector
        uninjectedFiles = filterDataByTier(uninjectedFiles,
                                           self.listTiersToSkip)

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dataset in uninjectedFiles[siteName]:
                injectData = {}
                lfnList = []
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])
                    logging.info("About to inject %d files for block %s",
                                 len(injectData[dataset][block]['files']),
                                 block)

                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investigation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(
                    self.createRecoveryFileFormat(injectData))
            logging.error(
                "PhEDEx file injection failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            msg = "PhEDEx file injection failed with Exception: %s" % str(ex)
            logging.exception(msg)
        else:
            logging.debug("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData,
                                                       injectRes["error"])
                logging.error(msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception as ex:
                    if 'Deadlock found' in str(
                            ex) or 'deadlock detected' in str(ex):
                        logging.error(
                            "Database deadlock during file status update. Retrying again in the next cycle."
                        )
                        self.blocksToRecover.extend(
                            self.createRecoveryFileFormat(injectData))
                    else:
                        msg = "Failed to update file status in the database, reason: %s" % str(
                            ex)
                        logging.error(msg)
                        raise PhEDExInjectorException(msg)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        # filter out datatiers to be processed by RucioInjector
        migratedBlocks = filterDataByTier(migratedBlocks, self.listTiersToSkip)

        for siteName in migratedBlocks:
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dset, blocks in migratedBlocks[siteName].items():
                xmlData = self.createInjectionSpec({dset: blocks})
                logging.debug("closeBlocks XMLData: %s", xmlData)

                try:
                    injectRes = self.phedex.injectBlocks(location, xmlData)
                except HTTPException as ex:
                    logging.error(
                        "PhEDEx block close failed with HTTPException: %s %s",
                        ex.status, ex.result)
                except Exception as ex:
                    msg = "PhEDEx block close failed with Exception: %s" % str(
                        ex)
                    logging.exception(msg)
                else:
                    logging.debug("Block closing result: %s", injectRes)

                    if "error" in injectRes:
                        logging.error(
                            "Failed to close blocks due to: %s, for data: %s",
                            injectRes["error"], migratedBlocks[siteName][dset])
                    else:
                        for blockName in blocks:
                            logging.info("Block closed in PhEDEx: %s",
                                         blockName)
                            self.setBlockClosed.execute(blockName)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_
        Find deletable blocks, then decide if to delete based on:
        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check
        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        ### logic to stop doing things to be done by RucioInjector or by DM team
        for block in list(blockDict):
            if not self._isDataTierAllowed(block):
                blockDict.pop(block)

        try:
            subscriptions = self.phedex.getSubscriptionMapping(
                *blockDict.keys())
        except:
            logging.error(
                "Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[
                    blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",
                              blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(
                        block=blockName, complete='y')['phedex']['block']
                except:
                    logging.error(
                        "Couldn't get block info from PhEDEx, retry next cycle"
                    )
                else:
                    nodes = set()
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                    if location not in nodes:
                        logging.debug(
                            "Block %s not present on %s, mark as deleted",
                            blockName, location)
                        binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                        self.markBlocksDeleted.execute(binds)
                    elif sites.issubset(nodes):
                        logging.debug(
                            "Deleting block %s from %s since it is fully transfered",
                            blockName, location)
                        if location not in deletableEntries:
                            deletableEntries[location] = {}
                        if dataset not in deletableEntries[location]:
                            deletableEntries[location][dataset] = set()
                        deletableEntries[location][dataset].add(blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_
        actual PhEDEx calls for block deletion
        """
        deletion = PhEDExDeletion(
            blocksToDelete.keys(),
            location,
            level='block',
            comments="WMAgent blocks auto-delete from %s" % location,
            blocks=blocksToDelete)

        try:
            response = self.phedex.delete(deletion)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error(
                "PhEDEx block delete/approval failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error(
                "PhEDEx block delete/approval failed with Exception: %s",
                str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)

        return

    def _isDataTierAllowed(self, dataName):
        """
        Check whether data belongs to an allowed datatier to
        be handled by this component (either to inject or to
        subscribe into PhEDEx)
        :param dataName: string with the block or the dataset name
        :return: boolean, True if the tier is allowed, False otherwise
        """
        endTier = dataName.rsplit('/', 1)[1]
        endTier = endTier.split('#')[0] if '#' in endTier else endTier
        if endTier in self.listTiersToSkip:
            logging.debug(
                "Skipping data: %s because it's listed in the tiers to skip",
                dataName)
            return False
        return True

    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            ### logic to stop doing things to be done by RucioInjector or by DM team
            if not self._isDataTierAllowed(subInfo['path']):
                continue

            site = subInfo['site']

            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(
                subInfo['path'],
                site,
                subInfo['phedex_group'],
                priority=subInfo['priority'],
                move=subInfo['move'],
                custodial=subInfo['custodial'],
                request_only=subInfo['request_only'],
                subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                    phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            logging.info(
                "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                subscription.getDatasetPaths(), subscription.getNodes(),
                subscription.move, subscription.custodial,
                subscription.request_only)

            try:
                self.phedex.subscribe(subscription)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with Exception: %s",
                    str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return

    def subscribeBlocks(self):
        """
        _subscribeBlocks_
        Poll the database and subscribe blocks not yet subscribed.
        """
        logging.info("Starting subscribeBlocks method")

        unsubBlocks = self.getUnsubscribedBlocks.execute()
        # now organize those by location in order to minimize phedex requests
        # also remove blocks that this component is meant to skip
        unsubBlocks = self.organizeBlocksByLocation(unsubBlocks)

        for location, blockDict in unsubBlocks.items():
            phedexSub = PhEDExSubscription(blockDict.keys(),
                                           location,
                                           self.phedexGroup,
                                           blocks=blockDict,
                                           level="block",
                                           priority="normal",
                                           move="n",
                                           custodial="n",
                                           request_only="n",
                                           comments="WMAgent production site")
            try:
                res = self.phedex.subscribe(phedexSub)
                transferId = res['phedex']['request_created'][0]['id']
                logging.info(
                    "Subscribed %d blocks for %d datasets, to location: %s, under request ID: %s",
                    len(phedexSub.getBlocks()),
                    len(phedexSub.getDatasetPaths()), phedexSub.getNodes(),
                    transferId)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx block subscription failed with HTTPException: %s %s",
                    ex.status, ex.result)
                logging.error("The subscription object was: %s",
                              str(phedexSub))
            except Exception as ex:
                logging.exception(
                    "PhEDEx block subscription failed with Exception: %s",
                    str(ex))
            else:
                binds = []
                for blockname in phedexSub.getBlocks():
                    binds.append({
                        'RULE_ID': str(transferId),
                        'BLOCKNAME': blockname
                    })
                self.setBlockRules.execute(binds)

        return

    def organizeBlocksByLocation(self, blocksLocation):
        """
        Given a list of dictionaries (with block name and location). Organize
        those blocks per location to make phedex subscription calls more
        efficient.
        Also drops blocks that we cannot subscribe, and check for valid
        phedex node names.
        :param blocksLocation: list of dictionaries
        :return: a dict of dictionaries, such as:
          {"locationA": {"datasetA": ["blockA", "blockB", ...],
                         "datasetB": ["blockA", "blockB", ...]
                        },
           "locationB": {"datasetA": ["blockA"],
                         ...
        """
        dictByLocation = {}
        for item in blocksLocation:
            ### logic to stop doing things to be done by RucioInjector or by DM team
            if not self._isDataTierAllowed(item['blockname']):
                continue

            site = item['pnn']
            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping block subscription for: %s" % item['blockname']
                logging.error(msg)
                continue

            dictByLocation.setdefault(site, {})

            dsetName = item['blockname'].split("#")[0]
            dictByLocation[site].setdefault(dsetName, [])
            dictByLocation[site][dsetName].append(item['blockname'])
        return dictByLocation