Esempio n. 1
0
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        self.diskSites = getattr(config.PhEDExInjector, "diskSites", ["storm-fe-cms.cr.cnaf.infn.it",
                                                                      "srm-cms-disk.gridpp.rl.ac.uk"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

        self.filesToRecover = None

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUninjected = daofactory(classname = "GetUninjectedFiles")
        self.getMigrated = daofactory(classname = "GetMigratedBlocks")

        daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus")

        daofactory = DAOFactory(package = "WMComponent.DBSUpload.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.setBlockStatus = daofactory(classname = "SetBlockStatus")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if not self.seMap.has_key(node["kind"]):
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s" % (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for file in fileBlock["files"]:
                    blockSpec.addFile(file["lfn"], file["checksum"],
                                      file["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        {"block1": set(["lfn1", "lfn2"])}
        """
        sortedBlocks = defaultdict(set)
        for datasetPath in unInjectedData:
            
            for fileBlockName, fileBlock in unInjectedData[datasetPath].iteritems():
                for fileDict in fileBlock["files"]:
                    sortedBlocks[fileBlockName].add(fileDict["lfn"])
                    
        return sortedBlocks
    
    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        myThread = threading.currentThread()
        uninjectedFiles = self.getUninjected.execute()

        injectedFiles = []
        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if siteName in self.diskSites:
                    if self.seMap.has_key("Disk") and \
                           self.seMap["Disk"].has_key(siteName):
                        location = self.seMap["Disk"][siteName]
                    elif self.seMap.has_key("Buffer") and \
                             self.seMap["Buffer"].has_key(siteName):
                        location = self.seMap["Buffer"][siteName]
                    elif self.seMap.has_key("MSS") and \
                             self.seMap["MSS"].has_key(siteName):
                        location = self.seMap["MSS"][siteName]
                else:
                    if self.seMap.has_key("Buffer") and \
                           self.seMap["Buffer"].has_key(siteName):
                        location = self.seMap["Buffer"][siteName]
                    elif self.seMap.has_key("MSS") and \
                             self.seMap["MSS"].has_key(siteName):
                        location = self.seMap["MSS"][siteName]
                    elif self.seMap.has_key("Disk") and \
                             self.seMap["Disk"].has_key(siteName):
                        location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            myThread.transaction.begin()
            xmlData = self.createInjectionSpec(uninjectedFiles[siteName])
            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException, ex:
                # If we get an HTTPException of certain types, raise it as an error
                if ex.status == 400:
                    # assume it is duplicate injection error. but if that is not the case
                    # needs to be investigated
                    self.filesToRecover = self.createRecoveryFileFormat(uninjectedFiles[siteName])
                
                msg = "PhEDEx injection failed with %s error: %s" % (ex.status, ex.result)
                raise PhEDExInjectorPassableError(msg)
            except Exception, ex:
                # If we get an error here, assume that it's temporary (it usually is)
                # log it, and ignore it in the algorithm() loop
                msg =  "Encountered error while attempting to inject blocks to PhEDEx.\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                
                raise PhEDExInjectorPassableError(msg)
            logging.info("Injection result: %s" % injectRes)

            if not injectRes.has_key("error"):
                for datasetName in uninjectedFiles[siteName]:
                    for blockName in uninjectedFiles[siteName][datasetName]:
                        for file in uninjectedFiles[siteName][datasetName][blockName]["files"]:
                            injectedFiles.append(file["lfn"])
            else:
                msg = ("Error injecting data %s: %s" %
                       (uninjectedFiles[siteName], injectRes["error"]))
                logging.error(msg)
                self.sendAlert(6, msg = msg)

            self.setStatus.execute(injectedFiles, 1,
                                   conn = myThread.transaction.conn,
                                   transaction = myThread.transaction)
            injectedFiles = []
            myThread.transaction.commit()
Esempio n. 2
0
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info(
                "SubscribeDataset and deleteBlocks will run every %d polling cycles",
                self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        try:
            nodeMappings = self.phedex.getNodeMap()
        except:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

        self.blocksToRecover = []

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        logging.info("Running PhEDEx injector poller algorithm...")

        self.pollCounter += 1

        if self.blocksToRecover:
            logging.info("""PhEDExInjector Recovery:
                            previous injection call failed,
                            check if files were injected to PhEDEx anyway""")
            self.recoverInjectedFiles()

        self.injectFiles()
        self.closeBlocks()

        if self.pollCounter == self.subFrequency:
            self.pollCounter = 0
            self.deleteBlocks()
            self.subscribeDatasets()

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[
                    datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = {blockName: set()}

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            maxDataset = 20
            maxBlocks = 50
            maxFiles = 5000
            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset in uninjectedFiles[siteName]:

                numberDatasets += 1
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    numberBlocks += 1
                    numberFiles += len(injectData[dataset][block]['files'])
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])

                if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles:

                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []

            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investgation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(
                    self.createRecoveryFileFormat(injectData))
            logging.error(
                "PhEDEx file injection failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s",
                          str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData,
                                                       injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except:
                    # possible deadlock with DBS3Upload, retry once after 5s
                    logging.warning(
                        "Oracle exception during file status update, possible deadlock due to race condition, retry after 5s sleep"
                    )
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            logging.debug("closeBlocks XMLData: %s", xmlData)

            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx block close failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s",
                              str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)

                if "error" not in injectRes:
                    for datasetName in migratedBlocks[siteName]:
                        for blockName in migratedBlocks[siteName][datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)
                else:
                    msg = "Error injecting data %s: %s" % (
                        migratedBlocks[siteName], injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times 
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_
        Find deletable blocks, then decide if to delete based on:
        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check
        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(
                *blockDict.keys())
        except:
            logging.error(
                "Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[
                    blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",
                              blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(
                        block=blockName, complete='y')['phedex']['block']
                except:
                    logging.error(
                        "Couldn't get block info from PhEDEx, retry next cycle"
                    )
                else:
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                            if location not in nodes:
                                logging.debug(
                                    "Block %s not present on %s, mark as deleted",
                                    blockName, location)
                                binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                                self.markBlocksDeleted.execute(binds)
                            elif sites.issubset(nodes):
                                logging.debug(
                                    "Deleting block %s from %s since it is fully transfered",
                                    blockName, location)
                                if location not in deletableEntries:
                                    deletableEntries[location] = {}
                                if dataset not in deletableEntries[location]:
                                    deletableEntries[location][dataset] = set()
                                    deletableEntries[location][dataset].add(
                                        blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:

                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_
        actual PhEDEx calls for block deletion
        """
        deletion = PhEDExDeletion(
            blocksToDelete.keys(),
            location,
            level='block',
            comments="WMAgent blocks auto-delete from %s" % location,
            blocks=blocksToDelete)

        xmlData = XMLDrop.makePhEDExXMLForBlocks(
            self.dbsUrl, deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)

        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error(
                "PhEDEx block delete/approval failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error(
                "PhEDEx block delete/approval failed with Exception: %s",
                str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)

        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(
                subInfo['path'],
                site,
                subInfo['phedex_group'],
                priority=subInfo['priority'],
                move=subInfo['move'],
                custodial=subInfo['custodial'],
                request_only=subInfo['request_only'],
                subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            xmlData = XMLDrop.makePhEDExXMLForDatasets(
                self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s", xmlData)

            logging.info(
                "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                subscription.getDatasetPaths(), subscription.getNodes(),
                subscription.move, subscription.custodial,
                subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with Exception: %s",
                    str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return
Esempio n. 3
0
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        self.diskSites = getattr(config.PhEDExInjector, "diskSites", ["storm-fe-cms.cr.cnaf.infn.it",
                                                                      "srm-cms-disk.gridpp.rl.ac.uk"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

        self.blocksToRecover = None

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUninjected = daofactory(classname = "GetUninjectedFiles")
        self.getMigrated = daofactory(classname = "GetMigratedBlocks")

        daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname = "SetBlockClosed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s" % (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for file in fileBlock["files"]:
                    blockSpec.addFile(file["lfn"], file["checksum"],
                                      file["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = { blockName : set() }

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks
    
    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        myThread = threading.currentThread()
        uninjectedFiles = self.getUninjected.execute()

        injectedFiles = []
        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if siteName in self.diskSites:
                    if "Disk" in self.seMap and \
                           siteName in self.seMap["Disk"]:
                        location = self.seMap["Disk"][siteName]
                    elif "Buffer" in self.seMap and \
                             siteName in self.seMap["Buffer"]:
                        location = self.seMap["Buffer"][siteName]
                    elif "MSS" in self.seMap and \
                             siteName in self.seMap["MSS"]:
                        location = self.seMap["MSS"][siteName]
                else:
                    if "Buffer" in self.seMap and \
                           siteName in self.seMap["Buffer"]:
                        location = self.seMap["Buffer"][siteName]
                    elif "MSS" in self.seMap and \
                             siteName in self.seMap["MSS"]:
                        location = self.seMap["MSS"][siteName]
                    elif "Disk" in self.seMap and \
                             siteName in self.seMap["Disk"]:
                        location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            myThread.transaction.begin()
            xmlData = self.createInjectionSpec(uninjectedFiles[siteName])
            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                # If we get an HTTPException of certain types, raise it as an error
                if ex.status == 400:
                    # assume it is duplicate injection error. but if that is not the case
                    # needs to be investigated
                    self.blocksToRecover = self.createRecoveryFileFormat(uninjectedFiles[siteName])
                
                msg = "PhEDEx injection failed with %s error: %s" % (ex.status, ex.result)
                raise PhEDExInjectorPassableError(msg)
            except Exception as ex:
                # If we get an error here, assume that it's temporary (it usually is)
                # log it, and ignore it in the algorithm() loop
                msg =  "Encountered error while attempting to inject blocks to PhEDEx.\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                
                raise PhEDExInjectorPassableError(msg)
            logging.info("Injection result: %s" % injectRes)

            if "error" not in injectRes:
                for datasetName in uninjectedFiles[siteName]:
                    for blockName in uninjectedFiles[siteName][datasetName]:
                        for file in uninjectedFiles[siteName][datasetName][blockName]["files"]:
                            injectedFiles.append(file["lfn"])
            else:
                msg = ("Error injecting data %s: %s" %
                       (uninjectedFiles[siteName], injectRes["error"]))
                logging.error(msg)
                self.sendAlert(6, msg = msg)

            self.setStatus.execute(injectedFiles, 1,
                                   conn = myThread.transaction.conn,
                                   transaction = myThread.transaction)
            injectedFiles = []
            myThread.transaction.commit()

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS.
        """
        myThread = threading.currentThread()
        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and \
                       siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and \
                         siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and \
                         siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            myThread.transaction.begin()
            try:
                xmlData = self.createInjectionSpec(migratedBlocks[siteName])
                injectRes = self.phedex.injectBlocks(location, xmlData)
                logging.info("Block closing result: %s" % injectRes)
            except HTTPException as ex:
                # If we get an HTTPException of certain types, raise it as an error
                if ex.status == 400:
                    msg =  "Received 400 HTTP Error From PhEDEx: %s" % str(ex.result)
                    logging.error(msg)
                    self.sendAlert(6, msg = msg)
                    logging.debug("Blocks: %s" % migratedBlocks[siteName])
                    logging.debug("XMLData: %s" % xmlData)
                    raise
                else:
                    msg =  "Encountered error while attempting to close blocks in PhEDEx.\n"
                    msg += str(ex)
                    logging.error(msg)
                    logging.debug("Traceback: %s" % str(traceback.format_exc()))
                    raise PhEDExInjectorPassableError(msg)
            except Exception as ex:
                # If we get an error here, assume that it's temporary (it usually is)
                # log it, and ignore it in the algorithm() loop
                msg =  "Encountered error while attempting to close blocks in PhEDEx.\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                raise PhEDExInjectorPassableError(msg)

            if "error" not in injectRes:
                for datasetName in migratedBlocks[siteName]:
                    for blockName in migratedBlocks[siteName][datasetName]:
                        logging.debug("Closing block %s" % blockName)
                        self.setBlockClosed.execute(blockName,
                                                    conn = myThread.transaction.conn,
                                                    transaction = myThread.transaction)
            else:
                msg = ("Error injecting data %s: %s" %
                       (migratedBlocks[siteName], injectRes["error"]))
                logging.error(msg)
                self.sendAlert(6, msg = msg)
            myThread.transaction.commit()
        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times 
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = None

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        myThread = threading.currentThread()

        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if len(injectedFiles) > 0:

                myThread.transaction.begin()
                self.setStatus.execute(injectedFiles, 1)
                myThread.transaction.commit()
                logging.info("%s files already injected: changed status in dbsbuffer db" % len(injectedFiles))

        self.blocksToRecover = None
        return
        
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        myThread = threading.currentThread()
        try:
            if self.blocksToRecover != None:
                logging.info(""" Running PhEDExInjector Recovery: 
                                 previous injection call failed, 
                                 check if files were injected to PhEDEx anyway""")
                self.recoverInjectedFiles()
                        
            self.injectFiles()
            self.closeBlocks()
        except PhEDExInjectorPassableError as ex:
            logging.error("Encountered PassableError in PhEDExInjector")
            logging.error("Rolling back current transaction and terminating current loop, but not killing component.")
            if getattr(myThread, 'transaction', None):
                myThread.transaction.rollbackForError()
            pass
        except Exception:
            # Guess we should roll back if we actually have an exception
            if getattr(myThread, 'transaction', None):
                myThread.transaction.rollbackForError()
            raise

        return
Esempio n. 4
0
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        self.diskSites = getattr(config.PhEDExInjector, "diskSites", ["storm-fe-cms.cr.cnaf.infn.it",
                                                                      "srm-cms-disk.gridpp.rl.ac.uk"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

        self.filesToRecover = None

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUninjected = daofactory(classname = "GetUninjectedFiles")
        self.getMigrated = daofactory(classname = "GetMigratedBlocks")

        daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname = "SetBlockClosed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s" % (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for file in fileBlock["files"]:
                    blockSpec.addFile(file["lfn"], file["checksum"],
                                      file["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        {"block1": set(["lfn1", "lfn2"])}
        """
        sortedBlocks = defaultdict(set)
        for datasetPath in unInjectedData:
            
            for fileBlockName, fileBlock in unInjectedData[datasetPath].iteritems():
                for fileDict in fileBlock["files"]:
                    sortedBlocks[fileBlockName].add(fileDict["lfn"])
                    
        return sortedBlocks
    
    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        myThread = threading.currentThread()
        uninjectedFiles = self.getUninjected.execute()

        injectedFiles = []
        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if siteName in self.diskSites:
                    if "Disk" in self.seMap and \
                           siteName in self.seMap["Disk"]:
                        location = self.seMap["Disk"][siteName]
                    elif "Buffer" in self.seMap and \
                             siteName in self.seMap["Buffer"]:
                        location = self.seMap["Buffer"][siteName]
                    elif "MSS" in self.seMap and \
                             siteName in self.seMap["MSS"]:
                        location = self.seMap["MSS"][siteName]
                else:
                    if "Buffer" in self.seMap and \
                           siteName in self.seMap["Buffer"]:
                        location = self.seMap["Buffer"][siteName]
                    elif "MSS" in self.seMap and \
                             siteName in self.seMap["MSS"]:
                        location = self.seMap["MSS"][siteName]
                    elif "Disk" in self.seMap and \
                             siteName in self.seMap["Disk"]:
                        location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            myThread.transaction.begin()
            xmlData = self.createInjectionSpec(uninjectedFiles[siteName])
            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                # If we get an HTTPException of certain types, raise it as an error
                if ex.status == 400:
                    # assume it is duplicate injection error. but if that is not the case
                    # needs to be investigated
                    self.filesToRecover = self.createRecoveryFileFormat(uninjectedFiles[siteName])
                
                msg = "PhEDEx injection failed with %s error: %s" % (ex.status, ex.result)
                raise PhEDExInjectorPassableError(msg)
            except Exception as ex:
                # If we get an error here, assume that it's temporary (it usually is)
                # log it, and ignore it in the algorithm() loop
                msg =  "Encountered error while attempting to inject blocks to PhEDEx.\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                
                raise PhEDExInjectorPassableError(msg)
            logging.info("Injection result: %s" % injectRes)

            if "error" not in injectRes:
                for datasetName in uninjectedFiles[siteName]:
                    for blockName in uninjectedFiles[siteName][datasetName]:
                        for file in uninjectedFiles[siteName][datasetName][blockName]["files"]:
                            injectedFiles.append(file["lfn"])
            else:
                msg = ("Error injecting data %s: %s" %
                       (uninjectedFiles[siteName], injectRes["error"]))
                logging.error(msg)
                self.sendAlert(6, msg = msg)

            self.setStatus.execute(injectedFiles, 1,
                                   conn = myThread.transaction.conn,
                                   transaction = myThread.transaction)
            injectedFiles = []
            myThread.transaction.commit()

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS.
        """
        myThread = threading.currentThread()
        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and \
                       siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and \
                         siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and \
                         siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            myThread.transaction.begin()
            try:
                xmlData = self.createInjectionSpec(migratedBlocks[siteName])
                injectRes = self.phedex.injectBlocks(location, xmlData)
                logging.info("Block closing result: %s" % injectRes)
            except HTTPException as ex:
                # If we get an HTTPException of certain types, raise it as an error
                if ex.status == 400:
                    msg =  "Received 400 HTTP Error From PhEDEx: %s" % str(ex.result)
                    logging.error(msg)
                    self.sendAlert(6, msg = msg)
                    logging.debug("Blocks: %s" % migratedBlocks[siteName])
                    logging.debug("XMLData: %s" % xmlData)
                    raise
                else:
                    msg =  "Encountered error while attempting to close blocks in PhEDEx.\n"
                    msg += str(ex)
                    logging.error(msg)
                    logging.debug("Traceback: %s" % str(traceback.format_exc()))
                    raise PhEDExInjectorPassableError(msg)
            except Exception as ex:
                # If we get an error here, assume that it's temporary (it usually is)
                # log it, and ignore it in the algorithm() loop
                msg =  "Encountered error while attempting to close blocks in PhEDEx.\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                raise PhEDExInjectorPassableError(msg)

            if "error" not in injectRes:
                for datasetName in migratedBlocks[siteName]:
                    for blockName in migratedBlocks[siteName][datasetName]:
                        logging.debug("Closing block %s" % blockName)
                        self.setBlockClosed.execute(blockName,
                                                    conn = myThread.transaction.conn,
                                                    transaction = myThread.transaction)
            else:
                msg = ("Error injecting data %s: %s" %
                       (migratedBlocks[siteName], injectRes["error"]))
                logging.error(msg)
                self.sendAlert(6, msg = msg)
            myThread.transaction.commit()
        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times 
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.filesToRecover = None
        """
        myThread = threading.currentThread()
        
        injectedFiles = self.phedex.getInjectedFiles(self.filesToRecover)
        
        myThread.transaction.begin()
        self.setStatus.execute(injectedFiles, 1)
        myThread.transaction.commit()
        # when files are recovered set the self.file 
        self.filesToRecover = None
        return injectedFiles
        
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        myThread = threading.currentThread()
        try:
            if self.filesToRecover != None:
                logging.info(""" Running PhEDExInjector Recovery: 
                                 previous injection call failed, 
                                 check if files were injected to PhEDEx anyway""")
                recoveredFiles = self.recoverInjectedFiles()
                logging.info("%s files already injected: changed status in dbsbuffer db" % len(recoveredFiles))
                        
            self.injectFiles()
            self.closeBlocks()
        except PhEDExInjectorPassableError as ex:
            logging.error("Encountered PassableError in PhEDExInjector")
            logging.error("Rolling back current transaction and terminating current loop, but not killing component.")
            if getattr(myThread, 'transaction', None):
                myThread.transaction.rollbackForError()
            pass
        except Exception:
            # Guess we should roll back if we actually have an exception
            if getattr(myThread, 'transaction', None):
                myThread.transaction.rollbackForError()
            raise

        return
Esempio n. 5
0
class PhEDExTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_
        
        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS,
                                 "method": "POST"})
        return
        
    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0, "closed_blocks": 0,
                                    "new_blocks": 0, "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        
        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                      "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue(requestIDs[0].has_key("id"),
                        "Error: Missing request ID")
        return

    @attr("integration")
    def testNodeMap(self):
        """
        _testNodeMap_

        Verify that the node map can be retrieve from PhEDEx and that the
        getNodeSE() and getNodeNames() methods work correctly.
        """
        self.failUnless(self.phedexApi.getNodeSE("T2_FR_GRIF_LLR") == "polgrid4.in2p3.fr")
        self.failUnless(self.phedexApi.getNodeNames("cmssrm.fnal.gov") == ["T1_US_FNAL_Buffer",
                                                                           "T1_US_FNAL_MSS"])
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", requestOnly = "n")
        datasetSpec = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                       testDatasetSub.getDatasetPaths())
        self.phedexApi.subscribe(testDatasetSub, datasetSpec)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level = "block", requestOnly = "n")
        self.phedexApi.subscribe(testBlockSub, blockSpec)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], set(["T1_UK_RAL_MSS"]),
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'], ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'], ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return

    @attr('integration')
    def testXMLJSON(self):
        """
        Test XML and JSON in the same scope
        """
        site = 'T1_US_FNAL_Buffer'
        dict = {}
        dict['endpoint'] = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        phedexJSON = PhEDEx(responseType='json', dict=dict)
        dict['endpoint'] = "https://cmsweb.cern.ch/phedex/datasvc/xml/test"
        phedexXML  = PhEDEx(responseType='xml',  dict=dict)

        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node' : site})
        tfc_map = {}
        tfc_map[site] = readTFC(tfc_file)
        pfn =    tfc_map[site].matchLFN('srmv2', '/store/user/jblow/dir/test.root')

        self.failUnless(pfn == 'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root')

        self.failUnless(phedexJSON.getNodeSE('T1_US_FNAL_Buffer') == 'cmssrm.fnal.gov')

    @attr('integration')
    def testAuth(self):
        """
        _testAuth_

        Verify that the auth method works correctly."
        """
        self.assertFalse(self.phedexApi.getAuth("datasvc_whatever"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_subscribe"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_inject"))

        return
Esempio n. 6
0
class PhEDExTest(EmulatedUnitTestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        self.dbsTestUrl = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"
        self.phedexApi = PhEDEx()

        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(
            result["phedex"]["injected"], {
                "stats": {
                    "closed_datasets": 0,
                    "closed_blocks": 0,
                    "new_blocks": 0,
                    "new_datasets": 1,
                    "new_files": 0
                }
            })
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0], "Error: Missing request ID")
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        # NOTE: leaving it broken on purpose, we do NOT want to subscribe
        # data via unit tests :-)
        #injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsTestUrl)
        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset],
                                            "T1_UK_RAL_MSS",
                                            "Saturn",
                                            request_only="y")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset],
                                          "T1_DE_KIT_MSS",
                                          "Saturn",
                                          level="block",
                                          request_only="y")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(
            ['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
            ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return

    def testGetReplicaInfoForBlocks(self):
        """
        Test `getReplicaInfoForBlocks` method, the ability to retrieve replica
        locations provided a (or a list of) datasets and blocks
        """
        def _checkOutcome(numFiles, replica):
            "run the checks"
            if rep['complete'] == 'y':
                self.assertEqual(rep['files'], numFiles)
            if rep['custodial'] == 'y':
                self.assertTrue(rep['node'].endswith("_MSS"))
                self.assertTrue(rep['subscribed'], 'y')

        replicaDict = {
            'bytes', 'complete', 'custodial', 'files', 'group', 'node',
            'node_id', 'se', 'subscribed', 'time_create', 'time_update'
        }

        res = self.phedexApi.getReplicaInfoForBlocks(block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertTrue(len(res['block'][0]['replica']) > 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(),
                              replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # same test, but providing a dataset as input (which has only the block above)
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET)['phedex']
        self.assertEqual(len(res['block']), 4)
        self.assertTrue(BLOCK in [blk['name'] for blk in res['block']])
        for block in res['block']:
            numFiles = block['files']
            for rep in block['replica']:
                self.assertTrue(len(block['replica']) > 1)
                _checkOutcome(numFiles, rep)

        # same test again, but providing both block and dataset
        # NOTE the PhEDEx service only process the block input, the
        # dataset argument is completely ignored
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET,
                                                     block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertTrue(len(res['block'][0]['replica']) > 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(),
                              replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # provide a block that does not exist
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET,
                                                     block=BLOCK +
                                                     "BLAH")['phedex']
        self.assertTrue(res['block'] == [])

    def testGroupUsage(self):
        """
        _testGroupUsage_

        Verify that the `getGroupUsage` API works correctly.
        """
        node = "T2_DE_DESY"
        group = "DataOps"
        res = self.phedexApi.getGroupUsage(group=group, node=node)['phedex']
        self.assertEqual(len(res['node']), 1)
        self.assertEqual(len(res['node'][0]['group']), 1)
        self.assertEqual(res['node'][0]['group'][0]['name'], group)
        self.assertEqual(res['node'][0]['name'], node)
        self.assertTrue(res['node'][0]['group'][0]['dest_bytes'] > 100)

        res = self.phedexApi.getGroupUsage(group=group)['phedex']
        self.assertTrue(len(res['node']) > 50)
        self.assertEqual(len(res['node'][10]['group']), 1)
        self.assertEqual(res['node'][10]['group'][0]['name'], group)

        return
def insertFilesToBlock(files, injectNode, injectSE, mode, commit):

    # pick a DBS3 instance
    # instance = 'dev'
    instance = 'int'
    # instance = 'prod'

    if instance=='dev':
        # host = 'dbs3-dev01.cern.ch'
        host = 'cmsweb-dev.cern.ch'

    if instance=='int':
        host = 'cmsweb-testbed.cern.ch'

    if instance=='prod':
        host = 'cmsweb.cern.ch'

    globReadUrl = 'https://%s/dbs/%s/global/DBSReader' % (host, instance)
    globWriteUrl = 'https://%s/dbs/%s/global/DBSWriter' % (host, instance)
    phy3ReadUrl = 'https://%s/dbs/%s/phys03/DBSReader' % (host, instance)
    phy3WriteUrl = 'https://%s/dbs/%s/phys03/DBSWriter' % (host, instance)

    readApi   = DbsApi(url=globReadUrl)
    writeApi  = DbsApi(url=globWriteUrl)
    # readApi   = DbsApi(url=phy3ReadUrl)
    # writeApi  = DbsApi(url=phy3WriteUrl)

    if mode == "lhe":
        ds_info = {
            'data_type'       : 'mc',
            'acquisition_era' : 'LHE',
            'primary_ds'      : 'QCD_HT-100To250_8TeV-madgraph',
            'processed_ds'    : 'LHE-testAlan_Attempt3-v2',
            'data_tier'       : 'LHE',
            'physics_group'   : 'GEN',
            'application'     : 'Madgraph',
            'app_version'     : 'Mad_5_1_3_30',
            'proc_version'    : 1,
            'proc_descript'   : 'test_LHE_injection'
            }
    elif mode == "pixel":
        ds_info = {
            'data_type'       : 'data',
            'acquisition_era' : 'Run2012',
            'primary_ds'      : 'QCD_HT-100To250_8TeV-madgraph',
            'processed_ds'    : 'LHE-testAlan_Attempt3-v2',
            'data_tier'       : 'LHE',
            'physics_group'   : None,
            'application'     : 'Madgraph',
            'app_version'     : 'Mad_5_1_3_30',
            'proc_version'    : 1,
            'proc_descript'   : 'test_LHE_injection'
            }

    acquisition_era_config = { 'acquisition_era_name' : ds_info['acquisition_era'],
                               'start_date' : int(time.time())
                               }

    processing_era_config = { 'processing_version': ds_info['proc_version'],
                              'description': ds_info['proc_descript']
                              }

    primds_config = { 'primary_ds_type': ds_info['data_type'],
                      'primary_ds_name': ds_info['primary_ds']
                      }

    dataset_name = "/%s/%s/%s" % (ds_info['primary_ds'], ds_info['processed_ds'], ds_info['data_tier'])
    dataset_config = { 'physics_group_name' : ds_info['physics_group'],
                       'dataset_access_type' : 'VALID',
                       'data_tier_name' : ds_info['data_tier'],
                       'processed_ds_name' : ds_info['processed_ds'],
                       'dataset' : dataset_name
                       }

    block_name = "%s#%s" % (dataset_name, str(uuid.uuid4()))
    block_config = { 'block_name' : block_name,
                     'origin_site_name' : injectSE,
                     'open_for_writing' : 0
                     }

    dataset_conf_list = [ { 'app_name' : ds_info['application'],
                            'global_tag' : 'dummytag',
                            'output_module_label' : 'out',
                            'pset_hash' : 'dummyhash',
                            'release_version' : ds_info['app_version']
                            } ]

    blockDict = { 'files': files,
                  'processing_era': processing_era_config,
                  'primds': primds_config,
                  'dataset': dataset_config,
                  'dataset_conf_list' : dataset_conf_list,
                  'acquisition_era': acquisition_era_config,
                  'block': block_config,
                  'file_parent_list':[],
                  'file_conf_list':[]
                  }

    blockDict['files'] = files
    blockDict['block']['file_count'] = len(files)
    blockDict['block']['block_size'] = sum([int(file['file_size']) for file in files])

    if commit:
        logging.info("inserted block into DBS : %s" % writeApi.url)
        logging.debug(pprint.pformat(blockDict))
        writeApi.insertBulkBlock(blockDict)
    else:
        logging.info("dry run, this block would have been inserted into DBS : %s" % writeApi.url)
        logging.info(pprint.pformat(blockDict))

    injectionSpec = XMLDrop.XMLInjectionSpec(writeApi.url)
    datasetSpec = injectionSpec.getDataset(dataset_name)
    blockSpec = datasetSpec.getFileblock(block_name, "n")
    for f in files:
        blockSpec.addFile(f['logical_file_name'],
                          { 'adler32' : f['adler32'] },
                          f['file_size'])

    xmlData = injectionSpec.save()

    # SELECT PHEDEX URL
    phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/dev/"
    #phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"

    if commit:
        logging.info("inserting block into PhEDEx : %s" % phedexURL)
        logging.debug(pprint.pformat(xmlData))
        phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        injectRes = phedex.injectBlocks(injectNode, xmlData)
    else:
        logging.info("dry run, this block would have been inserted into PhEDEx : %s" % phedexURL)
        logging.info(pprint.pformat(xmlData))

    return
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval/pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles", self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        try:
            nodeMappings = self.phedex.getNodeMap()
        except:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName = "PhEDExInjector")

        self.blocksToRecover = []

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUninjected = daofactory(classname = "GetUninjectedFiles")
        self.getMigrated = daofactory(classname = "GetMigratedBlocks")

        self.findDeletableBlocks = daofactory(classname = "GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname = "MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname = "SetBlockClosed")

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        logging.info("Running PhEDEx injector poller algorithm...")

        self.pollCounter += 1

        if self.blocksToRecover:
            logging.info("""PhEDExInjector Recovery:
                            previous injection call failed,
                            check if files were injected to PhEDEx anyway""")
            self.recoverInjectedFiles()

        self.injectFiles()
        self.closeBlocks()

        if self.pollCounter == self.subFrequency:
            self.pollCounter = 0
            self.deleteBlocks()
            self.subscribeDatasets()

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  
        
        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        
        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = { blockName: set() }

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks
    
    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            maxDataset = 20
            maxBlocks = 50
            maxFiles = 5000
            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset in uninjectedFiles[siteName]:

                numberDatasets += 1
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    numberBlocks += 1
                    numberFiles += len(injectData[dataset][block]['files'])
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])

                if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles:

                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []

            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investgation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend( self.createRecoveryFileFormat(injectData) )
            logging.error("PhEDEx file injection failed with HTTPException: %s %s", ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData, injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg = msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except:
                    # possible deadlock with DBS3Upload, retry once after 5s
                    logging.warning("Oracle exception during file status update, possible deadlock due to race condition, retry after 5s sleep")
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            logging.debug("closeBlocks XMLData: %s", xmlData)

            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx block close failed with HTTPException: %s %s", ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)

                if "error" not in injectRes:
                    for datasetName in migratedBlocks[siteName]:
                        for blockName in migratedBlocks[siteName][datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)
                else:
                    msg = "Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg = msg)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times 
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return
        
    def deleteBlocks(self):
        """
        _deleteBlocks_
        Find deletable blocks, then decide if to delete based on:
        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check
        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction = False)

        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys())
        except:
            logging.error("Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion", location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",  blockName, location)
                binds = { 'DELETED': 2,
                          'BLOCKNAME': blockName }
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(block = blockName, complete = 'y')['phedex']['block']
                except:
                    logging.error("Couldn't get block info from PhEDEx, retry next cycle")
                else:
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                            if location not in nodes:
                                logging.debug("Block %s not present on %s, mark as deleted", blockName, location)
                                binds = { 'DELETED': 1,
                                          'BLOCKNAME': blockName }
                                self.markBlocksDeleted.execute(binds)
                            elif sites.issubset(nodes):
                                logging.debug("Deleting block %s from %s since it is fully transfered", blockName, location)
                                if location not in deletableEntries:
                                    deletableEntries[location] = {}
                                if dataset not in deletableEntries[location]:
                                    deletableEntries[location][dataset] = set()
                                    deletableEntries[location][dataset].add(blockName)


        binds = []
        for blockName in skippableBlocks:
            binds.append( { 'DELETED': 2,
                            'BLOCKNAME': blockName } )
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:

                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_
        actual PhEDEx calls for block deletion
        """
        deletion = PhEDExDeletion(blocksToDelete.keys(), location,
                                  level = 'block',
                                  comments = "WMAgent blocks auto-delete from %s" % location,
                                  blocks = blocksToDelete)

        xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl,
                                                 deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)

        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s", ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append( { 'DELETED': 1,
                                    'BLOCKNAME': blockName } )
            self.markBlocksDeleted.execute(binds)

        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']: 
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'],
                                           priority = subInfo['priority'],
                                           move = subInfo['move'],
                                           custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'],
                                           subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths())
            logging.debug("subscribeDatasets XMLData: %s" , xmlData)

            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(),
                         subscription.getNodes(),
                         subscription.move,
                         subscription.custodial,
                         subscription.request_only)

            try:
                self.phedex.subscribe(subscription, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s", ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return
Esempio n. 9
0
class PhEDExTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS, "method": "POST"})
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(
            result["phedex"]["injected"], {
                "stats": {
                    "closed_datasets": 0,
                    "closed_blocks": 0,
                    "new_blocks": 0,
                    "new_datasets": 1,
                    "new_files": 0
                }
            })
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0], "Error: Missing request ID")
        return

    @attr("integration")
    def testBestNodeName(self):
        """
        _testBestNodeName_

        Verify that the node name is Buffer first
        """
        self.assertTrue(
            self.phedexApi.getBestNodeName("cmssrm.fnal.gov") ==
            "T1_US_FNAL_Buffer")
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset],
                                            "T1_UK_RAL_MSS",
                                            "Saturn",
                                            request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset],
                                          "T1_DE_KIT_MSS",
                                          "Saturn",
                                          level="block",
                                          request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(
            ['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
            ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return
Esempio n. 10
0
class PhEDExTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        self.dbsTestUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.phedexApi = PhEDEx()

        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(result["phedex"]["injected"],
                         {"stats": {"closed_datasets": 0, "closed_blocks": 0,
                                    "new_blocks": 0, "new_datasets": 1,
                                    "new_files": 0}})
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset], "T1_UK_RAL_MSS",
                                            "Saturn", request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset], "T1_DE_KIT_MSS", "Saturn",
                                          level="block", request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"}, "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(len(subs[blockA]), 2,
                         "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'], ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'], ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return

    def testGetReplicaInfoForBlocks(self):
        """
        Test `getReplicaInfoForBlocks` method, the ability to retrieve replica
        locations provided a (or a list of) datasets and blocks
        """
        def _checkOutcome(numFiles, replica):
            "run the checks"
            if rep['complete'] == 'y':
                self.assertEqual(rep['files'], numFiles)
            if rep['custodial'] == 'y':
                self.assertTrue(rep['node'].endswith("_MSS"))
                self.assertTrue(rep['subscribed'], 'y')

        replicaDict = {'bytes', 'complete', 'custodial', 'files', 'group',
                       'node', 'node_id', 'se', 'subscribed',
                       'time_create', 'time_update'}

        res = self.phedexApi.getReplicaInfoForBlocks(block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertTrue(len(res['block'][0]['replica']) > 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(), replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # same test, but providing a dataset as input (which has only the block above)
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET)['phedex']
        self.assertEqual(len(res['block']), 4)
        self.assertTrue(BLOCK in [blk['name'] for blk in res['block']])
        for block in res['block']:
            numFiles = block['files']
            for rep in block['replica']:
                self.assertTrue(len(block['replica']) > 1)
                _checkOutcome(numFiles, rep)

        # same test again, but providing both block and dataset
        # NOTE the PhEDEx service only process the block input, the
        # dataset argument is completely ignored
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET, block=BLOCK)['phedex']
        self.assertEqual(len(res['block']), 1)
        self.assertEqual(res['block'][0]['name'], BLOCK)
        self.assertTrue(len(res['block'][0]['replica']) > 1)
        self.assertItemsEqual(res['block'][0]['replica'][0].keys(), replicaDict)
        numFiles = res['block'][0]['files']
        for rep in res['block'][0]['replica']:
            _checkOutcome(numFiles, rep)

        # provide a block that does not exist
        res = self.phedexApi.getReplicaInfoForBlocks(dataset=DSET, block=BLOCK + "BLAH")['phedex']
        self.assertTrue(res['block'] == [])
Esempio n. 11
0
class PhEDExTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Initialize the PhEDEx API to point at the test server.
        """
        phedexTestDS = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
        self.phedexApi = PhEDEx({"endpoint": phedexTestDS, "method": "POST"})
        return

    @attr("integration")
    def testInjection(self):
        """
        _testInjection_

        Verify that we can inject data into PhEDEx.
        """
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, makeUUID())
        result = self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        self.assertEqual(
            result["phedex"]["injected"], {
                "stats": {
                    "closed_datasets": 0,
                    "closed_blocks": 0,
                    "new_blocks": 0,
                    "new_datasets": 1,
                    "new_files": 0
                }
            })
        return

    @attr("integration")
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        result = self.phedexApi.subscribe(testSub)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0], "Error: Missing request ID")
        return

    @attr("integration")
    def testBestNodeName(self):
        """
        _testBestNodeName_

        Verify that the node name is Buffer first
        """
        self.assertTrue(
            self.phedexApi.getBestNodeName("cmssrm.fnal.gov") ==
            "T1_US_FNAL_Buffer")
        return

    @attr("integration")
    def testNodeMap(self):
        """
        _testNodeMap_

        Verify that the node map can be retrieve from PhEDEx and that the
        getNodeSE() and getNodeNames() methods work correctly.
        """
        self.assertTrue(
            self.phedexApi.getNodeSE("T2_FR_GRIF_LLR") == "polgrid4.in2p3.fr")
        self.assertTrue(
            self.phedexApi.getNodeNames("cmssrm.fnal.gov") ==
            ["T1_US_FNAL_Buffer", "T1_US_FNAL_MSS"])
        return

    @attr('integration')
    def testGetSubscriptionMapping(self):
        """
        _testGetSubscriptionMapping_

        Verify that the subscription mapping API works correctly.
        """
        testDataset = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        blockA = "%s#%s" % (testDataset, makeUUID())
        blockB = "%s#%s" % (testDataset, makeUUID())

        datasetSpec = injectionSpec.getDataset(testDataset)
        datasetSpec.getFileblock(blockA, 'y')
        datasetSpec.getFileblock(blockB, 'y')
        blockSpec = injectionSpec.save()
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", blockSpec)

        # Create a dataset level subscription to a node
        testDatasetSub = PhEDExSubscription([testDataset],
                                            "T1_UK_RAL_MSS",
                                            "Saturn",
                                            request_only="n")
        self.phedexApi.subscribe(testDatasetSub)

        # Create a block level subscrtion to a different node
        testBlockSub = PhEDExSubscription([testDataset],
                                          "T1_DE_KIT_MSS",
                                          "Saturn",
                                          level="block",
                                          request_only="n")
        self.phedexApi.subscribe(testBlockSub)

        subs = self.phedexApi.getSubscriptionMapping(testDataset)
        self.assertEqual(subs[testDataset], {"T1_UK_RAL_MSS"},
                         "Error: Dataset subscription is wrong.")

        subs = self.phedexApi.getSubscriptionMapping(blockA)
        self.assertEqual(
            len(subs[blockA]), 2,
            "Error: Wrong number of nodes in block subscription.")
        self.assertTrue("T1_UK_RAL_MSS" in subs[blockA],
                        "Error: RAL missing from block sub.")
        self.assertTrue("T1_DE_KIT_MSS" in subs[blockA],
                        "Error: KIT missing from block sub.")
        return

    def testPFNLookup(self):
        """
        _testPFNLookup_

        Verify that the PFN lookup in PhEDEx works correctly.
        """
        call1 = self.phedexApi.getPFN(['T2_UK_SGrid_Bristol'],
                                      ['/store/user/metson/file'])

        # Should get one mapping back (one lfn, one node)
        self.assertTrue(len(call1.keys()) == 1)
        call1_key = call1.keys()[0]

        call2 = self.phedexApi.getPFN(
            ['T2_UK_SGrid_Bristol', 'T1_US_FNAL_Buffer'],
            ['/store/user/metson/file'])
        # Should get back two mappings (two nodes)
        self.assertTrue(call1_key in call2.keys())

        # and one of the mappings should be the same as from the previous call
        self.assertTrue(call1[call1_key] == call2[call1_key])
        return

    @attr('integration')
    def testXMLJSON(self):
        """
        Test XML and JSON in the same scope
        """
        site = 'T1_US_FNAL_Buffer'
        httpDict = {
            'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        }
        phedexJSON = PhEDEx(responseType='json', httpDict=httpDict)
        httpDict = {
            'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/xml/test"
        }
        phedexXML = PhEDEx(responseType='xml', httpDict=httpDict)

        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node': site})
        tfc_map = {}
        tfc_map[site] = readTFC(tfc_file)
        pfn = tfc_map[site].matchLFN('srmv2',
                                     '/store/user/jblow/dir/test.root')

        self.assertTrue(
            pfn ==
            'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root'
        )

        self.assertTrue(
            phedexJSON.getNodeSE('T1_US_FNAL_Buffer') == 'cmssrm.fnal.gov')

    @attr('integration')
    def testAuth(self):
        """
        _testAuth_

        Verify that the auth method works correctly."
        """
        self.assertFalse(self.phedexApi.getAuth("datasvc_whatever"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_subscribe"))
        self.assertTrue(self.phedexApi.getAuth("datasvc_inject"))

        return
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        self.enabled = getattr(config.PhEDExInjector, "enabled", True)
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.phedexGroup = config.PhEDExInjector.phedexGroup

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info(
                "SubscribeDataset and deleteBlocks will run every %d polling cycles",
                self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json",
                             dbsUrl=self.dbsUrl)
        try:
            nodeMappings = self.phedex.getNodeMap()
        except:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        self.blocksToRecover = []

        # X-component configuration is BAD! But it will only be here during the
        # Rucio commissioning within WM
        self.listTiersToSkip = config.RucioInjector.listTiersToInject
        logging.info(
            "Component configured to skip data injection for data tiers: %s",
            self.listTiersToSkip)

        return

    def setup(self, parameters):
        """
        _setup_

        Create DAO Factory and setup some DAO.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.RucioInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.getUnsubscribedBlocks = daofactory(
            classname="GetUnsubscribedBlocks")
        self.setBlockRules = daofactory(classname="SetBlocksRule")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")
        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    @timeFunction
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        if not self.enabled:
            logging.info(
                "PhEDExInjector component is disabled in the configuration, exiting."
            )
            return

        logging.info("Running PhEDEx injector poller algorithm...")
        self.pollCounter += 1

        try:
            if self.blocksToRecover:
                logging.info("""PhEDExInjector Recovery:
                                previous injection call failed,
                                checking if files were injected to PhEDEx anyway"""
                             )
                self.recoverInjectedFiles()

            self.injectFiles()
            self.closeBlocks()

            if self.pollCounter == self.subFrequency:
                self.pollCounter = 0
                self.deleteBlocks()
                self.subscribeDatasets()
                self.subscribeBlocks()
        except HTTPException as ex:
            if hasattr(ex, "status") and ex.status in [502, 503]:
                # then either proxy error or service is unavailable
                msg = "Caught HTTPException in PhEDExInjector. Retrying in the next cycle.\n"
                msg += str(ex)
                logging.error(msg)
            else:
                msg = "Caught unexpected HTTPException in PhEDExInjector.\n%s" % str(
                    ex)
                logging.exception(msg)
                raise
        except Exception as ex:
            msg = "Caught unexpected exception in PhEDExInjector. Details:\n%s" % str(
                ex)
            logging.exception(msg)
            raise PhEDExInjectorException(msg)

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[
                    datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from database in to the dict format
        for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.

        unInjectedData format
        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        returns
        [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])]
        """
        blocks = []
        for datasetPath in unInjectedData:

            for blockName, fileBlock in unInjectedData[datasetPath].items():

                newBlock = {blockName: set()}

                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])

                blocks.append(newBlock)

        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        # filter out datatiers to be processed by RucioInjector
        uninjectedFiles = filterDataByTier(uninjectedFiles,
                                           self.listTiersToSkip)

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dataset in uninjectedFiles[siteName]:
                injectData = {}
                lfnList = []
                injectData[dataset] = uninjectedFiles[siteName][dataset]

                for block in injectData[dataset]:
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])
                    logging.info("About to inject %d files for block %s",
                                 len(injectData[dataset][block]['files']),
                                 block)

                self.injectFilesPhEDExCall(location, injectData, lfnList)

        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        actual PhEDEx call for file injection
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection
            # trigger later block recovery (investigation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(
                    self.createRecoveryFileFormat(injectData))
            logging.error(
                "PhEDEx file injection failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            msg = "PhEDEx file injection failed with Exception: %s" % str(ex)
            logging.exception(msg)
        else:
            logging.debug("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData,
                                                       injectRes["error"])
                logging.error(msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception as ex:
                    if 'Deadlock found' in str(
                            ex) or 'deadlock detected' in str(ex):
                        logging.error(
                            "Database deadlock during file status update. Retrying again in the next cycle."
                        )
                        self.blocksToRecover.extend(
                            self.createRecoveryFileFormat(injectData))
                    else:
                        msg = "Failed to update file status in the database, reason: %s" % str(
                            ex)
                        logging.error(msg)
                        raise PhEDExInjectorException(msg)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        # filter out datatiers to be processed by RucioInjector
        migratedBlocks = filterDataByTier(migratedBlocks, self.listTiersToSkip)

        for siteName in migratedBlocks:
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dset, blocks in migratedBlocks[siteName].items():
                xmlData = self.createInjectionSpec({dset: blocks})
                logging.debug("closeBlocks XMLData: %s", xmlData)

                try:
                    injectRes = self.phedex.injectBlocks(location, xmlData)
                except HTTPException as ex:
                    logging.error(
                        "PhEDEx block close failed with HTTPException: %s %s",
                        ex.status, ex.result)
                except Exception as ex:
                    msg = "PhEDEx block close failed with Exception: %s" % str(
                        ex)
                    logging.exception(msg)
                else:
                    logging.debug("Block closing result: %s", injectRes)

                    if "error" in injectRes:
                        logging.error(
                            "Failed to close blocks due to: %s, for data: %s",
                            injectRes["error"], migratedBlocks[siteName][dset])
                    else:
                        for blockName in blocks:
                            logging.info("Block closed in PhEDEx: %s",
                                         blockName)
                            self.setBlockClosed.execute(blockName)

        return

    def recoverInjectedFiles(self):
        """
        When PhEDEx inject call timed out, run this function.
        Since there are 3 min reponse time out in cmsweb, some times
        PhEDEx injection call times out even though the call succeeded
        In that case run the recovery mode
        1. first check whether files which injection status = 0 are in the PhEDEx.
        2. if those file exist set the in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time, with too many blocks
        the call to the PhEDEx data service on cmsweb can time out
        """
        # recover one block at a time
        for block in self.blocksToRecover:

            injectedFiles = self.phedex.getInjectedFiles(block)

            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_
        Find deletable blocks, then decide if to delete based on:
        Is there an active subscription for dataset or block ?
          If yes => set deleted=2
          If no => next check
        Has transfer to all destinations finished ?
          If yes => request block deletion, approve request, set deleted=1
          If no => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        ### logic to stop doing things to be done by RucioInjector or by DM team
        for block in list(blockDict):
            if not self._isDataTierAllowed(block):
                blockDict.pop(block)

        try:
            subscriptions = self.phedex.getSubscriptionMapping(
                *blockDict.keys())
        except:
            logging.error(
                "Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:

            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[
                    blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",
                              blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(
                        block=blockName, complete='y')['phedex']['block']
                except:
                    logging.error(
                        "Couldn't get block info from PhEDEx, retry next cycle"
                    )
                else:
                    nodes = set()
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                    if location not in nodes:
                        logging.debug(
                            "Block %s not present on %s, mark as deleted",
                            blockName, location)
                        binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                        self.markBlocksDeleted.execute(binds)
                    elif sites.issubset(nodes):
                        logging.debug(
                            "Deleting block %s from %s since it is fully transfered",
                            blockName, location)
                        if location not in deletableEntries:
                            deletableEntries[location] = {}
                        if dataset not in deletableEntries[location]:
                            deletableEntries[location][dataset] = set()
                        deletableEntries[location][dataset].add(blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:

            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:

                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)

        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_
        actual PhEDEx calls for block deletion
        """
        deletion = PhEDExDeletion(
            blocksToDelete.keys(),
            location,
            level='block',
            comments="WMAgent blocks auto-delete from %s" % location,
            blocks=blocksToDelete)

        try:
            response = self.phedex.delete(deletion)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error(
                "PhEDEx block delete/approval failed with HTTPException: %s %s",
                ex.status, ex.result)
        except Exception as ex:
            logging.error(
                "PhEDEx block delete/approval failed with Exception: %s",
                str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)

        return

    def _isDataTierAllowed(self, dataName):
        """
        Check whether data belongs to an allowed datatier to
        be handled by this component (either to inject or to
        subscribe into PhEDEx)
        :param dataName: string with the block or the dataset name
        :return: boolean, True if the tier is allowed, False otherwise
        """
        endTier = dataName.rsplit('/', 1)[1]
        endTier = endTier.split('#')[0] if '#' in endTier else endTier
        if endTier in self.listTiersToSkip:
            logging.debug(
                "Skipping data: %s because it's listed in the tiers to skip",
                dataName)
            return False
        return True

    def subscribeDatasets(self):
        """
        _subscribeDatasets_
        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list
        # The list takes care of the sorting internally
        for subInfo in unsubscribedDatasets:
            ### logic to stop doing things to be done by RucioInjector or by DM team
            if not self._isDataTierAllowed(subInfo['path']):
                continue

            site = subInfo['site']

            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(
                subInfo['path'],
                site,
                subInfo['phedex_group'],
                priority=subInfo['priority'],
                move=subInfo['move'],
                custodial=subInfo['custodial'],
                request_only=subInfo['request_only'],
                subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
                    phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():

            logging.info(
                "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                subscription.getDatasetPaths(), subscription.getNodes(),
                subscription.move, subscription.custodial,
                subscription.request_only)

            try:
                self.phedex.subscribe(subscription)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with HTTPException: %s %s",
                    ex.status, ex.result)
            except Exception as ex:
                logging.error(
                    "PhEDEx dataset subscribe failed with Exception: %s",
                    str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)

        return

    def subscribeBlocks(self):
        """
        _subscribeBlocks_
        Poll the database and subscribe blocks not yet subscribed.
        """
        logging.info("Starting subscribeBlocks method")

        unsubBlocks = self.getUnsubscribedBlocks.execute()
        # now organize those by location in order to minimize phedex requests
        # also remove blocks that this component is meant to skip
        unsubBlocks = self.organizeBlocksByLocation(unsubBlocks)

        for location, blockDict in unsubBlocks.items():
            phedexSub = PhEDExSubscription(blockDict.keys(),
                                           location,
                                           self.phedexGroup,
                                           blocks=blockDict,
                                           level="block",
                                           priority="normal",
                                           move="n",
                                           custodial="n",
                                           request_only="n",
                                           comments="WMAgent production site")
            try:
                res = self.phedex.subscribe(phedexSub)
                transferId = res['phedex']['request_created'][0]['id']
                logging.info(
                    "Subscribed %d blocks for %d datasets, to location: %s, under request ID: %s",
                    len(phedexSub.getBlocks()),
                    len(phedexSub.getDatasetPaths()), phedexSub.getNodes(),
                    transferId)
            except HTTPException as ex:
                logging.error(
                    "PhEDEx block subscription failed with HTTPException: %s %s",
                    ex.status, ex.result)
                logging.error("The subscription object was: %s",
                              str(phedexSub))
            except Exception as ex:
                logging.exception(
                    "PhEDEx block subscription failed with Exception: %s",
                    str(ex))
            else:
                binds = []
                for blockname in phedexSub.getBlocks():
                    binds.append({
                        'RULE_ID': str(transferId),
                        'BLOCKNAME': blockname
                    })
                self.setBlockRules.execute(binds)

        return

    def organizeBlocksByLocation(self, blocksLocation):
        """
        Given a list of dictionaries (with block name and location). Organize
        those blocks per location to make phedex subscription calls more
        efficient.
        Also drops blocks that we cannot subscribe, and check for valid
        phedex node names.
        :param blocksLocation: list of dictionaries
        :return: a dict of dictionaries, such as:
          {"locationA": {"datasetA": ["blockA", "blockB", ...],
                         "datasetB": ["blockA", "blockB", ...]
                        },
           "locationB": {"datasetA": ["blockA"],
                         ...
        """
        dictByLocation = {}
        for item in blocksLocation:
            ### logic to stop doing things to be done by RucioInjector or by DM team
            if not self._isDataTierAllowed(item['blockname']):
                continue

            site = item['pnn']
            if site not in self.phedexNodes[
                    'MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping block subscription for: %s" % item['blockname']
                logging.error(msg)
                continue

            dictByLocation.setdefault(site, {})

            dsetName = item['blockname'].split("#")[0]
            dictByLocation[site].setdefault(dsetName, [])
            dictByLocation[site][dsetName].append(item['blockname'])
        return dictByLocation
Esempio n. 13
0
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """

    def __init__(self, config):
        """
        ___init___
        
        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_
        
        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(
            package="WMComponent.PhEDExInjector.Database", logger=self.logger, dbinterface=myThread.dbi
        )

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        daofactory = DAOFactory(package="WMComponent.DBSBuffer.Database", logger=self.logger, dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")

        daofactory = DAOFactory(package="WMComponent.DBSUpload.Database", logger=self.logger, dbinterface=myThread.dbi)
        self.setBlockStatus = daofactory(classname="SetBlockStatus")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if not self.seMap.has_key(node["kind"]):
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s" % (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Trasform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"])

                for file in fileBlock["files"]:
                    blockSpec.addFile(file["lfn"], file["checksum"], file["size"])

        return injectionSpec.save()

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        myThread = threading.currentThread()
        uninjectedFiles = self.getUninjected.execute()

        injectedFiles = []
        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if self.seMap.has_key("Buffer") and self.seMap["Buffer"].has_key(siteName):
                    location = self.seMap["Buffer"][siteName]
                elif self.seMap.has_key("MSS") and self.seMap["MSS"].has_key(siteName):
                    location = self.seMap["MSS"][siteName]
                elif self.seMap.has_key("Disk") and self.seMap["Disk"].has_key(siteName):
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            xmlData = self.createInjectionSpec(uninjectedFiles[siteName])
            injectRes = self.phedex.injectBlocks(location, xmlData, 0, 0)

            if not injectRes.has_key("error"):
                for datasetName in uninjectedFiles[siteName]:
                    for blockName in uninjectedFiles[siteName][datasetName]:
                        for file in uninjectedFiles[siteName][datasetName][blockName]["files"]:
                            injectedFiles.append(file["lfn"])
            else:
                msg = "Error injecting data %s: %s" % (uninjectedFiles[siteName], injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)

        if len(injectedFiles) > 0:
            logging.debug("Injecting files: %s" % injectedFiles)
            self.setStatus.execute(injectedFiles, 1, conn=myThread.transaction.conn, transaction=myThread.transaction)

        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS.
        """
        myThread = threading.currentThread()
        migratedBlocks = self.getMigrated.execute()

        closedBlocks = []
        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if self.seMap.has_key("Buffer") and self.seMap["Buffer"].has_key(siteName):
                    location = self.seMap["Buffer"][siteName]
                elif self.seMap.has_key("MSS") and self.seMap["MSS"].has_key(siteName):
                    location = self.seMap["MSS"][siteName]
                elif self.seMap.has_key("Disk") and self.seMap["Disk"].has_key(siteName):
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            injectRes = self.phedex.injectBlocks(location, xmlData, 0, 0)

            if not injectRes.has_key("error"):
                for datasetName in migratedBlocks[siteName]:
                    for blockName in migratedBlocks[siteName][datasetName]:
                        closedBlocks.append(blockName)
            else:
                msg = "Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)

        for closedBlock in closedBlocks:
            logging.debug("Closing block %s" % closedBlock)
            self.setBlockStatus.execute(
                closedBlock,
                locations=None,
                open_status="Closed",
                conn=myThread.transaction.conn,
                transaction=myThread.transaction,
            )

        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        self.injectFiles()
        self.closeBlocks()

        myThread.transaction.commit()
        return
Esempio n. 14
0
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        daofactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")

        daofactory = DAOFactory(package="WMComponent.DBSUpload.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setBlockStatus = daofactory(classname="SetBlockStatus")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if not self.seMap.has_key(node["kind"]):
                self.seMap[node["kind"]] = {}

            logging.info("Adding mapping %s -> %s" %
                         (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Trasform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must be
        a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)

            for fileBlockName, fileBlock in injectionData[
                    datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName,
                                                     fileBlock["is-open"])

                for file in fileBlock["files"]:
                    blockSpec.addFile(file["lfn"], file["checksum"],
                                      file["size"])

        return injectionSpec.save()

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files in PhEDEx.
        """
        myThread = threading.currentThread()
        uninjectedFiles = self.getUninjected.execute()

        injectedFiles = []
        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None

            if siteName in self.nodeNames:
                location = siteName
            else:
                if self.seMap.has_key("Buffer") and \
                       self.seMap["Buffer"].has_key(siteName):
                    location = self.seMap["Buffer"][siteName]
                elif self.seMap.has_key("MSS") and \
                         self.seMap["MSS"].has_key(siteName):
                    location = self.seMap["MSS"][siteName]
                elif self.seMap.has_key("Disk") and \
                         self.seMap["Disk"].has_key(siteName):
                    location = self.seMap["Disk"][siteName]

            if location == None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            myThread.transaction.begin()
            xmlData = self.createInjectionSpec(uninjectedFiles[siteName])
            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except Exception, ex:
                # If we get an error here, assume that it's temporary (it usually is)
                # log it, and ignore it in the algorithm() loop
                msg = "Encountered error while attempting to inject blocks to PhEDEx.\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                raise PhEDExInjectorPassableError(msg)
            logging.info("Injection result: %s" % injectRes)

            if not injectRes.has_key("error"):
                for datasetName in uninjectedFiles[siteName]:
                    for blockName in uninjectedFiles[siteName][datasetName]:
                        for file in uninjectedFiles[siteName][datasetName][
                                blockName]["files"]:
                            injectedFiles.append(file["lfn"])
            else:
                msg = ("Error injecting data %s: %s" %
                       (uninjectedFiles[siteName], injectRes["error"]))
                logging.error(msg)
                self.sendAlert(6, msg=msg)

            self.setStatus.execute(injectedFiles,
                                   1,
                                   conn=myThread.transaction.conn,
                                   transaction=myThread.transaction)
            injectedFiles = []
            myThread.transaction.commit()

        return