def preInitialization(self):
    pollInterval = self.config.PhEDExInjector.pollInterval
    subInterval = self.config.PhEDExInjector.subscribeInterval
    logging.info("Setting poll interval to %s seconds for inject", pollInterval)

    # retrieving the node mappings is fickle and can fail quite often
    # hence only do it once (with retries) and pass it to the workers
    phedex = PhEDEx({"endpoint": self.config.PhEDExInjector.phedexurl}, "json")
    try:
        nodeMappings = phedex.getNodeMap()
    except Exception:
        time.sleep(2)
        try:
            nodeMappings = phedex.getNodeMap()
        except Exception:
            time.sleep(4)
            nodeMappings = phedex.getNodeMap()

    myThread = threading.currentThread()
    myThread.workerThreadManager.addWorker(
        PhEDExInjectorPoller(self.config, phedex, nodeMappings),
        pollInterval)

    if getattr(self.config.PhEDExInjector, "subscribeDatasets", False):
        # wait a bit for first poll cycle of PhEDExInjectorPoller to complete
        # hopefully avoids intermingled logs (which can be confusing)
        time.sleep(2)
        logging.info("Setting poll interval to %s seconds for subscribe", subInterval)
        myThread.workerThreadManager.addWorker(
            PhEDExInjectorSubscriber(self.config, phedex, nodeMappings),
            subInterval)
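The nested try/except blocks above implement a fixed-backoff retry. A minimal generic sketch of the same idea (the helper name and backoff schedule are illustrative, not part of WMCore):

import time

def callWithRetries(func, backoffs=(2, 4)):
    """Call func(), sleeping after each failure; the final attempt re-raises."""
    for delay in backoffs:
        try:
            return func()
        except Exception:
            time.sleep(delay)
    return func()

# equivalent to the nested try/except above:
# nodeMappings = callWithRetries(phedex.getNodeMap)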
def testSlcPhedexNodesEqualPhedexApiNodes(self):
    """
    For each site, verify that the stageout node specified in
    site-local-config.xml is the same as the one returned by the PhEDEx API.
    """
    os.environ["CMS_PATH"] = "/cvmfs/cms.cern.ch"
    phedex = PhEDEx()
    nodes = phedex.getNodeMap()["phedex"]["node"]

    # Make a dict for translating the se names into regular site names.
    node_map = {}
    for node in nodes:
        node_map[str(node[u"se"])] = str(node[u"name"])

    for d in os.listdir("/cvmfs/cms.cern.ch/SITECONF/"):
        # Only T0_, T1_... folders are needed
        if d[0] == "T":
            os.environ['WMAGENT_SITE_CONFIG_OVERRIDE'] = '/cvmfs/cms.cern.ch/SITECONF/%s/JobConfig/site-local-config.xml' % d
            try:
                slc = loadSiteLocalConfig()
            except SiteConfigError as e:
                print(e.args[0])
                continue  # skip sites whose SLC cannot be parsed
            phedexNode = slc.localStageOut.get("phedex-node")
            # If slc is correct, perform check
            if "se-name" in slc.localStageOut and slc.localStageOut["se-name"] in node_map and phedexNode is not None:
                self.assertEqual(phedexNode, node_map[slc.localStageOut["se-name"]],
                                 "Error: Node specified in SLC (%s) doesn't match node returned by PhEDEx API (%s)."
                                 % (phedexNode, node_map[slc.localStageOut["se-name"]]))
    return
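The se-to-site translation above can be exercised standalone; a tiny illustration with made-up node records (the field values are not real PhEDEx data):

nodes = [{u"se": u"cmssrm.fnal.gov", u"name": u"T1_US_FNAL_MSS"},
         {u"se": u"srm-cms.cern.ch", u"name": u"T2_CH_CERN"}]
node_map = {}
for node in nodes:
    node_map[str(node[u"se"])] = str(node[u"name"])
assert node_map["cmssrm.fnal.gov"] == "T1_US_FNAL_MSS"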
def keepOnlyDisks(self, locationsMap):
    phedex = PhEDEx()  # TODO use certs from the config!
    # get all the PNNs that are of kind 'Disk'
    try:
        diskLocations = set([pnn['name'] for pnn in phedex.getNodeMap()['phedex']['node'] if pnn['kind'] == 'Disk'])
    except HTTPException as ex:
        self.logger.error(ex.headers)
        raise TaskWorkerException("The CRAB3 server backend could not contact phedex to get the list of site storages.\n" +
                                  "This could be a temporary phedex glitch, please try to submit a new task (resubmit will not work)" +
                                  " and contact the experts if the error persists.\nError reason: %s" % str(ex))
        # TODO add the phedex nodes so the user can check themselves
    diskLocationsMap = {}
    for block, locations in locationsMap.iteritems():
        locations[:] = [x for x in locations if x != 'T3_CH_CERN_OpenData']  # ignore OpenData until it is accessible by CRAB
        if set(locations) & diskLocations:
            # at least some locations are disk
            diskLocationsMap[block] = locationsMap[block]
        else:
            # no locations are in the disk list, assume that they are tape
            self.tapeLocations = self.tapeLocations.union(set(locations) - diskLocations)
    locationsMap.clear()  # remove all blocks
    locationsMap.update(diskLocationsMap)  # add only blocks with disk locations
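For illustration, a minimal sketch of what keepOnlyDisks does to a locations map (block names, PNNs, and the disk set are made up):

# hypothetical inputs
diskLocations = set(["T1_US_FNAL_Disk"])
locationsMap = {"/A/B/RAW#1": ["T1_US_FNAL_Disk", "T1_US_FNAL_MSS"],
                "/A/B/RAW#2": ["T1_UK_RAL_MSS"]}  # tape only

# blocks with at least one disk location survive, the rest are dropped
diskOnly = dict((block, locs) for block, locs in locationsMap.items()
                if set(locs) & diskLocations)
assert list(diskOnly) == ["/A/B/RAW#1"]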
def testSlcPhedexNodesEqualPhedexApiNodes(self):
    """
    For each site, verify that the stageout node specified in
    site-local-config.xml is the same as the one returned by the PhEDEx API.
    """
    os.environ["CMS_PATH"] = "/cvmfs/cms.cern.ch"
    phedex = PhEDEx()
    nodes = [node[u'name'] for node in phedex.getNodeMap()["phedex"]["node"]]
    for d in os.listdir("/cvmfs/cms.cern.ch/SITECONF/"):
        # Only T0_, T1_... folders are needed
        if d[0] == "T":
            os.environ['WMAGENT_SITE_CONFIG_OVERRIDE'] = '/cvmfs/cms.cern.ch/SITECONF/%s/JobConfig/site-local-config.xml' % d
            try:
                slc = loadSiteLocalConfig()
            except SiteConfigError as e:
                print(e.args[0])
                continue  # skip sites whose SLC cannot be parsed
            phedexNode = slc.localStageOut.get("phedex-node")
            self.assertTrue(phedexNode in nodes,
                            "Error: Node specified in SLC (%s) not in list returned by PhEDEx API" % phedexNode)
    return
def keepOnlyDisks(self, locationsMap):
    phedex = PhEDEx()  # TODO use certs from the config!
    # get all the PNNs that are of kind 'Disk'
    try:
        diskLocations = set([pnn['name'] for pnn in phedex.getNodeMap()['phedex']['node'] if pnn['kind'] == 'Disk'])
    except Exception as ex:  # TODO should we catch HTTPException instead?
        self.logger.exception(ex)
        raise TaskWorkerException("The CRAB3 server backend could not contact phedex to get the list of site storages.\n" +
                                  "This could be a temporary phedex glitch, please try to submit a new task (resubmit will not work)" +
                                  " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    # TODO add the phedex nodes so the user can check themselves
def keepOnlyDisks(self, locationsMap):
    phedex = PhEDEx()  # TODO use certs from the config!
    # get all the PNNs that are of kind 'Disk'
    try:
        diskLocations = set([pnn['name'] for pnn in phedex.getNodeMap()['phedex']['node'] if pnn['kind'] == 'Disk'])
    except HTTPException as ex:
        self.logger.error(ex.headers)
        raise TaskWorkerException("The CRAB3 server backend could not contact phedex to get the list of site storages.\n" +
                                  "This could be a temporary phedex glitch, please try to submit a new task (resubmit will not work)" +
                                  " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    # TODO add the phedex nodes so the user can check themselves
    for block, locations in locationsMap.iteritems():
        locationsMap[block] = set(locations) & diskLocations
        self.otherLocations = self.otherLocations.union(set(locations) - diskLocations)
def phedexIt():
    x = PhEDEx(responseType = "json")
    phedexNodes = x.getNodeMap()['phedex']['node']
    phedexMap = {}
    sePhedexMap = {}
    knownPhedexNodes = set()
    for node in phedexNodes:
        phedexMap[node['name']] = node['kind']
        # print('%s -> %s, %s' % (node['name'], node['kind'], node['se']))
        if node['se'] not in sePhedexMap:
            sePhedexMap[node['se']] = set()
        sePhedexMap[node['se']].add(node['name'])
        knownPhedexNodes.add(node['name'])

    y = SiteDBJSON()
    seNames = y.getAllSENames()
    cmsNamesMap = {}
    for se in seNames:
        cmsNames = y.seToCMSName(se)
        cmsNamesMap[se] = cmsNames

    seToNodeMap = {}
    for se in cmsNamesMap:
        candidates = set()
        for cmsName in cmsNamesMap[se]:
            phedexNodes = y.cmsNametoPhEDExNode(cmsName)
            candidates.update(set(phedexNodes))
        validCandidates = set()
        for candidate in candidates:
            if candidate in knownPhedexNodes:
                validCandidates.add(candidate)
        seToNodeMap[se] = validCandidates
        # print('%s to %s' % (se, candidates))

    for se in sePhedexMap:
        if se not in seToNodeMap:
            print("SE: %s is not in new mapping for sites %s" % (se, list(sePhedexMap[se])))
    for se in seToNodeMap:
        if se not in sePhedexMap:
            print("SE: %s is not in old mapping for sites %s" % (se, list(seToNodeMap[se])))
            continue
    for se in set(seToNodeMap.keys()).intersection(set(sePhedexMap.keys())):
        diff = sePhedexMap[se] - seToNodeMap[se]
        if diff:
            print("%s are in old mapping but not in new for %s" % (str(list(diff)), se))
        diff = seToNodeMap[se] - sePhedexMap[se]
        if diff:
            print("%s are in new mapping but not in old for %s" % (str(list(diff)), se))
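The three comparison loops at the end amount to a symmetric difference of two {key: set} mappings; a compact generic sketch (the helper name is illustrative):

def diffMappings(old, new):
    """Yield human-readable differences between two {key: set(values)} mappings."""
    for key in set(old) - set(new):
        yield "%s only in old mapping: %s" % (key, sorted(old[key]))
    for key in set(new) - set(old):
        yield "%s only in new mapping: %s" % (key, sorted(new[key]))
    for key in set(old) & set(new):
        if old[key] != new[key]:
            yield "%s differs: old-only %s, new-only %s" % (
                key, sorted(old[key] - new[key]), sorted(new[key] - old[key]))

# for line in diffMappings(sePhedexMap, seToNodeMap):
#     print(line)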
def keepOnlyDisks(self, locationsMap):
    self.otherLocations = set()
    phedex = PhEDEx()  # TODO use certs from the config!
    # get all the PNNs that are of kind 'Disk'
    try:
        diskLocations = set([pnn['name'] for pnn in phedex.getNodeMap()['phedex']['node'] if pnn['kind'] == 'Disk'])
    except HTTPException as ex:
        self.logger.error(ex.headers)
        raise TaskWorkerException("The CRAB3 server backend could not contact phedex to get the list of site storages.\n" +
                                  "This could be a temporary phedex glitch, please try to submit a new task (resubmit will not work)" +
                                  " and contact the experts if the error persists.\nError reason: %s" % str(ex))
    # TODO add the phedex nodes so the user can check themselves
    for block, locations in locationsMap.iteritems():
        locationsMap[block] = set(locations) & diskLocations
        self.otherLocations = self.otherLocations.union(set(locations) - diskLocations)
    # remove any key whose value is an empty set
    for key, value in locationsMap.items():  # won't work in python3!
        if value == set([]):
            locationsMap.pop(key)
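As the inline comment warns, `dict.items()` returns a view in python3, so popping keys while iterating it raises a RuntimeError there; a version-independent sketch of the same cleanup:

# materialize the keys first so the dict can be mutated safely in python2 and python3
for key in list(locationsMap):
    if not locationsMap[key]:
        locationsMap.pop(key)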
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)
        self.replicaOnly = getattr(config.PhEDExInjector, "replicaOnly", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, that is what the spec will have.
        # If a CMS name is associated to many PhEDEx nodes then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names
        to PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(classname = "GetPartiallySubscribedDatasets")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        if self.safeMode:
            partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn,
                                                                              transaction = True)
            unsubscribedDatasets.extend(partiallySubscribedDatasets)
            partiallySubscribedSet = set()
            for entry in partiallySubscribedDatasets:
                partiallySubscribedSet.add(entry["path"])

        # Map the datasets to their specs
        specDatasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            workflow = unsubscribedDataset["workflow"]
            spec = unsubscribedDataset["spec"]
            if datasetPath not in specDatasetMap:
                specDatasetMap[datasetPath] = []
            specDatasetMap[datasetPath].append({"workflow": workflow, "spec": spec})

        specCache = {}
        siteMap = {}
        # Distribute the subscriptions by site, type and priority
        # This is to make as few subscriptions as possible
        # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move)
        # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean
        for dataset in specDatasetMap:
            # Aggregate all the different subscription configurations
            subInfo = {}
            for entry in specDatasetMap[dataset]:
                if not entry["spec"]:
                    # Can't use this spec, there isn't one
                    continue
                # Load spec if not in the cache
                if entry["spec"] not in specCache:
                    helper = WMWorkloadHelper()
                    try:
                        helper.load(entry["spec"])
                        specCache[entry["spec"]] = helper
                    except Exception:
                        # Couldn't load it, alert and carry on
                        msg = "Couldn't load spec: %s" % entry["spec"]
                        logging.error(msg)
                        self.sendAlert(7, msg = msg)
                        continue
                # If we are running in safe mode, we need to know if the workflow is ready
                # We have the spec, get the info
                helper = specCache[entry["spec"]]
                workflowSubInfo = helper.getSubscriptionInformation()
                datasetSubInfo = workflowSubInfo.get(dataset, None)
                if datasetSubInfo and subInfo:
                    subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"],
                                                                  datasetSubInfo["CustodialSites"])
                    subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"],
                                                                     datasetSubInfo["NonCustodialSites"])
                    subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"],
                                                                    datasetSubInfo["AutoApproveSites"])
                    subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"],
                                                             datasetSubInfo["Priority"])
                elif datasetSubInfo:
                    subInfo = datasetSubInfo

            # We now have aggregated subscription information for this dataset in subInfo
            # Distribute it by site
            if not subInfo:
                # Nothing to do, log and continue
                msg = "No subscriptions configured for dataset %s" % dataset
                logging.warning(msg)
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            # Make sure that a site is not configured both as non custodial and custodial
            # Non-custodial is believed to be the right choice
            subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"]))
            for site in subInfo["CustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                if self.safeMode and dataset not in partiallySubscribedSet:
                    tupleKey = (subInfo["Priority"], True, autoApprove, False)
                else:
                    tupleKey = (subInfo["Priority"], True, autoApprove, True)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                # Subscriptions are sorted by options, defined by tupleKey
                # The tuple key has 3 or 4 entries in this order
                # Priority, Custodial, Auto approve, Move (True) or Replica (False)
                siteMap[site][tupleKey].append(dataset)

            # If we are in safe mode and this is a partially subscribed dataset,
            # then the non-custodial were done in a previous cycle
            if self.safeMode and dataset in partiallySubscribedSet:
                self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState,
                                            conn = myThread.transaction.conn,
                                            transaction = True)
                continue

            for site in subInfo["NonCustodialSites"]:
                if site not in siteMap:
                    siteMap[site] = {}
                autoApprove = False
                if site in subInfo["AutoApproveSites"]:
                    autoApprove = True
                # Non-custodial is never move, so this tuple has only 3 entries
                # TODO: Change tuples to frozensets for clarity
                tupleKey = (subInfo["Priority"], False, autoApprove)
                if tupleKey not in siteMap[site]:
                    siteMap[site][tupleKey] = []
                siteMap[site][tupleKey].append(dataset)

            self.markSubscribed.execute(dataset, subscribed = 1,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        # Actually request the subscriptions
        for site in siteMap:
            # Check that the site is valid
            if site not in self.cmsToPhedexMap:
                msg = "Site %s doesn't appear to be valid to PhEDEx" % site
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue
            for subscriptionFlavor in siteMap[site]:
                datasets = siteMap[site][subscriptionFlavor]

                # Check that the site is valid
                isMSS = False
                if "MSS" in self.cmsToPhedexMap[site]:
                    isMSS = True
                    phedexNode = self.cmsToPhedexMap[site]["MSS"]
                else:
                    phedexNode = self.cmsToPhedexMap[site]["Disk"]

                logging.info("Subscribing %s to %s" % (datasets, site))
                options = {"custodial": "n", "requestOnly": "y",
                           "priority": subscriptionFlavor[0].lower(),
                           "move": "n"}
                if subscriptionFlavor[1] and isMSS:
                    # Custodial subscriptions are only allowed in MSS nodes
                    # If custodial is requested on a non-MSS node it falls back to a non-custodial subscription
                    options["custodial"] = "y"
                    if subscriptionFlavor[3] and not self.replicaOnly:
                        options["move"] = "y"
                if subscriptionFlavor[2]:
                    options["requestOnly"] = "n"
                logging.info("Request options: Custodial - %s, Move - %s, Request Only - %s" % (options["custodial"].upper(),
                                                                                                options["move"].upper(),
                                                                                                options["requestOnly"].upper()))
                newSubscription = PhEDExSubscription(datasets, phedexNode, self.group,
                                                     **options)
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           newSubscription.getDatasetPaths())
                logging.debug(str(xmlData))
                self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
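The siteMap built by algorithm() groups datasets so that all datasets sharing a site and an option tuple go out in a single PhEDEx request. A minimal illustration of the structure (the site and dataset names are made up):

# siteMap: site -> {(priority, custodial, autoApprove[, move]): [datasets]}
siteMap = {}
site, tupleKey = "T1_US_FNAL", ("Normal", True, False, True)
siteMap.setdefault(site, {}).setdefault(tupleKey, []).append("/Primary/Proc-v1/RECO")
# every dataset appended under the same tupleKey ends up in one subscription request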
class AccountantWorker(WMConnectionBase):
    """
    Class that actually does the work of parsing FWJRs for the Accountant
    Run through ProcessPool
    """
    def __init__(self, config):
        """
        __init__

        Create all DAO objects that are used by this class.
        """
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                        logger = myThread.logger,
                                        dbinterface = myThread.dbi)

        self.getOutputMapAction = self.daofactory(classname = "Jobs.GetOutputMap")
        self.bulkAddToFilesetAction = self.daofactory(classname = "Fileset.BulkAddByLFN")
        self.bulkParentageAction = self.daofactory(classname = "Files.AddBulkParentage")
        self.getJobTypeAction = self.daofactory(classname = "Jobs.GetType")
        self.getParentInfoAction = self.daofactory(classname = "Files.GetParentInfo")
        self.setParentageByJob = self.daofactory(classname = "Files.SetParentageByJob")
        self.setParentageByMergeJob = self.daofactory(classname = "Files.SetParentageByMergeJob")
        self.setFileRunLumi = self.daofactory(classname = "Files.AddRunLumi")
        self.setFileLocation = self.daofactory(classname = "Files.SetLocationByLFN")
        self.setFileAddChecksum = self.daofactory(classname = "Files.AddChecksumByLFN")
        self.addFileAction = self.daofactory(classname = "Files.Add")
        self.jobCompleteInput = self.daofactory(classname = "Jobs.CompleteInput")
        self.setBulkOutcome = self.daofactory(classname = "Jobs.SetOutcomeBulk")
        self.getWorkflowSpec = self.daofactory(classname = "Workflow.GetSpecAndNameFromTask")
        self.getJobInfoByID = self.daofactory(classname = "Jobs.LoadFromID")
        self.getFullJobInfo = self.daofactory(classname = "Jobs.LoadForErrorHandler")
        self.getJobTaskNameAction = self.daofactory(classname = "Jobs.GetFWJRTaskName")

        self.dbsStatusAction = self.dbsDaoFactory(classname = "DBSBufferFiles.SetStatus")
        self.dbsParentStatusAction = self.dbsDaoFactory(classname = "DBSBufferFiles.GetParentStatus")
        self.dbsChildrenAction = self.dbsDaoFactory(classname = "DBSBufferFiles.GetChildren")
        self.dbsCreateFiles = self.dbsDaoFactory(classname = "DBSBufferFiles.Add")
        self.dbsSetLocation = self.dbsDaoFactory(classname = "DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = self.dbsDaoFactory(classname = "DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = self.dbsDaoFactory(classname = "DBSBufferFiles.AddChecksumByLFN")
        self.dbsSetRunLumi = self.dbsDaoFactory(classname = "DBSBufferFiles.AddRunLumi")
        self.dbsGetWorkflow = self.dbsDaoFactory(classname = "ListWorkflow")

        self.dbsLFNHeritage = self.dbsDaoFactory(classname = "DBSBufferFiles.BulkHeritageParent")

        self.stateChanger = ChangeState(config)

        # Decide whether or not to attach jobReport to returned value
        self.returnJobReport = getattr(config.JobAccountant, 'returnReportFromWorker', False)

        # Store location for the specs for DBS
        self.specDir = getattr(config.JobAccountant, 'specDir', None)

        # ACDC service
        self.dataCollection = DataCollectionService(url = config.ACDC.couchurl,
                                                    database = config.ACDC.database)

        jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url']
        jobDBName = config.JobStateMachine.couchDBName
        jobCouchdb = CouchServer(jobDBurl)
        self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL, appName = "WMStatsAgent")

        # Hold data for later commit
        self.dbsFilesToCreate = []
        self.wmbsFilesToBuild = []
        self.wmbsMergeFilesToBuild = []
        self.fileLocation = None
        self.mergedOutputFiles = []
        self.listOfJobsToSave = []
        self.listOfJobsToFail = []
        self.filesetAssoc = []
        self.parentageBinds = []
        self.parentageBindsForMerge = []
        self.jobsWithSkippedFiles = {}
        self.count = 0
        self.datasetAlgoID = collections.deque(maxlen = 1000)
        self.datasetAlgoPaths = collections.deque(maxlen = 1000)
        self.dbsLocations = set()
        self.workflowIDs = collections.deque(maxlen = 1000)
        self.workflowPaths = collections.deque(maxlen = 1000)

        self.phedex = PhEDEx()
        self.locLists = self.phedex.getNodeMap()

        return

    def reset(self):
        """
        _reset_

        Reset all global vars between runs.
        """
        self.dbsFilesToCreate = []
        self.wmbsFilesToBuild = []
        self.wmbsMergeFilesToBuild = []
        self.fileLocation = None
        self.mergedOutputFiles = []
        self.listOfJobsToSave = []
        self.listOfJobsToFail = []
        self.filesetAssoc = []
        self.parentageBinds = []
        self.parentageBindsForMerge = []
        self.jobsWithSkippedFiles = {}
        gc.collect()
        return

    def loadJobReport(self, parameters):
        """
        _loadJobReport_

        Given a framework job report on disk, load it and return a
        FwkJobReport instance.  If there is any problem loading or parsing the
        framework job report return None.
        """
        # The jobReportPath may be prefixed with "file://" which needs to be
        # removed so it doesn't confuse the FwkJobReport() parser.
        jobReportPath = parameters.get("fwjr_path", None)
        if not jobReportPath:
            logging.error("Bad FwkJobReport Path: %s" % jobReportPath)
            return self.createMissingFWKJR(parameters, 99999, "FWJR path is empty")

        jobReportPath = jobReportPath.replace("file://", "")
        if not os.path.exists(jobReportPath):
            logging.error("Bad FwkJobReport Path: %s" % jobReportPath)
            return self.createMissingFWKJR(parameters, 99999, 'Cannot find file in jobReport path: %s' % jobReportPath)

        if os.path.getsize(jobReportPath) == 0:
            logging.error("Empty FwkJobReport: %s" % jobReportPath)
            return self.createMissingFWKJR(parameters, 99998, 'jobReport of size 0: %s ' % jobReportPath)

        jobReport = Report()

        try:
            jobReport.load(jobReportPath)
        except Exception as ex:
            msg = "Error loading jobReport %s\n" % jobReportPath
            msg += str(ex)
            logging.error(msg)
            logging.debug("Failing job: %s\n" % parameters)
            return self.createMissingFWKJR(parameters, 99997, 'Cannot load jobReport')

        if len(jobReport.listSteps()) == 0:
            logging.error("FwkJobReport with no steps: %s" % jobReportPath)
            return self.createMissingFWKJR(parameters, 99997, 'jobReport with no steps: %s ' % jobReportPath)

        return jobReport

    def isTaskExistInFWJR(self, jobReport, jobStatus):
        """
        If taskName is not available in the FWJR, then tries to recover it
        getting data from the SQL database.
        """
        if not jobReport.getTaskName():
            logging.warning("Trying to recover a corrupted FWJR for a %s job with job id %s" % (jobStatus,
                                                                                                jobReport.getJobID()))
            jobInfo = self.getJobTaskNameAction.execute(jobId = jobReport.getJobID(),
                                                        conn = self.getDBConn(),
                                                        transaction = self.existingTransaction())

            jobReport.setTaskName(jobInfo['taskName'])
            jobReport.save(jobInfo['fwjr_path'])
            if not jobReport.getTaskName():
                msg = "Report to developers. Failed to recover corrupted fwjr for %s job id %s" % (jobStatus,
                                                                                                   jobReport.getJobID())
                raise AccountantWorkerException(msg)
            else:
                logging.info("TaskName '%s' successfully recovered and added to fwjr id %s." % (jobReport.getTaskName(),
                                                                                                jobReport.getJobID()))

        return

    def __call__(self, parameters):
        """
        __call__

        Handle a completed job.  The parameters dictionary will contain the
        job ID and the path to the framework job report.
        """
        returnList = []
        self.reset()

        for job in parameters:
            logging.info("Handling %s" % job["fwjr_path"])

            # Load the job and set the ID
            fwkJobReport = self.loadJobReport(job)
            fwkJobReport.setJobID(job['id'])

            jobSuccess = self.handleJob(jobID = job["id"],
                                        fwkJobReport = fwkJobReport)

            if self.returnJobReport:
                returnList.append({'id': job["id"], 'jobSuccess': jobSuccess,
                                   'jobReport': fwkJobReport})
            else:
                returnList.append({'id': job["id"], 'jobSuccess': jobSuccess})

            self.count += 1

        self.beginTransaction()

        # Now things done at the end of the job
        # Do what we can with WMBS files
        self.handleWMBSFiles(self.wmbsFilesToBuild, self.parentageBinds)

        # handle merge files separately since parentage need to set
        # separately to support robust merge
        self.handleWMBSFiles(self.wmbsMergeFilesToBuild, self.parentageBindsForMerge)

        # Create DBSBufferFiles
        self.createFilesInDBSBuffer()

        # Handle filesetAssoc
        if len(self.filesetAssoc) > 0:
            self.bulkAddToFilesetAction.execute(binds = self.filesetAssoc,
                                                conn = self.getDBConn(),
                                                transaction = self.existingTransaction())

        # Move successful jobs to successful
        if len(self.listOfJobsToSave) > 0:
            idList = [x['id'] for x in self.listOfJobsToSave]
            outcomeBinds = [{'jobid': x['id'], 'outcome': x['outcome']} for x in self.listOfJobsToSave]
            self.setBulkOutcome.execute(binds = outcomeBinds,
                                        conn = self.getDBConn(),
                                        transaction = self.existingTransaction())

            self.jobCompleteInput.execute(id = idList,
                                          lfnsToSkip = self.jobsWithSkippedFiles,
                                          conn = self.getDBConn(),
                                          transaction = self.existingTransaction())
            self.stateChanger.propagate(self.listOfJobsToSave, "success", "complete")

        # If we have failed jobs, fail them
        if len(self.listOfJobsToFail) > 0:
            outcomeBinds = [{'jobid': x['id'], 'outcome': x['outcome']} for x in self.listOfJobsToFail]
            self.setBulkOutcome.execute(binds = outcomeBinds,
                                        conn = self.getDBConn(),
                                        transaction = self.existingTransaction())
            self.stateChanger.propagate(self.listOfJobsToFail, "jobfailed", "complete")

        # Arrange WMBS parentage
        if len(self.parentageBinds) > 0:
            self.setParentageByJob.execute(binds = self.parentageBinds,
                                           conn = self.getDBConn(),
                                           transaction = self.existingTransaction())
        if len(self.parentageBindsForMerge) > 0:
            self.setParentageByMergeJob.execute(binds = self.parentageBindsForMerge,
                                                conn = self.getDBConn(),
                                                transaction = self.existingTransaction())

        # Straighten out DBS Parentage
        if len(self.mergedOutputFiles) > 0:
            self.handleDBSBufferParentage()

        if len(self.jobsWithSkippedFiles) > 0:
            self.handleSkippedFiles()

        self.commitTransaction(existingTransaction = False)

        return returnList

    def outputFilesetsForJob(self, outputMap, merged, moduleLabel):
        """
        _outputFilesetsForJob_

        Determine if the file should be placed in any other fileset.  Note
        that this will not return the JobGroup output fileset as all jobs will
        have their output placed there.
        """
        if moduleLabel not in outputMap:
            logging.info("Output module label missing from output map.")
            return []

        outputFilesets = []
        for outputFileset in outputMap[moduleLabel]:
            if merged == False and outputFileset["output_fileset"] != None:
                outputFilesets.append(outputFileset["output_fileset"])
            else:
                if outputFileset["merged_output_fileset"] != None:
                    outputFilesets.append(outputFileset["merged_output_fileset"])

        return outputFilesets

    def addFileToDBS(self, jobReportFile, task):
        """
        _addFileToDBS_

        Add a file that was output from a job to the DBS buffer.
        """
        datasetInfo = jobReportFile["dataset"]

        dbsFile = DBSBufferFile(lfn = jobReportFile["lfn"],
                                size = jobReportFile["size"],
                                events = jobReportFile["events"],
                                checksums = jobReportFile["checksums"],
                                status = "NOTUPLOADED")
        dbsFile.setAlgorithm(appName = datasetInfo["applicationName"],
                             appVer = datasetInfo["applicationVersion"],
                             appFam = jobReportFile["module_label"],
                             psetHash = "GIBBERISH",
                             configContent = jobReportFile.get('configURL'))

        dbsFile.setDatasetPath("/%s/%s/%s" % (datasetInfo["primaryDataset"],
                                              datasetInfo["processedDataset"],
                                              datasetInfo["dataTier"]))
        dbsFile.setValidStatus(validStatus = jobReportFile.get("validStatus", None))
        dbsFile.setProcessingVer(ver = jobReportFile.get('processingVer', None))
        dbsFile.setAcquisitionEra(era = jobReportFile.get('acquisitionEra', None))
        dbsFile.setGlobalTag(globalTag = jobReportFile.get('globalTag', None))
        # TODO need to find where to get the prep id
        dbsFile.setPrepID(prep_id = jobReportFile.get('prep_id', None))
        dbsFile['task'] = task

        for run in jobReportFile["runs"]:
            newRun = Run(runNumber = run.run)
            newRun.extend(run.lumis)
            dbsFile.addRun(newRun)

        dbsFile.setLocation(pnn = list(jobReportFile["locations"])[0], immediateSave = False)
        self.dbsFilesToCreate.append(dbsFile)
        return

    def findDBSParents(self, lfn):
        """
        _findDBSParents_

        Find the parent of the file in DBS.  This is meant to be called
        recursively.
        """
        parentsInfo = self.getParentInfoAction.execute([lfn],
                                                       conn = self.getDBConn(),
                                                       transaction = self.existingTransaction())
        newParents = set()
        for parentInfo in parentsInfo:
            # This will catch straight to merge files that do not have redneck
            # parents.  We will mark the straight to merge file from the job
            # as a child of the merged parent.
            if int(parentInfo["merged"]) == 1:
                newParents.add(parentInfo["lfn"])

            elif parentInfo['gpmerged'] == None:
                continue

            # Handle the files that result from merge jobs that aren't redneck
            # children.  We have to setup parentage and then check on whether
            # or not this file has any redneck children and update their
            # parentage information.
            elif int(parentInfo["gpmerged"]) == 1:
                newParents.add(parentInfo["gplfn"])

            # If that didn't work, we've reached the great-grandparents
            # and we have to work via recursion
            else:
                parentSet = self.findDBSParents(lfn = parentInfo['gplfn'])
                for parent in parentSet:
                    newParents.add(parent)

        return newParents

    def addFileToWMBS(self, jobType, fwjrFile, jobMask, task, jobID = None):
        """
        _addFileToWMBS_

        Add a file that was produced in a job to WMBS.
        """
        fwjrFile["first_event"] = jobMask["FirstEvent"]

        if fwjrFile["first_event"] == None:
            fwjrFile["first_event"] = 0

        if jobType == "Merge" and fwjrFile["module_label"] != "logArchive":
            setattr(fwjrFile["fileRef"], 'merged', True)
            fwjrFile["merged"] = True

        wmbsFile = self.createFileFromDataStructsFile(file = fwjrFile, jobID = jobID)

        if jobType == "Merge":
            self.wmbsMergeFilesToBuild.append(wmbsFile)
        else:
            self.wmbsFilesToBuild.append(wmbsFile)

        if fwjrFile["merged"]:
            self.addFileToDBS(fwjrFile, task)

        return wmbsFile

    def _mapLocation(self, fwkJobReport):
        for file in fwkJobReport.getAllFileRefs():
            if file and hasattr(file, 'location'):
                file.location = self.phedex.getBestNodeName(file.location, self.locLists)

    def handleJob(self, jobID, fwkJobReport):
        """
        _handleJob_

        Figure out if a job was successful or not, handle it appropriately
        (parse FWJR, update WMBS) and return the success status as a boolean.
        """
        jobSuccess = fwkJobReport.taskSuccessful()

        outputMap = self.getOutputMapAction.execute(jobID = jobID,
                                                    conn = self.getDBConn(),
                                                    transaction = self.existingTransaction())

        jobType = self.getJobTypeAction.execute(jobID = jobID,
                                                conn = self.getDBConn(),
                                                transaction = self.existingTransaction())

        if jobSuccess:
            fileList = fwkJobReport.getAllFiles()

            # consistency check comparing outputMap to fileList
            # they should match except for some limited special cases
            outputModules = set([])
            for fwjrFile in fileList:
                outputModules.add(fwjrFile['outputModule'])
            if set(outputMap.keys()) == outputModules:
                pass
            elif jobType == "LogCollect" and len(outputMap.keys()) == 0 and outputModules == set(['LogCollect']):
                pass
            elif jobType == "Merge" and set(outputMap.keys()) == set(['Merged', 'MergedError', 'logArchive']) and outputModules == set(['Merged', 'logArchive']):
                pass
            elif jobType == "Merge" and set(outputMap.keys()) == set(['Merged', 'MergedError', 'logArchive']) and outputModules == set(['MergedError', 'logArchive']):
                pass
            elif jobType == "Express" and set(outputMap.keys()).difference(outputModules) == set(['write_RAW']):
                pass
            else:
                failJob = True
                if jobType in ["Processing", "Production"]:
                    cmsRunSteps = 0
                    for step in fwkJobReport.listSteps():
                        if step.startswith("cmsRun"):
                            cmsRunSteps += 1
                    if cmsRunSteps > 1:
                        failJob = False

                if failJob:
                    jobSuccess = False
                    logging.error("Job %d , list of expected outputModules does not match job report, failing job", jobID)
                    logging.debug("Job %d , expected outputModules %s", jobID, sorted(outputMap.keys()))
                    logging.debug("Job %d , fwjr outputModules %s", jobID, sorted(outputModules))
                    fileList = fwkJobReport.getAllFilesFromStep(step = 'logArch1')
                else:
                    logging.debug("Job %d , list of expected outputModules does not match job report, accepted for multi-step CMSSW job", jobID)
        else:
            fileList = fwkJobReport.getAllFilesFromStep(step = 'logArch1')

        if jobSuccess:
            logging.info("Job %d , handle successful job", jobID)
        else:
            logging.error("Job %d , bad jobReport, failing job", jobID)

        # make sure the task name is present in FWJR (recover from WMBS if needed)
        if len(fileList) > 0:
            if jobSuccess:
                self.isTaskExistInFWJR(fwkJobReport, "success")
            else:
                self.isTaskExistInFWJR(fwkJobReport, "failed")

        # special check for LogCollect jobs
        skipLogCollect = False
        if jobSuccess and jobType == "LogCollect":
            for fwjrFile in fileList:
                try:
                    # this assumes there is only one file for LogCollect jobs, not sure what happens if that changes
                    self.associateLogCollectToParentJobsInWMStats(fwkJobReport, fwjrFile["lfn"], fwkJobReport.getTaskName())
                except Exception as ex:
                    skipLogCollect = True
                    logging.error("Error occurred: associating log collect location, will try again\n %s" % str(ex))
                    break

        # now handle the job (unless the special LogCollect check failed)
        if not skipLogCollect:
            wmbsJob = Job(id = jobID)
            wmbsJob.load()
            outputID = wmbsJob.loadOutputID()
            wmbsJob.getMask()

            wmbsJob["fwjr"] = fwkJobReport

            if jobSuccess:
                wmbsJob["outcome"] = "success"
            else:
                wmbsJob["outcome"] = "failure"

            for fwjrFile in fileList:
                logging.debug("Job %d , register output %s", jobID, fwjrFile["lfn"])

                wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"],
                                              jobID = jobID, task = fwkJobReport.getTaskName())
                merged = fwjrFile['merged']
                moduleLabel = fwjrFile["module_label"]

                if merged:
                    self.mergedOutputFiles.append(wmbsFile)

                self.filesetAssoc.append({"lfn": wmbsFile["lfn"], "fileset": outputID})

                # LogCollect jobs have no output fileset
                if jobType != "LogCollect":
                    outputFilesets = self.outputFilesetsForJob(outputMap, merged, moduleLabel)
                    for outputFileset in outputFilesets:
                        self.filesetAssoc.append({"lfn": wmbsFile["lfn"], "fileset": outputFileset})

            # Check if the job had any skipped files, put them in ACDC containers
            # We assume full file processing (no job masks)
            if jobSuccess:
                skippedFiles = fwkJobReport.getAllSkippedFiles()
                if skippedFiles:
                    self.jobsWithSkippedFiles[jobID] = skippedFiles

            # Only save once job is done, and we're sure we made it through okay
            self._mapLocation(wmbsJob['fwjr'])
            if jobSuccess:
                self.listOfJobsToSave.append(wmbsJob)
            else:
                self.listOfJobsToFail.append(wmbsJob)

        return jobSuccess

    def associateLogCollectToParentJobsInWMStats(self, fwkJobReport, logAchiveLFN, task):
        """
        _associateLogCollectToParentJobsInWMStats_

        Associate a logArchive output to its parent job
        """
        inputFileList = fwkJobReport.getAllInputFiles()
        requestName = task.split('/')[1]
        keys = []
        for inputFile in inputFileList:
            keys.append([requestName, inputFile["lfn"]])
        resultRows = self.fwjrCouchDB.loadView("FWJRDump", 'jobsByOutputLFN',
                                               options = {"stale": "update_after"},
                                               keys = keys)['rows']
        if len(resultRows) > 0:
            # get data from wmbs
            parentWMBSJobIDs = []
            for row in resultRows:
                parentWMBSJobIDs.append({"jobid": row["value"]})
            # update Job doc in wmstats
            results = self.getJobInfoByID.execute(parentWMBSJobIDs)
            parentJobNames = []

            if isinstance(results, list):
                for jobInfo in results:
                    parentJobNames.append(jobInfo['name'])
            else:
                parentJobNames.append(results['name'])

            self.localWMStats.updateLogArchiveLFN(parentJobNames, logAchiveLFN)
        else:
            # TODO: if the couch db is consistent with DB this should be removed (checking resultRows > 0)
            # It needs to be failed and retried.
            logging.error("job report is missing for updating log archive mapping\n Input file list\n %s" % inputFileList)

        return

    def createMissingFWKJR(self, parameters, errorCode = 999,
                           errorDescription = 'Failure of unknown type'):
        """
        _createMissingFWJR_

        Create a missing FWJR if the report can't be found by the code in the
        path location.
        """
        report = Report()
        report.addError("cmsRun1", 84, errorCode, errorDescription)
        report.data.cmsRun1.status = "Failed"
        return report

    def createFilesInDBSBuffer(self):
        """
        _createFilesInDBSBuffer_

        It does the actual job of creating things in DBSBuffer
        WARNING: This assumes all files in a job have the same final location
        """
        if len(self.dbsFilesToCreate) == 0:
            # Whoops, nothing to do!
            return

        dbsFileTuples = []
        dbsFileLoc = []
        dbsCksumBinds = []
        runLumiBinds = []
        selfChecksums = None
        jobLocations = set()

        for dbsFile in self.dbsFilesToCreate:
            # Append a tuple in the format specified by DBSBufferFiles.Add
            # Also run insertDatasetAlgo

            assocID = None
            datasetAlgoPath = '%s:%s:%s:%s:%s:%s:%s:%s' % (dbsFile['datasetPath'],
                                                           dbsFile["appName"],
                                                           dbsFile["appVer"],
                                                           dbsFile["appFam"],
                                                           dbsFile["psetHash"],
                                                           dbsFile['processingVer'],
                                                           dbsFile['acquisitionEra'],
                                                           dbsFile['globalTag'])
            # First, check if this is in the cache
            if datasetAlgoPath in self.datasetAlgoPaths:
                for da in self.datasetAlgoID:
                    if da['datasetAlgoPath'] == datasetAlgoPath:
                        assocID = da['assocID']
                        break

            if not assocID:
                # Then we have to get it ourselves
                try:
                    assocID = dbsFile.insertDatasetAlgo()
                    self.datasetAlgoPaths.append(datasetAlgoPath)
                    self.datasetAlgoID.append({'datasetAlgoPath': datasetAlgoPath,
                                               'assocID': assocID})
                except WMException:
                    raise
                except Exception as ex:
                    msg = "Unhandled exception while inserting datasetAlgo: %s\n" % datasetAlgoPath
                    msg += str(ex)
                    logging.error(msg)
                    raise AccountantWorkerException(msg)

            # Associate the workflow to the file using the taskPath and the requestName
            # TODO: debug why it happens and then drop/recover these cases automatically
            taskPath = dbsFile.get('task')
            if not taskPath:
                msg = "Can't do workflow association, report this error to a developer.\n"
                msg += "DbsFile : %s" % str(dbsFile)
                raise AccountantWorkerException(msg)
            workflowName = taskPath.split('/')[1]
            workflowPath = '%s:%s' % (workflowName, taskPath)
            if workflowPath in self.workflowPaths:
                for wf in self.workflowIDs:
                    if wf['workflowPath'] == workflowPath:
                        workflowID = wf['workflowID']
                        break
            else:
                result = self.dbsGetWorkflow.execute(workflowName, taskPath, conn = self.getDBConn(),
                                                     transaction = self.existingTransaction())
                workflowID = result['id']
                self.workflowPaths.append(workflowPath)
                self.workflowIDs.append({'workflowPath': workflowPath, 'workflowID': workflowID})

            lfn = dbsFile['lfn']
            selfChecksums = dbsFile['checksums']
            jobLocation = dbsFile.getLocations()[0]
            jobLocations.add(jobLocation)
            dbsFileTuples.append((lfn, dbsFile['size'],
                                  dbsFile['events'], assocID,
                                  dbsFile['status'], workflowID))

            dbsFileLoc.append({'lfn': lfn, 'sename': jobLocation})
            if dbsFile['runs']:
                runLumiBinds.append({'lfn': lfn, 'runs': dbsFile['runs']})

            if selfChecksums:
                # If we have checksums we have to create a bind
                # For each different checksum
                for entry in selfChecksums.keys():
                    dbsCksumBinds.append({'lfn': lfn, 'cksum': selfChecksums[entry],
                                          'cktype': entry})

        try:
            diffLocation = jobLocations.difference(self.dbsLocations)

            for jobLocation in diffLocation:
                self.dbsInsertLocation.execute(siteName = jobLocation,
                                               conn = self.getDBConn(),
                                               transaction = self.existingTransaction())
                self.dbsLocations.add(jobLocation)

            self.dbsCreateFiles.execute(files = dbsFileTuples,
                                        conn = self.getDBConn(),
                                        transaction = self.existingTransaction())

            self.dbsSetLocation.execute(binds = dbsFileLoc,
                                        conn = self.getDBConn(),
                                        transaction = self.existingTransaction())

            self.dbsSetChecksum.execute(bulkList = dbsCksumBinds,
                                        conn = self.getDBConn(),
                                        transaction = self.existingTransaction())

            if len(runLumiBinds) > 0:
                self.dbsSetRunLumi.execute(file = runLumiBinds,
                                           conn = self.getDBConn(),
                                           transaction = self.existingTransaction())
        except WMException:
            raise
        except Exception as ex:
            msg = "Got exception while inserting files into DBSBuffer!\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("Listing binds:")
            logging.debug("jobLocation: %s\n" % jobLocation)
            logging.debug("dbsFiles: %s\n" % dbsFileTuples)
            logging.debug("dbsFileLoc: %s\n" % dbsFileLoc)
            logging.debug("Checksum binds: %s\n" % dbsCksumBinds)
            logging.debug("RunLumi binds: %s\n" % runLumiBinds)
            raise AccountantWorkerException(msg)

        # Now that we've created those files, clear the list
        self.dbsFilesToCreate = []
        return

    def handleWMBSFiles(self, wmbsFilesToBuild, parentageBinds):
        """
        _handleWMBSFiles_

        Do what can be done in bulk, in bulk.
        """
        if len(wmbsFilesToBuild) == 0:
            # Nothing to do
            return

        runLumiBinds = []
        fileCksumBinds = []
        fileLocations = []
        fileCreate = []

        for wmbsFile in wmbsFilesToBuild:
            lfn = wmbsFile['lfn']
            if lfn == None:
                continue

            selfChecksums = wmbsFile['checksums']
            # by jobType add to different parentage relation
            # if it is the merge job, don't include the parentage on failed input files.
            # otherwise parentage is set for all input files.
            parentageBinds.append({'child': lfn, 'jobid': wmbsFile['jid']})

            if wmbsFile['runs']:
                runLumiBinds.append({'lfn': lfn, 'runs': wmbsFile['runs']})

            if len(wmbsFile.getLocations()) > 0:
                fileLocations.append({'lfn': lfn, 'location': wmbsFile.getLocations()[0]})

            if selfChecksums:
                # If we have checksums we have to create a bind
                # For each different checksum
                for entry in selfChecksums.keys():
                    fileCksumBinds.append({'lfn': lfn, 'cksum': selfChecksums[entry],
                                           'cktype': entry})

            fileCreate.append([lfn,
                               wmbsFile['size'],
                               wmbsFile['events'],
                               None,
                               wmbsFile["first_event"],
                               wmbsFile['merged']])

        if len(fileCreate) == 0:
            return

        try:
            self.addFileAction.execute(files = fileCreate,
                                       conn = self.getDBConn(),
                                       transaction = self.existingTransaction())

            if runLumiBinds:
                self.setFileRunLumi.execute(file = runLumiBinds,
                                            conn = self.getDBConn(),
                                            transaction = self.existingTransaction())

            self.setFileAddChecksum.execute(bulkList = fileCksumBinds,
                                            conn = self.getDBConn(),
                                            transaction = self.existingTransaction())

            self.setFileLocation.execute(lfn = fileLocations,
                                         location = self.fileLocation,
                                         conn = self.getDBConn(),
                                         transaction = self.existingTransaction())
        except WMException:
            raise
        except Exception as ex:
            msg = "Error while adding files to WMBS!\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("Printing binds: \n")
            logging.debug("FileCreate binds: %s\n" % fileCreate)
            logging.debug("Runlumi binds: %s\n" % runLumiBinds)
            logging.debug("Checksum binds: %s\n" % fileCksumBinds)
            logging.debug("FileLocation binds: %s\n" % fileLocations)
            raise AccountantWorkerException(msg)

        # Clear out finished files
        wmbsFilesToBuild = []
        return

    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            pnn = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error("Have more than one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            pnn = file["locations"][0]
        else:
            pnn = file["locations"]

        wmbsFile["locations"] = set()

        if pnn != None:
            wmbsFile.setLocation(pnn = pnn, immediateSave = False)
        wmbsFile['jid'] = jobID

        return wmbsFile

    def handleDBSBufferParentage(self):
        """
        _handleDBSBufferParentage_

        Handle all the DBSBuffer Parentage in bulk if you can
        """
        outputLFNs = [f['lfn'] for f in self.mergedOutputFiles]
        bindList = []
        for lfn in outputLFNs:
            newParents = self.findDBSParents(lfn = lfn)
            for parentLFN in newParents:
                bindList.append({'child': lfn, 'parent': parentLFN})

        # Now all the parents should exist
        # Commit them to DBSBuffer
        logging.info("About to commit all DBSBuffer Heritage information")
        logging.info(len(bindList))

        if len(bindList) > 0:
            try:
                self.dbsLFNHeritage.execute(binds = bindList,
                                            conn = self.getDBConn(),
                                            transaction = self.existingTransaction())
            except WMException:
                raise
            except Exception as ex:
                msg = "Error while trying to handle the DBS LFN heritage\n"
                msg += str(ex)
                msg += "BindList: %s" % bindList
                logging.error(msg)
                raise AccountantWorkerException(msg)
        return

    def handleSkippedFiles(self):
        """
        _handleSkippedFiles_

        Handle all the skipped files in bulk.  The way it handles the skipped
        files imposes an important restriction: skipped files should have been
        processed by a single job in the task and no job mask exists in it.
        This is suitable for jobs using ParentlessMergeBySize/FileBased/MinFileBased
        splitting algorithms.  Here ACDC records are created and the files are
        moved to wmbs_sub_files_failed from completed.
        """
        jobList = self.getFullJobInfo.execute([{'jobid': x} for x in self.jobsWithSkippedFiles.keys()],
                                              fileSelection = self.jobsWithSkippedFiles,
                                              conn = self.getDBConn(),
                                              transaction = self.existingTransaction())
        self.dataCollection.failedJobs(jobList, useMask = False)
        return
class RequestQuery:

    def __init__(self, config):
        self.br = Browser()
        self.config = config

        # Initialise connections
        self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        Get the list of available CMSSW releases and
        return a dictionary of ScramArchitecture by CMSSW.
        """
        # Set temporary connection to the server and get the response from cmstags
        url = "https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML"
        br = Browser()
        br.set_handle_robots(False)
        response = br.open(url)
        soup = BeautifulSoup(response.read())

        # Dictionary form
        # {'CMSSW_X_X_X': ['slc5_amd64_gcc472'], ...}
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all("architecture"):
            for cmssw in arch.find_all("project"):
                # CMSSW release
                cmsswLabel = cmssw.get("label").encode("ascii", "ignore")
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel] = []
                # ScramArch related to this CMSSW release
                archName = arch.get("name").encode("ascii", "ignore")
                archByCmssw[cmsswLabel].append(archName)

        return archByCmssw

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list of block origin sites.
        """
        local_dbs = dbs_url.split("/")[5]
        if local_dbs == "phys01":
            response = self.dbsPhys01.listBlocks(detail=True, dataset=data)
        elif local_dbs == "phys02":
            response = self.dbsPhys02.listBlocks(detail=True, dataset=data)
        elif local_dbs == "phys03":
            response = self.dbsPhys03.listBlocks(detail=True, dataset=data)

        seList = []
        for block in response:
            if block["origin_site_name"] not in seList:
                seList.append(block["origin_site_name"])

        siteNames = []
        for node in self.nodeMappings["phedex"]["node"]:
            if node["se"] in seList:
                siteNames.append(node["name"])

        return siteNames, seList

    def phEDExNodetocmsName(self, nodeList):
        """
        Convert a list of PhEDEx node names to a list of CMS names.
        """
        names = []
        for node in nodeList:
            name = node.replace("_MSS", "").replace("_Disk", "").replace("_Buffer", "").replace("_Export", "")
            if name not in names:
                names.append(name)
        return names

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source dbs url.
        If it is not set, then set the global tag to 'UNKNOWN'.
        """
        globalTag = ""
        local_dbs = dbs_url.split("/")[5]
        if local_dbs == "phys01":
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == "phys02":
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == "phys03":
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)

        globalTag = response[0]["global_tag"]
        # GlobalTag cannot be empty
        if globalTag == "":
            globalTag = "UNKNOWN"

        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the dbs url, otherwise False.
        """
        local_dbs = dbs_url.split("/")[5]
        if local_dbs == "phys01":
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == "phys02":
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == "phys03":
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)

        # An empty response means that the dataset is not at the url
        if not response:
            return False
        else:
            return True

    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary keyed by value.
        """
        d = {}
        for item in control.items:
            value = item.attrs["value"]
            label = item.attrs["label"]
            d[value] = label
        return d

    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary keyed by label.
        """
        d = {}
        for item in control.items:
            value = item.attrs["value"]
            label = item.attrs["label"]
            d[label] = value
        return d

    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version=1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the needed information
        for creating a request on ReqMgr.
        Input:
            - ticket: the ticket number, for instance 110773 on
              https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, for example "phys01" for
              https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        """
        scramArchByCMSSW = self.getScramArchByCMSSW()
        self.nodeMappings = self.phedex.getNodeMap()

        task = ticket
        print("Processing ticket: %s" % task)

        # splitting input dataset
        input_primary_dataset = input_dataset.split("/")[1].replace(" ", "")
        input_processed_dataset = input_dataset.split("/")[2].replace(" ", "")
        data_tier = input_dataset.split("/")[3].replace(" ", "")

        # Transform input value to a valid DBS url
        # dbs_url = "https://cmsweb.cern.ch/dbs/prod/" + dbs_url + "/DBSReader"
        dbs_url = dbs_base_url + dbs_url + "/DBSReader"
        release_id = cmssw_release

        # check if deprecated release was used
        release = cmssw_release
        # check if release has no ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check if dataset is not at dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
        except:
            raise Exception("Error on ticket %s, dataset %s not available at %s" % (task, input_dataset, dbs_url))

        if not data_at_url:
            raise Exception("Error on ticket %s, dataset %s not available at %s" % (task, input_dataset, dbs_url))

        ## Get Physics Group
        group_squad = "cms-storeresults-" + group_name.replace("-", "_").lower()

        ## Get Dataset Version
        dataset_version = str(version)

        # Set default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name) == 0:
            new_dataset = input_processed_dataset.replace(group_name, "", 1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = "_".join(stripped_dataset)

        # Get dataset site info:
        phedex_map, se_names = self.getDatasetOriginSites(dbs_url, input_dataset)
        sites = self.phEDExNodetocmsName(phedex_map)

        infoDict = {}
        # Build store results json
        # First add all the default values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged"
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-", "_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url

        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version
        infoDict["SiteWhitelist"] = list(sites)

        # Create report for Migration2Global
        report = {}

        # Fill json file, if status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = "y"
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = list(sites)
        report["se_names"] = list(se_names)

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        # check if file already exists
        filename = self.config["ComponentDir"] + "/Ticket_" + str(task) + ".json"
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, "w")
            request = {"createRequest": infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()

        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"] + "/Ticket_" + str(task) + ".json"
        if os.access(filename, os.F_OK):
            os.remove(filename)

        return

    def printReport(self, report):
        """
        Print out a report
        """
        print("%20s %5s %10s %50s %50s" % ("Ticket", "json", "local DBS", "Sites", "se_names"))
        print("%20s %5s %10s %50s %50s" % ("-" * 20, "-" * 5, "-" * 10, "-" * 50, "-" * 50))

        json = report["json"]
        ticket = report["task"]
        # status = report["ticketStatus"]
        localUrl = report["localUrl"].split("/")[5]
        site = ", ".join(report["sites"])
        se_names = ", ".join(report["se_names"])
        print("%20s %5s %10s %50s %50s" % (ticket, json, localUrl, site, se_names))
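A hedged usage sketch of the class above (the config dict, ticket number, dataset, and group are made up, and `dbs_base_url` must already be defined in the module):

# hypothetical driver; values are illustrative only
config = {"ComponentDir": "/tmp/storeresults"}
rq = RequestQuery(config)
report = rq.createRequestJSON(ticket="110773",
                              input_dataset="/Primary/group-Processed-v1/USER",
                              dbs_url="phys03",
                              cmssw_release="CMSSW_5_3_9",
                              group_name="group")
rq.printReport(report)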
def testNormalModeSubscriptions(self): """ _testNormalModeSubscriptions_ Tests that we can make custodial/non-custodial subscriptions on normal operation mode, this time we don't need WMBS for anything. All is subscribed in one go. Check that the requests are correct. """ self.stuffDatabase() config = self.createConfig() phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") try: nodeMappings = phedex.getNodeMap() except Exception: time.sleep(2) try: nodeMappings = phedex.getNodeMap() except Exception: time.sleep(4) nodeMappings = phedex.getNodeMap() subscriber = PhEDExInjectorSubscriber(config, phedex, nodeMappings) subscriber.setup({}) subscriber.algorithm({}) phedexInstance = subscriber.phedex subscriptions = phedexInstance.subRequests # Let's check /BogusPrimary/Run2012Z-PromptReco-v1/RECO # According to the spec, this should be custodial at T1_US_FNAL # Non-custodial at T1_UK_RAL and T3_CO_Uniandes # Autoapproved in all sites # Priority is normal self.assertTrue(self.testDatasetA in subscriptions, "Dataset A was not subscribed") subInfoA = subscriptions[self.testDatasetA] self.assertEqual(len(subInfoA), 3, "Dataset A was not subscribed to all sites") for subInfo in subInfoA: site = subInfo["node"] self.assertEqual(subInfo["priority"], "normal", "Wrong priority for subscription") if site == "T1_UK_RAL_MSS" or site == "T3_CO_Uniandes": self.assertEqual(subInfo["custodial"], "n", "Wrong custodiality for dataset A at %s" % subInfo["node"]) self.assertEqual(subInfo["request_only"], "n", "Wrong requestOnly for dataset A at %s" % subInfo["node"]) self.assertEqual(subInfo["move"], "n", "Wrong subscription type for dataset A at %s" % subInfo["node"]) elif site == "T1_US_FNAL_MSS": self.assertEqual(subInfo["custodial"], "y", "Wrong custodiality for dataset A at %s" % subInfo["node"]) self.assertEqual(subInfo["request_only"], "n", "Wrong requestOnly for dataset A at %s" % subInfo["node"]) self.assertEqual(subInfo["move"], "y", "Wrong subscription type for dataset A at %s" % subInfo["node"]) else: self.fail("Dataset A was subscribed to a wrong site %s" % site) # Now check /BogusPrimary/CRUZET11-v1/RAW # According to the spec, this is not custodial anywhere # Non-custodial at T1_UK_RAL and T2_CH_CERN # Request only at both sites and with high priority self.assertTrue(self.testDatasetB in subscriptions, "Dataset B was not subscribed") subInfoB = subscriptions[self.testDatasetB] self.assertEqual(len(subInfoB), 2, "Dataset B was not subscribed to all sites") for subInfo in subInfoB: site = subInfo["node"] self.assertEqual(subInfo["priority"], "high", "Wrong priority for subscription") if site == "T1_UK_RAL_MSS" or site == "T2_CH_CERN": self.assertEqual(subInfo["custodial"], "n", "Wrong custodiality for dataset B at %s" % subInfo["node"]) self.assertEqual(subInfo["request_only"], "y", "Wrong requestOnly for dataset B at %s" % subInfo["node"]) self.assertEqual(subInfo["move"], "n", "Wrong subscription type for dataset B at %s" % subInfo["node"]) else: self.fail("Dataset B was subscribed to a wrong site %s" % site) myThread = threading.currentThread() result = myThread.dbi.processData("SELECT COUNT(*) FROM dbsbuffer_dataset_subscription where subscribed = 1")[0].fetchall() self.assertEqual(result[0][0], 5, "Not all datasets were marked as subscribed") result = myThread.dbi.processData("SELECT site FROM dbsbuffer_dataset_subscription where subscribed = 0")[0].fetchall() self.assertEqual(result[0][0], "T1_IT_CNAF", "A non-valid CMS site was subscribed") # Reset and run again and make sure 
that no duplicate subscriptions are created
        myThread.dbi.processData("UPDATE dbsbuffer_dataset_subscription SET subscribed = 0")
        subscriber.algorithm({})
        self.assertEqual(len(subscriptions[self.testDatasetA]), 3)
        self.assertEqual(len(subscriptions[self.testDatasetB]), 2)
        return
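# The nested try/except blocks used above (and in the component's
# preInitialization) to fetch the PhEDEx node map repeat the same
# sleep-and-retry pattern three times. A minimal helper could factor it out.
# This is an illustrative sketch only: the function name is hypothetical and
# not part of WMCore; it assumes `time` is already imported (it is used
# above), and the doubling backoff mirrors the 2s/4s sleeps in the original.
def getNodeMapWithRetries(phedex, retries=3, delay=2):
    """Call phedex.getNodeMap(), retrying with a doubling sleep on failure."""
    for attempt in range(retries):
        try:
            return phedex.getNodeMap()
        except Exception:
            if attempt == retries - 1:
                raise
            time.sleep(delay)
            delay *= 2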
class AccountantWorker(WMConnectionBase): """ Class that actually does the work of parsing FWJRs for the Accountant Run through ProcessPool """ def __init__(self, config): """ __init__ Create all DAO objects that are used by this class. """ WMConnectionBase.__init__(self, "WMCore.WMBS") myThread = threading.currentThread() self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.getOutputMapAction = self.daofactory( classname="Jobs.GetOutputMap") self.bulkAddToFilesetAction = self.daofactory( classname="Fileset.BulkAddByLFN") self.bulkParentageAction = self.daofactory( classname="Files.AddBulkParentage") self.getJobTypeAction = self.daofactory(classname="Jobs.GetType") self.getParentInfoAction = self.daofactory( classname="Files.GetParentInfo") self.setParentageByJob = self.daofactory( classname="Files.SetParentageByJob") self.setParentageByMergeJob = self.daofactory( classname="Files.SetParentageByMergeJob") self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi") self.setFileLocation = self.daofactory( classname="Files.SetLocationByLFN") self.setFileAddChecksum = self.daofactory( classname="Files.AddChecksumByLFN") self.addFileAction = self.daofactory(classname="Files.Add") self.jobCompleteInput = self.daofactory(classname="Jobs.CompleteInput") self.setBulkOutcome = self.daofactory(classname="Jobs.SetOutcomeBulk") self.getWorkflowSpec = self.daofactory( classname="Workflow.GetSpecAndNameFromTask") self.getJobInfoByID = self.daofactory(classname="Jobs.LoadFromID") self.getFullJobInfo = self.daofactory( classname="Jobs.LoadForErrorHandler") self.getJobTaskNameAction = self.daofactory( classname="Jobs.GetFWJRTaskName") self.pnn_to_psn = self.daofactory( classname="Locations.GetPNNtoPSNMapping").execute() self.dbsStatusAction = self.dbsDaoFactory( classname="DBSBufferFiles.SetStatus") self.dbsParentStatusAction = self.dbsDaoFactory( classname="DBSBufferFiles.GetParentStatus") self.dbsChildrenAction = self.dbsDaoFactory( classname="DBSBufferFiles.GetChildren") self.dbsCreateFiles = self.dbsDaoFactory( classname="DBSBufferFiles.Add") self.dbsSetLocation = self.dbsDaoFactory( classname="DBSBufferFiles.SetLocationByLFN") self.dbsInsertLocation = self.dbsDaoFactory( classname="DBSBufferFiles.AddLocation") self.dbsSetChecksum = self.dbsDaoFactory( classname="DBSBufferFiles.AddChecksumByLFN") self.dbsSetRunLumi = self.dbsDaoFactory( classname="DBSBufferFiles.AddRunLumi") self.dbsGetWorkflow = self.dbsDaoFactory(classname="ListWorkflow") self.dbsLFNHeritage = self.dbsDaoFactory( classname="DBSBufferFiles.BulkHeritageParent") self.stateChanger = ChangeState(config) # Decide whether or not to attach jobReport to returned value self.returnJobReport = getattr(config.JobAccountant, 'returnReportFromWorker', False) # Store location for the specs for DBS self.specDir = getattr(config.JobAccountant, 'specDir', None) # maximum RAW EDM size for Repack output before data is put into Error dataset and skips PromptReco self.maxAllowedRepackOutputSize = getattr( config.JobAccountant, 'maxAllowedRepackOutputSize', 12 * 1024 * 1024 * 1024) # ACDC service self.dataCollection = DataCollectionService( url=config.ACDC.couchurl, database=config.ACDC.database) jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url'] jobDBName = config.JobStateMachine.couchDBName jobCouchdb = CouchServer(jobDBurl) self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName) self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL, 
appName="WMStatsAgent") # Hold data for later commital self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.wmbsMergeFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] self.parentageBinds = [] self.parentageBindsForMerge = [] self.jobsWithSkippedFiles = {} self.count = 0 self.datasetAlgoID = collections.deque(maxlen=1000) self.datasetAlgoPaths = collections.deque(maxlen=1000) self.dbsLocations = set() self.workflowIDs = collections.deque(maxlen=1000) self.workflowPaths = collections.deque(maxlen=1000) self.phedex = PhEDEx() self.locLists = self.phedex.getNodeMap() return def reset(self): """ _reset_ Reset all global vars between runs. """ self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.wmbsMergeFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] self.parentageBinds = [] self.parentageBindsForMerge = [] self.jobsWithSkippedFiles = {} gc.collect() return def loadJobReport(self, parameters): """ _loadJobReport_ Given a framework job report on disk, load it and return a FwkJobReport instance. If there is any problem loading or parsing the framework job report return None. """ # The jobReportPath may be prefixed with "file://" which needs to be # removed so it doesn't confuse the FwkJobReport() parser. jobReportPath = parameters.get("fwjr_path", None) if not jobReportPath: logging.error("Bad FwkJobReport Path: %s" % jobReportPath) return self.createMissingFWKJR(parameters, 99999, "FWJR path is empty") jobReportPath = jobReportPath.replace("file://", "") if not os.path.exists(jobReportPath): logging.error("Bad FwkJobReport Path: %s" % jobReportPath) return self.createMissingFWKJR( parameters, 99999, 'Cannot find file in jobReport path: %s' % jobReportPath) if os.path.getsize(jobReportPath) == 0: logging.error("Empty FwkJobReport: %s" % jobReportPath) return self.createMissingFWKJR( parameters, 99998, 'jobReport of size 0: %s ' % jobReportPath) jobReport = Report() try: jobReport.load(jobReportPath) except Exception as ex: msg = "Error loading jobReport %s\n" % jobReportPath msg += str(ex) logging.error(msg) logging.debug("Failing job: %s\n" % parameters) return self.createMissingFWKJR(parameters, 99997, 'Cannot load jobReport') if len(jobReport.listSteps()) == 0: logging.error("FwkJobReport with no steps: %s" % jobReportPath) return self.createMissingFWKJR( parameters, 99997, 'jobReport with no steps: %s ' % jobReportPath) return jobReport def isTaskExistInFWJR(self, jobReport, jobStatus): """ If taskName is not available in the FWJR, then tries to recover it getting data from the SQL database. """ if not jobReport.getTaskName(): logging.warning( "Trying to recover a corrupted FWJR for a %s job with job id %s" % (jobStatus, jobReport.getJobID())) jobInfo = self.getJobTaskNameAction.execute( jobId=jobReport.getJobID(), conn=self.getDBConn(), transaction=self.existingTransaction()) jobReport.setTaskName(jobInfo['taskName']) jobReport.save(jobInfo['fwjr_path']) if not jobReport.getTaskName(): msg = "Report to developers. Failed to recover corrupted fwjr for %s job id %s" % ( jobStatus, jobReport.getJobID()) raise AccountantWorkerException(msg) else: logging.info( "TaskName '%s' successfully recovered and added to fwjr id %s." % (jobReport.getTaskName(), jobReport.getJobID())) return def __call__(self, parameters): """ __call__ Handle a completed job. 
The parameters dictionary will contain the job ID and the path to the framework job report. """ returnList = [] self.reset() for job in parameters: logging.info("Handling %s" % job["fwjr_path"]) # Load the job and set the ID fwkJobReport = self.loadJobReport(job) fwkJobReport.setJobID(job['id']) jobSuccess = self.handleJob(jobID=job["id"], fwkJobReport=fwkJobReport) if self.returnJobReport: returnList.append({ 'id': job["id"], 'jobSuccess': jobSuccess, 'jobReport': fwkJobReport }) else: returnList.append({'id': job["id"], 'jobSuccess': jobSuccess}) self.count += 1 self.beginTransaction() # Now things done at the end of the job # Do what we can with WMBS files self.handleWMBSFiles(self.wmbsFilesToBuild, self.parentageBinds) # handle merge files separately since parentage need to set # separately to support robust merge self.handleWMBSFiles(self.wmbsMergeFilesToBuild, self.parentageBindsForMerge) # Create DBSBufferFiles self.createFilesInDBSBuffer() # Handle filesetAssoc if len(self.filesetAssoc) > 0: self.bulkAddToFilesetAction.execute( binds=self.filesetAssoc, conn=self.getDBConn(), transaction=self.existingTransaction()) # Move successful jobs to successful if len(self.listOfJobsToSave) > 0: idList = [x['id'] for x in self.listOfJobsToSave] outcomeBinds = [{ 'jobid': x['id'], 'outcome': x['outcome'] } for x in self.listOfJobsToSave] self.setBulkOutcome.execute(binds=outcomeBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) self.jobCompleteInput.execute( id=idList, lfnsToSkip=self.jobsWithSkippedFiles, conn=self.getDBConn(), transaction=self.existingTransaction()) self.stateChanger.propagate(self.listOfJobsToSave, "success", "complete") # If we have failed jobs, fail them if len(self.listOfJobsToFail) > 0: outcomeBinds = [{ 'jobid': x['id'], 'outcome': x['outcome'] } for x in self.listOfJobsToFail] self.setBulkOutcome.execute(binds=outcomeBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) self.stateChanger.propagate(self.listOfJobsToFail, "jobfailed", "complete") # Arrange WMBS parentage if len(self.parentageBinds) > 0: self.setParentageByJob.execute( binds=self.parentageBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) if len(self.parentageBindsForMerge) > 0: self.setParentageByMergeJob.execute( binds=self.parentageBindsForMerge, conn=self.getDBConn(), transaction=self.existingTransaction()) # Straighten out DBS Parentage if len(self.mergedOutputFiles) > 0: self.handleDBSBufferParentage() if len(self.jobsWithSkippedFiles) > 0: self.handleSkippedFiles() self.commitTransaction(existingTransaction=False) return returnList def outputFilesetsForJob(self, outputMap, merged, moduleLabel): """ _outputFilesetsForJob_ Determine if the file should be placed in any other fileset. Note that this will not return the JobGroup output fileset as all jobs will have their output placed there. """ if moduleLabel not in outputMap: logging.info("Output module label missing from output map.") return [] outputFilesets = [] for outputFileset in outputMap[moduleLabel]: if merged == False and outputFileset["output_fileset"] != None: outputFilesets.append(outputFileset["output_fileset"]) else: if outputFileset["merged_output_fileset"] != None: outputFilesets.append( outputFileset["merged_output_fileset"]) return outputFilesets def addFileToDBS(self, jobReportFile, task, errorDataset=False): """ _addFileToDBS_ Add a file that was output from a job to the DBS buffer. 
""" datasetInfo = jobReportFile["dataset"] dbsFile = DBSBufferFile(lfn=jobReportFile["lfn"], size=jobReportFile["size"], events=jobReportFile["events"], checksums=jobReportFile["checksums"], status="NOTUPLOADED") dbsFile.setAlgorithm(appName=datasetInfo["applicationName"], appVer=datasetInfo["applicationVersion"], appFam=jobReportFile["module_label"], psetHash="GIBBERISH", configContent=jobReportFile.get('configURL')) if errorDataset: dbsFile.setDatasetPath( "/%s/%s/%s" % (datasetInfo["primaryDataset"] + "-Error", datasetInfo["processedDataset"], datasetInfo["dataTier"])) else: dbsFile.setDatasetPath( "/%s/%s/%s" % (datasetInfo["primaryDataset"], datasetInfo["processedDataset"], datasetInfo["dataTier"])) dbsFile.setValidStatus( validStatus=jobReportFile.get("validStatus", None)) dbsFile.setProcessingVer(ver=jobReportFile.get('processingVer', None)) dbsFile.setAcquisitionEra( era=jobReportFile.get('acquisitionEra', None)) dbsFile.setGlobalTag(globalTag=jobReportFile.get('globalTag', None)) #TODO need to find where to get the prep id dbsFile.setPrepID(prep_id=jobReportFile.get('prep_id', None)) dbsFile['task'] = task for run in jobReportFile["runs"]: newRun = Run(runNumber=run.run) newRun.extend(run.lumis) dbsFile.addRun(newRun) dbsFile.setLocation(pnn=list(jobReportFile["locations"])[0], immediateSave=False) self.dbsFilesToCreate.append(dbsFile) return def findDBSParents(self, lfn): """ _findDBSParents_ Find the parent of the file in DBS This is meant to be called recursively """ parentsInfo = self.getParentInfoAction.execute( [lfn], conn=self.getDBConn(), transaction=self.existingTransaction()) newParents = set() for parentInfo in parentsInfo: # This will catch straight to merge files that do not have redneck # parents. We will mark the straight to merge file from the job # as a child of the merged parent. if int(parentInfo["merged"]) == 1: newParents.add(parentInfo["lfn"]) elif parentInfo['gpmerged'] == None: continue # Handle the files that result from merge jobs that aren't redneck # children. We have to setup parentage and then check on whether or # not this file has any redneck children and update their parentage # information. elif int(parentInfo["gpmerged"]) == 1: newParents.add(parentInfo["gplfn"]) # If that didn't work, we've reached the great-grandparents # And we have to work via recursion else: parentSet = self.findDBSParents(lfn=parentInfo['gplfn']) for parent in parentSet: newParents.add(parent) return newParents def addFileToWMBS(self, jobType, fwjrFile, jobMask, task, jobID=None): """ _addFileToWMBS_ Add a file that was produced in a job to WMBS. 
""" fwjrFile["first_event"] = jobMask["FirstEvent"] if fwjrFile["first_event"] == None: fwjrFile["first_event"] = 0 if jobType == "Merge" and fwjrFile["module_label"] != "logArchive": setattr(fwjrFile["fileRef"], 'merged', True) fwjrFile["merged"] = True wmbsFile = self.createFileFromDataStructsFile(file=fwjrFile, jobID=jobID) if jobType == "Merge": self.wmbsMergeFilesToBuild.append(wmbsFile) else: self.wmbsFilesToBuild.append(wmbsFile) if fwjrFile["merged"]: self.addFileToDBS( fwjrFile, task, jobType == "Repack" and fwjrFile["size"] > self.maxAllowedRepackOutputSize) return wmbsFile def _mapLocation(self, fwkJobReport): for file in fwkJobReport.getAllFileRefs(): if file and hasattr(file, 'location'): file.location = self.phedex.getBestNodeName( file.location, self.locLists) def handleJob(self, jobID, fwkJobReport): """ _handleJob_ Figure out if a job was successful or not, handle it appropriately (parse FWJR, update WMBS) and return the success status as a boolean """ jobSuccess = fwkJobReport.taskSuccessful() outputMap = self.getOutputMapAction.execute( jobID=jobID, conn=self.getDBConn(), transaction=self.existingTransaction()) jobType = self.getJobTypeAction.execute( jobID=jobID, conn=self.getDBConn(), transaction=self.existingTransaction()) if jobSuccess: fileList = fwkJobReport.getAllFiles() # consistency check comparing outputMap to fileList # they should match except for some limited special cases outputModules = set([]) for fwjrFile in fileList: outputModules.add(fwjrFile['outputModule']) if set(outputMap.keys()) == outputModules: pass elif jobType == "LogCollect" and len( outputMap.keys()) == 0 and outputModules == set( ['LogCollect']): pass elif jobType == "Merge" and set(outputMap.keys()) == set([ 'Merged', 'MergedError', 'logArchive' ]) and outputModules == set(['Merged', 'logArchive']): pass elif jobType == "Merge" and set(outputMap.keys()) == set([ 'Merged', 'MergedError', 'logArchive' ]) and outputModules == set(['MergedError', 'logArchive']): pass elif jobType == "Express" and set( outputMap.keys()).difference(outputModules) == set( ['write_RAW']): pass else: failJob = True if jobType in ["Processing", "Production"]: cmsRunSteps = 0 for step in fwkJobReport.listSteps(): if step.startswith("cmsRun"): cmsRunSteps += 1 if cmsRunSteps > 1: failJob = False if failJob: jobSuccess = False logging.error( "Job %d , list of expected outputModules does not match job report, failing job", jobID) logging.debug("Job %d , expected outputModules %s", jobID, sorted(outputMap.keys())) logging.debug("Job %d , fwjr outputModules %s", jobID, sorted(outputModules)) fileList = fwkJobReport.getAllFilesFromStep( step='logArch1') else: logging.debug( "Job %d , list of expected outputModules does not match job report, accepted for multi-step CMSSW job", jobID) else: fileList = fwkJobReport.getAllFilesFromStep(step='logArch1') if jobSuccess: logging.info("Job %d , handle successful job", jobID) else: logging.warning("Job %d , bad jobReport, failing job", jobID) # make sure the task name is present in FWJR (recover from WMBS if needed) if len(fileList) > 0: if jobSuccess: self.isTaskExistInFWJR(fwkJobReport, "success") else: self.isTaskExistInFWJR(fwkJobReport, "failed") # special check for LogCollect jobs skipLogCollect = False if jobSuccess and jobType == "LogCollect": for fwjrFile in fileList: try: # this assumes there is only one file for LogCollect jobs, not sure what happend if that changes self.associateLogCollectToParentJobsInWMStats( fwkJobReport, fwjrFile["lfn"], fwkJobReport.getTaskName()) 
except Exception as ex: skipLogCollect = True logging.error( "Error occurred: associating log collect location, will try again\n %s" % str(ex)) break # now handle the job (unless the special LogCollect check failed) if not skipLogCollect: wmbsJob = Job(id=jobID) wmbsJob.load() outputID = wmbsJob.loadOutputID() wmbsJob.getMask() wmbsJob["fwjr"] = fwkJobReport if jobSuccess: wmbsJob["outcome"] = "success" else: wmbsJob["outcome"] = "failure" for fwjrFile in fileList: logging.debug("Job %d , register output %s", jobID, fwjrFile["lfn"]) wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"], jobID=jobID, task=fwkJobReport.getTaskName()) merged = fwjrFile['merged'] moduleLabel = fwjrFile["module_label"] if merged: self.mergedOutputFiles.append(wmbsFile) self.filesetAssoc.append({ "lfn": wmbsFile["lfn"], "fileset": outputID }) # LogCollect jobs have no output fileset if jobType == "LogCollect": pass # Repack jobs that wrote too large merged output skip output filesets elif jobType == "Repack" and merged and wmbsFile[ "size"] > self.maxAllowedRepackOutputSize: pass else: outputFilesets = self.outputFilesetsForJob( outputMap, merged, moduleLabel) for outputFileset in outputFilesets: self.filesetAssoc.append({ "lfn": wmbsFile["lfn"], "fileset": outputFileset }) # Check if the job had any skipped files, put them in ACDC containers # We assume full file processing (no job masks) if jobSuccess: skippedFiles = fwkJobReport.getAllSkippedFiles() if skippedFiles and jobType not in ['LogCollect', 'Cleanup']: self.jobsWithSkippedFiles[jobID] = skippedFiles # Only save once job is done, and we're sure we made it through okay self._mapLocation(wmbsJob['fwjr']) if jobSuccess: self.listOfJobsToSave.append(wmbsJob) else: self.listOfJobsToFail.append(wmbsJob) return jobSuccess def associateLogCollectToParentJobsInWMStats(self, fwkJobReport, logAchiveLFN, task): """ _associateLogCollectToParentJobsInWMStats_ Associate a logArchive output to its parent job """ inputFileList = fwkJobReport.getAllInputFiles() requestName = task.split('/')[1] keys = [] for inputFile in inputFileList: keys.append([requestName, inputFile["lfn"]]) resultRows = self.fwjrCouchDB.loadView( "FWJRDump", 'jobsByOutputLFN', options={"stale": "update_after"}, keys=keys)['rows'] if len(resultRows) > 0: #get data from wmbs parentWMBSJobIDs = [] for row in resultRows: parentWMBSJobIDs.append({"jobid": row["value"]}) #update Job doc in wmstats results = self.getJobInfoByID.execute(parentWMBSJobIDs) parentJobNames = [] if isinstance(results, list): for jobInfo in results: parentJobNames.append(jobInfo['name']) else: parentJobNames.append(results['name']) self.localWMStats.updateLogArchiveLFN(parentJobNames, logAchiveLFN) else: #TODO: if the couch db is consistent with DB this should be removed (checking resultRow > 0) #It need to be failed and retried. logging.error( "job report is missing for updating log archive mapping\n Input file list\n %s" % inputFileList) return def createMissingFWKJR(self, parameters, errorCode=999, errorDescription='Failure of unknown type'): """ _createMissingFWJR_ Create a missing FWJR if the report can't be found by the code in the path location. 
""" report = Report() report.addError("cmsRun1", 84, errorCode, errorDescription) report.data.cmsRun1.status = "Failed" return report def createFilesInDBSBuffer(self): """ _createFilesInDBSBuffer_ It does the actual job of creating things in DBSBuffer WARNING: This assumes all files in a job have the same final location """ if len(self.dbsFilesToCreate) == 0: # Whoops, nothing to do! return dbsFileTuples = [] dbsFileLoc = [] dbsCksumBinds = [] runLumiBinds = [] selfChecksums = None jobLocations = set() for dbsFile in self.dbsFilesToCreate: # Append a tuple in the format specified by DBSBufferFiles.Add # Also run insertDatasetAlgo assocID = None datasetAlgoPath = '%s:%s:%s:%s:%s:%s:%s:%s' % ( dbsFile['datasetPath'], dbsFile["appName"], dbsFile["appVer"], dbsFile["appFam"], dbsFile["psetHash"], dbsFile['processingVer'], dbsFile['acquisitionEra'], dbsFile['globalTag']) # First, check if this is in the cache if datasetAlgoPath in self.datasetAlgoPaths: for da in self.datasetAlgoID: if da['datasetAlgoPath'] == datasetAlgoPath: assocID = da['assocID'] break if not assocID: # Then we have to get it ourselves try: assocID = dbsFile.insertDatasetAlgo() self.datasetAlgoPaths.append(datasetAlgoPath) self.datasetAlgoID.append({ 'datasetAlgoPath': datasetAlgoPath, 'assocID': assocID }) except WMException: raise except Exception as ex: msg = "Unhandled exception while inserting datasetAlgo: %s\n" % datasetAlgoPath msg += str(ex) logging.error(msg) raise AccountantWorkerException(msg) # Associate the workflow to the file using the taskPath and the requestName # TODO: debug why it happens and then drop/recover these cases automatically taskPath = dbsFile.get('task') if not taskPath: msg = "Can't do workflow association, report this error to a developer.\n" msg += "DbsFile : %s" % str(dbsFile) raise AccountantWorkerException(msg) workflowName = taskPath.split('/')[1] workflowPath = '%s:%s' % (workflowName, taskPath) if workflowPath in self.workflowPaths: for wf in self.workflowIDs: if wf['workflowPath'] == workflowPath: workflowID = wf['workflowID'] break else: result = self.dbsGetWorkflow.execute( workflowName, taskPath, conn=self.getDBConn(), transaction=self.existingTransaction()) workflowID = result['id'] self.workflowPaths.append(workflowPath) self.workflowIDs.append({ 'workflowPath': workflowPath, 'workflowID': workflowID }) lfn = dbsFile['lfn'] selfChecksums = dbsFile['checksums'] jobLocation = dbsFile.getLocations()[0] jobLocations.add(jobLocation) dbsFileTuples.append((lfn, dbsFile['size'], dbsFile['events'], assocID, dbsFile['status'], workflowID)) dbsFileLoc.append({'lfn': lfn, 'pnn': jobLocation}) if dbsFile['runs']: runLumiBinds.append({'lfn': lfn, 'runs': dbsFile['runs']}) if selfChecksums: # If we have checksums we have to create a bind # For each different checksum for entry in selfChecksums.keys(): dbsCksumBinds.append({ 'lfn': lfn, 'cksum': selfChecksums[entry], 'cktype': entry }) try: diffLocation = jobLocations.difference(self.dbsLocations) for jobLocation in diffLocation: self.dbsInsertLocation.execute( siteName=jobLocation, conn=self.getDBConn(), transaction=self.existingTransaction()) self.dbsLocations.add(jobLocation) self.dbsCreateFiles.execute(files=dbsFileTuples, conn=self.getDBConn(), transaction=self.existingTransaction()) self.dbsSetLocation.execute(binds=dbsFileLoc, conn=self.getDBConn(), transaction=self.existingTransaction()) self.dbsSetChecksum.execute(bulkList=dbsCksumBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) if len(runLumiBinds) > 0: 
self.dbsSetRunLumi.execute( file=runLumiBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) except WMException: raise except Exception as ex: msg = "Got exception while inserting files into DBSBuffer!\n" msg += str(ex) logging.error(msg) logging.debug("Listing binds:") logging.debug("jobLocation: %s\n" % jobLocation) logging.debug("dbsFiles: %s\n" % dbsFileTuples) logging.debug("dbsFileLoc: %s\n" % dbsFileLoc) logging.debug("Checksum binds: %s\n" % dbsCksumBinds) logging.debug("RunLumi binds: %s\n" % runLumiBinds) raise AccountantWorkerException(msg) # Now that we've created those files, clear the list self.dbsFilesToCreate = [] return def handleWMBSFiles(self, wmbsFilesToBuild, parentageBinds): """ _handleWMBSFiles_ Do what can be done in bulk in bulk """ if len(wmbsFilesToBuild) == 0: # Nothing to do return runLumiBinds = [] fileCksumBinds = [] fileLocations = [] fileCreate = [] for wmbsFile in wmbsFilesToBuild: lfn = wmbsFile['lfn'] if lfn == None: continue selfChecksums = wmbsFile['checksums'] # by jobType add to different parentage relation # if it is the merge job, don't include the parentage on failed input files. # otherwise parentage is set for all input files. parentageBinds.append({'child': lfn, 'jobid': wmbsFile['jid']}) if wmbsFile['runs']: runLumiBinds.append({'lfn': lfn, 'runs': wmbsFile['runs']}) if len(wmbsFile.getLocations()) > 0: outpnn = wmbsFile.getLocations()[0] if self.pnn_to_psn.get(outpnn, None): fileLocations.append({'lfn': lfn, 'location': outpnn}) else: msg = "PNN doesn't exist in wmbs_location_sename table: %s (investigate)" % outpnn logging.error(msg) raise AccountantWorkerException(msg) if selfChecksums: # If we have checksums we have to create a bind # For each different checksum for entry in selfChecksums.keys(): fileCksumBinds.append({ 'lfn': lfn, 'cksum': selfChecksums[entry], 'cktype': entry }) fileCreate.append([ lfn, wmbsFile['size'], wmbsFile['events'], None, wmbsFile["first_event"], wmbsFile['merged'] ]) if len(fileCreate) == 0: return try: self.addFileAction.execute(files=fileCreate, conn=self.getDBConn(), transaction=self.existingTransaction()) if runLumiBinds: self.setFileRunLumi.execute( file=runLumiBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) self.setFileAddChecksum.execute( bulkList=fileCksumBinds, conn=self.getDBConn(), transaction=self.existingTransaction()) self.setFileLocation.execute( lfn=fileLocations, location=self.fileLocation, conn=self.getDBConn(), transaction=self.existingTransaction()) except WMException: raise except Exception as ex: msg = "Error while adding files to WMBS!\n" msg += str(ex) logging.error(msg) logging.debug("Printing binds: \n") logging.debug("FileCreate binds: %s\n" % fileCreate) logging.debug("Runlumi binds: %s\n" % runLumiBinds) logging.debug("Checksum binds: %s\n" % fileCksumBinds) logging.debug("FileLocation binds: %s\n" % fileLocations) raise AccountantWorkerException(msg) # Clear out finished files wmbsFilesToBuild = [] return def createFileFromDataStructsFile(self, file, jobID): """ _createFileFromDataStructsFile_ This function will create a WMBS File given a DataStructs file """ wmbsFile = File() wmbsFile.update(file) if isinstance(file["locations"], set): pnn = list(file["locations"])[0] elif isinstance(file["locations"], list): if len(file['locations']) > 1: logging.error( "Have more then one location for a file in job %i" % (jobID)) logging.error("Choosing location %s" % (file['locations'][0])) pnn = file["locations"][0] else: pnn = file["locations"] 
wmbsFile["locations"] = set() if pnn != None: wmbsFile.setLocation(pnn=pnn, immediateSave=False) wmbsFile['jid'] = jobID return wmbsFile def handleDBSBufferParentage(self): """ _handleDBSBufferParentage_ Handle all the DBSBuffer Parentage in bulk if you can """ outputLFNs = [f['lfn'] for f in self.mergedOutputFiles] bindList = [] for lfn in outputLFNs: newParents = self.findDBSParents(lfn=lfn) for parentLFN in newParents: bindList.append({'child': lfn, 'parent': parentLFN}) # Now all the parents should exist # Commit them to DBSBuffer logging.info("About to commit all DBSBuffer Heritage information") logging.info(len(bindList)) if len(bindList) > 0: try: self.dbsLFNHeritage.execute( binds=bindList, conn=self.getDBConn(), transaction=self.existingTransaction()) except WMException: raise except Exception as ex: msg = "Error while trying to handle the DBS LFN heritage\n" msg += str(ex) msg += "BindList: %s" % bindList logging.error(msg) raise AccountantWorkerException(msg) return def handleSkippedFiles(self): """ _handleSkippedFiles_ Handle all the skipped files in bulk, the way it handles the skipped files imposes an important restriction: Skipped files should have been processed by a single job in the task and no job mask exists in it. This is suitable for jobs using ParentlessMergeBySize/FileBased/MinFileBased splitting algorithms. Here ACDC records and created and the file are moved to wmbs_sub_files_failed from completed. """ jobList = self.getFullJobInfo.execute( [{ 'jobid': x } for x in self.jobsWithSkippedFiles.keys()], fileSelection=self.jobsWithSkippedFiles, conn=self.getDBConn(), transaction=self.existingTransaction()) self.dataCollection.failedJobs(jobList, useMask=False) return
class PhEDExInjectorPoller(BaseWorkerThread): """ _PhEDExInjectorPoller_ Poll the DBSBuffer database and inject files as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.dbsUrl = config.DBSInterface.globalDBSUrl self.pollCounter = 0 self.subFrequency = None if getattr(config.PhEDExInjector, "subscribeDatasets", False): pollInterval = config.PhEDExInjector.pollInterval subInterval = config.PhEDExInjector.subscribeInterval self.subFrequency = max(1, int(round(subInterval / pollInterval))) logging.info( "SubscribeDataset and deleteBlocks will run every %d polling cycles", self.subFrequency) # subscribe on first cycle self.pollCounter = self.subFrequency - 1 # retrieving the node mappings is fickle and can fail quite often self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") try: nodeMappings = self.phedex.getNodeMap() except: time.sleep(2) try: nodeMappings = self.phedex.getNodeMap() except: time.sleep(4) nodeMappings = self.phedex.getNodeMap() # This will be used to map SE names which are stored in the DBSBuffer to # PhEDEx node names. The first key will be the "kind" which consists # of one of the following: MSS, Disk, Buffer. The next key will be the # SE name. self.seMap = {} self.nodeNames = [] for node in nodeMappings["phedex"]["node"]: if node["kind"] not in self.seMap: self.seMap[node["kind"]] = {} logging.info("Adding mapping %s -> %s", node["se"], node["name"]) self.seMap[node["kind"]][node["se"]] = node["name"] self.nodeNames.append(node["name"]) self.phedexNodes = {'MSS': [], 'Disk': []} for node in nodeMappings["phedex"]["node"]: if node["kind"] in ["MSS", "Disk"]: self.phedexNodes[node["kind"]].append(node["name"]) # initialize the alert framework (if available - config.Alert present) # self.sendAlert will be then be available self.initAlerts(compName="PhEDExInjector") self.blocksToRecover = [] return def setup(self, parameters): """ _setup_ Create DAO Factory and setup some DAO. """ myThread = threading.currentThread() daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database", logger=self.logger, dbinterface=myThread.dbi) self.getUninjected = daofactory(classname="GetUninjectedFiles") self.getMigrated = daofactory(classname="GetMigratedBlocks") self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks") self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted") self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets") self.markSubscribed = daofactory(classname="MarkDatasetSubscribed") daofactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=self.logger, dbinterface=myThread.dbi) self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus") self.setBlockClosed = daofactory(classname="SetBlockClosed") return def algorithm(self, parameters): """ _algorithm_ Poll the database for uninjected files and attempt to inject them into PhEDEx. """ logging.info("Running PhEDEx injector poller algorithm...") self.pollCounter += 1 if self.blocksToRecover: logging.info("""PhEDExInjector Recovery: previous injection call failed, check if files were injected to PhEDEx anyway""") self.recoverInjectedFiles() self.injectFiles() self.closeBlocks() if self.pollCounter == self.subFrequency: self.pollCounter = 0 self.deleteBlocks() self.subscribeDatasets() return def createInjectionSpec(self, injectionData): """ _createInjectionSpec_ Transform the data structure returned from the database into an XML string for the PhEDEx Data Service. 
The injectionData parameter must be a dictionary keyed by dataset path. Each dataset path will map to a list of blocks, each block being a dict. The block dicts will have three keys: name, is-open and files. The files key will be a list of dicts, each of which have the following keys: lfn, size and checksum. The following is an example object: {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} """ injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl) for datasetPath in injectionData: datasetSpec = injectionSpec.getDataset(datasetPath) for fileBlockName, fileBlock in injectionData[ datasetPath].iteritems(): blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"]) for f in fileBlock["files"]: blockSpec.addFile(f["lfn"], f["checksum"], f["size"]) return injectionSpec.save() def createRecoveryFileFormat(self, unInjectedData): """ _createRecoveryFileFormat_ Transform the data structure returned from database in to the dict format for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. unInjectedData format {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} returns [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])] """ blocks = [] for datasetPath in unInjectedData: for blockName, fileBlock in unInjectedData[datasetPath].items(): newBlock = {blockName: set()} for fileDict in fileBlock["files"]: newBlock[blockName].add(fileDict["lfn"]) blocks.append(newBlock) return blocks def injectFiles(self): """ _injectFiles_ Inject any uninjected files in PhEDEx. """ logging.info("Starting injectFiles method") uninjectedFiles = self.getUninjected.execute() for siteName in uninjectedFiles.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." 
% siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            maxDataset = 20
            maxBlocks = 50
            maxFiles = 5000
            numberDatasets = 0
            numberBlocks = 0
            numberFiles = 0
            injectData = {}
            lfnList = []
            for dataset in uninjectedFiles[siteName]:
                numberDatasets += 1
                injectData[dataset] = uninjectedFiles[siteName][dataset]
                for block in injectData[dataset]:
                    numberBlocks += 1
                    numberFiles += len(injectData[dataset][block]['files'])
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])
                if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles:
                    self.injectFilesPhEDExCall(location, injectData, lfnList)
                    numberDatasets = 0
                    numberBlocks = 0
                    numberFiles = 0
                    injectData = {}
                    lfnList = []
            if injectData:
                self.injectFilesPhEDExCall(location, injectData, lfnList)
        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        Actual PhEDEx call for file injection.
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)
        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # HTTPException with status 400 assumed to be duplicate injection,
            # trigger later block recovery (investigation needed if not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(self.createRecoveryFileFormat(injectData))
            logging.error("PhEDEx file injection failed with HTTPException: %s %s", ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx file injection failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            logging.info("Injection result: %s", injectRes)
            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData, injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception:
                    # possible deadlock with DBS3Upload, retry once after 5s
                    logging.warning("Oracle exception during file status update, "
                                    "possible deadlock due to race condition, retry after 5s sleep")
                    time.sleep(5)
                    self.setStatus.execute(lfnList, 1)
        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS.
        """
        logging.info("Starting closeBlocks method")
        migratedBlocks = self.getMigrated.execute()
        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report. We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None
            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]
            if location is None:
                msg = "Could not map SE %s to PhEDEx node."
% siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            logging.debug("closeBlocks XMLData: %s", xmlData)
            try:
                injectRes = self.phedex.injectBlocks(location, xmlData)
            except HTTPException as ex:
                logging.error("PhEDEx block close failed with HTTPException: %s %s", ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx block close failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                logging.info("Block closing result: %s", injectRes)
                if "error" not in injectRes:
                    for datasetName in migratedBlocks[siteName]:
                        for blockName in migratedBlocks[siteName][datasetName]:
                            logging.debug("Closing block %s", blockName)
                            self.setBlockClosed.execute(blockName)
                else:
                    msg = "Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"])
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)
        return

    def recoverInjectedFiles(self):
        """
        Run this method when a PhEDEx inject call has timed out. Since cmsweb
        enforces a 3 minute response timeout, a PhEDEx injection call can
        sometimes time out even though the injection itself succeeded.
        In that case run this recovery mode:
        1. first check whether files whose injection status = 0 are already in PhEDEx.
        2. if those files exist, set their in_phedex status to 1.
        3. set self.blocksToRecover = []

        Run this recovery one block at a time; with too many blocks the call
        to the PhEDEx data service on cmsweb can time out.
        """
        # recover one block at a time
        for block in self.blocksToRecover:
            injectedFiles = self.phedex.getInjectedFiles(block)
            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)
        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_

        Find deletable blocks, then decide whether to delete based on:

        Is there an active subscription for the dataset or block?
          If yes => set deleted=2
          If no  => next check

        Has the transfer to all destinations finished?
          If yes => request block deletion, approve request, set deleted=1
          If no  => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)
        if not blockDict:
            return

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys())
        except Exception:
            logging.error("Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:
            location = blockDict[blockName]['location']
            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion", location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion", blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(block=blockName, complete='y')['phedex']['block']
                except Exception:
                    logging.error("Couldn't get block info from PhEDEx, retry next cycle")
                else:
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                            if location not in nodes:
                                logging.debug("Block %s not present on %s, mark as deleted", blockName, location)
                                binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                                self.markBlocksDeleted.execute(binds)
                            elif sites.issubset(nodes):
                                logging.debug("Deleting block %s from %s since it is fully transferred", blockName, location)
                                if location not in deletableEntries:
                                    deletableEntries[location] = {}
                                if dataset not in deletableEntries[location]:
                                    deletableEntries[location][dataset] = set()
                                deletableEntries[location][dataset].add(blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:
            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:
                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])
                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}
            self.deleteBlocksPhEDExCalls(location, blocksToDelete)
        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_

        Actual PhEDEx calls for block deletion.
        """
        deletion = PhEDExDeletion(blocksToDelete.keys(), location,
                                  level='block',
                                  comments="WMAgent blocks auto-delete from %s" % location,
                                  blocks=blocksToDelete)
        xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl, deletion.getDatasetsAndBlocks())
        logging.debug("deleteBlocks XMLData: %s", xmlData)
        try:
            response = self.phedex.delete(deletion, xmlData)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s", ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
self.markBlocksDeleted.execute(binds) return def subscribeDatasets(self): """ _subscribeDatasets_ Poll the database for datasets and subscribe them. """ logging.info("Starting subscribeDatasets method") # Check for completely unsubscribed datasets unsubscribedDatasets = self.getUnsubscribed.execute() # Keep a list of subscriptions to tick as subscribed in the database subscriptionsMade = [] # Create a list of subscriptions as defined by the PhEDEx data structures subs = SubscriptionList() # Create the subscription objects and add them to the list # The list takes care of the sorting internally for subInfo in unsubscribedDatasets: site = subInfo['site'] if site not in self.phedexNodes[ 'MSS'] and site not in self.phedexNodes['Disk']: msg = "Site %s doesn't appear to be valid to PhEDEx, " % site msg += "skipping subscription: %s" % subInfo['id'] logging.error(msg) self.sendAlert(7, msg=msg) continue # Avoid custodial subscriptions to disk nodes if site not in self.phedexNodes['MSS']: subInfo['custodial'] = 'n' # Avoid auto approval in T1 sites elif site.startswith("T1"): subInfo['request_only'] = 'y' phedexSub = PhEDExSubscription( subInfo['path'], site, subInfo['phedex_group'], priority=subInfo['priority'], move=subInfo['move'], custodial=subInfo['custodial'], request_only=subInfo['request_only'], subscriptionId=subInfo['id']) # Check if the subscription is a duplicate if phedexSub.matchesExistingSubscription(self.phedex) or \ phedexSub.matchesExistingTransferRequest(self.phedex): subscriptionsMade.append(subInfo['id']) continue # Add it to the list subs.addSubscription(phedexSub) # Compact the subscriptions subs.compact() for subscription in subs.getSubscriptionList(): xmlData = XMLDrop.makePhEDExXMLForDatasets( self.dbsUrl, subscription.getDatasetPaths()) logging.debug("subscribeDatasets XMLData: %s", xmlData) logging.info( "Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s", subscription.getDatasetPaths(), subscription.getNodes(), subscription.move, subscription.custodial, subscription.request_only) try: self.phedex.subscribe(subscription, xmlData) except HTTPException as ex: logging.error( "PhEDEx dataset subscribe failed with HTTPException: %s %s", ex.status, ex.result) except Exception as ex: logging.error( "PhEDEx dataset subscribe failed with Exception: %s", str(ex)) logging.debug("Traceback: %s", str(traceback.format_exc())) else: subscriptionsMade.extend(subscription.getSubscriptionIds()) # Register the result in DBSBuffer if subscriptionsMade: self.markSubscribed.execute(subscriptionsMade) return
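# Worked example of the injectionData structure that createInjectionSpec()
# documents above; the dataset, block and LFN names are made up for
# illustration only, and `poller` stands for any PhEDExInjectorPoller instance.
exampleInjectionData = {
    "/BogusPrimary/Run2012Z-PromptReco-v1/RECO": {
        "/BogusPrimary/Run2012Z-PromptReco-v1/RECO#block1": {
            "is-open": "y",
            "files": [{"lfn": "/store/data/lfn1.root", "size": 10,
                       "checksum": {"cksum": "1234"}},
                      {"lfn": "/store/data/lfn2.root", "size": 20,
                       "checksum": {"cksum": "4321"}}],
        }
    }
}
# poller.createInjectionSpec(exampleInjectionData) would serialise this into
# the XML payload handed to self.phedex.injectBlocks(location, xmlData).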
class TransferDaemon(BaseDaemon):
    """
    _TransferDaemon_

    Call multiprocessing library to instantiate a TransferWorker for each user.
    """
    def __init__(self, config):
        """
        Initialise class members:
        1. check and create dropbox dir
        2. define oracle and couch (config and file instance) server connections
        3. PhEDEx connection
        4. Setup wmcore factory
        """
        self.doc_acq = ''
        # Need a better way to test this without turning off this next line
        BaseDaemon.__init__(self, config, 'AsyncTransfer')

        self.dropbox_dir = '%s/dropbox/outputs' % self.config.componentDir
        if not os.path.isdir(self.dropbox_dir):
            try:
                os.makedirs(self.dropbox_dir)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    self.logger.exception('Unknown error in mkdir: %s' % e.errno)
                    raise
        if not os.path.isdir("/tmp/DashboardReport"):
            try:
                os.makedirs("/tmp/DashboardReport")
            except OSError as e:
                if e.errno != errno.EEXIST:
                    self.logger.exception('Unknown error in mkdir: %s' % e.errno)
                    raise
        try:
            config_server = CouchServer(dburl=self.config.config_couch_instance)
            self.config_db = config_server.connectDatabase(self.config.config_database)
        except Exception:
            self.logger.exception('Failed when contacting local couch')
            raise
        try:
            self.oracleDB = HTTPRequests(self.config.oracleDB,
                                         self.config.opsProxy,
                                         self.config.opsProxy)
        except Exception:
            self.logger.exception('Failed when contacting Oracle')
            raise
        self.pool = Pool(processes=self.config.pool_size)
        self.factory = WMFactory(self.config.schedAlgoDir, namespace=self.config.schedAlgoDir)
        self.site_tfc_map = {}
        try:
            self.phedex = PhEDEx(responseType='xml',
                                 dict={'key': self.config.opsProxy, 'cert': self.config.opsProxy})
        except Exception as e:
            self.logger.exception('PhEDEx exception: %s' % e)
            raise
        # TODO: decode xml
        try:
            self.phedex2 = PhEDEx(responseType='json',
                                  dict={'key': self.config.opsProxy, 'cert': self.config.opsProxy})
        except Exception as e:
            self.logger.exception('PhEDEx exception: %s' % e)
            raise
        self.logger.debug(type((self.phedex2.getNodeMap())['phedex']['node']))
        for site in [x['name'] for x in self.phedex2.getNodeMap()['phedex']['node']]:
            if site and str(site) != 'None' and str(site) != 'unknown':
                self.site_tfc_map[site] = self.get_tfc_rules(site)
                self.logger.debug('tfc site: %s %s' % (site, self.get_tfc_rules(site)))

    # Overriding setup() is optional, and not needed here

    def algorithm(self, parameters=None):
        """
        1. Get transfer config from couchdb config instance
        2. Get a list of users with files to transfer from the db instance
           (oracle or couch, by config flag)
        3. For each user get a suitably sized input for submission (call to a list)
        4. Submit to a subprocess
        """
        if self.config.isOracle:
            users = self.oracleSiteUser(self.oracleDB)
        else:
            users = self.active_users(self.db)
            sites = self.active_sites()
            self.logger.info('%s active sites' % len(sites))
            self.logger.debug('Active sites are: %s' % sites)
        self.logger.debug('kicking off pool')
        for u in users:
            for i in range(len(u)):
                if not u[i]:
                    u[i] = ''
            self.logger.debug('current_running %s' % current_running)
            self.logger.debug('Testing current running: %s %s %s' % (u, current_running, (u not in current_running)))
            if u not in current_running:
                self.logger.debug('processing %s' % u)
                current_running.append(u)
                self.logger.debug('processing %s' % current_running)
                self.pool.apply_async(ftscp, (u, self.site_tfc_map, self.config), callback=log_result)

    def oracleSiteUser(self, db):
        """
        1. Acquire transfers from DB
        2.
Get acquired users and destination sites """ self.logger.info('Retrieving users...') fileDoc = dict() fileDoc['subresource'] = 'activeUsers' fileDoc['grouping'] = 0 fileDoc['asoworker'] = self.config.asoworker result = dict() try: result = db.get(self.config.oracleFileTrans, data=encodeRequest(fileDoc)) except Exception as ex: self.logger.error("Failed to acquire transfers \ from oracleDB: %s" % ex) return [] self.logger.debug(oracleOutputMapping(result)) # TODO: translate result into list((user,group,role),...) if len(oracleOutputMapping(result)) != 0: self.logger.debug(type( [[x['username'].encode('ascii','ignore'), x['user_group'], x['user_role']] for x in oracleOutputMapping(result)])) try: docs = oracleOutputMapping(result) users = [[x['username'], x['user_group'], x['user_role']] for x in docs] self.logger.info('Users to process: %s' % str(users)) except: self.logger.exception('User data malformed. ') else: self.logger.info('No new user to acquire') return [] actives = list() for user in users: fileDoc = dict() fileDoc['asoworker'] = self.config.asoworker fileDoc['subresource'] = 'acquireTransfers' fileDoc['username'] = user[0] self.logger.debug("Retrieving transfers from oracleDB for user: %s " % user[0]) try: result = db.post(self.config.oracleFileTrans, data=encodeRequest(fileDoc)) except Exception as ex: self.logger.error("Failed to acquire transfers \ from oracleDB: %s" %ex) continue self.doc_acq = str(result) for i in range(len(user)): if not user[i] or user[i] in ['None', 'NULL']: user[i] = '' user[i] = str(user[i]) actives.append(user) self.logger.debug("Transfers retrieved from oracleDB. %s " % users) return users def active_users(self, db): """ Query a view for users with files to transfer. get this from the following view: ftscp?group=true&group_level=1 """ query = {'group': True, 'group_level': 3} try: users = db.loadView(self.config.ftscp_design, 'ftscp_all', query) except Exception as e: self.logger.exception('A problem occured when\ contacting couchDB: %s' % e) return [] if len(users['rows']) <= self.config.pool_size: active_users = [x['key'] for x in users['rows']] else: sorted_users = self.factory.loadObject(self.config.algoName, args=[self.config, self.logger, users['rows'], self.config.pool_size], getFromCache=False, listFlag=True) active_users = sorted_users()[:self.config.pool_size] self.logger.info('%s active users' % len(active_users)) self.logger.debug('Active users are: %s' % active_users) return active_users def active_sites(self): """ Get a list of all sites involved in transfers. """ query = {'group': True, 'stale': 'ok'} try: sites = self.db.loadView('AsyncTransfer', 'sites', query) except Exception as e: self.logger.exception('A problem occured \ when contacting couchDB: %s' % e) return [] def keys_map(inputDict): """ Map function. """ return inputDict['key'] return map(keys_map, sites['rows']) def get_tfc_rules(self, site): """ Get the TFC regexp for a given site. """ tfc_file = None try: self.phedex.getNodeTFC(site) except Exception as e: self.logger.exception('PhEDEx exception: %s' % e) try: tfc_file = self.phedex.cacheFileName('tfc', inputdata={'node': site}) except Exception as e: self.logger.exception('PhEDEx cache exception: %s' % e) return readTFC(tfc_file) def terminate(self, parameters=None): """ Called when thread is being terminated. """ self.pool.close() self.pool.join()
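# oracleSiteUser() normalises each acquired (username, group, role) triple so
# that None/'None'/'NULL' fields become empty strings before the triples are
# compared against current_running and handed to the pool. An illustrative,
# stand-alone sketch of that normalisation (the helper name is hypothetical,
# not part of the AsyncStageOut code):
def normaliseUserTriple(user):
    """Return a copy of [username, group, role] with null-ish fields as ''."""
    cleaned = []
    for field in user:
        if not field or field in ('None', 'NULL'):
            cleaned.append('')
        else:
            cleaned.append(str(field))
    return cleaned

# normaliseUserTriple(['alice', None, 'NULL']) -> ['alice', '', '']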
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """
    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")
        self.safeMode = getattr(config.PhEDExInjector, "safeOperationMode", False)

        # Subscribed state in the DBSBuffer table for datasets
        self.terminalSubscriptionState = 1
        if self.safeMode:
            self.terminalSubscriptionState = 2

        # We will map node names to CMS names, as that is what the spec will have.
        # If a CMS name is associated with many PhEDEx nodes, choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector. Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)
        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")
        self.getPartiallySubscribed = daofactory(classname = "GetPartiallySubscribedDatasets")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them.
""" myThread = threading.currentThread() myThread.transaction.begin() # Check for completely unsubscribed datasets unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn, transaction = True) if self.safeMode: partiallySubscribedDatasets = self.getPartiallySubscribed.execute(conn = myThread.transaction.conn, transaction = True) unsubscribedDatasets.extend(partiallySubscribedDatasets) partiallySubscribedSet = set() for entry in partiallySubscribedDatasets: partiallySubscribedSet.add(entry["path"]) # Map the datasets to their specs specDatasetMap = {} for unsubscribedDataset in unsubscribedDatasets: datasetPath = unsubscribedDataset["path"] workflow = unsubscribedDataset["workflow"] spec = unsubscribedDataset["spec"] if datasetPath not in specDatasetMap: specDatasetMap[datasetPath] = [] specDatasetMap[datasetPath].append({"workflow" : workflow, "spec" : spec}) specCache = {} siteMap = {} # Distribute the subscriptions by site, type and priority # This is to make as few subscriptions as possible # Site map values are dictionaries where the keys are tuples (Prio, Custodial, AutoApprove, Move) # Where Custodial is boolean, Prio is in ["Low", "Normal", "High"], AutoApprove is boolean and Move is boolean for dataset in specDatasetMap: # Aggregate all the different subscription configurations subInfo = {} for entry in specDatasetMap[dataset]: if not entry["spec"]: # Can't use this spec, there isn't one continue # Load spec if not in the cache if entry["spec"] not in specCache: helper = WMWorkloadHelper() try: helper.load(entry["spec"]) specCache[entry["spec"]] = helper except Exception: #Couldn't load it , alert and carry on msg = "Couldn't load spec: %s" % entry["spec"] logging.error(msg) self.sendAlert(7, msg = msg) continue #If we are running in safe mode, we need to know if the workflow is ready # We have the spec, get the info helper = specCache[entry["spec"]] workflowSubInfo = helper.getSubscriptionInformation() datasetSubInfo = workflowSubInfo.get(dataset, None) if datasetSubInfo and subInfo: subInfo["CustodialSites"] = extendWithoutDups(subInfo["CustodialSites"], datasetSubInfo["CustodialSites"]) subInfo["NonCustodialSites"] = extendWithoutDups(subInfo["NonCustodialSites"], datasetSubInfo["NonCustodialSites"]) subInfo["AutoApproveSites"] = extendWithoutDups(subInfo["AutoApproveSites"], datasetSubInfo["AutoApproveSites"]) subInfo["Priority"] = solvePrioConflicts(subInfo["Priority"], datasetSubInfo["Priority"]) elif datasetSubInfo: subInfo = datasetSubInfo # We now have aggregated subscription information for this dataset in subInfo # Distribute it by site if not subInfo: #Nothing to do, log and continue msg = "No subscriptions configured for dataset %s" % dataset logging.warning(msg) self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState, conn = myThread.transaction.conn, transaction = True) continue # Make sure that a site is not configured both as non custodial and custodial # Non-custodial is believed to be the right choice subInfo["CustodialSites"] = list(set(subInfo["CustodialSites"]) - set(subInfo["NonCustodialSites"])) for site in subInfo["CustodialSites"]: if site not in siteMap: siteMap[site] = {} if self.safeMode and dataset not in partiallySubscribedSet: tupleKey = (subInfo["Priority"], True, False, False) else: tupleKey = (subInfo["Priority"], True, False, True) if tupleKey not in siteMap[site]: siteMap[site][tupleKey] = [] siteMap[site][tupleKey].append(dataset) # If we are in safe mode and this is a partially subscribed 
dataset, # then the non-custodial were done in a previous cycle if self.safeMode and dataset in partiallySubscribedSet: self.markSubscribed.execute(dataset, subscribed = self.terminalSubscriptionState, conn = myThread.transaction.conn, transaction = True) continue for site in subInfo["NonCustodialSites"]: if site not in siteMap: siteMap[site] = {} autoApprove = False if site in subInfo["AutoApproveSites"]: autoApprove = True tupleKey = (subInfo["Priority"], False, autoApprove) if tupleKey not in siteMap[site]: siteMap[site][tupleKey] = [] siteMap[site][tupleKey].append(dataset) self.markSubscribed.execute(dataset, subscribed = 1, conn = myThread.transaction.conn, transaction = True) # Actually request the subscriptions for site in siteMap: # Check that the site is valid if site not in self.cmsToPhedexMap: msg = "Site %s doesn't appear to be valid to PhEDEx" % site logging.error(msg) self.sendAlert(7, msg = msg) continue for subscriptionFlavor in siteMap[site]: datasets = siteMap[site][subscriptionFlavor] # Check that the site is valid if "MSS" in self.cmsToPhedexMap[site]: phedexNode = self.cmsToPhedexMap[site]["MSS"] else: phedexNode = self.cmsToPhedexMap[site]["Disk"] logging.info("Subscribing %s to %s" % (datasets, site)) options = {"custodial" : "n", "requestOnly" : "y", "priority" : subscriptionFlavor[0].lower(), "move" : "n"} if subscriptionFlavor[1]: options["custodial"] = "y" if subscriptionFlavor[3]: options["move"] = "y" if subscriptionFlavor[2]: options["requestOnly"] = "n" newSubscription = PhEDExSubscription(datasets, phedexNode, self.group, **options) xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, newSubscription.getDatasetPaths()) logging.debug(str(xmlData)) self.phedex.subscribe(newSubscription, xmlData) myThread.transaction.commit() return
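
# The helpers extendWithoutDups() and solvePrioConflicts() are called by the
# aggregation loop above but are not defined in this file.  Below is a minimal
# sketch of plausible implementations, assuming priorities are ordered
# "Low" < "Normal" < "High" as the siteMap comment states; the real WMCore
# helpers may differ in detail.

def extendWithoutDups(listA, listB):
    """Merge two site lists, dropping duplicates while preserving order."""
    merged = list(listA)
    for item in listB:
        if item not in merged:
            merged.append(item)
    return merged

def solvePrioConflicts(prioA, prioB):
    """Pick the higher of two subscription priorities."""
    order = {"Low": 0, "Normal": 1, "High": 2}
    return prioA if order[prioA] >= order[prioB] else prioB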
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, which is what the spec will have.
        # If a CMS name is associated to many PhEDEx nodes then choose the MSS option.
        self.cmsToPhedexMap = {}
        self.phedexNodes = {"MSS": [], "Disk": []}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the PhEDEx node
        name to CMS name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn=myThread.transaction.conn,
                                                            transaction=True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list.
        # The list takes care of the sorting internally.
        for subInfo in unsubscribedDatasets:
            site = subInfo["site"]

            if site not in self.phedexNodes["MSS"] and site not in self.phedexNodes["Disk"]:
                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo["id"]
                    logging.error(msg)
                    self.sendAlert(7, msg=msg)
                    continue
                # Get the PhEDEx node from the CMS site name
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes["MSS"]:
                subInfo["custodial"] = "n"
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo["request_only"] = "y"

            phedexSub = PhEDExSubscription(subInfo["path"], site, self.group,
                                           priority=subInfo["priority"],
                                           move=subInfo["move"],
                                           custodial=subInfo["custodial"],
                                           request_only=subInfo["request_only"],
                                           subscriptionId=subInfo["id"])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
               phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo["id"])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (subscription.getDatasetPaths(),
                                                                 subscription.getNodes())
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (subscription.move,
                                                                      subscription.custodial,
                                                                      subscription.request_only)
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade,
                                        conn=myThread.transaction.conn,
                                        transaction=True)

        myThread.transaction.commit()
        return
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)

        self.enabled = getattr(config.PhEDExInjector, "enabled", True)
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.phedexGroup = config.PhEDExInjector.phedexGroup

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles",
                         self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json", dbsUrl=self.dbsUrl)
        try:
            nodeMappings = self.phedex.getNodeMap()
        except Exception:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except Exception:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])

        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])

        self.blocksToRecover = []

        # X-component configuration is BAD! But it will only be here during the
        # Rucio commissioning within WM
        self.listTiersToSkip = config.RucioInjector.listTiersToInject
        logging.info("Component configured to skip data injection for data tiers: %s",
                     self.listTiersToSkip)

        return

    def setup(self, parameters):
        """
        _setup_

        Create the DAO Factory and set up the DAOs.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.RucioInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        self.getUnsubscribedBlocks = daofactory(classname="GetUnsubscribedBlocks")
        self.setBlockRules = daofactory(classname="SetBlocksRule")

        self.findDeletableBlocks = daofactory(classname="GetDeletableBlocks")
        self.markBlocksDeleted = daofactory(classname="MarkBlocksDeleted")

        self.getUnsubscribed = daofactory(classname="GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname="MarkDatasetSubscribed")

        daofactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")
        self.setBlockClosed = daofactory(classname="SetBlockClosed")

        return

    @timeFunction
    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        if not self.enabled:
            logging.info("PhEDExInjector component is disabled in the configuration, exiting.")
            return

        logging.info("Running PhEDEx injector poller algorithm...")
        self.pollCounter += 1

        try:
            if self.blocksToRecover:
                logging.info("PhEDExInjector Recovery: previous injection call failed, "
                             "checking if files were injected to PhEDEx anyway")
                self.recoverInjectedFiles()

            self.injectFiles()
            self.closeBlocks()

            if self.pollCounter == self.subFrequency:
                self.pollCounter = 0
                self.deleteBlocks()
                self.subscribeDatasets()
                self.subscribeBlocks()
        except HTTPException as ex:
            if hasattr(ex, "status") and ex.status in [502, 503]:
                # then either a proxy error or the service is unavailable
                msg = "Caught HTTPException in PhEDExInjector. Retrying in the next cycle.\n"
                msg += str(ex)
                logging.error(msg)
            else:
                msg = "Caught unexpected HTTPException in PhEDExInjector.\n%s" % str(ex)
                logging.exception(msg)
                raise
        except Exception as ex:
            msg = "Caught unexpected exception in PhEDExInjector. Details:\n%s" % str(ex)
            logging.exception(msg)
            raise PhEDExInjectorException(msg)

        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must
        be a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)
            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"])
                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def createRecoveryFileFormat(self, unInjectedData):
        """
        _createRecoveryFileFormat_

        Transform the data structure returned from the database into the dict
        format used by the PhEDEx Data Service.  The unInjectedData parameter
        must be a dictionary keyed by dataset path:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}

        returns

        [{"block1": set(["lfn1", "lfn2"])},
         {"block2": set(["lfn3", "lfn4"])}]
        """
        blocks = []
        for datasetPath in unInjectedData:
            for blockName, fileBlock in unInjectedData[datasetPath].items():
                newBlock = {blockName: set()}
                for fileDict in fileBlock["files"]:
                    newBlock[blockName].add(fileDict["lfn"])
                blocks.append(newBlock)
        return blocks

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files into PhEDEx.
        """
        logging.info("Starting injectFiles method")

        uninjectedFiles = self.getUninjected.execute()

        # filter out datatiers to be processed by RucioInjector
        uninjectedFiles = filterDataByTier(uninjectedFiles, self.listTiersToSkip)

        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None
            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dataset in uninjectedFiles[siteName]:
                injectData = {}
                lfnList = []
                injectData[dataset] = uninjectedFiles[siteName][dataset]
                for block in injectData[dataset]:
                    for fileInfo in injectData[dataset][block]['files']:
                        lfnList.append(fileInfo['lfn'])
                    logging.info("About to inject %d files for block %s",
                                 len(injectData[dataset][block]['files']), block)
                self.injectFilesPhEDExCall(location, injectData, lfnList)
        return

    def injectFilesPhEDExCall(self, location, injectData, lfnList):
        """
        _injectFilesPhEDExCall_

        The actual PhEDEx call for file injection.
        """
        xmlData = self.createInjectionSpec(injectData)
        logging.debug("injectFiles XMLData: %s", xmlData)

        try:
            injectRes = self.phedex.injectBlocks(location, xmlData)
        except HTTPException as ex:
            # An HTTPException with status 400 is assumed to be a duplicate injection;
            # trigger the block recovery later on (investigation needed if that is not the case)
            if ex.status == 400:
                self.blocksToRecover.extend(self.createRecoveryFileFormat(injectData))
            logging.error("PhEDEx file injection failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            msg = "PhEDEx file injection failed with Exception: %s" % str(ex)
            logging.exception(msg)
        else:
            logging.debug("Injection result: %s", injectRes)

            if "error" in injectRes:
                msg = "Error injecting data %s: %s" % (injectData, injectRes["error"])
                logging.error(msg)
            else:
                try:
                    self.setStatus.execute(lfnList, 1)
                except Exception as ex:
                    if 'Deadlock found' in str(ex) or 'deadlock detected' in str(ex):
                        logging.error("Database deadlock during file status update. "
                                      "Retrying again in the next cycle.")
                        self.blocksToRecover.extend(self.createRecoveryFileFormat(injectData))
                    else:
                        msg = "Failed to update file status in the database, reason: %s" % str(ex)
                        logging.error(msg)
                        raise PhEDExInjectorException(msg)
        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS.
        """
        logging.info("Starting closeBlocks method")

        migratedBlocks = self.getMigrated.execute()

        # filter out datatiers to be processed by RucioInjector
        migratedBlocks = filterDataByTier(migratedBlocks, self.listTiersToSkip)

        for siteName in migratedBlocks:
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None
            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                continue

            for dset, blocks in migratedBlocks[siteName].items():
                xmlData = self.createInjectionSpec({dset: blocks})
                logging.debug("closeBlocks XMLData: %s", xmlData)

                try:
                    injectRes = self.phedex.injectBlocks(location, xmlData)
                except HTTPException as ex:
                    logging.error("PhEDEx block close failed with HTTPException: %s %s",
                                  ex.status, ex.result)
                except Exception as ex:
                    msg = "PhEDEx block close failed with Exception: %s" % str(ex)
                    logging.exception(msg)
                else:
                    logging.debug("Block closing result: %s", injectRes)

                    if "error" in injectRes:
                        logging.error("Failed to close blocks due to: %s, for data: %s",
                                      injectRes["error"], migratedBlocks[siteName][dset])
                    else:
                        for blockName in blocks:
                            logging.info("Block closed in PhEDEx: %s", blockName)
                            self.setBlockClosed.execute(blockName)
        return

    def recoverInjectedFiles(self):
        """
        Run this function when a PhEDEx injection call has timed out.  Since
        cmsweb has a 3 minute response timeout, the PhEDEx injection call can
        sometimes time out even though the call succeeded.  In that case run
        the recovery mode:
        1. first check whether the files with injection status = 0 are in PhEDEx
        2. if those files exist, set their in_phedex status to 1
        3. set self.blocksToRecover = []

        Run this recovery one block at a time; with too many blocks the call to
        the PhEDEx data service on cmsweb can time out.
        """
        # recover one block at a time
        for block in self.blocksToRecover:
            injectedFiles = self.phedex.getInjectedFiles(block)
            if injectedFiles:
                self.setStatus.execute(injectedFiles, 1)

        self.blocksToRecover = []
        return

    def deleteBlocks(self):
        """
        _deleteBlocks_

        Find deletable blocks, then decide whether to delete based on:

        Is there an active subscription for the dataset or block?
          If yes => set deleted = 2
          If no  => next check

        Has the transfer to all destinations finished?
          If yes => request block deletion, approve the request, set deleted = 1
          If no  => do nothing (check again next cycle)
        """
        logging.info("Starting deleteBlocks method")

        blockDict = self.findDeletableBlocks.execute(transaction=False)

        if not blockDict:
            return

        ### logic to stop doing things to be done by RucioInjector or by the DM team
        for block in list(blockDict):
            if not self._isDataTierAllowed(block):
                blockDict.pop(block)

        try:
            subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys())
        except Exception:
            logging.error("Couldn't get subscription info from PhEDEx, retry next cycle")
            return

        skippableBlocks = []
        deletableEntries = {}
        for blockName in blockDict:
            location = blockDict[blockName]['location']

            # should never be triggered, better safe than sorry
            if location.endswith('_MSS'):
                logging.debug("Location %s for block %s is MSS, skip deletion",
                              location, blockName)
                skippableBlocks.append(blockName)
                continue

            dataset = blockDict[blockName]['dataset']
            sites = blockDict[blockName]['sites']

            if blockName in subscriptions and location in subscriptions[blockName]:
                logging.debug("Block %s subscribed to %s, skip deletion",
                              blockName, location)
                binds = {'DELETED': 2, 'BLOCKNAME': blockName}
                self.markBlocksDeleted.execute(binds)
            else:
                blockInfo = []
                try:
                    blockInfo = self.phedex.getReplicaInfoForBlocks(block=blockName,
                                                                    complete='y')['phedex']['block']
                except Exception:
                    logging.error("Couldn't get block info from PhEDEx, retry next cycle")
                else:
                    nodes = set()
                    for entry in blockInfo:
                        if entry['name'] == blockName:
                            nodes = set([x['node'] for x in entry['replica']])
                    if location not in nodes:
                        logging.debug("Block %s not present on %s, mark as deleted",
                                      blockName, location)
                        binds = {'DELETED': 1, 'BLOCKNAME': blockName}
                        self.markBlocksDeleted.execute(binds)
                    elif sites.issubset(nodes):
                        logging.debug("Deleting block %s from %s since it is fully transferred",
                                      blockName, location)
                        if location not in deletableEntries:
                            deletableEntries[location] = {}
                        if dataset not in deletableEntries[location]:
                            deletableEntries[location][dataset] = set()
                        deletableEntries[location][dataset].add(blockName)

        binds = []
        for blockName in skippableBlocks:
            binds.append({'DELETED': 2, 'BLOCKNAME': blockName})
        if binds:
            self.markBlocksDeleted.execute(binds)

        for location in deletableEntries:
            chunkSize = 100
            numberOfBlocks = 0
            blocksToDelete = {}
            for dataset in deletableEntries[location]:
                blocksToDelete[dataset] = deletableEntries[location][dataset]
                numberOfBlocks += len(blocksToDelete[dataset])

                if numberOfBlocks > chunkSize:
                    self.deleteBlocksPhEDExCalls(location, blocksToDelete)
                    numberOfBlocks = 0
                    blocksToDelete = {}

            self.deleteBlocksPhEDExCalls(location, blocksToDelete)
        return

    def deleteBlocksPhEDExCalls(self, location, blocksToDelete):
        """
        _deleteBlocksPhEDExCalls_

        The actual PhEDEx calls for block deletion.
        """
        deletion = PhEDExDeletion(blocksToDelete.keys(), location,
                                  level='block',
                                  comments="WMAgent blocks auto-delete from %s" % location,
                                  blocks=blocksToDelete)

        try:
            response = self.phedex.delete(deletion)
            requestId = response['phedex']['request_created'][0]['id']
            # auto-approve the deletion request
            self.phedex.updateRequest(requestId, 'approve', location)
        except HTTPException as ex:
            logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s",
                          ex.status, ex.result)
        except Exception as ex:
            logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex))
            logging.debug("Traceback: %s", str(traceback.format_exc()))
        else:
            binds = []
            for dataset in blocksToDelete:
                for blockName in blocksToDelete[dataset]:
                    binds.append({'DELETED': 1, 'BLOCKNAME': blockName})
            self.markBlocksDeleted.execute(binds)
        return

    def _isDataTierAllowed(self, dataName):
        """
        Check whether the data belongs to a datatier allowed to be handled by
        this component (either to inject or to subscribe into PhEDEx)
        :param dataName: string with the block or the dataset name
        :return: boolean, True if the tier is allowed, False otherwise
        """
        endTier = dataName.rsplit('/', 1)[1]
        endTier = endTier.split('#')[0] if '#' in endTier else endTier
        if endTier in self.listTiersToSkip:
            logging.debug("Skipping data: %s because it's listed in the tiers to skip",
                          dataName)
            return False
        return True

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        logging.info("Starting subscribeDatasets method")

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute()

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list.
        # The list takes care of the sorting internally.
        for subInfo in unsubscribedDatasets:
            ### logic to stop doing things to be done by RucioInjector or by the DM team
            if not self._isDataTierAllowed(subInfo['path']):
                continue

            site = subInfo['site']
            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping subscription: %s" % subInfo['id']
                logging.error(msg)
                continue

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'],
                                           priority=subInfo['priority'],
                                           move=subInfo['move'],
                                           custodial=subInfo['custodial'],
                                           request_only=subInfo['request_only'],
                                           subscriptionId=subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
               phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s",
                         subscription.getDatasetPaths(), subscription.getNodes(),
                         subscription.move, subscription.custodial, subscription.request_only)
            try:
                self.phedex.subscribe(subscription)
            except HTTPException as ex:
                logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s",
                              ex.status, ex.result)
            except Exception as ex:
                logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex))
                logging.debug("Traceback: %s", str(traceback.format_exc()))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade)
        return

    def subscribeBlocks(self):
        """
        _subscribeBlocks_

        Poll the database and subscribe blocks not yet subscribed.
        """
        logging.info("Starting subscribeBlocks method")

        unsubBlocks = self.getUnsubscribedBlocks.execute()
        # now organize those by location in order to minimize PhEDEx requests
        # also remove blocks that this component is meant to skip
        unsubBlocks = self.organizeBlocksByLocation(unsubBlocks)

        for location, blockDict in unsubBlocks.items():
            phedexSub = PhEDExSubscription(blockDict.keys(), location, self.phedexGroup,
                                           blocks=blockDict, level="block",
                                           priority="normal", move="n",
                                           custodial="n", request_only="n",
                                           comments="WMAgent production site")
            try:
                res = self.phedex.subscribe(phedexSub)
                transferId = res['phedex']['request_created'][0]['id']
                logging.info("Subscribed %d blocks for %d datasets, to location: %s, under request ID: %s",
                             len(phedexSub.getBlocks()), len(phedexSub.getDatasetPaths()),
                             phedexSub.getNodes(), transferId)
            except HTTPException as ex:
                logging.error("PhEDEx block subscription failed with HTTPException: %s %s",
                              ex.status, ex.result)
                logging.error("The subscription object was: %s", str(phedexSub))
            except Exception as ex:
                logging.exception("PhEDEx block subscription failed with Exception: %s", str(ex))
            else:
                binds = []
                for blockname in phedexSub.getBlocks():
                    binds.append({'RULE_ID': str(transferId), 'BLOCKNAME': blockname})
                self.setBlockRules.execute(binds)

        return

    def organizeBlocksByLocation(self, blocksLocation):
        """
        Given a list of dictionaries (with block name and location), organize
        those blocks per location to make the PhEDEx subscription calls more
        efficient.  Also drop blocks that we cannot subscribe, and check for
        valid PhEDEx node names.
        :param blocksLocation: list of dictionaries
        :return: a dict of dictionaries, such as:
          {"locationA": {"datasetA": ["blockA", "blockB", ...],
                         "datasetB": ["blockA", "blockB", ...]},
           "locationB": {"datasetA": ["blockA"],
           ...
        """
        dictByLocation = {}
        for item in blocksLocation:
            ### logic to stop doing things to be done by RucioInjector or by the DM team
            if not self._isDataTierAllowed(item['blockname']):
                continue

            site = item['pnn']
            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                msg += "skipping block subscription for: %s" % item['blockname']
                logging.error(msg)
                continue

            dictByLocation.setdefault(site, {})
            dsetName = item['blockname'].split("#")[0]
            dictByLocation[site].setdefault(dsetName, [])
            dictByLocation[site][dsetName].append(item['blockname'])
        return dictByLocation
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets to MSS as they are
    created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s" % (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for datasets and subscribe them to MSS.
        """
        # Bail out before opening a transaction if there are no MSS nodes
        if "MSS" not in self.seMap:
            return

        myThread = threading.currentThread()
        myThread.transaction.begin()

        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        datasetMap = {}
        for unsubscribedDataset in unsubscribedDatasets:
            datasetPath = unsubscribedDataset["path"]
            seName = unsubscribedDataset["se_name"]

            if seName not in self.seMap["MSS"]:
                msg = "No MSS node for SE: %s" % seName
                logging.error(msg)
                self.sendAlert(7, msg = msg)
                continue

            if self.seMap["MSS"][seName] not in datasetMap:
                datasetMap[self.seMap["MSS"][seName]] = []
            datasetMap[self.seMap["MSS"][seName]].append(datasetPath)

            self.markSubscribed.execute(datasetPath,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        for siteMSS in datasetMap.keys():
            logging.info("Subscribing %s to %s" % (datasetMap[siteMSS], siteMSS))
            newSubscription = PhEDExSubscription(datasetMap[siteMSS], siteMSS,
                                                 self.group, custodial = "y",
                                                 requestOnly = "y")

            xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                       newSubscription.getDatasetPaths())
            print xmlData
            self.phedex.subscribe(newSubscription, xmlData)

        myThread.transaction.commit()
        return
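
# For reference, the seMap built in setup() above is a two-level dictionary,
# kind -> SE name -> PhEDEx node name.  An illustrative (made-up) example of
# its shape after loading the node map:
#
#   seMap = {"MSS":    {"cmssrm.fnal.gov": "T1_US_FNAL_MSS"},
#            "Buffer": {"cmssrm.fnal.gov": "T1_US_FNAL_Buffer"},
#            "Disk":   {"storage.example.ch": "T2_CH_Example"}}
#
# so self.seMap["MSS"][seName] is the MSS node that a dataset produced at that
# SE should be subscribed to, which is exactly the lookup algorithm() performs.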
class PhEDExInjectorPoller(BaseWorkerThread):
    """
    _PhEDExInjectorPoller_

    Poll the DBSBuffer database and inject files as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.config = config
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName="PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the SE names to
        PhEDEx node name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)

        self.getUninjected = daofactory(classname="GetUninjectedFiles")
        self.getMigrated = daofactory(classname="GetMigratedBlocks")

        daofactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus")

        daofactory = DAOFactory(package="WMComponent.DBSUpload.Database",
                                logger=self.logger,
                                dbinterface=myThread.dbi)
        self.setBlockStatus = daofactory(classname="SetBlockStatus")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            if node["kind"] not in self.seMap:
                self.seMap[node["kind"]] = {}
            logging.info("Adding mapping %s -> %s" % (node["se"], node["name"]))
            self.seMap[node["kind"]][node["se"]] = node["name"]
            self.nodeNames.append(node["name"])
        return

    def createInjectionSpec(self, injectionData):
        """
        _createInjectionSpec_

        Transform the data structure returned from the database into an XML
        string for the PhEDEx Data Service.  The injectionData parameter must
        be a dictionary keyed by dataset path.  Each dataset path will map to a
        list of blocks, each block being a dict.  The block dicts will have
        three keys: name, is-open and files.  The files key will be a list of
        dicts, each of which have the following keys: lfn, size and checksum.
        The following is an example object:

        {"dataset1":
          {"block1": {"is-open": "y", "files":
            [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}},
             {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}}
        """
        injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl)

        for datasetPath in injectionData:
            datasetSpec = injectionSpec.getDataset(datasetPath)
            for fileBlockName, fileBlock in injectionData[datasetPath].iteritems():
                blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"])
                for f in fileBlock["files"]:
                    blockSpec.addFile(f["lfn"], f["checksum"], f["size"])

        return injectionSpec.save()

    def injectFiles(self):
        """
        _injectFiles_

        Inject any uninjected files into PhEDEx.
        """
        myThread = threading.currentThread()
        uninjectedFiles = self.getUninjected.execute()

        injectedFiles = []
        for siteName in uninjectedFiles.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None
            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(7, msg=msg)
                continue

            xmlData = self.createInjectionSpec(uninjectedFiles[siteName])
            injectRes = self.phedex.injectBlocks(location, xmlData, 0, 0)

            if "error" not in injectRes:
                for datasetName in uninjectedFiles[siteName]:
                    for blockName in uninjectedFiles[siteName][datasetName]:
                        for f in uninjectedFiles[siteName][datasetName][blockName]["files"]:
                            injectedFiles.append(f["lfn"])
            else:
                msg = "Error injecting data %s: %s" % (uninjectedFiles[siteName],
                                                       injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)

        if len(injectedFiles) > 0:
            logging.debug("Injecting files: %s" % injectedFiles)
            self.setStatus.execute(injectedFiles, 1,
                                   conn=myThread.transaction.conn,
                                   transaction=myThread.transaction)
        return

    def closeBlocks(self):
        """
        _closeBlocks_

        Close any blocks that have been migrated to global DBS.
        """
        myThread = threading.currentThread()
        migratedBlocks = self.getMigrated.execute()

        closedBlocks = []
        for siteName in migratedBlocks.keys():
            # SE names can be stored in DBSBuffer as that is what is returned in
            # the framework job report.  We'll try to map the SE name to a
            # PhEDEx node name here.
            location = None
            if siteName in self.nodeNames:
                location = siteName
            else:
                if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]:
                    location = self.seMap["Buffer"][siteName]
                elif "MSS" in self.seMap and siteName in self.seMap["MSS"]:
                    location = self.seMap["MSS"][siteName]
                elif "Disk" in self.seMap and siteName in self.seMap["Disk"]:
                    location = self.seMap["Disk"][siteName]

            if location is None:
                msg = "Could not map SE %s to PhEDEx node." % siteName
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            xmlData = self.createInjectionSpec(migratedBlocks[siteName])
            injectRes = self.phedex.injectBlocks(location, xmlData, 0, 0)

            if "error" not in injectRes:
                for datasetName in migratedBlocks[siteName]:
                    for blockName in migratedBlocks[siteName][datasetName]:
                        closedBlocks.append(blockName)
            else:
                msg = "Error injecting data %s: %s" % (migratedBlocks[siteName],
                                                       injectRes["error"])
                logging.error(msg)
                self.sendAlert(6, msg=msg)

        for closedBlock in closedBlocks:
            logging.debug("Closing block %s" % closedBlock)
            self.setBlockStatus.execute(closedBlock, locations=None,
                                        open_status="Closed",
                                        conn=myThread.transaction.conn,
                                        transaction=myThread.transaction)
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Poll the database for uninjected files and attempt to inject them into
        PhEDEx.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        self.injectFiles()
        self.closeBlocks()

        myThread.transaction.commit()
        return
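
# A short usage sketch for the poller's createInjectionSpec(): the injection
# data follows the {dataset: {block: {"is-open": ..., "files": [...]}}} layout
# described in the docstring above.  Dataset, block and LFN names below are
# made up for illustration.

exampleInjectionData = {
    "/Primary/Processed-v1/RECO": {
        "/Primary/Processed-v1/RECO#block-uuid-1": {
            "is-open": "y",
            "files": [{"lfn": "/store/data/file1.root",
                       "size": 1000,
                       "checksum": {"cksum": "1234"}}],
        }
    }
}

# With a configured poller instance, the injection call would then look like
# the one in injectFiles() above (node name is a placeholder):
#
#   xmlData = poller.createInjectionSpec(exampleInjectionData)
#   injectRes = poller.phedex.injectBlocks("T1_US_FNAL_Buffer", xmlData, 0, 0)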
class RequestQuery:

    def __init__(self, config):
        self.br = Browser()
        self.config = config

        # Initialise connections
        self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        Get the list of available CMSSW releases and
        return a dictionary of ScramArchitecture by CMSSW.
        """
        # Set a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response = br.open(url)
        soup = BeautifulSoup(response.read())

        # Dictionary form
        # {'CMSSW_X_X_X': ['slc5_amd64_gcc472'], ... }
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all('architecture'):
            for cmssw in arch.find_all('project'):
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel] = []
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)

        return archByCmssw

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list of block origin sites.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True, dataset=data)

        seList = []
        for block in response:
            if block['origin_site_name'] not in seList:
                seList.append(block['origin_site_name'])

        siteNames = []
        for node in self.nodeMappings['phedex']['node']:
            if node['se'] in seList:
                siteNames.append(node['name'])

        return siteNames, seList

    def phEDExNodetocmsName(self, nodeList):
        """
        Convert a list of PhEDEx node names to a list of CMS site names.
        """
        names = []
        for node in nodeList:
            name = node.replace('_MSS', '').replace('_Disk', '').replace('_Buffer', '').replace('_Export', '')
            if name not in names:
                names.append(name)
        return names

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source dbs url.
        If it is not set, then set the global tag to 'UNKNOWN'.
        """
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)

        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'

        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the dbs url, False otherwise.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)

        # An empty response means that the dataset is not at the url
        if not response:
            return False
        else:
            return True

    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary keyed by value.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
        return d

    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary keyed by label.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value
        return d

    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version = 1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the information needed
        for creating a request on ReqMgr.

        Input:
            - ticket: the ticket number, for instance 110773 on
              https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, for example "phys01" for
              https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.

        It returns a dictionary that contains the request information.
        """
        scramArchByCMSSW = self.getScramArchByCMSSW()
        self.nodeMappings = self.phedex.getNodeMap()
        task = ticket
        print "Processing ticket: %s" % task

        # splitting the input dataset
        input_primary_dataset = input_dataset.split('/')[1].replace(' ', '')
        input_processed_dataset = input_dataset.split('/')[2].replace(' ', '')
        data_tier = input_dataset.split('/')[3].replace(' ', '')

        # Transform the input value to a valid DBS url
        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
        dbs_url = dbs_base_url + dbs_url + "/DBSReader"
        release_id = cmssw_release

        # check if a deprecated release was used
        release = cmssw_release
        # check if the release has no ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check if the dataset is not at the dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
        except:
            raise Exception('Error on ticket %s, dataset %s not available at %s' % (task, input_dataset, dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' % (task, input_dataset, dbs_url))

        ## Get the Physics Group
        group_squad = 'cms-storeresults-' + group_name.replace("-", "_").lower()

        ## Get the Dataset Version
        dataset_version = str(version)

        # Set the default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove the leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name) == 0:
            new_dataset = input_processed_dataset.replace(group_name, "", 1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)

        # Get the dataset site info:
        phedex_map, se_names = self.getDatasetOriginSites(dbs_url, input_dataset)
        sites = self.phEDExNodetocmsName(phedex_map)

        infoDict = {}
        # Build the StoreResults json
        # First add all the default values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged"
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-", "_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url

        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version
        infoDict["SiteWhitelist"] = list(sites)

        # Create the report for Migration2Global
        report = {}

        # Fill the json file, if the status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = list(sites)
        report["se_names"] = list(se_names)

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir.
        """
        ## check if the file already exists
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, 'w')
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()
        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created.
        """
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if os.access(filename, os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report.
        """
        print "%20s %5s %10s %50s %50s" % ('Ticket', 'json', 'local DBS', 'Sites', 'se_names')
        print "%20s %5s %10s %50s %50s" % ('-' * 20, '-' * 5, '-' * 10, '-' * 50, '-' * 50)

        json = report["json"]
        ticket = report["task"]
        #status = report["ticketStatus"]
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        se_names = ', '.join(report["se_names"])
        print "%20s %5s %10s %50s %50s" % (ticket, json, localUrl, site, se_names)
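
# The JSON file written by writeJSONFile() wraps the request dictionary under a
# 'createRequest' key, so Ticket_110773.json (using the ticket number from the
# createRequestJSON docstring) would look roughly like the snippet below.  The
# dataset and release values are placeholders, not real request content:
#
#   {
#       "createRequest": {
#           "RequestType": "StoreResults",
#           "InputDataset": "/Primary/Processed/USER",
#           "DbsUrl": "https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader",
#           "CMSSWVersion": "CMSSW_X_Y_Z",
#           "ScramArch": "slc5_amd64_gcc472",
#           ...
#       }
#   }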
class PhEDExInjectorSubscriber(BaseWorkerThread):
    """
    _PhEDExInjectorSubscriber_

    Poll the DBSBuffer database and subscribe datasets as they are created.
    """

    def __init__(self, config):
        """
        ___init___

        Initialise class members
        """
        BaseWorkerThread.__init__(self)
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(config.PhEDExInjector, "group", "DataOps")

        # We will map node names to CMS names, which is what the spec will have.
        # If a CMS name is associated to many PhEDEx nodes then choose the MSS option.
        self.cmsToPhedexMap = {}
        self.phedexNodes = {'MSS': [], 'Disk': []}

        # initialize the alert framework (if available - config.Alert present)
        # self.sendAlert will then be available
        self.initAlerts(compName = "PhEDExInjector")

    def setup(self, parameters):
        """
        _setup_

        Create a DAO Factory for the PhEDExInjector.  Also load the PhEDEx node
        name to CMS name mappings from the data service.
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database",
                                logger = self.logger,
                                dbinterface = myThread.dbi)

        self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets")
        self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed")

        nodeMappings = self.phedex.getNodeMap()
        for node in nodeMappings["phedex"]["node"]:
            cmsName = self.siteDB.phEDExNodetocmsName(node["name"])
            if cmsName not in self.cmsToPhedexMap:
                self.cmsToPhedexMap[cmsName] = {}
            logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName))
            if node["kind"] not in self.cmsToPhedexMap[cmsName]:
                self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"]
            if node["kind"] in ["MSS", "Disk"]:
                self.phedexNodes[node["kind"]].append(node["name"])
        return

    def algorithm(self, parameters):
        """
        _algorithm_

        Run the subscription algorithm as configured
        """
        self.subscribeDatasets()
        return

    def subscribeDatasets(self):
        """
        _subscribeDatasets_

        Poll the database for datasets and subscribe them.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Check for completely unsubscribed datasets
        unsubscribedDatasets = self.getUnsubscribed.execute(conn = myThread.transaction.conn,
                                                            transaction = True)

        # Keep a list of subscriptions to tick as subscribed in the database
        subscriptionsMade = []

        # Create a list of subscriptions as defined by the PhEDEx data structures
        subs = SubscriptionList()

        # Create the subscription objects and add them to the list.
        # The list takes care of the sorting internally.
        for subInfo in unsubscribedDatasets:
            site = subInfo['site']

            if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']:
                if site not in self.cmsToPhedexMap:
                    msg = "Site %s doesn't appear to be valid to PhEDEx, " % site
                    msg += "skipping subscription: %s" % subInfo['id']
                    logging.error(msg)
                    self.sendAlert(7, msg = msg)
                    continue
                # Get the PhEDEx node from the CMS site name
                site = self.cmsToPhedexMap[site].get("MSS") or self.cmsToPhedexMap[site]["Disk"]

            # Avoid custodial subscriptions to disk nodes
            if site not in self.phedexNodes['MSS']:
                subInfo['custodial'] = 'n'
            # Avoid auto approval in T1 sites
            elif site.startswith("T1"):
                subInfo['request_only'] = 'y'

            phedexSub = PhEDExSubscription(subInfo['path'], site, self.group,
                                           priority = subInfo['priority'],
                                           move = subInfo['move'],
                                           custodial = subInfo['custodial'],
                                           request_only = subInfo['request_only'],
                                           subscriptionId = subInfo['id'])

            # Check if the subscription is a duplicate
            if phedexSub.matchesExistingSubscription(self.phedex) or \
               phedexSub.matchesExistingTransferRequest(self.phedex):
                subscriptionsMade.append(subInfo['id'])
                continue

            # Add it to the list
            subs.addSubscription(phedexSub)

        # Compact the subscriptions
        subs.compact()

        for subscription in subs.getSubscriptionList():
            try:
                xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl,
                                                           subscription.getDatasetPaths())
                logging.debug(str(xmlData))
                msg = "Subscribing: %s to %s, with options: " % (subscription.getDatasetPaths(),
                                                                 subscription.getNodes())
                msg += "Move: %s, Custodial: %s, Request Only: %s" % (subscription.move,
                                                                      subscription.custodial,
                                                                      subscription.request_only)
                logging.info(msg)
                self.phedex.subscribe(subscription, xmlData)
            except Exception as ex:
                logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
                logging.error("Exception: %s" % str(ex))
            else:
                subscriptionsMade.extend(subscription.getSubscriptionIds())

        # Register the result in DBSBuffer
        if subscriptionsMade:
            self.markSubscribed.execute(subscriptionsMade,
                                        conn = myThread.transaction.conn,
                                        transaction = True)

        myThread.transaction.commit()
        return
class RequestQuery:

    def __init__(self, config):
        self.br = Browser()
        self.config = config

        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.phedex = PhEDEx({"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        self.br.close()

    def login2Savannah(self):
        """
        login2Savannah

        Log into savannah with the parameters given in the config (username and
        password).  The user must have admin privileges for store results
        requests.
        """
        login_page = 'https://savannah.cern.ch/account/login.php?uri=%2F'
        savannah_page = 'https://savannah.cern.ch/task/?group=cms-storeresults'

        self.br.open(login_page)

        ## 'Search' form is form 0
        ## login form is form 1
        self.br.select_form(nr=1)

        username = self.config["SavannahUser"]
        self.br['form_loginname'] = username
        self.br['form_pw'] = self.config["SavannahPasswd"]

        self.br.submit()
        response = self.br.open(savannah_page)

        # Check to see if the login was successful
        if not re.search('Logged in as ' + username, response.read()):
            print('login unsuccessful, please check your username and password')
            return False
        else:
            return True

    def selectQueryForm(self, **kargs):
        """
        selectQueryForm

        Create the browser view to get all the store results tickets from
        savannah.
        """
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            ## Use the right query form, labelled Test
            control = self.br.find_control("report_id", type="select")
            for item in control.items:
                if item.attrs['label'] == "Test":
                    control.value = [item.attrs['value']]

            ## select the number of entries displayed per page
            control = self.br.find_control("chunksz", type="text")
            control.value = "150"

            ## check additional search parameters
            for arg in kargs:
                if arg == "approval_status":
                    control = self.br.find_control("resolution_id", type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                elif arg == "task_status":
                    control = self.br.find_control("status_id", type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                elif arg == "team":
                    control = self.br.find_control("custom_sb5", type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

            response = self.br.submit()
            response.read()
        return

    def getScramArchByCMSSW(self):
        """
        Get the list of available CMSSW releases and
        return a dictionary of ScramArchitecture by CMSSW.
        """
        # Set a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response = br.open(url)
        soup = BeautifulSoup(response.read())

        # Dictionary form
        # {'CMSSW_X_X_X': ['slc5_amd64_gcc472'], ... }
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all('architecture'):
            for cmssw in arch.find_all('project'):
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel] = []
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)

        return archByCmssw

    def createValueDicts(self):
        """
        Init dictionaries by value/label:
        - Releases by value
        - Physics group by value
        - DBS url by value
        - DBS url by label
        - Status of the savannah request by value
        - Status of the savannah ticket by value (Open/Closed/Any)
        """
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            control = self.br.find_control("custom_sb2", type="select")
            self.ReleaseByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb3", type="select")
            self.GroupByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb4", type="select")
            self.DBSByValueDict = self.getLabelByValueDict(control)
            self.DBSByLabelDict = self.getValueByLabelDict(control)

            control = self.br.find_control("resolution_id", type="select")
            self.StatusByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("status_id", type="select")
            self.TicketStatusByLabelDict = self.getValueByLabelDict(control)
        return

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list of block origin sites.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True, dataset=data)

        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)

        return psnList, list(pnnList)

    def phEDExNodetocmsName(self, nodeList):
        """
        Convert a list of PhEDEx node names to a list of CMS site names.
        """
        names = []
        for node in nodeList:
            name = node.replace('_MSS', '').replace('_Disk', '').replace('_Buffer', '').replace('_Export', '')
            if name not in names:
                names.append(name)
        return names

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source dbs url.
        If it is not set, then set the global tag to 'UNKNOWN'.
        """
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)

        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'

        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the dbs url, False otherwise.
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)

        # An empty response means that the dataset is not at the url
        if not response:
            return False
        else:
            return True

    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary keyed by value.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
        return d

    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary keyed by label.
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value
        return d

    def getRequests(self, **kargs):
        """
        getRequests

        Actually goes through all the savannah requests and creates json files
        if the ticket is not Closed and the status of the item is Done.  It
        also reports back a summary of the requests in savannah.
        """
        requests = []

        # Open the Browser and log into Savannah
        self.br = Browser()
        self.isLoggedIn = self.login2Savannah()

        if self.isLoggedIn:
            if not kargs:
                self.selectQueryForm(approval_status='1', task_status='0')
            else:
                self.selectQueryForm(**kargs)
            self.createValueDicts()

            self.br.select_form(name="bug_form")
            response = self.br.submit()
            html_output = response.read()

            scramArchByCMSSW = self.getScramArchByCMSSW()
            self.nodeMappings = self.phedex.getNodeMap()

            for link in self.br.links(text_regex="#[0-9]+"):
                response = self.br.follow_link(link)

                try:
                    ## Get Information
                    self.br.select_form(name="item_form")

                    ## remove the leading '#' (and whitespace) from the task number
                    task = link.text.replace('#', '').decode('utf-8').strip()
                    print("Processing ticket: %s" % task)

                    ## Get the input dataset name
                    control = self.br.find_control("custom_tf1", type="text")
                    input_dataset = control.value
                    input_primary_dataset = input_dataset.split('/')[1].replace(' ', '')
                    input_processed_dataset = input_dataset.split('/')[2].replace(' ', '')
                    data_tier = input_dataset.split('/')[3].replace(' ', '')

                    ## Get the DBS URL from the drop-down menu
                    control = self.br.find_control("custom_sb4", type="select")
                    dbs_url = self.DBSByValueDict[control.value[0]]

                    ## Get the DBS URL from the text field (for old entries)
                    if dbs_url == 'None':
                        control = self.br.find_control("custom_tf4", type="text")
                        dbs_url = control.value.replace(' ', '')
                    else:
                        # Transform the input value to a valid DBS url
                        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
                        dbs_url = dbs_base_url + dbs_url + "/DBSReader"

                    ## Get the Release
                    control = self.br.find_control("custom_sb2", type="select")
                    release_id = control.value

                    ## Get the current request status
                    control = self.br.find_control("status_id", type="select")
                    request_status_id = control.value
                    RequestStatusByValueDict = self.getLabelByValueDict(control)

                    # close the request if a deprecated release was used
                    try:
                        release = self.ReleaseByValueDict[release_id[0]]
                    except:
                        if len(self.ReleaseByValueDict) > 0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. If your request should be still processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n"
                            msg += "\n"
                            msg += "Thanks,\n"
                            msg += "Your StoreResults team"
                            self.closeRequest(task, msg)
                            self.br.back()
                            print("I tried to Close ticket %s due to CMSSW not valid" % task)
                        continue

                    # close the request if the release has no ScramArch match
                    if release not in scramArchByCMSSW:
                        if len(self.ReleaseByValueDict) > 0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid, there is no ScramArch match for the given CMSSW release.\n"
                            msg += "If your request should be still processed, please reopen the request and update the CMSSW release according to: https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML \n"
                            msg += "\n"
                            msg += "Thanks,\n"
                            msg += "Your StoreResults team"
                            self.closeRequest(task, msg)
                            self.br.back()
                            print("I tried to Close ticket %s due to ScramArch mismatch" % task)
                        continue
                    else:
                        index = len(scramArchByCMSSW[release])
                        scram_arch = scramArchByCMSSW[release][index - 1]

                    # close the request if the dataset is not at the dbs url
                    try:
                        data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
                    except:
                        print('I got an error trying to look for dataset %s at %s, please look at this ticket: %s' % (input_dataset, dbs_url, task))
                        continue

                    if not data_at_url:
                        msg = "Your request is not valid, I could not find the given dataset at %s\n" % dbs_url
                        msg += "If your request should be still processed, please reopen the request and change the DBS url properly \n"
                        msg += "\n"
                        msg += "Thanks,\n"
                        msg += "Your StoreResults team"
                        self.closeRequest(task, msg)
                        self.br.back()
                        print("I tried to Close ticket %s, dataset is not at DBS url" % task)
                        continue

                    # Avoid tickets that are not approved
                    #if not RequestStatusByValueDict[request_status_id[0]] == "Done":
                    #    continue

                    ## Get the Physics Group
                    control = self.br.find_control("custom_sb3", type="select")
                    group_id = control.value[0]
                    group_squad = 'cms-storeresults-' + self.GroupByValueDict[group_id].replace("-", "_").lower()

                    ## Get the Dataset Version
                    control = self.br.find_control("custom_tf3", type="text")
                    dataset_version = control.value.replace(' ', '')
                    if dataset_version == "":
                        dataset_version = '1'

                    ## Get the current status
                    control = self.br.find_control("resolution_id", type="select")
                    status_id = control.value

                    ## Get assigned to
                    control = self.br.find_control("assigned_to", type="select")
                    AssignedToByValueDict = self.getLabelByValueDict(control)
                    assignedTo_id = control.value

                    ## Assign the task to the physics group squad
                    if AssignedToByValueDict[assignedTo_id[0]] != group_squad:
                        assignedTo_id = [self.getValueByLabelDict(control)[group_squad]]
                        control.value = assignedTo_id
                        self.br.submit()

                    # Set the default Acquisition Era for StoreResults
                    acquisitionEra = "StoreResults"

                    ## Construction of the new dataset name (ProcessingString)
                    ## remove the leading hypernews or physics group name and StoreResults+Version
                    if input_processed_dataset.find(self.GroupByValueDict[group_id]) == 0:
                        new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id], "", 1)
                    else:
                        stripped_dataset = input_processed_dataset.split("-")[1:]
                        new_dataset = '_'.join(stripped_dataset)

                except Exception as ex:
                    self.br.back()
                    print("There is a problem with this ticket %s, please have a look at the error:" % task)
                    print(str(ex))
                    print(traceback.format_exc())
                    continue

                self.br.back()
                #
Get dataset site info: psnList, pnnList = self.getDatasetOriginSites(dbs_url,input_dataset) infoDict = {} # Build store results json # First add all the defaults values infoDict["RequestType"] = "StoreResults" infoDict["UnmergedLFNBase"] = "/store/unmerged" infoDict["MergedLFNBase"] = "/store/results/" + self.GroupByValueDict[group_id].replace("-","_").lower() infoDict["MinMergeSize"] = 1500000000 infoDict["MaxMergeSize"] = 5000000000 infoDict["MaxMergeEvents"] = 100000 infoDict["TimePerEvent"] = 40 infoDict["SizePerEvent"] = 512.0 infoDict["Memory"] = 2394 infoDict["CmsPath"] = "/uscmst1/prod/sw/cms" infoDict["Group"] = "DATAOPS" infoDict["DbsUrl"] = dbs_url # Add all the information pulled from Savannah infoDict["AcquisitionEra"] = acquisitionEra infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url,input_dataset) infoDict["DataTier"] = data_tier infoDict["InputDataset"] = input_dataset infoDict["ProcessingString"] = new_dataset infoDict["CMSSWVersion"] = release infoDict["ScramArch"] = scram_arch infoDict["ProcessingVersion"] = dataset_version infoDict["SiteWhitelist"] = psnList # Create report for Migration2Global report = {} #Fill json file, if status is done if self.StatusByValueDict[status_id[0]]=='Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed": self.writeJSONFile(task, infoDict) report["json"] = 'y' else: report["json"] = 'n' report["task"] = int(task) report["InputDataset"] = input_dataset report["ProcessingString"] = new_dataset report["ticketStatus"] = self.StatusByValueDict[status_id[0]] report["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]] report["localUrl"] = dbs_url report["sites"] = psnList report["pnns"] = pnnList # if the request is closed, change the item status to report to Closed if report["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed": report["ticketStatus"] = "Closed" requests.append(report) # Print out report self.printReport(requests) # Close connections self.br.close() return requests def closeRequest(self,task,msg): """ This close a specific savannag ticket Insert a message in the ticket """ if self.isLoggedIn: #self.createValueDicts() response = self.br.open('https://savannah.cern.ch/task/?'+str(task)) html = response.read() self.br.select_form(name="item_form") control = self.br.find_control("status_id",type="select") control.value = [self.TicketStatusByLabelDict["Closed"]] #Put reason to the comment field control = self.br.find_control("comment",type="textarea") control.value = msg #DBS Drop Down is a mandatory field, if set to None (for old requests), it is not possible to close the request self.setDBSDropDown() self.br.submit() #remove JSON ticket self.removeJSONFile(task) self.br.back() return def setDBSDropDown(self): ## Get DBS URL by Drop Down control = self.br.find_control("custom_sb4",type="select") dbs_url = self.DBSByValueDict[control.value[0]] ## Get DBS URL by text field (for old entries) if dbs_url=='None': tmp = self.br.find_control("custom_tf4",type="text") dbs_url = tmp.value.replace(' ','') if dbs_url.find("phys01")!=-1: control.value = [self.DBSByLabelDict["phys01"]] elif dbs_url.find("phys02")!=-1: control.value = [self.DBSByLabelDict["phys02"]] elif dbs_url.find("phys03")!=-1: control.value = [self.DBSByLabelDict["phys03"]] else: msg = 'DBS URL of the old request is neither phys01, phys02 nor phys03. Please, check!' 
            print(msg)
            raise RuntimeError(msg)
        return

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ## check if file already exists
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, 'w')
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()  # call close() so the ticket JSON is flushed to disk
        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if os.access(filename, os.F_OK):
            os.remove(filename)
        return

    def printReport(self, requests):
        """
        Print out a report
        """
        print("%20s %10s %5s %35s %10s %50s %50s" % ('Savannah Ticket', 'Status', 'json', 'Assigned to', 'local DBS', 'Sites', 'pnns'))
        print("%20s %10s %5s %35s %10s %50s %50s" % ('-'*20, '-'*10, '-'*5, '-'*35, '-'*10, '-'*50, '-'*50))
        for report in requests:
            jsonFlag = report["json"]  # 'y'/'n'; a local named 'json' would shadow the json module
            ticket = report["task"]
            status = report["ticketStatus"]
            assigned = report["assignedTo"]
            localUrl = report["localUrl"].split('/')[5]
            site = ', '.join(report["sites"])
            pnns = ', '.join(report["pnns"])
            print("%20s %10s %5s %35s %10s %50s %50s" % (ticket, status, jsonFlag, assigned, localUrl, site, pnns))
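# -- Hedged sketch, not part of the original class above: the DBS instance
# dispatch repeated in getDatasetOriginSites/setGlobalTagFromOrigin/isDataAtUrl
# keys off the sixth path segment of the DBS URL, e.g.
# 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader' -> 'phys03'.
# Here 'readers' is a stand-in dict for the self.dbsPhys01/02/03 reader objects.

def pickReader(dbs_url, readers):
    """Return the DBS reader matching the local instance encoded in dbs_url."""
    local_dbs = dbs_url.split('/')[5]
    if local_dbs not in readers:
        raise ValueError("Unsupported DBS instance: %s" % local_dbs)
    return readers[local_dbs]

# usage sketch: pickReader(dbs_url, readers).listBlocks(detail=True, dataset=data)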
class PhEDExInjectorSubscriber(BaseWorkerThread): """ _PhEDExInjectorSubscriber_ Poll the DBSBuffer database and subscribe datasets as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") self.siteDB = SiteDBJSON() self.dbsUrl = config.DBSInterface.globalDBSUrl self.group = getattr(config.PhEDExInjector, "group", "DataOps") self.tier0Mode = getattr(config.PhEDExInjector, "tier0Mode", False) # We will map node names to CMS names, that what the spec will have. # If a CMS name is associated to many PhEDEx node then choose the MSS option self.cmsToPhedexMap = {} self.phedexNodes = {'MSS':[], 'Disk':[]} # initialize the alert framework (if available - config.Alert present) # self.sendAlert will be then be available self.initAlerts(compName = "PhEDExInjector") def setup(self, parameters): """ _setup_ Create a DAO Factory for the PhEDExInjector. Also load the SE names to PhEDEx node name mappings from the data service. """ myThread = threading.currentThread() daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database", logger = self.logger, dbinterface = myThread.dbi) self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets") self.getUnsubscribedBlocks = daofactory(classname = "GetUnsubscribedBlocks") self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed") nodeMappings = self.phedex.getNodeMap() for node in nodeMappings["phedex"]["node"]: cmsName = self.siteDB.phEDExNodetocmsName(node["name"]) if cmsName not in self.cmsToPhedexMap: self.cmsToPhedexMap[cmsName] = {} logging.info("Loaded PhEDEx node %s for site %s" % (node["name"], cmsName)) if node["kind"] not in self.cmsToPhedexMap[cmsName]: self.cmsToPhedexMap[cmsName][node["kind"]] = node["name"] if node["kind"] in [ "MSS", "Disk" ]: self.phedexNodes[node["kind"]].append(node["name"]) return def algorithm(self, parameters): """ _algorithm_ Run the subscription algorithm as configured """ if self.tier0Mode: self.subscribeTier0Blocks() self.subscribeDatasets() return def subscribeTier0Blocks(self): """ _subscribeTier0Blocks_ Subscribe blocks to the Tier-0 where a replica subscription already exists. All Tier-0 subscriptions are move, custodial and autoapproved with high priority. """ myThread = threading.currentThread() myThread.transaction.begin() # Check for candidate blocks for subscription blocksToSubscribe = self.getUnsubscribedBlocks.execute(node = 'T0_CH_CERN', conn = myThread.transaction.conn, transaction = True) if not blocksToSubscribe: return # For the blocks we don't really care about the subscription options # We are subscribing all blocks with the same recipe. 
        subscriptionMap = {}
        for subInfo in blocksToSubscribe:
            dataset = subInfo['path']
            if dataset not in subscriptionMap:
                subscriptionMap[dataset] = []
            subscriptionMap[dataset].append(subInfo['blockname'])

        site = 'T0_CH_CERN'
        custodial = 'y'
        request_only = 'n'
        move = 'y'
        priority = 'High'

        # Get the phedex node
        phedexNode = self.cmsToPhedexMap[site]["MSS"]
        # block count is the sum over datasets, dataset count is the map size
        logging.info("Subscribing %d blocks, from %d datasets to the Tier-0" % (sum([len(x) for x in subscriptionMap.values()]), len(subscriptionMap)))
        newSubscription = PhEDExSubscription(subscriptionMap.keys(), phedexNode, self.group,
                                             custodial = custodial, request_only = request_only,
                                             move = move, priority = priority,
                                             level = 'block', blocks = subscriptionMap)
        # TODO: Check for blocks already subscribed
        try:
            xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl, newSubscription.getDatasetsAndBlocks())
            logging.debug(str(xmlData))
            self.phedex.subscribe(newSubscription, xmlData)
        except Exception as ex:
            logging.error("Something went wrong when communicating with PhEDEx, will try again later.")
            logging.error("Exception: %s" % str(ex))
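# -- Illustration only: the grouping loop in subscribeTier0Blocks above is
# equivalent to the defaultdict idiom below (sample rows are made up; the
# real ones come from the GetUnsubscribedBlocks DAO).
from collections import defaultdict

def groupBlocksByDataset(rows):
    subscriptionMap = defaultdict(list)
    for subInfo in rows:
        subscriptionMap[subInfo['path']].append(subInfo['blockname'])
    return dict(subscriptionMap)

rows = [{'path': '/Prim/Proc/RAW', 'blockname': '/Prim/Proc/RAW#1'},
        {'path': '/Prim/Proc/RAW', 'blockname': '/Prim/Proc/RAW#2'}]
assert groupBlocksByDataset(rows) == {'/Prim/Proc/RAW': ['/Prim/Proc/RAW#1', '/Prim/Proc/RAW#2']}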
class PhEDExInjectorPoller(BaseWorkerThread): """ _PhEDExInjectorPoller_ Poll the DBSBuffer database and inject files as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.config = config self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") self.dbsUrl = config.DBSInterface.globalDBSUrl self.group = getattr(config.PhEDExInjector, "group", "DataOps") # This will be used to map SE names which are stored in the DBSBuffer to # PhEDEx node names. The first key will be the "kind" which consists # of one of the following: MSS, Disk, Buffer. The next key will be the # SE name. self.seMap = {} self.nodeNames = [] self.diskSites = getattr(config.PhEDExInjector, "diskSites", ["storm-fe-cms.cr.cnaf.infn.it", "srm-cms-disk.gridpp.rl.ac.uk"]) # initialize the alert framework (if available - config.Alert present) # self.sendAlert will be then be available self.initAlerts(compName = "PhEDExInjector") self.filesToRecover = None def setup(self, parameters): """ _setup_ Create a DAO Factory for the PhEDExInjector. Also load the SE names to PhEDEx node name mappings from the data service. """ myThread = threading.currentThread() daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database", logger = self.logger, dbinterface = myThread.dbi) self.getUninjected = daofactory(classname = "GetUninjectedFiles") self.getMigrated = daofactory(classname = "GetMigratedBlocks") daofactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = self.logger, dbinterface = myThread.dbi) self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus") self.setBlockClosed = daofactory(classname = "SetBlockClosed") nodeMappings = self.phedex.getNodeMap() for node in nodeMappings["phedex"]["node"]: if node["kind"] not in self.seMap: self.seMap[node["kind"]] = {} logging.info("Adding mapping %s -> %s" % (node["se"], node["name"])) self.seMap[node["kind"]][node["se"]] = node["name"] self.nodeNames.append(node["name"]) return def createInjectionSpec(self, injectionData): """ _createInjectionSpec_ Transform the data structure returned from the database into an XML string for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. Each dataset path will map to a list of blocks, each block being a dict. The block dicts will have three keys: name, is-open and files. The files key will be a list of dicts, each of which have the following keys: lfn, size and checksum. The following is an example object: {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} """ injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl) for datasetPath in injectionData: datasetSpec = injectionSpec.getDataset(datasetPath) for fileBlockName, fileBlock in injectionData[datasetPath].iteritems(): blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"]) for file in fileBlock["files"]: blockSpec.addFile(file["lfn"], file["checksum"], file["size"]) return injectionSpec.save() def createRecoveryFileFormat(self, unInjectedData): """ _createRecoveryFileFormat_ Transform the data structure returned from database in to the dict format for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. 
unInjectedData format {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} returns {"block1": set(["lfn1", "lfn2"])} """ sortedBlocks = defaultdict(set) for datasetPath in unInjectedData: for fileBlockName, fileBlock in unInjectedData[datasetPath].iteritems(): for fileDict in fileBlock["files"]: sortedBlocks[fileBlockName].add(fileDict["lfn"]) return sortedBlocks def injectFiles(self): """ _injectFiles_ Inject any uninjected files in PhEDEx. """ myThread = threading.currentThread() uninjectedFiles = self.getUninjected.execute() injectedFiles = [] for siteName in uninjectedFiles.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if siteName in self.diskSites: if "Disk" in self.seMap and \ siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] elif "Buffer" in self.seMap and \ siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and \ siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] else: if "Buffer" in self.seMap and \ siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and \ siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and \ siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." % siteName logging.error(msg) self.sendAlert(7, msg = msg) continue myThread.transaction.begin() xmlData = self.createInjectionSpec(uninjectedFiles[siteName]) try: injectRes = self.phedex.injectBlocks(location, xmlData) except HTTPException as ex: # If we get an HTTPException of certain types, raise it as an error if ex.status == 400: # assume it is duplicate injection error. but if that is not the case # needs to be investigated self.filesToRecover = self.createRecoveryFileFormat(uninjectedFiles[siteName]) msg = "PhEDEx injection failed with %s error: %s" % (ex.status, ex.result) raise PhEDExInjectorPassableError(msg) except Exception as ex: # If we get an error here, assume that it's temporary (it usually is) # log it, and ignore it in the algorithm() loop msg = "Encountered error while attempting to inject blocks to PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) logging.info("Injection result: %s" % injectRes) if "error" not in injectRes: for datasetName in uninjectedFiles[siteName]: for blockName in uninjectedFiles[siteName][datasetName]: for file in uninjectedFiles[siteName][datasetName][blockName]["files"]: injectedFiles.append(file["lfn"]) else: msg = ("Error injecting data %s: %s" % (uninjectedFiles[siteName], injectRes["error"])) logging.error(msg) self.sendAlert(6, msg = msg) self.setStatus.execute(injectedFiles, 1, conn = myThread.transaction.conn, transaction = myThread.transaction) injectedFiles = [] myThread.transaction.commit() return def closeBlocks(self): """ _closeBlocks_ Close any blocks that have been migrated to global DBS. 
""" myThread = threading.currentThread() migratedBlocks = self.getMigrated.execute() for siteName in migratedBlocks.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if "Buffer" in self.seMap and \ siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and \ siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and \ siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." % siteName logging.error(msg) self.sendAlert(6, msg = msg) continue myThread.transaction.begin() try: xmlData = self.createInjectionSpec(migratedBlocks[siteName]) injectRes = self.phedex.injectBlocks(location, xmlData) logging.info("Block closing result: %s" % injectRes) except HTTPException as ex: # If we get an HTTPException of certain types, raise it as an error if ex.status == 400: msg = "Received 400 HTTP Error From PhEDEx: %s" % str(ex.result) logging.error(msg) self.sendAlert(6, msg = msg) logging.debug("Blocks: %s" % migratedBlocks[siteName]) logging.debug("XMLData: %s" % xmlData) raise else: msg = "Encountered error while attempting to close blocks in PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) except Exception as ex: # If we get an error here, assume that it's temporary (it usually is) # log it, and ignore it in the algorithm() loop msg = "Encountered error while attempting to close blocks in PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) if "error" not in injectRes: for datasetName in migratedBlocks[siteName]: for blockName in migratedBlocks[siteName][datasetName]: logging.debug("Closing block %s" % blockName) self.setBlockClosed.execute(blockName, conn = myThread.transaction.conn, transaction = myThread.transaction) else: msg = ("Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"])) logging.error(msg) self.sendAlert(6, msg = msg) myThread.transaction.commit() return def recoverInjectedFiles(self): """ When PhEDEx inject call timed out, run this function. Since there are 3 min reponse time out in cmsweb, some times PhEDEx injection call times out even though the call succeeded In that case run the recovery mode 1. first check whether files which injection status = 0 are in the PhEDEx. 2. if those file exist set the in_phedex status to 1 3. set self.filesToRecover = None """ myThread = threading.currentThread() injectedFiles = self.phedex.getInjectedFiles(self.filesToRecover) myThread.transaction.begin() self.setStatus.execute(injectedFiles, 1) myThread.transaction.commit() # when files are recovered set the self.file self.filesToRecover = None return injectedFiles def algorithm(self, parameters): """ _algorithm_ Poll the database for uninjected files and attempt to inject them into PhEDEx. 
""" myThread = threading.currentThread() try: if self.filesToRecover != None: logging.info(""" Running PhEDExInjector Recovery: previous injection call failed, check if files were injected to PhEDEx anyway""") recoveredFiles = self.recoverInjectedFiles() logging.info("%s files already injected: changed status in dbsbuffer db" % len(recoveredFiles)) self.injectFiles() self.closeBlocks() except PhEDExInjectorPassableError as ex: logging.error("Encountered PassableError in PhEDExInjector") logging.error("Rolling back current transaction and terminating current loop, but not killing component.") if getattr(myThread, 'transaction', None): myThread.transaction.rollbackForError() pass except Exception: # Guess we should roll back if we actually have an exception if getattr(myThread, 'transaction', None): myThread.transaction.rollbackForError() raise return
class PhEDExInjectorPoller(BaseWorkerThread): """ _PhEDExInjectorPoller_ Poll the DBSBuffer database and inject files as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.config = config self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") self.dbsUrl = config.DBSInterface.globalDBSUrl self.group = getattr(config.PhEDExInjector, "group", "DataOps") # This will be used to map SE names which are stored in the DBSBuffer to # PhEDEx node names. The first key will be the "kind" which consists # of one of the following: MSS, Disk, Buffer. The next key will be the # SE name. self.seMap = {} self.nodeNames = [] self.diskSites = getattr(config.PhEDExInjector, "diskSites", ["storm-fe-cms.cr.cnaf.infn.it", "srm-cms-disk.gridpp.rl.ac.uk"]) # initialize the alert framework (if available - config.Alert present) # self.sendAlert will be then be available self.initAlerts(compName = "PhEDExInjector") self.blocksToRecover = None def setup(self, parameters): """ _setup_ Create a DAO Factory for the PhEDExInjector. Also load the SE names to PhEDEx node name mappings from the data service. """ myThread = threading.currentThread() daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database", logger = self.logger, dbinterface = myThread.dbi) self.getUninjected = daofactory(classname = "GetUninjectedFiles") self.getMigrated = daofactory(classname = "GetMigratedBlocks") daofactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = self.logger, dbinterface = myThread.dbi) self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus") self.setBlockClosed = daofactory(classname = "SetBlockClosed") nodeMappings = self.phedex.getNodeMap() for node in nodeMappings["phedex"]["node"]: if node["kind"] not in self.seMap: self.seMap[node["kind"]] = {} logging.info("Adding mapping %s -> %s" % (node["se"], node["name"])) self.seMap[node["kind"]][node["se"]] = node["name"] self.nodeNames.append(node["name"]) return def createInjectionSpec(self, injectionData): """ _createInjectionSpec_ Transform the data structure returned from the database into an XML string for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. Each dataset path will map to a list of blocks, each block being a dict. The block dicts will have three keys: name, is-open and files. The files key will be a list of dicts, each of which have the following keys: lfn, size and checksum. The following is an example object: {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} """ injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl) for datasetPath in injectionData: datasetSpec = injectionSpec.getDataset(datasetPath) for fileBlockName, fileBlock in injectionData[datasetPath].iteritems(): blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"]) for file in fileBlock["files"]: blockSpec.addFile(file["lfn"], file["checksum"], file["size"]) return injectionSpec.save() def createRecoveryFileFormat(self, unInjectedData): """ _createRecoveryFileFormat_ Transform the data structure returned from database in to the dict format for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. 
unInjectedData format {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} returns [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])] """ blocks = [] for datasetPath in unInjectedData: for blockName, fileBlock in unInjectedData[datasetPath].items(): newBlock = { blockName : set() } for fileDict in fileBlock["files"]: newBlock[blockName].add(fileDict["lfn"]) blocks.append(newBlock) return blocks def injectFiles(self): """ _injectFiles_ Inject any uninjected files in PhEDEx. """ myThread = threading.currentThread() uninjectedFiles = self.getUninjected.execute() injectedFiles = [] for siteName in uninjectedFiles.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if siteName in self.diskSites: if "Disk" in self.seMap and \ siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] elif "Buffer" in self.seMap and \ siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and \ siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] else: if "Buffer" in self.seMap and \ siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and \ siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and \ siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." % siteName logging.error(msg) self.sendAlert(7, msg = msg) continue myThread.transaction.begin() xmlData = self.createInjectionSpec(uninjectedFiles[siteName]) try: injectRes = self.phedex.injectBlocks(location, xmlData) except HTTPException as ex: # If we get an HTTPException of certain types, raise it as an error if ex.status == 400: # assume it is duplicate injection error. but if that is not the case # needs to be investigated self.blocksToRecover = self.createRecoveryFileFormat(uninjectedFiles[siteName]) msg = "PhEDEx injection failed with %s error: %s" % (ex.status, ex.result) raise PhEDExInjectorPassableError(msg) except Exception as ex: # If we get an error here, assume that it's temporary (it usually is) # log it, and ignore it in the algorithm() loop msg = "Encountered error while attempting to inject blocks to PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) logging.info("Injection result: %s" % injectRes) if "error" not in injectRes: for datasetName in uninjectedFiles[siteName]: for blockName in uninjectedFiles[siteName][datasetName]: for file in uninjectedFiles[siteName][datasetName][blockName]["files"]: injectedFiles.append(file["lfn"]) else: msg = ("Error injecting data %s: %s" % (uninjectedFiles[siteName], injectRes["error"])) logging.error(msg) self.sendAlert(6, msg = msg) self.setStatus.execute(injectedFiles, 1, conn = myThread.transaction.conn, transaction = myThread.transaction) injectedFiles = [] myThread.transaction.commit() return def closeBlocks(self): """ _closeBlocks_ Close any blocks that have been migrated to global DBS. 
""" myThread = threading.currentThread() migratedBlocks = self.getMigrated.execute() for siteName in migratedBlocks.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if "Buffer" in self.seMap and \ siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and \ siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and \ siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." % siteName logging.error(msg) self.sendAlert(6, msg = msg) continue myThread.transaction.begin() try: xmlData = self.createInjectionSpec(migratedBlocks[siteName]) injectRes = self.phedex.injectBlocks(location, xmlData) logging.info("Block closing result: %s" % injectRes) except HTTPException as ex: # If we get an HTTPException of certain types, raise it as an error if ex.status == 400: msg = "Received 400 HTTP Error From PhEDEx: %s" % str(ex.result) logging.error(msg) self.sendAlert(6, msg = msg) logging.debug("Blocks: %s" % migratedBlocks[siteName]) logging.debug("XMLData: %s" % xmlData) raise else: msg = "Encountered error while attempting to close blocks in PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) except Exception as ex: # If we get an error here, assume that it's temporary (it usually is) # log it, and ignore it in the algorithm() loop msg = "Encountered error while attempting to close blocks in PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) if "error" not in injectRes: for datasetName in migratedBlocks[siteName]: for blockName in migratedBlocks[siteName][datasetName]: logging.debug("Closing block %s" % blockName) self.setBlockClosed.execute(blockName, conn = myThread.transaction.conn, transaction = myThread.transaction) else: msg = ("Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"])) logging.error(msg) self.sendAlert(6, msg = msg) myThread.transaction.commit() return def recoverInjectedFiles(self): """ When PhEDEx inject call timed out, run this function. Since there are 3 min reponse time out in cmsweb, some times PhEDEx injection call times out even though the call succeeded In that case run the recovery mode 1. first check whether files which injection status = 0 are in the PhEDEx. 2. if those file exist set the in_phedex status to 1 3. set self.blocksToRecover = None Run this recovery one block at a time, with too many blocks the call to the PhEDEx data service on cmsweb can time out """ myThread = threading.currentThread() # recover one block at a time for block in self.blocksToRecover: injectedFiles = self.phedex.getInjectedFiles(block) if len(injectedFiles) > 0: myThread.transaction.begin() self.setStatus.execute(injectedFiles, 1) myThread.transaction.commit() logging.info("%s files already injected: changed status in dbsbuffer db" % len(injectedFiles)) self.blocksToRecover = None return def algorithm(self, parameters): """ _algorithm_ Poll the database for uninjected files and attempt to inject them into PhEDEx. 
""" myThread = threading.currentThread() try: if self.blocksToRecover != None: logging.info(""" Running PhEDExInjector Recovery: previous injection call failed, check if files were injected to PhEDEx anyway""") self.recoverInjectedFiles() self.injectFiles() self.closeBlocks() except PhEDExInjectorPassableError as ex: logging.error("Encountered PassableError in PhEDExInjector") logging.error("Rolling back current transaction and terminating current loop, but not killing component.") if getattr(myThread, 'transaction', None): myThread.transaction.rollbackForError() pass except Exception: # Guess we should roll back if we actually have an exception if getattr(myThread, 'transaction', None): myThread.transaction.rollbackForError() raise return
class AccountantWorker(WMConnectionBase): """ Class that actually does the work of parsing FWJRs for the Accountant Run through ProcessPool """ def __init__(self, config): """ __init__ Create all DAO objects that are used by this class. """ WMConnectionBase.__init__(self, "WMCore.WMBS") myThread = threading.currentThread() self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.getOutputMapAction = self.daofactory( classname="Jobs.GetOutputMap") self.bulkAddToFilesetAction = self.daofactory( classname="Fileset.BulkAddByLFN") self.bulkParentageAction = self.daofactory( classname="Files.AddBulkParentage") self.getJobTypeAction = self.daofactory(classname="Jobs.GetType") self.getParentInfoAction = self.daofactory( classname="Files.GetParentInfo") self.setParentageByJob = self.daofactory( classname="Files.SetParentageByJob") self.setParentageByMergeJob = self.daofactory( classname="Files.SetParentageByMergeJob") self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi") self.setFileLocation = self.daofactory( classname="Files.SetLocationByLFN") self.setFileAddChecksum = self.daofactory( classname="Files.AddChecksumByLFN") self.addFileAction = self.daofactory(classname="Files.Add") self.jobCompleteInput = self.daofactory(classname="Jobs.CompleteInput") self.setBulkOutcome = self.daofactory(classname="Jobs.SetOutcomeBulk") self.getWorkflowSpec = self.daofactory( classname="Workflow.GetSpecAndNameFromTask") self.getJobInfoByID = self.daofactory(classname="Jobs.LoadFromID") self.getFullJobInfo = self.daofactory( classname="Jobs.LoadForErrorHandler") self.dbsStatusAction = self.dbsDaoFactory( classname="DBSBufferFiles.SetStatus") self.dbsParentStatusAction = self.dbsDaoFactory( classname="DBSBufferFiles.GetParentStatus") self.dbsChildrenAction = self.dbsDaoFactory( classname="DBSBufferFiles.GetChildren") self.dbsCreateFiles = self.dbsDaoFactory( classname="DBSBufferFiles.Add") self.dbsSetLocation = self.dbsDaoFactory( classname="DBSBufferFiles.SetLocationByLFN") self.dbsInsertLocation = self.dbsDaoFactory( classname="DBSBufferFiles.AddLocation") self.dbsSetChecksum = self.dbsDaoFactory( classname="DBSBufferFiles.AddChecksumByLFN") self.dbsSetRunLumi = self.dbsDaoFactory( classname="DBSBufferFiles.AddRunLumi") self.dbsGetWorkflow = self.dbsDaoFactory(classname="ListWorkflow") self.dbsLFNHeritage = self.dbsDaoFactory( classname="DBSBufferFiles.BulkHeritageParent") self.stateChanger = ChangeState(config) # Decide whether or not to attach jobReport to returned value self.returnJobReport = getattr(config.JobAccountant, 'returnReportFromWorker', False) # Store location for the specs for DBS self.specDir = getattr(config.JobAccountant, 'specDir', None) # ACDC service self.dataCollection = DataCollectionService( url=config.ACDC.couchurl, database=config.ACDC.database) jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url'] jobDBName = config.JobStateMachine.couchDBName jobCouchdb = CouchServer(jobDBurl) self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName) self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL) # Hold data for later commital self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.wmbsMergeFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] self.parentageBinds = [] self.parentageBindsForMerge = [] self.jobsWithSkippedFiles = {} self.count = 0 self.datasetAlgoID = 
collections.deque(maxlen=1000) self.datasetAlgoPaths = collections.deque(maxlen=1000) self.dbsLocations = set() self.workflowIDs = collections.deque(maxlen=1000) self.workflowPaths = collections.deque(maxlen=1000) self.phedex = PhEDEx() self.locLists = self.phedex.getNodeMap() return def reset(self): """ _reset_ Reset all global vars between runs. """ self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.wmbsMergeFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] self.parentageBinds = [] self.parentageBindsForMerge = [] self.jobsWithSkippedFiles = {} gc.collect() return def loadJobReport(self, parameters): """ _loadJobReport_ Given a framework job report on disk, load it and return a FwkJobReport instance. If there is any problem loading or parsing the framework job report return None. """ # The jobReportPath may be prefixed with "file://" which needs to be # removed so it doesn't confuse the FwkJobReport() parser. jobReportPath = parameters.get("fwjr_path", None) if not jobReportPath: logging.error("Bad FwkJobReport Path: %s" % jobReportPath) return self.createMissingFWKJR(parameters, 99999, "FWJR path is empty") jobReportPath = jobReportPath.replace("file://", "") if not os.path.exists(jobReportPath): logging.error("Bad FwkJobReport Path: %s" % jobReportPath) return self.createMissingFWKJR( parameters, 99999, 'Cannot find file in jobReport path: %s' % jobReportPath) if os.path.getsize(jobReportPath) == 0: logging.error("Empty FwkJobReport: %s" % jobReportPath) return self.createMissingFWKJR( parameters, 99998, 'jobReport of size 0: %s ' % jobReportPath) jobReport = Report() try: jobReport.load(jobReportPath) except Exception, ex: msg = "Error loading jobReport %s\n" % jobReportPath msg += str(ex) logging.error(msg) logging.debug("Failing job: %s\n" % parameters) return self.createMissingFWKJR(parameters, 99997, 'Cannot load jobReport') if len(jobReport.listSteps()) == 0: logging.error("FwkJobReport with no steps: %s" % jobReportPath) return self.createMissingFWKJR( parameters, 99997, 'jobReport with no steps: %s ' % jobReportPath) return jobReport
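# -- Minimal restatement (same error codes as loadJobReport above) of the
# report-path validation ladder: strip the 'file://' prefix, then require the
# file to exist and be non-empty before any parsing is attempted.
import os

def validateReportPath(jobReportPath):
    if not jobReportPath:
        return (99999, "FWJR path is empty")
    jobReportPath = jobReportPath.replace("file://", "")
    if not os.path.exists(jobReportPath):
        return (99999, "Cannot find file in jobReport path: %s" % jobReportPath)
    if os.path.getsize(jobReportPath) == 0:
        return (99998, "jobReport of size 0: %s" % jobReportPath)
    return (0, jobReportPath)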
def testNormalModeSubscriptions(self): """ _testNormalModeSubscriptions_ Tests that we can make custodial/non-custodial subscriptions on normal operation mode, this time we don't need WMBS for anything. All is subscribed in one go. Check that the requests are correct. """ self.stuffDatabase() config = self.createConfig() phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") try: nodeMappings = phedex.getNodeMap() except Exception: time.sleep(2) try: nodeMappings = phedex.getNodeMap() except Exception: time.sleep(4) nodeMappings = phedex.getNodeMap() subscriber = PhEDExInjectorSubscriber(config, phedex, nodeMappings) subscriber.setup({}) subscriber.algorithm({}) phedexInstance = subscriber.phedex subscriptions = phedexInstance.subRequests # Let's check /BogusPrimary/Run2012Z-PromptReco-v1/RECO # According to the spec, this should be custodial at T1_US_FNAL # Non-custodial at T1_UK_RAL and T3_CO_Uniandes # Autoapproved in all sites # Priority is normal self.assertTrue(self.testDatasetA in subscriptions, "Dataset A was not subscribed") subInfoA = subscriptions[self.testDatasetA] self.assertEqual(len(subInfoA), 3, "Dataset A was not subscribed to all sites") for subInfo in subInfoA: site = subInfo["node"] self.assertEqual(subInfo["priority"], "normal", "Wrong priority for subscription") if site == "T1_UK_RAL_MSS" or site == "T3_CO_Uniandes": self.assertEqual( subInfo["custodial"], "n", "Wrong custodiality for dataset A at %s" % subInfo["node"]) self.assertEqual( subInfo["request_only"], "n", "Wrong requestOnly for dataset A at %s" % subInfo["node"]) self.assertEqual( subInfo["move"], "n", "Wrong subscription type for dataset A at %s" % subInfo["node"]) elif site == "T1_US_FNAL_MSS": self.assertEqual( subInfo["custodial"], "y", "Wrong custodiality for dataset A at %s" % subInfo["node"]) self.assertEqual( subInfo["request_only"], "n", "Wrong requestOnly for dataset A at %s" % subInfo["node"]) self.assertEqual( subInfo["move"], "y", "Wrong subscription type for dataset A at %s" % subInfo["node"]) else: self.fail("Dataset A was subscribed to a wrong site %s" % site) # Now check /BogusPrimary/CRUZET11-v1/RAW # According to the spec, this is not custodial anywhere # Non-custodial at T1_UK_RAL and T2_CH_CERN # Request only at both sites and with high priority self.assertTrue(self.testDatasetB in subscriptions, "Dataset B was not subscribed") subInfoB = subscriptions[self.testDatasetB] self.assertEqual(len(subInfoB), 2, "Dataset B was not subscribed to all sites") for subInfo in subInfoB: site = subInfo["node"] self.assertEqual(subInfo["priority"], "high", "Wrong priority for subscription") if site == "T1_UK_RAL_MSS" or site == "T2_CH_CERN": self.assertEqual( subInfo["custodial"], "n", "Wrong custodiality for dataset B at %s" % subInfo["node"]) self.assertEqual( subInfo["request_only"], "y", "Wrong requestOnly for dataset B at %s" % subInfo["node"]) self.assertEqual( subInfo["move"], "n", "Wrong subscription type for dataset B at %s" % subInfo["node"]) else: self.fail("Dataset B was subscribed to a wrong site %s" % site) myThread = threading.currentThread() result = myThread.dbi.processData( "SELECT COUNT(*) FROM dbsbuffer_dataset_subscription where subscribed = 1" )[0].fetchall() self.assertEqual(result[0][0], 5, "Not all datasets were marked as subscribed") result = myThread.dbi.processData( "SELECT site FROM dbsbuffer_dataset_subscription where subscribed = 0" )[0].fetchall() self.assertEqual(result[0][0], "T1_IT_CNAF", "A non-valid CMS site was subscribed") # Reset and run again 
and make sure that no duplicate subscriptions are created myThread.dbi.processData( "UPDATE dbsbuffer_dataset_subscription SET subscribed = 0") subscriber.algorithm({}) self.assertEqual(len(subscriptions[self.testDatasetA]), 3) self.assertEqual(len(subscriptions[self.testDatasetB]), 2) return
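# -- Compact restatement of the subscription parameters this test asserts,
# taken directly from the checks above; useful as a lookup table when adding
# new datasets to the test spec.
EXPECTED_SUBS = {
    "/BogusPrimary/Run2012Z-PromptReco-v1/RECO": {
        "T1_US_FNAL_MSS": {"custodial": "y", "request_only": "n", "move": "y", "priority": "normal"},
        "T1_UK_RAL_MSS": {"custodial": "n", "request_only": "n", "move": "n", "priority": "normal"},
        "T3_CO_Uniandes": {"custodial": "n", "request_only": "n", "move": "n", "priority": "normal"},
    },
    "/BogusPrimary/CRUZET11-v1/RAW": {
        "T1_UK_RAL_MSS": {"custodial": "n", "request_only": "y", "move": "n", "priority": "high"},
        "T2_CH_CERN": {"custodial": "n", "request_only": "y", "move": "n", "priority": "high"},
    },
}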
class Getter(object): """ Get transfers to be submitted """ def __init__(self, config, quiet, debug, test=False): """ initialize log, connections etc """ self.config = config.Getter self.TEST = False createLogdir('Monitor') def setRootLogger(quiet, debug): """ Taken from CRABServer TaskWorker Sets the root logger with the desired verbosity level The root logger logs to logs/asolog.txt and every single logging instruction is propagated to it (not really nice to read) :arg bool quiet: it tells if a quiet logger is needed :arg bool debug: it tells if needs a verbose logger :return logger: a logger with the appropriate logger level.""" createLogdir('logs') createLogdir('logs/processes') if self.TEST: # if we are testing log to the console is easier logging.getLogger().addHandler(logging.StreamHandler()) else: logHandler = MultiProcessingLog('logs/submitter.txt', when='midnight') logFormatter = \ logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s") logHandler.setFormatter(logFormatter) logging.getLogger().addHandler(logHandler) loglevel = logging.INFO if quiet: loglevel = logging.WARNING if debug: loglevel = logging.DEBUG logging.getLogger().setLevel(loglevel) logger = setProcessLogger("master") logger.debug("PID %s.", os.getpid()) logger.debug("Logging level initialized to %s.", loglevel) return logger try: self.phedex = PhEDEx(responseType='xml', dict={ 'key': self.config.opsProxy, 'cert': self.config.opsProxy }) except Exception as e: self.logger.exception('PhEDEx exception: %s' % e) self.documents = dict() self.doc_acq = '' self.STOP = False self.logger = setRootLogger(quiet, debug) self.q = Queue() self.active_lfns = list() self.Update = update(self.logger, self.config) self.site_tfc_map = {} for site in [ x['name'] for x in json.loads(self.phedex.getNodeMap())['phedex']['node'] ]: if site and str(site) != 'None' and str(site) != 'unknown': self.site_tfc_map[site] = self.get_tfc_rules(site) self.logger.debug('tfc site: %s %s' % (site, self.get_tfc_rules(site))) def algorithm(self): """ - Get Users - Get Source dest - create queue for each (user, link) - feed threads """ workers = list() for i in range(self.config.max_threads_num): worker = Thread(target=self.worker, args=(i, self.q)) worker.setDaemon(True) worker.start() workers.append(worker) site_tfc_map = dict() while not self.STOP: sites, users = self.oracleSiteUser(self.Update) self.Update.retry() for _user in users: for source in sites: for dest in sites: lfns = [[x['source_lfn'], x['destination_lfn']] for x in self.documents if x['source'] == source and x['destination'] == dest and x['username'] == _user[0] and x not in self.active_lfns] self.active_lfns = self.active_lfns + lfns # IMPORTANT: remove only on final states for files in chunks(lfns, self.config.files_per_job): self.q.put((files, _user, source, dest, self.site_tfc_map)) self.logger.debug('Queue lenght: %s' % self.q.qsize()) time.sleep(4) for w in workers: w.join() self.logger.info('Submitter stopped.') def oracleSiteUser(self, Update): """ 1. Acquire transfers from DB 2. 
Get acquired users and destination sites """ # TODO: flexible with other DBs and get users list users = Update.acquire() if users != 1: self.documents = Update.getAcquired(users) for doc in self.documents: if doc['user_role'] is None: doc['user_role'] = "" if doc['user_group'] is None: doc['user_group'] = "" unique_users = list() try: unique_users = [ list(i) for i in set( tuple([x['username'], x['user_group'], x['user_role']]) for x in self.documents) ] except Exception as ex: self.logger.error("Failed to map active users: %s" % ex) if len(unique_users) <= self.config.pool_size: active_users = unique_users else: active_users = unique_users[:self.config.pool_size] self.logger.info('%s active users' % len(active_users)) self.logger.debug('Active users are: %s' % active_users) active_sites_dest = [x['destination'] for x in self.documents] active_sites = active_sites_dest + [ x['source'] for x in self.documents ] self.logger.debug('Active sites are: %s' % list(set(active_sites))) return list(set(active_sites)), active_users def get_tfc_rules(self, site): """ Get the TFC regexp for a given site. """ tfc_file = None try: self.phedex.getNodeTFC(site) except Exception as e: self.logger.exception('PhEDEx exception: %s' % e) try: tfc_file = self.phedex.cacheFileName('tfc', inputdata={'node': site}) except Exception as e: self.logger.exception('PhEDEx cache exception: %s' % e) return readTFC(tfc_file) def critical_failure(self, lfns, lock, inputs): """ if an exception occurs before the end, remove lfns from active to let it be reprocessed later. :param lfns: :param lock: :param inputs: :return: """ lock.acquire() for lfn in lfns: self.active_lfns.remove(lfn) lock.release() inputs.task_done() def worker(self, i, inputs): """ - Retrieve userDN - Retrieve user proxy - Delegate proxy to fts is needed - submit fts job - update doc states :param i: thread number :param inputs: tuple (lfns, _user, source, dest, tfc_map) :return: """ # TODO: differentiate log messages per USER! logger = self.logger logger.info("Process %s is starting. PID %s", i, os.getpid()) lock = Lock() Update = update(logger, self.config) while not self.STOP: if inputs.empty(): time.sleep(10) continue try: lfns, _user, source, dest, tfc_map = inputs.get() [user, group, role] = _user except (EOFError, IOError): crashMessage = "Hit EOF/IO in getting new work\n" crashMessage += "Assuming this is a graceful break attempt.\n" logger.error(crashMessage) continue start = time.time() if not self.config.TEST: try: userDN = getDNFromUserName(user, logger, ckey=self.config.opsProxy, cert=self.config.opsProxy) except Exception as ex: logger.exception('Cannot retrieve user DN') self.critical_failure(lfns, lock, inputs) continue defaultDelegation = { 'logger': logger, 'credServerPath': self.config.credentialDir, 'myProxySvr': 'myproxy.cern.ch', 'min_time_left': getattr(self.config, 'minTimeLeft', 36000), 'serverDN': self.config.serverDN, 'uisource': '', 'cleanEnvironment': getattr(self.config, 'cleanEnvironment', False) } cache_area = self.config.cache_area try: defaultDelegation['myproxyAccount'] = re.compile( 'https?://([^/]*)/.*').findall(cache_area)[0] except IndexError: logger.error( 'MyproxyAccount parameter cannot be retrieved from %s . 
' % self.config.cache_area) self.critical_failure(lfns, lock, inputs) continue if getattr(self.config, 'serviceCert', None): defaultDelegation['server_cert'] = self.config.serviceCert if getattr(self.config, 'serviceKey', None): defaultDelegation['server_key'] = self.config.serviceKey try: defaultDelegation['userDN'] = userDN defaultDelegation['group'] = group defaultDelegation['role'] = role logger.debug('delegation: %s' % defaultDelegation) valid_proxy, user_proxy = getProxy(defaultDelegation, logger) if not valid_proxy: logger.error( 'Failed to retrieve user proxy... putting docs on retry' ) logger.error( 'docs on retry: %s' % Update.failed(lfns, submission_error=True)) continue except Exception: logger.exception('Error retrieving proxy') self.critical_failure(lfns, lock, inputs) continue else: user_proxy = self.config.opsProxy self.logger.debug("Using opsProxy for testmode") context = dict() try: if self.config.TEST: logger.debug("Running in test mode, submitting fake jobs") else: context = fts3.Context(self.config.serverFTS, user_proxy, user_proxy, verify=True) logger.debug( fts3.delegate(context, lifetime=timedelta(hours=48), force=False)) except Exception: logger.exception("Error submitting to FTS") self.critical_failure(lfns, lock, inputs) continue failed_lfn = list() try: if self.config.TEST: submitted_lfn = lfns jobid = getHashLfn(lfns[0][0]) self.logger.debug('Fake job id: ' + jobid) else: failed_lfn, submitted_lfn, jobid = Submission( lfns, source, dest, i, self.logger, fts3, context, tfc_map) if jobid == -1: self.critical_failure(lfns, lock, inputs) continue logger.info('Submitted %s files' % len(submitted_lfn)) except Exception: logger.exception("Unexpected error during FTS job submission!") self.critical_failure(lfns, lock, inputs) continue # TODO: add file FTS id and job id columns for kill command try: Update.submitted(lfns) except Exception: logger.exception("Error updating document status") self.critical_failure(lfns, lock, inputs) continue try: Update.failed(failed_lfn) except Exception: logger.exception( "Error updating document status, job submission will be retried later..." ) self.critical_failure(lfns, lock, inputs) continue try: createLogdir('Monitor/' + user) with open('Monitor/' + user + '/' + str(jobid) + '.txt', 'w') as outfile: json.dump(lfns, outfile) logger.info('Monitor files created') except Exception: logger.exception("Error creating file for monitor") self.critical_failure(lfns, lock, inputs) continue end = time.time() self.logger.info('Input processed in %s', str(end - start)) time.sleep(0.5) logger.debug("Worker %s exiting.", i) return 0 def quit_(self): """ set STOP to True :return: """ self.logger.info( "Received kill request. Setting STOP flag in the master and threads..." ) self.STOP = True
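# -- Sketch of the 'chunks' helper that algorithm() above relies on but which
# is not defined in this snippet (its exact implementation is an assumption):
# split the LFN list into fixed-size batches, one FTS submission per batch.

def chunks(seq, size):
    """Yield successive size-length slices of seq."""
    for i in range(0, len(seq), size):
        yield seq[i:i + size]

# e.g. with files_per_job = 2: list(chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]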
class PhEDExInjectorPoller(BaseWorkerThread): """ _PhEDExInjectorPoller_ Poll the DBSBuffer database and inject files as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.config = config self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") self.dbsUrl = config.DBSInterface.globalDBSUrl self.group = getattr(config.PhEDExInjector, "group", "DataOps") # This will be used to map SE names which are stored in the DBSBuffer to # PhEDEx node names. The first key will be the "kind" which consists # of one of the following: MSS, Disk, Buffer. The next key will be the # SE name. self.seMap = {} self.nodeNames = [] self.diskSites = getattr(config.PhEDExInjector, "diskSites", ["storm-fe-cms.cr.cnaf.infn.it", "srm-cms-disk.gridpp.rl.ac.uk"]) # initialize the alert framework (if available - config.Alert present) # self.sendAlert will be then be available self.initAlerts(compName = "PhEDExInjector") self.filesToRecover = None def setup(self, parameters): """ _setup_ Create a DAO Factory for the PhEDExInjector. Also load the SE names to PhEDEx node name mappings from the data service. """ myThread = threading.currentThread() daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database", logger = self.logger, dbinterface = myThread.dbi) self.getUninjected = daofactory(classname = "GetUninjectedFiles") self.getMigrated = daofactory(classname = "GetMigratedBlocks") daofactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = self.logger, dbinterface = myThread.dbi) self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus") daofactory = DAOFactory(package = "WMComponent.DBSUpload.Database", logger = self.logger, dbinterface = myThread.dbi) self.setBlockStatus = daofactory(classname = "SetBlockStatus") nodeMappings = self.phedex.getNodeMap() for node in nodeMappings["phedex"]["node"]: if not self.seMap.has_key(node["kind"]): self.seMap[node["kind"]] = {} logging.info("Adding mapping %s -> %s" % (node["se"], node["name"])) self.seMap[node["kind"]][node["se"]] = node["name"] self.nodeNames.append(node["name"]) return def createInjectionSpec(self, injectionData): """ _createInjectionSpec_ Transform the data structure returned from the database into an XML string for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. Each dataset path will map to a list of blocks, each block being a dict. The block dicts will have three keys: name, is-open and files. The files key will be a list of dicts, each of which have the following keys: lfn, size and checksum. The following is an example object: {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} """ injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl) for datasetPath in injectionData: datasetSpec = injectionSpec.getDataset(datasetPath) for fileBlockName, fileBlock in injectionData[datasetPath].iteritems(): blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"]) for file in fileBlock["files"]: blockSpec.addFile(file["lfn"], file["checksum"], file["size"]) return injectionSpec.save() def createRecoveryFileFormat(self, unInjectedData): """ _createRecoveryFileFormat_ Transform the data structure returned from database in to the dict format for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. 
unInjectedData format {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} returns {"block1": set(["lfn1", "lfn2"])} """ sortedBlocks = defaultdict(set) for datasetPath in unInjectedData: for fileBlockName, fileBlock in unInjectedData[datasetPath].iteritems(): for fileDict in fileBlock["files"]: sortedBlocks[fileBlockName].add(fileDict["lfn"]) return sortedBlocks def injectFiles(self): """ _injectFiles_ Inject any uninjected files in PhEDEx. """ myThread = threading.currentThread() uninjectedFiles = self.getUninjected.execute() injectedFiles = [] for siteName in uninjectedFiles.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if siteName in self.diskSites: if self.seMap.has_key("Disk") and \ self.seMap["Disk"].has_key(siteName): location = self.seMap["Disk"][siteName] elif self.seMap.has_key("Buffer") and \ self.seMap["Buffer"].has_key(siteName): location = self.seMap["Buffer"][siteName] elif self.seMap.has_key("MSS") and \ self.seMap["MSS"].has_key(siteName): location = self.seMap["MSS"][siteName] else: if self.seMap.has_key("Buffer") and \ self.seMap["Buffer"].has_key(siteName): location = self.seMap["Buffer"][siteName] elif self.seMap.has_key("MSS") and \ self.seMap["MSS"].has_key(siteName): location = self.seMap["MSS"][siteName] elif self.seMap.has_key("Disk") and \ self.seMap["Disk"].has_key(siteName): location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." % siteName logging.error(msg) self.sendAlert(7, msg = msg) continue myThread.transaction.begin() xmlData = self.createInjectionSpec(uninjectedFiles[siteName]) try: injectRes = self.phedex.injectBlocks(location, xmlData) except HTTPException, ex: # If we get an HTTPException of certain types, raise it as an error if ex.status == 400: # assume it is duplicate injection error. but if that is not the case # needs to be investigated self.filesToRecover = self.createRecoveryFileFormat(uninjectedFiles[siteName]) msg = "PhEDEx injection failed with %s error: %s" % (ex.status, ex.result) raise PhEDExInjectorPassableError(msg) except Exception, ex: # If we get an error here, assume that it's temporary (it usually is) # log it, and ignore it in the algorithm() loop msg = "Encountered error while attempting to inject blocks to PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) logging.info("Injection result: %s" % injectRes) if not injectRes.has_key("error"): for datasetName in uninjectedFiles[siteName]: for blockName in uninjectedFiles[siteName][datasetName]: for file in uninjectedFiles[siteName][datasetName][blockName]["files"]: injectedFiles.append(file["lfn"]) else: msg = ("Error injecting data %s: %s" % (uninjectedFiles[siteName], injectRes["error"])) logging.error(msg) self.sendAlert(6, msg = msg) self.setStatus.execute(injectedFiles, 1, conn = myThread.transaction.conn, transaction = myThread.transaction) injectedFiles = [] myThread.transaction.commit()
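# -- Worked example (shape copied from the createInjectionSpec docstring above)
# of the injectionData structure the poller feeds to XMLDrop before calling
# injectBlocks; dataset/block/LFN names are illustrative only.
injectionData = {
    "/Prim/Proc-v1/RAW": {
        "/Prim/Proc-v1/RAW#block1": {
            "is-open": "y",
            "files": [
                {"lfn": "/store/data/lfn1", "size": 10, "checksum": {"cksum": "1234"}},
                {"lfn": "/store/data/lfn2", "size": 20, "checksum": {"cksum": "4321"}},
            ],
        }
    }
}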
class PhEDExInjectorPoller(BaseWorkerThread): """ _PhEDExInjectorPoller_ Poll the DBSBuffer database and inject files as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.dbsUrl = config.DBSInterface.globalDBSUrl self.pollCounter = 0 self.subFrequency = None if getattr(config.PhEDExInjector, "subscribeDatasets", False): pollInterval = config.PhEDExInjector.pollInterval subInterval = config.PhEDExInjector.subscribeInterval self.subFrequency = max(1, int(round(subInterval / float(pollInterval)))) logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles", self.subFrequency) # subscribe on first cycle self.pollCounter = self.subFrequency - 1 # retrieving the node mappings is fickle and can fail quite often self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") try: nodeMappings = self.phedex.getNodeMap() except Exception: time.sleep(2) try: nodeMappings = self.phedex.getNodeMap() except Exception: time.sleep(4) nodeMappings = self.phedex.getNodeMap() # This will be used to map SE names which are stored in the DBSBuffer to # PhEDEx node names. The first key will be the "kind" which consists # of one of the following: MSS, Disk, Buffer. The next key will be the # SE name. self.seMap = {} self.nodeNames = [] for node in nodeMappings["phedex"]["node"]: if node["kind"] not in self.seMap: self.seMap[node["kind"]] = {} logging.info("Adding mapping %s -> %s", node["se"], node["name"]) self.seMap[node["kind"]][node["se"]] = node["name"] self.nodeNames.append(node["name"]) self.phedexNodes = {'MSS': [], 'Disk': []} for node in nodeMappings["phedex"]["node"]: if node["kind"] in ["MSS", "Disk"]: self.phedexNodes[node["kind"]].append(node["name"]) # initialize the alert framework (if available - config.Alert present) # self.sendAlert will then be available self.initAlerts(compName = "PhEDExInjector") self.blocksToRecover = [] return def setup(self, parameters): """ _setup_ Create a DAO Factory and set up the DAOs. """ myThread = threading.currentThread() daofactory = DAOFactory(package = "WMComponent.PhEDExInjector.Database", logger = self.logger, dbinterface = myThread.dbi) self.getUninjected = daofactory(classname = "GetUninjectedFiles") self.getMigrated = daofactory(classname = "GetMigratedBlocks") self.findDeletableBlocks = daofactory(classname = "GetDeletableBlocks") self.markBlocksDeleted = daofactory(classname = "MarkBlocksDeleted") self.getUnsubscribed = daofactory(classname = "GetUnsubscribedDatasets") self.markSubscribed = daofactory(classname = "MarkDatasetSubscribed") daofactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = self.logger, dbinterface = myThread.dbi) self.setStatus = daofactory(classname = "DBSBufferFiles.SetPhEDExStatus") self.setBlockClosed = daofactory(classname = "SetBlockClosed") return def algorithm(self, parameters): """ _algorithm_ Poll the database for uninjected files and attempt to inject them into PhEDEx.
""" logging.info("Running PhEDEx injector poller algorithm...") self.pollCounter += 1 if self.blocksToRecover: logging.info("""PhEDExInjector Recovery: previous injection call failed, check if files were injected to PhEDEx anyway""") self.recoverInjectedFiles() self.injectFiles() self.closeBlocks() if self.pollCounter == self.subFrequency: self.pollCounter = 0 self.deleteBlocks() self.subscribeDatasets() return def createInjectionSpec(self, injectionData): """ _createInjectionSpec_ Transform the data structure returned from the database into an XML string for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. Each dataset path will map to a list of blocks, each block being a dict. The block dicts will have three keys: name, is-open and files. The files key will be a list of dicts, each of which have the following keys: lfn, size and checksum. The following is an example object: {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} """ injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl) for datasetPath in injectionData: datasetSpec = injectionSpec.getDataset(datasetPath) for fileBlockName, fileBlock in injectionData[datasetPath].iteritems(): blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"]) for f in fileBlock["files"]: blockSpec.addFile(f["lfn"], f["checksum"], f["size"]) return injectionSpec.save() def createRecoveryFileFormat(self, unInjectedData): """ _createRecoveryFileFormat_ Transform the data structure returned from database in to the dict format for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. unInjectedData format {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} returns [{"block1": set(["lfn1", "lfn2"])}, {"block2": set(["lfn3", "lfn4"])] """ blocks = [] for datasetPath in unInjectedData: for blockName, fileBlock in unInjectedData[datasetPath].items(): newBlock = { blockName: set() } for fileDict in fileBlock["files"]: newBlock[blockName].add(fileDict["lfn"]) blocks.append(newBlock) return blocks def injectFiles(self): """ _injectFiles_ Inject any uninjected files in PhEDEx. """ logging.info("Starting injectFiles method") uninjectedFiles = self.getUninjected.execute() for siteName in uninjectedFiles.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location == None: msg = "Could not map SE %s to PhEDEx node." 
% siteName logging.error(msg) self.sendAlert(7, msg = msg) continue maxDataset = 20 maxBlocks = 50 maxFiles = 5000 numberDatasets = 0 numberBlocks = 0 numberFiles = 0 injectData = {} lfnList = [] for dataset in uninjectedFiles[siteName]: numberDatasets += 1 injectData[dataset] = uninjectedFiles[siteName][dataset] for block in injectData[dataset]: numberBlocks += 1 numberFiles += len(injectData[dataset][block]['files']) for fileInfo in injectData[dataset][block]['files']: lfnList.append(fileInfo['lfn']) if numberDatasets >= maxDataset or numberBlocks >= maxBlocks or numberFiles >= maxFiles: self.injectFilesPhEDExCall(location, injectData, lfnList) numberDatasets = 0 numberBlocks = 0 numberFiles = 0 injectData = {} lfnList = [] if injectData: self.injectFilesPhEDExCall(location, injectData, lfnList) return def injectFilesPhEDExCall(self, location, injectData, lfnList): """ _injectFilesPhEDExCall_ actual PhEDEx call for file injection """ xmlData = self.createInjectionSpec(injectData) logging.debug("injectFiles XMLData: %s", xmlData) try: injectRes = self.phedex.injectBlocks(location, xmlData) except HTTPException as ex: # an HTTPException with status 400 is assumed to be a duplicate injection # error; trigger the later block recovery (investigation needed if that is not the case) if ex.status == 400: self.blocksToRecover.extend( self.createRecoveryFileFormat(injectData) ) logging.error("PhEDEx file injection failed with HTTPException: %s %s", ex.status, ex.result) except Exception as ex: logging.error("PhEDEx file injection failed with Exception: %s", str(ex)) logging.debug("Traceback: %s", str(traceback.format_exc())) else: logging.info("Injection result: %s", injectRes) if "error" in injectRes: msg = "Error injecting data %s: %s" % (injectData, injectRes["error"]) logging.error(msg) self.sendAlert(6, msg = msg) else: try: self.setStatus.execute(lfnList, 1) except Exception: # possible deadlock with DBS3Upload, retry once after 5s logging.warning("Oracle exception during file status update, possible deadlock due to race condition, retry after 5s sleep") time.sleep(5) self.setStatus.execute(lfnList, 1) return def closeBlocks(self): """ _closeBlocks_ Close any blocks that have been migrated to global DBS """ logging.info("Starting closeBlocks method") migratedBlocks = self.getMigrated.execute() for siteName in migratedBlocks.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here. location = None if siteName in self.nodeNames: location = siteName else: if "Buffer" in self.seMap and siteName in self.seMap["Buffer"]: location = self.seMap["Buffer"][siteName] elif "MSS" in self.seMap and siteName in self.seMap["MSS"]: location = self.seMap["MSS"][siteName] elif "Disk" in self.seMap and siteName in self.seMap["Disk"]: location = self.seMap["Disk"][siteName] if location is None: msg = "Could not map SE %s to PhEDEx node."
% siteName logging.error(msg) self.sendAlert(6, msg = msg) continue xmlData = self.createInjectionSpec(migratedBlocks[siteName]) logging.debug("closeBlocks XMLData: %s", xmlData) try: injectRes = self.phedex.injectBlocks(location, xmlData) except HTTPException as ex: logging.error("PhEDEx block close failed with HTTPException: %s %s", ex.status, ex.result) except Exception as ex: logging.error("PhEDEx block close failed with Exception: %s", str(ex)) logging.debug("Traceback: %s", str(traceback.format_exc())) else: logging.info("Block closing result: %s", injectRes) if "error" not in injectRes: for datasetName in migratedBlocks[siteName]: for blockName in migratedBlocks[siteName][datasetName]: logging.debug("Closing block %s", blockName) self.setBlockClosed.execute(blockName) else: msg = "Error injecting data %s: %s" % (migratedBlocks[siteName], injectRes["error"]) logging.error(msg) self.sendAlert(6, msg = msg) return def recoverInjectedFiles(self): """ _recoverInjectedFiles_ Run this method when a PhEDEx injection call has timed out. Since cmsweb enforces a 3 minute response timeout, a PhEDEx injection call can sometimes time out even though the injection itself succeeded. In that case run the recovery mode: 1. First check whether the files whose injection status = 0 are already in PhEDEx. 2. If those files exist, set their in_phedex status to 1. 3. Set self.blocksToRecover = [] Run this recovery one block at a time; with too many blocks the call to the PhEDEx data service on cmsweb can time out. """ # recover one block at a time for block in self.blocksToRecover: injectedFiles = self.phedex.getInjectedFiles(block) if injectedFiles: self.setStatus.execute(injectedFiles, 1) self.blocksToRecover = [] return def deleteBlocks(self): """ _deleteBlocks_ Find deletable blocks, then decide whether to delete based on: Is there an active subscription for the dataset or block? If yes => set deleted=2 If no => next check Has the transfer to all destinations finished?
If yes => request block deletion, approve request, set deleted=1 If no => do nothing (check again next cycle) """ logging.info("Starting deleteBlocks method") blockDict = self.findDeletableBlocks.execute(transaction = False) if not blockDict: return try: subscriptions = self.phedex.getSubscriptionMapping(*blockDict.keys()) except Exception: logging.error("Couldn't get subscription info from PhEDEx, retry next cycle") return skippableBlocks = [] deletableEntries = {} for blockName in blockDict: location = blockDict[blockName]['location'] # should never be triggered, better safe than sorry if location.endswith('_MSS'): logging.debug("Location %s for block %s is MSS, skip deletion", location, blockName) skippableBlocks.append(blockName) continue dataset = blockDict[blockName]['dataset'] sites = blockDict[blockName]['sites'] if blockName in subscriptions and location in subscriptions[blockName]: logging.debug("Block %s subscribed to %s, skip deletion", blockName, location) binds = { 'DELETED': 2, 'BLOCKNAME': blockName } self.markBlocksDeleted.execute(binds) else: blockInfo = [] try: blockInfo = self.phedex.getReplicaInfoForBlocks(block = blockName, complete = 'y')['phedex']['block'] except Exception: logging.error("Couldn't get block info from PhEDEx, retry next cycle") else: for entry in blockInfo: if entry['name'] == blockName: nodes = set([x['node'] for x in entry['replica']]) if location not in nodes: logging.debug("Block %s not present on %s, mark as deleted", blockName, location) binds = { 'DELETED': 1, 'BLOCKNAME': blockName } self.markBlocksDeleted.execute(binds) elif sites.issubset(nodes): logging.debug("Deleting block %s from %s since it is fully transferred", blockName, location) if location not in deletableEntries: deletableEntries[location] = {} if dataset not in deletableEntries[location]: deletableEntries[location][dataset] = set() deletableEntries[location][dataset].add(blockName) binds = [] for blockName in skippableBlocks: binds.append( { 'DELETED': 2, 'BLOCKNAME': blockName } ) if binds: self.markBlocksDeleted.execute(binds) for location in deletableEntries: chunkSize = 100 numberOfBlocks = 0 blocksToDelete = {} for dataset in deletableEntries[location]: blocksToDelete[dataset] = deletableEntries[location][dataset] numberOfBlocks += len(blocksToDelete[dataset]) if numberOfBlocks > chunkSize: self.deleteBlocksPhEDExCalls(location, blocksToDelete) numberOfBlocks = 0 blocksToDelete = {} if blocksToDelete: self.deleteBlocksPhEDExCalls(location, blocksToDelete) return def deleteBlocksPhEDExCalls(self, location, blocksToDelete): """ _deleteBlocksPhEDExCalls_ actual PhEDEx calls for block deletion """ deletion = PhEDExDeletion(blocksToDelete.keys(), location, level = 'block', comments = "WMAgent blocks auto-delete from %s" % location, blocks = blocksToDelete) xmlData = XMLDrop.makePhEDExXMLForBlocks(self.dbsUrl, deletion.getDatasetsAndBlocks()) logging.debug("deleteBlocks XMLData: %s", xmlData) try: response = self.phedex.delete(deletion, xmlData) requestId = response['phedex']['request_created'][0]['id'] # auto-approve deletion request self.phedex.updateRequest(requestId, 'approve', location) except HTTPException as ex: logging.error("PhEDEx block delete/approval failed with HTTPException: %s %s", ex.status, ex.result) except Exception as ex: logging.error("PhEDEx block delete/approval failed with Exception: %s", str(ex)) logging.debug("Traceback: %s", str(traceback.format_exc())) else: binds = [] for dataset in blocksToDelete: for blockName in blocksToDelete[dataset]: binds.append( { 'DELETED': 1, 'BLOCKNAME':
blockName } ) self.markBlocksDeleted.execute(binds) return def subscribeDatasets(self): """ _subscribeDatasets_ Poll the database for datasets and subscribe them. """ logging.info("Starting subscribeDatasets method") # Check for completely unsubscribed datasets unsubscribedDatasets = self.getUnsubscribed.execute() # Keep a list of subscriptions to tick as subscribed in the database subscriptionsMade = [] # Create a list of subscriptions as defined by the PhEDEx data structures subs = SubscriptionList() # Create the subscription objects and add them to the list # The list takes care of the sorting internally for subInfo in unsubscribedDatasets: site = subInfo['site'] if site not in self.phedexNodes['MSS'] and site not in self.phedexNodes['Disk']: msg = "Site %s doesn't appear to be valid to PhEDEx, " % site msg += "skipping subscription: %s" % subInfo['id'] logging.error(msg) self.sendAlert(7, msg = msg) continue # Avoid custodial subscriptions to disk nodes if site not in self.phedexNodes['MSS']: subInfo['custodial'] = 'n' # Avoid auto approval in T1 sites elif site.startswith("T1"): subInfo['request_only'] = 'y' phedexSub = PhEDExSubscription(subInfo['path'], site, subInfo['phedex_group'], priority = subInfo['priority'], move = subInfo['move'], custodial = subInfo['custodial'], request_only = subInfo['request_only'], subscriptionId = subInfo['id']) # Check if the subscription is a duplicate if phedexSub.matchesExistingSubscription(self.phedex) or \ phedexSub.matchesExistingTransferRequest(self.phedex): subscriptionsMade.append(subInfo['id']) continue # Add it to the list subs.addSubscription(phedexSub) # Compact the subscriptions subs.compact() for subscription in subs.getSubscriptionList(): xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsUrl, subscription.getDatasetPaths()) logging.debug("subscribeDatasets XMLData: %s" , xmlData) logging.info("Subscribing: %s to %s, with options: Move: %s, Custodial: %s, Request Only: %s", subscription.getDatasetPaths(), subscription.getNodes(), subscription.move, subscription.custodial, subscription.request_only) try: self.phedex.subscribe(subscription, xmlData) except HTTPException as ex: logging.error("PhEDEx dataset subscribe failed with HTTPException: %s %s", ex.status, ex.result) except Exception as ex: logging.error("PhEDEx dataset subscribe failed with Exception: %s", str(ex)) logging.debug("Traceback: %s", str(traceback.format_exc())) else: subscriptionsMade.extend(subscription.getSubscriptionIds()) # Register the result in DBSBuffer if subscriptionsMade: self.markSubscribed.execute(subscriptionsMade) return
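# The subscribe/delete work above piggybacks on the injection polling cycle:
# subFrequency is derived from the two configured intervals and pollCounter is
# pre-loaded so that the very first cycle already runs subscribeDatasets and
# deleteBlocks. A minimal sketch of that cadence (the interval values used in
# the example are illustrative, not taken from any real configuration):

def subscribeCycles(pollInterval, subInterval, totalCycles):
    """Return the 1-based polling cycles on which subscribe/delete would run."""
    subFrequency = max(1, int(round(subInterval / float(pollInterval))))
    pollCounter = subFrequency - 1  # subscribe on the first cycle
    cycles = []
    for cycle in range(1, totalCycles + 1):
        pollCounter += 1
        if pollCounter == subFrequency:
            pollCounter = 0
            cycles.append(cycle)
    return cycles

# With a 100s poll interval and a 300s subscribe interval, subFrequency is 3,
# so cycles 1, 4 and 7 run the subscribe/delete pass:
assert subscribeCycles(100, 300, 8) == [1, 4, 7]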
class AccountantWorker(WMConnectionBase): """ Class that actually does the work of parsing FWJRs for the Accountant Run through ProcessPool """ def __init__(self, config): """ __init__ Create all DAO objects that are used by this class. """ WMConnectionBase.__init__(self, "WMCore.WMBS") myThread = threading.currentThread() self.dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = myThread.logger, dbinterface = myThread.dbi) self.getOutputMapAction = self.daofactory(classname = "Jobs.GetOutputMap") self.bulkAddToFilesetAction = self.daofactory(classname = "Fileset.BulkAddByLFN") self.bulkParentageAction = self.daofactory(classname = "Files.AddBulkParentage") self.getJobTypeAction = self.daofactory(classname = "Jobs.GetType") self.getParentInfoAction = self.daofactory(classname = "Files.GetParentInfo") self.setParentageByJob = self.daofactory(classname = "Files.SetParentageByJob") self.setFileRunLumi = self.daofactory(classname = "Files.AddRunLumi") self.setFileLocation = self.daofactory(classname = "Files.SetLocationByLFN") self.setFileAddChecksum = self.daofactory(classname = "Files.AddChecksumByLFN") self.addFileAction = self.daofactory(classname = "Files.Add") self.jobCompleteInput = self.daofactory(classname = "Jobs.CompleteInput") self.setBulkOutcome = self.daofactory(classname = "Jobs.SetOutcomeBulk") self.getWorkflowSpec = self.daofactory(classname = "Workflow.GetSpecAndNameFromTask") self.dbsStatusAction = self.dbsDaoFactory(classname = "DBSBufferFiles.SetStatus") self.dbsParentStatusAction = self.dbsDaoFactory(classname = "DBSBufferFiles.GetParentStatus") self.dbsChildrenAction = self.dbsDaoFactory(classname = "DBSBufferFiles.GetChildren") self.dbsCreateFiles = self.dbsDaoFactory(classname = "DBSBufferFiles.Add") self.dbsSetLocation = self.dbsDaoFactory(classname = "DBSBufferFiles.SetLocationByLFN") self.dbsInsertLocation = self.dbsDaoFactory(classname = "DBSBufferFiles.AddLocation") self.dbsSetChecksum = self.dbsDaoFactory(classname = "DBSBufferFiles.AddChecksumByLFN") self.dbsSetRunLumi = self.dbsDaoFactory(classname = "DBSBufferFiles.AddRunLumi") self.insertWorkflow = self.dbsDaoFactory(classname = "InsertWorkflow") self.dbsNewAlgoAction = self.dbsDaoFactory(classname = "NewAlgo") self.dbsNewDatasetAction = self.dbsDaoFactory(classname = "NewDataset") self.dbsAssocAction = self.dbsDaoFactory(classname = "AlgoDatasetAssoc") self.dbsExistsAction = self.dbsDaoFactory(classname = "DBSBufferFiles.ExistsForAccountant") self.dbsLFNHeritage = self.dbsDaoFactory(classname = "DBSBufferFiles.BulkHeritageParent") self.dbsSetDatasetAlgoAction = self.dbsDaoFactory(classname = "SetDatasetAlgo") self.stateChanger = ChangeState(config) # Decide whether or not to attach the jobReport to the returned value self.returnJobReport = getattr(config.JobAccountant, 'returnReportFromWorker', False) # Store the location of the specs for DBS self.specDir = getattr(config.JobAccountant, 'specDir', None) # Hold data for a later commit self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] self.count = 0 self.datasetAlgoID = collections.deque(maxlen = 1000) self.datasetAlgoPaths = collections.deque(maxlen = 1000) self.dbsLocations = collections.deque(maxlen = 1000) self.workflowIDs = collections.deque(maxlen = 1000) self.workflowPaths = collections.deque(maxlen = 1000) self.phedex = PhEDEx() self.locLists = self.phedex.getNodeMap() return def reset(self): """ _reset_ Reset
all global vars between runs. """ self.dbsFilesToCreate = [] self.wmbsFilesToBuild = [] self.fileLocation = None self.mergedOutputFiles = [] self.listOfJobsToSave = [] self.listOfJobsToFail = [] self.filesetAssoc = [] gc.collect() return def loadJobReport(self, parameters): """ _loadJobReport_ Given a framework job report on disk, load it and return a FwkJobReport instance. If there is any problem loading or parsing the framework job report return None. """ # The jobReportPath may be prefixed with "file://" which needs to be # removed so it doesn't confuse the FwkJobReport() parser. jobReportPath = parameters.get("fwjr_path", None) if not jobReportPath: logging.error("Bad FwkJobReport Path: %s" % jobReportPath) return self.createMissingFWKJR(parameters, 99999, "FWJR path is empty") jobReportPath = jobReportPath.replace("file://","") if not os.path.exists(jobReportPath): logging.error("Bad FwkJobReport Path: %s" % jobReportPath) return self.createMissingFWKJR(parameters, 99999, 'Cannot find file in jobReport path: %s' % jobReportPath) if os.path.getsize(jobReportPath) == 0: logging.error("Empty FwkJobReport: %s" % jobReportPath) return self.createMissingFWKJR(parameters, 99998, 'jobReport of size 0: %s ' % jobReportPath) jobReport = Report() try: jobReport.load(jobReportPath) except Exception, ex: msg = "Error loading jobReport %s\n" % jobReportPath msg += str(ex) logging.error(msg) logging.debug("Failing job: %s\n" % parameters) return self.createMissingFWKJR(parameters, 99997, 'Cannot load jobReport') if len(jobReport.listSteps()) == 0: logging.error("FwkJobReport with no steps: %s" % jobReportPath) return self.createMissingFWKJR(parameters, 99997, 'jobReport with no steps: %s ' % jobReportPath) return jobReport
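# The guard clauses in loadJobReport() amount to a small validation routine:
# an empty path, a missing file and a zero-size file each map to a distinct
# error code before any parsing is attempted. A standalone sketch of those
# checks (the error codes are copied from the method above; the helper itself
# and its name are illustrative, not part of the component):

import os

def validateReportPath(jobReportPath):
    """Return (cleanPath, None) if usable, else (None, (errorCode, reason))."""
    if not jobReportPath:
        return None, (99999, "FWJR path is empty")
    # strip a "file://" prefix so it doesn't confuse the parser
    cleanPath = jobReportPath.replace("file://", "")
    if not os.path.exists(cleanPath):
        return None, (99999, "Cannot find file in jobReport path: %s" % cleanPath)
    if os.path.getsize(cleanPath) == 0:
        return None, (99998, "jobReport of size 0: %s" % cleanPath)
    return cleanPath, None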
class PhEDExInjectorPoller(BaseWorkerThread): """ _PhEDExInjectorPoller_ Poll the DBSBuffer database and inject files as they are created. """ def __init__(self, config): """ ___init___ Initialise class members """ BaseWorkerThread.__init__(self) self.config = config self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json") self.dbsUrl = config.DBSInterface.globalDBSUrl self.group = getattr(config.PhEDExInjector, "group", "DataOps") # This will be used to map SE names which are stored in the DBSBuffer to # PhEDEx node names. The first key will be the "kind" which consists # of one of the following: MSS, Disk, Buffer. The next key will be the # SE name. self.seMap = {} self.nodeNames = [] # initialize the alert framework (if available - config.Alert present) # self.sendAlert will then be available self.initAlerts(compName="PhEDExInjector") def setup(self, parameters): """ _setup_ Create a DAO Factory for the PhEDExInjector. Also load the SE names to PhEDEx node name mappings from the data service. """ myThread = threading.currentThread() daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database", logger=self.logger, dbinterface=myThread.dbi) self.getUninjected = daofactory(classname="GetUninjectedFiles") self.getMigrated = daofactory(classname="GetMigratedBlocks") daofactory = DAOFactory(package="WMComponent.DBSBuffer.Database", logger=self.logger, dbinterface=myThread.dbi) self.setStatus = daofactory(classname="DBSBufferFiles.SetPhEDExStatus") daofactory = DAOFactory(package="WMComponent.DBSUpload.Database", logger=self.logger, dbinterface=myThread.dbi) self.setBlockStatus = daofactory(classname="SetBlockStatus") nodeMappings = self.phedex.getNodeMap() for node in nodeMappings["phedex"]["node"]: if not self.seMap.has_key(node["kind"]): self.seMap[node["kind"]] = {} logging.info("Adding mapping %s -> %s" % (node["se"], node["name"])) self.seMap[node["kind"]][node["se"]] = node["name"] self.nodeNames.append(node["name"]) return def createInjectionSpec(self, injectionData): """ _createInjectionSpec_ Transform the data structure returned from the database into an XML string for the PhEDEx Data Service. The injectionData parameter must be a dictionary keyed by dataset path. Each dataset path will map to a list of blocks, each block being a dict. The block dicts will have three keys: name, is-open and files. The files key will be a list of dicts, each of which has the following keys: lfn, size and checksum. The following is an example object: {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": "1234"}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": "4321"}}]}}} """ injectionSpec = XMLDrop.XMLInjectionSpec(self.dbsUrl) for datasetPath in injectionData: datasetSpec = injectionSpec.getDataset(datasetPath) for fileBlockName, fileBlock in injectionData[datasetPath].iteritems(): blockSpec = datasetSpec.getFileblock(fileBlockName, fileBlock["is-open"]) for file in fileBlock["files"]: blockSpec.addFile(file["lfn"], file["checksum"], file["size"]) return injectionSpec.save() def injectFiles(self): """ _injectFiles_ Inject any uninjected files in PhEDEx. """ myThread = threading.currentThread() uninjectedFiles = self.getUninjected.execute() injectedFiles = [] for siteName in uninjectedFiles.keys(): # SE names can be stored in DBSBuffer as that is what is returned in # the framework job report. We'll try to map the SE name to a # PhEDEx node name here.
location = None if siteName in self.nodeNames: location = siteName else: if self.seMap.has_key("Buffer") and \ self.seMap["Buffer"].has_key(siteName): location = self.seMap["Buffer"][siteName] elif self.seMap.has_key("MSS") and \ self.seMap["MSS"].has_key(siteName): location = self.seMap["MSS"][siteName] elif self.seMap.has_key("Disk") and \ self.seMap["Disk"].has_key(siteName): location = self.seMap["Disk"][siteName] if location is None: msg = "Could not map SE %s to PhEDEx node." % siteName logging.error(msg) self.sendAlert(7, msg=msg) continue myThread.transaction.begin() xmlData = self.createInjectionSpec(uninjectedFiles[siteName]) try: injectRes = self.phedex.injectBlocks(location, xmlData) except Exception, ex: # If we get an error here, assume that it's temporary (it usually is) # log it, and ignore it in the algorithm() loop msg = "Encountered error while attempting to inject blocks to PhEDEx.\n" msg += str(ex) logging.error(msg) logging.debug("Traceback: %s" % str(traceback.format_exc())) raise PhEDExInjectorPassableError(msg) logging.info("Injection result: %s" % injectRes) if not injectRes.has_key("error"): for datasetName in uninjectedFiles[siteName]: for blockName in uninjectedFiles[siteName][datasetName]: for file in uninjectedFiles[siteName][datasetName][blockName]["files"]: injectedFiles.append(file["lfn"]) else: msg = ("Error injecting data %s: %s" % (uninjectedFiles[siteName], injectRes["error"])) logging.error(msg) self.sendAlert(6, msg=msg) self.setStatus.execute(injectedFiles, 1, conn=myThread.transaction.conn, transaction=myThread.transaction) injectedFiles = [] myThread.transaction.commit() return
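# injectFiles() above converts failed injection calls into
# PhEDExInjectorPassableError, signalling an error the polling loop can
# survive. The algorithm() method of this class is not shown here, so the
# consumer below is a hypothetical sketch, not the component's actual code:
# a plausible loop logs the passable error and waits for the next polling
# cycle instead of crashing the component.

def runInjectionCycle(poller):
    """Hypothetical sketch: treat passable errors as a skipped cycle."""
    try:
        poller.injectFiles()
    except PhEDExInjectorPassableError as ex:
        # assumed temporary (e.g. a transient data-service glitch):
        # log it and retry on the next polling cycle
        logging.error("Skipping injection cycle after passable error: %s", str(ex))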