Example #1
0
    def __init__(self, msConfig, logger=None):
        """
        Provides setup for MSTransferor and MSMonitor classes

        :param msConfig: MS service configuration; 'reqmgr2Url' is read with
            item access and 'verbose' with attribute access
        :param logger: logger object (optional)
        """
        self.logger = getMSLogger(getattr(msConfig, 'verbose', False), logger)
        self.msConfig = msConfig
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)

        self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                   httpDict={'cacheduration': 1.0},
                                   logger=self.logger)

        # hard code it to production DBS otherwise PhEDEx subscribe API fails to match TMDB data
        dbsUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"

        # FIXME: we cannot use Rucio in write mode yet, so the PhEDEx client is
        # created whether or not Rucio is enabled. Once Rucio can be used, this
        # is where the client would be selected:
        # self.rucio = Rucio(self.msConfig['rucioAccount'], configDict={"logger": self.logger})
        self.phedex = PhEDEx(httpDict={'cacheduration': 0.5},
                             dbsUrl=dbsUrl,
                             logger=self.logger)
Example #2
0
    def testSlcPhedexNodesEqualPhedexApiNodes(self):
        """
        For each site, verify that the stageout node specified in
        site-local-config.xml is one of the nodes returned by the PhEDEx api.

        Sites whose site-local-config.xml cannot be loaded are reported on
        stdout and skipped.
        """
        os.environ["CMS_PATH"] = "/cvmfs/cms.cern.ch"

        phedex = PhEDEx()
        nodes = set(node[u'name']
                    for node in phedex.getNodeMap()["phedex"]["node"])

        for d in os.listdir("/cvmfs/cms.cern.ch/SITECONF/"):
            # Only T0_, T1_... folders are needed
            if not d.startswith("T"):
                continue
            os.environ['WMAGENT_SITE_CONFIG_OVERRIDE'] = \
                '/cvmfs/cms.cern.ch/SITECONF/%s/JobConfig/site-local-config.xml' % d
            try:
                slc = loadSiteLocalConfig()
            except SiteConfigError as e:
                print(e.args[0])
                # No config was loaded for this site: skip it instead of
                # checking an unbound (or the previous site's) slc object.
                continue
            phedexNode = slc.localStageOut.get("phedex-node")
            self.assertTrue(
                phedexNode in nodes,
                "Error: Node specified in SLC (%s) not in list returned by PhEDEx api"
                % phedexNode)
        return
    def __init__(self, config):
        """
        ___init___

        Create the PhEDEx and SiteDB clients and read the PhEDExInjector
        options (group, safe operation mode, replica-only) from the config
        """
        BaseWorkerThread.__init__(self)

        injectorConfig = config.PhEDExInjector
        self.phedex = PhEDEx({"endpoint": injectorConfig.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(injectorConfig, "group", "DataOps")
        self.safeMode = getattr(injectorConfig, "safeOperationMode", False)
        self.replicaOnly = getattr(injectorConfig, "replicaOnly", False)

        # Subscribed state in the DBSBuffer table for datasets:
        # 2 when running in safe mode, 1 otherwise
        self.terminalSubscriptionState = 2 if self.safeMode else 1

        # We will map node names to CMS names, that what the spec will have.
        # If a CMS name is associated to many PhEDEx node then choose the MSS option
        self.cmsToPhedexMap = {}

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")
Example #4
0
    def testEmulator(self):
        """
        Check the wrapped module and the class name of every service object,
        first with all emulators switched on and then after resetting them.
        """
        # One factory per service; a fresh object is built for every assertion.
        builders = (
            lambda: PhEDEx(),
            lambda: DBSReader(self.globalDBS),
            lambda: SiteDBJSON(),
            lambda: RequestManager(),
        )

        def verify(wrappedModules, classNames):
            for build, moduleName in zip(builders, wrappedModules):
                self.assertEqual(build().wrapped.__module__, moduleName)
            for build, className in zip(builders, classNames):
                self.assertEqual(build().__class__.__name__, className)

        EmulatorHelper.setEmulators(True, True, True, True)
        verify(('WMQuality.Emulators.PhEDExClient.PhEDEx',
                'WMQuality.Emulators.DBSClient.DBSReader',
                'WMQuality.Emulators.SiteDBClient.SiteDB',
                'WMQuality.Emulators.RequestManagerClient.RequestManager'),
               ('PhEDEx', 'DBSReader', 'SiteDBJSON', 'RequestManager'))

        EmulatorHelper.resetEmulators()
        verify(('WMCore.Services.PhEDEx.PhEDEx',
                'WMCore.Services.DBS.DBS2Reader',
                'WMCore.Services.SiteDB.SiteDB',
                'WMCore.Services.RequestManager.RequestManager'),
               ('PhEDEx', 'DBS2Reader', 'SiteDBJSON', 'RequestManager'))
Example #5
0
    def setUp(self):
        """
        Create the RequestManager and PhEDEx objects and build two fake
        requests from the first dataset of the group that has subscriptions.
        """
        self.group = 'DataOps'
        self.interval = 2
        self.rmgr = RequestManager(group=self.group, interval=self.interval, verbose=True)
        self.phedex = PhEDEx()

        # get some subscriptions from PhEDEx to play with
        groupSubs = self.phedex.subscriptions(group=self.group)
        for datasetInfo in groupSubs['phedex']['dataset']:
            dataset = datasetInfo.get('name')
            print("### dataset info from phedex, #files %s" % datasetInfo.get('files', 0))
            # now use the same logic in as in Transferor, i.e. look-up dataset/group subscription
            reply = self.phedex.subscriptions(dataset=dataset, group=self.group)
            records = reply['phedex']['dataset']
            if not records:
                print("### skip this dataset since no subscription data is available")
                continue
            nodes = []
            for record in records:
                for subscription in record['subscription']:
                    nodes.append(subscription['node'])
            print("### nodes", nodes)
            # create fake requests with dataset/nodes info; only the first
            # dataset with subscription data is used
            self.requests = {}
            for reqName in ('req1', 'req2'):
                self.requests[reqName] = dict(datasets=[dataset], sites=nodes, name=reqName)
            break
def phedex():
    """
    Submit a block-level deletion request to the PhEDEx 'dev' data service
    and approve it.

    Builds a PhEDExDeletion for one hard-coded block at T1_CH_CERN_Buffer,
    prints the generated XML and the service response, then approves the
    created request for that node.
    """
    phedexIn = PhEDEx(dict={'endpoint': 'https://cmsweb.cern.ch/phedex/datasvc/json/dev/',
                            'logger': logging},
                      responseType="json")
#     requests =  phedex.getRequestList(dataset = ['/TauParked/Run2012C-LogError-22Jan2013-v1/RAW-RECO'],
#                           node = 'T2_RU_ITEP')['phedex']['request']
#     for request in requests:
#         requestId = request['id']
#         request =  phedex.getTransferRequests(request = requestId)['phedex']['request']
#         if request:
#             request = request[0]
#             print request
#    x = PhEDExSubscription('/TauParked/Run2012C-22Jan2013-v1/AOD',
#                           'T1_US_FNAL_MSS', 'DataOps', 'dataset', 'low', 'n', 'n', 'n', 'y', subscriptionId = 1)
#   print x.matchesExistingTransferRequest(phedex)
#    print x.matchesExistingSubscription(phedex)

    deletion = PhEDExDeletion('/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO',
                              'T1_CH_CERN_Buffer',
                              level='block',
                              comments='Blocks automatically deleted from T2_CH_CERN as it has already been processed and transferred to a custodial location',
                              blocks={'/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO':
                                      ['/071103be-7d80-11e0-90de-00163e010039/PromptReco-v1/RECO#075ea9e8-7d80-11e0-90de-00163e010039']})
    xmlData = XMLDrop.makePhEDExXMLForBlocks('http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet',
                                             deletion.getDatasetsAndBlocks())
    # print() with a single argument behaves the same on Python 2 and 3
    print(str(xmlData))
    response = phedexIn.delete(deletion, xmlData)
    print(response)
    requestId = response['phedex']['request_created'][0]['id']
    phedexIn.updateRequest(requestId, 'approve', 'T1_CH_CERN_Buffer')
Example #7
0
    def testSlcPhedexNodesEqualPhedexApiNodes(self):
        """
        For each site, verify that the stageout node specified in
        site-local-config.xml is the same as the one returned by the PhEDEx api.

        The comparison is only made when the site config has an "se-name"
        known to PhEDEx and a "phedex-node" entry. Sites whose config cannot
        be loaded are reported on stdout and skipped.
        """
        os.environ["CMS_PATH"] = "/cvmfs/cms.cern.ch"

        phedex = PhEDEx()
        nodes = phedex.getNodeMap()["phedex"]["node"]

        # Make a dict for translating the se names into regular site names.
        node_map = {}
        for node in nodes:
            node_map[str(node[u"se"])] = str(node[u"name"])

        for d in os.listdir("/cvmfs/cms.cern.ch/SITECONF/"):
            # Only T0_, T1_... folders are needed
            if not d.startswith("T"):
                continue
            os.environ['WMAGENT_SITE_CONFIG_OVERRIDE'] = \
                '/cvmfs/cms.cern.ch/SITECONF/%s/JobConfig/site-local-config.xml' % d
            try:
                slc = loadSiteLocalConfig()
            except SiteConfigError as e:
                print(e.args[0])
                # No config was loaded for this site: skip it instead of
                # checking an unbound (or the previous site's) slc object.
                continue
            phedexNode = slc.localStageOut.get("phedex-node")
            # If slc is correct, perform check
            if ("se-name" in slc.localStageOut
                    and slc.localStageOut["se-name"] in node_map
                    and phedexNode is not None):
                expectedNode = node_map[slc.localStageOut["se-name"]]
                self.assertEqual(phedexNode, expectedNode,
                                 "Error: Node specified in SLC (%s) doesn't match node returned by PhEDEx api (%s)."
                                 % (phedexNode, expectedNode))

        return
Example #8
0
    def preInitialization(self):
        """
        Fetch the PhEDEx node map once (up to three attempts) and register
        the injector poller and, when "subscribeDatasets" is enabled, the
        subscriber with the worker thread manager.
        """
        injectorConfig = self.config.PhEDExInjector
        pollInterval = injectorConfig.pollInterval
        subInterval = injectorConfig.subscribeInterval
        logging.info("Setting poll interval to %s seconds for inject", pollInterval)

        # retrieving the node mappings is fickle and can fail quite often
        # hence only do it once (with retries) and pass it to the workers
        phedex = PhEDEx({"endpoint": injectorConfig.phedexurl}, "json")
        for pause in (2, 4):
            try:
                nodeMappings = phedex.getNodeMap()
                break
            except Exception:
                time.sleep(pause)
        else:
            # third and last attempt: a failure here propagates to the caller
            nodeMappings = phedex.getNodeMap()

        currentThread = threading.currentThread()
        currentThread.workerThreadManager.addWorker(
            PhEDExInjectorPoller(self.config, phedex, nodeMappings), pollInterval)

        if not getattr(injectorConfig, "subscribeDatasets", False):
            return

        # wait a bit for first poll cycle of PhEDExInjectorPoller to complete
        # hopefully avoids intermingled logs (which can be confusing)
        time.sleep(2)
        logging.info("Setting poll interval to %s seconds for subscribe", subInterval)
        currentThread.workerThreadManager.addWorker(
            PhEDExInjectorSubscriber(self.config, phedex, nodeMappings), subInterval)
Example #9
0
    def preInitialization(self):
        """
        Register the PhEDEx injector workers.

        The node map is retrieved a single time (retried after 2 and then 4
        seconds) and handed to the poller and, if "subscribeDatasets" is set
        in the configuration, to the subscriber as well.
        """
        settings = self.config.PhEDExInjector
        pollInterval = settings.pollInterval
        subInterval = settings.subscribeInterval
        logging.info("Setting poll interval to %s seconds for inject",
                     pollInterval)

        # retrieving the node mappings is fickle and can fail quite often
        # hence only do it once (with retries) and pass it to the workers
        phedex = PhEDEx({"endpoint": settings.phedexurl}, "json")
        for delay in (2, 4):
            try:
                nodeMappings = phedex.getNodeMap()
            except Exception:
                time.sleep(delay)
            else:
                break
        else:
            # final attempt, errors are no longer caught
            nodeMappings = phedex.getNodeMap()

        myThread = threading.currentThread()
        poller = PhEDExInjectorPoller
        myThread.workerThreadManager.addWorker(
            poller(self.config, phedex, nodeMappings),
            pollInterval)

        if getattr(settings, "subscribeDatasets", False):
            # wait a bit for first poll cycle of PhEDExInjectorPoller to complete
            # hopefully avoids intermingled logs (which can be confusing)
            time.sleep(2)
            logging.info("Setting poll interval to %s seconds for subscribe",
                         subInterval)
            myThread.workerThreadManager.addWorker(
                PhEDExInjectorSubscriber(self.config, phedex, nodeMappings),
                subInterval)
Example #10
0
def main():
    """
    Print the LFN of every file that the agent database reports as
    uninjected but that PhEDEx already lists in the corresponding block.

    The database connection comes from the configuration file named by the
    WMAGENT_CONFIG environment variable. Always returns 0.
    """
    myPhedex = PhEDEx()
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])
    config.CoreDatabase.dialect = "mysql"
    init = WMInit()
    init.setDatabaseConnection(config.CoreDatabase.connectUrl, config.CoreDatabase.dialect, config.CoreDatabase.socket)
    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMComponent.PhEDExInjector.Database", logger=logging, dbinterface=myThread.dbi)

    getUninjectedDAO = daofactory(classname="GetUninjectedFiles")
    uninjectedFiles = getUninjectedDAO.execute()
    for location in uninjectedFiles:
        for dataset in uninjectedFiles[location]:
            for block in uninjectedFiles[location][dataset]:
                result = myPhedex.getReplicaInfoForFiles(dataset=dataset, block=block)
                phedexBlock = result["phedex"]["block"]
                if not phedexBlock:
                    # PhEDEx does not know this block, nothing to compare
                    continue
                # Only the first returned block entry is inspected; a set
                # keeps the per-file membership test cheap.
                filesInjected = set(x["name"] for x in phedexBlock[0]["file"])
                for fileInfo in uninjectedFiles[location][dataset][block]["files"]:
                    lfn = fileInfo["lfn"]
                    if lfn in filesInjected:
                        print(lfn)
    return 0
Example #11
0
    def __init__(self, config):
        """
        ___init___

        Initialise class members: the subscription cadence, the PhEDEx
        client, the SE-name to node-name maps built from the PhEDEx node map,
        and the list of data tiers to skip.

        The node map is requested up to three times (sleeping 2 and then 4
        seconds between attempts); a failure of the last attempt propagates.
        """
        BaseWorkerThread.__init__(self)
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.phedexGroup = config.PhEDExInjector.phedexGroup

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval / pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles", self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl},
                             "json", dbsUrl=self.dbsUrl)
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit are not swallowed while retrying.
        try:
            nodeMappings = self.phedex.getNodeMap()
        except Exception:
            time.sleep(2)
            try:
                nodeMappings = self.phedex.getNodeMap()
            except Exception:
                time.sleep(4)
                nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        # Node names grouped by kind, for the MSS and Disk kinds only
        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            kind = node["kind"]
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            self.seMap.setdefault(kind, {})[node["se"]] = node["name"]
            self.nodeNames.append(node["name"])
            if kind in self.phedexNodes:
                self.phedexNodes[kind].append(node["name"])

        self.blocksToRecover = []

        # X-component configuration is BAD! But it will only be here during the
        # Rucio commissioning within WM
        # NOTE(review): the tiers to skip are read from the RucioInjector
        # "listTiersToInject" setting - presumably tiers injected by Rucio are
        # the ones skipped here; confirm.
        self.listTiersToSkip = config.RucioInjector.listTiersToInject
        logging.info("Component configured to skip data injection for data tiers: %s",
                     self.listTiersToSkip)
def checkForMissingFiles(options):
    """
    Check with lcg-ls whether the files recorded in ACDC documents exist.

    The ACDC documents are selected by options.group, options.user,
    options.request and options.task. For every file of every document the
    first recorded location is translated to a PhEDEx node, the PFN is
    resolved and `lcg-ls` is run on it. A non-zero exit code (or a failure
    to run the command at all) marks the file as bad and is reported on
    stdout.

    :param options: object providing cachepath, acdcUrl, group, user,
        request, task, change and backup attributes. When options.change is
        true, each document is first dumped as JSON to
        <options.backup>/<doc id>.bkp
    """
    #Initialize stuff
    phedexAPI = PhEDEx({'cachepath': options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    #Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce': False}, [array])

    documentsIDs = [x['id'] for x in result['rows']]

    # Bad file names keyed by document ID (not returned by the visible code)
    badFiles = {}

    #Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        #Are we going to change this doc? Better back it up
        if options.change:
            backupPath = os.path.join(options.backup, "%s.bkp" % doc["_id"])
            # The builtin open() is required here: os.open() takes integer
            # flags and returns a bare file descriptor that json.dump()
            # cannot write to.
            with open(backupPath, 'w') as backupFile:
                json.dump(doc, backupFile)

        #Go through the files
        files = doc["files"]
        for inputFile in files:

            #Use PhEDEx API to get site based on the SE
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)

            #Now get the PFN
            pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
            inputPfn = pfnDict[(siteLocation, inputFile)]

            #Run lcg-ls commands and see what we get
            command = 'lcg-ls -b -D srmv2 --srm-timeout 60 %s' % inputPfn

            commandList = shlex.split(command)
            try:
                (stdout, stderr, exitCode) = runCommand(commandList, False, 70)
            except Exception as ex:
                # Treat a failure to run the command like a failed command
                exitCode = 99999
                stdout = ''
                stderr = str(ex)

            if exitCode:
                #Something went wrong with the command
                #Mark the file as bad
                badFiles.setdefault(docID, []).append(inputFile)
                print('File %s is thought to be bad' % inputFile)
                print('Command was %s' % command)
                print('Return code was %i' % exitCode)
                print('Stdout was %s' % stdout)
                print('Stderr was %s' % stderr)
def get_tfc_rules(site):
    """
    Return the TFC of the given site as parsed by readTFC().
    """
    xmlClient = PhEDEx(responseType='xml')
    # The return value of getNodeTFC() is not used; the TFC is read from the
    # file the client names for this query (presumably written by the call
    # above - confirm against the PhEDEx service class).
    xmlClient.getNodeTFC(site)
    cachedTfc = xmlClient.cacheFileName('tfc', inputdata={'node': site})
    return readTFC(cachedTfc)
def get_tfc_rules(site):
    """
    Look up the TFC for a site and hand back what readTFC() makes of it.
    """
    client = PhEDEx(responseType='xml')
    # Result of the query itself is ignored; only the file named by
    # cacheFileName() for the same query is parsed.
    client.getNodeTFC(site)
    return readTFC(client.cacheFileName('tfc', inputdata={'node': site}))
Example #15
0
    def _queryDbsAndGetPileupConfig(self, stepHelper, dbsReader, fakeSites):
        """
        Method iterates over components of the pileup configuration input
        and queries DBS. Then iterates over results from DBS.

        There needs to be a list of files and their locations for each
        dataset name.
        Use dbsReader
        the result data structure is a Python dict following dictionary:
            FileList is a list of {'logical_file_name': LFN} dicts, sorted
            by LFN for every block that PhEDEx reports replicas for

        {"pileupTypeA": {"BlockA": {"FileList": [], "NumberOfEvents": 0, "PhEDExNodeNames": []},
                         "BlockB": {"FileList": [], "NumberOfEvents": 0, "PhEDExNodeNames": []}, ....}

        this structure preserves knowledge of where particular files of dataset
        are physically (list of PNNs) located. DBS only lists sites which
        have all files belonging to blocks but e.g. BlockA of dataset DS1 may
        be located at site1 and BlockB only at site2 - it's possible that only
        a subset of the blocks in a dataset will be at a site.

        :param stepHelper: step helper whose data.pileup sections each carry
            a "dataset" list
        :param dbsReader: object providing getFileListByDataset()
        :param fakeSites: sites whose PNNs are added to every block location
            list (may be empty)
        :return: dictionary described above
        """
        # only production PhEDEx is connected (This can be moved to init method
        phedex = PhEDEx()
        node_filter = set(['UNKNOWN', None])
        # convert the fake sites into PNNs to be added to the pileup location list
        fakePNNs = []
        if fakeSites:
            fakePNNs = mapSitetoPNN(fakeSites)

        resultDict = {}
        # iterate over input pileup types (e.g. "cosmics", "minbias")
        for pileupType in stepHelper.data.pileup.listSections_():
            # the format here is: step.data.pileup.cosmics.dataset = [/some/data/set]
            datasets = getattr(getattr(stepHelper.data.pileup, pileupType), "dataset")
            # each dataset input can generally be a list, iterate over dataset names
            blockDict = {}
            for dataset in datasets:

                blockFileInfo = dbsReader.getFileListByDataset(dataset=dataset, detail=True)

                for fileInfo in blockFileInfo:
                    blockInfo = blockDict.setdefault(fileInfo['block_name'],
                                                     {'FileList': [],
                                                      'NumberOfEvents': 0,
                                                      'PhEDExNodeNames': []})
                    blockInfo['FileList'].append(
                        {'logical_file_name': fileInfo['logical_file_name']})
                    blockInfo['NumberOfEvents'] += fileInfo['event_count']

                blockReplicasInfo = phedex.getReplicaPhEDExNodesForBlocks(dataset=dataset, complete='y')
                for block in blockReplicasInfo:
                    # NOTE(review): a block known to PhEDEx but absent from the
                    # DBS file list raises KeyError here - confirm that is intended.
                    nodes = (set(blockReplicasInfo[block]) - node_filter) | set(fakePNNs)
                    blockDict[block]['PhEDExNodeNames'] = list(nodes)
                    # dicts are not orderable in Python 3, so sort on the LFN
                    # explicitly (same order Python 2 gives for these dicts)
                    blockDict[block]['FileList'] = sorted(blockDict[block]['FileList'],
                                                          key=lambda item: item['logical_file_name'])

            resultDict[pileupType] = blockDict
        return resultDict
Example #16
0
 def keepOnlyDisks(self, locationsMap):
     """
     Fetch from PhEDEx the names of all nodes of kind 'Disk'.

     :param locationsMap: not used by the code visible here
     :raises TaskWorkerException: when the PhEDEx node map cannot be retrieved
     """
     phedex = PhEDEx() #TODO use certs from the config!
     #get all the PNN that are of kind disk
     try:
         diskLocations = set(pnn['name'] for pnn in phedex.getNodeMap()['phedex']['node'] if pnn['kind'] == 'Disk')
     except Exception as ex: #TODO should we catch HttpException instead?
         self.logger.exception(ex)
         raise TaskWorkerException("The CRAB3 server backend could not contact phedex to get the list of site storages.\n"+\
                             "This is could be a temporary phedex glitch, please try to submit a new task (resubmit will not work)"+\
                             " and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO addo the nodes phedex so the user can check themselves
def getCMSSiteInfo(pattern):
    """
    _getCMSSiteInfo_

    Print the PhEDEx subscriptions of two hard-coded datasets.

    NOTE(review): despite its name, the visible code neither queries SiteDB
    nor returns anything, and `pattern` is not used - confirm whether this is
    leftover debugging code.
    """
    phedex = PhEDEx(responseType="json")
    print(phedex.subscriptions(dataset='/HidjetQuenchedMinBias/HiWinter13-PtHat80_STARTHI44_V12-v1/GEN-SIM-RECODEBUG'))
    print(phedex.subscriptions(dataset='/MinimumBias/Run2012D-v1/RAW'))
Example #18
0
 def __init__(self):
     """
     Create the data location client: Rucio when usingRucio() says so,
     PhEDEx otherwise
     """
     super(PileupFetcher, self).__init__()
     if not usingRucio():
         self.phedex = PhEDEx()  # this will go away eventually
         return
     # FIXME: find a way to pass the Rucio account name to this fetcher module
     self.rucioAcct = "wmcore_transferor"
     self.rucio = Rucio(self.rucioAcct)
Example #19
0
def remoteLFNPrefix(site, lfn=''):
    """
    Return (prefix, SE name) for a site, where prefix is the PFN that PhEDEx
    resolves for `lfn` at that site with the LFN itself stripped out
    """
    from WMCore.Services.PhEDEx.PhEDEx import PhEDEx
    client = PhEDEx(responseType='json')

    storageElement = client.getNodeSE(site)
    pfnByNodeAndLfn = client.getPFN(nodes=[site], lfns=[lfn])
    fullUri = pfnByNodeAndLfn[(site, lfn)]

    # Don't want the actual LFN, just prefix
    prefix = fullUri.replace(lfn, '')
    return prefix, storageElement
Example #20
0
 def __init__(self):
     """
     Create the client used to resolve pileup locations: Rucio when
     usingRucio() says so, PhEDEx otherwise
     """
     super(PileupFetcher, self).__init__()
     if not usingRucio():
         self.phedex = PhEDEx()  # this will go away eventually
         return
     # Too much work to pass the rucio account name all the way to here
     # just use the production rucio account for resolving pileup location
     rucioOptions = {'phedexCompatible': False}
     self.rucio = Rucio("wma_prod", configDict=rucioOptions)
Example #21
0
    def __init__(self, url, **contact):
        """
        Create the DBS api object for `url` (extra keyword arguments are
        passed through to DbsApi) and a PhEDEx client.

        A dbsClientException from DbsApi is re-raised as DBSReaderError.
        """
        # instantiate dbs api object
        try:
            self.dbs = DbsApi(url, **contact)
        except dbsClientException as ex:
            details = "%s\n" % formatEx3(ex)
            raise DBSReaderError("Error in DBSReader with DbsApi\n" + details)

        # connection to PhEDEx (Use default endpoint url)
        self.phedex = PhEDEx(responseType="json")
Example #22
0
def main():
    """
    _main_

    Find files that the local buffer believes are not in PhEDEx although
    PhEDEx already lists them, and update the buffer for those files.

    The candidate (lfn, block) pairs come from the module-level `query`;
    each block is looked up in PhEDEx and the files found there are passed
    as binds to the module-level `modification` statement. Always returns 0.
    """
    # Start services
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    connectToDB()
    myPhEDEx = PhEDEx()
    myThread = threading.currentThread()
    print("Please remember to shutdown the PhEDExInjector first, you have 10 seconds before the script starts.")
    time.sleep(10)

    # Get the files that the PhEDExInjector would look for
    formatter = DBFormatter(logging, myThread.dbi)
    formatter.sql = query
    results = formatter.execute()
    sortedBlocks = defaultdict(set)
    for lfn, block in results:
        sortedBlocks[block].add(lfn)

    # Check with block-level calls
    foundFiles = set()
    for block in sortedBlocks:
        result = myPhEDEx._getResult('data', args={'block': block}, verb='GET')
        for dbs in result['phedex']['dbs']:
            for dataset in dbs['dataset']:
                blockChunk = dataset['block']
                for blockInfo in blockChunk:
                    for fileInfo in blockInfo['file']:
                        if fileInfo['lfn'] in sortedBlocks[block]:
                            foundFiles.add(fileInfo['lfn'])
    if not foundFiles:
        print("I didn't find an abnormal file, feel free to panic!. Please contact a developer.")
        return 0
    print("Found %d files that are already registered in PhEDEx but the buffer doesn't know" % len(foundFiles))
    print("Fixing them now...")
    # Fix it!
    binds = [{'lfn': lfn} for lfn in foundFiles]
    formatter.dbi.processData(modification,
                              binds,
                              conn=None,
                              transaction=False,
                              returnCursor=False)
    print("Fixed them! :)")
    print("You can restart the PhEDExInjector now, have a nice day!")
    return 0
Example #23
0
    def testXMLJSON(self):
        """
        Use an XML and a JSON PhEDEx client side by side: match an LFN
        against the TFC obtained through the XML client and ask the JSON
        client for the SE of the same node
        """
        site = 'T1_US_FNAL_Buffer'
        jsonEndpoint = {'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/json/test"}
        phedexJSON = PhEDEx(responseType='json', httpDict=jsonEndpoint)
        xmlEndpoint = {'endpoint': "https://cmsweb.cern.ch/phedex/datasvc/xml/test"}
        phedexXML = PhEDEx(responseType='xml', httpDict=xmlEndpoint)

        phedexXML.getNodeTFC(site)
        tfc_file = phedexXML.cacheFileName('tfc', inputdata={'node': site})
        tfc_map = {site: readTFC(tfc_file)}
        pfn = tfc_map[site].matchLFN('srmv2', '/store/user/jblow/dir/test.root')

        expectedPfn = 'srm://cmssrm.fnal.gov:8443/srm/managerv2?SFN=/11/store/user/jblow/dir/test.root'
        self.assertTrue(pfn == expectedPfn)

        seName = phedexJSON.getNodeSE('T1_US_FNAL_Buffer')
        self.assertTrue(seName == 'cmssrm.fnal.gov')
Example #24
0
    def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
        """
        Compare a pileup configuration against what DBS and PhEDEx report.

        pileupDict is the pileup configuration under test; the datasets to
        check are taken from defaultArguments["PileupConfig"]. For every
        block of every dataset, the stored node names are compared with the
        PhEDEx replica locations and the stored file list with the files DBS
        lists for the block.

        """
        reader = DBS3Reader(dbsUrl)
        phedex = PhEDEx()

        inputArgs = defaultArguments["PileupConfig"]

        self.assertEqual(len(inputArgs), len(pileupDict),
                         "Number of pileup types different.")
        for pileupType in inputArgs:
            m = ("pileup type '%s' not in PileupFetcher-produced pileup "
                 "configuration: '%s'" % (pileupType, pileupDict))
            self.assertTrue(pileupType in pileupDict, m)

        # structure of pileupDict, as read back from the saved configuration:
        #    {"pileupTypeA": {"BlockA": {"FileList": [], "PhEDExNodeNames": []},
        #                     "BlockB": {"FileList": [], "PhEDExNodeNames": []}, ....}
        for pileupType, datasets in inputArgs.items():
            # this is from the pileup configuration produced by PileupFetcher
            blockDict = pileupDict[pileupType]

            for dataset in datasets:
                dbsFileBlocks = reader.listFileBlocks(dataset=dataset)
                blocksLocation = phedex.getReplicaPhEDExNodesForBlocks(dataset=dataset, complete='y')
                for dbsFileBlockName in dbsFileBlocks:
                    storedBlock = blockDict  # looked up per assertion below
                    pnns = set(blocksLocation[dbsFileBlockName])
                    # now get list of files in the block
                    fileList = [dbsFile["LogicalFileName"]
                                for dbsFile in reader.listFilesInBlock(dbsFileBlockName)]
                    # now compare the sets:
                    m = ("PNNs don't agree for pileup type '%s', "
                         "dataset '%s' in configuration: '%s'" % (pileupType, dataset, pileupDict))
                    self.assertEqual(set(storedBlock[dbsFileBlockName]["PhEDExNodeNames"]), pnns, m)
                    m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                         " in configuration: '%s'" % (pileupType, dataset, pileupDict))
                    storedFileList = []
                    for item in storedBlock[dbsFileBlockName]["FileList"]:
                        storedFileList.append(item['logical_file_name'])
                    self.assertItemsEqual(storedFileList, fileList, m)
def main():
    """
    _main_

    Repair buffer entries for files that PhEDEx already knows about.

    Runs the module-level ``query`` to obtain (lfn, block) pairs, asks the
    PhEDEx data service about every block, and executes the module-level
    ``modification`` statement for each LFN that PhEDEx reports.

    :return: 0 in every case
    """
    # Start services
    os.environ.setdefault('WMAGENT_CONFIG',
                          '/data/srv/wmagent/current/config/wmagent/config.py')
    connectToDB()
    myPhEDEx = PhEDEx()
    myThread = threading.currentThread()
    # Single-argument print(...) behaves the same on Python 2 and Python 3
    print("Please remember to shutdown the PhEDExInjector first, you have 10 seconds before the script starts.")
    time.sleep(10)

    # Get the files that the PhEDExInjector would look for
    formatter = DBFormatter(logging, myThread.dbi)
    formatter.sql = query
    results = formatter.execute()
    sortedBlocks = defaultdict(set)
    for lfn, block in results:
        sortedBlocks[block].add(lfn)

    # Check with block-level calls
    foundFiles = set()
    for block, lfns in sortedBlocks.items():
        result = myPhEDEx._getResult('data', args={'block': block}, verb='GET')
        for dbs in result['phedex']['dbs']:
            for dataset in dbs['dataset']:
                for blockInfo in dataset['block']:
                    for fileInfo in blockInfo['file']:
                        if fileInfo['lfn'] in lfns:
                            foundFiles.add(fileInfo['lfn'])
    if not foundFiles:
        print("I didn't find an abnormal file, feel free to panic!. Please contact a developer.")
        return 0
    print("Found %d files that are already registered in PhEDEx but the buffer doesn't know" % len(foundFiles))
    print("Fixing them now...")
    # Fix it!
    binds = [{'lfn': lfn} for lfn in foundFiles]
    formatter.dbi.processData(modification, binds,
                              conn=None,
                              transaction=False,
                              returnCursor=False)
    print("Fixed them! :)")
    print("You can restart the PhEDExInjector now, have a nice day!")
    return 0
    def __init__(self, config):
        """
        ___init___

        Create the PhEDEx and SiteDB clients and read the injector settings.

        :param config: configuration object; reads PhEDExInjector.phedexurl,
            DBSInterface.globalDBSUrl and the optional PhEDExInjector.group
            and PhEDExInjector.safeOperationMode attributes
        """
        BaseWorkerThread.__init__(self)

        injectorConfig = config.PhEDExInjector
        self.phedex = PhEDEx({"endpoint": injectorConfig.phedexurl}, "json")
        self.siteDB = SiteDBJSON()
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(injectorConfig, "group", "DataOps")
        self.safeMode = getattr(injectorConfig, "safeOperationMode", False)

        # Subscribed state in the DBSBuffer table for datasets:
        # 2 when running in safe operation mode, 1 otherwise
        self.terminalSubscriptionState = 2 if self.safeMode else 1

        # Node names are mapped to CMS names, which is what the spec carries.
        # When a CMS name maps to several PhEDEx nodes the MSS option is chosen.
        self.cmsToPhedexMap = {}

        # Initialise the alert framework (if available - config.Alert present);
        # self.sendAlert becomes available afterwards
        self.initAlerts(compName="PhEDExInjector")
Example #27
0
    def __init__(self, config):
        """
        Initialise class members

        :param config: configuration object; its AsyncTransfer section supplies
            log_level, couch_instance, files_database and pool_size
        """
        # Need a better way to test this without turning off this next line
        BaseWorkerThread.__init__(self)

        self.config = config.AsyncTransfer
        # self.logger is expected to be set up by BaseWorkerThread and only its
        # level is adjusted here. If that fails for any ordinary reason, fall
        # back to the root logger. `except Exception` (rather than a bare
        # except) lets SystemExit / KeyboardInterrupt propagate.
        try:
            self.logger.setLevel(self.config.log_level)
        except Exception:
            import logging
            self.logger = logging.getLogger()
            self.logger.setLevel(self.config.log_level)

        self.logger.debug('Configuration loaded')

        server = CouchServer(self.config.couch_instance)
        self.db = server.connectDatabase(self.config.files_database)
        self.logger.debug('Connected to CouchDB')
        self.pool = Pool(processes=self.config.pool_size)

        self.phedex = PhEDEx(responseType='xml')
Example #28
0
 def __init__(self, logger, cmdargs=None):
     """
     Initialise the sub-command and create its PhEDEx client.

     The client is given self.proxyfilename as both certificate and key.

     :param logger: logger passed on to SubCommand
     :param cmdargs: optional command-line arguments passed on to SubCommand
     """
     SubCommand.__init__(self, logger, cmdargs)
     phedexOptions = {
         "cert": self.proxyfilename,
         "key": self.proxyfilename,
         "logger": self.logger,
         "pycurl": True,
     }
     self.phedex = PhEDEx(phedexOptions)
     self.lfnsaddprefix = self.filename = None
    def __init__(self, config):
        """
        ___init___

        Keep the configuration, create the PhEDEx client and initialise the
        members used for SE-to-node mapping and file recovery.

        :param config: configuration object; reads PhEDExInjector.phedexurl,
            DBSInterface.globalDBSUrl and the optional PhEDExInjector.group
            and PhEDExInjector.diskSites attributes
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        injectorConfig = config.PhEDExInjector
        self.phedex = PhEDEx({"endpoint": injectorConfig.phedexurl}, "json")
        self.dbsUrl = config.DBSInterface.globalDBSUrl
        self.group = getattr(injectorConfig, "group", "DataOps")

        # Maps SE names (as stored in the DBSBuffer) to PhEDEx node names.
        # First key: the node "kind" (MSS, Disk or Buffer); second key: SE name.
        self.seMap = {}
        self.nodeNames = []

        # Used when the configuration does not provide its own diskSites list
        defaultDiskSites = ["storm-fe-cms.cr.cnaf.infn.it",
                            "srm-cms-disk.gridpp.rl.ac.uk"]
        self.diskSites = getattr(injectorConfig, "diskSites", defaultDiskSites)

        # Initialise the alert framework (if available - config.Alert present);
        # self.sendAlert becomes available afterwards
        self.initAlerts(compName="PhEDExInjector")

        self.filesToRecover = None
Example #30
0
def getPFN(proxy, lfnsaddprefix, filename, sitename, logger):
    """
    Resolve the physical file name (PFN) of a file at a site through PhEDEx.

    :param proxy: path used as both certificate and key for the PhEDEx client
    :param lfnsaddprefix: LFN directory prefix
    :param filename: file name joined onto the prefix to form the LFN
    :param sitename: PhEDEx node name to resolve the LFN for
    :param logger: logger used for the error messages
    :return: the PFN, or False when PhEDEx returned an empty PFN
    :raises HTTPException: the original exception from the PhEDEx call
    """
    from WMCore.Services.PhEDEx.PhEDEx import PhEDEx

    phedex = PhEDEx({"cert": proxy, "key": proxy, "logger": logger})
    lfnsadd = os.path.join(lfnsaddprefix, filename)
    try:
        pfndict = phedex.getPFN(nodes=[sitename], lfns=[lfnsadd])
    except HTTPException as errormsg:
        logger.info('Error: Failed to contact PhEDEx or wrong PhEDEx node name is used')
        logger.info('Result: %s\nStatus :%s\nURL :%s',
                    errormsg.result, errormsg.status, errormsg.url)
        # Re-raise the caught exception itself: wrapping it in a new
        # HTTPException would drop its traceback and its result/status/url.
        raise

    pfn = pfndict[(sitename, lfnsadd)]
    if not pfn:
        logger.info('Error: Failed to get PFN from the site. Please check the site status')
        return False
    return pfn
    def setUp(self):
        """
        _setUp_

        Reset the database, install the DBS3Buffer schema, register two
        storage locations and create the PhEDEx client used by the tests.
        """
        self.phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsURL = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer"],
                                useDefault=False)

        currentThread = threading.currentThread()
        bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                   logger=currentThread.logger,
                                   dbinterface=currentThread.dbi)

        # Register the storage locations
        addLocation = bufferFactory(classname="DBSBufferFiles.AddLocation")
        for seName in ("srm-cms.cern.ch", "se.fnal.gov"):
            addLocation.execute(siteName=seName)

        self.testFilesA, self.testFilesB = [], []
        self.testDatasetA = "/%s/PromptReco-v1/RECO" % makeUUID()
        self.testDatasetB = "/%s/CRUZET11-v1/RAW" % makeUUID()
        self.phedex = PhEDEx({"endpoint": self.phedexURL}, "json")
Example #32
0
    def __init__(self, config):
        """
        Initialise class members

        Creates the dropbox output directory if needed, connects to the files
        and configuration CouchDB databases, starts the worker pool, creates
        the PhEDEx client and sets up the plugin factory.

        :param config: configuration object passed on to BaseDaemon
        :raises OSError: when the dropbox directory cannot be created for a
            reason other than it already existing
        """
        #Need a better way to test this without turning off this next line
        BaseDaemon.__init__(self, config, 'AsyncTransfer')

        self.dropbox_dir = '%s/dropbox/outputs' % self.config.componentDir
        if not os.path.isdir(self.dropbox_dir):
            try:
                os.makedirs(self.dropbox_dir)
            except OSError as e:
                # EEXIST means the directory appeared between the isdir check
                # and makedirs; that is fine.
                if e.errno != errno.EEXIST:
                    # The original message had no format specifier, so the
                    # "%" raised TypeError and masked the OSError.
                    self.logger.error('Unknown error in mkdir: %s', e.errno)
                    raise
        server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.db = server.connectDatabase(self.config.files_database)
        config_server = CouchServer(dburl=self.config.config_couch_instance)
        self.config_db = config_server.connectDatabase(self.config.config_database)
        self.logger.debug('Connected to CouchDB')
        self.pool = Pool(processes=self.config.pool_size)
        # Best effort: a failure here is logged and self.phedex stays unset
        try:
            self.phedex = PhEDEx(responseType='xml', dict = {'key': self.config.opsProxy, 'cert': self.config.opsProxy})
        except Exception as e:
            self.logger.exception('PhEDEx exception: %s', e)
        # Set up a factory for loading plugins
        self.factory = WMFactory(self.config.schedAlgoDir, namespace = self.config.schedAlgoDir)

        # NOTE(review): these two locals are not used in the visible part of
        # this method - confirm whether they can be removed.
        result_list = []
        current_running = []
Example #33
0
    def insertPfns(self, fileInfoList):
        """
        Ask PhEDEx for the PFN of every file and store it under 'pfn' in the
        corresponding entry of fileInfoList (the entries are modified in place).

        Entries whose 'jobid' is listed in self.transferringIds are resolved
        from their 'tmplfn' / 'tmpsite' fields, all others from 'lfn' / 'site'.
        """
        client = PhEDEx({'cert': self.proxyfilename, 'key': self.proxyfilename, 'logger': self.logger, 'pycurl': True})

        if len(fileInfoList) == 0:
            return

        for entry in fileInfoList:
            # Pick out the correct lfn and site
            useTemporary = str(entry['jobid']) in self.transferringIds
            if useTemporary:
                lfnKey, siteKey = 'tmplfn', 'tmpsite'
            else:
                lfnKey, siteKey = 'lfn', 'site'
            lfn = entry[lfnKey]
            site = entry[siteKey]
            entry['pfn'] = client.getPFN(site, lfn)[(site, lfn)]
def phedexIt():
    """
    Compare two SE-to-PhEDEx-node mappings and print every discrepancy.

    The "old" mapping is taken from the 'se' field of the PhEDEx node map.
    The "new" mapping is derived from SiteDB (SE name -> CMS names -> PhEDEx
    nodes), restricted to node names that PhEDEx itself reports.
    """
    phedexApi = PhEDEx(responseType="json")

    # Old mapping: SE name -> set of PhEDEx node names, as reported by PhEDEx
    sePhedexMap = {}
    knownPhedexNodes = set()
    for node in phedexApi.getNodeMap()['phedex']['node']:
        sePhedexMap.setdefault(node['se'], set()).add(node['name'])
        knownPhedexNodes.add(node['name'])

    siteDB = SiteDBJSON()
    cmsNamesMap = {}
    for se in siteDB.getAllSENames():
        cmsNamesMap[se] = siteDB.seToCMSName(se)

    # New mapping: SE name -> set of known PhEDEx node names, via SiteDB
    seToNodeMap = {}
    for se in cmsNamesMap:
        candidates = set()
        for cmsName in cmsNamesMap[se]:
            candidates.update(siteDB.cmsNametoPhEDExNode(cmsName))
        # Keep only the candidates PhEDEx actually knows about
        seToNodeMap[se] = candidates & knownPhedexNodes

    # Single-argument print(...) behaves the same on Python 2 and Python 3
    for se in sePhedexMap:
        if se not in seToNodeMap:
            print("SE: %s is not in new mapping for sites %s" % (se, list(sePhedexMap[se])))
    for se in seToNodeMap:
        if se not in sePhedexMap:
            print("SE: %s is not in old mapping for sites %s" % (se, list(seToNodeMap[se])))
    for se in set(seToNodeMap.keys()).intersection(set(sePhedexMap.keys())):
        diff = sePhedexMap[se] - seToNodeMap[se]
        if diff:
            print("%s are in old mapping but not in new for %s" % (str(list(diff)), se))
        diff = seToNodeMap[se] - sePhedexMap[se]
        if diff:
            print("%s are in new mapping but not in old for %s" % (str(list(diff)), se))
Example #35
0
 def keepOnlyDisks(self, locationsMap):
     """
     Restrict every block in locationsMap to its locations of kind 'Disk'.

     locationsMap is modified in place: each value is replaced by the set of
     its locations that PhEDEx reports as Disk nodes, and blocks left with no
     Disk location are removed. Every non-Disk location encountered is
     collected in self.otherLocations.

     :param locationsMap: dict mapping a block to an iterable of PhEDEx node names
     :raises TaskWorkerException: when the PhEDEx node map cannot be retrieved
     """
     self.otherLocations = set()
     phedex = PhEDEx() #TODO use certs from the config!
     # get all the PNN that are of kind disk
     try:
         diskLocations = set(pnn['name'] for pnn in phedex.getNodeMap()['phedex']['node'] if pnn['kind'] == 'Disk')
     except HTTPException as ex:
         self.logger.error(ex.headers)
         raise TaskWorkerException(("The CRAB3 server backend could not contact phedex to get the list of site storages.\n"
                                    "This is could be a temporary phedex glitch, please try to submit a new task (resubmit will not work)"
                                    " and contact the experts if the error persists.\nError reason: %s") % str(ex)) #TODO addo the nodes phedex so the user can check themselves
     # Iterate over a snapshot so that entries can be deleted while looping;
     # this works on both Python 2 and Python 3.
     for block, locations in list(locationsMap.items()):
         locations = set(locations)
         self.otherLocations |= locations - diskLocations
         diskOnly = locations & diskLocations
         if diskOnly:
             locationsMap[block] = diskOnly
         else:
             # no Disk replica at all: drop the block
             del locationsMap[block]
    def testDataServiceXML(self):
        """
        Check that the PFN returned by PhEDEx for an LFN matches the PFN
        obtained by applying the site's TFC to the same LFN.

        Always skipped at the moment, see the comment below.
        """
        # asks for PEM pass phrase ...
        raise nose.SkipTest
        # Everything below is unreachable until the skip above is removed.
        client = PhEDEx(responseType='xml')

        site = 'T2_UK_SGrid_Bristol'
        lfn = '/store/users/metson/file'
        protocol = 'srmv2'
        client.getNodeTFC(site)

        # Read the TFC back from the client's cache file for this node
        tfc = readTFC(client.cacheFileName('tfc', inputdata={'node': site}))

        phedex_pfn = client.getPFN(site, lfn, protocol)[(site, lfn)]
        pfn = tfc.matchLFN(protocol, lfn)
        self.assertEqual(phedex_pfn, pfn,
                         'TFC pfn (%s) did not match PhEDEx pfn (%s)' % (pfn, phedex_pfn))
    def __init__(self, config):
        """
        ___init___

        Initialise class members

        Reads the polling/subscription settings, creates the PhEDEx client and
        builds the SE-name and node-kind lookup tables from the PhEDEx node map.

        :param config: configuration object; reads DBSInterface.globalDBSUrl,
            PhEDExInjector.phedexurl and, when PhEDExInjector.subscribeDatasets
            is set, PhEDExInjector.pollInterval and subscribeInterval
        :raises Exception: whatever the third getNodeMap attempt raises
        """
        BaseWorkerThread.__init__(self)
        self.dbsUrl = config.DBSInterface.globalDBSUrl

        self.pollCounter = 0
        self.subFrequency = None
        if getattr(config.PhEDExInjector, "subscribeDatasets", False):
            pollInterval = config.PhEDExInjector.pollInterval
            subInterval = config.PhEDExInjector.subscribeInterval
            self.subFrequency = max(1, int(round(subInterval/pollInterval)))
            logging.info("SubscribeDataset and deleteBlocks will run every %d polling cycles", self.subFrequency)
            # subscribe on first cycle
            self.pollCounter = self.subFrequency - 1

        # retrieving the node mappings is fickle and can fail quite often:
        # try up to three times, sleeping 2s and then 4s between attempts.
        # `except Exception` (rather than a bare except) lets SystemExit and
        # KeyboardInterrupt interrupt the retries.
        self.phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        for delay in (2, 4):
            try:
                nodeMappings = self.phedex.getNodeMap()
                break
            except Exception:
                time.sleep(delay)
        else:
            # last attempt: let any failure propagate to the caller
            nodeMappings = self.phedex.getNodeMap()

        # This will be used to map SE names which are stored in the DBSBuffer to
        # PhEDEx node names.  The first key will be the "kind" which consists
        # of one of the following: MSS, Disk, Buffer.  The next key will be the
        # SE name.
        self.seMap = {}
        self.nodeNames = []
        # Node names grouped by kind, for the MSS and Disk kinds only
        self.phedexNodes = {'MSS': [], 'Disk': []}
        for node in nodeMappings["phedex"]["node"]:
            kindMap = self.seMap.setdefault(node["kind"], {})
            logging.info("Adding mapping %s -> %s", node["se"], node["name"])
            kindMap[node["se"]] = node["name"]
            self.nodeNames.append(node["name"])
            if node["kind"] in self.phedexNodes:
                self.phedexNodes[node["kind"]].append(node["name"])

        # initialize the alert framework (if available - config.Alert present)
        #    self.sendAlert will be then be available
        self.initAlerts(compName="PhEDExInjector")

        self.blocksToRecover = []
Example #38
0
    def __init__(self, config):
        """
        Keep the configuration and open the browser, PhEDEx and DBS connections.

        :param config: configuration object, stored as self.config
        """
        self.br = Browser()
        self.config = config

        # Initialise connections
        phedexOptions = {"endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}
        self.phedex = PhEDEx(phedexOptions, "json")
        # One DBS reader per instance, stored as self.dbsPhys01 .. self.dbsPhys03
        for attrName, readerPath in (("dbsPhys01", "phys01/DBSReader/"),
                                     ("dbsPhys02", "phys02/DBSReader/"),
                                     ("dbsPhys03", "phys03/DBSReader/")):
            setattr(self, attrName, DbsApi(url=dbs_base_url + readerPath))
Example #39
0
    def setUp(self):
        """
        _setUp_

        Record the DBS reader URL and create a PhEDEx client with its
        default settings.
        """
        self.dbsTestUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.phedexApi = PhEDEx()
Example #40
0
 def setUp(self):
     """
     _setUp_

     Record the DBS writer URL and create a PhEDEx client for the test
     data service endpoint, configured with method POST.
     """
     self.dbsTestUrl = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"
     clientOptions = {
         "endpoint": "https://cmsweb.cern.ch/phedex/datasvc/json/test",
         "method": "POST",
     }
     self.phedexApi = PhEDEx(clientOptions)
Example #41
0
    def __init__(self, url, **contact):
        """
        Create the DBS API object and a PhEDEx client.

        :param url: DBS URL passed to DbsApi
        :param contact: extra keyword arguments passed to DbsApi
        :raises DBSReaderError: when DbsApi raises dbsClientException
        """
        # instantiate dbs api object
        try:
            dbsApi = DbsApi(url, **contact)
        except dbsClientException as ex:
            raise DBSReaderError("Error in DBSReader with DbsApi\n" + "%s\n" % formatEx3(ex))
        self.dbs = dbsApi

        # connection to PhEDEx (Use default endpoint url)
        self.phedex = PhEDEx(responseType="json")
Example #42
0
    def testSlcPhedexNodesEqualPhedexApiNodes(self):
        """
        For each site, verify that the stageout node specified in
        site-local-config.xml is the same as the one returned by the PhEDEx api.

        Sites whose site-local-config.xml cannot be loaded are reported on
        stdout and skipped.
        """
        os.environ["CMS_PATH"] = "/cvmfs/cms.cern.ch"

        phedex = PhEDEx()
        nodes = set(node[u'name'] for node in phedex.getNodeMap()["phedex"]["node"])

        for d in os.listdir("/cvmfs/cms.cern.ch/SITECONF/"):
            # Only T0_, T1_... folders are needed
            if not d.startswith("T"):
                continue
            os.environ['WMAGENT_SITE_CONFIG_OVERRIDE'] = '/cvmfs/cms.cern.ch/SITECONF/%s/JobConfig/site-local-config.xml' % (d)
            try:
                slc = loadSiteLocalConfig()
            except SiteConfigError as e:
                print(e.args[0])
                # Without this, `slc` would be undefined (first site) or left
                # over from the previous site, and the assertion below would
                # check the wrong configuration.
                continue
            phedexNode = slc.localStageOut.get("phedex-node")
            self.assertTrue(phedexNode in nodes,
                            "Error: Node specified in SLC (%s) not in list returned by PhEDEx api" % phedexNode)
Example #43
0
    def __call__(self):

        self.filename = 'crab3checkwrite.tmp'

        self.username = self.proxy.getHyperNewsName()
        phedex = PhEDEx({"cert": self.proxyfilename, "key": self.proxyfilename})

        if hasattr(self.options, 'userlfn') and self.options.userlfn != None:
            lfnsadd = self.options.userlfn + '/' + self.filename
        else:
            lfnsadd = '/store/user/' + self.username + '/' + self.filename

        try:
            pfndict = phedex.getPFN(nodes = [self.options.sitename], lfns = [lfnsadd])
            pfn = pfndict[(self.options.sitename, lfnsadd)]
            if not pfn:
                self.logger.info('%sError%s: Failed to get PFN from the site. Please check the site status' % (colors.RED, colors.NORMAL))
                raise ConfigurationException
        except HTTPException, errormsg :
            self.logger.info('%sError:%s Failed to contact PhEDEx or wrong PhEDEx node name is used' % (colors.RED, colors.NORMAL))
            self.logger.info('Result: %s\nStatus :%s\nURL :%s' % (errormsg.result, errormsg.status, errormsg.url))
            raise HTTPException, errormsg