Example #1
 def validateCommon(self):
     """Common validation stuff"""
     try:
         Lexicon.requestName(self.wmspec.name())
     except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
         error = WorkQueueWMSpecError(self.wmspec, "Workflow name validation error: %s" % str(ex))
         raise error
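For context: the Lexicon validators used throughout these examples signal bad input by raising (often AssertionError) rather than returning False, which is why the handlers catch broadly and re-raise a domain-specific error. Below is a minimal, self-contained sketch of that wrap-and-reraise pattern; the requestName regex and SpecValidationError class are simplified stand-ins, not the real WMCore implementations.

import re

def requestName(name):
    # Stand-in for Lexicon.requestName: assert-style validation.
    assert isinstance(name, str) and re.match(r'^[A-Za-z0-9_.-]{1,150}$', name), \
        "Invalid request name: %r" % name

class SpecValidationError(Exception):
    # Stand-in for WorkQueueWMSpecError.
    pass

def validate_name(name):
    try:
        requestName(name)
    except Exception as ex:  # AssertionError, TypeError on odd inputs, etc.
        raise SpecValidationError("Workflow name validation error: %s" % str(ex))

validate_name("run2_workflow-01")       # passes silently
# validate_name("bad name")             # would raise SpecValidationError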
Example #2
    def queueWork(self, wmspecUrl, request = None, team = None):
        """
        Take and queue work from a WMSpec.

        If a request name is provided but doesn't match the WMSpec
        name, an error is raised.

        If a team is provided, work will only be available to queues
        belonging to that team.

        Duplicate specs will be ignored.
        """
        self.logger.info('queueWork() begin queueing "%s"' % wmspecUrl)
        wmspec = WMWorkloadHelper()
        wmspec.load(wmspecUrl)

        # check we haven't already got this work
        try:
            self.backend.getInboxElements(elementIDs = [wmspec.name()])
        except CouchNotFoundError:
            pass
        else:
            self.logger.warning('queueWork(): Ignoring duplicate spec "%s"' % wmspec.name())
            return 1

        if request:
            try:
                Lexicon.requestName(request)
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(wmspec, "Request name validation error: %s" % str(ex))
                raise error
            if request != wmspec.name():
                raise WorkQueueWMSpecError(wmspec, 'Request & workflow name mismatch %s vs %s' % (request, wmspec.name()))
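The duplicate check above leans on try/except/else: if the inbox lookup does not raise CouchNotFoundError, the spec already exists, so the else branch logs and bails out. A generic sketch of that inverted-lookup idiom, with a stand-in store and exception class (all names here are illustrative):

class NotFoundError(KeyError):
    # Stand-in for CouchNotFoundError.
    pass

def getInboxElements(store, elementID):
    if elementID not in store:
        raise NotFoundError(elementID)
    return store[elementID]

inbox = {'ExistingSpec': {}}

def queue_once(name):
    try:
        getInboxElements(inbox, name)
    except NotFoundError:
        pass                                   # not queued yet: safe to proceed
    else:
        print('Ignoring duplicate spec "%s"' % name)
        return 1
    inbox[name] = {}                           # queue the new spec
    return 0

queue_once('NewSpec')        # queued
queue_once('ExistingSpec')   # duplicate, ignored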
Example #3
    def queueWork(self, wmspecUrl, request=None, team=None):
        """
        Take and queue work from a WMSpec.

        If a request name is provided but doesn't match the WMSpec
        name, an error is raised.

        If a team is provided, work will only be available to queues
        belonging to that team.

        Duplicate specs will be ignored.
        """
        self.logger.info('queueWork() begin queueing "%s"' % wmspecUrl)
        wmspec = WMWorkloadHelper()
        wmspec.load(wmspecUrl)

        if request:  # validate request name
            try:
                Lexicon.requestName(request)
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(
                    wmspec, "Request name validation error: %s" % str(ex))
                raise error
            if request != wmspec.name():
                raise WorkQueueWMSpecError(
                    wmspec, 'Request & workflow name mismatch %s vs %s' %
                    (request, wmspec.name()))
Ejemplo n.º 4
0
    def setDataset(self, datasetName, primaryType,
                   datasetType, physicsGroup = None, overwrite = False, valid = 1):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.hasDataset() and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)
        
        if datasetType not in ['VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED']:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBlockException(msg)

        try:
            if datasetName[0] == '/':
                junk, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception as ex:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBlockException(msg)
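The split logic above relies on a full dataset path starting with '/', so splitting on '/' yields four parts with an empty first element. A quick illustration (the dataset name is invented):

datasetName = "/SingleMuon/Run2016B-v1/RAW"
parts = datasetName.split('/')          # ['', 'SingleMuon', 'Run2016B-v1', 'RAW']
_, primary, processed, tier = parts     # discard the empty leading element
print(primary, processed, tier)         # SingleMuon Run2016B-v1 RAW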
Example #5
 def validateCommon(self):
     """Common validation stuff"""
     try:
         Lexicon.requestName(self.wmspec.name())
     except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
         error = WorkQueueWMSpecError(
             self.wmspec, "Workflow name validation error: %s" % str(ex))
         raise error
Example #6
    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check it's not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl, request=reqName, team=team)
                self.logdb.delete(reqName, "error", this_thread=True)
            except TERMINAL_EXCEPTIONS as ex:
                # fatal error - report back to ReqMgr
                self.logger.error('Permanent failure processing request "%s": %s' % (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" % reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isn't a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s unit(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" % work)
        return work
Example #7
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath) # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRuns returns a run number per lumi section
                full_lumi_list = dbs.listRuns(block = block['block'])
                runs = set(full_lumi_list)
                
                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)
                accepted_lumis = [x for x in full_lumi_list if x in runs]
                ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
                block[self.lumiType] = len(accepted_lumis)
                block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted

            validBlocks.append(block)
            if locations is None:
                locations = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
            else:
                locations = locations.intersection(set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))))

        # all needed blocks present at these sites
        if locations:
            self.data[datasetPath] = list(locations)
        return validBlocks
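The run-filter branch above estimates new file and event counts from the fraction of accepted lumi sections rather than pulling per-file information from DBS. The arithmetic in isolation (all numbers invented):

full_lumi_list = [1, 1, 2, 2, 2, 3]     # run number per lumi section
runs = {1, 3}                           # runs surviving the black/whitelists
accepted_lumis = [r for r in full_lumi_list if r in runs]
ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)   # 3/6 = 0.5
print(10 * ratio_accepted)              # estimated files:  5.0
print(2000 * ratio_accepted)            # estimated events: 1000.0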
Example #8
    def setDataset(self,
                   datasetName,
                   primaryType,
                   datasetType,
                   physicsGroup=None,
                   prep_id=None,
                   overwrite=False):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.getDataset() is not None and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)

        if datasetType not in [
                'VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED'
        ]:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        try:
            if datasetName[0] == '/':
                _, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        # Do the primary dataset
        self.data['primds']['primary_ds_name'] = primary
        self.data['primds']['primary_ds_type'] = primaryType
        self.data['primds']['create_by'] = "WMAgent"
        self.data['primds']['creation_date'] = int(time.time())

        # Do the processed
        self.data['dataset']['physics_group_name'] = physicsGroup
        self.data['dataset']['processed_ds_name'] = processed
        self.data['dataset']['data_tier_name'] = tier
        self.data['dataset']['dataset_access_type'] = datasetType
        self.data['dataset']['dataset'] = datasetName
        self.data['dataset']['prep_id'] = prep_id
        # Add misc meta data.
        self.data['dataset']['create_by'] = "WMAgent"
        self.data['dataset']['last_modified_by'] = "WMAgent"
        self.data['dataset']['creation_date'] = int(time.time())
        self.data['dataset']['last_modification_date'] = int(time.time())
        return
Example #9
    def setDataset(self, datasetName, primaryType,
                   datasetType, physicsGroup = None,
                   prep_id  = None, overwrite = False):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.getDataset() is not None and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)

        if datasetType not in ['VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED']:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        try:
            if datasetName[0] == '/':
                junk, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBufferBlockException(msg)

        # Do the primary dataset
        self.data['primds']['primary_ds_name'] = primary
        self.data['primds']['primary_ds_type'] = primaryType
        self.data['primds']['create_by'] = "WMAgent"
        self.data['primds']['creation_date'] = int(time.time())

        # Do the processed
        self.data['dataset']['physics_group_name']  = physicsGroup
        self.data['dataset']['processed_ds_name']   = processed
        self.data['dataset']['data_tier_name']      = tier
        self.data['dataset']['dataset_access_type'] = datasetType
        self.data['dataset']['dataset']             = datasetName
        self.data['dataset']['prep_id'] = prep_id
        # Add misc meta data.
        self.data['dataset']['create_by'] = "WMAgent"
        self.data['dataset']['last_modified_by'] = "WMAgent"
        self.data['dataset']['creation_date'] = int(time.time())
        self.data['dataset']['last_modification_date'] = int(time.time())
        return
Example #10
    def validateCommon(self):
        """Common validation stuff"""
        try:
            Lexicon.requestName(self.wmspec.name())
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(self.wmspec, "Workflow name validation error: %s" % str(ex))
            raise error

        if self.initialTask.siteWhitelist():
            if isinstance(self.initialTask.siteWhitelist(), basestring):
                error = WorkQueueWMSpecError(self.wmspec, 'Invalid site whitelist: Must be tuple/list but is %s' % type(
                    self.initialTask.siteWhitelist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteWhitelist()]
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(self.wmspec, "Site whitelist validation error: %s" % str(ex))
                raise error
        else:
            error = WorkQueueWMSpecError(self.wmspec, "Site whitelist validation error: Empty site whitelist")
            raise error

        if self.initialTask.siteBlacklist():
            if isinstance(self.initialTask.siteBlacklist(), basestring):
                error = WorkQueueWMSpecError(self.wmspec, 'Invalid site blacklist: Must be tuple/list but is %s' % type(
                    self.initialTask.siteBlacklist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteBlacklist()]
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(self.wmspec, "Site blacklist validation error: %s" % str(ex))
                raise error

        # splitter settings
        if self.args.get('SliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(self.wmspec, 'Zero or negative SliceSize parameter')
            raise error
        if self.args.get('SubSliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(self.wmspec, 'Zero or negative SubSliceSize parameter')
            raise error

        # check input dataset is valid
        try:
            if self.initialTask.getInputDatasetPath():
                Lexicon.dataset(self.initialTask.getInputDatasetPath())
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(self.wmspec, "Dataset validation error: %s" % str(ex))
            raise error

        # if pileup is found, check that they are valid datasets
        try:
            pileupDatasets = self.wmspec.listPileupDatasets()
            for dbsUrl in pileupDatasets:
                for dataset in pileupDatasets[dbsUrl]:
                    Lexicon.dataset(dataset)
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(self.wmspec, "Pileup dataset validation error: %s" % str(ex))
            raise error
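The site-list checks above validate both the container type (a bare string is rejected) and each entry via Lexicon.cmsname. A stand-in version with a simplified site-name pattern; the real Lexicon regex is stricter:

import re

def cmsname(site):
    # Stand-in for Lexicon.cmsname: T<tier>_<country>_<site>.
    assert re.match(r'^T[0-3](_[A-Z]{2}(_[A-Za-z0-9]+)+)?$', site), \
        "Invalid site name: %r" % site

whitelist = ['T1_US_FNAL', 'T2_CH_CERN']
if isinstance(whitelist, str):
    raise TypeError('Site whitelist must be a tuple/list, not a string')
for site in whitelist:
    cmsname(site)            # raises AssertionError on a malformed entry
print('site whitelist OK')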
Example #11
    def validateCommon(self):
        """Common validation stuff"""
        try:
            Lexicon.requestName(self.wmspec.name())
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(self.wmspec, "Workflow name validation error: %s" % str(ex))
            raise error

        if self.initialTask.siteWhitelist():
            if isinstance(self.initialTask.siteWhitelist(), basestring):
                error = WorkQueueWMSpecError(self.wmspec, 'Invalid site whitelist: Must be tuple/list but is %s' % type(
                    self.initialTask.siteWhitelist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteWhitelist()]
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(self.wmspec, "Site whitelist validation error: %s" % str(ex))
                raise error

        if self.initialTask.siteBlacklist():
            if isinstance(self.initialTask.siteBlacklist(), basestring):
                error = WorkQueueWMSpecError(self.wmspec, 'Invalid site blacklist: Must be tuple/list but is %s' % type(
                    self.initialTask.siteBlacklist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteBlacklist()]
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(self.wmspec, "Site blacklist validation error: %s" % str(ex))
                raise error

        # splitter settings
        if self.args.get('SliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(self.wmspec, 'Zero or negative SliceSize parameter')
            raise error
        if self.args.get('SubSliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(self.wmspec, 'Zero or negative SubSliceSize parameter')
            raise error

        # check input dataset is valid
        try:
            if self.initialTask.getInputDatasetPath():
                Lexicon.dataset(self.initialTask.getInputDatasetPath())
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(self.wmspec, "Dataset validation error: %s" % str(ex))
            raise error

        # if pileup is found, check that they are valid datasets
        try:
            pileupDatasets = self.wmspec.listPileupDatasets()
            for dbsUrl in pileupDatasets:
                for dataset in pileupDatasets[dbsUrl]:
                    Lexicon.dataset(dataset)
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(self.wmspec, "Pileup dataset validation error: %s" % str(ex))
            raise error
Example #12
def createPrimaryDataset(primaryName, primaryDatasetType='mc', apiRef=None):
    """
    _createPrimaryDataset_


    """
    logging.debug("Inserting PrimaryDataset %s with Type %s" \
                  % (primaryName, primaryDatasetType))

    Lexicon.primaryDatasetType(primaryDatasetType)

    primary = DbsPrimaryDataset(Name=primaryName, Type=primaryDatasetType)

    if apiRef:
        try:
            apiRef.insertPrimaryDataset(primary)
        except DbsException as ex:
            msg = "Error in DBSInterface.createPrimaryDataset(%s)\n" % primaryName
            msg += formatEx(ex)
            logging.error(msg)
            raise DBSInterfaceError(msg)
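The insertion above shows the project's usual wrap-log-reraise handling: a library exception is caught, formatted into a readable message, logged, and re-raised as an interface-level error. A compact stand-in of the same pattern (the exception classes and formatEx helper here are simplified, not the real WMCore ones):

class DbsException(Exception):
    pass

class DBSInterfaceError(Exception):
    pass

def formatEx(ex):
    # Stand-in for the real formatEx helper: class name plus message.
    return "%s: %s" % (ex.__class__.__name__, ex)

def insert_primary(api_insert, primaryName):
    try:
        api_insert(primaryName)
    except DbsException as ex:
        msg = "Error in createPrimaryDataset(%s)\n" % primaryName
        msg += formatEx(ex)
        print(msg)               # stands in for logging.error(msg)
        raise DBSInterfaceError(msg)

insert_primary(lambda name: None, 'MinBias')   # succeeds silently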
Example #13
def createPrimaryDataset(primaryName, primaryDatasetType = 'mc', apiRef = None):
    """
    _createPrimaryDataset_


    """
    logging.debug("Inserting PrimaryDataset %s with Type %s" \
                  % (primaryName, primaryDatasetType))
    
    Lexicon.primaryDatasetType(primaryDatasetType)
    
    primary = DbsPrimaryDataset(Name = primaryName,
                                Type = primaryDatasetType)

    if apiRef:
        try:
            apiRef.insertPrimaryDataset(primary)
        except DbsException as ex:
            msg = "Error in DBSInterface.createPrimaryDataset(%s)\n" % primaryName
            msg += formatEx(ex)
            logging.error(msg)
            raise DBSInterfaceError(msg)
Example #14
    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Get the blocks which pass the lumi mask restrictions. For each block return the list of lumis
            which were ok (given the lumi mask). The data structure returned is the following:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """

        # Get mask and convert to LumiList to make operations easier
        maskedBlocks = {}
        lumiMask = task.getLumiMask()
        taskMask = LumiList(compactList=lumiMask)

        # Find all the files that have runs and lumis we are interested in,
        # fill block lfn part of maskedBlocks

        for run, lumis in lumiMask.items():
            files = []
            for slumis in Lexicon.slicedIterator(lumis, 50):
                slicedFiles = dbs.dbs.listFileArray(dataset=datasetPath,
                                                    run_num=run,
                                                    lumi_list=slumis,
                                                    detail=True)
                files.extend(slicedFiles)
            for file in files:
                blockName = file['block_name']
                fileName = file['logical_file_name']
                if blockName not in maskedBlocks:
                    maskedBlocks[blockName] = {}
                if fileName not in maskedBlocks[blockName]:
                    maskedBlocks[blockName][fileName] = LumiList()

        # Fill maskedLumis part of maskedBlocks

        for block in maskedBlocks:
            fileLumis = dbs.dbs.listFileLumis(block_name=block,
                                              validFileOnly=1)
            for fileLumi in fileLumis:
                lfn = fileLumi['logical_file_name']
                # For each run : [lumis] mask by needed lumis, append to maskedBlocks
                if maskedBlocks[block].get(lfn, None) is not None:
                    lumiList = LumiList(
                        runsAndLumis={
                            fileLumi['run_num']: fileLumi['lumi_section_num']
                        })
                    maskedBlocks[block][lfn] += (lumiList & taskMask)

        return maskedBlocks
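The per-file masking above depends on LumiList behaving like a set of (run, lumi) pairs, supporting & for intersection and += for accumulation. A simplified stand-in with plain sets (the real WMCore.DataStructs.LumiList is richer):

taskMask = {(1, 10), (1, 11), (2, 5)}       # (run, lumi) pairs requested by the task
fileLumis = {(1, 11), (1, 12), (3, 1)}      # (run, lumi) pairs present in one file

maskedBlocks = {'block1': {'file1.root': set()}}
maskedBlocks['block1']['file1.root'] |= (fileLumis & taskMask)
print(maskedBlocks)    # {'block1': {'file1.root': {(1, 11)}}}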
Example #15
class StartPolicyInterface(PolicyInterface):
    """Interface for start policies"""
    def __init__(self, **args):
        PolicyInterface.__init__(self, **args)
        self.workQueueElements = []
        self.wmspec = None
        self.team = None
        self.initialTask = None
        self.splitParams = None
        self.dbs_pool = {}
        self.data = {}
        self.lumi = None
        self.couchdb = None
        self.rejectedWork = []  # List of inputs that were rejected
        self.pileupData = {}

    def split(self):
        """Apply policy to spec"""
        raise NotImplementedError

    def validate(self):
        """Check params and spec are appropriate for the policy"""
        raise NotImplementedError

    def validateCommon(self):
        """Common validation stuff"""
        try:
            Lexicon.requestName(self.wmspec.name())
        except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(
                self.wmspec, "Workflow name validation error: %s" % str(ex))
            raise error

        if self.initialTask.siteWhitelist():
            if type(self.initialTask.siteWhitelist()) in types.StringTypes:
                error = WorkQueueWMSpecError(
                    self.wmspec,
                    'Invalid site whitelist: Must be tuple/list but is %s' %
                    type(self.initialTask.siteWhitelist()))
                raise error
            try:
                [
                    Lexicon.cmsname(site)
                    for site in self.initialTask.siteWhitelist()
                ]
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(
                    self.wmspec,
                    "Site whitelist validation error: %s" % str(ex))
                raise error
Example #16
    def setDataset(self,
                   datasetName,
                   primaryType,
                   datasetType,
                   physicsGroup=None,
                   prep_id=None,
                   overwrite=False,
                   valid=1):
        """
        _setDataset_

        Set all the information concerning a single dataset, including
        the primary, processed and tier info
        """
        if self.hasDataset() and not overwrite:
            # Do nothing, we already have a dataset
            return

        Lexicon.primaryDatasetType(primaryType)

        if datasetType not in [
                'VALID', 'PRODUCTION', 'INVALID', 'DEPRECATED', 'DELETED'
        ]:
            msg = "Invalid processedDatasetType %s\n" % datasetType
            logging.error(msg)
            raise DBSBlockException(msg)

        try:
            if datasetName[0] == '/':
                junk, primary, processed, tier = datasetName.split('/')
            else:
                primary, processed, tier = datasetName.split('/')
        except Exception as ex:
            msg = "Invalid dataset name %s" % datasetName
            logging.error(msg)
            raise DBSBlockException(msg)
Example #17
    def queueWork(self, wmspecUrl, request = None, team = None):
        """
        Take and queue work from a WMSpec.

        If a request name is provided but doesn't match the WMSpec
        name, an error is raised.

        If a team is provided, work will only be available to queues
        belonging to that team.

        Duplicate specs will be ignored.
        """
        self.logger.info('queueWork() begin queueing "%s"' % wmspecUrl)
        wmspec = WMWorkloadHelper()
        wmspec.load(wmspecUrl)

        if request: # validate request name
            try:
                Lexicon.requestName(request)
            except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(wmspec, "Request name validation error: %s" % str(ex))
                raise error
            if request != wmspec.name():
                raise WorkQueueWMSpecError(wmspec, 'Request & workflow name mismatch %s vs %s' % (request, wmspec.name()))
Example #18
    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Get the blocks which pass the lumi mask restrictions. For each block return the list of lumis
            which were ok (given the lumi mask). The data structure returned is the following:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """

        # Get mask and convert to LumiList to make operations easier
        maskedBlocks = {}
        lumiMask = task.getLumiMask()
        taskMask = LumiList(compactList = lumiMask)

        # Find all the files that have runs and lumis we are interested in,
        # fill block lfn part of maskedBlocks

        for run, lumis in lumiMask.items():
            files = []
            for slumis in Lexicon.slicedIterator(lumis, 50):
                slicedFiles = dbs.dbs.listFileArray(dataset=datasetPath, run_num=run,
                                       lumi_list=slumis, detail=True)
                files.extend(slicedFiles)
            for file in files:
                blockName = file['block_name']
                fileName = file['logical_file_name']
                if blockName not in maskedBlocks:
                    maskedBlocks[blockName] = {}
                if fileName not in maskedBlocks[blockName]:
                    maskedBlocks[blockName][fileName] = LumiList()

        # Fill maskedLumis part of maskedBlocks

        for block in maskedBlocks:
            fileLumis = dbs.dbs.listFileLumis(block_name=block, validFileOnly = 1)
            for fileLumi in fileLumis:
                lfn = fileLumi['logical_file_name']
                # For each run : [lumis] mask by needed lumis, append to maskedBlocks
                if maskedBlocks[block].get(lfn, None) is not None:
                    lumiList = LumiList(runsAndLumis = {fileLumi['run_num']: fileLumi['lumi_section_num']})
                    maskedBlocks[block][lfn] += (lumiList & taskMask)

        return maskedBlocks
Example #19
def test(num_lumis):
    from dbs.apis.dbsClient import DbsApi
    dbsApi = DbsApi(url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader/')
    #dbsApi = DbsApi(url = 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader/')
    datasetPath = "/SMS-T5qqqqVV_mGluino-1200To1275_mLSP-1to1150_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIWinter15pLHE-MCRUN2_71_V1-v1/LHE"
    #datasetPath = "/QDTojWinc_NC_M-1200_TuneZ2star_8TeV-madgraph/Summer12pLHE-DMWM_Validation_DONOTDELETE_Alan_TEST-v1/GEN"
    run = 1
    lumis = range(1, num_lumis+1)

    files = []
    print "Starting queries to listFileArray (in dataset mode) at %s" % datetime.utcnow()
    for slumis in Lexicon.slicedIterator(lumis, 10):
        start = datetime.utcnow()
        print slumis
        slicedFiles = dbsApi.listFileArray(dataset=datasetPath, run_num=run, lumi_list=slumis, detail=True)
        files.extend(slicedFiles)
        end = datetime.utcnow()
        print "  slice completed in %s" % (end - start)

#    pprint(files)
    maskedBlocks = {}
    for lfn in files:
        blockName = lfn['block_name']
        fileName = lfn['logical_file_name']
        if blockName not in maskedBlocks:
            maskedBlocks[blockName] = {}
        if fileName not in maskedBlocks[blockName]:
            maskedBlocks[blockName][fileName] = LumiList()

#    pprint(maskedBlocks)
    print "\nStarting queries to listFileLumis at %s" % datetime.utcnow()
    for block in maskedBlocks:
        print block
        start = datetime.utcnow()
        fileLumis = dbsApi.listFileLumis(block_name=block, validFileOnly = 1)
        end = datetime.utcnow()
        print "  query completed in %s" % (end - start)
Example #20
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        lumiMask = task.getLumiMask()
        if lumiMask:
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

        for blockName in dbs.listFileBlocks(datasetPath):
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue

            blockSummary = dbs.getDBSSummaryInfo(block=blockName)
            if int(blockSummary.get('NumberOfFiles', 0)) == 0:
                logging.warning("Block %s being rejected for lack of valid files to process", blockName)
                self.badWork.append(blockName)
                continue

            if self.args['SliceType'] == 'NumberOfRuns':
                blockSummary['NumberOfRuns'] = dbs.listRuns(block=blockName)

            # check lumi restrictions
            if lumiMask:
                if blockName not in maskedBlocks:
                    logging.warning("Block %s doesn't pass the lumi mask constraints", blockName)
                    self.rejectedWork.append(blockName)
                    continue

                acceptedLumiCount = sum([len(maskedBlocks[blockName][lfn].getLumis()) for lfn in maskedBlocks[blockName]])
                ratioAccepted = 1. * acceptedLumiCount / float(blockSummary['NumberOfLumis'])
                maskedRuns = [maskedBlocks[blockName][lfn].getRuns() for lfn in maskedBlocks[blockName]]
                acceptedRuns = set(lumiMask.getRuns()).intersection(set().union(*maskedRuns))

                blockSummary['NumberOfFiles'] = len(maskedBlocks[blockName])
                blockSummary['NumberOfEvents'] = float(blockSummary['NumberOfEvents']) * ratioAccepted
                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfRuns'] = acceptedRuns
            # check run restrictions
            elif runWhiteList or runBlackList:
                runs = set(dbs.listRuns(block=blockName))
                # multi-run blocks need special accounting and more DBS calls
                recalculateLumiCounts = len(runs) > 1

                # apply blacklist and whitelist
                runs = runs.difference(runBlackList)
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    logging.warning("Block %s doesn't pass the runs constraints", blockName)
                    self.rejectedWork.append(blockName)
                    continue

                if recalculateLumiCounts:
                    # Recalculate the number of files, lumis and ~events accepted
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=blockName)

                    for fileEntry in fileInfo:
                        acceptedFile = False
                        for lumiInfo in fileEntry['LumiList']:
                            if lumiInfo['RunNumber'] in runs:
                                acceptedFile = True
                                acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedEventCount += fileEntry['NumberOfEvents']

                else:
                    acceptedLumiCount = blockSummary["NumberOfLumis"]
                    acceptedFileCount = blockSummary['NumberOfFiles']
                    acceptedEventCount = blockSummary['NumberOfEvents']

                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfFiles'] = acceptedFileCount
                blockSummary['NumberOfEvents'] = acceptedEventCount
                blockSummary['NumberOfRuns'] = runs

            validBlocks.append(blockSummary)

            if locations is None:
                locations = set(dbs.listFileBlockLocation(blockName))
            else:
                locations = locations.intersection(dbs.listFileBlockLocation(blockName))

        # all needed blocks present at these sites
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(set(self.cric.PNNstoPSNs(locations)))

        return validBlocks
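The locations bookkeeping above intersects block locations across the loop, so the dataset is finally mapped only to sites that host every accepted block. The accumulation pattern in isolation (site names invented):

blockLocations = [
    {'T1_US_FNAL_Disk', 'T2_CH_CERN'},
    {'T2_CH_CERN', 'T2_DE_DESY'},
]
locations = None
for locs in blockLocations:
    locations = set(locs) if locations is None else locations.intersection(locs)
print(locations)       # {'T2_CH_CERN'}: the only site holding all blocks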
Example #21
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath: []}  # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data)  # check dataset name
                for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                    blocks.append(str(block))

        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if blockName in self.blockBlackListModifier:
                # Don't duplicate blocks rejected before or blocks that were included and therefore are now in the blacklist
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                self.rejectedWork.append(blockName)
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                self.rejectedWork.append(blockName)
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([len(maskedBlocks[blockName][lfn].getLumis()) for lfn in maskedBlocks[blockName]])
                # use the information from getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio = lumis which are ok in the block / total num lumis
                ratioAccepted = 1. * accepted_lumis / block['NumberOfLumis']
                block['NumberOfEvents'] = block['NumberOfEvents'] * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    self.rejectedWork.append(blockName)
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                                acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += acceptedFileLumiCount * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.getTrustSitelists().get('trustlists'):
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = self.siteDB.PNNstoPSNs(dbs.listFileBlockLocation(block['block']))

            # TODO: need to decide what to do when no location is found.
            # There could be a network problem (no connection to DBS, PhEDEx)
            # or the DBS SE is not recorded (this will be retried anyway by the location mapper)
            if not self.data[block['block']]:
                self.data[block['block']] = ["NoInitialSite"]
            # # No sites for this block, move it to rejected
            #    self.rejectedWork.append(blockName)
            #    continue

            validBlocks.append(block)
        return validBlocks
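When a file only partially survives the run filter, its event count is prorated by the fraction of its lumi sections that were accepted, mirroring the recalculation loop above. The arithmetic on one invented file entry:

fileEntry = {'NumberOfEvents': 900,
             'LumiList': [{'RunNumber': 1, 'LumiSectionNumber': [1, 2]},
                          {'RunNumber': 2, 'LumiSectionNumber': [3]},
                          {'RunNumber': 2, 'LumiSectionNumber': [4]}]}
runs = {1}
acceptedFileLumiCount = sum(1 for li in fileEntry['LumiList'] if li['RunNumber'] in runs)
if acceptedFileLumiCount != len(fileEntry['LumiList']):
    events = acceptedFileLumiCount * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
else:
    events = fileEntry['NumberOfEvents']
print(events)          # 300.0: one of three lumi entries accepted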
Example #22
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            self.data = {datasetPath : []} # same structure as in WorkQueueElement
            #blocks = dbs.getFileBlocksInfo(datasetPath, locations = False)
        #else:
            #dataItems = self.data.keys()

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.extend(dbs.getFileBlocksInfo(datasetPath, blockName = str(data), locations = True))
            else:
                Lexicon.dataset(data) # check dataset name
                blocks.extend(dbs.getFileBlocksInfo(datasetPath, locations = True))

        for block in blocks:
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles']:
                continue

            # check block restrictions
            if blockWhiteList and block['Name'] not in blockWhiteList:
                continue
            if block['Name'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRuns returns a run number per lumi section
                full_lumi_list = dbs.listRuns(block = block['Name'])
                runs = set(full_lumi_list)

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)

                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)
                accepted_lumis = [x for x in full_lumi_list if x in runs]
                ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
                block[self.lumiType] = len(accepted_lumis)
                block['NumberOfFiles'] *= ratio_accepted
                block['NumberOfEvents'] *= ratio_accepted

            # get lumi info if needed and not already available
            if self.args['SliceType'] == self.lumiType and not block.get(self.lumiType):
                blockSummary = dbs.getDBSSummaryInfo(block = block["Name"])
                block[self.lumiType] = blockSummary[self.lumiType]

            # save locations
            self.data[block['Name']] = sitesFromStorageEelements([x['Name'] for x in block['StorageElementList']])

            validBlocks.append(block)
        return validBlocks
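Work items in these policies are keyed by name, and a '#' distinguishes a single block from a whole dataset (block names look like '/Primary/Processed/Tier#uuid'). A tiny dispatch illustration with an invented name:

data = '/SingleMuon/Run2016B-v1/RAW#abc-123'
if data.find('#') > -1:
    datasetPath = str(data.split('#')[0])
    print('block item under dataset %s' % datasetPath)
else:
    print('dataset item %s' % data)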
Example #23
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {
                    datasetPath: []
                }  # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data)  # check dataset name
                for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                    blocks.append(str(block))

        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if blockName in self.blockBlackListModifier:
                # Don't duplicate blocks rejected before or blocks that were included and therefore are now in the blacklist
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                self.rejectedWork.append(blockName)
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                self.rejectedWork.append(blockName)
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([
                    len(maskedBlocks[blockName][lfn].getLumis())
                    for lfn in maskedBlocks[blockName]
                ])
                # use the information from getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio =  lumis which are ok in the block / total num lumis
                ratioAccepted = 1. * accepted_lumis / float(
                    block['NumberOfLumis'])
                block['NumberOfEvents'] = float(
                    block['NumberOfEvents']) * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    self.rejectedWork.append(blockName)
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(
                        fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                                acceptedLumiCount += len(
                                    lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] \
                                                      / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry[
                                    'NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.getTrustSitelists().get('trustlists'):
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = self.siteDB.PNNstoPSNs(
                    dbs.listFileBlockLocation(block['block']))

            # TODO: need to decide what to do when no location is found.
            # There could be a network problem (no connection to DBS, PhEDEx)
            # or the DBS SE is not recorded (this will be retried anyway by the location mapper)
            if not self.data[block['block']]:
                self.data[block['block']] = ["NoInitialSite"]
            # # No sites for this block, move it to rejected
            #    self.rejectedWork.append(blockName)
            #    continue

            validBlocks.append(block)
        return validBlocks
Example #24
    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        try:
            workLoads = self.getAvailableRequests()
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check it's not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(
                            None,
                            "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" %
                                 (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl,
                                        request=reqName,
                                        team=team)
                self.logdb.delete(reqName,
                                  "error",
                                  this_thread=True,
                                  agent=False)
            except TERMINAL_EXCEPTIONS as ex:
                # fatal error - report back to ReqMgr
                self.logger.error(
                    'Permanent failure processing request "%s": %s' %
                    (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" %
                                 reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isn't a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s unit(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" %
                             work)
        return work
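The exception ladder in queueNewRequests triages failures: terminal spec problems are reported back to ReqMgr as Failed, while I/O and Couch errors are only logged so the request can be retried later. A minimal sketch of that triage pattern, with stand-in exception classes and an invented action callable:

class SpecError(Exception):
    pass                     # stand-in for the TERMINAL_EXCEPTIONS members

def process_request(reqName, action):
    try:
        return action(reqName)
    except SpecError as ex:
        print('Permanent failure for %s: %s -> mark Failed in ReqMgr' % (reqName, ex))
    except (IOError, OSError) as ex:
        print('Transient failure for %s: %s -> retry later' % (reqName, ex))
    except Exception:
        print('Unknown error for %s -> log traceback, retry later' % reqName)
    return 0

def failing(reqName):
    raise SpecError('invalid spec')

print(process_request('req_A', lambda r: 3))    # 3 units queued
process_request('req_B', failing)               # takes the permanent-failure path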
Example #25
class WorkQueueReqMgrInterface():
    """Helper class for ReqMgr interaction"""
    def __init__(self, **kwargs):
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        self.reqMgr = RequestManager(kwargs)
        self.previous_state = {}

    def __call__(self, queue):
        """Synchronize WorkQueue and RequestManager"""
        msg = ''
        try:  # pull in new work
            work = self.queueNewRequests(queue)
            msg += "New Work: %d\n" % work
        except Exception:
            self.logger.exception("Error caught during RequestManager pull")

        try:  # get additional open-running work
            extraWork = self.addNewElementsToOpenRequests(queue)
            msg += "Work added: %d\n" % extraWork
        except Exception:
            self.logger.exception("Error caught during RequestManager split")

        try:  # report back to ReqMgr
            uptodate_elements = self.report(queue)
            msg += "Updated ReqMgr status for: %s\n" % ", ".join(
                [x['RequestName'] for x in uptodate_elements])
        except Exception:
            self.logger.exception("Error caught during RequestManager update")
        else:
            try:  # Delete finished requests from WorkQueue
                self.deleteFinishedWork(queue, uptodate_elements)
            except Exception:
                self.logger.exception("Error caught during work deletion")

        queue.backend.recordTaskActivity('reqmgr_sync', msg)

    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info(
                'Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(*queue.params['Teams'])
        except Exception, ex:
            msg = "Error contacting RequestManager: %s" % str(ex)
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            #            try:
            #                self.reportRequestStatus(reqName, "negotiating")
            #            except Exception, ex:
            #                self.logger.error("""
            #                    Unable to update ReqMgr state to negotiating: %s
            #                    Ignoring this request: %s""" % (str(ex), reqName))
            #                continue

            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception, ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check it's not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(
                            None,
                            "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" %
                                 (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl,
                                        request=reqName,
                                        team=team)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError), ex:
                # fatal error - report back to ReqMgr
                self.logger.info(
                    'Permanent failure processing request "%s": %s' %
                    (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" %
                                 reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
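Each phase of the synchronization in __call__ above runs inside its own try/except, so a failure in one phase (say, reporting back to ReqMgr) does not stop the others on the same cycle. The pattern, distilled into a sketch with illustrative names:

def runSyncSteps(steps, logger):
    """Run each (name, callable) step; log failures and keep going."""
    summary = ''
    for name, step in steps:
        try:
            summary += "%s: %s\n" % (name, step())
        except Exception:
            logger.exception("Error caught during %s" % name)
    return summary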
Example No. 26
0
    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
#            try:
#                self.reportRequestStatus(reqName, "negotiating")
#            except Exception, ex:
#                self.logger.error("""
#                    Unable to update ReqMgr state to negotiating: %s
#                    Ignoring this request: %s""" % (str(ex), reqName))
#                continue

            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check it's not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl, request = reqName, team = team)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info('Permanent failure processing request "%s": %s' % (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" % reqName)
                self.reportRequestStatus(reqName, 'Failed', message = str(ex))
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.sendMessage(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isn't a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                      '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.sendMessage(reqName, msg, 'error')
                continue

            try:
                if self.reqmgr2Only:
                    self.reqMgr2.updateRequestStatus(reqName, "acquired")
                else:
                    self.markAcquired(reqName, queue.params.get('QueueURL', 'No Queue'))
            except Exception as ex:
                self.logger.warning("Unable to update ReqMgr state: %s" % str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s unit(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" % work)
        return work
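The except clauses above triage failures into three classes: spec problems are permanent (the request is marked Failed in ReqMgr), I/O and Couch errors are transient (retried next cycle), and unknown errors are logged with a traceback and also retried. A compact sketch of that triage, with stand-in exception tuples instead of the WorkQueue types:

FATAL = (ValueError,)            # stands in for WorkQueueWMSpecError, WorkQueueNoWorkError
TRANSIENT = (IOError, OSError)   # stands in for socket.error, CouchError, CouchConnectionError

def classifyFailure(ex):
    """Return 'failed' for permanent errors, 'retry' for everything else."""
    if isinstance(ex, FATAL):
        return 'failed'
    return 'retry'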
Example No. 27
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        if task.getTrustSitelists():
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                if recalculateLumiCounts:
                    # get correct lumi count
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(
                        fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                else:
                    acceptedLumiCount = block["NumberOfLumis"]
                    acceptedFileCount = block['NumberOfFiles']
                    acceptedEventCount = block['NumberOfEvents']

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)

                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

            validBlocks.append(block)
            if locations is None:
                locations = set(dbs.listFileBlockLocation(block['block']))
            else:
                locations = locations.intersection(
                    dbs.listFileBlockLocation(block['block']))

        # all needed blocks present at these sites
        if self.wmspec.getTrustLocationFlag():
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(
                set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks
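The run filtering above is plain set algebra: take the runs present in the block, subtract the blacklist, intersect with the whitelist when one is given, and skip the block if nothing survives. As a sketch:

def acceptedRuns(blockRuns, runWhiteList, runBlackList):
    """Return the set of runs to process; an empty set means skip the block."""
    runs = set(blockRuns)
    runs = runs.difference(runBlackList)
    if runWhiteList:
        runs = runs.intersection(runWhiteList)
    return runs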
Example No. 28
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask():  # if we have a lumi mask, get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath : []} # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data) # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data) # check dataset name
                for block in dbs.listFileBlocks(data):
                    blocks.append(str(block))


        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
                # use the information given from getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio = lumis which are ok in the block / total num lumis
                ratio_accepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRuns returns a run number per lumi section
                full_lumi_list = dbs.listRuns(block = block['block'])
                runs = set(full_lumi_list)

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)

                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)
                accepted_lumis = [x for x in full_lumi_list if x in runs]
                ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
                block[self.lumiType] = len(accepted_lumis)
                block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted

            # save locations
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

            validBlocks.append(block)
        return validBlocks
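With a lumi mask the block summary is rescaled from the accepted-lumi fraction instead of re-reading file details, so the event count becomes an estimate. The arithmetic in isolation (keys follow the DBS summary dictionary used above; the helper name is hypothetical):

def rescaleByLumis(block, acceptedLumis):
    """Scale NumberOfEvents by the fraction of accepted lumi sections."""
    ratio = 1. * acceptedLumis / float(block['NumberOfLumis'])
    block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio
    return block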
Example No. 29
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask():  # if we have a lumi mask, get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.inputLocationFlag():
            # Then get the locations from the site whitelist/blacklist + SiteDB
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            if siteWhitelist:
                # Just get the ses matching the whitelists
                self.sites = siteWhitelist
            elif siteBlacklist:
                # Get all CMS sites less the blacklist
                allSites = cmsSiteNames()
                self.sites = list(set(allSites) - set (siteBlacklist))
            else:
                # Run at any CMS site
                self.sites = cmsSiteNames()

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath : []} # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data) # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data) # check dataset name
                for block in dbs.listFileBlocks(data):
                    blocks.append(str(block))


        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
                # use the information given from getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio = lumis which are ok in the block / total num lumis
                ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block = block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName = block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents']/len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.inputLocationFlag():
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

            validBlocks.append(block)
        return validBlocks
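When the input location flag is set, candidate sites come from the site white/blacklists rather than from where the data currently sits. The resolution order above as a small function (allSites stands in for the cmsSiteNames() call):

def resolveSites(allSites, siteWhitelist, siteBlacklist):
    """Whitelist wins outright; otherwise all sites minus the blacklist;
    otherwise run at any site."""
    if siteWhitelist:
        return list(siteWhitelist)
    if siteBlacklist:
        return list(set(allSites) - set(siteBlacklist))
    return list(allSites)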
Example No. 30
0
    def queueNewRequests(self, queue):
        """Get requests from regMgr and queue to workqueue"""
        self.logger.info("Contacting Request manager for more work")
        work = 0
        workLoads = []

        if queue.params['DrainMode']:
            self.logger.info(
                'Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            workLoads = self.getAvailableRequests(queue.params['Teams'])
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for team, reqName, workLoadUrl in workLoads:
            #            try:
            #                self.reportRequestStatus(reqName, "negotiating")
            #            except Exception, ex:
            #                self.logger.error("""
            #                    Unable to update ReqMgr state to negotiating: %s
            #                    Ignoring this request: %s""" % (str(ex), reqName))
            #                continue

            try:
                try:
                    Lexicon.couchurl(workLoadUrl)
                except Exception as ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                    # check it's not a local file
                    if not os.path.exists(workLoadUrl):
                        error = WorkQueueWMSpecError(
                            None,
                            "Workflow url validation error: %s" % str(ex))
                        raise error

                self.logger.info("Processing request %s at %s" %
                                 (reqName, workLoadUrl))
                units = queue.queueWork(workLoadUrl,
                                        request=reqName,
                                        team=team)
                self.logdb.delete(reqName, "error", this_thread=True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - report back to ReqMgr
                self.logger.info(
                    'Permanent failure processing request "%s": %s' %
                    (reqName, str(ex)))
                self.logger.info("Marking request %s as failed in ReqMgr" %
                                 reqName)
                self.reportRequestStatus(reqName, 'Failed', message=str(ex))
                continue
            except (IOError, socket.error, CouchError,
                    CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' \
                      '\nError: "%s"' % (reqName, str(ex))
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isn't a communication problem
                msg = 'Error processing request "%s": will try again later.' \
                      '\nSee log for details.\nError: "%s"' % (reqName, str(ex))
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            try:
                self.reportRequestStatus(reqName, "acquired")
            except Exception as ex:
                self.logger.warning("Unable to update ReqMgr state: %s" %
                                    str(ex))
                self.logger.warning('Will try again later')

            self.logger.info('%s unit(s) queued for "%s"' % (units, reqName))
            work += units

            self.logger.info("%s element(s) obtained from RequestManager" %
                             work)
        return work
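This variant also keeps per-request error records: a message is posted to logdb on every failed attempt and deleted once the request finally queues, so stale errors do not linger. A sketch of that bookkeeping with a minimal in-memory stand-in for logdb:

class ErrorLog(object):
    """Toy stand-in for the logdb post/delete pattern above."""
    def __init__(self):
        self.records = {}

    def post(self, request, msg, mtype):
        self.records.setdefault(request, []).append((mtype, msg))

    def delete(self, request, mtype, this_thread=True):
        self.records.pop(request, None)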
Example No. 31
0
            except Exception, ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(
                    self.wmspec,
                    "Site whitelist validation error: %s" % str(ex))
                raise error

        if self.initialTask.siteBlacklist():
            if type(self.initialTask.siteBlacklist()) in types.StringTypes:
                error = WorkQueueWMSpecError(
                    self.wmspec,
                    'Invalid site blacklist: Must be tuple/list but is %s' %
                    type(self.initialTask.siteBlacklist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteBlacklist()]
            except Exception, ex:  # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(
                    self.wmspec,
                    "Site blacklist validation error: %s" % str(ex))
                raise error

        # splitter settings
        if self.args.get('SliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(
                self.wmspec, 'Zero or negative SliceSize parameter')
            raise error
        if self.args.get('SubSliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(
Example No. 32
0
class WorkQueue(WorkQueueBase):
    """
    _WorkQueue_

    WorkQueue object - interface to WorkQueue functionality.
    """
    def __init__(self, logger=None, dbi=None, **params):

        WorkQueueBase.__init__(self, logger, dbi)
        self.parent_queue = None
        self.params = params

        # config argument (within params) shall be reference to
        # Configuration instance (will later be checked for presence of "Alert")
        self.config = params.get("Config", None)
        self.params.setdefault('CouchUrl', os.environ.get('COUCHURL'))
        if not self.params.get('CouchUrl'):
            raise RuntimeError, 'CouchUrl config value mandatory'
        self.params.setdefault('DbName', 'workqueue')
        self.params.setdefault('InboxDbName', self.params['DbName'] + '_inbox')
        self.params.setdefault('ParentQueueCouchUrl',
                               None)  # We get work from here

        self.backend = WorkQueueBackend(self.params['CouchUrl'],
                                        self.params['DbName'],
                                        self.params['InboxDbName'],
                                        self.params['ParentQueueCouchUrl'],
                                        self.params.get('QueueURL'),
                                        logger=self.logger)
        if self.params.get('ParentQueueCouchUrl'):
            try:
                self.parent_queue = WorkQueueBackend(
                    self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
                    self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1])
            except IndexError, ex:
                # Probable cause: Someone didn't put the global WorkQueue name in
                # the ParentCouchUrl
                msg = "Parsing failure for ParentQueueCouchUrl - probably missing dbname in input\n"
                msg += "Exception: %s\n" % str(ex)
                msg += str("ParentQueueCouchUrl: %s\n" %
                           self.params['ParentQueueCouchUrl'])
                self.logger.error(msg)
                raise WorkQueueError(msg)
            self.params['ParentQueueCouchUrl'] = self.parent_queue.queueUrl

        self.params.setdefault(
            "GlobalDBS",
            "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.params.setdefault('QueueDepth',
                               0.5)  # when less than this locally
        self.params.setdefault('LocationRefreshInterval', 600)
        self.params.setdefault('FullLocationRefreshInterval', 7200)
        self.params.setdefault('TrackLocationOrSubscription', 'subscription')
        self.params.setdefault('ReleaseIncompleteBlocks', False)
        self.params.setdefault('ReleaseRequireSubscribed', True)
        self.params.setdefault('PhEDExEndpoint', None)
        self.params.setdefault('PopulateFilesets', True)
        self.params.setdefault('LocalQueueFlag', True)
        self.params.setdefault('QueueRetryTime', 86400)
        self.params.setdefault('stuckElementAlertTime', 86400)
        self.params.setdefault('reqmgrCompleteGraceTime', 604800)
        self.params.setdefault('cancelGraceTime', 604800)

        self.params.setdefault('JobDumpConfig', None)
        self.params.setdefault('BossAirConfig', None)

        self.params['QueueURL'] = self.backend.queueUrl  # url this queue is visible on
        # backend took previous QueueURL and sanitized it
        self.params.setdefault('WMBSUrl',
                               None)  # this will only be set on local Queue
        if self.params.get('WMBSUrl'):
            self.params['WMBSUrl'] = Lexicon.sanitizeURL(
                self.params['WMBSUrl'])['url']
        self.params.setdefault('Teams', [])
        self.params.setdefault('DrainMode', False)
        if self.params.get('CacheDir'):
            try:
                os.makedirs(self.params['CacheDir'])
            except OSError:
                pass
        elif self.params.get('PopulateFilesets'):
            raise RuntimeError, 'CacheDir mandatory for local queue'

        self.params.setdefault('SplittingMapping', {})
        self.params['SplittingMapping'].setdefault('DatasetBlock', {
            'name': 'Block',
            'args': {}
        })
        self.params['SplittingMapping'].setdefault('MonteCarlo', {
            'name': 'MonteCarlo',
            'args': {}
        })
        self.params['SplittingMapping'].setdefault('Dataset', {
            'name': 'Dataset',
            'args': {}
        })
        self.params['SplittingMapping'].setdefault('Block', {
            'name': 'Block',
            'args': {}
        })
        self.params['SplittingMapping'].setdefault('ResubmitBlock', {
            'name': 'ResubmitBlock',
            'args': {}
        })

        self.params.setdefault('EndPolicySettings', {})

        assert (self.params['TrackLocationOrSubscription']
                in ('subscription', 'location'))
        # Can only release blocks on location
        if self.params['TrackLocationOrSubscription'] == 'location':
            if self.params['SplittingMapping']['DatasetBlock']['name'] != 'Block':
                raise RuntimeError, 'Only blocks can be released on location'

        if self.params.get('PhEDEx'):
            self.phedexService = self.params['PhEDEx']
        else:
            phedexArgs = {}
            if self.params.get('PhEDExEndpoint'):
                phedexArgs['endpoint'] = self.params['PhEDExEndpoint']
            self.phedexService = PhEDEx(phedexArgs)

        if self.params.get('SiteDB'):
            self.SiteDB = self.params['SiteDB']
        else:
            self.SiteDB = SiteDB()

        if type(self.params['Teams']) in types.StringTypes:
            self.params['Teams'] = [x.strip() for x in \
                                    self.params['Teams'].split(',')]

        self.dataLocationMapper = WorkQueueDataLocationMapper(
            self.logger,
            self.backend,
            phedex=self.phedexService,
            sitedb=self.SiteDB,
            locationFrom=self.params['TrackLocationOrSubscription'],
            incompleteBlocks=self.params['ReleaseIncompleteBlocks'],
            requireBlocksSubscribed=not self.params['ReleaseIncompleteBlocks'],
            fullRefreshInterval=self.params['FullLocationRefreshInterval'],
            updateIntervalCoarseness=self.params['LocationRefreshInterval'])

        # initialize alerts sending client (self.sendAlert() method)
        # usage: self.sendAlert(levelNum, msg = msg) ; level - integer 1 .. 10
        #    1 - 4 - lower levels ; 5 - 10 higher levels
        preAlert, self.alertSender = \
            alertAPI.setUpAlertsMessaging(self, compName = "WorkQueueManager")
        self.sendAlert = alertAPI.getSendAlert(sender=self.alertSender,
                                               preAlert=preAlert)

        self.logger.debug("WorkQueue created successfully")
Example No. 33
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath) # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        siteWhiteList = task.siteWhitelist()

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block = block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue
                
                if recalculateLumiCounts:
                    # get correct lumi count
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName = block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents']/len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                else:
                    acceptedLumiCount = block["NumberOfLumis"]
                    acceptedFileCount = block['NumberOfFiles']
                    acceptedEventCount = block['NumberOfEvents']
                    
                # update effective size of block with the accepted counts
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

            validBlocks.append(block)
            if locations is None:
                locations = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
            else:
                locations = locations.intersection(set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))))
            
            if self.wmspec.locationDataSourceFlag():
                locations = locations.union(siteWhiteList)

        # all needed blocks present at these sites
        if locations:
            self.data[datasetPath] = list(locations)
        return validBlocks
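Because the dataset is handled as a single unit here, its usable locations are the intersection of every block's site list: only sites that host all needed blocks qualify. The running intersection as a sketch (locate stands in for dbs.listFileBlockLocation plus the SE-to-site mapping):

def commonLocations(blocks, locate):
    """Intersect the site sets of all blocks; empty means no common site."""
    locations = None
    for block in blocks:
        sites = set(locate(block))
        locations = sites if locations is None else locations.intersection(sites)
    return locations or set()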
Example No. 34
0
        if self.initialTask.siteWhitelist():
            if type(self.initialTask.siteWhitelist()) in types.StringTypes:
                error = WorkQueueWMSpecError(self.wmspec, 'Invalid site whitelist: Must be tuple/list but is %s' % type(self.initialTask.siteWhitelist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteWhitelist()]
            except Exception, ex: # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(self.wmspec, "Site whitelist validation error: %s" % str(ex))
                raise error

        if self.initialTask.siteBlacklist():
            if type(self.initialTask.siteBlacklist()) in types.StringTypes:
                error = WorkQueueWMSpecError(self.wmspec, 'Invalid site blacklist: Must be tuple/list but is %s' % type(self.initialTask.siteBlacklist()))
                raise error
            try:
                [Lexicon.cmsname(site) for site in self.initialTask.siteBlacklist()]
            except Exception, ex: # can throw many errors e.g. AttributeError, AssertionError etc.
                error = WorkQueueWMSpecError(self.wmspec, "Site blacklist validation error: %s" % str(ex))
                raise error

        # splitter settings
        if self.args.get('SliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(self.wmspec, 'Zero or negative SliceSize parameter')
            raise error
        if self.args.get('SubSliceSize', 1) <= 0:
            error = WorkQueueWMSpecError(self.wmspec, 'Zero or negative SubSliceSize parameter')
            raise error

        # check input dataset is valid
        try:
            if self.initialTask.getInputDatasetPath():
Example No. 35
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)
                acceptedLumiCount = 0
                fullLumiCount = 0
                for run in runLumis:
                    if run in runs:
                        acceptedLumiCount += runLumis[run]
                    fullLumiCount += runLumis[run]
                ratioAccepted = float(acceptedLumiCount) / fullLumiCount
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = int(
                    float(block['NumberOfFiles']) * ratioAccepted)
                block['NumberOfEvents'] = int(
                    float(block['NumberOfEvents']) * ratioAccepted)

            validBlocks.append(block)
            if locations is None:
                locations = set(
                    sitesFromStorageEelements(
                        dbs.listFileBlockLocation(block['block'])))
            else:
                locations = locations.intersection(
                    set(
                        sitesFromStorageEelements(
                            dbs.listFileBlockLocation(block['block']))))

        # all needed blocks present at these sites
        if locations:
            self.data[datasetPath] = list(locations)
        return validBlocks
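In this last variant the accepted fraction is computed per lumi section, weighting each run by how many lumis it contributes, and the file and event counts are scaled by that fraction. The arithmetic in isolation (runLumis maps run number to its lumi count, as dbs.listRunLumis returns above):

def acceptedLumiRatio(runLumis, acceptedRuns):
    """Return (acceptedLumiCount, ratio of accepted to total lumis)."""
    acceptedLumiCount = sum(count for run, count in runLumis.items() if run in acceptedRuns)
    fullLumiCount = sum(runLumis.values())
    return acceptedLumiCount, float(acceptedLumiCount) / fullLumiCount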