Ejemplo n.º 1
0
    def __init__(self, msConfig, **kwargs):
        """
        Set up the logger and the service clients shared by the
        MSTransferor and MSMonitor classes.

        :param msConfig: MS service configuration
        :param kwargs: optional settings, which can also be used to skip the
            initialization of specific services:
            logger: logger object
            skipReqMgr: boolean to skip ReqMgr initialization
            skipReqMgrAux: boolean to skip ReqMgrAux initialization
            skipRucio: boolean to skip Rucio initialization
        """
        verbose = getattr(msConfig, 'verbose', False)
        self.logger = getMSLogger(verbose, kwargs.get("logger"))
        self.msConfig = msConfig
        self.logger.info("Configuration including default values:\n%s", self.msConfig)

        skipReqMgr = kwargs.get("skipReqMgr", False)
        if not skipReqMgr:
            self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)

        skipReqMgrAux = kwargs.get("skipReqMgrAux", False)
        if not skipReqMgrAux:
            auxHttpOpts = {'cacheduration': 1.0}
            self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                       httpDict=auxHttpOpts, logger=self.logger)

        # both attributes always exist; the Rucio client is only created
        # when it has not been explicitly skipped
        self.phedex = None
        self.rucio = None
        skipRucio = kwargs.get("skipRucio", False)
        if not skipRucio:
            rucioOpts = {"logger": self.logger, "user_agent": "wmcore-microservices"}
            self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                               hostUrl=self.msConfig['rucioUrl'],
                               authUrl=self.msConfig['rucioAuthUrl'],
                               configDict=rucioOpts)
Ejemplo n.º 2
0
    def testConfig(self):
        """
        Test service attributes and the override mechanism
        """
        # every default argument must be exposed unchanged by the wrapped client
        for key in self.defaultArgs:
            self.assertEqual(getattr(self.myRucio.cli, key),
                             self.defaultArgs[key])
        self.assertTrue(
            getattr(self.myRucio.cli,
                    "user_agent").startswith("wmcore-client/"))
        self.assertTrue(
            getattr(self.client, "user_agent").startswith("rucio-clients/"))

        newParams = {
            "host": 'http://cms-rucio-int.cern.ch',
            "auth_host": 'https://cms-rucio-auth-int.cern.ch',
            "auth_type": "x509",
            "account": self.acct,
            "ca_cert": False,
            "timeout": 5,
            "phedexCompatible": False
        }
        # "phedexCompatible" is verified on the wrapper object further down, so it
        # is left out of the per-key client checks. A real list is built here
        # because dict.keys() is a view without remove() in Python 3.
        newKeys = [key for key in newParams if key != "phedexCompatible"]

        rucio = Rucio(newParams['account'],
                      hostUrl=newParams['host'],
                      authUrl=newParams['auth_host'],
                      configDict=newParams)

        self.assertEqual(getattr(rucio, "phedexCompat"), False)
        for key in newKeys:
            self.assertEqual(getattr(rucio.cli, key), newParams[key])
Ejemplo n.º 3
0
 def __init__(self):
     """
     Initialise the fetcher and create its Rucio client
     """
     super(PileupFetcher, self).__init__()
     # FIXME: find a way to pass the Rucio account name to this fetcher module
     acctName = "wmcore_transferor"
     self.rucioAcct = acctName
     self.rucio = Rucio(acctName)
Ejemplo n.º 4
0
 def __init__(self, msConfig, logger=None):
     """
     Initialise the parent class, then set the update interval and
     create the Rucio client used by MSMonitor.

     :param msConfig: MS service configuration
     :param logger: optional logger object, forwarded to the parent class
     """
     super(MSMonitor, self).__init__(msConfig, logger=logger)
     # update interval is used to check records in CouchDB and update them
     # after this interval, default 6h
     sixHours = 6 * 60 * 60
     self.updateInterval = self.msConfig.get('updateInterval', sixHours)
     rucioOpts = {"logger": self.logger, "user_agent": "WMCore-MSMonitor"}
     self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                        hostUrl=self.msConfig['rucioUrl'],
                        authUrl=self.msConfig['rucioAuthUrl'],
                        configDict=rucioOpts)
Ejemplo n.º 5
0
    def __init__(self, config):
        """
        Read the RucioInjector component settings and set up the caches
        and the Rucio client.

        :param config: agent configuration object; its RucioInjector and
            Agent sections are read here
        :raises RucioInjectorException: if the configured 'project' DID
            metadata is not listed in RUCIO_VALID_PROJECT
        """
        BaseWorkerThread.__init__(self)

        injCfg = config.RucioInjector
        self.enabled = injCfg.enabled
        # dataset rule creation has a larger polling cycle
        self.pollRules = injCfg.pollIntervalRules
        self.lastRulesExecTime = 0
        self.createBlockRules = injCfg.createBlockRules
        self.containerDiskRuleParams = injCfg.containerDiskRuleParams
        self.containerDiskRuleRSEExpr = injCfg.containerDiskRuleRSEExpr
        self.skipRulesForTiers = injCfg.skipRulesForTiers
        self.listTiersToInject = injCfg.listTiersToInject

        didProject = injCfg.metaDIDProject
        if didProject not in RUCIO_VALID_PROJECT:
            msg = "Component configured with an invalid 'project' DID: %s"
            raise RucioInjectorException(msg % didProject)
        self.metaDIDProject = dict(project=didProject)

        # containers are kept in cache three times longer than blocks
        blockExpiry = injCfg.cacheExpiration
        self.containersCache = MemoryCache(blockExpiry * 3, set())
        self.blocksCache = MemoryCache(blockExpiry, set())

        self.scope = getattr(injCfg, "scope", "cms")
        self.rucioAcct = injCfg.rucioAccount
        rucioOpts = {'logger': self.logger}
        self.rucio = Rucio(acct=self.rucioAcct,
                           hostUrl=injCfg.rucioUrl,
                           authUrl=injCfg.rucioAuthUrl,
                           configDict=rucioOpts)

        # metadata information to be added to block/container rules;
        # it has to be a JSON string rather than a python dictionary
        agentInfo = dict(agentHost=config.Agent.hostName,
                         userAgent=config.Agent.agentName)
        self.metaData = json.dumps(agentInfo)

        self.testRSEs = injCfg.RSEPostfix
        self.filesToRecover = []

        # output data placement has a different behaviour between T0 and Production agents
        self.isT0agent = hasattr(config, "Tier0Feeder")
        if self.isT0agent:
            logging.info("RucioInjector running on a T0 WMAgent")

        if self.listTiersToInject:
            logging.info("Component configured to only inject data for data tiers: %s",
                         self.listTiersToInject)
        else:
            logging.info("Component configured to inject all the data tiers")
        logging.info("Component configured to skip container rule creation for data tiers: %s",
                     self.skipRulesForTiers)
        logging.info("Component configured to create block rules: %s", self.createBlockRules)
Ejemplo n.º 6
0
 def __init__(self):
     """
     Initialise the fetcher and create the data location client:
     Rucio when usingRucio() is true, PhEDEx otherwise
     """
     super(PileupFetcher, self).__init__()
     if not usingRucio():
         self.phedex = PhEDEx()  # this will go away eventually
     else:
         # FIXME: find a way to pass the Rucio account name to this fetcher module
         acctName = "wmcore_transferor"
         self.rucioAcct = acctName
         self.rucio = Rucio(acctName)
Ejemplo n.º 7
0
 def __init__(self):
     """
     Initialise the fetcher and create the data location client:
     Rucio when usingRucio() is true, PhEDEx otherwise
     """
     super(PileupFetcher, self).__init__()
     if not usingRucio():
         self.phedex = PhEDEx()  # this will go away eventually
     else:
         # Too much work to pass the rucio account name all the way to here
         # just use the production rucio account for resolving pileup location
         rucioOpts = {'phedexCompatible': False}
         self.rucio = Rucio("wma_prod", configDict=rucioOpts)
Ejemplo n.º 8
0
def getFromRucio(dataset, logger):
    """
    Fetch all the blocks of a container through the WMCore Rucio object.

    :param dataset: name of the container to look up
    :param logger: logger object handed over to the Rucio wrapper
    :return: dictionary keyed by block name, the value being the 'length'
        field of the block DID (the amount of files)
    """
    rucioOpts = {'logger': logger}
    client = Rucio(acct=RUCIO_ACCT,
                   hostUrl=RUCIO_HOST,
                   authUrl=RUCIO_AUTH,
                   configDict=rucioOpts)

    filesPerBlock = {}
    for blockName in client.getBlocksInContainer(dataset):
        numFiles = client.getDID(blockName)['length']
        # the first value seen for a block name wins
        if blockName not in filesPerBlock:
            filesPerBlock[blockName] = numFiles
    return filesPerBlock
Ejemplo n.º 9
0
    def __init__(self, config):
        """
        Read the RucioInjector component settings and set up the caches
        and the Rucio client.

        :param config: agent configuration object; its RucioInjector and
            Agent sections are read here
        """
        BaseWorkerThread.__init__(self)

        injCfg = config.RucioInjector
        self.enabled = injCfg.enabled
        # dataset rule creation has a larger polling cycle
        self.pollRules = injCfg.pollIntervalRules
        self.lastRulesExecTime = 0
        self.createBlockRules = injCfg.createBlockRules
        self.skipRulesForTiers = injCfg.skipRulesForTiers
        self.listTiersToInject = injCfg.listTiersToInject

        # containers are kept in cache three times longer than blocks
        blockExpiry = injCfg.cacheExpiration
        self.containersCache = MemoryCache(blockExpiry * 3, set())
        self.blocksCache = MemoryCache(blockExpiry, set())

        self.scope = getattr(injCfg, "scope", "cms")
        self.rucioAcct = injCfg.rucioAccount
        rucioOpts = {'logger': self.logger}
        self.rucio = Rucio(acct=self.rucioAcct,
                           hostUrl=injCfg.rucioUrl,
                           authUrl=injCfg.rucioAuthUrl,
                           configDict=rucioOpts)

        # metadata information to be added to block/container rules;
        # it has to be a JSON string rather than a python dictionary
        agentInfo = dict(agentHost=config.Agent.hostName,
                         userAgent=config.Agent.agentName)
        self.metaData = json.dumps(agentInfo)

        self.testRSEs = injCfg.RSEPostfix
        self.filesToRecover = []

        logging.info("Component configured to only inject data for data tiers: %s",
                     self.listTiersToInject)
        logging.info("Component configured to skip container rule creation for data tiers: %s",
                     self.skipRulesForTiers)
        logging.info("Component configured to create block rules: %s",
                     self.createBlockRules)
Ejemplo n.º 10
0
 def _getDatasetLocation(self, dset, blockDict):
     """
     Resolve the location of the blocks of a dataset through Rucio and
     record it in blockDict under the 'PhEDExNodeNames' key of each block.

     :param dset: string with the dataset name
     :param blockDict: dictionary with DBS summary info, keyed by block name
     :return: nothing, blockDict is updated in place
     """
     # initialize Rucio here to avoid this authentication on T0-WMAgent
     self.rucio = Rucio(self.rucioAcct)
     replicasByBlock = self.rucio.getPileupLockedAndAvailable(dset,
                                                              account=self.rucioAcct)
     for name, locations in viewitems(replicasByBlock):
         nodeNames = list(locations)
         try:
             blockDict[name]['PhEDExNodeNames'] = nodeNames
         except KeyError:
             # Rucio knows a block that is missing from the DBS summary
             logging.warning("Block '%s' present in Rucio but not in DBS",
                             name)
Ejemplo n.º 11
0
    def setUp(self):
        """
        Create the objects under test: the WMCore Rucio wrapper and a
        plain test client, both built from the same default arguments
        """
        super(RucioTest, self).setUp()

        defaults = self.defaultArgs
        self.myRucio = Rucio(self.acct,
                             hostUrl=defaults['host'],
                             authUrl=defaults['auth_host'],
                             configDict=defaults)

        clientArgs = dict(rucio_host=defaults['host'],
                          auth_host=defaults['auth_host'],
                          account=self.acct,
                          ca_cert=defaults['ca_cert'],
                          auth_type=defaults['auth_type'],
                          creds=defaults['creds'],
                          timeout=defaults['timeout'])
        self.client = testClient(**clientArgs)
Ejemplo n.º 12
0
    def testGetReplicaInfoForBlocksRucio(self):
        """
        Test `getReplicaInfoForBlocks` method, however not using
        the output compatibility with PhEDEx
        """
        theseArgs = self.defaultArgs.copy()
        theseArgs['phedexCompatible'] = False
        myRucio = Rucio(self.acct,
                        hostUrl=theseArgs['host'],
                        authUrl=theseArgs['auth_host'],
                        configDict=theseArgs)

        res = myRucio.getReplicaInfoForBlocks(dataset=DSET)
        # dedicated assertions are used so that a failure reports the
        # offending value instead of a bare "False is not true"
        self.assertIsInstance(res, list)
        self.assertGreaterEqual(len(res), 1)  # at this very moment, there are 11 replicas
        blocks = [item['name'] for item in res]
        self.assertIn(BLOCK, blocks)
        for item in res:
            self.assertGreater(len(item['replica']), 0)
Ejemplo n.º 13
0
    def __init__(self, msConfig, **kwargs):
        """
        Set up the logger and the service clients shared by the
        MSTransferor and MSMonitor classes.

        :param msConfig: MS service configuration
        :param kwargs: optional settings, which can also be used to skip the
            initialization of specific services:
            logger: logger object
            skipReqMgr: boolean to skip ReqMgr initialization
            skipReqMgrAux: boolean to skip ReqMgrAux initialization
            skipRucio: boolean to skip Rucio initialization
            skipPhEDEx: boolean to skip PhEDEx initialization
        """
        verbose = getattr(msConfig, 'verbose', False)
        self.logger = getMSLogger(verbose, kwargs.get("logger"))
        self.msConfig = msConfig
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)

        skipReqMgr = kwargs.get("skipReqMgr", False)
        if not skipReqMgr:
            self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'],
                                  logger=self.logger)

        skipReqMgrAux = kwargs.get("skipReqMgrAux", False)
        if not skipReqMgrAux:
            auxHttpOpts = {'cacheduration': 1.0}
            self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                       httpDict=auxHttpOpts,
                                       logger=self.logger)

        # at most one of the two data management clients gets created:
        # Rucio when enabled in the configuration and not skipped,
        # otherwise PhEDEx unless it has been skipped as well
        self.phedex = None
        self.rucio = None
        useRucio = self.msConfig.get('useRucio', False)
        skipRucio = kwargs.get("skipRucio", False)
        if useRucio and not skipRucio:
            rucioOpts = {
                "logger": self.logger,
                "user_agent": "wmcore-microservices"
            }
            self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                               hostUrl=self.msConfig['rucioUrl'],
                               authUrl=self.msConfig['rucioAuthUrl'],
                               configDict=rucioOpts)
        elif not kwargs.get("skipPhEDEx", False):
            # hard code it to production DBS otherwise PhEDEx subscribe API fails to match TMDB data
            prodDbsUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
            phedexHttpOpts = {'cacheduration': 0.5}
            self.phedex = PhEDEx(httpDict=phedexHttpOpts,
                                 dbsUrl=prodDbsUrl,
                                 logger=self.logger)
Ejemplo n.º 14
0
    def setUp(self):
        """
        Prepare the databases, the workload spec and the agent
        configuration used by the WMBSHelper tests
        """
        super(WMBSHelperTest, self).setUp()

        # database and CouchDB setup
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump")
        self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
        os.environ["COUCHDB"] = "wmbshelper_t"
        self.testInit.setSchema(customModules=["WMCore.WMBS",
                                               "WMComponent.DBS3Buffer",
                                               "WMCore.BossAir",
                                               "WMCore.ResourceControl"],
                                useDefault=False)

        self.workDir = self.testInit.generateWorkDir()

        # workload spec and the services used to resolve its input data
        self.wmspec = self.createWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = DBSReader(self.inputDataset.dbsurl)
        self.rucioAcct = "wmcore_transferor"
        self.rucio = Rucio(self.rucioAcct)
        # threading.currentThread() is a deprecated alias of current_thread()
        myThread = threading.current_thread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        # agent configuration, with a JobSubmitter component added
        self.configFile = EmulatorSetup.setupWMAgentConfig()
        self.config = loadConfigurationFile(self.configFile)

        self.config.component_("JobSubmitter")
        self.config.JobSubmitter.submitDir = self.workDir
        self.config.JobSubmitter.submitScript = os.path.join(
            getTestBase(), 'WMComponent_t/JobSubmitter_t', 'submit.sh')
Ejemplo n.º 15
0
    def __init__(self, **kwargs):
        """
        Create the service clients (Rucio, ReqMgr2 and LogDB) used by this object.

        :param kwargs: optional settings:
            logger: logger object, defaults to the logging module
            rucioAccount: Rucio account name, defaults to "wmcore_transferor"
            reqmgr2_endpoint: value passed to ReqMgr, defaults to None
            central_logdb_url: first argument passed to LogDB, defaults to ""
            log_reporter: second argument passed to LogDB, defaults to ""
        """
        if not kwargs.get('logger'):
            import logging
            kwargs['logger'] = logging
        self.logger = kwargs['logger']
        self.rucio = Rucio(kwargs.get("rucioAccount", "wmcore_transferor"),
                           configDict=dict(logger=self.logger))
        # this will break all in one test
        self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

        centralurl = kwargs.get("central_logdb_url", "")
        identifier = kwargs.get("log_reporter", "")

        # set the thread name before creating the log db.
        # only sets that when it is not set already
        # (current_thread() and the name attribute replace the deprecated
        # currentThread()/getName()/setName() aliases)
        myThread = threading.current_thread()
        if myThread.name == "MainThread":
            myThread.name = self.__class__.__name__

        self.logdb = LogDB(centralurl, identifier, logger=self.logger)
Ejemplo n.º 16
0
 def __init__(self, **args):
     """
     Initialise the policy state and create the service clients:
     CRIC, plus Rucio when usingRucio() is true or PhEDEx otherwise.

     :param args: keyword arguments forwarded to PolicyInterface
     """
     PolicyInterface.__init__(self, **args)

     # workload related attributes, unset at construction time
     self.wmspec = None
     self.team = None
     self.initialTask = None
     self.splitParams = None
     self.lumi = None
     self.couchdb = None

     # containers that start out empty
     self.workQueueElements = []
     self.dbs_pool = {}
     self.data = {}
     self.pileupData = {}
     self.rejectedWork = []  # List of inputs that were rejected
     self.badWork = []  # list of bad work unit (e.g. without any valid files)

     self.cric = CRIC()
     if not usingRucio():
         self.phedex = PhEDEx()  # this will go away eventually
     else:
         rucioOpts = {'logger': self.logger}
         self.rucio = Rucio(self.args['rucioAcct'], configDict=rucioOpts)
Ejemplo n.º 17
0
 def __init__(self, **args):
     """
     Initialise the policy state and the service clients: CRIC and Rucio.

     :param args: keyword arguments forwarded to PolicyInterface; an
         optional "rucioObject" entry is removed first and used as the
         Rucio client instead of creating a new one
     """
     # We need to pop this object instance from args because otherwise
     # the super class blows up when doing a deepcopy(args)
     self.rucio = args.pop("rucioObject", None)
     PolicyInterface.__init__(self, **args)

     # workload related attributes, unset at construction time
     self.wmspec = None
     self.team = None
     self.initialTask = None
     self.splitParams = None
     self.lumi = None
     self.couchdb = None

     # containers that start out empty
     self.workQueueElements = []
     self.dbs_pool = {}
     self.data = {}
     self.pileupData = {}
     self.rejectedWork = []  # List of inputs that were rejected
     self.badWork = []  # list of bad work unit (e.g. without any valid files)

     self.cric = CRIC()
     # FIXME: for the moment, it will always use the default value
     self.rucioAcct = self.args.get("rucioAcct", "wmcore_transferor")
     if not self.rucio:
         rucioOpts = {'logger': self.logger}
         self.rucio = Rucio(self.rucioAcct, configDict=rucioOpts)
Ejemplo n.º 18
0
    def _queryAndCompareWithDBS(self, pileupDict, pileupConfig, dbsUrl):
        """
        Validate the pileup information against DBS and Rucio.

        For every dataset listed in pileupConfig, the block names, the
        block locations and the files per block found in pileupDict are
        compared with what DBS and Rucio return.

        :param pileupDict: dictionary with the pileup configuration
            information, keyed by pileup type
        :param pileupConfig: dictionary mapping each pileup type to its
            list of datasets
        :param dbsUrl: URL handed over to DBS3Reader
        """
        self.assertItemsEqual(list(pileupDict), list(pileupConfig))
        dbsReader = DBS3Reader(dbsUrl)
        rucioClient = Rucio(self.rucioAcct)

        for pileupType, datasets in viewitems(pileupConfig):
            # block information as produced by the PileupFetcher
            blocksInfo = pileupDict[pileupType]

            for dataset in datasets:
                dbsBlocks = dbsReader.listFileBlocks(dataset=dataset)
                rucioLocations = rucioClient.getPileupLockedAndAvailable(dataset,
                                                                         account=self.rucioAcct)

                # block names must agree with both DBS and Rucio
                self.assertItemsEqual(list(blocksInfo), dbsBlocks)
                self.assertItemsEqual(list(blocksInfo), list(rucioLocations))

                for block, blockInfo in viewitems(blocksInfo):
                    # block location: PileupFetcher against Rucio
                    self.assertItemsEqual(blockInfo['PhEDExNodeNames'], rucioLocations[block])

                    # files in the block: PileupFetcher against DBS
                    dbsLfns = [dbsFile["LogicalFileName"]
                               for dbsFile in dbsReader.listFilesInBlock(block)]
                    self.assertItemsEqual(blockInfo["FileList"], dbsLfns)
Ejemplo n.º 19
0
    def executeInternal(self, *args, **kwargs):

        self.logger.info(
            "Data discovery with DBS")  ## to be changed into debug

        dbsurl = self.config.Services.DBSUrl
        if kwargs['task']['tm_dbs_url']:
            dbsurl = kwargs['task']['tm_dbs_url']
        self.dbs = DBSReader(dbsurl)
        self.dbsInstance = self.dbs.dbs.serverinfo()["dbs_instance"]
        isUserDataset = self.dbsInstance.split('/')[1] != 'global'
        # where to look locations in pre-Rucio world
        PhEDExOrDBS = 'PhEDEx' if not isUserDataset else 'DBS origin site'

        taskName = kwargs['task']['tm_taskname']
        userProxy = kwargs['task']['user_proxy']
        self.logger.debug("Data discovery through %s for %s", self.dbs,
                          taskName)

        inputDataset = kwargs['task']['tm_input_dataset']
        secondaryDataset = kwargs['task'].get('tm_secondary_input_dataset',
                                              None)

        self.checkDatasetStatus(inputDataset, kwargs)
        if secondaryDataset:
            self.checkDatasetStatus(secondaryDataset, kwargs)

        try:
            # Get the list of blocks for the locations.
            # The WMCore DBS3 implementation makes one call to DBS for each block
            # when using locations=True so we are using locations=False and looking up location later
            blocks = [
                x['Name'] for x in self.dbs.getFileBlocksInfo(inputDataset,
                                                              locations=False)
            ]
            if secondaryDataset:
                secondaryBlocks = [
                    x['Name']
                    for x in self.dbs.getFileBlocksInfo(secondaryDataset,
                                                        locations=False)
                ]
        except DBSReaderError as dbsexc:
            # dataset not found in DBS is a known use case
            if str(dbsexc).find('No matching data'):
                raise TaskWorkerException(
                    "CRAB could not find dataset %s in this DBS instance: %s" %
                    inputDataset, dbsurl)
            raise
        ## Create a map for block's locations: for each block get the list of locations.
        ## Note: listFileBlockLocation() gets first the locations from PhEDEx, and if no
        ## locations are found it gets the original locations from DBS. So it should
        ## never be the case at this point that some blocks have no locations.
        ## locationsMap is a dictionary, key=blockName, value=list of PhedexNodes, example:
        ## {'/JetHT/Run2016B-PromptReco-v2/AOD#b10179dc-3723-11e6-9aa5-001e67abf228': [u'T1_IT_CNAF_Buffer', u'T2_US_Wisconsin', u'T1_IT_CNAF_MSS', u'T2_BE_UCL'],
        ## '/JetHT/Run2016B-PromptReco-v2/AOD#89b03ca6-1dc9-11e6-b567-001e67ac06a0': [u'T1_IT_CNAF_Buffer', u'T2_US_Wisconsin', u'T1_IT_CNAF_MSS', u'T2_BE_UCL']}

        # For now apply Rucio data location only to NANOAOD*
        # in time useRucioForLocations may become a more rich expression
        isNano = blocks[0].split("#")[0].split("/")[-1] in [
            "NANOAOD", "NANOAODSIM"
        ]
        if isNano:
            self.logger.info(
                "NANOAOD* datset. Will use Rucio for data location")
        useRucioForLocations = isNano
        locationsFoundWithRucio = False

        if not useRucioForLocations:
            self.logger.info("Will not use Rucio for this dataset")
        # if locations should be in Rucio, try it first and fall back to old ways if Rucio calls fail
        # of if they return no locations (possible Rucio teething pain). If Rucio returns a list, trust it.
        if useRucioForLocations:
            locationsMap = {}
            scope = "cms"
            # If the dataset is a USER one, use the Rucio user scope to find it
            # TODO: we need a way to enable users to indicate others user scopes as source
            if isUserDataset:
                scope = "user.%s" % kwargs['task']['tm_username']
            rucio_config_dict = {
                "phedexCompatible": True,
                "auth_type": "x509",
                "ca_cert": self.config.Services.Rucio_caPath,
                "logger": self.logger,
                "creds": {
                    "client_cert": self.config.TaskWorker.cmscert,
                    "client_key": self.config.TaskWorker.cmskey
                }
            }
            try:
                self.logger.info("Initializing Rucio client")
                # WMCore is awfully verbose
                with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
                    rucioClient = Rucio(
                        self.config.Services.Rucio_account,
                        hostUrl=self.config.Services.Rucio_host,
                        authUrl=self.config.Services.Rucio_authUrl,
                        configDict=rucio_config_dict)
                rucioClient.whoAmI()
                self.logger.info(
                    "Looking up data location with Rucio in %s scope.", scope)
                with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
                    locations = rucioClient.getReplicaInfoForBlocks(
                        scope=scope, block=list(blocks))
            except Exception as exc:
                msg = "Rucio lookup failed with\n%s" % str(exc)
                # TODO when removing fall-back to PhEDEx, this should be a fatal error
                # raise TaskWorkerException(msg)
                self.logger.warn(msg)
                locations = None

            # TODO when removing fall-back to PhEDEx, above code will raise if it fails, therefore
            # the following "if" must be removed and the code shifted left
            if locations:
                located_blocks = locations['phedex']['block']
                for element in located_blocks:
                    if element[
                            'replica']:  # only fill map for blocks which have at least one location
                        locationsMap.update({
                            element['name']:
                            [x['node'] for x in element['replica']]
                        })
                if locationsMap:
                    locationsFoundWithRucio = True
                else:
                    msg = "No locations found with Rucio for this dataset"
                    # since NANO* are not in PhEDEx, this should be a fatal error
                    if isNano:
                        raise TaskWorkerException(msg)
                    else:
                        # note it down and try with PhEDEx
                        self.logger.warn(msg)

        if not locationsFoundWithRucio:  # fall back to pre-Rucio methods
            try:
                self.logger.info("Looking up data locations using %s",
                                 PhEDExOrDBS)
                locationsMap = self.dbs.listFileBlockLocation(
                    list(blocks), dbsOnly=isUserDataset)
            except Exception as ex:
                raise TaskWorkerException(
                    "The CRAB3 server backend could not get the location of the files from dbs nor phedex nor rucio.\n"+\
                    "This is could be a temporary phedex/rucio/dbs glitch, please try to submit a new task (resubmit will not work)"+\
                    " and contact the experts if the error persists.\nError reason: %s" % str(ex)
                    )
            # only fill map for blocks which have at least one location
            locationsMap = {
                key: value
                for key, value in locationsMap.iteritems() if value
            }

        if secondaryDataset:
            secondaryLocationsMap = {}
            # see https://github.com/dmwm/CRABServer/issues/6075#issuecomment-641569446
            self.logger.info(
                "Trying data location of secondary blocks with Rucio")
            try:
                locations = rucioClient.getReplicaInfoForBlocks(
                    scope=scope, block=list(secondaryBlocks))
            except Exception as exc:
                locations = None
                secondaryLocationsMap = {}
                self.logger.warn("Rucio lookup failed with. %s", exc)
            if locations:
                located_blocks = locations['phedex']['block']
                for element in located_blocks:
                    if element[
                            'replica']:  # only fill map for blocks which have at least one location
                        secondaryLocationsMap.update({
                            element['name']:
                            [x['node'] for x in element['replica']]
                        })
            if not secondaryLocationsMap:
                msg = "No locations found with Rucio for secondaryDataset."
                # TODO when removing fall-back to PhEDEx, this should be a fatal error
                # raise TaskWorkerException(msg)
                self.logger.warn(msg)
                self.logger.info(
                    "Trying data location of secondary blocks with PhEDEx")
                try:
                    secondaryLocationsMap = self.dbs.listFileBlockLocation(
                        list(secondaryBlocks), dbsOnly=isUserDataset)
                except Exception as ex:
                    raise TaskWorkerException(
                        "The CRAB3 server backend could not get the location of the secondary dataset files from dbs or phedex or rucio.\n" + \
                        "This is could be a temporary phedex/rucio/dbs glitch, please try to submit a new task (resubmit will not work)" + \
                        " and contact the experts if the error persists.\nError reason: %s" % str(ex)
                    )
                # only fill map for blocks which have at least one location
                secondaryLocationsMap = {
                    key: value
                    for key, value in secondaryLocationsMap.iteritems()
                    if value
                }

        # From now on code is not dependent from having used Rucio or PhEDEx

        blocksWithLocation = locationsMap.keys()
        if secondaryDataset:
            secondaryBlocksWithLocation = secondaryLocationsMap.keys()

        self.keepOnlyDisks(locationsMap)
        if not locationsMap:
            msg = "Task could not be submitted because there is no DISK replica for dataset %s" % inputDataset
            if self.tapeLocations:
                msg += "\nN.B.: the input dataset is stored at %s, but those are TAPE locations." % ', '.join(
                    sorted(self.tapeLocations))
                # submit request to DDM
                ddmRequest = None
                ddmServer = self.config.TaskWorker.DDMServer
                try:
                    ddmRequest = blocksRequest(blocksWithLocation,
                                               ddmServer,
                                               self.config.TaskWorker.cmscert,
                                               self.config.TaskWorker.cmskey,
                                               verbose=False)
                except HTTPException as hte:
                    self.logger.exception(hte)
                    msg += "\nThe automatic stage-out failed, please try again later. If the error persists contact the experts and provide this error message:"
                    msg += "\nHTTP Error while contacting the DDM server %s:\n%s" % (
                        ddmServer, str(hte))
                    msg += "\nHTTP Headers are: %s" % hte.headers
                    msg += "\nYou might want to contact your physics group if you need a disk replica."
                    raise TaskWorkerException(msg, retry=True)

                self.logger.info("Contacted %s using %s and %s, got:\n%s",
                                 self.config.TaskWorker.DDMServer,
                                 self.config.TaskWorker.cmscert,
                                 self.config.TaskWorker.cmskey, ddmRequest)
                # The query above returns a JSON with a format {"result": "OK", "message": "Copy requested", "data": [{"request_id": 18, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:57:37", "last_request": "2018-02-26 23:57:37", "request_count": 1}]}
                if ddmRequest["result"] == "OK":
                    # set status to TAPERECALL
                    tapeRecallStatus = 'TAPERECALL'
                    ddmReqId = ddmRequest["data"][0]["request_id"]
                    configreq = {
                        'workflow': taskName,
                        'taskstatus': tapeRecallStatus,
                        'ddmreqid': ddmReqId,
                        'subresource': 'addddmreqid',
                    }
                    try:
                        tapeRecallStatusSet = self.server.post(
                            self.restURInoAPI + '/task',
                            data=urllib.urlencode(configreq))
                    except HTTPException as hte:
                        self.logger.exception(hte)
                        msg = "HTTP Error while contacting the REST Interface %s:\n%s" % (
                            self.config.TaskWorker.restHost, str(hte))
                        msg += "\nSetting %s status and DDM request ID (%d) failed for task %s" % (
                            tapeRecallStatus, ddmReqId, taskName)
                        msg += "\nHTTP Headers are: %s" % hte.headers
                        raise TaskWorkerException(msg, retry=True)

                    msg += "\nA disk replica has been requested on %s to CMS DDM (request ID: %d)" % (
                        ddmRequest["data"][0]["first_request"], ddmReqId)
                    if tapeRecallStatusSet[2] == "OK":
                        self.logger.info("Status for task %s set to '%s'",
                                         taskName, tapeRecallStatus)
                        msg += "\nThis task will be automatically submitted as soon as the stage-out is completed."
                        self.uploadWarning(msg, userProxy, taskName)

                        raise TapeDatasetException(msg)
                    else:
                        msg += ", please try again in two days."

                else:
                    msg += "\nThe disk replica request failed with this error:\n %s" % ddmRequest[
                        "message"]

            msg += "\nPlease, check DAS (https://cmsweb.cern.ch/das) and make sure the dataset is accessible on DISK."
            raise TaskWorkerException(msg)

        # will not need lumi info if user has asked for split by file with no run/lumi mask
        splitAlgo = kwargs['task']['tm_split_algo']
        lumiMask = kwargs['task']['tm_split_args']['lumis']
        runRange = kwargs['task']['tm_split_args']['runs']

        needLumiInfo = splitAlgo != 'FileBased' or lumiMask != [] or runRange != []
        # secondary dataset access relies on run/lumi info
        if secondaryDataset:
            needLumiInfo = True
        if needLumiInfo:
            self.checkBlocksSize(
                blocksWithLocation
            )  # Interested only in blocks with locations, 'blocks' may contain invalid ones and trigger an Exception
            if secondaryDataset:
                self.checkBlocksSize(secondaryBlocksWithLocation)
        try:
            filedetails = self.dbs.listDatasetFileDetails(
                inputDataset,
                getParents=True,
                getLumis=needLumiInfo,
                validFileOnly=0)
            if secondaryDataset:
                moredetails = self.dbs.listDatasetFileDetails(
                    secondaryDataset,
                    getParents=False,
                    getLumis=needLumiInfo,
                    validFileOnly=0)

                for secfilename, secinfos in moredetails.items():
                    secinfos['lumiobj'] = LumiList(
                        runsAndLumis=secinfos['Lumis'])

                self.logger.info(
                    "Beginning to match files from secondary dataset")
                for dummyFilename, infos in filedetails.items():
                    infos['Parents'] = []
                    lumis = LumiList(runsAndLumis=infos['Lumis'])
                    for secfilename, secinfos in moredetails.items():
                        if lumis & secinfos['lumiobj']:
                            infos['Parents'].append(secfilename)
                self.logger.info("Done matching files from secondary dataset")
                kwargs['task']['tm_use_parent'] = 1
        except Exception as ex:  #TODO should we catch HttpException instead?
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
                                "This is could be a temporary DBS glitch. Please try to submit a new task (resubmit will not work)"+\
                                " and contact the experts if the error persists.\nError reason: %s" % str(ex))
            #TODO addo the nodes phedex so the user can check themselves
        if not filedetails:
            raise TaskWorkerException(("Cannot find any file inside the dataset. Please, check your dataset in DAS, %s.\n" +\
                                "Aborting submission. Resubmitting your task will not help.") %\
                                ("https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s") %\
                                (self.dbsInstance, inputDataset))

        ## Format the output creating the data structures required by WMCore. Filters out invalid files,
        ## files whose block has no location, and figures out the PSN
        result = self.formatOutput(task=kwargs['task'],
                                   requestname=taskName,
                                   datasetfiles=filedetails,
                                   locations=locationsMap,
                                   tempDir=kwargs['tempDir'])

        if not result.result:
            raise TaskWorkerException((
                "Cannot find any valid file inside the dataset. Please, check your dataset in DAS, %s.\n"
                + "Aborting submission. Resubmitting your task will not help."
            ) % (
                "https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s"
            ) % (self.dbsInstance, inputDataset))

        self.logger.debug("Got %s files", len(result.result.getFiles()))

        return result
Ejemplo n.º 20
0
def loggerSetup(logLevel=logging.INFO):
    """
    Return a logger which writes everything to stdout.

    The logger is obtained with ``logging.getLogger(__name__)``, so repeated
    calls return the very same object. A stdout handler is attached only when
    the logger does not have one yet; attaching a new handler on every call
    would make each record be printed once per call made so far.

    :param logLevel: logging level applied to both the logger and its stdout handler
    :return: the configured logger object
    """
    logger = logging.getLogger(__name__)

    # Look for a stdout handler left behind by a previous call
    outHandler = None
    for handler in logger.handlers:
        if isinstance(handler, logging.StreamHandler) and getattr(handler, "stream", None) is sys.stdout:
            outHandler = handler
            break

    if outHandler is None:
        outHandler = logging.StreamHandler(sys.stdout)
        outHandler.setFormatter(logging.Formatter("%(asctime)s:%(levelname)s:%(module)s: %(message)s"))
        logger.addHandler(outHandler)

    # Always honour the level requested by the most recent call
    outHandler.setLevel(logLevel)
    logger.setLevel(logLevel)
    return logger


if __name__ == '__main__':
    # Command line options first, then a logger writing to stdout
    cliArgs = parseArgs()
    stdoutLogger = loggerSetup()

    # Client wrapper used to talk to Rucio; it logs through our logger
    clientConfig = {"logger": stdoutLogger, "user_agent": "amaltaro/makeRucioRules"}
    rucioClient = Rucio(acct=RUCIO_ACCT,
                        hostUrl=RUCIO_URL,
                        authUrl=RUCIO_AUTH_URL,
                        configDict=clientConfig)

    # Attributes of the replication rule, passed as keyword arguments below
    ruleAttrs = {
        'copies': 1,
        'activity': 'Production Input',
        'lifetime': None,
        'account': RUCIO_ACCT,
        'grouping': "ALL",
        'comment': 'WMCore MSTransferor input data placement',
    }
    stdoutLogger.info("\nCreating rule for DID: %s, with RSE: %s and other attrs: %s",
                      cliArgs.container, cliArgs.rse, ruleAttrs)

    # Create the rule for the requested container/RSE and report what came back
    response = rucioClient.createReplicationRule(cliArgs.container, cliArgs.rse, **ruleAttrs)
    stdoutLogger.info("Response: %s", response)