Example #1
    def executeInternal(self, *args):
        """The executeInternal method return 4 if the "completion" threshold is not reached, 0 otherwise"""
        self.stage = args[0]
        self.completion = int(args[1])
        self.prefix = args[2]

        self.setupLog()

        self.statusCacheInfo = {}  # will be filled with the status from the status cache

        self.readJobStatus()
        completed = set(self.completedJobs(stage=self.stage))
        if len(completed) < self.completion:
            return 4

        self.readProcessedJobs()
        unprocessed = completed - self.processedJobs
        estimates = copy.copy(unprocessed)
        self.logger.info("jobs remaining to process: %s",
                         ", ".join(sorted(unprocessed)))
        if self.stage == 'tail' and len(estimates - set(self.failedJobs)) == 0:
            estimates = set(
                self.completedJobs(stage='processing', processFailed=False))
        self.logger.info("jobs remaining to process: %s",
                         ", ".join(sorted(unprocessed)))

        # The TaskWorker saves some files that we now read back
        with open('datadiscovery.pkl', 'rb') as fd:
            dataset = pickle.load(fd)  #Output from the discovery process
        with open('taskinformation.pkl', 'rb') as fd:
            task = pickle.load(fd)  # a dictionary with the task information, as stored in the Oracle DB
        with open('taskworkerconfig.pkl', 'rb') as fd:
            config = pickle.load(fd)  #Task worker configuration

        # need to use user proxy as credential for talking with cmsweb
        config.TaskWorker.cmscert = os.environ.get('X509_USER_PROXY')
        config.TaskWorker.cmskey = os.environ.get('X509_USER_PROXY')
        config.TaskWorker.envForCMSWEB = newX509env(
            X509_USER_CERT=config.TaskWorker.cmscert,
            X509_USER_KEY=config.TaskWorker.cmskey)

        # need to get username from classAd to setup for Rucio access
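        # (the file pointed to by _CONDOR_JOB_AD is a plain-text ClassAd; it is assumed to
        #  contain a line like: CRAB_UserHN = "jdoe" -- the value here is purely illustrative)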
        task_ad = classad.parseOne(open(os.environ['_CONDOR_JOB_AD']))
        username = task_ad['CRAB_UserHN']
        config.Services.Rucio_account = username

        # need the global black list
        config.TaskWorker.scratchDir = './scratchdir'
        if not os.path.exists(config.TaskWorker.scratchDir):
            os.makedirs(config.TaskWorker.scratchDir)
        from TaskWorker.Actions.Recurring.BanDestinationSites import CRAB3BanDestinationSites
        banSites = CRAB3BanDestinationSites(config, self.logger)
        with config.TaskWorker.envForCMSWEB:
            banSites.execute()

        # Read the automatic_splitting/throughputs/0-N files where the PJ
        # saved the EventThroughput
        # (report['steps']['cmsRun']['performance']['cpu']['EventThroughput'])
        # and the average size of the output per event
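        # Each throughputs file is expected to hold a two-element JSON list,
        # e.g. [25.3, 1500000.0] -> (events/second, bytes/event); values illustrative.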
        sumEventsThr = 0
        sumEventsSize = 0
        count = 0
        for jid in estimates:
            if jid in self.failedJobs:
                continue
            fn = "automatic_splitting/throughputs/{0}".format(jid)
            with open(fn) as fd:
                throughput, eventsize = json.load(fd)
                sumEventsThr += throughput
                sumEventsSize += eventsize
                count += 1
        eventsThr = sumEventsThr / count
        eventsSize = sumEventsSize / count

        self.logger.info("average throughput for %s jobs: %s evt/s", count,
                         eventsThr)
        self.logger.info("average eventsize for %s jobs: %s bytes", count,
                         eventsSize)

        maxSize = getattr(config.TaskWorker, 'automaticOutputSizeMaximum',
                          5 * 1000**3)
        maxEvents = (maxSize / eventsSize) if eventsSize > 0 else 0

        runtime = task['tm_split_args'].get('minutes_per_job', -1)
        if self.stage == "processing":
            # Build in a 33% error margin in the runtime to not create too
            # many tails. This essentially moves the peak to lower
            # runtimes and cuts off less of the job distribution tail.
            target = int(0.75 * runtime)
        elif self.stage == 'tail':
            target = int(
                max(
                    getattr(config.TaskWorker,
                            'automaticTailRuntimeMinimumMins', 45),
                    getattr(config.TaskWorker, 'automaticTailRuntimeFraction',
                            0.2) * runtime))
        # `target` is in minutes, `eventsThr` is in events/second!
        events = int(target * eventsThr * 60)
        if events > maxEvents and maxEvents > 0:
            self.logger.info(
                "reduced the target event count from %s to %s to obey output size",
                events, maxEvents)
            events = int(maxEvents)
        splitTask = dict(task)
        splitTask['tm_split_algo'] = 'EventAwareLumiBased'
        splitTask['tm_split_args']['events_per_job'] = events

        if self.stage == 'tail' and not self.adjustLumisForCompletion(
                splitTask, unprocessed):
            self.logger.info("nothing to process for completion")
            self.saveProcessedJobs(unprocessed)
            return 0

        # Disable retries for processing: every lumi is attempted once in
        # processing and up to three more times in the tail stages, i.e. four
        # times in total. That should be enough "retries".
        #
        # See note in DagmanCreator about getting this from the Task DB
        if self.stage == "processing":
            config.TaskWorker.numAutomJobRetries = 0

        try:
            splitter = Splitter(config, crabserver=None)
            split_result = splitter.execute(dataset, task=splitTask)
            self.logger.info("Splitting results:")
            for g in split_result.result[0]:
                msg = "Created jobgroup with length {0}".format(
                    len(g.getJobs()))
                self.logger.info(msg)
        except TaskWorkerException as e:
            retmsg = "Splitting failed with:\n{0}".format(e)
            self.logger.error(retmsg)
            #            self.set_dashboard_state('FAILED')
            return 1
        try:
            parent = self.prefix if self.stage == 'tail' else None
            rucioClient = getNativeRucioClient(config=config,
                                               logger=self.logger)
            creator = DagmanCreator(config,
                                    crabserver=None,
                                    rucioClient=rucioClient)
            with config.TaskWorker.envForCMSWEB:
                creator.createSubdag(split_result.result,
                                     task=task,
                                     parent=parent,
                                     stage=self.stage)
            self.submitSubdag(
                'RunJobs{0}.subdag'.format(self.prefix),
                getattr(config.TaskWorker, 'maxIdle', MAX_IDLE_JOBS),
                getattr(config.TaskWorker, 'maxPost', MAX_POST_JOBS),
                self.stage)
        except TaskWorkerException as e:
            retmsg = "DAG creation failed with:\n{0}".format(e)
            self.logger.error(retmsg)
            #            self.set_dashboard_state('FAILED')
            return 1
        self.saveProcessedJobs(unprocessed)
        return 0
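
To make the splitting arithmetic above concrete, here is a small standalone sketch of how the target events-per-job value is derived from the measured throughput, the requested runtime and the output-size cap. All numbers are illustrative, not taken from a real task.

# Standalone sketch of the events-per-job arithmetic used above; all numbers are illustrative.
eventsThr = 25.0                  # average throughput measured by the probe jobs, events/second
eventsSize = 1.5e6                # average output size per event, bytes
runtime = 480                     # 'minutes_per_job' requested by the user
maxSize = 5 * 1000**3             # default output size cap, 5 GB

maxEvents = (maxSize / eventsSize) if eventsSize > 0 else 0
target = int(0.75 * runtime)      # "processing" stage: keep a 33% error margin on the runtime
events = int(target * eventsThr * 60)   # target is in minutes, throughput in events/second
if maxEvents > 0 and events > maxEvents:
    events = int(maxEvents)       # obey the output size limit
print(events)                     # -> 3333: with these numbers the size cap wins over the runtime target
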
    def requestTapeRecall(self, blockList=[], system='Dynamo', msgHead=''):   # pylint: disable=W0102
        """
        :param blockList: a list of blocks to recall from Tape to Disk
        :param system: a string identifying the DDM system to use 'Dynamo' or 'Rucio' or 'None'
        :param msgHead: a string with the initial part of a message to be used for exceptions
        :return: nothing: Since data on tape means no submission possible, this function will
            always raise a TaskWorkerException to stop the action flow.
            The exception message contains details and an attempt is done to upload it to TaskDB
            so that crab status can report it
        """

        msg = msgHead
        if system == 'Rucio':
            # need to use crab_tape_recall Rucio account to create containers and create rules
            tapeRecallConfig = copy.copy(self.config)
            tapeRecallConfig.Services.Rucio_account = 'crab_tape_recall'
            rucioClient = getNativeRucioClient(tapeRecallConfig, self.logger) # pylint: disable=redefined-outer-name
            # turn input CMS blocks into Rucio dids in cms scope
            dids = [{'scope': 'cms', 'name': block} for block in blockList]
            # prepare container /TapeRecall/taskname/USER in the service scope
            myScope = 'user.crab_tape_recall'
            containerName = '/TapeRecall/%s/USER' % self.taskName.replace(':', '.')
            containerDid = {'scope':myScope, 'name':containerName}
            self.logger.info("Create RUcio container %s", containerName)
            try:
                rucioClient.add_container(myScope, containerName)
            except DataIdentifierAlreadyExists:
                self.logger.debug("Container name already exists in Rucio. Keep going")
            except Exception as ex:
                msg += "Rucio exception creating container: %s" %  (str(ex))
                raise TaskWorkerException(msg)
            try:
                rucioClient.attach_dids(myScope, containerName, dids)
            except DuplicateContent:
                self.logger.debug("Some dids are already in this container. Keep going")
            except Exception as ex:
                msg += "Rucio exception adding blocks to container: %s" %  (str(ex))
                raise TaskWorkerException(msg)
            self.logger.info("Rucio container %s:%s created with %d blocks", myScope, containerName, len(blockList))

            # Compute size of recall request
            sizeToRecall = 0
            for block in blockList:
                replicas = rucioClient.list_dataset_replicas('cms', block)
                blockBytes = next(replicas)['bytes']  # pick the first replica of each block; they should all have the same size
                sizeToRecall += blockBytes
            TBtoRecall = sizeToRecall // 1e12
            if TBtoRecall > 0:
                self.logger.info("Total size of data to recall : %d TBytes", TBtoRecall)
            else:
                self.logger.info("Total size of data to recall : %d GBytes", sizeToRecall/1e9)

            if TBtoRecall > 30.:
                grouping = 'DATASET'  # Rucio DATASET i.e. CMS block !
                self.logger.info("Will scatter blocks on multiple sites")
            else:
                grouping = 'ALL'
                self.logger.info("Will place all blocks at a single site")

            # create rule
            RSE_EXPRESSION = 'ddm_quota>0&(tier=1|tier=2)&rse_type=DISK'
            #RSE_EXPRESSION = 'T3_IT_Trieste' # for testing
            WEIGHT = 'ddm_quota'
            #WEIGHT = None # for testing
            LIFETIME = 14 * 24 * 3600  # 14 days
            ASK_APPROVAL = False
            #ASK_APPROVAL = True # for testing
            ACCOUNT = 'crab_tape_recall'
            copies = 1
            try:
                ruleId = rucioClient.add_replication_rule(dids=[containerDid],
                                                  copies=copies, rse_expression=RSE_EXPRESSION,
                                                  grouping=grouping,
                                                  weight=WEIGHT, lifetime=LIFETIME, account=ACCOUNT,
                                                  activity='Analysis Input',
                                                  comment='Staged from tape for %s' % self.username,
                                                  ask_approval=ASK_APPROVAL, asynchronous=True,
                                                  )
            except DuplicateRule:
                # handle "A duplicate rule for this account, did, rse_expression, copies already exists"
                # which should only happen when testing, since the container name is unique like the task name
                self.logger.debug("A duplicate rule for this account, did, rse_expression, copies already exists. Use that")
                # find the existing rule id(s); list_did_rules yields rule dictionaries (assumed to carry an 'id' field)
                ruleId = [rule['id'] for rule in rucioClient.list_did_rules(myScope, containerName)]
            except (InsufficientTargetRSEs, InsufficientAccountLimit, FullStorage) as ex:
                msg = "Not enough global quota to issue a tape recall request. Rucio exception:\n%s" % str(ex)
                raise TaskWorkerException(msg)
            except Exception as ex:
                msg += "Rucio exception creating rule: %s" %  str(ex)
                raise TaskWorkerException(msg)
            ruleId = str(ruleId[0])  # take the single rule id out of the list

            msg += "\nA disk replica has been requested to Rucio (rule ID: %s )" % ruleId
            msg += "\nyou can check progress via either of the following two commands:"
            msg += "\n rucio rule-info %s" % ruleId
            msg += "\n rucio list-rules %s:%s" % (myScope, containerName)
            automaticTapeRecallIsImplemented = True
            if automaticTapeRecallIsImplemented:
                tapeRecallStatus = 'TAPERECALL'
            else:
                tapeRecallStatus = 'SUBMITFAILED'
            configreq = {'workflow': self.taskName,
                         'taskstatus': tapeRecallStatus,
                         'ddmreqid': ruleId,
                         'subresource': 'addddmreqid',
                         }
            try:
                tapeRecallStatusSet = self.crabserver.post(api='task', data=urllib.urlencode(configreq))
            except HTTPException as hte:
                self.logger.exception(hte)
                msg = "HTTP Error while contacting the REST Interface %s:\n%s" % (
                    self.config.TaskWorker.restHost, str(hte))
                msg += "\nStoring of %s status and ruleId (%s) failed for task %s" % (
                    tapeRecallStatus, ruleId, self.taskName)
                msg += "\nHTTP Headers are: %s" % hte.headers
                raise TaskWorkerException(msg, retry=True)
            if tapeRecallStatusSet[2] == "OK":
                self.logger.info("Status for task %s set to '%s'", self.taskName, tapeRecallStatus)
            if automaticTapeRecallIsImplemented:
                msg += "\nThis task will be automatically submitted as soon as the stage-out is completed."
                self.uploadWarning(msg, self.userproxy, self.taskName)
                raise TapeDatasetException(msg)
            # fall here if could not setup for automatic submission after recall
            msg += "\nPlease monitor recall progress via Rucio or DAS and try again once data are on disk."
            raise TaskWorkerException(msg)

        if system == 'None':
            msg += '\nIt is not possible to request a recall from tape.'
            msg += "\nPlease, check DAS (https://cmsweb.cern.ch/das) and make sure the dataset is accessible on DISK."
            raise TaskWorkerException(msg)

        if system == 'Dynamo':
            raise NotImplementedError
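
Because requestTapeRecall never returns normally (it always raises, as the docstring states), a caller has to treat the exceptions as the outcome. Below is a minimal, self-contained sketch of that control flow; the stand-in exception classes and the submitOrRecall helper are made up for illustration (in the real code the exceptions come from TaskWorker.WorkerExceptions).

# Hypothetical sketch of the control flow around requestTapeRecall; names below are stand-ins.
class TaskWorkerException(Exception):
    """Stand-in for TaskWorker.WorkerExceptions.TaskWorkerException."""

class TapeDatasetException(TaskWorkerException):
    """Stand-in for the exception raised once the recall has been arranged."""

def submitOrRecall(action, blocksOnTape):
    if not blocksOnTape:
        return 'submit'               # all data already on disk: go on with submission
    try:
        action.requestTapeRecall(blockList=blocksOnTape, system='Rucio',
                                 msgHead='Task cannot start because the input data are on tape.')
    except TapeDatasetException:
        return 'recall requested'     # task parked in TAPERECALL, resubmitted automatically later
    except TaskWorkerException:
        return 'failed'               # recall could not be arranged; the message reaches the user
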
    def _execute(self, config, task):

        # setup logger
        if not self.logger:
            self.logger = logging.getLogger(__name__)
            handler = logging.StreamHandler(sys.stdout)  # pylint: disable=redefined-outer-name
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s %(message)s")  # pylint: disable=redefined-outer-name
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.DEBUG)
        else:
            # do not use the BaseRecurringAction logger but create a new logger
            # which writes to config.TaskWorker.logsDir/tasks/recurring/TapeRecallStatus_YYMMDD-HHMM.log
            self.logger = logging.getLogger('TapeRecallStatus')
            logDir = config.TaskWorker.logsDir + '/tasks/recurring/'
            if not os.path.exists(logDir):
                os.makedirs(logDir)
            timeStamp = time.strftime('%y%m%d-%H%M', time.localtime())
            logFile = 'TapeRecallStatus_' + timeStamp + '.log'
            handler = logging.FileHandler(logDir + logFile)
            formatter = logging.Formatter(
                '%(asctime)s:%(levelname)s:%(module)s:%(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        mw = MasterWorker(config,
                          logWarning=False,
                          logDebug=False,
                          sequential=True,
                          console=False,
                          name='masterForTapeRecall')

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999,
                                    getstatus=tapeRecallStatus,
                                    ignoreTWName=True)
        if not recallingTasks:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)
            return

        self.logger.info("Retrieved a total of %d %s tasks",
                         len(recallingTasks), tapeRecallStatus)
        crabserver = mw.crabserver
        for recallingTask in recallingTasks:
            taskName = recallingTask['tm_taskname']
            self.logger.info("Working on task %s", taskName)

            reqId = recallingTask['tm_DDM_reqid']
            if not reqId:
                self.logger.debug(
                    "'tm_DDM_reqid' is not defined for task %s, skipping it",
                    taskName)
                continue
            else:
                msg = "Task points to Rucio RuleId:  %s " % reqId
                self.logger.info(msg)

            if (time.time() - getTimeFromTaskname(
                    str(taskName))) > MAX_DAYS_FOR_TAPERECALL * 24 * 60 * 60:
                self.logger.info(
                    "Task %s is older than %d days, setting its status to FAILED",
                    taskName, MAX_DAYS_FOR_TAPERECALL)
                msg = "The disk replica request (ID: %s) for the input dataset did not complete in %d days." % (
                    reqId, MAX_DAYS_FOR_TAPERECALL)
                failTask(taskName, crabserver, msg, self.logger, 'FAILED')
                continue

            user_proxy = False  # only becomes True if a valid proxy is retrieved below
            if 'S3' not in recallingTask['tm_cache_url'].upper():
                # with the old crabcache we had to worry about the sandbox being purged after 3 days
                mpl = MyProxyLogon(config=config,
                                   crabserver=crabserver,
                                   myproxylen=self.pollingTime)
                user_proxy = True
                try:
                    mpl.execute(task=recallingTask
                                )  # this adds 'user_proxy' to recallingTask
                except TaskWorkerException as twe:
                    user_proxy = False
                    self.logger.exception(twe)

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                if user_proxy:
                    self.refreshSandbox(recallingTask)

            # Retrieve status of recall request
            if not self.rucioClient:
                self.rucioClient = getNativeRucioClient(config=config,
                                                        logger=self.logger)
            try:
                ddmRequest = self.rucioClient.get_replication_rule(reqId)
            except RuleNotFound:
                msg = "Rucio rule id %s not found. Please report to experts" % reqId
                self.logger.error(msg)
                if user_proxy:
                    mpl.uploadWarning(msg, recallingTask['user_proxy'],
                                      taskName)
                continue  # without the rule there is nothing more to check for this task
            if ddmRequest['state'] == 'OK':
                self.logger.info(
                    "Request %s is completed, setting status of task %s to NEW",
                    reqId, taskName)
                mw.updateWork(taskName, recallingTask['tm_task_command'],
                              'NEW')
                # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                if user_proxy:
                    mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
            else:
                expiration = ddmRequest['expires_at']  # this is a datetime.datetime object
                if expiration < datetime.datetime.now():
                    # give up waiting
                    msg = (
                        "Replication request %s for task %s expired. Setting its status to FAILED"
                        % (reqId, taskName))
                    self.logger.info(msg)
                    failTask(taskName, crabserver, msg, self.logger, 'FAILED')
    #config.TaskWorker.cmscert = os.environ["X509_USER_PROXY"]
    #config.TaskWorker.cmskey = os.environ["X509_USER_PROXY"]

    # will user service cert as defined for TW
    config.TaskWorker.cmscert = os.environ["X509_USER_CERT"]
    config.TaskWorker.cmskey = os.environ["X509_USER_KEY"]
    config.TaskWorker.envForCMSWEB = newX509env(X509_USER_CERT=config.TaskWorker.cmscert,
                                                X509_USER_KEY=config.TaskWorker.cmskey)

    config.TaskWorker.instance = 'prod'

    config.Services.Rucio_host = 'https://cms-rucio.cern.ch'
    config.Services.Rucio_account = 'crab_server'
    config.Services.Rucio_authUrl = 'https://cms-rucio-auth.cern.ch'
    config.Services.Rucio_caPath = '/etc/grid-security/certificates/'
    rucioClient = getNativeRucioClient(config=config, logger=logging.getLogger())

    fileset = DBSDataDiscovery(config=config, rucioClient=rucioClient)
    fileset.execute(task={'tm_nonvalid_input_dataset': 'T', 'tm_use_parent': 0, 'user_proxy': 'None',
                          'tm_input_dataset': dbsDataset, 'tm_secondary_input_dataset': dbsSecondaryDataset,
                          'tm_taskname': 'pippo1', 'tm_username':config.Services.Rucio_account,
                          'tm_split_algo' : 'automatic', 'tm_split_args' : {'runs':[], 'lumis':[]},
                          'tm_dbs_url': DBSUrl}, tempDir='')
    
#===============================================================================
#    Some interesting datasets for testing
#    dataset = '/DoubleMuon/Run2018B-PromptReco-v2/AOD'       # on tape
#    dataset = '/DoubleMuon/Run2018B-02Apr2020-v1/NANOAOD'    # isNano
#    dataset = '/DoubleMuon/Run2018B-17Sep2018-v1/MINIAOD'    # parent of above NANOAOD (for secondaryDataset lookup)
#    dataset = '/MuonEG/Run2016B-07Aug17_ver2-v1/AOD'         # no Nano on disk (at least atm)
#    dataset = '/MuonEG/Run2016B-v1/RAW'                      # on tape
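
The test driver above uses dbsDataset, dbsSecondaryDataset and DBSUrl without defining them. A minimal sketch of plausible assignments, picking one of the test datasets from the comment block; the secondary dataset and the DBS URL are assumptions, not taken from the original:

# Illustrative values for the names used by the test driver above (assumptions).
dbsDataset = '/DoubleMuon/Run2018B-02Apr2020-v1/NANOAOD'     # one of the test datasets listed above
dbsSecondaryDataset = None                                   # no secondary (parent/pile-up) dataset
DBSUrl = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'  # standard global DBS reader instance
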
Example #5
def handleNewTask(resthost, dbInstance, config, task, procnum, *args,
                  **kwargs):
    """Performs the injection of a new task

    :arg str resthost: the hostname where the rest interface is running
    :arg str dbInstance: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :arg int procnum: the process number taking care of the work
    :*args and *kwargs: extra parameters currently not defined
    :return: the handler."""
    crabserver = CRABRest(resthost,
                          config.TaskWorker.cmscert,
                          config.TaskWorker.cmskey,
                          retry=20,
                          logger=logging.getLogger(str(procnum)),
                          userAgent='CRABTaskWorker',
                          version=__version__)
    crabserver.setDbInstance(dbInstance)
    handler = TaskHandler(task,
                          procnum,
                          crabserver,
                          config,
                          'handleNewTask',
                          createTempDir=True)
    rucioClient = getNativeRucioClient(config=config, logger=handler.logger)
    handler.addWork(
        MyProxyLogon(config=config,
                     crabserver=crabserver,
                     procnum=procnum,
                     myproxylen=60 * 60 * 24))
    handler.addWork(
        StageoutCheck(config=config,
                      crabserver=crabserver,
                      procnum=procnum,
                      rucioClient=rucioClient))
    if task['tm_job_type'] == 'Analysis':
        if task.get('tm_user_files'):
            handler.addWork(
                UserDataDiscovery(config=config,
                                  crabserver=crabserver,
                                  procnum=procnum))
        else:
            handler.addWork(
                DBSDataDiscovery(config=config,
                                 crabserver=crabserver,
                                 procnum=procnum,
                                 rucioClient=rucioClient))
    elif task['tm_job_type'] == 'PrivateMC':
        handler.addWork(
            MakeFakeFileSet(config=config,
                            crabserver=crabserver,
                            procnum=procnum))
    handler.addWork(
        Splitter(config=config, crabserver=crabserver, procnum=procnum))
    handler.addWork(
        DagmanCreator(config=config,
                      crabserver=crabserver,
                      procnum=procnum,
                      rucioClient=rucioClient))
    if task['tm_dry_run'] == 'T':
        handler.addWork(
            DryRunUploader(config=config,
                           crabserver=crabserver,
                           procnum=procnum))
    else:
        handler.addWork(
            DagmanSubmitter(config=config,
                            crabserver=crabserver,
                            procnum=procnum))

    return handler.actionWork(args, kwargs)
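
The chain of actions that handleNewTask registers depends only on a few task fields. The following standalone sketch reproduces that branching with action names as plain strings; the chooseActions helper is hypothetical and exists only for illustration.

# Pure-Python sketch of the branching handleNewTask performs on the task dict (illustrative).
def chooseActions(task):
    actions = ['MyProxyLogon', 'StageoutCheck']
    if task['tm_job_type'] == 'Analysis':
        actions.append('UserDataDiscovery' if task.get('tm_user_files') else 'DBSDataDiscovery')
    elif task['tm_job_type'] == 'PrivateMC':
        actions.append('MakeFakeFileSet')
    actions += ['Splitter', 'DagmanCreator']
    actions.append('DryRunUploader' if task['tm_dry_run'] == 'T' else 'DagmanSubmitter')
    return actions

print(chooseActions({'tm_job_type': 'Analysis', 'tm_user_files': [], 'tm_dry_run': 'F'}))
# -> ['MyProxyLogon', 'StageoutCheck', 'DBSDataDiscovery', 'Splitter', 'DagmanCreator', 'DagmanSubmitter']
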