Example #1
    def executeInternal(self, *args):
        """The executeInternal method return 4 if the "completion" threshold is not reached, 0 otherwise"""
        self.stage = args[0]
        self.completion = int(args[1])
        self.prefix = args[2]

        self.setupLog()

        self.statusCacheInfo = {} #Will be filled with the status from the status cache

        self.readJobStatus()
        completed = set(self.completedJobs(stage=self.stage))
        if len(completed) < self.completion:
            return 4

        self.readProcessedJobs()
        unprocessed = completed - self.processedJobs
        estimates = copy.copy(unprocessed)
        self.logger.info("jobs remaining to process: %s", ", ".join(sorted(unprocessed)))
        if self.stage == 'tail' and len(estimates-set(self.failedJobs)) == 0:
            estimates = set(self.completedJobs(stage='processing', processFailed=False))
        self.logger.info("jobs remaining to process: %s", ", ".join(sorted(unprocessed)))

        # The TaskWorker saves some files that we now read back
        with open('datadiscovery.pkl', 'rb') as fd:
            dataset = pickle.load(fd) #Output from the discovery process
        with open('taskinformation.pkl', 'rb') as fd:
            task = pickle.load(fd) #A dictionary containing information about the task as in the Oracle DB
        with open('taskworkerconfig.pkl', 'rb') as fd:
            config = pickle.load(fd) #Task worker configuration

        # need to use user proxy as credential for talking with cmsweb
        config.TaskWorker.cmscert = os.environ.get('X509_USER_PROXY')
        config.TaskWorker.cmskey  = os.environ.get('X509_USER_PROXY')
        config.TaskWorker.envForCMSWEB = newX509env(X509_USER_CERT=config.TaskWorker.cmscert,
                                                         X509_USER_KEY=config.TaskWorker.cmskey)

        # need the global black list
        config.TaskWorker.scratchDir = './scratchdir'
        if not os.path.exists(config.TaskWorker.scratchDir):
            os.makedirs(config.TaskWorker.scratchDir)
        from TaskWorker.Actions.Recurring.BanDestinationSites import CRAB3BanDestinationSites
        banSites = CRAB3BanDestinationSites(config, 'dummy', 'dummy', self.logger)
        with config.TaskWorker.envForCMSWEB:
            banSites.execute()

        # Read the automatic_splitting/throughputs/0-N files where the PJ
        # saved the EventThroughput
        # (report['steps']['cmsRun']['performance']['cpu']['EventThroughput'])
        # and the average size of the output per event
        sumEventsThr = 0
        sumEventsSize = 0
        count = 0
        for jid in estimates:
            if jid in self.failedJobs:
                continue
            fn = "automatic_splitting/throughputs/{0}".format(jid)
            with open(fn) as fd:
                throughput, eventsize = json.load(fd)
                sumEventsThr += throughput
                sumEventsSize += eventsize
                count += 1
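        # NOTE: the averages below assume at least one non-failed job wrote a throughput report (count > 0)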
        eventsThr = sumEventsThr / count
        eventsSize = sumEventsSize / count

        self.logger.info("average throughput for %s jobs: %s evt/s", count, eventsThr)
        self.logger.info("average eventsize for %s jobs: %s bytes", count, eventsSize)

        maxSize = getattr(config.TaskWorker, 'automaticOutputSizeMaximum', 5 * 1000**3)
        maxEvents = (maxSize / eventsSize) if eventsSize > 0 else 0

        runtime = task['tm_split_args'].get('minutes_per_job', -1)
        if self.stage == "processing":
            # Build in a 33% error margin in the runtime to not create too
            # many tails. This essentially moves the peak to lower
            # runtimes and cuts off less of the job distribution tail.
            target = int(0.75 * runtime)
        elif self.stage == 'tail':
            target = int(max(
                getattr(config.TaskWorker, 'automaticTailRuntimeMinimumMins', 45),
                getattr(config.TaskWorker, 'automaticTailRuntimeFraction', 0.2) * runtime
            ))
        # `target` is in minutes, `eventsThr` is in events/second!
        events = int(target * eventsThr * 60)
        if events > maxEvents and maxEvents > 0:
            self.logger.info("reduced the target event count from %s to %s to obey output size", events, maxEvents)
            events = int(maxEvents)
        splitTask = dict(task)
        splitTask['tm_split_algo'] = 'EventAwareLumiBased'
        splitTask['tm_split_args']['events_per_job'] = events

        if self.stage == 'tail' and not self.adjustLumisForCompletion(splitTask, unprocessed):
            self.logger.info("nothing to process for completion")
            self.saveProcessedJobs(unprocessed)
            return 0

        # Disable retries for processing: every lumi is attempted once in
        # processing and up to three more times in the tails, i.e. four times
        # in total. That should be enough "retries"
        #
        # See note in DagmanCreator about getting this from the Task DB
        if self.stage == "processing":
            config.TaskWorker.numAutomJobRetries = 0

        try:
            splitter = Splitter(config, server=None, resturi='')
            split_result = splitter.execute(dataset, task=splitTask)
            self.logger.info("Splitting results:")
            for g in split_result.result[0]:
                msg = "Created jobgroup with length {0}".format(len(g.getJobs()))
                self.logger.info(msg)
        except TaskWorkerException as e:
            retmsg = "Splitting failed with:\n{0}".format(e)
            self.logger.error(retmsg)
#            self.set_dashboard_state('FAILED')
            return 1
        try:
            parent = self.prefix if self.stage == 'tail' else None
            creator = DagmanCreator(config, server=None, resturi='')
            with config.TaskWorker.envForCMSWEB:
                creator.createSubdag(split_result.result, task=task, parent=parent, stage=self.stage)
            self.submitSubdag('RunJobs{0}.subdag'.format(self.prefix), getattr(config.TaskWorker, 'maxPost', 20), self.stage)
        except TaskWorkerException as e:
            retmsg = "DAG creation failed with:\n{0}".format(e)
            self.logger.error(retmsg)
#            self.set_dashboard_state('FAILED')
            return 1
        self.saveProcessedJobs(unprocessed)
        return 0
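
A minimal sketch of the sizing arithmetic used above, with hypothetical numbers; `size_events_per_job` and its parameters are illustrative names, not part of the TaskWorker code. The target runtime is in minutes and the measured throughput in events/second, so the product is scaled by 60 and then capped by the configured output-size limit:

def size_events_per_job(target_minutes, events_per_sec, bytes_per_event,
                        max_output_bytes=5 * 1000**3):
    # events that fit into the target wall-clock time
    events = int(target_minutes * 60 * events_per_sec)
    # cap so the expected output stays below the size limit (default 5 GB)
    max_events = int(max_output_bytes / bytes_per_event) if bytes_per_event > 0 else 0
    if 0 < max_events < events:
        events = max_events
    return events

# e.g. a 150-minute target at 4 evt/s and 2 MB/event:
# size_events_per_job(150, 4, 2e6) -> 2500 (capped from 36000 by the 5 GB limit)
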
Example #2
    def executeInternal(self, *args):
        """The executeInternal method return 4 if the "completion" threshold is not reached, 0 otherwise"""
        self.stage = args[0]
        self.completion = int(args[1])
        self.prefix = args[2]

        self.setupLog()

        self.statusCacheInfo = {} #Will be filled with the status from the status cache

        self.readJobStatus()
        completed = set(self.completedJobs(stage=self.stage))
        if len(completed) < self.completion:
            return 4

        self.readProcessedJobs()
        unprocessed = completed - self.processedJobs
        estimates = copy.copy(unprocessed)
        self.logger.info("jobs remaining to process: {0}".format(", ".join(sorted(unprocessed))))
        if self.stage == 'tail' and len(estimates-set(self.failedJobs)) == 0:
            estimates = set(self.completedJobs(stage='processing'))
        self.logger.info("jobs remaining to process: %s", ", ".join(sorted(unprocessed)))

        # The TaskWorker saves some files that we now read back
        with open('datadiscovery.pkl', 'rb') as fd:
            dataset = pickle.load(fd) #Output from the discovery process
        with open('taskinformation.pkl', 'rb') as fd:
            task = pickle.load(fd) #A dictionary containing information about the task as in the Oracle DB
        with open('taskworkerconfig.pkl', 'rb') as fd:
            config = pickle.load(fd) #Task worker configuration

        # Read the automatic_splitting/throughputs/0-N files where the PJ
        # saved the EventThroughput (report['steps']['cmsRun']['performance']['cpu']['EventThroughput'])
        sumEventsThr = 0
        count = 0
        for jid in estimates:
            if jid in self.failedJobs:
                continue
            fn = "automatic_splitting/throughputs/{0}".format(jid)
            with open(fn) as fd:
                sumEventsThr += float(fd.read())
                count += 1
        eventsThr = sumEventsThr / count
        self.logger.info("average throughput for %s jobs: %s", count, eventsThr)
        runtime = task['tm_split_args'].get('seconds_per_job', -1)
        if self.stage == "processing":
            # Build in a 33% error margin in the runtime to not create too
            # many tails. This essentially moves the peak to lower
            # runtimes and cuts off less of the job distribution tail.
            target = int(0.75 * runtime)
        elif self.stage == 'tail':
            target = int(max(
                getattr(config.TaskWorker, 'automaticTailRuntimeMinimum', 45 * 60),
                getattr(config.TaskWorker, 'automaticTailRuntimeFraction', 0.2) * runtime
            ))
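        # `target` is in seconds and `eventsThr` in events/second, so no unit conversion is needed here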
        events = int(target * eventsThr)
        splitTask = dict(task)
        splitTask['tm_split_algo'] = 'EventAwareLumiBased'
        splitTask['tm_split_args']['events_per_job'] = events

        if self.stage == 'tail' and not self.adjustLumisForCompletion(splitTask, unprocessed):
            self.logger.info("nothing to process for completion")
            self.saveProcessedJobs(unprocessed)
            return 0

        # Disable retries for processing: every lumi is attempted once in
        # processing and up to three more times in the tails, i.e. four times
        # in total. That should be enough "retries"
        #
        # See note in DagmanCreator about getting this from the Task DB
        if self.stage == "processing":
            config.TaskWorker.numAutomJobRetries = 0

        try:
            config.TaskWorker.scratchDir = './scratchdir' # XXX
            splitter = Splitter(config, server=None, resturi='')
            split_result = splitter.execute(dataset, task=splitTask)
            self.logger.info("Splitting results:")
            for g in split_result.result[0]:
                msg = "Created jobgroup with length {0}".format(len(g.getJobs()))
                self.logger.info(msg)
        except TaskWorkerException as e:
            retmsg = "Splitting failed with:\n{0}".format(e)
            self.logger.error(retmsg)
#            self.set_dashboard_state('FAILED')
            return 1
        try:
            parent = self.prefix if self.stage == 'tail' else None
            creator = DagmanCreator(config, server=None, resturi='')
            creator.createSubdag(split_result.result, task=task, parent=parent, stage=self.stage)
            self.submitSubdag('RunJobs{0}.subdag'.format(self.prefix), getattr(config.TaskWorker, 'maxPost', 20), self.stage)
        except TaskWorkerException as e:
            retmsg = "DAG creation failed with:\n{0}".format(e)
            self.logger.error(retmsg)
#            self.set_dashboard_state('FAILED')
            return 1
        self.saveProcessedJobs(unprocessed)
        return 0
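
A self-contained sketch of the per-job throughput averaging done above; `average_throughput` is a hypothetical helper, the directory layout follows the snippets, and the JSON pair format is the one read in Examples #1 and #4:

import json
import os

def average_throughput(job_ids, failed_jobs, basedir="automatic_splitting/throughputs"):
    """Average [EventThroughput, bytes-per-event] over the non-failed jobs."""
    sum_thr = sum_size = 0.0
    count = 0
    for jid in job_ids:
        if jid in failed_jobs:
            continue
        with open(os.path.join(basedir, str(jid))) as fd:
            throughput, eventsize = json.load(fd)
        sum_thr += throughput
        sum_size += eventsize
        count += 1
    if count == 0:
        raise RuntimeError("no throughput reports found for the estimate jobs")
    return sum_thr / count, sum_size / count
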
Example #3
def handleNewTask(resthost, resturi, config, task, procnum, *args, **kwargs):
    """Performs the injection of a new task

    :arg str resthost: the hostname where the rest interface is running
    :arg str resturi: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :arg int procnum: the process number taking care of the work
    :args and kwargs: extra parameters, currently not defined
    :return: the handler."""
    server = HTTPRequests(resthost,
                          config.TaskWorker.cmscert,
                          config.TaskWorker.cmskey,
                          retry=20,
                          logger=logging.getLogger(str(procnum)))
    handler = TaskHandler(task,
                          procnum,
                          server,
                          config,
                          'handleNewTask',
                          createTempDir=True)
    handler.addWork(
        MyProxyLogon(config=config,
                     server=server,
                     resturi=resturi,
                     procnum=procnum,
                     myproxylen=60 * 60 * 24))
    handler.addWork(
        StageoutCheck(config=config,
                      server=server,
                      resturi=resturi,
                      procnum=procnum))
    if task['tm_job_type'] == 'Analysis':
        if task.get('tm_user_files'):
            handler.addWork(
                UserDataDiscovery(config=config,
                                  server=server,
                                  resturi=resturi,
                                  procnum=procnum))
        else:
            handler.addWork(
                DBSDataDiscovery(config=config,
                                 server=server,
                                 resturi=resturi,
                                 procnum=procnum))
    elif task['tm_job_type'] == 'PrivateMC':
        handler.addWork(
            MakeFakeFileSet(config=config,
                            server=server,
                            resturi=resturi,
                            procnum=procnum))
    handler.addWork(
        Splitter(config=config,
                 server=server,
                 resturi=resturi,
                 procnum=procnum))
    handler.addWork(
        DagmanCreator(config=config,
                      server=server,
                      resturi=resturi,
                      procnum=procnum))
    if task['tm_dry_run'] == 'T':
        handler.addWork(
            DryRunUploader(config=config,
                           server=server,
                           resturi=resturi,
                           procnum=procnum))
    else:
        handler.addWork(
            DagmanSubmitter(config=config,
                            server=server,
                            resturi=resturi,
                            procnum=procnum))

    return handler.actionWork(args, kwargs)
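
Example #3 builds the injection pipeline by appending actions to a handler and then running them in order. A stripped-down sketch of that composition pattern follows; `MiniHandler` and the `execute` signature are illustrative, not the real TaskHandler/action API:

class MiniHandler:
    """Collects work units and runs them sequentially, passing each
    action's result on to the next (a simplified stand-in for TaskHandler)."""

    def __init__(self, task):
        self.task = task
        self.actions = []

    def addWork(self, action):
        self.actions.append(action)

    def actionWork(self, *args, **kwargs):
        result = None
        for action in self.actions:
            # each action sees the previous result plus the task being handled
            result = action.execute(result, task=self.task)
        return result
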
Example #4
    def executeInternal(self, *args):
        """The executeInternal method return 4 if the "completion" threshold is not reached, 0 otherwise"""
        self.stage = args[0]
        self.completion = int(args[1])
        self.prefix = args[2]

        self.setupLog()

        self.statusCacheInfo = {
        }  #Will be filled with the status from the status cache

        self.readJobStatus()
        completed = set(self.completedJobs(stage=self.stage))
        if len(completed) < self.completion:
            return 4

        self.readProcessedJobs()
        unprocessed = completed - self.processedJobs
        estimates = copy.copy(unprocessed)
        self.logger.info("jobs remaining to process: %s",
                         ", ".join(sorted(unprocessed)))
        if self.stage == 'tail' and len(estimates - set(self.failedJobs)) == 0:
            estimates = set(
                self.completedJobs(stage='processing', processFailed=False))
        self.logger.info("jobs remaining to process: %s",
                         ", ".join(sorted(unprocessed)))

        # The TaskWorker saves some files that we now read back
        with open('datadiscovery.pkl', 'rb') as fd:
            dataset = pickle.load(fd)  #Output from the discovery process
        with open('taskinformation.pkl', 'rb') as fd:
            task = pickle.load(
                fd
            )  #A dictionary containing information about the task as in the Oracle DB
        with open('taskworkerconfig.pkl', 'rb') as fd:
            config = pickle.load(fd)  #Task worker configuration

        # need to use user proxy as credential for talking with cmsweb
        config.TaskWorker.cmscert = os.environ.get('X509_USER_PROXY')
        config.TaskWorker.cmskey = os.environ.get('X509_USER_PROXY')

        # need the global black list
        config.TaskWorker.scratchDir = './scratchdir'
        if not os.path.exists(config.TaskWorker.scratchDir):
            os.makedirs(config.TaskWorker.scratchDir)
        from TaskWorker.Actions.Recurring.BanDestinationSites import CRAB3BanDestinationSites
        banSites = CRAB3BanDestinationSites(config, 'dummy', 'dummy',
                                            self.logger)
        banSites.execute()

        # Read the automatic_splitting/throughputs/0-N files where the PJ
        # saved the EventThroughput
        # (report['steps']['cmsRun']['performance']['cpu']['EventThroughput'])
        # and the average size of the output per event
        sumEventsThr = 0
        sumEventsSize = 0
        count = 0
        for jid in estimates:
            if jid in self.failedJobs:
                continue
            fn = "automatic_splitting/throughputs/{0}".format(jid)
            with open(fn) as fd:
                throughput, eventsize = json.load(fd)
                sumEventsThr += throughput
                sumEventsSize += eventsize
                count += 1
        eventsThr = sumEventsThr / count
        eventsSize = sumEventsSize / count

        self.logger.info("average throughput for %s jobs: %s", count,
                         eventsThr)
        self.logger.info("average eventsize for %s jobs: %s", count,
                         eventsSize)

        maxSize = getattr(config.TaskWorker, 'automaticOutputSizeMaximum',
                          5 * 1000**3)
        maxEvents = (maxSize / eventsSize) if eventsSize > 0 else 0

        runtime = task['tm_split_args'].get('minutes_per_job', -1)
        if self.stage == "processing":
            # Build in a 33% error margin in the runtime to not create too
            # many tails. This essentially moves the peak to lower
            # runtimes and cuts off less of the job distribution tail.
            target = int(0.75 * runtime)
        elif self.stage == 'tail':
            target = int(
                max(
                    getattr(config.TaskWorker,
                            'automaticTailRuntimeMinimumMins', 45),
                    getattr(config.TaskWorker, 'automaticTailRuntimeFraction',
                            0.2) * runtime))
        # `target` is in minutes, `eventsThr` is in events/second!
        events = int(target * eventsThr * 60)
        if events > maxEvents and maxEvents > 0:
            self.logger.info(
                "reduced the target event count from %s to %s to obey output size",
                events, maxEvents)
            events = int(maxEvents)
        splitTask = dict(task)
        splitTask['tm_split_algo'] = 'EventAwareLumiBased'
        splitTask['tm_split_args']['events_per_job'] = events

        if self.stage == 'tail' and not self.adjustLumisForCompletion(
                splitTask, unprocessed):
            self.logger.info("nothing to process for completion")
            self.saveProcessedJobs(unprocessed)
            return 0

        # Disable retries for processing: every lumi is attempted once in
        # processing and up to three more times in the tails, i.e. four times
        # in total. That should be enough "retries"
        #
        # See note in DagmanCreator about getting this from the Task DB
        if self.stage == "processing":
            config.TaskWorker.numAutomJobRetries = 0

        try:
            splitter = Splitter(config, server=None, resturi='')
            split_result = splitter.execute(dataset, task=splitTask)
            self.logger.info("Splitting results:")
            for g in split_result.result[0]:
                msg = "Created jobgroup with length {0}".format(
                    len(g.getJobs()))
                self.logger.info(msg)
        except TaskWorkerException as e:
            retmsg = "Splitting failed with:\n{0}".format(e)
            self.logger.error(retmsg)
            #            self.set_dashboard_state('FAILED')
            return 1
        try:
            parent = self.prefix if self.stage == 'tail' else None
            creator = DagmanCreator(config, server=None, resturi='')
            creator.createSubdag(split_result.result,
                                 task=task,
                                 parent=parent,
                                 stage=self.stage)
            self.submitSubdag('RunJobs{0}.subdag'.format(self.prefix),
                              getattr(config.TaskWorker, 'maxPost', 20),
                              self.stage)
        except TaskWorkerException as e:
            retmsg = "DAG creation failed with:\n{0}".format(e)
            self.logger.error(retmsg)
            #            self.set_dashboard_state('FAILED')
            return 1
        self.saveProcessedJobs(unprocessed)
        return 0
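
For the tail stage, all three variants take the larger of a configured runtime floor and a fraction of the per-job runtime. A quick sketch with the defaults from Examples #1 and #4, expressed in minutes (Example #2 uses the same defaults in seconds); the function name is illustrative:

def tail_target_minutes(minutes_per_job, minimum_mins=45, fraction=0.2):
    # defaults mirror automaticTailRuntimeMinimumMins / automaticTailRuntimeFraction
    return int(max(minimum_mins, fraction * minutes_per_job))

# e.g. a 480-minute processing job gives a 96-minute tail target,
# while anything under 225 minutes falls back to the 45-minute floor.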