Example #1
class EventFeeder(AgentBase):
    # constructor
    def __init__(self, communicator, queue_config_mapper, single_mode=False):
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.queueConfigMapper = queue_config_mapper
        self.communicator = communicator
        self.pluginFactory = PluginFactory()

    # main loop
    def run(self):
        lockedBy = 'eventfeeder-{0}'.format(self.ident)
        while True:
            mainLog = self.make_logger(_logger, 'id={0}'.format(lockedBy), method_name='run')
            mainLog.debug('getting workers to feed events')
            workSpecsPerQueue = self.dbProxy.get_workers_to_feed_events(harvester_config.eventfeeder.maxWorkers,
                                                                        harvester_config.eventfeeder.lockInterval)
            mainLog.debug('got {0} queues'.format(len(workSpecsPerQueue)))
            # loop over all workers
            for queueName, workSpecList in iteritems(workSpecsPerQueue):
                tmpQueLog = self.make_logger(_logger, 'queue={0}'.format(queueName), method_name='run')
                # check queue
                if not self.queueConfigMapper.has_queue(queueName):
                    tmpQueLog.error('config not found')
                    continue
                # get queue
                queueConfig = self.queueConfigMapper.get_queue(queueName)
                # get plugin
                messenger = self.pluginFactory.get_plugin(queueConfig.messenger)
                # loop over all workers
                for workSpec in workSpecList:
                    tmpLog = core_utils.make_logger(_logger, 'workerID={0}'.format(workSpec.workerID),
                                                    method_name='run')
                    # get events
                    tmpLog.debug('get events')
                    tmpStat, events = self.communicator.get_event_ranges(workSpec.eventsRequestParams)
                    # failed
                    if tmpStat is False:
                        tmpLog.error('failed to get events with {0}'.format(events))
                        continue
                    tmpStat = messenger.feed_events(workSpec, events)
                    # failed
                    if tmpStat is False:
                        tmpLog.error('failed to feed events')
                        continue
                    # update worker
                    workSpec.eventsRequest = WorkSpec.EV_useEvents
                    workSpec.eventsRequestParams = None
                    workSpec.eventFeedTime = None
                    # update local database
                    tmpStat = self.dbProxy.update_worker(workSpec)
                    tmpLog.debug('done with {0}'.format(tmpStat))
                tmpQueLog.debug('done')
            mainLog.debug('done')
            # check if being terminated
            if self.terminated(harvester_config.eventfeeder.sleepTime):
                mainLog.debug('terminated')
                return
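
The loop above follows the standard Harvester agent pattern: build a per-cycle logger, pull work from the local database, process it, then sleep inside terminated() and exit once a stop has been requested. Below is a minimal, self-contained sketch of that loop skeleton; the class and method names (FakeAgent, do_one_cycle) are illustrative stand-ins, not Harvester APIs.

import threading
import time

class FakeAgent:
    # hypothetical stand-in for an AgentBase-style agent; not part of Harvester
    def __init__(self, sleep_time=5):
        self.sleep_time = sleep_time
        self._stop_event = threading.Event()

    def stop(self):
        # request termination from another thread
        self._stop_event.set()

    def terminated(self, wait_interval):
        # sleep up to wait_interval seconds; True means a stop was requested
        return self._stop_event.wait(wait_interval)

    def do_one_cycle(self):
        # placeholder for the real per-cycle work (DB query, event feeding, ...)
        print('cycle at {0}'.format(time.time()))

    def run(self):
        while True:
            self.do_one_cycle()
            # same exit check as EventFeeder.run: sleep, then stop if terminated
            if self.terminated(self.sleep_time):
                return
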
class EventFeeder(AgentBase):
    # constructor
    def __init__(self, communicator, queue_config_mapper, single_mode=False):
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.queueConfigMapper = queue_config_mapper
        self.communicator = communicator
        self.pluginFactory = PluginFactory()

    # main loop
    def run(self):
        lockedBy = 'eventfeeder-{0}'.format(self.get_pid())
        while True:
            mainLog = self.make_logger(_logger, 'id={0}'.format(lockedBy), method_name='run')
            mainLog.debug('getting workers to feed events')
            workSpecsPerQueue = self.dbProxy.get_workers_to_feed_events(harvester_config.eventfeeder.maxWorkers,
                                                                        harvester_config.eventfeeder.lockInterval,
                                                                        lockedBy)
            mainLog.debug('got {0} queues'.format(len(workSpecsPerQueue)))
            # loop over all workers
            for queueName, workSpecList in iteritems(workSpecsPerQueue):
                tmpQueLog = self.make_logger(_logger, 'queue={0}'.format(queueName), method_name='run')
                # check queue
                if not self.queueConfigMapper.has_queue(queueName):
                    tmpQueLog.error('config not found')
                    continue
                # get queue
                queueConfig = self.queueConfigMapper.get_queue(queueName)
                if hasattr(queueConfig, 'scatteredEvents') and queueConfig.scatteredEvents:
                    scattered = True
                else:
                    scattered = False
                # get plugin
                messenger = self.pluginFactory.get_plugin(queueConfig.messenger)
                # loop over all workers
                for workSpec in workSpecList:
                    tmpLog = core_utils.make_logger(_logger, 'workerID={0}'.format(workSpec.workerID),
                                                    method_name='run')
                    # lock worker again
                    lockedFlag = self.dbProxy.lock_worker_again_to_feed_events(workSpec.workerID, lockedBy)
                    if not lockedFlag:
                        tmpLog.debug('skipped since locked by another')
                        continue
                    # get events
                    tmpLog.debug('get events')
                    tmpStat, events = self.communicator.get_event_ranges(workSpec.eventsRequestParams,
                                                                         scattered,
                                                                         workSpec.get_access_point())
                    # failed
                    if tmpStat is False:
                        tmpLog.error('failed to get events with {0}'.format(events))
                        continue
                    # lock worker again
                    lockedFlag = self.dbProxy.lock_worker_again_to_feed_events(workSpec.workerID, lockedBy)
                    if not lockedFlag:
                        tmpLog.debug('skipped before feeding since locked by another')
                        continue
                    tmpStat = messenger.feed_events(workSpec, events)
                    # failed
                    if tmpStat is False:
                        tmpLog.error('failed to feed events')
                        continue
                    # dump
                    for pandaID, eventList in iteritems(events):
                        try:
                            nRanges = workSpec.eventsRequestParams[pandaID]['nRanges']
                        except Exception:
                            nRanges = None
                        tmpLog.debug('got {0} events for PandaID={1} while getting {2} events'.format(len(eventList),
                                                                                                      pandaID,
                                                                                                      nRanges))
                        # disable multi workers
                        if workSpec.mapType == WorkSpec.MT_MultiWorkers:
                            if len(eventList) == 0 or (nRanges is not None and len(eventList) < nRanges):
                                tmpStat = self.dbProxy.disable_multi_workers(pandaID)
                                if tmpStat == 1:
                                    tmpStr = 'disabled MultiWorkers for PandaID={0}'.format(pandaID)
                                    tmpLog.debug(tmpStr)
                    # update worker
                    workSpec.eventsRequest = WorkSpec.EV_useEvents
                    workSpec.eventsRequestParams = None
                    workSpec.eventFeedTime = None
                    workSpec.eventFeedLock = None
                    # update local database
                    tmpStat = self.dbProxy.update_worker(workSpec, {'eventFeedLock': lockedBy})
                    tmpLog.debug('done with {0}'.format(tmpStat))
                tmpQueLog.debug('done')
            mainLog.debug('done')
            # check if being terminated
            if self.terminated(harvester_config.eventfeeder.sleepTime):
                mainLog.debug('terminated')
                return
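
This version adds a lockedBy identity and re-acquires the worker lock immediately before each slow step (fetching events, feeding them), so that two EventFeeder instances cannot feed the same worker. A rough stand-alone illustration of that lock-then-recheck pattern, using an in-memory dict in place of the DBProxy lock table (all names here are hypothetical):

lock_table = {}  # workerID -> owner; stands in for the eventFeedLock column

def lock_worker_again(worker_id, locked_by):
    # succeed only if nobody holds the lock or we already hold it
    holder = lock_table.get(worker_id)
    if holder in (None, locked_by):
        lock_table[worker_id] = locked_by
        return True
    return False

def feed_one_worker(worker_id, locked_by):
    if not lock_worker_again(worker_id, locked_by):
        return 'skipped since locked by another'
    events = ['range-1', 'range-2']  # stands in for communicator.get_event_ranges
    if not lock_worker_again(worker_id, locked_by):
        return 'skipped before feeding since locked by another'
    return 'fed {0} events'.format(len(events))

print(feed_one_worker(123, 'eventfeeder-1'))
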
Example #3
class Propagator(AgentBase):
    # constructor
    def __init__(self, communicator, queue_config_mapper, single_mode=False):
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.communicator = communicator
        self.queueConfigMapper = queue_config_mapper
        self._last_stats_update = None
        self._last_metrics_update = None

    # main loop
    def run(self):
        while True:
            sw_main = core_utils.get_stopwatch()
            mainLog = self.make_logger(_logger, 'id={0}'.format(self.get_pid()), method_name='run')
            mainLog.debug('getting jobs to propagate')
            sw = core_utils.get_stopwatch()
            jobSpecs = self.dbProxy.get_jobs_to_propagate(harvester_config.propagator.maxJobs,
                                                          harvester_config.propagator.lockInterval,
                                                          harvester_config.propagator.updateInterval,
                                                          self.get_pid())
            mainLog.debug('got {0} jobs {1}'.format(len(jobSpecs), sw.get_elapsed_time()))
            # update jobs in central database
            iJobs = 0
            nJobs = harvester_config.propagator.nJobsInBulk
            hbSuppressMap = dict()
            while iJobs < len(jobSpecs):
                jobList = jobSpecs[iJobs:iJobs + nJobs]
                iJobs += nJobs
                # collect jobs to update or check
                jobListToSkip = []
                jobListToUpdate = []
                jobListToCheck = []
                retList = []
                for tmpJobSpec in jobList:
                    if tmpJobSpec.computingSite not in hbSuppressMap:
                        queueConfig = self.queueConfigMapper.get_queue(tmpJobSpec.computingSite,
                                                                       tmpJobSpec.configID)
                        hbSuppressMap[tmpJobSpec.computingSite] = queueConfig.get_no_heartbeat_status()
                    # heartbeat is suppressed
                    if tmpJobSpec.get_status() in hbSuppressMap[tmpJobSpec.computingSite] and \
                            not tmpJobSpec.not_suppress_heartbeat():
                        # check running job to detect lost heartbeat
                        if tmpJobSpec.status == 'running':
                            jobListToCheck.append(tmpJobSpec)
                        else:
                            jobListToSkip.append(tmpJobSpec)
                            retList.append({'StatusCode': 0, 'command': None})
                    else:
                        jobListToUpdate.append(tmpJobSpec)
                sw.reset()
                retList += self.communicator.check_jobs(jobListToCheck)
                mainLog.debug('check_jobs for {0} jobs {1}'.format(len(jobListToCheck), sw.get_elapsed_time()))
                sw.reset()
                retList += self.communicator.update_jobs(jobListToUpdate, self.get_pid())
                mainLog.debug('update_jobs for {0} jobs took {1}'.format(len(jobListToUpdate),
                                                                              sw.get_elapsed_time()))
                # logging
                for tmpJobSpec, tmpRet in zip(jobListToSkip+jobListToCheck+jobListToUpdate, retList):
                    if tmpRet['StatusCode'] == 0:
                        if tmpJobSpec in jobListToUpdate:
                            mainLog.debug('updated PandaID={0} status={1}'.format(tmpJobSpec.PandaID,
                                                                                  tmpJobSpec.status))
                        else:
                            mainLog.debug('skip updating PandaID={0} status={1}'.format(tmpJobSpec.PandaID,
                                                                                        tmpJobSpec.status))
                        # release job
                        tmpJobSpec.propagatorLock = None
                        if tmpJobSpec.is_final_status() and tmpJobSpec.status == tmpJobSpec.get_status():
                            # unset to disable further updating
                            tmpJobSpec.propagatorTime = None
                            tmpJobSpec.subStatus = 'done'
                            tmpJobSpec.modificationTime = datetime.datetime.utcnow()
                        elif tmpJobSpec.is_final_status() and not tmpJobSpec.all_events_done():
                            # trigger next propagation to update remaining events
                            tmpJobSpec.trigger_propagation()
                        else:
                            # check event availability
                            if tmpJobSpec.status == 'starting' and 'eventService' in tmpJobSpec.jobParams and \
                                    tmpJobSpec.subStatus != 'submitted':
                                tmpEvStat, tmpEvRet = self.communicator.check_event_availability(tmpJobSpec)
                                if tmpEvStat:
                                    if tmpEvRet is not None:
                                        tmpJobSpec.nRemainingEvents = tmpEvRet
                                    if tmpEvRet == 0:
                                        mainLog.debug('kill PandaID={0} due to no event'.format(tmpJobSpec.PandaID))
                                        tmpRet['command'] = 'tobekilled'
                            # got kill command
                            if 'command' in tmpRet and tmpRet['command'] in ['tobekilled']:
                                nWorkers = self.dbProxy.kill_workers_with_job(tmpJobSpec.PandaID)
                                if nWorkers == 0:
                                    # no workers
                                    tmpJobSpec.status = 'cancelled'
                                    tmpJobSpec.subStatus = 'killed'
                                    tmpJobSpec.set_pilot_error(PilotErrors.ERR_PANDAKILL,
                                                               PilotErrors.pilotError[PilotErrors.ERR_PANDAKILL])
                                    tmpJobSpec.stateChangeTime = datetime.datetime.utcnow()
                                    tmpJobSpec.trigger_propagation()
                        self.dbProxy.update_job(tmpJobSpec, {'propagatorLock': self.get_pid()})
                    else:
                        mainLog.error('failed to update PandaID={0} status={1}'.format(tmpJobSpec.PandaID,
                                                                                       tmpJobSpec.status))
            mainLog.debug('getting workers to propagate')
            sw.reset()
            workSpecs = self.dbProxy.get_workers_to_propagate(harvester_config.propagator.maxWorkers,
                                                              harvester_config.propagator.updateInterval)
            mainLog.debug('got {0} workers {1}'.format(len(workSpecs), sw.get_elapsed_time()))
            # update workers in central database
            sw.reset()
            iWorkers = 0
            nWorkers = harvester_config.propagator.nWorkersInBulk
            while iWorkers < len(workSpecs):
                workList = workSpecs[iWorkers:iWorkers + nWorkers]
                iWorkers += nWorkers
                retList, tmpErrStr = self.communicator.update_workers(workList)
                # logging
                if retList is None:
                    mainLog.error('failed to update workers with {0}'.format(tmpErrStr))
                else:
                    for tmpWorkSpec, tmpRet in zip(workList, retList):
                        if tmpRet:
                            mainLog.debug('updated workerID={0} status={1}'.format(tmpWorkSpec.workerID,
                                                                                   tmpWorkSpec.status))
                            # update logs
                            for logFilePath, logOffset, logSize, logRemoteName in \
                                    tmpWorkSpec.get_log_files_to_upload():
                                with open(logFilePath, 'rb') as logFileObj:
                                    tmpStat, tmpErr = self.communicator.upload_file(logRemoteName, logFileObj,
                                                                                    logOffset, logSize)
                                    if tmpStat:
                                        tmpWorkSpec.update_log_files_to_upload(logFilePath, logOffset+logSize)
                            # disable further update
                            if tmpWorkSpec.is_final_status():
                                tmpWorkSpec.disable_propagation()
                            self.dbProxy.update_worker(tmpWorkSpec, {'workerID': tmpWorkSpec.workerID})
                        else:
                            mainLog.error('failed to update workerID={0} status={1}'.format(tmpWorkSpec.workerID,
                                                                                            tmpWorkSpec.status))
            mainLog.debug('update_workers for {0} workers took {1}'.format(iWorkers,
                                                                      sw.get_elapsed_time()))
            mainLog.debug('getting commands')
            commandSpecs = self.dbProxy.get_commands_for_receiver('propagator')
            mainLog.debug('got {0} commands'.format(len(commandSpecs)))
            for commandSpec in commandSpecs:
                if commandSpec.command.startswith(CommandSpec.COM_reportWorkerStats):
                    # get worker stats
                    siteName = commandSpec.command.split(':')[-1]
                    workerStats = self.dbProxy.get_worker_stats(siteName)
                    if len(workerStats) == 0:
                        mainLog.error('failed to get worker stats for {0}'.format(siteName))
                    else:
                        # report worker stats
                        tmpRet, tmpStr = self.communicator.update_worker_stats(siteName, workerStats)
                        if tmpRet:
                            mainLog.debug('updated worker stats (command) for {0}'.format(siteName))
                        else:
                            mainLog.error('failed to update worker stats (command) for {0} err={1}'.format(siteName,
                                                                                                           tmpStr))

            if not self._last_stats_update or time.time() - self._last_stats_update > STATS_PERIOD:

                # get active UPS queues. PanDA server needs to know about them and which harvester instance is taking
                # care of them
                active_ups_queues = self.queueConfigMapper.get_active_ups_queues()

                # update worker stats for all sites
                worker_stats_bulk = self.dbProxy.get_worker_stats_bulk(active_ups_queues)
                if not worker_stats_bulk:
                    mainLog.error('failed to get worker stats in bulk')
                else:
                    for site_name in worker_stats_bulk:
                        tmp_ret, tmp_str = self.communicator.update_worker_stats(site_name,
                                                                                 worker_stats_bulk[site_name])
                        if tmp_ret:
                            mainLog.debug('update of worker stats (bulk) for {0}'.format(site_name))
                            self._last_stats_update = time.time()
                        else:
                            mainLog.error('failed to update worker stats (bulk) for {0} err={1}'.format(site_name,
                                                                                                        tmp_str))

            if not self._last_metrics_update \
                    or datetime.datetime.utcnow() - self._last_metrics_update > datetime.timedelta(seconds=METRICS_PERIOD):
                # get latest metrics from DB
                service_metrics_list = self.dbProxy.get_service_metrics(self._last_metrics_update)
                if not service_metrics_list:
                    mainLog.error('failed to get service metrics')
                    self._last_metrics_update = datetime.datetime.utcnow()
                else:
                    tmp_ret, tmp_str = self.communicator.update_service_metrics(service_metrics_list)
                    if tmp_ret:
                        mainLog.debug('update of service metrics OK')
                        self._last_metrics_update = datetime.datetime.utcnow()
                    else:
                        mainLog.error('failed to update service metrics err={0}'.format(tmp_str))

            # send dialog messages
            mainLog.debug('getting dialog messages to propagate')
            try:
                maxDialogs = harvester_config.propagator.maxDialogs
            except Exception:
                maxDialogs = 50
            diagSpecs = self.dbProxy.get_dialog_messages_to_send(maxDialogs,
                                                                 harvester_config.propagator.lockInterval)
            mainLog.debug('got {0} dialogs'.format(len(diagSpecs)))
            if len(diagSpecs) > 0:
                tmpStat, tmpStr = self.communicator.send_dialog_messages(diagSpecs)
                if tmpStat:
                    diagIDs = [diagSpec.diagID for diagSpec in diagSpecs]
                    self.dbProxy.delete_dialog_messages(diagIDs)
                    mainLog.debug('sent {0} dialogs'.format(len(diagSpecs)))

                else:
                    mainLog.error('failed to send dialogs err={0}'.format(tmpStr))
            if sw_main.get_elapsed_time_in_sec() > harvester_config.propagator.lockInterval:
                mainLog.warning('a single cycle was longer than lockInterval. done' + sw_main.get_elapsed_time())
            else:
                mainLog.debug('done' + sw_main.get_elapsed_time())
            # check if being terminated
            if self.terminated(harvester_config.propagator.sleepTime):
                mainLog.debug('terminated')
                return
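
Both the job and worker updates above are sent to the central server in fixed-size chunks (nJobsInBulk, nWorkersInBulk) by slicing the fetched list. A minimal stand-alone version of that slicing loop, with an assumed helper name iterate_in_chunks:

def iterate_in_chunks(items, chunk_size):
    # yield consecutive slices of at most chunk_size items
    index = 0
    while index < len(items):
        yield items[index:index + chunk_size]
        index += chunk_size

for chunk in iterate_in_chunks(list(range(7)), 3):
    print(chunk)  # [0, 1, 2] then [3, 4, 5] then [6]
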
class Propagator(AgentBase):
    # constructor
    def __init__(self, communicator, queue_config_mapper, single_mode=False):
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.communicator = communicator
        self.queueConfigMapper = queue_config_mapper
        self._last_stats_update = None

    # main loop
    def run(self):
        while True:
            sw = core_utils.get_stopwatch()
            mainLog = core_utils.make_logger(_logger,
                                             'id={0}'.format(self.ident),
                                             method_name='run')
            mainLog.debug('getting jobs to propagate')
            jobSpecs = self.dbProxy.get_jobs_to_propagate(
                harvester_config.propagator.maxJobs,
                harvester_config.propagator.lockInterval,
                harvester_config.propagator.updateInterval, self.ident)
            mainLog.debug('got {0} jobs'.format(len(jobSpecs)))
            # update jobs in central database
            iJobs = 0
            nJobs = harvester_config.propagator.nJobsInBulk
            hbSuppressMap = dict()
            while iJobs < len(jobSpecs):
                jobList = jobSpecs[iJobs:iJobs + nJobs]
                iJobs += nJobs
                # collect jobs to update or check
                jobListToSkip = []
                jobListToUpdate = []
                jobListToCheck = []
                retList = []
                for tmpJobSpec in jobList:
                    if tmpJobSpec.computingSite not in hbSuppressMap:
                        queueConfig = self.queueConfigMapper.get_queue(tmpJobSpec.computingSite)
                        hbSuppressMap[tmpJobSpec.computingSite] = queueConfig.get_no_heartbeat_status()
                    # heartbeat is suppressed
                    if tmpJobSpec.status in hbSuppressMap[tmpJobSpec.computingSite]:
                        # check running job to detect lost heartbeat
                        if tmpJobSpec.status == 'running':
                            jobListToCheck.append(tmpJobSpec)
                        else:
                            jobListToSkip.append(tmpJobSpec)
                            retList.append({'StatusCode': 0, 'command': None})
                    else:
                        jobListToUpdate.append(tmpJobSpec)
                retList += self.communicator.check_jobs(jobListToCheck)
                retList += self.communicator.update_jobs(jobListToUpdate)
                # logging
                for tmpJobSpec, tmpRet in zip(
                        jobListToSkip + jobListToCheck + jobListToUpdate,
                        retList):
                    if tmpRet['StatusCode'] == 0:
                        if tmpJobSpec in jobListToUpdate:
                            mainLog.debug(
                                'updated PandaID={0} status={1}'.format(
                                    tmpJobSpec.PandaID, tmpJobSpec.status))
                        else:
                            mainLog.debug(
                                'skip updating PandaID={0} status={1}'.format(
                                    tmpJobSpec.PandaID, tmpJobSpec.status))
                        # release job
                        tmpJobSpec.propagatorLock = None
                        if tmpJobSpec.is_final_status() and tmpJobSpec.status == tmpJobSpec.get_status():
                            # unset to disable further updating
                            tmpJobSpec.propagatorTime = None
                            tmpJobSpec.subStatus = 'done'
                        else:
                            # check event availability
                            if tmpJobSpec.status == 'starting' and 'eventService' in tmpJobSpec.jobParams and \
                                    tmpJobSpec.subStatus != 'submitted':
                                tmpEvStat, tmpEvRet = self.communicator.check_event_availability(tmpJobSpec)
                                if tmpEvStat and tmpEvRet == 0:
                                    mainLog.debug('kill PandaID={0} due to no event'.format(tmpJobSpec.PandaID))
                                    tmpRet['command'] = 'tobekilled'
                            # got kill command
                            if 'command' in tmpRet and tmpRet['command'] in ['tobekilled']:
                                nWorkers = self.dbProxy.kill_workers_with_job(tmpJobSpec.PandaID)
                                if nWorkers == 0:
                                    # no remaining workers
                                    tmpJobSpec.status = 'cancelled'
                                    tmpJobSpec.subStatus = 'killed'
                                    tmpJobSpec.stateChangeTime = datetime.datetime.utcnow()
                                    tmpJobSpec.trigger_propagation()
                        self.dbProxy.update_job(tmpJobSpec,
                                                {'propagatorLock': self.ident})
                    else:
                        mainLog.error(
                            'failed to update PandaID={0} status={1}'.format(
                                tmpJobSpec.PandaID, tmpJobSpec.status))
            mainLog.debug('getting workers to propagate')
            workSpecs = self.dbProxy.get_workers_to_propagate(
                harvester_config.propagator.maxWorkers,
                harvester_config.propagator.updateInterval)
            mainLog.debug('got {0} workers'.format(len(workSpecs)))
            # update workers in central database
            iWorkers = 0
            nWorkers = harvester_config.propagator.nWorkersInBulk
            while iWorkers < len(workSpecs):
                workList = workSpecs[iWorkers:iWorkers + nWorkers]
                iWorkers += nWorkers
                retList, tmpErrStr = self.communicator.update_workers(workList)
                # logging
                if retList is None:
                    mainLog.error(
                        'failed to update workers with {0}'.format(tmpErrStr))
                else:
                    for tmpWorkSpec, tmpRet in zip(workList, retList):
                        if tmpRet:
                            mainLog.debug(
                                'updated workerID={0} status={1}'.format(
                                    tmpWorkSpec.workerID, tmpWorkSpec.status))
                            # update logs
                            for logFilePath, logOffset, logSize, logRemoteName in \
                                    tmpWorkSpec.get_log_files_to_upload():
                                with open(logFilePath, 'rb') as logFileObj:
                                    tmpStat, tmpErr = self.communicator.upload_file(
                                        logRemoteName, logFileObj, logOffset,
                                        logSize)
                                    if tmpStat:
                                        tmpWorkSpec.update_log_files_to_upload(
                                            logFilePath, logOffset + logSize)
                            # disable further update
                            if tmpWorkSpec.is_final_status():
                                tmpWorkSpec.disable_propagation()
                            self.dbProxy.update_worker(
                                tmpWorkSpec,
                                {'workerID': tmpWorkSpec.workerID})
                        else:
                            mainLog.error(
                                'failed to update workerID={0} status={1}'.format(
                                    tmpWorkSpec.workerID, tmpWorkSpec.status))
            mainLog.debug('getting commands')
            commandSpecs = self.dbProxy.get_commands_for_receiver('propagator')
            mainLog.debug('got {0} commands'.format(len(commandSpecs)))
            for commandSpec in commandSpecs:
                if commandSpec.command.startswith(
                        CommandSpec.COM_reportWorkerStats):
                    # get worker stats
                    siteName = commandSpec.command.split(':')[-1]
                    workerStats = self.dbProxy.get_worker_stats(siteName)
                    if len(workerStats) == 0:
                        mainLog.error(
                            'failed to get worker stats for {0}'.format(
                                siteName))
                    else:
                        # report worker stats
                        tmpRet, tmpStr = self.communicator.update_worker_stats(
                            siteName, workerStats)
                        if tmpRet:
                            mainLog.debug(
                                'updated worker stats (command) for {0}'.format(siteName))
                        else:
                            mainLog.error(
                                'failed to update worker stats (command) for {0} err={1}'
                                .format(siteName, tmpStr))

            if not self._last_stats_update or time.time() - self._last_stats_update > STATS_PERIOD:
                # update worker stats for all sites
                worker_stats_bulk = self.dbProxy.get_worker_stats_bulk()
                if not worker_stats_bulk:
                    mainLog.error('failed to get worker stats in bulk')
                else:
                    for site_name in worker_stats_bulk:
                        tmp_ret, tmp_str = self.communicator.update_worker_stats(
                            site_name, worker_stats_bulk[site_name])
                        if tmp_ret:
                            mainLog.debug(
                                'update of worker stats (bulk) for {0}'.format(
                                    site_name))
                            self._last_stats_update = time.time()
                        else:
                            mainLog.error(
                                'failed to update worker stats (bulk) for {0} err={1}'
                                .format(site_name, tmp_str))

            mainLog.debug('done' + sw.get_elapsed_time())
            # check if being terminated
            if self.terminated(harvester_config.propagator.sleepTime):
                mainLog.debug('terminated')
                return
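
The heartbeat-suppression logic in both Propagator versions partitions each chunk into skip/check/update lists, appending a synthetic OK result for skipped jobs so that the later zip over skip+check+update stays aligned with the result list. A simplified sketch of that partitioning, with plain dicts standing in for JobSpec objects (names are illustrative):

def partition_jobs(jobs, suppressed_statuses):
    # jobs are plain dicts here; Harvester uses JobSpec objects instead
    to_skip, to_check, to_update, results = [], [], [], []
    for job in jobs:
        if job['status'] in suppressed_statuses:
            if job['status'] == 'running':
                to_check.append(job)       # verify running jobs to catch lost heartbeats
            else:
                to_skip.append(job)        # no server update needed
                results.append({'StatusCode': 0, 'command': None})
        else:
            to_update.append(job)          # report status to the central server
    return to_skip, to_check, to_update, results

jobs = [{'status': 'running'}, {'status': 'finished'}, {'status': 'starting'}]
print(partition_jobs(jobs, {'running', 'finished'}))
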
class EventFeeder(AgentBase):
    # constructor
    def __init__(self, communicator, queue_config_mapper, single_mode=False):
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.queueConfigMapper = queue_config_mapper
        self.communicator = communicator
        self.pluginFactory = PluginFactory()

    # main loop
    def run(self):
        lockedBy = 'eventfeeder-{0}'.format(self.get_pid())
        while True:
            mainLog = self.make_logger(_logger, 'id={0}'.format(lockedBy), method_name='run')
            mainLog.debug('getting workers to feed events')
            workSpecsPerQueue = self.dbProxy.get_workers_to_feed_events(harvester_config.eventfeeder.maxWorkers,
                                                                        harvester_config.eventfeeder.lockInterval,
                                                                        lockedBy)
            mainLog.debug('got {0} queues'.format(len(workSpecsPerQueue)))
            # loop over all workers
            for queueName, workSpecList in iteritems(workSpecsPerQueue):
                tmpQueLog = self.make_logger(_logger, 'queue={0}'.format(queueName), method_name='run')
                # check queue
                if not self.queueConfigMapper.has_queue(queueName):
                    tmpQueLog.error('config not found')
                    continue
                # get queue
                queueConfig = self.queueConfigMapper.get_queue(queueName)
                if hasattr(queueConfig, 'scatteredEvents') and queueConfig.scatteredEvents:
                    scattered = True
                else:
                    scattered = False
                # get plugin
                messenger = self.pluginFactory.get_plugin(queueConfig.messenger)
                # loop over all workers
                for workSpec in workSpecList:
                    tmpLog = core_utils.make_logger(_logger, 'workerID={0}'.format(workSpec.workerID),
                                                    method_name='run')
                    # lock worker again
                    lockedFlag = self.dbProxy.lock_worker_again_to_feed_events(workSpec.workerID, lockedBy)
                    if not lockedFlag:
                        tmpLog.debug('skipped since locked by another')
                        continue
                    # get events
                    tmpLog.debug('get events')
                    tmpStat, events = self.communicator.get_event_ranges(workSpec.eventsRequestParams,
                                                                         scattered)
                    # failed
                    if tmpStat is False:
                        tmpLog.error('failed to get events with {0}'.format(events))
                        continue
                    # lock worker again
                    lockedFlag = self.dbProxy.lock_worker_again_to_feed_events(workSpec.workerID, lockedBy)
                    if not lockedFlag:
                        tmpLog.debug('skipped before feeding since locked by another')
                        continue
                    tmpStat = messenger.feed_events(workSpec, events)
                    # failed
                    if tmpStat is False:
                        tmpLog.error('failed to feed events')
                        continue
                    # dump
                    for pandaID, eventList in iteritems(events):
                        try:
                            nRanges = workSpec.eventsRequestParams[pandaID]['nRanges']
                        except Exception:
                            nRanges = None
                        tmpLog.debug('got {0} events for PandaID={1} while getting {2} events'.format(len(eventList),
                                                                                                      pandaID,
                                                                                                      nRanges))
                        # disable multi workers
                        if workSpec.mapType == WorkSpec.MT_MultiWorkers:
                            if len(eventList) == 0 or (nRanges is not None and len(eventList) < nRanges):
                                tmpStat = self.dbProxy.disable_multi_workers(pandaID)
                                if tmpStat == 1:
                                    tmpStr = 'disabled MultiWorkers for PandaID={0}'.format(pandaID)
                                    tmpLog.debug(tmpStr)
                    # update worker
                    workSpec.eventsRequest = WorkSpec.EV_useEvents
                    workSpec.eventsRequestParams = None
                    workSpec.eventFeedTime = None
                    workSpec.eventFeedLock = None
                    # update local database
                    tmpStat = self.dbProxy.update_worker(workSpec, {'eventFeedLock': lockedBy})
                    tmpLog.debug('done with {0}'.format(tmpStat))
                tmpQueLog.debug('done')
            mainLog.debug('done')
            # check if being terminated
            if self.terminated(harvester_config.eventfeeder.sleepTime):
                mainLog.debug('terminated')
                return
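
The per-PandaID dump above also decides whether to disable multi-worker mode: if a job received no event ranges, or fewer than the nRanges it requested, disable_multi_workers is called for it. A small stand-alone sketch of that decision, with plain dicts replacing the WorkSpec request parameters and a hypothetical helper name:

def panda_ids_to_disable(events, request_params):
    # return PandaIDs that got fewer event ranges than requested (or none at all)
    to_disable = []
    for panda_id, event_list in events.items():
        n_ranges = request_params.get(panda_id, {}).get('nRanges')
        if len(event_list) == 0 or (n_ranges is not None and len(event_list) < n_ranges):
            to_disable.append(panda_id)
    return to_disable

events = {1001: ['r1', 'r2'], 1002: []}
params = {1001: {'nRanges': 2}, 1002: {'nRanges': 3}}
print(panda_ids_to_disable(events, params))  # [1002]
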