Exemple #1
0
	def __init__(self):
		GangaThread.__init__(self, 'LGI_Pilot')
		self.log = getLogger('LGI.Pilot.Thread')
		if not os.path.exists(config['PilotScript']):
			self.log.error('pilotjob script not found: '+config['PilotScript'])
		if not os.path.exists(config['PilotDist']):
			self.log.error('pilotjob tarball not found: '+config['PilotDist'])
Exemple #2
0
 def startup(self):
     """ Start a background thread that periodically run()s"""
     super(TaskRegistry, self).startup()
     from Ganga.Core.GangaThread import GangaThread
     self._main_thread = GangaThread(name="GangaTasks",
                                     target=self._thread_main)
     self._main_thread.start()
 def __init__(self, name):
     is_critical = not config['enable_multiThreadMon']
     GangaThread.__init__(self, name, critical=is_critical)
     self._currently_running_command = False
     self._running_cmd = None
     self._running_args = None
     self._thread_name = name
Exemple #4
0
 def __init__(self, session_name, sdir, fn, repo, afs):
     GangaThread.__init__(self, name='SessionLockRefresher', critical=True)
     self.session_name = session_name
     self.sdir = sdir
     self.fns = [fn]
     self.repos = [repo]
     self.afs = afs
     self.FileCheckTimes = {}
Exemple #5
0
 def __init__(self, session_name, sdir, fn, repo, afs):
     GangaThread.__init__(self, name="SessionLockRefresher", critical=False)
     self.session_name = session_name
     self.sdir = sdir
     self.fns = [fn]
     self.repos = [repo]
     self.afs = afs
     self.FileCheckTimes = {}
Exemple #6
0
def resolve_file_locations(dataset, sites=None, cloud=None, token='ATLASDATADISK', debug=False):
    '''
    Summarize the locations of files (in terms of sitename) of a dataset.
    If the sites argument is given, ignoring cloud and token arguments;
    otherwise using cloud and toke to retrieve sites from TiersOfATLAS.
    '''

    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}
    # preparing the queue for querying lfn 
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(id):
        dq2 = DQ2()
        while not wq.empty():
            try:
                site = wq.get(block=True, timeout=1)
                replicaInfo = dq2.listFileReplicas(site, dataset)
                logger.debug('resolving dataset files at %s, no files: %d' % (site,len(replicaInfo[0]['content'])) )
                if replicaInfo:
                    mylock.acquire()
                    for guid in replicaInfo[0]['content']:
                        if guid not in replicas:
                            replicas[guid] = []
                        replicas[guid].append(site)
                    mylock.release()
            except Empty:
                pass
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                pass

    threads = []
    nthread = len(sites)
    if nthread > 10: nthread = 10

    for i in range(nthread):
        t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
#        t.setDaemon(False)
        threads.append(t)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas
	def __init__(self):
		GangaThread.__init__(self, 'LGI_Resource')
		self.log = getLogger('LGI.Resource.Thread')
		config = Config.getConfig('LGI')
		if not os.path.exists(config['PilotDist']):
			self.log.error('cannot connect to LGI server: pilotjob tarball not found: '+config['PilotDist'])
		self.res = LGI.Resource(config['PilotDist'])
		# number of queued LGI jobs
		self.queued = None
    def __init__(self, registry):
        GangaThread.__init__(self, name="JobRegistry_Monitor")
        log.debug("Constructing JobRegistry_Monitor")
        self.setDaemon(True)
        self.registry = registry
        self.__sleepCounter = 0.0
        self.__updateTimeStamp = time.time()
        self.progressCallback = lambda x: None
        self.callbackHookDict = {}
        self.clientCallbackDict = {}
        self.alive = True
        self.enabled = False
        # run the monitoring loop continuosly (steps=-1) or just a specified
        # number of steps(>0)
        self.steps = -1
        self.activeBackends = {}
        self.updateJobStatus = None
        self.errors = {}
        # Create the default backend update method and add to callback hook.
        self.makeUpdateJobStatusFunction()

        # Add credential checking to monitoring loop
        for _credObj in Credentials._allCredentials.itervalues():
            log.debug("Setting callback hook for %s" % getName(_credObj))
            self.setCallbackHook(self.makeCredCheckJobInsertor(_credObj), {},
                                 True,
                                 timeout=config['creds_poll_rate'])

        # Add low disk-space checking to monitoring loop
        log.debug("Setting callback hook for disk space checking")
        self.setCallbackHook(self.diskSpaceCheckJobInsertor, {},
                             True,
                             timeout=config['diskspace_poll_rate'])

        # synch objects
        # main loop mutex
        self.__mainLoopCond = threading.Condition()
        # cleanup synch
        self.__cleanUpEvent = threading.Event()
        # asynch mon loop running synch
        self.__monStepsTerminatedEvent = threading.Event()
        # event to signal the break of job lists iterators
        self.stopIter = threading.Event()
        self.stopIter.set()

        self._runningNow = False
    def __init__(self, registry):
        GangaThread.__init__(self, name="JobRegistry_Monitor")
        log.debug("Constructing JobRegistry_Monitor")
        self.setDaemon(True)
        self.registry = registry
        self.__sleepCounter = 0.0
        self.__updateTimeStamp = time.time()
        self.progressCallback = lambda x: None
        self.callbackHookDict = {}
        self.clientCallbackDict = {}
        self.alive = True
        self.enabled = False
        # run the monitoring loop continuosly (steps=-1) or just a specified
        # number of steps(>0)
        self.steps = -1
        self.activeBackends = {}
        self.updateJobStatus = None
        self.errors = {}

        self.updateDict_ts = SynchronisedObject(UpdateDict())

        # Create the default backend update method and add to callback hook.
        self.makeUpdateJobStatusFunction()

        # Add credential checking to monitoring loop
        for _credObj in Credentials._allCredentials.itervalues():
            log.debug("Setting callback hook for %s" % getName(_credObj))
            self.setCallbackHook(self.makeCredCheckJobInsertor(_credObj), {}, True, timeout=config['creds_poll_rate'])

        # Add low disk-space checking to monitoring loop
        log.debug("Setting callback hook for disk space checking")
        self.setCallbackHook(self.diskSpaceCheckJobInsertor, {}, True, timeout=config['diskspace_poll_rate'])

        # synch objects
        # main loop mutex
        self.__mainLoopCond = threading.Condition()
        # cleanup synch
        self.__cleanUpEvent = threading.Event()
        # asynch mon loop running synch
        self.__monStepsTerminatedEvent = threading.Event()
        # event to signal the break of job lists iterators
        self.stopIter = threading.Event()
        self.stopIter.set()

        self._runningNow = False
Exemple #10
0
    def __resolve_containers(self, containers, nthreads=10):
        '''resolving dataset containers'''

        datasets = {} 
        
        wq = Queue(len(containers))
        for ds in containers:
            wq.put(ds)

        mylock = Lock()
        def worker(id):
            dq2 = DQ2()
            while not wq.empty():
                try:
                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset container: %s' % (id, ds))
       
                    datasets[ds] = []
 
                    ds_tmp = dq2.listDatasetsInContainer(ds)

                    mylock.acquire()
                    datasets[ds] = ds_tmp
                    mylock.release()
                except DQException as err:
                    logger.warning(str(err))
                except Empty:
                    pass

        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthreads):
            t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
#            t.setDaemon(False)
            threads.append(t)
        
        for t in threads:
            t.start()
        
        for t in threads:
            t.join()
        profiler.check('resolving %d containers' % len(containers))

        return datasets
Exemple #11
0
    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks", target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self)
        self.flush_thread.start()
Exemple #12
0
 def start(self):
     config = Config.getConfig("LGI")
     if config["StatsInterval"] == 0:
         self.log.debug("Not starting LGI stats thread because [LGI]StatsInterval is zero")
         return
     if not config["StatsFile"]:
         self.log.debug("Not starting LGI stats thread because [LGI]StatsFile is empty")
         return
     if config["Enable"] is False:
         self.log.debug("Not starting LGI stats thread because [LGI]Enable is False")
         return False
     return GangaThread.start(self)
Exemple #13
0
    def __init_worker_threads(self, num_worker_threads, worker_thread_prefix):
        if len(self.__worker_threads) > 0:
            logger.warning("Threads already started!")
            for i in self.__worker_threads:
                logger.info("Worker Thread: %s is already running!" %
                            i.gangaName)
            return

        for i in range(num_worker_threads):
            t = GangaThread(name=worker_thread_prefix + str(i),
                            auto_register=False,
                            target=self.__worker_thread)
            t._Thread__args = (t, )
            t._name = worker_thread_prefix + str(i)
            t._command = 'idle'
            t._timeout = 'N/A'
            t.start()
            self.__worker_threads.append(t)
Exemple #14
0
    def __init_worker_threads(self, num_worker_threads, worker_thread_prefix):
        if len(self.__worker_threads) > 0:
            logger.warning("Threads already started!")
            for i in self.__worker_threads:
                logger.info("Worker Thread: %s is already running!" % i.gangaName)
            return

        for i in range(num_worker_threads):
            t = GangaThread(name=worker_thread_prefix + str(i), auto_register=False, target=self.__worker_thread)
            t._Thread__args = (t,)
            t._name = worker_thread_prefix + str(i)
            t._command = "idle"
            t._timeout = "N/A"
            t.start()
            self.__worker_threads.append(t)
Exemple #15
0
    def __resolve_containers(self, containers, nthreads=10):
        '''resolving dataset containers'''

        datasets = {}

        wq = Queue(len(containers))
        for ds in containers:
            wq.put(ds)

        mylock = Lock()

        def worker(id):
            dq2 = DQ2()
            while not wq.empty():
                try:
                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset container: %s' %
                                 (id, ds))

                    datasets[ds] = []

                    ds_tmp = dq2.listDatasetsInContainer(ds)

                    mylock.acquire()
                    datasets[ds] = ds_tmp
                    mylock.release()
                except DQException as err:
                    logger.warning(str(err))
                except Empty:
                    pass

        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthreads):
            t = GangaThread(name='stager_ds_w_%d' % i,
                            target=worker,
                            kwargs={'id': i})
            #            t.setDaemon(False)
            threads.append(t)

        for t in threads:
            t.start()

        for t in threads:
            t.join()
        profiler.check('resolving %d containers' % len(containers))

        return datasets
Exemple #16
0
class TaskRegistry(Registry):
    def __init__(self, name, doc):

        super(TaskRegistry, self).__init__(name, doc)

        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        return self.stored_slice

    def getProxy(self):
        return self.stored_proxy

    def getIndexCache(self, obj):
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if hasattr(obj, cv):
                c[cv] = getattr(obj, cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread """
        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            from Ganga.Core import monitoring_component
            if (not monitoring_component is None
                    and monitoring_component.enabled
                ) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None and not self._main_thread.should_stop(
        ):

            # If monitoring is enabled (or forced for Tasks) loop over each one and update
            if (config['ForceTaskMonitoring'] or monitoring_component.enabled
                ) and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                            % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" %
                         str(config['TaskLoopFrequency']))

            # Sleep interruptible for 10 seconds
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self, 'TaskRegistryFlusher')
        self.flush_thread.start()

    def shutdown(self):
        self.flush_thread.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
Exemple #17
0
    def get_complete_files_replicas(self, nthread=10, diskOnly=True):
        '''Gets a comprehensive dataset information about the contents and the
           location of COMPLETE replicas'''

        if not self.complete_files_replicas:

            re_tapeSite = re.compile('.*TAPE$') 


            ds_info = {}
            self.__expand_datasets()
         
            wq = Queue(len(self.dataset))
            for ds in self.dataset:
                wq.put(ds)
         
            mylock = Lock()
            def worker(id):
         
                dq2 = DQ2()
                while not wq.empty():
                    try:
         
                        ds = wq.get(block=True, timeout=1)
                        logger.debug('worker id: %d on dataset: %s' % (id, ds))
         
                        # get contents (guids) of the complete dataset
                        contents = dq2.listFilesInDataset(ds)
         
                        # get locations of the complete dataset replicas
                        locations = dq2.listDatasetReplicas(ds,complete=1)
         
                        vuid = None
                        try:
                            vuid = locations.keys()[0]
                        except IndexError as err:
                            pass
         
                        mylock.acquire()
         
                        # updating ds_info hastable
                        if vuid:
                            ds_info[ds] = []
                            ds_sites = []
 
                            if diskOnly:
                                for site in locations[vuid][1]:
                                    if not re_tapeSite.match(site):
                                        ds_sites.append(site)
                            else:
                                ds_sites = locations[vuid][1]
 
                            ds_info[ds] += [ contents[0], ds_sites ]
                        else:
                            logger.warning('dataset not available: %s' % ds)
         
                        mylock.release()
         
                    except DQException as err:
                        logger.warning(str(err))
         
                    except Empty:
                        pass
         
            # prepare and run the query threads
            profiler = ElapsedTimeProfiler(logger=logger)
            profiler.start()
            threads = []
            for i in range(nthread):
                t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'id': i})
#                t.setDaemon(False)
                threads.append(t)
         
            for t in threads:
                t.start()
         
            for t in threads:
                t.join()

            self.complete_files_replicas = ds_info

            profiler.check( 'information collected: %d datasets' % ( len(self.complete_files_replicas.keys()) ) )
        else:
            logger.debug('using cached complete_files_replicas')
            pass
 
        return self.complete_files_replicas 
 def __init__(self, name):
     GangaThread.__init__(self, name)
Exemple #19
0
    for job in jobs:
      for subjob in job.subjobs:
        try:
          process_subjob(job,subjob,thread_dirac_server)
        except:
          logger.warning('Exception in process_subjob:')
          logger.warning(sys.exc_info()[0])
          logger.warning(sys.exc_info()[1])
      if test_paused() or ct.should_stop():
        break

    test_sleep(10)

  logger.info('HC Monitor Thread: Disconnected from DB')

ct = GangaThread(name="HCMonitorThread", target=hc_monitor_thread)

logger.info('Connected to DB')

if len(jobs):
  # Wait one minute, to let the minute counter update, and avoid
  # problems with the at command submitting on second = 0
  test_sleep(60)
  ct.start()
  test_sleep(60)
  while (test_active() and not test_paused()):

    test = Test.objects.get(pk=testid)
#    logger.info('HC Copy Thread: TOP OF MAIN LOOP')
    for job in jobs:
      if not test_active() or test_paused() or ct.should_stop():
Exemple #20
0
    def get_complete_files_replicas(self, nthread=10, diskOnly=True):
        '''Gets a comprehensive dataset information about the contents and the
           location of COMPLETE replicas'''

        if not self.complete_files_replicas:

            re_tapeSite = re.compile('.*TAPE$')

            ds_info = {}
            self.__expand_datasets()

            wq = Queue(len(self.dataset))
            for ds in self.dataset:
                wq.put(ds)

            mylock = Lock()

            def worker(id):

                dq2 = DQ2()
                while not wq.empty():
                    try:

                        ds = wq.get(block=True, timeout=1)
                        logger.debug('worker id: %d on dataset: %s' % (id, ds))

                        # get contents (guids) of the complete dataset
                        contents = dq2.listFilesInDataset(ds)

                        # get locations of the complete dataset replicas
                        locations = dq2.listDatasetReplicas(ds, complete=1)

                        vuid = None
                        try:
                            vuid = locations.keys()[0]
                        except IndexError as err:
                            pass

                        mylock.acquire()

                        # updating ds_info hastable
                        if vuid:
                            ds_info[ds] = []
                            ds_sites = []

                            if diskOnly:
                                for site in locations[vuid][1]:
                                    if not re_tapeSite.match(site):
                                        ds_sites.append(site)
                            else:
                                ds_sites = locations[vuid][1]

                            ds_info[ds] += [contents[0], ds_sites]
                        else:
                            logger.warning('dataset not available: %s' % ds)

                        mylock.release()

                    except DQException as err:
                        logger.warning(str(err))

                    except Empty:
                        pass

            # prepare and run the query threads
            profiler = ElapsedTimeProfiler(logger=logger)
            profiler.start()
            threads = []
            for i in range(nthread):
                t = GangaThread(name='stager_ds_w_%d' % i,
                                target=worker,
                                kwargs={'id': i})
                #                t.setDaemon(False)
                threads.append(t)

            for t in threads:
                t.start()

            for t in threads:
                t.join()

            self.complete_files_replicas = ds_info

            profiler.check('information collected: %d datasets' %
                           (len(self.complete_files_replicas.keys())))
        else:
            logger.debug('using cached complete_files_replicas')
            pass

        return self.complete_files_replicas
Exemple #21
0
def resolve_file_locations(dataset,
                           sites=None,
                           cloud=None,
                           token='ATLASDATADISK',
                           debug=False):
    '''
    Summarize the locations of files (in terms of sitename) of a dataset.
    If the sites argument is given, ignoring cloud and token arguments;
    otherwise using cloud and toke to retrieve sites from TiersOfATLAS.
    '''

    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}
    # preparing the queue for querying lfn
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(id):
        dq2 = DQ2()
        while not wq.empty():
            try:
                site = wq.get(block=True, timeout=1)
                replicaInfo = dq2.listFileReplicas(site, dataset)
                logger.debug('resolving dataset files at %s, no files: %d' %
                             (site, len(replicaInfo[0]['content'])))
                if replicaInfo:
                    mylock.acquire()
                    for guid in replicaInfo[0]['content']:
                        if guid not in replicas:
                            replicas[guid] = []
                        replicas[guid].append(site)
                    mylock.release()
            except Empty:
                pass
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                pass

    threads = []
    nthread = len(sites)
    if nthread > 10: nthread = 10

    for i in range(nthread):
        t = GangaThread(name='stager_ds_w_%d' % i,
                        target=worker,
                        kwargs={'id': i})
        #        t.setDaemon(False)
        threads.append(t)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas
Exemple #22
0
    logger.info('HC Copy Thread: Connected to DB')
    while (test_active(cursor2) and not test_paused(cursor2) and not ct.should_stop()):
        logger.info('HC Copy Thread: TOP OF MAIN LOOP')
        for job in jobs:
            if test_paused(cursor2) or ct.should_stop():
                break
            copyJob(cursor2,job)
            conn2.commit()
        conn2.commit()
        test_sleep(20)
    cursor2.close ()
    conn2.commit()
    conn2.close()
    logger.info('HC Copy Thread: Disconnected from DB')

ct = GangaThread(name="HCCopyThread", target=hc_copy_thread)

conn = hcutil.connect()
cursor = conn.cursor ()
logger.info('Connected to DB')

if len(jobs):
    ct.start()
    while (test_active(cursor) and not test_paused(cursor)):
        try:
            print_summary(cursor)
        except:
            logger.warning('Bug during print_summary')
            logger.warning(sys.exc_info()[0])
            logger.warning(sys.exc_info()[1])
        for job in jobs:
Exemple #23
0
class TaskRegistry(Registry):
    def __init__(self,
                 name,
                 doc,
                 dirty_flush_counter=10,
                 update_index_time=30):

        super(TaskRegistry, self).__init__(name,
                                           doc,
                                           dirty_flush_counter=10,
                                           update_index_time=30)

        self._main_thread = None

    def getProxy(self):
        this_slice = TaskRegistrySlice(self.name)
        this_slice.objects = self
        return TaskRegistrySliceProxy(this_slice)

    def getIndexCache(self, obj):
        if obj.getNodeData() is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in obj.getNodeData():
                c[cv] = obj.getNodeAttribute(cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread """
        # Add runtime handlers for all the taskified applications, since now
        # all the backends are loaded
        from Ganga.GPIDev.Adapters.ApplicationRuntimeHandlers import allHandlers
        from .TaskApplication import handler_map
        for basename, name in handler_map:
            for backend in allHandlers.getAllBackends(basename):
                allHandlers.add(name, backend,
                                allHandlers.get(basename, backend))

        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            from Ganga.Core import monitoring_component
            if (not monitoring_component is None
                    and monitoring_component.enabled
                ) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid]._getWriteAccess()
                self[tid].startup()
            except RegistryError:
                continue
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None and not self._main_thread.should_stop(
        ):

            # For each task try to run it
            if config['ForceTaskMonitoring'] or monitoring_component.enabled:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    try:
                        from Ganga.GPIDev.Lib.Tasks import ITask
                        if isType(self[tid], ITask):
                            # for new ITasks, always need write access
                            self[tid]._getWriteAccess()
                            p = self[tid]
                        else:
                            if self[tid].status in [
                                    "running", "running/pause"
                            ]:
                                self[tid]._getWriteAccess()
                                p = self[tid]
                            elif self[tid].status is 'completed' and (
                                    self[tid].n_status('ready')
                                    or self[tid].n_status('running')):
                                self[tid].updateStatus()
                                continue
                            else:
                                continue
                    except RegistryError:
                        # could not acquire lock
                        continue

                    if self._main_thread.should_stop():
                        break

                    try:
                        from Ganga.GPIDev.Lib.Tasks import ITask
                        if isType(self[tid], ITask):
                            # for new ITasks, always call update()
                            p.update()
                        else:
                            # TODO: Make this user-configurable and add better
                            # error message
                            if (p.n_status("failed") * 100.0 /
                                (20 + p.n_status("completed")) > 20):
                                p.pause()
                                logger.error(
                                    "Task %s paused - %i jobs have failed while only %i jobs have completed successfully."
                                    % (p.name, p.n_status("failed"),
                                       p.n_status("completed")))
                                logger.error(
                                    "Please investigate the cause of the failing jobs and then remove the previously failed jobs using job.remove()"
                                )
                                logger.error(
                                    "You can then continue to run this task with tasks(%i).run()"
                                    % p.id)
                                continue
                            numjobs = p.submitJobs()
                            if numjobs > 0:
                                self._flush([p])
                            # finalise any required transforms
                            p.finaliseTransforms()
                            p.updateStatus()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                            % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        p.pause()

                    if self._main_thread.should_stop():
                        break
                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" %
                         str(config['TaskLoopFrequency']))

            # Sleep interruptible for 10 seconds
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

    def shutdown(self):
        super(TaskRegistry, self).shutdown()

    def stop(self):
        if self._main_thread is not None:
            self._main_thread.stop()
Exemple #24
0
 def __init__(self, name):
     GangaThread.__init__(self, name=name)
Exemple #25
0
	def start(self):
		if Config.getConfig('LGI')['Enable'] is False:
			self.log.debug('Not starting LGI pilot thread because [LGI]Enable is False')
			return False
		return GangaThread.start(self)
Exemple #26
0
class TaskRegistry(Registry):

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):

        super(TaskRegistry, self).__init__( name, doc, dirty_flush_counter=10, update_index_time=30 )

        self._main_thread = None

    def getProxy(self):
        this_slice = TaskRegistrySlice(self.name)
        this_slice.objects = self
        return TaskRegistrySliceProxy(this_slice)

    def getIndexCache(self, obj):
        if obj.getNodeData() is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in obj.getNodeData():
                c[cv] = obj.getNodeAttribute(cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread """
        # Add runtime handlers for all the taskified applications, since now
        # all the backends are loaded
        from Ganga.GPIDev.Adapters.ApplicationRuntimeHandlers import allHandlers
        from .TaskApplication import handler_map
        for basename, name in handler_map:
            for backend in allHandlers.getAllBackends(basename):
                allHandlers.add(
                    name, backend, allHandlers.get(basename, backend))

        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            from Ganga.Core import monitoring_component
            if (not monitoring_component is None and monitoring_component.enabled) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid]._getWriteAccess()
                self[tid].startup()
            except RegistryError:
                continue
            except Exception as err:
                logger.error("Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None and not self._main_thread.should_stop():

            # For each task try to run it
            if config['ForceTaskMonitoring'] or monitoring_component.enabled:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    try:
                        from Ganga.GPIDev.Lib.Tasks import ITask
                        if isType(self[tid], ITask):
                            # for new ITasks, always need write access
                            self[tid]._getWriteAccess()
                            p = self[tid]
                        else:
                            if self[tid].status in ["running", "running/pause"]:
                                self[tid]._getWriteAccess()
                                p = self[tid]
                            elif self[tid].status is 'completed' and (self[tid].n_status('ready') or self[tid].n_status('running')):
                                self[tid].updateStatus()
                                continue
                            else:
                                continue
                    except RegistryError:
                        # could not acquire lock
                        continue

                    if self._main_thread.should_stop():
                        break

                    try:
                        from Ganga.GPIDev.Lib.Tasks import ITask
                        if isType(self[tid], ITask):
                            # for new ITasks, always call update()
                            p.update()
                        else:
                            # TODO: Make this user-configurable and add better
                            # error message
                            if (p.n_status("failed") * 100.0 / (20 + p.n_status("completed")) > 20):
                                p.pause()
                                logger.error("Task %s paused - %i jobs have failed while only %i jobs have completed successfully." % (
                                    p.name, p.n_status("failed"), p.n_status("completed")))
                                logger.error(
                                    "Please investigate the cause of the failing jobs and then remove the previously failed jobs using job.remove()")
                                logger.error(
                                    "You can then continue to run this task with tasks(%i).run()" % p.id)
                                continue
                            numjobs = p.submitJobs()
                            if numjobs > 0:
                                self._flush([p])
                            # finalise any required transforms
                            p.finaliseTransforms()
                            p.updateStatus()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused." % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_, traceback_)))
                        p.pause()

                    if self._main_thread.should_stop():
                        break
                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" % str(config['TaskLoopFrequency']))

            # Sleep interruptible for 10 seconds
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks", target=self._thread_main)
        self._main_thread.start()

    def shutdown(self):
        super(TaskRegistry, self).shutdown()

    def stop(self):
        if self._main_thread is not None:
            self._main_thread.stop()
 def __init__(self, name):
     GangaThread.__init__(self, name)
     self._currently_running_command = False
     self._running_cmd = None
     self._thread_name = name
Exemple #28
0
 def __init__(self):
     GangaThread.__init__(self, "LGI_Stats")
     self.log = getLogger("LGI.Stats.Thread")
Exemple #29
0
class TaskRegistry(Registry):

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):

        super(TaskRegistry, self).__init__( name, doc, dirty_flush_counter=dirty_flush_counter, update_index_time=update_index_time )

        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        return self.stored_slice

    def getProxy(self):
        return self.stored_proxy

    def getIndexCache(self, obj):
        if obj._data is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in obj._data:
                c[cv] = getattr(obj, cv)
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread """
        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        while True:
            from Ganga.Core import monitoring_component
            if (not monitoring_component is None and monitoring_component.enabled) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._main_thread is None or self._main_thread.should_stop():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error("Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while self._main_thread is not None and not self._main_thread.should_stop():

            # If monitoring is enabled (or forced for Tasks) loop over each one and update
            if (config['ForceTaskMonitoring'] or monitoring_component.enabled) and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused." % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_, traceback_)))
                        p.pause()

                    if self._main_thread.should_stop():
                        break

                if self._main_thread.should_stop():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" % str(config['TaskLoopFrequency']))

            # Sleep interruptible for 10 seconds
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._main_thread.should_stop():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks", target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self)
        self.flush_thread.start()

    def shutdown(self):
        self.flush_thread.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
Exemple #30
0
  logger.info('HC Plot Summarize Thread: Disconnected.')

def hc_copy_thread():
  test_sleep(60)
  logger.info('HC Copy Thread: Connected to DB')
  while (test_active() and not test_paused() and not ct.should_stop()):
    logger.debug('HC Copy Thread: TOP OF MAIN LOOP')
    for job in jobs:
      if test_paused() or ct.should_stop():
        break
      copyJob(job)
    test_sleep(30)

  logger.info('HC Copy Thread: Disconnected from DB')

ct = GangaThread(name="HCCopyThread", target=hc_copy_thread)
pt = GangaThread(name="HCPlotSummary", target=hc_plot_summarize)

logger.info('Connected to DB')

if len(jobs):
  ct.start()
  pt.start()

  while (test_active() and not test_paused()):

    #We need to refresh the test object
    test = Test.objects.get(pk=testid)

    try:
      print_summary()