def startup(self):
    """Start the background "GangaTasks" thread.

    Runs the parent class ``startup()`` first, then creates a ``GangaThread``
    whose target is ``self._thread_main``, stores it on ``self._main_thread``
    and starts it. (The original note describes this thread as one that
    periodically run()s.)
    """
    super(TaskRegistry, self).startup()

    # Imported at call time rather than at module level, as in the original
    # (presumably to avoid an import cycle -- TODO confirm).
    from Ganga.Core.GangaThread import GangaThread

    tasks_thread = GangaThread(name="GangaTasks", target=self._thread_main)
    self._main_thread = tasks_thread
    tasks_thread.start()
def __resolve_containers(self, containers, nthreads=10):
    '''Resolve dataset containers into the datasets they hold.

    Every container name is handed to ``DQ2.listDatasetsInContainer`` by a
    group of ``GangaThread`` workers that drain a shared queue.

    Args:
        containers: sized iterable of dataset container names.
        nthreads: upper bound on the number of worker threads.

    Returns:
        dict mapping each processed container name to the value returned by
        ``listDatasetsInContainer``; a container whose query raised
        ``DQException`` maps to an empty list.
    '''
    datasets = {}

    # The queue is fully populated before any worker starts, so a worker can
    # treat "queue empty" as "no more work".
    wq = Queue(len(containers))
    for ds in containers:
        wq.put(ds)

    mylock = Lock()

    def worker(worker_id):
        dq2 = DQ2()
        while True:
            # get_nowait() replaces the racy "empty() then get(timeout=1)":
            # a worker that loses the race for the last item now leaves
            # immediately instead of blocking for a second.
            try:
                ds = wq.get_nowait()
            except Empty:
                return

            logger.debug('worker id: %d on dataset container: %s' % (worker_id, ds))

            # Placeholder first, so a failed query still leaves an entry.
            with mylock:
                datasets[ds] = []

            try:
                ds_tmp = dq2.listDatasetsInContainer(ds)
            except DQException as err:
                logger.warning(str(err))
                continue

            with mylock:
                datasets[ds] = ds_tmp

    profiler = ElapsedTimeProfiler(logger=logger)
    profiler.start()

    # No point in starting more workers (each with its own DQ2 client) than
    # there are containers to resolve.
    threads = [
        GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'worker_id': i})
        for i in range(min(nthreads, len(containers)))
    ]

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    profiler.check('resolving %d containers' % len(containers))

    return datasets
def __init_worker_threads(self, num_worker_threads, worker_thread_prefix):
    """Create and start the worker threads, unless some already exist.

    If ``self.__worker_threads`` is non-empty, a warning is logged, each
    existing thread is reported by its ``gangaName``, and nothing is started.
    Otherwise ``num_worker_threads`` ``GangaThread`` objects are created with
    names ``worker_thread_prefix + <index>``, started, and appended to
    ``self.__worker_threads``.
    """
    if len(self.__worker_threads) > 0:
        logger.warning("Threads already started!")
        for running in self.__worker_threads:
            logger.info("Worker Thread: %s is already running!" % running.gangaName)
        return

    for index in range(num_worker_threads):
        thread_name = worker_thread_prefix + str(index)
        new_thread = GangaThread(name=thread_name,
                                 auto_register=False,
                                 target=self.__worker_thread)
        # The thread object is installed as its own positional argument via
        # the name-mangled private attribute of threading.Thread.
        # NOTE(review): this attribute name is the Python 2 layout -- confirm
        # before running on Python 3.
        new_thread._Thread__args = (new_thread, )
        new_thread._name = thread_name
        # Initial bookkeeping values stored on the thread object.
        new_thread._command = 'idle'
        new_thread._timeout = 'N/A'
        new_thread.start()
        self.__worker_threads.append(new_thread)
def get_complete_files_replicas(self, nthread=10, diskOnly=True):
    '''Collect the contents and COMPLETE-replica locations of every dataset.

    The result is cached on ``self.complete_files_replicas``; a non-empty
    cached value is returned without querying again.

    Args:
        nthread: upper bound on the number of worker threads.
        diskOnly: when true, sites whose name matches ``.*TAPE$`` are
            dropped from the site list.

    Returns:
        dict mapping dataset name to ``[contents[0], sites]`` where
        ``contents`` is the value returned by ``DQ2.listFilesInDataset`` and
        ``sites`` comes from ``DQ2.listDatasetReplicas(ds, complete=1)``.
        Datasets with no replica entry, or whose query raised
        ``DQException``, are absent from the dict.
    '''
    if self.complete_files_replicas:
        logger.debug('using cached complete_files_replicas')
        return self.complete_files_replicas

    re_tapeSite = re.compile('.*TAPE$')
    ds_info = {}

    self.__expand_datasets()

    # The queue is fully populated before any worker starts, so a worker can
    # treat "queue empty" as "no more work".
    wq = Queue(len(self.dataset))
    for ds in self.dataset:
        wq.put(ds)

    mylock = Lock()

    def worker(worker_id):
        dq2 = DQ2()
        while True:
            # get_nowait() replaces the racy "empty() then get(timeout=1)".
            try:
                ds = wq.get_nowait()
            except Empty:
                return

            logger.debug('worker id: %d on dataset: %s' % (worker_id, ds))

            try:
                # get contents (guids) of the complete dataset
                contents = dq2.listFilesInDataset(ds)
                # get locations of the complete dataset replicas
                locations = dq2.listDatasetReplicas(ds, complete=1)
            except DQException as err:
                logger.warning(str(err))
                continue

            # First key of the reply, or None when it is empty. Unlike
            # ``locations.keys()[0]`` this also works where keys() is a view.
            vuid = next(iter(locations), None)
            if not vuid:
                logger.warning('dataset not available: %s' % ds)
                continue

            ds_sites = locations[vuid][1]
            if diskOnly:
                ds_sites = [site for site in ds_sites if not re_tapeSite.match(site)]

            # Build the entry before taking the lock: if the subscripting
            # above fails, the lock is not held and other workers carry on.
            entry = [contents[0], ds_sites]
            with mylock:
                ds_info[ds] = entry

    # prepare and run the query threads
    profiler = ElapsedTimeProfiler(logger=logger)
    profiler.start()

    # No point in starting more workers (each with its own DQ2 client) than
    # there are datasets to query.
    threads = [
        GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'worker_id': i})
        for i in range(min(nthread, len(self.dataset)))
    ]

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    self.complete_files_replicas = ds_info

    profiler.check('information collected: %d datasets' % len(ds_info))

    return self.complete_files_replicas
def resolve_file_locations(dataset, sites=None, cloud=None, token='ATLASDATADISK', debug=False):
    '''Summarize the locations (site names) of the files of a dataset.

    If ``sites`` is given, ``cloud`` and ``token`` are ignored; otherwise
    ``cloud`` and ``token`` are passed to ``dm_util.get_srmv2_sites`` to
    obtain the list of sites to check.

    Args:
        dataset: dataset name passed to ``DQ2.listFileReplicas``.
        sites: optional sized iterable of site names to query.
        cloud: cloud passed to ``dm_util.get_srmv2_sites`` when ``sites`` is
            empty.
        token: token passed to ``dm_util.get_srmv2_sites``.
        debug: debug flag passed to ``dm_util.get_srmv2_sites``.

    Returns:
        dict mapping each file guid to the list of sites that reported it.
        Sites whose query raised ``DQException`` or returned nothing are
        left out.
    '''
    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}

    # preparing the queue for querying lfn; it is fully populated before any
    # worker starts, so "queue empty" means "no more work".
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(worker_id):
        dq2 = DQ2()
        while True:
            # get_nowait() replaces the racy "empty() then get(timeout=1)".
            try:
                site = wq.get_nowait()
            except Empty:
                return

            try:
                replicaInfo = dq2.listFileReplicas(site, dataset)
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                continue

            # Check for an empty reply BEFORE indexing into it; the original
            # logged len(replicaInfo[0]['content']) ahead of this guard.
            if not replicaInfo:
                logger.debug('no replica information returned for site %s' % site)
                continue

            guids = replicaInfo[0]['content']
            logger.debug('resolving dataset files at %s, no files: %d' % (site, len(guids)))

            with mylock:
                for guid in guids:
                    replicas.setdefault(guid, []).append(site)

    # At most 10 workers, and never more than there are sites.
    nthread = min(len(sites), 10)
    threads = [
        GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'worker_id': i})
        for i in range(nthread)
    ]

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas