Ejemplo n.º 1
0
def resolve_file_locations(dataset, sites=None, cloud=None, token='ATLASDATADISK', debug=False):
    '''
    Summarize the locations of files (in terms of sitename) of a dataset.

    If the sites argument is given, the cloud and token arguments are ignored;
    otherwise cloud and token are handed to dm_util.get_srmv2_sites() to
    obtain the sites to query.

    Every site is queried with DQ2.listFileReplicas() from a pool of at most
    10 worker threads. A site whose query raises DQException is logged and
    left out of the result.

    Returns a dictionary mapping each file guid to the list of sites for
    which the guid was reported. The dictionary is empty when there is no
    site to query.
    '''

    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}

    # nothing to query (also covers a lookup that produced no sites at all)
    if not sites:
        return replicas

    # preparing the queue for querying lfn
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(worker_id):
        dq2 = DQ2()
        while not wq.empty():
            try:
                site = wq.get(block=True, timeout=1)
            except Empty:
                # another worker emptied the queue between empty() and get()
                continue

            try:
                replicaInfo = dq2.listFileReplicas(site, dataset)
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                continue

            # Check the reply before indexing into it: an empty reply must
            # not raise here, otherwise this worker thread would die.
            guids = replicaInfo[0]['content'] if replicaInfo else []
            logger.debug('resolving dataset files at %s, no files: %d' % (site, len(guids)))

            if guids:
                # 'with' guarantees the lock is released even on error
                with mylock:
                    for guid in guids:
                        replicas.setdefault(guid, []).append(site)

    # one thread per site, capped at 10
    nthread = min(len(sites), 10)

    threads = []
    for i in range(nthread):
        t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'worker_id': i})
        threads.append(t)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas
Ejemplo n.º 2
0
    def __resolve_containers(self, containers, nthreads=10):
        '''Resolve dataset containers into the datasets they hold.

        Each container name is queried with DQ2.listDatasetsInContainer()
        by one of nthreads worker threads. Returns a dictionary keyed by
        container name; a container whose query raised DQException keeps
        the empty list it was initialised with.
        '''

        # work queue holding one entry per container
        pending = Queue(len(containers))
        for container in containers:
            pending.put(container)

        resolved = {}
        guard = Lock()

        def worker(id):
            client = DQ2()
            while not pending.empty():
                try:
                    name = pending.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset container: %s' %
                                 (id, name))

                    # placeholder, kept if the query below fails
                    resolved[name] = []

                    members = client.listDatasetsInContainer(name)

                    with guard:
                        resolved[name] = members
                except DQException as err:
                    logger.warning(str(err))
                except Empty:
                    # queue was emptied by another worker in the meantime
                    pass

        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()

        pool = [
            GangaThread(name='stager_ds_w_%d' % idx,
                        target=worker,
                        kwargs={'id': idx})
            for idx in range(nthreads)
        ]

        for thread in pool:
            thread.start()

        for thread in pool:
            thread.join()

        profiler.check('resolving %d containers' % len(containers))

        return resolved
Ejemplo n.º 3
0
    def __resolve_containers(self, containers, nthreads=10):
        '''Look up the member datasets of every given dataset container.

        The containers are placed on a queue that is consumed by nthreads
        worker threads, each calling DQ2.listDatasetsInContainer(). The
        result maps a container name to what that call returned, or to an
        empty list when the call raised DQException.
        '''

        container_map = {}
        map_lock = Lock()

        todo = Queue(len(containers))
        for item in containers:
            todo.put(item)

        def worker(id):
            dq2_client = DQ2()
            while not todo.empty():
                try:
                    container = todo.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset container: %s' % (id, container))

                    # make sure the key exists even if the lookup fails
                    container_map[container] = []

                    found = dq2_client.listDatasetsInContainer(container)

                    with map_lock:
                        container_map[container] = found
                except DQException as err:
                    logger.warning(str(err))
                except Empty:
                    # nothing left to fetch: the loop condition ends the worker
                    pass

        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()

        workers = []
        for n in range(nthreads):
            workers.append(GangaThread(name='stager_ds_w_%d' % n, target=worker, kwargs={'id': n}))

        for w in workers:
            w.start()

        for w in workers:
            w.join()

        profiler.check('resolving %d containers' % len(containers))

        return container_map
Ejemplo n.º 4
0
class TaskRegistry(Registry):
    """Registry for tasks which also drives the background task monitoring.

    startup() launches a "GangaTasks" thread running _thread_main() and a
    RegistryFlusher thread; stop() ends the monitoring thread and shutdown()
    joins the flusher before shutting the registry down.
    """

    def __init__(self, name, doc, dirty_flush_counter=10, update_index_time=30):
        """Initialise the registry and create the slice/proxy handed out by
        getSlice() and getProxy(). All arguments are forwarded to Registry."""

        super(TaskRegistry, self).__init__( name, doc, dirty_flush_counter=dirty_flush_counter, update_index_time=update_index_time )

        # background monitoring thread, created in startup()
        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        """Return the TaskRegistrySlice bound to this registry."""
        return self.stored_slice

    def getProxy(self):
        """Return the proxy wrapping the slice of this registry."""
        return self.stored_proxy

    def getIndexCache(self, obj):
        """Return the index-cache dictionary for obj.

        The dictionary holds the 'status', 'id' and 'name' attributes that
        are present in obj._data plus one 'display:<column>' entry for every
        display column of a TaskRegistrySlice.

        Raises Exception when obj._data is None.
        """
        if obj._data is None:
            raise Exception("Currently don't support Index Caching")
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if cv in obj._data:
                c[cv] = getattr(obj, cv)
        # temporary slice, only used to compute the display values
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _stop_requested(self):
        """Return True when the monitoring thread is gone or asked to stop."""
        return self._main_thread is None or self._main_thread.should_stop()

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread.

        Waits for the "jobs" registry to start and for monitoring to be
        enabled (or forced through config['ForceTaskMonitoring']), starts up
        every task once and then keeps calling update() on every task until
        the thread is asked to stop.
        """
        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._stop_requested():
                return

        while True:
            # imported on every pass so the name is re-read from Ganga.Core
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None and monitoring_component.enabled) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._stop_requested():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error("Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while not self._stop_requested():

            # monitoring_component may still be None when monitoring is only
            # forced through the configuration, so guard the attribute access
            monitoring_on = config['ForceTaskMonitoring'] or (
                monitoring_component is not None and monitoring_component.enabled)

            # If monitoring is enabled (or forced for Tasks) loop over each one and update
            if monitoring_on and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    # Reset for every task: if the lookup itself fails there
                    # is nothing to pause, and the task handled in the
                    # previous iteration must not be paused by mistake.
                    p = None
                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused." % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_, traceback_)))
                        if p is not None:
                            p.pause()

                    if self._stop_requested():
                        break

                if self._stop_requested():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" % str(config['TaskLoopFrequency']))

            # Sleep for config['TaskLoopFrequency'] seconds in 10 ms slices so
            # that a stop request interrupts the wait quickly
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._stop_requested():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks", target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self)
        self.flush_thread.start()

    def shutdown(self):
        """Join the flusher thread (if one was started) and shut down the registry."""
        # flush_thread only exists once startup() has run
        flush_thread = getattr(self, 'flush_thread', None)
        if flush_thread is not None:
            flush_thread.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Ask the monitoring thread to stop and wait until it has finished."""
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
Ejemplo n.º 5
0
class TaskRegistry(Registry):
    """Registry for tasks which also drives the background task monitoring.

    startup() launches a "GangaTasks" thread running _thread_main() and a
    RegistryFlusher thread; stop() ends the monitoring thread and shutdown()
    joins the flusher before shutting the registry down.
    """

    def __init__(self, name, doc):
        """Initialise the registry and create the slice/proxy handed out by
        getSlice() and getProxy(). name and doc are forwarded to Registry."""

        super(TaskRegistry, self).__init__(name, doc)

        # background monitoring thread, created in startup()
        self._main_thread = None

        self.stored_slice = TaskRegistrySlice(self.name)
        self.stored_slice.objects = self
        self.stored_proxy = TaskRegistrySliceProxy(self.stored_slice)

    def getSlice(self):
        """Return the TaskRegistrySlice bound to this registry."""
        return self.stored_slice

    def getProxy(self):
        """Return the proxy wrapping the slice of this registry."""
        return self.stored_proxy

    def getIndexCache(self, obj):
        """Return the index-cache dictionary for obj.

        The dictionary holds the 'status', 'id' and 'name' attributes that
        obj has plus one 'display:<column>' entry for every display column
        of a TaskRegistrySlice.
        """
        cached_values = ['status', 'id', 'name']
        c = {}
        for cv in cached_values:
            if hasattr(obj, cv):
                c[cv] = getattr(obj, cv)
        # temporary slice, only used to compute the display values
        this_slice = TaskRegistrySlice("tmp")
        for dpv in this_slice._display_columns:
            c["display:" + dpv] = this_slice._get_display_value(obj, dpv)
        return c

    def _stop_requested(self):
        """Return True when the monitoring thread is gone or asked to stop."""
        return self._main_thread is None or self._main_thread.should_stop()

    def _thread_main(self):
        """ This is an internal function; the main loop of the background thread.

        Waits for the "jobs" registry to start and for monitoring to be
        enabled (or forced through config['ForceTaskMonitoring']), starts up
        every task once and then keeps calling update() on every task until
        the thread is asked to stop.
        """
        from Ganga.Core.GangaRepository import getRegistry
        while getRegistry("jobs").hasStarted() is not True:
            time.sleep(0.1)
            if self._stop_requested():
                return

        while True:
            # imported on every pass so the name is re-read from Ganga.Core
            from Ganga.Core import monitoring_component
            if (monitoring_component is not None
                    and monitoring_component.enabled
                ) or config['ForceTaskMonitoring']:
                break
            time.sleep(0.1)
            if self._stop_requested():
                return

        # setup the tasks - THIS IS INCOMPATIBLE WITH CONCURRENCY
        # and must go away soon
        for tid in self.ids():
            try:
                self[tid].startup()
            except Exception as err:
                logger.error(
                    "Unknown/Unexpected Error in starting up tasks main loop")
                logger.error("Exiting: err=%s" % str(err))
                return

        logger.debug("Entering main loop")

        # Main loop
        while not self._stop_requested():

            # monitoring_component may still be None when monitoring is only
            # forced through the configuration, so guard the attribute access
            monitoring_on = config['ForceTaskMonitoring'] or (
                monitoring_component is not None
                and monitoring_component.enabled)

            # If monitoring is enabled (or forced for Tasks) loop over each one and update
            if monitoring_on and not config['disableTaskMon']:
                for tid in self.ids():

                    logger.debug("Running over tid: %s" % str(tid))

                    # Reset for every task: if the lookup itself fails there
                    # is nothing to pause, and the task handled in the
                    # previous iteration must not be paused by mistake.
                    p = None
                    try:
                        p = self[tid]
                        p.update()

                    except Exception as x:
                        logger.error(
                            "Exception occurred in task monitoring loop: %s %s\nThe offending task was paused."
                            % (x.__class__, x))
                        type_, value_, traceback_ = sys.exc_info()
                        logger.error("Full traceback:\n %s" % ' '.join(
                            traceback.format_exception(type_, value_,
                                                       traceback_)))
                        if p is not None:
                            p.pause()

                    if self._stop_requested():
                        break

                if self._stop_requested():
                    break

            logger.debug("TaskRegistry Sleeping for: %s seconds" %
                         str(config['TaskLoopFrequency']))

            # Sleep for config['TaskLoopFrequency'] seconds in 10 ms slices so
            # that a stop request interrupts the wait quickly
            for i in range(0, int(config['TaskLoopFrequency'] * 100)):
                if self._stop_requested():
                    break
                time.sleep(0.01)

    def startup(self):
        """ Start a background thread that periodically run()s"""
        super(TaskRegistry, self).startup()
        from Ganga.Core.GangaThread import GangaThread
        self._main_thread = GangaThread(name="GangaTasks",
                                        target=self._thread_main)
        self._main_thread.start()

        # create a registry flusher
        self.flush_thread = RegistryFlusher(self, 'TaskRegistryFlusher')
        self.flush_thread.start()

    def shutdown(self):
        """Join the flusher thread (if one was started) and shut down the registry."""
        # flush_thread only exists once startup() has run
        flush_thread = getattr(self, 'flush_thread', None)
        if flush_thread is not None:
            flush_thread.join()
        super(TaskRegistry, self).shutdown()

    def stop(self):
        """Ask the monitoring thread to stop and wait until it has finished."""
        if self._main_thread is not None:
            self._main_thread.stop()
            self._main_thread.join()
Ejemplo n.º 6
0
    def get_complete_files_replicas(self, nthread=10, diskOnly=True):
        '''Gets a comprehensive dataset information about the contents and the
           location of COMPLETE replicas.

           The result is cached in self.complete_files_replicas; a non-empty
           cache is returned as is. Otherwise every dataset in self.dataset
           is queried by nthread worker threads using DQ2.listFilesInDataset()
           and DQ2.listDatasetReplicas(complete=1).

           nthread  -- number of worker threads to start
           diskOnly -- when True, sites whose name ends in TAPE are left out

           Returns a dictionary mapping a dataset name to a two-element list
           [contents[0], sites]. Datasets without a complete replica, or
           whose query raised DQException, are logged and omitted.
        '''

        if self.complete_files_replicas:
            logger.debug('using cached complete_files_replicas')
            return self.complete_files_replicas

        re_tapeSite = re.compile('.*TAPE$')

        ds_info = {}
        self.__expand_datasets()

        wq = Queue(len(self.dataset))
        for ds in self.dataset:
            wq.put(ds)

        mylock = Lock()

        def worker(worker_id):

            dq2 = DQ2()
            while not wq.empty():
                try:

                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset: %s' % (worker_id, ds))

                    # get contents (guids) of the complete dataset
                    contents = dq2.listFilesInDataset(ds)

                    # get locations of the complete dataset replicas
                    locations = dq2.listDatasetReplicas(ds, complete=1)

                    # first vuid, or None when no replica is listed; unlike
                    # keys()[0] this also works where keys() is not a list
                    vuid = next(iter(locations), None)

                    if not vuid:
                        logger.warning('dataset not available: %s' % ds)
                        continue

                    ds_sites = locations[vuid][1]
                    if diskOnly:
                        ds_sites = [site for site in ds_sites
                                    if not re_tapeSite.match(site)]

                    # updating ds_info hashtable; 'with' releases the lock
                    # even if building the entry raises
                    with mylock:
                        ds_info[ds] = [contents[0], ds_sites]

                except DQException as err:
                    logger.warning(str(err))

                except Empty:
                    # another worker emptied the queue between empty() and get()
                    pass

        # prepare and run the query threads
        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthread):
            t = GangaThread(name='stager_ds_w_%d' % i, target=worker, kwargs={'worker_id': i})
            threads.append(t)

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.complete_files_replicas = ds_info

        profiler.check('information collected: %d datasets' % len(self.complete_files_replicas))

        return self.complete_files_replicas
Ejemplo n.º 7
0
    def get_complete_files_replicas(self, nthread=10, diskOnly=True):
        '''Gets a comprehensive dataset information about the contents and the
           location of COMPLETE replicas.

           The result is cached in self.complete_files_replicas; a non-empty
           cache is returned as is. Otherwise every dataset in self.dataset
           is queried by nthread worker threads using DQ2.listFilesInDataset()
           and DQ2.listDatasetReplicas(complete=1).

           nthread  -- number of worker threads to start
           diskOnly -- when True, sites whose name ends in TAPE are left out

           Returns a dictionary mapping a dataset name to a two-element list
           [contents[0], sites]. Datasets without a complete replica, or
           whose query raised DQException, are logged and omitted.
        '''

        if self.complete_files_replicas:
            logger.debug('using cached complete_files_replicas')
            return self.complete_files_replicas

        re_tapeSite = re.compile('.*TAPE$')

        ds_info = {}
        self.__expand_datasets()

        wq = Queue(len(self.dataset))
        for ds in self.dataset:
            wq.put(ds)

        mylock = Lock()

        def worker(worker_id):

            dq2 = DQ2()
            while not wq.empty():
                try:

                    ds = wq.get(block=True, timeout=1)
                    logger.debug('worker id: %d on dataset: %s' %
                                 (worker_id, ds))

                    # get contents (guids) of the complete dataset
                    contents = dq2.listFilesInDataset(ds)

                    # get locations of the complete dataset replicas
                    locations = dq2.listDatasetReplicas(ds, complete=1)

                    # first vuid, or None when no replica is listed; unlike
                    # keys()[0] this also works where keys() is not a list
                    vuid = next(iter(locations), None)

                    if not vuid:
                        logger.warning('dataset not available: %s' % ds)
                        continue

                    ds_sites = locations[vuid][1]
                    if diskOnly:
                        ds_sites = [
                            site for site in ds_sites
                            if not re_tapeSite.match(site)
                        ]

                    # updating ds_info hashtable; 'with' releases the lock
                    # even if building the entry raises
                    with mylock:
                        ds_info[ds] = [contents[0], ds_sites]

                except DQException as err:
                    logger.warning(str(err))

                except Empty:
                    # another worker emptied the queue between empty() and get()
                    pass

        # prepare and run the query threads
        profiler = ElapsedTimeProfiler(logger=logger)
        profiler.start()
        threads = []
        for i in range(nthread):
            t = GangaThread(name='stager_ds_w_%d' % i,
                            target=worker,
                            kwargs={'worker_id': i})
            threads.append(t)

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.complete_files_replicas = ds_info

        profiler.check('information collected: %d datasets' %
                       len(self.complete_files_replicas))

        return self.complete_files_replicas
Ejemplo n.º 8
0
def resolve_file_locations(dataset,
                           sites=None,
                           cloud=None,
                           token='ATLASDATADISK',
                           debug=False):
    '''
    Summarize the locations of files (in terms of sitename) of a dataset.

    If the sites argument is given, the cloud and token arguments are ignored;
    otherwise cloud and token are handed to dm_util.get_srmv2_sites() to
    obtain the sites to query.

    Every site is queried with DQ2.listFileReplicas() from a pool of at most
    10 worker threads. A site whose query raises DQException is logged and
    left out of the result.

    Returns a dictionary mapping each file guid to the list of sites for
    which the guid was reported. The dictionary is empty when there is no
    site to query.
    '''

    if not sites:
        logger.debug('resolving sites with token: %s' % token)
        sites = dm_util.get_srmv2_sites(cloud, token=token, debug=debug)

    logger.debug('checking replicas at sites: %s' % str(sites))

    replicas = {}

    # nothing to query (also covers a lookup that produced no sites at all)
    if not sites:
        return replicas

    # preparing the queue for querying lfn
    wq = Queue(len(sites))
    for site in sites:
        wq.put(site)

    mylock = Lock()

    def worker(worker_id):
        dq2 = DQ2()
        while not wq.empty():
            try:
                site = wq.get(block=True, timeout=1)
            except Empty:
                # another worker emptied the queue between empty() and get()
                continue

            try:
                replicaInfo = dq2.listFileReplicas(site, dataset)
            except DQException as err:
                logger.warning(str(err))
                logger.warning('site %s excluded' % site)
                continue

            # Check the reply before indexing into it: an empty reply must
            # not raise here, otherwise this worker thread would die.
            guids = replicaInfo[0]['content'] if replicaInfo else []
            logger.debug('resolving dataset files at %s, no files: %d' %
                         (site, len(guids)))

            if guids:
                # 'with' guarantees the lock is released even on error
                with mylock:
                    for guid in guids:
                        replicas.setdefault(guid, []).append(site)

    # one thread per site, capped at 10
    nthread = min(len(sites), 10)

    threads = []
    for i in range(nthread):
        t = GangaThread(name='stager_ds_w_%d' % i,
                        target=worker,
                        kwargs={'worker_id': i})
        threads.append(t)

    for t in threads:
        t.start()

    for t in threads:
        t.join()

    return replicas