def new_funct(*args, **kwargs):
    if isgeneratorfunction(function):
        raise RucioException(
            'read_session decorator should not be used with generator. Use stream_session instead.'
        )
    if not kwargs.get('session'):
        session = get_session()
        try:
            kwargs['session'] = session
            return function(*args, **kwargs)
        except TimeoutError as error:
            session.rollback()  # pylint: disable=maybe-no-member
            raise DatabaseException(str(error))
        except DatabaseError as error:
            session.rollback()  # pylint: disable=maybe-no-member
            raise DatabaseException(str(error))
        except:
            session.rollback()  # pylint: disable=maybe-no-member
            raise
        finally:
            session.remove()
    try:
        return function(*args, **kwargs)
    except Exception:
        raise


def new_funct(*args, **kwargs):
    if not isgeneratorfunction(function):
        raise RucioException(
            'stream_session decorator should be used only with generator. Use read_session instead.'
        )
    if not kwargs.get('session'):
        session = get_session()
        try:
            kwargs['session'] = session
            for row in function(*args, **kwargs):
                yield row
        except TimeoutError as error:
            session.rollback()  # pylint: disable=maybe-no-member
            raise DatabaseException(str(error))
        except DatabaseError as error:
            session.rollback()  # pylint: disable=maybe-no-member
            raise DatabaseException(str(error))
        except:
            session.rollback()  # pylint: disable=maybe-no-member
            raise
        finally:
            session.remove()
    else:
        try:
            for row in function(*args, **kwargs):
                yield row
        except:
            raise


def new_funct(*args, **kwargs):
    if not kwargs.get('session'):
        session = get_session()
        try:
            kwargs['session'] = session
            result = function(*args, **kwargs)
            session.commit()  # pylint: disable=maybe-no-member
        except TimeoutError as error:
            print(error)
            session.rollback()  # pylint: disable=maybe-no-member
            raise DatabaseException(str(error))
        except DatabaseError as error:
            print(error)
            session.rollback()  # pylint: disable=maybe-no-member
            raise DatabaseException(str(error))
        except:
            session.rollback()  # pylint: disable=maybe-no-member
            raise
        finally:
            session.remove()
    else:
        result = function(*args, **kwargs)
    return result


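# Hypothetical usage sketch (not part of the source): shows how a decorator shaped like the
# ones above injects a database session into kwargs when the caller does not supply one.
# `toy_read_session`, `list_names`, and the dict standing in for a session are illustrative
# stand-ins, not Rucio APIs.
from functools import wraps


def toy_read_session(function):
    """Minimal stand-in for the read_session-style decorators above."""
    @wraps(function)
    def new_funct(*args, **kwargs):
        if not kwargs.get('session'):
            kwargs['session'] = {'connected': True}  # stand-in for get_session()
        return function(*args, **kwargs)
    return new_funct


@toy_read_session
def list_names(prefix, *, session=None):
    # The decorator guarantees `session` is set before this body runs.
    assert session is not None
    return [prefix + suffix for suffix in ('a', 'b')]


print(list_names('lfn.'))                               # session injected by the decorator
print(list_names('lfn.', session={'connected': True}))  # caller-supplied session is reused

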
def run(once=False, total_workers=1, chunk_size=10, sleep_time=60):
    """
    Starts up the undertaker threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    logging.info('main: starting threads')
    threads = [threading.Thread(target=undertaker, kwargs={'worker_number': i,
                                                           'total_workers': total_workers,
                                                           'once': once,
                                                           'chunk_size': chunk_size,
                                                           'sleep_time': sleep_time}) for i in range(0, total_workers)]
    [t.start() for t in threads]

    logging.info('main: waiting for interrupts')

    # Interruptible joins require a timeout.
    while threads[0].is_alive():
        [t.join(timeout=3.14) for t in threads]


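# Self-contained sketch of the "interruptible join" idiom referenced by the comments above:
# joining with a timeout inside a loop lets the main thread wake up periodically, so a
# KeyboardInterrupt (Ctrl-C) is not blocked by an indefinite Thread.join(). The worker body
# and the sleep duration are illustrative only.
import threading
import time


def _sketch_worker():
    time.sleep(5)  # stand-in for a daemon loop


_threads = [threading.Thread(target=_sketch_worker) for _ in range(2)]
[t.start() for t in _threads]

while any(t.is_alive() for t in _threads):
    [t.join(timeout=3.14) for t in _threads]

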
def run(threads: int = 1, bulk: int = 100, once: bool = False, sleep_time: int = 60) -> None:
    """
    Starts up the transmogrifier threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException("Database was not updated, daemon won't start")

    if once:
        logging.info("Will run only one iteration in a single threaded mode")
        transmogrifier(bulk=bulk, once=once)
    else:
        logging.info("starting transmogrifier threads")
        thread_list = [
            threading.Thread(
                target=transmogrifier,
                kwargs={
                    "once": once,
                    "sleep_time": sleep_time,
                    "bulk": bulk,
                },
            )
            for _ in range(0, threads)
        ]
        [thread.start() for thread in thread_list]

        logging.info("waiting for interrupts")

        # Interruptible joins require a timeout.
        while thread_list:
            thread_list = [
                thread.join(timeout=3.14)
                for thread in thread_list
                if thread and thread.is_alive()
            ]


def run(once=False, threads=1, loop_rate=300, max_rows=100, sleep_time=300):
    """
    Starts up the OAuth Manager threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    sanity_check(executable='OAuthManager', hostname=socket.gethostname())

    if once:
        OAuthManager(once, loop_rate, max_rows, sleep_time)
    else:
        logging.info('OAuth Manager starting %s threads', str(threads))
        threads = [threading.Thread(target=OAuthManager, kwargs={'once': once,
                                                                 'loop_rate': int(loop_rate),
                                                                 'max_rows': max_rows,
                                                                 'sleep_time': sleep_time}) for i in range(0, threads)]
        [t.start() for t in threads]

        # Interruptible joins require a timeout.
        while threads[0].is_alive():
            [t.join(timeout=3.14) for t in threads]


def run(threads=1, sleep_time_datasets=60, sleep_time_files=60):
    """
    Starts up the consumer threads
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    dataset_queue = Queue()
    logging.info('starting tracer consumer threads')

    thread_list = []
    for thread in range(0, threads):
        thread_list.append(Thread(target=kronos_file, kwargs={'thread': thread,
                                                              'sleep_time': sleep_time_files,
                                                              'dataset_queue': dataset_queue}))
        thread_list.append(Thread(target=kronos_dataset, kwargs={'thread': thread,
                                                                 'sleep_time': sleep_time_datasets,
                                                                 'dataset_queue': dataset_queue}))

    [thread.start() for thread in thread_list]

    logging.info('waiting for interrupts')

    while len(thread_list) > 0:
        thread_list = [thread.join(timeout=3) for thread in thread_list if thread and thread.is_alive()]


def run(threads=1, chunk_size=100, once=False, greedy=False, rses=None, scheme=None,
        exclude_rses=None, include_rses=None, vos=None, delay_seconds=0, sleep_time=60,
        auto_exclude_threshold=100, auto_exclude_timeout=600):
    """
    Starts up the reaper threads.

    :param threads:                The total number of workers.
    :param chunk_size:             The size of chunk for deletion.
    :param once:                   If True, only runs one iteration of the main loop.
    :param greedy:                 If True, delete right away replicas with tombstone.
    :param rses:                   List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param scheme:                 Force the reaper to use a particular protocol/scheme, e.g., mock.
    :param exclude_rses:           RSE expression to exclude RSEs from the Reaper.
    :param include_rses:           RSE expression to include RSEs.
    :param vos:                    VOs on which to look for RSEs. Only used in multi-VO mode.
                                   If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param delay_seconds:          The delay to query replicas in BEING_DELETED state.
    :param sleep_time:             Time between two cycles.
    :param auto_exclude_threshold: Number of service unavailable exceptions after which the RSE gets temporarily excluded.
    :param auto_exclude_timeout:   Timeout for temporarily excluded RSEs.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    logging.log(logging.INFO, 'main: starting processes')
    rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses, vos)
    if not rses_to_process:
        logging.log(logging.ERROR, 'Reaper: No RSEs found. Exiting.')
        return
    logging.log(logging.INFO, 'Reaper: This instance will work on RSEs: %s', ', '.join([rse['rse'] for rse in rses_to_process]))

    # To populate the cache
    get_rses_to_hostname_mapping()

    logging.log(logging.INFO, 'starting reaper threads')
    threads_list = [threading.Thread(target=reaper, kwargs={'once': once,
                                                            'rses': rses,
                                                            'include_rses': include_rses,
                                                            'exclude_rses': exclude_rses,
                                                            'vos': vos,
                                                            'chunk_size': chunk_size,
                                                            'greedy': greedy,
                                                            'sleep_time': sleep_time,
                                                            'delay_seconds': delay_seconds,
                                                            'scheme': scheme,
                                                            'auto_exclude_threshold': auto_exclude_threshold,
                                                            'auto_exclude_timeout': auto_exclude_timeout}) for _ in range(0, threads)]

    for thread in threads_list:
        thread.start()

    logging.log(logging.INFO, 'waiting for interrupts')

    # Interruptible joins require a timeout.
    while threads_list:
        threads_list = [thread.join(timeout=3.14) for thread in threads_list if thread and thread.is_alive()]


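# Hypothetical invocation sketch (not from the source): how an entry-point script might call the
# reaper run() defined above. All argument values, including the RSE expression, are illustrative.
if __name__ == '__main__':
    run(threads=4,
        chunk_size=200,
        greedy=False,
        exclude_rses='MOCK_TAPE',   # illustrative RSE expression: skip this RSE
        sleep_time=120,
        auto_exclude_threshold=50,
        auto_exclude_timeout=300)

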
def run(once=False, threads=1, sleep_time_datasets=60, sleep_time_files=60):
    """
    Starts up the consumer threads
    """
    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    logging.info('resolving brokers')

    brokers_alias = []
    brokers_resolved = []
    try:
        brokers_alias = [b.strip() for b in config_get('tracer-kronos', 'brokers').split(',')]
    except Exception:
        raise Exception('Could not load brokers from configuration')

    logging.info('resolving broker dns alias: %s' % brokers_alias)
    brokers_resolved = []
    for broker in brokers_alias:
        addrinfos = socket.getaddrinfo(broker, 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
        brokers_resolved.extend(ai[4][0] for ai in addrinfos)

    logging.debug('brokers resolved to %s', brokers_resolved)

    dataset_queue = Queue()
    logging.info('starting tracer consumer threads')

    thread_list = []
    for thread in range(0, threads):
        thread_list.append(Thread(target=kronos_file, kwargs={'thread': thread,
                                                              'sleep_time': sleep_time_files,
                                                              'brokers_resolved': brokers_resolved,
                                                              'dataset_queue': dataset_queue}))
        thread_list.append(Thread(target=kronos_dataset, kwargs={'thread': thread,
                                                                 'sleep_time': sleep_time_datasets,
                                                                 'dataset_queue': dataset_queue}))

    [thread.start() for thread in thread_list]

    logging.info('waiting for interrupts')

    while len(thread_list) > 0:
        thread_list = [thread.join(timeout=3) for thread in thread_list if thread and thread.is_alive()]


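# Self-contained sketch of the broker-resolution step used above: socket.getaddrinfo() expands
# one DNS alias into the IPv4 addresses behind it. The host name is an illustrative assumption.
import socket

addrinfos = socket.getaddrinfo('example.org', 0, socket.AF_INET, 0, socket.IPPROTO_TCP)
ips = [ai[4][0] for ai in addrinfos]  # ai[4] is the (address, port) sockaddr tuple
print(ips)

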
def run(once=False, total_threads=1, sleep_time=60, activities=None, bulk=100, db_bulk=1000):
    """
    Starts up the conveyor threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    if once:
        logging.log(logging.INFO, 'executing one finisher iteration only')
        finisher(once=once, activities=activities, bulk=bulk, db_bulk=db_bulk)
    else:
        logging.log(logging.INFO, 'starting finisher threads')
        threads = [threading.Thread(target=finisher, kwargs={'sleep_time': sleep_time,
                                                             'activities': activities,
                                                             'db_bulk': db_bulk,
                                                             'bulk': bulk}) for _ in range(0, total_threads)]
        [thread.start() for thread in threads]

        logging.log(logging.INFO, 'waiting for interrupts')

        # Interruptible joins require a timeout.
        while threads:
            threads = [thread.join(timeout=3.14) for thread in threads if thread and thread.is_alive()]


def run(once=False, threads=1, sleep_time=30, did_limit=100):
    """
    Starts up the Judge-Eval threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    executable = 'judge-evaluator'
    hostname = socket.gethostname()
    sanity_check(executable=executable, hostname=hostname)

    if once:
        re_evaluator(once=once, did_limit=did_limit)
    else:
        logging.info('Evaluator starting %s threads' % str(threads))
        threads = [threading.Thread(target=re_evaluator, kwargs={'once': once,
                                                                 'sleep_time': sleep_time,
                                                                 'did_limit': did_limit}) for i in range(0, threads)]
        [t.start() for t in threads]

        # Interruptible joins require a timeout.
        while threads[0].is_alive():
            [t.join(timeout=3.14) for t in threads]


def run(once=False, threads=1): """ Starts up the Judge-Injector threads. """ setup_logging() if rucio.db.sqla.util.is_old_db(): raise DatabaseException( 'Database was not updated, daemon won\'t start') executable = 'judge-injector' hostname = socket.gethostname() sanity_check(executable=executable, hostname=hostname) if once: rule_injector(once) else: logging.info('Injector starting %s threads' % str(threads)) threads = [ threading.Thread(target=rule_injector, kwargs={'once': once}) for i in range(0, threads) ] [t.start() for t in threads] # Interruptible joins require a timeout. while threads[0].is_alive(): [t.join(timeout=3.14) for t in threads]
def run(once=False, younger_than=3, nattempts=10, vos=None, limit_suspicious_files_on_rse=5):
    """
    Starts up the Suspicious-Replica-Recoverer threads.
    """
    setup_logging()
    logger = formatted_logger(logging.log)

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    client_time, db_time = datetime.utcnow(), get_db_time()
    max_offset = timedelta(hours=1, seconds=10)
    if isinstance(db_time, datetime):
        if db_time - client_time > max_offset or client_time - db_time > max_offset:
            logger(logging.CRITICAL, 'Offset between client and db time too big. Stopping Suspicious-Replica-Recoverer.')
            return

    sanity_check(executable='rucio-replica-recoverer', hostname=socket.gethostname())

    if once:
        declare_suspicious_replicas_bad(once, younger_than, nattempts, vos, limit_suspicious_files_on_rse)
    else:
        logger(logging.INFO, 'Suspicious file replicas recovery starting 1 worker.')
        t = threading.Thread(target=declare_suspicious_replicas_bad,
                             kwargs={'once': once,
                                     'younger_than': younger_than,
                                     'nattempts': nattempts,
                                     'vos': vos,
                                     'limit_suspicious_files_on_rse': limit_suspicious_files_on_rse})
        t.start()

        logger(logging.INFO, 'Waiting for interrupts')

        # Interruptible joins require a timeout.
        while t.is_alive():
            t.join(timeout=3.14)


def run(once=False, threads=1, sleep_time=60): """ Starts up the Judge-Injector threads. """ setup_logging() if rucio.db.sqla.util.is_old_db(): raise DatabaseException( 'Database was not updated, daemon won\'t start') if once: rule_injector(once) else: logging.info('Injector starting %s threads' % str(threads)) threads = [ threading.Thread(target=rule_injector, kwargs={ 'once': once, 'sleep_time': sleep_time }) for i in range(0, threads) ] [t.start() for t in threads] # Interruptible joins require a timeout. while threads[0].is_alive(): [t.join(timeout=3.14) for t in threads]
def run(threads=1, bulk=100, once=False, sleep_time=60):
    """
    Starts up the transmogrifier threads.
    """
    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    if once:
        logging.info('Will run only one iteration in a single threaded mode')
        transmogrifier(bulk=bulk, once=once)
    else:
        logging.info('starting transmogrifier threads')
        thread_list = [threading.Thread(target=transmogrifier, kwargs={'once': once,
                                                                       'sleep_time': sleep_time,
                                                                       'bulk': bulk}) for _ in range(0, threads)]
        [thread.start() for thread in thread_list]

        logging.info('waiting for interrupts')

        # Interruptible joins require a timeout.
        while thread_list:
            thread_list = [thread.join(timeout=3.14) for thread in thread_list if thread and thread.is_alive()]


def run(once=False, threads=1, bulk=1000, sleep_time=10, broker_timeout=3):
    '''
    Starts up the hermes2 threads.
    '''
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    logging.info('starting hermes2 threads')
    thread_list = [threading.Thread(target=hermes2, kwargs={'thread': cnt,
                                                            'once': once,
                                                            'bulk': bulk,
                                                            'sleep_time': sleep_time}) for cnt in range(0, threads)]

    for thrd in thread_list:
        thrd.start()

    logging.debug(thread_list)

    # Interruptible joins require a timeout.
    while thread_list:
        thread_list = [thread.join(timeout=3.14) for thread in thread_list if thread and thread.is_alive()]


def run(once=False, sleep_time=60, activities=None, fts_bulk=100, db_bulk=1000, older_than=60, activity_shares=None, total_threads=1):
    """
    Starts up the conveyor threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    if activity_shares:

        try:
            activity_shares = json.loads(activity_shares)
        except Exception:
            logging.critical('activity share is not a valid JSON dictionary')
            return

        try:
            if round(sum(activity_shares.values()), 2) != 1:
                logging.critical('activity shares do not sum up to 1, got %s - aborting' % round(sum(activity_shares.values()), 2))
                return
        except Exception:
            logging.critical('activity shares are not numbers? - aborting')
            return

        activity_shares.update((share, int(percentage * db_bulk)) for share, percentage in activity_shares.items())
        logging.info('activity shares enabled: %s' % activity_shares)

    if once:
        logging.info('executing one poller iteration only')
        poller(once=once, fts_bulk=fts_bulk, db_bulk=db_bulk, older_than=older_than, activities=activities, activity_shares=activity_shares)
    else:
        logging.info('starting poller threads')

        threads = [threading.Thread(target=poller, kwargs={'older_than': older_than,
                                                           'fts_bulk': fts_bulk,
                                                           'db_bulk': db_bulk,
                                                           'sleep_time': sleep_time,
                                                           'activities': activities,
                                                           'activity_shares': activity_shares}) for _ in range(0, total_threads)]

        [thread.start() for thread in threads]

        logging.info('waiting for interrupts')

        # Interruptible joins require a timeout.
        while threads:
            threads = [thread.join(timeout=3.14) for thread in threads if thread and thread.is_alive()]


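# Self-contained sketch of the activity_shares normalisation above: fractional shares that sum
# to 1 are turned into per-activity row budgets out of db_bulk. The share names and values are
# illustrative assumptions.
import json

db_bulk = 1000
activity_shares = json.loads('{"User Subscriptions": 0.2, "Data rebalancing": 0.8}')
assert round(sum(activity_shares.values()), 2) == 1
activity_shares.update((share, int(percentage * db_bulk)) for share, percentage in activity_shares.items())
print(activity_shares)  # {'User Subscriptions': 200, 'Data rebalancing': 800}

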
def run(): """ Starts the Sonar thread. """ if rucio.db.sqla.util.is_old_db(): raise DatabaseException( 'Database was not updated, daemon won\'t start') threads = [] threads.append( threading.Thread(target=sonar_tests, kwargs={}, name='Sonar_test_v3')) for thread in threads: thread.start() while threads[0].is_alive(): for thread in threads: thread.join(timeout=3.14)
def run(total_workers=1, chunk_size=100, threads_per_worker=None, once=False, greedy=False,
        rses=[], scheme=None, exclude_rses=None, include_rses=None, vos=None, delay_seconds=0):
    """
    Starts up the reaper threads.

    :param total_workers:      The total number of workers.
    :param chunk_size:         The size of chunk for deletion.
    :param threads_per_worker: Total number of threads created by each worker.
    :param once:               If True, only runs one iteration of the main loop.
    :param greedy:             If True, delete right away replicas with tombstone.
    :param rses:               List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param scheme:             Force the reaper to use a particular protocol/scheme, e.g., mock.
    :param exclude_rses:       RSE expression to exclude RSEs from the Reaper.
    :param include_rses:       RSE expression to include RSEs.
    :param vos:                VOs on which to look for RSEs. Only used in multi-VO mode.
                               If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param delay_seconds:      The delay to query replicas in BEING_DELETED state.
    """
    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    logging.info('Reaper1 daemon will be deprecated and replaced by reaper2 with Rucio release 1.25 (~March 2021)!')
    logging.info('main: starting processes')

    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    if not multi_vo:
        if vos:
            logging.warning('Ignoring argument vos, this is only applicable in a multi-VO setup.')
        vos = ['def']
    else:
        if vos:
            invalid = set(vos) - set([v['vo'] for v in list_vos()])
            if invalid:
                msg = 'VO{} {} cannot be found'.format('s' if len(invalid) > 1 else '', ', '.join([repr(v) for v in invalid]))
                raise VONotFound(msg)
        else:
            vos = [v['vo'] for v in list_vos()]
        logging.info('Reaper: This instance will work on VO%s: %s' % ('s' if len(vos) > 1 else '', ', '.join([v for v in vos])))

    all_rses = []
    for vo in vos:
        all_rses.extend(rse_core.list_rses(filters={'vo': vo}))

    if rses:
        invalid = set(rses) - set([rse['rse'] for rse in all_rses])
        if invalid:
            msg = 'RSE{} {} cannot be found'.format('s' if len(invalid) > 1 else '', ', '.join([repr(rse) for rse in invalid]))
            raise RSENotFound(msg)
        rses = [rse for rse in all_rses if rse['rse'] in rses]
    else:
        rses = all_rses

    if exclude_rses:
        excluded_rses = parse_expression(exclude_rses)
        rses = [rse for rse in rses if rse not in excluded_rses]

    if include_rses:
        included_rses = parse_expression(include_rses)
        rses = [rse for rse in rses if rse in included_rses]

    if not rses:
        logging.error('Reaper: No RSEs found. Exiting.')
        return

    logging.info('Reaper: This instance will work on RSEs: ' + ', '.join([rse['rse'] for rse in rses]))

    threads = []
    nb_rses_per_worker = int(math.ceil(len(rses) / float(total_workers))) or 1
    rses = random.sample(rses, len(rses))
    for worker in range(total_workers):
        for child in range(threads_per_worker or 1):
            rses_list = rses[worker * nb_rses_per_worker: worker * nb_rses_per_worker + nb_rses_per_worker]
            if not rses_list:
                logging.warning('Reaper: Empty RSEs list for worker %(worker)s' % locals())
                continue
            kwargs = {'worker_number': worker,
                      'child_number': child,
                      'total_children': threads_per_worker or 1,
                      'once': once,
                      'chunk_size': chunk_size,
                      'greedy': greedy,
                      'rses': rses_list,
                      'delay_seconds': delay_seconds,
                      'scheme': scheme}
            threads.append(threading.Thread(target=reaper, kwargs=kwargs,
                                            name='Worker: %s, child: %s' % (worker, child)))

    [t.start() for t in threads]
    while threads[0].is_alive():
        [t.join(timeout=3.14) for t in threads]


def run(total_workers=1, chunk_size=100, once=False, rses=[], scheme=None,
        exclude_rses=None, include_rses=None, vos=None, delay_seconds=0):
    """
    Starts up the reaper threads.

    :param total_workers: The total number of workers.
    :param chunk_size:    The size of chunk for deletion.
    :param once:          If True, only runs one iteration of the main loop.
    :param rses:          List of RSEs the reaper should work against. If empty, it considers all RSEs. (Single-VO only)
    :param scheme:        Force the reaper to use a particular protocol/scheme, e.g., mock.
    :param exclude_rses:  RSE expression to exclude RSEs from the Reaper.
    :param include_rses:  RSE expression to include RSEs.
    :param vos:           VOs on which to look for RSEs. Only used in multi-VO mode.
                          If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise DatabaseException('Database was not updated, daemon won\'t start')

    logging.info('main: starting processes')

    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    if not multi_vo:
        if vos:
            logging.warning('Ignoring argument vos, this is only applicable in a multi-VO setup.')
        vos = ['def']
    else:
        if vos:
            invalid = set(vos) - set([v['vo'] for v in list_vos()])
            if invalid:
                msg = 'VO{} {} cannot be found'.format('s' if len(invalid) > 1 else '', ', '.join([repr(v) for v in invalid]))
                raise VONotFound(msg)
        else:
            vos = [v['vo'] for v in list_vos()]
        logging.info('Light Reaper: This instance will work on VO%s: %s' % ('s' if len(vos) > 1 else '', ', '.join([v for v in vos])))

    all_rses = []
    for vo in vos:
        all_rses.extend(rse_core.list_rses(filters={'vo': vo}))

    if rses:
        invalid = set(rses) - set([rse['rse'] for rse in all_rses])
        if invalid:
            msg = 'RSE{} {} cannot be found'.format('s' if len(invalid) > 1 else '', ', '.join([repr(rse) for rse in invalid]))
            raise RSENotFound(msg)
        rses = [rse for rse in all_rses if rse['rse'] in rses]
    else:
        rses = all_rses

    if exclude_rses:
        excluded_rses = parse_expression(exclude_rses)
        rses = [rse for rse in rses if rse not in excluded_rses]

    if include_rses:
        included_rses = parse_expression(include_rses)
        rses = [rse for rse in rses if rse in included_rses]

    if not rses:
        logging.error('Light Reaper: No RSEs found. Exiting.')
        return

    threads = []
    for worker in range(total_workers):
        kwargs = {'worker_number': worker,
                  'total_workers': total_workers,
                  'rses': rses,
                  'once': once,
                  'chunk_size': chunk_size,
                  'scheme': scheme}
        threads.append(threading.Thread(target=reaper, kwargs=kwargs,
                                        name='Worker: %s, Total_Workers: %s' % (worker, total_workers)))

    [t.start() for t in threads]
    while threads[0].is_alive():
        [t.join(timeout=3.14) for t in threads]