def test_heartbeat_payload(self):
    """ HEARTBEAT (CORE): Test heartbeat with payload"""

    pids = [self.__pid() for _ in range(6)]
    threads = [self.__thread() for _ in range(6)]

    live('test5', 'host0', pids[0], threads[0], payload='payload1')
    live('test5', 'host0', pids[1], threads[1], payload='payload1')
    live('test5', 'host0', pids[2], threads[2], payload='payload1')
    live('test5', 'host1', pids[3], threads[3], payload='payload2')
    live('test5', 'host2', pids[4], threads[4], payload='payload3')
    live('test5', 'host3', pids[5], threads[5], payload='payload4')

    assert_equal(list_payload_counts('test5'), {'payload4': 1, 'payload2': 1, 'payload3': 1, 'payload1': 3})

    die('test5', 'host0', pids[0], threads[0])
    die('test5', 'host0', pids[1], threads[1])
    die('test5', 'host0', pids[2], threads[2])
    die('test5', 'host1', pids[3], threads[3])
    die('test5', 'host2', pids[4], threads[4])
    die('test5', 'host3', pids[5], threads[5])

    assert_equal(list_payload_counts('test5'), {})
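# Illustrative sketch (not part of the test suite): the assertion above expects
# list_payload_counts() to group live heartbeats by payload value and count them.
# The plain-Python tally below mirrors that semantics; the `heartbeats` list is an
# invented stand-in for the heartbeat rows written by live().
from collections import Counter

heartbeats = [
    ('test5', 'host0', 1, 'a', 'payload1'),
    ('test5', 'host0', 2, 'b', 'payload1'),
    ('test5', 'host0', 3, 'c', 'payload1'),
    ('test5', 'host1', 4, 'd', 'payload2'),
    ('test5', 'host2', 5, 'e', 'payload3'),
    ('test5', 'host3', 6, 'f', 'payload4'),
]

def payload_counts(rows, executable):
    """Count live heartbeats per payload value, mimicking list_payload_counts()."""
    return dict(Counter(payload for exe, *_rest, payload in rows if exe == executable))

assert payload_counts(heartbeats, 'test5') == {'payload1': 3, 'payload2': 1, 'payload3': 1, 'payload4': 1}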
def __try_reserve_worker_slot(heartbeat_handler: "HeartbeatHandler", rse: RseData, hostname: str, logger: "Callable[..., Any]") -> "Optional[str]":
    """
    The maximum number of concurrent workers is limited per hostname and per RSE
    for storage performance reasons. This function tries to reserve a slot to run
    the deletion worker for the given RSE and hostname.

    The function doesn't guarantee strong consistency: the total number of workers
    may end up slightly higher than the configured limit.

    The reservation is done using the "payload" field of the rucio heartbeats.

    If the reservation is successful, returns the heartbeat payload used for the
    reservation. Otherwise, returns None.
    """
    rse_hostname_key = '%s,%s' % (rse.id, hostname)
    payload_cnt = list_payload_counts(heartbeat_handler.executable, older_than=heartbeat_handler.older_than)
    tot_threads_for_hostname = 0
    tot_threads_for_rse = 0
    for key in payload_cnt:
        if key and key.find(',') > -1:
            if key.split(',')[1] == hostname:
                tot_threads_for_hostname += payload_cnt[key]
            if key.split(',')[0] == str(rse.id):
                tot_threads_for_rse += payload_cnt[key]
    max_deletion_thread = get_max_deletion_threads_by_hostname(hostname)
    if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
        logger(logging.DEBUG, 'Too many deletion threads for %s on RSE %s. Back off', hostname, rse.name)
        return None
    logger(logging.INFO, 'Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s',
           hostname, tot_threads_for_hostname, max_deletion_thread, rse.name)
    _, total_workers, logger = heartbeat_handler.live(payload=rse_hostname_key)
    logger(logging.DEBUG, 'Total deletion workers for %s : %i', hostname, tot_threads_for_hostname + 1)
    return rse_hostname_key
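# Hypothetical usage sketch (not real Rucio code): how a per-RSE deletion pass
# might guard itself with __try_reserve_worker_slot(). process_rse_deletions()
# is a placeholder name for the actual deletion logic.
def process_one_rse(heartbeat_handler, rse, hostname, logger):
    payload = __try_reserve_worker_slot(heartbeat_handler=heartbeat_handler,
                                        rse=rse,
                                        hostname=hostname,
                                        logger=logger)
    if payload is None:
        # The per-hostname limit has been reached: skip this RSE for now.
        return False
    # The reservation lives in the heartbeat payload, so subsequent renewals with
    # heartbeat_handler.live(payload=payload) keep it while the worker runs.
    process_rse_deletions(rse)  # placeholder for the actual deletion logic
    return True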
def reaper(rses, include_rses, exclude_rses, vos=None, chunk_size=100, once=False, greedy=False,
           scheme=None, delay_seconds=0, sleep_time=60, auto_exclude_threshold=100, auto_exclude_timeout=600):
    """
    Main loop to select and delete files.

    :param rses:                   List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param include_rses:           RSE expression to include RSEs.
    :param exclude_rses:           RSE expression to exclude RSEs from the Reaper.
    :param vos:                    VOs on which to look for RSEs. Only used in multi-VO mode. If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param chunk_size:             The size of chunk for deletion.
    :param once:                   If True, only runs one iteration of the main loop.
    :param greedy:                 If True, delete right away replicas with tombstone.
    :param scheme:                 Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds:          The delay to query replicas in BEING_DELETED state.
    :param sleep_time:             Time between two cycles.
    :param auto_exclude_threshold: Number of service unavailable exceptions after which the RSE gets temporarily excluded.
    :param auto_exclude_timeout:   Timeout for temporarily excluded RSEs.
    """
    hostname = socket.getfqdn()
    executable = 'reaper'
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')

    logger(logging.INFO, 'Reaper starting')

    if not once:
        GRACEFUL_STOP.wait(10)  # To prevent running on the same partition if all the reapers restart at the same time

    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.INFO, 'Reaper started')

    while not GRACEFUL_STOP.is_set():
        # Try to get auto exclude parameters from the config table. Otherwise use CLI parameters.
        try:
            auto_exclude_threshold = config_get('reaper', 'auto_exclude_threshold', default=auto_exclude_threshold)
            auto_exclude_timeout = config_get('reaper', 'auto_exclude_timeout', default=auto_exclude_timeout)
        except (NoOptionError, NoSectionError, RuntimeError):
            pass

        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = config_get('reaper', 'max_evaluator_backlog_count')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = config_get('reaper', 'max_evaluator_backlog_duration')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_duration = None
        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            backlog = get_evaluation_backlog()
            if (max_evaluator_backlog_count and backlog[0]
                    and max_evaluator_backlog_duration and backlog[1]
                    and backlog[0] > max_evaluator_backlog_count
                    and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration)):
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog count and duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and backlog[0] and backlog[0] > max_evaluator_backlog_count:
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog count hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif (max_evaluator_backlog_duration and backlog[1]
                    and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration)):
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue

        rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses, vos)
        if not rses_to_process:
            logger(logging.ERROR, 'Reaper: No RSEs found. Will sleep for 30 seconds')
            GRACEFUL_STOP.wait(30)
            continue
        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
            logger = formatted_logger(logging.log, prepend_str + '%s')
            tot_needed_free_space = 0
            for rse in rses_to_process:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blocklisted
                if rse['availability'] % 2 == 0:
                    logger(logging.DEBUG, 'RSE %s is blocklisted for delete', rse['rse'])
                    continue
                needed_free_space, only_delete_obsolete = __check_rse_usage(rse['rse'], rse['id'], greedy=greedy, logger=logger)
                if needed_free_space:
                    dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, only_delete_obsolete]
                    tot_needed_free_space += needed_free_space
                elif only_delete_obsolete:
                    dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, only_delete_obsolete]
                else:
                    logger(logging.DEBUG, 'Nothing to delete on %s', rse['rse'])

            # Ordering the RSEs based on the needed free space
            sorted_dict_rses = OrderedDict(sorted(dict_rses.items(), key=lambda x: x[1][0], reverse=True))
            logger(logging.DEBUG, 'List of RSEs to process ordered by needed space desc: %s', str(sorted_dict_rses))

            # Get the mapping between the RSE and the hostname used for deletion. The dictionary has RSE as key and (hostname, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logger(logging.DEBUG, '%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []

            # Loop over the RSEs. rse_key = (rse, rse_id) and fill list_rses_mult that contains all RSEs to process with different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of RSEs with a small number of files to delete
                if tot_needed_free_space:
                    max_workers = ceil(dict_rses[rse_key][0] / tot_needed_free_space * 1000 / heart_beat['nr_threads'])
                else:
                    max_workers = 1

                list_rses_mult.extend([(rse_name, rse_id, dict_rses[rse_key][0], dict_rses[rse_key][1]) for _ in range(int(max_workers))])
            random.shuffle(list_rses_mult)

            paused_rses = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                result = REGION.get('pause_deletion_%s' % rse_id, expiration_time=120)
                if result is not NO_VALUE:
                    paused_rses.append(rse_name)
                    logger(logging.DEBUG, 'Not enough replicas to delete on %s during the previous cycle. Deletion paused for a while', rse_name)
                    continue
                result = REGION.get('temporary_exclude_%s' % rse_id, expiration_time=auto_exclude_timeout)
                if result is not NO_VALUE:
                    logger(logging.WARNING, 'Too many failed attempts for %s in last cycle. RSE is temporarily excluded.', rse_name)
                    labels = {'rse': rse_name}
                    EXCLUDED_RSE_GAUGE.labels(**labels).set(1)
                    continue
                labels = {'rse': rse_name}
                EXCLUDED_RSE_GAUGE.labels(**labels).set(0)

                percent = 0
                if tot_needed_free_space:
                    percent = needed_free_space / tot_needed_free_space * 100
                logger(logging.DEBUG, 'Working on %s. Percentage of the total space needed %.2f', rse_name, percent)
                try:
                    rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                except KeyError:
                    logger(logging.DEBUG, "Hostname lookup for %s failed.", rse_name)
                    REGION.set('pause_deletion_%s' % rse_id, True)
                    continue
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
                # logger(logging.DEBUG, '%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]
                max_deletion_thread = get_max_deletion_threads_by_hostname(rse_hostname)
                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logger(logging.DEBUG, 'Too many deletion threads for %s on RSE %s. Back off', rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue
                logger(logging.INFO, 'Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s',
                       rse_hostname, tot_threads_for_hostname, max_deletion_thread, rse_name)
                live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                logger(logging.DEBUG, 'Total deletion workers for %s : %i', rse_hostname, tot_threads_for_hostname + 1)

                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                only_delete_obsolete = dict_rses[(rse_name, rse_id)][1]
                try:
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        if only_delete_obsolete:
                            logger(logging.DEBUG, 'Will run list_and_mark_unlocked_replicas on %s. No space needed, will only delete EPOCH tombstoned replicas', rse_name)
                        replicas = list_and_mark_unlocked_replicas(limit=chunk_size,
                                                                   bytes_=needed_free_space,
                                                                   rse_id=rse_id,
                                                                   delay_seconds=delay_seconds,
                                                                   only_delete_obsolete=only_delete_obsolete,
                                                                   session=None)
                    logger(logging.DEBUG, 'list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas',
                           rse_name, needed_free_space, time.time() - del_start_time, len(replicas))
                    if len(replicas) < chunk_size:
                        logger(logging.DEBUG, 'Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle',
                               rse_name, chunk_size, len(replicas))
                        REGION.set('pause_deletion_%s' % rse_id, True)
                except (DatabaseException, IntegrityError, DatabaseError) as error:
                    logger(logging.ERROR, '%s', str(error))
                    continue
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

                # Physical deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme, logger=logger)
                    for file_replicas in chunks(replicas, chunk_size):
                        # Refresh heartbeat
                        live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                           lfns=[{'scope': replica['scope'].external, 'name': replica['name'], 'path': replica['path']}],
                                                                           operation='delete',
                                                                           scheme=scheme).values())[0])
                            except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                logger(logging.WARNING, 'Failed get pfn UNAVAILABLE replica %s:%s on %s with error %s',
                                       replica['scope'], replica['name'], rse_name, str(error))
                                replica['pfn'] = None
                            except Exception:
                                logger(logging.CRITICAL, 'Exception', exc_info=True)

                        deleted_files = delete_from_storage(file_replicas, prot, rse_info, staging_areas, auto_exclude_threshold, logger=logger)
                        logger(logging.INFO, '%i files processed in %s seconds', len(file_replicas), time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block('reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logger(logging.DEBUG, 'delete_replicas succeeded on %s : %s replicas in %s seconds',
                               rse_name, len(deleted_files), time.time() - del_start)
                        DELETION_COUNTER.inc(len(deleted_files))
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

            if paused_rses:
                logger(logging.INFO, 'Deletion paused for a while for following RSEs: %s', ', '.join(paused_rses))

            if once:
                break

            daemon_sleep(start_time=start_time, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP, logger=logger)
        except DatabaseException as error:
            logger(logging.WARNING, 'Reaper: %s', str(error))
        except Exception:
            logger(logging.CRITICAL, 'Exception', exc_info=True)
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logger(logging.INFO, 'Graceful stop requested')
    logger(logging.INFO, 'Graceful stop done')
    return
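# Illustrative worked example (invented numbers): the multiplicity weighting used
# above, max_workers = ceil(needed / total * 1000 / nr_threads), gives RSEs with a
# large deletion backlog proportionally more entries in list_rses_mult, while the
# ceil() keeps every RSE at one entry or more.
from math import ceil

tot_needed_free_space = 5_000_000_000_000           # 5 TB needed across all RSEs (made up)
nr_threads = 10                                      # total reaper threads (made up)
needed = {'RSE_BIG': 4_500_000_000_000,              # 4.5 TB to free
          'RSE_SMALL': 500_000_000_000}              # 0.5 TB to free

for rse, needed_free_space in needed.items():
    max_workers = ceil(needed_free_space / tot_needed_free_space * 1000 / nr_threads)
    print(rse, max_workers)   # RSE_BIG -> 90 entries, RSE_SMALL -> 10 entries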
def reaper(rses, chunk_size=100, once=False, greedy=False, scheme=None, delay_seconds=0, sleep_time=60):
    """
    Main loop to select and delete files.

    :param rses:          List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param chunk_size:    The size of chunk for deletion.
    :param once:          If True, only runs one iteration of the main loop.
    :param greedy:        If True, delete right away replicas with tombstone.
    :param scheme:        Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    :param sleep_time:    Time between two cycles.
    """
    try:
        max_deletion_thread = get('reaper', 'nb_workers_by_hostname')
    except ConfigNotFound:
        max_deletion_thread = 5
    hostname = socket.getfqdn()
    executable = sys.argv[0]
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
    logging.info('%s Reaper starting', prepend_str)

    time.sleep(10)  # To prevent running on the same partition if all the reapers restart at the same time
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
    logging.info('%s Reaper started', prepend_str)

    while not GRACEFUL_STOP.is_set():
        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
            tot_needed_free_space = 0
            for rse in rses:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blacklisted
                if rse['availability'] % 2 == 0:
                    logging.debug('%s RSE %s is blacklisted for delete', prepend_str, rse['rse'])
                    continue
                max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse['rse'], rse['id'], prepend_str)
                # Check if greedy mode
                if greedy:
                    dict_rses[(rse['rse'], rse['id'])] = [1000000000000, max_being_deleted_files]
                    tot_needed_free_space += 1000000000000
                else:
                    if needed_free_space:
                        dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, max_being_deleted_files]
                        tot_needed_free_space += needed_free_space
                    else:
                        logging.debug('%s Nothing to delete on %s', prepend_str, rse['rse'])

            # Ordering the RSEs based on the needed free space
            sorted_dict_rses = OrderedDict(sorted(dict_rses.items(), key=itemgetter(1), reverse=True))
            logging.debug('%s List of RSEs to process ordered by needed space desc : %s', prepend_str, str(sorted_dict_rses))

            # Get the mapping between the RSE and the hostname used for deletion. The dictionary has RSE as key and (hostname, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logging.debug('%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []

            # Loop over the RSEs. rse_key = (rse, rse_id) and fill list_rses_mult that contains all RSEs to process with different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of RSEs with a small number of files to delete
                max_workers = ceil(dict_rses[rse_key][0] / tot_needed_free_space * 1000 / heart_beat['nr_threads'])
                list_rses_mult.extend([(rse_name, rse_id, dict_rses[rse_key][0], dict_rses[rse_key][1]) for _ in range(int(max_workers))])
            random.shuffle(list_rses_mult)

            skip_until_next_run = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                if rse_id in skip_until_next_run:
                    continue
                logging.debug('%s Working on %s. Percentage of the total space needed %.2f',
                              prepend_str, rse_name, needed_free_space / tot_needed_free_space * 100)
                rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
                # logging.debug('%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]

                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logging.debug('%s Too many deletion threads for %s on RSE %s. Back off',
                                  prepend_str, rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue
                logging.info('%s Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s',
                             prepend_str, rse_hostname, tot_threads_for_hostname, max_deletion_thread, rse_name)
                live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                logging.debug('%s Total deletion workers for %s : %i', prepend_str, rse_hostname, tot_threads_for_hostname + 1)

                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                try:
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_and_mark_unlocked_replicas(limit=chunk_size,
                                                                   bytes=needed_free_space,
                                                                   rse_id=rse_id,
                                                                   delay_seconds=delay_seconds,
                                                                   session=None)
                    logging.debug('%s list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas',
                                  prepend_str, rse_name, needed_free_space, time.time() - del_start_time, len(replicas))
                    if len(replicas) < chunk_size:
                        logging.info('%s Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle',
                                     prepend_str, rse_name, chunk_size, len(replicas))
                        skip_until_next_run.append(rse_id)
                except (DatabaseException, IntegrityError, DatabaseError) as error:
                    logging.error('%s %s', prepend_str, str(error))
                    continue
                except Exception:
                    logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

                # Physical deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                    for file_replicas in chunks(replicas, 100):
                        # Refresh heartbeat
                        live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                      lfns=[{'scope': replica['scope'], 'name': replica['name'], 'path': replica['path']}],
                                                                      operation='delete',
                                                                      scheme=scheme).values()[0])
                                time.sleep(random.uniform(0, 0.01))
                            except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                logging.warning('%s Failed get pfn UNAVAILABLE replica %s:%s on %s with error %s',
                                                prepend_str, replica['scope'], replica['name'], rse_name, str(error))
                                replica['pfn'] = None
                            except Exception:
                                logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

                        deleted_files = delete_from_storage(file_replicas, prot, rse_info, staging_areas, prepend_str)
                        logging.info('%s %i files processed in %s seconds', prepend_str, len(file_replicas), time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block('reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logging.debug('%s delete_replicas succeeded on %s : %s replicas in %s seconds',
                                      prepend_str, rse_name, len(deleted_files), time.time() - del_start)
                        monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                except Exception:
                    logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

            if once:
                break

            tottime = time.time() - start_time
            if tottime < sleep_time:
                logging.info('%s Will sleep for %s seconds', prepend_str, sleep_time - tottime)
                time.sleep(sleep_time - tottime)

        except DatabaseException as error:
            logging.warning('%s Reaper: %s', prepend_str, str(error))
        except Exception:
            logging.critical('%s %s', prepend_str, str(traceback.format_exc()))
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logging.info('%s Graceful stop requested', prepend_str)
    logging.info('%s Graceful stop done', prepend_str)
    return
def rule_rebalancer(rse_expression, move_subscriptions=False, use_dump=False, sleep_time=300, once=True, dry_run=False):
    """
    Main loop to rebalance rules automatically
    """
    total_rebalance_volume = 0
    executable = 'rucio-bb8'
    hostname = socket.gethostname()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'bb8[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.DEBUG, 'rse_expression: %s', rse_expression)
    logger(logging.INFO, 'BB8 started')

    while not GRACEFUL_STOP.is_set():
        logger(logging.INFO, 'Starting new cycle')
        heart_beat = live(executable, hostname, pid, hb_thread)
        start_time = time.time()
        total_rebalance_volume = 0
        tolerance = config_core.get('bb8', 'tolerance', default=0.05)
        max_total_rebalance_volume = config_core.get('bb8', 'max_total_rebalance_volume', default=10 * 1E12)
        max_rse_rebalance_volume = config_core.get('bb8', 'max_rse_rebalance_volume', default=500 * 1E9)
        min_total = config_core.get('bb8', 'min_total', default=20 * 1E9)
        payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
        if rse_expression in payload_cnt:
            logger(logging.WARNING, 'One BB8 instance already running with the same RSE expression. Stopping')
            break
        else:
            # List the RSEs represented by rse_expression
            try:
                rses = [rse for rse in parse_expression(rse_expression)]
                list_rses2 = [rse['rse'] for rse in rses]
            except InvalidRSEExpression as err:
                logger(logging.ERROR, err)
                break
            # List the RSEs represented by all the RSE expressions stored in heartbeat payload
            list_rses1 = []
            for rse_exp in payload_cnt:
                if rse_exp:
                    list_rses1 = [rse['rse'] for rse in parse_expression(rse_exp)]
            for rse in list_rses2:
                if rse in list_rses1:
                    logger(logging.WARNING, 'Overlapping RSE expressions %s vs %s. Stopping', rse_exp, rse_expression)
                    break

            logger(logging.INFO, 'Will process rebalancing on %s', rse_expression)
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=max(600, sleep_time), hash_executable=None, payload=rse_expression, session=None)
            total_primary = 0
            total_secondary = 0
            total_total = 0
            global_ratio = float(0)
            for rse in rses:
                logger(logging.DEBUG, 'Getting RSE usage on %s', rse['rse'])
                rse_usage = get_rse_usage(rse_id=rse['id'])
                usage_dict = {}
                for item in rse_usage:
                    # TODO Check last update
                    usage_dict[item['source']] = {'used': item['used'], 'free': item['free'], 'total': item['total']}

                try:
                    rse['primary'] = usage_dict['rucio']['used'] - usage_dict['expired']['used']
                    rse['secondary'] = usage_dict['expired']['used']
                    rse['total'] = usage_dict['storage']['total'] - usage_dict['min_free_space']['used']
                    rse['ratio'] = float(rse['primary']) / float(rse['total'])
                except KeyError as err:
                    logger(logging.ERROR, 'Missing source usage %s for RSE %s. Exiting', err, rse['rse'])
                    break
                total_primary += rse['primary']
                total_secondary += rse['secondary']
                total_total += float(rse['total'])
                rse['receive_volume'] = 0  # Already rebalanced volume in this run

            global_ratio = float(total_primary) / float(total_total)
            logger(logging.INFO, 'Global ratio: %f', global_ratio)

            for rse in sorted(rses, key=lambda k: k['ratio']):
                logger(logging.INFO, '%s Sec/Prim local ratio (%f) vs global %s', rse['rse'], rse['ratio'], global_ratio)
            rses_over_ratio = sorted([rse for rse in rses if rse['ratio'] > global_ratio + global_ratio * tolerance], key=lambda k: k['ratio'], reverse=True)
            rses_under_ratio = sorted([rse for rse in rses if rse['ratio'] < global_ratio - global_ratio * tolerance], key=lambda k: k['ratio'], reverse=False)

            # Excluding RSEs
            logger(logging.DEBUG, 'Excluding RSEs as destination which are too small by size:')
            for des in rses_under_ratio:
                if des['total'] < min_total:
                    logger(logging.DEBUG, 'Excluding %s', des['rse'])
                    rses_under_ratio.remove(des)
            logger(logging.DEBUG, 'Excluding RSEs as sources which are too small by size:')
            for src in rses_over_ratio:
                if src['total'] < min_total:
                    logger(logging.DEBUG, 'Excluding %s', src['rse'])
                    rses_over_ratio.remove(src)
            logger(logging.DEBUG, 'Excluding RSEs as destinations which are not available for write:')
            for des in rses_under_ratio:
                if des['availability'] & 2 == 0:
                    logger(logging.DEBUG, 'Excluding %s', des['rse'])
                    rses_under_ratio.remove(des)
            logger(logging.DEBUG, 'Excluding RSEs as sources which are not available for read:')
            for src in rses_over_ratio:
                if src['availability'] & 4 == 0:
                    logger(logging.DEBUG, 'Excluding %s', src['rse'])
                    rses_over_ratio.remove(src)

            # Gets the number of active transfers per location
            dict_locks = get_active_locks(session=None)

            # Loop over RSEs over the ratio
            for index, source_rse in enumerate(rses_over_ratio):

                # The volume that would be rebalanced, not real availability of the data:
                available_source_rebalance_volume = int((source_rse['primary'] - global_ratio * source_rse['secondary']) / (global_ratio + 1))
                if available_source_rebalance_volume > max_rse_rebalance_volume:
                    available_source_rebalance_volume = max_rse_rebalance_volume
                if available_source_rebalance_volume > max_total_rebalance_volume - total_rebalance_volume:
                    available_source_rebalance_volume = max_total_rebalance_volume - total_rebalance_volume

                # Select a target:
                for destination_rse in rses_under_ratio:
                    if available_source_rebalance_volume > 0:
                        vo_str = ' on VO {}'.format(destination_rse['vo']) if destination_rse['vo'] != 'def' else ''
                        if index == 0 and destination_rse['id'] in dict_locks:
                            replicating_volume = dict_locks[destination_rse['id']]['bytes']
                            logger(logging.DEBUG, 'Already %f TB replicating to %s%s', replicating_volume / 1E12, destination_rse['rse'], vo_str)
                            destination_rse['receive_volume'] += replicating_volume
                        if destination_rse['receive_volume'] >= max_rse_rebalance_volume:
                            continue
                        available_target_rebalance_volume = max_rse_rebalance_volume - destination_rse['receive_volume']
                        if available_target_rebalance_volume >= available_source_rebalance_volume:
                            available_target_rebalance_volume = available_source_rebalance_volume

                        logger(logging.INFO, 'Rebalance %d TB from %s(%f) to %s(%f)%s', available_target_rebalance_volume / 1E12,
                               source_rse['rse'], source_rse['ratio'], destination_rse['rse'], destination_rse['ratio'], vo_str)
                        expr = destination_rse['rse']
                        rebalance_rse(rse_id=source_rse['id'], max_bytes=available_target_rebalance_volume, dry_run=dry_run,
                                      comment='Background rebalancing', force_expression=expr, logger=logger)

                        destination_rse['receive_volume'] += available_target_rebalance_volume
                        total_rebalance_volume += available_target_rebalance_volume
                        available_source_rebalance_volume -= available_target_rebalance_volume

        if once:
            break

        end_time = time.time()
        time_diff = end_time - start_time
        if time_diff < sleep_time:
            logger(logging.INFO, 'Sleeping for a while : %f seconds', sleep_time - time_diff)
            GRACEFUL_STOP.wait(sleep_time - time_diff)

    die(executable='rucio-bb8', hostname=hostname, pid=pid, thread=hb_thread)
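# Illustrative worked example (invented numbers, bytes): the source-side formula
# (primary - global_ratio * secondary) / (global_ratio + 1) estimates how much
# volume would bring an over-ratio RSE back towards the global ratio; it is then
# capped per RSE and by the remaining global rebalance budget, as in the loop above.
global_ratio = 0.8
primary = 900_000_000_000_000              # 900 TB primary data on the source RSE
secondary = 100_000_000_000_000            # 100 TB expired/secondary data
max_rse_rebalance_volume = 500_000_000_000       # 500 GB per-RSE cap
max_total_rebalance_volume = 10_000_000_000_000  # 10 TB per-cycle cap
total_rebalance_volume = 9_800_000_000_000       # already scheduled this cycle

available = int((primary - global_ratio * secondary) / (global_ratio + 1))
available = min(available, max_rse_rebalance_volume,
                max_total_rebalance_volume - total_rebalance_volume)
print(available)   # 200000000000 -> limited by the remaining global budget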
def run_once(heartbeat_handler: "HeartbeatHandler", rse_expression: str, move_subscriptions: bool, use_dump: bool, dry_run: bool, **_kwargs) -> bool:
    must_sleep = False
    total_rebalance_volume = 0
    worker_number, total_workers, logger = heartbeat_handler.live()
    logger(logging.DEBUG, "Running BB8 on rse_expression: %s", rse_expression)
    tolerance = config_get_float("bb8", "tolerance", default=0.05)
    max_total_rebalance_volume = config_get_float("bb8", "max_total_rebalance_volume", default=10 * 1e12)
    max_rse_rebalance_volume = config_get_float("bb8", "max_rse_rebalance_volume", default=500 * 1e9)
    min_total = config_get_float("bb8", "min_total", default=20 * 1e9)
    payload_cnt = list_payload_counts(executable="rucio-bb8", older_than=600, hash_executable=None, session=None)
    if rse_expression in payload_cnt:
        logger(logging.WARNING, "One BB8 instance already running with the same RSE expression. Stopping")
        must_sleep = True
        return must_sleep
    else:
        # List the RSEs represented by rse_expression
        try:
            rses = [rse for rse in parse_expression(rse_expression)]
            list_rses2 = [rse["rse"] for rse in rses]
        except InvalidRSEExpression as err:
            logger(logging.ERROR, err)
            return must_sleep
        # List the RSEs represented by all the RSE expressions stored in heartbeat payload
        list_rses1 = []
        for rse_exp in payload_cnt:
            if rse_exp:
                list_rses1 = [rse["rse"] for rse in parse_expression(rse_exp)]
        for rse in list_rses2:
            if rse in list_rses1:
                logger(logging.WARNING, "Overlapping RSE expressions %s vs %s. Stopping", rse_exp, rse_expression)
                return must_sleep

        logger(logging.INFO, "Will process rebalancing on %s", rse_expression)
        worker_number, total_workers, logger = heartbeat_handler.live()
        total_primary = 0
        total_secondary = 0
        total_total = 0
        global_ratio = float(0)
        for rse in rses:
            logger(logging.DEBUG, "Getting RSE usage on %s", rse["rse"])
            rse_usage = get_rse_usage(rse_id=rse["id"])
            usage_dict = {}
            for item in rse_usage:
                # TODO Check last update
                usage_dict[item["source"]] = {"used": item["used"], "free": item["free"], "total": item["total"]}

            try:
                rse["primary"] = usage_dict["rucio"]["used"] - usage_dict["expired"]["used"]
                rse["secondary"] = usage_dict["expired"]["used"]
                rse["total"] = usage_dict["storage"]["total"] - usage_dict["min_free_space"]["used"]
                rse["ratio"] = float(rse["primary"]) / float(rse["total"])
            except KeyError as err:
                logger(logging.ERROR, "Missing source usage %s for RSE %s. Exiting", err, rse["rse"])
                break
            total_primary += rse["primary"]
            total_secondary += rse["secondary"]
            total_total += float(rse["total"])
            rse["receive_volume"] = 0  # Already rebalanced volume in this run

        global_ratio = float(total_primary) / float(total_total)
        logger(logging.INFO, "Global ratio: %f", global_ratio)

        for rse in sorted(rses, key=lambda k: k["ratio"]):
            logger(logging.INFO, "%s Sec/Prim local ratio (%f) vs global %s", rse["rse"], rse["ratio"], global_ratio)
        rses_over_ratio = sorted([rse for rse in rses if rse["ratio"] > global_ratio + global_ratio * tolerance],
                                 key=lambda k: k["ratio"], reverse=True)
        rses_under_ratio = sorted([rse for rse in rses if rse["ratio"] < global_ratio - global_ratio * tolerance],
                                  key=lambda k: k["ratio"], reverse=False)

        # Excluding RSEs
        logger(logging.DEBUG, "Excluding RSEs as destination which are too small by size:")
        for des in rses_under_ratio:
            if des["total"] < min_total:
                logger(logging.DEBUG, "Excluding %s", des["rse"])
                rses_under_ratio.remove(des)
        logger(logging.DEBUG, "Excluding RSEs as sources which are too small by size:")
        for src in rses_over_ratio:
            if src["total"] < min_total:
                logger(logging.DEBUG, "Excluding %s", src["rse"])
                rses_over_ratio.remove(src)
        logger(logging.DEBUG, "Excluding RSEs as destinations which are not available for write:")
        for des in rses_under_ratio:
            if des["availability"] & 2 == 0:
                logger(logging.DEBUG, "Excluding %s", des["rse"])
                rses_under_ratio.remove(des)
        logger(logging.DEBUG, "Excluding RSEs as sources which are not available for read:")
        for src in rses_over_ratio:
            if src["availability"] & 4 == 0:
                logger(logging.DEBUG, "Excluding %s", src["rse"])
                rses_over_ratio.remove(src)

        # Gets the number of active transfers per location
        dict_locks = get_active_locks(session=None)

        # Loop over RSEs over the ratio
        for index, source_rse in enumerate(rses_over_ratio):

            # The volume that would be rebalanced, not real availability of the data:
            available_source_rebalance_volume = int((source_rse["primary"] - global_ratio * source_rse["secondary"]) / (global_ratio + 1))
            if available_source_rebalance_volume > max_rse_rebalance_volume:
                available_source_rebalance_volume = max_rse_rebalance_volume
            if available_source_rebalance_volume > max_total_rebalance_volume - total_rebalance_volume:
                available_source_rebalance_volume = max_total_rebalance_volume - total_rebalance_volume

            # Select a target:
            for destination_rse in rses_under_ratio:
                if available_source_rebalance_volume > 0:
                    vo_str = " on VO {}".format(destination_rse["vo"]) if destination_rse["vo"] != "def" else ""
                    if index == 0 and destination_rse["id"] in dict_locks:
                        replicating_volume = dict_locks[destination_rse["id"]]["bytes"]
                        logger(logging.DEBUG, "Already %f TB replicating to %s%s", replicating_volume / 1e12, destination_rse["rse"], vo_str)
                        destination_rse["receive_volume"] += replicating_volume
                    if destination_rse["receive_volume"] >= max_rse_rebalance_volume:
                        continue
                    available_target_rebalance_volume = max_rse_rebalance_volume - destination_rse["receive_volume"]
                    if available_target_rebalance_volume >= available_source_rebalance_volume:
                        available_target_rebalance_volume = available_source_rebalance_volume

                    logger(logging.INFO, "Rebalance %d TB from %s(%f) to %s(%f)%s", available_target_rebalance_volume / 1e12,
                           source_rse["rse"], source_rse["ratio"], destination_rse["rse"], destination_rse["ratio"], vo_str)
                    expr = destination_rse["rse"]
                    rebalance_rse(rse_id=source_rse["id"], max_bytes=available_target_rebalance_volume, dry_run=dry_run,
                                  comment="Background rebalancing", force_expression=expr, logger=logger)

                    destination_rse["receive_volume"] += available_target_rebalance_volume
                    total_rebalance_volume += available_target_rebalance_volume
                    available_source_rebalance_volume -= available_target_rebalance_volume

    must_sleep = True
    return must_sleep