def reaper(rses, include_rses, exclude_rses, vos=None, chunk_size=100, once=False, greedy=False,
           scheme=None, delay_seconds=0, sleep_time=60, auto_exclude_threshold=100, auto_exclude_timeout=600):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param include_rses: RSE expression to include RSEs.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper.
    :param vos: VOs on which to look for RSEs. Only used in multi-VO mode. If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param chunk_size: The size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete replicas with a tombstone right away.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    :param sleep_time: Time between two cycles.
    :param auto_exclude_threshold: Number of service unavailable exceptions after which the RSE gets temporarily excluded.
    :param auto_exclude_timeout: Timeout for temporarily excluded RSEs.
    """
    hostname = socket.getfqdn()
    executable = 'reaper'
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')

    logger(logging.INFO, 'Reaper starting')

    if not once:
        GRACEFUL_STOP.wait(10)  # To prevent running on the same partition if all the reapers restart at the same time

    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.INFO, 'Reaper started')

    while not GRACEFUL_STOP.is_set():
        # Try to get the auto-exclude parameters from the config table. Otherwise use the CLI parameters.
        try:
            auto_exclude_threshold = config_get('reaper', 'auto_exclude_threshold', default=auto_exclude_threshold)
            auto_exclude_timeout = config_get('reaper', 'auto_exclude_timeout', default=auto_exclude_timeout)
        except (NoOptionError, NoSectionError, RuntimeError):
            pass

        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = config_get('reaper', 'max_evaluator_backlog_count')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = config_get('reaper', 'max_evaluator_backlog_duration')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_duration = None
        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            backlog = get_evaluation_backlog()
            if max_evaluator_backlog_count and \
               backlog[0] and \
               max_evaluator_backlog_duration and \
               backlog[1] and \
               backlog[0] > max_evaluator_backlog_count and \
               backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog count and duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and backlog[0] and backlog[0] > max_evaluator_backlog_count:
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog count hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_duration and backlog[1] and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue

        rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses, vos)
        if not rses_to_process:
            logger(logging.ERROR, 'Reaper: No RSEs found. Will sleep for 30 seconds')
            GRACEFUL_STOP.wait(30)
            continue

        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
            logger = formatted_logger(logging.log, prepend_str + '%s')
            tot_needed_free_space = 0
            for rse in rses_to_process:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blocklisted
                if rse['availability'] % 2 == 0:
                    logger(logging.DEBUG, 'RSE %s is blocklisted for delete', rse['rse'])
                    continue
                needed_free_space, only_delete_obsolete = __check_rse_usage(rse['rse'], rse['id'], greedy=greedy, logger=logger)
                if needed_free_space:
                    dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, only_delete_obsolete]
                    tot_needed_free_space += needed_free_space
                elif only_delete_obsolete:
                    dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, only_delete_obsolete]
                else:
                    logger(logging.DEBUG, 'Nothing to delete on %s', rse['rse'])

            # Order the RSEs based on the needed free space
            sorted_dict_rses = OrderedDict(sorted(dict_rses.items(), key=lambda x: x[1][0], reverse=True))
            logger(logging.DEBUG, 'List of RSEs to process ordered by needed space desc: %s', str(sorted_dict_rses))

            # Get the mapping between the RSE and the hostname used for deletion. The dictionary has RSE as key and (hostname, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logger(logging.DEBUG, '%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []

            # Loop over the RSEs. rse_key = (rse, rse_id) and fill list_rses_mult that contains all RSEs to process with different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of the RSEs with a small amount of files to delete
                if tot_needed_free_space:
                    max_workers = ceil(dict_rses[rse_key][0] / tot_needed_free_space * 1000 / heart_beat['nr_threads'])
                else:
                    max_workers = 1
                list_rses_mult.extend([(rse_name, rse_id, dict_rses[rse_key][0], dict_rses[rse_key][1]) for _ in range(int(max_workers))])
            random.shuffle(list_rses_mult)

            paused_rses = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                result = REGION.get('pause_deletion_%s' % rse_id, expiration_time=120)
                if result is not NO_VALUE:
                    paused_rses.append(rse_name)
                    logger(logging.DEBUG, 'Not enough replicas to delete on %s during the previous cycle. Deletion paused for a while', rse_name)
                    continue

                result = REGION.get('temporary_exclude_%s' % rse_id, expiration_time=auto_exclude_timeout)
                if result is not NO_VALUE:
                    logger(logging.WARNING, 'Too many failed attempts for %s in last cycle. RSE is temporarily excluded.', rse_name)
                    labels = {'rse': rse_name}
                    EXCLUDED_RSE_GAUGE.labels(**labels).set(1)
                    continue
                labels = {'rse': rse_name}
                EXCLUDED_RSE_GAUGE.labels(**labels).set(0)

                percent = 0
                if tot_needed_free_space:
                    percent = needed_free_space / tot_needed_free_space * 100
                logger(logging.DEBUG, 'Working on %s. Percentage of the total space needed %.2f', rse_name, percent)

                try:
                    rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                except KeyError:
                    logger(logging.DEBUG, "Hostname lookup for %s failed.", rse_name)
                    REGION.set('pause_deletion_%s' % rse_id, True)
                    continue
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
                # logger(logging.DEBUG, '%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]

                max_deletion_thread = get_max_deletion_threads_by_hostname(rse_hostname)
                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logger(logging.DEBUG, 'Too many deletion threads for %s on RSE %s. Back off', rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue

                logger(logging.INFO, 'Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s', rse_hostname, tot_threads_for_hostname, max_deletion_thread, rse_name)
                live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                logger(logging.DEBUG, 'Total deletion workers for %s : %i', rse_hostname, tot_threads_for_hostname + 1)

                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                only_delete_obsolete = dict_rses[(rse_name, rse_id)][1]
                try:
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        if only_delete_obsolete:
                            logger(logging.DEBUG, 'Will run list_and_mark_unlocked_replicas on %s. No space needed, will only delete EPOCH tombstoned replicas', rse_name)
                        replicas = list_and_mark_unlocked_replicas(limit=chunk_size,
                                                                   bytes_=needed_free_space,
                                                                   rse_id=rse_id,
                                                                   delay_seconds=delay_seconds,
                                                                   only_delete_obsolete=only_delete_obsolete,
                                                                   session=None)
                        logger(logging.DEBUG, 'list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas', rse_name, needed_free_space, time.time() - del_start_time, len(replicas))
                        if len(replicas) < chunk_size:
                            logger(logging.DEBUG, 'Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle', rse_name, chunk_size, len(replicas))
                            REGION.set('pause_deletion_%s' % rse_id, True)
                except (DatabaseException, IntegrityError, DatabaseError) as error:
                    logger(logging.ERROR, '%s', str(error))
                    continue
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

                # Physical deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme, logger=logger)
                    for file_replicas in chunks(replicas, chunk_size):
                        # Refresh heartbeat
                        live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                           lfns=[{'scope': replica['scope'].external, 'name': replica['name'], 'path': replica['path']}],
                                                                           operation='delete',
                                                                           scheme=scheme).values())[0])
                            except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                logger(logging.WARNING, 'Failed to get pfn for UNAVAILABLE replica %s:%s on %s with error %s', replica['scope'], replica['name'], rse_name, str(error))
                                replica['pfn'] = None
                            except Exception:
                                logger(logging.CRITICAL, 'Exception', exc_info=True)

                        deleted_files = delete_from_storage(file_replicas, prot, rse_info, staging_areas, auto_exclude_threshold, logger=logger)
                        logger(logging.INFO, '%i files processed in %s seconds', len(file_replicas), time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block('reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logger(logging.DEBUG, 'delete_replicas succeeded on %s : %s replicas in %s seconds', rse_name, len(deleted_files), time.time() - del_start)
                        DELETION_COUNTER.inc(len(deleted_files))
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

            if paused_rses:
                logger(logging.INFO, 'Deletion paused for a while for following RSEs: %s', ', '.join(paused_rses))

            if once:
                break

            daemon_sleep(start_time=start_time, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP, logger=logger)

        except DatabaseException as error:
            logger(logging.WARNING, 'Reaper: %s', str(error))
        except Exception:
            logger(logging.CRITICAL, 'Exception', exc_info=True)
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logger(logging.INFO, 'Graceful stop requested')
    logger(logging.INFO, 'Graceful stop done')
    return
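

# --- Illustrative sketch (not part of the original daemon code above) ----------
# The reaper derives a per-RSE "multiplicity": each RSE is repeated in
# list_rses_mult proportionally to its share of the total space to reclaim
# (ceil(share * 1000 / nr_threads)), and the list is shuffled so parallel
# workers spread over RSEs. The standalone toy below reproduces only that
# weighting logic with plain Python; the RSE names and byte counts are invented.
from math import ceil
import random


def build_deletion_schedule(needed_bytes_by_rse, nr_threads):
    """Return a shuffled list of RSE names, each repeated according to its weight."""
    total = sum(needed_bytes_by_rse.values())
    schedule = []
    for rse, needed in needed_bytes_by_rse.items():
        weight = ceil(needed / total * 1000 / nr_threads) if total else 1
        schedule.extend([rse] * int(weight))
    random.shuffle(schedule)
    return schedule

# Example: with 10 threads, an RSE holding 80% of the deletable bytes gets ~80
# of the ~100 slots, so most workers land on it first:
# build_deletion_schedule({'SITE_A': 800e9, 'SITE_B': 150e9, 'SITE_C': 50e9}, nr_threads=10)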

def reaper(rses, include_rses, exclude_rses, chunk_size=100, once=False, greedy=False,
           scheme=None, delay_seconds=0, sleep_time=60):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper.
    :param include_rses: RSE expression to include RSEs.
    :param chunk_size: The size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete replicas with a tombstone right away.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    :param sleep_time: Time between two cycles.
    """
    hostname = socket.getfqdn()
    executable = sys.argv[0]
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logging.info('%s Reaper starting', prepend_str)

    GRACEFUL_STOP.wait(10)  # To prevent running on the same partition if all the reapers restart at the same time

    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logging.info('%s Reaper started', prepend_str)

    while not GRACEFUL_STOP.is_set():
        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = get('reaper', 'max_evaluator_backlog_count')
        except ConfigNotFound:
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = get('reaper', 'max_evaluator_backlog_duration')
        except ConfigNotFound:
            max_evaluator_backlog_duration = None
        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            backlog = get_evaluation_backlog()
            if max_evaluator_backlog_count and \
               backlog[0] and \
               max_evaluator_backlog_duration and \
               backlog[1] and \
               backlog[0] > max_evaluator_backlog_count and \
               backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logging.error('%s Reaper: Judge evaluator backlog count and duration hit, stopping operation', prepend_str)
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and backlog[0] and backlog[0] > max_evaluator_backlog_count:
                logging.error('%s Reaper: Judge evaluator backlog count hit, stopping operation', prepend_str)
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_duration and backlog[1] and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logging.error('%s Reaper: Judge evaluator backlog duration hit, stopping operation', prepend_str)
                GRACEFUL_STOP.wait(30)
                continue

        rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses)
        if not rses_to_process:
            logging.error('%s Reaper: No RSEs found. Will sleep for 30 seconds', prepend_str)
            GRACEFUL_STOP.wait(30)
            continue

        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
            tot_needed_free_space = 0
            for rse in rses_to_process:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blacklisted
                if rse['availability'] % 2 == 0:
                    logging.debug('%s RSE %s is blacklisted for delete', prepend_str, rse['rse'])
                    continue
                max_being_deleted_files, needed_free_space, used, free, only_delete_obsolete = __check_rse_usage(rse['rse'], rse['id'], prepend_str)
                # Check if greedy mode
                if greedy:
                    dict_rses[(rse['rse'], rse['id'])] = [1000000000000, max_being_deleted_files, only_delete_obsolete]
                    tot_needed_free_space += 1000000000000
                else:
                    if needed_free_space:
                        dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, max_being_deleted_files, only_delete_obsolete]
                        tot_needed_free_space += needed_free_space
                    else:
                        logging.debug('%s Nothing to delete on %s', prepend_str, rse['rse'])

            # Order the RSEs based on the needed free space
            sorted_dict_rses = OrderedDict(sorted(dict_rses.items(), key=itemgetter(1), reverse=True))
            logging.debug('%s List of RSEs to process ordered by needed space desc : %s', prepend_str, str(sorted_dict_rses))

            # Get the mapping between the RSE and the hostname used for deletion. The dictionary has RSE as key and (hostname, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logging.debug('%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []

            # Loop over the RSEs. rse_key = (rse, rse_id) and fill list_rses_mult that contains all RSEs to process with different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of the RSEs with a small amount of files to delete
                max_workers = ceil(dict_rses[rse_key][0] / tot_needed_free_space * 1000 / heart_beat['nr_threads'])
                list_rses_mult.extend([(rse_name, rse_id, dict_rses[rse_key][0], dict_rses[rse_key][1]) for _ in range(int(max_workers))])
            random.shuffle(list_rses_mult)

            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                result = REGION.get('pause_deletion_%s' % rse_id, expiration_time=120)
                if result is not NO_VALUE:
                    logging.info('%s Not enough replicas to delete on %s during the previous cycle. Deletion paused for a while', prepend_str, rse_name)
                    continue
                logging.debug('%s Working on %s. Percentage of the total space needed %.2f', prepend_str, rse_name, needed_free_space / tot_needed_free_space * 100)
                rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
                # logging.debug('%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]

                max_deletion_thread = get_max_deletion_threads_by_hostname(rse_hostname)
                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logging.debug('%s Too many deletion threads for %s on RSE %s. Back off', prepend_str, rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue

                logging.info('%s Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s', prepend_str, rse_hostname, tot_threads_for_hostname, max_deletion_thread, rse_name)
                live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                logging.debug('%s Total deletion workers for %s : %i', prepend_str, rse_hostname, tot_threads_for_hostname + 1)

                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                only_delete_obsolete = dict_rses[(rse_name, rse_id)][2]
                try:
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        if only_delete_obsolete:
                            logging.debug('%s Will run list_and_mark_unlocked_replicas on %s. No space needed, will only delete EPOCH tombstoned replicas', prepend_str, rse_name)
                        replicas = list_and_mark_unlocked_replicas(limit=chunk_size,
                                                                   bytes=needed_free_space,
                                                                   rse_id=rse_id,
                                                                   delay_seconds=delay_seconds,
                                                                   only_delete_obsolete=only_delete_obsolete,
                                                                   session=None)
                        logging.debug('%s list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas', prepend_str, rse_name, needed_free_space, time.time() - del_start_time, len(replicas))
                        if len(replicas) < chunk_size:
                            logging.info('%s Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle', prepend_str, rse_name, chunk_size, len(replicas))
                            REGION.set('pause_deletion_%s' % rse_id, True)

                except (DatabaseException, IntegrityError, DatabaseError) as error:
                    logging.error('%s %s', prepend_str, str(error))
                    continue
                except Exception:
                    logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

                # Physical deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                    for file_replicas in chunks(replicas, 100):
                        # Refresh heartbeat
                        live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                      lfns=[{'scope': replica['scope'], 'name': replica['name'], 'path': replica['path']}],
                                                                      operation='delete',
                                                                      scheme=scheme).values()[0])
                            except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                logging.warning('%s Failed to get pfn for UNAVAILABLE replica %s:%s on %s with error %s', prepend_str, replica['scope'], replica['name'], rse_name, str(error))
                                replica['pfn'] = None
                            except Exception:
                                logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

                        deleted_files = delete_from_storage(file_replicas, prot, rse_info, staging_areas, prepend_str)
                        logging.info('%s %i files processed in %s seconds', prepend_str, len(file_replicas), time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block('reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logging.debug('%s delete_replicas succeeded on %s : %s replicas in %s seconds', prepend_str, rse_name, len(deleted_files), time.time() - del_start)
                        monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                        DELETION_COUNTER.inc(len(deleted_files))
                except Exception as error:
                    logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

            if once:
                break

            tottime = time.time() - start_time
            if tottime < sleep_time:
                logging.info('%s Will sleep for %s seconds', prepend_str, sleep_time - tottime)
                GRACEFUL_STOP.wait(sleep_time - tottime)

        except DatabaseException as error:
            logging.warning('%s Reaper: %s', prepend_str, str(error))
        except Exception:
            logging.critical('%s %s', prepend_str, str(traceback.format_exc()))
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logging.info('%s Graceful stop requested', prepend_str)
    logging.info('%s Graceful stop done', prepend_str)
    return
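

# --- Illustrative sketch (not part of the original daemon code above) ----------
# Both reaper variants use a dogpile.cache region (REGION) to remember for a
# couple of minutes that an RSE returned fewer replicas than requested
# ('pause_deletion_<rse_id>'), so later passes skip it. This is a minimal sketch
# of that pattern with an in-memory dogpile.cache backend; the key name and the
# 120-second window mirror the code above, while the region configuration and
# helper names are assumptions for illustration.
from dogpile.cache import make_region
from dogpile.cache.api import NO_VALUE

REGION_SKETCH = make_region().configure('dogpile.cache.memory', expiration_time=3600)


def pause_rse(rse_id):
    """Record that the RSE did not have enough replicas to delete this cycle."""
    REGION_SKETCH.set('pause_deletion_%s' % rse_id, True)


def should_skip_rse(rse_id):
    """True if deletion on this RSE was paused less than 120 seconds ago."""
    return REGION_SKETCH.get('pause_deletion_%s' % rse_id, expiration_time=120) is not NO_VALUE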

def necromancer(thread=0, bulk=5, once=False, sleep_time=60):
    """
    Creates a Necromancer Worker that gets a list of bad replicas for a given hash,
    identifies lost DIDs and, for non-lost ones, sets the locks and rules for reevaluation.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param once: Run only once.
    :param sleep_time: Thread sleep time after each chunk of work.
    """
    update_history_threshold = 3600
    update_history_time = time.time()

    executable = 'necromancer'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        heart_beat = heartbeat.live(executable, hostname, pid, hb_thread)
        prefix = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
        logger = formatted_logger(logging.log, prefix + '%s')

        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = config_get('necromancer', 'max_evaluator_backlog_count')
            max_evaluator_backlog_count = int(max_evaluator_backlog_count)
        except (NoOptionError, NoSectionError, RuntimeError, ValueError):
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = config_get('necromancer', 'max_evaluator_backlog_duration')
            max_evaluator_backlog_duration = int(max_evaluator_backlog_duration)
        except (NoOptionError, NoSectionError, RuntimeError, ValueError):
            max_evaluator_backlog_duration = None

        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            backlog = get_evaluation_backlog(expiration_time=sleep_time)
            if max_evaluator_backlog_count and \
               backlog[0] and \
               max_evaluator_backlog_duration and \
               backlog[1] and \
               backlog[0] > max_evaluator_backlog_count and \
               backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Necromancer: Judge evaluator backlog count and duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and backlog[0] and backlog[0] > max_evaluator_backlog_count:
                logger(logging.ERROR, 'Necromancer: Judge evaluator backlog count hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_duration and backlog[1] and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Necromancer: Judge evaluator backlog duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue

        # Check how many bad replicas are queued
        try:
            max_bad_replicas_backlog_count = config_get('necromancer', 'max_bad_replicas_backlog_count')
            max_bad_replicas_backlog_count = int(max_bad_replicas_backlog_count)
        except (NoOptionError, NoSectionError, RuntimeError, ValueError):
            max_bad_replicas_backlog_count = None
        bad_replicas_backlog = get_bad_replicas_backlog()
        tot_bad_files = sum([bad_replicas_backlog[key] for key in bad_replicas_backlog])
        list_of_rses = list()
        # If there are too many replicas, call list_bad_replicas with a list of RSEs
        if max_bad_replicas_backlog_count and tot_bad_files > max_bad_replicas_backlog_count and len(bad_replicas_backlog) > 1:
            logger(logging.INFO, 'Backlog of bad replicas too big. Apply some sharing between different RSEs')
            rses = list()
            cnt = 0
            for key in sorted(bad_replicas_backlog, key=bad_replicas_backlog.get, reverse=False):
                rses.append({'id': key})
                cnt += bad_replicas_backlog[key]
                if cnt >= bulk:
                    list_of_rses.append(rses)
                    rses = list()
                    cnt = 0
        else:
            list_of_rses.append(None)

        stime = time.time()
        replicas = []
        try:
            for rses in list_of_rses:
                replicas = list_bad_replicas(limit=bulk, thread=heart_beat['assign_thread'], total_threads=heart_beat['nr_threads'], rses=rses)
                for replica in replicas:
                    scope, name, rse_id, rse = replica['scope'], replica['name'], replica['rse_id'], replica['rse']
                    logger(logging.INFO, 'Working on %s:%s on %s' % (scope, name, rse))

                    list_replicas = get_replicas_state(scope=scope, name=name)
                    if ReplicaState.AVAILABLE not in list_replicas and ReplicaState.TEMPORARY_UNAVAILABLE not in list_replicas:
                        logger(logging.INFO, 'File %s:%s has no other available or temporary available replicas, it will be marked as lost' % (scope, name))
                        try:
                            update_rules_for_lost_replica(scope=scope, name=name, rse_id=rse_id, nowait=True)
                            monitor.record_counter(name='necromancer.badfiles.lostfile')
                        except DatabaseException as error:
                            logger(logging.WARNING, str(error))
                    else:
                        rep = list_replicas.get(ReplicaState.AVAILABLE, [])
                        unavailable_rep = list_replicas.get(ReplicaState.TEMPORARY_UNAVAILABLE, [])
                        logger(logging.INFO, 'File %s:%s can be recovered. Available sources : %s + Unavailable sources : %s' % (scope, name, str(rep), str(unavailable_rep)))
                        try:
                            update_rules_for_bad_replica(scope=scope, name=name, rse_id=rse_id, nowait=True)
                            monitor.record_counter(name='necromancer.badfiles.recovering')
                        except DatabaseException as error:
                            logger(logging.WARNING, str(error))

            logger(logging.INFO, 'It took %s seconds to process %s replicas' % (str(time.time() - stime), str(len(replicas))))
        except Exception:
            exc_type, exc_value, exc_traceback = exc_info()
            logger(logging.CRITICAL, ''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())

        if once:
            break
        else:
            now = time.time()
            if (now - update_history_time) > update_history_threshold:
                logger(logging.INFO, 'Last update of history table %s seconds ago. Running update.' % (now - update_history_time))
                bad_replicas = list_bad_replicas_history(limit=1000000, thread=heart_beat['assign_thread'], total_threads=heart_beat['nr_threads'])
                for rse_id in bad_replicas:
                    chunk_size = 1000
                    nchunk = int(ceil(len(bad_replicas[rse_id]) / chunk_size))
                    logger(logging.DEBUG, 'Update history for rse_id %s' % (rse_id))
                    cnt = 0
                    for chunk in chunks(bad_replicas[rse_id], chunk_size):
                        logger(logging.DEBUG, ' History for rse_id %s : chunk %i/%i' % (rse_id, cnt, nchunk))
                        cnt += 1
                        update_bad_replicas_history(chunk, rse_id)
                logger(logging.INFO, 'History table updated in %s seconds' % (time.time() - now))
                update_history_time = time.time()

            if len(replicas) == bulk:
                logger(logging.INFO, 'Processed maximum number of replicas according to the bulk size. Restart immediately next cycle')
            else:
                daemon_sleep(start_time=stime, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP)

    logger(logging.INFO, 'Graceful stop requested')
    heartbeat.die(executable, hostname, pid, hb_thread)
    logger(logging.INFO, 'Graceful stop done')

def necromancer(thread=0, bulk=5, once=False, sleep_time=60):
    """
    Creates a Necromancer Worker that gets a list of bad replicas for a given hash,
    identifies lost DIDs and, for non-lost ones, sets the locks and rules for reevaluation.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param once: Run only once.
    :param sleep_time: Thread sleep time after each chunk of work.
    """
    executable = 'necromancer'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        stime = time.time()
        heart_beat = heartbeat.live(executable, hostname, pid, hb_thread)
        prefix = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
        logger = formatted_logger(logging.log, prefix + '%s')
        logger(logging.DEBUG, 'Starting new cycle')

        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = config_get('necromancer', 'max_evaluator_backlog_count')
            max_evaluator_backlog_count = int(max_evaluator_backlog_count)
        except (NoOptionError, NoSectionError, RuntimeError, ValueError):
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = config_get('necromancer', 'max_evaluator_backlog_duration')
            max_evaluator_backlog_duration = int(max_evaluator_backlog_duration)
        except (NoOptionError, NoSectionError, RuntimeError, ValueError):
            max_evaluator_backlog_duration = None

        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            evaluator_backlog_count, evaluator_backlog_duration = get_evaluation_backlog(expiration_time=sleep_time)
            if max_evaluator_backlog_count and \
               evaluator_backlog_count and \
               max_evaluator_backlog_duration and \
               evaluator_backlog_duration and \
               evaluator_backlog_count > max_evaluator_backlog_count and \
               evaluator_backlog_duration < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Necromancer: Judge evaluator backlog count and duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and evaluator_backlog_count and evaluator_backlog_count > max_evaluator_backlog_count:
                logger(logging.ERROR, 'Necromancer: Judge evaluator backlog count hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_duration and evaluator_backlog_duration and evaluator_backlog_duration < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Necromancer: Judge evaluator backlog duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue

        # Check how many bad replicas are queued
        try:
            max_bad_replicas_backlog_count = config_get('necromancer', 'max_bad_replicas_backlog_count')
            max_bad_replicas_backlog_count = int(max_bad_replicas_backlog_count)
        except (NoOptionError, NoSectionError, RuntimeError, ValueError):
            max_bad_replicas_backlog_count = None
        bad_replicas_backlog = REGION.get('bad_replicas_backlog')
        if bad_replicas_backlog is NO_VALUE:
            bad_replicas_backlog = get_bad_replicas_backlog()
            REGION.set('bad_replicas_backlog', bad_replicas_backlog)
        tot_bad_files = sum([bad_replicas_backlog[key] for key in bad_replicas_backlog])
        list_of_rses = list()
        # If there are too many replicas, call list_bad_replicas with a list of RSEs
        if max_bad_replicas_backlog_count and tot_bad_files > max_bad_replicas_backlog_count and len(bad_replicas_backlog) > 1:
            logger(logging.INFO, 'Backlog of bad replicas too big. Apply some sharing between different RSEs')
            rses = list()
            cnt = 0
            for key in sorted(bad_replicas_backlog, key=bad_replicas_backlog.get, reverse=True):
                rses.append({'id': key})
                cnt += bad_replicas_backlog[key]
                if cnt >= bulk:
                    list_of_rses.append(rses)
                    rses = list()
                    cnt = 0
        else:
            list_of_rses.append(None)

        tot_processed = 0
        if tot_bad_files == 0:
            logger(logging.INFO, 'No bad replicas to process.')
        else:
            ttime = time.time()
            replicas = []
            try:
                for rses in list_of_rses:
                    replicas = list_bad_replicas(limit=bulk, thread=heart_beat['assign_thread'], total_threads=heart_beat['nr_threads'], rses=rses)
                    for replica in replicas:
                        scope, name, rse_id, rse = replica['scope'], replica['name'], replica['rse_id'], replica['rse']
                        logger(logging.INFO, 'Working on %s:%s on %s' % (scope, name, rse))

                        list_replicas = get_replicas_state(scope=scope, name=name)
                        if ReplicaState.AVAILABLE not in list_replicas and ReplicaState.TEMPORARY_UNAVAILABLE not in list_replicas:
                            logger(logging.INFO, 'File %s:%s has no other available or temporary available replicas, it will be marked as lost' % (scope, name))
                            try:
                                update_rules_for_lost_replica(scope=scope, name=name, rse_id=rse_id, nowait=True)
                                monitor.record_counter(name='necromancer.badfiles.lostfile')
                            except (DatabaseException, DatabaseError) as error:
                                if re.match('.*ORA-00054.*', error.args[0]) or re.match('.*ORA-00060.*', error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                                    logger(logging.WARNING, 'Lock detected when handling request - skipping: %s', str(error))
                                else:
                                    logger(logging.ERROR, str(error))
                        else:
                            rep = list_replicas.get(ReplicaState.AVAILABLE, [])
                            unavailable_rep = list_replicas.get(ReplicaState.TEMPORARY_UNAVAILABLE, [])
                            logger(logging.INFO, 'File %s:%s can be recovered. Available sources : %s + Unavailable sources : %s' % (scope, name, str(rep), str(unavailable_rep)))
                            try:
                                update_rules_for_bad_replica(scope=scope, name=name, rse_id=rse_id, nowait=True)
                                monitor.record_counter(name='necromancer.badfiles.recovering')
                            except (DatabaseException, DatabaseError) as error:
                                if re.match('.*ORA-00054.*', error.args[0]) or re.match('.*ORA-00060.*', error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                                    logger(logging.WARNING, 'Lock detected when handling request - skipping: %s', str(error))
                                else:
                                    logger(logging.ERROR, str(error))
                    tot_processed += len(replicas)
                logger(logging.INFO, 'It took %s seconds to process %s replicas' % (str(time.time() - ttime), str(len(replicas))))
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logger(logging.CRITICAL, ''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())

        if once:
            break
        elif tot_processed == 0 or tot_bad_files == 0:
            daemon_sleep(start_time=stime, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP, logger=logger)

    logger(logging.INFO, 'Graceful stop requested')
    heartbeat.die(executable, hostname, pid, hb_thread)
    logger(logging.INFO, 'Graceful stop done')
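

# --- Illustrative sketch (not part of the original daemon code above) ----------
# When the bad-replica backlog is large, the necromancer shards the work: RSEs
# are sorted by backlog size and grouped until each group accounts for at least
# `bulk` bad files, and list_bad_replicas() is then called once per group.  The
# toy below isolates that grouping; like the original, it drops a trailing
# group that never reaches `bulk`. The RSE ids and counts are invented.
def share_backlog(bad_replicas_backlog, bulk, descending=True):
    """Group RSE ids into batches whose backlog counts sum to at least `bulk`."""
    groups, current, cnt = [], [], 0
    for rse_id in sorted(bad_replicas_backlog, key=bad_replicas_backlog.get, reverse=descending):
        current.append({'id': rse_id})
        cnt += bad_replicas_backlog[rse_id]
        if cnt >= bulk:
            groups.append(current)
            current, cnt = [], 0
    return groups

# Example: share_backlog({'rse_a': 7, 'rse_b': 6, 'rse_c': 2, 'rse_d': 4}, bulk=5)
# -> [[{'id': 'rse_a'}], [{'id': 'rse_b'}], [{'id': 'rse_d'}, {'id': 'rse_c'}]]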

def run_once(rses, include_rses, exclude_rses, vos, chunk_size, greedy, scheme,
             delay_seconds, auto_exclude_threshold, auto_exclude_timeout,
             heartbeat_handler, oidc_account, oidc_scope, oidc_audience, **_kwargs):
    must_sleep = True

    _, total_workers, logger = heartbeat_handler.live()
    logger(logging.INFO, 'Reaper started')

    # Try to get the auto-exclude parameters from the config table. Otherwise use the CLI parameters.
    auto_exclude_threshold = config_get('reaper', 'auto_exclude_threshold', default=auto_exclude_threshold, raise_exception=False)
    auto_exclude_timeout = config_get('reaper', 'auto_exclude_timeout', default=auto_exclude_timeout, raise_exception=False)

    # Check if there is a Judge Evaluator backlog
    max_evaluator_backlog_count = config_get('reaper', 'max_evaluator_backlog_count', default=None, raise_exception=False)
    max_evaluator_backlog_duration = config_get('reaper', 'max_evaluator_backlog_duration', default=None, raise_exception=False)
    if max_evaluator_backlog_count or max_evaluator_backlog_duration:
        backlog = get_evaluation_backlog()
        count_is_hit = max_evaluator_backlog_count and backlog[0] and backlog[0] > max_evaluator_backlog_count
        duration_is_hit = max_evaluator_backlog_duration and backlog[1] and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration)
        if count_is_hit and duration_is_hit:
            logger(logging.ERROR, 'Reaper: Judge evaluator backlog count and duration hit, stopping operation')
            return must_sleep
        elif count_is_hit:
            logger(logging.ERROR, 'Reaper: Judge evaluator backlog count hit, stopping operation')
            return must_sleep
        elif duration_is_hit:
            logger(logging.ERROR, 'Reaper: Judge evaluator backlog duration hit, stopping operation')
            return must_sleep

    rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses, vos)
    rses_to_process = [RseData(id_=rse['id'], name=rse['rse'], columns=rse) for rse in rses_to_process]
    if not rses_to_process:
        logger(logging.ERROR, 'Reaper: No RSEs found. Will sleep for 30 seconds')
        return must_sleep

    # On big deletion campaigns, we desire to re-iterate fast on RSEs which have a lot of data to delete.
    # The called function will return the RSEs which have more work remaining.
    # Call the deletion routine again on this returned subset of RSEs.
    # Scale the number of allowed iterations with the number of total reaper workers
    iteration = 0
    max_fast_reiterations = int(log2(total_workers))
    while rses_to_process and iteration <= max_fast_reiterations:
        rses_to_process = _run_once(
            rses_to_process=rses_to_process,
            chunk_size=chunk_size,
            greedy=greedy,
            scheme=scheme,
            delay_seconds=delay_seconds,
            auto_exclude_threshold=auto_exclude_threshold,
            auto_exclude_timeout=auto_exclude_timeout,
            heartbeat_handler=heartbeat_handler,
            oidc_account=oidc_account,
            oidc_scope=oidc_scope,
            oidc_audience=oidc_audience,
        )
        if rses_to_process and iteration < max_fast_reiterations:
            logger(logging.INFO, "Will perform fast-reiteration %d/%d with rses: %s",
                   iteration + 1, max_fast_reiterations, [str(rse) for rse in rses_to_process])
        iteration += 1

    if rses_to_process:
        # There is still more work to be performed.
        # Inform the calling context that it must call reaper again (on the full list of rses)
        must_sleep = False

    return must_sleep
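

# --- Illustrative sketch (not part of the original daemon code above) ----------
# run_once() re-invokes the deletion routine on the RSEs that still have work,
# but caps the number of fast re-iterations at int(log2(total_workers)) and
# tells the caller through `must_sleep` whether it may sleep before the next
# cycle. A toy driver showing just that control flow; `run_one_pass` stands in
# for the real _run_once() and is an assumption for illustration.
from math import log2


def fast_reiterate(rses, total_workers, run_one_pass):
    """Re-run `run_one_pass` on the remaining RSEs, bounded by log2(total_workers)."""
    iteration, max_fast_reiterations = 0, int(log2(total_workers))
    while rses and iteration <= max_fast_reiterations:
        rses = run_one_pass(rses)
        iteration += 1
    # True means "nothing urgent left, the caller may sleep before the next cycle".
    return not rses

# Example: with 8 workers (at most 3 fast re-iterations) and a pass that clears
# one RSE per call, 5 pending RSEs cannot be drained, so the caller must not sleep:
# fast_reiterate(['RSE1', 'RSE2', 'RSE3', 'RSE4', 'RSE5'], 8, lambda rses: rses[1:])  # -> False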