def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas

    Adds five path-only replicas to the quarantine of the MOCK RSE, checks
    that the combined count of listed entries grows by five, then deletes
    them again and checks that the count is back at its starting value.
    """
    # The VO is only handed to the RSE lookup on a multi-VO configuration.
    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    vo_kwargs = {'vo': get_vo()} if multi_vo else {}
    rse_id = get_rse_id(rse='MOCK', **vo_kwargs)

    def count_quarantined():
        # The listing comes back as two collections; the test only compares
        # the total number of entries across both.
        real, dark = list_quarantined_replicas(rse_id=rse_id, limit=10000)
        return len(real) + len(dark)

    baseline = count_quarantined()

    expected_new = 5
    new_entries = []
    for _ in range(expected_new):
        new_entries.append({'path': '/path/' + generate_uuid()})

    add_quarantined_replicas(rse_id=rse_id, replicas=new_entries)
    assert baseline + expected_new == count_quarantined()

    delete_quarantined_replicas(rse_id=rse_id, replicas=new_entries)
    assert baseline == count_quarantined()
def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas

    Quarantines five path-only replicas on the MOCK RSE, checks that the
    listing grows by exactly five entries, then removes them and checks that
    the listing is back at its initial length.
    """
    quarantined_replicas = len(list_quarantined_replicas(rse='MOCK', limit=10000))

    nbreplicas = 5

    # range() rather than xrange(): xrange does not exist on Python 3, and for
    # five items the eager Python 2 list costs nothing.
    replicas = [{'path': '/path/' + generate_uuid()} for _ in range(nbreplicas)]

    add_quarantined_replicas(rse='MOCK', replicas=replicas)
    assert_equal(quarantined_replicas + nbreplicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))

    delete_quarantined_replicas(rse='MOCK', replicas=replicas)
    assert_equal(quarantined_replicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))
def reaper(rses=None, worker_number=1, total_workers=1, chunk_size=100, once=False, scheme=None):
    """
    Main loop to select and delete files.

    For every RSE id in ``rses`` (visited in random order) a chunk of
    quarantined replicas is listed, each one is deleted through the RSE's
    'delete' protocol, a 'deletion-done' / 'deletion-failed' message is
    emitted, and the replicas that are gone (deleted or not found) are
    removed from the quarantine.

    :param rses: List of RSE ids the reaper should work against. The list is
                 copied, so the caller's object is never reordered. This
                 function does not expand an empty list: with no RSEs every
                 iteration simply finds nothing to do.
    :param worker_number: The worker number.
    :param total_workers: The total number of workers.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, stops after the first RSE of the first iteration
                 instead of looping until a graceful stop is requested.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    """
    # Private copy: random.shuffle() below works in place and must not
    # reorder a list that the caller (or another worker) still uses.
    rses = list(rses) if rses else []

    logging.info('Starting Dark Reaper %s-%s: Will work on RSEs: %s', worker_number, total_workers, str(rses))

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)

    # hashlib only accepts bytes on Python 3; a Python 2 str already is bytes
    # and is passed through untouched.
    hash_seed = sys.argv[0] + ''.join(rses)
    if not isinstance(hash_seed, bytes):
        hash_seed = hash_seed.encode()
    hash_executable = hashlib.sha256(hash_seed).hexdigest()

    sanity_check(executable=None, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread,
                             hash_executable=hash_executable)
            logging.info('Dark Reaper(%s/%s): Live gives %s', worker_number, total_workers, heartbeat)
            nothing_to_do = True

            random.shuffle(rses)
            for rse_id in rses:
                rse = rse_core.get_rse_name(rse_id=rse_id)
                replicas = list_quarantined_replicas(rse_id=rse_id,
                                                     limit=chunk_size,
                                                     worker_number=worker_number,
                                                     total_workers=total_workers)

                rse_info = rsemgr.get_rse_info(rse_id=rse_id)
                prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                deleted_replicas = []
                try:
                    prot.connect()
                    for replica in replicas:
                        nothing_to_do = False
                        # Reset per replica so the handlers below never hit an
                        # unbound name or report the previous replica's PFN
                        # when the PFN lookup itself is what failed.
                        pfn = None
                        try:
                            pfns = rsemgr.lfns2pfns(rse_settings=rse_info,
                                                    lfns=[{'scope': replica['scope'].external,
                                                           'name': replica['name'],
                                                           'path': replica['path']}],
                                                    operation='delete',
                                                    scheme=scheme)
                            # list(): dict views cannot be indexed on Python 3.
                            pfn = str(list(pfns.values())[0])
                            logging.info('Dark Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s',
                                         worker_number, total_workers,
                                         replica['scope'], replica['name'], pfn, rse)
                            start = time.time()
                            prot.delete(pfn)
                            duration = time.time() - start
                            logging.info('Dark Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s in %s seconds',
                                         worker_number, total_workers,
                                         replica['scope'], replica['name'], pfn, rse, duration)
                            add_message('deletion-done', {'scope': replica['scope'].external,
                                                          'name': replica['name'],
                                                          'rse': rse,
                                                          'rse_id': rse_id,
                                                          'file-size': replica.get('bytes') or 0,
                                                          'bytes': replica.get('bytes') or 0,
                                                          'url': pfn,
                                                          'duration': duration,
                                                          'protocol': prot.attributes['scheme']})
                            deleted_replicas.append(replica)
                        except SourceNotFound:
                            # Already gone from storage: still drop it from the quarantine.
                            logging.warning('Dark Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s',
                                            worker_number, total_workers,
                                            replica['scope'], replica['name'], pfn, rse)
                            deleted_replicas.append(replica)
                        except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                            # Not appended to deleted_replicas, so the replica
                            # stays quarantined and is picked up again later.
                            logging.warning('Dark Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s',
                                            worker_number, total_workers,
                                            replica['scope'], replica['name'], pfn, rse, str(error))
                            # .get() as in the success path: a replica without
                            # a 'bytes' entry must not raise inside the handler.
                            add_message('deletion-failed', {'scope': replica['scope'].external,
                                                            'name': replica['name'],
                                                            'rse': rse,
                                                            'rse_id': rse_id,
                                                            'file-size': replica.get('bytes') or 0,
                                                            'bytes': replica.get('bytes') or 0,
                                                            'url': pfn,
                                                            'reason': str(error),
                                                            'protocol': prot.attributes['scheme']})
                except Exception:
                    # Anything unexpected aborts this RSE's chunk only; what was
                    # already deleted is still removed from the quarantine below.
                    logging.critical(traceback.format_exc())
                finally:
                    prot.close()

                delete_quarantined_replicas(rse_id=rse_id, replicas=deleted_replicas)

                if once:
                    break

            if once:
                break

            if nothing_to_do:
                logging.info('Dark Reaper %s-%s: Nothing to do. I will sleep for 60s', worker_number, total_workers)
                time.sleep(60)

        except DatabaseException as error:
            logging.warning('Reaper: %s', str(error))
        except Exception:
            # Exception rather than a bare except, so SystemExit and
            # KeyboardInterrupt are not swallowed by the daemon loop.
            logging.critical(traceback.format_exc())

    die(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)

    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
def reaper(rses, chunk_size=100, once=False, scheme=None, sleep_time=300):
    """
    Main loop to select and delete files.

    Each iteration renews the heartbeat, intersects ``rses`` with the RSEs
    that currently have quarantined replicas, and for each of them (in random
    order) deletes the dark replicas through the RSE's 'delete' protocol.
    Replicas that were deleted or not found, together with the real replicas
    returned by the listing, are then removed from the quarantine.

    :param rses: List of RSE ids the reaper should work against.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, skips the start-up wait and stops after the first
                 RSE of the first iteration.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param sleep_time: Passed to daemon_sleep, together with the iteration
                       start time, when an iteration found nothing to delete.
    """
    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    hash_executable = hashlib.sha256((sys.argv[0] + ''.join(rses)).encode()).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    def _refresh_heartbeat():
        """Renew the heartbeat and build a logger prefixed with the thread assignment it reports."""
        beat = live(executable=executable, hostname=hostname, pid=pid, thread=thread,
                    hash_executable=hash_executable)
        prepend_str = 'dark-reaper [%i/%i] : ' % (beat['assign_thread'], beat['nr_threads'])
        return beat, formatted_logger(logging.log, prepend_str + '%s')

    # heartbeat
    heartbeat, logger = _refresh_heartbeat()
    logger(logging.INFO, 'Starting Dark Reaper on RSEs: %s', ', '.join(rses))

    if not once:
        logger(logging.INFO, 'Waiting for heartbeat synchonization')
        GRACEFUL_STOP.wait(10)  # To prevent running on the same partition if all the reapers restart at the same time

    while not GRACEFUL_STOP.is_set():
        try:
            heartbeat, logger = _refresh_heartbeat()
            logger(logging.INFO, 'Live gives {0}'.format(heartbeat))
            nothing_to_do = True
            start_time = time.time()

            rses_to_process = list(set(rses) & set(list_rses_with_quarantined_replicas()))
            random.shuffle(rses_to_process)
            for rse_id in rses_to_process:
                # The following query returns the list of real replicas (deleted_replicas) and list of dark replicas (dark_replicas)
                # Real replicas can be directly removed from the quarantine table
                deleted_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id,
                                                                            limit=chunk_size,
                                                                            worker_number=heartbeat['assign_thread'],
                                                                            total_workers=heartbeat['nr_threads'])

                rse_info = rsemgr.get_rse_info(rse_id=rse_id)
                rse = rse_info['rse']
                prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)

                heartbeat, logger = _refresh_heartbeat()
                try:
                    prot.connect()
                    for replica in dark_replicas:
                        nothing_to_do = False
                        scope = ''
                        if replica['scope']:
                            scope = replica['scope'].external
                        # Reset per replica so the handlers below never hit an
                        # unbound name or report the previous replica's PFN
                        # when the PFN lookup itself is what failed.
                        pfn = None
                        try:
                            pfn = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                            lfns=[{'scope': scope,
                                                                   'name': replica['name'],
                                                                   'path': replica['path']}],
                                                            operation='delete',
                                                            scheme=scheme).values())[0])
                            logger(logging.INFO, 'Deletion ATTEMPT of %s:%s as %s on %s', scope, replica['name'], pfn, rse)
                            start = time.time()
                            prot.delete(pfn)
                            duration = time.time() - start
                            logger(logging.INFO, 'Deletion SUCCESS of %s:%s as %s on %s in %s seconds',
                                   scope, replica['name'], pfn, rse, duration)
                            payload = {'scope': scope,
                                       'name': replica['name'],
                                       'rse': rse,
                                       'rse_id': rse_id,
                                       'file-size': replica.get('bytes') or 0,
                                       'bytes': replica.get('bytes') or 0,
                                       'url': pfn,
                                       'duration': duration,
                                       'protocol': prot.attributes['scheme']}
                            # A replica may carry no scope (handled above), so
                            # only look at .vo when a scope object is present;
                            # otherwise the attribute access would raise after
                            # the file has already been deleted.
                            if replica['scope'] and replica['scope'].vo != 'def':
                                payload['vo'] = replica['scope'].vo
                            add_message('deletion-done', payload)
                            deleted_replicas.append(replica)
                        except SourceNotFound:
                            # Already gone from storage: still drop it from the quarantine.
                            err_msg = ('Deletion NOTFOUND of %s:%s as %s on %s' % (scope, replica['name'], pfn, rse))
                            logger(logging.WARNING, err_msg)
                            deleted_replicas.append(replica)
                        except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                            # Not appended to deleted_replicas, so the replica
                            # stays quarantined and is picked up again later.
                            err_msg = ('Deletion NOACCESS of %s:%s as %s on %s: %s'
                                       % (scope, replica['name'], pfn, rse, str(error)))
                            logger(logging.WARNING, err_msg)
                            # .get() as in the success path: a replica without
                            # a 'bytes' entry must not raise inside the handler.
                            payload = {'scope': scope,
                                       'name': replica['name'],
                                       'rse': rse,
                                       'rse_id': rse_id,
                                       'file-size': replica.get('bytes') or 0,
                                       'bytes': replica.get('bytes') or 0,
                                       'url': pfn,
                                       'reason': str(error),
                                       'protocol': prot.attributes['scheme']}
                            if replica['scope'] and replica['scope'].vo != 'def':
                                payload['vo'] = replica['scope'].vo
                            add_message('deletion-failed', payload)
                except Exception:
                    # Anything unexpected aborts this RSE's chunk only; what was
                    # already handled is still removed from the quarantine below.
                    logging.critical(traceback.format_exc())
                finally:
                    prot.close()

                delete_quarantined_replicas(rse_id=rse_id, replicas=deleted_replicas)

                if once:
                    break

            if once:
                break

            if nothing_to_do:
                logger(logging.INFO, 'Nothing to do')
                daemon_sleep(start_time=start_time, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP)

        except DatabaseException as error:
            logging.warning('Reaper: %s', str(error))
        except Exception:
            logging.critical(traceback.format_exc())

    die(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)

    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')