def test_judge_evaluate_detach_datasetlock(self):
    """ JUDGE EVALUATOR: Test if a datasetlock is detached correctly when removing a dataset from a container"""
    re_evaluator(once=True)

    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')
    attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')

    # Add a rule to the Container
    add_rule(dids=[{'scope': scope, 'name': container}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the datasetlock is there
    locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(locks) > 0)

    detach_dids(scope, container, [{'scope': scope, 'name': dataset}])

    # Fake judge
    re_evaluator(once=True)

    locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(locks) == 0)
def test_judge_evaluate_detach_datasetlock(self):
    """ JUDGE EVALUATOR: Test if a datasetlock is detached correctly when removing a dataset from a container"""
    re_evaluator(once=True)

    scope = InternalScope('mock', **self.vo)
    files = create_files(3, scope, self.rse1_id, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.from_sym('CONTAINER'), self.jdoe)
    attach_dids(scope, container, [{'scope': scope, 'name': dataset}], self.jdoe)

    # Add a rule to the Container
    add_rule(dids=[{'scope': scope, 'name': container}], account=self.jdoe, copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the datasetlock is there
    locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(locks) > 0)

    detach_dids(scope, container, [{'scope': scope, 'name': dataset}])

    # Fake judge
    re_evaluator(once=True)

    locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(locks) == 0)
def get_dataset_locks(scope, name):
    """
    Get the dataset locks of a dataset.

    :param scope: Scope of the dataset.
    :param name:  Name of the dataset.
    :return:      List of dicts {'rse_id': ..., 'state': ...}
    """
    return lock.get_dataset_locks(scope=scope, name=name)
def get_dataset_locks(scope, name, vo='def'):
    """
    Get the dataset locks of a dataset.

    :param scope: Scope of the dataset.
    :param name:  Name of the dataset.
    :param vo:    The VO to act on.
    :return:      List of dicts {'rse_id': ..., 'state': ...}
    """
    scope = InternalScope(scope, vo=vo)
    locks = lock.get_dataset_locks(scope=scope, name=name)
    for l in locks:
        yield api_update_return_dict(l)
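# Usage sketch for the vo-aware get_dataset_locks above (hypothetical, not part
# of the original code): it is a generator, so its output must be iterated or
# materialized. count_dataset_locks_per_rse is an assumed helper name, and the
# yielded dicts are assumed to carry 'rse_id' as the docstring documents.
def count_dataset_locks_per_rse(scope, name, vo='def'):
    """Tally the dataset locks of one dataset per rse_id; a minimal sketch."""
    counts = {}
    for ds_lock in get_dataset_locks(scope, name, vo=vo):
        counts[ds_lock['rse_id']] = counts.get(ds_lock['rse_id'], 0) + 1
    return counts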
def test_judge_add_dataset_to_container(self):
    """ JUDGE EVALUATOR: Test the judge when adding a dataset to a container"""
    scope = InternalScope('mock')
    files = create_files(3, scope, self.rse1_id)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    parent_container = 'dataset_' + str(uuid())
    add_did(scope, parent_container, DIDType.from_sym('CONTAINER'), self.jdoe)

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': parent_container}], account=self.jdoe, copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    attach_dids(scope, parent_container, [{'scope': scope, 'name': dataset}], self.jdoe)

    # Fake judge
    re_evaluator(once=True)

    # Check if the Locks are created properly
    for file in files:
        assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)

    # Check if the DatasetLocks are created properly
    dataset_locks = [lock for lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(dataset_locks) == 2)
def test_judge_add_dataset_to_container(self):
    """ JUDGE EVALUATOR: Test the judge when adding a dataset to a container"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    parent_container = 'dataset_' + str(uuid())
    add_did(scope, parent_container, DIDType.from_sym('CONTAINER'), 'jdoe')

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': parent_container}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    attach_dids(scope, parent_container, [{'scope': scope, 'name': dataset}], 'jdoe')

    # Fake judge
    re_evaluator(once=True)

    # Check if the Locks are created properly
    for file in files:
        assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)

    # Check if the DatasetLocks are created properly
    dataset_locks = [lock for lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(dataset_locks) == 2)
def test_add_rule_dataset_dataset(self):
    """ REPLICATION RULE (CORE): Add a replication rule on a dataset, DATASET Grouping"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the Locks are created properly
    t1 = set([self.rse1_id, self.rse3_id, self.rse5_id])
    first_locks = None
    for file in files:
        if first_locks is None:
            first_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
        rse_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
        assert(len(t1.intersection(rse_locks)) == 2)
        assert(len(first_locks.intersection(rse_locks)) == 2)

    # Check if the DatasetLocks are created properly
    dataset_locks = [lock for lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(t1.intersection(set([lock['rse_id'] for lock in dataset_locks]))) == 2)
    assert(len(first_locks.intersection(set([lock['rse_id'] for lock in dataset_locks]))) == 2)
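# Illustrative helper (hypothetical, not in the original tests): the assertions
# above repeat the "set of rse_ids holding a replica lock" pattern; factoring it
# out makes the DATASET-grouping invariant (every file locked on the same two
# RSEs of the T1 expression) easier to read.
def lock_rse_ids(scope, name):
    """Return the set of rse_ids on which a file currently has replica locks."""
    return set(lock['rse_id'] for lock in get_replica_locks(scope=scope, name=name))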
def rebalance_rule(parent_rule_id, activity, rse_expression, priority, source_replica_expression=None, comment=None):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule_id:             Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """
    parent_rule = get_rule(rule_id=parent_rule_id)

    if parent_rule['expires_at'] is None:
        lifetime = None
    else:
        # Remaining lifetime of the parent rule in whole seconds (days plus seconds components of the timedelta)
        lifetime = (parent_rule['expires_at'] - datetime.utcnow()).days * 24 * 3600 + (parent_rule['expires_at'] - datetime.utcnow()).seconds

    if parent_rule['grouping'] == RuleGrouping.ALL:
        grouping = 'ALL'
    elif parent_rule['grouping'] == RuleGrouping.NONE:
        grouping = 'NONE'
    else:
        grouping = 'DATASET'

    # check if concurrent replica at target rse does not exist
    concurrent_replica = False
    try:
        for lock in get_dataset_locks(parent_rule['scope'], parent_rule['name']):
            if lock['rse'] == rse_expression:
                concurrent_replica = True
    except Exception as error:
        concurrent_replica = True
        print('Exception: get_dataset_locks not feasible for %s %s:' % (parent_rule['scope'], parent_rule['name']))
        raise error

    if concurrent_replica:
        return 'Concurrent replica exists at target rse!'
    print(concurrent_replica)

    child_rule = add_rule(dids=[{'scope': parent_rule['scope'], 'name': parent_rule['name']}],
                          account=parent_rule['account'],
                          copies=parent_rule['copies'],
                          rse_expression=rse_expression,
                          grouping=grouping,
                          weight=parent_rule['weight'],
                          lifetime=lifetime,
                          locked=parent_rule['locked'],
                          subscription_id=parent_rule['subscription_id'],
                          source_replica_expression=source_replica_expression,
                          activity=activity,
                          notify=parent_rule['notification'],
                          purge_replicas=parent_rule['purge_replicas'],
                          ignore_availability=False,
                          comment=parent_rule['comments'] if not comment else comment,
                          ask_approval=False,
                          asynchronous=False,
                          priority=priority)[0]

    update_rule(rule_id=parent_rule_id, options={'child_rule_id': child_rule, 'lifetime': 0})
    return child_rule
def rebalance_rse(rse, max_bytes=1E9, max_files=None, dry_run=False, exclude_expression=None, comment=None, force_expression=None, mode=None, priority=3, source_replica_expression=None, session=None):
    """
    Rebalance data from an RSE

    :param rse:                        RSE to rebalance data from.
    :param max_bytes:                  Maximum amount of bytes to rebalance.
    :param max_files:                  Maximum amount of files to rebalance.
    :param dry_run:                    Only run in dry-run mode.
    :param exclude_expression:         Exclude this rse_expression from being target_rses.
    :param comment:                    Comment to set on the new rules.
    :param force_expression:           Force a specific rse_expression as target.
    :param mode:                       BB8 mode to execute (None=normal, 'decomission'=Decommission mode)
    :param priority:                   Priority of the newly created rules.
    :param source_replica_expression:  Source replica expression of the newly created rules.
    :param session:                    The database session.
    :returns:                          List of rebalanced datasets.
    """
    rebalanced_bytes = 0
    rebalanced_files = 0
    rebalanced_datasets = []
    rse_attributes = list_rse_attributes(rse=rse, session=session)

    print('***************************')
    print('BB8 - Execution Summary')
    print('Mode: %s' % ('STANDARD' if mode is None else mode.upper()))
    print('Dry Run: %s' % (dry_run))
    print('***************************')

    print('scope:name rule_id bytes(Gb) target_rse child_rule_id')

    for scope, name, rule_id, rse_expression, subscription_id, bytes, length in list_rebalance_rule_candidates(rse=rse, mode=mode):
        if force_expression is not None and subscription_id is not None:
            continue

        if rebalanced_bytes + bytes > max_bytes:
            continue
        if max_files:
            if rebalanced_files + length > max_files:
                continue

        try:
            other_rses = [r['rse_id'] for r in get_dataset_locks(scope, name, session=session)]

            # Select the target RSE for this rule
            try:
                target_rse_exp = select_target_rse(current_rse=rse,
                                                   rse_expression=rse_expression,
                                                   subscription_id=subscription_id,
                                                   rse_attributes=rse_attributes,
                                                   other_rses=other_rses,
                                                   exclude_expression=exclude_expression,
                                                   force_expression=force_expression,
                                                   session=session)
                # Rebalance this rule
                if not dry_run:
                    child_rule_id = rebalance_rule(parent_rule_id=rule_id,
                                                   activity='Data rebalancing',
                                                   rse_expression=target_rse_exp,
                                                   priority=priority,
                                                   source_replica_expression=source_replica_expression,
                                                   comment=comment)
                else:
                    child_rule_id = ''
            except (InsufficientTargetRSEs, DuplicateRule, RuleNotFound, InsufficientAccountLimit):
                continue

            print('%s:%s %s %d %s %s' % (scope, name, str(rule_id), int(bytes / 1E9), target_rse_exp, child_rule_id))
            if 'Concurrent' in str(child_rule_id):
                print(str(child_rule_id))
                continue

            rebalanced_bytes += bytes
            rebalanced_files += length
            rebalanced_datasets.append((scope, name, bytes, length, target_rse_exp, rule_id, child_rule_id))
        except Exception as error:
            print('Exception %s occurred while rebalancing %s:%s, rule_id: %s!' % (str(error), scope, name, str(rule_id)))
            raise error

    print('BB8 is rebalancing %d Gb of data (%d rules)' % (int(rebalanced_bytes / 1E9), len(rebalanced_datasets)))
    return rebalanced_datasets
def atropos(thread, bulk, date_check, dry_run=True, grace_period=86400, once=True, unlock=False, spread_period=0, purge_replicas=False):
    """
    Creates an Atropos Worker that gets a list of rules which have an eol_at expired and delete them.

    :param thread:       Thread number at startup.
    :param bulk:         The number of requests to process.
    :param grace_period: The grace_period for the rules.
    :param once:         Run only once.
    """
    sleep_time = 60
    executable = 'atropos'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    time.sleep(10)
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'], hb['nr_threads'])
    logging.debug(prepend_str + 'Starting worker')
    summary = {}
    lifetime_exceptions = {}
    rand = random.Random(hb['assign_thread'])
    for excep in rucio.core.lifetime_exception.list_exceptions(exception_id=None, states=[LifetimeExceptionsState.APPROVED, ], session=None):
        key = '{}:{}'.format(excep['scope'].internal, excep['name'])
        if key not in lifetime_exceptions:
            lifetime_exceptions[key] = excep['expires_at']
        elif lifetime_exceptions[key] < excep['expires_at']:
            lifetime_exceptions[key] = excep['expires_at']
    logging.debug(prepend_str + '%s active exceptions' % len(lifetime_exceptions))
    if not dry_run and date_check > now:
        logging.error(prepend_str + 'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not GRACEFUL_STOP.is_set():
            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'], hb['nr_threads'])
            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check, thread, hb['nr_threads'], session=None)
                logging.info(prepend_str + '%s rules to process' % (len(rules)))
                for rule_idx, rule in enumerate(rules, start=1):
                    did = '%s:%s' % (rule.scope, rule.name)
                    did_key = '{}:{}'.format(rule.scope.internal, rule.name)
                    logging.debug(prepend_str + 'Working on rule %s on DID %s on %s' % (rule.id, did, rule.rse_expression))

                    if (rule_idx % 1000) == 0:
                        logging.info(prepend_str + '%s/%s rules processed' % (rule_idx, len(rules)))

                    # We compute the expected eol_at
                    try:
                        rses = parse_expression(rule.rse_expression, filter={'vo': rule.account.vo})
                    except InvalidRSEExpression:
                        logging.warning(prepend_str + 'Rule %s has an RSE expression that results in an empty set: %s' % (rule.id, rule.rse_expression))
                        continue
                    eol_at = rucio.core.lifetime_exception.define_eol(rule.scope, rule.name, rses)
                    if eol_at != rule.eol_at:
                        logging.warning(prepend_str + 'The computed eol %s differs from the one recorded %s for rule %s on %s at %s' % (eol_at, rule.eol_at, rule.id, did, rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logging.warning(prepend_str + 'Cannot find rule %s on DID %s' % (rule.id, did))
                            continue

                    # Check the exceptions
                    if did_key in lifetime_exceptions:
                        if eol_at > lifetime_exceptions[did_key]:
                            logging.info(prepend_str + 'Rule %s on DID %s on %s has longer expiration date than the one requested : %s' % (rule.id, did, rule.rse_expression, lifetime_exceptions[did_key]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logging.info(prepend_str + 'Updating rule %s on DID %s on %s according to the exception till %s' % (rule.id, did, rule.rse_expression, lifetime_exceptions[did_key]))
                            eol_at = lifetime_exceptions[did_key]
                            try:
                                update_rule(rule.id, options={'eol_at': lifetime_exceptions[did_key]})
                            except RuleNotFound:
                                logging.warning(prepend_str + 'Cannot find rule %s on DID %s' % (rule.id, did))
                                continue

                    # Now check that the new eol_at is expired
                    if eol_at and eol_at < date_check:
                        no_locks = True
                        for lock in get_dataset_locks(rule.scope, rule.name):
                            # rule[4] is the rule id column of the row returned by get_rules_beyond_eol
                            if lock['rule_id'] == rule[4]:
                                no_locks = False
                                if lock['rse_id'] not in summary:
                                    summary[lock['rse_id']] = {}
                                if did_key not in summary[lock['rse_id']]:
                                    summary[lock['rse_id']][did_key] = {'length': lock['length'] or 0, 'bytes': lock['bytes'] or 0}
                        if no_locks:
                            logging.warning(prepend_str + 'Cannot find a lock for rule %s on DID %s' % (rule.id, did))
                        if not dry_run:
                            lifetime = grace_period + rand.randrange(spread_period + 1)
                            logging.info(prepend_str + 'Setting %s seconds lifetime for rule %s' % (lifetime, rule.id))
                            options = {'lifetime': lifetime}
                            if purge_replicas:
                                options['purge_replicas'] = True
                            if rule.locked and unlock:
                                logging.info(prepend_str + 'Unlocking rule %s', rule.id)
                                options['locked'] = False
                            try:
                                update_rule(rule.id, options=options)
                            except RuleNotFound:
                                logging.warning(prepend_str + 'Cannot find rule %s on DID %s' % (rule.id, did))
                                continue
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logging.critical(''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())

            for rse_id in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse_id]:
                    tot_datasets += 1
                    tot_files += summary[rse_id][did].get('length', 0)
                    tot_size += summary[rse_id][did].get('bytes', 0)
                vo = get_rse_vo(rse_id=rse_id)
                logging.info(prepend_str + 'For RSE %s %s %s datasets will be deleted representing %s files and %s bytes' % (get_rse_name(rse_id=rse_id), '' if vo == 'def' else 'on VO ' + vo, tot_datasets, tot_files, tot_size))

            if once:
                break
            else:
                tottime = time.time() - stime
                if tottime < sleep_time:
                    logging.info(prepend_str + 'Will sleep for %s seconds' % (str(sleep_time - tottime)))
                    time.sleep(sleep_time - tottime)
                    continue

    logging.info(prepend_str + 'Graceful stop requested')
    heartbeat.die(executable, hostname, pid, hb_thread)
    logging.info(prepend_str + 'Graceful stop done')
def rebalance_rse(rse_id, max_bytes=1E9, max_files=None, dry_run=False, exclude_expression=None, comment=None, force_expression=None, mode=None, priority=3, source_replica_expression='*\\bb8-enabled=false', session=None, logger=logging.log):
    """
    Rebalance data from an RSE

    :param rse_id:                     RSE to rebalance data from.
    :param max_bytes:                  Maximum amount of bytes to rebalance.
    :param max_files:                  Maximum amount of files to rebalance.
    :param dry_run:                    Only run in dry-run mode.
    :param exclude_expression:         Exclude this rse_expression from being target_rses.
    :param comment:                    Comment to set on the new rules.
    :param force_expression:           Force a specific rse_expression as target.
    :param mode:                       BB8 mode to execute (None=normal, 'decomission'=Decommission mode)
    :param priority:                   Priority of the newly created rules.
    :param source_replica_expression:  Source replica expression of the newly created rules.
    :param session:                    The database session.
    :param logger:                     Logger.
    :returns:                          List of rebalanced datasets.
    """
    rebalanced_bytes = 0
    rebalanced_files = 0
    rebalanced_datasets = []

    rse_attributes = list_rse_attributes(rse_id=rse_id, session=session)
    src_rse = get_rse_name(rse_id=rse_id)

    logger(logging.INFO, '***************************')
    logger(logging.INFO, 'BB8 - Execution Summary')
    logger(logging.INFO, 'Mode: %s' % ('STANDARD' if mode is None else mode.upper()))
    logger(logging.INFO, 'Dry Run: %s' % (dry_run))
    logger(logging.INFO, '***************************')

    for scope, name, rule_id, rse_expression, subscription_id, bytes_, length, fsize in list_rebalance_rule_candidates(rse_id=rse_id, mode=mode):
        if force_expression is not None and subscription_id is not None:
            continue

        if rebalanced_bytes + bytes_ > max_bytes:
            continue
        if max_files:
            if rebalanced_files + length > max_files:
                continue

        try:
            rule = get_rule(rule_id=rule_id)
            other_rses = [r['rse_id'] for r in get_dataset_locks(scope, name, session=session)]

            # Select the target RSE for this rule
            try:
                target_rse_exp = select_target_rse(parent_rule=rule,
                                                   current_rse_id=rse_id,
                                                   rse_expression=rse_expression,
                                                   subscription_id=subscription_id,
                                                   rse_attributes=rse_attributes,
                                                   other_rses=other_rses,
                                                   exclude_expression=exclude_expression,
                                                   force_expression=force_expression,
                                                   session=session)
                # Rebalance this rule
                if not dry_run:
                    child_rule_id = rebalance_rule(parent_rule=rule,
                                                   activity='Data rebalancing',
                                                   rse_expression=target_rse_exp,
                                                   priority=priority,
                                                   source_replica_expression=source_replica_expression,
                                                   comment=comment)
                else:
                    child_rule_id = ''
            except (InsufficientTargetRSEs, DuplicateRule, RuleNotFound, InsufficientAccountLimit) as err:
                logger(logging.ERROR, str(err))
                continue

            if child_rule_id is None:
                logger(logging.WARNING, 'A rule for %s:%s already exists on %s. It cannot be rebalanced', scope, name, target_rse_exp)
                continue

            logger(logging.INFO, 'Rebalancing %s:%s rule %s (%f GB) from %s to %s. New rule %s', scope, name, str(rule_id), bytes_ / 1E9, rule['rse_expression'], target_rse_exp, child_rule_id)
            rebalanced_bytes += bytes_
            rebalanced_files += length
            rebalanced_datasets.append((scope, name, bytes_, length, target_rse_exp, rule_id, child_rule_id))
        except Exception as error:
            logger(logging.ERROR, 'Exception %s occurred while rebalancing %s:%s, rule_id: %s!', str(error), scope, name, str(rule_id))

    logger(logging.INFO, 'BB8 is rebalancing %d GB of data (%d rules) from %s', rebalanced_bytes / 1E9, len(rebalanced_datasets), src_rse)
    return rebalanced_datasets
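# Hypothetical invocation sketch (not part of the original module): a dry run of
# the logger-based rebalance_rse above, capped at 10 TB, that only reports what
# BB8 would move. get_rse_id is assumed to be the usual name-to-id resolver from
# the surrounding code base; the RSE name 'MOCK' is a placeholder.
def preview_rebalance(rse_name='MOCK', max_bytes=1E13):
    """Print the datasets BB8 would rebalance off an RSE without creating any rules."""
    rse_id = get_rse_id(rse=rse_name)
    for scope, name, bytes_, length, target_rse_exp, rule_id, child_rule_id in rebalance_rse(rse_id=rse_id, max_bytes=max_bytes, dry_run=True):
        print('%s:%s -> %s (%.2f GB, %d files)' % (scope, name, target_rse_exp, bytes_ / 1E9, length))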
def atropos(thread, bulk, date_check, dry_run=True, grace_period=86400, once=True):
    """
    Creates an Atropos Worker that gets a list of rules which have an eol_at expired and delete them.

    :param thread:       Thread number at startup.
    :param bulk:         The number of requests to process.
    :param grace_period: The grace_period for the rules.
    :param once:         Run only once.
    """
    sleep_time = 60
    executable = ' '.join(argv)
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    now = datetime.datetime.now()
    hb = heartbeat.live(executable, hostname, pid, hb_thread)
    summary = {}
    lifetime_exceptions = get_lifetime_exceptions()
    prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'] + 1, hb['nr_threads'])
    if not dry_run and date_check > now:
        logging.error(prepend_str + 'Atropos cannot run in non-dry-run mode for date in the future')
    else:
        while not graceful_stop.is_set():
            hb = heartbeat.live(executable, hostname, pid, hb_thread)
            prepend_str = 'Thread [%i/%i] : ' % (hb['assign_thread'] + 1, hb['nr_threads'])
            stime = time.time()
            try:
                rules = get_rules_beyond_eol(date_check, thread, hb['nr_threads'] - 1)
                logging.info(prepend_str + '%s rules to process' % (len(rules)))
                rule_idx = 0
                for rule in rules:
                    rule_idx += 1
                    logging.debug(prepend_str + 'Working on rule %s on DID %s:%s on %s' % (rule.id, rule.scope, rule.name, rule.rse_expression))
                    if (rule_idx % 1000) == 0:
                        logging.info(prepend_str + '%s/%s rules processed' % (rule_idx, len(rules)))

                    # We compute the expected eol_at
                    rses = parse_expression(rule.rse_expression)
                    eol_at = define_eol(rule.scope, rule.name, rses)

                    # Check the exceptions
                    if rule.name in lifetime_exceptions:
                        if rule.eol_at > lifetime_exceptions[rule.name]:
                            logging.info(prepend_str + 'Rule %s on DID %s:%s on %s expired. Extension requested till %s' % (rule.id, rule.scope, rule.name, rule.rse_expression, lifetime_exceptions[rule.name]))
                        else:
                            # If eol_at < requested extension, update eol_at
                            logging.info(prepend_str + 'Updating rule %s on DID %s:%s on %s according to the exception till %s' % (rule.id, rule.scope, rule.name, rule.rse_expression, lifetime_exceptions[rule.name]))
                            try:
                                update_rule(rule.id, options={'eol_at': lifetime_exceptions[rule.name]})
                            except RuleNotFound:
                                logging.warning(prepend_str + 'Cannot find rule %s on DID %s:%s' % (rule.id, rule.scope, rule.name))
                    elif eol_at != rule.eol_at:
                        logging.warning(prepend_str + 'The computed eol %s differs from the one recorded %s for rule %s on %s:%s at %s' % (eol_at, rule.eol_at, rule.id, rule.scope, rule.name, rule.rse_expression))
                        try:
                            update_rule(rule.id, options={'eol_at': eol_at})
                        except RuleNotFound:
                            logging.warning(prepend_str + 'Cannot find rule %s on DID %s:%s' % (rule.id, rule.scope, rule.name))

                    no_locks = True
                    for lock in get_dataset_locks(rule.scope, rule.name):
                        # rule[4] is the rule id column of the row returned by get_rules_beyond_eol
                        if lock['rule_id'] == rule[4]:
                            no_locks = False
                            if lock['rse'] not in summary:
                                summary[lock['rse']] = {}
                            if '%s:%s' % (rule.scope, rule.name) not in summary[lock['rse']]:
                                summary[lock['rse']]['%s:%s' % (rule.scope, rule.name)] = {'length': lock['length'] or 0, 'bytes': lock['bytes'] or 0}
                    if no_locks:
                        logging.warning(prepend_str + 'Cannot find a lock for rule %s on DID %s:%s' % (rule.id, rule.scope, rule.name))
                    if not dry_run:
                        logging.info(prepend_str + 'Setting %s seconds lifetime for rule %s' % (grace_period, rule.id))
                        try:
                            update_rule(rule.id, options={'lifetime': grace_period})
                        except RuleNotFound:
                            logging.warning(prepend_str + 'Cannot find rule %s on DID %s:%s' % (rule.id, rule.scope, rule.name))
            except Exception:
                exc_type, exc_value, exc_traceback = exc_info()
                logging.critical(''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())

            for rse in summary:
                tot_size, tot_files, tot_datasets = 0, 0, 0
                for did in summary[rse]:
                    tot_datasets += 1
                    tot_files += summary[rse][did].get('length', 0)
                    tot_size += summary[rse][did].get('bytes', 0)
                logging.info(prepend_str + 'For RSE %s %s datasets will be deleted representing %s files and %s bytes' % (rse, tot_datasets, tot_files, tot_size))

            if once:
                break
            else:
                tottime = time.time() - stime
                if tottime < sleep_time:
                    logging.info(prepend_str + 'Will sleep for %s seconds' % (str(sleep_time - tottime)))
                    time.sleep(sleep_time - tottime)
                    continue

    logging.info(prepend_str + 'Graceful stop requested')
    heartbeat.die(executable, hostname, pid, hb_thread)
    logging.info(prepend_str + 'Graceful stop done')
def rebalance_rule(parent_rule, activity, rse_expression, priority, source_replica_expression='*\\bb8-enabled=false', comment=None):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule:                Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """
    if parent_rule['expires_at'] is None:
        lifetime = None
    else:
        lifetime = (parent_rule['expires_at'] - datetime.utcnow()).days * 24 * 3600 + (parent_rule['expires_at'] - datetime.utcnow()).seconds

    if parent_rule['grouping'] == RuleGrouping.ALL:
        grouping = 'ALL'
    elif parent_rule['grouping'] == RuleGrouping.NONE:
        grouping = 'NONE'
    else:
        grouping = 'DATASET'

    # ensure that expressions are for correct vo
    rule_vo = parent_rule['scope'].vo
    if parent_rule['scope'].vo != 'def':
        source_replica_expression = 'vo={}&({})'.format(rule_vo, source_replica_expression)
        rse_expression = 'vo={}&({})'.format(rule_vo, rse_expression)

    # check if concurrent replica at target rse does not exist
    concurrent_replica = False
    try:
        for lock in get_dataset_locks(parent_rule['scope'], parent_rule['name']):
            lock_rse_expr = lock['rse']
            if rule_vo != 'def':
                lock_rse_expr = 'vo={}&({})'.format(rule_vo, lock_rse_expr)
            if lock_rse_expr == rse_expression:
                # may need to evaluate to be sure... could get 'vo=tst&(vo=tst&(MOCK))'
                concurrent_replica = True
    except Exception as error:
        concurrent_replica = True
        print('Exception: get_dataset_locks not feasible for %s %s:' % (parent_rule['scope'], parent_rule['name']))
        raise error

    if concurrent_replica:
        return 'Concurrent replica exists at target rse!'
    print(concurrent_replica)

    child_rule = add_rule(dids=[{'scope': parent_rule['scope'], 'name': parent_rule['name']}],
                          account=parent_rule['account'],
                          copies=parent_rule['copies'],
                          rse_expression=rse_expression,
                          grouping=grouping,
                          weight=parent_rule['weight'],
                          lifetime=lifetime,
                          locked=parent_rule['locked'],
                          subscription_id=parent_rule['subscription_id'],
                          source_replica_expression=source_replica_expression,
                          activity=activity,
                          notify=parent_rule['notification'],
                          purge_replicas=parent_rule['purge_replicas'],
                          ignore_availability=False,
                          comment=parent_rule['comments'] if not comment else comment,
                          ask_approval=False,
                          asynchronous=False,
                          ignore_account_limit=True,
                          priority=priority)[0]

    update_rule(rule_id=parent_rule['id'], options={'child_rule_id': child_rule, 'lifetime': 0})
    return child_rule
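# Worked example of the vo-wrapping above (hypothetical values, runnable on its
# own): the target expression and a lock's expression get the same
# 'vo=...&(...)' prefix, so equal base expressions still compare equal; the
# inline caveat about nested wrapping like 'vo=tst&(vo=tst&(MOCK))' remains.
rule_vo = 'tst'
rse_expression = 'vo={}&({})'.format(rule_vo, 'MOCK')   # 'vo=tst&(MOCK)'
lock_rse_expr = 'vo={}&({})'.format(rule_vo, 'MOCK')    # 'vo=tst&(MOCK)'
assert lock_rse_expr == rse_expression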
def rebalance_rule(
    parent_rule,
    activity,
    rse_expression,
    priority,
    source_replica_expression="*\\bb8-enabled=false",
    comment=None,
    session=None,
):
    """
    Rebalance a replication rule to a new RSE

    :param parent_rule:                Replication rule to be rebalanced.
    :param activity:                   Activity to be used for the rebalancing.
    :param rse_expression:             RSE expression of the new rule.
    :param priority:                   Priority of the newly created rule.
    :param source_replica_expression:  Source replica expression of the new rule.
    :param comment:                    Comment to set on the new rules.
    :returns:                          The new child rule id.
    """
    if parent_rule["expires_at"] is None:
        lifetime = None
    else:
        lifetime = (parent_rule["expires_at"] - datetime.utcnow()).days * 24 * 3600 + (parent_rule["expires_at"] - datetime.utcnow()).seconds

    if parent_rule["grouping"] == RuleGrouping.ALL:
        grouping = "ALL"
    elif parent_rule["grouping"] == RuleGrouping.NONE:
        grouping = "NONE"
    else:
        grouping = "DATASET"

    # check if concurrent replica at target rse does not exist
    concurrent_replica = False
    for lock in get_dataset_locks(parent_rule["scope"], parent_rule["name"]):
        lock_rse_expr = lock["rse"]
        if lock_rse_expr == rse_expression:
            concurrent_replica = True

    if concurrent_replica:
        return None

    child_rule = add_rule(
        dids=[{"scope": parent_rule["scope"], "name": parent_rule["name"]}],
        account=parent_rule["account"],
        copies=parent_rule["copies"],
        rse_expression=rse_expression,
        grouping=grouping,
        weight=parent_rule["weight"],
        lifetime=lifetime,
        locked=parent_rule["locked"],
        subscription_id=parent_rule["subscription_id"],
        source_replica_expression=source_replica_expression,
        activity=activity,
        notify=parent_rule["notification"],
        purge_replicas=parent_rule["purge_replicas"],
        ignore_availability=False,
        comment=parent_rule["comments"] if not comment else comment,
        ask_approval=False,
        asynchronous=False,
        ignore_account_limit=True,
        priority=priority,
        session=session,
    )[0]

    update_rule(
        rule_id=parent_rule["id"],
        options={"child_rule_id": child_rule, "lifetime": 0},
        session=session,
    )

    return child_rule
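# Usage sketch (hypothetical, not part of the original module): unlike the
# string-returning variant further up, the session-aware rebalance_rule above
# signals a concurrent replica by returning None, so callers should branch on
# that, as the logger-based rebalance_rse does. try_rebalance is an assumed name.
def try_rebalance(rule, target_rse_exp, priority=3, session=None):
    """Wrap rebalance_rule and surface the concurrent-replica case explicitly."""
    child_rule_id = rebalance_rule(parent_rule=rule, activity="Data rebalancing", rse_expression=target_rse_exp, priority=priority, session=session)
    if child_rule_id is None:
        return None  # a dataset lock already exists at the target expression
    return child_rule_id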