def test_judge_expire_rule(self):
    """ JUDGE CLEANER: Test the judge when deleting expired rules"""
    scope = 'mock'
    attached_files = create_files(3, scope, self.rse1_id)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, ds_name, attached_files, 'jdoe')

    # One rule that is already expired (negative lifetime) plus two rules without expiry.
    add_rule(dids=[{'scope': scope, 'name': ds_name}], account='jdoe', copies=1,
             rse_expression=self.rse1, grouping='NONE', weight='fakeweight',
             lifetime=-3, locked=False, subscription_id=None)[0]
    add_rule(dids=[{'scope': scope, 'name': ds_name}], account='jdoe', copies=2,
             rse_expression=self.T1, grouping='NONE', weight='fakeweight',
             lifetime=None, locked=False, subscription_id=None)[0]
    add_rule(dids=[{'scope': scope, 'name': ds_name}], account='jdoe', copies=3,
             rse_expression=self.T1, grouping='NONE', weight='fakeweight',
             lifetime=None, locked=False, subscription_id=None)[0]

    # The cleaner must drop only the expired rule, leaving 2 + 3 = 5 locks per file.
    rule_cleaner(once=True)

    for attached_file in attached_files:
        remaining_locks = get_replica_locks(scope=attached_file['scope'], name=attached_file['name'])
        assert(len(remaining_locks) == 5)
def upgrade():
    ''' Upgrade the database to this revision '''
    migration_context = context.get_context()
    if migration_context.dialect.name in ['oracle', 'mysql', 'postgresql']:
        schema = migration_context.version_table_schema if migration_context.version_table_schema else ''
        # Live table gets a real CHECK-constrained enum column.
        add_column('requests', sa.Column('did_type', DIDType.db_type(name='REQUESTS_DIDTYPE_CHK'), default=DIDType.FILE), schema=schema)
        # we don't want checks on the history table, fake the DID type
        add_column('requests_history', sa.Column('did_type', sa.String(1)), schema=schema)
def test_judge_inject_rule(self):
    """ JUDGE INJECTOR: Test the judge when injecting a rule"""
    scope = InternalScope('mock')
    new_files = create_files(3, scope, self.rse1_id)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, ds_name, new_files, self.jdoe)

    # An asynchronous rule starts out in the INJECT state.
    rule_id = add_rule(dids=[{'scope': scope, 'name': ds_name}], account=self.jdoe, copies=2,
                       rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None, asynchronous=True)[0]
    assert(get_rule(rule_id)['state'] == RuleState.INJECT)

    rule_injector(once=True)

    # After injection every file must hold two locks and the rule must replicate.
    for new_file in new_files:
        assert(len(get_replica_locks(scope=new_file['scope'], name=new_file['name'])) == 2)
    assert(get_rule(rule_id)['state'] == RuleState.REPLICATING)
def list_new_dids(type=None, thread=None, total_threads=None, chunk_size=1000, vo='def'):
    """
    List recent identifiers.

    :param type : The DID type.
    :param thread: The assigned thread for this necromancer.
    :param total_threads: The total number of threads of all necromancers.
    :param chunk_size: Number of requests to return per yield.
    :param vo: The VO to act on.
    """
    # 'type and ...' keeps the core call untouched when no type was requested.
    new_dids = did.list_new_dids(did_type=type and DIDType.from_sym(type),
                                 thread=thread, total_threads=total_threads,
                                 chunk_size=chunk_size)
    # Only expose DIDs belonging to the requested VO.
    for entry in new_dids:
        if entry['scope'].vo != vo:
            continue
        yield api_update_return_dict(entry)
def test_account_counter_judge_evaluate_detach(self):
    """ JUDGE EVALUATOR: Test if the account counter is updated correctly when a file is removed from a DS"""
    # Flush any pending evaluations / counter updates from earlier tests.
    re_evaluator(once=True)
    account_update(once=True)

    scope = 'mock'
    # NOTE(review): sibling tests pass self.rse1_id here, not self.rse1 — confirm
    # that create_files accepts the RSE name in this test suite.
    attached_files = create_files(3, scope, self.rse1, bytes=100)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, ds_name, attached_files, 'jdoe')

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': ds_name}], account='jdoe', copies=1,
             rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None,
             locked=False, subscription_id=None)
    account_update(once=True)
    counter_before = get_counter(self.rse1_id, 'jdoe')

    detach_dids(scope, ds_name, [attached_files[0]])

    # Fake judge
    re_evaluator(once=True)
    account_update(once=True)

    # Removing one 100-byte file must be reflected in the account counter.
    counter_after = get_counter(self.rse1_id, 'jdoe')
    assert(counter_before['bytes'] - 100 == counter_after['bytes'])
    assert(counter_before['files'] - 1 == counter_after['files'])
def upgrade():
    ''' upgrade method '''
    # Create the collection_replicas table holding per-RSE replica state
    # for datasets/containers (collections).
    create_table('collection_replicas',
                 sa.Column('scope', sa.String(25)),
                 sa.Column('name', sa.String(255)),
                 sa.Column('did_type', DIDType.db_type(name='COLLECTION_REPLICAS_TYPE_CHK')),
                 sa.Column('rse_id', GUID()),
                 sa.Column('bytes', sa.BigInteger),
                 sa.Column('length', sa.BigInteger),
                 sa.Column('state', ReplicaState.db_type(name='COLLECTION_REPLICAS_STATE_CHK'), default=ReplicaState.UNAVAILABLE),
                 sa.Column('accessed_at', sa.DateTime),
                 sa.Column('updated_at', sa.DateTime),
                 sa.Column('created_at', sa.DateTime))

    # SQLite cannot ALTER a table to add constraints, so keys/constraints/
    # indexes are only added on the other dialects.
    if context.get_context().dialect.name != 'sqlite':
        create_primary_key('COLLECTION_REPLICAS_PK', 'collection_replicas', ['scope', 'name', 'rse_id'])
        create_foreign_key('COLLECTION_REPLICAS_LFN_FK', 'collection_replicas', 'dids', ['scope', 'name'], ['scope', 'name'])
        create_foreign_key('COLLECTION_REPLICAS_RSE_ID_FK', 'collection_replicas', 'rses', ['rse_id'], ['id'])
        create_check_constraint('COLLECTION_REPLICAS_SIZE_NN', 'collection_replicas', 'bytes IS NOT NULL')
        create_check_constraint('COLLECTION_REPLICAS_STATE_NN', 'collection_replicas', 'state IS NOT NULL')
        create_index('COLLECTION_REPLICAS_RSE_ID_IDX', 'collection_replicas', ['rse_id'])
def test_judge_add_files_to_dataset(self):
    """ JUDGE EVALUATOR: Test the judge when adding files to dataset"""
    scope = InternalScope('mock', **self.vo)
    initial_files = create_files(3, scope, self.rse1_id)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), self.jdoe)

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': ds_name}], account=self.jdoe, copies=2,
             rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None,
             locked=False, subscription_id=None)

    attach_dids(scope, ds_name, initial_files, self.jdoe)
    re_evaluator(once=True)

    # Attach a second batch and let the fake judge pick it up.
    extra_files = create_files(3, scope, self.rse1_id)
    attach_dids(scope, ds_name, extra_files, self.jdoe)
    re_evaluator(once=True)

    # Check if the Locks are created properly
    for extra_file in extra_files:
        assert(len(get_replica_locks(scope=extra_file['scope'], name=extra_file['name'])) == 2)
def upgrade():
    ''' Upgrade the database to this revision '''
    # Only dialects that support this DDL run the migration; SQLite is skipped.
    if context.get_context().dialect.name in ['oracle', 'mysql', 'postgresql']:
        create_table('collection_replicas',
                     sa.Column('scope', sa.String(25)),
                     sa.Column('name', sa.String(255)),
                     sa.Column('did_type', DIDType.db_type()),
                     sa.Column('rse_id', GUID()),
                     sa.Column('bytes', sa.BigInteger),
                     sa.Column('length', sa.BigInteger),
                     sa.Column('state', ReplicaState.db_type(), default=ReplicaState.UNAVAILABLE),
                     sa.Column('accessed_at', sa.DateTime),
                     sa.Column('created_at', sa.DateTime, default=datetime.datetime.utcnow),
                     sa.Column('updated_at', sa.DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow))
        create_primary_key('COLLECTION_REPLICAS_PK', 'collection_replicas', ['scope', 'name', 'rse_id'])
        create_foreign_key('COLLECTION_REPLICAS_LFN_FK', 'collection_replicas', 'dids', ['scope', 'name'], ['scope', 'name'])
        create_foreign_key('COLLECTION_REPLICAS_RSE_ID_FK', 'collection_replicas', 'rses', ['rse_id'], ['id'])
        create_check_constraint('COLLECTION_REPLICAS_SIZE_NN', 'collection_replicas', 'bytes IS NOT NULL')
        create_check_constraint('COLLECTION_REPLICAS_STATE_NN', 'collection_replicas', 'state IS NOT NULL')
        # Enum values spelled out as a CHECK since this variant uses a plain db_type().
        create_check_constraint('COLLECTION_REPLICAS_STATE_CHK', 'collection_replicas', "state in ('A', 'U', 'C', 'B', 'D', 'S')")
        create_index('COLLECTION_REPLICAS_RSE_ID_IDX', 'collection_replicas', ['rse_id'])
def test_to_repair_a_rule_with_ALL_grouping_whose_transfer_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with 1 failed transfer (lock)"""
    rule_repairer(once=True)  # Clean out the repairer
    scope = 'mock'
    files = create_files(4, scope, self.rse4_id, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1,
                       rse_expression=self.T1, grouping='ALL', weight=None, lifetime=None,
                       locked=False, subscription_id=None, activity='DebugJudge')[0]

    # Fixed copy-paste: each transfer now looks up the lock of its own file.
    # (With 'ALL' grouping all locks share one RSE, so the old files[2] lookup
    # happened to return the same rse_id, masking the inconsistency.)
    successful_transfer(scope=scope, name=files[0]['name'], rse_id=get_replica_locks(scope=files[0]['scope'], name=files[0]['name'])[0].rse_id, nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=get_replica_locks(scope=files[1]['scope'], name=files[1]['name'])[0].rse_id, nowait=False)
    failed_transfer(scope=scope, name=files[2]['name'], rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id)
    failed_transfer(scope=scope, name=files[3]['name'], rse_id=get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)

    assert(rule_id == get_rule(rule_id)['id'].replace('-', '').lower())
    assert(RuleState.STUCK == get_rule(rule_id)['state'])
    rule_repairer(once=True)
    assert(RuleState.REPLICATING == get_rule(rule_id)['state'])
    # After repair, all locks must still be co-located on a single RSE.
    assert(get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id == get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
    assert(get_replica_locks(scope=files[1]['scope'], name=files[1]['name'])[0].rse_id == get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
def test_judge_ask_approval(self):
    """ JUDGE INJECTOR: Test the judge when asking approval for a rule"""
    scope = 'mock'
    new_files = create_files(3, scope, self.rse1_id)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, ds_name, new_files, 'jdoe')

    # A rule created with ask_approval=True waits until an approver signs off.
    rule_id = add_rule(dids=[{'scope': scope, 'name': ds_name}], account='jdoe', copies=1,
                       rse_expression=self.rse4, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None, ask_approval=True)[0]
    assert(get_rule(rule_id)['state'] == RuleState.WAITING_APPROVAL)

    # Approval moves the rule to INJECT; the injector then makes it replicate.
    approve_rule(rule_id=rule_id, approver='root')
    assert(get_rule(rule_id)['state'] == RuleState.INJECT)

    rule_injector(once=True)

    # Check if the Locks are created properly
    for new_file in new_files:
        assert(len(get_replica_locks(scope=new_file['scope'], name=new_file['name'])) == 1)
    assert(get_rule(rule_id)['state'] == RuleState.REPLICATING)
def test_repair_a_rule_with_missing_locks(self):
    """ JUDGE EVALUATOR: Test the judge when a rule gets STUCK from re_evaluating and there are missing locks"""
    scope = 'mock'
    first_batch = create_files(3, scope, self.rse4_id)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), 'jdoe')

    # Add a first rule to the DS
    rule_id = add_rule(dids=[{'scope': scope, 'name': ds_name}], account='jdoe', copies=2,
                       rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    attach_dids(scope, ds_name, first_batch, 'jdoe')

    # Fake judge
    re_evaluator(once=True)

    # Check if the Locks are created properly
    for item in first_batch:
        assert(len(get_replica_locks(scope=item['scope'], name=item['name'])) == 2)

    # Add more files to the DID
    second_batch = create_files(3, scope, self.rse4_id)
    attach_dids(scope, ds_name, second_batch, 'jdoe')

    # Mark the rule STUCK to fake that the re-evaluation failed
    session = get_session()
    stuck_rule = session.query(models.ReplicationRule).filter_by(id=rule_id).one()
    stuck_rule.state = RuleState.STUCK
    session.commit()

    rule_repairer(once=True)

    for item in first_batch:
        assert(len(get_replica_locks(scope=item['scope'], name=item['name'])) == 2)
    for item in second_batch:
        assert(len(get_replica_locks(scope=item['scope'], name=item['name'])) == 2)
        # The new files' locks must land on the same two RSEs as the old files'.
        rses_of_first = set([lock.rse_id for lock in get_replica_locks(scope=first_batch[0]['scope'], name=first_batch[0]['name'])])
        rses_of_this = set([lock.rse_id for lock in get_replica_locks(scope=item['scope'], name=item['name'])])
        assert(len(rses_of_first.intersection(rses_of_this)) == 2)
    assert(12 == get_rule(rule_id)['locks_replicating_cnt'])
def upgrade():
    ''' Upgrade the database to this revision '''
    # Only dialects that support this DDL run the migration; SQLite is skipped.
    if context.get_context().dialect.name in ['oracle', 'mysql', 'postgresql']:
        schema = context.get_context().version_table_schema if context.get_context().version_table_schema else ''
        add_column('collection_replicas', sa.Column('available_replicas_cnt', sa.BigInteger()), schema=schema)
        add_column('collection_replicas', sa.Column('available_bytes', sa.BigInteger()), schema=schema)
        # Queue table of collection replicas awaiting re-evaluation.
        create_table('updated_col_rep',
                     sa.Column('id', GUID()),
                     sa.Column('scope', sa.String(25)),
                     sa.Column('name', sa.String(255)),
                     sa.Column('did_type', DIDType.db_type()),
                     sa.Column('rse_id', GUID()),
                     sa.Column('created_at', sa.DateTime, default=datetime.datetime.utcnow),
                     sa.Column('updated_at', sa.DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow))
        create_primary_key('UPDATED_COL_REP_PK', 'updated_col_rep', ['id'])
        create_check_constraint('UPDATED_COL_REP_SCOPE_NN', 'updated_col_rep', 'scope IS NOT NULL')
        create_check_constraint('UPDATED_COL_REP_NAME_NN', 'updated_col_rep', 'name IS NOT NULL')
        create_index('UPDATED_COL_REP_SNR_IDX', 'updated_col_rep', ['scope', 'name', 'rse_id'])
def test_repair_a_rule_with_source_replica_expression(self):
    """ JUDGE EVALUATOR: Test the judge with two rules, one restricted by a source_replica_expression"""
    scope = InternalScope('mock')
    test_files = create_files(3, scope, self.rse4_id)
    ds_name = 'dataset_' + str(uuid())
    add_did(scope, ds_name, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, ds_name, test_files, self.jdoe)

    # Add a first rule to the DS
    rule_id1 = add_rule(dids=[{'scope': scope, 'name': ds_name}], account=self.jdoe, copies=1,
                        rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None,
                        locked=False, subscription_id=None)[0]
    # The second rule may only copy from rse1, so it is STUCK until rse1 holds replicas.
    rule_id2 = add_rule(dids=[{'scope': scope, 'name': ds_name}], account=self.jdoe, copies=1,
                        rse_expression=self.rse3, grouping='DATASET', weight=None, lifetime=None,
                        locked=False, subscription_id=None, source_replica_expression=self.rse1)[0]

    assert(RuleState.REPLICATING == get_rule(rule_id1)['state'])
    assert(RuleState.STUCK == get_rule(rule_id2)['state'])

    # Complete the transfers of all three files to rse1.
    for test_file in test_files:
        successful_transfer(scope=scope, name=test_file['name'], rse_id=self.rse1_id, nowait=False)

    # Also make replicas AVAILABLE
    session = get_session()
    for test_file in test_files:
        replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=test_file['name'], rse_id=self.rse1_id).one()
        replica.state = ReplicaState.AVAILABLE
    session.commit()

    rule_repairer(once=True)

    assert(RuleState.OK == get_rule(rule_id1)['state'])
    assert(RuleState.REPLICATING == get_rule(rule_id2)['state'])
def add_did(scope, name, type, issuer, account=None, statuses=None, meta=None, rules=None, lifetime=None, dids=None, rse=None):
    """
    Add data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param type: The data identifier type.
    :param issuer: The issuer account.
    :param account: The account owner. If None, then issuer is selected as owner.
    :param statuses: Dictionary with statuses, e.g. {'monotonic': True}.
    :param meta: Meta-data associated with the data identifier as key/value pairs in a dictionary.
    :param rules: Replication rules associated with the data did. A list of dictionaries,
                  e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param lifetime: DID's lifetime (in seconds).
    :param dids: The content.
    :param rse: The RSE name when registering replicas.
    """
    # Fix: the defaults were mutable ({} / []) and 'meta' is mutated below,
    # so state could leak between calls sharing the default objects.
    statuses = statuses if statuses is not None else {}
    meta = meta if meta is not None else {}
    rules = rules if rules is not None else []
    dids = dids if dids is not None else []

    validate_schema(name='name', obj=name)
    validate_schema(name='scope', obj=scope)
    validate_schema(name='dids', obj=dids)
    validate_schema(name='rse', obj=rse)

    kwargs = {'scope': scope, 'name': name, 'type': type, 'issuer': issuer, 'account': account,
              'statuses': statuses, 'meta': meta, 'rules': rules, 'lifetime': lifetime}
    if not rucio.api.permission.has_permission(issuer=issuer, action='add_did', kwargs=kwargs):
        raise rucio.common.exception.AccessDenied('Account %s can not add data identifier to scope %s' % (issuer, scope))

    # Convert external account/scope representations to internal ones.
    if account is not None:
        account = InternalAccount(account)
    issuer = InternalAccount(issuer)
    scope = InternalScope(scope)
    for d in dids:
        d['scope'] = InternalScope(d['scope'])
    for r in rules:
        r['account'] = InternalAccount(r['account'])

    rse_id = None
    if rse is not None:
        rse_id = get_rse_id(rse=rse)

    if type == 'DATASET':
        # naming_convention validation
        extra_meta = naming_convention.validate_name(scope=scope, name=name, did_type='D')

        # merge extra_meta with meta
        for k in extra_meta or {}:
            if k not in meta:
                meta[k] = extra_meta[k]
            elif meta[k] != extra_meta[k]:
                print("Provided metadata %s doesn't match the naming convention: %s != %s" % (k, meta[k], extra_meta[k]))
                raise rucio.common.exception.InvalidObject("Provided metadata %s doesn't match the naming convention: %s != %s" % (k, meta[k], extra_meta[k]))

    # Validate metadata
    meta_core.validate_meta(meta=meta, did_type=DIDType.from_sym(type))

    return did.add_did(scope=scope, name=name, type=DIDType.from_sym(type), account=account or issuer,
                       statuses=statuses, meta=meta, rules=rules, lifetime=lifetime, dids=dids, rse_id=rse_id)
def upgrade():
    ''' Upgrade the database to this revision '''

    def _rule_history_columns():
        # Both history tables share the exact same layout; build a FRESH list of
        # Column objects per call since SQLAlchemy Columns cannot be reused
        # across tables.
        return [sa.Column('history_id', GUID()),
                sa.Column('id', GUID()),
                sa.Column('subscription_id', GUID()),
                sa.Column('account', sa.String(25)),
                sa.Column('scope', sa.String(25)),
                sa.Column('name', sa.String(255)),
                sa.Column('did_type', DIDType.db_type()),
                sa.Column('state', RuleState.db_type()),
                sa.Column('error', sa.String(255)),
                sa.Column('rse_expression', sa.String(255)),
                sa.Column('copies', sa.SmallInteger),
                sa.Column('expires_at', sa.DateTime),
                sa.Column('weight', sa.String(255)),
                sa.Column('locked', sa.Boolean()),
                sa.Column('locks_ok_cnt', sa.BigInteger),
                sa.Column('locks_replicating_cnt', sa.BigInteger),
                sa.Column('locks_stuck_cnt', sa.BigInteger),
                sa.Column('source_replica_expression', sa.String(255)),
                sa.Column('activity', sa.String(50)),
                sa.Column('grouping', RuleGrouping.db_type()),
                sa.Column('notification', RuleNotification.db_type()),
                sa.Column('stuck_at', sa.DateTime),
                sa.Column('purge_replicas', sa.Boolean()),
                sa.Column('ignore_availability', sa.Boolean()),
                sa.Column('ignore_account_limit', sa.Boolean()),
                sa.Column('updated_at', sa.DateTime, default=datetime.datetime.utcnow),
                sa.Column('created_at', sa.DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow)]

    if context.get_context().dialect.name in ['oracle', 'mysql', 'postgresql']:
        create_table('rules_hist_recent', *_rule_history_columns())
        create_table('rules_history', *_rule_history_columns())
        # Only the 'recent' table gets a primary key and an id index.
        create_primary_key('RULES_HIST_RECENT_PK', 'rules_hist_recent', ['history_id'])
        create_index('RULES_HIST_RECENT_ID_IDX', 'rules_hist_recent', ["id"])
def transmogrifier(bulk=5, once=False, sleep_time=60):
    """
    Creates a Transmogrifier Worker that gets a list of new DIDs for a given hash,
    identifies the subscriptions matching the DIDs and submit a replication rule
    for each DID matching a subscription.

    :param thread: Thread number at startup.
    :param bulk: The number of requests to process.
    :param once: Run only once.
    :param sleep_time: Time between two cycles.
    """
    executable = 'transmogrifier'
    hostname = socket.getfqdn()
    pid = os.getpid()
    hb_thread = threading.current_thread()
    heartbeat.sanity_check(executable=executable, hostname=hostname)
    while not graceful_stop.is_set():
        heart_beat = heartbeat.live(executable, hostname, pid, hb_thread)
        dids, subscriptions = [], []
        tottime = 0
        prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
        try:
            # Get the new DIDs based on the is_new flag
            for did in list_new_dids(thread=heart_beat['assign_thread'], total_threads=heart_beat['nr_threads'], chunk_size=bulk, did_type=None):
                dids.append({'scope': did['scope'], 'did_type': str(did['did_type']), 'name': did['name']})

            sub_dict = {3: []}
            # Get the list of subscriptions. The default priority of the subscription is 3.
            # 0 is the highest priority, 5 the lowest.
            # The priority is defined as 'policyid'
            for sub in list_subscriptions(None, None):
                # Expired subscriptions are switched to INACTIVE in place.
                if sub['state'] != SubscriptionState.INACTIVE and sub['lifetime'] and (datetime.now() > sub['lifetime']):
                    update_subscription(name=sub['name'], account=sub['account'], metadata={'state': SubscriptionState.INACTIVE})
                elif sub['state'] in [SubscriptionState.ACTIVE, SubscriptionState.UPDATED]:
                    priority = 3
                    if 'policyid' in sub:
                        if int(sub['policyid']) not in sub_dict:
                            sub_dict[int(sub['policyid'])] = []
                        priority = int(sub['policyid'])
                    sub_dict[priority].append(sub)
            priorities = list(sub_dict.keys())
            priorities.sort()
            # Order the subscriptions according to their priority
            for priority in priorities:
                subscriptions.extend(sub_dict[priority])
        except SubscriptionNotFound as error:
            logging.warning(prepend_str + 'No subscriptions defined: %s' % (str(error)))
            time.sleep(10)
            continue
        except Exception as error:
            logging.error(prepend_str + 'Failed to get list of new DIDs or subscriptions: %s' % (str(error)))

        try:
            results = {}
            start_time = time.time()
            # RSEs that are not writable are excluded from rule placement below.
            blacklisted_rse_id = [rse['id'] for rse in list_rses({'availability_write': False})]
            logging.debug(prepend_str + 'In transmogrifier worker')
            identifiers = []
            # Loop over all the new dids
            for did in dids:
                did_success = True
                # Only collections (datasets/containers) are matched against subscriptions.
                if did['did_type'] == str(DIDType.DATASET) or did['did_type'] == str(DIDType.CONTAINER):
                    did_tag = '%s:%s' % (did['scope'].internal, did['name'])
                    results[did_tag] = []
                    try:
                        metadata = get_metadata(did['scope'], did['name'])
                        # Loop over all the subscriptions
                        for subscription in subscriptions:
                            # Check if the DID match the subscription
                            if is_matching_subscription(subscription, did, metadata) is True:
                                filter_string = loads(subscription['filter'])
                                split_rule = filter_string.get('split_rule', False)
                                stime = time.time()
                                results[did_tag].append(subscription['id'])
                                logging.info(prepend_str + '%s:%s matches subscription %s' % (did['scope'], did['name'], subscription['name']))
                                rules = loads(subscription['replication_rules'])
                                created_rules = {}
                                cnt = 0
                                for rule_dict in rules:
                                    cnt += 1
                                    created_rules[cnt] = []
                                    # Get all the rule and subscription parameters
                                    grouping = rule_dict.get('grouping', 'DATASET')
                                    lifetime = rule_dict.get('lifetime', None)
                                    ignore_availability = rule_dict.get('ignore_availability', None)
                                    weight = rule_dict.get('weight', None)
                                    source_replica_expression = rule_dict.get('source_replica_expression', None)
                                    # 'locked' / 'purge_replicas' arrive as strings from JSON.
                                    locked = rule_dict.get('locked', None)
                                    if locked == 'True':
                                        locked = True
                                    else:
                                        locked = False
                                    purge_replicas = rule_dict.get('purge_replicas', False)
                                    if purge_replicas == 'True':
                                        purge_replicas = True
                                    else:
                                        purge_replicas = False
                                    rse_expression = str(rule_dict['rse_expression'])
                                    comment = str(subscription['comments'])
                                    subscription_id = str(subscription['id'])
                                    account = subscription['account']
                                    copies = int(rule_dict['copies'])
                                    activity = rule_dict.get('activity', 'User Subscriptions')
                                    try:
                                        validate_schema(name='activity', obj=activity)
                                    except InputValidationError as error:
                                        # Fall back to the default activity on invalid input.
                                        logging.error(prepend_str + 'Error validating the activity %s' % (str(error)))
                                        activity = 'User Subscriptions'
                                    if lifetime:
                                        lifetime = int(lifetime)

                                    str_activity = "".join(activity.split())
                                    success = False
                                    nattempt = 5
                                    attemptnr = 0
                                    skip_rule_creation = False

                                    selected_rses = []
                                    chained_idx = rule_dict.get('chained_idx', None)
                                    if chained_idx:
                                        # Chained subscription: don't use the RSESelector but the
                                        # RSEs returned by the chaining algorithm.
                                        params = {}
                                        if rule_dict.get('associated_site_idx', None):
                                            params['associated_site_idx'] = rule_dict.get('associated_site_idx', None)
                                        logging.debug('%s Chained subscription identified. Will use %s', prepend_str, str(created_rules[chained_idx]))
                                        algorithm = rule_dict.get('algorithm', None)
                                        selected_rses = select_algorithm(algorithm, created_rules[chained_idx], params)
                                    else:
                                        # In the case of chained subscription, don't use rseselector but use the rses returned by the algorithm
                                        if split_rule:
                                            vo = account.vo
                                            rses = parse_expression(rse_expression, filter={'vo': vo})
                                            list_of_rses = [rse['id'] for rse in rses]
                                            # Check that some rule doesn't already exist for this DID and subscription
                                            preferred_rse_ids = []
                                            for rule in list_rules(filters={'subscription_id': subscription_id, 'scope': did['scope'], 'name': did['name']}):
                                                already_existing_rses = [(rse['rse'], rse['id']) for rse in parse_expression(rule['rse_expression'], filter={'vo': vo})]
                                                for rse, rse_id in already_existing_rses:
                                                    if (rse_id in list_of_rses) and (rse_id not in preferred_rse_ids):
                                                        preferred_rse_ids.append(rse_id)
                                            if len(preferred_rse_ids) >= copies:
                                                skip_rule_creation = True

                                            rse_id_dict = {}
                                            for rse in rses:
                                                rse_id_dict[rse['id']] = rse['rse']
                                            try:
                                                rseselector = RSESelector(account=account, rses=rses, weight=weight, copies=copies - len(preferred_rse_ids))
                                                selected_rses = [rse_id_dict[rse_id] for rse_id, _, _ in rseselector.select_rse(0, preferred_rse_ids=preferred_rse_ids, copies=copies, blacklist=blacklisted_rse_id)]
                                            except (InsufficientTargetRSEs, InsufficientAccountLimit, InvalidRuleWeight, RSEOverQuota) as error:
                                                logging.warning(prepend_str + 'Problem getting RSEs for subscription "%s" for account %s : %s. Try including blacklisted sites' % (subscription['name'], account, str(error)))
                                                # Now including the blacklisted sites
                                                try:
                                                    rseselector = RSESelector(account=account, rses=rses, weight=weight, copies=copies - len(preferred_rse_ids))
                                                    selected_rses = [rse_id_dict[rse_id] for rse_id, _, _ in rseselector.select_rse(0, preferred_rse_ids=preferred_rse_ids, copies=copies, blacklist=[])]
                                                    ignore_availability = True
                                                except (InsufficientTargetRSEs, InsufficientAccountLimit, InvalidRuleWeight, RSEOverQuota) as error:
                                                    logging.error(prepend_str + 'Problem getting RSEs for subscription "%s" for account %s : %s. Skipping rule creation.' % (subscription['name'], account, str(error)))
                                                    monitor.record_counter(counters='transmogrifier.addnewrule.errortype.%s' % (str(error.__class__.__name__)), delta=1)
                                                    # The DID won't be reevaluated at the next cycle
                                                    did_success = did_success and True
                                                    continue

                                    for attempt in range(0, nattempt):
                                        attemptnr = attempt
                                        nb_rule = 0
                                        # Try to create the rule
                                        try:
                                            if split_rule:
                                                if not skip_rule_creation:
                                                    for rse in selected_rses:
                                                        # Chaining algorithms may return a dict carrying
                                                        # per-RSE overrides for source expression/weight.
                                                        if isinstance(selected_rses, dict):
                                                            source_replica_expression = selected_rses[rse].get('source_replica_expression', None)
                                                            weight = selected_rses[rse].get('weight', None)
                                                        logging.info(prepend_str + 'Will insert one rule for %s:%s on %s' % (did['scope'], did['name'], rse))
                                                        rule_ids = add_rule(dids=[{'scope': did['scope'], 'name': did['name']}], account=account, copies=1,
                                                                            rse_expression=rse, grouping=grouping, weight=weight, lifetime=lifetime, locked=locked,
                                                                            subscription_id=subscription_id, source_replica_expression=source_replica_expression,
                                                                            activity=activity, purge_replicas=purge_replicas,
                                                                            ignore_availability=ignore_availability, comment=comment)
                                                        created_rules[cnt].append(rule_ids[0])
                                                        nb_rule += 1
                                                        if nb_rule == copies:
                                                            success = True
                                                            break
                                            else:
                                                rule_ids = add_rule(dids=[{'scope': did['scope'], 'name': did['name']}], account=account, copies=copies,
                                                                    rse_expression=rse_expression, grouping=grouping, weight=weight, lifetime=lifetime, locked=locked,
                                                                    subscription_id=subscription['id'], source_replica_expression=source_replica_expression,
                                                                    activity=activity, purge_replicas=purge_replicas,
                                                                    ignore_availability=ignore_availability, comment=comment)
                                                created_rules[cnt].append(rule_ids[0])
                                                nb_rule += 1
                                                monitor.record_counter(counters='transmogrifier.addnewrule.done', delta=nb_rule)
                                            monitor.record_counter(counters='transmogrifier.addnewrule.activity.%s' % str_activity, delta=nb_rule)
                                            success = True
                                            break
                                        except (InvalidReplicationRule, InvalidRuleWeight, InvalidRSEExpression, StagingAreaRuleRequiresLifetime, DuplicateRule) as error:
                                            # Errors that won't be retried
                                            success = True
                                            logging.error(prepend_str + '%s' % (str(error)))
                                            monitor.record_counter(counters='transmogrifier.addnewrule.errortype.%s' % (str(error.__class__.__name__)), delta=1)
                                            break
                                        except (ReplicationRuleCreationTemporaryFailed, InsufficientTargetRSEs, InsufficientAccountLimit, DatabaseException, RSEBlacklisted, RSEWriteBlocked) as error:
                                            # Errors to be retried
                                            logging.error(prepend_str + '%s Will perform an other attempt %i/%i' % (str(error), attempt + 1, nattempt))
                                            monitor.record_counter(counters='transmogrifier.addnewrule.errortype.%s' % (str(error.__class__.__name__)), delta=1)
                                        except Exception:
                                            # Unexpected errors
                                            monitor.record_counter(counters='transmogrifier.addnewrule.errortype.unknown', delta=1)
                                            exc_type, exc_value, exc_traceback = exc_info()
                                            logging.critical(prepend_str + ''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())

                                    did_success = (did_success and success)
                                    if (attemptnr + 1) == nattempt and not success:
                                        logging.error(prepend_str + 'Rule for %s:%s on %s cannot be inserted' % (did['scope'], did['name'], rse_expression))
                                    else:
                                        logging.info(prepend_str + '%s rule(s) inserted in %f seconds' % (str(nb_rule), time.time() - stime))
                    except DataIdentifierNotFound as error:
                        # NOTE(review): 'error' is an exception object; str + error may raise
                        # TypeError here — confirm whether str(error) was intended.
                        logging.warning(prepend_str + error)

                if did_success:
                    # Per-type bookkeeping; only successful DIDs get their is_new flag reset.
                    if did['did_type'] == str(DIDType.FILE):
                        monitor.record_counter(counters='transmogrifier.did.file.processed', delta=1)
                    elif did['did_type'] == str(DIDType.DATASET):
                        monitor.record_counter(counters='transmogrifier.did.dataset.processed', delta=1)
                    elif did['did_type'] == str(DIDType.CONTAINER):
                        monitor.record_counter(counters='transmogrifier.did.container.processed', delta=1)
                    monitor.record_counter(counters='transmogrifier.did.processed', delta=1)
                    identifiers.append({'scope': did['scope'], 'name': did['name'], 'did_type': DIDType.from_sym(did['did_type'])})

            time1 = time.time()
            # Mark the DIDs as processed
            for identifier in chunks(identifiers, 100):
                _retrial(set_new_dids, identifier, None)
            logging.info(prepend_str + 'Time to set the new flag : %f' % (time.time() - time1))
            tottime = time.time() - start_time
            for sub in subscriptions:
                update_subscription(name=sub['name'], account=sub['account'], metadata={'last_processed': datetime.now()})
            logging.info(prepend_str + 'It took %f seconds to process %i DIDs' % (tottime, len(dids)))
            logging.debug(prepend_str + 'DIDs processed : %s' % (str(dids)))
            monitor.record_counter(counters='transmogrifier.job.done', delta=1)
            monitor.record_timer(stat='transmogrifier.job.duration', time=1000 * tottime)
        except Exception:
            exc_type, exc_value, exc_traceback = exc_info()
            logging.critical(prepend_str + ''.join(format_exception(exc_type, exc_value, exc_traceback)).strip())
            monitor.record_counter(counters='transmogrifier.job.error', delta=1)
            monitor.record_counter(counters='transmogrifier.addnewrule.error', delta=1)
        if once is True:
            break
        if tottime < sleep_time:
            logging.info(prepend_str + 'Will sleep for %s seconds' % (sleep_time - tottime))
            time.sleep(sleep_time - tottime)
    heartbeat.die(executable, hostname, pid, hb_thread)
    logging.info(prepend_str + 'Graceful stop requested')
    logging.info(prepend_str + 'Graceful stop done')
def add_exception(dids, account, pattern, comments, expires_at, session=None):
    """
    Add exceptions to Lifetime Model.

    :param dids:       The list of dids.
    :param account:    The account of the requester.
    :param pattern:    The name pattern the exception applies to.
    :param comments:   The comments associated to the exception. May carry an
                       estimated volume after a '||||' separator.
    :param expires_at: The expiration date of the exception (datetime or string).
    :param session:    The database session in use.

    :returns: The id of the exception.
    :raises LifetimeExceptionDuplicate: If the exception already exists.
    :raises RucioException: On any other database integrity error.
    """
    exception_id = generate_uuid()
    text = 'Account %s requested a lifetime extension for a list of DIDs that can be found below\n' % account
    reason = comments
    volume = None
    lifetime = None
    if comments.find('||||') > -1:
        # Split only on the first separator so a stray '||||' in the free-text
        # reason cannot make the unpacking raise ValueError.
        reason, volume = comments.split('||||', 1)
    text += 'The reason for the extension is "%s"\n' % reason
    text += 'It represents %s datasets\n' % len(dids)
    if volume:
        text += 'The estimated physical volume is %s\n' % volume
    if expires_at and isinstance(expires_at, string_types):
        lifetime = str_to_date(expires_at)
        text += 'The lifetime exception should expires on %s\n' % str(expires_at)
    elif isinstance(expires_at, datetime):
        lifetime = expires_at
        text += 'The lifetime exception should expires on %s\n' % str(expires_at)
    text += 'Link to approve or reject this request can be found at the end of the mail\n'
    text += '\n'
    text += 'DIDTYPE SCOPE NAME\n'
    text += '\n'
    truncated_message = False
    for did in dids:
        did_type = None
        if 'did_type' in did:
            if isinstance(did['did_type'], string_types):
                did_type = DIDType.from_sym(did['did_type'])
            else:
                did_type = did['did_type']
        new_exception = models.LifetimeExceptions(id=exception_id,
                                                 scope=did['scope'],
                                                 name=did['name'],
                                                 did_type=did_type,
                                                 account=account,
                                                 pattern=pattern,
                                                 comments=reason,
                                                 state=LifetimeExceptionsState.WAITING,
                                                 expires_at=lifetime)
        # Cap the mail body at roughly 3000 characters; the remaining DIDs are
        # still written to the database, only the notification is truncated.
        if len(text) < 3000:
            text += '%s %s %s\n' % (str(did_type), did['scope'], did['name'])
        else:
            truncated_message = True
        try:
            new_exception.save(session=session, flush=False)
        except IntegrityError as error:
            # Normalize args[0] through str() for every backend pattern
            # (Oracle, SQLite, MySQL) so match() never gets a non-string.
            if match('.*ORA-00001.*', str(error.args[0])) \
                    or match('.*IntegrityError.*UNIQUE constraint failed.*', str(error.args[0])) \
                    or match('.*1062.*Duplicate entry.*for key.*', str(error.args[0])) \
                    or match('.*IntegrityError.*columns? .*not unique.*', str(error.args[0])):
                raise LifetimeExceptionDuplicate()
            raise RucioException(error.args[0])
    if truncated_message:
        text += '...\n'
        text += 'List too long. Truncated\n'
    text += '\n'
    text += 'Approve: https://rucio-ui.cern.ch/lifetime_exception?id=%s&action=approve\n' % str(exception_id)
    text += 'Deny: https://rucio-ui.cern.ch/lifetime_exception?id=%s&action=deny\n' % str(exception_id)
    approvers_email = get('lifetime_model', 'approvers_email', default=[], session=session)
    if approvers_email:
        approvers_email = approvers_email.split(',')  # pylint: disable=no-member
    add_message(event_type='email',
                payload={'body': text,
                         'to': approvers_email,
                         'subject': '[RUCIO] Request to approve lifetime exception %s' % str(exception_id)},
                session=session)
    return exception_id
def test_to_repair_a_rule_with_only_1_rse_whose_transfers_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with only 1 rse whose transfers failed (lock)"""
    rule_repairer(once=True)  # Clean out the repairer
    scope = InternalScope('mock')
    files = create_files(4, scope, self.rse4_id, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}],
                       account=self.jdoe,
                       copies=1,
                       rse_expression=self.rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]

    def _lock_rse_id(f):
        # RSE id of the (single) replica lock of this file.
        return get_replica_locks(scope=f['scope'], name=f['name'])[0].rse_id

    # Two transfers succeed, two fail; the failed requests are then cancelled.
    # Each lookup uses the file's own name (the original copy-pasted
    # files[2]['name'] everywhere, which only worked because the rule targets
    # a single RSE, so every lock resolves to the same rse_id).
    successful_transfer(scope=scope, name=files[0]['name'], rse_id=_lock_rse_id(files[0]), nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=_lock_rse_id(files[1]), nowait=False)
    failed_transfer(scope=scope, name=files[2]['name'], rse_id=_lock_rse_id(files[2]))
    failed_transfer(scope=scope, name=files[3]['name'], rse_id=_lock_rse_id(files[3]))
    cancel_request_did(scope=scope, name=files[2]['name'], dest_rse_id=_lock_rse_id(files[2]))
    cancel_request_did(scope=scope, name=files[3]['name'], dest_rse_id=_lock_rse_id(files[3]))

    assert(rule_id == get_rule(rule_id)['id'].replace('-', '').lower())
    assert(RuleState.STUCK == get_rule(rule_id)['state'])
    rule_repairer(once=True)

    # Still assert STUCK because of delays:
    assert(RuleState.STUCK == get_rule(rule_id)['state'])
    assert(get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id
           == get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
def test_judge_evaluate_detach(self):
    """ JUDGE EVALUATOR: Test if the detach is done correctly"""
    re_evaluator(once=True)

    scope = 'mock'
    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')

    # Attach three datasets of three files each to the container (the
    # original repeated this block verbatim three times).  After the loop
    # `files` and `dataset` hold the LAST dataset's values, which the final
    # detach below relies on.
    for _ in range(3):
        files = create_files(3, scope, self.rse1, bytes=100)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')

    # Add a first rule to the Container
    rule_id = add_rule(dids=[{'scope': scope, 'name': container}],
                       account='jdoe',
                       copies=1,
                       rse_expression=self.rse1,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]

    # Fake judge
    re_evaluator(once=True)
    assert(9 == get_rule(rule_id)['locks_ok_cnt'])

    # Detach one file from the last dataset: one OK lock must disappear.
    detach_dids(scope, dataset, [files[0]])

    # Fake judge
    re_evaluator(once=True)
    assert(8 == get_rule(rule_id)['locks_ok_cnt'])
monitor.record_counter( counters='transmogrifier.did.dataset.processed', delta=1) elif did['did_type'] == str(DIDType.CONTAINER): monitor.record_counter( counters='transmogrifier.did.container.processed', delta=1) monitor.record_counter( counters='transmogrifier.did.processed', delta=1) identifiers.append({ 'scope': did['scope'], 'name': did['name'], 'did_type': DIDType.from_sym(did['did_type']) }) time1 = time.time() for identifier in chunks(identifiers, 100): _retrial(set_new_dids, identifier, None) logging.info(prepend_str + 'Time to set the new flag : %f' % (time.time() - time1)) tottime = time.time() - start_time logging.info(prepend_str + 'It took %f seconds to process %i DIDs' % (tottime, len(dids))) logging.debug(prepend_str + 'DIDs processed : %s' % (str(dids))) monitor.record_counter(counters='transmogrifier.job.done', delta=1)
def test_to_repair_a_rule_with_NONE_grouping_whose_transfer_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with 1 failed transfer (lock)"""
    rule_repairer(once=True)  # Clean out the repairer
    scope = InternalScope('mock')
    files = create_files(3, scope, self.rse4_id, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}],
                       account=self.jdoe,
                       copies=1,
                       rse_expression=self.T1,
                       grouping='NONE',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]

    failed_rse_id = get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id

    # Before repair: the third file's replica is still COPYING and locked once.
    replica_before = get_replica(scope=files[2]['scope'], name=files[2]['name'], rse_id=failed_rse_id)
    assert(replica_before['state'] == ReplicaState.COPYING)
    assert(replica_before['lock_cnt'] == 1)

    successful_transfer(scope=scope,
                        name=files[0]['name'],
                        rse_id=get_replica_locks(scope=files[0]['scope'], name=files[2]['name'])[0].rse_id,
                        nowait=False)
    successful_transfer(scope=scope,
                        name=files[1]['name'],
                        rse_id=get_replica_locks(scope=files[1]['scope'], name=files[2]['name'])[0].rse_id,
                        nowait=False)
    failed_transfer(scope=scope,
                    name=files[2]['name'],
                    rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id)

    assert(rule_id == get_rule(rule_id)['id'].replace('-', '').lower())
    assert(RuleState.STUCK == get_rule(rule_id)['state'])
    rule_repairer(once=True)
    assert(RuleState.REPLICATING == get_rule(rule_id)['state'])

    # After repair: the failed replica is UNAVAILABLE and no longer locked.
    replica_after = get_replica(scope=files[2]['scope'], name=files[2]['name'], rse_id=failed_rse_id)
    assert(replica_after['state'] == ReplicaState.UNAVAILABLE)
    assert(replica_after['lock_cnt'] == 0)
def upgrade():
    '''
    Upgrade the database to this revision
    '''
    # DDL is only emitted for the fully-supported dialects.
    if context.get_context().dialect.name not in ['oracle', 'mysql', 'postgresql']:
        return

    # Table of DIDs followed by an account.
    create_table('dids_followed',
                 sa.Column('scope', sa.String(25)),
                 sa.Column('name', sa.String(255)),
                 sa.Column('account', sa.String(25)),
                 sa.Column('did_type', DIDType.db_type(name='DIDS_FOLLOWED_TYPE_CHK')),
                 sa.Column('updated_at', sa.DateTime, default=datetime.datetime.utcnow,
                           onupdate=datetime.datetime.utcnow),
                 sa.Column('created_at', sa.DateTime, default=datetime.datetime.utcnow))
    create_primary_key('DIDS_FOLLOWED_PK', 'dids_followed', ['scope', 'name', 'account'])
    # NOT NULL checks, one per column.
    for constraint_name, condition in [('DIDS_FOLLOWED_SCOPE_NN', 'scope is not null'),
                                       ('DIDS_FOLLOWED_NAME_NN', 'name is not null'),
                                       ('DIDS_FOLLOWED_ACCOUNT_NN', 'account is not null'),
                                       ('DIDS_FOLLOWED_DID_TYPE_NN', 'did_type is not null'),
                                       ('DIDS_FOLLOWED_CREATED_NN', 'created_at is not null'),
                                       ('DIDS_FOLLOWED_UPDATED_NN', 'updated_at is not null')]:
        create_check_constraint(constraint_name, 'dids_followed', condition)
    create_foreign_key('DIDS_FOLLOWED_ACCOUNT_FK', 'dids_followed',
                       'accounts', ['account'], ['account'])
    create_foreign_key('DIDS_FOLLOWED_SCOPE_NAME_FK', 'dids_followed',
                       'dids', ['scope', 'name'], ['scope', 'name'])

    # Table of events generated for followed DIDs.
    create_table('dids_followed_events',
                 sa.Column('scope', sa.String(25)),
                 sa.Column('name', sa.String(255)),
                 sa.Column('account', sa.String(25)),
                 sa.Column('did_type', DIDType.db_type(name='DIDS_FOLLOWED_EVENTS_TYPE_CHK')),
                 sa.Column('event_type', sa.String(1024)),
                 sa.Column('payload', sa.Text),
                 sa.Column('updated_at', sa.DateTime, default=datetime.datetime.utcnow,
                           onupdate=datetime.datetime.utcnow),
                 sa.Column('created_at', sa.DateTime, default=datetime.datetime.utcnow))
    create_primary_key('DIDS_FOLLOWED_EVENTS_PK', 'dids_followed_events', ['scope', 'name', 'account'])
    # NOT NULL checks, one per column.
    for constraint_name, condition in [('DIDS_FOLLOWED_EVENTS_SCOPE_NN', 'scope is not null'),
                                       ('DIDS_FOLLOWED_EVENTS_NAME_NN', 'name is not null'),
                                       ('DIDS_FOLLOWED_EVENTS_ACC_NN', 'account is not null'),
                                       ('DIDS_FOLLOWED_EVENTS_TYPE_NN', 'did_type is not null'),
                                       ('DIDS_FOLLOWED_EVENTS_CRE_NN', 'created_at is not null'),
                                       ('DIDS_FOLLOWED_EVENTS_UPD_NN', 'updated_at is not null')]:
        create_check_constraint(constraint_name, 'dids_followed_events', condition)
    create_foreign_key('DIDS_FOLLOWED_EVENTS_ACC_FK', 'dids_followed_events',
                       'accounts', ['account'], ['account'])
    create_index('DIDS_FOLLOWED_EVENTS_ACC_IDX', 'dids_followed_events', ['account'])