def set_metadata(scope, name, key, value, recursive=False, session=None):
    """
    Sets metadata for a given did.

    :param scope: The scope of the did.
    :param name: The data identifier name.
    :param key: Metadata key.
    :param value: Metadata value.
    :param recursive: (optional) Propagate the metadata change recursively to content.
    :param session: (optional) The database session in use.
    :raises: InvalidMetadata
    """
    # Reject keys containing any restricted character.
    for forbidden_char, reason in RESTRICTED_CHARACTERS.items():
        if forbidden_char in key:
            raise exception.InvalidMetadata('Restricted character "{}" found in metadata key. Reason: {}'.format(
                forbidden_char, reason
            ))

    # Find the first plugin that manages this key. The ordering of
    # [METADATA_PLUGIN_MODULES] guarantees the base list is consulted first.
    handler = next(
        (plugin for plugin in METADATA_PLUGIN_MODULES if plugin.manages_key(key, session=session)),
        None
    )
    if handler is None:
        raise exception.InvalidMetadata('No plugin manages metadata key %s for DID %s:%s' % (key, scope, name))
    handler.set_metadata(scope, name, key, value, recursive, session=session)
def set_metadata_bulk(scope, name, meta, recursive=False, session=None):
    """
    Bulk sets metadata for a given did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param meta: The key-value mapping of metadata to set.
    :param recursive: (optional) Propagate the metadata change recursively to content.
    :param session: (optional) The database session in use.
    :raises: InvalidMetadata
    """
    # Normalise the input to a plain dict (callers may pass any key/value mapping).
    metadata = meta if isinstance(meta, dict) else dict(meta)

    keys_by_plugin = {plugin: [] for plugin in METADATA_PLUGIN_MODULES}
    unmanaged_keys = []

    # Assign every key to the first plugin that manages it. The ordering of
    # [METADATA_PLUGIN_MODULES] guarantees the base list is consulted first.
    for key in metadata:
        # Check for forbidden characters in the key.
        for char in RESTRICTED_CHARACTERS:
            if char in key:
                raise exception.InvalidMetadata(
                    'Restricted character "{}" found in metadata key. Reason: {}'
                    .format(char, RESTRICTED_CHARACTERS[char]))
        for plugin in METADATA_PLUGIN_MODULES:
            if plugin.manages_key(key, session=session):
                keys_by_plugin[plugin].append(key)
                break
        else:
            unmanaged_keys.append(key)

    if unmanaged_keys:
        raise exception.InvalidMetadata(
            'No plugin manages metadata keys %s on DID %s:%s' %
            (unmanaged_keys, scope, name))

    # Dispatch each plugin's share of the metadata as one bulk call.
    for plugin, plugin_keys in keys_by_plugin.items():
        if plugin_keys:
            plugin.set_metadata_bulk(scope, name,
                                     metadata={key: metadata[key] for key in plugin_keys},
                                     recursive=recursive,
                                     session=session)
def list_dids(scope=None, filters=None, did_type='collection', ignore_case=False, limit=None,
              offset=None, long=False, recursive=False, ignore_dids=None, session=None):
    """
    Search data identifiers.

    All filter keys should belong to a single plugin. Queries across plugins are not currently supported.

    :param scope: the scope name.
    :param filters: dictionary (or list of dictionaries, OR-ed together) of attributes by which the
                    results should be filtered.
    :param did_type: the type of the did: all(container, dataset, file), collection(dataset or container),
                     dataset, container, file.
    :param ignore_case: ignore case distinctions.
    :param limit: limit number.
    :param offset: offset number.
    :param long: Long format option to display more information for each DID.
    :param recursive: Recursively list DIDs content.
    :param ignore_dids: List of DIDs to refrain from yielding.
    :param session: The database session in use.
    :returns: List of dids satisfying metadata criteria.
    :raises: InvalidMetadata
    """
    # FIX: guard against filters=None (the documented default), which previously
    # raised TypeError when iterated below; an empty OR-group means "no filtering".
    if filters is None:
        filters = [{}]
    # backwards compatibility for filters as single {}.
    elif isinstance(filters, dict):
        filters = [filters]

    required_unique_plugins = set()  # keep track of which plugins are required
    for or_group in filters:
        for key in or_group.keys():
            if key == 'name':
                # [name] is always passed through, and needs to be in schema of all plugins
                continue
            key_nooperator = key.split('.')[0]  # remove operator attribute from key if suffixed

            # Iterate through the list of metadata plugins, checking which (if any) manages this
            # particular key and appending the corresponding plugin to the set, required_unique_plugins.
            for metadata_plugin in METADATA_PLUGIN_MODULES:
                if metadata_plugin.manages_key(key_nooperator, session=session):
                    required_unique_plugins.add(metadata_plugin)
                    break
            else:
                raise exception.InvalidMetadata('There is no metadata plugin that manages the filter key(s) you requested.')

    if not required_unique_plugins:
        # if no metadata keys were specified, fall back to using the base plugin
        required_unique_plugins = [METADATA_PLUGIN_MODULES[0]]
    elif len(required_unique_plugins) > 1:
        # check that only a single plugin is required for the query, otherwise not supported
        raise exception.InvalidMetadata('Filter keys used do not all belong to the same metadata plugin.')
    selected_plugin_to_use = list(required_unique_plugins)[0]

    return selected_plugin_to_use.list_dids(scope=scope, filters=filters, did_type=did_type,
                                            ignore_case=ignore_case, limit=limit, offset=offset,
                                            long=long, recursive=recursive, ignore_dids=ignore_dids,
                                            session=session)
def set_metadata_bulk(self, scope, name, meta, recursive=False, session=None):
    """
    Bulk-set metadata on a DID stored in the DID table columns.

    Special keys ('lifetime', 'guid', 'events', 'adler32', 'bytes') each trigger
    dedicated update cascades; every other key is collected into [remainder] and
    applied as a single column update at the end.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param meta: Key-value mapping of metadata to set.
    :param recursive: Propagate the remainder keys to child content as well.
    :param session: The database session in use.
    :raises: DataIdentifierNotFound, InvalidValueForKey, UnsupportedOperation, InvalidMetadata
    """
    did_query = session.query(models.DataIdentifier).with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').filter_by(scope=scope, name=name)
    # Fail fast if the DID does not exist at all.
    if did_query.one_or_none() is None:
        raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (scope, name))

    remainder = {}  # keys without special handling, applied in one update below
    for key, value in meta.items():
        if key == 'lifetime':
            # lifetime is stored as an absolute expiry timestamp; None clears it.
            try:
                expired_at = None
                if value is not None:
                    expired_at = datetime.utcnow() + timedelta(seconds=float(value))
                rowcount = did_query.update({'expired_at': expired_at}, synchronize_session='fetch')
            except TypeError as error:
                raise exception.InvalidValueForKey(error)
            if not rowcount:
                # check for did presence
                raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))
        elif key in ['guid', 'events']:
            # These keys only apply to FILE-type DIDs.
            rowcount = did_query.filter_by(did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            if not rowcount:
                # check for did presence
                raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))
            # Mirror the value onto the parent-child association rows.
            session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            if key == 'events':
                # Re-aggregate the event count on every parent of this file.
                for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope, models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):
                    events = session.query(func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()[0]
                    session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update({'events': events}, synchronize_session=False)
        elif key == 'adler32':
            # Checksum change must be mirrored onto associations, pending transfer
            # requests and replicas.
            rowcount = did_query.filter_by(did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            if not rowcount:
                # check for did presence
                raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))
            session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
            session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
        elif key == 'bytes':
            # Size change cascades widest: associations, requests, locks, replicas,
            # account/RSE counters and parent dataset/container aggregates.
            rowcount = did_query.filter_by(did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            if not rowcount:
                # check for did presence
                raise exception.UnsupportedOperation('%s for %s:%s cannot be updated' % (key, scope, name))
            session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
            session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)

            # Adjust per-account usage counters: remove the old size, add the new one.
            for account, bytes, rse_id, rule_id in session.query(models.ReplicaLock.account, models.ReplicaLock.bytes, models.ReplicaLock.rse_id, models.ReplicaLock.rule_id).filter_by(scope=scope, name=name):
                session.query(models.ReplicaLock).filter_by(scope=scope, name=name, rule_id=rule_id, rse_id=rse_id).update({key: value}, synchronize_session=False)
                account_counter.decrease(rse_id=rse_id, account=account, files=1, bytes=bytes, session=session)
                account_counter.increase(rse_id=rse_id, account=account, files=1, bytes=value, session=session)

            # Adjust per-RSE usage counters the same way.
            for bytes, rse_id in session.query(models.RSEFileAssociation.bytes, models.RSEFileAssociation.rse_id).filter_by(scope=scope, name=name):
                session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name, rse_id=rse_id).update({key: value}, synchronize_session=False)
                rse_counter.decrease(rse_id=rse_id, files=1, bytes=bytes, session=session)
                rse_counter.increase(rse_id=rse_id, files=1, bytes=value, session=session)

            # Re-aggregate length/bytes/events on each parent and its dataset locks.
            for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope,
                                                           models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):
                values = {}
                values['length'], values['bytes'], values['events'] = session.query(func.count(models.DataIdentifierAssociation.scope),
                                                                                    func.sum(models.DataIdentifierAssociation.bytes),
                                                                                    func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()
                session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update(values, synchronize_session=False)
                session.query(models.DatasetLock).filter_by(scope=parent_scope, name=parent_name).update({'length': values['length'], 'bytes': values['bytes']}, synchronize_session=False)
        else:
            # No special handling: defer to the single bulk column update below.
            remainder[key] = value

    if remainder:
        try:
            rowcount = did_query.update(remainder, synchronize_session='fetch')
        except CompileError as error:
            raise exception.InvalidMetadata(error)
        except InvalidRequestError:
            # raised e.g. when a key does not map to a DID table column
            raise exception.InvalidMetadata("Some of the keys are not accepted: " + str(list(remainder.keys())))
        if not rowcount:
            raise exception.UnsupportedOperation('Some of the keys for %s:%s cannot be updated: %s' % (scope, name, str(list(remainder.keys()))))

        # propagate metadata updates to child content
        if recursive:
            content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                          models.DataIdentifierAssociation.child_name)
            content_query = content_query.with_hint(models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle').filter_by(scope=scope, name=name)
            for child_scope, child_name in content_query:
                try:
                    child_did_query = session.query(models.DataIdentifier).with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').filter_by(scope=child_scope, name=child_name)
                    child_did_query.update(remainder, synchronize_session='fetch')
                except CompileError as error:
                    raise exception.InvalidMetadata(error)
                except InvalidRequestError:
                    raise exception.InvalidMetadata("Some of the keys are not accepted recursively: " + str(list(remainder.keys())))
def list_dids(self, scope, filters, did_type='collection', ignore_case=False, limit=None, offset=None, long=False, recursive=False, ignore_dids=None, session=None):
    """
    Search DIDs by metadata stored in the JSON metadata table.

    Yields DID names (or, with long=True, dicts with scope/name and None for the
    fields this plugin cannot provide). [ignore_dids] is shared across recursive
    calls to deduplicate results from overlapping OR-groups.

    :param scope: the scope name.
    :param filters: single dict or list of dicts (OR-ed together) of filter criteria.
    :param did_type: the type of the did (not applied by this plugin's query itself).
    :param ignore_case: ignore case distinctions.
    :param limit: limit number.
    :param offset: offset number.
    :param long: Long format option to display more information for each DID.
    :param recursive: Recursively list DIDs content.
    :param ignore_dids: Set of "scope:name" strings to refrain from yielding.
    :param session: The database session in use.
    :raises: NotImplementedError (JSON not supported by the database), InvalidMetadata
    """
    # JSON columns are only usable on supporting database backends.
    if not json_implemented(session=session):
        raise NotImplementedError
    if not ignore_dids:
        ignore_dids = set()

    # backwards compatability for filters as single {}.
    if isinstance(filters, dict):
        filters = [filters]

    # instantiate fe and create sqla query, note that coercion to a model keyword
    # is not appropriate here as the filter words are stored in a single json column.
    fe = FilterEngine(filters, model_class=models.DidMeta, strict_coerce=False)
    query = fe.create_sqla_query(additional_model_attributes=[
        models.DidMeta.scope,
        models.DidMeta.name
    ], additional_filters=[(models.DidMeta.scope, operator.eq, scope)],
        json_column=models.DidMeta.meta)

    if limit:
        query = query.limit(limit)

    if recursive:
        # local import — NOTE(review): presumably avoids a circular import with rucio.core.did
        from rucio.core.did import list_content

        # Get attached DIDs and save in list because query has to be finished before starting a new one in the recursion
        collections_content = []
        for did in query.yield_per(100):
            # assumes rows expose did_type — TODO confirm FilterEngine returns full model rows
            if (did.did_type == DIDType.CONTAINER or did.did_type == DIDType.DATASET):
                collections_content += [d for d in list_content(scope=did.scope, name=did.name)]

        # Replace any name filtering with recursed DID names.
        # NOTE(review): this mutates the caller-supplied [filters] dicts in place.
        for did in collections_content:
            for or_group in filters:
                or_group['name'] = did['name']
            for result in self.list_dids(scope=did['scope'], filters=filters,
                                         recursive=True, did_type=did_type, limit=limit, offset=offset,
                                         long=long, ignore_dids=ignore_dids, session=session):
                yield result

    try:
        for did in query.yield_per(5):  # don't unpack this as it makes it dependent on query return order!
            if long:
                did_full = "{}:{}".format(did.scope, did.name)
                if did_full not in ignore_dids:  # concatenating results of OR clauses may contain duplicate DIDs if query result sets not mutually exclusive.
                    ignore_dids.add(did_full)
                    yield {
                        'scope': did.scope,
                        'name': did.name,
                        'did_type': None,  # not available with JSON plugin
                        'bytes': None,  # not available with JSON plugin
                        'length': None  # not available with JSON plugin
                    }
            else:
                did_full = "{}:{}".format(did.scope, did.name)
                if did_full not in ignore_dids:  # concatenating results of OR clauses may contain duplicate DIDs if query result sets not mutually exclusive.
                    ignore_dids.add(did_full)
                    yield did.name
    except DataError as e:
        raise exception.InvalidMetadata(
            "Database query failed: {}. This can be raised when the datatype of a key is inconsistent between dids."
            .format(e))
def set_metadata(self, scope, name, key, value, recursive=False, session=None):
    """
    Add metadata to data identifier (stored in the DID table columns).

    Special keys ('lifetime', 'guid', 'events', 'adler32', 'bytes') each trigger
    dedicated update cascades; any other key is written directly to the matching
    DID table column.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param key: the key.
    :param value: the value.
    :param recursive: Option to propagate the metadata change to content.
    :param session: The database session in use.
    :raises: DataIdentifierNotFound, InvalidValueForKey, InvalidMetadata, UnsupportedOperation
    """
    # Fail fast if the DID does not exist; also seeds [rowcount] so the final
    # presence check below has a truthy value if no branch reassigns it.
    try:
        rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name).\
            with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').one()
    except NoResultFound:
        raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (scope, name))

    if key == 'lifetime':
        # lifetime is stored as an absolute expiry timestamp; None clears it.
        try:
            expired_at = None
            if value is not None:
                expired_at = datetime.utcnow() + timedelta(seconds=float(value))
            rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name).update({'expired_at': expired_at}, synchronize_session='fetch')
        except TypeError as error:
            raise exception.InvalidValueForKey(error)
    elif key in ['guid', 'events']:
        # These keys only apply to FILE-type DIDs; mirror onto association rows.
        rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name, did_type=DIDType.FILE).update({key: value}, synchronize_session=False)

        session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
        if key == 'events':
            # Re-aggregate the event count on every parent of this file.
            for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope, models.DataIdentifierAssociation.name).filter_by(child_scope=scope, child_name=name):
                events = session.query(func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()[0]
                session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update({'events': events}, synchronize_session=False)
    elif key == 'adler32':
        # Checksum change must be mirrored onto associations, pending transfer
        # requests and replicas.
        rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name, did_type=DIDType.FILE).update({key: value},
                                                                                                                        synchronize_session=False)
        session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
        session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
        session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)
    elif key == 'bytes':
        # Size change cascades widest: associations, requests, locks, replicas,
        # account/RSE counters and parent dataset/container aggregates.
        rowcount = session.query(models.DataIdentifier).filter_by(scope=scope, name=name, did_type=DIDType.FILE).update({key: value}, synchronize_session=False)
        session.query(models.DataIdentifierAssociation).filter_by(child_scope=scope, child_name=name, child_type=DIDType.FILE).update({key: value}, synchronize_session=False)
        session.query(models.Request).filter_by(scope=scope, name=name).update({key: value}, synchronize_session=False)

        # Adjust per-account usage counters: remove the old size, add the new one.
        for account, bytes, rse_id, rule_id in session.query(models.ReplicaLock.account, models.ReplicaLock.bytes, models.ReplicaLock.rse_id, models.ReplicaLock.rule_id).filter_by(scope=scope, name=name):
            session.query(models.ReplicaLock).filter_by(scope=scope, name=name, rule_id=rule_id, rse_id=rse_id).update({key: value}, synchronize_session=False)
            account_counter.decrease(rse_id=rse_id, account=account, files=1, bytes=bytes, session=session)
            account_counter.increase(rse_id=rse_id, account=account, files=1, bytes=value, session=session)

        # Adjust per-RSE usage counters the same way.
        for bytes, rse_id in session.query(models.RSEFileAssociation.bytes, models.RSEFileAssociation.rse_id).filter_by(scope=scope, name=name):
            session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name, rse_id=rse_id).update({key: value}, synchronize_session=False)
            rse_counter.decrease(rse_id=rse_id, files=1, bytes=bytes, session=session)
            rse_counter.increase(rse_id=rse_id, files=1, bytes=value, session=session)

        # Re-aggregate length/bytes/events on each parent and its dataset locks.
        for parent_scope, parent_name in session.query(models.DataIdentifierAssociation.scope,
                                                       models.DataIdentifierAssociation.name).filter_by(child_scope=scope,
                                                                                                        child_name=name):
            values = {}
            values['length'], values['bytes'], values['events'] = session.query(func.count(models.DataIdentifierAssociation.scope),
                                                                                func.sum(models.DataIdentifierAssociation.bytes),
                                                                                func.sum(models.DataIdentifierAssociation.events)).filter_by(scope=parent_scope, name=parent_name).one()
            session.query(models.DataIdentifier).filter_by(scope=parent_scope, name=parent_name).update(values, synchronize_session=False)
            session.query(models.DatasetLock).filter_by(scope=parent_scope, name=parent_name).update({'length': values['length'], 'bytes': values['bytes']}, synchronize_session=False)
    else:
        # Generic key: write straight to the matching DID table column.
        try:
            rowcount = session.query(models.DataIdentifier).\
                with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').\
                filter_by(scope=scope, name=name).\
                update({key: value}, synchronize_session='fetch')
        except CompileError as error:
            raise exception.InvalidMetadata(error)
        except InvalidRequestError:
            # raised e.g. when the key does not map to a DID table column
            raise exception.InvalidMetadata("Key %s is not accepted" % key)

        # propagate metadata updates to child content
        if recursive:
            content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                          models.DataIdentifierAssociation.child_name).\
                with_hint(models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle').\
                filter_by(scope=scope, name=name)

            for child_scope, child_name in content_query:
                try:
                    session.query(models.DataIdentifier).\
                        with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle').\
                        filter_by(scope=child_scope, name=child_name).\
                        update({key: value}, synchronize_session='fetch')
                except CompileError as error:
                    raise exception.InvalidMetadata(error)
                except InvalidRequestError:
                    raise exception.InvalidMetadata("Key %s is not accepted" % key)

    if not rowcount:
        # check for did presence
        raise exception.UnsupportedOperation('%(key)s for %(scope)s:%(name)s cannot be updated' % locals())