def delete(self):
    """Delete a deposit.

    Hard-deletes the reserved record PID (and the parent versioning PID
    when it has no published children), soft-deletes the deposit PID,
    removes the deposit from the search index, then removes the deposit
    itself and finally its buckets.
    """
    deposit_pid = self.pid
    pid_value = deposit_pid.pid_value
    # The record PID shares the deposit's pid_value.
    record_pid = RecordUUIDProvider.get(pid_value).pid
    version_master = PIDNodeVersioning(child=record_pid)
    # every deposit has a parent version after the 2.1.0 upgrade
    # except deleted ones. We check the parent version in case of a delete
    # revert.
    # NOTE(review): the constructor always returns an object, so this
    # assert can never fire; presumably the intent was to check that the
    # versioning relation actually exists — confirm against the
    # PIDNodeVersioning API.
    assert version_master is not None, 'Unexpected deposit without versioning.'
    # if the record is unpublished hard delete it
    if record_pid.status == PIDStatus.RESERVED:
        version_master.remove_draft_child()
        db.session.delete(record_pid)
    # if the parent doesn't have any published records hard delete it
    if version_master.parent.status == PIDStatus.RESERVED:
        db.session.delete(version_master.parent)
    deposit_pid.delete()
    # delete all buckets linked to the deposit
    res = Bucket.query.join(RecordsBuckets).\
        filter(RecordsBuckets.bucket_id == Bucket.id,
               RecordsBuckets.record_id == self.id).all()
    # remove the deposit from ES
    self.indexer.delete(self)
    # we call the super of Invenio deposit instead of B2Share deposit as
    # Invenio deposit doesn't support the deletion of published deposits
    super(InvenioDeposit, self).delete(force=True)
    # Buckets can only be removed once unlocked, after the deposit is gone.
    for bucket in res:
        bucket.locked = False
        bucket.remove()
def delete(self):
    """Delete a deposit.

    Hard-deletes the reserved record PID (and the parent versioning PID
    when it has no published children), soft-deletes the deposit PID,
    removes the deposit from the search index, then removes the deposit
    itself and finally its buckets.
    """
    deposit_pid = self.pid
    pid_value = deposit_pid.pid_value
    # The record PID shares the deposit's pid_value.
    record_pid = RecordUUIDProvider.get(pid_value).pid
    version_master = PIDVersioning(child=record_pid)
    # every deposit has a parent version after the 2.1.0 upgrade
    # except deleted ones. We check the parent version in case of a delete
    # revert.
    # NOTE(review): PIDVersioning(...) always returns an object, so this
    # assert can never fire; the intended check was probably
    # `version_master.exists` — confirm against invenio-pidrelations.
    assert version_master is not None, 'Unexpected deposit without versioning.'
    # if the record is unpublished hard delete it
    if record_pid.status == PIDStatus.RESERVED:
        version_master.remove_draft_child()
        db.session.delete(record_pid)
    # if the parent doesn't have any published records hard delete it
    if version_master.parent.status == PIDStatus.RESERVED:
        db.session.delete(version_master.parent)
    deposit_pid.delete()
    # delete all buckets linked to the deposit
    res = Bucket.query.join(RecordsBuckets).\
        filter(RecordsBuckets.bucket_id == Bucket.id,
               RecordsBuckets.record_id == self.id).all()
    # remove the deposit from ES
    self.indexer.delete(self)
    # we call the super of Invenio deposit instead of B2Share deposit as
    # Invenio deposit doesn't support the deletion of published deposits
    super(InvenioDeposit, self).delete(force=True)
    # Buckets can only be removed once unlocked, after the deposit is gone.
    for bucket in res:
        bucket.locked = False
        bucket.remove()
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDNodeVersioning and previous record of a record PID.

    :params version_of: record PID.
    :returns: a tuple ``(version_master, prev_version)`` where
        ``version_master`` is the PIDNodeVersioning built from the parent
        PID and ``prev_version`` is the last (i.e. current) record of the
        version chain.
    :raises RecordNotFoundVersioningError: if the PID does not exist or
        is deleted.
    :raises IncorrectRecordVersioningError: if ``version_of`` does not
        reference the last version of the chain.
    """
    # Local imports avoid a circular dependency with the records module.
    from b2share.modules.records.providers import RecordUUIDProvider
    from b2share.modules.records.utils import is_publication
    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        # A deleted PID is treated the same as a missing one.
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    # Resolve the parent versioning PID of the given record PID.
    # NOTE(review): `parents.first()` may return None for an orphan PID,
    # which would make the PIDNodeVersioning below meaningless — confirm
    # every record PID has a parent after the 2.1.0 upgrade.
    parent_pid = PIDNodeVersioning(pid=child_pid).parents.first()
    version_master = PIDNodeVersioning(pid=parent_pid)

    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # check that version_of references the last version of a record
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
def commit(self):
    """Store changes on current instance in database.

    This method extends the default implementation by publishing the
    deposition when 'publication_state' is set to 'published'.

    :returns: self, for chaining.
    :raises MissingModelError: if the deposit has no backing model/json.
    :raises InvalidDepositError: if the referenced community is unknown.
    """
    if self.model is None or self.model.json is None:
        raise MissingModelError()
    # automatically make embargoed records private
    if self.get('embargo_date') and self.get('open_access'):
        if is_under_embargo(self):
            self['open_access'] = False
    if 'community' in self:
        try:
            community = Community.get(self['community'])
        except CommunityDoesNotExistError as e:
            raise InvalidDepositError('Community {} does not exist.'.format(
                self['community'])) from e
        # Run the community's publication workflow, which may update the
        # deposit's publication_state.
        workflow = publication_workflows[community.publication_workflow]
        workflow(self.model, self)
    # publish the deposition if needed
    if (self['publication_state'] == PublicationStates.published.name
            # check invenio-deposit status so that we do not loop
            and self['_deposit']['status'] !=
            PublicationStates.published.name):
        # Retrieve previous version in order to reindex it later.
        previous_version_pid = None
        # Save the previous "last" version for later use
        if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                self.versioning.has_children:
            previous_version_pid = self.versioning.last_child
            previous_version_uuid = str(RecordUUIDProvider.get(
                previous_version_pid.pid_value
            ).pid.object_uuid)
        super(Deposit, self).publish()  # publish() already calls commit()
        # Register parent PID if necessary and update redirect
        self.versioning.update_redirect()
        # Reindex previous version. This is needed in order to update
        # the is_last_version flag
        if previous_version_pid is not None:
            self.indexer.index_by_id(previous_version_uuid)
        # save the action for later indexing
        if g:
            g.deposit_action = 'publish'
    else:
        super(Deposit, self).commit()
        if g:
            g.deposit_action = 'update-metadata'
    return self
def get(self, pid=None, **kwargs):
    """GET a list of record's versions."""
    record_endpoint = 'b2share_records_rest.{0}_item'.format(
        RecordUUIDProvider.pid_type)
    pid_value = request.view_args['pid_value']
    pid = RecordUUIDProvider.get(pid_value).pid
    pid_versioning = PIDVersioning(child=pid)
    # A child PID means a record PID was supplied: resolve its parent
    # versioning PID. Otherwise the supplied PID is already the parent.
    version_parent_pid_value = (
        pid_versioning.parent.pid_value
        if pid_versioning.is_child else pid_value
    )

    child_pid_table = aliased(PersistentIdentifier)
    parent_pid_table = aliased(PersistentIdentifier)
    # Join every child record PID of the version chain with its metadata,
    # ordered by creation time so version numbers are chronological.
    pids_and_meta = db.session.query(
        child_pid_table, RecordMetadata
    ).join(
        PIDRelation,
        PIDRelation.child_id == child_pid_table.id,
    ).join(
        parent_pid_table,
        PIDRelation.parent_id == parent_pid_table.id,
    ).filter(
        parent_pid_table.pid_value == version_parent_pid_value,
        RecordMetadata.id == child_pid_table.object_uuid,
    ).order_by(RecordMetadata.created).all()

    versions = [
        {
            'version': index + 1,
            'id': str(rec_pid.pid_value),
            'url': url_for(record_endpoint,
                           pid_value=str(rec_pid.pid_value),
                           _external=True),
            'created': rec_meta.created,
            'updated': rec_meta.updated,
        }
        for index, (rec_pid, rec_meta) in enumerate(pids_and_meta)
    ]
    return {'versions': versions}
def get(self, pid=None, **kwargs):
    """GET a list of record's versions."""
    record_endpoint = 'b2share_records_rest.{0}_item'.format(
        RecordUUIDProvider.pid_type)
    requested_value = request.view_args['pid_value']
    pid = RecordUUIDProvider.get(requested_value).pid
    versioning = PIDVersioning(child=pid)
    if versioning.is_child:
        # A record PID was given: use its parent versioning PID.
        parent_value = versioning.parent.pid_value
    else:
        # The given PID is already a parent versioning PID.
        parent_value = requested_value

    child_alias = aliased(PersistentIdentifier)
    parent_alias = aliased(PersistentIdentifier)
    # Build the query step by step: child PIDs of the version chain
    # joined with their record metadata, in creation order.
    query = db.session.query(child_alias, RecordMetadata)
    query = query.join(PIDRelation,
                       PIDRelation.child_id == child_alias.id)
    query = query.join(parent_alias,
                       PIDRelation.parent_id == parent_alias.id)
    query = query.filter(
        parent_alias.pid_value == parent_value,
        RecordMetadata.id == child_alias.object_uuid,
    )
    rows = query.order_by(RecordMetadata.created).all()

    versions = []
    for position, (child_pid, metadata) in enumerate(rows, start=1):
        versions.append({
            'version': position,
            'id': str(child_pid.pid_value),
            'url': url_for(record_endpoint,
                           pid_value=str(child_pid.pid_value),
                           _external=True),
            'created': metadata.created,
            'updated': metadata.updated,
        })
    return {'versions': versions}
def check_handles(update, record_pid):
    """Allocate handles for a record and its files, if necessary.

    :param update: if True, mint the missing handles; if False, only
        report what is missing.
    :param record_pid: pid_value of the record to check.
    """
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    record_updated = False

    # Fix: a record may lack the '_pid' field entirely; treat that the
    # same as "no ePIC_PID entry" instead of raising KeyError.
    pid_list = [
        p.get('value') for p in record.get('_pid', [])
        if p.get('type') == 'ePIC_PID'
    ]
    if pid_list:
        click.secho('record {} already has a handle'.format(record_pid),
                    fg='green')
    else:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        if update:
            b2share_pid_minter(rec_pid, record)
            record_updated = True
            click.secho(' handle added to record', fg='green')
        else:
            click.secho('use -u argument to add a handle to the record')

    files_ok = True
    for f in record.get('_files', []):
        if f.get('ePIC_PID'):
            click.secho('file {} already has a handle'.format(f.get('key')),
                        fg='green')
        else:
            click.secho('file {} has no handle'.format(f.get('key')),
                        fg='red')
            files_ok = False
    if update and not files_ok:
        create_file_pids(record)
        record_updated = True
        click.secho(' files updated with handles', fg='green')
    elif not update and not files_ok:
        click.secho('use -u argument to add handles to the files')

    if record_updated:
        # Persist the newly minted handles.
        record.commit()
        db.session.commit()
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDVersioning and previous record of a record PID.

    :params version_of: record PID.
    """
    try:
        resolved_pid = RecordUUIDProvider.get(version_of).pid
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e
    # A deleted PID is treated the same as a missing one.
    if resolved_pid.status == PIDStatus.DELETED:
        raise RecordNotFoundVersioningError()

    version_master = PIDVersioning(child=resolved_pid)
    last_pid = version_master.last_child
    assert last_pid.pid_type == RecordUUIDProvider.pid_type
    previous_record = Record.get_record(last_pid.object_uuid)
    # check that version_of references the last version of a record
    assert is_publication(previous_record.model)
    if last_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(last_pid.pid_value)
    return version_master, previous_record
def check_handles(update, record_pid):
    """Allocate handles for a record and its files, if necessary."""
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    record_updated = False

    # Does the record itself already carry an ePIC handle entry?
    has_record_handle = any(
        entry.get('type') == 'ePIC_PID' for entry in record['_pid']
    )
    if has_record_handle:
        click.secho('record {} already has a handle'.format(record_pid),
                    fg='green')
    elif update:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        b2share_pid_minter(rec_pid, record)
        record_updated = True
        click.secho(' handle added to record', fg='green')
    else:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        click.secho('use -u argument to add a handle to the record')

    files_ok = True
    for file_meta in record.get('_files', []):
        key = file_meta.get('key')
        if file_meta.get('ePIC_PID'):
            click.secho('file {} already has a handle'.format(key),
                        fg='green')
        else:
            click.secho('file {} has no handle'.format(key), fg='red')
            files_ok = False
    if not files_ok:
        if update:
            create_file_pids(record)
            record_updated = True
            click.secho(' files updated with handles', fg='green')
        else:
            click.secho('use -u argument to add handles to the files')

    if record_updated:
        record.commit()
        db.session.commit()
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0.

    Adds a parent versioning PID to every published record and to every
    deposit, and marks PIDs whose record/deposit rows were deleted as
    DELETED.

    :param alembic: alembic migration context (unused here, kept for the
        migration hook signature).
    :param verbose: if True, echo progress messages.
    """
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None
    try:
        if verbose:
            click.secho('migrating deposits and records...')
        with db.session.begin_nested():
            # Migrate published records
            records_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for rec_pid in records_pids:
                if verbose:
                    click.secho(' record {}'.format(rec_pid.pid_value))
                try:
                    record = Record.get_record(rec_pid.object_uuid)
                except NoResultFound:
                    # The record is deleted but not the PID. Fix it.
                    rec_pid.status = PIDStatus.DELETED
                    continue
                # Create parent version PID
                parent_pid = RecordUUIDProvider.create().pid
                version_master = PIDVersioning(parent=parent_pid)
                version_master.insert_draft_child(child=rec_pid)
                version_master.update_redirect()
                # Fix: reuse the record fetched above instead of reading
                # it from the database a second time.
                migrate_record_metadata(record, parent_pid)
            # Migrate deposits
            deposit_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for dep_pid in deposit_pids:
                if verbose:
                    click.secho(' deposit {}'.format(dep_pid.pid_value))
                try:
                    deposit = Deposit.get_record(dep_pid.object_uuid)
                    if deposit['publication_state'] != \
                            PublicationStates.published.name:
                        # The record is not published yet. Reserve the PID.
                        rec_pid = RecordUUIDProvider.create(
                            object_type='rec',
                            pid_value=dep_pid.pid_value,
                        ).pid
                        # Create parent version PID
                        parent_pid = RecordUUIDProvider.create().pid
                        assert parent_pid
                        version_master = PIDVersioning(parent=parent_pid)
                        version_master.insert_draft_child(child=rec_pid)
                    else:
                        # Retrieve previously created version PID
                        rec_pid = RecordUUIDProvider.get(
                            dep_pid.pid_value).pid
                        version_master = PIDVersioning(child=rec_pid)
                        parent_pid = version_master.parent
                        if not parent_pid:
                            click.secho(
                                ' record {} was deleted, but the deposit '
                                'has not been removed'.format(
                                    rec_pid.pid_value),
                                fg='red')
                    if parent_pid:
                        migrate_record_metadata(
                            Deposit.get_record(dep_pid.object_uuid),
                            parent_pid
                        )
                except NoResultFound:
                    # The deposit is deleted but not the PID. Fix it.
                    dep_pid.status = PIDStatus.DELETED
        if verbose:
            click.secho('done migrating deposits.')
    finally:
        # Fix: always restore the indexing function, even when the
        # migration fails, so the hack cannot leak into later operations.
        RecordIndexer.index = old_index_fn
def alembic_upgrade_database_data(alembic, verbose): """Migrate the database data from v2.0.0 to 2.1.0.""" ### Add versioning PIDs ### # Reserve the record PID and versioning PID for unpublished deposits # Hack: disable record indexing during record migration from invenio_indexer.api import RecordIndexer old_index_fn = RecordIndexer.index RecordIndexer.index = lambda s, record: None if verbose: click.secho('migrating deposits and records...') with db.session.begin_nested(): # Migrate published records records_pids = PersistentIdentifier.query.filter( PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type, PersistentIdentifier.status == PIDStatus.REGISTERED, ).all() for rec_pid in records_pids: if verbose: click.secho(' record {}'.format(rec_pid.pid_value)) try: record = Record.get_record(rec_pid.object_uuid) except NoResultFound: # The record is deleted but not the PID. Fix it. rec_pid.status = PIDStatus.DELETED continue # Create parent version PID parent_pid = RecordUUIDProvider.create().pid version_master = PIDVersioning(parent=parent_pid) version_master.insert_draft_child(child=rec_pid) version_master.update_redirect() migrate_record_metadata(Record.get_record(rec_pid.object_uuid), parent_pid) # Migrate deposits deposit_pids = PersistentIdentifier.query.filter( PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type, PersistentIdentifier.status == PIDStatus.REGISTERED, ).all() for dep_pid in deposit_pids: if verbose: click.secho(' deposit {}'.format(dep_pid.pid_value)) try: deposit = Deposit.get_record(dep_pid.object_uuid) if deposit['publication_state'] != \ PublicationStates.published.name: # The record is not published yet. Reserve the PID. 
rec_pid = RecordUUIDProvider.create( object_type='rec', pid_value=dep_pid.pid_value, ).pid # Create parent version PID parent_pid = RecordUUIDProvider.create().pid assert parent_pid version_master = PIDVersioning(parent=parent_pid) version_master.insert_draft_child(child=rec_pid) else: # Retrieve previously created version PID rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid version_master = PIDVersioning(child=rec_pid) parent_pid = version_master.parent if not parent_pid: click.secho( ' record {} was deleted, but the deposit has not been removed' .format(rec_pid.pid_value), fg='red') if parent_pid: migrate_record_metadata( Deposit.get_record(dep_pid.object_uuid), parent_pid) except NoResultFound: # The deposit is deleted but not the PID. Fix it. dep_pid.status = PIDStatus.DELETED if verbose: click.secho('done migrating deposits.') RecordIndexer.index = old_index_fn
def commit(self):
    """Store changes on current instance in database.

    This method extends the default implementation by publishing the
    deposition when 'publication_state' is set to 'published'.
    """
    if 'external_pids' in self:
        # Synchronize the external (B2SAFE) files declared in
        # 'external_pids' with the objects stored in the deposit's bucket.
        deposit_id = self['_deposit']['id']
        recid = PersistentIdentifier.query.filter_by(
            pid_value=deposit_id).first()
        # 'R' is PIDStatus.RESERVED: external files may only be changed
        # while the record PID is still reserved (i.e. unpublished).
        assert recid.status == 'R'
        # NOTE(review): RecordsBuckets.record_id is filtered with the PID
        # value; this relies on the deposit pid_value matching the record
        # UUID — confirm against the deposit minter.
        record_bucket = RecordsBuckets.query.filter_by(
            record_id=recid.pid_value).first()
        bucket = Bucket.query.filter_by(id=record_bucket.bucket_id).first()
        object_versions = ObjectVersion.query.filter_by(
            bucket_id=bucket.id).all()
        # Map file key -> ePIC PID for all declared external files.
        key_to_pid = {
            ext_pid.get('key'): ext_pid.get('ePIC_PID')
            for ext_pid in self['external_pids']
        }
        # for the existing files
        for object_version in object_versions:
            # Only external files (storage class 'B') are synchronized.
            if object_version.file is None or \
                    object_version.file.storage_class != 'B':
                continue
            # check that they are still in the file pids list or remove
            if object_version.key not in key_to_pid:
                ObjectVersion.delete(bucket, object_version.key)
            # check that the uri is still the same or update it
            elif object_version.file.uri != \
                    key_to_pid[object_version.key]:
                db.session.query(FileInstance).\
                    filter(FileInstance.id == object_version.file_id).\
                    update({"uri": key_to_pid[object_version.key]})
        create_b2safe_file(self['external_pids'], bucket)
        del self['external_pids']
    if self.model is None or self.model.json is None:
        raise MissingModelError()
    # automatically make embargoed records private
    if self.get('embargo_date') and self.get('open_access'):
        if is_under_embargo(self):
            self['open_access'] = False
    if 'community' in self:
        try:
            community = Community.get(self['community'])
        except CommunityDoesNotExistError as e:
            raise InvalidDepositError('Community {} does not exist.'.format(
                self['community'])) from e
        # Run the community's publication workflow, which may update the
        # deposit's publication_state.
        workflow = publication_workflows[community.publication_workflow]
        workflow(self.model, self)
    # publish the deposition if needed
    if (self['publication_state'] == PublicationStates.published.name
            # check invenio-deposit status so that we do not loop
            and self['_deposit']['status'] !=
            PublicationStates.published.name):
        # Retrieve previous version in order to reindex it later.
        previous_version_pid = None
        # Save the previous "last" version for later use
        if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                self.versioning.has_children:
            previous_version_pid = self.versioning.last_child
            previous_version_uuid = str(RecordUUIDProvider.get(
                previous_version_pid.pid_value
            ).pid.object_uuid)
        # Remember the external files as they are at publication time.
        external_pids = generate_external_pids(self)
        if external_pids:
            self['_deposit']['external_pids'] = external_pids
        super(Deposit, self).publish()  # publish() already calls commit()
        # Register parent PID if necessary and update redirect
        self.versioning.update_redirect()
        # Reindex previous version. This is needed in order to update
        # the is_last_version flag
        if previous_version_pid is not None:
            self.indexer.index_by_id(previous_version_uuid)
        # save the action for later indexing
        if g:
            g.deposit_action = 'publish'
    else:
        super(Deposit, self).commit()
        if g:
            g.deposit_action = 'update-metadata'
    return self
def create(cls, data, id_=None, version_of=None):
    """Create a deposit with the optional id.

    :params version_of: PID of an existing record. If set, the new record
    will be marked as a new version of this referenced record. If no data
    is provided the new record will be a copy of this record. Note: this
    PID must reference the current last version of a record.
    :raises InvalidDepositError: if reserved fields are set or the
        community is invalid/unknown.
    :raises ValidationError: if the community field is missing or changes
        between versions.
    :raises DraftExistsVersioningError: if the version chain already has
        a draft.
    """
    # check that the status field is not set
    if 'publication_state' in data:
        raise InvalidDepositError(
            'Field "publication_state" cannot be set.')
    data['publication_state'] = PublicationStates.draft.name
    # Set record's schema
    if '$schema' in data:
        raise InvalidDepositError('"$schema" field should not be set.')

    # Retrieve reserved record PID which should have already been created
    # by the deposit minter (The record PID value is the same
    # as the one of the deposit)
    rec_pid = RecordUUIDProvider.get(data['_deposit']['id']).pid
    version_master, prev_version = None, None
    # if this is a new version of an existing record, add the future
    # record pid in the chain of versions.
    if version_of:
        version_master, prev_version = \
            find_version_master_and_previous_record(version_of)
        # The new version must be in the same community
        if data['community'] != prev_version['community']:
            raise ValidationError(
                'The community field cannot change between versions.')
        try:
            version_master.insert_draft_child(rec_pid)
        except Exception as exc:
            # Only one draft is allowed per version chain.
            # Fix: guard against exceptions with empty/non-string args so
            # the original exception is not masked by an IndexError.
            if exc.args and 'Draft child already exists for this ' \
                    'relation' in str(exc.args[0]):
                raise DraftExistsVersioningError(
                    version_master.draft_child
                )
            raise exc
    else:
        # create parent PID
        parent_pid = RecordUUIDProvider.create().pid
        version_master = PIDNodeVersioning(parent=parent_pid)
        version_master.insert_draft_child(child=rec_pid)
    # Mint the deposit with the parent PID
    data['_pid'] = [{
        'value': version_master.parent.pid_value,
        'type': RecordUUIDProvider.parent_pid_type,
    }]
    if 'community' not in data or not data['community']:
        # Fix: the message contained a broken '\s' escape sequence.
        raise ValidationError(
            "Record's metadata has no community field.")
    try:
        community_id = uuid.UUID(data['community'])
    except ValueError as e:
        raise InvalidDepositError(
            'Community ID is not a valid UUID.') from e
    try:
        schema = CommunitySchema.get_community_schema(community_id)
    except CommunitySchemaDoesNotExistError as e:
        raise InvalidDepositError(
            'No schema for community {}.'.format(community_id)) from e

    if version_of:
        data['$schema'] = Deposit._build_deposit_schema(prev_version)
    else:
        from b2share.modules.schemas.serializers import \
            community_schema_draft_json_schema_link
        data['$schema'] = community_schema_draft_json_schema_link(
            schema,
            _external=True
        )
    # create file bucket
    if prev_version and prev_version.files:
        # Clone the bucket from the previous version. This doesn't
        # duplicate files.
        bucket = prev_version.files.bucket.snapshot(lock=False)
        bucket.locked = False
    else:
        bucket = Bucket.create(storage_class=current_app.config[
            'DEPOSIT_DEFAULT_STORAGE_CLASS'
        ])
    if 'external_pids' in data:
        create_b2safe_file(data['external_pids'], bucket)
        del data['external_pids']

    deposit = super(Deposit, cls).create(data, id_=id_)
    db.session.add(bucket)
    db.session.add(RecordsBuckets(
        record_id=deposit.id,
        bucket_id=bucket.id
    ))
    return deposit
def create(cls, data, id_=None, version_of=None):
    """Create a deposit with the optional id.

    :params version_of: PID of an existing record. If set, the new record
    will be marked as a new version of this referenced record. If no data
    is provided the new record will be a copy of this record. Note: this
    PID must reference the current last version of a record.
    :raises InvalidDepositError: if reserved fields are set or the
        community is invalid/unknown.
    :raises ValidationError: if the community field is missing or changes
        between versions.
    :raises DraftExistsVersioningError: if the version chain already has
        a draft.
    """
    # check that the status field is not set
    if 'publication_state' in data:
        raise InvalidDepositError(
            'Field "publication_state" cannot be set.')
    data['publication_state'] = PublicationStates.draft.name
    # Set record's schema
    if '$schema' in data:
        raise InvalidDepositError('"$schema" field should not be set.')

    # Retrieve reserved record PID which should have already been created
    # by the deposit minter (The record PID value is the same
    # as the one of the deposit)
    rec_pid = RecordUUIDProvider.get(data['_deposit']['id']).pid
    version_master, prev_version = None, None
    # if this is a new version of an existing record, add the future
    # record pid in the chain of versions.
    if version_of:
        version_master, prev_version = \
            find_version_master_and_previous_record(version_of)
        # The new version must be in the same community
        if data['community'] != prev_version['community']:
            raise ValidationError(
                'The community field cannot change between versions.')
        try:
            version_master.insert_draft_child(rec_pid)
        except Exception as exc:
            # Only one draft is allowed per version chain.
            # Fix: guard against exceptions with empty/non-string args so
            # the original exception is not masked by an IndexError.
            if exc.args and 'Draft child already exists for this ' \
                    'relation' in str(exc.args[0]):
                raise DraftExistsVersioningError(
                    version_master.draft_child
                )
            raise exc
    else:
        # create parent PID
        parent_pid = RecordUUIDProvider.create().pid
        version_master = PIDVersioning(parent=parent_pid)
        version_master.insert_draft_child(child=rec_pid)
    # Mint the deposit with the parent PID
    data['_pid'] = [{
        'value': version_master.parent.pid_value,
        'type': RecordUUIDProvider.parent_pid_type,
    }]
    if 'community' not in data or not data['community']:
        # Fix: the message contained a broken '\s' escape sequence.
        raise ValidationError(
            "Record's metadata has no community field.")
    try:
        community_id = uuid.UUID(data['community'])
    except ValueError as e:
        raise InvalidDepositError(
            'Community ID is not a valid UUID.') from e
    try:
        schema = CommunitySchema.get_community_schema(community_id)
    except CommunitySchemaDoesNotExistError as e:
        raise InvalidDepositError(
            'No schema for community {}.'.format(community_id)) from e

    if version_of:
        data['$schema'] = Deposit._build_deposit_schema(prev_version)
    else:
        from b2share.modules.schemas.serializers import \
            community_schema_draft_json_schema_link
        data['$schema'] = community_schema_draft_json_schema_link(
            schema,
            _external=True
        )
    # create file bucket
    if prev_version and prev_version.files:
        # Clone the bucket from the previous version. This doesn't
        # duplicate files.
        bucket = prev_version.files.bucket.snapshot(lock=False)
        bucket.locked = False
    else:
        bucket = Bucket.create(storage_class=current_app.config[
            'DEPOSIT_DEFAULT_STORAGE_CLASS'
        ])
    if 'external_pids' in data:
        create_b2safe_file(data['external_pids'], bucket)
        del data['external_pids']

    deposit = super(Deposit, cls).create(data, id_=id_)
    db.session.add(bucket)
    db.session.add(RecordsBuckets(
        record_id=deposit.id,
        bucket_id=bucket.id
    ))
    return deposit