def delete(self):
    """Delete a deposit."""
    deposit_pid = self.pid
    pid_value = deposit_pid.pid_value
    record_pid = RecordUUIDProvider.get(pid_value).pid
    version_master = PIDVersioning(child=record_pid)
    # every deposit has a parent version after the 2.1.0 upgrade, except
    # deleted ones. We check the parent version in case of a delete revert.
    assert version_master is not None, \
        'Unexpected deposit without versioning.'
    # if the record is unpublished, hard delete it
    if record_pid.status == PIDStatus.RESERVED:
        version_master.remove_draft_child()
        db.session.delete(record_pid)
    # if the parent doesn't have any published records, hard delete it
    if version_master.parent.status == PIDStatus.RESERVED:
        db.session.delete(version_master.parent)
    deposit_pid.delete()
    # delete all buckets linked to the deposit
    res = Bucket.query.join(RecordsBuckets).\
        filter(RecordsBuckets.bucket_id == Bucket.id,
               RecordsBuckets.record_id == self.id).all()
    # remove the deposit from ES
    self.indexer.delete(self)
    # we call the super of Invenio deposit instead of B2Share deposit as
    # Invenio deposit doesn't support the deletion of published deposits
    super(InvenioDeposit, self).delete(force=True)
    for bucket in res:
        bucket.locked = False
        bucket.remove()

def doi_ish_view_method(parent_pid_value=0, version=0):
    """DOI-like item version endpoint view.

    :param pid: PID value.
    :returns: Redirect to correct version.
    """
    try:
        p_pid = PersistentIdentifier.get(
            'parent', 'parent:' + str(parent_pid_value))
    except PIDDoesNotExistError:
        p_pid = None

    if p_pid:
        pid_ver = PIDVersioning(parent=p_pid)
        all_versions = list(
            pid_ver.get_children(ordered=True, pid_status=None))
        if version == 0 or version == len(all_versions):
            return redirect(
                url_for('invenio_records_ui.recid',
                        pid_value=pid_ver.last_child.pid_value))
        elif version <= len(all_versions):
            version_pid = all_versions[(version - 1)]
            current_app.logger.info(version_pid.__dict__)
            if version_pid.status == PIDStatus.REGISTERED:
                return redirect(
                    url_for('invenio_records_ui.recid',
                            pid_value=version_pid.pid_value))
    return abort(404)

def versioning_published_record(uuid):
    """Migrate a published record."""
    record = ZenodoRecord.get_record(uuid)
    if 'conceptrecid' in record:
        return

    # ASSERT ZENODO DOI ONLY!
    assert 'conceptrecid' not in record, "Record already migrated"
    # doi = PersistentIdentifier.get('doi', str(record['doi']))
    # assert is_local_doi(doi.pid_value), 'DOI is not controlled by Zenodo.'
    conceptrecid = zenodo_concept_recid_minter(uuid, record)
    conceptrecid.register()
    recid = PersistentIdentifier.get('recid', str(record['recid']))
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_child(recid)
    record.commit()

    # Some old records have no deposit ID, and some lack '_deposit' entirely
    if ('_deposit' in record and 'id' in record['_deposit']
            and record['_deposit']['id']):
        try:
            depid = PersistentIdentifier.get(
                'depid', str(record['_deposit']['id']))
            deposit = ZenodoDeposit.get_record(depid.object_uuid)
            deposit['conceptrecid'] = conceptrecid.pid_value
            if deposit['_deposit']['status'] == 'draft':
                deposit['_deposit']['pid']['revision_id'] = \
                    deposit['_deposit']['pid']['revision_id'] + 1
            deposit.commit()
        except PIDDoesNotExistError:
            pass
    db.session.commit()

def delete(self, delete_published=False, *args, **kwargs):
    """Delete the deposit.

    :param delete_published: If True, even the deposit of a published record
        will be deleted (usually used by admin operations).
    :type delete_published: bool
    """
    is_published = self['_deposit'].get('pid')
    if is_published and not delete_published:
        raise PIDInvalidAction()

    # Delete the recid
    recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])
    versioning = PIDVersioning(child=recid)
    if versioning.exists:
        if versioning.draft_child and \
                self.pid == versioning.draft_child_deposit:
            versioning.remove_draft_child()
        if versioning.last_child:
            index_siblings(versioning.last_child,
                           children=versioning.children.all(),
                           include_pid=True,
                           neighbors_eager=True,
                           with_deposits=True)

    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)

    if 'conceptrecid' in self:
        concept_recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['conceptrecid'])
        if concept_recid.status == PIDStatus.RESERVED:
            db.session.delete(concept_recid)

    # Completely remove the bucket
    bucket = self.files.bucket
    with db.session.begin_nested():
        # Remove the Record-Bucket link
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        mp_q = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects
        Part.query.filter(
            Part.upload_id.in_(mp_q.with_entities(
                MultipartObject.upload_id).subquery())
        ).delete(synchronize_session='fetch')
        mp_q.delete(synchronize_session='fetch')
    bucket.locked = False
    bucket.remove()

    depid = kwargs.get('pid', self.pid)
    if depid:
        depid.delete()

    # NOTE: We call the parent of Deposit, invenio_records.api.Record, since
    # we need to completely override everything that the Deposit.delete
    # method does.
    return super(Deposit, self).delete(*args, **kwargs)

def _publish_new(self, id_=None):
    """Publish a new deposit with communities handling."""
    dep_comms = set(self.pop('communities', []))
    record = super(ZenodoDeposit, self)._publish_new(id_=id_)
    conceptrecid = PersistentIdentifier.get(
        'recid', record['conceptrecid'])
    pv = PIDVersioning(parent=conceptrecid)
    if pv.children.count() > 1:
        files_set = set(f.get_version().file.checksum for f in self.files)
        for prev_recid in pv.children.all()[:-1]:
            rec = ZenodoRecord.get_record(prev_recid.object_uuid)
            prev_files_set = set(f.get_version().file.checksum
                                 for f in rec.files)
            if files_set == prev_files_set:
                raise VersioningFilesError()

        prev_recid = pv.children.all()[-2]
        rec_comms = set(ZenodoRecord.get_record(
            prev_recid.get_assigned_object()).get('communities', []))
    else:
        rec_comms = set()

    record = self._sync_communities(dep_comms, rec_comms, record)
    record.commit()

    # Update the concept recid redirection
    pv.update_redirect()
    RecordDraft.unlink(record.pid, self.pid)
    index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

    return record

def create_deposit_and_record(pid_value, owner):
    """Utility function for creating records and deposits."""
    recid = PersistentIdentifier.create(
        'recid', pid_value, status=PIDStatus.RESERVED)
    # NOTE: ``conceptrecid`` is not defined in this snippet; it is expected
    # to be available from the enclosing scope (e.g. a test fixture).
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)

    depid = PersistentIdentifier.create(
        'depid', pid_value, status=PIDStatus.REGISTERED)
    deposit = ZenodoRecord.create({'_deposit': {'id': depid.pid_value},
                                   'conceptrecid': conceptrecid.pid_value,
                                   'recid': recid.pid_value})
    deposit.commit()
    depid.assign('rec', deposit.id)

    record_metadata = deepcopy(minimal_record)
    record_metadata['_deposit'] = {'id': depid.pid_value}
    record_metadata['conceptrecid'] = conceptrecid.pid_value
    record_metadata['recid'] = int(recid.pid_value)
    record_metadata['owners'] = [owner.id]
    record = ZenodoRecord.create(record_metadata)
    zenodo_record_minter(record.id, record)
    record.commit()

    return (depid, deposit, recid, record)

def preprocess_related_identifiers(pid, record, result):
    """Preprocess related identifiers for record serialization.

    Resolves the passed pid to the proper `recid` in order to add related
    identifiers from PID relations.
    """
    recid_value = record.get('recid')
    if pid.pid_type == 'doi' and pid.pid_value == record.get('conceptdoi'):
        recid_value = record.get('conceptrecid')
        result['metadata']['doi'] = record.get('conceptdoi')

    recid = (pid if pid.pid_value == recid_value
             else PersistentIdentifier.get(
                 pid_type='recid', pid_value=recid_value))

    if recid.pid_value == record.get('conceptrecid'):
        pv = PIDVersioning(parent=recid)
    else:
        pv = PIDVersioning(child=recid)

    # Serialize PID versioning as related identifiers
    if pv.exists:
        rels = serialize_related_identifiers(recid)
        if rels:
            result['metadata'].setdefault(
                'related_identifiers', []).extend(rels)
    return result

def test_version_pids_create(app, db):
    # Create a child, initialize the Versioning API and create a parent
    assert PersistentIdentifier.query.count() == 0

    # Create a child
    h1v1 = PersistentIdentifier.create('recid', '12345', object_type='rec',
                                       status=PIDStatus.REGISTERED)
    assert PersistentIdentifier.query.count() == 1

    pv = PIDVersioning(child=h1v1)
    # Create a parent
    pv.create_parent('12345.parent')
    assert PersistentIdentifier.query.count() == 2
    assert pv.parent.get_redirect() == h1v1
    assert pv.parent.status == PIDStatus.REDIRECTED
    # Make sure 'pid_type', 'object_type' and 'status' are inherited
    # from the child
    assert pv.parent.pid_type == pv.child.pid_type
    assert pv.parent.object_type == pv.child.object_type

    pr = PIDRelation.query.one()
    assert pr.child == h1v1
    assert pr.parent == pv.parent
    VERSION = resolve_relation_type_config('version').id
    assert pr.relation_type == VERSION
    assert pr.index == 0

def test_deposit_versioning_draft_child_unlinking_bug(
        app, db, communities, deposit, deposit_file):
    """Bug with draft_child_deposit unlinking.

    Bug where a draft_child_deposit was unlinked from a new version draft,
    when another version of a record was edited and published.
    """
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit.fetch_published()
    recid_v1_value = recid_v1.pid_value

    # Initiate a new version draft
    deposit_v1.newversion()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    pv = PIDVersioning(child=recid_v1)
    assert pv.draft_child_deposit
    assert pv.draft_child

    deposit_v1 = deposit_v1.edit()
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    pv = PIDVersioning(child=recid_v1)
    # Make sure the draft child deposit was not unlinked due to publishing
    # of the edited draft
    assert pv.draft_child_deposit
    assert pv.draft_child

def cancel_action(activity_id='0', action_id=0):
    """Cancel an action."""
    post_json = request.get_json()
    work_activity = WorkActivity()
    activity = dict(activity_id=activity_id,
                    action_id=action_id,
                    action_version=post_json.get('action_version'),
                    action_status=ActionStatusPolicy.ACTION_CANCELED,
                    commond=post_json.get('commond'))

    # Clear the deposit
    activity_detail = work_activity.get_activity_detail(activity_id)
    if activity_detail is not None:
        cancel_item_id = activity_detail.item_id
        if cancel_item_id is None:
            pid_value = post_json.get('pid_value')
            if pid_value is not None:
                pid = PersistentIdentifier.get('recid', pid_value)
                cancel_item_id = pid.object_uuid
        if cancel_item_id is not None:
            cancel_record = WekoDeposit.get_record(cancel_item_id)
            if cancel_record is not None:
                cancel_deposit = WekoDeposit(
                    cancel_record, cancel_record.model)
                cancel_deposit.clear()
                # Remove draft child
                cancel_pid = PersistentIdentifier.get_by_object(
                    pid_type='recid', object_type='rec',
                    object_uuid=cancel_item_id)
                cancel_pv = PIDVersioning(child=cancel_pid)
                if cancel_pv.exists:
                    previous_pid = cancel_pv.previous
                    if previous_pid is not None:
                        activity.update(
                            dict(item_id=previous_pid.object_uuid))
                    cancel_pv.remove_child(cancel_pid)

    work_activity.upt_activity_action_status(
        activity_id=activity_id, action_id=action_id,
        action_status=ActionStatusPolicy.ACTION_CANCELED)

    rtn = work_activity.quit_activity(activity)
    if rtn is None:
        work_activity.upt_activity_action_status(
            activity_id=activity_id, action_id=action_id,
            action_status=ActionStatusPolicy.ACTION_DOING)
        return jsonify(code=-1,
                       msg=_('Error! Cannot process quit activity!'))

    return jsonify(code=0,
                   msg=_('success'),
                   data={'redirect': url_for(
                       'weko_workflow.display_activity',
                       activity_id=activity_id)})

def retrieve_version_master(child_pid):
    """Retrieve the PIDVersioning from a child PID."""
    if type(child_pid).__name__ == "FetchedPID":
        # when getting a pid-like object from elasticsearch
        child_pid = child_pid.provider.get(child_pid.pid_value).pid
    parent_pid = PIDVersioning(child=child_pid).parent
    if not parent_pid:
        return None
    return PIDVersioning(parent=parent_pid)

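# --- Usage sketch (not part of the original snippet) ---
# A minimal, hypothetical example of calling retrieve_version_master() to
# list sibling versions of a record PID. The helper name below is made up
# for illustration; only retrieve_version_master() comes from the snippet
# above.
def _example_list_sibling_versions(record_pid):
    version_master = retrieve_version_master(record_pid)
    if version_master is None:
        # The record has no versioning parent
        return []
    # All sibling version PIDs, oldest first
    return version_master.children.all()
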
def serialize_related_identifiers(pid):
    """Serialize PID Versioning relations as related_identifiers metadata."""
    pv = PIDVersioning(child=pid)
    related_identifiers = []
    if pv.exists:
        rec = ZenodoRecord.get_record(pid.get_assigned_object())
        # External DOI records don't have a Concept DOI
        if 'conceptdoi' in rec:
            ri = {
                'scheme': 'doi',
                'relation': 'isVersionOf',
                'identifier': rec['conceptdoi']
            }
            related_identifiers.append(ri)

        # TODO: We do not serialize previous/next versions to
        # related identifiers because of the semantic-versioning cases
        # (e.g. GitHub releases of minor versions)
        #
        # children = pv.children.all()
        # idx = children.index(pid)
        # left = children[:idx]
        # right = children[idx + 1:]
        # for p in left:
        #     rec = ZenodoRecord.get_record(p.get_assigned_object())
        #     ri = {
        #         'scheme': 'doi',
        #         'relation': 'isNewVersionOf',
        #         'identifier': rec['doi']
        #     }
        #     related_identifiers.append(ri)
        # for p in right:
        #     rec = ZenodoRecord.get_record(p.get_assigned_object())
        #     ri = {
        #         'scheme': 'doi',
        #         'relation': 'isPreviousVersionOf',
        #         'identifier': rec['doi']
        #     }
        #     related_identifiers.append(ri)

    pv = PIDVersioning(parent=pid)
    if pv.exists:
        for p in pv.children:
            rec = ZenodoRecord.get_record(p.get_assigned_object())
            ri = {
                'scheme': 'doi',
                'relation': 'hasVersion',
                'identifier': rec['doi']
            }
            related_identifiers.append(ri)

    return related_identifiers

def create_versioned_record(recid_value, conceptrecid):
    """Utility function for creating versioned records."""
    recid = PersistentIdentifier.create(
        'recid', recid_value, status=PIDStatus.RESERVED)
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)

    record_metadata = deepcopy(minimal_record)
    record_metadata['conceptrecid'] = conceptrecid.pid_value
    record_metadata['recid'] = int(recid.pid_value)
    record = ZenodoRecord.create(record_metadata)
    zenodo_record_minter(record.id, record)
    record.commit()

    return recid, record

def versioning_new_deposit(uuid):
    """Migrate a yet-unpublished deposit to a versioning scheme."""
    deposit = ZenodoDeposit.get_record(uuid)
    if 'conceptrecid' in deposit:
        return

    # ASSERT ZENODO DOI ONLY!
    assert 'conceptrecid' not in deposit, 'Concept RECID already in record.'
    conceptrecid = zenodo_concept_recid_minter(uuid, deposit)
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    depid = PersistentIdentifier.get('depid', str(deposit['_deposit']['id']))
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)
    RecordDraft.link(recid, depid)
    deposit.commit()
    db.session.commit()

def test_propagation_with_newversion_open(
        app, db, users, communities, deposit, deposit_file):
    """Adding old versions to a community should propagate to all drafts."""
    # deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    deposit_v1 = deposit_v1.edit()

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    # New version in 'deposit_v2' has not been published yet
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    # depid_v1_value = deposit_v1['_deposit']['id']
    # depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    deposit_v1['communities'] = ['c1', 'c2', ]
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    assert deposit_v2['communities'] == ['c1', 'c2']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['communities'] == ['c1', ]

def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'], db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDVersioning(
            record_pid).parent.pid_value == parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']

def test_autoadd_explicit_newversion(
        app, db, users, communities, deposit, deposit_file,
        communities_autoadd_enabled):
    """Explicitly request the auto-added communities in a new version."""
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == ['grants_comm', ]
    assert deposit_v1.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
    assert record_v2.get('communities', []) == ['grants_comm', ]
    assert deposit_v2.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']

def update_record_statistics(start_date=None, end_date=None):
    """Update the "_stats" field of affected records."""
    start_date = dateutil_parse(start_date) if start_date else None
    end_date = dateutil_parse(end_date) if end_date else None
    aggr_configs = {}

    if not start_date and not end_date:
        start_date = datetime.utcnow()
        end_date = datetime.utcnow()
        for aggr_name, aggr_cfg in current_stats.aggregations.items():
            aggr = aggr_cfg.cls(name=aggr_cfg.name, **aggr_cfg.params)
            if not Index(aggr.index, using=aggr.client).exists():
                if not Index(aggr.event_index, using=aggr.client).exists():
                    start_date = min(start_date, datetime.utcnow())
                else:
                    start_date = min(
                        start_date, aggr._get_oldest_event_timestamp())

            # Retrieve the last two bookmarks
            bookmarks = aggr.list_bookmarks(limit=2)
            if len(bookmarks) >= 1:
                end_date = max(
                    end_date,
                    datetime.strptime(bookmarks[0].date, aggr.doc_id_suffix))
            if len(bookmarks) == 2:
                start_date = min(
                    start_date,
                    datetime.strptime(bookmarks[1].date, aggr.doc_id_suffix))
            aggr_configs[aggr.index] = aggr
    elif start_date and end_date:
        for aggr_name, aggr_cfg in current_stats.aggregations.items():
            aggr = aggr_cfg.cls(name=aggr_cfg.name, **aggr_cfg.params)
            aggr_configs[aggr.index] = aggr
    else:
        return

    # Get the conceptrecids for all the affected records between the two
    # dates
    conceptrecids = set()
    for aggr_index, aggr in aggr_configs.items():
        query = Search(
            using=aggr.client,
            index=aggr.index,
            doc_type=aggr.doc_type,
        ).filter('range', timestamp={
            'gte': start_date.replace(microsecond=0).isoformat() + '||/d',
            'lte': end_date.replace(microsecond=0).isoformat() + '||/d'
        }).source(include='conceptrecid')
        conceptrecids |= {b.conceptrecid for b in query.scan()}

    indexer = RecordIndexer()
    for conceptrecid_val in conceptrecids:
        conceptrecid = PersistentIdentifier.get('recid', conceptrecid_val)
        pv = PIDVersioning(parent=conceptrecid)
        children_recids = pv.children.all()
        indexer.bulk_index([str(p.object_uuid) for p in children_recids])

def get_data(self, record_id, query_date=None, get_period=False):
    """Public interface of _get_data."""
    result = dict(total=0, country=dict(), period=list())
    recid = PersistentIdentifier.query.filter_by(
        pid_type='recid', object_uuid=record_id).first()
    if recid:
        versioning = PIDVersioning(child=recid)
        if not versioning.exists:
            return self._get_data(record_id, query_date, get_period)
        _data = list(
            self._get_data(record_id=child.object_uuid, get_period=True)
            for child in versioning.children.all())
        countries = result['country']
        for _idx in _data:
            for key, value in _idx['country'].items():
                countries[key] = countries.get(key, 0) + value
            result['total'] = result['total'] + _idx['total']
            result['period'] = _idx.get('period', [])
    return result

def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Connect to before_record_index signal to transform record for ES."""
    if not index.startswith('records-') or record.get('$schema') is None:
        return

    # Remove files from the index if the record is not open access.
    if json['access_right'] != 'open' and '_files' in json:
        del json['_files']
    else:
        # Compute file count and total size
        files = json.get('_files', [])
        json['filecount'] = len(files)
        json['size'] = sum([f.get('size', 0) for f in files])

    pid = PersistentIdentifier.query.filter(
        PersistentIdentifier.object_uuid == record.id,
        PersistentIdentifier.pid_type ==
        current_pidrelations.primary_pid_type,
    ).one_or_none()

    if pid:
        pv = PIDVersioning(child=pid)
        if pv.exists:
            relations = serialize_relations(pid)
        else:
            relations = {'version': [{'is_last': True, 'index': 0}, ]}
        if relations:
            json['relations'] = relations
        rels = serialize_related_identifiers(pid)
        if rels:
            json.setdefault('related_identifiers', []).extend(rels)

    # Remove internal data.
    if '_internal' in json:
        del json['_internal']

def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Connect to before_record_index signal to transform record for ES.

    In order to avoid that a record and its published deposit differ
    (e.g. if an embargo task updates the record), every time we index a
    record we also index the deposit and overwrite its content with that
    of the record.

    :param sender: Sender of the signal.
    :param json: JSON to be passed to Elasticsearch.
    :type json: `invenio_records.api.Deposit`
    :param record: Indexed deposit record.
    :type record: `invenio_records.api.Deposit`
    :param index: Elasticsearch index name.
    :type index: str
    """
    if not index.startswith('deposits-records-'):
        return
    if not isinstance(record, ZenodoDeposit):
        record = ZenodoDeposit(record, model=record.model)

    if record['_deposit']['status'] == 'published':
        schema = json['$schema']

        pub_record = record.fetch_published()[1]

        # Temporarily set to draft mode to ensure that `clear` can be called
        json['_deposit']['status'] = 'draft'
        json.clear()
        json.update(copy.deepcopy(pub_record.replace_refs()))

        # Set back to published mode and restore the schema.
        json['_deposit']['status'] = 'published'
        json['$schema'] = schema
        json['_updated'] = pub_record.updated
    else:
        json['_updated'] = record.updated
    json['_created'] = record.created

    # Compute file count and total file size
    files = json.get('_files', [])
    json['filecount'] = len(files)
    json['size'] = sum([f.get('size', 0) for f in files])

    recid = record.get('recid')
    if recid:
        pid = PersistentIdentifier.get('recid', recid)
        pv = PIDVersioning(child=pid)
        relations = serialize_relations(pid)
        if pv.exists:
            if pv.draft_child_deposit:
                is_last = (pv.draft_child_deposit.pid_value
                           == record['_deposit']['id'])
                relations['version'][0]['is_last'] = is_last
                relations['version'][0]['count'] += 1
        else:
            relations = {'version': [{'is_last': True, 'index': 0}, ]}
        if relations:
            json['relations'] = relations

def record_minter(record_uuid, data):
    """Mint a record PID and insert it as a child of its concept PID."""
    parent = data.get('conceptrecid')
    if not parent:
        parent_pid = RecordIdProvider.create(
            object_type='rec', object_uuid=None,
            status=PIDStatus.REGISTERED).pid
        data['conceptrecid'] = parent_pid.pid_value
    else:
        parent_pid = PersistentIdentifier.get(
            pid_type=RecordIdProvider.pid_type, pid_value=parent)

    provider = RecordIdProvider.create('rec', record_uuid)
    data['recid'] = provider.pid.pid_value

    versioning = PIDVersioning(parent=parent_pid)
    versioning.insert_child(child=provider.pid)

    return provider.pid

def get_irs(record, community_id=None, pid=None):
    """Get all inclusion requests for a given record and community.

    :param record: record for which the inclusion requests are fetched.
        This includes all of the record's versions.
    :param community_id: Narrow down the query to the given community.
        Query all communities if 'None'.
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=pid)
    if pv.exists:
        sq = pv.children.with_entities(
            PersistentIdentifier.object_uuid).subquery()
        filter_cond = [InclusionRequest.id_record.in_(sq), ]
        if community_id:
            filter_cond.append(
                InclusionRequest.id_community == community_id)
        q = (db.session.query(InclusionRequest).filter(*filter_cond))
    else:
        q = InclusionRequest.query.filter_by(id_record=record.id).order_by(
            InclusionRequest.id_community)
    return q

def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

    RecordsBuckets.create(record=deposit.model, bucket=bucket)

    recid = PersistentIdentifier.get(
        'recid', str(data['recid']))
    conceptrecid = PersistentIdentifier.get(
        'recid', str(data['conceptrecid']))
    depid = PersistentIdentifier.get(
        'depid', str(data['_deposit']['id']))
    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)

    return deposit

def migrate_concept_recid_sips(recid, overwrite=False):
    """Create BagIt metadata for SIPs."""
    pid = PersistentIdentifier.get('recid', recid)
    pv = PIDVersioning(parent=pid)
    all_sips = []
    for child in pv.children:
        pid, rec = record_resolver.resolve(child.pid_value)
        rsips = RecordSIP.query.filter_by(pid_id=pid.id).order_by(
            RecordSIP.created)
        all_sips.append([rs.sip.id for rs in rsips])

    base_sip_id = None
    for sipv in all_sips:
        for idx, sip_id in enumerate(sipv):
            sip = SIP.query.get(sip_id)
            base_sip = SIP.query.get(base_sip_id) if base_sip_id else None
            bia = BagItArchiver(SIPApi(sip), patch_of=base_sip,
                                include_all_previous=(idx > 0))
            bmeta = BagItArchiver.get_bagit_metadata(sip)
            if (not bmeta) or overwrite:
                bia.save_bagit_metadata(overwrite=True)
            base_sip_id = sip_id
    db.session.commit()

def get_all_deposit_siblings(deposit):
    """Get all siblings of the deposit."""
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_pidrelations.contrib.versioning import PIDVersioning
    recid = deposit['recid']
    rec_pid = PersistentIdentifier.get(pid_type='recid',
                                       pid_value=str(recid))
    pv = PIDVersioning(child=rec_pid)
    return [pid.get_assigned_object() for pid in pv.children]

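# --- Usage sketch (not part of the original snippet) ---
# A hypothetical example of re-indexing every record version related to a
# deposit. The RecordIndexer import and bulk_index call mirror usage seen
# elsewhere in this collection; ``deposit`` is assumed to be a dict-like
# deposit record with a 'recid' field.
def _example_reindex_deposit_siblings(deposit):
    from invenio_indexer.api import RecordIndexer
    sibling_uuids = get_all_deposit_siblings(deposit)
    RecordIndexer().bulk_index([str(uuid) for uuid in sibling_uuids])
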
def test_record_delete_v2(mocker, app, db, users, deposit, deposit_file):
    """Delete a record (only last version) with multiple versions."""
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1.newversion()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    # Stash a copy of v1 for later
    rec1 = deepcopy(record_v1)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    # Stash a copy of v2 for later
    rec2 = deepcopy(record_v2)
    rec2_id = str(record_v2.id)

    assert dc_mock().metadata_delete.call_count == 0
    # Remove the second (last) version
    delete_record(rec2_id, 'spam', users[0]['id'])

    # Make sure all v2 PIDs are deleted
    assert PID.get('doi', rec2['doi']).status == PIDStatus.DELETED
    assert PID.get('recid', rec2['recid']).status == PIDStatus.DELETED
    assert PID.get('depid', rec2['_deposit']['id']).status == \
        PIDStatus.DELETED

    # Concept DOI should be left registered
    assert PID.get('doi', rec2['conceptdoi']).status == PIDStatus.REGISTERED

    # Make sure the conceptrecid is redirecting to v1
    crecid = PID.get('recid', rec2['conceptrecid'])
    assert crecid.status == PIDStatus.REDIRECTED
    assert crecid.get_redirect() == PID.get('recid', rec1['recid'])

    # Make sure the v1 PIDs are kept intact
    assert PID.get('oai', rec1['_oai']['id']).status == PIDStatus.REGISTERED
    assert PID.get('doi', rec1['doi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec1['recid']).status == PIDStatus.REGISTERED
    assert PID.get('depid', rec1['_deposit']['id']).status == \
        PIDStatus.REGISTERED

    # Only the v2 DOI should be deleted
    assert dc_mock().doi_post.call_count == 2
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.2')
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.1')
    assert dc_mock().metadata_delete.call_count == 1
    dc_mock().metadata_delete.assert_any_call('10.5072/zenodo.3')
    record = Record.get_record(rec2_id)
    assert record['removed_by'] == users[0]['id']
    assert record['removal_reason'] == 'Spam record, removed by Zenodo staff.'

def test_basic_api(app, db, communities, deposit, deposit_file):
    """Test basic workflow using Deposit and Communities API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c2' through deposit v2
    deposit_v2['communities'] = ['c1', 'c2', ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')

    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (another version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting an individual record to a community should propagate the
    # changes to all versions
    assert record_v1['communities'] == record_v2['communities'] == \
        ['c1', 'c2', ]

    # Removing the communities from deposit_v1 should remove them from both
    # published records
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []

def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Connect to before_record_index signal to transform record for ES."""
    if not index.startswith('records-') or record.get('$schema') is None:
        return

    # Remove files from the index if the record is not open access.
    if json['access_right'] != 'open' and '_files' in json:
        del json['_files']
    else:
        # Compute file count and total size
        files = json.get('_files', [])
        json['filecount'] = len(files)
        json['size'] = sum([f.get('size', 0) for f in files])

    pid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_value == str(record['recid']),
        PersistentIdentifier.pid_type == 'recid',
        PersistentIdentifier.object_uuid == record.id,
    ).one_or_none()

    if pid:
        pv = PIDVersioning(child=pid)
        if pv.exists:
            relations = serialize_relations(pid)
        else:
            relations = {'version': [{'is_last': True, 'index': 0}, ]}
        if relations:
            json['relations'] = relations
        rels = serialize_related_identifiers(pid)
        if rels:
            json.setdefault('related_identifiers', []).extend(rels)

    for loc in json.get('locations', []):
        if loc.get('lat') and loc.get('lon'):
            loc['point'] = {'lat': loc['lat'], 'lon': loc['lon']}

    # Remove internal data.
    if '_internal' in json:
        del json['_internal']

    json['_stats'] = build_record_stats(record['recid'],
                                        record.get('conceptrecid'))

    custom_es_fields = build_record_custom_fields(json)
    for es_field, es_value in custom_es_fields.items():
        json[es_field] = es_value

def versioning_link(recids):
    """Link several records into a versioning scheme.

    Supports cases with some records being already versioned, as long as
    they are all within a single versioning scheme.

    For example, given the following records:

    - 123, 234, 345 (record with 3 versions)
    - 543, 432 (record with 2 versions)
    - 111 (single, non-versioned record)
    - 222 (single, non-versioned record)

    The following cases are supported (Good) or not supported (Error):

    versioning_link 111 123 234 345
        (Good - will add 111 as the first version)
    versioning_link 111 222
        (Good - will create a new versioning scheme)
    versioning_link 345 123 234
        (Good - no new records linked, but will reorder the records in
        the versioning list)
    versioning_link 123 234 543
        (Error - trying to link two versioned records)
    versioning_link 123 234
        (Error - must specify all children)
    """
    int_recids = [int(recid) for recid in recids]
    if sorted(int_recids) != int_recids and not click.confirm(
            u'Requested RECIDS are not in the order of creation. Continue?'):
        click.echo(click.style(u'Record linking aborted.', fg='green'))
        return
    recids_records = [
        record_resolver.resolve(recid_val) for recid_val in recids
    ]
    upgraded = [(recid, rec) for recid, rec in recids_records
                if 'conceptdoi' in rec]
    if len(upgraded) == 1 and not click.confirm(
            u'Recid {0} already migrated. Its Concept recid: {1} will be '
            u'used as the base for the Concept DOI in the versioning '
            u'linking. Continue?'):
        return
    elif len(upgraded) > 1:
        i_recids = [int(recid) for recid in recids]
        child_recids = [
            int(recid.pid_value) for recid in
            PIDVersioning(child=upgraded[0][0]).children.all()
        ]
        if not all(cr in i_recids for cr in child_recids):
            click.echo(u'All children recids ({0}) of the upgraded record '
                       u'need to be specified. Aborting.'.format(
                           [recid for recid in child_recids]))
            return
        i_upgraded = [int(recid.pid_value) for recid, rec in upgraded]
        if set(child_recids) != set(i_upgraded):
            click.echo(u'Found multiple upgraded records {0}, which do not '
                       u'belong to a single versioning scheme. Aborting.'
                       u''.format(i_upgraded,
                                  [recid for recid in child_recids]))
            return
    versioning_link_records(recids)

def _create_records(base_metadata, total, versions, files):
    """Create test records with the given number of versions and files."""
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create(
            'recid', str(conceptrecid_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })

            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)

            recid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'],
                object_type='rec', object_uuid=record.id, status='R')
            versioning.insert_child(recid)

            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()
            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records

def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to v2.1.0."""
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during the record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    if verbose:
        click.secho('migrating deposits and records...')
    with db.session.begin_nested():
        # Migrate published records
        records_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for rec_pid in records_pids:
            if verbose:
                click.secho('    record {}'.format(rec_pid.pid_value))
            try:
                record = Record.get_record(rec_pid.object_uuid)
            except NoResultFound:
                # The record is deleted but not the PID. Fix it.
                rec_pid.status = PIDStatus.DELETED
                continue
            # Create the parent version PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)
            version_master.update_redirect()
            migrate_record_metadata(
                Record.get_record(rec_pid.object_uuid),
                parent_pid
            )

        # Migrate deposits
        deposit_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for dep_pid in deposit_pids:
            if verbose:
                click.secho('    deposit {}'.format(dep_pid.pid_value))
            try:
                deposit = Deposit.get_record(dep_pid.object_uuid)

                if deposit['publication_state'] != \
                        PublicationStates.published.name:
                    # The record is not published yet. Reserve the PID.
                    rec_pid = RecordUUIDProvider.create(
                        object_type='rec',
                        pid_value=dep_pid.pid_value,
                    ).pid
                    # Create the parent version PID
                    parent_pid = RecordUUIDProvider.create().pid
                    assert parent_pid
                    version_master = PIDVersioning(parent=parent_pid)
                    version_master.insert_draft_child(child=rec_pid)
                else:
                    # Retrieve the previously created version PID
                    rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid
                    version_master = PIDVersioning(child=rec_pid)
                    parent_pid = version_master.parent
                    if not parent_pid:
                        click.secho(
                            '    record {} was deleted, but the deposit has '
                            'not been removed'.format(rec_pid.pid_value),
                            fg='red')
                if parent_pid:
                    migrate_record_metadata(
                        Deposit.get_record(dep_pid.object_uuid),
                        parent_pid
                    )
            except NoResultFound:
                # The deposit is deleted but not the PID. Fix it.
                dep_pid.status = PIDStatus.DELETED

    if verbose:
        click.secho('done migrating deposits.')
    RecordIndexer.index = old_index_fn

def publish(self):
    """Publish a GitHub release as a record."""
    id_ = uuid.uuid4()
    deposit_metadata = dict(self.metadata)
    deposit = None
    try:
        db.session.begin_nested()
        # TODO: Add filter on Published releases
        previous_releases = self.model.repository.releases.filter_by(
            status=ReleaseStatus.PUBLISHED)
        versioning = None
        stashed_draft_child = None
        if previous_releases.count():
            last_release = previous_releases.order_by(
                Release.created.desc()).first()
            last_recid = PersistentIdentifier.get(
                'recid', last_release.record['recid'])
            versioning = PIDVersioning(child=last_recid)
            last_record = ZenodoRecord.get_record(
                versioning.last_child.object_uuid)
            deposit_metadata['conceptrecid'] = last_record['conceptrecid']
            if 'conceptdoi' not in last_record:
                last_depid = PersistentIdentifier.get(
                    'depid', last_record['_deposit']['id'])
                last_deposit = ZenodoDeposit.get_record(
                    last_depid.object_uuid)
                last_deposit = last_deposit.registerconceptdoi()
                last_recid, last_record = last_deposit.fetch_published()
            deposit_metadata['conceptdoi'] = last_record['conceptdoi']
            if versioning.draft_child:
                stashed_draft_child = versioning.draft_child
                versioning.remove_draft_child()

        deposit = self.deposit_class.create(deposit_metadata, id_=id_)

        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]

        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True,
                                          allow_redirects=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}"
                    .format(url=url)
                )

            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=res.raw,
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
        recid_pid, record = deposit.fetch_published()
        self.model.recordmetadata = record.model
        if versioning and stashed_draft_child:
            versioning.insert_draft_child(stashed_draft_child)
        record_id = str(record.id)
        db.session.commit()

        # Send the DataCite DOI registration task
        datacite_register.delay(recid_pid.pid_value, record_id)

        # Index the record
        RecordIndexer().index_by_id(record_id)
    except Exception:
        db.session.rollback()
        # Remove the deposit from the index since it was not committed.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommitted deposit from index.")
        raise

def create(cls, data, id_=None, version_of=None):
    """Create a deposit with an optional id.

    :param version_of: PID of an existing record. If set, the new record
        will be marked as a new version of this referenced record. If no
        data is provided the new record will be a copy of this record.
        Note: this PID must reference the current last version of a record.
    """

    # check that the status field is not set
    if 'publication_state' in data:
        raise InvalidDepositError(
            'Field "publication_state" cannot be set.')
    data['publication_state'] = PublicationStates.draft.name
    # Set record's schema
    if '$schema' in data:
        raise InvalidDepositError('"$schema" field should not be set.')

    # Retrieve the reserved record PID which should have already been
    # created by the deposit minter (the record PID value is the same
    # as the one of the deposit)
    rec_pid = RecordUUIDProvider.get(data['_deposit']['id']).pid
    version_master, prev_version = None, None
    # if this is a new version of an existing record, add the future
    # record pid in the chain of versions.
    if version_of:
        version_master, prev_version = \
            find_version_master_and_previous_record(version_of)
        # The new version must be in the same community
        if data['community'] != prev_version['community']:
            raise ValidationError(
                'The community field cannot change between versions.')
        try:
            version_master.insert_draft_child(rec_pid)
        except Exception as exc:
            # Only one draft is allowed per version chain.
            if 'Draft child already exists for this relation' in \
                    exc.args[0]:
                raise DraftExistsVersioningError(
                    version_master.draft_child
                )
            raise exc
    else:
        # create parent PID
        parent_pid = RecordUUIDProvider.create().pid
        version_master = PIDVersioning(parent=parent_pid)
        version_master.insert_draft_child(child=rec_pid)

    # Mint the deposit with the parent PID
    data['_pid'] = [{
        'value': version_master.parent.pid_value,
        'type': RecordUUIDProvider.parent_pid_type,
    }]

    if 'community' not in data or not data['community']:
        raise ValidationError(
            'Record\'s metadata has no community field.')
    try:
        community_id = uuid.UUID(data['community'])
    except ValueError as e:
        raise InvalidDepositError(
            'Community ID is not a valid UUID.') from e
    try:
        schema = CommunitySchema.get_community_schema(community_id)
    except CommunitySchemaDoesNotExistError as e:
        raise InvalidDepositError(
            'No schema for community {}.'.format(community_id)) from e

    if version_of:
        data['$schema'] = Deposit._build_deposit_schema(prev_version)
    else:
        from b2share.modules.schemas.serializers import \
            community_schema_draft_json_schema_link
        data['$schema'] = community_schema_draft_json_schema_link(
            schema,
            _external=True
        )

    # create file bucket
    if prev_version and prev_version.files:
        # Clone the bucket from the previous version. This doesn't
        # duplicate files.
        bucket = prev_version.files.bucket.snapshot(lock=False)
        bucket.locked = False
    else:
        bucket = Bucket.create(storage_class=current_app.config[
            'DEPOSIT_DEFAULT_STORAGE_CLASS'
        ])

    if 'external_pids' in data:
        create_b2safe_file(data['external_pids'], bucket)
        del data['external_pids']

    deposit = super(Deposit, cls).create(data, id_=id_)
    db.session.add(bucket)
    db.session.add(RecordsBuckets(
        record_id=deposit.id, bucket_id=bucket.id
    ))

    return deposit

def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    """
    from invenio_github.models import ReleaseStatus
    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError("User cannot be determined from argument: {0}".format(
            user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from the index
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    if 'conceptdoi' in record:
        conceptdoi_value = record['conceptdoi']
    else:
        conceptdoi_value = None

    # Completely delete the deposit
    # The deposit will be removed from the index
    deposit.delete(delete_published=True)

    # Clear the record and put in the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    db.session.commit()

    # After a successful DB commit, sync the DOIs with DataCite
    datacite_inactivate.delay(doi.pid_value)
    if conceptdoi_value:
        if new_last_child:
            # Update the last child (updates also the conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from the OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] \
            and original_id and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)