def create_deposit_and_record(pid_value, owner):
    """Utility function for creating records and deposits.

    Mints a reserved 'recid' and a registered 'depid' for ``pid_value``,
    attaches the recid as a draft child of the concept recid, creates a
    deposit plus a published record that share those identifiers, and
    returns all four objects.

    NOTE(review): ``conceptrecid`` and ``minimal_record`` are free
    variables — this function must be defined inside a fixture/closure
    that provides them; confirm against the enclosing scope.

    :param pid_value: Value used for both the 'recid' and 'depid' PIDs.
    :param owner: User object set as the record owner.
    :returns: Tuple ``(depid, deposit, recid, record)``.
    """
    recid = PersistentIdentifier.create(
        'recid', pid_value, status=PIDStatus.RESERVED)
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)
    depid = PersistentIdentifier.create(
        'depid', pid_value, status=PIDStatus.REGISTERED)
    deposit = ZenodoRecord.create({'_deposit': {'id': depid.pid_value},
                                   'conceptrecid': conceptrecid.pid_value,
                                   'recid': recid.pid_value})
    deposit.commit()
    depid.assign('rec', deposit.id)
    # The published record mirrors the deposit's identifiers; note the
    # record's 'recid' is stored as an int while the deposit's is a string.
    record_metadata = deepcopy(minimal_record)
    record_metadata['_deposit'] = {'id': depid.pid_value}
    record_metadata['conceptrecid'] = conceptrecid.pid_value
    record_metadata['recid'] = int(recid.pid_value)
    record_metadata['owners'] = [owner.id]
    record = ZenodoRecord.create(record_metadata)
    zenodo_record_minter(record.id, record)
    record.commit()
    return (depid, deposit, recid, record)
def _publish_new(self, id_=None):
    """Publish new deposit with communities handling.

    Extends the base publish flow: pops 'communities' from the deposit,
    publishes the record, and — when the record is a new version — rejects
    the publication if its file set (by checksum) duplicates any previous
    version's. Community membership is then synced between deposit and
    record, the concept-recid redirect updated, and siblings re-indexed.

    :param id_: Optional UUID for the new record (forwarded to parent).
    :returns: The newly published record.
    :raises VersioningFilesError: If the new version's files are identical
        to those of a previously published version.
    """
    dep_comms = set(self.pop('communities', []))
    record = super(ZenodoDeposit, self)._publish_new(id_=id_)
    conceptrecid = PersistentIdentifier.get('recid',
                                            record['conceptrecid'])
    pv = PIDVersioning(parent=conceptrecid)
    if pv.children.count() > 1:
        files_set = set(f.get_version().file.checksum for f in self.files)
        # Compare against every previously published version (all
        # children except the one just published).
        for prev_recid in pv.children.all()[:-1]:
            rec = ZenodoRecord.get_record(prev_recid.object_uuid)
            prev_files_set = set(f.get_version().file.checksum
                                 for f in rec.files)
            if files_set == prev_files_set:
                raise VersioningFilesError()
        # Inherit community membership from the immediately preceding
        # version.
        prev_recid = pv.children.all()[-2]
        rec_comms = set(ZenodoRecord.get_record(
            prev_recid.get_assigned_object()).get('communities', []))
    else:
        rec_comms = set()
    record = self._sync_communities(dep_comms, rec_comms, record)
    record.commit()
    # Update the concept recid redirection
    pv.update_redirect()
    RecordDraft.unlink(record.pid, self.pid)
    index_siblings(record.pid, neighbors_eager=True, with_deposits=True)
    return record
def test_jsonschema(app, record_audit, minimal_record):
    """Check JSON-Schema validation on a clean and a corrupted record."""
    def run_jsonschema_check(data):
        # Build a fresh check for the given record data and run only the
        # jsonschema validation step.
        result = RecordCheck(record_audit, ZenodoRecord(data))
        result.jsonschema()
        return result.issues.get('jsonschema')

    # A minimal valid record must yield no jsonschema issue.
    assert run_jsonschema_check(minimal_record) is None

    # A key not allowed by the schema must be reported as an issue.
    minimal_record['invalid_key'] = 'should not be here'
    assert run_jsonschema_check(minimal_record)
def serialize_related_identifiers(pid):
    """Serialize PID Versioning relations as related_identifiers metadata.

    :param pid: A 'recid' PersistentIdentifier. It may be a child (a
        concrete record version — emits an 'isVersionOf' link to the
        Concept DOI) and/or a parent (a concept recid — emits one
        'hasVersion' link per child version).
    :returns: List of related-identifier dicts with 'scheme', 'relation'
        and 'identifier' keys.
    """
    pv = PIDVersioning(child=pid)
    related_identifiers = []
    if pv.exists:
        # 'pid' is a version inside some concept record.
        rec = ZenodoRecord.get_record(pid.get_assigned_object())

        # External DOI records don't have Concept DOI
        if 'conceptdoi' in rec:
            ri = {
                'scheme': 'doi',
                'relation': 'isVersionOf',
                'identifier': rec['conceptdoi']
            }
            related_identifiers.append(ri)

        # TODO: We do not serialize previous/next versions to
        # related identifiers because of the semantic-versioning cases
        # (e.g. GitHub releases of minor versions)
        #
        # children = pv.children.all()
        # idx = children.index(pid)
        # left = children[:idx]
        # right = children[idx + 1:]
        # for p in left:
        #     rec = ZenodoRecord.get_record(p.get_assigned_object())
        #     ri = {
        #         'scheme': 'doi',
        #         'relation': 'isNewVersionOf',
        #         'identifier': rec['doi']
        #     }
        #     related_identifiers.append(ri)
        # for p in right:
        #     rec = ZenodoRecord.get_record(p.get_assigned_object())
        #     ri = {
        #         'scheme': 'doi',
        #         'relation': 'isPreviousVersionOf',
        #         'identifier': rec['doi']
        #     }
        #     related_identifiers.append(ri)

    pv = PIDVersioning(parent=pid)
    if pv.exists:
        # 'pid' is a concept recid: link out to every child version.
        for p in pv.children:
            rec = ZenodoRecord.get_record(p.get_assigned_object())
            ri = {
                'scheme': 'doi',
                'relation': 'hasVersion',
                'identifier': rec['doi']
            }
            related_identifiers.append(ri)
    return related_identifiers
def bibtex_records(app, db, full_record):
    """Create some records for bibtex serializer."""
    # A record carrying only a recid — deliberately missing the metadata
    # the Bibtex serializer needs.
    bad_metadata = dict(recid='12345')
    r_good = ZenodoRecord.create(
        full_record, UUID("24029cb9-f0f8-4b72-94a7-bdf746f9d075"))
    r_bad = ZenodoRecord.create(
        bad_metadata, UUID("0281c22c-266a-499b-8446-e12eff2f79b8"))
    db.session.commit()

    serializers = tuple(Bibtex(source) for source in (r_good, r_bad, {}))
    return serializers + (r_good,)
def bibtex_records(app, db, full_record):
    """Create some records for bibtex serializer."""
    good_uuid = UUID("24029cb9-f0f8-4b72-94a7-bdf746f9d075")
    bad_uuid = UUID("0281c22c-266a-499b-8446-e12eff2f79b8")
    # Only a recid — deliberately lacks the metadata Bibtex requires.
    r_bad = ZenodoRecord.create({'recid': '12345'}, bad_uuid)
    r_good = ZenodoRecord.create(full_record, good_uuid)
    db.session.commit()
    return (Bibtex(r_good), Bibtex(r_bad), Bibtex({}), r_good)
def versioning_published_record(uuid):
    """Migrate a published record to the versioning scheme.

    Mints and registers a concept recid for the record, attaches the
    record's recid as a child of it, and propagates the concept recid to
    the linked deposit (if one exists). Records already carrying a
    'conceptrecid' are skipped.

    FIX: removed the ``assert 'conceptrecid' not in record`` that
    immediately followed the early-return guard on the same condition —
    it could never fire and was dead code.

    :param uuid: UUID of the record metadata to migrate.
    """
    record = ZenodoRecord.get_record(uuid)
    # Already migrated — nothing to do.
    if 'conceptrecid' in record:
        return

    # ASSERT ZENODO DOI ONLY!
    # doi = PersistentIdentifier.get('doi', str(record['doi']))
    # assert is_local_doi(doi.pid_value), 'DOI is not controlled by Zenodo.'

    conceptrecid = zenodo_concept_recid_minter(uuid, record)
    conceptrecid.register()
    recid = PersistentIdentifier.get('recid', str(record['recid']))
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_child(recid)
    record.commit()

    # Some old records have no deposit ID, some don't have '_deposit'
    if ('_deposit' in record and 'id' in record['_deposit']
            and record['_deposit']['id']):
        try:
            depid = PersistentIdentifier.get(
                'depid', str(record['_deposit']['id']))
            deposit = ZenodoDeposit.get_record(depid.object_uuid)
            deposit['conceptrecid'] = conceptrecid.pid_value
            if deposit['_deposit']['status'] == 'draft':
                # Bump the stored PID revision so the draft stays in sync
                # with the record we just committed above.
                deposit['_deposit']['pid']['revision_id'] = \
                    deposit['_deposit']['pid']['revision_id'] + 1
            deposit.commit()
        except PIDDoesNotExistError:
            # No deposit behind the stored depid — migrate the record only.
            pass
    db.session.commit()
def create_sip_for_record(recid, agent=None, user_id=432):
    """Create a new SIP if the record's files diverged from last SIPFiles.

    Compares the record's current file IDs with the files of the most
    recent SIP and, when they differ (or no SIP exists yet), archives a
    new SIP for the record.

    FIX: both sides of the comparison are now normalized with ``str`` —
    previously only the SIP side was stringified, so UUID objects on the
    record side could never compare equal and a redundant SIP would be
    created.

    :param recid: Value of the record's 'recid' PID.
    :param agent: Agent JSON passed to the SIP.
    :param user_id: ID of the user resposible for the SIP (by default,
        user ID of [email protected])
    """
    pid = PersistentIdentifier.get('recid', recid)
    rec = ZenodoRecord.get_record(pid.object_uuid)
    # Most recent SIP for this record, if any.
    recsip = RecordSIP.query.filter_by(pid_id=pid.id).order_by(
        RecordSIP.created.desc()).first()
    rec_f = sorted(str(f['file_id']) for f in rec.get('_files', []))
    sipfiles = recsip.sip.sip_files if recsip else []
    sipfiles_s = sorted(str(s.file_id) for s in sipfiles)
    default_agent = {
        "$schema": "https://zenodo.org/schemas/sipstore/"
                   "agent-webclient-v1.0.0.json",
        "ip_address": "127.0.0.1",
        "email": "*****@*****.**"
    }
    agent = agent or default_agent
    archivable = True
    if sipfiles_s != rec_f:
        RecordSIPApi.create(pid, rec, archivable, create_sip_files=True,
                            agent=agent, user_id=user_id)
        db.session.commit()
def dispatch_webhook(community_id, record_id, event_type):
    """Build webhook payload and dispatch delivery tasks."""
    hooks_config = current_app.config.get('ZENODO_COMMUNITIES_WEBHOOKS', {})
    targets = hooks_config.get(community_id, [])
    if not targets:
        return

    # TODO: Extract to a utility?
    record = ZenodoRecord.get_record(record_id)
    community = Community.query.get(community_id)

    # TODO: Make configurable
    serialized_record = legacyjson_v1.transform_record(record.pid, record)

    # Assemble the payload from its parts for readability.
    event_context = {
        "community": community.id,
        "user": record['owners'][0],
    }
    event_body = {
        "community": {
            "id": community.id,
            "owner": {
                "id": community.id_user,
            }
        },
        "record": serialized_record,
    }
    payload = {
        "timestamp": datetime.utcnow().isoformat(),
        "id": str(uuid.uuid4()),
        "event_type": event_type,
        "context": event_context,
        "payload": event_body,
    }

    for target in targets:
        deliver_webhook.delay(payload, community_id, target)
def openaire_delete(record_uuid=None, original_id=None, datasource_id=None):
    """Delete record from OpenAIRE index.

    FIX: when only the beta-endpoint request failed, the raised
    OpenAIRERequestError previously carried the text of the *successful*
    production response; each failure now reports its own response text.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    :param original_id: OpenAIRE originalId.
    :type original_id: str
    :param datasource_id: OpenAIRE datasource identifier.
    :type datasource_id: str
    """
    try:
        # Resolve originalId and datasource if not already available
        if not (original_id and datasource_id) and record_uuid:
            record = ZenodoRecord.get_record(record_uuid)
            original_id = openaire_original_id(
                record, openaire_type(record))[1]
            datasource_id = openaire_datasource_id(record)
        params = {'originalId': original_id,
                  'collectedFromId': datasource_id}
        req = _openaire_request_factory()
        res = req.delete(current_app.config['OPENAIRE_API_URL'],
                         params=params)
        if not res.ok:
            raise OpenAIRERequestError(res.text)
        # Mirror the deletion on the beta endpoint when configured.
        if current_app.config['OPENAIRE_API_URL_BETA']:
            res_beta = req.delete(
                current_app.config['OPENAIRE_API_URL_BETA'], params=params)
            if not res_beta.ok:
                raise OpenAIRERequestError(res_beta.text)
    except Exception as exc:
        # Celery retry: the whole task (both endpoints) is re-run.
        openaire_delete.retry(exc=exc)
def audit_records(minimal_record, db):
    """Audit test records."""
    records = {}
    for recid_value in range(1, 5):
        # Build the record JSON first, then persist the metadata row.
        data = deepcopy(minimal_record)
        data['recid'] = recid_value
        data['_oai'] = {
            'id': 'oai:{}'.format(recid_value),
            'sets': [],
            'updated': datetime.utcnow().date().isoformat(),
        }
        model = RecordMetadata()
        model.json = data
        db.session.add(model)
        db.session.commit()
        records[recid_value] = ZenodoRecord(data=model.json, model=model)

        # Registered recid pointing at the stored metadata row.
        db.session.add(PersistentIdentifier(
            pid_type='recid', pid_value=str(recid_value), status='R',
            object_type='rec', object_uuid=model.id))
        db.session.commit()
    return records
def test_record_audit(record_audit, full_record, db, communities, users,
                      oaiid_pid):
    """Audit a full record and verify that no issues are reported.

    FIX: the original assigned the result of ``list.append`` (always
    ``None``) to ``oaiid_pid.pid_value`` before the real assignment —
    the append's return value was never meant to be stored.
    """
    # Add the "ecfunded" community since it's usually being added
    # automatically after processing a deposit if the record has an EC
    # grant.
    full_record['communities'].append('ecfunded')

    # Mint the OAI identifier
    oaiid_pid.pid_value = full_record['_oai']['id']
    db.session.add(oaiid_pid)

    # Create the record metadata, to store the record
    record_model = RecordMetadata()
    record_model.json = full_record
    db.session.add(record_model)
    db.session.commit()
    record = ZenodoRecord(data=full_record, model=record_model)

    check = RecordCheck(record_audit, record)
    check.perform()

    assert check.issues == {}
    assert check.is_ok is True
    assert check.dump() == {
        'record': {
            'recid': record['recid'],
            'object_uuid': str(record.id),
        },
        'issues': {},
    }
def openaire_direct_index(record_uuid):
    """Send record for direct indexing at OpenAIRE.

    FIX: the exception handler read ``record.get('recid')`` even when
    fetching the record was itself what failed, raising a NameError that
    masked the original exception. The handler now only touches the
    failure cache when the record was successfully loaded.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    record = None
    try:
        record = ZenodoRecord.get_record(record_uuid)

        # Bail out if not an OpenAIRE record.
        if not (is_openaire_publication(record) or
                is_openaire_dataset(record)):
            return

        data = openaire_json_v1.serialize(record.pid, record)

        url = '{}/api/results/feedObject'.format(
            current_app.config['OPENAIRE_API_URL'])
        req = _openaire_request_factory()
        res = req.post(url, data=data)

        if not res.ok:
            raise OpenAIRERequestError(res.text)
        else:
            # Success: clear any stale failure marker for this record.
            recid = record.get('recid')
            current_cache.delete('openaire_direct_index:{}'.format(recid))
    except Exception as exc:
        if record is not None:
            # Mark the record as failed so it can be found and re-indexed.
            recid = record.get('recid')
            current_cache.set('openaire_direct_index:{}'.format(recid),
                              datetime.now(), timeout=-1)
        openaire_direct_index.retry(exc=exc)
def openaire_delete(record_uuid=None, original_id=None, datasource_id=None):
    """Delete record from OpenAIRE index.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    :param original_id: OpenAIRE originalId.
    :type original_id: str
    :param datasource_id: OpenAIRE datasource identifier.
    :type datasource_id: str
    """
    try:
        # Look the identifiers up from the record when either is missing.
        identifiers_missing = not (original_id and datasource_id)
        if identifiers_missing and record_uuid:
            record = ZenodoRecord.get_record(record_uuid)
            original_id = openaire_original_id(record,
                                               openaire_type(record))[1]
            datasource_id = openaire_datasource_id(record)

        endpoint = '{}/api/results'.format(
            current_app.config['OPENAIRE_API_URL'])
        session = _openaire_request_factory()
        response = session.delete(endpoint, params={
            'originalId': original_id,
            'collectedFromId': datasource_id,
        })
        if not response.ok:
            raise OpenAIRERequestError(response.text)
    except Exception as exc:
        openaire_delete.retry(exc=exc)
def accept_record(self, record, pid=None):
    """Accept the record and all of its versions into the community.

    Verifies that a pending inclusion request exists, adds every version
    of the record to the community, and deletes the pending request(s).

    :type record: zenodo.modules.records.api.ZenodoRecord
    :param pid: PID of type 'recid'
    :type pid: invenio_pidstore.models.PersistentIdentifier
    :raises InclusionRequestMissingError: If no pending inclusion request
        exists for this community/record pair.
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    with db.session.begin_nested():
        pending_q = self.get_comm_irs(record, pid=pid)
        if not pending_q.count():
            raise InclusionRequestMissingError(community=self,
                                               record=record)
        # Add every version of the record, not just the one accepted.
        pv = PIDVersioning(child=pid)
        for child in pv.children.all():
            rec = ZenodoRecord.get_record(
                child.get_assigned_object())
            # req = InclusionRequest.get(self.community.id, rec.id)
            # if req:
            #     req.delete()
            self.community.add_record(rec)
            rec.commit()
        pending_q.delete(synchronize_session=False)
def record_with_bucket(db, full_record, bucket, sip_metadata_types):
    """Create a bucket."""
    # Published record linked to the provided bucket.
    record = ZenodoRecord.create(full_record)
    RecordsBuckets.create(record=record.model, bucket=bucket)
    recid_pid = PersistentIdentifier.create(
        object_uuid=record.id, object_type='rec',
        pid_type='recid', pid_value=12345, status='R')
    db.session.commit()
    return recid_pid, record
def add_record(self, record, pid=None):
    """Add a record and all of its versions to a community."""
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    versioning = PIDVersioning(child=pid)
    for version_pid in versioning.children.all():
        version = ZenodoRecord.get_record(
            version_pid.get_assigned_object())
        # Skip versions that are already members of the community.
        if self.community.has_record(version):
            continue
        self.community.add_record(version)
        version.commit()
def minimal_record_model(db, minimal_record, sip_metadata_types, recid_pid):
    """Minimal record."""
    # Created yesterday, updated one day later (i.e. "now").
    created_at = datetime.utcnow() - timedelta(days=1)
    model = RecordMetadata(id=str(recid_pid.object_uuid))
    model.created = created_at
    model.updated = created_at + timedelta(days=1)
    model.version_id = 0
    rec = ZenodoRecord(minimal_record, model=model)
    db.session.commit()
    return rec
def add_record(self, record, pid=None):
    """Add a record and all of its versions to a community."""
    pid = pid or PersistentIdentifier.get('recid', record['recid'])
    sibling_pids = PIDVersioning(child=pid).children.all()
    for sibling in sibling_pids:
        version = ZenodoRecord.get_record(sibling.get_assigned_object())
        if not self.community.has_record(version):
            self.community.add_record(version)
            version.commit()
def record_with_bucket(db, full_record, bucket, sip_metadata_types):
    """Create a bucket."""
    record = ZenodoRecord.create(full_record)
    # Point the record's bucket references at the fixture bucket.
    bucket_id = str(bucket.id)
    record['_buckets']['record'] = bucket_id
    record['_files'][0]['bucket'] = bucket_id
    record.commit()
    RecordsBuckets.create(record=record.model, bucket=bucket)
    recid_pid = PersistentIdentifier.create(
        object_uuid=record.id, object_type='rec',
        pid_type='recid', pid_value=12345, status='R')
    db.session.commit()
    return recid_pid, record
def _sync_communities(self, dep_comms, rec_comms, record):
    """Sync community membership across all versions of a record.

    Computes the new record/deposit/inclusion-request community sets and
    applies them to every sibling version (records and their deposits),
    to the draft deposit of a new version (if any), to the record being
    published, and to this deposit itself. Inclusion requests are then
    created for communities requiring curation and obsolete ones removed.

    :param dep_comms: Communities currently set on the deposit.
    :param rec_comms: Communities currently set on the record.
    :param record: The record being published.
    :returns: The (possibly modified) record.
    """
    new_dep_comms, new_rec_comms, new_ir_comms = \
        self._get_new_communities(dep_comms, rec_comms, record)

    # Update Communities and OAISet information for all record versions
    conceptrecid = PersistentIdentifier.get('recid',
                                            record['conceptrecid'])
    pv = PIDVersioning(parent=conceptrecid)
    for pid in pv.children:
        rec = ZenodoRecord.get_record(pid.get_assigned_object())
        # The record being published is handled separately below.
        if rec.id != record.id:
            rec['communities'] = sorted(new_rec_comms)
            if current_app.config['COMMUNITIES_OAI_ENABLED']:
                rec = self._sync_oaisets_with_communities(rec)
            # Drop the key entirely instead of keeping an empty list.
            if not rec['communities']:
                del rec['communities']
            rec.commit()
            depid = PersistentIdentifier.get(
                'depid', rec['_deposit']['id'])
            deposit = ZenodoDeposit.get_record(depid.get_assigned_object())
            deposit['communities'] = sorted(new_dep_comms)
            if not deposit['communities']:
                del deposit['communities']
            deposit.commit()

    # Update new version deposit
    if pv.draft_child_deposit:
        draft_dep = ZenodoDeposit.get_record(
            pv.draft_child_deposit.get_assigned_object())
        if draft_dep.id != self.id:
            draft_dep['communities'] = sorted(new_dep_comms)
            if not draft_dep['communities']:
                del draft_dep['communities']
            draft_dep.commit()

    # Apply the new sets to the record being published and to self.
    record['communities'] = sorted(new_rec_comms)
    if current_app.config['COMMUNITIES_OAI_ENABLED']:
        record = self._sync_oaisets_with_communities(record)
    if not record['communities']:
        del record['communities']

    self['communities'] = sorted(new_dep_comms)
    if not self['communities']:
        del self['communities']

    # Create Inclusion requests against this record
    self._create_inclusion_requests(new_ir_comms, record)

    # Remove obsolete InclusionRequests again the record and its versions
    self._remove_obsolete_irs(new_ir_comms, record)
    return record
def _get_auto_added(self, record):
    """Get communities which are to be auto added to each record."""
    if not current_app.config['ZENODO_COMMUNITIES_AUTO_ENABLED']:
        return []
    parent = PersistentIdentifier.get('recid', record['conceptrecid'])
    siblings = PIDVersioning(parent=parent).children
    # Grant information from every version of the record.
    grants_per_version = [
        ZenodoRecord.get_record(child.get_assigned_object()).get('grants')
        for child in siblings
    ]
    if self.get('grants') or any(grants_per_version):
        return copy(current_app.config['ZENODO_COMMUNITIES_ADD_IF_GRANTS'])
    return []
def minimal_record_model(db, minimal_record, sip_metadata_types):
    """Minimal record."""
    # Created yesterday, updated one day later (i.e. "now").
    created_at = datetime.utcnow() - timedelta(days=1)
    model = RecordMetadata()
    model.created = created_at
    model.updated = created_at + timedelta(days=1)
    model.version_id = 0
    rec = ZenodoRecord(minimal_record, model=model)
    PersistentIdentifier.create(
        'recid', '123', object_type='rec', object_uuid=rec.id,
        status=PIDStatus.REGISTERED)
    db.session.commit()
    return rec
def create_versioned_record(recid_value, conceptrecid):
    """Utility function for creating versioned records."""
    # Reserve the new version's recid and register it as a draft child
    # of the concept recid.
    recid = PersistentIdentifier.create(
        'recid', recid_value, status=PIDStatus.RESERVED)
    PIDVersioning(parent=conceptrecid).insert_draft_child(recid)

    metadata = deepcopy(minimal_record)
    metadata.update({
        'conceptrecid': conceptrecid.pid_value,
        'recid': int(recid.pid_value),
    })
    record = ZenodoRecord.create(metadata)
    zenodo_record_minter(record.id, record)
    record.commit()
    return recid, record
def fetch_depid(pid):
    """Fetch depid from any pid.

    Accepts either a PersistentIdentifier (of type 'depid' or 'recid')
    or a raw depid value (string/int). Returns ``None`` when resolution
    fails, or when 'pid' is a PersistentIdentifier of any other type.

    :param pid: PersistentIdentifier, string or int.
    :returns: The 'depid' PersistentIdentifier, or ``None``.
    """
    try:
        if isinstance(pid, PersistentIdentifier):
            if pid.pid_type == 'depid':
                return pid
            elif pid.pid_type == 'recid':
                # Resolve via the record's linked deposit.
                return ZenodoRecord.get_record(pid.object_uuid).depid
        elif isinstance(pid, (string_types, int)):
            return PersistentIdentifier.get('depid', pid_value=pid)
        else:
            raise Exception('"[{}] cannot be resolved to depid'.format(pid))
    except Exception:
        # FIXME: Handle or let it bubble
        pass
def remove_record(self, record, pid=None):
    """Remove the record and all of its versions from the community.

    :type record: zenodo.modules.records.api.ZenodoRecord
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    with db.session.begin_nested():
        for version_pid in PIDVersioning(child=pid).children.all():
            version = ZenodoRecord.get_record(
                version_pid.get_assigned_object())
            # Versions not in the community need no removal.
            if not self.community.has_record(version):
                continue
            self.community.remove_record(version)
            version.commit()
def remove_record(self, record, pid=None):
    """Remove the record and all of its versions from the community.

    :type record: zenodo.modules.records.api.ZenodoRecord
    """
    pid = pid or PersistentIdentifier.get('recid', record['recid'])
    with db.session.begin_nested():
        versions = (ZenodoRecord.get_record(child.get_assigned_object())
                    for child in PIDVersioning(child=pid).children.all())
        for rec in versions:
            if self.community.has_record(rec):
                self.community.remove_record(rec)
                rec.commit()
def _create_records(base_metadata, total, versions, files):
    """Create versioned test records with files.

    For each of ``total`` concept records, mints a concept recid and
    creates ``versions`` child record versions, each with its own bucket
    holding ``files`` 10-byte PDF files.

    :param base_metadata: Metadata dict used as template for every record.
    :param total: Number of concept records to create.
    :param versions: Number of versions per concept record.
    :param files: Number of files attached to each version.
    :returns: List of (recid PID, record, file objects) tuples, one per
        created version.
    """
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create('recid',
                                                   str(conceptrecid_val),
                                                   status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            # Versions take consecutive recids after the concept recid.
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(pid_type='recid',
                                                pid_value=record['recid'],
                                                object_type='rec',
                                                object_uuid=record.id,
                                                status='R')
            versioning.insert_child(recid)
            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()
            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
def openaire_direct_index(record_uuid):
    """Send record for direct indexing at OpenAIRE.

    Posts the serialized record to the OpenAIRE production endpoint and,
    when configured, to the beta endpoint as well.

    FIX: the exception handler read ``record.get('recid')`` even when
    fetching the record was itself what failed, raising a NameError that
    masked the original exception. The handler now only touches the
    failure cache when the record was successfully loaded.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    record = None
    try:
        record = ZenodoRecord.get_record(record_uuid)

        # Bail out if not an OpenAIRE record.
        if not (is_openaire_publication(record) or
                is_openaire_dataset(record) or
                is_openaire_software(record) or
                is_openaire_other(record)):
            return

        data = openaire_json_v1.serialize(record.pid, record)

        url = '{}/feedObject'.format(
            current_app.config['OPENAIRE_API_URL'])
        req = _openaire_request_factory()
        res = req.post(url, data=data)
        if not res.ok:
            raise OpenAIRERequestError(res.text)

        res_beta = None
        if current_app.config['OPENAIRE_API_URL_BETA']:
            url_beta = '{}/feedObject'.format(
                current_app.config['OPENAIRE_API_URL_BETA'])
            res_beta = req.post(url_beta, data=data)

        if res_beta and not res_beta.ok:
            raise OpenAIRERequestError(res_beta.text)
        else:
            # Success: clear any stale failure marker for this record.
            recid = record.get('recid')
            current_cache.delete('openaire_direct_index:{}'.format(recid))
    except Exception as exc:
        if record is not None:
            # Mark the record as failed so it can be found and re-indexed.
            recid = record.get('recid')
            current_cache.set('openaire_direct_index:{}'.format(recid),
                              datetime.now(), timeout=-1)
        openaire_direct_index.retry(exc=exc)
def accept_record(self, record, pid=None):
    """Accept the record and all of its versions into the community.

    :type record: zenodo.modules.records.api.ZenodoRecord
    :param pid: PID of type 'recid'
    :type pid: invenio_pidstore.models.PersistentIdentifier
    """
    pid = pid or PersistentIdentifier.get('recid', record['recid'])
    with db.session.begin_nested():
        pending = self.get_comm_irs(record, pid=pid)
        if pending.count() == 0:
            raise InclusionRequestMissingError(community=self,
                                               record=record)
        # Every version of the record joins the community together.
        for child in PIDVersioning(child=pid).children.all():
            version = ZenodoRecord.get_record(child.get_assigned_object())
            self.community.add_record(version)
            version.commit()
        pending.delete(synchronize_session=False)
def _create_records(base_metadata, total, versions, files):
    """Create versioned test records with attached files.

    Builds ``total`` concept records; each gets ``versions`` child record
    versions with consecutive recids, and each version gets its own
    bucket containing ``files`` small PDF files.

    :param base_metadata: Template metadata dict for every record.
    :param total: Number of concept records.
    :param versions: Versions per concept record.
    :param files: Files per version.
    :returns: List of (recid PID, record, file objects) tuples, one per
        version.
    """
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create(
            'recid', str(conceptrecid_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            # Child recids follow immediately after the concept recid.
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'],
                object_type='rec', object_uuid=record.id, status='R')
            versioning.insert_child(recid)
            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()
            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
def has_record(self, record, pid=None, scope='any'):
    """Check if record is in a community.

    :type scope: str
    :param scope: Can take values 'any', 'all' or 'this'.

        * 'all': returns True if all record versions are in the community.
        * 'any': returns True if any of the record versions are in the
          community.
        * 'this': returns if the specified 'record' is in the community.
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=pid)
    if scope == 'this':
        return self.community.has_record(record)
    # Lazy membership test per version; all()/any() short-circuit.
    membership = (
        self.community.has_record(
            ZenodoRecord.get_record(child.get_assigned_object()))
        for child in pv.children)
    if scope == 'all':
        return all(membership)
    if scope == 'any':
        return any(membership)
def get_github_repository(pid):
    """Get GitHub repository from depid."""
    depid = fetch_depid(pid)

    # First check if the passed depid is a GitHub release
    direct_release = (Release.query
                      .filter_by(record_id=depid.object_uuid)
                      .one_or_none())
    if direct_release:
        return direct_release.repository

    deposit = ZenodoDeposit.get_record(depid.object_uuid)
    concept_recid_value = deposit.get('conceptrecid')
    if not concept_recid_value:
        return None

    parent = PersistentIdentifier.get(
        pid_type='recid', pid_value=concept_recid_value)
    versioning = PIDVersioning(parent=parent)
    if not versioning.exists:
        return None

    # Look for a release among the deposits of every sibling version.
    record_ids = [child.object_uuid for child in versioning.children]
    deposit_ids = (rec.depid.object_uuid
                   for rec in ZenodoRecord.get_records(record_ids))
    release = (Release.query
               .filter(Release.record_id.in_(deposit_ids))
               .first())
    return release.repository if release else None
def publish(self):
    """Publish GitHub release as record.

    Creates a deposit from the release metadata, downloads the release
    archive(s) from GitHub into the deposit's bucket, publishes the
    deposit, links the resulting record to the release, and triggers
    DataCite DOI registration plus record indexing. When the repository
    already has published releases, the new record is created as a new
    version under the existing concept recid/DOI.
    """
    id_ = uuid.uuid4()
    deposit_metadata = dict(self.metadata)
    deposit = None
    try:
        db.session.begin_nested()
        # TODO: Add filter on Published releases
        previous_releases = self.model.repository.releases.filter_by(
            status=ReleaseStatus.PUBLISHED)
        versioning = None
        stashed_draft_child = None
        if previous_releases.count():
            last_release = previous_releases.order_by(
                Release.created.desc()).first()
            last_recid = PersistentIdentifier.get(
                'recid', last_release.record['recid'])
            versioning = PIDVersioning(child=last_recid)
            last_record = ZenodoRecord.get_record(
                versioning.last_child.object_uuid)
            deposit_metadata['conceptrecid'] = last_record['conceptrecid']
            # Older records may predate the Concept DOI — register one
            # on the last deposit before versioning on top of it.
            if 'conceptdoi' not in last_record:
                last_depid = PersistentIdentifier.get(
                    'depid', last_record['_deposit']['id'])
                last_deposit = ZenodoDeposit.get_record(
                    last_depid.object_uuid)
                last_deposit = last_deposit.registerconceptdoi()
                last_recid, last_record = last_deposit.fetch_published()
            deposit_metadata['conceptdoi'] = last_record['conceptdoi']
            # Temporarily detach any existing draft child so the new
            # version can be inserted; it is restored after publishing.
            if versioning.draft_child:
                stashed_draft_child = versioning.draft_child
                versioning.remove_draft_child()
        deposit = self.deposit_class.create(deposit_metadata, id_=id_)
        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]
        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True,
                                          allow_redirects=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}"
                    .format(url=url)
                )
            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=res.raw,
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )
        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
        recid_pid, record = deposit.fetch_published()
        self.model.recordmetadata = record.model
        # Restore the draft child stashed before publishing.
        if versioning and stashed_draft_child:
            versioning.insert_draft_child(stashed_draft_child)
        record_id = str(record.id)
        db.session.commit()

        # Send Datacite DOI registration task
        datacite_register.delay(recid_pid.pid_value, record_id)

        # Index the record
        RecordIndexer().index_by_id(record_id)
    except Exception:
        db.session.rollback()
        # Remove deposit from index since it was not commited.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommited deposit from index.")
        raise
def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    Removes the record from versioning, search index, buckets and
    OpenAIRE, deletes the linked deposit, overwrites the record metadata
    with the removal information, and syncs the DOI state with DataCite.

    FIX: ``doi`` can be ``None`` (a PIDDoesNotExistError is caught when
    the record has no local DOI); dereferencing ``doi.pid_value``
    unconditionally crashed with AttributeError after the DB commit. The
    DataCite call is now guarded with ``if doi:``.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    """
    from invenio_github.models import ReleaseStatus

    # Resolve the responsible admin from either an email or a user ID.
    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError("User cannot be determined from argument: {0}"
                        .format(user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from index
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    # The record may have no local DOI (e.g. external-DOI records).
    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    # If this was the last version, retire the concept recid too;
    # otherwise remember the new last child for DataCite re-registration.
    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    if 'conceptdoi' in record:
        conceptdoi_value = record['conceptdoi']
    else:
        conceptdoi_value = None

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    db.session.commit()

    # After successful DB commit, sync the DOIs with DataCite
    if doi:
        datacite_inactivate.delay(doi.pid_value)
    if conceptdoi_value:
        if new_last_child:
            # Update last child (updates also conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] \
            and original_id and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)
def versioning_github_repository(uuid):
    """
    Migrate the GitHub repositories.

    One-shot migration: links all published releases of a repository into a
    single versioning chain under a freshly minted concept recid (and, when
    more than one record exists, a concept DOI), merges their communities,
    and reindexes all siblings.

    :param uuid: UUID of the repository (invenio_github.models.Repository)
    """
    # Local imports to avoid import-time coupling in a migration helper.
    from invenio_github.models import Repository, ReleaseStatus
    from zenodo.modules.deposit.minters import zenodo_concept_recid_minter
    from zenodo.modules.records.minters import zenodo_concept_doi_minter
    from invenio_pidrelations.contrib.records import index_siblings

    repository = Repository.query.get(uuid)
    published_releases = repository.releases.filter_by(
        status=ReleaseStatus.PUBLISHED).all()

    # Nothing to migrate if no successful release was ever made
    if not published_releases:
        return

    # Collect surviving deposits, oldest first (by recid value).
    deposits = [
        ZenodoDeposit.get_record(r.record_id)
        for r in published_releases if r.recordmetadata.json is not None
    ]
    deposits = [dep for dep in deposits if 'removed_by' not in dep]
    deposits = sorted(deposits, key=lambda dep: int(dep['recid']))

    recids = [
        PersistentIdentifier.get('recid', dep['recid']) for dep in deposits
    ]
    records = [ZenodoRecord.get_record(p.object_uuid) for p in recids]

    # There were successful releases, but deposits/records were removed since
    if not records:
        return

    # Guard against running the migration twice on the same repository.
    assert not any('conceptrecid' in rec for rec in records), \
        "One or more of the release records have been already migrated"
    assert not any('conceptrecid' in dep for dep in deposits), \
        "One or more of the release deposits have been already migrated"

    # The concept recid is minted on (and points to) the oldest record.
    conceptrecid = zenodo_concept_recid_minter(record_uuid=records[0].id,
                                               data=records[0])
    conceptrecid.register()

    # Mint the Concept DOI if we are migrating (linking) more than one record
    if len(records) > 1:
        conceptdoi = zenodo_concept_doi_minter(records[0].id, records[0])
    else:
        conceptdoi = None

    # Union of communities across all versions, applied to every version.
    rec_comms = sorted(
        set(sum([rec.get('communities', []) for rec in records], [])))
    dep_comms = sorted(
        set(sum([dep.get('communities', []) for dep in deposits], [])))

    for rec in records:
        rec['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            rec['conceptdoi'] = conceptdoi.pid_value
        if rec_comms:
            rec['communities'] = rec_comms
        rec.commit()

    for dep in deposits:
        dep['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            dep['conceptdoi'] = conceptdoi.pid_value
        if dep_comms:
            dep['communities'] = dep_comms
        dep.commit()

    # Build the version chain (insertion order == version order) and point
    # the concept recid redirect at the latest child.
    pv = PIDVersioning(parent=conceptrecid)
    for recid in recids:
        pv.insert_child(recid)
    pv.update_redirect()

    # NOTE(review): the DataCite task is enqueued *before* db.session.commit()
    # below — presumably acceptable for this migration, but confirm the task
    # cannot run against uncommitted state.
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(recids[-1].pid_value, str(records[-1].id))

    db.session.commit()

    # Reindex all siblings
    index_siblings(pv.last_child, with_deposits=True)
def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    :raises TypeError: if ``user`` is neither a string (email) nor an int (ID).
    """
    from invenio_github.models import ReleaseStatus

    # Resolve the admin user performing the removal to a user ID.
    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError(
            "User cannot be determined from argument: {0}".format(user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from index (best-effort: it may not be indexed)
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets (unlock first, otherwise removal is refused)
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    # Map a known reason keyword to its configured human-readable text;
    # anything else is kept verbatim.
    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    # The record may have no registered DOI PID at all.
    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info before the record is cleared below.
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    # If this was the last version, retire the concept recid as well;
    # otherwise remember the new head version for DataCite re-registration.
    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    if 'conceptdoi' in record:
        conceptdoi_value = record['conceptdoi']
    else:
        conceptdoi_value = None

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    # Externally-managed DOIs are simply dropped from the local PID store.
    # FIX: ``doi`` is None when the record had no DOI PID (see the
    # PIDDoesNotExistError branch above) — guard before dereferencing.
    if doi and not is_local_doi(doi.pid_value):
        db.session.delete(doi)
    db.session.commit()

    # After successful DB commit, sync the DOIs with DataCite
    if doi and is_local_doi(doi.pid_value):
        datacite_inactivate.delay(doi.pid_value)
    if conceptdoi_value:
        if new_last_child:
            # Update last child (updates also conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] and original_id \
            and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)
def newversion(self, pid=None):
    """Create a new version deposit.

    Creates a new draft deposit that snapshots the latest published
    record's files (including any extra-formats bucket), links it into the
    version chain under the same concept recid, and pre-fills its DOI.
    Only runs when this deposit is published, no newer draft version
    exists, and the DOI is locally managed; otherwise it is a no-op.

    :param pid: unused here; present for interface compatibility.
    :returns: ``self`` (the deposit the method was invoked on).
    :raises PIDInvalidAction: if this deposit is not published.
    """
    if not self.is_published():
        raise PIDInvalidAction()

    # Check that there is not a newer draft version for this record
    pid, record = self.fetch_published()
    pv = PIDVersioning(child=pid)
    if (not pv.draft_child and
            is_doi_locally_managed(record['doi'])):
        with db.session.begin_nested():
            # Get copy of the latest record
            latest_record = ZenodoRecord.get_record(
                pv.last_child.object_uuid)

            data = latest_record.dumps()

            # Get the communities from the last deposit
            # and push those to the new version
            latest_depid = PersistentIdentifier.get(
                'depid', data['_deposit']['id'])
            latest_deposit = ZenodoDeposit.get_record(
                latest_depid.object_uuid)
            last_communities = latest_deposit.get('communities', [])

            owners = data['_deposit']['owners']

            # TODO: Check other data that may need to be removed
            keys_to_remove = (
                '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
            for k in keys_to_remove:
                data.pop(k, None)

            # NOTE: We call the superclass `create()` method, because we
            # don't want a new empty bucket, but an unlocked snapshot of
            # the old record's bucket.
            deposit = (super(ZenodoDeposit, self).create(data))
            # Injecting owners is required in case of creating new
            # version this outside of request context
            deposit['_deposit']['owners'] = owners
            if last_communities:
                deposit['communities'] = last_communities

            ###
            # Link the new draft into the version chain: the recid minted
            # by create() becomes a draft child of the concept recid, and
            # the depid is linked to it as its draft.
            conceptrecid = PersistentIdentifier.get(
                'recid', data['conceptrecid'])
            recid = PersistentIdentifier.get(
                'recid', str(data['recid']))
            depid = PersistentIdentifier.get(
                'depid', str(data['_deposit']['id']))
            PIDVersioning(parent=conceptrecid).insert_draft_child(
                child=recid)
            RecordDraft.link(recid, depid)

            # Pre-fill the Zenodo DOI to prevent the user from changing it
            # to a custom DOI.
            deposit['doi'] = doi_generator(recid.pid_value)

            pv = PIDVersioning(child=pid)
            index_siblings(pv.draft_child, neighbors_eager=True,
                           with_deposits=True)

        with db.session.begin_nested():
            # Create snapshot from the record's bucket and update data
            snapshot = latest_record.files.bucket.snapshot(lock=False)
            snapshot.locked = False
            # Snapshot the extra-formats bucket too, if the record has one.
            if 'extra_formats' in latest_record['_buckets']:
                extra_formats_snapshot = \
                    latest_record.extra_formats.bucket.snapshot(
                        lock=False)
            deposit['_buckets'] = {'deposit': str(snapshot.id)}
            RecordsBuckets.create(record=deposit.model, bucket=snapshot)
            if 'extra_formats' in latest_record['_buckets']:
                deposit['_buckets']['extra_formats'] = \
                    str(extra_formats_snapshot.id)
                RecordsBuckets.create(
                    record=deposit.model, bucket=extra_formats_snapshot)
            deposit.commit()
    return self
def newversion(self, pid=None):
    """Create a new version deposit.

    Creates a new draft deposit that snapshots the latest published
    record's files, links it into the version chain under the same concept
    recid, and pre-fills its DOI. Only runs when this deposit is
    published, no newer draft version exists, and the DOI is locally
    managed; otherwise it is a no-op.

    :param pid: unused here; present for interface compatibility.
    :returns: ``self`` (the deposit the method was invoked on).
    :raises PIDInvalidAction: if this deposit is not published.
    """
    if not self.is_published():
        raise PIDInvalidAction()

    # Check that there is not a newer draft version for this record
    pid, record = self.fetch_published()
    pv = PIDVersioning(child=pid)
    if (not pv.draft_child and
            is_doi_locally_managed(record['doi'])):
        with db.session.begin_nested():
            # Get copy of the latest record
            latest_record = ZenodoRecord.get_record(
                pv.last_child.object_uuid)

            data = latest_record.dumps()

            # Get the communities from the last deposit
            # and push those to the new version
            latest_depid = PersistentIdentifier.get(
                'depid', data['_deposit']['id'])
            latest_deposit = ZenodoDeposit.get_record(
                latest_depid.object_uuid)
            last_communities = latest_deposit.get('communities', [])

            owners = data['_deposit']['owners']

            # TODO: Check other data that may need to be removed
            keys_to_remove = (
                '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
            for k in keys_to_remove:
                data.pop(k, None)

            # NOTE: We call the superclass `create()` method, because we
            # don't want a new empty bucket, but an unlocked snapshot of
            # the old record's bucket.
            deposit = (super(ZenodoDeposit, self).create(data))
            # Injecting owners is required in case of creating new
            # version this outside of request context
            deposit['_deposit']['owners'] = owners
            if last_communities:
                deposit['communities'] = last_communities

            ###
            # Link the new draft into the version chain: the recid minted
            # by create() becomes a draft child of the concept recid, and
            # the depid is linked to it as its draft.
            conceptrecid = PersistentIdentifier.get(
                'recid', data['conceptrecid'])
            recid = PersistentIdentifier.get(
                'recid', str(data['recid']))
            depid = PersistentIdentifier.get(
                'depid', str(data['_deposit']['id']))
            PIDVersioning(parent=conceptrecid).insert_draft_child(
                child=recid)
            RecordDraft.link(recid, depid)

            # Pre-fill the Zenodo DOI to prevent the user from changing it
            # to a custom DOI.
            deposit['doi'] = doi_generator(recid.pid_value)

            pv = PIDVersioning(child=pid)
            index_siblings(pv.draft_child, neighbors_eager=True,
                           with_deposits=True)

        with db.session.begin_nested():
            # Create snapshot from the record's bucket and update data
            snapshot = latest_record.files.bucket.snapshot(lock=False)
            snapshot.locked = False

            # FIXME: `snapshot.id` might not be present because we need to
            # commit first to the DB.
            # db.session.commit()
            deposit['_buckets'] = {'deposit': str(snapshot.id)}
            RecordsBuckets.create(record=deposit.model, bucket=snapshot)
            deposit.commit()
    return self