def datacite_register_after_publish(sender, action=None, pid=None,
                                    deposit=None):
    """Mint DOI with DataCite after the deposit has been published.

    Nothing happens unless ``action`` is ``'publish'`` and the
    ``DEPOSIT_DATACITE_MINTING_ENABLED`` configuration flag is truthy.

    :param sender: Accepted but not used by this function.
    :param action: Name of the action that was performed on the deposit.
    :param pid: Accepted but not used by this function.
    :param deposit: Deposit whose published record is registered.
    """
    if action != 'publish':
        return
    if not current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        return

    published_pid, published_record = deposit.fetch_published()
    # Registration itself is handed over to the asynchronous task.
    datacite_register.delay(published_pid.pid_value,
                            str(published_record.id))
def datacite_register_after_publish(sender, action=None, pid=None,
                                    deposit=None):
    """Mint DOI with DataCite after the deposit has been published.

    :param sender: Accepted but not used by this function.
    :param action: Performed action; only ``'publish'`` triggers minting.
    :param pid: Accepted but not used by this function.
    :param deposit: Deposit whose published record is registered.
    """
    # The config flag is only consulted for the 'publish' action.
    minting_requested = (
        action == 'publish' and
        current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']
    )
    if not minting_requested:
        return

    recid, published = deposit.fetch_published()
    datacite_register.delay(recid.pid_value, str(published.id))
def publish(self):
    """Publish GitHub release as record.

    A deposit is created from ``self.metadata``, the archives listed in
    ``self.files`` are streamed into its bucket, the deposit is published
    and the session committed. Afterwards a DataCite registration task is
    queued for the published record.

    On any exception the session is rolled back, the deposit is removed
    from the index (failures of that cleanup are only logged) and the
    original exception is re-raised.
    """
    deposit_uuid = uuid.uuid4()
    deposit = None
    try:
        db.session.begin_nested()
        deposit = self.deposit_class.create(self.metadata, id_=deposit_uuid)
        owner_id = self.event.user_id
        deposit['_deposit']['created_by'] = owner_id
        deposit['_deposit']['owners'] = [owner_id]

        # Download every release archive into the deposit bucket.
        for key, url in self.files:
            http = self.gh.api.session
            # A throw-away HEAD request makes GitHub compute the
            # Content-Length before the actual download.
            http.head(url, allow_redirects=True)
            download = http.get(url, stream=True)
            if download.status_code != 200:
                message = "Could not retrieve archive from GitHub: {url}"
                raise Exception(message.format(url=url))

            headers = download.headers
            declared_size = int(headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=download.raw,
                # A missing or zero Content-Length is passed as None.
                size=declared_size or None,
                mimetype=headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        agent_schema = current_jsonschemas.path_to_url(
            current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA'])
        sip_agent = {
            '$schema': agent_schema,
            'user_id': owner_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=owner_id, sip_agent=sip_agent)
        self.model.recordmetadata = deposit.model
        db.session.commit()

        # Queue the DataCite DOI registration for the published record.
        published_pid, published = deposit.fetch_published()
        datacite_register.delay(published_pid.pid_value, str(published.id))
    except Exception:
        db.session.rollback()
        # The deposit was not committed, so it must not stay in the index.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommited deposit from index.")
        raise
def registerconceptdoi(self, pid=None):
    """Register the conceptdoi for the deposit and record.

    Mints the concept DOI on the published record, copies it onto the
    deposit and, when ``DEPOSIT_DATACITE_MINTING_ENABLED`` is set, queues
    the DataCite registration task for the published record.

    :param pid: Ignored; it is overwritten by the PID returned from
        ``fetch_published()``.
    :returns: The deposit itself (``self``).
    """
    pid, record = self.fetch_published()
    zenodo_concept_doi_minter(record.id, record)
    record.commit()
    # The deposit must carry the record's concept DOI. The previous code
    # copied ``record['conceptrecid']`` here, which stored the concept
    # record id under the 'conceptdoi' key.
    self['conceptdoi'] = record['conceptdoi']
    self.commit()

    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        # Imported locally, as in the original code.
        from zenodo.modules.deposit.tasks import datacite_register
        datacite_register.delay(pid.pid_value, str(record.id))
    return self
def registerconceptdoi(self, pid=None):
    """Register the conceptdoi for the deposit and record.

    :param pid: Ignored; replaced by the PID of the published record.
    :returns: The deposit itself (``self``).
    :raises PIDInvalidAction: If the deposit is not published and
        ``is_doi_locally_managed`` is true for its DOI.
    """
    unpublished = not self.is_published()
    if unpublished and is_doi_locally_managed(self['doi']):
        raise PIDInvalidAction()

    recid_pid, published = self.fetch_published()
    zenodo_concept_doi_minter(published.id, published)
    published.commit()

    # Mirror the concept DOI of the record on the deposit.
    self['conceptdoi'] = published['conceptdoi']
    self.commit()

    minting_enabled = current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']
    if minting_enabled:
        from zenodo.modules.deposit.tasks import datacite_register
        datacite_register.delay(recid_pid.pid_value, str(published.id))
    return self
def registerconceptdoi(self, pid=None):
    """Register the conceptdoi for the deposit and record.

    The concept DOI is minted on the published record, copied to the
    deposit and, if DataCite minting is enabled in the configuration,
    a registration task is queued.

    :param pid: Ignored; replaced by the PID of the published record.
    :returns: The deposit itself (``self``).
    :raises PIDInvalidAction: If the deposit is not published and
        ``is_doi_locally_managed`` is true for its DOI.
    """
    if not self.is_published():
        if is_doi_locally_managed(self['doi']):
            raise PIDInvalidAction()

    published_pid, published_record = self.fetch_published()
    zenodo_concept_doi_minter(published_record.id, published_record)
    published_record.commit()

    self['conceptdoi'] = published_record['conceptdoi']
    self.commit()

    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        from zenodo.modules.deposit.tasks import datacite_register
        record_uuid = str(published_record.id)
        datacite_register.delay(published_pid.pid_value, record_uuid)
    return self
def curate(community):
    """Apply a curation action to a record of a community.

    The JSON request body must be an object holding an ``action`` (one of
    ``accept``, ``reject`` or ``remove``) and a ``recid``. The action is
    applied, the session committed and the record reindexed. Depending on
    the configuration, OpenAIRE and DataCite tasks are queued afterwards.

    :param community: Community to curate. It is wrapped in
        ``ZenodoCommunity`` when the record's PID is part of a versioning
        relation, otherwise it is used directly.
    :returns: JSON response ``{'status': 'success'}``.
    """
    payload = request.json
    # A missing, non-JSON or non-object body used to fail with an
    # AttributeError on ``.get`` (HTTP 500); it is a client error.
    if not isinstance(payload, dict):
        abort(400)

    action = payload.get('action')
    recid = payload.get('recid')
    if not recid:
        abort(400)
    if action not in ('accept', 'reject', 'remove'):
        abort(400)

    # Resolve recid to a Record
    pid, record = record_resolver.resolve(recid)

    # Perform actions
    pv = PIDVersioning(child=pid)
    if pv.exists:
        api = ZenodoCommunity(community)
    else:
        api = community
    if action == "accept":
        api.accept_record(record, pid=pid)
    elif action == "reject":
        api.reject_record(record, pid=pid)
    elif action == "remove":
        api.remove_record(record, pid=pid)

    # Read the id before committing; it is needed for the follow-up tasks.
    record_id = record.id
    db.session.commit()
    RecordIndexer().index_by_id(record_id)

    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED']:
        if action == 'accept':
            openaire_direct_index.delay(record_uuid=str(record_id))
        elif action in ('reject', 'remove'):
            openaire_delete.delay(record_uuid=str(record_id))
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(recid, str(record_id))

    return jsonify({'status': 'success'})
def curate(community):
    """Accept, reject or remove a record in a community.

    Expects a JSON object in the request body with the keys ``action``
    (``accept``, ``reject`` or ``remove``) and ``recid``; any other body
    is answered with HTTP 400. After applying the action the session is
    committed, the record is reindexed and the OpenAIRE / DataCite tasks
    enabled in the configuration are queued.

    :param community: Community to curate. It is wrapped in
        ``ZenodoCommunity`` when the record's PID is part of a versioning
        relation, otherwise it is used directly.
    :returns: JSON response ``{'status': 'success'}``.
    """
    payload = request.json
    # ``request.json`` is not guaranteed to be a dict (no body, wrong
    # content type, JSON array, ...). Reject those with 400 instead of
    # crashing on ``.get`` with an AttributeError.
    if not isinstance(payload, dict):
        abort(400)

    action = payload.get('action')
    recid = payload.get('recid')
    if not recid:
        abort(400)
    if action not in ('accept', 'reject', 'remove'):
        abort(400)

    # Resolve recid to a Record
    pid, record = record_resolver.resolve(recid)

    # Perform actions
    pv = PIDVersioning(child=pid)
    if pv.exists:
        api = ZenodoCommunity(community)
    else:
        api = community
    if action == "accept":
        api.accept_record(record, pid=pid)
    elif action == "reject":
        api.reject_record(record, pid=pid)
    elif action == "remove":
        api.remove_record(record, pid=pid)

    # Read the id before committing; it is needed for the follow-up tasks.
    record_id = record.id
    db.session.commit()
    RecordIndexer().index_by_id(record_id)

    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED']:
        if action == 'accept':
            openaire_direct_index.delay(record_uuid=str(record_id))
        elif action in ('reject', 'remove'):
            openaire_delete.delay(record_uuid=str(record_id))
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(recid, str(record_id))

    return jsonify({'status': 'success'})
def publish(self):
    """Publish GitHub release as record.

    When the repository already has published releases, the new deposit
    inherits ``conceptrecid`` and ``conceptdoi`` from the last version
    (registering the concept DOI first if the last record has none). A
    draft child found in the versioning is taken out before the deposit
    is created and put back after publishing.

    After the commit a DataCite registration task is queued and the
    record is indexed. On any exception the session is rolled back, the
    deposit is removed from the index (failures of that cleanup are only
    logged) and the original exception is re-raised.
    """
    deposit_uuid = uuid.uuid4()
    metadata = dict(self.metadata)
    deposit = None
    try:
        db.session.begin_nested()

        published_releases = self.model.repository.releases.filter_by(
            status=ReleaseStatus.PUBLISHED)
        versioning = None
        stashed_draft_child = None
        if published_releases.count():
            newest_first = published_releases.order_by(
                Release.created.desc())
            last_release = newest_first.first()
            last_recid = PersistentIdentifier.get(
                'recid', last_release.record['recid'])
            versioning = PIDVersioning(child=last_recid)
            last_record = ZenodoRecord.get_record(
                versioning.last_child.object_uuid)
            metadata['conceptrecid'] = last_record['conceptrecid']

            # The previous version may not have a concept DOI yet;
            # register it and reload the published record.
            if 'conceptdoi' not in last_record:
                last_depid = PersistentIdentifier.get(
                    'depid', last_record['_deposit']['id'])
                last_deposit = ZenodoDeposit.get_record(
                    last_depid.object_uuid)
                last_deposit = last_deposit.registerconceptdoi()
                _, last_record = last_deposit.fetch_published()
            metadata['conceptdoi'] = last_record['conceptdoi']

            # Take an existing draft out of the versioning for now; it is
            # re-inserted once the new version has been published.
            if versioning.draft_child:
                stashed_draft_child = versioning.draft_child
                versioning.remove_draft_child()

        deposit = self.deposit_class.create(metadata, id_=deposit_uuid)
        owner_id = self.event.user_id
        deposit['_deposit']['created_by'] = owner_id
        deposit['_deposit']['owners'] = [owner_id]

        # Download every release archive into the deposit bucket.
        for key, url in self.files:
            http = self.gh.api.session
            # A throw-away HEAD request makes GitHub compute the
            # Content-Length before the actual download.
            http.head(url, allow_redirects=True)
            download = http.get(url, stream=True, allow_redirects=True)
            if download.status_code != 200:
                message = "Could not retrieve archive from GitHub: {url}"
                raise Exception(message.format(url=url))

            headers = download.headers
            declared_size = int(headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=download.raw,
                # A missing or zero Content-Length is passed as None.
                size=declared_size or None,
                mimetype=headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        agent_schema = current_jsonschemas.path_to_url(
            current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA'])
        sip_agent = {
            '$schema': agent_schema,
            'user_id': owner_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=owner_id, sip_agent=sip_agent)

        published_pid, published = deposit.fetch_published()
        self.model.recordmetadata = published.model
        if versioning and stashed_draft_child:
            versioning.insert_draft_child(stashed_draft_child)
        # Taken before the commit and reused for the task and indexing.
        published_uuid = str(published.id)
        db.session.commit()

        # Send Datacite DOI registration task
        datacite_register.delay(published_pid.pid_value, published_uuid)
        # Index the record
        RecordIndexer().index_by_id(published_uuid)
    except Exception:
        db.session.rollback()
        # The deposit was not committed, so it must not stay in the index.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommited deposit from index.")
        raise
def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    The record is removed from versioning, the index and its bucket, the
    deposit is deleted and the record content is replaced by the removal
    information. After the database commit the DOIs are synced with
    DataCite and, if enabled, the record is deleted from OpenAIRE.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    :raises TypeError: If ``user`` is neither a text string nor an int.
    """
    from invenio_github.models import ReleaseStatus

    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError(
            "User cannot be determined from argument: {0}".format(user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from index; a record that is not indexed is fine.
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    # Must be read before the record content is cleared below.
    conceptdoi_value = record.get('conceptdoi')

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    # ``doi`` is None when no DOI PID exists, so it must not be
    # dereferenced blindly. Value and locality are captured before the
    # commit because a non-local DOI PID is deleted from the session.
    doi_value = doi.pid_value if doi is not None else None
    doi_is_local = doi is not None and is_local_doi(doi_value)
    if doi is not None and not doi_is_local:
        db.session.delete(doi)

    db.session.commit()

    # After successful DB commit, sync the DOIs with DataCite
    if doi_is_local:
        datacite_inactivate.delay(doi_value)
    if conceptdoi_value:
        if new_last_child:
            # Update last child (updates also conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] and \
            original_id and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)
def versioning_link_records(recids):
    """Link several non-versioned records into one versioning scheme.

    The records are linked in the order as they appear in the list, with
    the first record being base for minting of the conceptdoi.
    In case one of the records is already upgraded, it's taken as the base
    for conceptdoi instead, with preserving the requested order.

    All linked records receive the union of the records' communities and
    all linked deposits the union of the deposits' communities.

    :param recids: list of recid values (strings) to link,
                   e.g.: ['1234','55125','51269']
    :type recids: list of str
    :raises Exception: If several records are already upgraded but belong
        to different versioning schemes, or if children of the base
        record's versioning are missing from ``recids``.
    """
    recids_records = [
        record_resolver.resolve(recid_val) for recid_val in recids
    ]
    depids_deposits = [
        deposit_resolver.resolve(record['_deposit']['id'])
        for _, record in recids_records
    ]

    # Sorted union of the communities over all records / all deposits.
    rec_comms = sorted({
        comm for _, rec in recids_records
        for comm in rec.get('communities', [])
    })
    dep_comms = sorted({
        comm for _, dep in depids_deposits
        for comm in dep.get('communities', [])
    })

    upgraded = [(recid, rec) for recid, rec in recids_records
                if 'conceptdoi' in rec]

    # Determine the base record for versioning
    if not upgraded:
        recid_v, record_v = recids_records[0]
    else:
        recid_v, record_v = upgraded[0]
        if len(upgraded) > 1:
            # Several upgraded records are only acceptable when they are
            # exactly the children of one and the same versioning scheme.
            child_recids = {
                int(recid.pid_value) for recid in
                PIDVersioning(child=recid_v).children.all()
            }
            i_upgraded = {int(recid.pid_value) for recid, _ in upgraded}
            if child_recids != i_upgraded:
                raise Exception('Multiple upgraded records, which belong '
                                'to different versioning schemes.')

    # Get the first record and mint the concept DOI for it
    conceptdoi = zenodo_concept_doi_minter(record_v.id, record_v)

    conceptrecid_v = PersistentIdentifier.get('recid',
                                              record_v['conceptrecid'])
    conceptrecid_v_val = conceptrecid_v.pid_value

    pv_r1 = PIDVersioning(parent=conceptrecid_v)
    children_recids = [c.pid_value for c in pv_r1.children.all()]
    if not all(cr in recids for cr in children_recids):
        raise Exception('Children of the already upgraded record: {0} are '
                        'not specified in the ordering: {1}'
                        ''.format(children_recids, recids))

    for (recid, record), (depid, deposit) in \
            zip(recids_records, depids_deposits):
        # Remove old versioning schemes for non-base recids
        # Note: This will remove the child of the base-conceptrecid as well
        # but that's OK, since it will be added again afterwards in the
        # correct order.
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)
        pv.remove_child(recid)
        if conceptrecid.pid_value != conceptrecid_v_val:
            conceptrecid.delete()

        # Update the 'conceptrecid' and 'conceptdoi' in records and deposits
        record['conceptdoi'] = conceptdoi.pid_value
        record['conceptrecid'] = conceptrecid_v.pid_value
        record['communities'] = rec_comms
        record.commit()
        deposit['conceptdoi'] = conceptdoi.pid_value
        deposit['conceptrecid'] = conceptrecid_v.pid_value
        deposit['communities'] = dep_comms
        deposit.commit()

        # Add the child to the new versioning scheme
        pv_r1.insert_child(recid)

    pv_r1.update_redirect()
    db.session.commit()

    # Re-fetch the concept recid after the commit.
    conceptrecid_v = PersistentIdentifier.get('recid', conceptrecid_v_val)
    pv = PIDVersioning(parent=conceptrecid_v)
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(pv.last_child.pid_value,
                                str(pv.last_child.object_uuid))

    index_siblings(pv.last_child, with_deposits=True, eager=True)
def versioning_github_repository(uuid):
    """Migrate the GitHub repositories.

    All non-removed deposits of the repository's published releases are
    linked, ordered by recid, under one newly minted concept recid. A
    concept DOI is minted only when more than one record is linked.
    Nothing is done when there are no published releases or no remaining
    records.

    :param uuid: UUID of the repository (invenio_github.models.Repository)
    :raises AssertionError: If any of the records or deposits already has
        a 'conceptrecid'.
    """
    from invenio_github.models import Repository, ReleaseStatus
    from zenodo.modules.deposit.minters import zenodo_concept_recid_minter
    from zenodo.modules.records.minters import zenodo_concept_doi_minter
    from invenio_pidrelations.contrib.records import index_siblings

    repository = Repository.query.get(uuid)
    published_releases = repository.releases.filter_by(
        status=ReleaseStatus.PUBLISHED).all()

    # Nothing to migrate if no successful release was ever made
    if not published_releases:
        return

    deposits = [
        ZenodoDeposit.get_record(r.record_id) for r in published_releases
        if r.recordmetadata.json is not None
    ]
    deposits = [dep for dep in deposits if 'removed_by' not in dep]
    deposits = sorted(deposits, key=lambda dep: int(dep['recid']))

    recids = [
        PersistentIdentifier.get('recid', dep['recid']) for dep in deposits
    ]
    records = [ZenodoRecord.get_record(p.object_uuid) for p in recids]

    # There were successful releases, but deposits/records were removed since
    if not records:
        return

    # Raised explicitly (same exception type as the former ``assert``) so
    # the checks are not stripped when Python runs with -O.
    if any('conceptrecid' in rec for rec in records):
        raise AssertionError(
            "One or more of the release records have been already migrated")
    if any('conceptrecid' in dep for dep in deposits):
        raise AssertionError(
            "One or more of the release deposits have been already migrated")

    conceptrecid = zenodo_concept_recid_minter(record_uuid=records[0].id,
                                               data=records[0])
    conceptrecid.register()

    # Mint the Concept DOI if we are migrating (linking) more than one record
    if len(records) > 1:
        conceptdoi = zenodo_concept_doi_minter(records[0].id, records[0])
    else:
        conceptdoi = None

    # Sorted union of the communities over all records / all deposits.
    rec_comms = sorted({
        comm for rec in records for comm in rec.get('communities', [])
    })
    dep_comms = sorted({
        comm for dep in deposits for comm in dep.get('communities', [])
    })

    for rec in records:
        rec['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            rec['conceptdoi'] = conceptdoi.pid_value
        if rec_comms:
            rec['communities'] = rec_comms
        rec.commit()

    for dep in deposits:
        dep['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            dep['conceptdoi'] = conceptdoi.pid_value
        if dep_comms:
            dep['communities'] = dep_comms
        dep.commit()

    pv = PIDVersioning(parent=conceptrecid)
    for recid in recids:
        pv.insert_child(recid)
    pv.update_redirect()

    # Read the task arguments before the commit.
    last_recid_value = recids[-1].pid_value
    last_record_uuid = str(records[-1].id)

    db.session.commit()

    # Dispatch only after the commit: a task queued earlier could be
    # picked up by a worker before the new metadata is visible to it.
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(last_recid_value, last_record_uuid)

    # Reindex all siblings
    index_siblings(pv.last_child, with_deposits=True)
def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    The record is removed from versioning, the index and its bucket, the
    deposit is deleted and the record content is replaced by the removal
    information. After the database commit the DOIs are synced with
    DataCite and, if enabled, the record is deleted from OpenAIRE.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    :raises TypeError: If ``user`` is neither a text string nor an int.
    """
    from invenio_github.models import ReleaseStatus

    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError("User cannot be determined from argument: {0}".format(
            user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from index; a record that is not indexed is fine.
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    # Must be read before the record content is cleared below.
    conceptdoi_value = record.get('conceptdoi')

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    # ``doi`` is None when no DOI PID exists; read its value before the
    # commit so the DataCite sync below does not dereference None.
    doi_value = doi.pid_value if doi is not None else None

    db.session.commit()

    # After successful DB commit, sync the DOIs with DataCite
    if doi_value is not None:
        datacite_inactivate.delay(doi_value)
    if conceptdoi_value:
        if new_last_child:
            # Update last child (updates also conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] and \
            original_id and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)