def test_appoint_profile_from_claimed_signature(small_app):
    """Check the module for the case where claimed signature takes everything."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    old_record_id = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_author_uuid = old_record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    old_record["authors"][0]["signature_block"] = "HAGp"
    old_record["authors"][0]["recid"] = "2"
    es.index(index="records-hep", doc_type="hep",
             id=old_record_id, body=old_record)
    es.indices.refresh("records-hep")

    record_id = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    record["authors"][0]["signature_block"] = "HAGp"
    record["authors"][0]["recid"] = "314159265"
    record["authors"][0]["curated_relation"] = True
    es.index(index="records-hep", doc_type="hep", id=record_id, body=record)
    es.indices.refresh("records-hep")

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(
                   ({"2": [old_author_uuid, author_uuid]}, {}))):
        with patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
            side_effect=update_authors_recid,
        ):
            disambiguation_clustering("HAGp")

    assert Record.get_record(old_record_id)["authors"][0]["recid"] == "314159265"
    assert Record.get_record(record_id)["authors"][0]["recid"] == "314159265"
def test_published_external_doi(db, deposit, deposit_file):
    """Test published external DOI."""
    ext_doi1 = '10.1234/foo'
    ext_doi2 = '10.1234/bar'
    deposit['doi'] = ext_doi1
    deposit.publish()
    db.session.commit()

    # Published record with external DOI must have:
    # 1) a registered recid with object
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    assert recid and recid.status == PIDStatus.REGISTERED \
        and recid.has_object()
    # 2) a reserved external doi with object
    doi = PersistentIdentifier.get('doi', ext_doi1)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()

    # Now change external DOI.
    deposit = deposit.edit()
    deposit['doi'] = ext_doi2
    deposit.publish()
    db.session.commit()

    # Ensure DOI 1 has been removed.
    pytest.raises(
        PIDDoesNotExistError, PersistentIdentifier.get, 'doi', ext_doi1)
    # Ensure DOI 2 has been reserved.
    doi = PersistentIdentifier.get('doi', ext_doi2)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()
def add_drafts_from_file(file_path, schema, egroup=None, user=None, limit=None):
    """Add drafts from a specified file.

    Drafts with a specified pid will be registered under it.
    For drafts without a pid, new pids will be minted.
    """
    with open(file_path, 'r') as fp:
        entries = json.load(fp)

    for entry in entries[0:limit]:
        data = construct_draft_obj(schema, entry)
        pid = cap_deposit_fetcher(None, data)
        pid_value = pid.pid_value if pid else None

        try:
            PersistentIdentifier.get('depid', pid_value)
            print('Draft with id {} already exists!'.format(pid_value))
        except PIDDoesNotExistError:
            record_uuid = uuid.uuid4()
            pid = cap_deposit_minter(record_uuid, data)
            if user:
                user = User.query.filter_by(email=user).one()
            if egroup:
                # Ensure the e-group role exists (raises if not found).
                role = Role.query.filter_by(name=egroup).one()

            deposit = CAPDeposit.create(data, record_uuid, user)
            deposit.commit()

            if egroup:
                add_read_permission_for_egroup(deposit, egroup)

            print('Draft {} added.'.format(pid.pid_value))
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

    RecordsBuckets.create(record=deposit.model, bucket=bucket)

    recid = PersistentIdentifier.get(
        'recid', str(data['recid']))
    conceptrecid = PersistentIdentifier.get(
        'recid', str(data['conceptrecid']))
    depid = PersistentIdentifier.get(
        'depid', str(data['_deposit']['id']))

    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)

    return deposit
def validate_doi(self, value):
    """Validate that the DOI does not already belong to another record."""
    if value and has_request_context():
        required_doi = self.context.get('required_doi')
        if value == required_doi:
            return

        err = ValidationError(_('DOI already exists in Zenodo.'),
                              field_names=['doi'])
        try:
            doi_pid = PersistentIdentifier.get('doi', value)
        except PIDDoesNotExistError:
            return

        # If the DOI exists, check if it has been assigned to this record
        # by fetching the recid and comparing both PIDs' record UUIDs.
        try:
            recid_pid = PersistentIdentifier.get(
                'recid', self.context['recid'])
        except PIDDoesNotExistError:
            # There's no way to verify if this DOI belongs to this record.
            raise err

        doi_uuid = doi_pid.get_assigned_object()
        recid_uuid = recid_pid.get_assigned_object()
        if doi_uuid and doi_uuid == recid_uuid:
            return
        else:
            # DOI exists and belongs to a different record.
            raise err
def test_app_fixture_lacks_db_isolation_step2(pids_count, app):
    assert PersistentIdentifier.query.count() == pids_count + 1
    # Force the cleanup.
    PersistentIdentifier.get(
        pid_type='type1',
        pid_value='value1',
    ).delete()
def delete(self, delete_published=False, *args, **kwargs):
    """Delete the deposit.

    :param delete_published: If True, even the deposit of a published record
        will be deleted (usually used by admin operations).
    :type delete_published: bool
    """
    is_published = self['_deposit'].get('pid')
    if is_published and not delete_published:
        raise PIDInvalidAction()

    # Delete the recid
    recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])

    versioning = PIDVersioning(child=recid)
    if versioning.exists:
        if versioning.draft_child and \
                self.pid == versioning.draft_child_deposit:
            versioning.remove_draft_child()
        if versioning.last_child:
            index_siblings(versioning.last_child,
                           children=versioning.children.all(),
                           include_pid=True,
                           neighbors_eager=True,
                           with_deposits=True)

    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)

    if 'conceptrecid' in self:
        concept_recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['conceptrecid'])
        if concept_recid.status == PIDStatus.RESERVED:
            db.session.delete(concept_recid)

    # Completely remove the bucket
    bucket = self.files.bucket
    with db.session.begin_nested():
        # Remove the Record-Bucket link
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        mp_q = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects
        Part.query.filter(
            Part.upload_id.in_(mp_q.with_entities(
                MultipartObject.upload_id).subquery())
        ).delete(synchronize_session='fetch')
        mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

    depid = kwargs.get('pid', self.pid)
    if depid:
        depid.delete()

    # NOTE: We call the parent of Deposit, invenio_records.api.Record, since
    # we need to completely override everything that the Deposit.delete
    # method does.
    return super(Deposit, self).delete(*args, **kwargs)
def generate_doi(prefix, experiment=None):
    """Generate random DOI, unique within PIDStore."""
    while True:
        doi = random_doi(prefix, experiment)
        try:
            PersistentIdentifier.get('doi', doi)
        except PIDDoesNotExistError:
            return doi
def generate_recid(experiment):
    """CAP PID generator: return a random recid value unused in PIDStore."""
    while True:
        pid_value = random_pid(experiment)
        try:
            PersistentIdentifier.get('recid', pid_value)
        except PIDDoesNotExistError:
            return pid_value
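# Both generators above rely on the same invenio-pidstore contract:
# PersistentIdentifier.get raises PIDDoesNotExistError when no PID row
# matches, so drawing candidates until the lookup fails yields an unused
# value. A minimal sketch of that probe in isolation (the helper name is
# illustrative, not from the source):
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier


def pid_value_is_free(pid_type, pid_value):
    """Return True if no PID of the given type holds this value."""
    try:
        PersistentIdentifier.get(pid_type, pid_value)
    except PIDDoesNotExistError:
        return True  # lookup failed, so the value is unused
    return False  # a PID row exists, so the value is taken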
def test_delete_with_sqldatabase_error(app):
    """Test a valid record delete request (DELETE .../records/<record_id>)."""
    with app.app_context():
        # create the record using the internal API
        pid, record = create_record(test_data)
        db.session.expire(record.model)
        pid_value = pid.pid_value
        pid_type = pid.pid_type
        record_id = record.id

        db.session.commit()
        Record.get_record(record_id)

        def raise_exception():
            raise SQLAlchemyError()

        with app.test_client() as client:
            # start a new SQLAlchemy session so that it will rollback
            # everything
            nested_transaction = db.session().transaction
            orig_rollback = nested_transaction.rollback
            flags = {'rollbacked': False}

            def custom_rollback(*args, **kwargs):
                flags['rollbacked'] = True
                orig_rollback(*args, **kwargs)
            nested_transaction.rollback = custom_rollback

            with patch.object(PersistentIdentifier, 'delete',
                              side_effect=raise_exception):
                headers = [('Accept', 'application/json')]
                res = client.delete(
                    url_for('invenio_records_rest.recid_item',
                            pid_value=pid_value),
                    headers=headers)
                assert res.status_code == 500
            # check that the transaction is finished
            assert db.session().transaction is not nested_transaction
            # check that the session has rolled back
            assert flags['rollbacked']

    with app.app_context():
        with app.test_client() as client:
            # check that the record and PID have not been deleted
            Record.get_record(record_id)
            assert not PersistentIdentifier.get(pid_type,
                                                pid_value).is_deleted()
            # try to delete without exception; the transaction should have
            # been rolled back
            headers = [('Accept', 'application/json')]
            res = client.delete(url_for('invenio_records_rest.recid_item',
                                        pid_value=pid_value),
                                headers=headers)
            assert res.status_code == 204
            # check database state
            with pytest.raises(NoResultFound):
                Record.get_record(record_id)
            assert PersistentIdentifier.get(pid_type, pid_value).is_deleted()
def missing_pids(self):
    """Return the persistent identifiers not present in PIDStore."""
    missing = []
    for p in self.pids:
        try:
            PersistentIdentifier.get(p.pid_type, p.pid_value)
        except PIDDoesNotExistError:
            missing.append(p)
    return missing
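# A possible caller of missing_pids above, assuming self.pids holds objects
# exposing pid_type and pid_value attributes; the checker instance name is
# hypothetical, not from the source:
for p in checker.missing_pids():
    print('missing PID: {0}:{1}'.format(p.pid_type, p.pid_value))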
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed signatures
    are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    # Claimed signature #1.
    glashow_record_id_claimed = str(
        PersistentIdentifier.get("literature", 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(
        glashow_record_id_claimed)
    glashow_record_uuid_claimed = glashow_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    glashow_record_claimed["authors"][0]["signature_block"] = "HAGp"
    glashow_record_claimed["authors"][0]["curated_relation"] = True
    glashow_record_claimed["authors"][0]["recid"] = "3"
    es.index(index="records-hep", doc_type="hep",
             id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh("records-hep")

    # Claimed signature #2.
    higgs_record_id_claimed = str(
        PersistentIdentifier.get("literature", 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_claimed["authors"][0]["signature_block"] = "HAGp"
    higgs_record_claimed["authors"][0]["curated_relation"] = True
    higgs_record_claimed["authors"][0]["recid"] = "4"
    es.index(index="records-hep", doc_type="hep",
             id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh("records-hep")

    # Not claimed signature.
    higgs_record_id_not_claimed = str(
        PersistentIdentifier.get("literature", 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(
        higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = \
        higgs_record_not_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_not_claimed["authors"][0]["signature_block"] = "HAGp"
    es.index(index="records-hep", doc_type="hep",
             id=higgs_record_id_not_claimed, body=higgs_record_not_claimed)
    es.indices.refresh("records-hep")

    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject(
            ({"3": [glashow_record_uuid_claimed,
                    higgs_record_uuid_claimed,
                    higgs_record_uuid_not_claimed]}, {})
        ),
    ):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject(
                {higgs_record_uuid_claimed: [higgs_record_uuid_not_claimed]}),
        ):
            with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                side_effect=update_authors_recid,
            ):
                disambiguation_clustering("HAGp")

    assert Record.get_record(
        higgs_record_id_not_claimed)["authors"][0]["recid"] == "4"
def _sync_communities(self, dep_comms, rec_comms, record):
    new_dep_comms, new_rec_comms, new_ir_comms = \
        self._get_new_communities(dep_comms, rec_comms, record)

    # Update Communities and OAISet information for all record versions
    conceptrecid = PersistentIdentifier.get('recid',
                                            record['conceptrecid'])
    pv = PIDVersioning(parent=conceptrecid)
    for pid in pv.children:
        rec = ZenodoRecord.get_record(pid.get_assigned_object())
        if rec.id != record.id:
            rec['communities'] = sorted(new_rec_comms)
            if current_app.config['COMMUNITIES_OAI_ENABLED']:
                rec = self._sync_oaisets_with_communities(rec)
            if not rec['communities']:
                del rec['communities']
            rec.commit()

            depid = PersistentIdentifier.get(
                'depid', rec['_deposit']['id'])
            deposit = ZenodoDeposit.get_record(depid.get_assigned_object())
            deposit['communities'] = sorted(new_dep_comms)
            if not deposit['communities']:
                del deposit['communities']
            deposit.commit()

    # Update new version deposit
    if pv.draft_child_deposit:
        draft_dep = ZenodoDeposit.get_record(
            pv.draft_child_deposit.get_assigned_object())
        if draft_dep.id != self.id:
            draft_dep['communities'] = sorted(new_dep_comms)
            if not draft_dep['communities']:
                del draft_dep['communities']
            draft_dep.commit()

    record['communities'] = sorted(new_rec_comms)
    if current_app.config['COMMUNITIES_OAI_ENABLED']:
        record = self._sync_oaisets_with_communities(record)
    if not record['communities']:
        del record['communities']

    self['communities'] = sorted(new_dep_comms)
    if not self['communities']:
        del self['communities']

    # Create InclusionRequests against this record
    self._create_inclusion_requests(new_ir_comms, record)
    # Remove obsolete InclusionRequests against the record and its versions
    self._remove_obsolete_irs(new_ir_comms, record)
    return record
def validate_doi(self, value):
    """Validate if doi exists."""
    if value and has_request_context():
        required_doi = self.context.get('required_doi')
        if value == required_doi:
            return
        try:
            PersistentIdentifier.get('doi', value)
            raise ValidationError(
                _('DOI already exists in Zenodo.'),
                field_names=['doi'])
        except PIDDoesNotExistError:
            pass
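# This variant rejects any DOI already present in PIDStore, while the longer
# validator further above additionally accepts a DOI that is already assigned
# to the record being edited. A sketch of how such a method is typically
# wired into a marshmallow schema; the schema name and field set below are
# assumptions for illustration, not from the source:
from marshmallow import Schema, fields, validates


class DepositMetadataSchema(Schema):
    """Illustrative schema; only the validator wiring matters here."""

    doi = fields.Str()

    @validates('doi')
    def validate_doi(self, value):
        # body as in the function above
        ...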
def _delete_merged_records(pid_type, merged_pid_value, deleted_pid_value,
                           merged_uuid, deleted_uuid):
    InspireRecord.get_record(merged_uuid)._delete(force=True)
    InspireRecord.get_record(deleted_uuid)._delete(force=True)

    merged_pid = PersistentIdentifier.get(pid_type, merged_pid_value)
    deleted_pid = PersistentIdentifier.get(pid_type, deleted_pid_value)
    Redirect.query.filter(Redirect.id == deleted_pid.object_uuid).delete()
    db.session.delete(merged_pid)
    db.session.delete(deleted_pid)
    db.session.commit()
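# _delete_merged_records undoes a merge; the forward operation it reverses is
# a PID redirect, which keeps old links resolving to the surviving record.
# A sketch of that forward step using the invenio-pidstore API (variable
# names follow the helper above; this assumes both PIDs are registered):
deleted_pid = PersistentIdentifier.get(pid_type, deleted_pid_value)
merged_pid = PersistentIdentifier.get(pid_type, merged_pid_value)
deleted_pid.redirect(merged_pid)  # writes a row to the Redirect table
db.session.commit()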
def proc(ai):
    try:
        PersistentIdentifier.get('recid', ai.control_number)
    except PIDDoesNotExistError:
        api_response = requests_retry_session().get(crossref_url % ai.doi)
        if api_response.status_code != 200:
            error('Failed to query crossref for doi: %s. Error code: %s'
                  % (ai.doi, api_response.status_code))
            result['not200'].append(ai.control_number)
            return None

        title = api_response.json()['message']['title'][0].lower()
        if 'addendum' in title or 'corrigendum' in title or 'erratum' in title:
            result['hit'].append((ai.control_number, title))
def test_pid_unassign(app):
    """Test pid object unassignment."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        rec_uuid = uuid.uuid4()
        # Assigned object
        result = runner.invoke(cmd, [
            'create', 'recid', '101',
            '-t', 'rec', '-i', str(rec_uuid)
        ], obj=script_info)
        assert 0 == result.exit_code

        result = runner.invoke(cmd, [
            'get', 'recid', '101',
        ], obj=script_info)
        assert 0 == result.exit_code
        assert 'rec {0} N\n'.format(str(rec_uuid)) == result.output

        result = runner.invoke(cmd, [
            'dereference', 'rec', str(rec_uuid),
        ], obj=script_info)
        assert 0 == result.exit_code
        assert 'recid 101 None\n' == result.output

        result = runner.invoke(cmd, [
            'dereference', 'rec', str(rec_uuid), '-s', 'NEW',
        ], obj=script_info)
        assert 0 == result.exit_code
        assert 'recid 101 None\n' == result.output

        with app.app_context():
            pid = PersistentIdentifier.get('recid', '101')
            assert pid.has_object()
            assert pid.get_assigned_object() == rec_uuid
            assert pid.get_assigned_object('rec') == rec_uuid

        # Unassign the object
        result = runner.invoke(cmd, [
            'unassign', 'recid', '101',
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            pid = PersistentIdentifier.get('recid', '101')
            assert not pid.has_object()
            assert pid.get_assigned_object() is None
            assert pid.get_assigned_object('rec') is None
def sync_pidstore():
    """Populate PID store with all DOIs registered in DataCite."""
    cli = DataCiteMDSClientWrapper()
    dois = cli.doi_get_all().split('\n')
    for doi in dois:
        try:
            PersistentIdentifier.get('doi', doi)
        except PIDDoesNotExistError:
            DataCiteProviderWrapper.create(pid_value=doi)
            click.echo('Record with doi {} added to PID store'.format(doi))
    db.session.commit()
    click.echo('PID Store updated')
def check_pids_migration():
    """Check that the persistent identifiers have been migrated."""
    expected_pids = _load_json('expected_pids.json')

    for exp_pid in expected_pids:
        # Check unchanging properties
        db_pid = PersistentIdentifier.get(exp_pid['pid_type'],
                                          exp_pid['pid_value'])
        for key, value in exp_pid.items():
            if key != 'updated':
                assert str(getattr(db_pid, key)) == str(value)

        # check that deleted PIDs' records are (soft or hard) deleted
        if exp_pid['status'] == PIDStatus.DELETED.value:
            metadata = None
            try:
                record = Record.get_record(exp_pid['pid_value'],
                                           with_deleted=True)
                # Soft deleted record
                metadata = record.model.json
            except NoResultFound:
                # Hard deleted record
                pass
            assert metadata is None

        # Check versioning relations and PIDs
        if exp_pid['pid_type'] == 'b2dep':
            try:
                rec_pid = PersistentIdentifier.get('b2rec',
                                                   exp_pid['pid_value'])
                # If the deposit is deleted, either the record PID was
                # reserved and has been deleted, or it still exists.
                if db_pid.status == PIDStatus.DELETED:
                    assert rec_pid.status != PIDStatus.RESERVED
            except PIDDoesNotExistError:
                # The record PID was only reserved and has been deleted
                # with the deposit PID.
                assert db_pid.status == PIDStatus.DELETED
                continue

            # Check that a parent pid has been created
            versioning = PIDVersioning(child=rec_pid)
            parent = versioning.parent
            assert rec_pid.status in [PIDStatus.RESERVED,
                                      PIDStatus.REGISTERED]
            if rec_pid.status == PIDStatus.RESERVED:
                assert parent.status == PIDStatus.RESERVED
            else:
                assert parent.status == PIDStatus.REDIRECTED
                assert parent.get_redirect() == rec_pid
def ill_register(record_id=None):
    """Interface to register an inter library loan for the administrator.

    Without a record_id, an empty form will be presented.
    """
    if record_id:
        _uuid = PersistentIdentifier.get("recid", record_id).object_uuid
        rec = Record.get_record(_uuid)
    else:
        rec = {}

    _prepare_record(rec, rec_fields)
    _prepare_record_authors(rec)

    start_date = datetime.date.today().isoformat()
    end_date = datetime.date.today() + datetime.timedelta(weeks=4)

    return render_template(
        "circulation_ill_register.html",
        action="register",
        record_id=record_id,
        start_date=start_date,
        end_date=end_date,
        **rec
    )
def datacite_delete(recid):
    """Delete DOI in DataCite.

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    record = get_record(recid)
    if record is None:
        logger.debug("Record %s not found" % recid)
        return

    doi_val = record.get(cfg['PIDSTORE_DATACITE_RECORD_DOI_FIELD'], None)
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception(
            "DOI %s is not assigned to record %s." % (doi_val, recid))

    if pid.is_registered():
        logger.info("Inactivating DOI %s for record %s" % (doi_val, recid))
        if not pid.delete():
            m = "Failed to inactivate DOI %s" % doi_val
            logger.error(m)
            if not datacite_delete.request.is_eager:
                raise datacite_delete.retry(exc=Exception(m))
        else:
            logger.info("Successfully inactivated DOI %s." % doi_val)
def delete(self, *args, **kwargs):
    """Delete the deposit."""
    recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])
    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)
    return super(ZenodoDeposit, self).delete(*args, **kwargs)
def ill_request(record_id=None):
    """Interface to request an inter library loan for the user.

    Without a record_id, an empty form will be presented.
    """
    try:
        get_user(current_user)
    except AttributeError:
        # Anonymous User
        return render_template("invenio_theme/401.html")

    if record_id:
        _uuid = PersistentIdentifier.get("recid", record_id).object_uuid
        rec = Record.get_record(_uuid)
    else:
        rec = {}

    _prepare_record(rec, rec_fields)
    _prepare_record_authors(rec)

    start_date = datetime.date.today().isoformat()
    end_date = datetime.date.today() + datetime.timedelta(weeks=4)

    return render_template(
        "circulation_ill_request.html",
        action="request",
        record_id=record_id,
        start_date=start_date,
        end_date=end_date,
        **rec
    )
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Connect to the before_record_index signal to transform the record for ES.

    To avoid a record and its published deposit drifting apart (e.g. if an
    embargo task updates the record), every time we index a record we also
    index the deposit and overwrite its content with that of the record.

    :param sender: Sender of the signal.
    :param json: JSON to be passed to Elasticsearch.
    :type json: dict
    :param record: Indexed deposit record.
    :type record: `invenio_records.api.Deposit`
    :param index: Elasticsearch index name.
    :type index: str
    """
    if not index.startswith('deposits-records-'):
        return

    if not isinstance(record, ZenodoDeposit):
        record = ZenodoDeposit(record, model=record.model)

    if record['_deposit']['status'] == 'published':
        schema = json['$schema']

        pub_record = record.fetch_published()[1]

        # Temporarily set to draft mode to ensure that `clear` can be called
        json['_deposit']['status'] = 'draft'
        json.clear()
        json.update(copy.deepcopy(pub_record.replace_refs()))

        # Set back to published mode and restore the schema.
        json['_deposit']['status'] = 'published'
        json['$schema'] = schema
        json['_updated'] = pub_record.updated
    else:
        json['_updated'] = record.updated
    json['_created'] = record.created

    # Compute file count and total file size
    files = json.get('_files', [])
    json['filecount'] = len(files)
    json['size'] = sum([f.get('size', 0) for f in files])

    recid = record.get('recid')
    if recid:
        pid = PersistentIdentifier.get('recid', recid)
        pv = PIDVersioning(child=pid)
        relations = serialize_relations(pid)
        if pv.exists:
            if pv.draft_child_deposit:
                is_last = (pv.draft_child_deposit.pid_value
                           == record['_deposit']['id'])
                relations['version'][0]['is_last'] = is_last
                relations['version'][0]['count'] += 1
        else:
            relations = {'version': [{'is_last': True, 'index': 0}, ]}
        if relations:
            json['relations'] = relations
def _publish_new(self, id_=None):
    """Publish new deposit with communities handling."""
    dep_comms = set(self.pop('communities', []))
    record = super(ZenodoDeposit, self)._publish_new(id_=id_)

    conceptrecid = PersistentIdentifier.get('recid',
                                            record['conceptrecid'])
    pv = PIDVersioning(parent=conceptrecid)
    if pv.children.count() > 1:
        files_set = set(f.get_version().file.checksum for f in self.files)
        for prev_recid in pv.children.all()[:-1]:
            rec = ZenodoRecord.get_record(prev_recid.object_uuid)
            prev_files_set = set(f.get_version().file.checksum
                                 for f in rec.files)
            if files_set == prev_files_set:
                raise VersioningFilesError()

        prev_recid = pv.children.all()[-2]
        rec_comms = set(ZenodoRecord.get_record(
            prev_recid.get_assigned_object()).get('communities', []))
    else:
        rec_comms = set()

    record = self._sync_communities(dep_comms, rec_comms, record)
    record.commit()

    # Update the concept recid redirection
    pv.update_redirect()

    RecordDraft.unlink(record.pid, self.pid)
    index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

    return record
def test_double_minting_depid_recid(db):
    """Test using same integer for dep/rec ids."""
    dep_uuid = uuid4()
    data = dict()
    pid = zenodo_deposit_minter(dep_uuid, data)

    # Assert values added to data. Depid and recid have IDs starting from
    # '2' since the conceptrecid is minted first
    assert data['_deposit']['id'] == '2'
    assert data['conceptrecid'] == '1'
    assert data['recid'] == 2
    assert 'doi' not in data

    # Assert pid values
    assert pid.pid_type == 'depid'
    assert pid.pid_value == '2'
    assert pid.status == PIDStatus.REGISTERED
    assert pid.object_uuid == dep_uuid

    # Assert reservation of recid.
    assert PersistentIdentifier.get('recid', pid.pid_value).status \
        == PIDStatus.RESERVED
    db.session.commit()

    # Assert registration of recid.
    rec_uuid = uuid4()
    pid = zenodo_record_minter(rec_uuid, data)
    assert pid.pid_type == 'recid'
    assert pid.pid_value == '2'
    assert pid.status == PIDStatus.REGISTERED
    assert pid.object_uuid == rec_uuid
    assert data['doi'] == '10.5072/zenodo.2'
    assert data['_oai']['id'] == 'oai:zenodo.org:2'
def add_oai_information(obj, eng):
    """Add OAI information, such as the OAI identifier."""
    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] \
            or not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
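# The '_oai.updated' timestamp above uses the OAI-PMH UTC datetime
# granularity. A standard-library check of what that format string produces:
from datetime import datetime

# OAI-PMH granularity: YYYY-MM-DDThh:mm:ssZ (UTC)
assert datetime(2020, 1, 2, 3, 4, 5).strftime('%Y-%m-%dT%H:%M:%SZ') \
    == '2020-01-02T03:04:05Z'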
def test_create_distance_signature_method(small_app):
    """Test the method responsible for creating data in Beard format."""
    pid = PersistentIdentifier.get("literature", 4328)
    publication_id = str(pid.object_uuid)

    signatures_map = {
        'aabe5373-39bf-4d27-bb91-2aec593940a9': {
            'author_affiliation': 'Copenhagen U.',
            'author_claimed': False,
            'author_name': 'Glashow, S.L.',
            'author_recid': False,
            'publication_id': publication_id,
            'signature_id': 'aabe5373-39bf-4d27-bb91-2aec593940a9'
        }
    }
    uuid = 'aabe5373-39bf-4d27-bb91-2aec593940a9'

    distance_signature = {
        'author_affiliation': 'Copenhagen U.',
        'author_claimed': False,
        'author_name': 'Glashow, S.L.',
        'author_recid': False,
        'publication_id': publication_id,
        'signature_id': 'aabe5373-39bf-4d27-bb91-2aec593940a9',
        'publication': {
            'publication_id': publication_id,
            'year': '1961',
            'authors': ['Glashow, S.L.']
        }
    }

    assert _create_distance_signature(signatures_map, uuid) == \
        distance_signature
def test_delete_draft(api, api_client, db, es, location, json_auth_headers,
                      auth_headers, deposit_url, get_json, license_record):
    """Test deleting a Deposit draft using the REST API."""
    # Setting var this way doesn't work
    headers = json_auth_headers
    client = api_client

    links, data = create_deposit(
        client, headers, auth_headers, deposit_url, get_json, {})

    # Two 'recid' PIDs - the concept PID and the version PID
    assert PersistentIdentifier.query.filter_by(pid_type='recid').count() == 2
    recid = PersistentIdentifier.get('recid', str(data['record_id']))
    depid = PersistentIdentifier.query.filter_by(pid_type='depid').one()
    assert recid.status == PIDStatus.RESERVED
    assert depid.status == PIDStatus.REGISTERED

    # Get deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.get(links['self'], headers=auth_headers)
    assert response.status_code == 200

    # Delete deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.delete(links['self'], headers=auth_headers)
    assert response.status_code == 204

    # The 'recid' PID should be removed, while 'depid' should have status
    # deleted. No 'doi' PIDs should be created without publishing.
    assert PersistentIdentifier.query.filter_by(pid_type='recid').count() == 0
    depid = PersistentIdentifier.query.filter_by(pid_type='depid').one()
    assert PersistentIdentifier.query.filter_by(pid_type='doi').count() == 0
    assert depid.status == PIDStatus.DELETED
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = \
                file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see the 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise used as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    """
    from invenio_github.models import ReleaseStatus
    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError("User cannot be determined from argument: {0}".format(
            user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from the index
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    if 'conceptdoi' in record:
        conceptdoi_value = record['conceptdoi']
    else:
        conceptdoi_value = None

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    # Guard added: `doi` may be None when the record never had a DOI PID;
    # the original unguarded calls would raise AttributeError in that case.
    if doi and not is_local_doi(doi.pid_value):
        db.session.delete(doi)
    db.session.commit()

    # After a successful DB commit, sync the DOIs with DataCite
    if doi and is_local_doi(doi.pid_value):
        datacite_inactivate.delay(doi.pid_value)
    if conceptdoi_value:
        if new_last_child:
            # Update the last child (also updates the conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from the OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] and original_id \
            and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)
def pid(self):
    """Return an instance of record PID."""
    pid = b2share_record_uuid_fetcher(self.id, self)
    return PersistentIdentifier.get(pid.pid_type, pid.pid_value)
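# The property pairs a fetcher, which computes (pid_type, pid_value) purely
# from the record data, with PersistentIdentifier.get, which loads the actual
# database row. A sketch of the round-trip; the record class name below is
# an assumption for illustration:
record = SomeB2ShareRecord.get_record(record_uuid)  # class name assumed
fetched = b2share_record_uuid_fetcher(record.id, record)
assert record.pid.pid_value == fetched.pid_value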
def make_combined_pdf(pid, obj_file_uri, fileobj, obj, lang_user):
    """Make the cover-page-combined PDF file.

    :param pid: PID object
    :param obj_file_uri: URI of the file object
    :param lang_user: LANGUAGE of access user
    :return: cover-page-combined PDF file object
    """
    lang_filepath = current_app.config['PDF_COVERPAGE_LANG_FILEPATH'] \
        + lang_user + current_app.config['PDF_COVERPAGE_LANG_FILENAME']

    pidObject = PersistentIdentifier.get('recid', pid.pid_value)
    item_metadata_json = ItemsMetadata.get_record(pidObject.object_uuid)
    item_type = ItemsMetadata.get_by_object_id(pidObject.object_uuid)
    item_type_id = item_type.item_type_id
    type_mapping = Mapping.get_record(item_type_id)
    item_map = get_mapping(type_mapping, "jpcoar_mapping")

    with open(lang_filepath) as json_datafile:
        lang_data = json.loads(json_datafile.read())

    # Initialize Instance
    pdf = FPDF('P', 'mm', 'A4')
    pdf.add_page()
    pdf.set_margins(20.0, 20.0)
    pdf.set_fill_color(100, 149, 237)
    pdf.add_font(
        'IPAexg', '', current_app.config["JPAEXG_TTF_FILEPATH"], uni=True)
    pdf.add_font(
        'IPAexm', '', current_app.config["JPAEXM_TTF_FILEPATH"], uni=True)

    # Parameters such as width and height of rows/columns
    w1 = 40  # width of the left column
    w2 = 130  # width of the right column
    footer_w = 90  # width of the footer cell
    # url_oapolicy_h = 7
    # height of the URL & OA-policy
    url_oapolicy_h = current_app.config['URL_OA_POLICY_HEIGHT']
    # title_h = 8
    title_h = current_app.config['TITLE_HEIGHT']  # height of the title
    # header_h = 20
    header_h = current_app.config['HEADER_HEIGHT']  # height of the header cell
    # footer_h = 4
    footer_h = current_app.config['FOOTER_HEIGHT']  # height of the footer cell
    # meta_h = 9
    meta_h = current_app.config['METADATA_HEIGHT']  # height of the metadata cell
    # number of maximum letters that can be contained in the right column
    max_letters_num = 51
    cc_logo_xposition = 160  # x-position of Creative Commons logos

    # Get the header settings
    record = PDFCoverPageSettings.find(1)
    header_display_type = record.header_display_type
    header_output_string = record.header_output_string
    header_output_image = record.header_output_image
    header_display_position = record.header_display_position

    # Set the header position
    positions = {}
    if header_display_position == 'left':
        positions['str_position'] = 'L'
        positions['img_position'] = 20
    elif header_display_position == 'center' \
            or header_display_position is None:
        positions['str_position'] = 'C'
        positions['img_position'] = 85
    elif header_display_position == 'right':
        positions['str_position'] = 'R'
        positions['img_position'] = 150

    # Show header (string or image)
    if header_display_type == 'string':
        pdf.set_font('IPAexm', '', 22)
        pdf.multi_cell(w1 + w2, header_h, header_output_string, 0,
                       positions['str_position'], False)
    else:
        pdf.image(header_output_image, x=positions['img_position'], y=None,
                  w=0, h=30, type='')
        pdf.set_y(55)

    # Title settings
    title = item_metadata_json['title']
    pdf.set_font('IPAexm', '', 20)
    pdf.multi_cell(w1 + w2, title_h, title, 0, 'L', False)
    pdf.ln(h='15')

    # Metadata
    fg = WekoFeedGenerator()
    fe = fg.add_entry()
    _file = 'file.URI.@value'
    _file_item_id = None
    if _file in item_map:
        _file_item_id = item_map[_file].split('.')[0]
        _file_item_id = _file_item_id.replace('fileinfo', 'files')
    _creator = 'creator.creatorName.@value'
    _creator_item_id = None
    if _creator in item_map:
        _creator_item_id = item_map[_creator].split('.')[0]
    # NOTE: the two '@attributes' mapping keys below were mangled by an
    # email-obfuscation artifact in the source; they are reconstructed here
    # from the matching '@value' keys.
    publisher_attr_lang = 'publisher.@attributes.xml:lang'
    publisher_value = 'publisher.@value'
    publisher_item_id = None
    publisher_lang_id = None
    publisher_text_id = None
    keyword_attr_lang = 'subject.@attributes.xml:lang'
    keyword_attr_value = 'subject.@value'
    keyword_base = None
    keyword_lang = None

    pdf.set_font('Arial', '', 14)
    pdf.set_font('IPAexg', '', 14)

    if item_metadata_json['lang'] == 'en':
        item_metadata_json['lang'] = 'English'
    elif item_metadata_json['lang'] == 'ja':
        item_metadata_json['lang'] = 'Japanese'

    try:
        lang_field = item_map['language.@value'].split('.')
        if item_metadata_json[lang_field[0]][lang_field[1]] == 'eng':
            item_metadata_json['lang'] = 'English'
        elif item_metadata_json[lang_field[0]][lang_field[1]] == 'jpn':
            item_metadata_json['lang'] = 'Japanese'
    except BaseException:
        pass

    try:
        lang = item_metadata_json.get('lang')
    except (KeyError, IndexError):
        lang = None

    try:
        publisher_item_id = item_map[publisher_attr_lang].split('.')[0]
        publisher_lang_ids = item_map[publisher_attr_lang].split('.')[1:]
        publisher_text_ids = item_map[publisher_value].split('.')[1:]
        publisher = None
        default_publisher = None
        publishers = item_metadata_json[publisher_item_id]
        pair_name_language_publisher = get_pair_value(publisher_text_ids,
                                                      publisher_lang_ids,
                                                      publishers)
        for publisher_name, publisher_lang in pair_name_language_publisher:
            if publisher_lang == lang_user:
                publisher = publisher_name
            if publisher_lang == 'en':
                default_publisher = publisher_name
        if publisher is None:
            publisher = default_publisher
    except (KeyError, IndexError):
        publisher = None

    try:
        pubdate = item_metadata_json.get('pubdate')
    except (KeyError, IndexError):
        pubdate = None

    try:
        keyword_item_id = item_map[keyword_attr_lang].split('.')[0]
        keyword_item_langs = item_map[keyword_attr_lang].split('.')[1:]
        keyword_item_values = item_map[keyword_attr_value].split('.')[1:]
        keyword_base = item_metadata_json.get(keyword_item_id)
        keywords_ja = None
        keywords_en = None
        pair_name_language_keyword = get_pair_value(keyword_item_values,
                                                    keyword_item_langs,
                                                    keyword_base)
        for name, lang in pair_name_language_keyword:
            keyword_lang = lang
            if keyword_lang == 'ja':
                keywords_ja = name
            elif keyword_lang == 'en':
                keywords_en = name
    except (KeyError, IndexError):
        pass

    creator_items = item_metadata_json.get(_creator_item_id)
    if type(creator_items) is dict:
        creator_items = [creator_items]

    creator_mail_list = []
    creator_name_list = []
    creator_affiliation_list = []
    for creator_item in creator_items:
        # Get creator mail
        if creator_item.get('creatorMails'):
            for creator_mail in creator_item.get('creatorMails'):
                if creator_mail.get('creatorMail'):
                    creator_mail_list.append(creator_mail.get('creatorMail'))
        # Get creator name
        default_creator_name_list = []
        if creator_item.get('creatorNames'):
            for creator_name in creator_item.get('creatorNames'):
                if creator_name.get('creatorNameLang') == lang_user:
                    creator_name_list.append(creator_name.get('creatorName'))
                if creator_name.get('creatorNameLang') == 'en':
                    default_creator_name_list.append(
                        creator_name.get('creatorName'))
        if not creator_name_list and default_creator_name_list:
            creator_name_list = default_creator_name_list
        # Get creator affiliation
        default_creator_affiliation_list = []
        if creator_item.get('affiliation'):
            for creator_affiliation in creator_item.get('affiliation'):
                if creator_affiliation.get('affiliationNameLang') \
                        == lang_user:
                    creator_affiliation_list.append(
                        creator_affiliation.get('affiliationName'))
                if creator_affiliation.get('affiliationNameLang') == 'en':
                    default_creator_affiliation_list.append(
                        creator_affiliation.get('affiliationName'))
        if not creator_affiliation_list \
                and default_creator_affiliation_list:
            creator_affiliation_list = default_creator_affiliation_list

    seperator = ', '
    metadata_dict = {
        "lang": lang,
        "publisher": publisher,
        "pubdate": pubdate,
        "keywords_ja": keywords_ja,
        "keywords_en": keywords_en,
        "creator_mail": seperator.join(creator_mail_list),
        "creator_name": seperator.join(creator_name_list),
        "affiliation": seperator.join(creator_affiliation_list)
    }

    # Change the values from None to '' for printing
    for key in metadata_dict:
        if metadata_dict[key] is None:
            metadata_dict[key] = ''

    metadata_list = [
        "{}: {}".format(lang_data["Metadata"]["LANG"],
                        metadata_dict["lang"]),
        "{}: {}".format(lang_data["Metadata"]["PUBLISHER"],
                        metadata_dict["publisher"]),
        "{}: {}".format(lang_data["Metadata"]["PUBLICDATE"],
                        metadata_dict["pubdate"]),
        "{} (Ja): {}".format(lang_data["Metadata"]["KEY"],
                             metadata_dict["keywords_ja"]),
        "{} (En): {}".format(lang_data["Metadata"]["KEY"],
                             metadata_dict["keywords_en"]),
        "{}: {}".format(lang_data["Metadata"]["AUTHOR"],
                        metadata_dict["creator_name"]),
        "{}: {}".format(lang_data["Metadata"]["EMAIL"],
                        metadata_dict["creator_mail"]),
        "{}: {}".format(lang_data["Metadata"]["AFFILIATED"],
                        metadata_dict["affiliation"])
    ]
    metadata = '\n'.join(metadata_list)
    metadata_lfnum = int(metadata.count('\n'))
    for item in metadata_list:
        metadata_lfnum += int(
            get_east_asian_width_count(item)) // max_letters_num

    url = ''  # will be modified later
    url_lfnum = int(get_east_asian_width_count(url)) // max_letters_num

    oa_policy = ''  # will be modified later
    oa_policy_lfnum = int(
        get_east_asian_width_count(oa_policy)) // max_letters_num

    # Save top coordinate
    top = pdf.y
    # Calculate x position of next cell
    offset = pdf.x + w1
    pdf.multi_cell(w1, meta_h,
                   lang_data["Title"]["METADATA"]
                   + '\n' * (metadata_lfnum + 1),
                   1, 'C', True)
    # Reset y coordinate
    pdf.y = top
    # Move to computed offset
    pdf.x = offset
    pdf.multi_cell(w2, meta_h, metadata, 1, 'L', False)

    top = pdf.y
    pdf.multi_cell(w1, url_oapolicy_h,
                   lang_data["Title"]["URL"] + '\n' * (url_lfnum + 1),
                   1, 'C', True)
    pdf.y = top
    pdf.x = offset
    pdf.multi_cell(w2, url_oapolicy_h, url, 1, 'L', False)

    top = pdf.y
    pdf.multi_cell(w1, url_oapolicy_h,
                   lang_data["Title"]["OAPOLICY"]
                   + '\n' * (oa_policy_lfnum + 1),
                   1, 'C', True)
    pdf.y = top
    pdf.x = offset
    pdf.multi_cell(w2, url_oapolicy_h, oa_policy, 1, 'L', False)
    pdf.ln(h=1)

    # Footer
    pdf.set_font('Courier', '', 10)
    pdf.set_x(108)

    try:
        license = item_metadata_json[_file_item_id][0].get('licensetype')
    except (KeyError, IndexError, TypeError):
        license = None

    list_license_dict = current_app.config['WEKO_RECORDS_UI_LICENSE_DICT']
    for item in list_license_dict:
        if item['value'] == license:
            get_license_pdf(license, item_metadata_json, pdf, _file_item_id,
                            footer_w, footer_h, cc_logo_xposition, item)
            break
    else:
        pdf.multi_cell(footer_w, footer_h, '', 0, 'L', False)

    # Convert the PDF cover page data to bytes
    output = pdf.output(dest='S').encode('latin-1')
    b_output = io.BytesIO(output)

    # Combine cover page and existing pages
    cover_page = PdfFileReader(b_output)
    f = open(obj_file_uri, "rb")
    existing_pages = PdfFileReader(f)

    # In case the PDF file is encrypted with the empty password ''
    # (i.e. not encrypted intentionally)
    if existing_pages.isEncrypted:
        try:
            existing_pages.decrypt('')
        except BaseException:
            # Errors such as NotImplementedError
            return ObjectResource.send_object(
                obj.bucket, obj,
                expected_chksum=fileobj.get('checksum'),
                logger_data={
                    'bucket_id': obj.bucket_id,
                    'pid_type': pid.pid_type,
                    'pid_value': pid.pid_value,
                },
                as_attachment=False)

    # In case the PDF file is encrypted with a password other than ''
    if existing_pages.isEncrypted:
        return ObjectResource.send_object(
            obj.bucket, obj,
            expected_chksum=fileobj.get('checksum'),
            logger_data={
                'bucket_id': obj.bucket_id,
                'pid_type': pid.pid_type,
                'pid_value': pid.pid_value,
            },
            as_attachment=False)

    combined_pages = PdfFileWriter()
    combined_pages.addPage(cover_page.getPage(0))
    for page_num in range(existing_pages.numPages):
        existing_page = existing_pages.getPage(page_num)
        combined_pages.addPage(existing_page)

    # Download the newly generated combined PDF file
    try:
        combined_filename = 'CV_' + datetime.now().strftime('%Y%m%d') + '_' \
            + item_metadata_json[_file_item_id][0].get("filename")
    except (KeyError, IndexError):
        combined_filename = 'CV_' + title + '.pdf'
    combined_filepath = "/code/invenio/{}.pdf".format(combined_filename)
    combined_file = open(combined_filepath, "wb")
    combined_pages.write(combined_file)
    combined_file.close()

    return send_file(
        combined_filepath,
        as_attachment=True,
        attachment_filename=combined_filename,
        mimetype='application/pdf',
        cache_timeout=-1)
def delete(self, delete_published=False, *args, **kwargs):
    """Delete the deposit.

    :param delete_published: If True, even the deposit of a published record
        will be deleted (usually used by admin operations).
    :type delete_published: bool
    """
    is_published = self['_deposit'].get('pid')
    if is_published and not delete_published:
        raise PIDInvalidAction()

    # Delete the recid
    recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])

    versioning = PIDVersioning(child=recid)
    if versioning.exists:
        if versioning.draft_child and \
                self.pid == versioning.draft_child_deposit:
            versioning.remove_draft_child()
        if versioning.last_child:
            index_siblings(versioning.last_child,
                           children=versioning.children.all(),
                           include_pid=True,
                           neighbors_eager=True,
                           with_deposits=True)

    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)

    if 'conceptrecid' in self:
        concept_recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['conceptrecid'])
        if concept_recid.status == PIDStatus.RESERVED:
            db.session.delete(concept_recid)

    # Completely remove the buckets
    bucket = self.files.bucket
    extra_formats_bucket = None
    if 'extra_formats' in self['_buckets']:
        extra_formats_bucket = self.extra_formats.bucket

    with db.session.begin_nested():
        # Remove the Record-Bucket link
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        mp_q = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects
        Part.query.filter(
            Part.upload_id.in_(mp_q.with_entities(
                MultipartObject.upload_id).subquery())
        ).delete(synchronize_session='fetch')
        mp_q.delete(synchronize_session='fetch')
        if extra_formats_bucket:
            extra_formats_bucket.remove()
        bucket.locked = False
        bucket.remove()

    depid = kwargs.get('pid', self.pid)
    if depid:
        depid.delete()

    # NOTE: We call the parent of Deposit, invenio_records.api.Record, since
    # we need to completely override everything that the Deposit.delete
    # method does.
    return super(Deposit, self).delete(*args, **kwargs)
def pid_from_value(pid_value, pid_type='recid'):
    """Fetch a PID by value, or return None if it cannot be resolved."""
    try:
        return PersistentIdentifier.get(pid_type=pid_type,
                                        pid_value=pid_value)
    except Exception:
        return None
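# Because the bare `except Exception` swallows every failure, a caller must
# treat None as "not found or lookup failed". A usage sketch under that
# assumption (the DOI value is illustrative):
pid = pid_from_value('10.5281/zenodo.1234', pid_type='doi')
if pid is None:
    # either no such PID exists or the lookup itself failed
    print('unknown DOI')
else:
    print('resolves to object {0}'.format(pid.get_assigned_object()))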
def test_avc_workflow_receiver_local_file_pass(api_app, db, api_project,
                                               access_token, json_headers,
                                               mock_sorenson, online_video,
                                               webhooks, local_file):
    """Test AVCWorkflow receiver."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    bucket_id = ObjectVersion.query.filter_by(
        version_id=local_file).one().bucket_id
    video_size = 5510872
    master_key = 'test.mp4'
    slave_keys = [
        '{0}.mp4'.format(quality)
        for quality in get_presets_applied().keys()
        if quality != '1024p'
    ]

    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list', receiver_id='avc',
                      access_token=access_token)

    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(
            uri=online_video,
            deposit_id=video_1_depid,
            key=master_key,
            sse_channel=sse_channel,
            sleep_time=0,
            version_id=str(local_file),
        )
        # [[ RUN WORKFLOW ]]
        resp = client.post(url, headers=json_headers,
                           data=json.dumps(payload))

        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['key'] == master_key
        assert 'version_id' in data
        assert data.get('presets') == get_all_distinct_qualities()
        assert 'links' in data  # TODO decide which links are needed

        assert ObjectVersion.query.count() == get_object_count()

        # Master file
        master = ObjectVersion.get(bucket_id, master_key)
        tags = master.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert master.key == master_key
        assert str(master.version_id) == data['version_id']
        assert master.file
        assert master.file.size == video_size

        # Check metadata tags
        metadata_keys = [
            'duration', 'bit_rate', 'size', 'avg_frame_rate', 'codec_name',
            'codec_long_name', 'width', 'height', 'nb_frames',
            'display_aspect_ratio', 'color_range'
        ]
        assert all([key in tags for key in metadata_keys])

        assert ObjectVersion.query.count() == get_object_count()
        assert ObjectVersionTag.query.count() == get_tag_count(is_local=True)

        # Check metadata patch
        recid = PersistentIdentifier.get('depid', video_1_depid).object_uuid
        record = Record.get_record(recid)
        assert 'extracted_metadata' in record['_cds']
        assert all([
            key in str(record['_cds']['extracted_metadata'])
            for key in metadata_keys
        ])

        # Check slaves
        for slave_key in slave_keys:
            slave = ObjectVersion.get(bucket_id, slave_key)
            tags = slave.get_tags()
            assert slave.key == slave_key
            assert '_sorenson_job_id' in tags
            assert tags['_sorenson_job_id'] == '1234'
            assert 'master' in tags
            assert tags['master'] == str(master.version_id)
            assert master.file
            assert master.file.size == video_size

        video = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(video['_deposit']['id'])

        # check deposit tasks status
        tasks_status = get_tasks_status_by_task(events)
        assert len(tasks_status) == 3
        assert 'file_transcode' in tasks_status
        assert 'file_video_extract_frames' in tasks_status
        assert 'file_video_metadata_extraction' in tasks_status

        # check single status
        collector = CollectInfoTasks()
        iterate_events_results(events=events, fun=collector)
        info = list(collector)
        assert len(info) == 11
        assert info[0][0] == 'file_video_metadata_extraction'
        assert info[0][1].status == states.SUCCESS
        assert info[1][0] == 'file_video_extract_frames'
        assert info[1][1].status == states.SUCCESS
        transcode_tasks = info[2:]
        statuses = [task[1].status for task in info[2:]]
        assert len(transcode_tasks) == len(statuses)
        assert [
            states.SUCCESS, states.REVOKED, states.REVOKED, states.REVOKED,
            states.SUCCESS, states.REVOKED, states.REVOKED, states.REVOKED,
            states.REVOKED
        ] == statuses

        # check tags (exclude 'uri-origin')
        assert ObjectVersionTag.query.count() == (get_tag_count() - 1)

        # check sse is called
        assert mock_sse.called

        messages = [
            (sse_channel, states.SUCCESS, 'file_video_metadata_extraction'),
            (sse_channel, states.STARTED, 'file_transcode'),
            (sse_channel, states.SUCCESS, 'file_transcode'),
            (sse_channel, states.REVOKED, 'file_transcode'),  # ResolutionError
            (sse_channel, states.STARTED, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'update_deposit'),
        ]

        call_args = []
        for (_, kwargs) in mock_sse.call_args_list:
            type_ = kwargs['type_']
            state = kwargs['data']['state']
            channel = kwargs['channel']
            tuple_ = (channel, state, type_)
            if tuple_ not in call_args:
                call_args.append(tuple_)

        assert len(call_args) == len(messages)
        for message in messages:
            assert message in call_args

        deposit = deposit_video_resolver(video_1_depid)

        def filter_events(call_args):
            _, x = call_args
            return x['type_'] == 'update_deposit'

        list_kwargs = list(filter(filter_events, mock_sse.call_args_list))
        assert len(list_kwargs) == 10
        _, kwargs = list_kwargs[8]
        assert kwargs['type_'] == 'update_deposit'
        assert kwargs['channel'] == 'mychannel'
        assert kwargs['data']['state'] == states.SUCCESS
        assert kwargs['data']['meta']['payload'] == {
            'deposit_id': deposit['_deposit']['id'],
            'event_id': data['tags']['_event_id'],
            'deposit': deposit,
        }

        # check ElasticSearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids
        assert deposit['_cds']['state'] == {
            'file_video_metadata_extraction': states.SUCCESS,
            'file_video_extract_frames': states.SUCCESS,
            'file_transcode': states.SUCCESS,
        }

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'],
                                        access_token)

    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        # [[ DELETE WORKFLOW ]]
        resp = client.delete(url, headers=json_headers)

        assert resp.status_code == 201

        # check that object versions and tags are deleted
        # (Create + Delete) * Num Objs - 1 (because the file is local and
        # will not be touched)
        assert ObjectVersion.query.count() == 2 * get_object_count() - 1
        # Tags associated with the old version
        assert ObjectVersionTag.query.count() == get_tag_count(is_local=True)
        bucket = Bucket.query.first()
        # and bucket is empty
        assert bucket.size == 0

        record = RecordMetadata.query.filter_by(id=video_1_id).one()

        # check metadata patch are deleted
        assert 'extracted_metadata' not in record.json['_cds']

        # check the corresponding Event persisted after cleaning
        assert len(get_deposit_events(record.json['_deposit']['id'])) == 0
        assert len(get_deposit_events(record.json['_deposit']['id'],
                                      _deleted=True)) == 1

        # check no SSE message and reindexing is fired
        assert mock_sse.called is False
        assert mock_indexer.called is False
def docs(files, mode):
    """Load demo article records."""
    from slugify import slugify

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/docs-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/docs')

    if files:
        articles_json = files
    else:
        articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        # name = filename.split('/')[-1]
        # if name.startswith('opera'):
        #     click.echo('Skipping opera records ...')
        #     continue

        click.echo('Loading docs from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                # Replace body with the corresponding content
                assert data["body"]["content"]
                content_filename = os.path.join(
                    *(["/", ]
                      + filename.split('/')[:-1]
                      + [data["body"]["content"], ])
                )

                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()

                if "collections" not in data and \
                        not isinstance(data.get("collections", None), str):
                    data["collections"] = []

                if mode == 'insert-or-replace':
                    try:
                        pid = PersistentIdentifier.get(
                            'docid',
                            str(slugify(data.get('slug', data['title']))))
                        if pid:
                            record = update_doc(pid, data)
                            action = 'updated'
                    except PIDDoesNotExistError:
                        record = create_doc(data, schema)
                        action = 'inserted'
                elif mode == 'insert':
                    try:
                        pid = PersistentIdentifier.get(
                            'docid',
                            str(slugify(data.get('slug', data['title']))))
                        if pid:
                            click.echo(
                                'Record docid {} exists already;'
                                ' cannot insert it. '.format(
                                    str(slugify(
                                        data.get('slug', data['title'])))),
                                err=True)
                            return
                    except PIDDoesNotExistError:
                        record = create_doc(data, schema)
                        action = 'inserted'
                else:
                    try:
                        pid = PersistentIdentifier.get(
                            'docid',
                            str(slugify(data.get('slug', data['title']))))
                    except PIDDoesNotExistError:
                        click.echo(
                            'Record docid {} does not exist; '
                            'cannot replace it.'.format(
                                str(slugify(
                                    data.get('slug', data['title'])))),
                            err=True)
                        return
                    record = update_doc(pid, data)
                    action = 'updated'

                record.commit()
                db.session.commit()
                click.echo(
                    ' Record docid {0} {1}.'.format(
                        str(slugify(data.get('slug', data['title']))),
                        action))
                indexer.index(record)
                db.session.expunge_all()
def records(skip_files, files, profile, mode):
    """Load all records."""
    if profile:
        import cProfile
        import pstats
        import StringIO
        pr = cProfile.Profile()
        pr.enable()

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/record-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/records')
    action = None

    if files:
        record_json = files
    else:
        record_json = glob.glob(os.path.join(data, '*.json'))

    for filename in record_json:
        # name = filename.split('/')[-1]
        # if name.startswith('opera'):
        #     click.echo('Skipping opera records ...')
        #     continue

        click.echo('Loading records from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                if not data:
                    click.echo('IGNORING a possibly broken or corrupted '
                               'record entry in file {0} ...'.format(
                                   filename))
                    continue

                files = data.get('files', [])

                if mode == 'insert-or-replace':
                    try:
                        pid = PersistentIdentifier.get('recid',
                                                       data['recid'])
                        if pid:
                            record = update_record(
                                pid, schema, data, files, skip_files)
                            action = 'updated'
                    except PIDDoesNotExistError:
                        record = create_record(schema, data, files,
                                               skip_files)
                        action = 'inserted'
                elif mode == 'insert':
                    try:
                        pid = PersistentIdentifier.get('recid',
                                                       data['recid'])
                        if pid:
                            click.echo(
                                'Record recid {} exists already;'
                                ' cannot insert it. '.format(
                                    data.get('recid')), err=True)
                            return
                    except PIDDoesNotExistError:
                        record = create_record(schema, data, files,
                                               skip_files)
                        action = 'inserted'
                else:
                    try:
                        pid = PersistentIdentifier.get('recid',
                                                       data['recid'])
                    except PIDDoesNotExistError:
                        click.echo(
                            'Record recid {} does not exist; '
                            'cannot replace it.'.format(
                                data.get('recid')), err=True)
                        return
                    record = update_record(
                        pid, schema, data, files, skip_files)
                    action = 'updated'

                if not skip_files:
                    record.files.flush()
                record.commit()
                db.session.commit()
                click.echo(
                    'Record recid {0} {1}.'.format(
                        data.get('recid'), action))
                indexer.index(record)
                db.session.expunge_all()

    if profile:
        pr.disable()
        s = StringIO.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
def get_record_by_pid(cls, pid):
    """Get record by pid."""
    pid = PersistentIdentifier.get('depid', pid)
    return cls.get_record(id_=pid.object_uuid)
def test_avc_workflow_receiver_pass(api_app, db, api_project, access_token,
                                    json_headers, mock_sorenson,
                                    online_video, webhooks, users):
    """Test AVCWorkflow receiver."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    bucket_id = video_1['_buckets']['deposit']
    video_size = 5510872
    master_key = 'test.mp4'
    slave_keys = ['{0}.mp4'.format(quality)
                  for quality in get_presets_applied()
                  if quality != '1024p']
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='avc', access_token=access_token)

    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(
            uri=online_video,
            deposit_id=video_1_depid,
            key=master_key,
            sse_channel=sse_channel,
            sleep_time=0,
        )
        resp = client.post(url, headers=json_headers,
                           data=json.dumps(payload))
        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['tags']['uri_origin'] == online_video
        assert data['key'] == master_key
        assert 'version_id' in data
        assert data.get('presets') == get_all_distinct_qualities()
        assert 'links' in data  # TODO decide which links are needed

        assert ObjectVersion.query.count() == get_object_count()

        # Master file
        master = ObjectVersion.get(bucket_id, master_key)
        tags = master.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert master.key == master_key
        assert str(master.version_id) == data['version_id']
        assert master.file
        assert master.file.size == video_size

        # Check metadata tags
        metadata_keys = ['duration', 'bit_rate', 'size', 'avg_frame_rate',
                         'codec_name', 'codec_long_name', 'width', 'height',
                         'nb_frames', 'display_aspect_ratio', 'color_range']
        assert all([key in tags for key in metadata_keys])

        # Check metadata patch
        recid = PersistentIdentifier.get('depid',
                                         video_1_depid).object_uuid
        record = Record.get_record(recid)
        assert 'extracted_metadata' in record['_cds']
        assert all([key in str(record['_cds']['extracted_metadata'])
                    for key in metadata_keys])

        # Check slaves
        for slave_key in slave_keys:
            slave = ObjectVersion.get(bucket_id, slave_key)
            tags = slave.get_tags()
            assert slave.key == slave_key
            assert '_sorenson_job_id' in tags
            assert tags['_sorenson_job_id'] == '1234'
            assert 'master' in tags
            assert tags['master'] == str(master.version_id)
            assert master.file
            assert master.file.size == video_size

        video = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(video['_deposit']['id'])

        # check deposit tasks status
        tasks_status = get_tasks_status_by_task(events)
        assert len(tasks_status) == 4
        assert 'file_download' in tasks_status
        assert 'file_transcode' in tasks_status
        assert 'file_video_extract_frames' in tasks_status
        assert 'file_video_metadata_extraction' in tasks_status

        # check single status
        collector = CollectInfoTasks()
        iterate_events_results(events=events, fun=collector)
        info = list(collector)
        presets = get_presets_applied().keys()
        assert info[0][0] == 'file_download'
        assert info[0][1].status == states.SUCCESS
        assert info[1][0] == 'file_video_metadata_extraction'
        assert info[1][1].status == states.SUCCESS
        assert info[2][0] == 'file_video_extract_frames'
        assert info[2][1].status == states.SUCCESS
        for i in info[3:]:
            assert i[0] == 'file_transcode'
            if i[1].status == states.SUCCESS:
                assert i[1].result['payload']['preset_quality'] in presets

        # check tags
        assert ObjectVersionTag.query.count() == get_tag_count()

        # check that SSE is called
        assert mock_sse.called

        messages = [
            (sse_channel, states.STARTED, 'file_download'),
            (sse_channel, states.SUCCESS, 'file_download'),
            (sse_channel, states.SUCCESS,
             'file_video_metadata_extraction'),
            (sse_channel, states.STARTED, 'file_transcode'),
            (sse_channel, states.SUCCESS, 'file_transcode'),
            (sse_channel, states.REVOKED,
             'file_transcode'),  # ResolutionError
            (sse_channel, states.STARTED, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'update_deposit'),
        ]

        call_args = []
        for (_, kwargs) in mock_sse.call_args_list:
            type_ = kwargs['type_']
            state = kwargs['data']['state']
            channel = kwargs['channel']
            tuple_ = (channel, state, type_)
            if tuple_ not in call_args:
                call_args.append(tuple_)

        assert len(call_args) == len(messages)
        for message in messages:
            assert message in call_args

        deposit = deposit_video_resolver(video_1_depid)

        def filter_events(call_args):
            _, x = call_args
            return x['type_'] == 'update_deposit'

        list_kwargs = list(filter(filter_events, mock_sse.call_args_list))
        assert len(list_kwargs) == 12
        _, kwargs = list_kwargs[10]
        assert kwargs['type_'] == 'update_deposit'
        assert kwargs['channel'] == 'mychannel'
        assert kwargs['data']['state'] == states.SUCCESS
        assert kwargs['data']['meta']['payload'] == {
            'deposit_id': deposit['_deposit']['id'],
            'event_id': data['tags']['_event_id'],
            'deposit': deposit,
        }

        # check that Elasticsearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids

        assert deposit['_cds']['state'] == {
            'file_download': states.SUCCESS,
            'file_video_metadata_extraction': states.SUCCESS,
            'file_video_extract_frames': states.SUCCESS,
            'file_transcode': states.SUCCESS,
        }

    # check feedback from an anonymous user
    event_id = data['tags']['_event_id']
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id, receiver_id='avc')
    with api_app.test_client() as client:
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 401

    # check feedback from the owner
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id, receiver_id='avc')
    with api_app.test_client() as client:
        login_user_via_session(client,
                               email=User.query.get(users[0]).email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 200

    # check feedback from another user without access
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id, receiver_id='avc')
    with api_app.test_client() as client:
        login_user_via_session(client,
                               email=User.query.get(users[1]).email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 403

    # check feedback from another user with access
    user_2 = User.query.get(users[1])
    user_2_id = str(user_2.id)
    user_2_email = user_2.email
    project = deposit_project_resolver(project['_deposit']['id'])
    project['_access'] = {'update': [user_2_email]}
    project = project.commit()
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id, receiver_id='avc')
    with api_app.test_client() as client:
        @identity_loaded.connect
        def load_email(sender, identity):
            if current_user.get_id() == user_2_id:
                identity.provides.update([UserNeed(user_2_email)])

        login_user_via_session(client, email=user_2_email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 200

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'],
                                        access_token)

    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        resp = client.delete(url, headers=json_headers)
        assert resp.status_code == 201

        # check that object versions and tags are deleted:
        # (Create + Delete) * Num Objs
        assert ObjectVersion.query.count() == 2 * get_object_count()
        # tags are connected with the old versions
        assert ObjectVersionTag.query.count() == get_tag_count()
        bucket = Bucket.query.first()
        # and the bucket is empty
        assert bucket.size == 0

        record = RecordMetadata.query.filter_by(id=video_1_id).one()

        # check that the metadata patch is deleted
        assert 'extracted_metadata' not in record.json['_cds']

        # check that the corresponding Event persisted after cleaning
        assert len(get_deposit_events(record.json['_deposit']['id'])) == 0
        assert len(get_deposit_events(record.json['_deposit']['id'],
                                      _deleted=True)) == 1

        # check that no SSE message is sent and no reindexing is fired
        assert mock_sse.called is False
        assert mock_indexer.called is False
def newversion(self, pid=None):
    """Create a new version deposit."""
    deposit = None
    try:
        if not self.is_published():
            raise PIDInvalidAction()
        # Check that there is not a newer draft version for this record
        # and that this is the latest version.
        pv = PIDVersioning(child=pid)
        if pv.exists and not pv.draft_child and pid == pv.last_child:
            last_pid = pv.last_child
            # Get a copy of the latest record.
            latest_record = WekoDeposit.get_record(last_pid.object_uuid)
            if latest_record is not None:
                data = latest_record.dumps()
                owners = data['_deposit']['owners']
                keys_to_remove = ('_deposit', 'doi', '_oai',
                                  '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because
                # we don't want a new empty bucket, but an unlocked
                # snapshot of the old record's bucket.
                deposit = super(WekoDeposit, self).create(data)
                # Injecting owners is required when a new version is
                # created outside of the request context.
                deposit['_deposit']['owners'] = owners

                recid = PersistentIdentifier.get(
                    'recid', str(data['_deposit']['id']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=pv.parent).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Create a snapshot of the record's bucket and update data.
                snapshot = latest_record.files.bucket.snapshot(lock=False)
                snapshot.locked = False
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model,
                                      bucket=snapshot)
                if 'extra_formats' in latest_record['_buckets']:
                    extra_formats_snapshot = \
                        latest_record.extra_formats.bucket.snapshot(
                            lock=False)
                    deposit['_buckets']['extra_formats'] = \
                        str(extra_formats_snapshot.id)
                    RecordsBuckets.create(record=deposit.model,
                                          bucket=extra_formats_snapshot)

                index = {
                    'index': self.get('path', []),
                    'actions': 'private' if self.get(
                        'publish_status', '1') == '1' else 'publish'
                }
                if 'activity_info' in session:
                    del session['activity_info']
                item_metadata = ItemsMetadata.get_record(
                    last_pid.object_uuid).dumps()
                args = [index, item_metadata]
                deposit.update(*args)
                deposit.commit()
        return deposit
    except SQLAlchemyError as ex:
        current_app.logger.debug(ex)
        db.session.rollback()
        return None
def pid(self):
    """Return an instance of deposit PID."""
    pid = self.deposit_fetcher(self.id, self)
    return PersistentIdentifier.get(pid.pid_type, pid.pid_value)
"""{{ cookiecutter.class_name }} resolver."""

import jsonresolver
from flask import current_app
from invenio_pidstore.models import PersistentIdentifier, PIDStatus


@jsonresolver.route('/api/{{ cookiecutter.resource_name }}/<pid>',
                    host='ils.rero.ch')
def {{ cookiecutter.name }}_resolver(pid):
    """Resolver for {{ cookiecutter.name }} record."""
    persistent_id = PersistentIdentifier.get(
        '{{ cookiecutter.pid_type }}', pid)
    if persistent_id.status == PIDStatus.REGISTERED:
        return dict(pid=persistent_id.pid_value)
    current_app.logger.error(
        'Doc resolver error: /api/{{ cookiecutter.resource_name }}/{pid} '
        '{persistent_id}'.format(pid=pid, persistent_id=persistent_id)
    )
    raise Exception('unable to resolve')
def get_uuid_from_pid_value(cls, pid_value, pid_type=None):
    """Resolve a PID value to the UUID of the object it points to."""
    if not pid_type:
        pid_type = cls.pid_type
    pid = PersistentIdentifier.get(pid_type, pid_value)
    return pid.object_uuid
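# A minimal sketch of the PID -> UUID -> record round trip that helpers such
# as `get_uuid_from_pid_value` and `get_record_by_pid` wrap; 'recid' and '1'
# are placeholder values, and a Flask application context is assumed:
from invenio_pidstore.models import PersistentIdentifier
from invenio_records.api import Record

pid = PersistentIdentifier.get('recid', '1')  # raises PIDDoesNotExistError
record = Record.get_record(pid.object_uuid)   # record the PID points to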
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed signatures
    are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    # Claimed signature #1.
    glashow_record_id_claimed = str(
        PersistentIdentifier.get('lit', 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(
        glashow_record_id_claimed)
    glashow_record_uuid_claimed = glashow_record_claimed[
        'authors'][0]['uuid']

    # Add phonetic block to the record.
    glashow_record_claimed['authors'][0]['signature_block'] = "HAGp"
    glashow_record_claimed['authors'][0]['curated_relation'] = True
    glashow_record_claimed['authors'][0]['recid'] = "3"
    es.index(index='records-hep', doc_type='hep',
             id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh('records-hep')

    # Claimed signature #2.
    higgs_record_id_claimed = str(
        PersistentIdentifier.get('lit', 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(
        higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed[
        'authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_record_claimed['authors'][0]['signature_block'] = "HAGp"
    higgs_record_claimed['authors'][0]['curated_relation'] = True
    higgs_record_claimed['authors'][0]['recid'] = "4"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh('records-hep')

    # Not claimed signature.
    higgs_record_id_not_claimed = str(
        PersistentIdentifier.get('lit', 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(
        higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = higgs_record_not_claimed[
        'authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_record_not_claimed['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_record_id_not_claimed,
             body=higgs_record_not_claimed)
    es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(
                   ({"3": [glashow_record_uuid_claimed,
                           higgs_record_uuid_claimed,
                           higgs_record_uuid_not_claimed]}, {}))):
        with patch(
                "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
                return_value=_ConflictObject(
                    {higgs_record_uuid_claimed: [
                        higgs_record_uuid_not_claimed]})):
            with patch(
                    "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                    side_effect=update_authors_recid):
                disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(
        higgs_record_id_not_claimed)['authors'][0]['recid'] == "4"
def get_db_record(pid_type, recid):
    """Return the InspireRecord behind a PID, loaded from the database."""
    from inspirehep.modules.records.api import InspireRecord
    pid = PersistentIdentifier.get(pid_type, recid)
    return InspireRecord.get_record(pid.object_uuid)
def get_db_record(record_type, recid):
    """Return the Record behind a PID, loaded from the database."""
    pid = PersistentIdentifier.get(record_type, recid)
    return Record.get_record(pid.object_uuid)
def newversion(self, pid=None):
    """Create a new version deposit."""
    if not self.is_published():
        raise PIDInvalidAction()

    # Check that there is not a newer draft version for this record
    pid, record = self.fetch_published()
    pv = PIDVersioning(child=pid)
    if (not pv.draft_child and
            is_doi_locally_managed(record['doi'])):
        with db.session.begin_nested():
            # Get a copy of the latest record
            latest_record = ZenodoRecord.get_record(
                pv.last_child.object_uuid)
            data = latest_record.dumps()

            # Get the communities from the last deposit
            # and push those to the new version
            latest_depid = PersistentIdentifier.get(
                'depid', data['_deposit']['id'])
            latest_deposit = ZenodoDeposit.get_record(
                latest_depid.object_uuid)
            last_communities = latest_deposit.get('communities', [])

            owners = data['_deposit']['owners']

            # TODO: Check other data that may need to be removed
            keys_to_remove = (
                '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
            for k in keys_to_remove:
                data.pop(k, None)

            # NOTE: We call the superclass `create()` method, because we
            # don't want a new empty bucket, but an unlocked snapshot of
            # the old record's bucket.
            deposit = (super(ZenodoDeposit, self).create(data))
            # Injecting owners is required when a new version is created
            # outside of the request context.
            deposit['_deposit']['owners'] = owners
            if last_communities:
                deposit['communities'] = last_communities

            conceptrecid = PersistentIdentifier.get(
                'recid', data['conceptrecid'])
            recid = PersistentIdentifier.get(
                'recid', str(data['recid']))
            depid = PersistentIdentifier.get(
                'depid', str(data['_deposit']['id']))
            PIDVersioning(parent=conceptrecid).insert_draft_child(
                child=recid)
            RecordDraft.link(recid, depid)

            # Pre-fill the Zenodo DOI to prevent the user from changing it
            # to a custom DOI.
            deposit['doi'] = doi_generator(recid.pid_value)

            pv = PIDVersioning(child=pid)
            index_siblings(pv.draft_child, neighbors_eager=True,
                           with_deposits=True)

        with db.session.begin_nested():
            # Create a snapshot from the record's bucket and update data
            snapshot = latest_record.files.bucket.snapshot(lock=False)
            snapshot.locked = False
            if 'extra_formats' in latest_record['_buckets']:
                extra_formats_snapshot = \
                    latest_record.extra_formats.bucket.snapshot(
                        lock=False)
            deposit['_buckets'] = {'deposit': str(snapshot.id)}
            RecordsBuckets.create(record=deposit.model, bucket=snapshot)
            if 'extra_formats' in latest_record['_buckets']:
                deposit['_buckets']['extra_formats'] = \
                    str(extra_formats_snapshot.id)
                RecordsBuckets.create(
                    record=deposit.model, bucket=extra_formats_snapshot)
            deposit.commit()
    return self
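# A hedged sketch of inspecting the version chain that `newversion` extends,
# assuming PIDVersioning from invenio-pidrelations and a placeholder recid
# value '123':
from invenio_pidstore.models import PersistentIdentifier
from invenio_pidrelations.contrib.versioning import PIDVersioning

pid = PersistentIdentifier.get('recid', '123')
pv = PIDVersioning(child=pid)
parent = pv.parent                             # concept recid shared by all versions
versions = [c.pid_value for c in pv.children]  # published versions in the chain
draft = pv.draft_child                         # at most one open draft per chain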
def get_change_list_content_xml(self, from_date,
                                from_date_args=None, to_date_args=None):
    """Get the change list XML.

    :return: Updated ChangeList info.
    """
    if not self._validation():
        return None
    from .utils import parse_date
    if from_date_args:
        from_date_args = parse_date(from_date_args)
    if to_date_args:
        to_date_args = parse_date(to_date_args)
    change_list = ChangeList()
    change_list.up = INVENIO_CAPABILITY_URL.format(request.url_root)
    change_list.index = '{}resync/{}/changelist.xml'.format(
        request.url_root,
        self.repository_id,
    )
    record_changes = self._get_record_changes_with_interval(from_date)
    for data in record_changes:
        try:
            if from_date_args and from_date_args > parse_date(
                    data.get("updated")):
                continue
            if to_date_args and to_date_args < parse_date(
                    data.get("updated")):
                continue
            pid_object = PersistentIdentifier.get('recid',
                                                  data.get('record_id'))
            latest_pid = PIDVersioning(child=pid_object).last_child
            is_latest = str(latest_pid.pid_value) == "{}.{}".format(
                data.get('record_id'), data.get('record_version'))
            if not is_latest and data.get('status') != 'deleted':
                loc = '{}resync/{}/records/{}'.format(
                    request.url_root,
                    self.repository_id,
                    '{}.{}'.format(data.get('record_id'),
                                   data.get('record_version')))
            else:
                loc = '{}resync/{}/records/{}'.format(
                    request.url_root,
                    self.repository_id,
                    data.get('record_id'))
            rc = Resource(
                loc,
                lastmod=data.get("updated"),
                change=data.get('status'),
                md_at=data.get("updated"),
            )
            change_list.add(rc)
        except Exception:
            current_app.logger.error('-' * 60)
            traceback.print_exc(file=sys.stdout)
            current_app.logger.error('-' * 60)
            continue
    return change_list.as_xml()
def test_create_acl_after_record(app, db, es, es_acl_prepare, test_users):
    with app.test_client() as client:
        login(client, test_users.u1)
        response = client.post(
            records_url(),
            data=json.dumps({'title': 'blah', 'contributors': []}),
            content_type='application/json')
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata

        current_search_client.indices.refresh()
        current_search_client.indices.flush()

        with db.session.begin_nested():
            acl1 = DefaultACL(name='default', schemas=[RECORD_SCHEMA],
                              priority=0, originator=test_users.u1,
                              operation='get')
            actor1 = SystemRoleActor(name='auth', system_role='any_user',
                                     acl=acl1, originator=test_users.u1)
            db.session.add(acl1)
            db.session.add(actor1)

        # reindex all resources that might be affected by the ACL change
        current_explicit_acls.reindex_acl(acl1, delayed=False)

        index, doctype = schema_to_index(RECORD_SCHEMA)

        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(PersistentIdentifier.get(
                'recid', rest_metadata['control_number']).object_uuid))

        clear_timestamp(rec_md)

        assert rec_md['_source']['_invenio_explicit_acls'] == [{
            'operation': 'get',
            'id': acl1.id,
            'timestamp': 'cleared',
            'system_role': ['any_user']
        }]

        # remove the ACL from the database
        with db.session.begin_nested():
            db.session.delete(acl1)

        # reindex records affected by the removal of the ACL
        current_explicit_acls.reindex_acl_removed(acl1, delayed=False)

        # make sure all changes had time to propagate and test
        current_search_client.indices.refresh()
        current_search_client.indices.flush()

        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(PersistentIdentifier.get(
                'recid', rest_metadata['control_number']).object_uuid))

        # there is no ACL in the database => no ACLs are defined
        # nor enforced on the record
        print(json.dumps(rec_md, indent=4))
        assert '_invenio_explicit_acls' not in rec_md['_source']
def record_pid(self):
    """Return the published/reserved record PID."""
    return PersistentIdentifier.get('b2rec', self.id.hex)
def get_es_record(record_type, recid, **kwargs):
    """Fetch the indexed document for a record from the search engine."""
    pid = PersistentIdentifier.get(record_type, recid)
    search_conf = current_app.config['RECORDS_REST_ENDPOINTS'][record_type]
    search_class = import_string(search_conf['search_class'])()
    return search_class.get_source(pid.object_uuid, **kwargs)
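# The `get_db_record` and `get_es_record` helpers above answer the same
# question from different stores. A short usage sketch with placeholder
# arguments ('lit', 123), assuming both helpers are importable from the same
# module: the database copy is authoritative, while the indexed copy can lag
# behind until the indexer has run.
db_rec = get_db_record('lit', 123)  # authoritative copy from the database
es_rec = get_es_record('lit', 123)  # indexed copy from Elasticsearch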
def recid(self):
    """Get the RECID object for the Release record."""
    if self.record:
        return PersistentIdentifier.get('recid',
                                        str(self.record['recid']))
def create_files_and_sip(deposit, dep_pid):
    """Create deposit Bucket, Files and SIPs."""
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db

    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', dict())
    deposit.setdefault('_files', list())
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])
    recid = None

    if sips:
        recids = [int(sip['metadata']['recid']) for sip in sips]
        if len(set(recids)) > 1:
            logger.error('Multiple recids ({recids}) found in deposit'
                         ' {depid}.'.format(recids=recids,
                                            depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value,
                                        list(set(recids)))
        elif recids:  # If there is only one recid
            recid = recids[0]

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = list()

    for file_ in files:
        fi = FileInstance.create()
        fi.set_uri(file_['path'], file_['size'], file_['checksum'])
        ov = ObjectVersion.create(buc, file_['name'], _file_id=fi.id)
        file_meta = dict(
            bucket=str(buc.id),
            key=file_['name'],
            checksum=file_['checksum'],
            size=file_['size'],
            version_id=str(ov.version_id),
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    for idx, sip in enumerate(sips):
        agent = None
        user_id = None
        if sip['agents']:
            agent = dict(
                ip_address=sip['agents'][0].get('ip_address', ""),
                email=sip['agents'][0].get('email_address', ""),
            )
            user_id = sip['agents'][0]['user_id']
        content = sip['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format, content, user_id=user_id,
                             agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit'
                             ' {depid} does not exist.'.format(
                                 user_id=user_id,
                                 depid=dep_pid.pid_value))
            raise DepositSIPUserDoesNotExist(dep_pid.pid_value, user_id)

        # If a recid was found, attach it to the SIP.
        # TODO: This always uses the first recid, as we quit if multiple
        # recids are found in the sips information.
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=recid)
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in Deposit'
                                 ' {depid} does not exist.'.format(
                                     recid=recid,
                                     depid=dep_pid.pid_value))
                raise DepositRecidDoesNotExist(dep_pid.pid_value, recid)
        if idx == 0:
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)

    deposit.commit()
    db.session.commit()
    return deposit
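# A hedged sketch of the minimal legacy-deposit payload that
# `create_files_and_sip` expects; the field names are inferred from the
# reads above and all values are placeholders:
legacy_deposit_payload = {
    'files': [{
        'path': '/data/archive/file.pdf',  # URI of an already-stored file
        'name': 'file.pdf',
        'size': 1234,
        'checksum': 'md5:0123456789abcdef0123456789abcdef',
    }],
    'sips': [{
        'metadata': {'recid': '42'},
        'package': '<record></record>',    # MARCXML payload
        'agents': [{
            'user_id': 1,
            'email_address': 'user@example.org',
            'ip_address': '127.0.0.1',
        }],
    }],
}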
def publish(self):
    """Publish GitHub release as record."""
    id_ = uuid.uuid4()
    deposit_metadata = dict(self.metadata)
    deposit = None
    try:
        db.session.begin_nested()
        # TODO: Add filter on Published releases
        previous_releases = self.model.repository.releases.filter_by(
            status=ReleaseStatus.PUBLISHED)
        versioning = None
        stashed_draft_child = None
        if previous_releases.count():
            last_release = previous_releases.order_by(
                Release.created.desc()).first()
            last_recid = PersistentIdentifier.get(
                'recid', last_release.record['recid'])
            versioning = PIDVersioning(child=last_recid)
            last_record = ZenodoRecord.get_record(
                versioning.last_child.object_uuid)
            deposit_metadata['conceptrecid'] = last_record['conceptrecid']
            if 'conceptdoi' not in last_record:
                last_depid = PersistentIdentifier.get(
                    'depid', last_record['_deposit']['id'])
                last_deposit = ZenodoDeposit.get_record(
                    last_depid.object_uuid)
                last_deposit = last_deposit.registerconceptdoi()
                last_recid, last_record = last_deposit.fetch_published()
            deposit_metadata['conceptdoi'] = last_record['conceptdoi']
            if last_record.get('communities'):
                deposit_metadata.setdefault('communities',
                                            last_record['communities'])
            if versioning.draft_child:
                stashed_draft_child = versioning.draft_child
                versioning.remove_draft_child()

        deposit = self.deposit_class.create(deposit_metadata, id_=id_)

        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]

        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True,
                                          allow_redirects=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}"
                    .format(url=url)
                )

            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=res.raw,
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent,
                        spam_check=False)
        recid_pid, record = deposit.fetch_published()
        self.model.recordmetadata = record.model
        if versioning and stashed_draft_child:
            versioning.insert_draft_child(stashed_draft_child)
        record_id = str(record.id)
        db.session.commit()

        # Send Datacite DOI registration task
        if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
            datacite_register.delay(recid_pid.pid_value, record_id)

        # Index the record
        RecordIndexer().index_by_id(record_id)
    except Exception:
        db.session.rollback()
        # Remove the deposit from the index since it was not committed.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommitted deposit from index.")
        raise
def test_deposit_create_versions(app, test_records_data, test_users,
                                 login_user):
    """Test the creation of new record version draft."""
    # Use the admin user in order to publish the records easily.
    login = lambda c: login_user(test_users['admin'], c)
    data = test_records_data

    # create and publish the first record in a chain
    v1_draft = create_ok(app, login, data[0])
    assert 'versions' in v1_draft['links']
    check_links(app, v1_draft, [])
    v1_rec = publish(app, login, v1_draft)
    assert 'versions' in v1_rec['links']
    check_links(app, v1_rec, [v1_rec])

    # try to create a new version from an unknown pid
    res, json_data = create(app, login, data[1],
                            version_of=uuid.uuid4().hex)
    assert res.status_code == 400

    # try to create a new version from a parent pid
    with app.app_context():
        v1_pid = PersistentIdentifier.get(pid_value=v1_rec['id'],
                                          pid_type='b2rec')
        parent_pid = PIDVersioning(child=v1_pid).parent
    res, json_data = create(app, login, data[1],
                            version_of=parent_pid.pid_value)
    assert res.status_code == 400

    # create and publish the second record in a chain
    v2_draft = create_ok(app, login, data[1], version_of=v1_rec['id'])
    check_links(app, v2_draft, [v1_rec])
    v2_rec = publish(app, login, v2_draft)
    check_links(app, v2_rec, [v1_rec, v2_rec])

    # test error if trying to create a non-linear version chain
    res, json_data = create(app, login, data[1], version_of=v1_rec['id'])
    assert res.status_code == 400
    assert json_data['use_record'] == v2_rec['id']

    # create the third record draft in a chain
    v3_draft = create_ok(app, login, data[2], version_of=v2_rec['id'])
    check_links(app, v3_draft, [v1_rec, v2_rec])

    # test error when a draft already exists in a version chain
    res, json_data = create(app, login, data[1], version_of=v2_rec['id'])
    assert res.status_code == 400
    assert json_data['goto_draft'] == v3_draft['id']

    # publish the third record in a chain
    v3_rec = publish(app, login, v3_draft)
    check_links(app, v3_rec, [v1_rec, v2_rec, v3_rec])

    # create a new version without data and assert that the data is
    # copied from the previous version
    v4_draft = create_ok(app, login, None, v3_rec['id'])
    with app.app_context():
        record_resolver = Resolver(
            pid_type='b2rec',
            object_type='rec',
            getter=B2ShareRecord.get_record,
        )
        deposit_resolver = Resolver(
            pid_type='b2dep',
            object_type='rec',
            getter=Deposit.get_record,
        )
        v4_metadata = deposit_resolver.resolve(
            v4_draft['id'])[1].model.json
        v3_metadata = record_resolver.resolve(
            v3_rec['id'])[1].model.json
        assert copy_data_from_previous(v4_metadata) == \
            copy_data_from_previous(v3_metadata)
def loan_resolver(pid):
    """Loan resolver."""
    persistent_id = PersistentIdentifier.get('loanid', pid)
    if persistent_id.status == PIDStatus.REGISTERED:
        return dict(pid=persistent_id.pid_value)
    raise Exception('unable to resolve')
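# A hedged sketch of exercising such a resolver through the jsonresolver
# library, assuming the function above is registered with a
# @jsonresolver.route decorator under a host/path like the cookiecutter
# template earlier (the module path 'mylib.resolvers.loan' and the URL are
# placeholders):
from jsonresolver import JSONResolver

resolver = JSONResolver(plugins=['mylib.resolvers.loan'])
resolver.resolve('http://ils.rero.ch/api/loans/1')  # -> {'pid': '1'}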
def pids():
    """Fetch and register PIDs."""
    from invenio_db import db
    from invenio_oaiserver.fetchers import oaiid_fetcher
    from invenio_oaiserver.minters import oaiid_minter
    from invenio_pidstore.errors import PIDDoesNotExistError, \
        PersistentIdentifierError
    from invenio_pidstore.models import PIDStatus, PersistentIdentifier
    from invenio_pidstore.fetchers import recid_fetcher
    from invenio_pidstore.minters import recid_minter
    from invenio_records.models import RecordMetadata

    recids = [r.id for r in RecordMetadata.query.all()]
    db.session.expunge_all()

    with click.progressbar(recids) as bar:
        for record_id in bar:
            record = RecordMetadata.query.get(record_id)
            try:
                pid = recid_fetcher(record.id, record.json)
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider
                )
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                db.session.add(
                    PersistentIdentifier.create(
                        pid.pid_type, pid.pid_value,
                        object_type='rec',
                        object_uuid=record.id,
                        status=PIDStatus.REGISTERED
                    )
                )
            except KeyError:
                click.echo('Skipped: {0}'.format(record.id))
                continue

            pid_value = record.json.get('_oai', {}).get('id')
            if pid_value is None:
                assert 'control_number' in record.json
                pid_value = current_app.config.get(
                    'OAISERVER_ID_PREFIX'
                ) + str(record.json['control_number'])

            record.json.setdefault('_oai', {})
            record.json['_oai']['id'] = pid_value

            pid = oaiid_fetcher(record.id, record.json)
            try:
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider
                )
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                pid = oaiid_minter(record.id, record.json)
                db.session.add(pid)

            flag_modified(record, 'json')
            assert record.json['_oai']['id']
            db.session.add(record)
            db.session.commit()
            db.session.expunge_all()
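# A short note on the fetcher/minter split used above: fetchers are
# read-only and compute a lightweight FetchedPID tuple from the record data,
# while minters create and persist PersistentIdentifier rows. A minimal
# sketch, assuming a Flask application context (the fetcher reads the
# PIDSTORE_RECID_FIELD config, which defaults to 'control_number'):
import uuid
from invenio_pidstore.fetchers import recid_fetcher

fetched = recid_fetcher(uuid.uuid4(), {'control_number': '42'})
fetched.pid_type, fetched.pid_value  # -> ('recid', '42')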
def pid(self):
    """Return an instance of record PID."""
    pid = self.record_fetcher(self.id, self)
    obj = PersistentIdentifier.get(pid.pid_type, pid.pid_value)
    return obj
def test_resolver(app):
    """Test the PID resolver."""
    status = [
        PIDStatus.NEW,
        PIDStatus.RESERVED,
        PIDStatus.REGISTERED,
        PIDStatus.DELETED,
    ]

    with app.app_context():
        i = 1
        rec_a = uuid.uuid4()
        # Create pids for each status, with and without an object
        for s in status:
            PersistentIdentifier.create('recid', i, status=s)
            i += 1
            if s != PIDStatus.DELETED:
                PersistentIdentifier.create('recid', i, status=s,
                                            object_type='rec',
                                            object_uuid=rec_a)
                i += 1

        # Create a DOI
        pid_doi = PersistentIdentifier.create(
            'doi', '10.1234/foo', status=PIDStatus.REGISTERED,
            object_type='rec', object_uuid=rec_a)

        # Create redirects
        pid = PersistentIdentifier.create('recid', i,
                                          status=PIDStatus.REGISTERED)
        i += 1
        pid.redirect(PersistentIdentifier.get('recid', '2'))
        pid = PersistentIdentifier.create('recid', i,
                                          status=PIDStatus.REGISTERED)
        pid.redirect(pid_doi)
        db.session.commit()

        # Start tests
        resolver = Resolver(pid_type='recid', object_type='rec',
                            getter=lambda x: x)

        # Resolve a non-existing pid
        pytest.raises(PIDDoesNotExistError, resolver.resolve, '100')
        pytest.raises(PIDDoesNotExistError, resolver.resolve,
                      '10.1234/foo')

        # Resolve status new
        pytest.raises(PIDUnregistered, resolver.resolve, '1')
        pytest.raises(PIDUnregistered, resolver.resolve, '2')

        # Resolve status reserved
        pytest.raises(PIDUnregistered, resolver.resolve, '3')
        pytest.raises(PIDUnregistered, resolver.resolve, '4')

        # Resolve status registered
        pytest.raises(PIDMissingObjectError, resolver.resolve, '5')
        pid, obj = resolver.resolve('6')
        assert pid and obj == rec_a

        # Resolve status deleted
        pytest.raises(PIDDeletedError, resolver.resolve, '7')

        # Resolve status redirected
        try:
            resolver.resolve('8')
            assert False
        except PIDRedirectedError as e:
            assert e.destination_pid.pid_type == 'recid'
            assert e.destination_pid.pid_value == '2'

        try:
            resolver.resolve('9')
            assert False
        except PIDRedirectedError as e:
            assert e.destination_pid.pid_type == 'doi'
            assert e.destination_pid.pid_value == '10.1234/foo'

        doiresolver = Resolver(pid_type='doi', object_type='rec',
                               getter=lambda x: x)
        pytest.raises(PIDDoesNotExistError, doiresolver.resolve, '1')
        pid, obj = doiresolver.resolve('10.1234/foo')
        assert pid and obj == rec_a