def validate_doi(self, value):
    """Validate if doi exists."""
    if value and has_request_context():
        required_doi = self.context.get('required_doi')
        if value == required_doi:
            return

        err = ValidationError(_('DOI already exists in Zenodo.'),
                              field_names=['doi'])

        try:
            doi_pid = PersistentIdentifier.get('doi', value)
        except PIDDoesNotExistError:
            return

        # If the DOI exists, check if it's been assigned to this record
        # by fetching the recid and comparing both PIDs record UUID
        try:
            recid_pid = PersistentIdentifier.get(
                'recid', self.context['recid'])
        except PIDDoesNotExistError:
            # There's no way to verify if this DOI belongs to this record
            raise err

        doi_uuid = doi_pid.get_assigned_object()
        recid_uuid = recid_pid.get_assigned_object()

        if doi_uuid and doi_uuid == recid_uuid:
            return
        else:  # DOI exists and belongs to a different record
            raise err
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

    RecordsBuckets.create(record=deposit.model, bucket=bucket)

    recid = PersistentIdentifier.get(
        'recid', str(data['recid']))
    conceptrecid = PersistentIdentifier.get(
        'recid', str(data['conceptrecid']))
    depid = PersistentIdentifier.get(
        'depid', str(data['_deposit']['id']))
    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)

    return deposit
def zenodo_doi_updater(record_uuid, data):
    """Update the DOI (only external DOIs)."""
    assert 'recid' in data
    doi = data.get('doi')
    assert doi
    assert idutils.is_doi(doi)

    # If the DOI is the same as an already generated one, do nothing
    if doi == doi_generator(data['recid']):
        return
    if is_local_doi(doi):  # Zenodo DOI, but different than recid
        # ERROR, user provided a custom ZENODO DOI!
        raise PIDValueError('doi', doi)

    doi_pid = PersistentIdentifier.get_by_object(
        pid_type='doi', object_type='rec', object_uuid=record_uuid)

    if doi_pid.pid_value != doi:
        with db.session.begin_nested():
            db.session.delete(doi_pid)
        return PersistentIdentifier.create(
            'doi',
            doi,
            object_type='rec',
            object_uuid=record_uuid,
            status=PIDStatus.RESERVED,
        )
def test_published_external_doi(db, deposit, deposit_file):
    """Test published external DOI."""
    ext_doi1 = '10.1234/foo'
    ext_doi2 = '10.1234/bar'
    deposit['doi'] = ext_doi1
    deposit.publish()
    db.session.commit()

    # Published record with external DOI must have:
    # 1) a registered recid with object
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    assert recid and recid.status == PIDStatus.REGISTERED \
        and recid.has_object()
    # 2) a reserved external doi with object
    doi = PersistentIdentifier.get('doi', ext_doi1)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()

    # Now change external DOI.
    deposit = deposit.edit()
    deposit['doi'] = ext_doi2
    deposit.publish()
    db.session.commit()

    # Ensure DOI 1 has been removed.
    pytest.raises(
        PIDDoesNotExistError, PersistentIdentifier.get, 'doi', ext_doi1)
    # Ensure DOI 2 has been reserved.
    doi = PersistentIdentifier.get('doi', ext_doi2)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()
def test_deposit_index(db, es):
    """Test that deposits are indexed with metadata from the record."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    rec1 = Record.create({
        'title': 'One',
        '_deposit': {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }
    })
    PersistentIdentifier.create(pid_type='recid', pid_value='1',
                                status=PIDStatus.REGISTERED,
                                object_uuid=rec1.id,
                                object_type='rec')
    Deposit.create({
        '_deposit': {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }
    })
    db.session.commit()
    current_search.flush_and_refresh(deposit_index_name)
    res = current_search.client.search(index=deposit_index_name)
    # Make sure the 'title' was indexed from the record.
    assert res['hits']['hits'][0]['_source']['title'] == 'One'
def _mint_pid(obj, dummy_eng):
    d = Deposition(obj)
    recjson = d.get_latest_sip(sealed=False).metadata

    if 'recid' not in recjson:
        raise Exception("'recid' not found in sip metadata.")

    pid_text = None
    pid = recjson.get(pid_field, None)
    if not pid:
        # No pid found in recjson, so create a new pid with the user
        # supplied function.
        pid_text = recjson[pid_field] = pid_creator(recjson)
    else:
        # Pid found - check if it should be minted.
        if existing_pid_checker and existing_pid_checker(pid, recjson):
            pid_text = pid

    # Create and assign pid internally - actual registration will happen
    # asynchronously later.
    if pid_text:
        current_app.logger.info("Registering pid %s" % pid_text)
        pid_obj = PersistentIdentifier.create(pid_store_type, pid_text)
        if pid_obj is None:
            pid_obj = PersistentIdentifier.get(pid_store_type, pid_text)

        try:
            pid_obj.assign("rec", recjson['recid'])
        except Exception:
            register_exception(alert_admin=True)

    d.update()
def create_deposit_and_record(pid_value, owner):
    """Utility function for creating records and deposits."""
    # ``conceptrecid`` and ``minimal_record`` come from the enclosing
    # fixture scope.
    recid = PersistentIdentifier.create(
        'recid', pid_value, status=PIDStatus.RESERVED)
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)
    depid = PersistentIdentifier.create(
        'depid', pid_value, status=PIDStatus.REGISTERED)
    deposit = ZenodoRecord.create({'_deposit': {'id': depid.pid_value},
                                   'conceptrecid': conceptrecid.pid_value,
                                   'recid': recid.pid_value})
    deposit.commit()
    depid.assign('rec', deposit.id)

    record_metadata = deepcopy(minimal_record)
    record_metadata['_deposit'] = {'id': depid.pid_value}
    record_metadata['conceptrecid'] = conceptrecid.pid_value
    record_metadata['recid'] = int(recid.pid_value)
    record_metadata['owners'] = [owner.id]
    record = ZenodoRecord.create(record_metadata)
    zenodo_record_minter(record.id, record)
    record.commit()

    return (depid, deposit, recid, record)
def setup_record_fixture(app):
    """Setup a record fixture."""
    records = []

    def _create_pid(record):
        pid = PersistentIdentifier.create(
            'recid', record['recid'], pid_provider='recid')
        pid.assign('rec', record['recid'])
        pid.register()

    with before_record_insert.connected_to(_create_pid):
        with app.app_context():
            records.append(Record.create(
                {'title': 'Test record 1', 'recid': 1},
                identifier_key='recid'
            ))
            records.append(Record.create(
                {'title': 'Test record 2', 'recid': 2},
                identifier_key='recid'
            ))
            pid = PersistentIdentifier.create('recid', 3,
                                              pid_provider='recid')
            db.session.add(pid)
            db.session.commit()
            pid = PersistentIdentifier.get('recid', 2,
                                           pid_provider='recid')
            pid.delete()
            db.session.commit()
    return records
def add_drafts_from_file(file_path, schema, egroup=None,
                         user=None, limit=None):
    """Add drafts from a specified file.

    Drafts with a specified pid will be registered under it.
    For drafts without a pid, new pids will be minted.
    """
    with open(file_path, 'r') as fp:
        entries = json.load(fp)
        for entry in entries[0:limit]:
            data = construct_draft_obj(schema, entry)
            pid = cap_deposit_fetcher(None, data)
            pid_value = pid.pid_value if pid else None

            try:
                PersistentIdentifier.get('depid', pid_value)
                print('Draft with id {} already exists!'.format(pid_value))
            except PIDDoesNotExistError:
                record_uuid = uuid.uuid4()
                pid = cap_deposit_minter(record_uuid, data)
                if user:
                    user = User.query.filter_by(email=user).one()
                if egroup:
                    role = Role.query.filter_by(name=egroup).one()
                deposit = CAPDeposit.create(data, record_uuid, user)
                deposit.commit()
                if egroup:
                    add_read_permission_for_egroup(deposit, egroup)
                print('Draft {} added.'.format(pid.pid_value))
def test_appoint_profile_from_claimed_signature(small_app):
    """Check the module for the case where claimed signature takes
    everything.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    old_record_id = str(PersistentIdentifier.get(
        "literature", 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_author_uuid = old_record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    old_record["authors"][0]["signature_block"] = "HAGp"
    old_record["authors"][0]["recid"] = "2"
    es.index(index="records-hep", doc_type="hep",
             id=old_record_id, body=old_record)
    es.indices.refresh("records-hep")

    record_id = str(PersistentIdentifier.get(
        "literature", 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    record["authors"][0]["signature_block"] = "HAGp"
    record["authors"][0]["recid"] = "314159265"
    record["authors"][0]["curated_relation"] = True
    es.index(index="records-hep", doc_type="hep", id=record_id, body=record)
    es.indices.refresh("records-hep")

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(
                   ({"2": [old_author_uuid, author_uuid]}, {}))):
        with patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
            side_effect=update_authors_recid
        ):
            disambiguation_clustering("HAGp")

    assert Record.get_record(
        old_record_id)["authors"][0]["recid"] == "314159265"
    assert Record.get_record(record_id)["authors"][0]["recid"] == "314159265"
def test_record_page(app, db, es, event_queues, full_record):
    """Test record page views."""
    full_record['conceptdoi'] = '10.1234/foo.concept'
    full_record['conceptrecid'] = 'foo.concept'
    r = Record.create(full_record)
    PersistentIdentifier.create(
        'recid', '12345', object_type='rec', object_uuid=r.id,
        status=PIDStatus.REGISTERED)
    db.session.commit()

    with app.test_client() as client:
        record_url = url_for('invenio_records_ui.recid', pid_value='12345')
        assert client.get(record_url).status_code == 200

    process_events(['record-view'])
    current_search.flush_and_refresh(index='events-stats-record-view')

    search = Search(using=es, index='events-stats-record-view')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
def test_register(logger, app):
    """Test pid register."""
    with app.app_context():
        i = 1
        for s in [PIDStatus.NEW, PIDStatus.RESERVED]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            assert pid.register()
            assert logger.info.call_args[0][0].startswith(
                "Registered PID")

        for s in [PIDStatus.REGISTERED, PIDStatus.DELETED,
                  PIDStatus.REDIRECTED]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            pytest.raises(PIDInvalidAction, pid.register)

        # Test logging of bad errors.
        pid = PersistentIdentifier.create('rec', str(i),
                                          status=PIDStatus.RESERVED)
        with patch('invenio_pidstore.models.db.session.begin_nested') as mock:
            mock.side_effect = SQLAlchemyError()
            pytest.raises(SQLAlchemyError, pid.register)
            assert logger.exception.call_args[0][0].startswith(
                "Failed to register")
            assert 'pid' in logger.exception.call_args[1]['extra']
def test_app_fixture_lacks_db_isolation_step2(pids_count, app):
    assert PersistentIdentifier.query.count() == pids_count + 1
    # Force the cleanup.
    PersistentIdentifier.get(
        pid_type='type1',
        pid_value='value1',
    ).delete()
def create_pids(cls, dump, deposit):
    """Create persistent identifiers."""
    # Mark deposit deleted if recid is deleted.
    recid = dump.recid_pid
    # Create depid.
    depid = PersistentIdentifier.create(
        pid_type='depid',
        pid_value=str(dump.depid),
        object_type='rec',
        object_uuid=deposit.id,
        status=PIDStatus.REGISTERED
    )
    if recid and recid.status == PIDStatus.DELETED:
        depid.delete()

    if RecordIdentifier.query.get(dump.depid) is None:
        RecordIdentifier.insert(dump.depid)

    # Pre-reserved recid.
    if not recid and dump.recid:
        if dump.has_pid:
            # Published deposit without a recid (this is an upload which
            # never got ingested, so we set it back to draft status and
            # reserve the recid).
            pass
        recid = PersistentIdentifier.create(
            pid_type='recid',
            pid_value=str(dump.recid),
            status=PIDStatus.RESERVED
        )
        if RecordIdentifier.query.get(dump.recid) is None:
            RecordIdentifier.insert(dump.recid)

    return depid, recid
def test_delete(logger, app):
    """Test pid delete."""
    with app.app_context():
        i = 1
        for s in [PIDStatus.RESERVED, PIDStatus.RESERVED,
                  PIDStatus.REDIRECTED, PIDStatus.DELETED]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            assert pid.delete()
            assert logger.info.call_args[0][0] == "Deleted PID."

        # New persistent identifiers are removed completely
        count = PersistentIdentifier.query.count()
        pid = PersistentIdentifier.create('rec', str(i),
                                          status=PIDStatus.NEW)
        db.session.commit()
        assert PersistentIdentifier.query.count() == count + 1
        pid.delete()
        assert PersistentIdentifier.query.count() == count
        assert logger.info.call_args[0][0] == "Deleted PID (removed)."

        pid = PersistentIdentifier.create('rec', str(i + 1))
        with patch('invenio_pidstore.models.db.session.begin_nested') as mock:
            mock.side_effect = SQLAlchemyError()
            pytest.raises(SQLAlchemyError, pid.delete)
            assert logger.exception.call_args[0][0].startswith(
                "Failed to delete")
            assert 'pid' in logger.exception.call_args[1]['extra']
def grant_records(db, funder_record):
    """Create grant records."""
    grants = [
        Record.create(dict(
            internal_id='10.13039/501100000780::282896',
            funder={'$ref': 'https://dx.doi.org/10.13039/501100000780'},
            identifiers=dict(
                eurepo='info:eu-repo/grantAgreement/EC/FP7/282896',
            ),
            code='282896',
            title='Open Access Research Infrastructure in Europe',
            acronym='OpenAIREplus',
            program='FP7',
        )),
        Record.create(dict(
            internal_id='10.13039/501100000780::027819',
            funder={'$ref': 'https://dx.doi.org/10.13039/501100000780'},
            identifiers=dict(
                eurepo='info:eu-repo/grantAgreement/EC/FP6/027819',
            ),
            code='027819',
            title='Integrating cognition, emotion and autonomy',
            acronym='ICEA',
            program='FP6',
        )),
    ]
    for g in grants:
        PersistentIdentifier.create(
            pid_type='grant', pid_value=g['internal_id'],
            object_type='rec', object_uuid=g.id, status='R')
    db.session.commit()
    return grants
def delete(self, delete_published=False, *args, **kwargs):
    """Delete the deposit.

    :param delete_published: If True, even deposit of a published record
        will be deleted (usually used by admin operations).
    :type delete_published: bool
    """
    is_published = self['_deposit'].get('pid')
    if is_published and not delete_published:
        raise PIDInvalidAction()

    # Delete the recid
    recid = PersistentIdentifier.get(
        pid_type='recid', pid_value=self['recid'])

    versioning = PIDVersioning(child=recid)
    if versioning.exists:
        if versioning.draft_child and \
                self.pid == versioning.draft_child_deposit:
            versioning.remove_draft_child()
        if versioning.last_child:
            index_siblings(versioning.last_child,
                           children=versioning.children.all(),
                           include_pid=True,
                           neighbors_eager=True,
                           with_deposits=True)

    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)

    if 'conceptrecid' in self:
        concept_recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['conceptrecid'])
        if concept_recid.status == PIDStatus.RESERVED:
            db.session.delete(concept_recid)

    # Completely remove bucket
    bucket = self.files.bucket
    with db.session.begin_nested():
        # Remove Record-Bucket link
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        mp_q = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects
        Part.query.filter(
            Part.upload_id.in_(mp_q.with_entities(
                MultipartObject.upload_id).subquery())
        ).delete(synchronize_session='fetch')
        mp_q.delete(synchronize_session='fetch')
    bucket.locked = False
    bucket.remove()

    depid = kwargs.get('pid', self.pid)
    if depid:
        depid.delete()

    # NOTE: We call the parent of Deposit, invenio_records.api.Record,
    # since we need to completely override everything that the
    # Deposit.delete method does.
    return super(Deposit, self).delete(*args, **kwargs)
def generate_recid(experiment):
    """CAP Pid generator."""
    while True:
        pid_value = random_pid(experiment)
        try:
            PersistentIdentifier.get('recid', pid_value)
        except PIDDoesNotExistError:
            return pid_value
def generate_doi(prefix, experiment=None):
    """Generate random DOI, unique within PIDStore."""
    while True:
        doi = random_doi(prefix, experiment)
        try:
            PersistentIdentifier.get('doi', doi)
        except PIDDoesNotExistError:
            return doi
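# Both generators above share the same probe-until-free pattern: draw a
# random candidate, look it up in PIDStore, and return the first value that
# raises PIDDoesNotExistError. A minimal self-contained sketch of that
# pattern follows; ``_random_value`` is a hypothetical stand-in for the
# random_pid/random_doi helpers, and a Flask application and database
# context are assumed.
import random
import string

from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier


def _random_value(length=8):
    """Hypothetical candidate generator."""
    return ''.join(random.choice(string.digits) for _ in range(length))


def generate_unique_pid_value(pid_type):
    """Return a value not yet present in PIDStore for ``pid_type``."""
    while True:
        candidate = _random_value()
        try:
            # PersistentIdentifier.get raises PIDDoesNotExistError when
            # the candidate is still free, which ends the loop.
            PersistentIdentifier.get(pid_type, candidate)
        except PIDDoesNotExistError:
            return candidate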
def test_delete_with_sqldatabase_error(app):
    """Test VALID record delete request (GET .../records/<record_id>)."""
    with app.app_context():
        # create the record using the internal API
        pid, record = create_record(test_data)
        db.session.expire(record.model)
        pid_value = pid.pid_value
        pid_type = pid.pid_type
        record_id = record.id

        db.session.commit()
        Record.get_record(record_id)

        def raise_exception():
            raise SQLAlchemyError()

        with app.test_client() as client:
            # start a new SQLAlchemy session so that it will rollback
            # everything
            nested_transaction = db.session().transaction
            orig_rollback = nested_transaction.rollback
            flags = {'rollbacked': False}

            def custom_rollback(*args, **kwargs):
                flags['rollbacked'] = True
                orig_rollback(*args, **kwargs)
            nested_transaction.rollback = custom_rollback

            with patch.object(PersistentIdentifier, 'delete',
                              side_effect=raise_exception):
                headers = [('Accept', 'application/json')]
                res = client.delete(
                    url_for('invenio_records_rest.recid_item',
                            pid_value=pid_value),
                    headers=headers)
                assert res.status_code == 500
            # check that the transaction is finished
            assert db.session().transaction is not nested_transaction
            # check that the session has rollbacked
            assert flags['rollbacked']

    with app.app_context():
        with app.test_client() as client:
            # check that the record and PID have not been deleted
            Record.get_record(record_id)
            assert not PersistentIdentifier.get(
                pid_type, pid_value).is_deleted()
            # try to delete without exception, the transaction should have
            # been rollbacked
            headers = [('Accept', 'application/json')]
            res = client.delete(url_for('invenio_records_rest.recid_item',
                                        pid_value=pid_value),
                                headers=headers)
            assert res.status_code == 204

            # check database state
            with pytest.raises(NoResultFound):
                Record.get_record(record_id)
            assert PersistentIdentifier.get(
                pid_type, pid_value).is_deleted()
def test_file_download_ui(app, objects):
    """Test get buckets."""
    app.config.update(dict(
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1',
            filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
def test_isolated_app_fixture_rollback(isolated_app):
    pids_count = PersistentIdentifier.query.count()
    PersistentIdentifier.create(
        pid_type='type1',
        pid_value='value1',
    )
    db.session.rollback()
    assert PersistentIdentifier.query.count() == pids_count
def test_funder_ep_resolving(app, db):
    """Test funder resolving through entry point-registered JSON resolver."""
    json1 = {"internal_id": "10.13039/001", "parent": "", "name": "Foo"}
    json2 = {"internal_id": "10.13039/002",
             "parent": {"$ref": "http://dx.doi.org/10.13039/001"},
             "name": "Bar"}

    r1 = R.create(json1)
    PID.create("frdoi", json1["internal_id"], object_type="rec",
               object_uuid=r1.id, status=PIDStatus.REGISTERED)
    r2 = R.create(json2)
    PID.create("frdoi", json2["internal_id"], object_type="rec",
               object_uuid=r2.id, status=PIDStatus.REGISTERED)

    assert r2.replace_refs()["parent"] == json1
def test_app_fixture_lacks_db_isolation_step1(pids_count, app):
    assert PersistentIdentifier.query.count() == pids_count
    PersistentIdentifier.create(
        pid_type='type1',
        pid_value='value1',
    )
    # The number of PIDs must have incremented.
    assert PersistentIdentifier.query.count() == pids_count + 1
def missing_pids(self):
    """Filter persistent identifiers."""
    missing = []
    for p in self.pids:
        try:
            PersistentIdentifier.get(p.pid_type, p.pid_value)
        except PIDDoesNotExistError:
            missing.append(p)
    return missing
def test_tombstone(app):
    """Test tombstones."""
    with app.app_context():
        # OK PID
        pid_ok, record = create_record({'title': 'test'})

        # Deleted PID
        pid_del, record = create_record({'title': 'deleted'})
        pid_del.delete()

        # Missing object PID
        pid_noobj = PersistentIdentifier.create(
            'recid', '100', status=PIDStatus.REGISTERED)
        db.session.commit()

        # Redirected PID
        pid_red = PersistentIdentifier.create(
            'recid', '101', status=PIDStatus.REGISTERED)
        pid_red.redirect(pid_ok)

        # Redirect PID - different endpoint
        pid_doi = PersistentIdentifier.create(
            'doi', '10.1234/foo', status=PIDStatus.REGISTERED)
        pid_red_doi = PersistentIdentifier.create(
            'recid', '102', status=PIDStatus.REGISTERED)
        pid_red_doi.redirect(pid_doi)
        db.session.commit()

    with app.test_client() as client:
        # PID deleted
        headers = [('Accept', 'application/json')]
        res = client.get(
            url_for('invenio_records_rest.recid_item',
                    pid_value=pid_del.pid_value),
            headers=headers)
        assert res.status_code == 410

        # PID missing object
        res = client.get(
            url_for('invenio_records_rest.recid_item',
                    pid_value=pid_noobj.pid_value),
            headers=headers)
        assert res.status_code == 500

        # Redirected invalid endpoint
        res = client.get(
            url_for('invenio_records_rest.recid_item',
                    pid_value=pid_red_doi.pid_value),
            headers=headers)
        assert res.status_code == 500

        # Redirected
        res = client.get(
            url_for('invenio_records_rest.recid_item',
                    pid_value=pid_red.pid_value),
            headers=headers)
        assert res.status_code == 301
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed signatures
    are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    # Claimed signature #1.
    glashow_record_id_claimed = str(PersistentIdentifier.get(
        "literature", 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(
        glashow_record_id_claimed)
    glashow_record_uuid_claimed = \
        glashow_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    glashow_record_claimed["authors"][0]["signature_block"] = "HAGp"
    glashow_record_claimed["authors"][0]["curated_relation"] = True
    glashow_record_claimed["authors"][0]["recid"] = "3"
    es.index(index="records-hep", doc_type="hep",
             id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh("records-hep")

    # Claimed signature #2.
    higgs_record_id_claimed = str(PersistentIdentifier.get(
        "literature", 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_claimed["authors"][0]["signature_block"] = "HAGp"
    higgs_record_claimed["authors"][0]["curated_relation"] = True
    higgs_record_claimed["authors"][0]["recid"] = "4"
    es.index(index="records-hep", doc_type="hep",
             id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh("records-hep")

    # Not claimed signature.
    higgs_record_id_not_claimed = str(PersistentIdentifier.get(
        "literature", 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(
        higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = \
        higgs_record_not_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_not_claimed["authors"][0]["signature_block"] = "HAGp"
    es.index(index="records-hep", doc_type="hep",
             id=higgs_record_id_not_claimed, body=higgs_record_not_claimed)
    es.indices.refresh("records-hep")

    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject(
            ({"3": [glashow_record_uuid_claimed,
                    higgs_record_uuid_claimed,
                    higgs_record_uuid_not_claimed]}, {})
        ),
    ):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject(
                {higgs_record_uuid_claimed:
                    [higgs_record_uuid_not_claimed]}),
        ):
            with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                side_effect=update_authors_recid
            ):
                disambiguation_clustering("HAGp")

    assert Record.get_record(
        higgs_record_id_not_claimed)["authors"][0]["recid"] == "4"
def test_file_download_ui(base_app, objects, db):
    """Test get buckets."""
    app = base_app
    app.config.update(dict(
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_files_rest.views.file_download_ui',
            ),
        )
    ))
    InvenioRecords(app)
    InvenioPIDStore(app)
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.app_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        Record.create({
            'title': 'Registered',
            'recid': 1,
            'files': [
                {'filename': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1',
            filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
def test_filter_uuid(app):
    """Test FilterUUID."""
    with app.app_context():
        myuuid = uuid.uuid4()
        PersistentIdentifier.create(
            'doi', '10.1234/a', object_type='tst', object_uuid=myuuid)

        query = FilterUUID(PersistentIdentifier.object_uuid, 'Test').apply(
            PersistentIdentifier.query, str(myuuid), None)
        assert query.count() == 1
def test_file_permissions(app, db, test_object,  # fixtures
                          user, access_right, expected):
    """Test file permissions."""
    # Create test users
    admin = User(email='*****@*****.**', password='******')
    owner = User(email='*****@*****.**', password='******')
    auth = User(email='*****@*****.**', password='******')
    db.session.add_all([admin, owner, auth])
    db.session.add(
        ActionUsers.allow(ActionNeed('admin-access'), user=admin)
    )

    # Create test record
    rec_uuid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid', '1', object_type='rec', object_uuid=rec_uuid,
        status=PIDStatus.REGISTERED
    )
    Record.create({
        'recid': 1,
        'owners': [2],
        'access_right': access_right,
        '_files': [
            {
                'key': test_object.key,
                'bucket': str(test_object.bucket_id),
                'checksum': 'invalid'
            },
        ]
    }, id_=rec_uuid)
    db.session.add(
        RecordsBuckets(record_id=rec_uuid, bucket_id=test_object.bucket_id)
    )

    file_url = url_for(
        'invenio_records_ui.recid_files',
        pid_value='1',
        filename=test_object.key
    )
    db.session.commit()

    with app.test_client() as client:
        if user:
            # Login as user
            with client.session_transaction() as sess:
                sess['user_id'] = User.query.filter_by(
                    email='{}@zenodo.org'.format(user)).one().id
                sess['_fresh'] = True

        res = client.get(file_url)
        assert res.status_code == expected
def has_record(self, record, pid=None, scope='any'):
    """Check if a record is in a community.

    :type scope: str
    :param scope: Can take values 'any', 'all' or 'this'.

        * 'all': returns True if all record versions are in the community.
        * 'any': returns True if any of the record versions are in the
          community.
        * 'this': returns True if the specified 'record' is in the
          community.
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=pid)
    if scope == 'this':
        return self.community.has_record(record)
    q = (self.community.has_record(
        ZenodoRecord.get_record(p.get_assigned_object()))
        for p in pv.children)
    if scope == 'all':
        return all(q)
    if scope == 'any':
        return any(q)
def test_delete_member(reindex, db, minimal_organisation_record,
                       minimal_member_record):
    """Test OrganisationsMembers delete."""
    org = OrganisationWithMembers.create(minimal_organisation_record,
                                         dbcommit=True)
    member = MemberWithLocations.create(minimal_member_record,
                                        dbcommit=True)
    org.add_member(member, dbcommit=True)
    pid = PersistentIdentifier.get_by_object('memb', 'rec', member.id)
    assert pid.is_registered()
    org.remove_member(member)
    assert pid.is_deleted()
    assert org.members == []

    member1 = MemberWithLocations.create(minimal_member_record,
                                         dbcommit=True)
    org.add_member(member1, dbcommit=True)
    member2 = MemberWithLocations.create(minimal_member_record,
                                         dbcommit=True)
    org.add_member(member2, dbcommit=True)
    member3 = MemberWithLocations.create(minimal_member_record,
                                         dbcommit=True)
    org.add_member(member3, dbcommit=True)
    org.remove_member(member2)
    assert len(org.members) == 2
    assert org.members[0]['pid'] == '2'
    assert org.members[1]['pid'] == '4'
def test_apa_citation(self, config, create_record):
    """Integration test with the citation serializer.

    This validates we are passing the right input and getting a citation
    from the underlying library. Formatting of the citation is left to
    the 3rd-party citeproc-py library.
    """
    record = create_record({
        'authors': [{
            'first_name': 'Jane',
            'middle_name': 'Rachel',
            'last_name': 'Doe',
            'full_name': 'Doe, Jane Rachel'
        }, {
            'first_name': 'John',
            'last_name': 'Smith',
            'full_name': 'Smith, John'
        }],
        'resource_type': {
            'general': 'dataset',
            'specific': 'dataset',
            'full_hierarchy': ['dataset']
        }
    })
    record['doi'] = '10.5072/qwer-tyui'
    pid = PersistentIdentifier.get(
        record['_deposit']['pid']['type'],
        record['_deposit']['pid']['value'],
    )

    citation_str = citeproc_v1.serialize(pid, record, style='apa')

    assert citation_str == ("Doe, J., & Smith, J. ({year}). "
                            "A title [Data set]. {publisher}. "
                            "http://doi.org/10.5072/qwer-tyui".format(
                                year=from_isodate(record.created).year,
                                publisher=config['DOI_PUBLISHER']))
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, and the
    conflicts are stored in ``extra_data.conflicts``. It also adds a
    ``callback_url`` pointing to the endpoint that resolves the merge
    conflicts.

    Note:
        For the time being the ``root`` will be ignored, and we'll rely
        only on the ``head``, hence it is a rootless implementation. Also,
        when the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False``
        the merge is skipped.
    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    matched_control_number = obj.extra_data['matches']['approved']
    head_uuid = PersistentIdentifier.get(
        'lit', matched_control_number).object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    root = {}
    update = obj.data

    merged, conflicts = merge(head=head.dumps(), root=root, update=update)

    obj.data = merged
    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()
def load_github_releases(releases_file):
    """Load GitHub releases information.

    Updates the missing releases and the submission dates.
    """
    from invenio_github.models import Release, Repository, ReleaseStatus
    from sqlalchemy.orm.exc import NoResultFound
    import arrow
    import json

    releases_db = json.load(releases_file)
    with click.progressbar(releases_db) as releases:
        for release in releases:
            repo_name, new_repo_name, gh_repo_id, ra_id, user_id, dep = \
                release
            try:
                repo = Repository.query.filter_by(
                    github_id=gh_repo_id).one()
            except NoResultFound:
                repo = Repository.create(user_id=user_id,
                                         github_id=gh_repo_id,
                                         name=new_repo_name)
            pid = PersistentIdentifier.get(
                pid_type='recid', pid_value=str(dep['record_id']))
            rel = Release.query.filter_by(
                repository_id=repo.id,
                record_id=pid.get_assigned_object()).first()
            created = arrow.get(dep['submitted']).datetime.replace(
                tzinfo=None)
            if rel:
                rel.created = created
            else:
                rel = Release(tag=dep['github_ref'],
                              errors=dep['errors'],
                              record_id=pid.get_assigned_object(),
                              repository_id=repo.id,
                              status=ReleaseStatus.PUBLISHED,
                              created=created)
                db.session.add(rel)
    db.session.commit()
def accept_record(self, record, pid=None):
    """Accept the record and all of its versions into the community.

    :type record: zenodo.modules.records.api.ZenodoRecord
    :param pid: PID of type 'recid'
    :type pid: invenio_pidstore.models.PersistentIdentifier
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    with db.session.begin_nested():
        pending_q = self.get_comm_irs(record, pid=pid)
        if not pending_q.count():
            raise InclusionRequestMissingError(community=self,
                                               record=record)
        pv = PIDVersioning(child=pid)
        for child in pv.children.all():
            rec = ZenodoRecord.get_record(child.get_assigned_object())
            self.community.add_record(rec)
            rec.commit()

            if request:
                @after_this_request
                def send_signals(response):
                    try:
                        record_accepted.send(
                            current_app._get_current_object(),
                            record_id=rec.id,
                            community_id=self.community.id,
                        )
                    except Exception:
                        pass
                    return response
        pending_q.delete(synchronize_session=False)
def rerodoc_redirection(pid, filename=None):
    """Redirect to a document with an identifier from RERODOC.

    :param pid: PID from RERODOC.
    :returns: A redirection to the record's detail page, or 404 if not
        found.
    """
    try:
        pid = PersistentIdentifier.get('rerod', pid)
    except Exception:
        abort(404)

    # File URLs do not contain the view.
    if filename:
        return redirect(
            url_for('invenio_records_ui.doc_files',
                    pid_value=pid.get_redirect().pid_value,
                    filename=filename))

    doc_pid = pid.get_redirect().pid_value
    doc = DocumentRecord.get_record_by_pid(doc_pid)
    if doc:
        doc = doc.replace_refs()
        orgs = doc.get('organisation', [])
        # In case of multiple organisations we redirect to the global view
        if len(orgs) == 1:
            org = orgs.pop()
            # Only for dedicated or shared
            if org.get('isDedicated') or org.get('isShared'):
                return redirect(
                    url_for('invenio_records_ui.doc',
                            view=org.get('code'),
                            pid_value=pid.get_redirect().pid_value))

    global_view = current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION')
    return redirect(
        url_for('invenio_records_ui.doc',
                view=global_view,
                pid_value=pid.get_redirect().pid_value))
def test_datacite_register_fail(mocker, app, db, es, minimal_record):
    # Make the datacite API unavailable
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient')
    dc_mock().metadata_post.side_effect = datacite.errors.HttpError()

    # Create a reserved recid
    record = Record.create(minimal_record)
    record_uuid = record.id
    recid = record['recid']
    recid_pid = PersistentIdentifier.create(
        'recid', recid, status=PIDStatus.RESERVED)

    # Mint the record
    zenodo_record_minter(record_uuid, record)
    record.commit()
    db.session.commit()

    with pytest.raises(datacite.errors.HttpError):
        datacite_register.apply((recid_pid.pid_value, str(record_uuid)))

    # Check that the task was retried ("max_retries" + 1) times
    dc_calls = len(dc_mock().metadata_post.mock_calls)
    assert dc_calls == datacite_register.max_retries + 1
def clean(self, deposit_id, version_id, *args, **kwargs):
    """Undo metadata extraction."""
    # 1. Revert patch on record
    recid = str(PersistentIdentifier.get(
        'depid', deposit_id).object_uuid)
    patch = [{
        'op': 'remove',
        'path': '/_cds/extracted_metadata',
    }]
    validator = 'cds.modules.records.validators.PartialDraft4Validator'
    try:
        patch_record(recid=recid, patch=patch, validator=validator)
    except jsonpatch.JsonPatchConflict as c:
        logger.warning(
            'Failed to apply JSON Patch to deposit {0}: {1}'.format(
                recid, c))

    # Delete tmp file if any
    obj = as_object_version(version_id)
    temp_location = obj.get_tags().get('temp_location', None)
    if temp_location:
        shutil.rmtree(temp_location)
        ObjectVersionTag.delete(obj, 'temp_location')
        db.session.commit()
def zenodo_record_minter(record_uuid, data):
    """Zenodo record minter.

    Mint, or register if previously minted, the Concept RECID and RECID.
    Mint the Concept DOI and DOI.
    """
    if 'conceptrecid' not in data:
        zenodo_concept_recid_minter(record_uuid, data)

    if 'recid' in data:
        recid = PersistentIdentifier.get('recid', data['recid'])
        recid.assign('rec', record_uuid)
        recid.register()
    else:
        recid = RecordIdProvider.create(
            object_type='rec', object_uuid=record_uuid).pid
        data['recid'] = int(recid.pid_value)

    zenodo_doi_minter(record_uuid, data)
    oaiid_minter(record_uuid, data)

    if 'conceptdoi' not in data:
        zenodo_concept_doi_minter(record_uuid, data)

    return recid
def delete(self, force=True, pid=None):
    """Delete deposit.

    Status required: ``'draft'``.

    :param force: Force deposit delete. (Default: ``True``)
    :param pid: Force pid object. (Default: ``None``)
    :returns: A new Deposit object.
    """
    # Delete the recid
    recid = PersistentIdentifier.get(pid_type='recid',
                                     pid_value=self.pid.pid_value)
    if recid.status == PIDStatus.RESERVED:
        db.session.delete(recid)

    # if this item has been deleted
    self.delete_es_index_attempt(recid)

    # Completely remove bucket
    bucket = self.files.bucket
    with db.session.begin_nested():
        # Remove Record-Bucket link
        RecordsBuckets.query.filter_by(record_id=self.id).delete()
        mp_q = MultipartObject.query_by_bucket(bucket)
        # Remove multipart objects
        Part.query.filter(
            Part.upload_id.in_(
                mp_q.with_entities(
                    MultipartObject.upload_id).subquery())
        ).delete(synchronize_session='fetch')
        mp_q.delete(synchronize_session='fetch')
    bucket.locked = False
    bucket.remove()
    return super(Deposit, self).delete()
def test_minimal_record(app, minimal_record):
    """Test minimal record."""
    expected = {
        u'publication_distribution_imprint': [{
            'date_of_publication_distribution': (
                datetime.utcnow().date().isoformat())
        }],
        u'control_number': '123',
        u'information_relating_to_copyright_status': {
            'copyright_status': 'open'
        },
        u'summary': {
            'summary': 'My description'
        },
        u'main_entry_personal_name': {
            'personal_name': 'Test'
        },
        u'resource_type': {
            'type': 'software'
        },
        u'title_statement': {
            'title': 'Test'
        }
    }

    # Create record and pid.
    record = Record(minimal_record)
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    assert record.validate() is None

    data = marcxml_v1.schema_class().dump(marcxml_v1.preprocess_record(
        pid=pid, record=record)).data
    assert_dict(expected, data)

    marcxml_v1.serialize(pid=pid, record=record)
def accept_record(self, record, pid=None):
    """Accept the record and all of its versions into the community.

    :type record: zenodo.modules.records.api.ZenodoRecord
    :param pid: PID of type 'recid'
    :type pid: invenio_pidstore.models.PersistentIdentifier
    """
    if not pid:
        pid = PersistentIdentifier.get('recid', record['recid'])
    with db.session.begin_nested():
        pending_q = self.get_comm_irs(record, pid=pid)
        if not pending_q.count():
            raise InclusionRequestMissingError(community=self,
                                               record=record)
        pv = PIDVersioning(child=pid)
        for child in pv.children.all():
            rec = ZenodoRecord.get_record(child.get_assigned_object())
            # req = InclusionRequest.get(self.community.id, rec.id)
            # if req:
            #     req.delete()
            self.community.add_record(rec)
            rec.commit()
        pending_q.delete(synchronize_session=False)
def create(self, pid_value=None, pid_type=None, status=None,
           object_type=None, object_uuid=None, **kwargs):
    """Create a new instance for the given type and pid.

    :param pid_value: Persistent identifier value. (Default: None).
    :param pid_type: Persistent identifier type. (Default: None).
    :param status: Status for the created PID (Default:
        :attr:`invenio_pidstore.models.PIDStatus.NEW`).
    :param object_type: The object type is a string that identifies its
        type. (Default: None).
    :param object_uuid: The object UUID. (Default: None).
    :returns: A :class:`invenio_pidstore.models.PersistentIdentifier`
        instance.
    """
    pid_type = pid_type or self.pid_type
    assert pid_type

    pid_value = pid_value or self._generate_id(**kwargs)
    assert pid_value

    status = status or self.default_status
    assert status

    return PersistentIdentifier.create(
        pid_type,
        pid_value,
        pid_provider=self.name,
        object_type=object_type,
        object_uuid=object_uuid,
        status=status,
    )
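# A minimal sketch of a provider built around the ``create`` method above,
# showing how pid_type, default_status, name and _generate_id cooperate.
# ``SequentialProvider`` is hypothetical; real Invenio providers (e.g.
# RecordIdProvider) follow the same shape but with their own id logic.
import itertools

from invenio_pidstore.models import PIDStatus


class SequentialProvider(object):
    """Hypothetical provider handing out sequential pid values."""

    name = 'seq'
    pid_type = 'recid'
    default_status = PIDStatus.RESERVED
    _counter = itertools.count(1)

    def _generate_id(self, **kwargs):
        # Called by ``create`` above when no pid_value is passed in.
        return str(next(self._counter))

# With ``create`` mixed into such a class, usage would look like:
#   pid = SequentialProvider().create(object_type='rec',
#                                     object_uuid=my_uuid)
#   assert pid.status == PIDStatus.RESERVED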
def test_resolver_deleted_object(app, db):
    """Test the class methods of PersistentIdentifier class."""
    with app.app_context():
        rec_uuid = uuid.uuid4()
        records = {
            rec_uuid: {'title': 'test'},
        }

        with db.session.begin_nested():
            pid = PersistentIdentifier.create(
                'recid', '1', status=PIDStatus.REGISTERED,
                object_type='rec', object_uuid=rec_uuid)

        with db.session.begin_nested():
            pid.delete()

        resolver = Resolver(pid_type='recid', object_type='rec',
                            getter=records.get)
        assert pytest.raises(PIDDeletedError, resolver.resolve, '1')
def test_single_signature_with_no_profile(small_app):
    """Check the module for the case with a single, new signature."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid)

    record_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record['authors'][0]['uuid']

    # Add phonetic block to the record.
    record['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep', id=record_id, body=record)
    es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(({}, {"0": [author_uuid]}))):
        with patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
            side_effect=update_authors_recid
        ):
            disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(record_id)['authors'][0]['recid'] == "1"
def zenodo_deposit_minter(record_uuid, data):
    """Mint the DEPID, and reserve the Concept RECID and RECID PIDs."""
    if 'conceptrecid' not in data:
        zenodo_concept_recid_minter(data=data)

    recid = zenodo_reserved_record_minter(data=data)

    # Create the depid with the same pid_value as the recid
    depid = PersistentIdentifier.create(
        'depid',
        str(recid.pid_value),
        object_type='rec',
        object_uuid=record_uuid,
        status=PIDStatus.REGISTERED,
    )

    data.update({
        '_deposit': {
            'id': depid.pid_value,
            'status': 'draft',
        },
    })

    return depid
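# A usage sketch for the minter above. It assumes an application and
# database context; the concrete 'conceptrecid'/'recid' values are
# illustrative only.
import uuid

data = {'conceptrecid': '123', 'recid': 124}
depid = zenodo_deposit_minter(uuid.uuid4(), data)
# The payload now carries the draft deposit identifier:
assert data['_deposit'] == {'id': depid.pid_value, 'status': 'draft'}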
def pid(self):
    """Get the PersistentIdentifier for this record."""
    return PersistentIdentifier.get(pid_type=self._pid_type,
                                    pid_value=self["pid"])
def pid_from_value(pid_value, pid_type='recid'):
    """Return the PID for a value, or None if it does not exist."""
    try:
        return PersistentIdentifier.get(pid_type=pid_type,
                                        pid_value=pid_value)
    except Exception:
        pass
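# Usage sketch: unlike PersistentIdentifier.get, the helper above swallows
# lookup errors and returns None, so callers can branch without try/except.
# The DOI value is illustrative only.
pid = pid_from_value('10.5072/zenodo.1234', pid_type='doi')
if pid is None:
    # Unknown identifier - fall back or report to the caller.
    pass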
def test_pid_creation(app):
    """Test pid creation."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        with app.app_context():
            assert PersistentIdentifier.query.count() == 0

        result = runner.invoke(cmd, [
            'create', 'doi', '10.1234/foo'
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            assert PersistentIdentifier.query.count() == 1
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert pid.pid_type == 'doi'
            assert pid.pid_value == '10.1234/foo'
            assert pid.pid_provider is None
            assert pid.status == PIDStatus.NEW
            assert pid.object_type is None
            assert pid.object_uuid is None

        rec_uuid = uuid.uuid4()

        # Bad parameter status:
        result = runner.invoke(cmd, [
            'create', 'recid', '2',
            '--status', 'BADPARAMETER',
            '--type', 'rec',
            '--uuid', str(rec_uuid),
        ], obj=script_info)
        assert 2 == result.exit_code

        # Type and uuid must be specified together:
        result = runner.invoke(cmd, [
            'create', 'recid', '2',
            '--type', 'rec',
        ], obj=script_info)
        assert 2 == result.exit_code

        result = runner.invoke(cmd, [
            'create', 'recid', '2',
            '--uuid', str(rec_uuid),
        ], obj=script_info)
        assert 2 == result.exit_code

        # Everything should be fine now:
        result = runner.invoke(cmd, [
            'create', 'recid', '2',
            '--status', 'REGISTERED',
            '--type', 'rec',
            '--uuid', str(rec_uuid),
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            assert PersistentIdentifier.query.count() == 2
            pid = PersistentIdentifier.get('recid', '2')
            assert pid.pid_type == 'recid'
            assert pid.pid_value == '2'
            assert pid.pid_provider is None
            assert pid.status == PIDStatus.REGISTERED
            assert pid.object_type == 'rec'
            assert pid.object_uuid == rec_uuid

        # Can't duplicate existing persistent identifier
        result = runner.invoke(cmd, [
            'create', 'recid', '2',
        ], obj=script_info)
        assert -1 == result.exit_code
def test_pid_assign(app):
    """Test pid object assignment."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        # No assigned object
        result = runner.invoke(cmd, [
            'create', 'doi', '10.1234/foo'
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert not pid.has_object()
            assert pid.get_assigned_object() is None
            assert pid.get_assigned_object('rec') is None

        # Assign object
        rec_uuid = uuid.uuid4()
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec', '-i', str(rec_uuid)
        ], obj=script_info)
        assert 0 == result.exit_code
        pid_status = result.output

        with app.app_context():
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert pid.has_object()
            assert pid.get_assigned_object() == rec_uuid
            assert pid.get_assigned_object('rec') == rec_uuid
            assert pid.get_assigned_object('oth') is None

        # Doesn't raise
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec', '-i', str(rec_uuid)
        ], obj=script_info)
        assert 0 == result.exit_code

        # Missing type or uuid:
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
        ], obj=script_info)
        assert 2 == result.exit_code

        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec',
        ], obj=script_info)
        assert 2 == result.exit_code

        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-i', str(rec_uuid),
        ], obj=script_info)
        assert 2 == result.exit_code

        # Assign without overwrite (uuid as str and uuid)
        new_uuid = uuid.uuid4()
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec', '-i', str(new_uuid)
        ], obj=script_info)
        assert -1 == result.exit_code

        # Assign with overwrite
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-s', 'REGISTERED',
            '-t', 'rec', '-i', str(new_uuid),
            '--overwrite'
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert pid.has_object()
            assert pid.status == PIDStatus.REGISTERED
            assert pid.get_assigned_object() == new_uuid
            assert pid.get_assigned_object('rec') == new_uuid
            assert pid.get_assigned_object('oth') is None
def get_persistent_identifier(cls, id):
    """Get Persistent Identifier."""
    return PersistentIdentifier.get_by_object(cls.provider.pid_type,
                                              cls.object_type,
                                              id)
def fetcher(obj_uuid, data):
    assert obj_uuid in ['a', 'b']
    return PersistentIdentifier(pid_type='doi', pid_value='a')
def get_db_record(pid_type, recid):
    from inspirehep.modules.records.api import InspireRecord
    pid = PersistentIdentifier.get(pid_type, recid)
    return InspireRecord.get_record(pid.object_uuid)
def publish(self):
    """Publish GitHub release as record."""
    id_ = uuid.uuid4()
    deposit_metadata = dict(self.metadata)
    deposit = None
    try:
        db.session.begin_nested()
        # TODO: Add filter on Published releases
        previous_releases = self.model.repository.releases.filter_by(
            status=ReleaseStatus.PUBLISHED)
        versioning = None
        stashed_draft_child = None
        if previous_releases.count():
            last_release = previous_releases.order_by(
                Release.created.desc()).first()
            last_recid = PersistentIdentifier.get(
                'recid', last_release.record['recid'])
            versioning = PIDVersioning(child=last_recid)
            last_record = ZenodoRecord.get_record(
                versioning.last_child.object_uuid)
            deposit_metadata['conceptrecid'] = last_record['conceptrecid']
            if 'conceptdoi' not in last_record:
                last_depid = PersistentIdentifier.get(
                    'depid', last_record['_deposit']['id'])
                last_deposit = ZenodoDeposit.get_record(
                    last_depid.object_uuid)
                last_deposit = last_deposit.registerconceptdoi()
                last_recid, last_record = last_deposit.fetch_published()
            deposit_metadata['conceptdoi'] = last_record['conceptdoi']
            if versioning.draft_child:
                stashed_draft_child = versioning.draft_child
                versioning.remove_draft_child()

        deposit = self.deposit_class.create(deposit_metadata, id_=id_)

        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]

        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True,
                                          allow_redirects=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}"
                    .format(url=url)
                )

            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=res.raw,
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
        recid_pid, record = deposit.fetch_published()
        self.model.recordmetadata = record.model
        if versioning and stashed_draft_child:
            versioning.insert_draft_child(stashed_draft_child)
        record_id = str(record.id)
        db.session.commit()

        # Send Datacite DOI registration task
        datacite_register.delay(recid_pid.pid_value, record_id)

        # Index the record
        RecordIndexer().index_by_id(record_id)
    except Exception:
        db.session.rollback()
        # Remove deposit from index since it was not committed.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommitted deposit from index.")
        raise
def recid(self):
    """Get RECID object for the Release record."""
    if self.record:
        return PersistentIdentifier.get('recid', str(self.record['recid']))
def update_record_statistics(start_date=None, end_date=None):
    """Update the "_stats" field of affected records."""
    start_date = dateutil_parse(start_date) if start_date else None
    end_date = dateutil_parse(end_date) if end_date else None
    aggr_configs = {}

    if not start_date and not end_date:
        start_date = datetime.utcnow()
        end_date = datetime.utcnow()
        for aggr_name in current_stats.enabled_aggregations:
            aggr_cfg = current_stats.aggregations[aggr_name]
            aggr = aggr_cfg.aggregator_class(
                name=aggr_cfg.name, **aggr_cfg.aggregator_config)
            if not Index(aggr.aggregation_alias,
                         using=aggr.client).exists():
                if not Index(aggr.event_index,
                             using=aggr.client).exists():
                    start_date = min(start_date, datetime.utcnow())
                else:
                    start_date = min(
                        start_date, aggr._get_oldest_event_timestamp())

            # Retrieve the last two bookmarks
            bookmarks = Search(
                using=aggr.client,
                index=aggr.aggregation_alias,
                doc_type=aggr.bookmark_doc_type
            )[0:2].sort({'date': {'order': 'desc'}}).execute()

            if len(bookmarks) >= 1:
                end_date = max(
                    end_date,
                    datetime.strptime(bookmarks[0].date,
                                      aggr.doc_id_suffix))
            if len(bookmarks) == 2:
                start_date = min(
                    start_date,
                    datetime.strptime(bookmarks[1].date,
                                      aggr.doc_id_suffix))
            aggr_configs[aggr.aggregation_alias] = aggr
    elif start_date and end_date:
        for aggr_name in current_stats.enabled_aggregations:
            aggr_cfg = current_stats.aggregations[aggr_name]
            aggr = aggr_cfg.aggregator_class(
                name=aggr_cfg.name, **aggr_cfg.aggregator_config)
            aggr_configs[aggr.aggregation_alias] = aggr
    else:
        return

    # Get conceptrecids for all the affected records between the two dates
    conceptrecids = set()
    for aggr_alias, aggr in aggr_configs.items():
        query = Search(
            using=aggr.client,
            index=aggr.aggregation_alias,
            doc_type=aggr.aggregation_doc_type,
        ).filter('range', timestamp={
            'gte': start_date.replace(microsecond=0).isoformat() + '||/d',
            'lte': end_date.replace(microsecond=0).isoformat() + '||/d'
        }).extra(_source=False)
        query.aggs.bucket('ids', 'terms', field='conceptrecid', size=0)
        conceptrecids |= {
            b.key for b in query.execute().aggregations.ids.buckets
        }

    indexer = RecordIndexer()
    for conceptrecid_val in conceptrecids:
        conceptrecid = PersistentIdentifier.get('recid', conceptrecid_val)
        pv = PIDVersioning(parent=conceptrecid)
        children_recids = pv.children.all()
        indexer.bulk_index([str(p.object_uuid) for p in children_recids])
def license_record(db, sip_metadata_types):
    """Create a license record."""
    license = Record.create({
        "$schema":
            "https://zenodo.org/schemas/licenses/license-v1.0.0.json",
        "domain_content": True,
        "domain_data": True,
        "domain_software": True,
        "family": "",
        "id": "CC-BY-4.0",
        "maintainer": "Creative Commons",
        "od_conformance": "approved",
        "osd_conformance": "not reviewed",
        "status": "active",
        "title": "Creative Commons Attribution International 4.0",
        "url": "https://creativecommons.org/licenses/by/4.0/"
    })
    PersistentIdentifier.create(
        pid_type='od_lic', pid_value=license['id'], object_type='rec',
        object_uuid=license.id, status='R')
    license = Record.create({
        "$schema":
            "https://zenodo.org/schemas/licenses/license-v1.0.0.json",
        "domain_content": True,
        "domain_data": True,
        "domain_software": True,
        "family": "",
        "id": "CC0-1.0",
        "maintainer": "Creative Commons",
        "od_conformance": "approved",
        "osd_conformance": "not reviewed",
        "status": "active",
        "title": "CC0 1.0",
        "url": "https://creativecommons.org/publicdomain/zero/1.0/"
    })
    PersistentIdentifier.create(
        pid_type='od_lic', pid_value=license['id'], object_type='rec',
        object_uuid=license.id, status='R')
    db.session.commit()
    return license
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed signatures
    are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid)

    # Claimed signature #1.
    glashow_record_id_claimed = str(
        PersistentIdentifier.get('lit', 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(
        glashow_record_id_claimed)
    glashow_record_uuid_claimed = \
        glashow_record_claimed['authors'][0]['uuid']

    # Add phonetic block to the record.
    glashow_record_claimed['authors'][0]['signature_block'] = "HAGp"
    glashow_record_claimed['authors'][0]['curated_relation'] = True
    glashow_record_claimed['authors'][0]['recid'] = "3"
    es.index(index='records-hep', doc_type='hep',
             id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh('records-hep')

    # Claimed signature #2.
    higgs_record_id_claimed = str(
        PersistentIdentifier.get('lit', 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed['authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_record_claimed['authors'][0]['signature_block'] = "HAGp"
    higgs_record_claimed['authors'][0]['curated_relation'] = True
    higgs_record_claimed['authors'][0]['recid'] = "4"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh('records-hep')

    # Not claimed signature.
    higgs_record_id_not_claimed = str(
        PersistentIdentifier.get('lit', 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(
        higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = higgs_record_not_claimed['authors'][0][
        'uuid']

    # Add phonetic block to the record.
    higgs_record_not_claimed['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_record_id_not_claimed, body=higgs_record_not_claimed)
    es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(({
                   "3": [
                       glashow_record_uuid_claimed,
                       higgs_record_uuid_claimed,
                       higgs_record_uuid_not_claimed
                   ]
               }, {}))):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject({
                higgs_record_uuid_claimed: [higgs_record_uuid_not_claimed]
            })):
            with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                side_effect=update_authors_recid):
                disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(
        higgs_record_id_not_claimed)['authors'][0]['recid'] == "4"
def publish_record_internal(self, record_context,
                            published_record_class,
                            published_pid_type,
                            collected_records):
    draft_record = record_context.record
    draft_pid = record_context.record_pid

    # clone metadata
    metadata = copy.deepcopy(dict(draft_record))
    if 'oarepo:validity' in metadata:
        del metadata['oarepo:validity']
    metadata.pop('oarepo:draft', True)

    try:
        published_pid = PersistentIdentifier.get(published_pid_type,
                                                 draft_pid.pid_value)
    except PIDDoesNotExistError:
        published_pid = None

    before_publish_record.send(
        draft_record,
        metadata=metadata,
        record_context=record_context,
        record=record_context,  # back compatibility, deprecated
        collected_records=collected_records)

    if published_pid:
        if published_pid.status == PIDStatus.DELETED:
            # the published record is deleted, resurrect it:
            # change the pid to registered
            published_pid.status = PIDStatus.REGISTERED
            db.session.add(published_pid)

            # and fetch the published record and update its metadata
            return self._update_published_record(
                published_pid, metadata, None,
                published_record_class, record_context)
        elif published_pid.status == PIDStatus.REGISTERED:
            # fetch the published record and update its metadata
            # if it is older than the draft one
            return self._update_published_record(
                published_pid, metadata, draft_record.updated,
                published_record_class, record_context)

        raise NotImplementedError(
            'Can not publish a draft record to a published record '
            'with pid status %s. Only registered or deleted '
            'statuses are implemented', published_pid.status)

    # create a new published record. Do not call the minter as the pid
    # value will be the same as the pid value of the draft record
    id = uuid.uuid4()
    published_record = published_record_class.create(metadata, id_=id)
    published_pid = PersistentIdentifier.create(
        pid_type=published_pid_type,
        pid_value=draft_pid.pid_value,
        status=PIDStatus.REGISTERED,
        object_type='rec',
        object_uuid=id)

    self._copy_files_between_records(
        draft_record, published_record, record_context,
        RecordContext(record_pid=published_pid,
                      record=published_record))

    after_publish_record.send(
        draft_record,
        published_record=published_record,
        published_pid=published_pid,
        collected_records=collected_records)

    return published_record, published_pid