def test_propertyvalue_acl_delete(app, db, es, es_acl_prepare, test_users):
    """Check that deleting a PropertyValueACL removes its ES document.

    Creates an ACL with a single property/value record selector, verifies
    the exact serialized form stored in the ACL index, then deletes the
    ACL and expects a subsequent GET to raise NotFoundError.
    """
    # should pass as it does nothing
    with db.session.begin_nested():
        acl = PropertyValueACL(name='test', schemas=[RECORD_SCHEMA],
                               priority=0, operation='get',
                               originator=test_users.u1)
        propval = PropertyValue(name='keywords', value='test', acl=acl,
                                originator=test_users.u1)
        db.session.add(acl)
        db.session.add(propval)
    # Serialize the ACL into its Elasticsearch ACL index.
    acl.update()
    idx = acl.get_acl_index_name(schema_to_index(RECORD_SCHEMA)[0])
    acl_md = current_search_client.get(
        index=idx,
        doc_type=current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME'],
        id=acl.id)
    # ES7 returns extra:
    acl_md.pop('_seq_no', None)
    acl_md.pop('_primary_term', None)
    # The property/value pair must have been compiled to a bool/must term
    # query and the record type marked as 'propertyvalue'.
    assert acl_md == {
        '_id': acl.id,
        '_index': 'invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0',
        '_source': {
            '__acl_record_selector': {
                'bool': {
                    'must': [{
                        'term': {
                            'keywords': 'test'
                        }
                    }]
                }
            },
            '__acl_record_type': 'propertyvalue'
        },
        '_type': '_doc',
        '_version': 1,
        'found': True
    }
    # After delete() the document must be gone from the ACL index.
    acl.delete()
    with pytest.raises(elasticsearch.exceptions.NotFoundError):
        current_search_client.get(
            index=idx,
            doc_type=current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME'],
            id=acl.id)
def test_reindex(app, script_info):
    """Test reindex."""
    with app.test_request_context():
        cli_runner = CliRunner()
        # Create a record in the database first.
        record_uuid = uuid.uuid4()
        record = Record.create({'title': 'Test0'}, id_=record_uuid)
        db.session.commit()
        # Prepare an empty indexing queue.
        result = cli_runner.invoke(cli.queue, ['init', 'purge'],
                                   obj=script_info)
        assert result.exit_code == 0
        # Schedule the reindexing and process the queue.
        result = cli_runner.invoke(cli.reindex, ['--yes-i-know'],
                                   obj=script_info)
        assert result.exit_code == 0
        result = cli_runner.invoke(cli.run, [], obj=script_info)
        assert result.exit_code == 0
        # Give the bulk indexer time to finish.
        sleep(5)
        index, doc_type = RecordIndexer().record_to_index(record)
        es_doc = current_search_client.get(index=index, doc_type=doc_type,
                                           id=record_uuid)
        assert es_doc['found']
        # Tear the queue down again.
        result = cli_runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert result.exit_code == 0
def test_draft_indexing(app, db, es, example_draft, indexer):
    """Test indexing of a draft."""
    # Push the draft into Elasticsearch and expect a fresh document.
    index_result = indexer.index(example_draft)
    assert index_result['result'] == 'created'

    # Fetch the raw document back from Elasticsearch.
    es_doc = current_search_client.get('draftsresources-drafts-draft-v1.0.0',
                                       id=example_draft.id,
                                       doc_type='_doc')

    # Rebuild a Draft from the stored source and compare field by field.
    loaded = Draft.loads(es_doc['_source'])
    assert loaded == example_draft
    assert loaded.id == example_draft.id
    assert loaded.revision_id == example_draft.revision_id
    assert loaded.created == example_draft.created
    assert loaded.updated == example_draft.updated
    assert loaded.expires_at == example_draft.expires_at
    assert loaded.parent == example_draft.parent
    assert (loaded.versions.is_latest_draft
            == example_draft.versions.is_latest_draft)
    assert loaded.versions.index == example_draft.versions.index

    # Check system fields
    assert loaded.metadata == example_draft['metadata']
def test_create_record_check_acl_priority(app, db, es, es_acl_prepare, test_users):
    """Check that only the highest-priority matching ACL is cached on a record.

    Two DefaultACLs match the created record; the one with priority=1
    (authenticated_user) must win over priority=0 (any_user).
    """
    with app.test_client() as client:
        with db.session.begin_nested():
            # Lower-priority ACL granting 'get' to any user.
            acl1 = DefaultACL(name='default', schemas=[RECORD_SCHEMA],
                              priority=0, originator=test_users.u1,
                              operation='get')
            actor1 = SystemRoleActor(name='auth', system_role='any_user',
                                     acl=acl1, originator=test_users.u1)
            # Higher-priority ACL granting 'get' to authenticated users only.
            acl2 = DefaultACL(name='default', schemas=[RECORD_SCHEMA],
                              priority=1, originator=test_users.u1,
                              operation='get')
            actor2 = SystemRoleActor(name='auth',
                                     system_role='authenticated_user',
                                     acl=acl2, originator=test_users.u1)
            db.session.add(acl1)
            db.session.add(actor1)
            db.session.add(acl2)
            db.session.add(actor2)
        login(client, test_users.u1)
        # Create a record through the REST API so ACLs are applied on index.
        response = client.post(records_url(),
                               data=json.dumps({
                                   'title': 'blah',
                                   'contributors': []
                               }),
                               content_type='application/json')
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata
        index, doctype = schema_to_index(RECORD_SCHEMA)
        # Fetch the indexed record directly from ES via its PID's UUID.
        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(
                PersistentIdentifier.get(
                    'recid', rest_metadata['control_number']).object_uuid))
        clear_timestamp(rec_md)
        # Only the priority=1 ACL (acl2) may be cached on the record.
        assert rec_md['_source']['_invenio_explicit_acls'] == [{
            'operation': 'get',
            'id': acl2.id,
            'timestamp': 'cleared',
            'system_role': ['authenticated_user']
        }]
def test_create_or_update_es(self, load_entry_points, app, db, record_xml, index):
    """Create a record in ES, then update it and check the result is stable."""
    synchronizer = current_oai_client.providers["uk"].synchronizers["xoai"]
    synchronizer.oai_sync = OAISync(provider_code="uk")
    oai_identifier = "oai:dspace.cuni.cz:20.500.11956/2623"

    # First call creates the document in the given index.
    synchronizer.create_or_update_es(oai_identifier, xml=record_xml,
                                     index=index)
    first_doc = current_search_client.get(id=oai_identifier, index=index)
    assert first_doc is not None

    # Second call (without an explicit index) must leave it unchanged.
    synchronizer.create_or_update_es(oai_identifier, xml=record_xml)
    second_doc = current_search_client.get(id=oai_identifier, index=index)
    assert second_doc is not None
    assert first_doc == second_doc
def fetch_record(record_id, doc_type, index=None):
    """
    Fetch a record from ES with a given id.

    :param record_id: [int]
    :param doc_type: [string] document type
    :param index: [string] name of the index. If None a default is used
    :return: [dict] Record if found, otherwise an error message
    """
    response = es.get(index=index, doc_type=doc_type, id=record_id)
    # Unwrap the document source when present; otherwise return the
    # whole ES response (which carries the error information).
    if '_source' in response:
        return response['_source']
    return response
def fetch_record(record_id, doc_type, index=None):
    """
    Fetch a record from ES with a given id.

    :param record_id: [int]
    :param doc_type: [string] document type
    :param index: [string] name of the index. If None a default is used
    :return: [dict] Record if found, otherwise an error message
    """
    # Return the document source when present, the raw response otherwise.
    result = es.get(index=index, doc_type=doc_type, id=record_id)
    return result["_source"] if "_source" in result else result
def get_record(record_id, doc_type, index=None, parent=None):
    """
    Fetch a given record from ES.

    Parent must be defined for fetching datatable records.

    :param record_id: [int] ES record id
    :param doc_type: [string] type of document. "publication" or "datatable"
    :param index: [string] name of the index. If None a default is used
    :param parent: [int] record id of the potential parent
    :return: [dict] Fetched record
    """
    # Datatable documents are children and must be addressed via parent.
    query_args = dict(index=index, doc_type=doc_type, id=record_id)
    if doc_type == CFG_DATA_TYPE and parent:
        query_args["parent"] = parent
    try:
        result = es.get(**query_args)
    except (NotFoundError, RequestError):
        return None
    return result.get("_source", result)
def _es_item(cls, record):
    """Get the item from the corresponding index.

    :param record: an item object
    :returns: the elasticsearch document or {}
    """
    # A missing document is expected in some flows; report it as empty.
    try:
        document = current_search_client.get(ItemsSearch.Meta.index,
                                             record.id)
    except NotFoundError:
        return {}
    return document['_source']
def get_record(record_id, doc_type, index=None, parent=None):
    """
    Fetch a given record from ES.

    Parent must be defined for fetching datatable records.

    :param record_id: [int] ES record id
    :param doc_type: [string] type of document. "publication" or "datatable"
    :param index: [string] name of the index. If None a default is used
    :param parent: [int] record id of the potential parent
    :return: [dict] Fetched record
    """
    try:
        # Child (datatable) documents must be fetched through their parent.
        if parent and doc_type == CFG_DATA_TYPE:
            hit = es.get(index=index, doc_type=doc_type, id=record_id,
                         parent=parent)
        else:
            hit = es.get(index=index, doc_type=doc_type, id=record_id)
    except (NotFoundError, RequestError):
        return None
    return hit.get('_source', hit)
def test_aclserializer(app, db, es, es_acl_prepare, test_users):
    """Check that ACLJSONSerializer exposes allowed operations on records.

    Indexes a record covered by a DefaultACL/UserActor pair and verifies
    that both direct serialization and the REST search endpoint report
    ``invenio_explicit_acls == ["get"]`` for the logged-in user.
    """
    with db.session.begin_nested():
        acl1 = DefaultACL(name='default', schemas=[RECORD_SCHEMA],
                          priority=0, originator=test_users.u1,
                          operation='get')
        actor1 = UserActor(name='auth', users=[test_users.u1], acl=acl1,
                           originator=test_users.u1)
        db.session.add(acl1)
        db.session.add(actor1)
    pid, rec = create_record({'title': 'blah'}, clz=SchemaEnforcingRecord)
    RecordIndexer().index(rec)
    current_search_client.indices.flush()
    # Sanity checks: the schema is ACL-enabled and the ACL matches the record.
    assert current_jsonschemas.url_to_path(
        rec['$schema']) in current_explicit_acls.enabled_schemas
    assert list(DefaultACL.get_record_acls(rec)) != []
    index, doc_type = schema_to_index(RECORD_SCHEMA)
    data = current_search_client.get(index=index, doc_type=doc_type,
                                     id=str(pid.object_uuid))['_source']
    # Exactly one ACL must be cached on the indexed record.
    assert '_invenio_explicit_acls' in data
    assert len(data['_invenio_explicit_acls']) == 1
    with app.test_request_context():
        login_user(test_users.u1)
        set_identity(test_users.u1)
        # Direct serializer call must surface the allowed operations.
        acljson_serializer = ACLJSONSerializer(RecordSchemaV1,
                                               acl_rest_endpoint='recid',
                                               replace_refs=True)
        serialized = json.loads(acljson_serializer.serialize(pid, rec))
        assert serialized['invenio_explicit_acls'] == ["get"]
    with app.test_client() as client:
        # The same information must appear in REST search results.
        login(client, test_users.u1)
        search_results = client.get(url_for('invenio_records_rest.recid_list'))
        search_results = get_json(search_results)
        hits = search_results['hits']['hits']
        assert len(hits) == 1
        assert hits[0]['invenio_explicit_acls'] == ["get"]
def test_record_indexing(app, db, es, example_record, indexer):
    """Test indexing of a record.

    Indexes the example record, reads the raw document back from ES and
    checks the deserialized record matches the original field by field.
    """
    # Index document in ES
    assert indexer.index(example_record)['result'] == 'created'

    # Retrieve document from ES
    data = current_search_client.get('draftsresources-drafts-draft-v1.0.0',
                                     id=example_record.id,
                                     doc_type='_doc')

    # Loads the ES data and compare
    record = Draft.loads(data['_source'])
    assert record == example_record
    assert record.id == example_record.id
    assert record.revision_id == example_record.revision_id
    assert record.created == example_record.created
    assert record.updated == example_record.updated
    assert record.expires_at == example_record.expires_at

    # Check system fields
    # Fix: the original comparison was a bare expression whose result was
    # discarded, so a metadata mismatch could never fail the test.
    assert record.metadata == example_record['metadata']
def test_record_indexing(app, db, es, example_record, indexer):
    """Test indexing of a record."""
    # Index the record and make sure a new ES document was created.
    assert indexer.index(example_record)["result"] == "created"

    # Read the raw document back from Elasticsearch.
    es_doc = current_search_client.get("vocabularies-vocabulary-v1.0.0",
                                       id=example_record.id,
                                       doc_type="_doc")

    # Deserialize and compare against the original record.
    loaded = Vocabulary.loads(es_doc["_source"])
    assert loaded == example_record
    assert loaded.id == example_record.id
    assert loaded.revision_id == example_record.revision_id
    assert loaded.created == example_record.created
    assert loaded.updated == example_record.updated
    assert loaded.vocabulary_type_id == example_record.vocabulary_type_id

    # Check system fields
    assert loaded.metadata == example_record["metadata"]
def test_elasticsearch_acl_update(app, db, es, es_acl_prepare, test_users):
    """Check that repeated ACL updates bump the ES document version.

    Calling ``acl.update()`` twice must leave a single document at
    ``_version == 2`` whose source carries the raw record selector.
    """
    # should pass as it does nothing
    with db.session.begin_nested():
        acl = ElasticsearchACL(name='test', schemas=[RECORD_SCHEMA],
                               priority=0, operation='get',
                               originator=test_users.u1,
                               record_selector={'term': {
                                   'keywords': 'test'
                               }})
        db.session.add(acl)
    acl.update()  # makes version 1
    acl.update()  # makes version 2
    idx = acl.get_acl_index_name(schema_to_index(RECORD_SCHEMA)[0])
    acl_md = current_search_client.get(
        index=idx,
        doc_type=current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME'],
        id=acl.id)
    # ES7 returns extra:
    acl_md.pop('_seq_no', None)
    acl_md.pop('_primary_term', None)
    print(json.dumps(acl_md, indent=4))
    # The selector is stored verbatim (no bool/must wrapping here) and the
    # document must be at version 2 after the second update.
    assert acl_md == {
        '_id': acl.id,
        '_index': 'invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0',
        '_source': {
            '__acl_record_selector': {
                'term': {
                    'keywords': 'test'
                }
            },
            '__acl_record_type': 'elasticsearch'
        },
        '_type': '_doc',
        '_version': 2,
        'found': True
    }
def get_from_es(pid, schema='records/record-v1.0.0.json'):
    """Retrieves a record from elasticsearch."""
    # Resolve the index/doc_type pair from the schema path, then fetch
    # the document by the PID's object UUID.
    index_name, doc_type_name = schema_to_index(schema)
    return current_search_client.get(index=index_name,
                                     doc_type=doc_type_name,
                                     id=pid.object_uuid)
def test_used_in_records(app, db, es, es_acl_prepare, test_users):
    """Check that ``used_in_records`` honors the older_than_timestamp filter.

    Two ACLs each match one record; records are indexed at staggered
    times (ts1 < index(record1) < ts2 < index(record2) < ts3) so the
    timestamp-bounded queries can be checked precisely.
    """
    with db.session.begin_nested():
        # ACL matching records with keyword 'blah', for authenticated users.
        acl1 = ElasticsearchACL(name='test', schemas=[RECORD_SCHEMA],
                                priority=0, operation='get',
                                originator=test_users.u1,
                                record_selector={'term': {
                                    'keywords': 'blah'
                                }})
        actor1 = SystemRoleActor(name='auth', acl=acl1,
                                 originator=test_users.u1,
                                 system_role='authenticated_user')
        db.session.add(acl1)
        db.session.add(actor1)
        # ACL matching records with keyword 'test', for any user.
        acl2 = ElasticsearchACL(name='test', schemas=[RECORD_SCHEMA],
                                priority=0, operation='get',
                                originator=test_users.u1,
                                record_selector={'term': {
                                    'keywords': 'test'
                                }})
        actor2 = SystemRoleActor(name='noauth', acl=acl2,
                                 originator=test_users.u1,
                                 system_role='any_user')
        db.session.add(actor2)
        db.session.add(acl2)
    acl1.update()
    acl2.update()
    pid1, record1 = create_record(
        {
            '$schema': RECORD_SCHEMA,
            'keywords': ['blah']
        },
        clz=SchemaEnforcingRecord)
    pid2, record2 = create_record(
        {
            '$schema': RECORD_SCHEMA,
            'keywords': ['test']
        },
        clz=SchemaEnforcingRecord)
    # ts1: before either record is indexed. Sleeps keep the ES indexing
    # timestamps strictly ordered relative to the ts markers.
    ts1 = datetime.datetime.now(datetime.timezone.utc)
    time.sleep(0.1)
    RecordIndexer().index(record1)
    current_search_client.indices.refresh()
    current_search_client.indices.flush()
    time.sleep(1)
    # ts2: after record1 but before record2 is indexed.
    ts2 = datetime.datetime.now(datetime.timezone.utc)
    time.sleep(0.1)
    RecordIndexer().index(record2)
    current_search_client.indices.refresh()
    current_search_client.indices.flush()
    time.sleep(1)
    # ts3: after both records are indexed.
    ts3 = datetime.datetime.now(datetime.timezone.utc)
    # the records should have cached ACLs, let's check
    idx, doc_type = schema_to_index(RECORD_SCHEMA)
    assert clear_timestamp(
        current_search_client.get(
            index=idx,
            doc_type=doc_type,
            id=str(record1.id))['_source']['_invenio_explicit_acls']) == [{
                'operation': 'get',
                'id': acl1.id,
                'timestamp': 'cleared',
                'system_role': ['authenticated_user']
            }]
    assert clear_timestamp(
        current_search_client.get(
            index=idx,
            doc_type=doc_type,
            id=str(record2.id))['_source']['_invenio_explicit_acls']) == [{
                'operation': 'get',
                'id': acl2.id,
                'timestamp': 'cleared',
                'system_role': ['any_user']
            }]
    # there should be no resource for acl1 before ts1
    assert list(acl1.used_in_records(older_than_timestamp=ts1)) == []
    # one record before ts2 and ts3
    assert list(
        acl1.used_in_records(older_than_timestamp=ts2)) == [str(record1.id)]
    assert list(
        acl1.used_in_records(older_than_timestamp=ts3)) == [str(record1.id)]
    # and one record before now
    assert list(acl1.used_in_records()) == [str(record1.id)]
    # there should be no resource for acl2 before ts1 and ts2
    assert list(acl2.used_in_records(older_than_timestamp=ts1)) == []
    assert list(acl2.used_in_records(older_than_timestamp=ts2)) == []
    # one record before ts3
    assert list(
        acl2.used_in_records(older_than_timestamp=ts3)) == [str(record2.id)]
    # and one record before now
    assert list(acl2.used_in_records()) == [str(record2.id)]
def test_basic_in_cluster_migration(in_cluster_app, testdata):
    """Test a basic in-cluster migration.

    Drives the init -> run -> run -> rollover CLI sequence of an
    in-cluster index migration from the 'old-' prefix to 'new-' and
    checks index existence at each step.
    """
    recipe_id = 'my_recipe'
    # Switch the app to the destination prefix/suffix for the migration.
    in_cluster_app.config['SEARCH_INDEX_PREFIX'] = 'new-'
    old_suffix = current_search.current_suffix
    current_search._current_suffix = '-new'
    # Write a modified authors mapping (author_id becomes 'text') to a
    # temp file; the fd is kept open and closed at the end of the test.
    records_mapping_file, mapping_filepath = tempfile.mkstemp()
    with open(current_search.mappings['authors-author-v1.0.0']) as old:
        data = json.load(old)
        data['mappings']['properties']['author_id']['type'] = 'text'
    with open(mapping_filepath, 'w') as new:
        new.write(json.dumps(data))
    # Before init: only the old indices exist.
    assert_indices_exists(exists=[
        'old-records-record-v1.0.0{}'.format(old_suffix),
        'old-authors-author-v1.0.0{}'.format(old_suffix),
    ], not_exists=[
        'new-.invenio-index-migrator',
        'new-records-record-v1.0.0-new',
        'new-authors-author-v1.0.0-new',
    ])
    runner = in_cluster_app.test_cli_runner()
    # init creates the migrator state index and the new target indices.
    init_result = runner.invoke(init_migration, [recipe_id, '--yes-i-know'])
    assert_indices_exists(exists=[
        'old-records-record-v1.0.0{}'.format(old_suffix),
        'old-authors-author-v1.0.0{}'.format(old_suffix),
        'new-.invenio-index-migrator',
        'new-records-record-v1.0.0-new',
        'new-authors-author-v1.0.0-new',
    ], not_exists=[
        'new-records-record-v1.0.0',
    ])
    # TODO: assert new indexes are created and no aliases
    migrate_result = runner.invoke(run_migration, [recipe_id])
    # A first run must not yet create the rollover alias.
    assert_indices_exists(exists=[
        'old-records-record-v1.0.0{}'.format(old_suffix),
        'old-authors-author-v1.0.0{}'.format(old_suffix),
        'new-.invenio-index-migrator',
        'new-records-record-v1.0.0-new',
        'new-authors-author-v1.0.0-new',
    ], not_exists=[
        'new-records-record-v1.0.0',
    ])
    # Running again drives the recipe to completion.
    migrate_result = runner.invoke(run_migration, [recipe_id])
    recipe_doc = current_search_client.get(index='new-.invenio-index-migrator',
                                           id='my_recipe')
    assert recipe_doc['_source']['status'] == 'COMPLETED'
    # Rollover finally exposes the new alias.
    rollover_result = runner.invoke(rollover_sync, [recipe_id])
    assert_indices_exists(exists=[
        'old-records-record-v1.0.0{}'.format(old_suffix),
        'old-authors-author-v1.0.0{}'.format(old_suffix),
        'new-.invenio-index-migrator',
        'new-records-record-v1.0.0-new',
        'new-authors-author-v1.0.0-new',
        'new-records-record-v1.0.0',
    ], not_exists=[])
    # Release the temp-file descriptor from mkstemp.
    os.close(records_mapping_file)
def test_create_acl_after_record(app, db, es, es_acl_prepare, test_users):
    """Check ACL reindexing when an ACL is created after the record exists.

    Creates a record first, then an ACL; after reindexing the ACL must be
    cached on the record, and after deleting the ACL and reindexing again
    the cached entry must disappear.
    """
    with app.test_client() as client:
        login(client, test_users.u1)
        # Create a record through the REST API (no ACLs exist yet).
        response = client.post(records_url(),
                               data=json.dumps({
                                   'title': 'blah',
                                   'contributors': []
                               }),
                               content_type='application/json')
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata
        current_search_client.indices.refresh()
        current_search_client.indices.flush()
        with db.session.begin_nested():
            acl1 = DefaultACL(name='default', schemas=[RECORD_SCHEMA],
                              priority=0, originator=test_users.u1,
                              operation='get')
            actor1 = SystemRoleActor(name='auth', system_role='any_user',
                                     acl=acl1, originator=test_users.u1)
            db.session.add(acl1)
            db.session.add(actor1)
        # reindex all resources that might be affected by the ACL change
        current_explicit_acls.reindex_acl(acl1, delayed=False)
        index, doctype = schema_to_index(RECORD_SCHEMA)
        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(
                PersistentIdentifier.get(
                    'recid', rest_metadata['control_number']).object_uuid))
        clear_timestamp(rec_md)
        # The newly created ACL must now be cached on the record.
        assert rec_md['_source']['_invenio_explicit_acls'] == [{
            'operation': 'get',
            'id': acl1.id,
            'timestamp': 'cleared',
            'system_role': ['any_user']
        }]
        # remove the ACL from the database
        with db.session.begin_nested():
            db.session.delete(acl1)
        # reindex records affected by the removal of ACL
        current_explicit_acls.reindex_acl_removed(acl1, delayed=False)
        # make sure all changes had time to propagate and test
        current_search_client.indices.refresh()
        current_search_client.indices.flush()
        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(
                PersistentIdentifier.get(
                    'recid', rest_metadata['control_number']).object_uuid))
        # there is no ACL in the database => no acls are defined nor enforced on the record
        print(json.dumps(rec_md, indent=4))
        assert '_invenio_explicit_acls' not in rec_md['_source']