def test_cli_full_reindex(app, db, es, capsys, es_acl_prepare, test_users):
    """Check that ``full_reindex_impl`` puts a new ACL onto an indexed record.

    The record is indexed before the ACL is created, so the test first asserts
    that the indexed document has no ``_invenio_explicit_acls`` key, then runs
    the reindex and asserts both the printed report and the stored ACL entry.
    """

    def fetch_indexed_document():
        # Read the record back from its search index as a plain dict.
        search = RecordsSearch(index=schema_to_index(RECORD_SCHEMA)[0])
        return search.get_record(record.id).execute().hits[0].to_dict()

    pid, record = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['blah']},
        clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)
    current_search_client.indices.flush()

    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'blah'}})
        db.session.add(acl)

        u = UserActor(
            name='test',
            acl=acl,
            originator=test_users.u1,
            users=[test_users.u1])
        db.session.add(u)

    # The ACL was created after indexing, so the document must not carry it yet.
    document_before = fetch_indexed_document()
    assert '_invenio_explicit_acls' not in document_before

    # Precaution: the record schema must be the only enabled schema.
    assert current_explicit_acls.enabled_schemas == {RECORD_SCHEMA}

    # Run the reindex; the assertions below expect exactly one record.
    from invenio_explicit_acls.cli import full_reindex_impl
    full_reindex_impl(verbose=True, records=True, in_bulk=False)

    captured = capsys.readouterr()
    # NOTE(review): the literal is compared verbatim against the captured
    # output - confirm its line breaks against what the command really prints.
    expected_output = """ Reindexing ACLs Updating ACL representation for "test" (%s) on schemas ['records/record-v1.0.0.json'] Getting records for schema records/record-v1.0.0.json ... collected 1 records Adding 1 records to indexing queue""".strip() % (acl.id)
    assert captured.out.strip() == expected_output

    current_search_client.indices.flush()

    document_after = fetch_indexed_document()
    assert clear_timestamp(document_after['_invenio_explicit_acls']) == [{
        'id': str(acl.id),
        'operation': 'get',
        'timestamp': 'cleared',
        'user': [1]
    }]
def test_elasticsearch_acl_prepare_schema_acl(app, db, es, es_acl_prepare,
                                              test_users):
    """Check the mappings present after ``prepare_schema_acls`` is called.

    The ACL helper index must have a mapping, and the record index mapping
    must contain the ``_invenio_explicit_acls`` property.
    """
    ElasticsearchACL.prepare_schema_acls(RECORD_SCHEMA)

    # The ACL helper index derived from the record index must be known to ES.
    acl_index = ElasticsearchACL.get_acl_index_name(
        schema_to_index(RECORD_SCHEMA)[0])
    acl_mapping = current_search_client.indices.get_mapping(acl_index)
    assert acl_index in acl_mapping

    record_index, doc_type = schema_to_index(RECORD_SCHEMA)
    record_mapping = current_search_client.indices.get_mapping(record_index)
    assert len(record_mapping) == 1

    only_key = next(iter(record_mapping))
    mappings = record_mapping[only_key]['mappings']
    # Before ES 7 the properties sit one level deeper, under the doc type.
    if ES_VERSION[0] < 7:
        properties = mappings[doc_type]['properties']
    else:
        properties = mappings['properties']
    assert '_invenio_explicit_acls' in properties
def test_elasticsearch_acl_get_record_acl(app, db, es, es_acl_prepare,
                                          test_users):
    """Check that ``get_record_acls`` yields only the ACL matching a record.

    A second ACL is bound to ``ANOTHER_SCHEMA``; updating it is expected to
    raise ``AttributeError`` because no index is found for that schema.
    """
    absolute_schema = 'https://localhost/schemas/' + RECORD_SCHEMA

    pid, record = create_record(
        {'$schema': absolute_schema, 'keywords': ['blah']},
        clz=SchemaEnforcingRecord)
    pid1, record1 = create_record(
        {'$schema': absolute_schema, 'keywords': ['test']},
        clz=SchemaEnforcingRecord)

    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'blah'}})
        db.session.add(acl)

        acl2 = ElasticsearchACL(
            name='test 2',
            schemas=[ANOTHER_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'test'}})
        db.session.add(acl2)

    acl.update()

    missing_index_message = 'No index found for schema records/blah-v1.0.0.json'
    with pytest.raises(AttributeError, match=missing_index_message):
        acl2.update()

    matched = list(ElasticsearchACL.get_record_acls(record))
    assert len(matched) == 1
    only_match = matched[0]
    assert isinstance(only_match, ElasticsearchACL)
    assert only_match.id == acl.id
def test_no_es_prepared_index(app, db, es, test_users):
    """Check that ``get_record_acls`` raises ``RuntimeError`` with a hint.

    This test does not request the ``es_acl_prepare`` fixture, and the
    expected message tells the user to run the prepare command.
    """
    record_metadata = {'$schema': RECORD_SCHEMA, 'keywords': ['blah']}
    pid, record = create_record(record_metadata, clz=SchemaEnforcingRecord)

    expected_message = (
        'Explicit ACLs were not prepared for the given schema. '
        'Please run invenio explicit-acls prepare '
        'https://localhost/schemas/records/record-v1.0.0.json'
    )
    with pytest.raises(RuntimeError, match=expected_message):
        # The result is a lazy iterable, so it is consumed to trigger the call.
        list(ElasticsearchACL.get_record_acls(record))
def test_elasticsearch_acl_update(app, db, es, es_acl_prepare, test_users):
    """Check the document stored in the ACL index after two ``update()`` calls.

    The stored document is fetched directly from the search client and
    compared with the expected selector, record type and version 2.
    """
    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'test'}})
        db.session.add(acl)

    # Two consecutive updates: the stored document is expected at version 2.
    for _ in range(2):
        acl.update()

    acl_index = acl.get_acl_index_name(schema_to_index(RECORD_SCHEMA)[0])
    acl_md = current_search_client.get(
        index=acl_index,
        doc_type=current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME'],
        id=acl.id
    )

    # ES7 returns these extra keys; drop them so the comparison is stable.
    for es7_only_key in ('_seq_no', '_primary_term'):
        acl_md.pop(es7_only_key, None)

    print(json.dumps(acl_md, indent=4))

    expected_document = {
        '_id': acl.id,
        '_index': 'invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0',
        '_source': {
            '__acl_record_selector': {
                'term': {
                    'keywords': 'test'
                }
            },
            '__acl_record_type': 'elasticsearch'
        },
        '_type': '_doc',
        '_version': 2,
        'found': True
    }
    assert acl_md == expected_document
def test_elasticsearch_acl_repr(app, db, es, es_acl_prepare, test_users):
    """Check that ``repr()`` of an ACL shows its name, id and schemas."""
    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'test'}})
        db.session.add(acl)

    expected_repr = (
        "\"test\" (%s) on schemas ['records/record-v1.0.0.json']" % acl.id
    )
    assert repr(acl) == expected_repr
def test_elasticsearch_acl_get_matching_resources(app, db, es, es_acl_prepare,
                                                  test_users):
    """Check that each ACL's ``get_matching_resources`` yields its own record.

    Two records with different keywords are indexed, and two ACLs select on
    one keyword each; every ACL must report exactly the matching record uuid.
    """
    pid, record = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['blah']},
        clz=SchemaEnforcingRecord)
    pid1, record1 = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['test']},
        clz=SchemaEnforcingRecord)

    indexer_targets = (record, record1)
    for target in indexer_targets:
        RecordIndexer().index(target)
    current_search_client.indices.refresh()
    current_search_client.indices.flush()

    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'blah'}})
        db.session.add(acl)

        acl1 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'test'}})
        db.session.add(acl1)

    # Each ACL is paired with the pid of the record its selector should hit.
    for checked_acl, expected_pid in ((acl, pid), (acl1, pid1)):
        matching_ids = list(checked_acl.get_matching_resources())
        assert len(matching_ids) == 1
        assert matching_ids[0] == str(expected_pid.object_uuid)
def test_aclrecordsearch_returnall(app, db, es, es_acl_prepare, test_users):
    """Check ``ACLRecordsSearch`` with and without ``acl_return_all()``.

    A record is created through the REST API while only a non-matching ACL
    exists. A plain search then finds nothing, whereas ``acl_return_all()``
    returns the record with an empty ACL list. After a matching ACL for user
    2 is added, user 1 sees the record through ``acl_return_all()`` and user
    2 sees it through a plain search, both with the effective ACL attached.
    """

    def flush_indices():
        # Make freshly written documents visible to the searches below.
        current_search_client.indices.refresh()
        current_search_client.indices.flush()

    def search_ignoring_acls():
        # Look the record up with acl_return_all(), i.e. regardless of ACLs.
        search = ACLRecordsSearch(index=index, doc_type=doc_type)
        return search.acl_return_all().get_record(record_uuid).execute().hits

    with db.session.begin_nested():
        acl1 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'test'}})
        actor1 = SystemRoleActor(
            name='auth',
            system_role='any_user',
            acl=acl1,
            originator=test_users.u1)
        db.session.add(acl1)
        db.session.add(actor1)

    current_explicit_acls.reindex_acl(acl1, delayed=False)

    with app.test_client() as client:
        login(client, test_users.u1)
        payload = json.dumps(
            {'title': 'blah', 'contributors': [], 'keywords': ['blah']})
        response = client.post(
            records_url(), data=payload, content_type='application/json')
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata

    flush_indices()

    index, doc_type = schema_to_index(RECORD_SCHEMA)
    persistent_id = PersistentIdentifier.get(
        'recid', rest_metadata['control_number'])
    record_uuid = persistent_id.object_uuid

    with app.test_request_context():
        login_user(test_users.u1)
        set_identity(test_users.u1)
        assert current_user == test_users.u1

        # acl1 does not apply to the record, so a plain search returns no data
        restricted = ACLRecordsSearch(
            index=index, doc_type=doc_type, operation='get'
        ).get_record(record_uuid).execute()
        assert not len(restricted)

        # with acl_return_all the record is returned regardless of ACLs
        with_all = search_ignoring_acls()
        assert len(with_all) == 1
        assert with_all[0]['_invenio_explicit_acls'] == []

    # add another acl, this one maps to the record
    with db.session.begin_nested():
        acl2 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'blah'}})
        actor2 = UserActor(
            name='u2',
            users=[test_users.u2],
            acl=acl2,
            originator=test_users.u1)
        db.session.add(acl2)
        db.session.add(actor2)

    current_explicit_acls.reindex_acl(acl2, delayed=False)
    flush_indices()

    # for the same user acl_return_all() must return the record and
    # its effective acls
    with app.test_request_context():
        login_user(test_users.u1)
        set_identity(test_users.u1)

        with_all = search_ignoring_acls()
        assert len(with_all) == 1
        effective_acls = with_all[0].to_dict()['_invenio_explicit_acls']
        assert clear_timestamp(effective_acls) == [
            {
                'operation': 'get',
                'id': acl2.id,
                'timestamp': 'cleared',
                'user': [2]
            }
        ]

    # for user2 a plain ACLRecordsSearch must return the record and
    # its effective acls
    with app.test_request_context():
        login_user(test_users.u2)
        set_identity(test_users.u2)

        with_all = ACLRecordsSearch(
            index=index, doc_type=doc_type
        ).get_record(record_uuid).execute().hits
        assert len(with_all) == 1
        effective_acls = with_all[0].to_dict()['_invenio_explicit_acls']
        assert clear_timestamp(effective_acls) == [
            {
                'operation': 'get',
                'id': acl2.id,
                'timestamp': 'cleared',
                'user': [2]
            }
        ]
def test_change_acl_mapping(app, db, es, es_acl_prepare, test_users):
    """Check that changing an ACL's selector moves it to the other record.

    The ACL first selects the 'blah' record; after its ``record_selector`` is
    switched to 'test' and the ACL is reindexed, only the 'test' record may
    reference the ACL id in its ``_invenio_explicit_acls``.
    """

    def hits_referencing_acl():
        # Find documents whose nested ACL entries mention this ACL's id.
        nested_query = {
            'query': {
                'nested': {
                    'path': '_invenio_explicit_acls',
                    'query': {
                        'term': {
                            '_invenio_explicit_acls.id': str(acl.id)
                        }
                    }
                }
            },
            '_source': False
        }
        response = current_search_client.search(
            index=index, doc_type=doc_type, body=nested_query)
        return response['hits']['hits']

    pid, record = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['blah']},
        clz=SchemaEnforcingRecord)
    pid1, record1 = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['test']},
        clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)
    RecordIndexer().index(record1)
    current_search_client.indices.flush()

    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'blah'}})
        actor = UserActor(
            users=[test_users.u1], acl=acl, originator=test_users.u1)
        db.session.add(acl)
        db.session.add(actor)

    current_explicit_acls.reindex_acl(acl, delayed=False)
    current_search_client.indices.flush()

    index, doc_type = schema_to_index(RECORD_SCHEMA)

    found = hits_referencing_acl()
    assert len(found) == 1
    assert found[0]['_id'] == str(pid.object_uuid)

    # Point the selector at the other record and reindex.
    with db.session.begin_nested():
        acl.record_selector = {
            'term': {
                'keywords': 'test'
            }
        }
        db.session.add(acl)

    current_explicit_acls.reindex_acl(acl, delayed=False)
    current_search_client.indices.flush()

    found = hits_referencing_acl()
    assert len(found) == 1
    assert found[0]['_id'] == str(pid1.object_uuid)
def test_used_in_records(app, db, es, es_acl_prepare, test_users):
    """Check ``used_in_records`` with and without ``older_than_timestamp``.

    Two records are indexed one after the other with timestamps taken before,
    between and after the two indexing steps. Each ACL must report its record
    only for timestamps taken after that record was indexed.
    """

    def utc_now():
        return datetime.datetime.now(datetime.timezone.utc)

    def index_and_flush(rec):
        # Index one record and make it visible before the next timestamp.
        RecordIndexer().index(rec)
        current_search_client.indices.refresh()
        current_search_client.indices.flush()

    def stored_acls(rec):
        # ACLs cached on the indexed document, with timestamps normalised.
        document = current_search_client.get(
            index=idx, doc_type=doc_type, id=str(rec.id))
        return clear_timestamp(document['_source']['_invenio_explicit_acls'])

    with db.session.begin_nested():
        acl1 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'blah'}})
        actor1 = SystemRoleActor(
            name='auth',
            acl=acl1,
            originator=test_users.u1,
            system_role='authenticated_user')
        db.session.add(acl1)
        db.session.add(actor1)

        acl2 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector={'term': {'keywords': 'test'}})
        actor2 = SystemRoleActor(
            name='noauth',
            acl=acl2,
            originator=test_users.u1,
            system_role='any_user')
        db.session.add(actor2)
        db.session.add(acl2)

    acl1.update()
    acl2.update()

    pid1, record1 = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['blah']},
        clz=SchemaEnforcingRecord)
    pid2, record2 = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['test']},
        clz=SchemaEnforcingRecord)

    # The sleeps keep the three timestamps clearly apart from the two
    # indexing steps; their order must not change.
    ts1 = utc_now()
    time.sleep(0.1)
    index_and_flush(record1)
    time.sleep(1)

    ts2 = utc_now()
    time.sleep(0.1)
    index_and_flush(record2)
    time.sleep(1)

    ts3 = utc_now()

    # the records should have cached ACLs, let's check
    idx, doc_type = schema_to_index(RECORD_SCHEMA)
    assert stored_acls(record1) == [{
        'operation': 'get',
        'id': acl1.id,
        'timestamp': 'cleared',
        'system_role': ['authenticated_user']
    }]
    assert stored_acls(record2) == [{
        'operation': 'get',
        'id': acl2.id,
        'timestamp': 'cleared',
        'system_role': ['any_user']
    }]

    first_id = str(record1.id)
    second_id = str(record2.id)

    # acl1: nothing before ts1, one record before ts2, ts3 and "now"
    assert list(acl1.used_in_records(older_than_timestamp=ts1)) == []
    assert list(acl1.used_in_records(older_than_timestamp=ts2)) == [first_id]
    assert list(acl1.used_in_records(older_than_timestamp=ts3)) == [first_id]
    assert list(acl1.used_in_records()) == [first_id]

    # acl2: nothing before ts1 and ts2, one record before ts3 and "now"
    assert list(acl2.used_in_records(older_than_timestamp=ts1)) == []
    assert list(acl2.used_in_records(older_than_timestamp=ts2)) == []
    assert list(acl2.used_in_records(older_than_timestamp=ts3)) == [second_id]
    assert list(acl2.used_in_records()) == [second_id]
# test_cli_explain: calls check_explain with several record metadata dicts and
# the text expected for each, in this order:
#   1. a record with RECORD_SCHEMA before any ACL exists (no ACL matches),
#   2. empty metadata (the "Please add $schema" message),
#   3. the same record after an ElasticsearchACL with a UserActor was added
#      and updated, once with the relative and once with the absolute $schema,
#   4. a record whose keyword ('aaa') the ACL selector does not match,
#   5. an unknown schema URL, which must raise RuntimeError.
# NOTE(review): check_explain is defined elsewhere; how it compares the
# expected text (whitespace, timestamps, generated ids) is not visible here -
# confirm before editing any of the expected-output literals below.
def test_cli_explain(app, db, es, capsys, es_acl_prepare, test_users): check_explain(capsys, {'$schema': RECORD_SCHEMA, 'keywords': ['blah']}, """ Possible ACLs Checking ACLs of type <class 'invenio_explicit_acls.acls.default_acls.DefaultACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.elasticsearch_acls.ElasticsearchACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "records/record-v1.0.0.json", "keywords": [ "blah" ] } } }, { "term": { "__acl_record_type": "elasticsearch" } } ] } } } Checking ACLs of type <class 'invenio_explicit_acls.acls.id_acls.IdACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.propertyvalue_acls.PropertyValueACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "records/record-v1.0.0.json", "keywords": [ "blah" ] } } }, { "term": { "__acl_record_type": "propertyvalue" } } ] } } } The record is not matched by any ACLs """) with db.session.begin_nested(): acl = ElasticsearchACL(name='test', schemas=[RECORD_SCHEMA], priority=0, operation='get', originator=test_users.u1, record_selector={'term': { 'keywords': 'blah' }}) db.session.add(acl) u = UserActor(name='test', acl=acl, originator=test_users.u1, users=[test_users.u1]) db.session.add(u) acl.update() current_search_client.indices.refresh() current_search_client.indices.flush() check_explain(capsys, {}, """Please add $schema to record metadata""") check_explain(capsys, {'$schema': RECORD_SCHEMA, 'keywords': ['blah']}, """ Possible ACLs ElasticsearchACL "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] actors = ['UserActor[test]'] created = 2019-05-10 11:27:26.013084 id = ebb91d27-70fd-4077-b6e1-6aadd7fa0bcd name = test 
operation = get originator = User <id=1, [email protected]> originator_id = 1 priority_group = default record_selector = {'term': {'keywords': 'blah'}} schemas = ['records/record-v1.0.0.json'] type = elasticsearch updated = 2019-05-10 11:27:26.013093 Checking ACLs of type <class 'invenio_explicit_acls.acls.default_acls.DefaultACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.elasticsearch_acls.ElasticsearchACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "records/record-v1.0.0.json", "keywords": [ "blah" ] } } }, { "term": { "__acl_record_type": "elasticsearch" } } ] } } } found match: "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] with priority of 0 UserActor[test] Checking ACLs of type <class 'invenio_explicit_acls.acls.id_acls.IdACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.propertyvalue_acls.PropertyValueACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "records/record-v1.0.0.json", "keywords": [ "blah" ] } } }, { "term": { "__acl_record_type": "propertyvalue" } } ] } } } Of these, the following ACLs will be used (have the highest priority): "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] UserActor[test] The ACLs will get serialized to the following element { "_invenio_explicit_acls": [ { "operation": "get", "id": "%(acl_id)s", "timestamp": "2019-05-10T10:24:21.812875+00:00", "user": [ 1 ] } ] } """.strip() % {'acl_id': str(acl.id)}) check_explain(capsys, {'$schema': 'https://localhost/schemas/' + RECORD_SCHEMA, 'keywords': ['blah']}, """ Possible ACLs ElasticsearchACL "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] actors = ['UserActor[test]'] 
created = 2019-05-10 11:27:26.013084 id = ebb91d27-70fd-4077-b6e1-6aadd7fa0bcd name = test operation = get originator = User <id=1, [email protected]> originator_id = 1 priority_group = default record_selector = {'term': {'keywords': 'blah'}} schemas = ['records/record-v1.0.0.json'] type = elasticsearch updated = 2019-05-10 11:27:26.013093 Checking ACLs of type <class 'invenio_explicit_acls.acls.default_acls.DefaultACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.elasticsearch_acls.ElasticsearchACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "https://localhost/schemas/records/record-v1.0.0.json", "keywords": [ "blah" ] } } }, { "term": { "__acl_record_type": "elasticsearch" } } ] } } } found match: "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] with priority of 0 UserActor[test] Checking ACLs of type <class 'invenio_explicit_acls.acls.id_acls.IdACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.propertyvalue_acls.PropertyValueACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "https://localhost/schemas/records/record-v1.0.0.json", "keywords": [ "blah" ] } } }, { "term": { "__acl_record_type": "propertyvalue" } } ] } } } Of these, the following ACLs will be used (have the highest priority): "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] UserActor[test] The ACLs will get serialized to the following element { "_invenio_explicit_acls": [ { "operation": "get", "id": "%(acl_id)s", "timestamp": "2019-05-10T10:24:21.812875+00:00", "user": [ 1 ] } ] } """.strip() % {'acl_id': str(acl.id)}) check_explain(capsys, {'$schema': RECORD_SCHEMA, 'keywords': ['aaa']}, """ Possible ACLs 
ElasticsearchACL "test" (%(acl_id)s) on schemas ['records/record-v1.0.0.json'] actors = ['UserActor[test]'] name = test operation = get originator = User <id=1, [email protected]> priority_group = default record_selector = {'term': {'keywords': 'blah'}} schemas = ['records/record-v1.0.0.json'] type = elasticsearch Checking ACLs of type <class 'invenio_explicit_acls.acls.default_acls.DefaultACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.elasticsearch_acls.ElasticsearchACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "records/record-v1.0.0.json", "keywords": [ "aaa" ] } } }, { "term": { "__acl_record_type": "elasticsearch" } } ] } } } Checking ACLs of type <class 'invenio_explicit_acls.acls.id_acls.IdACL'> Checking ACLs of type <class 'invenio_explicit_acls.acls.propertyvalue_acls.PropertyValueACL'> Will run percolate query on index invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0 and doc_type _doc: { "query": { "bool": { "must": [ { "percolate": { "field": "__acl_record_selector", "document": { "$schema": "records/record-v1.0.0.json", "keywords": [ "aaa" ] } } }, { "term": { "__acl_record_type": "propertyvalue" } } ] } } } The record is not matched by any ACLs """ % {'acl_id': str(acl.id)}) with pytest.raises(RuntimeError, match='Explicit ACLs were not prepared for the given schema. Please run invenio explicit-acls prepare http://bla'): check_explain(capsys, {'$schema': 'http://blah', 'keywords': ['aaa']}, """""")