Esempio n. 1
0
def test_propertyvalue_acl_delete(app, db, es, es_acl_prepare, test_users):
    """Deleting a PropertyValueACL removes its document from the ACL index.

    The ACL and one property value are stored, ``acl.update()`` is called,
    the indexed document is compared with the expected one, and after
    ``acl.delete()`` fetching the same id must raise ``NotFoundError``.
    """
    with db.session.begin_nested():
        acl = PropertyValueACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
        )
        propval = PropertyValue(
            name='keywords',
            value='test',
            acl=acl,
            originator=test_users.u1,
        )

        db.session.add(acl)
        db.session.add(propval)

    acl.update()
    idx = acl.get_acl_index_name(schema_to_index(RECORD_SCHEMA)[0])
    acl_doctype = current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME']

    acl_md = current_search_client.get(
        index=idx, doc_type=acl_doctype, id=acl.id)

    # ES7 adds bookkeeping keys that are not part of the comparison.
    for es7_only_key in ('_seq_no', '_primary_term'):
        acl_md.pop(es7_only_key, None)

    expected_source = {
        '__acl_record_selector': {
            'bool': {
                'must': [{'term': {'keywords': 'test'}}]
            }
        },
        '__acl_record_type': 'propertyvalue',
    }
    expected_doc = {
        '_id': acl.id,
        '_index': 'invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0',
        '_source': expected_source,
        '_type': '_doc',
        '_version': 1,
        'found': True,
    }
    assert acl_md == expected_doc

    acl.delete()
    with pytest.raises(elasticsearch.exceptions.NotFoundError):
        current_search_client.get(
            index=idx, doc_type=acl_doctype, id=acl.id)
Esempio n. 2
0
def test_reindex(app, script_info):
    """Run the queue, reindex and run CLI commands, then look the record up.

    Every CLI invocation must exit with code 0 and the created record must
    be reported as found by the search client.
    """
    with app.test_request_context():
        runner = CliRunner()

        def invoke_ok(command, arguments):
            # Run one CLI command and require a zero exit code.
            outcome = runner.invoke(command, arguments, obj=script_info)
            assert 0 == outcome.exit_code

        # Load a record.
        rec_uuid = uuid.uuid4()
        record = Record.create({'title': 'Test0'}, id_=rec_uuid)
        db.session.commit()

        # Initialize the queue.
        invoke_ok(cli.queue, ['init', 'purge'])

        invoke_ok(cli.reindex, ['--yes-i-know'])
        invoke_ok(cli.run, [])

        sleep(5)
        index, doc_type = RecordIndexer().record_to_index(record)
        es_response = current_search_client.get(
            index=index, doc_type=doc_type, id=rec_uuid)
        assert es_response['found']

        # Destroy the queue.
        invoke_ok(cli.queue, ['delete'])
def test_draft_indexing(app, db, es, example_draft, indexer):
    """Index a draft and check that it round-trips through Elasticsearch."""
    # Push the draft to ES; a first-time index reports 'created'.
    index_result = indexer.index(example_draft)
    assert index_result['result'] == 'created'

    # Read the stored document back.
    es_doc = current_search_client.get(
        'draftsresources-drafts-draft-v1.0.0',
        id=example_draft.id,
        doc_type='_doc',
    )

    # Rebuild a Draft from the stored source and compare it to the original.
    draft = Draft.loads(es_doc['_source'])

    assert draft == example_draft
    assert draft.id == example_draft.id
    assert draft.revision_id == example_draft.revision_id
    assert draft.created == example_draft.created
    assert draft.updated == example_draft.updated
    assert draft.expires_at == example_draft.expires_at
    assert draft.parent == example_draft.parent

    loaded_versions = draft.versions
    assert (loaded_versions.is_latest_draft
            == example_draft.versions.is_latest_draft)
    assert loaded_versions.index == example_draft.versions.index

    # Check system fields
    assert draft.metadata == example_draft['metadata']
Esempio n. 4
0
def test_reindex(app, script_info):
    """Check that the reindex CLI workflow makes a new record retrievable.

    The queue is initialised, ``reindex`` and ``run`` are invoked, the
    record is fetched from the search client, and the queue is deleted.
    Each command is required to exit with code 0.
    """
    with app.test_request_context():
        runner = CliRunner()

        # Load a record.
        rec_uuid = uuid.uuid4()
        payload = {'title': 'Test0'}
        record = Record.create(payload, id_=rec_uuid)
        db.session.commit()

        # Queue initialisation first, then the reindex itself, then the run.
        indexing_steps = (
            (cli.queue, ['init', 'purge']),
            (cli.reindex, ['--yes-i-know']),
            (cli.run, []),
        )
        for command, arguments in indexing_steps:
            step_result = runner.invoke(command, arguments, obj=script_info)
            assert 0 == step_result.exit_code

        sleep(5)
        record_indexer = RecordIndexer()
        index, doc_type = record_indexer.record_to_index(record)
        lookup = current_search_client.get(
            index=index, doc_type=doc_type, id=rec_uuid)
        assert lookup['found']

        # Destroy the queue.
        teardown = runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert 0 == teardown.exit_code
Esempio n. 5
0
def test_create_record_check_acl_priority(app, db, es, es_acl_prepare,
                                          test_users):
    """Only the priority-1 ACL is cached on a newly created record.

    Two ``DefaultACL`` objects for the same operation are stored, one with
    priority 0 and one with priority 1. After creating a record over REST,
    the indexed document must list only the priority-1 ACL.
    """
    with app.test_client() as client:
        with db.session.begin_nested():
            acl1 = DefaultACL(
                name='default',
                schemas=[RECORD_SCHEMA],
                priority=0,
                originator=test_users.u1,
                operation='get',
            )
            actor1 = SystemRoleActor(
                name='auth',
                system_role='any_user',
                acl=acl1,
                originator=test_users.u1,
            )

            acl2 = DefaultACL(
                name='default',
                schemas=[RECORD_SCHEMA],
                priority=1,
                originator=test_users.u1,
                operation='get',
            )
            actor2 = SystemRoleActor(
                name='auth',
                system_role='authenticated_user',
                acl=acl2,
                originator=test_users.u1,
            )

            for model in (acl1, actor1, acl2, actor2):
                db.session.add(model)

        login(client, test_users.u1)
        request_body = json.dumps({'title': 'blah', 'contributors': []})
        response = client.post(
            records_url(),
            data=request_body,
            content_type='application/json',
        )
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata

        index, doctype = schema_to_index(RECORD_SCHEMA)

        # Resolve the record UUID from its recid and fetch the indexed doc.
        persistent_id = PersistentIdentifier.get(
            'recid', rest_metadata['control_number'])
        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(persistent_id.object_uuid),
        )

        clear_timestamp(rec_md)

        expected_acls = [{
            'operation': 'get',
            'id': acl2.id,
            'timestamp': 'cleared',
            'system_role': ['authenticated_user'],
        }]
        assert rec_md['_source']['_invenio_explicit_acls'] == expected_acls
Esempio n. 6
0
 def test_create_or_update_es(self, load_entry_points, app, db, record_xml,
                              index):
     """Calling ``create_or_update_es`` twice leaves the same ES document.

     The document fetched after the first call must equal the document
     fetched after the second call.
     """
     oai_identifier = "oai:dspace.cuni.cz:20.500.11956/2623"

     synchronizer = current_oai_client.providers["uk"].synchronizers["xoai"]
     synchronizer.oai_sync = OAISync(provider_code="uk")

     synchronizer.create_or_update_es(
         oai_identifier, xml=record_xml, index=index)
     first_fetch = current_search_client.get(id=oai_identifier, index=index)
     assert first_fetch is not None

     # NOTE(review): the second call does not pass ``index``, unlike the
     # first one - confirm this is intentional.
     synchronizer.create_or_update_es(oai_identifier, xml=record_xml)
     second_fetch = current_search_client.get(id=oai_identifier, index=index)
     assert second_fetch is not None

     assert first_fetch == second_fetch
Esempio n. 7
0
def fetch_record(record_id, doc_type, index=None):
    """Fetch a record from ES with a given id.

    :param record_id: id of the document to fetch
    :param doc_type: [string] document type
    :param index: [string] name of the index; passed to ``es.get`` as given
        (``None`` by default)

    :return: the ``_source`` of the response when it has one, otherwise the
        whole response
    """
    response = es.get(index=index, doc_type=doc_type, id=record_id)
    if '_source' in response:
        return response['_source']
    return response
Esempio n. 8
0
def fetch_record(record_id, doc_type, index=None):
    """Look up a single document in ES by its id.

    :param record_id: id of the document to fetch
    :param doc_type: [string] document type
    :param index: [string] name of the index; forwarded to ``es.get``
        unchanged (``None`` by default)

    :return: the response's ``_source`` entry if present, else the full
        response object
    """
    lookup = {"index": index, "doc_type": doc_type, "id": record_id}
    hit = es.get(**lookup)
    return hit["_source"] if "_source" in hit else hit
Esempio n. 9
0
def get_record(record_id, doc_type, index=None, parent=None):
    """Fetch a given record from ES.

    ``parent`` is forwarded to ES only when ``doc_type`` equals
    ``CFG_DATA_TYPE`` and ``parent`` is truthy.

    :param record_id: ES record id
    :param doc_type: [string] type of document
    :param index: [string] name of the index; forwarded to ``es.get`` as given
    :param parent: record id of the potential parent

    :return: the ``_source`` of the response (or the whole response when it
        has no ``_source``); ``None`` if ES raises ``NotFoundError`` or
        ``RequestError``
    """
    query = {"index": index, "doc_type": doc_type, "id": record_id}
    if doc_type == CFG_DATA_TYPE and parent:
        query["parent"] = parent

    try:
        hit = es.get(**query)
        return hit.get("_source", hit)
    except (NotFoundError, RequestError):
        return None
Esempio n. 10
0
    def _es_item(cls, record):
        """Return the indexed source of ``record`` from the items index.

        :param record: an item object; its ``id`` is used for the lookup
        :returns: the ``_source`` of the elasticsearch document, or ``{}``
            when the lookup raises ``NotFoundError``
        """
        items_index = ItemsSearch.Meta.index
        try:
            hit = current_search_client.get(items_index, record.id)
        except NotFoundError:
            return {}
        else:
            return hit['_source']
Esempio n. 11
0
def get_record(record_id, doc_type, index=None, parent=None):
    """Retrieve one record from ES, optionally routed through a parent.

    The ``parent`` argument is passed on to ES only for documents whose
    ``doc_type`` equals ``CFG_DATA_TYPE`` and only when ``parent`` is truthy.

    :param record_id: ES record id
    :param doc_type: [string] type of document
    :param index: [string] name of the index; handed to ``es.get`` unchanged
    :param parent: record id of the potential parent

    :return: the response's ``_source`` when present, otherwise the response
        itself; ``None`` when ``NotFoundError`` or ``RequestError`` is raised
    """
    needs_parent = doc_type == CFG_DATA_TYPE and parent
    extra_args = {'parent': parent} if needs_parent else {}

    try:
        result = es.get(
            index=index, doc_type=doc_type, id=record_id, **extra_args)
        return result.get('_source', result)
    except (NotFoundError, RequestError):
        return None
Esempio n. 12
0
def test_aclserializer(app, db, es, es_acl_prepare, test_users):
    """ACLs cached on an indexed record show up in serialized output.

    A ``DefaultACL`` for the ``get`` operation is stored for user ``u1``.
    The test then checks the indexed document, the ``ACLJSONSerializer``
    output and the REST search listing.
    """
    user = test_users.u1

    with db.session.begin_nested():
        acl1 = DefaultACL(
            name='default',
            schemas=[RECORD_SCHEMA],
            priority=0,
            originator=user,
            operation='get',
        )
        actor1 = UserActor(
            name='auth',
            users=[user],
            acl=acl1,
            originator=user,
        )

        db.session.add(acl1)
        db.session.add(actor1)

    pid, rec = create_record({'title': 'blah'}, clz=SchemaEnforcingRecord)
    RecordIndexer().index(rec)
    current_search_client.indices.flush()

    schema_path = current_jsonschemas.url_to_path(rec['$schema'])
    assert schema_path in current_explicit_acls.enabled_schemas
    assert list(DefaultACL.get_record_acls(rec)) != []

    # The indexed document carries exactly one cached ACL.
    index, doc_type = schema_to_index(RECORD_SCHEMA)
    es_hit = current_search_client.get(
        index=index, doc_type=doc_type, id=str(pid.object_uuid))
    data = es_hit['_source']
    assert '_invenio_explicit_acls' in data
    assert len(data['_invenio_explicit_acls']) == 1

    # Serializing the record as the logged-in user exposes the operation.
    with app.test_request_context():
        login_user(user)
        set_identity(user)

        acljson_serializer = ACLJSONSerializer(
            RecordSchemaV1, acl_rest_endpoint='recid', replace_refs=True)
        raw_json = acljson_serializer.serialize(pid, rec)
        serialized = json.loads(raw_json)
        assert serialized['invenio_explicit_acls'] == ["get"]

    # The REST listing returns the same information for the single hit.
    with app.test_client() as client:
        login(client, user)
        listing = client.get(url_for('invenio_records_rest.recid_list'))
        hits = get_json(listing)['hits']['hits']
        assert len(hits) == 1
        assert hits[0]['invenio_explicit_acls'] == ["get"]
Esempio n. 13
0
def test_record_indexing(app, db, es, example_record, indexer):
    """Test indexing of a record.

    Indexes ``example_record``, reads the stored document back from the
    ``draftsresources-drafts-draft-v1.0.0`` index, rebuilds it with
    ``Draft.loads`` and compares it with the original.
    """
    # Index document in ES
    assert indexer.index(example_record)['result'] == 'created'

    # Retrieve document from ES
    data = current_search_client.get('draftsresources-drafts-draft-v1.0.0',
                                     id=example_record.id,
                                     doc_type='_doc')

    # Loads the ES data and compare
    record = Draft.loads(data['_source'])
    assert record == example_record
    assert record.id == example_record.id
    assert record.revision_id == example_record.revision_id
    assert record.created == example_record.created
    assert record.updated == example_record.updated
    assert record.expires_at == example_record.expires_at

    # Check system fields. This comparison was previously a bare expression
    # whose result was discarded, so the metadata was never verified.
    assert record.metadata == example_record['metadata']
Esempio n. 14
0
def test_record_indexing(app, db, es, example_record, indexer):
    """Index a vocabulary record and check it round-trips through ES."""
    # A first-time index operation reports "created".
    index_result = indexer.index(example_record)
    assert index_result["result"] == "created"

    # Read the stored document back.
    es_doc = current_search_client.get(
        "vocabularies-vocabulary-v1.0.0",
        id=example_record.id,
        doc_type="_doc",
    )

    # Rebuild the record from the stored source and compare.
    record = Vocabulary.loads(es_doc["_source"])
    assert record == example_record
    assert record.id == example_record.id
    assert record.revision_id == example_record.revision_id
    assert record.created == example_record.created
    assert record.updated == example_record.updated
    assert record.vocabulary_type_id == example_record.vocabulary_type_id

    # Check system fields
    assert record.metadata == example_record["metadata"]
Esempio n. 15
0
def test_elasticsearch_acl_update(app, db, es, es_acl_prepare, test_users):
    """Updating an ElasticsearchACL twice yields version 2 of its document.

    The ACL is stored, ``acl.update()`` is called twice, and the document
    fetched from the ACL index is compared with the expected one.
    """
    selector = {'term': {'keywords': 'test'}}
    with db.session.begin_nested():
        acl = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=test_users.u1,
            record_selector=selector,
        )
        db.session.add(acl)

    # The first call makes version 1, the second makes version 2.
    for _ in range(2):
        acl.update()

    idx = acl.get_acl_index_name(schema_to_index(RECORD_SCHEMA)[0])
    acl_doctype = current_app.config['INVENIO_EXPLICIT_ACLS_DOCTYPE_NAME']
    acl_md = current_search_client.get(
        index=idx, doc_type=acl_doctype, id=acl.id)

    # ES7 adds bookkeeping keys that are not part of the comparison.
    for es7_only_key in ('_seq_no', '_primary_term'):
        acl_md.pop(es7_only_key, None)

    print(json.dumps(acl_md, indent=4))

    expected_doc = {
        '_id': acl.id,
        '_index': 'invenio_explicit_acls-acl-v1.0.0-records-record-v1.0.0',
        '_source': {
            '__acl_record_selector': {'term': {'keywords': 'test'}},
            '__acl_record_type': 'elasticsearch',
        },
        '_type': '_doc',
        '_version': 2,
        'found': True,
    }
    assert acl_md == expected_doc
Esempio n. 16
0
def get_from_es(pid, schema='records/record-v1.0.0.json'):
    """Fetch the Elasticsearch document stored under ``pid.object_uuid``.

    The index and document type are resolved from ``schema`` through
    ``schema_to_index``; the search client's ``get`` response is returned
    as is.
    """
    es_index, es_doctype = schema_to_index(schema)
    lookup = dict(index=es_index, doc_type=es_doctype, id=pid.object_uuid)
    return current_search_client.get(**lookup)
Esempio n. 17
0
def test_used_in_records(app, db, es, es_acl_prepare, test_users):
    """``used_in_records`` lists the records an ACL is cached on.

    Two ElasticsearchACLs select on different keywords and two records are
    indexed about a second apart, one per keyword. Timestamps taken before,
    between and after the two indexing steps are passed as
    ``older_than_timestamp`` to check which record ids are reported.
    """
    owner = test_users.u1

    with db.session.begin_nested():
        acl1 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=owner,
            record_selector={'term': {'keywords': 'blah'}},
        )
        actor1 = SystemRoleActor(
            name='auth',
            acl=acl1,
            originator=owner,
            system_role='authenticated_user',
        )
        db.session.add(acl1)
        db.session.add(actor1)

        acl2 = ElasticsearchACL(
            name='test',
            schemas=[RECORD_SCHEMA],
            priority=0,
            operation='get',
            originator=owner,
            record_selector={'term': {'keywords': 'test'}},
        )
        actor2 = SystemRoleActor(
            name='noauth',
            acl=acl2,
            originator=owner,
            system_role='any_user',
        )
        db.session.add(actor2)
        db.session.add(acl2)

    acl1.update()
    acl2.update()

    pid1, record1 = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['blah']},
        clz=SchemaEnforcingRecord)
    pid2, record2 = create_record(
        {'$schema': RECORD_SCHEMA, 'keywords': ['test']},
        clz=SchemaEnforcingRecord)

    def utc_now():
        # Timezone-aware current time.
        return datetime.datetime.now(datetime.timezone.utc)

    def index_and_sync(record):
        # Index one record, then refresh and flush the indices.
        RecordIndexer().index(record)
        current_search_client.indices.refresh()
        current_search_client.indices.flush()

    ts1 = utc_now()
    time.sleep(0.1)
    index_and_sync(record1)

    time.sleep(1)
    ts2 = utc_now()
    time.sleep(0.1)
    index_and_sync(record2)

    time.sleep(1)
    ts3 = utc_now()

    # the records should have cached ACLs, let's check
    idx, doc_type = schema_to_index(RECORD_SCHEMA)

    def cached_acls(record):
        # Fetch the record's indexed ACL list with timestamps cleared.
        hit = current_search_client.get(
            index=idx, doc_type=doc_type, id=str(record.id))
        return clear_timestamp(hit['_source']['_invenio_explicit_acls'])

    assert cached_acls(record1) == [{
        'operation': 'get',
        'id': acl1.id,
        'timestamp': 'cleared',
        'system_role': ['authenticated_user'],
    }]

    assert cached_acls(record2) == [{
        'operation': 'get',
        'id': acl2.id,
        'timestamp': 'cleared',
        'system_role': ['any_user'],
    }]

    record1_id = str(record1.id)
    record2_id = str(record2.id)

    # there should be no resource for acl1 before ts1
    assert list(acl1.used_in_records(older_than_timestamp=ts1)) == []
    # one record before ts2 and ts3
    assert list(acl1.used_in_records(older_than_timestamp=ts2)) == [record1_id]
    assert list(acl1.used_in_records(older_than_timestamp=ts3)) == [record1_id]
    # and one record before now
    assert list(acl1.used_in_records()) == [record1_id]

    # there should be no resource for acl2 before ts1 and ts2
    assert list(acl2.used_in_records(older_than_timestamp=ts1)) == []
    assert list(acl2.used_in_records(older_than_timestamp=ts2)) == []
    # one record before ts3
    assert list(acl2.used_in_records(older_than_timestamp=ts3)) == [record2_id]
    # and one record before now
    assert list(acl2.used_in_records()) == [record2_id]
Esempio n. 18
0
def test_basic_in_cluster_migration(in_cluster_app, testdata):
    """Test a basic in-cluster migration.

    Runs ``init_migration``, ``run_migration`` (twice) and ``rollover_sync``
    for one recipe, checking after each stage which indices exist and that
    the recipe document ends up with status ``COMPLETED``.

    The temporary mapping file made with ``tempfile.mkstemp`` is closed and
    removed in a ``finally`` block, so a failing assertion no longer leaks
    the file descriptor or leaves the file behind.
    """
    recipe_id = 'my_recipe'
    in_cluster_app.config['SEARCH_INDEX_PREFIX'] = 'new-'
    old_suffix = current_search.current_suffix
    current_search._current_suffix = '-new'

    old_indices = [
        'old-records-record-v1.0.0{}'.format(old_suffix),
        'old-authors-author-v1.0.0{}'.format(old_suffix),
    ]
    new_indices = [
        'new-.invenio-index-migrator',
        'new-records-record-v1.0.0-new',
        'new-authors-author-v1.0.0-new',
    ]

    records_mapping_file, mapping_filepath = tempfile.mkstemp()
    try:
        # Write a copy of the authors mapping with ``author_id`` as text.
        with open(current_search.mappings['authors-author-v1.0.0']) as old:
            data = json.load(old)
        data['mappings']['properties']['author_id']['type'] = 'text'
        with open(mapping_filepath, 'w') as new:
            new.write(json.dumps(data))

        assert_indices_exists(exists=old_indices, not_exists=new_indices)

        # The CLI results are not inspected; each stage is verified through
        # the index checks that follow it.
        runner = in_cluster_app.test_cli_runner()
        runner.invoke(init_migration, [recipe_id, '--yes-i-know'])

        assert_indices_exists(
            exists=old_indices + new_indices,
            not_exists=['new-records-record-v1.0.0'])

        # TODO: assert new indexes are created and no aliases
        runner.invoke(run_migration, [recipe_id])

        assert_indices_exists(
            exists=old_indices + new_indices,
            not_exists=['new-records-record-v1.0.0'])

        runner.invoke(run_migration, [recipe_id])

        recipe_doc = current_search_client.get(
            index='new-.invenio-index-migrator', id=recipe_id)
        assert recipe_doc['_source']['status'] == 'COMPLETED'

        runner.invoke(rollover_sync, [recipe_id])

        assert_indices_exists(
            exists=old_indices + new_indices + ['new-records-record-v1.0.0'],
            not_exists=[])
    finally:
        # mkstemp leaves both closing the descriptor and deleting the file
        # to the caller.
        os.close(records_mapping_file)
        os.remove(mapping_filepath)
Esempio n. 19
0
def test_create_acl_after_record(app, db, es, es_acl_prepare, test_users):
    """An ACL added after a record exists is applied, then removed again.

    A record is created over REST, a ``DefaultACL`` is stored and reindexed
    with ``reindex_acl``, and the indexed record must list it. The ACL is
    then deleted and ``reindex_acl_removed`` is called, after which the
    record must carry no ``_invenio_explicit_acls`` key.
    """
    with app.test_client() as client:
        login(client, test_users.u1)
        request_body = json.dumps({'title': 'blah', 'contributors': []})
        response = client.post(
            records_url(),
            data=request_body,
            content_type='application/json',
        )
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata

        current_search_client.indices.refresh()
        current_search_client.indices.flush()

        with db.session.begin_nested():
            acl1 = DefaultACL(
                name='default',
                schemas=[RECORD_SCHEMA],
                priority=0,
                originator=test_users.u1,
                operation='get',
            )
            actor1 = SystemRoleActor(
                name='auth',
                system_role='any_user',
                acl=acl1,
                originator=test_users.u1,
            )
            db.session.add(acl1)
            db.session.add(actor1)

        # reindex all resources that might be affected by the ACL change
        current_explicit_acls.reindex_acl(acl1, delayed=False)

        index, doctype = schema_to_index(RECORD_SCHEMA)

        def fetch_indexed_record():
            # Resolve the record UUID from its recid on every call, then
            # fetch the indexed document.
            persistent_id = PersistentIdentifier.get(
                'recid', rest_metadata['control_number'])
            return current_search_client.get(
                index=index,
                doc_type=doctype,
                id=str(persistent_id.object_uuid),
            )

        rec_md = fetch_indexed_record()

        clear_timestamp(rec_md)

        expected_acls = [{
            'operation': 'get',
            'id': acl1.id,
            'timestamp': 'cleared',
            'system_role': ['any_user'],
        }]
        assert rec_md['_source']['_invenio_explicit_acls'] == expected_acls

        # remove the ACL from the database
        with db.session.begin_nested():
            db.session.delete(acl1)

        # reindex records affected by the removal of ACL
        current_explicit_acls.reindex_acl_removed(acl1, delayed=False)

        # make sure all changes had time to propagate and test
        current_search_client.indices.refresh()
        current_search_client.indices.flush()

        rec_md = fetch_indexed_record()

        # there is no ACL in the database => no acls are defined nor
        # enforced on the record
        print(json.dumps(rec_md, indent=4))
        assert '_invenio_explicit_acls' not in rec_md['_source']