Esempio n. 1
0
def test_reindex(app, script_info):
    """Check that ``reindex`` followed by ``run`` makes a record retrievable.

    One record is committed to the database, the indexing queue is
    initialised and purged, the ``reindex`` and ``run`` commands are invoked,
    and the document is then fetched back through the search client.
    """
    with app.test_request_context():
        cli_runner = CliRunner()

        # Persist the single record that the reindex command should pick up.
        record_id = uuid.uuid4()
        payload = {'title': 'Test0'}
        record = Record.create(payload, id_=record_id)
        db.session.commit()

        # Set up the queue and make sure it starts out empty.
        outcome = cli_runner.invoke(
            cli.queue, ['init', 'purge'], obj=script_info)
        assert outcome.exit_code == 0

        # Queue the records for indexing ...
        outcome = cli_runner.invoke(
            cli.reindex, ['--yes-i-know'], obj=script_info)
        assert outcome.exit_code == 0
        # ... and process the queue.
        outcome = cli_runner.invoke(cli.run, [], obj=script_info)
        assert outcome.exit_code == 0

        # NOTE(review): presumably waits for the document to become visible
        # in the search backend -- confirm whether a refresh would do.
        sleep(5)

        record_indexer = RecordIndexer()
        index, doc_type = record_indexer.record_to_index(record)
        document = current_search_client.get(
            index=index, doc_type=doc_type, id=record_id)
        assert document['found']

        # Tear the queue down again.
        outcome = cli_runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert outcome.exit_code == 0
Esempio n. 2
0
def test_reindex(app, script_info):
    """Verify that a stored record ends up in the search index.

    The test creates one record, initialises the queue, runs the ``reindex``
    and ``run`` CLI commands and then reads the document back from the
    search client before deleting the queue.
    """
    with app.test_request_context():
        cli_runner = CliRunner()

        def invoke_ok(command, arguments):
            """Invoke a CLI command and require a zero exit code."""
            outcome = cli_runner.invoke(command, arguments, obj=script_info)
            assert outcome.exit_code == 0

        # Store the record to be indexed.
        record_id = uuid.uuid4()
        record = Record.create({'title': 'Test0'}, id_=record_id)
        db.session.commit()

        # Queue set-up, enqueueing and consumption, in that order.
        invoke_ok(cli.queue, ['init', 'purge'])
        invoke_ok(cli.reindex, ['--yes-i-know'])
        invoke_ok(cli.run, [])

        # NOTE(review): presumably gives the search backend time to expose
        # the document -- confirm.
        sleep(5)

        record_indexer = RecordIndexer()
        index, doc_type = record_indexer.record_to_index(record)
        lookup = {'index': index, 'doc_type': doc_type, 'id': record_id}
        document = current_search_client.get(**lookup)
        assert document['found']

        # Remove the queue created above.
        invoke_ok(cli.queue, ['delete'])
Esempio n. 3
0
def mef_person_update_index(sender, *args, **kwargs):
    """Index in ES the MEF persons referenced by a document record.

    The record is read from ``kwargs['record']``. Nothing happens unless its
    ``$schema`` contains ``'documents'``. For every author carrying a ``$ref``
    the MEF URL is resolved over HTTP and, when the answer contains
    ``metadata``, that metadata is indexed under the MEF ``id``.

    :param sender: unused; presumably the signal sender.
    :param kwargs: must contain the key ``record``.
    :raises Exception: when the MEF resolver does not answer with an OK
        status code.
    """
    record = kwargs['record']
    if 'documents' not in record.get('$schema', ''):
        return

    for author in record.get('authors', []):
        mef_url = author.get('$ref')
        if not mef_url:
            continue

        # Point the reference at the configured MEF host.
        mef_url = mef_url.replace(
            'mef.rero.ch', current_app.config['RERO_ILS_MEF_HOST'])
        response = requests_get(url=mef_url,
                                params=dict(resolve=1, sources=1))
        if response.status_code != requests_codes.ok:
            current_app.logger.error(
                'Mef resolver request error: {stat} {url}'.format(
                    stat=response.status_code, url=mef_url))
            raise Exception('unable to resolve')

        payload = response.json()
        person_id = payload['id']
        metadata = payload.get('metadata')
        if not metadata:
            continue

        # The schema is set before resolving the index, so it is available
        # to ``record_to_index``.
        metadata['id'] = person_id
        metadata['$schema'] = current_jsonschemas.path_to_url(
            current_app.config['RERO_ILS_PERSONS_MEF_SCHEMA'])
        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(metadata)
        indexer.client.index(
            id=person_id,
            index=index,
            doc_type=doc_type,
            body=metadata,
        )
        current_search.flush_and_refresh(index)
Esempio n. 4
0
def mef_person_delete(sender, *args, **kwargs):
    """Remove from ES the MEF persons referenced by a document record.

    The record is read from ``kwargs['record']``. Nothing happens unless its
    ``$schema`` contains ``'documents'``. For every author carrying a ``$ref``
    the MEF URL is resolved over HTTP; the person is deleted from the index
    only when the documents search reports exactly one hit for its id.

    :param sender: unused; presumably the signal sender.
    :param kwargs: must contain the key ``record``.
    :raises Exception: when the MEF resolver does not answer with an OK
        status code.
    """
    record = kwargs['record']
    if 'documents' not in record.get('$schema', ''):
        return

    for author in record.get('authors', []):
        mef_url = author.get('$ref')
        if not mef_url:
            continue

        # Point the reference at the configured MEF host.
        mef_url = mef_url.replace(
            'mef.rero.ch', current_app.config['RERO_ILS_MEF_HOST'])
        response = requests_get(url=mef_url,
                                params=dict(resolve=1, sources=1))
        if response.status_code != requests_codes.ok:
            current_app.logger.error(
                'Mef resolver request error: {result} {url}'.format(
                    result=response.status_code, url=mef_url))
            raise Exception('unable to resolve')

        payload = response.json()
        person_id = payload['id']
        metadata = payload.get('metadata')
        if not metadata:
            continue

        # Presumably a single hit means no other document still references
        # this person -- confirm against the indexing order of documents.
        count = DocumentsSearch().filter(
            'match', authors__pid=person_id).execute().hits.total
        if count != 1:
            continue

        # NOTE(review): the index is resolved from the raw MEF metadata, and
        # no ``$schema`` is set here; confirm this maps to the persons index.
        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(metadata)
        indexer.client.delete(id=person_id,
                              index=index,
                              doc_type=doc_type)
        current_search.flush_and_refresh(index)
def test_before_deposit_index_hook_sets_files(create_record, db, es):
    """Indexing a deposit with a file exposes ``_files`` in the ES document.

    A file is added to the deposit's bucket, the deposit is indexed, and the
    raw document is read back to check the ``_files`` entry and its type.
    """
    deposit = create_record(published=False)

    # Simulate an upload by adding a file to the deposit's bucket.
    deposit_bucket = Bucket.get(deposit['_buckets']['deposit'])
    file_version = ObjectVersion.create(deposit_bucket, 'foo.txt')
    content = BytesIO(b'Hello world!')
    file_version.set_contents(
        content,
        size=len(content.getvalue()),
        size_limit=deposit_bucket.size_limit,
    )
    db.session.commit()

    record_indexer = RecordIndexer()
    record_indexer.index(deposit)

    # Read the raw document back from the index.
    index, doc_type = record_indexer.record_to_index(deposit)
    indexed = es.get(index=index, doc_type=doc_type, id=deposit.id)
    source = indexed['_source']
    assert '_files' in source
    assert source['_files'][0]['type'] == 'txt'
Esempio n. 6
0
 def reindex_pid(pid_type, RecordClass):
     """Index every record that has a registered PID of ``pid_type``.

     ``only`` and ``raise_on_error`` are not defined in this block; they are
     read from the surrounding scope. When ``only`` is truthy, records whose
     ``str(record.id)`` differs from it are skipped. Records that fail to
     index are appended, with the traceback, to
     ``/tmp/indexing-error.json``; the error is re-raised only when
     ``raise_on_error`` is truthy.

     After the loop the last index written to is refreshed and flushed.

     :param pid_type: PID type used to select persistent identifiers.
     :param RecordClass: class providing ``get_record(uuid)``.
     """
     index_name = None
     indexer = RecordIndexer()
     registered_pids = PersistentIdentifier.query.filter_by(
         pid_type=pid_type,
         object_type='rec',
         status=PIDStatus.REGISTERED.value)
     for pid in tqdm.tqdm(registered_pids):
         record = RecordClass.get_record(pid.object_uuid)
         if only and str(record.id) != only:
             continue
         try:
             index_name, _ = indexer.record_to_index(record)
             index_name = build_alias_name(index_name)
             indexer.index(record)
         except Exception:
             # ``Exception`` rather than a bare ``except`` so that
             # KeyboardInterrupt / SystemExit still stop a long run.
             # The log is opened as UTF-8 because the dump keeps non-ASCII
             # characters (``ensure_ascii=False``).
             with open('/tmp/indexing-error.json', 'a',
                       encoding='utf-8') as f:
                 print(json.dumps(record.dumps(), indent=4,
                                  ensure_ascii=False), file=f)
                 traceback.print_exc(file=f)
             if raise_on_error:
                 raise
     if index_name:
         current_search_client.indices.refresh(index_name)
         current_search_client.indices.flush(index_name)
Esempio n. 7
0
def reindex_pid(pid_type,
                RecordClass,
                only=False,
                raise_on_error=None):
    """Index every record that has a registered PID of ``pid_type``.

    PIDs whose record cannot be loaded (``NoResultFound``) are skipped.
    A ``keywords`` value equal to the placeholder string
    ``"Keywords must be fixed in draft mode"`` is removed from the record
    before indexing. Records that fail to index are appended, with the
    traceback, to ``/tmp/indexing-error.json``.

    After the loop the last index written to is refreshed and flushed.

    :param pid_type: PID type used to select persistent identifiers.
    :param RecordClass: class providing ``get_record(uuid)``.
    :param only: when truthy, only the record whose ``str(record.id)``
        equals this value is indexed (it is compared as a string, not used
        as a flag).
    :param raise_on_error: when truthy, an indexing error is re-raised after
        being logged; otherwise processing continues with the next record.
    """
    index_name = None
    indexer = RecordIndexer()
    pids = PersistentIdentifier.query.filter_by(
        pid_type=pid_type,
        object_type='rec',
        status=PIDStatus.REGISTERED.value).all()
    for pid in tqdm(pids):
        try:
            record = RecordClass.get_record(pid.object_uuid)
        except NoResultFound:
            continue
        # Drop the placeholder keywords value before indexing.
        if record.get("keywords") == "Keywords must be fixed in draft mode":
            del record["keywords"]
        if only and str(record.id) != only:
            continue
        try:
            index_name, _ = indexer.record_to_index(record)
            index_name = build_alias_name(index_name)
            indexer.index(record)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop a long run.
            # The log is opened as UTF-8 because the dump keeps non-ASCII
            # characters (``ensure_ascii=False``).
            with open('/tmp/indexing-error.json', 'a',
                      encoding='utf-8') as f:
                print(json.dumps(record.dumps(), indent=4, ensure_ascii=False),
                      file=f)
                traceback.print_exc(file=f)
            if raise_on_error:
                raise
    if index_name:
        current_search_client.indices.refresh(index_name)
        current_search_client.indices.flush(index_name)
Esempio n. 8
0
def test_reindex(app, script_info):
    """Check that reindexing by PID type skips deleted records.

    Two records with registered ``recid`` PIDs are created and reindexed;
    both must be found. One record is then deleted, the indices are
    destroyed and the reindex repeated; only the surviving record must be
    found.
    """
    with app.test_request_context():
        cli_runner = CliRunner()

        def invoke_ok(command, arguments):
            """Invoke a CLI command and require a zero exit code."""
            outcome = cli_runner.invoke(command, arguments, obj=script_info)
            assert outcome.exit_code == 0

        def reindex_and_run():
            """Queue all ``recid`` records, process the queue, refresh."""
            invoke_ok(cli.reindex, ['--yes-i-know', '-t', 'recid'])
            invoke_ok(cli.run, [])
            current_search.flush_and_refresh(index)

        # Two records, each with a registered PID.
        first_id = uuid.uuid4()
        second_id = uuid.uuid4()
        first_record = Record.create(
            {'title': 'Test 1', 'recid': 1}, id_=first_id)
        Record.create({'title': 'Test 2', 'recid': 2}, id_=second_id)
        for pid_value, object_uuid in ((1, first_id), (2, second_id)):
            PersistentIdentifier.create(
                pid_type='recid',
                pid_value=pid_value,
                object_type='rec',
                object_uuid=object_uuid,
                status=PIDStatus.REGISTERED,
            )
        db.session.commit()

        record_indexer = RecordIndexer()
        index, doc_type = record_indexer.record_to_index(first_record)

        # The index must not be left over from another test.
        assert current_search_client.indices.exists(index) is False

        # Set up the queue and make sure it starts out empty.
        invoke_ok(cli.queue, ['init', 'purge'])

        # First pass: both records are expected in the index.
        reindex_and_run()
        found = current_search_client.search(index=index)
        assert found['hits']['total'] == 2

        # Delete the second record, wipe the indices and reindex.
        Record.get_record(second_id).delete()
        db.session.commit()
        list(current_search.delete(ignore=[404]))
        reindex_and_run()

        # Second pass: only the first record may be indexed.
        found = current_search_client.search(index=index)
        hits = found['hits']
        assert hits['total'] == 1
        assert hits['hits'][0]['_source']['title'] == 'Test 1'

        # Remove the queue and the indices.
        invoke_ok(cli.queue, ['delete'])
        list(current_search.delete(ignore=[404]))