Example #1
0
def test_sync_app_uses_latest_data(monkeypatch, setup_es):
    """Check that sync_app() indexes changes made to records between two batches."""
    CompanyFactory.create_batch(2, name='old name')

    def rename_after_first_batch(*args, **kwargs):
        # Delegate to the real implementation so the documents are actually indexed.
        result = sync_objects(*args, **kwargs)

        # Once the first batch has been synced, rename every company so that
        # the second batch can only see the new name if it reads fresh data.
        first_batch_done = mock_sync_objects.call_count == 1
        if first_batch_done:
            Company.objects.update(name='new name')

        return result

    mock_sync_objects = Mock(side_effect=rename_after_first_batch)
    monkeypatch.setattr('datahub.search.bulk_sync.sync_objects', mock_sync_objects)
    sync_app(CompanySearchApp, batch_size=1)

    setup_es.indices.refresh()

    # Second call -> positional args -> second positional argument -> first item in it.
    second_call = mock_sync_objects.call_args_list[1]
    second_call_positional_args = second_call[0]
    synced_items = second_call_positional_args[1]
    company = synced_items[0]

    fetched_company = setup_es.get(
        index=CompanySearchApp.es_model.get_read_alias(),
        doc_type=CompanySearchApp.name,
        id=company.pk,
    )
    indexed_name = fetched_company['_source']['name']
    assert indexed_name == 'new name'
Example #2
0
def test_sync_app_logic(monkeypatch):
    """Check the bulk actions produced when syncing a mock search app with two objects."""
    bulk_mock = Mock()
    monkeypatch.setattr('datahub.search.bulk_sync.bulk', bulk_mock)

    queryset = MockQuerySet([Mock(id=1), Mock(id=2)])
    search_app = create_mock_search_app(
        current_mapping_hash='mapping-hash',
        target_mapping_hash='mapping-hash',
        read_indices=('index1', 'index2'),
        write_index='index1',
        queryset=queryset,
    )

    sync_app(search_app, batch_size=1000)

    # One action per object, each addressed to the write index.
    expected_actions = [
        {'_index': 'index1', '_id': 1, '_type': 'test-type'},
        {'_index': 'index1', '_id': 2, '_type': 'test-type'},
    ]
    first_call_kwargs = bulk_mock.call_args_list[0][1]
    assert first_call_kwargs['actions'] == expected_actions

    # Both objects fit in one batch of 1000, so bulk() runs exactly once.
    assert bulk_mock.call_count == 1
Example #3
0
def sync_model(search_app_name):
    """
    Sync the model behind the named search app to Elasticsearch.

    The search app is looked up by name and then passed to sync_app().

    Notes on the task configuration (declared outside this function):

    - acks_late is set to True so that the task restarts if interrupted.
    - priority is set to the lowest priority (for Redis, 0 is the highest priority).
    """
    sync_app(get_search_app(search_app_name))
Example #4
0
def test_sync_app_with_overridden_batch_size(monkeypatch):
    """Check that an explicit batch size of 1 produces one bulk call per object."""
    bulk_mock = Mock()
    monkeypatch.setattr('datahub.search.bulk_sync.bulk', bulk_mock)

    objects = [Mock(id=1), Mock(id=2)]
    search_app = create_mock_search_app(queryset=MockQuerySet(objects))

    sync_app(search_app, batch_size=1)

    # Two objects synced one at a time -> two bulk() calls.
    assert bulk_mock.call_count == 2
Example #5
0
def test_sync_app_with_default_batch_size(monkeypatch):
    """Check that two objects are synced in a single bulk call when no batch size is given."""
    bulk_mock = Mock()
    monkeypatch.setattr('datahub.search.bulk_sync.bulk', bulk_mock)

    objects = [Mock(id=1), Mock(id=2)]
    search_app = create_mock_search_app(queryset=MockQuerySet(objects))

    sync_app(search_app)

    # Both objects are expected to fit in one default-sized batch.
    assert bulk_mock.call_count == 1
Example #6
0
def resync_after_migrate(search_app):
    """
    Finish a pending migration for a search app.

    If the app's search model reports that no migration was started, a warning is
    logged and nothing else happens. Otherwise the app is resynced (with
    delete_from_secondary_indices_callback run after each batch) and its aliases
    and indices are then cleaned up.
    """
    migration_started = search_app.search_model.was_migration_started()
    if not migration_started:
        message = (
            f'No pending migration detected for the {search_app.name} search app, aborting '
            f'resync...'
        )
        logger.warning(message)
        return

    sync_app(
        search_app,
        post_batch_callback=delete_from_secondary_indices_callback,
    )
    _clean_up_aliases_and_indices(search_app)
Example #7
0
def sync_es(batch_size, search_apps):
    """Sync each of the given search apps in turn, then log that the sync has finished."""
    for search_app in search_apps:
        sync_app(search_app, batch_size=batch_size)

    completion_message = 'Elasticsearch sync complete!'
    logger.info(completion_message)