Python DocTypeの例

プログラミング言語: Python

名前空間/パッケージ名: elasticsearch_dsl

クラス/型: DocType

hotexamples.comのコード掲載数: 2

Python DocType - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのelasticsearch_dsl.DocTypeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示/非表示

DocType(2)

body(2)

save(2)

よく使われるメソッド

DocType (2)

body (2)

save (2)

コード例 #1

ファイルを表示

ファイル: test_elasticsearch_stages.py プロジェクト: streamsets/datacollector-tests-external

def test_offset_upgrade(sdc_builder, sdc_executor, elasticsearch):
    """Ensure the Elasticsearch origin upgrades an old-format committed offset.

    The older offset format can be produced either by SCH or by upgrading a
    pre-multithreaded pipeline. The test seeds the pipeline with such an offset,
    indexes a single document, runs the pipeline once and then verifies that:

    * the document is read exactly as it was written (index, id and body), and
    * the legacy poll-source key is no longer present in the committed offsets.

    Args:
        sdc_builder: Fixture used to obtain a pipeline builder.
        sdc_executor: Fixture used to register the pipeline, seed/read its
            committed offsets and capture a snapshot.
        elasticsearch: Fixture used to configure the pipeline for the target
            environment and to open the client connection.
    """
    # Key used by the legacy (pre-multithreaded) offset format.
    poll_offset_key = '$com.streamsets.datacollector.pollsource.offset$'

    # Elasticsearch indexes must be lower case.
    es_index = get_random_string(string.ascii_letters, 10).lower()
    es_doc_id = get_random_string(string.ascii_letters, 10)
    raw_str = 'Hello World!'

    builder = sdc_builder.get_pipeline_builder()
    es_origin = builder.add_stage('Elasticsearch', type='origin')
    es_origin.set_attributes(index=es_index,
                             query="{'query': {'match_all': {}}}")
    trash = builder.add_stage('Trash')

    es_origin >> trash
    pipeline = builder.build().configure_for_environment(elasticsearch)
    sdc_executor.add_pipeline(pipeline)

    # Hard code the offset in its pre-migration form, forcing the origin to upgrade it.
    legacy_offset = {
        'offsets': {
            poll_offset_key: None,
        },
        'version': 2
    }
    sdc_executor.api_client.update_pipeline_committed_offsets(pipeline.id,
                                                              body=legacy_offset)

    index = Index(es_index)
    try:
        # Put data to Elasticsearch
        elasticsearch.connect()
        doc_type = DocType(meta={'id': es_doc_id, 'index': es_index})
        doc_type.body = raw_str
        doc_type.save()  # save document to Elasticsearch
        # Refresh the index so the saved document becomes available for search.
        assert index.refresh()

        # Run pipeline and assert. There is no need to stop the pipeline, as the
        # ES origin shuts off once data is read from Elasticsearch.
        snapshot = sdc_executor.capture_snapshot(pipeline,
                                                 start_pipeline=True).snapshot
        snapshot_data = snapshot[es_origin.instance_name].output[0].field

        # assert ES meta (separate asserts so a failure names the mismatching field)
        assert snapshot_data['_index'] == es_index
        assert snapshot_data['_id'] == es_doc_id
        # assert ES data
        assert snapshot_data['_source']['body'] == raw_str

        # Now validate that the offset doesn't have the poll key any more
        offset = sdc_executor.api_client.get_pipeline_committed_offsets(
            pipeline.id).response.json()
        assert offset is not None
        assert poll_offset_key not in offset['offsets']
    finally:
        # Clean up test data in ES. A missing index (HTTP 404) is tolerated so
        # that a failure which happened before the document was indexed is not
        # masked by a secondary error raised from this cleanup step.
        index.delete(ignore=404)

コード例 #2

ファイルを表示

ファイル: test_elasticsearch_stages.py プロジェクト: gridl/datacollector-tests

def test_elasticsearch_origin(sdc_builder, sdc_executor, elasticsearch):
    """Test for Elasticsearch origin stage.

    Data is written through the Elasticsearch client and read back through an
    Elasticsearch origin pipeline; the assertions are made on a snapshot of
    that pipeline.

    The pipeline looks like:

    Elasticsearch origin pipeline:
        es_origin >> trash

    Args:
        sdc_builder: Fixture used to obtain a pipeline builder.
        sdc_executor: Fixture used to register the pipeline and capture a snapshot.
        elasticsearch: Fixture used to configure the pipeline for the target
            environment and to open the client connection.
    """
    # Elasticsearch indexes must be lower case
    es_index = get_random_string(string.ascii_letters, 10).lower()
    es_doc_id = get_random_string(string.ascii_letters, 10)
    raw_str = 'Hello World!'

    builder = sdc_builder.get_pipeline_builder()
    es_origin = builder.add_stage('Elasticsearch', type='origin')
    es_origin.set_attributes(index=es_index,
                             query="{'query': {'match_all': {}}}")
    trash = builder.add_stage('Trash')

    es_origin >> trash
    es_origin_pipeline = builder.build(
        title='ES origin pipeline').configure_for_environment(elasticsearch)
    sdc_executor.add_pipeline(es_origin_pipeline)

    index = Index(es_index)
    try:
        # Put data to Elasticsearch
        elasticsearch.connect()
        doc_type = DocType(meta={'id': es_doc_id, 'index': es_index})
        doc_type.body = raw_str
        doc_type.save()  # save document to Elasticsearch
        # Refresh the index so the saved document becomes available for search.
        assert index.refresh()

        # Run pipeline and assert. There is no need to stop the pipeline, as the
        # ES origin shuts off once data is read from Elasticsearch.
        snapshot = sdc_executor.capture_snapshot(es_origin_pipeline,
                                                 start_pipeline=True).snapshot
        stage_output = snapshot[es_origin.instance_name].output[0]
        snapshot_data = stage_output.value['value']

        # assert ES meta (separate asserts so a failure names the mismatching field)
        assert snapshot_data['_index']['value'] == es_index
        assert snapshot_data['_id']['value'] == es_doc_id
        # assert ES data
        assert snapshot_data['_source']['value']['body']['value'] == raw_str
    finally:
        # Clean up test data in ES. A missing index (HTTP 404) is tolerated so
        # that a failure which happened before the document was indexed is not
        # masked by a secondary error raised from this cleanup step.
        index.delete(ignore=404)