def test_from_elasticsearch(dataset, default_pipeline_config):
    """Round-trip a dataset through an explore index and read it back.

    Covers three read paths of ``Dataset.from_elasticsearch``:
    a match_all query, a query matching no documents, and a
    field-restricted read.
    """
    pipeline = Pipeline.from_config(default_pipeline_config)
    es_index = explore.create(
        pipeline, dataset, explore_id="test_index", show_explore=False
    )
    es_client = Elasticsearch()
    __wait_for_index_creation__(es_client, es_index)

    # match_all returns every record plus the ES metadata columns
    ds = Dataset.from_elasticsearch(
        es_client, index=es_index, query={"query": {"match_all": {}}}
    )
    assert len(ds) == len(dataset)
    for meta_column in ("_id", "_index", "_type", "_score"):
        assert meta_column in ds.column_names

    # a query matching no documents yields an empty dataset
    ds = Dataset.from_elasticsearch(
        es_client,
        index=es_index,
        query={"query": {"exists": {"field": "not_found.field"}}},
    )
    assert len(ds) == 0

    # restricting ``fields`` keeps only the requested columns
    ds = Dataset.from_elasticsearch(
        es_client, index=es_index, fields=["label", "text", "_id"]
    )
    assert len(ds) == len(dataset)
    for wanted_column in ("label", "text", "_id"):
        assert wanted_column in ds.column_names
    assert "prediction" not in ds.column_names
def dataset_from_path(path: str) -> Dataset:
    """Create a ``Dataset`` from a file path, dispatching on the extension.

    Supported extensions (matched case-insensitively, so ``.CSV`` works too):
      - ``.csv``: loaded via ``Dataset.from_csv``
      - ``.json`` / ``.jsonl``: loaded via ``Dataset.from_json``
      - ``.yaml`` / ``.yml``: a YAML spec describing an Elasticsearch source;
        it must provide a ``client`` mapping (``Elasticsearch`` constructor
        kwargs) and an ``index``, with an optional ``query``.

    Args:
        path: Path to the data file.

    Returns:
        The loaded ``Dataset``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    # Lower-case the suffix so upper-case extensions are accepted as well.
    file_extension = Path(path).suffix.lower()
    if file_extension == ".csv":
        return Dataset.from_csv(path)
    if file_extension in (".json", ".jsonl"):
        return Dataset.from_json(path)
    # yaml files are used for elasticsearch data
    if file_extension in (".yaml", ".yml"):
        from_es_kwargs = yaml_to_dict(path)
        client = Elasticsearch(**from_es_kwargs["client"])
        return Dataset.from_elasticsearch(
            client=client,
            index=from_es_kwargs["index"],
            query=from_es_kwargs.get("query"),
        )
    raise ValueError(f"Could not create a Dataset from '{path}'")