Python PandasDatasource.get_batchの例

プログラミング言語: Python

名前空間/パッケージ名: great_expectations.datasource

クラス/型: PandasDatasource

メソッド/関数: get_batch

hotexamples.comのコード掲載数: 6

Python PandasDatasource.get_batch - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのgreat_expectations.datasource.PandasDatasource.get_batchの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

PandasDatasource(12)

get_batch(6)

build_configuration(3)

get_available_data_asset_names(3)

get_data_asset(3)

get_generator(3)

build_batch_kwargs(2)

_infer_default_options(1)

get_batch_kwargs_generator(1)

process_batch_parameters(1)

コード例 #1

ファイルを表示

ファイル: test_pandas_datasource.py プロジェクト: alfredo-f/great_expectations

def test_invalid_reader_pandas_datasource(tmp_path_factory):
    """Exercise ``PandasDatasource.get_batch`` on a file with an unknown extension.

    A CSV-formatted file is written with a ``.notrecognized`` extension, then:

    1. ``get_batch`` with only ``path`` is expected to raise ``BatchKwargsError``.
    2. ``get_batch`` with ``reader_method="blarg"`` is expected to raise
       ``BatchKwargsError``.
    3. ``get_batch`` with ``reader_method="read_csv"`` and ``header=0`` is
       expected to load the file so that column ``a`` starts with ``1``.

    Args:
        tmp_path_factory: pytest fixture used to create the temporary directory.
    """
    basepath = str(
        tmp_path_factory.mktemp("test_invalid_reader_pandas_datasource"))
    datasource = PandasDatasource(
        "mypandassource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": basepath,
            }
        },
    )

    # Single definition of the path that every call below refers to.
    unrecognized_path = os.path.join(
        basepath, "idonotlooklikeacsvbutiam.notrecognized")

    with open(unrecognized_path, "w") as newfile:
        newfile.write("a,b\n1,2\n3,4\n")

    # The message checks are placed AFTER each ``pytest.raises`` block: a
    # statement that follows the raising call inside the block never executes.
    # NOTE(review): these checks were unreachable before; confirm the expected
    # text still matches the messages raised by the library version under test.
    with pytest.raises(BatchKwargsError) as exc:
        datasource.get_batch(batch_kwargs={"path": unrecognized_path})
    assert "Unable to determine reader for path" in exc.value.message

    with pytest.raises(BatchKwargsError) as exc:
        datasource.get_batch(
            batch_kwargs={
                "path": unrecognized_path,
                "reader_method": "blarg",
            })
    assert "Unknown reader method: blarg" in exc.value.message

    # Naming the reader explicitly makes the extension irrelevant.
    batch = datasource.get_batch(
        batch_kwargs={
            "path": unrecognized_path,
            "reader_method": "read_csv",
            "reader_options": {
                "header": 0
            },
        })
    assert batch.data["a"][0] == 1

コード例 #2

ファイルを表示

ファイル: test_pandas_datasource.py プロジェクト: alfredo-f/great_expectations

def test_read_limit(test_folder_connection_path_csv):
    """Check that a ``limit=1`` batch parameter yields a one-row batch.

    The batch kwargs for ``test.csv`` are merged with the result of
    ``process_batch_parameters(limit=1)`` before the batch is requested.

    Args:
        test_folder_connection_path_csv: fixture giving the directory that
            holds ``test.csv``.
    """
    generator_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": test_folder_connection_path_csv,
    }
    datasource = PandasDatasource(
        "PandasCSV",
        batch_kwargs_generators={"subdir_reader": generator_config},
    )

    csv_path = os.path.join(str(test_folder_connection_path_csv), "test.csv")
    batch_kwargs = PathBatchKwargs({
        "path": csv_path,
        "reader_options": {"sep": ","},
    })
    limit_kwargs = datasource.process_batch_parameters(limit=1)
    nested_update(batch_kwargs, limit_kwargs)

    batch = datasource.get_batch(batch_kwargs=batch_kwargs)
    assert isinstance(batch, Batch)
    limited_data = batch.data
    assert (limited_data["col_1"] == [1]).all()
    assert len(limited_data) == 1

    # The returned batch must carry typed kwargs and markers.
    assert isinstance(batch.batch_kwargs, PathBatchKwargs)
    assert isinstance(batch.batch_markers, BatchMarkers)

コード例 #3

ファイルを表示

ファイル: test_datasources.py プロジェクト: cuulee/great_expectations

def test_invalid_reader_pandas_datasource(tmp_path_factory):
    """Exercise ``PandasDatasource.get_batch`` on a file with an unknown extension.

    A CSV-formatted file is written with a ``.notrecognized`` extension, then:

    1. ``get_batch`` with only ``path`` is expected to raise ``BatchKwargsError``.
    2. ``get_batch`` with ``reader_method="blarg"`` is expected to raise
       ``BatchKwargsError``.
    3. ``get_batch`` with ``reader_method="csv"`` and ``header=0`` is expected
       to load the file so that column ``a`` starts with ``1``.

    Args:
        tmp_path_factory: pytest fixture used to create the temporary directory.
    """
    basepath = str(
        tmp_path_factory.mktemp("test_invalid_reader_pandas_datasource"))
    datasource = PandasDatasource('mypandassource', base_directory=basepath)

    # Single definition of the path that every call below refers to.
    unrecognized_path = os.path.join(
        basepath, "idonotlooklikeacsvbutiam.notrecognized")

    with open(unrecognized_path, "w") as newfile:
        newfile.write("a,b\n1,2\n3,4\n")

    # The message checks are placed AFTER each ``pytest.raises`` block: a
    # statement that follows the raising call inside the block never executes.
    # The raised exception is reached through ``exc.value``; ``exc`` itself is
    # pytest's ExceptionInfo wrapper.
    # NOTE(review): these checks were unreachable before; confirm the expected
    # text and the ``message`` attribute against the library version under test.
    with pytest.raises(BatchKwargsError) as exc:
        datasource.get_batch("idonotlooklikeacsvbutiam.notrecognized",
                             expectation_suite_name="default",
                             batch_kwargs={"path": unrecognized_path})
    assert "Unable to determine reader for path" in exc.value.message

    with pytest.raises(BatchKwargsError) as exc:
        datasource.get_batch("idonotlooklikeacsvbutiam.notrecognized",
                             expectation_suite_name="default",
                             batch_kwargs={"path": unrecognized_path},
                             reader_method="blarg")
    assert "Unknown reader method: blarg" in exc.value.message

    # Naming the reader explicitly makes the extension irrelevant.
    dataset = datasource.get_batch(
        "idonotlooklikeacsvbutiam.notrecognized",
        expectation_suite_name="default",
        batch_kwargs={"path": unrecognized_path},
        reader_method="csv",
        header=0)
    assert dataset["a"][0] == 1

コード例 #4

ファイルを表示

ファイル: test_datasources.py プロジェクト: scarrucciu/great_expectations

def test_standalone_pandas_datasource(test_folder_connection_path):
    """Use a ``PandasDatasource`` on its own, without a data context.

    Checks the advertised asset names, that manually built batch kwargs point
    at the same path as the ones yielded by the default generator, and that
    ``get_batch`` returns a ``PandasDataset`` with the expected ``col_1``.

    Args:
        test_folder_connection_path: fixture giving the directory that holds
            ``test.csv``.
    """
    datasource = PandasDatasource(
        'PandasCSV',
        base_directory=test_folder_connection_path,
    )

    asset_names = datasource.get_available_data_asset_names()
    assert asset_names == {"default": {"test"}}

    csv_path = os.path.join(str(test_folder_connection_path), "test.csv")
    manual_batch_kwargs = datasource.build_batch_kwargs(csv_path)

    # No name given: fetch the default (subdir_path) generator.
    default_generator = datasource.get_generator()
    auto_batch_kwargs = default_generator.yield_batch_kwargs("test")

    assert manual_batch_kwargs["path"] == auto_batch_kwargs["path"]

    # Extra keyword arguments passed alongside the batch kwargs.
    extra_reader_kwargs = {"sep": ",", "header": 0, "index_col": 0}
    dataset = datasource.get_batch(
        "test",
        batch_kwargs=auto_batch_kwargs,
        **extra_reader_kwargs,
    )
    assert isinstance(dataset, PandasDataset)
    assert (dataset["col_1"] == [1, 2, 3, 4, 5]).all()

コード例 #5

ファイルを表示

ファイル: test_pandas_datasource.py プロジェクト: rexboyce/great_expectations

def test_pandas_datasource_processes_dataset_options(
        test_folder_connection_path):
    """Check that ``dataset_options`` in the batch kwargs reach the dataset.

    ``{"caching": False}`` is attached to the batch kwargs, and the dataset
    obtained through a ``Validator`` must then report ``caching is False``.

    Args:
        test_folder_connection_path: fixture giving the base directory for the
            ``subdir_reader`` generator.
    """
    generator_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": test_folder_connection_path,
    }
    datasource = PandasDatasource(
        "PandasCSV",
        batch_kwargs_generators={"subdir_reader": generator_config},
    )

    batch_kwargs = datasource.build_batch_kwargs("subdir_reader", name="test")
    batch_kwargs["dataset_options"] = {"caching": False}
    batch = datasource.get_batch(batch_kwargs)

    suite = ExpectationSuite(expectation_suite_name="foo")
    dataset = Validator(batch, suite).get_dataset()
    assert dataset.caching is False

コード例 #6

ファイルを表示

ファイル: test_pandas_datasource.py プロジェクト: rexboyce/great_expectations

def test_standalone_pandas_datasource(test_folder_connection_path):
    """Use a ``PandasDatasource`` on its own, without a data context.

    Checks the advertised asset names, that a hand-built ``PathBatchKwargs``
    points at the same path as the one yielded by the ``subdir_reader``
    generator, and that ``get_batch`` returns a five-row ``Batch`` with typed
    kwargs and markers.

    Args:
        test_folder_connection_path: fixture giving the directory that holds
            ``test.csv``.
    """
    generator_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": test_folder_connection_path,
    }
    datasource = PandasDatasource(
        "PandasCSV",
        batch_kwargs_generators={"subdir_reader": generator_config},
    )

    expected_asset_names = {
        "subdir_reader": {
            "names": [("test", "file")],
            "is_complete_list": True,
        }
    }
    assert datasource.get_available_data_asset_names() == expected_asset_names

    csv_path = os.path.join(str(test_folder_connection_path), "test.csv")
    manual_batch_kwargs = PathBatchKwargs(path=csv_path)

    subdir_generator = datasource.get_batch_kwargs_generator("subdir_reader")
    auto_batch_kwargs = subdir_generator.yield_batch_kwargs("test")

    assert manual_batch_kwargs["path"] == auto_batch_kwargs["path"]

    # Add reader options on top of the generated batch kwargs.
    reader_options = {"sep": ",", "header": 0, "index_col": 0}
    auto_batch_kwargs.update({"reader_options": reader_options})

    batch = datasource.get_batch(batch_kwargs=auto_batch_kwargs)
    assert isinstance(batch, Batch)
    loaded = batch.data
    assert (loaded["col_1"] == [1, 2, 3, 4, 5]).all()
    assert len(loaded) == 5

    # The returned batch must carry typed kwargs and markers.
    assert isinstance(batch.batch_kwargs, PathBatchKwargs)
    assert isinstance(batch.batch_markers, BatchMarkers)