import pytest

from datasets import Dataset, DatasetDict, Features, NamedSplit, Value
from datasets.io.json import JsonDatasetReader

# The helper import path below is an assumption about this test suite's layout;
# the _check_json_dataset/_check_json_datasetdict helpers and the jsonl_path /
# jsonl_312_path fixtures are expected to be defined elsewhere in the suite.
from ..utils import assert_arrow_memory_doesnt_increase, assert_arrow_memory_increases


# Assumed parametrization: the collapsed source dropped the pytest decorators
# these signatures imply; the feature mappings below are illustrative.
@pytest.mark.parametrize(
    "features",
    [
        None,
        {"col_1": "string", "col_2": "int64", "col_3": "float64"},
        {"col_1": "int32", "col_2": "int32", "col_3": "int32"},
    ],
)
def test_dataset_from_json_features(features, jsonl_path, tmp_path):
    cache_dir = tmp_path / "cache"
    default_expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    expected_features = features.copy() if features else default_expected_features
    features = (
        Features({feature: Value(dtype) for feature, dtype in features.items()}) if features is not None else None
    )
    dataset = JsonDatasetReader(jsonl_path, features=features, cache_dir=cache_dir).read()
    _check_json_dataset(dataset, expected_features)
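

# Assumed parametrization (decorator lost in the collapsed source).
@pytest.mark.parametrize("path_type", [str, list])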
def test_dataset_from_json_path_type(path_type, jsonl_path, tmp_path):
    if issubclass(path_type, str):
        path = jsonl_path
    elif issubclass(path_type, list):
        path = [jsonl_path]
    cache_dir = tmp_path / "cache"
    expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    dataset = JsonDatasetReader(path, cache_dir=cache_dir).read()
    _check_json_dataset(dataset, expected_features)
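

# Assumed parametrization; NamedSplit("train") mirrors the str(split) coercion
# in the test body.
@pytest.mark.parametrize("split", [None, NamedSplit("train"), "train", "test"])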
def test_dataset_from_json_split(split, jsonl_path, tmp_path):
    cache_dir = tmp_path / "cache"
    expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    dataset = JsonDatasetReader(jsonl_path, cache_dir=cache_dir, split=split).read()
    _check_json_dataset(dataset, expected_features)
    # Parenthesize the conditional: without it, `==` binds tighter than `if/else`
    # and the assert degenerates to the always-truthy string "train" when split is None.
    assert dataset.split == (str(split) if split else "train")
def test_datasetdict_from_json_splits(split, jsonl_path, tmp_path):
    if split:
        path = {split: jsonl_path}
    else:
        split = "train"
        path = {"train": jsonl_path, "test": jsonl_path}
    cache_dir = tmp_path / "cache"
    expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    dataset = JsonDatasetReader(path, cache_dir=cache_dir).read()
    _check_json_datasetdict(dataset, expected_features, splits=list(path.keys()))
    assert all(dataset[split].split == split for split in path.keys())
def test_dataset_from_json_keep_in_memory(keep_in_memory, jsonl_path, tmp_path):
    cache_dir = tmp_path / "cache"
    expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    with assert_arrow_memory_increases() if keep_in_memory else assert_arrow_memory_doesnt_increase():
        dataset = JsonDatasetReader(jsonl_path, cache_dir=cache_dir, keep_in_memory=keep_in_memory).read()
    _check_json_dataset(dataset, expected_features)
def test_dataset_from_json_with_unsorted_column_names(features, jsonl_312_path, tmp_path):
    cache_dir = tmp_path / "cache"
    default_expected_features = {"col_3": "float64", "col_1": "string", "col_2": "int64"}
    expected_features = features.copy() if features else default_expected_features
    features = (
        Features({feature: Value(dtype) for feature, dtype in features.items()}) if features is not None else None
    )
    dataset = JsonDatasetReader(jsonl_312_path, features=features, cache_dir=cache_dir).read()
    assert isinstance(dataset, Dataset)
    assert dataset.num_rows == 2
    assert dataset.num_columns == 3
    assert dataset.column_names == ["col_3", "col_1", "col_2"]
    for feature, expected_dtype in expected_features.items():
        assert dataset.features[feature].dtype == expected_dtype
def test_json_dataset_reader(
    path_type,
    split,
    features,
    keep_in_memory,
    jsonl_path,
    tmp_path,
):
    file_path = jsonl_path
    field = None
    if issubclass(path_type, str):
        path = file_path
    elif issubclass(path_type, list):
        path = [file_path]
    cache_dir = tmp_path / "cache"
    expected_split = str(split) if split else "train"
    default_expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    expected_features = features.copy() if features else default_expected_features
    features = Features({feature: Value(dtype) for feature, dtype in features.items()}) if features else None
    with assert_arrow_memory_increases() if keep_in_memory else assert_arrow_memory_doesnt_increase():
        dataset = JsonDatasetReader(
            path, split=split, features=features, cache_dir=cache_dir, keep_in_memory=keep_in_memory, field=field
        ).read()
    assert isinstance(dataset, Dataset)
    assert dataset.num_rows == 4
    assert dataset.num_columns == 3
    assert dataset.column_names == ["col_1", "col_2", "col_3"]
    assert dataset.split == expected_split
    for feature, expected_dtype in expected_features.items():
        assert dataset.features[feature].dtype == expected_dtype
def test_datasetdict_from_json_features(features, jsonl_path, tmp_path):
    cache_dir = tmp_path / "cache"
    default_expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    expected_features = features.copy() if features else default_expected_features
    features = (
        Features({feature: Value(dtype) for feature, dtype in features.items()}) if features is not None else None
    )
    dataset = JsonDatasetReader({"train": jsonl_path}, features=features, cache_dir=cache_dir).read()
    _check_json_datasetdict(dataset, expected_features)
def test_json_datasetdict_reader(
    split,
    features,
    keep_in_memory,
    jsonl_path,
    tmp_path,
):
    file_path = jsonl_path
    field = None
    if split:
        path = {split: file_path}
    else:
        split = "train"
        path = {"train": file_path, "test": file_path}
    cache_dir = tmp_path / "cache"
    default_expected_features = {"col_1": "string", "col_2": "int64", "col_3": "float64"}
    expected_features = features.copy() if features else default_expected_features
    features = Features({feature: Value(dtype) for feature, dtype in features.items()}) if features else None
    with assert_arrow_memory_increases() if keep_in_memory else assert_arrow_memory_doesnt_increase():
        dataset = JsonDatasetReader(
            path, features=features, cache_dir=cache_dir, keep_in_memory=keep_in_memory, field=field
        ).read()
    assert isinstance(dataset, DatasetDict)
    dataset = dataset[split]
    assert dataset.num_rows == 4
    assert dataset.num_columns == 3
    assert dataset.column_names == ["col_1", "col_2", "col_3"]
    assert dataset.split == split
    for feature, expected_dtype in expected_features.items():
        assert dataset.features[feature].dtype == expected_dtype