def test_iterable_dataset_cast_column(generate_examples_fn): ex_iterable = ExamplesIterable(generate_examples_fn, {"label": 10}) features = Features({"id": Value("int64"), "label": Value("int64")}) dataset = IterableDataset(ex_iterable, info=DatasetInfo(features=features)) casted_dataset = dataset.cast_column("label", Value("bool")) casted_features = features.copy() casted_features["label"] = Value("bool") assert list(casted_dataset) == [casted_features.encode_example(ex) for _, ex in ex_iterable]
def test_iterable_dataset_map_complex_features(dataset: IterableDataset, generate_examples_fn): # https://github.com/huggingface/datasets/issues/3505 ex_iterable = ExamplesIterable(generate_examples_fn, {"label": "positive"}) features = Features( { "id": Value("int64"), "label": Value("string"), } ) dataset = IterableDataset(ex_iterable, info=DatasetInfo(features=features)) dataset = dataset.cast_column("label", ClassLabel(names=["negative", "positive"])) dataset = dataset.map(lambda x: {"id+1": x["id"] + 1, **x}) assert isinstance(dataset._ex_iterable, MappedExamplesIterable) features["label"] = ClassLabel(names=["negative", "positive"]) assert [{k: v for k, v in ex.items() if k != "id+1"} for ex in dataset] == [ features.encode_example(ex) for _, ex in ex_iterable ]