def _split_generators(self, dl_manager):
    """Returns SplitGenerators."""
    train_path = dl_manager.download(_URLs["train"])
    dev_path = dl_manager.download(_URLs["dev"])
    test_fixed_path = dl_manager.download(_URLs["test_fixed"])
    test_random_path = dl_manager.download(_URLs["test_random"])
    return [
        datasets.SplitGenerator(
            name=datasets.Split.TRAIN,
            # These kwargs will be passed to _generate_examples
            gen_kwargs={
                "filepath": train_path,
                "split": "train",
            },
        ),
        datasets.SplitGenerator(
            name=datasets.NamedSplit("test_random"),
            # These kwargs will be passed to _generate_examples
            gen_kwargs={
                "filepath": test_random_path,
                "split": "test_random",
            },
        ),
        datasets.SplitGenerator(
            name=datasets.NamedSplit("test_fixed"),
            # These kwargs will be passed to _generate_examples
            gen_kwargs={
                "filepath": test_fixed_path,
                "split": "test_fixed",
            },
        ),
        datasets.SplitGenerator(
            name=datasets.Split.VALIDATION,
            # These kwargs will be passed to _generate_examples
            gen_kwargs={
                "filepath": dev_path,
                "split": "dev",
            },
        ),
    ]
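# Illustrative sketch only (not part of the original file): the matching _generate_examples
# would consume the gen_kwargs declared above ("filepath", "split"). The JSON Lines format
# and the bare pass-through of each record are assumptions made for illustration; the real
# parsing logic and feature keys depend on the dataset's actual files and its _info() schema.
def _generate_examples(self, filepath, split):
    """Yields (key, example) pairs; file format assumed to be JSON Lines for this sketch."""
    import json  # local import only because this sketch stands alone

    with open(filepath, encoding="utf-8") as f:
        for idx, line in enumerate(f):
            # `split` could be used to branch parsing per split; unused in this sketch.
            record = json.loads(line)
            yield idx, record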
"col_2": "int64", "col_3": "float64" } expected_features = features.copy( ) if features else default_expected_features features = (Features({ feature: Value(dtype) for feature, dtype in features.items() }) if features is not None else None) dataset = DatasetDict.from_csv({"train": csv_path}, features=features, cache_dir=cache_dir) _check_csv_datasetdict(dataset, expected_features) @pytest.mark.parametrize("split", [None, NamedSplit("train"), "train", "test"]) def test_datasetdict_from_csv_split(split, csv_path, tmp_path): if split: path = {split: csv_path} else: split = "train" path = {"train": csv_path, "test": csv_path} cache_dir = tmp_path / "cache" expected_features = { "col_1": "int64", "col_2": "int64", "col_3": "float64" } dataset = DatasetDict.from_csv(path, cache_dir=cache_dir) _check_csv_datasetdict(dataset, expected_features,