def test_dataset_to_csv_multiproc(csv_path, tmp_path):
    """Round-trip a CSV file through the reader and a two-process writer.

    The file at ``csv_path`` is loaded as the ``"train"`` split with
    ``CsvDatasetReader``, written back out with ``CsvDatasetWriter`` using
    ``num_proc=2`` and ``index=False``, and the rows of the written file are
    compared against the rows of the source file.
    """
    cache_dir = tmp_path / "cache"
    output_csv = os.path.join(cache_dir, "tmp.csv")
    dataset = CsvDatasetReader({"train": csv_path}, cache_dir=cache_dir).read()
    CsvDatasetWriter(dataset["train"], output_csv, index=False, num_proc=2).write()

    expected_rows = list(iter_csv_file(csv_path))
    written_rows = list(iter_csv_file(output_csv))
    # Compare the complete row lists rather than zipping the two iterators:
    # zip() stops at the shorter input, so an empty or truncated output file
    # (or one with extra rows) would otherwise pass unnoticed.
    assert written_rows == expected_rows


def test_dataset_to_csv_invalidproc(csv_path, tmp_path):
    """Check that building a ``CsvDatasetWriter`` with ``num_proc=0`` raises ``ValueError``."""
    cache_root = tmp_path / "cache"
    target_path = os.path.join(cache_root, "tmp.csv")

    reader = CsvDatasetReader({"train": csv_path}, cache_dir=cache_root)
    train_split = reader.read()["train"]

    # Only the constructor runs inside the context manager; write() is never
    # called, so the ValueError has to come from construction itself.
    with pytest.raises(ValueError):
        CsvDatasetWriter(train_split, target_path, index=False, num_proc=0)