def test_dataset_to_json_lines(self, lines, load_json_function, dataset):
    """Round-trip ``dataset`` through ``JsonDatasetWriter`` into memory.

    The dataset is written to an in-memory byte buffer with the given
    ``lines`` flag, read back with ``load_json_function``, and the loaded
    value is checked to be a list whose first element is a dict and whose
    length is 10.
    """
    with io.BytesIO() as stream:
        writer = JsonDatasetWriter(dataset, stream, lines=lines)
        writer.write()
        # Rewind so the loader reads from the start of what was written.
        stream.seek(0)
        records = load_json_function(stream)

    assert isinstance(records, list)
    assert isinstance(records[0], dict)
    # NOTE(review): 10 presumably matches the row count of the `dataset`
    # fixture, which is defined outside this view -- confirm.
    assert len(records) == 10
def test_dataset_to_json_compression(self, shared_datadir, tmp_path_factory, extension, compression, dataset):
    """Compare a compressed JSON export against a stored reference file.

    ``dataset`` is written with ``JsonDatasetWriter`` using ``compression``
    to ``test.json.<extension>`` in a fresh temporary directory. Both that
    file and ``test_file.json.<extension>`` from ``shared_datadir`` are read
    through ``fsspec`` with ``compression="infer"`` and their contents must
    be equal.
    """

    def read_inferred(location):
        # Let fsspec choose the decompressor from the file name.
        with fsspec.open(location, "rb", compression="infer") as handle:
            return handle.read()

    export_path = tmp_path_factory.mktemp("data") / f"test.json.{extension}"
    reference_path = str(shared_datadir / f"test_file.json.{extension}")

    writer = JsonDatasetWriter(dataset, export_path, compression=compression)
    writer.write()

    exported_bytes = read_inferred(export_path)
    reference_bytes = read_inferred(reference_path)
    assert exported_bytes == reference_bytes
def test_dataset_to_json_orient_multiproc(self, orient, container, keys, len_at, dataset):
    """Check the shape of a non-lines JSON export written with ``num_proc=2``.

    ``dataset`` is written to an in-memory buffer with ``lines=False`` and
    the given ``orient``, then reloaded with ``load_json``. The loaded value
    must be an instance of ``container``. When ``keys`` is truthy, the keys
    of the top-level object (if ``container`` is ``dict``) or of its first
    element must equal ``keys``; otherwise neither may expose a ``keys``
    attribute. The value at ``len_at`` (or the top-level value when
    ``len_at`` is falsy) must have length 10.
    """
    with io.BytesIO() as stream:
        writer = JsonDatasetWriter(dataset, stream, lines=False, orient=orient, num_proc=2)
        writer.write()
        # Rewind so the loader reads from the start of what was written.
        stream.seek(0)
        loaded = load_json(stream)

    assert isinstance(loaded, container)

    if keys:
        # The keyed mapping is the top-level object for dict containers,
        # otherwise the first element of the sequence.
        keyed = loaded if container is dict else loaded[0]
        assert keyed.keys() == keys
    else:
        assert not (hasattr(loaded, "keys") or hasattr(loaded[0], "keys"))

    sized = loaded[len_at] if len_at else loaded
    assert len(sized) == 10
def test_dataset_to_json_orient_invalidproc(self, dataset):
    """Expect ``ValueError`` when the writer is built with ``num_proc=0``.

    Only the constructor is invoked here; ``write()`` is never called, so
    the error must be raised while the writer is being constructed.
    """
    with pytest.raises(ValueError), io.BytesIO() as stream:
        JsonDatasetWriter(dataset, stream, num_proc=0)