def test_download_from_nested_dataset(self):
    """Check that a single `download` call on a recursive dataset writes every file, including those in
    sub-directories, to the given local directory.
    """
    nested_dataset = Dataset(path=self._create_nested_cloud_dataset(), recursive=True)

    # Each entry pairs a file's path components (relative to the download directory) with its expected text.
    expected_files = (
        (("file_0.txt",), "[1, 2, 3]"),
        (("file_1.txt",), "[4, 5, 6]"),
        (("sub-directory", "sub_file.txt"), "['a', 'b', 'c']"),
        (("sub-directory", "sub-sub-directory", "sub_sub_file.txt"), "['blah', 'b', 'c']"),
    )

    with tempfile.TemporaryDirectory() as download_directory:
        nested_dataset.download(local_directory=download_directory)

        # The checks must run while the temporary directory still exists.
        for path_components, expected_contents in expected_files:
            with open(os.path.join(download_directory, *path_components)) as f:
                self.assertEqual(f.read(), expected_contents)
def test_download_from_nested_dataset_with_no_local_directory_given(self):
    """Test that, when downloading all files from a nested dataset and no local directory is given, the dataset
    structure is preserved in the temporary directory used.
    """
    dataset_path = self._create_nested_cloud_dataset()
    dataset = Dataset(path=dataset_path, recursive=True)

    # Create the temporary directory up front and patch `tempfile.TemporaryDirectory` to return it, so the
    # directory used during `dataset.download()` is known to the test and can be inspected afterwards.
    temporary_directory = tempfile.TemporaryDirectory()

    # The directory is not managed by a `with` block, so remove it explicitly when the test finishes (whether
    # it passes or fails) rather than leaking it until garbage collection or interpreter exit.
    self.addCleanup(temporary_directory.cleanup)

    with patch("tempfile.TemporaryDirectory", return_value=temporary_directory):
        dataset.download()

    # Each entry pairs a file's path components (relative to the download directory) with its expected text.
    expected_files = (
        (("file_0.txt",), "[1, 2, 3]"),
        (("file_1.txt",), "[4, 5, 6]"),
        (("sub-directory", "sub_file.txt"), "['a', 'b', 'c']"),
        (("sub-directory", "sub-sub-directory", "sub_sub_file.txt"), "['blah', 'b', 'c']"),
    )

    for path_components, expected_contents in expected_files:
        with open(os.path.join(temporary_directory.name, *path_components)) as f:
            self.assertEqual(f.read(), expected_contents)
def test_download(self):
    """Check that a single `download` call writes every file of a cloud dataset to the given local directory."""
    name = "another-dataset"
    client = GoogleCloudStorageClient()

    # Each entry is (filename, data uploaded as JSON, text expected in the downloaded file).
    fixtures = (
        ("file_0.txt", [1, 2, 3], "[1, 2, 3]"),
        ("file_1.txt", [4, 5, 6], "[4, 5, 6]"),
    )

    # Populate the cloud dataset before constructing the `Dataset` that points at it.
    for filename, data, _ in fixtures:
        client.upload_from_string(
            string=json.dumps(data),
            cloud_path=storage.path.generate_gs_path(TEST_BUCKET_NAME, name, filename),
        )

    cloud_dataset = Dataset(path=f"gs://{TEST_BUCKET_NAME}/{name}")

    with tempfile.TemporaryDirectory() as download_directory:
        cloud_dataset.download(local_directory=download_directory)

        # The checks must run while the temporary directory still exists.
        for filename, _, expected_contents in fixtures:
            with open(os.path.join(download_directory, filename)) as f:
                self.assertEqual(f.read(), expected_contents)