class FileSystemTest(StorageTestCase):
    """Recorded tests for DataLake file system (container) level operations:
    create/delete, metadata, access policies, listing, and path enumeration.

    Uses the legacy ``@record`` framework: ``setUp`` builds a shared
    ``DataLakeServiceClient`` and ``tearDown`` best-effort deletes every file
    system the test created (live runs only).
    """

    def setUp(self):
        super(FileSystemTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        # Names registered by _get_file_system_reference; cleaned up in tearDown.
        self.test_file_systems = []

    def tearDown(self):
        if not self.is_playback():
            for file_system in self.test_file_systems:
                # Per-item try/except: a single failed (or already-deleted)
                # file system must not abort cleanup of the remaining ones.
                # (The original wrapped the whole loop in one bare ``except:``,
                # which stopped cleanup at the first failure and also swallowed
                # KeyboardInterrupt/SystemExit.)
                try:
                    self.dsc.delete_file_system(file_system)
                except Exception:
                    pass
        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Return a unique file system name and register it for tearDown cleanup."""
        file_system_name = self.get_resource_name(prefix)
        self.test_file_systems.append(file_system_name)
        return file_system_name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create (and register for cleanup) a file system; return its client."""
        return self.dsc.create_file_system(
            self._get_file_system_reference(prefix=file_system_prefix))

    # --Tests-------------------------------------------------------------------
    @record
    def test_create_file_system(self):
        # Arrange
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system()

        # Assert
        self.assertTrue(created)

    @record
    def test_create_file_system_with_metadata(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system(metadata=metadata)

        # Assert: metadata round-trips through the service unchanged.
        meta = file_system_client.get_file_system_properties().metadata
        self.assertTrue(created)
        self.assertDictEqual(meta, metadata)

    @record
    def test_set_file_system_acl(self):
        # Act
        file_system = self._create_file_system()
        access_policy = AccessPolicy(
            permission=FileSystemSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            start=datetime.utcnow())
        signed_identifier1 = {'testid': access_policy}
        response = file_system.set_file_system_access_policy(
            signed_identifier1, public_access=PublicAccess.FileSystem)

        self.assertIsNotNone(response.get('etag'))
        self.assertIsNotNone(response.get('last_modified'))

        acl1 = file_system.get_file_system_access_policy()
        self.assertIsNotNone(acl1['public_access'])
        self.assertEqual(len(acl1['signed_identifiers']), 1)

        # If set signed identifier without specifying the access policy
        # then it will be default to None
        signed_identifier2 = {'testid': access_policy, 'test2': access_policy}
        file_system.set_file_system_access_policy(signed_identifier2)
        acl2 = file_system.get_file_system_access_policy()
        self.assertIsNone(acl2['public_access'])
        self.assertEqual(len(acl2['signed_identifiers']), 2)

    # NOTE(review): method name has a typo ("systemss"); kept as-is because the
    # @record framework keys recorded cassettes on the test name — renaming
    # would break playback.
    @record
    def test_list_file_systemss(self):
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.create_file_system(file_system_name)

        # Act
        file_systems = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertIsNotNone(file_systems[0].has_immutability_policy)
        self.assertIsNotNone(file_systems[0].has_legal_hold)

    @record
    def test_delete_file_system_with_existing_file_system(self):
        # Arrange
        file_system = self._create_file_system()

        # Act
        deleted = file_system.delete_file_system()

        # Assert: delete returns None on success.
        self.assertIsNone(deleted)

    @record
    def test_delete_none_existing_file_system(self):
        fake_file_system_client = self.dsc.get_file_system_client("fakeclient")

        # Act
        with self.assertRaises(ResourceNotFoundError):
            fake_file_system_client.delete_file_system(
                match_condition=MatchConditions.IfMissing)

    @record
    def test_list_file_systems_with_include_metadata(self):
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(
            self.dsc.list_file_systems(
                name_starts_with=file_system.file_system_name,
                include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @record
    def test_list_file_systems_by_page(self):
        # Arrange
        for i in range(0, 6):
            self._create_file_system(
                file_system_prefix="filesystem{}".format(i))

        # Act: take the first page of 3 from the paged listing.
        file_systems = list(
            next(
                self.dsc.list_file_systems(results_per_page=3,
                                           name_starts_with="file",
                                           include_metadata=True).by_page()))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 3)

    @record
    def test_list_file_systems_with_public_access(self):
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(
            self.dsc.list_file_systems(
                name_starts_with=file_system.file_system_name,
                include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        # "blob"-level public access surfaces as PublicAccess.File in DataLake.
        self.assertTrue(file_systems[0].public_access is PublicAccess.File)

    @record
    def test_get_file_system_properties(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system = self._create_file_system()
        file_system.set_file_system_metadata(metadata)

        # Act
        props = file_system.get_file_system_properties()

        # Assert
        self.assertIsNotNone(props)
        self.assertDictEqual(props.metadata, metadata)
        self.assertIsNotNone(props.has_immutability_policy)
        self.assertIsNotNone(props.has_legal_hold)

    @record
    def test_list_paths(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_which_are_all_files(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_with_max_per_page(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        # First page of 2, then resume with the continuation token for 4 more.
        generator1 = file_system.get_paths(max_results=2, upn=True).by_page()
        paths1 = list(next(generator1))

        generator2 = file_system.get_paths(max_results=4, upn=True)\
            .by_page(continuation_token=generator1.continuation_token)
        paths2 = list(next(generator2))

        self.assertEqual(len(paths1), 2)
        self.assertEqual(len(paths2), 4)

    @record
    def test_list_paths_under_specific_path(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            file_client = subdir.create_file("file")
            file_client.append_data(b"abced", 0, 5)
            file_client.flush_data(5)

        generator1 = file_system.get_paths(path="dir10/subdir",
                                           max_results=2,
                                           upn=True).by_page()
        paths = list(next(generator1))

        self.assertEqual(len(paths), 2)
        self.assertEqual(paths[0].content_length, 5)

    @record
    def test_list_paths_recursively(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            subdir.create_file("file")

        paths = list(file_system.get_paths(recursive=True, upn=True))

        # there are 24 subpaths in total
        self.assertEqual(len(paths), 24)

    @record
    def test_create_directory_from_file_system_client(self):
        # Arrange
        file_system = self._create_file_system()
        file_system.create_directory("dir1/dir2")

        # Non-recursive listing only surfaces the top-level directory.
        paths = list(file_system.get_paths(recursive=False, upn=True))

        self.assertEqual(len(paths), 1)
        self.assertEqual(paths[0].name, "dir1")

    @record
    def test_create_file_from_file_system_client(self):
        # Arrange
        file_system = self._create_file_system()
        file_system.create_file("dir1/dir2/file")

        # Recursive listing surfaces dir1, dir1/dir2, and the file itself.
        paths = list(file_system.get_paths(recursive=True, upn=True))

        self.assertEqual(len(paths), 3)
        self.assertEqual(paths[0].name, "dir1")
        self.assertEqual(paths[2].is_directory, False)
class FileTest(StorageTestCase): def _setUp(self, account_name, account_key): url = self._get_account_url(account_name) self.dsc = DataLakeServiceClient(url, credential=account_key, logging_enable=True) self.config = self.dsc._config self.file_system_name = self.get_resource_name('filesystem') if not self.is_playback(): file_system = self.dsc.get_file_system_client( self.file_system_name) try: file_system.create_file_system(timeout=5) except ResourceExistsError: pass def tearDown(self): if not self.is_playback(): try: self.dsc.delete_file_system(self.file_system_name) except: pass return super(FileTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX): directory_name = self.get_resource_name(prefix) return directory_name def _get_file_reference(self, prefix=TEST_FILE_PREFIX): file_name = self.get_resource_name(prefix) return file_name def _create_file_system(self): return self.dsc.create_file_system(self._get_file_system_reference()) def _create_directory_and_return_client(self, directory=None): directory_name = directory if directory else self._get_directory_reference( ) directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() return directory_client def _create_file_and_return_client(self, directory="", file=None): if directory: self._create_directory_and_return_client(directory) if not file: file = self._get_file_reference() file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file) file_client.create_file() return file_client # --Helpers----------------------------------------------------------------- @DataLakePreparer() def test_create_file(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange directory_name = self._get_directory_reference() # Create a directory 
to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') response = file_client.create_file() # Assert self.assertIsNotNone(response) @DataLakePreparer() def test_file_exists(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange directory_name = self._get_directory_reference() directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client1 = directory_client.get_file_client('filename') file_client2 = directory_client.get_file_client('nonexistentfile') file_client1.create_file() self.assertTrue(file_client1.exists()) self.assertFalse(file_client2.exists()) @DataLakePreparer() def test_create_file_using_oauth_token_credential( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange file_name = self._get_file_reference() token_credential = self.generate_oauth_token() # Create a directory to put the file under that file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token_credential) response = file_client.create_file() # Assert self.assertIsNotNone(response) @DataLakePreparer() def test_create_file_with_existing_name(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange file_client = self._create_file_and_return_client() with self.assertRaises(ResourceExistsError): # if the file exists then throw error # if_none_match='*' is to make sure no existing file file_client.create_file(match_condition=MatchConditions.IfMissing) @DataLakePreparer() def test_create_file_with_lease_id(self, datalake_storage_account_name, 
datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange directory_name = self._get_directory_reference() directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') # Act file_client.create_file() lease = file_client.acquire_lease() create_resp = file_client.create_file(lease=lease) # Assert file_properties = file_client.get_file_properties() self.assertIsNotNone(file_properties) self.assertEqual(file_properties.etag, create_resp.get('etag')) self.assertEqual(file_properties.last_modified, create_resp.get('last_modified')) @DataLakePreparer() def test_create_file_under_root_directory(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # get a file client to interact with the file under root directory file_client = self.dsc.get_file_client(self.file_system_name, "filename") response = file_client.create_file() # Assert self.assertIsNotNone(response) @DataLakePreparer() def test_append_data(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') file_client.create_file() # Act response = file_client.append_data(b'abc', 0, 3) self.assertIsNotNone(response) @DataLakePreparer() def test_append_empty_data(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) file_client = self._create_file_and_return_client() # Act file_client.flush_data(0) file_props 
= file_client.get_file_properties() self.assertIsNotNone(file_props['size'], 0) @DataLakePreparer() def test_flush_data(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') file_client.create_file() # Act file_client.append_data(b'abc', 0, 3) response = file_client.flush_data(3) # Assert prop = file_client.get_file_properties() self.assertIsNotNone(response) self.assertEqual(prop['size'], 3) @DataLakePreparer() def test_flush_data_with_match_condition(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') resp = file_client.create_file() # Act file_client.append_data(b'abc', 0, 3) # flush is successful because it isn't touched response = file_client.flush_data( 3, etag=resp['etag'], match_condition=MatchConditions.IfNotModified) file_client.append_data(b'abc', 3, 3) with self.assertRaises(ResourceModifiedError): # flush is unsuccessful because extra data were appended. 
file_client.flush_data( 6, etag=resp['etag'], match_condition=MatchConditions.IfNotModified) @pytest.mark.live_test_only @DataLakePreparer() def test_upload_data_to_none_existing_file(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # parallel upload cannot be recorded directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') data = self.get_random_bytes(200 * 1024) file_client.upload_data(data, overwrite=True, max_concurrency=3) downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @pytest.mark.live_test_only @DataLakePreparer() def test_upload_data_in_substreams(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # parallel upload cannot be recorded directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') # Get 16MB data data = self.get_random_bytes(16 * 1024 * 1024) # Ensure chunk size is greater than threshold (8MB > 4MB) - for optimized upload file_client.upload_data(data, chunk_size=8 * 1024 * 1024, overwrite=True, max_concurrency=3) downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) # Run on single thread file_client.upload_data(data, chunk_size=8 * 1024 * 1024, overwrite=True) downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @DataLakePreparer() def test_upload_data_to_existing_file(self, 
datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() # create an existing file file_client = directory_client.get_file_client('filename') file_client.create_file() file_client.append_data(b"abc", 0) file_client.flush_data(3) # to override the existing file data = self.get_random_bytes(100) with self.assertRaises(HttpResponseError): file_client.upload_data(data, max_concurrency=5) file_client.upload_data(data, overwrite=True, max_concurrency=5) downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @DataLakePreparer() def test_upload_data_to_existing_file_with_content_settings( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() # create an existing file file_client = directory_client.get_file_client('filename') etag = file_client.create_file()['etag'] # to override the existing file data = self.get_random_bytes(100) content_settings = ContentSettings(content_language='spanish', content_disposition='inline') file_client.upload_data(data, max_concurrency=5, content_settings=content_settings, etag=etag, match_condition=MatchConditions.IfNotModified) downloaded_data = file_client.download_file().readall() properties = file_client.get_file_properties() self.assertEqual(data, downloaded_data) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @DataLakePreparer() def 
test_upload_data_to_existing_file_with_permission_and_umask( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() # create an existing file file_client = directory_client.get_file_client('filename') etag = file_client.create_file()['etag'] # to override the existing file data = self.get_random_bytes(100) file_client.upload_data(data, overwrite=True, max_concurrency=5, permissions='0777', umask="0000", etag=etag, match_condition=MatchConditions.IfNotModified) downloaded_data = file_client.download_file().readall() prop = file_client.get_access_control() # Assert self.assertEqual(data, downloaded_data) self.assertEqual(prop['permissions'], 'rwxrwxrwx') @DataLakePreparer() def test_read_file(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # doanload the data and make sure it is the same as uploaded data downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @pytest.mark.live_test_only @DataLakePreparer() def test_read_file_with_user_delegation_key(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # SAS URL is calculated from storage key, so this test runs live only # Create file file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # Upload data to file file_client.append_data(data, 0, len(data)) 
file_client.flush_data(len(data)) # Get user delegation key token_credential = self.generate_oauth_token() service_client = DataLakeServiceClient( self._get_account_url(datalake_storage_account_name), credential=token_credential, logging_enable=True) user_delegation_key = service_client.get_user_delegation_key( datetime.utcnow(), datetime.utcnow() + timedelta(hours=1)) sas_token = generate_file_sas( file_client.account_name, file_client.file_system_name, None, file_client.path_name, user_delegation_key, permission=FileSasPermissions(read=True, create=True, write=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) # doanload the data and make sure it is the same as uploaded data new_file_client = DataLakeFileClient( self._get_account_url(datalake_storage_account_name), file_client.file_system_name, file_client.path_name, credential=sas_token, logging_enable=True) downloaded_data = new_file_client.download_file().readall() self.assertEqual(data, downloaded_data) @pytest.mark.live_test_only @DataLakePreparer() def test_set_acl_with_user_delegation_key(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # SAS URL is calculated from storage key, so this test runs live only # Create file file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # Upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # Get user delegation key token_credential = self.generate_oauth_token() service_client = DataLakeServiceClient( self._get_account_url(datalake_storage_account_name), credential=token_credential) user_delegation_key = service_client.get_user_delegation_key( datetime.utcnow(), datetime.utcnow() + timedelta(hours=1)) sas_token = generate_file_sas( file_client.account_name, file_client.file_system_name, None, file_client.path_name, user_delegation_key, permission=FileSasPermissions(execute=True, 
manage_access_control=True, manage_ownership=True), expiry=datetime.utcnow() + timedelta(hours=1), ) # doanload the data and make sure it is the same as uploaded data new_file_client = DataLakeFileClient( self._get_account_url(datalake_storage_account_name), file_client.file_system_name, file_client.path_name, credential=sas_token) acl = 'user::rwx,group::r-x,other::rwx' owner = "dc140949-53b7-44af-b1e9-cd994951fb86" new_file_client.set_access_control(acl=acl, owner=owner) access_control = new_file_client.get_access_control() self.assertEqual(acl, access_control['acl']) self.assertEqual(owner, access_control['owner']) @pytest.mark.live_test_only @DataLakePreparer() def test_preauthorize_user_with_user_delegation_key( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # SAS URL is calculated from storage key, so this test runs live only # Create file file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # Upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) file_client.set_access_control( owner="68390a19-a643-458b-b726-408abf67b4fc", permissions='0777') acl = file_client.get_access_control() # Get user delegation key token_credential = self.generate_oauth_token() service_client = DataLakeServiceClient( self._get_account_url(datalake_storage_account_name), credential=token_credential) user_delegation_key = service_client.get_user_delegation_key( datetime.utcnow(), datetime.utcnow() + timedelta(hours=1)) sas_token = generate_file_sas( file_client.account_name, file_client.file_system_name, None, file_client.path_name, user_delegation_key, permission=FileSasPermissions(read=True, write=True, manage_access_control=True, manage_ownership=True), expiry=datetime.utcnow() + timedelta(hours=1), preauthorized_agent_object_id="68390a19-a643-458b-b726-408abf67b4fc" ) # doanload the data and make sure it is the 
same as uploaded data new_file_client = DataLakeFileClient( self._get_account_url(datalake_storage_account_name), file_client.file_system_name, file_client.path_name, credential=sas_token) acl = new_file_client.set_access_control(permissions='0777') self.assertIsNotNone(acl) @DataLakePreparer() def test_read_file_into_file(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # doanload the data into a file and make sure it is the same as uploaded data with open(FILE_PATH, 'wb') as stream: download = file_client.download_file(max_concurrency=2) download.readinto(stream) # Assert with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(data, actual) @DataLakePreparer() def test_read_file_to_text(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) file_client = self._create_file_and_return_client() data = self.get_random_text_data(1024) # upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # doanload the text data and make sure it is the same as uploaded data downloaded_data = file_client.download_file( max_concurrency=2, encoding="utf-8").readall() # Assert self.assertEqual(data, downloaded_data) @pytest.mark.live_test_only @DataLakePreparer() def test_account_sas(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # SAS URL is calculated from storage key, so this test runs live only file_name = self._get_file_reference() # create a file under root directory self._create_file_and_return_client(file=file_name) # generate a token with file level read 
# permission  -- tail of a source comment split by the mangled line break
# (reads "...generate a token with account-level read permission").
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )
        # Both the raw token string and its AzureSasCredential wrapper must
        # behave identically: reads succeed, writes fail (read-only SAS).
        for credential in [token, AzureSasCredential(token)]:
            # read the created file which is under root directory
            file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name,
                                             credential=credential)
            properties = file_client.get_file_properties()
            # make sure we can read the file properties
            self.assertIsNotNone(properties)
            # try to write to the created file with the token
            with self.assertRaises(HttpResponseError):
                file_client.append_data(b"abcd", 0, 4)

    @DataLakePreparer()
    def test_account_sas_raises_if_sas_already_in_uri(self, datalake_storage_account_name,
                                                      datalake_storage_account_key):
        """An AzureSasCredential combined with a URL that already carries a 'sig' query must raise ValueError."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        with self.assertRaises(ValueError):
            DataLakeFileClient(self.dsc.url + "?sig=foo", self.file_system_name, "foo",
                               credential=AzureSasCredential("?foo=bar"))

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_file_sas_only_applies_to_file_level(self, datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """A file-scoped SAS grants file read/write but not file-system or directory operations."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name, file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name,
                                         directory_name + '/' + file_name, credential=token)
        properties = file_client.get_file_properties()
        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name,
                                                   directory_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @DataLakePreparer()
    def test_delete_file(self, datalake_storage_account_name, datalake_storage_account_key):
        """Deleting a file makes subsequent property reads raise ResourceNotFoundError."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_delete_file_with_if_unmodified_since(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """delete_file succeeds when the if_unmodified_since condition matches the current last_modified."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        prop = file_client.get_file_properties()
        file_client.delete_file(if_unmodified_since=prop['last_modified'])

        # Make sure the file was deleted
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_set_access_control(self, datalake_storage_account_name, datalake_storage_account_key):
        """set_access_control with octal permissions returns a non-empty response."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_with_match_conditions(self, datalake_storage_account_name,
                                                      datalake_storage_account_key):
        """IfMissing on an existing file must fail with ResourceModifiedError."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceModifiedError):
            file_client.set_access_control(permissions='0777',
                                           match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_get_access_control(self, datalake_storage_account_name, datalake_storage_account_key):
        """get_access_control returns a response after permissions have been set."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_get_access_control_with_if_modified_since(self, datalake_storage_account_name,
                                                       datalake_storage_account_key):
        """get_access_control succeeds when if_modified_since is earlier than last_modified."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        prop = file_client.get_file_properties()

        # Act
        response = file_client.get_access_control(
            if_modified_since=prop['last_modified'] - timedelta(minutes=15))

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_recursive(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Recursive ACL set on a single file touches exactly one file and no directories."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.set_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_update_access_control_recursive(self, datalake_storage_account_name,
                                             datalake_storage_account_key):
        """Recursive ACL update on a single file touches exactly one file and applies the ACL."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.update_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_remove_access_control_recursive(self, datalake_storage_account_name,
                                             datalake_storage_account_key):
        """Recursive ACL removal of named entries succeeds on a single file."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        acl = "mask," + "default:user,default:group," + \
              "user:ec3595d6-2c17-4696-8caa-7e139758d24a,group:ec3595d6-2c17-4696-8caa-7e139758d24a," + \
              "default:user:ec3595d6-2c17-4696-8caa-7e139758d24a,default:group:ec3595d6-2c17-4696-8caa-7e139758d24a"
        file_client = self._create_file_and_return_client()
        summary = file_client.remove_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)

    @DataLakePreparer()
    def test_get_properties(self, datalake_storage_account_name, datalake_storage_account_key):
        """File properties reflect size, metadata and content settings after append + flush."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file("newfile", metadata=metadata,
                                                   content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_set_expiry(self, datalake_storage_account_name, datalake_storage_account_key):
        """Setting an absolute expiry populates expiry_time on the file's properties."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()
        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        expires_on = datetime.utcnow() + timedelta(hours=1)
        file_client = directory_client.create_file("newfile", metadata=metadata,
                                                   content_settings=content_settings)
        file_client.set_file_expiry("Absolute", expires_on=expires_on)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.expiry_time)

    @DataLakePreparer()
    def test_rename_file_with_non_used_name(self, datalake_storage_account_name,
                                            datalake_storage_account_key):
        """Renaming to an unused name preserves content and updates path_name."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname')
        data = new_client.download_file().readall()

        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_system_sas(self, datalake_storage_account_name,
                                              datalake_storage_account_key):
        """A file-system-scoped SAS is sufficient to create, write and rename a file."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # sas token is calculated from storage key, so live only
        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            FileSystemSasPermissions(write=True, read=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname')
        data = new_client.download_file().readall()

        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_sas(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        """Rename works with file-scoped SAS tokens: one for the source, one appended for the destination."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "oldfile",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        new_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "newname",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        # destination SAS is carried in the rename target's query string
        new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname' + '?'
                                             + new_token)
        data = new_client.download_file().readall()

        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_with_account_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Rename via an account-level SAS; currently skipped due to a service bug."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        pytest.skip("service bug")
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(object=True),
            AccountSasPermissions(write=True, read=True, create=True, delete=True),
            datetime.utcnow() + timedelta(hours=5),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname')
        data = new_client.download_file().readall()

        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Renaming onto an existing file overwrites that file's content."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # create the existing file
        existing_file_client = self._create_file_and_return_client(file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url  # NOTE(review): unused local, kept as-is

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name + '/'
                                             + existing_file_client.path_name)
        new_url = file_client.url  # NOTE(review): unused local, kept as-is
        data = new_client.download_file().readall()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @DataLakePreparer()
    def test_rename_file_will_not_change_existing_directory(self, datalake_storage_account_name,
                                                            datalake_storage_account_key):
        """Renaming one file must not disturb sibling files in either directory."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.download_file().readall(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.download_file().readall()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.download_file().readall()
        self.assertEqual(f4_data, b"file4")

        # the source of the rename no longer exists
        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
class DirectoryTest(StorageTestCase):
    """Integration tests for DataLake Gen2 directory operations (create, delete,
    ACLs, rename, SAS) against a per-test-class file system."""

    def setUp(self):
        super(DirectoryTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')
        # Only touch the live service outside playback mode.
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        # Best-effort cleanup of every file system created during the test run.
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
                for file_system in self.dsc.list_file_systems():
                    self.dsc.delete_file_system(file_system.name)
            except:
                # NOTE(review): bare except deliberately swallows cleanup
                # failures; narrowing to Exception would be safer.
                pass
        return super(DirectoryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        # Unique directory name for this test run.
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _create_directory_and_get_directory_client(self, directory_name=None):
        # Create (or reuse the given name for) a directory and return its client.
        directory_name = directory_name if directory_name else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_system(self):
        # NOTE(review): _get_file_system_reference is not defined on this class
        # (it exists on FileSystemTest); this helper appears unused here.
        return self.dsc.create_file_system(self._get_file_system_reference())

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_directory(self):
        """create_directory with content settings returns a truthy response."""
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        # Act
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        created = directory_client.create_directory(content_settings=content_settings)

        # Assert
        self.assertTrue(created)

    @record
    def test_using_oauth_token_credential_to_create_directory(self):
        """An OAuth token credential can create a directory."""
        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token_credential = self.generate_oauth_token()
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name,
                                                   directory_name,
                                                   credential=token_credential)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)

    @record
    def test_create_directory_with_match_conditions(self):
        """create_directory with IfMissing succeeds when the directory does not exist."""
        # Arrange
        directory_name = self._get_directory_reference()

        # Act
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        created = directory_client.create_directory(match_condition=MatchConditions.IfMissing)

        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_permission(self):
        """Permissions and umask given at creation are reflected in the access control."""
        # Arrange
        directory_name = self._get_directory_reference()

        # Act
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        created = directory_client.create_directory(permissions="rwxr--r--", umask="0000")

        prop = directory_client.get_access_control()

        # Assert
        self.assertTrue(created)
        self.assertEqual(prop['permissions'], 'rwxr--r--')

    @record
    def test_create_directory_with_content_settings(self):
        """create_directory accepts ContentSettings."""
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        # Act
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        created = directory_client.create_directory(content_settings=content_settings)

        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_metadata(self):
        """create_directory accepts metadata; properties can then be fetched."""
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        # Act
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        created = directory_client.create_directory(metadata=metadata)

        properties = directory_client.get_directory_properties()

        # Assert
        self.assertTrue(created)

    @record
    def test_delete_directory(self):
        """delete_directory returns None on success."""
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        response = directory_client.delete_directory()

        # Assert
        self.assertIsNone(response)

    @record
    def test_delete_directory_with_if_modified_since(self):
        """Deleting with if_modified_since equal to last_modified must fail."""
        # Arrange
        directory_name = self._get_directory_reference()

        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        prop = directory_client.get_directory_properties()

        with self.assertRaises(ResourceModifiedError):
            directory_client.delete_directory(if_modified_since=prop['last_modified'])

    @record
    def test_create_sub_directory_and_delete_sub_directory(self):
        """Sub-directories can be created and deleted through the parent's client."""
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}

        # Create a directory first, to prepare for creating sub directory
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        # Create sub directory from the current directory
        sub_directory_name = 'subdir'
        sub_directory_created = directory_client.create_sub_directory(sub_directory_name)

        # to make sure the sub directory was indeed created by get sub_directory properties from sub directory client
        sub_directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name + '/' + sub_directory_name)
        sub_properties = sub_directory_client.get_directory_properties()

        # Assert
        self.assertTrue(sub_directory_created)
        self.assertTrue(sub_properties)

        # Act
        directory_client.delete_sub_directory(sub_directory_name)
        with self.assertRaises(ResourceNotFoundError):
            sub_directory_client.get_directory_properties()

    @record
    def test_set_access_control(self):
        """set_access_control with octal permissions returns a response."""
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        response = directory_client.set_access_control(permissions='0777')
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_set_access_control_with_acl(self):
        """An ACL set on a directory is read back verbatim."""
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        acl = 'user::rwx,group::r-x,other::rwx'
        directory_client.set_access_control(acl=acl)
        access_control = directory_client.get_access_control()

        # Assert
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @record
    def test_set_access_control_if_none_modified(self):
        """set_access_control succeeds under IfNotModified with the creation etag."""
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        resp = directory_client.create_directory()

        response = directory_client.set_access_control(
            permissions='0777', etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        """get_access_control returns a response for a directory created with permissions."""
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata, permissions='0777')

        # Act
        response = directory_client.get_access_control()
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control_with_match_conditions(self):
        """get_access_control under IfNotModified reports the expected rwxrwxrwx permissions."""
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        resp = directory_client.create_directory(permissions='0777', umask='0000')

        # Act
        response = directory_client.get_access_control(
            etag=resp['etag'], match_condition=MatchConditions.IfNotModified)
        # Assert
        self.assertIsNotNone(response)
        # NOTE(review): assertEquals is a deprecated alias of assertEqual.
        self.assertEquals(response['permissions'], 'rwxrwxrwx')

    @record
    def test_rename_from(self):
        """_rename_path moves an existing directory to a new name with metadata."""
        metadata = {'hello': 'world', 'number': '42'}
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        new_name = "newname"

        new_directory_client = self.dsc.get_directory_client(self.file_system_name, new_name)

        new_directory_client._rename_path('/' + self.file_system_name + '/' + directory_name,
                                          metadata=metadata)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_shorter_directory_to_longer_directory(self):
        """Renaming a shorter path onto a longer one; currently skipped."""
        # TODO: investigate why rename shorter path to a longer one does not work
        pytest.skip("")
        directory_name = self._get_directory_reference()
        self._create_directory_and_get_directory_client(directory_name="old")

        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory("newsub")

        # NOTE(review): renames `directory_name`, not the "old" directory created
        # above — looks inconsistent; test is skipped, so unverified.
        new_directory_client._rename_path('/' + self.file_system_name + '/' + directory_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_directory_in_another_file_system(self):
        """A directory can be renamed across file systems via _rename_path."""
        # create a file dir1 under file system1
        old_file_system_name = "oldfilesystem"
        old_dir_name = "olddir"
        old_client = self.dsc.get_file_system_client(old_file_system_name)
        old_client.create_file_system()
        old_client.create_directory(old_dir_name)

        # create a dir2 under file system2
        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory("newsub")

        # rename dir1 under file system1 to dir2 under file system2
        new_directory_client._rename_path('/' + old_file_system_name + '/' + old_dir_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)
        self.dsc.delete_file_system(old_file_system_name)

    @record
    def test_rename_to_an_existing_directory_in_another_file_system(self):
        """rename_directory onto an existing directory in another file system replaces it."""
        # create a file dir1 under file system1
        destination_file_system_name = "destfilesystem"
        destination_dir_name = "destdir"
        fs_client = self.dsc.get_file_system_client(destination_file_system_name)
        fs_client.create_file_system()
        destination_directory_client = fs_client.create_directory(destination_dir_name)

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory("subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + destination_dir_name)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        # NOTE(review): assertEquals is a deprecated alias of assertEqual.
        self.assertEquals(res.url, destination_directory_client.url)

    @record
    def test_rename_with_none_existing_destination_condition_and_source_unmodified_condition(self):
        """Rename succeeds with IfMissing on the destination and IfNotModified on the source."""
        non_existing_dir_name = "nonexistingdir"

        # create a file system1
        destination_file_system_name = self._get_directory_reference("destfilesystem")
        fs_client = self.dsc.get_file_system_client(destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory("subdir")

        # rename dir2 under file system2 to a non existing directory under file system1,
        # when dir1 does not exist and dir2 wasn't modified
        etag = source_directory_client.get_directory_properties()['etag']
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name,
            match_condition=MatchConditions.IfMissing,
            source_etag=etag,
            source_match_condition=MatchConditions.IfNotModified)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEquals(non_existing_dir_name, res.path_name)

    @record
    def test_rename_to_an_non_existing_directory_in_another_file_system(self):
        """rename_directory to a fresh name in another file system succeeds."""
        # create a file dir1 under file system1
        destination_file_system_name = self._get_directory_reference("destfilesystem")
        non_existing_dir_name = "nonexistingdir"
        fs_client = self.dsc.get_file_system_client(destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory("subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEquals(non_existing_dir_name, res.path_name)

    @record
    def test_rename_directory_to_non_empty_directory(self):
        """Renaming onto a non-empty directory; currently skipped."""
        # TODO: investigate why rename non empty dir doesn't work
        pytest.skip("")
        dir1 = self._create_directory_and_get_directory_client("dir1")
        dir1.create_sub_directory("subdir")

        dir2 = self._create_directory_and_get_directory_client("dir2")
        dir2.rename_directory(dir1.file_system_name + '/' + dir1.path_name)

        with self.assertRaises(HttpResponseError):
            dir2.get_directory_properties()

    @record
    def test_get_properties(self):
        """Directory properties surface the metadata supplied at creation."""
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        properties = directory_client.get_directory_properties()
        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.metadata)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])

    @record
    def test_using_directory_sas_to_read(self):
        """A directory-scoped read SAS can fetch the directory's access control."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        client = self._create_directory_and_get_directory_client()
        directory_name = client.path_name

        # generate a token with directory level read permission
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        access_control = directory_client.get_access_control()

        self.assertIsNotNone(access_control)

    @record
    def test_using_directory_sas_to_create(self):
        """A directory-scoped create SAS can create the directory."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(create=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)
class FileSystemTest(StorageTestCase):
    """Recorded live-service tests for DataLake file-system (container) operations.

    NOTE(review): HEAD of this file defines an earlier ``FileSystemTest`` class with a
    ``@record``/setUp-based harness; this later definition shadows it at import time —
    confirm the duplication is intentional.
    """

    def _setUp(self, account_name, account_key):
        # Per-test setup invoked explicitly by each test (the @DataLakePreparer()
        # decorator supplies the account name/key arguments).
        url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(url, account_key)
        self.config = self.dsc._config
        # file systems created during the test; deleted in tearDown
        self.test_file_systems = []

    def tearDown(self):
        """Best-effort cleanup of file systems created during the test (live runs only)."""
        if not self.is_playback():
            try:
                for file_system in self.test_file_systems:
                    self.dsc.delete_file_system(file_system)
            # NOTE(review): bare except is deliberate best-effort cleanup, but it also
            # hides unexpected failures — consider narrowing to `except Exception`.
            except:
                pass
        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        # Generate a unique file-system name and register it for tearDown cleanup.
        file_system_name = self.get_resource_name(prefix)
        self.test_file_systems.append(file_system_name)
        return file_system_name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        # Create (on the service) and return a FileSystemClient with a unique name.
        return self.dsc.create_file_system(self._get_file_system_reference(prefix=file_system_prefix))

    # --Helpers-----------------------------------------------------------------

    @DataLakePreparer()
    def test_create_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """create_file_system() on a fresh name succeeds and reports creation."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system()

        # Assert
        self.assertTrue(created)

    @DataLakePreparer()
    def test_file_system_exists(self, datalake_storage_account_name, datalake_storage_account_key):
        """exists() is True for a created file system and False for an unknown one."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client1 = self.dsc.get_file_system_client(file_system_name)
        file_system_client2 = self.dsc.get_file_system_client("nonexistentfs")
        file_system_client1.create_file_system()

        self.assertTrue(file_system_client1.exists())
        self.assertFalse(file_system_client2.exists())

    @DataLakePreparer()
    def test_create_file_system_with_metadata(self, datalake_storage_account_name,
                                              datalake_storage_account_key):
        """Metadata passed at creation round-trips through get_file_system_properties()."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system(metadata=metadata)

        # Assert
        meta = file_system_client.get_file_system_properties().metadata
        self.assertTrue(created)
        self.assertDictEqual(meta, metadata)

    @DataLakePreparer()
    def test_set_file_system_acl(self, datalake_storage_account_name, datalake_storage_account_key):
        """Setting an access policy round-trips; omitting it resets public access to None."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Act
        file_system = self._create_file_system()
        access_policy = AccessPolicy(permission=FileSystemSasPermissions(read=True),
                                     expiry=datetime.utcnow() + timedelta(hours=1),
                                     start=datetime.utcnow())
        signed_identifier1 = {'testid': access_policy}
        response = file_system.set_file_system_access_policy(signed_identifier1,
                                                             public_access=PublicAccess.FileSystem)

        self.assertIsNotNone(response.get('etag'))
        self.assertIsNotNone(response.get('last_modified'))

        acl1 = file_system.get_file_system_access_policy()
        self.assertIsNotNone(acl1['public_access'])
        self.assertEqual(len(acl1['signed_identifiers']), 1)

        # If set signed identifier without specifying the access policy then it will be default to None
        signed_identifier2 = {'testid': access_policy, 'test2': access_policy}
        file_system.set_file_system_access_policy(signed_identifier2)
        acl2 = file_system.get_file_system_access_policy()
        self.assertIsNone(acl2['public_access'])
        self.assertEqual(len(acl2['signed_identifiers']), 2)

    @DataLakePreparer()
    def test_list_file_systemss(self, datalake_storage_account_name, datalake_storage_account_key):
        """list_file_systems() includes a newly created file system.

        NOTE(review): method name has a trailing-'s' typo; left as-is because test
        recordings are keyed by test name.
        """
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.create_file_system(file_system_name)

        # Act
        file_systems = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertIsNotNone(file_systems[0].has_immutability_policy)
        self.assertIsNotNone(file_systems[0].has_legal_hold)

    @DataLakePreparer()
    def test_rename_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Service-level _rename_file_system: success, name collision, and missing source."""
        # Playback-only: feature not broadly enabled on live accounts.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        old_name1 = self._get_file_system_reference(prefix="oldcontainer1")
        old_name2 = self._get_file_system_reference(prefix="oldcontainer2")
        new_name = self._get_file_system_reference(prefix="newcontainer")
        filesystem1 = self.dsc.create_file_system(old_name1)
        self.dsc.create_file_system(old_name2)

        new_filesystem = self.dsc._rename_file_system(name=old_name1, new_name=new_name)
        # renaming onto an existing name must fail
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=old_name2, new_name=new_name)
        # the old name no longer resolves
        with self.assertRaises(HttpResponseError):
            filesystem1.get_file_system_properties()
        # renaming a nonexistent source must fail
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name="badfilesystem", new_name="filesystem")
        self.assertEqual(new_name, new_filesystem.get_file_system_properties().name)

    @DataLakePreparer()
    def test_rename_file_system_with_file_system_client(self, datalake_storage_account_name,
                                                        datalake_storage_account_key):
        """Client-level _rename_file_system variants — skipped until feature is enabled."""
        pytest.skip("Feature not yet enabled. Make sure to record this test once enabled.")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        old_name1 = self._get_file_system_reference(prefix="oldcontainer1")
        old_name2 = self._get_file_system_reference(prefix="oldcontainer2")
        new_name = self._get_file_system_reference(prefix="newcontainer")
        bad_name = self._get_file_system_reference(prefix="badcontainer")
        filesystem1 = self.dsc.create_file_system(old_name1)
        file_system2 = self.dsc.create_file_system(old_name2)
        bad_file_system = self.dsc.get_file_system_client(bad_name)

        new_filesystem = filesystem1._rename_file_system(new_name=new_name)
        with self.assertRaises(HttpResponseError):
            file_system2._rename_file_system(new_name=new_name)
        with self.assertRaises(HttpResponseError):
            filesystem1.get_file_system_properties()
        with self.assertRaises(HttpResponseError):
            bad_file_system._rename_file_system(new_name="filesystem")
        self.assertEqual(new_name, new_filesystem.get_file_system_properties().name)

    @DataLakePreparer()
    def test_rename_file_system_with_source_lease(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """Rename requires the correct source lease; no lease or a bad lease id fails."""
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        old_name = self._get_file_system_reference(prefix="old")
        new_name = self._get_file_system_reference(prefix="new")
        filesystem = self.dsc.create_file_system(old_name)
        filesystem_lease_id = filesystem.acquire_lease()

        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=old_name, new_name=new_name)
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=old_name, new_name=new_name, lease="bad_id")
        new_filesystem = self.dsc._rename_file_system(name=old_name, new_name=new_name,
                                                      lease=filesystem_lease_id)
        self.assertEqual(new_name, new_filesystem.get_file_system_properties().name)

    @DataLakePreparer()
    def test_undelete_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """A soft-deleted file system can be listed with include_deleted and restored."""
        # Needs soft delete enabled account.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        name = self._get_file_system_reference()
        filesystem_client = self.dsc.create_file_system(name)

        # Act
        filesystem_client.delete_file_system()
        # to make sure the filesystem deleted
        with self.assertRaises(ResourceNotFoundError):
            filesystem_client.get_file_system_properties()

        filesystem_list = list(self.dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(filesystem_list) >= 1)

        restored_version = 0
        for filesystem in filesystem_list:
            # find the deleted filesystem and restore it
            if filesystem.deleted and filesystem.name == filesystem_client.file_system_name:
                restored_fs_client = self.dsc.undelete_file_system(
                    filesystem.name, filesystem.deleted_version,
                    new_name="restored" + name + str(restored_version))
                restored_version += 1

                # to make sure the deleted filesystem is restored
                props = restored_fs_client.get_file_system_properties()
                self.assertIsNotNone(props)

    @DataLakePreparer()
    def test_restore_to_existing_file_system(self, datalake_storage_account_name,
                                             datalake_storage_account_key):
        """Restoring a deleted file system onto an existing name must fail."""
        # Needs soft delete enabled account.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # get an existing filesystem
        existing_name = self._get_file_system_reference(prefix="existing2")
        name = self._get_file_system_reference(prefix="filesystem2")
        existing_filesystem_client = self.dsc.create_file_system(existing_name)
        filesystem_client = self.dsc.create_file_system(name)

        # Act
        filesystem_client.delete_file_system()
        # to make sure the filesystem deleted
        with self.assertRaises(ResourceNotFoundError):
            filesystem_client.get_file_system_properties()

        filesystem_list = list(self.dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(filesystem_list) >= 1)

        for filesystem in filesystem_list:
            # find the deleted filesystem and restore it
            if filesystem.deleted and filesystem.name == filesystem_client.file_system_name:
                with self.assertRaises(HttpResponseError):
                    self.dsc.undelete_file_system(
                        filesystem.name, filesystem.deleted_version,
                        new_name=existing_filesystem_client.file_system_name)

    @DataLakePreparer()
    def test_restore_file_system_with_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Undelete via an account-SAS-authenticated service client (skipped: live + soft delete)."""
        pytest.skip(
            "We are generating a SAS token therefore play only live but we also need a soft delete enabled account.")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(service=True, file_system=True),
            AccountSasPermissions(read=True, write=True, list=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )
        dsc = DataLakeServiceClient(self.dsc.url, token)
        name = self._get_file_system_reference(prefix="filesystem")
        filesystem_client = dsc.create_file_system(name)
        filesystem_client.delete_file_system()
        # to make sure the filesystem is deleted
        with self.assertRaises(ResourceNotFoundError):
            filesystem_client.get_file_system_properties()

        filesystem_list = list(dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(filesystem_list) >= 1)

        restored_version = 0
        for filesystem in filesystem_list:
            # find the deleted filesystem and restore it
            if filesystem.deleted and filesystem.name == filesystem_client.file_system_name:
                restored_fs_client = dsc.undelete_file_system(
                    filesystem.name, filesystem.deleted_version,
                    new_name="restored" + name + str(restored_version))
                restored_version += 1

                # to make sure the deleted filesystem is restored
                props = restored_fs_client.get_file_system_properties()
                self.assertIsNotNone(props)

    @DataLakePreparer()
    def test_delete_file_system_with_existing_file_system(self, datalake_storage_account_name,
                                                          datalake_storage_account_key):
        """delete_file_system() on an existing file system returns None."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()

        # Act
        deleted = file_system.delete_file_system()

        # Assert
        self.assertIsNone(deleted)

    @DataLakePreparer()
    def test_delete_none_existing_file_system(self, datalake_storage_account_name,
                                              datalake_storage_account_key):
        """Deleting a nonexistent file system raises ResourceNotFoundError."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        fake_file_system_client = self.dsc.get_file_system_client("fakeclient")

        # Act
        with self.assertRaises(ResourceNotFoundError):
            fake_file_system_client.delete_file_system(match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_list_file_systems_with_include_metadata(self, datalake_storage_account_name,
                                                     datalake_storage_account_key):
        """list_file_systems(include_metadata=True) returns the stored metadata."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @DataLakePreparer()
    def test_list_file_systems_by_page(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        """Paged listing honors results_per_page (first page has at least 3 entries)."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        for i in range(0, 6):
            self._create_file_system(file_system_prefix="filesystem{}".format(i))

        # Act
        file_systems = list(next(self.dsc.list_file_systems(
            results_per_page=3,
            name_starts_with="file",
            include_metadata=True).by_page()))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 3)

    @DataLakePreparer()
    def test_list_file_systems_with_public_access(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """A file system created with blob-level public access lists as PublicAccess.File."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        # "blob" is the blob-service access level, surfaced as PublicAccess.File in DataLake
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        self.assertTrue(file_systems[0].public_access is PublicAccess.File)

    @DataLakePreparer()
    def test_get_file_system_properties(self, datalake_storage_account_name,
                                        datalake_storage_account_key):
        """get_file_system_properties() returns metadata and policy/hold flags."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system = self._create_file_system()
        file_system.set_file_system_metadata(metadata)

        # Act
        props = file_system.get_file_system_properties()

        # Assert
        self.assertIsNotNone(props)
        self.assertDictEqual(props.metadata, metadata)
        self.assertIsNotNone(props.has_immutability_policy)
        self.assertIsNotNone(props.has_legal_hold)

    @DataLakePreparer()
    def test_service_client_session_closes_after_filesystem_creation(self,
                                                                     datalake_storage_account_name,
                                                                     datalake_storage_account_key):
        """A second service client keeps working after another client's session is closed."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        dsc2 = DataLakeServiceClient(self.dsc.url, credential=datalake_storage_account_key)
        with DataLakeServiceClient(self.dsc.url, credential=datalake_storage_account_key) as ds_client:
            fs1 = ds_client.create_file_system(self._get_file_system_reference(prefix="fs1"))
            fs1.delete_file_system()
        dsc2.create_file_system(self._get_file_system_reference(prefix="fs2"))
        dsc2.close()

    @DataLakePreparer()
    def test_list_paths(self, datalake_storage_account_name, datalake_storage_account_key):
        """get_paths() lists the six created directories with typed last_modified."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)
        self.assertTrue(isinstance(paths[0].last_modified, datetime))

    @DataLakePreparer()
    def test_list_paths_which_are_all_files(self, datalake_storage_account_name,
                                            datalake_storage_account_key):
        """get_paths() lists files as well as directories."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @DataLakePreparer()
    def test_list_paths_with_max_per_page(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Paged get_paths() resumes correctly from a continuation token."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        generator1 = file_system.get_paths(max_results=2, upn=True).by_page()
        paths1 = list(next(generator1))

        generator2 = file_system.get_paths(max_results=4, upn=True)\
            .by_page(continuation_token=generator1.continuation_token)
        paths2 = list(next(generator2))

        self.assertEqual(len(paths1), 2)
        self.assertEqual(len(paths2), 4)

    @DataLakePreparer()
    def test_list_paths_under_specific_path(self, datalake_storage_account_name,
                                            datalake_storage_account_key):
        """get_paths(path=...) scopes listing to one subtree and reports content_length."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client("dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            file_client = subdir.create_file("file")
            file_client.append_data(b"abced", 0, 5)
            file_client.flush_data(5)

        generator1 = file_system.get_paths(path="dir10/subdir", max_results=2, upn=True).by_page()
        paths = list(next(generator1))

        self.assertEqual(len(paths), 2)
        self.assertEqual(paths[0].content_length, 5)

    @DataLakePreparer()
    def test_list_paths_recursively(self, datalake_storage_account_name,
                                    datalake_storage_account_key):
        """get_paths(recursive=True) walks every nested directory and file."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client("dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            subdir.create_file("file")

        paths = list(file_system.get_paths(recursive=True, upn=True))

        # there are 24 subpaths in total
        self.assertEqual(len(paths), 24)

    @DataLakePreparer()
    def test_list_paths_pages_correctly(self, datalake_storage_account_name,
                                        datalake_storage_account_key):
        """Twelve paths split into exactly two pages of six; a third page raises StopIteration."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system(file_system_prefix="fs1")
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))

        generator = file_system.get_paths(max_results=6, upn=True).by_page()
        paths1 = list(next(generator))
        paths2 = list(next(generator))

        with self.assertRaises(StopIteration):
            list(next(generator))

        self.assertEqual(len(paths1), 6)
        self.assertEqual(len(paths2), 6)

    @DataLakePreparer()
    def test_create_directory_from_file_system_client(self, datalake_storage_account_name,
                                                      datalake_storage_account_key):
        """create_directory('dir1/dir2') creates intermediate levels; non-recursive list sees dir1."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        file_system.create_directory("dir1/dir2")

        paths = list(file_system.get_paths(recursive=False, upn=True))

        self.assertEqual(len(paths), 1)
        self.assertEqual(paths[0].name, "dir1")

    @DataLakePreparer()
    def test_create_file_from_file_system_client(self, datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """create_file('dir1/dir2/file') yields two directories plus one file recursively."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        file_system.create_file("dir1/dir2/file")

        paths = list(file_system.get_paths(recursive=True, upn=True))

        self.assertEqual(len(paths), 3)
        self.assertEqual(paths[0].name, "dir1")
        self.assertEqual(paths[2].is_directory, False)

    @DataLakePreparer()
    def test_get_root_directory_client(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        """The root directory client can set and read back a POSIX ACL string."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_system = self._create_file_system()
        directory_client = file_system._get_root_directory_client()

        acl = 'user::rwx,group::r-x,other::rwx'
        directory_client.set_access_control(acl=acl)
        access_control = directory_client.get_access_control()

        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_file_system_sessions_closes_properly(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """Nested context managers on file/directory clients close without error."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_client = self._create_file_system("fenrhxsbfvsdvdsvdsadb")
        with file_system_client as fs_client:
            with fs_client.get_file_client("file1.txt") as f_client:
                f_client.create_file()
            with fs_client.get_file_client("file2.txt") as f_client:
                f_client.create_file()
            with fs_client.get_directory_client("file1") as f_client:
                f_client.create_directory()
            with fs_client.get_directory_client("file2") as f_client:
                f_client.create_directory()
class FileTest(StorageTestCase):
    """Recorded live-service tests for DataLake file (path) operations.

    NOTE: the class continues beyond this view — the final test's trailing comment is
    truncated here and completed later in the file.
    """

    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(url,
                                         credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
                                         logging_enable=True)
        self.config = self.dsc._config
        # one shared file system per test run; created live, reused in playback
        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        """Best-effort deletion of the shared file system (live runs only)."""
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            # NOTE(review): bare except is deliberate best-effort cleanup; consider
            # narrowing to `except Exception`.
            except:
                pass
        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        # Unique directory name for this test.
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        # Unique file name for this test.
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        # Create (or reuse the given) directory name under the shared file system.
        directory_name = directory if directory else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        # Create a file (optionally under a freshly created directory) and return its client.
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        """create_file() under a directory returns a non-empty response."""
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_using_oauth_token_credential(self):
        """create_file() authenticated with an OAuth token credential succeeds."""
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Create a directory to put the file under that
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name,
                                         credential=token_credential)

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_existing_name(self):
        """Conditional create on an existing file raises ResourceExistsError."""
        # Arrange
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceExistsError):
            # if the file exists then throw error
            # if_none_match='*' is to make sure no existing file
            file_client.create_file(match_condition=MatchConditions.IfMissing)

    @record
    def test_create_file_with_lease_id(self):
        """Re-creating a leased file with the lease succeeds and refreshes etag/last_modified."""
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')

        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        """create_file() directly under the file-system root succeeds."""
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name, "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        """append_data() on a fresh file returns a non-empty response."""
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @record
    def test_append_empty_data(self):
        """Flushing zero bytes leaves a readable, empty file."""
        file_client = self._create_file_and_return_client()

        # Act
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        # NOTE(review): assertIsNotNone's second argument is the failure *message*, so
        # the 0 here is not compared — assertEqual(file_props['size'], 0) was probably
        # intended. Left unchanged to preserve recorded behavior; confirm and fix.
        self.assertIsNotNone(file_props['size'], 0)

    @record
    def test_flush_data(self):
        """flush_data() commits appended bytes; size reflects the flushed length."""
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        # Assert
        prop = file_client.get_file_properties()
        self.assertIsNotNone(response)
        self.assertEqual(prop['size'], 3)

    @record
    def test_flush_data_with_match_condition(self):
        """flush_data() with etag precondition succeeds once, then fails after mutation."""
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        resp = file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        response = file_client.flush_data(3, etag=resp['etag'],
                                          match_condition=MatchConditions.IfNotModified)

        file_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            # flush is unsuccessful because extra data were appended.
            file_client.flush_data(6, etag=resp['etag'],
                                   match_condition=MatchConditions.IfNotModified)

    def test_upload_data_to_none_existing_file(self):
        """Parallel upload_data() to a new file round-trips the bytes (live only)."""
        # parallel upload cannot be recorded
        if TestMode.need_recording_file(self.test_mode):
            return

        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        data = self.get_random_bytes(200 * 1024)

        # Act
        file_client.upload_data(data, overwrite=True, max_concurrency=3)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_upload_data_to_existing_file(self):
        """upload_data() requires overwrite=True when the file already has content."""
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        file_client.append_data(b"abc", 0)
        file_client.flush_data(3)

        # to override the existing file
        data = self.get_random_bytes(100)
        with self.assertRaises(HttpResponseError):
            file_client.upload_data(data, max_concurrency=5)
        file_client.upload_data(data, overwrite=True, max_concurrency=5)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_upload_data_to_existing_file_with_content_settings(self):
        """upload_data() with an etag precondition applies the given ContentSettings."""
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)
        content_settings = ContentSettings(content_language='spanish', content_disposition='inline')

        file_client.upload_data(data, max_concurrency=5,
                                content_settings=content_settings, etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        properties = file_client.get_file_properties()

        self.assertEqual(data, downloaded_data)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_upload_data_to_existing_file_with_permission_and_umask(self):
        """upload_data() with permissions='0777'/umask='0000' yields rwxrwxrwx."""
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)

        file_client.upload_data(data, overwrite=True, max_concurrency=5,
                                permissions='0777', umask="0000", etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        prop = file_client.get_access_control()

        # Assert
        self.assertEqual(data, downloaded_data)
        self.assertEqual(prop['permissions'], 'rwxrwxrwx')

    @record
    def test_read_file(self):
        """download_file().readall() returns exactly the uploaded bytes."""
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_read_file_with_user_delegation_key(self):
        """A user-delegation-key SAS can read back the uploaded bytes (live only)."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(self._get_oauth_account_url(),
                                               credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(), datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # download the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(self._get_account_url(),
                                             file_client.file_system_name,
                                             file_client.path_name,
                                             credential=sas_token)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_read_file_into_file(self):
        """download_file().readinto(stream) writes the uploaded bytes to a local file."""
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data into a file and make sure it is the same as uploaded data
        with open(FILE_PATH, 'wb') as stream:
            download = file_client.download_file(max_concurrency=2)
            download.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(data, actual)

    @record
    def test_read_file_to_text(self):
        """download_file(encoding='utf-8') decodes the uploaded text."""
        file_client = self._create_file_and_return_client()
        data = self.get_random_text_data(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the text data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file(max_concurrency=2, encoding="utf-8").readall()

        # Assert
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        """A read-only account SAS can read properties but not append data (live only)."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        with self.assertRaises(HttpResponseError):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        """A file-level SAS with read+write can read properties and append (live only)."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name, file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(response)

        # the token is for
file level, so users are not supposed to have access to file system level operations file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token) with self.assertRaises(ClientAuthenticationError): file_system_client.get_file_system_properties() # the token is for file level, so users are not supposed to have access to directory level operations directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name, directory_name, credential=token) with self.assertRaises(ClientAuthenticationError): directory_client.get_directory_properties() @record def test_delete_file(self): # Arrange file_client = self._create_file_and_return_client() file_client.delete_file() with self.assertRaises(ResourceNotFoundError): file_client.get_file_properties() @record def test_delete_file_with_if_unmodified_since(self): # Arrange file_client = self._create_file_and_return_client() prop = file_client.get_file_properties() file_client.delete_file(if_unmodified_since=prop['last_modified']) # Make sure the file was deleted with self.assertRaises(ResourceNotFoundError): file_client.get_file_properties() @record def test_set_access_control(self): file_client = self._create_file_and_return_client() response = file_client.set_access_control(permissions='0777') # Assert self.assertIsNotNone(response) @record def test_set_access_control_with_match_conditions(self): file_client = self._create_file_and_return_client() with self.assertRaises(ResourceModifiedError): file_client.set_access_control( permissions='0777', match_condition=MatchConditions.IfMissing) @record def test_get_access_control(self): file_client = self._create_file_and_return_client() file_client.set_access_control(permissions='0777') # Act response = file_client.get_access_control() # Assert self.assertIsNotNone(response) @record def test_get_access_control_with_if_modified_since(self): file_client = self._create_file_and_return_client() file_client.set_access_control(permissions='0777') 
prop = file_client.get_file_properties() # Act response = file_client.get_access_control( if_modified_since=prop['last_modified'] - timedelta(minutes=15)) # Assert self.assertIsNotNone(response) @record def test_get_properties(self): # Arrange directory_client = self._create_directory_and_return_client() metadata = {'hello': 'world', 'number': '42'} content_settings = ContentSettings(content_language='spanish', content_disposition='inline') file_client = directory_client.create_file( "newfile", metadata=metadata, content_settings=content_settings) file_client.append_data(b"abc", 0, 3) file_client.flush_data(3) properties = file_client.get_file_properties() # Assert self.assertTrue(properties) self.assertEqual(properties.size, 3) self.assertEqual(properties.metadata['hello'], metadata['hello']) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @record def test_rename_file_with_non_used_name(self): file_client = self._create_file_and_return_client() data_bytes = b"abc" file_client.append_data(data_bytes, 0, 3) file_client.flush_data(3) new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname') data = new_client.download_file().readall() self.assertEqual(data, data_bytes) self.assertEqual(new_client.path_name, "newname") @record def test_rename_file_to_existing_file(self): # create the existing file existing_file_client = self._create_file_and_return_client( file="existingfile") existing_file_client.append_data(b"a", 0, 1) existing_file_client.flush_data(1) old_url = existing_file_client.url # prepare to rename the file to the existing file file_client = self._create_file_and_return_client() data_bytes = b"abc" file_client.append_data(data_bytes, 0, 3) file_client.flush_data(3) new_client = file_client.rename_file(file_client.file_system_name + '/' + existing_file_client.path_name) new_url = file_client.url data = new_client.download_file().readall() # the existing file was overridden 
self.assertEqual(data, data_bytes) @record def test_rename_file_will_not_change_existing_directory(self): # create none empty directory(with 2 files) dir1 = self._create_directory_and_return_client(directory="dir1") f1 = dir1.create_file("file1") f1.append_data(b"file1", 0, 5) f1.flush_data(5) f2 = dir1.create_file("file2") f2.append_data(b"file2", 0, 5) f2.flush_data(5) # create another none empty directory(with 2 files) dir2 = self._create_directory_and_return_client(directory="dir2") f3 = dir2.create_file("file3") f3.append_data(b"file3", 0, 5) f3.flush_data(5) f4 = dir2.create_file("file4") f4.append_data(b"file4", 0, 5) f4.flush_data(5) new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name) self.assertEqual(new_client.download_file().readall(), b"file3") # make sure the data in file2 and file4 weren't touched f2_data = f2.download_file().readall() self.assertEqual(f2_data, b"file2") f4_data = f4.download_file().readall() self.assertEqual(f4_data, b"file4") with self.assertRaises(HttpResponseError): f3.download_file().readall()
    def data_lake_service_sample(self):
        """End-to-end DataLakeServiceClient walkthrough used for doc snippets.

        The [START]/[END] markers delimit fragments that are extracted into
        the published documentation — keep the code between them intact.
        """
        # Instantiate a DataLakeServiceClient using a connection string
        # [START create_datalake_service_client]
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        # [END create_datalake_service_client]

        # Instantiate a DataLakeServiceClient using Azure Identity credentials.
        # [START create_datalake_service_client_oauth]
        from azure.identity import ClientSecretCredential
        token_credential = ClientSecretCredential(
            self.active_directory_tenant_id,
            self.active_directory_application_id,
            self.active_directory_application_secret,
        )
        datalake_service_client = DataLakeServiceClient(
            "https://{}.dfs.core.windows.net".format(self.account_name),
            credential=token_credential)
        # [END create_datalake_service_client_oauth]

        # get user delegation key (valid for one hour)
        # [START get_user_delegation_key]
        from datetime import datetime, timedelta
        user_delegation_key = datalake_service_client.get_user_delegation_key(
            datetime.utcnow(), datetime.utcnow() + timedelta(hours=1))
        # [END get_user_delegation_key]

        # Create file systems
        # [START create_file_system_from_service_client]
        datalake_service_client.create_file_system("filesystem")
        # [END create_file_system_from_service_client]
        file_system_client = datalake_service_client.create_file_system(
            "anotherfilesystem")

        # List file systems
        # [START list_file_systems]
        file_systems = datalake_service_client.list_file_systems()
        for file_system in file_systems:
            print(file_system.name)
        # [END list_file_systems]

        # Get Clients from DataLakeServiceClient
        file_system_client = datalake_service_client.get_file_system_client(
            file_system_client.file_system_name)
        # [START get_directory_client_from_service_client]
        directory_client = datalake_service_client.get_directory_client(
            file_system_client.file_system_name, "mydirectory")
        # [END get_directory_client_from_service_client]
        # [START get_file_client_from_service_client]
        file_client = datalake_service_client.get_file_client(
            file_system_client.file_system_name, "myfile")
        # [END get_file_client_from_service_client]

        # Create file and set properties
        metadata = {'hello': 'world', 'number': '42'}
        from azure.storage.filedatalake import ContentSettings
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client.create_file(content_settings=content_settings)
        file_client.set_metadata(metadata=metadata)
        file_props = file_client.get_file_properties()
        print(file_props.metadata)

        # Create file/directory and set properties
        directory_client.create_directory(content_settings=content_settings,
                                          metadata=metadata)
        dir_props = directory_client.get_directory_properties()
        print(dir_props.metadata)

        # Delete File Systems
        # [START delete_file_system_from_service_client]
        datalake_service_client.delete_file_system("filesystem")
        # [END delete_file_system_from_service_client]
        file_system_client.delete_file_system()
class LargeFileTest(StorageTestCase):
    """Tests for the largest supported append/upload block sizes.

    A PayloadDroppingPolicy is injected into the client pipeline so the huge
    request bodies are discarded instead of being sent over the wire; the
    tests assert against the sizes the policy observed.
    """

    def _setUp(self, account_name, account_key):
        # Build a service client whose pipeline drops payloads and records
        # the append sizes, then ensure the test file system exists.
        url = self.account_url(account_name, 'dfs')
        self.payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(account_name, account_key)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True,
                                         _additional_pipeline_policies=[
                                             self.payload_dropping_policy,
                                             credential_policy
                                         ])
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        # Best-effort cleanup of the test file system.
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
                # are not swallowed; cleanup failures must not mask test results.
                pass

        return super(LargeFileTest, self).tearDown()

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_append_large_stream_without_network(self, datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """A single append of LARGEST_BLOCK_SIZE is issued as one request."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name,
                                                         directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        data = LargeStream(LARGEST_BLOCK_SIZE)

        # Act
        response = file_client.append_data(data, 0, LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 1)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_large_stream_without_network(self, datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """upload_data of 2x LARGEST_BLOCK_SIZE splits into two max-size appends."""
        pytest.skip("Pypy3 on Linux failed somehow, skip for now to investigate")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name,
                                                         directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        length = 2 * LARGEST_BLOCK_SIZE
        data = LargeStream(length)

        # Act
        response = file_client.upload_data(data, length, overwrite=True,
                                           chunk_size=LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 2)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)
        self.assertEqual(self.payload_dropping_policy.append_sizes[1],
                         LARGEST_BLOCK_SIZE)
class StorageQuickQueryTest(StorageTestCase): def _setUp(self, account_name, account_key): url = self._get_account_url(account_name) self.dsc = DataLakeServiceClient(url, credential=account_key, logging_enable=True) self.config = self.dsc._config self.filesystem_name = self.get_resource_name('utqqcontainer') if not self.is_playback(): try: self.dsc.create_file_system(self.filesystem_name) except: pass def tearDown(self): if not self.is_playback(): try: self.dsc.delete_file_system(self.filesystem_name) except: pass return super(StorageQuickQueryTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_file_reference(self): return self.get_resource_name("csvfile") # -- Test cases for APIs supporting CPK ---------------------------------------------- @DataLakePreparer() def test_quick_query_readall(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) reader = file_client.query_file("SELECT * from BlobStorage", on_error=on_error) data = reader.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'\n')) @DataLakePreparer() def test_quick_query_datalake_expression(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(DATALAKE_CSV_DATA, overwrite=True) errors = [] def on_error(error): 
errors.append(error) input_format = DelimitedTextDialect(has_header=True) reader = file_client.query_file( "SELECT DataLakeStorage from DataLakeStorage", on_error=on_error, file_format=input_format) reader.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(reader), len(DATALAKE_CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) @DataLakePreparer() def test_quick_query_iter_records(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) reader = file_client.query_file("SELECT * from BlobStorage") read_records = reader.records() # Assert first line has header data = next(read_records) self.assertEqual(data, b'Service,Package,Version,RepoPath,MissingDocs') for record in read_records: data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')) @DataLakePreparer() def test_quick_query_readall_with_encoding(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) reader = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, encoding='utf-8') data = reader.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', 
b'\n').decode('utf-8')) @DataLakePreparer() def test_quick_query_iter_records_with_encoding( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) reader = file_client.query_file("SELECT * from BlobStorage", encoding='utf-8') data = '' for record in reader.records(): data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'').decode('utf-8')) @DataLakePreparer() def test_quick_query_iter_output_records_excluding_headers( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect(has_header=True) output_format = DelimitedTextDialect(has_header=False) reader = file_client.query_file("SELECT * from BlobStorage", file_format=input_format, output_format=output_format) read_records = reader.records() # Assert first line does not include header data = next(read_records) self.assertEqual( data, b'App Configuration,azure-data-appconfiguration,1,appconfiguration,FALSE' ) for record in read_records: data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')[44:]) @DataLakePreparer() def test_quick_query_iter_output_records_including_headers( self, datalake_storage_account_name, datalake_storage_account_key): 
self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect(has_header=True) reader = file_client.query_file("SELECT * from BlobStorage", file_format=input_format) read_records = reader.records() # Assert first line does not include header data = next(read_records) self.assertEqual(data, b'Service,Package,Version,RepoPath,MissingDocs') for record in read_records: data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')) @DataLakePreparer() def test_quick_query_iter_records_with_progress( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) reader = file_client.query_file("SELECT * from BlobStorage") data = b'' progress = 0 for record in reader.records(): if record: data += record progress += len(record) + 2 self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')) self.assertEqual(progress, len(reader)) @DataLakePreparer() def test_quick_query_readall_with_serialization_setting( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) 
errors = [] def on_error(error): errors.append(error) input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=False) output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) query_result = resp.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(resp), len(CSV_DATA)) self.assertEqual(query_result, CONVERTED_CSV_DATA) @DataLakePreparer() def test_quick_query_iter_records_with_serialization_setting( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=False) output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='%', escapechar='\\') reader = file_client.query_file("SELECT * from BlobStorage", file_format=input_format, output_format=output_format) data = [] for record in reader.records(): if record: data.append(record) self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(len(data), 33) @DataLakePreparer() def test_quick_query_readall_with_fatal_error_handler( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ 
b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) query_result = resp.readall() self.assertEqual(len(errors), 1) self.assertEqual(len(resp), 43) self.assertEqual(query_result, b'') @DataLakePreparer() def test_quick_query_iter_records_with_fatal_error_handler( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) 
input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) data = [] for record in resp.records(): data.append(record) self.assertEqual(len(errors), 1) self.assertEqual(len(resp), 43) self.assertEqual(data, [b'']) @DataLakePreparer() def test_quick_query_readall_with_fatal_error_handler_raise( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): raise Exception(error.description) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) with pytest.raises(Exception): query_result = resp.readall() @DataLakePreparer() def test_quick_query_iter_records_with_fatal_error_handler_raise( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange 
data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): raise Exception(error.description) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) with pytest.raises(Exception): for record in resp.records(): print(record) @DataLakePreparer() def test_quick_query_readall_with_fatal_error_ignore( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", file_format=input_format, output_format=output_format) query_result = resp.readall() @DataLakePreparer() def test_quick_query_iter_records_with_fatal_error_ignore( self, 
datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect(delimiter=';', quotechar="'", lineterminator='.', escapechar='\\') resp = file_client.query_file("SELECT * from BlobStorage", file_format=input_format, output_format=output_format) for record in resp.records(): print(record) @DataLakePreparer() def test_quick_query_readall_with_nonfatal_error_handler( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=True) output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='.', escapechar='\\', ) resp = file_client.query_file("SELECT RepoPath from BlobStorage", file_format=input_format, output_format=output_format, on_error=on_error) query_result = 
resp.readall() # the error is because that line only has one column self.assertEqual(len(errors), 1) self.assertEqual(len(resp), len(CSV_DATA)) self.assertTrue(len(query_result) > 0) @DataLakePreparer() def test_quick_query_iter_records_with_nonfatal_error_handler( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=True) output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='%', escapechar='\\', ) resp = file_client.query_file("SELECT RepoPath from BlobStorage", file_format=input_format, output_format=output_format, on_error=on_error) data = list(resp.records()) # the error is because that line only has one column self.assertEqual(len(errors), 1) self.assertEqual(len(resp), len(CSV_DATA)) self.assertEqual(len(data), 32) @DataLakePreparer() def test_quick_query_readall_with_nonfatal_error_ignore( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=True) output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='.', escapechar='\\', ) resp = file_client.query_file("SELECT RepoPath from BlobStorage", file_format=input_format, output_format=output_format) query_result = resp.readall() 
self.assertEqual(len(resp), len(CSV_DATA)) self.assertTrue(len(query_result) > 0) @DataLakePreparer() def test_quick_query_iter_records_with_nonfatal_error_ignore( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=True) output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='$', escapechar='\\', ) resp = file_client.query_file("SELECT RepoPath from BlobStorage", file_format=input_format, output_format=output_format) data = list(resp.records()) self.assertEqual(len(resp), len(CSV_DATA)) self.assertEqual(len(data), 32) @DataLakePreparer() def test_quick_query_readall_with_json_serialization_setting( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{\"name\": \"owner\", \"id\": 1}' data2 = b'{\"name2\": \"owner2\"}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedJsonDialect(delimiter='\n') output_format = DelimitedJsonDialect(delimiter=';') resp = file_client.query_file("SELECT name from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) query_result = resp.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(resp), len(data)) self.assertEqual(query_result, b'{"name":"owner"};{};{"name":"owner"};') @DataLakePreparer() def 
test_quick_query_iter_records_with_json_serialization_setting( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{\"name\": \"owner\", \"id\": 1}' data2 = b'{\"name2\": \"owner2\"}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedJsonDialect(delimiter='\n') output_format = DelimitedJsonDialect(delimiter=';') resp = file_client.query_file("SELECT name from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) listdata = list(resp.records()) self.assertEqual(len(errors), 0) self.assertEqual(len(resp), len(data)) self.assertEqual( listdata, [b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b'']) @DataLakePreparer() def test_quick_query_with_only_input_json_serialization_setting( self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data1 = b'{\"name\": \"owner\", \"id\": 1}' data2 = b'{\"name2\": \"owner2\"}' data = data1 + data2 + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedJsonDialect(delimiter='\n') output_format = None resp = file_client.query_file("SELECT name from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) query_result = resp.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(resp), len(data)) self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n') @DataLakePreparer() def 
test_quick_query_output_in_arrow_format(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange data = b'100,200,300,400\n300,400,500,600\n' # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) output_format = [ ArrowDialect(ArrowType.DECIMAL, name="abc", precision=4, scale=2) ] expected_result = b"/////3gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABAwAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEHJAAAABQAAAAEAAAAAAAAAAgADAAEAAgACAAAAAQAAAACAAAAAwAAAGFiYwD/////cAAAABAAAAAAAAoADgAGAAUACAAKAAAAAAMDABAAAAAAAAoADAAAAAQACAAKAAAAMAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAD/////iAAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADAwAYAAAAEAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAPAAAABAAAAABAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAQAAAAEAAAAAAAAAAAAAAAAAAACQAQAAAAAAAAAAAAAAAAAA" resp = file_client.query_file( "SELECT _2 from BlobStorage WHERE _1 > 250", on_error=on_error, output_format=output_format) query_result = base64.b64encode(resp.readall()) self.assertEqual(len(errors), 0) self.assertEqual(query_result, expected_result) @DataLakePreparer() def test_quick_query_input_in_arrow_format(self, datalake_storage_account_name, datalake_storage_account_key): self._setUp(datalake_storage_account_name, datalake_storage_account_key) # Arrange file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) errors = [] def on_error(error): errors.append(error) input_format = [ ArrowDialect(ArrowType.DECIMAL, name="abc", precision=4, scale=2) ] with self.assertRaises(ValueError): file_client.query_file("SELECT _2 from BlobStorage WHERE _1 > 250", on_error=on_error, file_format=input_format) 
@DataLakePreparer() def test_quick_query_input_in_parquet_format(self, datalake_storage_account_name, datalake_storage_account_key): # Arrange self._setUp(datalake_storage_account_name, datalake_storage_account_key) file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) expression = "select * from blobstorage where id < 1;" expected_data = b"0,mdifjt55.ea3,mdifjt55.ea3\n" parquet_path = os.path.abspath( os.path.join(os.path.abspath(__file__), "..", "./resources/parquet.parquet")) with open(parquet_path, "rb") as parquet_data: file_client.upload_data(parquet_data, overwrite=True) reader = file_client.query_file(expression, file_format=QuickQueryDialect.Parquet) real_data = reader.readall() self.assertEqual(real_data, expected_data) @DataLakePreparer() def test_quick_query_output_in_parquet_format( self, datalake_storage_account_name, datalake_storage_account_key): # Arrange self._setUp(datalake_storage_account_name, datalake_storage_account_key) file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) expression = "SELECT * from BlobStorage" parquet_path = os.path.abspath( os.path.join(os.path.abspath(__file__), "..", "./resources/parquet.parquet")) with open(parquet_path, "rb") as parquet_data: file_client.upload_data(parquet_data, overwrite=True) with self.assertRaises(ValueError): file_client.query_file(expression, file_format=QuickQueryDialect.Parquet, output_format=QuickQueryDialect.Parquet)
class FileTest(StorageTestCase):
    """Live/recorded tests for DataLake file operations (create, append, flush,
    read, SAS access, ACLs, delete, rename)."""

    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        # Only touch the live service outside playback mode.
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                # A leftover file system from a previous run is fine.
                pass

    def tearDown(self):
        if not self.is_playback():
            # Best-effort cleanup; was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit — narrowed to Exception.
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                pass
        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a unique directory name for this test run."""
        return self.get_resource_name(prefix)

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        """Return a unique file name for this test run."""
        return self.get_resource_name(prefix)

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        """Create (or reuse the name of) a directory and return its client."""
        directory_name = directory if directory else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        """Create a file (optionally under a freshly created directory) and
        return its client. With directory="" the path is '/<file>' (root)."""
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        # Act
        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_lease_id(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')

        # Act: re-creating a leased file must present the lease.
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        # Arrange: get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name, "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @record
    def test_flush_data(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act: data only becomes part of the file once flushed.
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)
        self.assertIsNotNone(response)

    @record
    def test_read_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.read_file()
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the read-only token: must fail
        with self.assertRaises(StorageErrorException):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name, file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            account_key=self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name,
                                         directory_name + '/' + file_name, credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have
        # access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have
        # access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name,
                                                   directory_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        # a deleted file has no properties
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        file_client = self._create_file_and_return_client()

        # NOTE: removed a stray line-continuation backslash that trailed this
        # statement in the original source.
        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(
            content_language='spanish',
            content_disposition='inline')
        file_client = directory_client.create_file("newfile",
                                                   metadata=metadata,
                                                   content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname')

        data = new_client.read_file()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        # create the existing file
        existing_file_client = self._create_file_and_return_client(file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)

        # prepare to rename the file to the existing file
        # (removed unused locals old_url/new_url that were never asserted on)
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(
            file_client.file_system_name + '/' + existing_file_client.path_name)
        data = new_client.read_file()

        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        # create non-empty directory (with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another non-empty directory (with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.read_file(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.read_file()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.read_file()
        self.assertEqual(f4_data, b"file4")

        # the renamed source no longer exists
        with self.assertRaises(HttpResponseError):
            f3.read_file()
class StorageQuickQueryTest(StorageTestCase): def setUp(self): super(StorageQuickQueryTest, self).setUp() url = self._get_account_url() self.dsc = DataLakeServiceClient(url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY, logging_enable=True) self.config = self.dsc._config self.filesystem_name = self.get_resource_name('utqqcontainer') if not self.is_playback(): try: self.dsc.create_file_system(self.filesystem_name) except: pass def tearDown(self): if not self.is_playback(): try: self.dsc.delete_file_system(self.filesystem_name) except: pass return super(StorageQuickQueryTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_file_reference(self): return self.get_resource_name("csvfile") # -- Test cases for APIs supporting CPK ---------------------------------------------- @record def test_quick_query_readall(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) reader = file_client.query_file("SELECT * from BlobStorage", on_error=on_error) data = reader.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'\n')) @record def test_quick_query_datalake_expression(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(DATALAKE_CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedTextDialect(has_header=True) reader = file_client.query_file("SELECT DataLakeStorage from DataLakeStorage", on_error=on_error, file_format=input_format) reader.readall() self.assertEqual(len(errors), 0) 
self.assertEqual(len(reader), len(DATALAKE_CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) @record def test_quick_query_iter_records(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) reader = file_client.query_file("SELECT * from BlobStorage") read_records = reader.records() # Assert first line has header data = next(read_records) self.assertEqual(data, b'Service,Package,Version,RepoPath,MissingDocs') for record in read_records: data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')) @record def test_quick_query_readall_with_encoding(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) reader = file_client.query_file("SELECT * from BlobStorage", on_error=on_error, encoding='utf-8') data = reader.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'\n').decode('utf-8')) @record def test_quick_query_iter_records_with_encoding(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) reader = file_client.query_file("SELECT * from BlobStorage", encoding='utf-8') data = '' for record in reader.records(): data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, 
CSV_DATA.replace(b'\r\n', b'').decode('utf-8')) @record def test_quick_query_iter_records_with_headers(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect(has_header=True) reader = file_client.query_file("SELECT * from BlobStorage", file_format=input_format) read_records = reader.records() # Assert first line does not include header data = next(read_records) self.assertEqual(data, b'App Configuration,azure-data-appconfiguration,1,appconfiguration,FALSE') for record in read_records: data += record self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')[44:]) @record def test_quick_query_iter_records_with_progress(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) reader = file_client.query_file("SELECT * from BlobStorage") data = b'' progress = 0 for record in reader.records(): if record: data += record progress += len(record) + 2 self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')) self.assertEqual(progress, len(reader)) @record def test_quick_query_readall_with_serialization_setting(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedTextDialect( delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=False ) output_format = DelimitedTextDialect( delimiter=';', 
quotechar="'", lineterminator='.', escapechar='\\' ) resp = file_client.query_file( "SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) query_result = resp.readall() self.assertEqual(len(errors), 0) self.assertEqual(len(resp), len(CSV_DATA)) self.assertEqual(query_result, CONVERTED_CSV_DATA) @record def test_quick_query_iter_records_with_serialization_setting(self): # Arrange # upload the csv file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(CSV_DATA, overwrite=True) input_format = DelimitedTextDialect( delimiter=',', quotechar='"', lineterminator='\n', escapechar='', has_header=False ) output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='%', escapechar='\\' ) reader = file_client.query_file( "SELECT * from BlobStorage", file_format=input_format, output_format=output_format) data = [] for record in reader.records(): if record: data.append(record) self.assertEqual(len(reader), len(CSV_DATA)) self.assertEqual(len(reader), reader._blob_query_reader._bytes_processed) self.assertEqual(len(data), 33) @record def test_quick_query_readall_with_fatal_error_handler(self): # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): 
errors.append(error) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='.', escapechar='\\' ) resp = file_client.query_file( "SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) query_result = resp.readall() self.assertEqual(len(errors), 1) self.assertEqual(len(resp), 43) self.assertEqual(query_result, b'') @record def test_quick_query_iter_records_with_fatal_error_handler(self): # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): errors.append(error) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='.', escapechar='\\' ) resp = file_client.query_file( "SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) data = [] for record in resp.records(): data.append(record) self.assertEqual(len(errors), 1) self.assertEqual(len(resp), 43) self.assertEqual(data, [b'']) @record def test_quick_query_readall_with_fatal_error_handler_raise(self): # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ 
b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): raise Exception(error.description) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='.', escapechar='\\' ) resp = file_client.query_file( "SELECT * from BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format) with pytest.raises(Exception): query_result = resp.readall() @record def test_quick_query_iter_records_with_fatal_error_handler_raise(self): # Arrange data1 = b'{name: owner}' data2 = b'{name2: owner2}' data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \ b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \ b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \ b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \ b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}' data = data1 + b'\n' + data2 + b'\n' + data1 # upload the json file file_name = self._get_file_reference() file_client = self.dsc.get_file_client(self.filesystem_name, file_name) file_client.upload_data(data, overwrite=True) errors = [] def on_error(error): raise Exception(error.description) input_format = DelimitedJsonDialect() output_format = DelimitedTextDialect( delimiter=';', quotechar="'", lineterminator='.', escapechar='\\' ) resp = file_client.query_file( "SELECT * from 
BlobStorage", on_error=on_error, file_format=input_format, output_format=output_format)
        # The fatal conversion error is surfaced lazily, when the result
        # stream is actually consumed.
        with pytest.raises(Exception):
            for record in resp.records():
                print(record)

    @record
    def test_quick_query_readall_with_fatal_error_ignore(self):
        """Query malformed JSON with no on_error handler and readall()."""
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        # JSON in, delimited text out; no on_error callback is registered,
        # so errors are ignored rather than reported.
        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            file_format=input_format,
            output_format=output_format)
        query_result = resp.readall()

    @record
    def test_quick_query_iter_records_with_fatal_error_ignore(self):
        """Query malformed JSON with no on_error handler, iterating records()."""
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        # NOTE(review): data3 is built but never used below (``data`` is
        # assembled from data1/data2 only) — looks like leftover fixture data.
        data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \
                b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \
                b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \
                b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \
                b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            file_format=input_format,
            output_format=output_format)
        for record in resp.records():
            print(record)

    @record
    def test_quick_query_readall_with_nonfatal_error_handler(self):
        """Non-fatal errors are delivered to on_error while readall() succeeds."""
        # Arrange
        # upload the csv file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(CSV_DATA, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )
        resp = file_client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=input_format,
            output_format=output_format,
            on_error=on_error)
        query_result = resp.readall()

        # the error is because that line only has one column
        self.assertEqual(len(errors), 1)
        self.assertEqual(len(resp), len(CSV_DATA))
        self.assertTrue(len(query_result) > 0)

    @record
    def test_quick_query_iter_records_with_nonfatal_error_handler(self):
        """Non-fatal errors are delivered to on_error while records() iterates."""
        # Arrange
        # upload the csv file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(CSV_DATA, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='%',
            escapechar='\\',
        )
        resp = file_client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=input_format,
            output_format=output_format,
            on_error=on_error)
        data = list(resp.records())

        # the error is because that line only has one column
        self.assertEqual(len(errors), 1)
        self.assertEqual(len(resp), len(CSV_DATA))
        self.assertEqual(len(data), 32)

    @record
    def test_quick_query_readall_with_nonfatal_error_ignore(self):
        """Without on_error, non-fatal errors are silently ignored (readall)."""
        # Arrange
        # upload the csv file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(CSV_DATA, overwrite=True)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )
        resp = file_client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=input_format,
            output_format=output_format)
        query_result = resp.readall()
        self.assertEqual(len(resp), len(CSV_DATA))
        self.assertTrue(len(query_result) > 0)

    @record
    def test_quick_query_iter_records_with_nonfatal_error_ignore(self):
        """Without on_error, non-fatal errors are silently ignored (records)."""
        # Arrange
        # upload the csv file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(CSV_DATA, overwrite=True)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='$',
            escapechar='\\',
        )
        resp = file_client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=input_format,
            output_format=output_format)
        data = list(resp.records())
        self.assertEqual(len(resp), len(CSV_DATA))
        self.assertEqual(len(data), 32)

    @record
    def test_quick_query_readall_with_json_serialization_setting(self):
        """JSON in, JSON out with a custom ';' record delimiter (readall)."""
        # Arrange
        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        output_format = DelimitedJsonDialect(delimiter=';')
        resp = file_client.query_file(
            "SELECT name from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)
        query_result = resp.readall()
        self.assertEqual(len(errors), 0)
        self.assertEqual(len(resp), len(data))
        # middle record has no "name" field and serializes to the empty object
        self.assertEqual(query_result, b'{"name":"owner"};{};{"name":"owner"};')

    @record
    def test_quick_query_iter_records_with_json_serialization_setting(self):
        """JSON in, JSON out with a custom ';' record delimiter (records)."""
        # Arrange
        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        output_format = DelimitedJsonDialect(delimiter=';')
        resp = file_client.query_file(
            "SELECT name from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)
        listdata = list(resp.records())
        self.assertEqual(len(errors), 0)
        self.assertEqual(len(resp), len(data))
        # records() yields a trailing empty record after the final delimiter
        self.assertEqual(listdata, [b'{"name":"owner"}',b'{}',b'{"name":"owner"}', b''])

    @record
    def test_quick_query_with_only_input_json_serialization_setting(self):
        """With output_format=None the service falls back to newline-delimited JSON."""
        # Arrange
        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        # records are concatenated without separators here (no b'\n' between)
        data = data1 + data2 + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        output_format = None
        resp = file_client.query_file(
            "SELECT name from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)
        query_result = resp.readall()
        self.assertEqual(len(errors), 0)
        self.assertEqual(len(resp), len(data))
        self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n')
class FileSystemTest(StorageTestCase): def setUp(self): super(FileSystemTest, self).setUp() url = self._get_account_url() self.dsc = DataLakeServiceClient(url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY) self.config = self.dsc._config self.test_file_systems = [] def tearDown(self): if not self.is_playback(): try: for file_system in self.test_file_systems: self.dsc.delete_file_system(file_system) except: pass return super(FileSystemTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX): file_system_name = self.get_resource_name(prefix) self.test_file_systems.append(file_system_name) return file_system_name def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX): return self.dsc.create_file_system(self._get_file_system_reference(prefix=file_system_prefix)) # --Helpers----------------------------------------------------------------- @record def test_create_file_system(self): # Arrange file_system_name = self._get_file_system_reference() # Act file_system_client = self.dsc.get_file_system_client(file_system_name) created = file_system_client.create_file_system() # Assert self.assertTrue(created) @record def test_list_file_systemss(self): # Arrange file_system_name = self._get_file_system_reference() file_system = self.dsc.create_file_system(file_system_name) # Act file_systems = list(self.dsc.list_file_systems()) # Assert self.assertIsNotNone(file_systems) self.assertGreaterEqual(len(file_systems), 1) self.assertIsNotNone(file_systems[0]) self.assertNamedItemInContainer(file_systems, file_system.file_system_name) self.assertIsNotNone(file_systems[0].has_immutability_policy) self.assertIsNotNone(file_systems[0].has_legal_hold) @record def test_delete_file_system_with_existing_file_system(self): # Arrange file_system = self._create_file_system() # Act deleted = file_system.delete_file_system() # Assert self.assertIsNone(deleted) 
@record def test_list_file_systems_with_include_metadata(self): # Arrange file_system = self._create_file_system() metadata = {'hello': 'world', 'number': '42'} resp = file_system.set_file_system_metadata(metadata) # Act file_systems = list(self.dsc.list_file_systems( name_starts_with=file_system.file_system_name, include_metadata=True)) # Assert self.assertIsNotNone(file_systems) self.assertGreaterEqual(len(file_systems), 1) self.assertIsNotNone(file_systems[0]) self.assertNamedItemInContainer(file_systems, file_system.file_system_name) self.assertDictEqual(file_systems[0].metadata, metadata) @record def test_list_file_systems_by_page(self): # Arrange for i in range(0, 6): self._create_file_system(file_system_prefix="filesystem{}".format(i)) # Act file_systems = list(next(self.dsc.list_file_systems( results_per_page=3, name_starts_with="file", include_metadata=True).by_page())) # Assert self.assertIsNotNone(file_systems) self.assertGreaterEqual(len(file_systems), 3) @record def test_list_file_systems_with_public_access(self): # Arrange file_system_name = self._get_file_system_reference() file_system = self.dsc.get_file_system_client(file_system_name) file_system.create_file_system(public_access="blob") metadata = {'hello': 'world', 'number': '42'} resp = file_system.set_file_system_metadata(metadata) # Act file_systems = list(self.dsc.list_file_systems( name_starts_with=file_system.file_system_name, include_metadata=True)) # Assert self.assertIsNotNone(file_systems) self.assertGreaterEqual(len(file_systems), 1) self.assertIsNotNone(file_systems[0]) self.assertNamedItemInContainer(file_systems, file_system.file_system_name) self.assertDictEqual(file_systems[0].metadata, metadata) self.assertTrue(file_systems[0].public_access is PublicAccess.File) @record def test_get_file_system_properties(self): # Arrange metadata = {'hello': 'world', 'number': '42'} file_system = self._create_file_system() file_system.set_file_system_metadata(metadata) # Act props = 
file_system.get_file_system_properties() # Assert self.assertIsNotNone(props) self.assertDictEqual(props.metadata, metadata) self.assertIsNotNone(props.has_immutability_policy) self.assertIsNotNone(props.has_legal_hold) @record def test_list_paths(self): # Arrange file_system = self._create_file_system() for i in range(0, 6): file_system.create_directory("dir1{}".format(i)) paths = list(file_system.get_paths(upn=True)) self.assertEqual(len(paths), 6) @record def test_list_paths_with_max_per_page(self): # Arrange file_system = self._create_file_system() for i in range(0, 6): file_system.create_directory("dir1{}".format(i)) generator1 = file_system.get_paths(max_results=2, upn=True).by_page() paths1 = list(next(generator1)) generator2 = file_system.get_paths(max_results=4, upn=True)\ .by_page(continuation_token=generator1.continuation_token) paths2 = list(next(generator2)) self.assertEqual(len(paths1), 2) self.assertEqual(len(paths2), 4) @record def test_list_paths_under_specific_path(self): # Arrange file_system = self._create_file_system() for i in range(0, 6): file_system.create_directory("dir1{}".format(i)) # create a subdirectory under the current directory subdir = file_system.get_directory_client("dir1{}".format(i)).create_sub_directory("subdir") subdir.create_sub_directory("subsub") # create a file under the current directory file_client = subdir.create_file("file") file_client.append_data(b"abced", 0, 5) file_client.flush_data(5) generator1 = file_system.get_paths(path="dir10/subdir", max_results=2, upn=True).by_page() paths = list(next(generator1)) self.assertEqual(len(paths), 2) self.assertEqual(paths[0].content_length, 5)