Example #1
0
class FileSystemTest(StorageTestCase):
    """Tests for file-system level operations of ``DataLakeServiceClient``.

    Every name handed out by ``_get_file_system_reference`` is remembered in
    ``self.test_file_systems`` so that ``tearDown`` can delete the matching
    file systems when the test is not running in playback mode.
    """

    def setUp(self):
        """Build the service client and reset the list of names to clean up."""
        super(FileSystemTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.test_file_systems = []

    def tearDown(self):
        """Best-effort deletion of every file system registered by the test."""
        if not self.is_playback():
            for file_system in self.test_file_systems:
                # Guard each deletion on its own: one failure (for example a
                # file system the test already deleted, or a name that was
                # registered but never created) must not prevent the
                # remaining file systems from being cleaned up.
                try:
                    self.dsc.delete_file_system(file_system)
                except Exception:
                    # Cleanup is deliberately best-effort; unlike a bare
                    # ``except`` this still lets KeyboardInterrupt and
                    # SystemExit propagate.
                    pass

        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Return a resource name for ``prefix`` and register it for cleanup."""
        file_system_name = self.get_resource_name(prefix)
        self.test_file_systems.append(file_system_name)
        return file_system_name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create a file system with a registered name and return the result
        of ``DataLakeServiceClient.create_file_system``."""
        return self.dsc.create_file_system(
            self._get_file_system_reference(prefix=file_system_prefix))

    # --Test cases--------------------------------------------------------------

    @record
    def test_create_file_system(self):
        # Creating a file system through its client returns a truthy result.
        # Arrange
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system()

        # Assert
        self.assertTrue(created)

    @record
    def test_create_file_system_with_metadata(self):
        # Metadata passed at creation is returned by the properties call.
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system(metadata=metadata)

        # Assert
        meta = file_system_client.get_file_system_properties().metadata
        self.assertTrue(created)
        self.assertDictEqual(meta, metadata)

    @record
    def test_set_file_system_acl(self):
        # Setting an access policy with and without public access.
        # Act
        file_system = self._create_file_system()
        access_policy = AccessPolicy(
            permission=FileSystemSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            start=datetime.utcnow())
        signed_identifier1 = {'testid': access_policy}
        response = file_system.set_file_system_access_policy(
            signed_identifier1, public_access=PublicAccess.FileSystem)

        self.assertIsNotNone(response.get('etag'))
        self.assertIsNotNone(response.get('last_modified'))
        acl1 = file_system.get_file_system_access_policy()
        self.assertIsNotNone(acl1['public_access'])
        self.assertEqual(len(acl1['signed_identifiers']), 1)

        # When signed identifiers are set without specifying public access,
        # the test expects the public access to come back as None.
        signed_identifier2 = {'testid': access_policy, 'test2': access_policy}
        file_system.set_file_system_access_policy(signed_identifier2)
        acl2 = file_system.get_file_system_access_policy()
        self.assertIsNone(acl2['public_access'])
        self.assertEqual(len(acl2['signed_identifiers']), 2)

    @record
    def test_list_file_systemss(self):
        # A newly created file system shows up in the service listing.
        # NOTE(review): the method name has a doubled "s"; it is left
        # unchanged because recordings may be keyed by test name -- confirm
        # before renaming.
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.create_file_system(file_system_name)

        # Act
        file_systems = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertIsNotNone(file_systems[0].has_immutability_policy)
        self.assertIsNotNone(file_systems[0].has_legal_hold)

    @record
    def test_delete_file_system_with_existing_file_system(self):
        # Deleting an existing file system returns None.
        # Arrange
        file_system = self._create_file_system()

        # Act
        deleted = file_system.delete_file_system()

        # Assert
        self.assertIsNone(deleted)

    @record
    def test_delete_none_existing_file_system(self):
        # Deleting a file system that was never created raises
        # ResourceNotFoundError.
        fake_file_system_client = self.dsc.get_file_system_client("fakeclient")

        # Act
        with self.assertRaises(ResourceNotFoundError):
            fake_file_system_client.delete_file_system(
                match_condition=MatchConditions.IfMissing)

    @record
    def test_list_file_systems_with_include_metadata(self):
        # Listing with include_metadata=True returns the metadata that was set.
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(
            self.dsc.list_file_systems(
                name_starts_with=file_system.file_system_name,
                include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @record
    def test_list_file_systems_by_page(self):
        # The first page of a paged listing holds at least three entries.
        # Arrange
        for i in range(0, 6):
            self._create_file_system(
                file_system_prefix="filesystem{}".format(i))

        # Act: only the first page of the listing is materialized
        file_systems = list(
            next(
                self.dsc.list_file_systems(results_per_page=3,
                                           name_starts_with="file",
                                           include_metadata=True).by_page()))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 3)

    @record
    def test_list_file_systems_with_public_access(self):
        # The listing reports the public access level set at creation.
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        # NOTE(review): the assertion below expects this to be listed as
        # PublicAccess.File -- presumably that member has the value "blob";
        # confirm against the PublicAccess definition.
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(
            self.dsc.list_file_systems(
                name_starts_with=file_system.file_system_name,
                include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        # assertIs keeps the identity check but reports both values on failure.
        self.assertIs(file_systems[0].public_access, PublicAccess.File)

    @record
    def test_get_file_system_properties(self):
        # Properties expose the metadata and the policy/legal-hold flags.
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system = self._create_file_system()
        file_system.set_file_system_metadata(metadata)

        # Act
        props = file_system.get_file_system_properties()

        # Assert
        self.assertIsNotNone(props)
        self.assertDictEqual(props.metadata, metadata)
        self.assertIsNotNone(props.has_immutability_policy)
        self.assertIsNotNone(props.has_legal_hold)

    @record
    def test_list_paths(self):
        # Six created directories are all returned by get_paths.
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_which_are_all_files(self):
        # Six created files are all returned by get_paths.
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_with_max_per_page(self):
        # Paging through six directories: a page of 2, then a page of 4.
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        generator1 = file_system.get_paths(max_results=2, upn=True).by_page()
        paths1 = list(next(generator1))

        # Resume from where the first page stopped, with a larger page size.
        generator2 = file_system.get_paths(max_results=4, upn=True)\
            .by_page(continuation_token=generator1.continuation_token)
        paths2 = list(next(generator2))

        self.assertEqual(len(paths1), 2)
        self.assertEqual(len(paths2), 4)

    @record
    def test_list_paths_under_specific_path(self):
        # Listing restricted to "dir10/subdir" returns its two children.
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a 5-byte file under the subdirectory
            file_client = subdir.create_file("file")
            file_client.append_data(b"abced", 0, 5)
            file_client.flush_data(5)

        generator1 = file_system.get_paths(path="dir10/subdir",
                                           max_results=2,
                                           upn=True).by_page()
        paths = list(next(generator1))

        self.assertEqual(len(paths), 2)
        # The test expects the 5-byte file to be listed first.
        self.assertEqual(paths[0].content_length, 5)

    @record
    def test_list_paths_recursively(self):
        # A recursive listing returns every directory and file created.
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the subdirectory
            subdir.create_file("file")

        paths = list(file_system.get_paths(recursive=True, upn=True))

        # 6 iterations x (directory, subdir, subsub, file) = 24 paths
        self.assertEqual(len(paths), 24)

    @record
    def test_create_directory_from_file_system_client(self):
        # Creating "dir1/dir2" leaves a single top-level entry named "dir1".
        # Arrange
        file_system = self._create_file_system()
        file_system.create_directory("dir1/dir2")

        paths = list(file_system.get_paths(recursive=False, upn=True))

        self.assertEqual(len(paths), 1)
        self.assertEqual(paths[0].name, "dir1")

    @record
    def test_create_file_from_file_system_client(self):
        # Creating "dir1/dir2/file" yields three paths, the last one a file.
        # Arrange
        file_system = self._create_file_system()
        file_system.create_file("dir1/dir2/file")

        paths = list(file_system.get_paths(recursive=True, upn=True))

        self.assertEqual(len(paths), 3)
        self.assertEqual(paths[0].name, "dir1")
        self.assertEqual(paths[2].is_directory, False)
Example #2
0
class FileTest(StorageTestCase):
    def _setUp(self, account_name, account_key):
        """Create the service client and the file system shared by a test.

        :param account_name: account name used to build the account URL.
        :param account_key: credential handed to ``DataLakeServiceClient``.
        """
        account_url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(account_url,
                                         credential=account_key,
                                         logging_enable=True)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        # Nothing is created when running in playback mode.
        if self.is_playback():
            return

        fs_client = self.dsc.get_file_system_client(self.file_system_name)
        try:
            fs_client.create_file_system(timeout=5)
        except ResourceExistsError:
            # A file system that already exists is simply reused.
            pass

    def tearDown(self):
        """Best-effort removal of the file system named in ``_setUp``.

        Nothing is deleted in playback mode. Errors raised during deletion
        are ignored so that cleanup never fails a test.
        """
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Cleanup is deliberately best-effort (this also covers a
                # test that never reached ``_setUp`` and so has no client),
                # but unlike a bare ``except`` this still lets
                # KeyboardInterrupt and SystemExit propagate.
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return the name ``get_resource_name`` produces for ``prefix``."""
        return self.get_resource_name(prefix)

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        """Return the name ``get_resource_name`` produces for ``prefix``."""
        return self.get_resource_name(prefix)

    def _create_file_system(self):
        """Create a file system through the service client and return the
        result of ``create_file_system``.

        NOTE(review): ``_get_file_system_reference`` is not defined in the
        visible part of this class -- confirm it exists (for example on a
        base class) before relying on this helper.
        """
        new_name = self._get_file_system_reference()
        return self.dsc.create_file_system(new_name)

    def _create_directory_and_return_client(self, directory=None):
        """Create a directory in the test file system and return its client.

        :param directory: directory name; when falsy, a name is taken from
            ``_get_directory_reference``.
        """
        name = directory or self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory()
        return client

    def _create_file_and_return_client(self, directory="", file=None):
        """Create a file (and, if named, its directory) and return the client.

        :param directory: directory to create first; skipped when falsy.
        :param file: file name; when falsy, a name is taken from
            ``_get_file_reference``.
        """
        if directory:
            self._create_directory_and_return_client(directory)

        file_name = file or self._get_file_reference()

        # The path is always "<directory>/<file>", so it begins with "/" when
        # no directory is given.
        path = directory + '/' + file_name
        client = self.dsc.get_file_client(self.file_system_name, path)
        client.create_file()
        return client

    # --Test cases--------------------------------------------------------------

    @DataLakePreparer()
    def test_create_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        # Creating a file under a freshly created directory returns a response.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: a new directory to hold the file
        parent_dir = self._create_directory_and_return_client()

        # Act
        result = parent_dir.get_file_client('filename').create_file()

        # Assert
        self.assertIsNotNone(result)

    @DataLakePreparer()
    def test_file_exists(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        # exists() is True for a created file and False for one never created.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: one directory, two file clients, only one file created
        parent_dir = self._create_directory_and_return_client()
        created_file = parent_dir.get_file_client('filename')
        missing_file = parent_dir.get_file_client('nonexistentfile')
        created_file.create_file()

        # Assert
        self.assertTrue(created_file.exists())
        self.assertFalse(missing_file.exists())

    @DataLakePreparer()
    def test_create_file_using_oauth_token_credential(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # A file client built with an OAuth token credential can create a file.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        path = self._get_file_reference()
        oauth_credential = self.generate_oauth_token()

        # Build the file client directly, authenticating with the token
        token_client = DataLakeFileClient(self.dsc.url,
                                          self.file_system_name,
                                          path,
                                          credential=oauth_credential)

        # Act
        result = token_client.create_file()

        # Assert
        self.assertIsNotNone(result)

    @DataLakePreparer()
    def test_create_file_with_existing_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        # Re-creating an existing file with IfMissing raises
        # ResourceExistsError.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        existing_file = self._create_file_and_return_client()

        # Act / Assert: the file already exists, so the conditional create
        # must fail (if_none_match='*' is to make sure no existing file)
        with self.assertRaises(ResourceExistsError):
            existing_file.create_file(
                match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_create_file_with_lease_id(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        # Re-creating a leased file while passing the lease succeeds, and the
        # response matches the file properties.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        parent_dir = self._create_directory_and_return_client()
        leased_file = parent_dir.get_file_client('filename')

        # Act: create, acquire a lease, then create again with that lease
        leased_file.create_file()
        held_lease = leased_file.acquire_lease()
        recreate_result = leased_file.create_file(lease=held_lease)

        # Assert
        props = leased_file.get_file_properties()
        self.assertIsNotNone(props)
        self.assertEqual(props.etag, recreate_result.get('etag'))
        self.assertEqual(props.last_modified,
                         recreate_result.get('last_modified'))

    @DataLakePreparer()
    def test_create_file_under_root_directory(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        # A file can be created directly under the file system root.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: a client for a path with no parent directory
        root_file = self.dsc.get_file_client(self.file_system_name, "filename")

        # Act
        result = root_file.create_file()

        # Assert
        self.assertIsNotNone(result)

    @DataLakePreparer()
    def test_append_data(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        # Appending three bytes to a new file returns a response.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: an empty file inside a new directory
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        target_file.create_file()

        # Act
        append_result = target_file.append_data(b'abc', 0, 3)

        # Assert
        self.assertIsNotNone(append_result)

    @DataLakePreparer()
    def test_append_empty_data(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        # Flushing a file to which nothing was appended leaves its size at 0.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        # Act: flush at position 0 without appending any data first
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        # Assert: compare the size with 0. The previous
        # ``assertIsNotNone(size, 0)`` passed 0 as the failure *message* and
        # therefore never checked the size at all.
        self.assertEqual(file_props['size'], 0)

    @DataLakePreparer()
    def test_flush_data(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        # Flushing three appended bytes results in a file of size 3.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: an empty file inside a new directory
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        target_file.create_file()

        # Act
        target_file.append_data(b'abc', 0, 3)
        flush_result = target_file.flush_data(3)

        # Assert
        props = target_file.get_file_properties()
        self.assertIsNotNone(flush_result)
        self.assertEqual(props['size'], 3)

    @DataLakePreparer()
    def test_flush_data_with_match_condition(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        # A conditional flush with the creation etag succeeds once and then
        # raises ResourceModifiedError on the second attempt.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: an empty file inside a new directory
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        created = target_file.create_file()

        # Act
        target_file.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        target_file.flush_data(
            3,
            etag=created['etag'],
            match_condition=MatchConditions.IfNotModified)

        target_file.append_data(b'abc', 3, 3)

        # Assert: flushing again with the creation etag is expected to fail
        with self.assertRaises(ResourceModifiedError):
            target_file.flush_data(
                6,
                etag=created['etag'],
                match_condition=MatchConditions.IfNotModified)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_to_none_existing_file(self,
                                               datalake_storage_account_name,
                                               datalake_storage_account_key):
        # Uploading to a file that was never created round-trips the data.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded

        # Arrange: a client for a not-yet-created file in a new directory
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        payload = self.get_random_bytes(200 * 1024)

        # Act
        target_file.upload_data(payload, overwrite=True, max_concurrency=3)

        # Assert
        round_trip = target_file.download_file().readall()
        self.assertEqual(payload, round_trip)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_in_substreams(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        # A 16MB upload in 8MB chunks round-trips, first with three workers
        # and then on a single thread.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded

        # Arrange: a client for a file in a new directory
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')

        # Get 16MB data
        payload = self.get_random_bytes(16 * 1024 * 1024)
        # Ensure chunk size is greater than threshold (8MB > 4MB) - for optimized upload
        eight_mb = 8 * 1024 * 1024

        # Act / Assert: concurrent upload
        target_file.upload_data(payload,
                                chunk_size=eight_mb,
                                overwrite=True,
                                max_concurrency=3)
        first_download = target_file.download_file().readall()
        self.assertEqual(payload, first_download)

        # Act / Assert: run on single thread
        target_file.upload_data(payload,
                                chunk_size=eight_mb,
                                overwrite=True)
        second_download = target_file.download_file().readall()
        self.assertEqual(payload, second_download)

    @DataLakePreparer()
    def test_upload_data_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        # Uploading to an existing file fails without overwrite=True and
        # replaces the content with it.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: an existing 3-byte file inside a new directory
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        target_file.create_file()
        target_file.append_data(b"abc", 0)
        target_file.flush_data(3)

        replacement = self.get_random_bytes(100)

        # Act / Assert: without overwrite the upload is rejected
        with self.assertRaises(HttpResponseError):
            target_file.upload_data(replacement, max_concurrency=5)

        # Act: overwrite the existing file
        target_file.upload_data(replacement, overwrite=True, max_concurrency=5)

        # Assert
        round_trip = target_file.download_file().readall()
        self.assertEqual(replacement, round_trip)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_content_settings(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # An etag-conditional upload stores both the data and the content
        # language.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: an existing file whose creation etag is kept
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        creation_etag = target_file.create_file()['etag']

        payload = self.get_random_bytes(100)
        expected_settings = ContentSettings(content_language='spanish',
                                            content_disposition='inline')

        # Act: upload only if the file still matches the creation etag
        target_file.upload_data(payload,
                                max_concurrency=5,
                                content_settings=expected_settings,
                                etag=creation_etag,
                                match_condition=MatchConditions.IfNotModified)

        # Assert
        round_trip = target_file.download_file().readall()
        props = target_file.get_file_properties()

        self.assertEqual(payload, round_trip)
        self.assertEqual(props.content_settings.content_language,
                         expected_settings.content_language)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_permission_and_umask(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # Uploading with permissions '0777' and umask '0000' yields
        # 'rwxrwxrwx' access.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: an existing file whose creation etag is kept
        parent_dir = self._create_directory_and_return_client()
        target_file = parent_dir.get_file_client('filename')
        creation_etag = target_file.create_file()['etag']

        payload = self.get_random_bytes(100)

        # Act: overwrite the existing file, conditional on the creation etag
        target_file.upload_data(payload,
                                overwrite=True,
                                max_concurrency=5,
                                permissions='0777',
                                umask="0000",
                                etag=creation_etag,
                                match_condition=MatchConditions.IfNotModified)

        round_trip = target_file.download_file().readall()
        access = target_file.get_access_control()

        # Assert
        self.assertEqual(payload, round_trip)
        self.assertEqual(access['permissions'], 'rwxrwxrwx')

    @DataLakePreparer()
    def test_read_file(self, datalake_storage_account_name,
                       datalake_storage_account_key):
        # Data appended and flushed to a file is returned by download_file.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        target_file = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)

        # upload data to file
        target_file.append_data(payload, 0, len(payload))
        target_file.flush_data(len(payload))

        # download the data and make sure it is the same as uploaded data
        round_trip = target_file.download_file().readall()
        self.assertEqual(payload, round_trip)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_read_file_with_user_delegation_key(self,
                                                datalake_storage_account_name,
                                                datalake_storage_account_key):
        # A SAS signed with a user delegation key can be used to download the
        # file content.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file and upload data to it
        source_file = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)
        source_file.append_data(payload, 0, len(payload))
        source_file.flush_data(len(payload))

        # Get user delegation key through a token-authenticated service client
        oauth_credential = self.generate_oauth_token()
        oauth_service = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=oauth_credential,
            logging_enable=True)
        key_start = datetime.utcnow()
        key_expiry = datetime.utcnow() + timedelta(hours=1)
        delegation_key = oauth_service.get_user_delegation_key(
            key_start, key_expiry)

        # Sign a one-hour SAS for the file with the delegation key
        sas = generate_file_sas(
            source_file.account_name,
            source_file.file_system_name,
            None,
            source_file.path_name,
            delegation_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # download the data with the SAS and make sure it is the same as
        # uploaded data
        sas_file = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            source_file.file_system_name,
            source_file.path_name,
            credential=sas,
            logging_enable=True)
        round_trip = sas_file.download_file().readall()
        self.assertEqual(payload, round_trip)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_set_acl_with_user_delegation_key(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        # A SAS signed with a user delegation key can be used to set and read
        # back the ACL and owner of a file.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file and upload data to it
        source_file = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)
        source_file.append_data(payload, 0, len(payload))
        source_file.flush_data(len(payload))

        # Get user delegation key through a token-authenticated service client
        oauth_credential = self.generate_oauth_token()
        oauth_service = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=oauth_credential)
        key_start = datetime.utcnow()
        key_expiry = datetime.utcnow() + timedelta(hours=1)
        delegation_key = oauth_service.get_user_delegation_key(
            key_start, key_expiry)

        # Sign a one-hour SAS carrying the access-control permissions
        sas = generate_file_sas(
            source_file.account_name,
            source_file.file_system_name,
            None,
            source_file.path_name,
            delegation_key,
            permission=FileSasPermissions(execute=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # set the ACL and owner through a SAS-authenticated client and make
        # sure they are read back unchanged
        sas_file = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            source_file.file_system_name,
            source_file.path_name,
            credential=sas)
        expected_acl = 'user::rwx,group::r-x,other::rwx'
        expected_owner = "dc140949-53b7-44af-b1e9-cd994951fb86"
        sas_file.set_access_control(acl=expected_acl, owner=expected_owner)
        access_control = sas_file.get_access_control()
        self.assertEqual(expected_acl, access_control['acl'])
        self.assertEqual(expected_owner, access_control['owner'])

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_preauthorize_user_with_user_delegation_key(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Set permissions via a user-delegation SAS carrying a preauthorized agent id."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # The same object id is used as file owner and as the SAS
        # preauthorized agent.
        agent_object_id = "68390a19-a643-458b-b726-408abf67b4fc"

        # Create a file and commit random bytes to it.
        file_client = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)
        payload_size = len(payload)
        file_client.append_data(payload, 0, payload_size)
        file_client.flush_data(payload_size)
        file_client.set_access_control(owner=agent_object_id,
                                       permissions='0777')
        # The response is not inspected; only the request itself is kept.
        file_client.get_access_control()

        # Request a user delegation key through an OAuth-authenticated client.
        oauth_credential = self.generate_oauth_token()
        oauth_service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=oauth_credential)
        delegation_key = oauth_service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_permissions = FileSasPermissions(read=True,
                                             write=True,
                                             manage_access_control=True,
                                             manage_ownership=True)
        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            delegation_key,
            permission=sas_permissions,
            expiry=datetime.utcnow() + timedelta(hours=1),
            preauthorized_agent_object_id=agent_object_id
        )

        # Address the same path again, authenticating only with the SAS.
        sas_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)

        # Assert: the SAS client is able to change the permissions.
        result = sas_file_client.set_access_control(permissions='0777')
        self.assertIsNotNone(result)

    @DataLakePreparer()
    def test_read_file_into_file(self, datalake_storage_account_name,
                                 datalake_storage_account_key):
        """Download a file into a local file and compare it with the upload."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        size = len(data)

        # Upload: append the bytes at offset 0, then commit them.
        file_client.append_data(data, 0, size)
        file_client.flush_data(size)

        # Download straight into a local file opened for binary writing.
        with open(FILE_PATH, 'wb') as sink:
            file_client.download_file(max_concurrency=2).readinto(sink)

        # Assert: the local file holds exactly the uploaded bytes.
        with open(FILE_PATH, 'rb') as source:
            written = source.read()
        self.assertEqual(data, written)

    @DataLakePreparer()
    def test_read_file_to_text(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        """Download a file as utf-8 decoded text and compare it with the upload."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        text = self.get_random_text_data(1024)
        text_length = len(text)

        # Upload: append the text at offset 0, then commit it.
        file_client.append_data(text, 0, text_length)
        file_client.flush_data(text_length)

        # Download with an explicit encoding so readall() yields decoded text.
        downloader = file_client.download_file(max_concurrency=2,
                                               encoding="utf-8")
        round_tripped = downloader.readall()

        # Assert
        self.assertEqual(text, round_tripped)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_account_sas(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        """Check that a read-only account SAS can read a file but not append to it."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Put a file in place for the SAS-authenticated clients to target.
        file_name = self._get_file_reference()
        self._create_file_and_return_client(file=file_name)

        # Account SAS signed with the account key, granting read only.
        resource_types = ResourceTypes(file_system=True, object=True)
        read_only = AccountSasPermissions(read=True)
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            resource_types,
            read_only,
            datetime.utcnow() + timedelta(hours=1),
        )

        # The token is exercised both as a raw string and wrapped in an
        # AzureSasCredential.
        for credential in (token, AzureSasCredential(token)):
            sas_file_client = DataLakeFileClient(self.dsc.url,
                                                 self.file_system_name,
                                                 file_name,
                                                 credential=credential)

            # Reading properties is allowed by the token.
            self.assertIsNotNone(sas_file_client.get_file_properties())

            # Writing must be rejected by the service.
            with self.assertRaises(HttpResponseError):
                sas_file_client.append_data(b"abcd", 0, 4)

    @DataLakePreparer()
    def test_account_sas_raises_if_sas_already_in_uri(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Expect ValueError when a SAS is given both in the URL and as credential."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        with self.assertRaises(ValueError):
            # Everything is built inside the context manager so a ValueError
            # from any of these steps satisfies the expectation.
            url_with_sas = self.dsc.url + "?sig=foo"
            sas_credential = AzureSasCredential("?foo=bar")
            DataLakeFileClient(url_with_sas,
                               self.file_system_name,
                               "foo",
                               credential=sas_credential)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_file_sas_only_applies_to_file_level(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Check that a file SAS works on the file but not on its directory or file system."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Create a file inside a directory.
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # File SAS signed with the account key: read and write on that file.
        read_write = FileSasPermissions(read=True, write=True)
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=read_write,
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # File level: reading properties and appending data both succeed.
        file_path = directory_name + '/' + file_name
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_path,
                                         credential=token)
        self.assertIsNotNone(file_client.get_file_properties())

        append_response = file_client.append_data(b"abcd",
                                                  0,
                                                  4,
                                                  validate_content=True)
        self.assertIsNotNone(append_response)

        # File system level: the same token must be rejected.
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # Directory level: the same token must be rejected.
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @DataLakePreparer()
    def test_delete_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        """Delete a file and confirm its properties can no longer be fetched."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        doomed_file = self._create_file_and_return_client()

        # Act
        doomed_file.delete_file()

        # Assert: the path is gone.
        with self.assertRaises(ResourceNotFoundError):
            doomed_file.get_file_properties()

    @DataLakePreparer()
    def test_delete_file_with_if_unmodified_since(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Delete a file conditionally on its own last-modified time."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        doomed_file = self._create_file_and_return_client()
        last_modified = doomed_file.get_file_properties()['last_modified']

        # Act: the condition uses the timestamp the service just reported.
        doomed_file.delete_file(if_unmodified_since=last_modified)

        # Assert: the path is gone.
        with self.assertRaises(ResourceNotFoundError):
            doomed_file.get_file_properties()

    @DataLakePreparer()
    def test_set_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        """Set octal permissions on a file and expect a non-None response."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        target = self._create_file_and_return_client()

        # Act
        result = target.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(result)

    @DataLakePreparer()
    def test_set_access_control_with_match_conditions(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Expect ResourceModifiedError when IfMissing is used on an existing file."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        existing_file = self._create_file_and_return_client()

        # The file was just created, so the IfMissing condition cannot hold.
        with self.assertRaises(ResourceModifiedError):
            existing_file.set_access_control(
                permissions='0777',
                match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_get_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        """Read back access control of a file after setting its permissions."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        target = self._create_file_and_return_client()
        target.set_access_control(permissions='0777')

        # Act
        access_control = target.get_access_control()

        # Assert
        self.assertIsNotNone(access_control)

    @DataLakePreparer()
    def test_get_access_control_with_if_modified_since(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Read access control with an if_modified_since bound set in the past."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        target = self._create_file_and_return_client()
        target.set_access_control(permissions='0777')
        last_modified = target.get_file_properties()['last_modified']

        # Act: use a bound 15 minutes before the reported modification time.
        threshold = last_modified - timedelta(minutes=15)
        access_control = target.get_access_control(
            if_modified_since=threshold)

        # Assert
        self.assertIsNotNone(access_control)

    @DataLakePreparer()
    def test_set_access_control_recursive(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Apply an ACL recursively on a single file and verify counters and ACL."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        target = self._create_file_and_return_client()

        # Act
        summary = target.set_access_control_recursive(acl=acl)

        # Assert: one file changed, no directories, no failures.
        counters = summary.counters
        self.assertEqual(counters.directories_successful, 0)
        self.assertEqual(counters.files_successful, 1)
        self.assertEqual(counters.failure_count, 0)

        # Assert: the ACL read back matches what was requested.
        reported = target.get_access_control()
        self.assertIsNotNone(reported)
        self.assertEqual(acl, reported['acl'])

    @DataLakePreparer()
    def test_update_access_control_recursive(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Update an ACL recursively on a single file and verify counters and ACL."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        target = self._create_file_and_return_client()

        # Act
        summary = target.update_access_control_recursive(acl=acl)

        # Assert: one file changed, no directories, no failures.
        counters = summary.counters
        self.assertEqual(counters.directories_successful, 0)
        self.assertEqual(counters.files_successful, 1)
        self.assertEqual(counters.failure_count, 0)

        # Assert: the ACL read back matches what was requested.
        reported = target.get_access_control()
        self.assertIsNotNone(reported)
        self.assertEqual(acl, reported['acl'])

    @DataLakePreparer()
    def test_remove_access_control_recursive(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Remove ACL entries recursively on a single file and verify the counters."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Adjacent literals are concatenated into one comma-separated ACL
        # removal specification.
        acl = (
            "mask,"
            "default:user,default:group,"
            "user:ec3595d6-2c17-4696-8caa-7e139758d24a,group:ec3595d6-2c17-4696-8caa-7e139758d24a,"
            "default:user:ec3595d6-2c17-4696-8caa-7e139758d24a,default:group:ec3595d6-2c17-4696-8caa-7e139758d24a"
        )
        target = self._create_file_and_return_client()

        # Act
        summary = target.remove_access_control_recursive(acl=acl)

        # Assert: one file processed, no directories, no failures.
        counters = summary.counters
        self.assertEqual(counters.directories_successful, 0)
        self.assertEqual(counters.files_successful, 1)
        self.assertEqual(counters.failure_count, 0)

    @DataLakePreparer()
    def test_get_properties(self, datalake_storage_account_name,
                            datalake_storage_account_key):
        """Create a file with metadata and content settings, then check its properties."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        parent_directory = self._create_directory_and_return_client()
        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        # Act: create the file, write three bytes, and fetch its properties.
        file_client = parent_directory.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        props = file_client.get_file_properties()

        # Assert
        self.assertTrue(props)
        self.assertEqual(props.size, 3)
        self.assertEqual(props.metadata['hello'], metadata['hello'])
        self.assertEqual(props.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_set_expiry(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        """Set an absolute expiry on a file and check that an expiry time is reported."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        parent_directory = self._create_directory_and_return_client()
        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        one_hour_from_now = datetime.utcnow() + timedelta(hours=1)

        # Act
        file_client = parent_directory.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.set_file_expiry("Absolute", expires_on=one_hour_from_now)
        props = file_client.get_file_properties()

        # Assert
        self.assertTrue(props)
        self.assertIsNotNone(props.expiry_time)

    @DataLakePreparer()
    def test_rename_file_with_non_used_name(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Rename a file to 'newname' and verify its content and path."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange: a file holding three bytes.
        source_client = self._create_file_and_return_client()
        expected = b"abc"
        source_client.append_data(expected, 0, 3)
        source_client.flush_data(3)

        # Act: the destination is given as "<file system>/<new path>".
        destination = source_client.file_system_name + '/' + 'newname'
        renamed_client = source_client.rename_file(destination)

        # Assert
        self.assertEqual(renamed_client.download_file().readall(), expected)
        self.assertEqual(renamed_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_system_sas(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Create and rename a file using only a file-system SAS."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # File-system SAS signed with the account key.
        sas_permissions = FileSystemSasPermissions(write=True,
                                                   read=True,
                                                   delete=True)
        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            sas_permissions,
            datetime.utcnow() + timedelta(hours=1),
        )

        # Arrange: create "oldfile" through the SAS client and write 3 bytes.
        source_client = DataLakeFileClient(self.dsc.url,
                                           self.file_system_name,
                                           "oldfile",
                                           credential=token)
        source_client.create_file()
        expected = b"abc"
        source_client.append_data(expected, 0, 3)
        source_client.flush_data(3)

        # Act: the destination is given as "<file system>/<new path>".
        destination = source_client.file_system_name + '/' + 'newname'
        renamed_client = source_client.rename_file(destination)

        # Assert
        self.assertEqual(renamed_client.download_file().readall(), expected)
        self.assertEqual(renamed_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_sas(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        """Rename a file using separate file SAS tokens for source and destination."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS for the source path "oldfile", signed with the account key.
        source_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "oldfile",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # SAS for the destination path "newname", with the same permissions.
        destination_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "newname",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Arrange: create "oldfile" through the SAS client and write 3 bytes.
        source_client = DataLakeFileClient(self.dsc.url,
                                           self.file_system_name,
                                           "oldfile",
                                           credential=source_token)
        source_client.create_file()
        expected = b"abc"
        source_client.append_data(expected, 0, 3)
        source_client.flush_data(3)

        # Act: the destination SAS is appended to the new name as a query
        # string.
        destination = (source_client.file_system_name + '/' + 'newname' +
                       '?' + destination_token)
        renamed_client = source_client.rename_file(destination)

        # Assert
        self.assertEqual(renamed_client.download_file().readall(), expected)
        self.assertEqual(renamed_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_with_account_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Rename a file using an account SAS (currently skipped: "service bug")."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # pytest.skip raises, so none of the statements below are executed.
        pytest.skip("service bug")

        # Account SAS signed with the account key, scoped to objects.
        sas_permissions = AccountSasPermissions(write=True,
                                                read=True,
                                                create=True,
                                                delete=True)
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(object=True),
            sas_permissions,
            datetime.utcnow() + timedelta(hours=5),
        )

        # Arrange: create "oldfile" through the SAS client and write 3 bytes.
        source_client = DataLakeFileClient(self.dsc.url,
                                           self.file_system_name,
                                           "oldfile",
                                           credential=token)
        source_client.create_file()
        expected = b"abc"
        source_client.append_data(expected, 0, 3)
        source_client.flush_data(3)

        # Act: the destination is given as "<file system>/<new path>".
        destination = source_client.file_system_name + '/' + 'newname'
        renamed_client = source_client.rename_file(destination)

        # Assert
        self.assertEqual(renamed_client.download_file().readall(), expected)
        self.assertEqual(renamed_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Rename a file onto an existing file and verify the content is replaced.

        The destination initially holds b"a"; after the rename it must hold
        the source's b"abc".
        """
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Create the file that will be the rename destination.
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)

        # Create the source file with different content.
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)

        # Act: the destination is given as "<file system>/<existing path>".
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)

        # Assert: the existing file was overwritten with the source content.
        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)

    @DataLakePreparer()
    def test_rename_file_will_not_change_existing_directory(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Rename a file across directories and verify sibling files are untouched."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        def populate(directory_client, file_name, content):
            # Create the file, then append and commit its content.
            created = directory_client.create_file(file_name)
            created.append_data(content, 0, len(content))
            created.flush_data(len(content))
            return created

        # First non-empty directory (two files).
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = populate(dir1, "file1", b"file1")
        f2 = populate(dir1, "file2", b"file2")

        # Second non-empty directory (two files).
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = populate(dir2, "file3", b"file3")
        f4 = populate(dir2, "file4", b"file4")

        # Act: move file3 onto file1's path.
        renamed_client = f3.rename_file(f1.file_system_name + '/' +
                                        f1.path_name)

        # Assert: the destination now carries file3's content.
        self.assertEqual(renamed_client.download_file().readall(), b"file3")

        # Assert: the siblings in both directories still hold their own data.
        self.assertEqual(f2.download_file().readall(), b"file2")
        self.assertEqual(f4.download_file().readall(), b"file4")

        # Assert: the source path can no longer be downloaded.
        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
Example #3
0
class DirectoryTest(StorageTestCase):
    def setUp(self):
        """Build the service client and, outside playback, create the test file system."""
        super(DirectoryTest, self).setUp()
        account_url = self._get_account_url()
        account_key = self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY
        self.dsc = DataLakeServiceClient(account_url, credential=account_key)
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        # No service call is made when is_playback() is true.
        if self.is_playback():
            return

        file_system_client = self.dsc.get_file_system_client(
            self.file_system_name)
        try:
            file_system_client.create_file_system(timeout=5)
        except ResourceExistsError:
            # The file system already exists; reuse it.
            pass

    def tearDown(self):
        """Best-effort removal of file systems, then run the base tearDown.

        Outside playback this deletes the file system created in setUp and
        then every file system returned by ``list_file_systems()``.
        NOTE(review): that is every file system the listing returns, not only
        those created by this test class -- confirm this is intended.

        Cleanup failures are swallowed on purpose so that they never mask the
        outcome of the test itself. Only ``Exception`` subclasses are caught,
        so ``KeyboardInterrupt`` and ``SystemExit`` still propagate.

        :return: whatever the base class ``tearDown`` returns.
        """

        def delete_quietly(name):
            # One failed deletion must not prevent the remaining ones.
            try:
                self.dsc.delete_file_system(name)
            except Exception:  # deliberate best-effort cleanup
                pass

        if not self.is_playback():
            delete_quietly(self.file_system_name)
            try:
                for file_system in self.dsc.list_file_systems():
                    delete_quietly(file_system.name)
            except Exception:  # listing itself may fail; still best-effort
                pass

        return super(DirectoryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return the resource name that get_resource_name() yields for *prefix*."""
        return self.get_resource_name(prefix)

    def _create_directory_and_get_directory_client(self, directory_name=None):
        """Create a directory in the test file system and return its client.

        A falsy *directory_name* (None or empty) is replaced by a name from
        ``_get_directory_reference()``.
        """
        if not directory_name:
            directory_name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name,
                                               directory_name)
        client.create_directory()
        return client

    def _create_file_system(self):
        """Create a file system named by _get_file_system_reference() and return the result."""
        # NOTE(review): _get_file_system_reference is not defined in the
        # visible part of this class -- confirm it exists on the class or a base.
        file_system_name = self._get_file_system_reference()
        return self.dsc.create_file_system(file_system_name)

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_directory(self):
        """Create a directory with content settings and expect a truthy response."""
        # Arrange
        name = self._get_directory_reference()
        settings = ContentSettings(content_language='spanish',
                                   content_disposition='inline')
        client = self.dsc.get_directory_client(self.file_system_name, name)

        # Act
        response = client.create_directory(content_settings=settings)

        # Assert
        self.assertTrue(response)

    @record
    def test_using_oauth_token_credential_to_create_directory(self):
        """Create a directory through a client authenticated with an OAuth token."""
        # Arrange: directory name first, then the OAuth credential.
        name = self._get_directory_reference()
        oauth_credential = self.generate_oauth_token()
        client = DataLakeDirectoryClient(self.dsc.url,
                                         self.file_system_name,
                                         name,
                                         credential=oauth_credential)

        # Act
        result = client.create_directory()

        # Assert
        self.assertIsNotNone(result)

    @record
    def test_create_directory_with_match_conditions(self):
        """Create a directory under the IfMissing match condition."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)

        # Act
        response = client.create_directory(
            match_condition=MatchConditions.IfMissing)

        # Assert
        self.assertTrue(response)

    @record
    def test_create_directory_with_permission(self):
        """Create a directory with explicit permissions and read them back."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)

        # Act: a umask of "0000" is passed alongside the permissions.
        response = client.create_directory(permissions="rwxr--r--",
                                           umask="0000")
        access_control = client.get_access_control()

        # Assert
        self.assertTrue(response)
        self.assertEqual(access_control['permissions'], 'rwxr--r--')

    @record
    def test_create_directory_with_content_settings(self):
        """Create a directory while passing ContentSettings and expect success."""
        # Arrange
        name = self._get_directory_reference()
        settings = ContentSettings(content_language='spanish',
                                   content_disposition='inline')
        client = self.dsc.get_directory_client(self.file_system_name, name)

        # Act
        response = client.create_directory(content_settings=settings)

        # Assert
        self.assertTrue(response)

    @record
    def test_create_directory_with_metadata(self):
        """Create a directory with metadata and verify the metadata is stored.

        Each supplied key is compared individually, so additional entries in
        the reported metadata do not fail the test.
        """
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(metadata=metadata)

        properties = directory_client.get_directory_properties()

        # Assert
        self.assertTrue(created)
        self.assertIsNotNone(properties.metadata)
        for key, value in metadata.items():
            self.assertEqual(properties.metadata[key], value)

    @record
    def test_delete_directory(self):
        """Delete a freshly created directory and expect delete_directory() to return None."""
        # Arrange
        name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory(metadata=metadata)

        # Act
        result = client.delete_directory()

        # Assert
        self.assertIsNone(result)

    @record
    def test_delete_directory_with_if_modified_since(self):
        """Expect ResourceModifiedError when deleting with if_modified_since set to last_modified."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory()
        last_modified = client.get_directory_properties()['last_modified']

        # Act / Assert: the bound equals the reported last-modified time.
        with self.assertRaises(ResourceModifiedError):
            client.delete_directory(if_modified_since=last_modified)

    @record
    def test_create_sub_directory_and_delete_sub_directory(self):
        """Create a sub directory, confirm it exists, delete it, confirm it is gone."""
        # Arrange: the parent directory must exist first.
        parent_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        parent_client = self.dsc.get_directory_client(self.file_system_name,
                                                      parent_name)
        parent_client.create_directory(metadata=metadata)

        # Act: create the child from the parent's client.
        child_name = 'subdir'
        child_created = parent_client.create_sub_directory(child_name)

        # Look the child up independently through its full path to prove it
        # was really created.
        child_path = parent_name + '/' + child_name
        child_client = self.dsc.get_directory_client(self.file_system_name,
                                                     child_path)
        child_properties = child_client.get_directory_properties()

        # Assert
        self.assertTrue(child_created)
        self.assertTrue(child_properties)

        # Act / Assert: after deletion the child can no longer be found.
        parent_client.delete_sub_directory(child_name)
        with self.assertRaises(ResourceNotFoundError):
            child_client.get_directory_properties()

    @record
    def test_set_access_control(self):
        """Setting permissions on a directory returns a non-None response."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory(metadata={'hello': 'world', 'number': '42'})

        # Act
        result = client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(result)

    @record
    def test_set_access_control_with_acl(self):
        """An ACL set on a directory is read back unchanged."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory(metadata={'hello': 'world', 'number': '42'})

        # Act
        expected_acl = 'user::rwx,group::r-x,other::rwx'
        client.set_access_control(acl=expected_acl)
        fetched = client.get_access_control()

        # Assert
        self.assertIsNotNone(fetched)
        self.assertEqual(expected_acl, fetched['acl'])

    @record
    def test_set_access_control_if_none_modified(self):
        """Set permissions conditioned on the etag returned at creation."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        create_response = client.create_directory()

        # Act: pass the creation etag together with IfNotModified
        result = client.set_access_control(
            permissions='0777',
            etag=create_response['etag'],
            match_condition=MatchConditions.IfNotModified)

        # Assert
        self.assertIsNotNone(result)

    @record
    def test_get_access_control(self):
        """Access control can be read from a directory created with permissions."""
        # Arrange
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory(
            metadata={'hello': 'world', 'number': '42'},
            permissions='0777')

        # Act
        access_control = client.get_access_control()

        # Assert
        self.assertIsNotNone(access_control)

    @record
    def test_get_access_control_with_match_conditions(self):
        """Read access control conditioned on the etag returned at creation.

        The directory is created with permissions '0777' and umask '0000';
        the permissions read back are expected to be 'rwxrwxrwx'.
        """
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory(permissions='0777',
                                                 umask='0000')

        # Act
        response = directory_client.get_access_control(
            etag=resp['etag'], match_condition=MatchConditions.IfNotModified)

        # Assert
        self.assertIsNotNone(response)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on Python 3.12+.
        self.assertEqual(response['permissions'], 'rwxrwxrwx')

    @record
    def test_rename_from(self):
        """Rename an existing directory onto a new path, attaching metadata."""
        # Arrange: the source directory
        source_name = self._get_directory_reference()
        source_client = self.dsc.get_directory_client(
            self.file_system_name, source_name)
        source_client.create_directory()

        # Act: the rename is issued from a client bound to the target path
        target_client = self.dsc.get_directory_client(
            self.file_system_name, "newname")
        source_path = '/' + self.file_system_name + '/' + source_name
        target_client._rename_path(
            source_path, metadata={'hello': 'world', 'number': '42'})

        # Assert: the target path now has properties
        target_properties = target_client.get_directory_properties()
        self.assertIsNotNone(target_properties)

    @record
    def test_rename_from_a_shorter_directory_to_longer_directory(self):
        """Rename a directory onto a deeper (longer) path. Currently skipped."""
        # TODO: investigate why rename shorter path to a longer one does not work
        # A non-empty reason makes the skip visible in test reports.
        pytest.skip("rename from a shorter path to a longer one does not work yet")
        directory_name = self._get_directory_reference()
        # NOTE(review): the directory created here is "old", but the rename
        # below uses directory_name as its source, which is never created in
        # this test - confirm which of the two is intended.
        self._create_directory_and_get_directory_client(directory_name="old")

        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")

        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_directory_in_another_file_system(self):
        """Rename a directory from one file system onto a path in another.

        The source file system uses a fixed name, so it is removed in a
        ``finally`` block; otherwise a failure part-way through would leave
        it behind.
        """
        # create dir1 under file system1
        old_file_system_name = "oldfilesystem"
        old_dir_name = "olddir"
        old_client = self.dsc.get_file_system_client(old_file_system_name)
        old_client.create_file_system()
        try:
            old_client.create_directory(old_dir_name)

            # create a dir2 under file system2
            new_name = "newname"
            new_directory_client = self._create_directory_and_get_directory_client(
                directory_name=new_name)
            new_directory_client = new_directory_client.create_sub_directory(
                "newsub")

            # rename dir1 under file system1 to dir2 under file system2
            new_directory_client._rename_path('/' + old_file_system_name + '/' +
                                              old_dir_name)
            properties = new_directory_client.get_directory_properties()

            self.assertIsNotNone(properties)
        finally:
            # Always delete the fixed-name file system, even if a step failed
            self.dsc.delete_file_system(old_file_system_name)

    @record
    def test_rename_to_an_existing_directory_in_another_file_system(self):
        """Rename a directory onto an existing directory in another file system.

        After the rename the source client can no longer read its properties
        and the returned client points at the destination directory's URL.
        """
        # create dir1 under file system1
        # NOTE(review): "destfilesystem" is a fixed name and is not deleted in
        # this test - confirm that cleanup happens elsewhere.
        destination_file_system_name = "destfilesystem"
        destination_dir_name = "destdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()
        destination_directory_client = fs_client.create_directory(
            destination_dir_name)

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + destination_dir_name)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on Python 3.12+.
        self.assertEqual(res.url, destination_directory_client.url)

    @record
    def test_rename_with_none_existing_destination_condition_and_source_unmodified_condition(
            self):
        """Conditionally rename a directory into another file system.

        The rename is issued with ``MatchConditions.IfMissing`` for the
        destination and ``MatchConditions.IfNotModified`` plus the source's
        current etag for the source.
        """
        non_existing_dir_name = "nonexistingdir"

        # create a file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to a non existing directory under file system1,
        # when dir1 does not exist and dir2 wasn't modified
        etag = source_directory_client.get_directory_properties()['etag']
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name,
            match_condition=MatchConditions.IfMissing,
            source_etag=etag,
            source_match_condition=MatchConditions.IfNotModified)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on Python 3.12+.
        self.assertEqual(non_existing_dir_name, res.path_name)

    @record
    def test_rename_to_an_non_existing_directory_in_another_file_system(self):
        """Rename a directory onto a not-yet-existing path in another file system.

        After the rename the source client can no longer read its properties
        and the returned client's ``path_name`` is the new directory name.
        """
        # create file system1, which will receive the renamed directory
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        non_existing_dir_name = "nonexistingdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to the new path under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on Python 3.12+.
        self.assertEqual(non_existing_dir_name, res.path_name)

    @record
    def test_rename_directory_to_non_empty_directory(self):
        """Rename a directory onto a non-empty directory. Currently skipped."""
        # TODO: investigate why rename non empty dir doesn't work
        # A non-empty reason makes the skip visible in test reports.
        pytest.skip("rename to a non-empty directory does not work yet")
        dir1 = self._create_directory_and_get_directory_client("dir1")
        dir1.create_sub_directory("subdir")

        dir2 = self._create_directory_and_get_directory_client("dir2")
        dir2.rename_directory(dir1.file_system_name + '/' + dir1.path_name)

        # after the rename, dir2's original path should no longer resolve
        with self.assertRaises(HttpResponseError):
            dir2.get_directory_properties()

    @record
    def test_get_properties(self):
        """Directory properties expose the metadata given at creation."""
        # Arrange
        expected_metadata = {'hello': 'world', 'number': '42'}
        name = self._get_directory_reference()
        client = self.dsc.get_directory_client(self.file_system_name, name)
        client.create_directory(metadata=expected_metadata)

        # Act
        fetched = client.get_directory_properties()

        # Assert
        self.assertTrue(fetched)
        self.assertIsNotNone(fetched.metadata)
        self.assertEqual(fetched.metadata['hello'], expected_metadata['hello'])

    @record
    def test_using_directory_sas_to_read(self):
        """Read access control through a client authenticated by a directory SAS."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: an existing directory to generate the token for
        existing_client = self._create_directory_and_get_directory_client()
        target_directory = existing_client.path_name

        # directory-level token carrying only the read permission, valid 1 hour
        sas_token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            target_directory,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Act: use a fresh client whose only credential is the SAS token
        sas_client = DataLakeDirectoryClient(
            self.dsc.url,
            self.file_system_name,
            target_directory,
            credential=sas_token)
        fetched = sas_client.get_access_control()

        # Assert
        self.assertIsNotNone(fetched)

    @record
    def test_using_directory_sas_to_create(self):
        """Create a directory through a client authenticated by a directory SAS."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: directory-level token carrying only the create permission,
        # valid for one hour
        target_directory = self._get_directory_reference()
        sas_token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            target_directory,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(create=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Act: use a client whose only credential is the SAS token
        sas_client = DataLakeDirectoryClient(
            self.dsc.url,
            self.file_system_name,
            target_directory,
            credential=sas_token)
        result = sas_client.create_directory()

        # Assert
        self.assertIsNotNone(result)
class FileSystemTest(StorageTestCase):
    def _setUp(self, account_name, account_key):
        """Build the service client and the per-test bookkeeping.

        Sets ``self.dsc`` (the service client), ``self.config`` (its
        ``_config``) and ``self.test_file_systems`` (an empty list of names).
        """
        service_client = DataLakeServiceClient(
            self._get_account_url(account_name), account_key)
        self.dsc = service_client
        self.config = service_client._config
        self.test_file_systems = []

    def tearDown(self):
        """Delete every registered file system (best effort), then run the base tearDown.

        Nothing is deleted in playback mode. Each deletion is attempted on its
        own so that one failure (for example a file system the test already
        removed) does not stop the remaining ones from being cleaned up.

        :return: whatever the base class ``tearDown`` returns.
        """
        if not self.is_playback():
            # _setUp may not have run (a test can return or skip before
            # calling it), so the list is not guaranteed to exist.
            for file_system in getattr(self, 'test_file_systems', []):
                try:
                    self.dsc.delete_file_system(file_system)
                except Exception:
                    # Cleanup is deliberately best effort; unlike a bare
                    # ``except`` this still lets KeyboardInterrupt through.
                    pass

        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Return a resource name built from ``prefix`` and record it in ``self.test_file_systems``."""
        name = self.get_resource_name(prefix)
        # recorded so tearDown can try to delete it later
        self.test_file_systems.append(name)
        return name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create a file system under a freshly registered name and return the result of the create call."""
        name = self._get_file_system_reference(prefix=file_system_prefix)
        return self.dsc.create_file_system(name)


    # --Test cases--------------------------------------------------------------

    @DataLakePreparer()
    def test_create_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Creating a file system through its client returns a truthy result."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        fs_name = self._get_file_system_reference()
        fs_client = self.dsc.get_file_system_client(fs_name)

        # Act
        result = fs_client.create_file_system()

        # Assert
        self.assertTrue(result)

    @DataLakePreparer()
    def test_file_system_exists(self, datalake_storage_account_name, datalake_storage_account_key):
        """exists() is true for a created file system and false for one never created."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        created_name = self._get_file_system_reference()
        created_client = self.dsc.get_file_system_client(created_name)
        missing_client = self.dsc.get_file_system_client("nonexistentfs")

        # Act: only the first client's file system is actually created
        created_client.create_file_system()

        # Assert
        self.assertTrue(created_client.exists())
        self.assertFalse(missing_client.exists())

    @DataLakePreparer()
    def test_create_file_system_with_metadata(self, datalake_storage_account_name, datalake_storage_account_key):
        """Metadata passed at creation is returned by get_file_system_properties."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        expected_metadata = {'hello': 'world', 'number': '42'}
        fs_name = self._get_file_system_reference()
        fs_client = self.dsc.get_file_system_client(fs_name)

        # Act
        result = fs_client.create_file_system(metadata=expected_metadata)
        stored_metadata = fs_client.get_file_system_properties().metadata

        # Assert
        self.assertTrue(result)
        self.assertDictEqual(stored_metadata, expected_metadata)

    @DataLakePreparer()
    def test_set_file_system_acl(self, datalake_storage_account_name, datalake_storage_account_key):
        """Set access policies on a file system and read them back.

        First one signed identifier is set together with public access, then
        two identifiers without public access.
        """
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        fs_client = self._create_file_system()
        policy = AccessPolicy(
            permission=FileSystemSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            start=datetime.utcnow())

        # Act: a single signed identifier plus public access
        set_response = fs_client.set_file_system_access_policy(
            {'testid': policy}, public_access=PublicAccess.FileSystem)

        # Assert
        self.assertIsNotNone(set_response.get('etag'))
        self.assertIsNotNone(set_response.get('last_modified'))
        first_acl = fs_client.get_file_system_access_policy()
        self.assertIsNotNone(first_acl['public_access'])
        self.assertEqual(len(first_acl['signed_identifiers']), 1)

        # Setting the policy again without public_access leaves public access as None
        fs_client.set_file_system_access_policy(
            {'testid': policy, 'test2': policy})
        second_acl = fs_client.get_file_system_access_policy()
        self.assertIsNone(second_acl['public_access'])
        self.assertEqual(len(second_acl['signed_identifiers']), 2)

    @DataLakePreparer()
    def test_list_file_systemss(self, datalake_storage_account_name, datalake_storage_account_key):
        """A newly created file system shows up when listing all file systems."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        fs_name = self._get_file_system_reference()
        created_fs = self.dsc.create_file_system(fs_name)

        # Act
        listed = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(listed)
        self.assertGreaterEqual(len(listed), 1)
        first_entry = listed[0]
        self.assertIsNotNone(first_entry)
        self.assertNamedItemInContainer(listed, created_fs.file_system_name)
        self.assertIsNotNone(first_entry.has_immutability_policy)
        self.assertIsNotNone(first_entry.has_legal_hold)

    @DataLakePreparer()
    def test_rename_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Rename a file system through the service client (runs in playback only)."""
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: two existing file systems and one target name
        first_source = self._get_file_system_reference(prefix="oldcontainer1")
        second_source = self._get_file_system_reference(prefix="oldcontainer2")
        target_name = self._get_file_system_reference(prefix="newcontainer")
        first_fs = self.dsc.create_file_system(first_source)
        self.dsc.create_file_system(second_source)

        # Act
        renamed_fs = self.dsc._rename_file_system(name=first_source, new_name=target_name)

        # Assert: the target name is now taken
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=second_source, new_name=target_name)
        # the client bound to the old name no longer resolves
        with self.assertRaises(HttpResponseError):
            first_fs.get_file_system_properties()
        # renaming a source that was never created fails
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name="badfilesystem", new_name="filesystem")
        self.assertEqual(target_name, renamed_fs.get_file_system_properties().name)

    @DataLakePreparer()
    def test_rename_file_system_with_file_system_client(self, datalake_storage_account_name, datalake_storage_account_key):
        """Rename a file system through its own client. Currently skipped."""
        pytest.skip("Feature not yet enabled. Make sure to record this test once enabled.")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: two existing file systems, a target name and a name that is never created
        first_source = self._get_file_system_reference(prefix="oldcontainer1")
        second_source = self._get_file_system_reference(prefix="oldcontainer2")
        target_name = self._get_file_system_reference(prefix="newcontainer")
        missing_name = self._get_file_system_reference(prefix="badcontainer")
        first_fs = self.dsc.create_file_system(first_source)
        second_fs = self.dsc.create_file_system(second_source)
        missing_fs = self.dsc.get_file_system_client(missing_name)

        # Act
        renamed_fs = first_fs._rename_file_system(new_name=target_name)

        # Assert: the target name is now taken
        with self.assertRaises(HttpResponseError):
            second_fs._rename_file_system(new_name=target_name)
        # the client bound to the old name no longer resolves
        with self.assertRaises(HttpResponseError):
            first_fs.get_file_system_properties()
        # renaming a file system that was never created fails
        with self.assertRaises(HttpResponseError):
            missing_fs._rename_file_system(new_name="filesystem")
        self.assertEqual(target_name, renamed_fs.get_file_system_properties().name)

    @DataLakePreparer()
    def test_rename_file_system_with_source_lease(self, datalake_storage_account_name, datalake_storage_account_key):
        """Renaming a leased file system needs the matching lease (runs in playback only)."""
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: a file system with an acquired lease
        source_name = self._get_file_system_reference(prefix="old")
        target_name = self._get_file_system_reference(prefix="new")
        source_fs = self.dsc.create_file_system(source_name)
        lease = source_fs.acquire_lease()

        # Renaming without a lease, or with a wrong lease id, is rejected
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=source_name, new_name=target_name)
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=source_name, new_name=target_name, lease="bad_id")

        # With the acquired lease the rename goes through
        renamed_fs = self.dsc._rename_file_system(name=source_name, new_name=target_name, lease=lease)
        self.assertEqual(target_name, renamed_fs.get_file_system_properties().name)

    @DataLakePreparer()
    def test_undelete_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Delete a file system and restore it under a new name (runs in playback only)."""
        # Needs soft delete enabled account.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        fs_name = self._get_file_system_reference()
        fs_client = self.dsc.create_file_system(fs_name)

        # Act
        fs_client.delete_file_system()
        # the deleted file system must no longer be reachable
        with self.assertRaises(ResourceNotFoundError):
            fs_client.get_file_system_properties()

        listed = list(self.dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(listed) >= 1)

        restore_count = 0
        for entry in listed:
            # only deleted entries carrying our file system's name are restored
            if not (entry.deleted and entry.name == fs_client.file_system_name):
                continue
            restored_client = self.dsc.undelete_file_system(
                entry.name, entry.deleted_version,
                new_name="restored" + fs_name + str(restore_count))
            restore_count += 1

            # the restored file system must be reachable again
            restored_props = restored_client.get_file_system_properties()
            self.assertIsNotNone(restored_props)

    @DataLakePreparer()
    def test_restore_to_existing_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Restoring a deleted file system onto an existing name fails (runs in playback only)."""
        # Needs soft delete enabled account.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: one file system that stays, one that gets deleted
        kept_name = self._get_file_system_reference(prefix="existing2")
        doomed_name = self._get_file_system_reference(prefix="filesystem2")
        kept_client = self.dsc.create_file_system(kept_name)
        doomed_client = self.dsc.create_file_system(doomed_name)

        # Act
        doomed_client.delete_file_system()
        # the deleted file system must no longer be reachable
        with self.assertRaises(ResourceNotFoundError):
            doomed_client.get_file_system_properties()

        listed = list(self.dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(listed) >= 1)

        for entry in listed:
            # only deleted entries carrying the deleted file system's name are tried
            if not (entry.deleted and entry.name == doomed_client.file_system_name):
                continue
            # restoring onto the name of the surviving file system is rejected
            with self.assertRaises(HttpResponseError):
                self.dsc.undelete_file_system(
                    entry.name, entry.deleted_version,
                    new_name=kept_client.file_system_name)

    @DataLakePreparer()
    def test_restore_file_system_with_sas(self, datalake_storage_account_name, datalake_storage_account_key):
        """Delete and restore a file system using an account-SAS client. Currently skipped."""
        pytest.skip(
            "We are generating a SAS token therefore play only live but we also need a soft delete enabled account.")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: a service client whose credential is an account SAS valid for one hour
        sas_token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(service=True, file_system=True),
            AccountSasPermissions(read=True, write=True, list=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )
        sas_service_client = DataLakeServiceClient(self.dsc.url, sas_token)
        fs_name = self._get_file_system_reference(prefix="filesystem")
        fs_client = sas_service_client.create_file_system(fs_name)

        # Act
        fs_client.delete_file_system()
        # the deleted file system must no longer be reachable
        with self.assertRaises(ResourceNotFoundError):
            fs_client.get_file_system_properties()

        listed = list(sas_service_client.list_file_systems(include_deleted=True))
        self.assertTrue(len(listed) >= 1)

        restore_count = 0
        for entry in listed:
            # only deleted entries carrying our file system's name are restored
            if not (entry.deleted and entry.name == fs_client.file_system_name):
                continue
            restored_client = sas_service_client.undelete_file_system(
                entry.name, entry.deleted_version,
                new_name="restored" + fs_name + str(restore_count))
            restore_count += 1

            # the restored file system must be reachable again
            restored_props = restored_client.get_file_system_properties()
            self.assertIsNotNone(restored_props)

    @DataLakePreparer()
    def test_delete_file_system_with_existing_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Deleting an existing file system returns None."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        fs_client = self._create_file_system()

        # Act
        result = fs_client.delete_file_system()

        # Assert
        self.assertIsNone(result)

    @DataLakePreparer()
    def test_delete_none_existing_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """Deleting a file system that was never created raises ResourceNotFoundError."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: a client for a name that is never created
        missing_client = self.dsc.get_file_system_client("fakeclient")

        # Act / Assert
        with self.assertRaises(ResourceNotFoundError):
            missing_client.delete_file_system(
                match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_list_file_systems_with_include_metadata(self, datalake_storage_account_name, datalake_storage_account_key):
        """Listing with include_metadata=True returns the metadata set on the file system."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        fs_client = self._create_file_system()
        expected_metadata = {'hello': 'world', 'number': '42'}
        fs_client.set_file_system_metadata(expected_metadata)

        # Act: restrict the listing to names starting with our file system's name
        listed = list(self.dsc.list_file_systems(
            name_starts_with=fs_client.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(listed)
        self.assertGreaterEqual(len(listed), 1)
        first_entry = listed[0]
        self.assertIsNotNone(first_entry)
        self.assertNamedItemInContainer(listed, fs_client.file_system_name)
        self.assertDictEqual(first_entry.metadata, expected_metadata)

    @DataLakePreparer()
    def test_list_file_systems_by_page(self, datalake_storage_account_name, datalake_storage_account_key):
        """The first page of a paged listing holds at least results_per_page entries."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: six file systems whose names share the "filesystem" prefix
        for index in range(6):
            self._create_file_system(file_system_prefix="filesystem{}".format(index))

        # Act: take only the first page of the listing
        pages = self.dsc.list_file_systems(
            results_per_page=3,
            name_starts_with="file",
            include_metadata=True).by_page()
        first_page = list(next(pages))

        # Assert
        self.assertIsNotNone(first_page)
        self.assertGreaterEqual(len(first_page), 3)

    @DataLakePreparer()
    def test_list_file_systems_with_public_access(self, datalake_storage_account_name, datalake_storage_account_key):
        """A file system created with public_access="blob" is listed as PublicAccess.File."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        fs_name = self._get_file_system_reference()
        fs_client = self.dsc.get_file_system_client(fs_name)
        fs_client.create_file_system(public_access="blob")
        expected_metadata = {'hello': 'world', 'number': '42'}
        fs_client.set_file_system_metadata(expected_metadata)

        # Act: restrict the listing to names starting with our file system's name
        listed = list(self.dsc.list_file_systems(
            name_starts_with=fs_client.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(listed)
        self.assertGreaterEqual(len(listed), 1)
        first_entry = listed[0]
        self.assertIsNotNone(first_entry)
        self.assertNamedItemInContainer(listed, fs_client.file_system_name)
        self.assertDictEqual(first_entry.metadata, expected_metadata)
        self.assertTrue(first_entry.public_access is PublicAccess.File)

    @DataLakePreparer()
    def test_get_file_system_properties(self, datalake_storage_account_name, datalake_storage_account_key):
        """File system properties expose the metadata and the policy/hold flags."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        expected_metadata = {'hello': 'world', 'number': '42'}
        fs_client = self._create_file_system()
        fs_client.set_file_system_metadata(expected_metadata)

        # Act
        fetched = fs_client.get_file_system_properties()

        # Assert
        self.assertIsNotNone(fetched)
        self.assertDictEqual(fetched.metadata, expected_metadata)
        self.assertIsNotNone(fetched.has_immutability_policy)
        self.assertIsNotNone(fetched.has_legal_hold)

    @DataLakePreparer()
    def test_service_client_session_closes_after_filesystem_creation(self, datalake_storage_account_name, datalake_storage_account_key):
        """Use one service client as a context manager, then a second client after it has exited."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: the second client is built before the first one is opened
        second_client = DataLakeServiceClient(self.dsc.url, credential=datalake_storage_account_key)

        # Act: create and delete a file system inside the first client's context
        with DataLakeServiceClient(self.dsc.url, credential=datalake_storage_account_key) as scoped_client:
            first_fs = scoped_client.create_file_system(self._get_file_system_reference(prefix="fs1"))
            first_fs.delete_file_system()

        # the second client is used only after the first context has exited
        second_client.create_file_system(self._get_file_system_reference(prefix="fs2"))
        second_client.close()

    @DataLakePreparer()
    def test_list_paths(self, datalake_storage_account_name, datalake_storage_account_key):
        """Listing paths returns all six created directories with datetime last_modified."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange: six directories in a fresh file system
        fs_client = self._create_file_system()
        for index in range(6):
            fs_client.create_directory("dir1{}".format(index))

        # Act
        listed_paths = list(fs_client.get_paths(upn=True))

        # Assert
        self.assertEqual(len(listed_paths), 6)
        self.assertTrue(isinstance(listed_paths[0].last_modified, datetime))

    @DataLakePreparer()
    def test_list_paths_which_are_all_files(self, datalake_storage_account_name, datalake_storage_account_key):
        """List the paths of a file system that contains six files and no directories."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange: file0 .. file5 directly under the root.
        fs_client = self._create_file_system()
        for index in range(6):
            fs_client.create_file("file" + str(index))

        # Act
        listed = list(fs_client.get_paths(upn=True))

        # Assert
        self.assertEqual(len(listed), 6)

    @DataLakePreparer()
    def test_list_paths_with_max_per_page(self, datalake_storage_account_name, datalake_storage_account_key):
        """Page through six directories: a page of two, then a page of four resumed by continuation token."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange: dir10 .. dir15 under the root.
        fs_client = self._create_file_system()
        for index in range(6):
            fs_client.create_directory("dir1" + str(index))

        # Act: first listing, limited to two results per page.
        first_pager = fs_client.get_paths(max_results=2, upn=True).by_page()
        first_page = list(next(first_pager))

        # Act: second listing, four results per page, resumed from the first pager's token.
        second_listing = fs_client.get_paths(max_results=4, upn=True)
        second_pager = second_listing.by_page(continuation_token=first_pager.continuation_token)
        second_page = list(next(second_pager))

        # Assert
        self.assertEqual(len(first_page), 2)
        self.assertEqual(len(second_page), 4)

    @DataLakePreparer()
    def test_list_paths_under_specific_path(self, datalake_storage_account_name, datalake_storage_account_key):
        """List only the entries beneath ``dir10/subdir`` and check the first entry's content length."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange: six directories, each with subdir/subsub and a five-byte subdir/file.
        fs_client = self._create_file_system()
        for index in range(6):
            dir_name = "dir1" + str(index)
            fs_client.create_directory(dir_name)

            # "subdir" lives under the directory just created; "subsub" lives under "subdir".
            nested = fs_client.get_directory_client(dir_name).create_sub_directory("subdir")
            nested.create_sub_directory("subsub")

            # A file inside "subdir", with five bytes appended and flushed.
            payload_client = nested.create_file("file")
            payload_client.append_data(b"abced", 0, 5)
            payload_client.flush_data(5)

        # Act: first page (at most two entries) of the listing rooted at dir10/subdir.
        pager = fs_client.get_paths(path="dir10/subdir", max_results=2, upn=True).by_page()
        first_page = list(next(pager))

        # Assert
        self.assertEqual(len(first_page), 2)
        self.assertEqual(first_page[0].content_length, 5)

    @DataLakePreparer()
    def test_list_paths_recursively(self, datalake_storage_account_name, datalake_storage_account_key):
        """Recursively list a tree of six directories, each holding subdir, subdir/subsub and subdir/file."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange
        fs_client = self._create_file_system()
        for index in range(6):
            dir_name = "dir1" + str(index)
            fs_client.create_directory(dir_name)

            # "subdir" under the new directory, and "subsub" under "subdir".
            nested = fs_client.get_directory_client(dir_name).create_sub_directory("subdir")
            nested.create_sub_directory("subsub")

            # An empty file inside "subdir".
            nested.create_file("file")

        # Act
        everything = list(fs_client.get_paths(recursive=True, upn=True))

        # Assert: 6 directories x (dir, subdir, subsub, file) = 24 paths.
        self.assertEqual(len(everything), 24)

    @DataLakePreparer()
    def test_list_paths_pages_correctly(self, datalake_storage_account_name, datalake_storage_account_key):
        """Twelve top-level paths listed six per page yield exactly two pages, then StopIteration."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange: six directories followed by six files.
        fs_client = self._create_file_system(file_system_prefix="fs1")
        for index in range(6):
            fs_client.create_directory("dir1" + str(index))
        for index in range(6):
            fs_client.create_file("file" + str(index))

        # Act: pull two pages, then expect the pager to be exhausted.
        pager = fs_client.get_paths(max_results=6, upn=True).by_page()
        first_page = list(next(pager))
        second_page = list(next(pager))
        with self.assertRaises(StopIteration):
            list(next(pager))

        # Assert
        self.assertEqual(len(first_page), 6)
        self.assertEqual(len(second_page), 6)

    @DataLakePreparer()
    def test_create_directory_from_file_system_client(self, datalake_storage_account_name, datalake_storage_account_key):
        """Create ``dir1/dir2`` via the file system client; a non-recursive listing shows only ``dir1``."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange
        fs_client = self._create_file_system()
        fs_client.create_directory("dir1/dir2")

        # Act
        top_level = list(fs_client.get_paths(recursive=False, upn=True))

        # Assert
        self.assertEqual(len(top_level), 1)
        self.assertEqual(top_level[0].name, "dir1")

    @DataLakePreparer()
    def test_create_file_from_file_system_client(self, datalake_storage_account_name, datalake_storage_account_key):
        """Create ``dir1/dir2/file`` via the file system client and inspect the recursive listing."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange
        fs_client = self._create_file_system()
        fs_client.create_file("dir1/dir2/file")

        # Act
        entries = list(fs_client.get_paths(recursive=True, upn=True))

        # Assert: three entries, starting with dir1 and ending with a non-directory.
        self.assertEqual(len(entries), 3)
        self.assertEqual(entries[0].name, "dir1")
        self.assertEqual(entries[2].is_directory, False)

    @DataLakePreparer()
    def test_get_root_directory_client(self, datalake_storage_account_name, datalake_storage_account_key):
        """Set an ACL through the root directory client and read the same ACL back."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange
        fs_client = self._create_file_system()
        root_client = fs_client._get_root_directory_client()

        # Act
        expected_acl = 'user::rwx,group::r-x,other::rwx'
        root_client.set_access_control(acl=expected_acl)
        reported = root_client.get_access_control()

        # Assert
        self.assertEqual(expected_acl, reported['acl'])

    @DataLakePreparer()
    def test_file_system_sessions_closes_properly(self, datalake_storage_account_name, datalake_storage_account_key):
        """Open file and directory clients as nested context managers inside a file system client context."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)

        # Arrange
        file_system_client = self._create_file_system("fenrhxsbfvsdvdsvdsadb")

        # Each child client is entered, used once, and exited before the next one is opened.
        with file_system_client as fs_client:
            for file_name in ("file1.txt", "file2.txt"):
                with fs_client.get_file_client(file_name) as file_client:
                    file_client.create_file()
            for directory_name in ("file1", "file2"):
                with fs_client.get_directory_client(directory_name) as directory_client:
                    directory_client.create_directory()
Example #5
0
class FileTest(StorageTestCase):
    def setUp(self):
        """Build the service client and, outside playback, create the per-test file system."""
        super(FileTest, self).setUp()

        account_url = self._get_account_url()
        account_key = self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY
        self.dsc = DataLakeServiceClient(account_url, credential=account_key, logging_enable=True)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        # Nothing is created on the service when running in playback.
        if self.is_playback():
            return

        fs_client = self.dsc.get_file_system_client(self.file_system_name)
        try:
            fs_client.create_file_system(timeout=5)
        except ResourceExistsError:
            # An already-existing file system is acceptable.
            pass

    def tearDown(self):
        """Delete the per-test file system on a best-effort basis, then run the base-class teardown.

        The deletion is skipped in playback. Returns whatever the base class
        ``tearDown`` returns.
        """
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Cleanup failures must not fail the test. Catching Exception
                # rather than using a bare ``except`` lets KeyboardInterrupt
                # and SystemExit propagate.
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a directory name obtained from ``get_resource_name`` for ``prefix``."""
        return self.get_resource_name(prefix)

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        """Return a file name obtained from ``get_resource_name`` for ``prefix``."""
        return self.get_resource_name(prefix)

    def _create_file_system(self):
        """Create a file system through the service client and return the result.

        NOTE(review): ``_get_file_system_reference`` is not defined in this
        class in the visible source; presumably a base class supplies it --
        confirm, otherwise this helper raises AttributeError when called.
        """
        new_name = self._get_file_system_reference()
        return self.dsc.create_file_system(new_name)

    def _create_directory_and_return_client(self, directory=None):
        """Create a directory in the test file system and return its client.

        :param directory: Name to use; when falsy, a name is generated with
            ``_get_directory_reference``.
        :return: The directory client, after ``create_directory`` was called on it.
        """
        target_name = directory or self._get_directory_reference()
        new_dir_client = self.dsc.get_directory_client(self.file_system_name, target_name)
        new_dir_client.create_directory()
        return new_dir_client

    def _create_file_and_return_client(self, directory="", file=None):
        """Create a file in the test file system and return its client.

        :param directory: Parent directory name; when truthy the directory is
            created first. The file path is always ``directory + '/' + file``,
            so the default empty string yields a path with a leading slash.
        :param file: File name; when falsy, a name is generated with
            ``_get_file_reference``.
        :return: The file client, after ``create_file`` was called on it.
        """
        if directory:
            self._create_directory_and_return_client(directory)

        file_name = file or self._get_file_reference()
        file_path = '/'.join((directory, file_name))

        new_file_client = self.dsc.get_file_client(self.file_system_name, file_path)
        new_file_client.create_file()
        return new_file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        """Create a file beneath a freshly created directory and check that a response came back."""
        # Arrange: the directory that will contain the file.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        # Act
        new_file_client = parent_client.get_file_client('filename')
        create_response = new_file_client.create_file()

        # Assert
        self.assertIsNotNone(create_response)

    @record
    def test_create_file_using_oauth_token_credential(self):
        """Create a file through a DataLakeFileClient that authenticates with an OAuth token credential."""
        # Arrange
        target_name = self._get_file_reference()
        oauth_credential = self.generate_oauth_token()

        # The file client is built directly from the account URL and the token credential.
        oauth_file_client = DataLakeFileClient(
            self.dsc.url, self.file_system_name, target_name, credential=oauth_credential)

        # Act
        create_response = oauth_file_client.create_file()

        # Assert
        self.assertIsNotNone(create_response)

    @record
    def test_create_file_with_existing_name(self):
        """Creating over an existing file with the IfMissing condition raises ResourceExistsError."""
        # Arrange: after this call the file already exists.
        existing_client = self._create_file_and_return_client()

        # Act / Assert: IfMissing (if_none_match='*') only allows creation when no file is there.
        self.assertRaises(
            ResourceExistsError,
            existing_client.create_file,
            match_condition=MatchConditions.IfMissing)

    @record
    def test_create_file_with_lease_id(self):
        """Re-create a leased file while passing the lease, then compare etag and last_modified."""
        # Arrange
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()
        leased_client = parent_client.get_file_client('filename')

        # Act: create, acquire a lease, then create again under that lease.
        leased_client.create_file()
        active_lease = leased_client.acquire_lease()
        recreate_response = leased_client.create_file(lease=active_lease)

        # Assert: the stored properties agree with what the second create reported.
        current_props = leased_client.get_file_properties()
        self.assertIsNotNone(current_props)
        self.assertEqual(current_props.etag, recreate_response.get('etag'))
        self.assertEqual(current_props.last_modified, recreate_response.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        """Create a file addressed by a bare name, i.e. without any parent directory in its path."""
        # Arrange: a client for "filename" directly in the test file system.
        root_file_client = self.dsc.get_file_client(self.file_system_name, "filename")

        # Act
        create_response = root_file_client.create_file()

        # Assert
        self.assertIsNotNone(create_response)

    @record
    def test_append_data(self):
        """Append three bytes to a newly created file and check that a response came back."""
        # Arrange: a directory containing an empty file.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        target_client.create_file()

        # Act
        append_response = target_client.append_data(b'abc', 0, 3)

        # Assert
        self.assertIsNotNone(append_response)

    @record
    def test_append_empty_data(self):
        """Flush a newly created file at offset 0 and verify that its reported size is 0."""
        # Arrange
        file_client = self._create_file_and_return_client()

        # Act: flush without having appended anything.
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        # Assert: assertEqual is needed here. assertIsNotNone(value, 0) would
        # treat 0 as the failure message and pass for any non-None size.
        self.assertEqual(file_props['size'], 0)

    @record
    def test_flush_data(self):
        """Append three bytes, flush them, and verify the file size becomes 3."""
        # Arrange: a directory containing an empty file.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        target_client.create_file()

        # Act
        target_client.append_data(b'abc', 0, 3)
        flush_response = target_client.flush_data(3)

        # Assert
        flushed_props = target_client.get_file_properties()
        self.assertIsNotNone(flush_response)
        self.assertEqual(flushed_props['size'], 3)

    @record
    def test_flush_data_with_match_condition(self):
        """Flush twice with IfNotModified and the creation etag: the first succeeds, the second raises."""
        # Arrange: a directory containing an empty file; keep the create response for its etag.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        create_response = target_client.create_file()

        # Act
        target_client.append_data(b'abc', 0, 3)

        # The first flush is expected to succeed with the etag returned by create_file.
        target_client.flush_data(
            3, etag=create_response['etag'], match_condition=MatchConditions.IfNotModified)

        # More data is appended, then the same creation etag is presented again;
        # this time the flush is expected to be rejected.
        target_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            target_client.flush_data(
                6, etag=create_response['etag'], match_condition=MatchConditions.IfNotModified)

    def test_upload_data_to_none_existing_file(self):
        """Upload 200 KiB with three-way concurrency to a file that was never created, then read it back."""
        # Parallel upload cannot be recorded, so the test is a no-op when a recording file is needed.
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: only the parent directory is created; the file itself is not.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        payload = self.get_random_bytes(200 * 1024)

        # Act
        target_client.upload_data(payload, overwrite=True, max_concurrency=3)

        # Assert
        round_tripped = target_client.download_file().readall()
        self.assertEqual(payload, round_tripped)

    @record
    def test_upload_data_to_existing_file(self):
        """Uploading onto an existing file fails without ``overwrite=True`` and succeeds with it."""
        # Arrange: a directory containing a file that already holds three bytes.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        target_client.create_file()
        target_client.append_data(b"abc", 0)
        target_client.flush_data(3)

        replacement = self.get_random_bytes(100)

        # Act / Assert: without overwrite the upload is rejected ...
        with self.assertRaises(HttpResponseError):
            target_client.upload_data(replacement, max_concurrency=5)
        # ... and with overwrite it replaces the content.
        target_client.upload_data(replacement, overwrite=True, max_concurrency=5)

        round_tripped = target_client.download_file().readall()
        self.assertEqual(replacement, round_tripped)

    @record
    def test_upload_data_to_existing_file_with_content_settings(self):
        """Upload onto an existing file with content settings, guarded by the file's creation etag."""
        # Arrange: a directory containing an empty file; remember the etag from its creation.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        creation_etag = target_client.create_file()['etag']

        payload = self.get_random_bytes(100)
        wanted_settings = ContentSettings(
            content_language='spanish', content_disposition='inline')

        # Act: the upload is conditional on the file being unmodified since creation.
        target_client.upload_data(
            payload,
            max_concurrency=5,
            content_settings=wanted_settings,
            etag=creation_etag,
            match_condition=MatchConditions.IfNotModified)

        round_tripped = target_client.download_file().readall()
        stored_props = target_client.get_file_properties()

        # Assert
        self.assertEqual(payload, round_tripped)
        self.assertEqual(
            stored_props.content_settings.content_language,
            wanted_settings.content_language)

    @record
    def test_upload_data_to_existing_file_with_permission_and_umask(self):
        """Overwrite an existing file with permissions 0777 and umask 0000, then check the resulting permissions."""
        # Arrange: a directory containing an empty file; remember the etag from its creation.
        parent_name = self._get_directory_reference()
        parent_client = self.dsc.get_directory_client(self.file_system_name, parent_name)
        parent_client.create_directory()

        target_client = parent_client.get_file_client('filename')
        creation_etag = target_client.create_file()['etag']

        payload = self.get_random_bytes(100)

        # Act: the overwrite is conditional on the file being unmodified since creation.
        target_client.upload_data(
            payload,
            overwrite=True,
            max_concurrency=5,
            permissions='0777',
            umask="0000",
            etag=creation_etag,
            match_condition=MatchConditions.IfNotModified)

        round_tripped = target_client.download_file().readall()
        access_control = target_client.get_access_control()

        # Assert
        self.assertEqual(payload, round_tripped)
        self.assertEqual(access_control['permissions'], 'rwxrwxrwx')

    @record
    def test_read_file(self):
        """Write 1 KiB of random bytes to a file and download the same bytes back."""
        # Arrange
        target_client = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)
        payload_size = len(payload)

        # Upload: append everything at offset 0, then flush at the total length.
        target_client.append_data(payload, 0, payload_size)
        target_client.flush_data(payload_size)

        # Download and compare with what was uploaded.
        round_tripped = target_client.download_file().readall()
        self.assertEqual(payload, round_tripped)

    @record
    def test_read_file_with_user_delegation_key(self):
        """Download a file through a SAS token that was signed with a user delegation key."""
        # The SAS URL is calculated from a storage key, so this test runs live only.
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: a file holding 1 KiB of random bytes.
        source_client = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)
        source_client.append_data(payload, 0, len(payload))
        source_client.flush_data(len(payload))

        # Obtain a user delegation key, valid for one hour, via an OAuth-authenticated service client.
        oauth_credential = self.generate_oauth_token()
        oauth_service = DataLakeServiceClient(
            self._get_oauth_account_url(), credential=oauth_credential)
        key_start = datetime.utcnow()
        key_expiry = datetime.utcnow() + timedelta(hours=1)
        delegation_key = oauth_service.get_user_delegation_key(key_start, key_expiry)

        # Sign a file-level SAS with that key; the directory name argument is None.
        sas_token = generate_file_sas(
            source_client.account_name,
            source_client.file_system_name,
            None,
            source_client.path_name,
            delegation_key,
            permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Download through a client that only holds the SAS token and compare the bytes.
        sas_client = DataLakeFileClient(
            self._get_account_url(),
            source_client.file_system_name,
            source_client.path_name,
            credential=sas_token)
        round_tripped = sas_client.download_file().readall()
        self.assertEqual(payload, round_tripped)

    @record
    def test_read_file_into_file(self):
        """Download a file into a local file at FILE_PATH and compare its bytes with the upload."""
        # Arrange: a remote file holding 1 KiB of random bytes.
        target_client = self._create_file_and_return_client()
        payload = self.get_random_bytes(1024)
        target_client.append_data(payload, 0, len(payload))
        target_client.flush_data(len(payload))

        # Act: stream the download (two-way concurrency) into the local file.
        with open(FILE_PATH, 'wb') as sink:
            target_client.download_file(max_concurrency=2).readinto(sink)

        # Assert: the local file now holds exactly the uploaded bytes.
        with open(FILE_PATH, 'rb') as source:
            written = source.read()
            self.assertEqual(payload, written)

    @record
    def test_read_file_to_text(self):
        """Upload random text and download it again decoded as UTF-8."""
        # Arrange
        target_client = self._create_file_and_return_client()
        text_payload = self.get_random_text_data(1024)

        # Upload: append at offset 0 and flush, both using len() of the text.
        target_client.append_data(text_payload, 0, len(text_payload))
        target_client.flush_data(len(text_payload))

        # Act: download with an encoding so readall() yields decoded text.
        downloader = target_client.download_file(max_concurrency=2, encoding="utf-8")
        round_tripped = downloader.readall()

        # Assert
        self.assertEqual(text_payload, round_tripped)

    @record
    def test_account_sas(self):
        """A read-only account SAS can fetch file properties but cannot append data."""
        # The SAS URL is calculated from the storage key, so this test runs live only.
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: a file created without a parent directory.
        target_name = self._get_file_reference()
        self._create_file_and_return_client(file=target_name)

        # Account SAS granting read permission only, valid for one hour.
        read_only_token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # Act: address the file with a client that only holds the SAS token.
        sas_client = DataLakeFileClient(
            self.dsc.url, self.file_system_name, target_name, credential=read_only_token)
        fetched_props = sas_client.get_file_properties()

        # Assert: reading properties works ...
        self.assertIsNotNone(fetched_props)

        # ... while writing with the same token is rejected.
        with self.assertRaises(HttpResponseError):
            sas_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        """A file-level SAS permits file reads and writes but not file system or directory operations."""
        # The SAS URL is calculated from the storage key, so this test runs live only.
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: a file inside a newly created directory.
        target_name = self._get_file_reference()
        parent_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=parent_name, file=target_name)

        # File SAS granting read and write, valid for one hour.
        file_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            parent_name,
            target_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Reading the file's properties with the token works.
        sas_file_client = DataLakeFileClient(
            self.dsc.url,
            self.file_system_name,
            parent_name + '/' + target_name,
            credential=file_token)
        fetched_props = sas_file_client.get_file_properties()
        self.assertIsNotNone(fetched_props)

        # Writing to the file with the token works as well.
        append_response = sas_file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(append_response)

        # The same token must not authorize file system level operations.
        sas_fs_client = FileSystemClient(
            self.dsc.url, self.file_system_name, credential=file_token)
        with self.assertRaises(ClientAuthenticationError):
            sas_fs_client.get_file_system_properties()

        # Nor may it authorize directory level operations.
        sas_dir_client = DataLakeDirectoryClient(
            self.dsc.url, self.file_system_name, parent_name, credential=file_token)
        with self.assertRaises(ClientAuthenticationError):
            sas_dir_client.get_directory_properties()

    @record
    def test_delete_file(self):
        """Delete a file and confirm that its properties can no longer be fetched."""
        # Arrange
        doomed_client = self._create_file_and_return_client()

        # Act
        doomed_client.delete_file()

        # Assert
        with self.assertRaises(ResourceNotFoundError):
            doomed_client.get_file_properties()

    @record
    def test_delete_file_with_if_unmodified_since(self):
        """Delete a file conditionally on its own last_modified time and confirm it is gone."""
        # Arrange
        doomed_client = self._create_file_and_return_client()
        last_modified = doomed_client.get_file_properties()['last_modified']

        # Act: the condition uses the timestamp just read from the service.
        doomed_client.delete_file(if_unmodified_since=last_modified)

        # Assert: the file no longer exists.
        with self.assertRaises(ResourceNotFoundError):
            doomed_client.get_file_properties()

    @record
    def test_set_access_control(self):
        """Set permissions 0777 on a file and check that a response came back."""
        # Arrange
        target_client = self._create_file_and_return_client()

        # Act
        set_response = target_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(set_response)

    @record
    def test_set_access_control_with_match_conditions(self):
        """Setting access control on an existing file with the IfMissing condition raises ResourceModifiedError."""
        # Arrange: the file exists, so an IfMissing condition cannot hold.
        target_client = self._create_file_and_return_client()

        # Act / Assert
        self.assertRaises(
            ResourceModifiedError,
            target_client.set_access_control,
            permissions='0777',
            match_condition=MatchConditions.IfMissing)

    @record
    def test_get_access_control(self):
        """Read a file's access control back after setting permissions 0777."""
        # Arrange
        target_client = self._create_file_and_return_client()
        target_client.set_access_control(permissions='0777')

        # Act
        access_control = target_client.get_access_control()

        # Assert
        self.assertIsNotNone(access_control)

    @record
    def test_get_access_control_with_if_modified_since(self):
        """Read access control conditionally on modification after a time 15 minutes before last_modified."""
        # Arrange
        target_client = self._create_file_and_return_client()
        target_client.set_access_control(permissions='0777')
        current_props = target_client.get_file_properties()

        # Act: the threshold is 15 minutes earlier than the file's last_modified.
        threshold = current_props['last_modified'] - timedelta(minutes=15)
        access_control = target_client.get_access_control(if_modified_since=threshold)

        # Assert
        self.assertIsNotNone(access_control)

    @record
    def test_get_properties(self):
        """Create a file with metadata and content settings, write three bytes, and verify its properties."""
        # Arrange
        parent_client = self._create_directory_and_return_client()

        wanted_metadata = dict(hello='world', number='42')
        wanted_settings = ContentSettings(
            content_language='spanish', content_disposition='inline')
        target_client = parent_client.create_file(
            "newfile", metadata=wanted_metadata, content_settings=wanted_settings)
        target_client.append_data(b"abc", 0, 3)
        target_client.flush_data(3)

        # Act
        stored_props = target_client.get_file_properties()

        # Assert
        self.assertTrue(stored_props)
        self.assertEqual(stored_props.size, 3)
        self.assertEqual(stored_props.metadata['hello'], wanted_metadata['hello'])
        self.assertEqual(
            stored_props.content_settings.content_language,
            wanted_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        """Rename a three-byte file to ``newname`` and verify both its content and its new path."""
        # Arrange
        source_client = self._create_file_and_return_client()
        payload = b"abc"
        source_client.append_data(payload, 0, 3)
        source_client.flush_data(3)

        # Act: the rename target has the form "<file system>/<new path>".
        destination = '/'.join((source_client.file_system_name, 'newname'))
        renamed_client = source_client.rename_file(destination)

        # Assert
        round_tripped = renamed_client.download_file().readall()
        self.assertEqual(round_tripped, payload)
        self.assertEqual(renamed_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        """Rename a file onto the path of an existing file and verify the destination content is replaced."""
        # Arrange: the destination file, holding a single byte.
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)

        # Arrange: the source file, holding three bytes.
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)

        # Act: the rename target has the form "<file system>/<existing path>".
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)

        # Assert: the renamed client serves the source bytes, i.e. the
        # existing file was overridden.
        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        """Renaming dir2/file3 onto dir1/file1 replaces file1 only; file2 and file4 keep their content."""
        def populate(directory_client, name, payload):
            # Create a file in the given directory holding the five-byte payload.
            created = directory_client.create_file(name)
            created.append_data(payload, 0, 5)
            created.flush_data(5)
            return created

        # Arrange: first non-empty directory, with two files.
        first_dir = self._create_directory_and_return_client(directory="dir1")
        file_one = populate(first_dir, "file1", b"file1")
        file_two = populate(first_dir, "file2", b"file2")

        # Arrange: second non-empty directory, with two files.
        second_dir = self._create_directory_and_return_client(directory="dir2")
        file_three = populate(second_dir, "file3", b"file3")
        file_four = populate(second_dir, "file4", b"file4")

        # Act: move file3 onto file1's path.
        destination = file_one.file_system_name + '/' + file_one.path_name
        renamed_client = file_three.rename_file(destination)

        # Assert: the destination now serves file3's content.
        self.assertEqual(renamed_client.download_file().readall(), b"file3")

        # The sibling files in both directories were not touched.
        file_two_content = file_two.download_file().readall()
        self.assertEqual(file_two_content, b"file2")

        file_four_content = file_four.download_file().readall()
        self.assertEqual(file_four_content, b"file4")

        # The old file3 path can no longer be downloaded.
        with self.assertRaises(HttpResponseError):
            file_three.download_file().readall()
Example #6
0
    def data_lake_service_sample(self):
        """Walk through basic DataLakeServiceClient usage from start to finish.

        In order, the sample: builds a service client from
        ``self.connection_string``; rebuilds it with a ``ClientSecretCredential``
        made from ``self.active_directory_tenant_id`` /
        ``self.active_directory_application_id`` /
        ``self.active_directory_application_secret`` against the account named
        ``self.account_name``; requests a one-hour user delegation key; creates
        the file systems "filesystem" and "anotherfilesystem"; prints the names
        of all file systems; obtains file system, directory and file clients;
        creates a file and a directory with content settings and metadata and
        prints their metadata; and finally deletes both file systems.

        NOTE(review): the ``# [START ...]`` / ``# [END ...]`` comment pairs look
        like snippet anchors for documentation tooling -- confirm before
        editing the code between them.
        """

        # Instantiate a DataLakeServiceClient using a connection string
        # [START create_datalake_service_client]
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        # [END create_datalake_service_client]

        # Instantiate a DataLakeServiceClient Azure Identity credentials.
        # [START create_datalake_service_client_oauth]
        from azure.identity import ClientSecretCredential
        token_credential = ClientSecretCredential(
            self.active_directory_tenant_id,
            self.active_directory_application_id,
            self.active_directory_application_secret,
        )
        datalake_service_client = DataLakeServiceClient(
            "https://{}.dfs.core.windows.net".format(self.account_name),
            credential=token_credential)
        # [END create_datalake_service_client_oauth]

        # get user delegation key
        # [START get_user_delegation_key]
        from datetime import datetime, timedelta
        user_delegation_key = datalake_service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))
        # [END get_user_delegation_key]

        # Create file systems
        # [START create_file_system_from_service_client]
        datalake_service_client.create_file_system("filesystem")
        # [END create_file_system_from_service_client]
        # The second file system's client is kept; it is used below and deleted at the end.
        file_system_client = datalake_service_client.create_file_system(
            "anotherfilesystem")

        # List file systems
        # [START list_file_systems]
        file_systems = datalake_service_client.list_file_systems()
        for file_system in file_systems:
            print(file_system.name)
        # [END list_file_systems]

        # Get Clients from DataLakeServiceClient
        # (file_system_client is rebound to a client looked up by the same file system name)
        file_system_client = datalake_service_client.get_file_system_client(
            file_system_client.file_system_name)
        # [START get_directory_client_from_service_client]
        directory_client = datalake_service_client.get_directory_client(
            file_system_client.file_system_name, "mydirectory")
        # [END get_directory_client_from_service_client]
        # [START get_file_client_from_service_client]
        file_client = datalake_service_client.get_file_client(
            file_system_client.file_system_name, "myfile")
        # [END get_file_client_from_service_client]

        # Create file and set properties
        metadata = {'hello': 'world', 'number': '42'}
        from azure.storage.filedatalake import ContentSettings
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client.create_file(content_settings=content_settings)
        file_client.set_metadata(metadata=metadata)
        file_props = file_client.get_file_properties()
        print(file_props.metadata)

        # Create file/directory and set properties
        directory_client.create_directory(content_settings=content_settings,
                                          metadata=metadata)
        dir_props = directory_client.get_directory_properties()
        print(dir_props.metadata)

        # Delete File Systems
        # [START delete_file_system_from_service_client]
        datalake_service_client.delete_file_system("filesystem")
        # [END delete_file_system_from_service_client]
        # "anotherfilesystem" is removed through its own client.
        file_system_client.delete_file_system()
class LargeFileTest(StorageTestCase):
    """Exercise appending and uploading very large streams to a file.

    Each test builds its own client through ``_setUp`` and afterwards inspects
    the ``append_counter`` / ``append_sizes`` attributes of the
    ``PayloadDroppingPolicy`` instance that was added to the client pipeline.
    """

    def _setUp(self, account_name, account_key):
        """Build the service client and make sure the file system exists.

        :param account_name: Storage account name (the tests pass the value
            they receive as ``datalake_storage_account_name``).
        :param account_key: Account key, used both as the client credential
            and to build the shared-key credential policy.
        """
        url = self.account_url(account_name, 'dfs')
        # NOTE(review): PayloadDroppingPolicy is defined elsewhere; judging by
        # its name and the "without_network" test names it presumably discards
        # request bodies so the large payloads are never sent -- confirm.
        self.payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            account_name, account_key)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True,
                                         _additional_pipeline_policies=[
                                             self.payload_dropping_policy,
                                             credential_policy
                                         ])
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                # The file system is already there; reuse it.
                pass

    def tearDown(self):
        """Delete the test file system (best effort) and run base teardown."""
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Cleanup is best effort. This also covers the case where
                # ``_setUp`` never ran (e.g. the test skipped first), so
                # ``self.dsc`` does not exist. ``Exception`` rather than a
                # bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
                pass

        return super(LargeFileTest, self).tearDown()

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_append_large_stream_without_network(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """Append one ``LARGEST_BLOCK_SIZE`` stream in a single request."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        data = LargeStream(LARGEST_BLOCK_SIZE)

        # Act
        response = file_client.append_data(data, 0, LARGEST_BLOCK_SIZE)

        # Assert: exactly one append was recorded, of the full block size.
        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 1)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_large_stream_without_network(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """Upload two blocks' worth of data and expect two appends.

        Currently skipped unconditionally; everything after ``pytest.skip``
        is unreachable until the skip is removed.
        """
        pytest.skip(
            "Pypy3 on Linux failed somehow, skip for now to investigate")

        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        length = 2 * LARGEST_BLOCK_SIZE
        data = LargeStream(length)

        # Act
        response = file_client.upload_data(data,
                                           length,
                                           overwrite=True,
                                           chunk_size=LARGEST_BLOCK_SIZE)

        # Assert: the upload was split into two appends of one chunk each.
        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 2)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)
        self.assertEqual(self.payload_dropping_policy.append_sizes[1],
                         LARGEST_BLOCK_SIZE)
Example #8
0
class StorageQuickQueryTest(StorageTestCase):
    def _setUp(self, account_name, account_key):
        """Create the service client and the file system used by one test.

        :param account_name: Storage account name used to build the URL.
        :param account_key: Account key passed as the client credential.
        """
        url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True)
        self.config = self.dsc._config
        self.filesystem_name = self.get_resource_name('utqqcontainer')

        if not self.is_playback():
            try:
                self.dsc.create_file_system(self.filesystem_name)
            except Exception:
                # Best effort: a failure here (for instance because the file
                # system already exists) must not abort the test. Catching
                # ``Exception`` instead of using a bare ``except`` keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                pass

    def tearDown(self):
        """Delete the test file system (best effort) and run base teardown."""
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.filesystem_name)
            except Exception:
                # Cleanup is best effort; this also covers ``_setUp`` never
                # having run, in which case ``self.dsc`` does not exist.
                # ``Exception`` rather than a bare ``except`` lets
                # KeyboardInterrupt/SystemExit propagate.
                pass

        return super(StorageQuickQueryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    def _get_file_reference(self):
        """Return a resource name for a test file, built from "csvfile"."""
        prefix = "csvfile"
        return self.get_resource_name(prefix)

    # -- Test cases for the quick query (query_file) API --------------------------------

    @DataLakePreparer()
    def test_quick_query_readall(self, datalake_storage_account_name,
                                 datalake_storage_account_key):
        """Query the whole CSV file and read the result in one call."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        # Every error reported by the query is collected here.
        reported_errors = []

        # Act
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     on_error=reported_errors.append)
        result = reader.readall()

        # Assert: no errors, the whole file was processed, and the output
        # matches the input with '\r\n' line endings turned into '\n'.
        self.assertEqual(len(reported_errors), 0)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        expected = CSV_DATA.replace(b'\r\n', b'\n')
        self.assertEqual(result, expected)

    @DataLakePreparer()
    def test_quick_query_datalake_expression(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        """Run a query that names ``DataLakeStorage`` as its source."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(DATALAKE_CSV_DATA, overwrite=True)

        reported_errors = []
        source_dialect = DelimitedTextDialect(has_header=True)

        # Act: the result is read to completion but its content is not
        # checked; only the error list and byte counters are.
        reader = csv_file.query_file(
            "SELECT DataLakeStorage from DataLakeStorage",
            on_error=reported_errors.append,
            file_format=source_dialect)
        reader.readall()

        # Assert
        self.assertEqual(len(reported_errors), 0)
        self.assertEqual(len(reader), len(DATALAKE_CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)

    @DataLakePreparer()
    def test_quick_query_iter_records(self, datalake_storage_account_name,
                                      datalake_storage_account_key):
        """Iterate the query result record by record (default dialects)."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        # Act
        reader = csv_file.query_file("SELECT * from BlobStorage")
        record_iter = reader.records()

        # Assert: the first record is the header row.
        header = next(record_iter)
        self.assertEqual(header,
                         b'Service,Package,Version,RepoPath,MissingDocs')

        # Concatenate the header with every remaining record.
        joined = header + b''.join(record_iter)

        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        self.assertEqual(joined, CSV_DATA.replace(b'\r\n', b''))

    @DataLakePreparer()
    def test_quick_query_readall_with_encoding(self,
                                               datalake_storage_account_name,
                                               datalake_storage_account_key):
        """Read the whole result with ``encoding='utf-8'`` and expect text."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        reported_errors = []

        # Act
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     on_error=reported_errors.append,
                                     encoding='utf-8')
        text = reader.readall()

        # Assert: the result equals the decoded input with '\r\n' -> '\n'.
        expected_text = CSV_DATA.replace(b'\r\n', b'\n').decode('utf-8')
        self.assertEqual(len(reported_errors), 0)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        self.assertEqual(text, expected_text)

    @DataLakePreparer()
    def test_quick_query_iter_records_with_encoding(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Iterate records with ``encoding='utf-8'`` and expect text."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        # Act: concatenate every record into one string.
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     encoding='utf-8')
        text = ''.join(reader.records())

        # Assert: the joined records equal the decoded input without its
        # '\r\n' line endings.
        expected_text = CSV_DATA.replace(b'\r\n', b'').decode('utf-8')
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        self.assertEqual(text, expected_text)

    @DataLakePreparer()
    def test_quick_query_iter_output_records_excluding_headers(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Header on input, no header on output: first record is data."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        source_dialect = DelimitedTextDialect(has_header=True)
        result_dialect = DelimitedTextDialect(has_header=False)

        # Act
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     file_format=source_dialect,
                                     output_format=result_dialect)
        record_iter = reader.records()

        # Assert: the first record is the first data row, not the header.
        first_row = next(record_iter)
        self.assertEqual(
            first_row,
            b'App Configuration,azure-data-appconfiguration,1,appconfiguration,FALSE'
        )

        # Concatenate the first row with every remaining record.
        joined = first_row + b''.join(record_iter)

        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        # 44 == len(b'Service,Package,Version,RepoPath,MissingDocs'): the
        # header row is sliced off the expected output.
        header_length = 44
        expected = CSV_DATA.replace(b'\r\n', b'')[header_length:]
        self.assertEqual(joined, expected)

    @DataLakePreparer()
    def test_quick_query_iter_output_records_including_headers(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Header on input, default output dialect: header is first record."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        source_dialect = DelimitedTextDialect(has_header=True)

        # Act
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     file_format=source_dialect)
        record_iter = reader.records()

        # Assert: the first record IS the header row.
        header = next(record_iter)
        self.assertEqual(header,
                         b'Service,Package,Version,RepoPath,MissingDocs')

        # Concatenate the header with every remaining record.
        joined = header + b''.join(record_iter)

        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        self.assertEqual(joined, CSV_DATA.replace(b'\r\n', b''))

    @DataLakePreparer()
    def test_quick_query_iter_records_with_progress(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Check that per-record byte counts add up to the reader length."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        # Act: keep only the non-empty records.
        reader = csv_file.query_file("SELECT * from BlobStorage")
        non_empty = [record for record in reader.records() if record]
        joined = b''.join(non_empty)
        # +2 per record: presumably the '\r\n' terminator that is not part of
        # the record itself (the expected data below strips '\r\n').
        progress = sum(len(record) + 2 for record in non_empty)

        # Assert
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        self.assertEqual(joined, CSV_DATA.replace(b'\r\n', b''))
        self.assertEqual(progress, len(reader))

    @DataLakePreparer()
    def test_quick_query_readall_with_serialization_setting(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Read the whole result using explicit input and output dialects."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        reported_errors = []

        # Comma-separated input without a header row ...
        source_dialect = DelimitedTextDialect(delimiter=',',
                                              quotechar='"',
                                              lineterminator='\n',
                                              escapechar='',
                                              has_header=False)
        # ... re-serialized with ';' separators and '.' record terminators.
        result_dialect = DelimitedTextDialect(delimiter=';',
                                              quotechar="'",
                                              lineterminator='.',
                                              escapechar='\\')

        # Act
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     on_error=reported_errors.append,
                                     file_format=source_dialect,
                                     output_format=result_dialect)
        converted = reader.readall()

        # Assert
        self.assertEqual(len(reported_errors), 0)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(converted, CONVERTED_CSV_DATA)

    @DataLakePreparer()
    def test_quick_query_iter_records_with_serialization_setting(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Iterate records using explicit input and output dialects."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        # Comma-separated input without a header row ...
        source_dialect = DelimitedTextDialect(delimiter=',',
                                              quotechar='"',
                                              lineterminator='\n',
                                              escapechar='',
                                              has_header=False)
        # ... re-serialized with ';' separators and '%' record terminators.
        result_dialect = DelimitedTextDialect(delimiter=';',
                                              quotechar="'",
                                              lineterminator='%',
                                              escapechar='\\')

        # Act: collect only the non-empty records.
        reader = csv_file.query_file("SELECT * from BlobStorage",
                                     file_format=source_dialect,
                                     output_format=result_dialect)
        non_empty_records = [rec for rec in reader.records() if rec]

        # Assert
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader),
                         reader._blob_query_reader._bytes_processed)
        self.assertEqual(len(non_empty_records), 33)

    @DataLakePreparer()
    def test_quick_query_readall_with_fatal_error_handler(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """A fatal query error reaches ``on_error`` and yields empty output.

        The uploaded lines use unquoted keys and values, yet the input is
        declared as delimited JSON; the test expects exactly one reported
        error and an empty result.
        """
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = file_client.query_file("SELECT * from BlobStorage",
                                      on_error=on_error,
                                      file_format=input_format,
                                      output_format=output_format)
        query_result = resp.readall()

        self.assertEqual(len(errors), 1)
        # The reader length equals the uploaded payload size (43 bytes).
        self.assertEqual(len(resp), len(data))
        self.assertEqual(query_result, b'')

    @DataLakePreparer()
    def test_quick_query_iter_records_with_fatal_error_handler(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """A fatal query error reaches ``on_error`` while iterating records.

        The uploaded lines use unquoted keys and values, yet the input is
        declared as delimited JSON; the test expects exactly one reported
        error and a single empty record.
        """
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        payload = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(payload, overwrite=True)

        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = file_client.query_file("SELECT * from BlobStorage",
                                      on_error=on_error,
                                      file_format=input_format,
                                      output_format=output_format)
        # Kept under its own name so the uploaded payload is not shadowed.
        records = list(resp.records())

        self.assertEqual(len(errors), 1)
        # The reader length equals the uploaded payload size (43 bytes).
        self.assertEqual(len(resp), len(payload))
        self.assertEqual(records, [b''])

    @DataLakePreparer()
    def test_quick_query_readall_with_fatal_error_handler_raise(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """An exception raised inside ``on_error`` surfaces from ``readall``."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        def on_error(error):
            # Turn the reported error into an exception.
            raise Exception(error.description)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = file_client.query_file("SELECT * from BlobStorage",
                                      on_error=on_error,
                                      file_format=input_format,
                                      output_format=output_format)
        with pytest.raises(Exception):
            resp.readall()

    @DataLakePreparer()
    def test_quick_query_iter_records_with_fatal_error_handler_raise(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """An exception raised inside ``on_error`` surfaces while iterating."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        def on_error(error):
            # Turn the reported error into an exception.
            raise Exception(error.description)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = file_client.query_file("SELECT * from BlobStorage",
                                      on_error=on_error,
                                      file_format=input_format,
                                      output_format=output_format)

        with pytest.raises(Exception):
            for record in resp.records():
                print(record)

    @DataLakePreparer()
    def test_quick_query_readall_with_fatal_error_ignore(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Without ``on_error``, ``readall`` must simply complete.

        There are no assertions: the test passes as long as nothing raises.
        """
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: three lines with unquoted keys and values.
        first_line = b'{name: owner}'
        second_line = b'{name2: owner2}'
        payload = b'\n'.join([first_line, second_line, first_line])

        # Upload the file that will be queried as delimited JSON.
        json_name = self._get_file_reference()
        json_file = self.dsc.get_file_client(self.filesystem_name, json_name)
        json_file.upload_data(payload, overwrite=True)

        source_dialect = DelimitedJsonDialect()
        result_dialect = DelimitedTextDialect(delimiter=';',
                                              quotechar="'",
                                              lineterminator='.',
                                              escapechar='\\')

        # Act: no on_error callback is supplied; the result is discarded.
        reader = json_file.query_file("SELECT * from BlobStorage",
                                      file_format=source_dialect,
                                      output_format=result_dialect)
        reader.readall()

    @DataLakePreparer()
    def test_quick_query_iter_records_with_fatal_error_ignore(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Without ``on_error``, iterating the records must simply complete.

        There are no assertions: the test passes as long as nothing raises.
        """
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = file_client.query_file("SELECT * from BlobStorage",
                                      file_format=input_format,
                                      output_format=output_format)

        for record in resp.records():
            print(record)

    @DataLakePreparer()
    def test_quick_query_readall_with_nonfatal_error_handler(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """A non-fatal error reaches ``on_error`` and output is still made."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        reported_errors = []

        source_dialect = DelimitedTextDialect(delimiter=',',
                                              quotechar='"',
                                              lineterminator='\n',
                                              escapechar='',
                                              has_header=True)
        result_dialect = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )

        # Act
        reader = csv_file.query_file("SELECT RepoPath from BlobStorage",
                                     file_format=source_dialect,
                                     output_format=result_dialect,
                                     on_error=reported_errors.append)
        output = reader.readall()

        # Assert: the one error is because a line has only one column.
        self.assertEqual(len(reported_errors), 1)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertGreater(len(output), 0)

    @DataLakePreparer()
    def test_quick_query_iter_records_with_nonfatal_error_handler(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """A non-fatal error reaches ``on_error`` while iterating records."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        reported_errors = []

        source_dialect = DelimitedTextDialect(delimiter=',',
                                              quotechar='"',
                                              lineterminator='\n',
                                              escapechar='',
                                              has_header=True)
        result_dialect = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='%',
            escapechar='\\',
        )

        # Act
        reader = csv_file.query_file("SELECT RepoPath from BlobStorage",
                                     file_format=source_dialect,
                                     output_format=result_dialect,
                                     on_error=reported_errors.append)
        records = list(reader.records())

        # Assert: the one error is because a line has only one column.
        self.assertEqual(len(reported_errors), 1)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(records), 32)

    @DataLakePreparer()
    def test_quick_query_readall_with_nonfatal_error_ignore(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """Without ``on_error``, ``readall`` still returns some output."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: upload the CSV file that will be queried.
        csv_name = self._get_file_reference()
        csv_file = self.dsc.get_file_client(self.filesystem_name, csv_name)
        csv_file.upload_data(CSV_DATA, overwrite=True)

        source_dialect = DelimitedTextDialect(delimiter=',',
                                              quotechar='"',
                                              lineterminator='\n',
                                              escapechar='',
                                              has_header=True)
        result_dialect = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )

        # Act: no on_error callback is supplied.
        reader = csv_file.query_file("SELECT RepoPath from BlobStorage",
                                     file_format=source_dialect,
                                     output_format=result_dialect)
        output = reader.readall()

        # Assert
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertGreater(len(output), 0)

    @DataLakePreparer()
    def test_quick_query_iter_records_with_nonfatal_error_ignore(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # Selects the RepoPath column of an uploaded CSV file without passing
        # an on_error callback, iterates the records and expects 32 of them.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: the input has a header row; output records end with '$'.
        csv_input = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True,
        )
        csv_output = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='$',
            escapechar='\\',
        )

        # Upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Act
        reader = target.query_file("SELECT RepoPath from BlobStorage",
                                   file_format=csv_input,
                                   output_format=csv_output)
        records = list(reader.records())

        # Assert
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(records), 32)

    @DataLakePreparer()
    def test_quick_query_readall_with_json_serialization_setting(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # Queries the "name" field of newline-delimited JSON and expects the
        # output records to be separated by ';' with no reported errors.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: three documents; the middle one has no "name" key.
        with_name = b'{\"name\": \"owner\", \"id\": 1}'
        without_name = b'{\"name2\": \"owner2\"}'
        payload = b'\n'.join([with_name, without_name, with_name])

        # Upload the JSON fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(payload, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act
        reader = target.query_file(
            "SELECT name from BlobStorage",
            on_error=failures.append,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=DelimitedJsonDialect(delimiter=';'))
        result = reader.readall()

        # Assert
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(payload))
        self.assertEqual(result,
                         b'{"name":"owner"};{};{"name":"owner"};')

    @DataLakePreparer()
    def test_quick_query_iter_records_with_json_serialization_setting(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # Queries the "name" field of newline-delimited JSON with a ';'
        # output delimiter and checks the individual records yielded by
        # records(), including the trailing empty one.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: three documents; the middle one has no "name" key.
        with_name = b'{\"name\": \"owner\", \"id\": 1}'
        without_name = b'{\"name2\": \"owner2\"}'
        payload = b'\n'.join([with_name, without_name, with_name])

        # Upload the JSON fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(payload, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act
        reader = target.query_file(
            "SELECT name from BlobStorage",
            on_error=failures.append,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=DelimitedJsonDialect(delimiter=';'))
        records = list(reader.records())

        # Assert
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(payload))
        expected_records = [
            b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b'']
        self.assertEqual(records, expected_records)

    @DataLakePreparer()
    def test_quick_query_with_only_input_json_serialization_setting(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # Queries the "name" field while specifying only the input JSON
        # dialect (output_format is None) and expects the output records to
        # be separated by '\n' with no reported errors.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: three documents concatenated without any separator.
        with_name = b'{\"name\": \"owner\", \"id\": 1}'
        without_name = b'{\"name2\": \"owner2\"}'
        payload = b''.join([with_name, without_name, with_name])

        # Upload the JSON fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(payload, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act
        reader = target.query_file(
            "SELECT name from BlobStorage",
            on_error=failures.append,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=None)
        result = reader.readall()

        # Assert
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(payload))
        self.assertEqual(result,
                         b'{"name":"owner"}\n{}\n{"name":"owner"}\n')

    @DataLakePreparer()
    def test_quick_query_output_in_arrow_format(self,
                                                datalake_storage_account_name,
                                                datalake_storage_account_key):
        # Requests Arrow-formatted output for a query over comma-separated
        # data and compares the base64 encoding of the returned bytes with a
        # stored expected value.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        csv_rows = b'100,200,300,400\n300,400,500,600\n'
        expected_b64 = b"/////3gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABAwAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEHJAAAABQAAAAEAAAAAAAAAAgADAAEAAgACAAAAAQAAAACAAAAAwAAAGFiYwD/////cAAAABAAAAAAAAoADgAGAAUACAAKAAAAAAMDABAAAAAAAAoADAAAAAQACAAKAAAAMAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAD/////iAAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADAwAYAAAAEAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAPAAAABAAAAABAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAQAAAAEAAAAAAAAAAAAAAAAAAACQAQAAAAAAAAAAAAAAAAAA"

        # Upload the comma-separated fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(csv_rows, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # A single decimal output column named "abc".
        arrow_schema = [
            ArrowDialect(ArrowType.DECIMAL, name="abc", precision=4, scale=2)
        ]

        # Act
        reader = target.query_file(
            "SELECT _2 from BlobStorage WHERE _1 > 250",
            on_error=failures.append,
            output_format=arrow_schema)
        encoded = base64.b64encode(reader.readall())

        # Assert
        self.assertEqual(len(failures), 0)
        self.assertEqual(encoded, expected_b64)

    @DataLakePreparer()
    def test_quick_query_input_in_arrow_format(self,
                                               datalake_storage_account_name,
                                               datalake_storage_account_key):
        # Passing an Arrow schema as the *input* format is expected to raise
        # ValueError from query_file.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange: no upload is performed, only a client is obtained.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())

        # Every error reported through the callback is collected here.
        failures = []

        arrow_schema = [
            ArrowDialect(ArrowType.DECIMAL, name="abc", precision=4, scale=2)
        ]

        # Act / Assert
        with self.assertRaises(ValueError):
            target.query_file("SELECT _2 from BlobStorage WHERE _1 > 250",
                              on_error=failures.append,
                              file_format=arrow_schema)

    @DataLakePreparer()
    def test_quick_query_input_in_parquet_format(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        # Uploads the parquet fixture stored next to this module and checks
        # the result of a filtered query that uses Parquet as input format.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())

        query = "select * from blobstorage where id < 1;"
        expected_rows = b"0,mdifjt55.ea3,mdifjt55.ea3\n"

        # The fixture lives in the "resources" directory beside this file.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        fixture_path = os.path.join(module_dir, "resources", "parquet.parquet")
        with open(fixture_path, "rb") as fixture:
            target.upload_data(fixture, overwrite=True)

        # Act
        reader = target.query_file(query,
                                   file_format=QuickQueryDialect.Parquet)
        actual_rows = reader.readall()

        # Assert
        self.assertEqual(actual_rows, expected_rows)

    @DataLakePreparer()
    def test_quick_query_output_in_parquet_format(
            self, datalake_storage_account_name, datalake_storage_account_key):
        # Requesting Parquet as the *output* format is expected to raise
        # ValueError from query_file.
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        # Arrange
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        query = "SELECT * from BlobStorage"

        # The fixture lives in the "resources" directory beside this file.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        fixture_path = os.path.join(module_dir, "resources", "parquet.parquet")
        with open(fixture_path, "rb") as fixture:
            target.upload_data(fixture, overwrite=True)

        # Act / Assert
        with self.assertRaises(ValueError):
            target.query_file(query,
                              file_format=QuickQueryDialect.Parquet,
                              output_format=QuickQueryDialect.Parquet)
class FileTest(StorageTestCase):
    """Tests for file-level Data Lake operations.

    ``setUp`` creates a file system (outside playback) that every test works
    in, and ``tearDown`` deletes it again.
    """

    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                # An already existing file system is fine; it is simply reused.
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Cleanup is best effort and must not fail the test, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        # Resource name for a directory, derived from ``prefix``.
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        # Resource name for a file, derived from ``prefix``.
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        # NOTE(review): `_get_file_system_reference` is not defined in this
        # class; presumably it is inherited from StorageTestCase - confirm,
        # otherwise calling this helper raises AttributeError.
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        # Creates ``directory`` (or a freshly named one when omitted) and
        # returns its client.
        directory_name = directory or self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        # Creates ``file`` (or a freshly named one when omitted) under
        # ``directory``, creating the directory first when one is given, and
        # returns the file client.
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        # With the default empty directory the path is '/<file>'; the leading
        # slash is kept as-is so request paths stay unchanged.
        file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_lease_id(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Act: re-create the file while holding a lease on it
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name, "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)

        self.assertIsNotNone(response)

    @record
    def test_flush_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        self.assertIsNotNone(response)

    @record
    def test_read_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as the uploaded data
        downloaded_data = file_client.read_file()
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate an account-level token with read permission only
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # writing with the read-only token is expected to fail
        with self.assertRaises(StorageErrorException):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name, file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            account_key=self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file, which lives under directory_name
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, directory_name+'/'+file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # writing to the created file with the token must succeed
        response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name, directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        # the deleted file can no longer be found
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(
            content_language='spanish',
            content_disposition='inline')
        file_client = directory_client.create_file("newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        # the rename target is given as '<file system>/<new path>'
        new_client = file_client.rename_file(file_client.file_system_name+'/'+'newname')

        data = new_client.read_file()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        # create the existing file
        existing_file_client = self._create_file_and_return_client(file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name+'/'+existing_file_client.path_name)

        data = new_client.read_file()
        # the existing file was overwritten
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        # create a non-empty directory (with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another non-empty directory (with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        # rename file3 onto the path of file1
        new_client = f3.rename_file(f1.file_system_name+'/'+f1.path_name)

        self.assertEqual(new_client.read_file(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.read_file()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.read_file()
        self.assertEqual(f4_data, b"file4")

        # the old path of file3 is no longer readable
        with self.assertRaises(HttpResponseError):
            f3.read_file()
Example #10
0
class StorageQuickQueryTest(StorageTestCase):
    def setUp(self):
        # Builds a logging-enabled service client and, outside playback,
        # creates the file system that the tests upload their files into.
        super(StorageQuickQueryTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url,
            credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
            logging_enable=True)
        self.config = self.dsc._config
        self.filesystem_name = self.get_resource_name('utqqcontainer')

        if not self.is_playback():
            try:
                self.dsc.create_file_system(self.filesystem_name)
            except Exception:
                # Creation is best effort, so failures are ignored on purpose.
                # Catching Exception instead of using a bare except lets
                # KeyboardInterrupt/SystemExit propagate.
                pass

    def tearDown(self):
        # Outside playback, deletes the file system created in setUp before
        # delegating to the base class tearDown.
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.filesystem_name)
            except Exception:
                # Cleanup is best effort and must not fail the test, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                pass

        return super(StorageQuickQueryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    def _get_file_reference(self):
        # Resource name, built from the "csvfile" prefix, for the file a test
        # uploads.
        csv_file_name = self.get_resource_name("csvfile")
        return csv_file_name

    # -- Test cases for APIs supporting CPK ----------------------------------------------

    @record
    def test_quick_query_readall(self):
        # Selects everything from an uploaded CSV file and expects readall()
        # to return the data with '\r\n' replaced by '\n' and no errors.

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act
        reader = target.query_file("SELECT * from BlobStorage",
                                   on_error=failures.append)
        result = reader.readall()

        # Assert
        processed = reader._blob_query_reader._bytes_processed
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        self.assertEqual(result, CSV_DATA.replace(b'\r\n', b'\n'))

    @record
    def test_quick_query_datalake_expression(self):
        # Runs a query whose FROM clause names "DataLakeStorage" and checks
        # that no errors are reported and the byte counts agree.

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(DATALAKE_CSV_DATA, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act: the first row of the input is treated as a header.
        reader = target.query_file(
            "SELECT DataLakeStorage from DataLakeStorage",
            on_error=failures.append,
            file_format=DelimitedTextDialect(has_header=True))
        reader.readall()

        # Assert
        processed = reader._blob_query_reader._bytes_processed
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(DATALAKE_CSV_DATA))
        self.assertEqual(len(reader), processed)

    @record
    def test_quick_query_iter_records(self):
        # Iterates the records of a full-table query and expects their
        # concatenation to equal the CSV data with every '\r\n' removed.

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Act
        reader = target.query_file("SELECT * from BlobStorage")
        record_iter = reader.records()

        # Assert: without a header-aware dialect the first record is the
        # header line.
        first_record = next(record_iter)
        self.assertEqual(first_record,
                         b'Service,Package,Version,RepoPath,MissingDocs')

        # Concatenate the header record with all remaining records.
        joined = first_record + b''.join(record_iter)

        processed = reader._blob_query_reader._bytes_processed
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        self.assertEqual(joined, CSV_DATA.replace(b'\r\n', b''))

    @record
    def test_quick_query_readall_with_encoding(self):
        # Same as the plain readall test, but passes encoding='utf-8' and
        # therefore compares against the decoded text.

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act
        reader = target.query_file("SELECT * from BlobStorage",
                                   on_error=failures.append,
                                   encoding='utf-8')
        text = reader.readall()

        # Assert
        processed = reader._blob_query_reader._bytes_processed
        expected_text = CSV_DATA.replace(b'\r\n', b'\n').decode('utf-8')
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        self.assertEqual(text, expected_text)

    @record
    def test_quick_query_iter_records_with_encoding(self):
        # Iterates records with encoding='utf-8' and expects their
        # concatenation to equal the decoded CSV data without '\r\n'.

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Act
        reader = target.query_file("SELECT * from BlobStorage",
                                   encoding='utf-8')
        text = ''.join(reader.records())

        # Assert
        processed = reader._blob_query_reader._bytes_processed
        expected_text = CSV_DATA.replace(b'\r\n', b'').decode('utf-8')
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        self.assertEqual(text, expected_text)

    @record
    def test_quick_query_iter_records_with_headers(self):
        # With has_header=True the header line is not returned as a record,
        # so the first record is the first data row.

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Act
        reader = target.query_file(
            "SELECT * from BlobStorage",
            file_format=DelimitedTextDialect(has_header=True))
        record_iter = reader.records()

        # Assert first line does not include header
        first_record = next(record_iter)
        self.assertEqual(
            first_record,
            b'App Configuration,azure-data-appconfiguration,1,appconfiguration,FALSE')

        # Concatenate the first data row with all remaining records.
        joined = first_record + b''.join(record_iter)

        processed = reader._blob_query_reader._bytes_processed
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        # The first 44 bytes of the flattened data are skipped; presumably
        # that is the length of the header line - confirm against CSV_DATA.
        self.assertEqual(joined, CSV_DATA.replace(b'\r\n', b'')[44:])

    @record
    def test_quick_query_iter_records_with_progress(self):
        # Accumulates the size of every non-empty record (plus two bytes
        # each) and expects the total to equal len(reader).

        # Arrange: upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Act: empty records are skipped.
        reader = target.query_file("SELECT * from BlobStorage")
        non_empty = [chunk for chunk in reader.records() if chunk]
        joined = b''.join(non_empty)
        # The extra 2 per record presumably accounts for the '\r\n' that
        # terminated it in the uploaded data - confirm against CSV_DATA.
        consumed = sum(len(chunk) + 2 for chunk in non_empty)

        # Assert
        processed = reader._blob_query_reader._bytes_processed
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        self.assertEqual(joined, CSV_DATA.replace(b'\r\n', b''))
        self.assertEqual(consumed, len(reader))

    @record
    def test_quick_query_readall_with_serialization_setting(self):
        # Queries with explicit input and output CSV dialects and expects
        # readall() to return CONVERTED_CSV_DATA with no reported errors.

        # Arrange: the input is read without a header row; the output uses
        # ';' as delimiter and '.' as line terminator.
        csv_input = DelimitedTextDialect(delimiter=',',
                                         quotechar='"',
                                         lineterminator='\n',
                                         escapechar='',
                                         has_header=False)
        csv_output = DelimitedTextDialect(delimiter=';',
                                          quotechar="'",
                                          lineterminator='.',
                                          escapechar='\\')

        # Upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Every error reported through the callback is collected here.
        failures = []

        # Act
        reader = target.query_file("SELECT * from BlobStorage",
                                   on_error=failures.append,
                                   file_format=csv_input,
                                   output_format=csv_output)
        result = reader.readall()

        # Assert
        self.assertEqual(len(failures), 0)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(result, CONVERTED_CSV_DATA)

    @record
    def test_quick_query_iter_records_with_serialization_setting(self):
        # Queries with explicit input and output CSV dialects and expects 33
        # non-empty records from records().

        # Arrange: the input is read without a header row; output records
        # end with '%'.
        csv_input = DelimitedTextDialect(delimiter=',',
                                         quotechar='"',
                                         lineterminator='\n',
                                         escapechar='',
                                         has_header=False)
        csv_output = DelimitedTextDialect(delimiter=';',
                                          quotechar="'",
                                          lineterminator='%',
                                          escapechar='\\')

        # Upload the CSV fixture.
        target = self.dsc.get_file_client(self.filesystem_name,
                                          self._get_file_reference())
        target.upload_data(CSV_DATA, overwrite=True)

        # Act: empty records are dropped.
        reader = target.query_file("SELECT * from BlobStorage",
                                   file_format=csv_input,
                                   output_format=csv_output)
        non_empty = [chunk for chunk in reader.records() if chunk]

        # Assert
        processed = reader._blob_query_reader._bytes_processed
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(reader), processed)
        self.assertEqual(len(non_empty), 33)

    @record
    def test_quick_query_readall_with_fatal_error_handler(self):
        # Uploads lines whose keys and values are unquoted, queries them with
        # a JSON input dialect and expects exactly one error to reach the
        # on_error callback while readall() returns no data.

        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        # Every error reported through the callback is collected here.
        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )

        # Act
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)
        query_result = resp.readall()

        # Assert
        self.assertEqual(len(errors), 1)
        # The uploaded payload is 43 bytes long (13 + 1 + 15 + 1 + 13).
        self.assertEqual(len(resp), len(data))
        self.assertEqual(query_result, b'')

    @record
    def test_quick_query_iter_records_with_fatal_error_handler(self):
        """Iterate query records when the query reports an error.

        The uploaded lines use unquoted keys while the input is declared as
        delimited JSON. The test expects exactly one error to reach
        ``on_error`` and a single empty record from the iterator.
        """
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        payload = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(payload, overwrite=True)

        # every error passed to the callback is collected for the assertions
        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )

        # Act
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)
        records = list(resp.records())

        # Assert
        self.assertEqual(len(errors), 1)
        # the reader length is checked against the uploaded payload size (43 bytes)
        self.assertEqual(len(resp), len(payload))
        self.assertEqual(records, [b''])

    @record
    def test_quick_query_readall_with_fatal_error_handler_raise(self):
        """Expect ``readall`` to raise when the ``on_error`` callback raises.

        The uploaded lines use unquoted keys while the input is declared as
        delimited JSON; the callback turns any reported error into an
        exception built from the error's description.
        """
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        def on_error(error):
            raise Exception(error.description)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)

        # Act / Assert
        with pytest.raises(Exception):
            resp.readall()

    @record
    def test_quick_query_iter_records_with_fatal_error_handler_raise(self):
        """Expect record iteration to raise when the ``on_error`` callback raises.

        The uploaded lines use unquoted keys while the input is declared as
        delimited JSON; the callback turns any reported error into an
        exception built from the error's description.
        """
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        def on_error(error):
            raise Exception(error.description)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            on_error=on_error,
            file_format=input_format,
            output_format=output_format)

        # Act / Assert
        with pytest.raises(Exception):
            # walk through every record so an error raised at any point of the
            # iteration is seen by pytest.raises
            for _ in resp.records():
                pass

    @record
    def test_quick_query_readall_with_fatal_error_ignore(self):
        """Read a full query result without supplying an ``on_error`` callback.

        The test makes no assertions; it passes as long as the upload, the
        query and ``readall`` complete without raising.
        """
        # Arrange
        first_line = b'{name: owner}'
        second_line = b'{name2: owner2}'
        payload = b'\n'.join([first_line, second_line, first_line])

        # upload the json file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(payload, overwrite=True)

        json_input = DelimitedJsonDialect()
        text_output = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )

        # Act: run the query and drain the whole result in one go
        client.query_file(
            "SELECT * from BlobStorage",
            file_format=json_input,
            output_format=text_output).readall()

    @record
    def test_quick_query_iter_records_with_fatal_error_ignore(self):
        """Iterate query records without supplying an ``on_error`` callback.

        The test makes no assertions; it passes as long as the upload, the
        query and the iteration over all records complete without raising.
        """
        # Arrange
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        file_name = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
        file_client.upload_data(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = file_client.query_file(
            "SELECT * from BlobStorage",
            file_format=input_format,
            output_format=output_format)

        # Act: exhaust the iterator; the records themselves are not inspected
        for _ in resp.records():
            pass

    @record
    def test_quick_query_readall_with_nonfatal_error_handler(self):
        """Read a full query result while collecting errors via ``on_error``.

        Selects the ``RepoPath`` column from CSV_DATA (parsed with a header
        row) and expects one reported error together with a non-empty result.
        """
        # Arrange
        # upload the csv file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(CSV_DATA, overwrite=True)

        reported = []

        def on_error(error):
            reported.append(error)

        csv_input = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        csv_output = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )

        # Act
        reader = client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=csv_input,
            output_format=csv_output,
            on_error=on_error)
        body = reader.readall()

        # Assert
        # the error is because that line only has one column
        self.assertEqual(len(reported), 1)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertGreater(len(body), 0)

    @record
    def test_quick_query_iter_records_with_nonfatal_error_handler(self):
        """Iterate query records while collecting errors via ``on_error``.

        Selects the ``RepoPath`` column from CSV_DATA (parsed with a header
        row) and expects one reported error and 32 records.
        """
        # Arrange
        # upload the csv file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(CSV_DATA, overwrite=True)

        reported = []

        def on_error(error):
            reported.append(error)

        csv_input = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        csv_output = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='%',
            escapechar='\\',
        )

        # Act
        reader = client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=csv_input,
            output_format=csv_output,
            on_error=on_error)
        records = list(reader.records())

        # Assert
        # the error is because that line only has one column
        self.assertEqual(len(reported), 1)
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(records), 32)

    @record
    def test_quick_query_readall_with_nonfatal_error_ignore(self):
        """Read a full query result without supplying an ``on_error`` callback.

        Selects the ``RepoPath`` column from CSV_DATA (parsed with a header
        row) and expects a non-empty result.
        """
        # Arrange
        # upload the csv file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(CSV_DATA, overwrite=True)

        csv_input = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        csv_output = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )

        # Act
        reader = client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=csv_input,
            output_format=csv_output)
        body = reader.readall()

        # Assert
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertGreater(len(body), 0)

    @record
    def test_quick_query_iter_records_with_nonfatal_error_ignore(self):
        """Iterate query records without supplying an ``on_error`` callback.

        Selects the ``RepoPath`` column from CSV_DATA (parsed with a header
        row) and expects 32 records.
        """
        # Arrange
        # upload the csv file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(CSV_DATA, overwrite=True)

        csv_input = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        csv_output = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='$',
            escapechar='\\',
        )

        # Act
        reader = client.query_file(
            "SELECT RepoPath from BlobStorage",
            file_format=csv_input,
            output_format=csv_output)
        records = list(reader.records())

        # Assert
        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(len(records), 32)

    @record
    def test_quick_query_readall_with_json_serialization_setting(self):
        """Read a full query result using JSON dialects for input and output.

        Input records are newline-delimited; output records are expected to
        be terminated by ';'. Only the ``name`` field is selected.
        """
        # Arrange
        owner_doc = b'{"name": "owner", "id": 1}'
        other_doc = b'{"name2": "owner2"}'
        payload = b'\n'.join([owner_doc, other_doc, owner_doc])

        # upload the json file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(payload, overwrite=True)

        reported = []

        def on_error(error):
            reported.append(error)

        # Act
        reader = client.query_file(
            "SELECT name from BlobStorage",
            on_error=on_error,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=DelimitedJsonDialect(delimiter=';'))
        body = reader.readall()

        # Assert
        self.assertEqual(len(reported), 0)
        self.assertEqual(len(reader), len(payload))
        self.assertEqual(body, b'{"name":"owner"};{};{"name":"owner"};')

    @record
    def test_quick_query_iter_records_with_json_serialization_setting(self):
        """Iterate query records using JSON dialects for input and output.

        Input records are newline-delimited and the output delimiter is ';'.
        The expected record list ends with an empty trailing record.
        """
        # Arrange
        owner_doc = b'{"name": "owner", "id": 1}'
        other_doc = b'{"name2": "owner2"}'
        payload = b'\n'.join([owner_doc, other_doc, owner_doc])

        # upload the json file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(payload, overwrite=True)

        reported = []

        def on_error(error):
            reported.append(error)

        # Act
        reader = client.query_file(
            "SELECT name from BlobStorage",
            on_error=on_error,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=DelimitedJsonDialect(delimiter=';'))
        records = list(reader.records())

        # Assert
        expected = [b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b'']
        self.assertEqual(len(reported), 0)
        self.assertEqual(len(reader), len(payload))
        self.assertEqual(records, expected)

    @record
    def test_quick_query_with_only_input_json_serialization_setting(self):
        """Query JSON data while specifying only the input dialect.

        ``output_format`` is passed as None; the expected result contains one
        newline-terminated JSON record per uploaded document.
        """
        # Arrange
        owner_doc = b'{"name": "owner", "id": 1}'
        other_doc = b'{"name2": "owner2"}'
        # the documents are concatenated back to back, without separators
        payload = b''.join([owner_doc, other_doc, owner_doc])

        # upload the json file
        path = self._get_file_reference()
        client = self.dsc.get_file_client(self.filesystem_name, path)
        client.upload_data(payload, overwrite=True)

        reported = []

        def on_error(error):
            reported.append(error)

        # Act
        reader = client.query_file(
            "SELECT name from BlobStorage",
            on_error=on_error,
            file_format=DelimitedJsonDialect(delimiter='\n'),
            output_format=None)
        body = reader.readall()

        # Assert
        self.assertEqual(len(reported), 0)
        self.assertEqual(len(reader), len(payload))
        self.assertEqual(body, b'{"name":"owner"}\n{}\n{"name":"owner"}\n')
Example #11
0
class FileSystemTest(StorageTestCase):
    def setUp(self):
        """Run the base setUp and build the service client used by the tests."""
        super(FileSystemTest, self).setUp()
        account_url = self._get_account_url()
        account_key = self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY
        self.dsc = DataLakeServiceClient(account_url, credential=account_key)
        self.config = self.dsc._config
        # names of the file systems handed out during the current test
        self.test_file_systems = []

    def tearDown(self):
        """Delete the file systems registered by the test, then run base tearDown.

        Nothing is deleted in playback mode. Deletion is best-effort: a
        failure to delete one file system is ignored and the remaining ones
        are still attempted.

        :return: whatever the base class ``tearDown`` returns.
        """
        if not self.is_playback():
            for file_system in self.test_file_systems:
                # The try sits inside the loop so one failed delete (for
                # example a file system the test already removed) does not
                # skip the cleanup of the others.
                try:
                    self.dsc.delete_file_system(file_system)
                except Exception:
                    # Deliberately ignored: cleanup must not fail the test.
                    # Unlike a bare except, KeyboardInterrupt and SystemExit
                    # still propagate.
                    pass

        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Return a new file system name and remember it in ``test_file_systems``.

        :param prefix: prefix handed to ``get_resource_name``.
        :return: the generated file system name.
        """
        name = self.get_resource_name(prefix)
        self.test_file_systems.append(name)
        return name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create a file system under a freshly generated, registered name.

        :param file_system_prefix: prefix used when generating the name.
        :return: the value returned by ``self.dsc.create_file_system``.
        """
        name = self._get_file_system_reference(prefix=file_system_prefix)
        return self.dsc.create_file_system(name)


    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file_system(self):
        """Creating a file system through its client returns a truthy result."""
        # Arrange
        name = self._get_file_system_reference()

        # Act
        client = self.dsc.get_file_system_client(name)
        result = client.create_file_system()

        # Assert
        self.assertTrue(result)

    @record
    def test_list_file_systemss(self):
        """A newly created file system shows up when listing file systems."""
        # Arrange
        name = self._get_file_system_reference()
        created = self.dsc.create_file_system(name)

        # Act
        listed = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(listed)
        self.assertGreaterEqual(len(listed), 1)
        first = listed[0]
        self.assertIsNotNone(first)
        self.assertNamedItemInContainer(listed, created.file_system_name)
        self.assertIsNotNone(first.has_immutability_policy)
        self.assertIsNotNone(first.has_legal_hold)

    @record
    def test_delete_file_system_with_existing_file_system(self):
        """Deleting an existing file system returns None."""
        # Arrange
        client = self._create_file_system()

        # Act
        result = client.delete_file_system()

        # Assert
        self.assertIsNone(result)

    @record
    def test_list_file_systems_with_include_metadata(self):
        """Listing with ``include_metadata=True`` returns the metadata that was set."""
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        file_system.set_file_system_metadata(metadata)

        # Act
        # filter by the exact name so the created file system is in the listing
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @record
    def test_list_file_systems_by_page(self):
        """The first page of a paged listing holds at least three file systems."""
        # Arrange
        for index in range(6):
            prefix = "filesystem{}".format(index)
            self._create_file_system(file_system_prefix=prefix)

        # Act
        pages = self.dsc.list_file_systems(
            results_per_page=3,
            name_starts_with="file",
            include_metadata=True).by_page()
        first_page = list(next(pages))

        # Assert
        self.assertIsNotNone(first_page)
        self.assertGreaterEqual(len(first_page), 3)

    @record
    def test_list_file_systems_with_public_access(self):
        """Listing reports the metadata and public access level of a file system.

        The file system is created with ``public_access="blob"`` and the
        listing is expected to report ``PublicAccess.File``.
        """
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        # NOTE(review): presumably "blob" is the wire value behind
        # PublicAccess.File - confirm against the PublicAccess enum.
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        # assertIs keeps the identity check and shows both values on failure
        self.assertIs(file_systems[0].public_access, PublicAccess.File)

    @record
    def test_get_file_system_properties(self):
        """Properties of a file system expose the metadata that was set on it."""
        # Arrange
        expected_metadata = {'hello': 'world', 'number': '42'}
        client = self._create_file_system()
        client.set_file_system_metadata(expected_metadata)

        # Act
        properties = client.get_file_system_properties()

        # Assert
        self.assertIsNotNone(properties)
        self.assertDictEqual(properties.metadata, expected_metadata)
        self.assertIsNotNone(properties.has_immutability_policy)
        self.assertIsNotNone(properties.has_legal_hold)

    @record
    def test_list_paths(self):
        """Listing paths returns the six directories that were created."""
        # Arrange
        client = self._create_file_system()
        for index in range(6):
            client.create_directory("dir1{}".format(index))

        # Act
        listed = list(client.get_paths(upn=True))

        # Assert
        self.assertEqual(len(listed), 6)

    @record
    def test_list_paths_with_max_per_page(self):
        """Paged path listing honours ``max_results`` and a continuation token."""
        # Arrange
        client = self._create_file_system()
        for index in range(6):
            client.create_directory("dir1{}".format(index))

        # Act
        # first page: two results
        first_pages = client.get_paths(max_results=2, upn=True).by_page()
        first_batch = list(next(first_pages))

        # second listing resumes from where the first page stopped
        remaining = client.get_paths(max_results=4, upn=True)
        second_pages = remaining.by_page(
            continuation_token=first_pages.continuation_token)
        second_batch = list(next(second_pages))

        # Assert
        self.assertEqual(len(first_batch), 2)
        self.assertEqual(len(second_batch), 4)

    @record
    def test_list_paths_under_specific_path(self):
        """Listing below a given path returns only the entries under that path.

        Each of six directories gets a ``subdir`` containing a ``subsub``
        directory and a five-byte ``file``; the listing of ``dir10/subdir``
        is then checked.
        """
        # Arrange
        client = self._create_file_system()
        for index in range(6):
            dir_name = "dir1{}".format(index)
            client.create_directory(dir_name)

            # create a subdirectory under the current directory
            sub_directory = client.get_directory_client(dir_name).create_sub_directory("subdir")
            sub_directory.create_sub_directory("subsub")

            # create a file under the current directory
            new_file = sub_directory.create_file("file")
            new_file.append_data(b"abced", 0, 5)
            new_file.flush_data(5)

        # Act
        pages = client.get_paths(path="dir10/subdir", max_results=2, upn=True).by_page()
        first_page = list(next(pages))

        # Assert
        self.assertEqual(len(first_page), 2)
        self.assertEqual(first_page[0].content_length, 5)