Example #1
class StoragePageBlobTest(StorageTestCase):

    def setUp(self):
        super(StoragePageBlobTest, self).setUp()

        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        credential = self._get_shared_key_credential()

        self.bs = BlobServiceClient(
            url,
            credential=credential,
            connection_data_block_size=4 * 1024,
            max_page_size=4 * 1024)
        self.config = self.bs._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bs.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bs.delete_container(self.container_name)
            except Exception:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except Exception:
                pass

        return super(StoragePageBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------

    def _get_blob_reference(self):
        return self.bs.get_blob_client(
            self.container_name,
            self.get_resource_name(TEST_BLOB_PREFIX))

    def _create_blob(self, length=512):
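        # Page blob sizes (and page ranges) must be 512-byte aligned; 512 is
        # the smallest valid page blob.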
        blob = self._get_blob_reference()
        blob.create_page_blob(size=length)
        return blob

    def _wait_for_async_copy(self, blob):
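        # Poll the copy status up to 10 times, sleeping 6 seconds between
        # polls, before failing the test.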
        count = 0
        props = blob.get_blob_properties()
        while props.copy.status == 'pending':
            count = count + 1
            if count > 10:
                self.fail('Timed out waiting for async copy to complete.')
            self.sleep(6)
            props = blob.get_blob_properties()
        return props

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bs.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    def assertRangeEqual(self, container_name, blob_name, expected_data, start_range, end_range):
        blob = self.bs.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob(offset=start_range, length=end_range)
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for page blobs --------------------------------------------
    @record
    def test_create_blob(self):
        # Arrange
        blob = self._get_blob_reference()

        # Act
        resp = blob.create_page_blob(1024)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertTrue(blob.get_blob_properties())

    @record
    def test_create_blob_with_metadata(self):
        # Arrange
        blob = self._get_blob_reference()
        metadata = {'hello': 'world', 'number': '42'}
        
        # Act
        resp = blob.create_page_blob(512, metadata=metadata)

        # Assert
        md = blob.get_blob_properties()
        self.assertDictEqual(md.metadata, metadata)

    @record
    def test_put_page_with_lease_id(self):
        # Arrange
        blob = self._create_blob()
        lease = blob.acquire_lease()

        # Act        
        data = self.get_random_bytes(512)
        blob.upload_page(data, 0, 511, lease=lease)

        # Assert
        content = blob.download_blob(lease=lease)
        self.assertEqual(b"".join(list(content)), data)

    @record
    def test_update_page(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = self.get_random_bytes(512)
        resp = blob.upload_page(data, 0, 511)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsNotNone(resp.get('blob_sequence_number'))
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @record
    def test_create_8tb_blob(self):
        # Arrange
        blob = self._get_blob_reference()

        # Act
        resp = blob.create_page_blob(EIGHT_TB)
        props = blob.get_blob_properties()
        page_ranges, cleared = blob.get_page_ranges()

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsInstance(props, BlobProperties)
        self.assertEqual(props.size, EIGHT_TB)
        self.assertEqual(0, len(page_ranges))

    @record
    def test_create_larger_than_8tb_blob_fail(self):
        # Arrange
        blob = self._get_blob_reference()

        # Act
        with self.assertRaises(HttpResponseError):
            blob.create_page_blob(EIGHT_TB + 1)

    @record
    def test_update_8tb_blob_page(self):
        # Arrange
        blob = self._get_blob_reference()
        blob.create_page_blob(EIGHT_TB)

        # Act
        data = self.get_random_bytes(512)
        start_range = EIGHT_TB - 512
        end_range = EIGHT_TB - 1
        resp = blob.upload_page(data, start_range, end_range)
        props = blob.get_blob_properties()
        page_ranges, cleared = blob.get_page_ranges()
        
        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsNotNone(resp.get('blob_sequence_number'))
        self.assertRangeEqual(self.container_name, blob.blob_name, data, start_range, end_range)
        self.assertEqual(props.size, EIGHT_TB)
        self.assertEqual(1, len(page_ranges))
        self.assertEqual(page_ranges[0]['start'], start_range)
        self.assertEqual(page_ranges[0]['end'], end_range)

    @record
    def test_update_page_with_md5(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = self.get_random_bytes(512)
        resp = blob.upload_page(data, 0, 511, validate_content=True)

        # Assert

    @record
    def test_clear_page(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.clear_page(0, 511)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsNotNone(resp.get('blob_sequence_number'))
        self.assertBlobEqual(self.container_name, blob.blob_name, b'\x00' * 512)

    @record
    def test_put_page_if_sequence_number_lt_success(self):
        # Arrange     
        blob = self._get_blob_reference() 
        data = self.get_random_bytes(512)

        start_sequence = 10
        blob.create_page_blob(512, sequence_number=start_sequence)

        # Act
        blob.upload_page(data, 0, 511, if_sequence_number_lt=start_sequence + 1)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @record
    def test_update_page_if_sequence_number_lt_failure(self):
        # Arrange
        blob = self._get_blob_reference() 
        data = self.get_random_bytes(512)
        start_sequence = 10
        blob.create_page_blob(512, sequence_number=start_sequence)

        # Act
        with self.assertRaises(HttpResponseError):
            blob.upload_page(data, 0, 511, if_sequence_number_lt=start_sequence)

        # Assert

    @record
    def test_update_page_if_sequence_number_lte_success(self):
        # Arrange
        blob = self._get_blob_reference() 
        data = self.get_random_bytes(512)
        start_sequence = 10
        blob.create_page_blob(512, sequence_number=start_sequence)

        # Act
        blob.upload_page(data, 0, 511, if_sequence_number_lte=start_sequence)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @record
    def test_update_page_if_sequence_number_lte_failure(self):
        # Arrange
        blob = self._get_blob_reference() 
        data = self.get_random_bytes(512)
        start_sequence = 10
        blob.create_page_blob(512, sequence_number=start_sequence)

        # Act
        with self.assertRaises(HttpResponseError):
            blob.upload_page(data, 0, 511, if_sequence_number_lte=start_sequence - 1)

        # Assert

    @record
    def test_update_page_if_sequence_number_eq_success(self):
        # Arrange
        blob = self._get_blob_reference() 
        data = self.get_random_bytes(512)
        start_sequence = 10
        blob.create_page_blob(512, sequence_number=start_sequence)

        # Act
        blob.upload_page(data, 0, 511, if_sequence_number_eq=start_sequence)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @record
    def test_update_page_if_sequence_number_eq_failure(self):
        # Arrange
        blob = self._get_blob_reference() 
        data = self.get_random_bytes(512)
        start_sequence = 10
        blob.create_page_blob(512, sequence_number=start_sequence)

        # Act
        with self.assertRaises(HttpResponseError):
            blob.upload_page(data, 0, 511, if_sequence_number_eq=start_sequence - 1)

        # Assert

    @record
    def test_update_page_unicode(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = u'abcdefghijklmnop' * 32
        resp = blob.upload_page(data, 0, 511)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

    @record
    def test_get_page_ranges_no_pages(self):
        # Arrange
        blob = self._create_blob()

        # Act
        ranges, cleared = blob.get_page_ranges()

        # Assert
        self.assertIsNotNone(ranges)
        self.assertIsInstance(ranges, list)
        self.assertEqual(len(ranges), 0)

    @record
    def test_get_page_ranges_2_pages(self):
        # Arrange
        blob = self._create_blob(2048)
        data = self.get_random_bytes(512)
        resp1 = blob.upload_page(data, 0, 511)
        resp2 = blob.upload_page(data, 1024, 1535)

        # Act
        ranges, cleared = blob.get_page_ranges()

        # Assert
        self.assertIsNotNone(ranges)
        self.assertIsInstance(ranges, list)
        self.assertEqual(len(ranges), 2)
        self.assertEqual(ranges[0]['start'], 0)
        self.assertEqual(ranges[0]['end'], 511)
        self.assertEqual(ranges[1]['start'], 1024)
        self.assertEqual(ranges[1]['end'], 1535)


    @record
    def test_get_page_ranges_diff(self):
        # Arrange
        blob = self._create_blob(2048)
        data = self.get_random_bytes(1536)
        snapshot1 = blob.create_snapshot()
        blob.upload_page(data, 0, 1535)
        snapshot2 = blob.create_snapshot()
        blob.clear_page(512, 1023)

        # Act
        ranges1, cleared1 = blob.get_page_ranges(previous_snapshot_diff=snapshot1)
        ranges2, cleared2 = blob.get_page_ranges(previous_snapshot_diff=snapshot2['snapshot'])

        # Assert
        self.assertIsNotNone(ranges1)
        self.assertIsInstance(ranges1, list)
        self.assertEqual(len(ranges1), 2)
        self.assertIsInstance(cleared1, list)
        self.assertEqual(len(cleared1), 1)
        self.assertEqual(ranges1[0]['start'], 0)
        self.assertEqual(ranges1[0]['end'], 511)
        self.assertEqual(cleared1[0]['start'], 512)
        self.assertEqual(cleared1[0]['end'], 1023)
        self.assertEqual(ranges1[1]['start'], 1024)
        self.assertEqual(ranges1[1]['end'], 1535)

        self.assertIsNotNone(ranges2)
        self.assertIsInstance(ranges2, list)
        self.assertEqual(len(ranges2), 0)
        self.assertIsInstance(cleared2, list)
        self.assertEqual(len(cleared2), 1)
        self.assertEqual(cleared2[0]['start'], 512)
        self.assertEqual(cleared2[0]['end'], 1023)

    @record    
    def test_update_page_fail(self):
        # Arrange
        blob = self._create_blob(2048)
        data = self.get_random_bytes(512)
        resp1 = blob.upload_page(data, 0, 511)

        # Act
        try:
            blob.upload_page(data, 1024, 1536)
        except ValueError as e:
            self.assertEqual(str(e), 'end_range must be an integer that aligns with 512 page size')
            return

        # Assert
        self.fail('Page range validation failed to throw on failure case')


    @record
    def test_resize_blob(self):
        # Arrange
        blob = self._create_blob(1024)
        
        # Act
        resp = blob.resize_blob(512)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsNotNone(resp.get('blob_sequence_number'))
        props = blob.get_blob_properties()
        self.assertIsInstance(props, BlobProperties)
        self.assertEqual(props.size, 512)

    @record
    def test_set_sequence_number_blob(self):
        # Arrange
        blob = self._create_blob()
        
        # Act
        resp = blob.set_sequence_number(SequenceNumberAction.Update, 6)     

        #Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsNotNone(resp.get('blob_sequence_number'))
        props = blob.get_blob_properties()
        self.assertIsInstance(props, BlobProperties)
        self.assertEqual(props.page_blob_sequence_number, 6)

    @record
    def test_create_page_blob_with_no_overwrite(self):
        # Arrange
        blob = self._get_blob_reference()
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(
            data1,
            overwrite=True,
            blob_type=BlobType.PageBlob,
            metadata={'BlobData': 'Data1'})

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(
                data2,
                overwrite=False,
                blob_type=BlobType.PageBlob,
                metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE)
        self.assertEqual(props.blob_type, BlobType.PageBlob)

    @record
    def test_create_page_blob_with_overwrite(self):
        # Arrange
        blob = self._get_blob_reference()
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(
            data1,
            overwrite=True,
            blob_type=BlobType.PageBlob,
            metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(
            data2,
            overwrite=True,
            blob_type=BlobType.PageBlob,
            metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)
        self.assertEqual(props.blob_type, BlobType.PageBlob)

    def test_create_blob_from_bytes(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        create_resp = blob.upload_blob(data, blob_type=BlobType.PageBlob)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_0_bytes(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(0)

        # Act
        create_resp = blob.upload_blob(data, blob_type=BlobType.PageBlob)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_progress_first(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        create_resp = blob.upload_blob(
            data, blob_type=BlobType.PageBlob, raw_response_hook=callback)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assert_upload_progress(LARGE_BLOB_SIZE, self.config.max_page_size, progress)

    def test_create_blob_from_bytes_with_index(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 1024

        # Act
        blob.upload_blob(data[index:], blob_type=BlobType.PageBlob)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[1024:])

    @record
    def test_create_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 512
        count = 1024

        # Act
        create_resp = blob.upload_blob(data[index:], length=count, blob_type=BlobType.PageBlob)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[index:index + count])
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange        
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream, blob_type=BlobType.PageBlob)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange        
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, blob_type=BlobType.PageBlob, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_page_size, progress)

    def test_create_blob_from_stream(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange        
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data)
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.PageBlob)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size])
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_stream_with_empty_pages(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        # data is almost all empty (0s) except two ranges
        blob = self._get_blob_reference()
        data = bytearray(LARGE_BLOB_SIZE)
        data[512: 1024] = self.get_random_bytes(512)
        data[8192: 8196] = self.get_random_bytes(4)
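        # With max_page_size=4KB, the uploader writes 4 KiB-aligned pages and
        # skips all-zero chunks, so only the 0-4095 and 8192-12287 ranges
        # should end up committed.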
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data)
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.PageBlob)
        props = blob.get_blob_properties()

        # Assert
        # the uploader should have skipped the empty ranges
        self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size])
        page_ranges, cleared = blob.get_page_ranges()
        self.assertEqual(len(page_ranges), 2)
        self.assertEqual(page_ranges[0]['start'], 0)
        self.assertEqual(page_ranges[0]['end'], 4095)
        self.assertEqual(page_ranges[1]['start'], 8192)
        self.assertEqual(page_ranges[1]['end'], 12287)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_stream_non_seekable(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange      
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data)
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StoragePageBlobTest.NonSeekableFile(stream)
            blob.upload_blob(
                non_seekable_file,
                length=blob_size,
                max_connections=1,
                blob_type=BlobType.PageBlob)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size])

    def test_create_blob_from_stream_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange      
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob_size = len(data)
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(
                stream, length=blob_size, blob_type=BlobType.PageBlob, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size])
        self.assert_upload_progress(len(data), self.config.max_page_size, progress)

    def test_create_blob_from_stream_truncated(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange       
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 512
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, blob_type=BlobType.PageBlob)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size])

    def test_create_blob_from_stream_with_progress_truncated(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange       
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []
        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob_size = len(data) - 512
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(
                stream, length=blob_size, blob_type=BlobType.PageBlob, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size])
        self.assert_upload_progress(blob_size, self.config.max_page_size, progress)

    @record
    def test_create_blob_with_md5_small(self):
        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(512)

        # Act
        blob.upload_blob(data, validate_content=True, blob_type=BlobType.PageBlob)

        # Assert

    def test_create_blob_with_md5_large(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._get_blob_reference()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, validate_content=True, blob_type=BlobType.PageBlob)

        # Assert

    def test_incremental_copy_blob(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        source_blob = self._create_blob(2048)
        data = self.get_random_bytes(512)
        resp1 = source_blob.upload_page(data, 0, 511)
        resp2 = source_blob.upload_page(data, 1024, 1535)
        source_snapshot_blob = source_blob.create_snapshot()

        snapshot_blob = BlobClient(
            source_blob.url, credential=source_blob.credential, snapshot=source_snapshot_blob)
        sas_token = snapshot_blob.generate_shared_access_signature(
            permission=BlobPermissions.READ,
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        sas_blob = BlobClient(snapshot_blob.url, credential=sas_token)
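        # Incremental copy requires the source to be a page blob snapshot
        # readable by the destination, here exposed through a SAS URL.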


        # Act
        dest_blob = self.bs.get_blob_client(self.container_name, 'dest_blob')
        copy = dest_blob.start_copy_from_url(sas_blob.url, incremental_copy=True)

        # Assert
        self.assertIsNotNone(copy)
        self.assertIsNotNone(copy['copy_id'])
        self.assertEqual(copy['copy_status'], 'pending')

        copy_blob = self._wait_for_async_copy(dest_blob)
        self.assertEqual(copy_blob.copy.status, 'success')
        self.assertIsNotNone(copy_blob.copy.destination_snapshot)

        # strip off protocol
        self.assertTrue(copy_blob.copy.source.endswith(sas_blob.url[5:]))

    @record
    def test_blob_tier_on_create(self):
        url = self._get_premium_account_url()
        credential = self._get_premium_shared_key_credential()
        pbs = BlobServiceClient(url, credential=credential)

        try:
            container_name = self.get_resource_name('utpremiumcontainer')
            container = pbs.get_container_client(container_name)
            if not self.is_playback():
                container.create_container()

            # test create_blob API
            blob = self._get_blob_reference()
            pblob = pbs.get_blob_client(container_name, blob.blob_name)
            pblob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P4)

            props = pblob.get_blob_properties()
            self.assertEqual(props.blob_tier, PremiumPageBlobTier.P4)
            self.assertFalse(props.blob_tier_inferred)

            # test create_blob_from_bytes API
            blob2 = self._get_blob_reference()
            pblob2 = pbs.get_blob_client(container_name, blob2.blob_name)
            byte_data = self.get_random_bytes(1024)
            pblob2.upload_blob(
                byte_data,
                premium_page_blob_tier=PremiumPageBlobTier.P6,
                blob_type=BlobType.PageBlob)

            props2 = pblob2.get_blob_properties()
            self.assertEqual(props2.blob_tier, PremiumPageBlobTier.P6)
            self.assertFalse(props2.blob_tier_inferred)

            # test create_blob_from_path API
            blob3 = self._get_blob_reference()
            pblob3 = pbs.get_blob_client(container_name, blob3.blob_name)
            with open(FILE_PATH, 'wb') as stream:
                stream.write(byte_data)
            with open(FILE_PATH, 'rb') as stream:
                pblob3.upload_blob(
                    stream,
                    blob_type=BlobType.PageBlob,
                    premium_page_blob_tier=PremiumPageBlobTier.P10)

            props3 = pblob3.get_blob_properties()
            self.assertEqual(props3.blob_tier, PremiumPageBlobTier.P10)
            self.assertFalse(props3.blob_tier_inferred)

        finally:
            container.delete_container()

    @record
    def test_blob_tier_set_tier_api(self):
        url = self._get_premium_account_url()
        credential = self._get_premium_shared_key_credential()
        pbs = BlobServiceClient(url, credential=credential)

        try:
            container_name = self.get_resource_name('utpremiumcontainer')
            container = pbs.get_container_client(container_name)

            if not self.is_playback():
                try:
                    container.create_container()
                except ResourceExistsError:
                    pass

            blob = self._get_blob_reference()
            pblob = pbs.get_blob_client(container_name, blob.blob_name)
            pblob.create_page_blob(1024)
            blob_ref = pblob.get_blob_properties()
            self.assertEqual(PremiumPageBlobTier.P10, blob_ref.blob_tier)
            self.assertIsNotNone(blob_ref.blob_tier)
            self.assertTrue(blob_ref.blob_tier_inferred)

            pcontainer = pbs.get_container_client(container_name)
            blobs = list(pcontainer.list_blobs())

            # Assert
            self.assertIsNotNone(blobs)
            self.assertGreaterEqual(len(blobs), 1)
            self.assertIsNotNone(blobs[0])
            self.assertNamedItemInContainer(blobs, blob.blob_name)

            pblob.set_premium_page_blob_tier(PremiumPageBlobTier.P50)

            blob_ref2 = pblob.get_blob_properties()
            self.assertEqual(PremiumPageBlobTier.P50, blob_ref2.blob_tier)
            self.assertFalse(blob_ref2.blob_tier_inferred)

            blobs = list(pcontainer.list_blobs())

            # Assert
            self.assertIsNotNone(blobs)
            self.assertGreaterEqual(len(blobs), 1)
            self.assertIsNotNone(blobs[0])
            self.assertNamedItemInContainer(blobs, blob.blob_name)
            self.assertEqual(blobs[0].blob_tier, PremiumPageBlobTier.P50)
            self.assertFalse(blobs[0].blob_tier_inferred)
        finally:
            container.delete_container()

    @record
    def test_blob_tier_copy_blob(self):
        url = self._get_premium_account_url()
        credential = self._get_premium_shared_key_credential()
        pbs = BlobServiceClient(url, credential=credential)

        try:
            container_name = self.get_resource_name('utpremiumcontainer')
            container = pbs.get_container_client(container_name)

            if not self.is_playback():
                try:
                    container.create_container()
                except ResourceExistsError:
                    pass

            # Arrange
            source_blob = pbs.get_blob_client(
                container_name,
                self.get_resource_name(TEST_BLOB_PREFIX))
            source_blob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P10)

            # Act
            source_blob_url = '{0}/{1}/{2}'.format(
                self._get_premium_account_url(), container_name, source_blob.blob_name)

            copy_blob = pbs.get_blob_client(container_name, 'blob1copy')
            copy = copy_blob.start_copy_from_url(source_blob_url, premium_page_blob_tier=PremiumPageBlobTier.P30)

            # Assert
            self.assertIsNotNone(copy)
            self.assertEqual(copy['copy_status'], 'success')
            self.assertIsNotNone(copy['copy_id'])

            copy_ref = copy_blob.get_blob_properties()
            self.assertEqual(copy_ref.blob_tier, PremiumPageBlobTier.P30)

            source_blob2 = pbs.get_blob_client(
               container_name,
               self.get_resource_name(TEST_BLOB_PREFIX))

            source_blob2.create_page_blob(1024)
            source_blob2_url = '{0}/{1}/{2}'.format(
                self._get_premium_account_url(), source_blob2.container_name, source_blob2.blob_name)

            copy_blob2 = pbs.get_blob_client(container_name, 'blob2copy')
            copy2 = copy_blob2.start_copy_from_url(source_blob2_url, premium_page_blob_tier=PremiumPageBlobTier.P60)
            self.assertIsNotNone(copy2)
            self.assertEqual(copy2['copy_status'], 'success')
            self.assertIsNotNone(copy2['copy_id'])

            copy_ref2 = copy_blob2.get_blob_properties()
            self.assertEqual(copy_ref2.blob_tier, PremiumPageBlobTier.P60)
            self.assertFalse(copy_ref2.blob_tier_inferred)

            copy_blob3 = pbs.get_blob_client(container_name, 'blob3copy')
            copy3 = copy_blob3.start_copy_from_url(source_blob2_url)
            self.assertIsNotNone(copy3)
            self.assertEqual(copy3['copy_status'], 'success')
            self.assertIsNotNone(copy3['copy_id'])

            copy_ref3 = copy_blob3.get_blob_properties()
            self.assertEqual(copy_ref3.blob_tier, PremiumPageBlobTier.P10)
            self.assertTrue(copy_ref3.blob_tier_inferred)
        finally:
            container.delete_container()
Example #2
class AzureStorage(BaseStorage):
    def __init__(
        self,
        context,
        azure_container,
        storage_path,
        azure_account_name,
        azure_account_key=None,
        sas_token=None,
        connection_string=None,
    ):
        super(AzureStorage, self).__init__()
        self._context = context
        self._storage_path = storage_path.lstrip("/")

        self._azure_account_name = azure_account_name
        self._azure_account_key = azure_account_key
        self._azure_sas_token = sas_token
        self._azure_container = azure_container
        self._azure_connection_string = connection_string

        self._blob_service_client = BlobServiceClient(
            AZURE_STORAGE_URL_STRING.format(self._azure_account_name),
            credential=self._azure_account_key,
        )

        # https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs
        api_version = self._blob_service_client.api_version
        api_version_dt = datetime.strptime(api_version, "%Y-%m-%d")
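        # Pick the largest block size the negotiated service API version
        # supports; _API_VERSION_LIMITS is assumed to map a version string to
        # a (cutoff date, max block size) pair.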
        if api_version_dt < _API_VERSION_LIMITS["2016-05-31"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2016-05-31"][1]
        elif api_version_dt <= _API_VERSION_LIMITS["2019-07-07"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2019-07-07"][1]
        elif api_version_dt >= _API_VERSION_LIMITS["2019-12-12"][0]:
            self._max_block_size = _API_VERSION_LIMITS["2019-12-12"][1]
        else:
            raise Exception("Unknown Azure api version %s" % api_version)

    def _blob_name_from_path(self, object_path):
        if ".." in object_path:
            raise Exception("Relative paths are not allowed; found %s" %
                            object_path)

        return os.path.join(self._storage_path, object_path).rstrip("/")

    def _upload_blob_path_from_uuid(self, uuid):
        return self._blob_name_from_path(
            self._upload_blob_name_from_uuid(uuid))

    def _upload_blob_name_from_uuid(self, uuid):
        return "uploads/{0}".format(uuid)

    def _blob(self, blob_name):
        return self._blob_service_client.get_blob_client(
            self._azure_container, blob_name)

    @property
    def _container(self):
        return self._blob_service_client.get_container_client(
            self._azure_container)

    def get_direct_download_url(self,
                                object_path,
                                request_ip=None,
                                expires_in=60,
                                requires_cors=False,
                                head=False):
        blob_name = self._blob_name_from_path(object_path)

        try:
            sas_token = generate_blob_sas(
                self._azure_account_name,
                self._azure_container,
                blob_name,
                account_key=self._azure_account_key,
                permission=BlobSasPermissions.from_string("r"),
                expiry=datetime.utcnow() + timedelta(seconds=expires_in),
            )

            blob_url = "{}?{}".format(self._blob(blob_name).url, sas_token)

        except AzureError:
            logger.exception(
                "Exception when trying to get direct download for path %s",
                object_path)
            raise IOError("Exception when trying to get direct download")

        return blob_url

    def validate(self, client):
        super(AzureStorage, self).validate(client)

    def get_content(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob_stream = self._blob(blob_name).download_blob()
        except AzureError:
            logger.exception("Exception when trying to get path %s", path)
            raise IOError("Exception when trying to get path")

        return blob_stream.content_as_bytes()

    def put_content(self, path, content):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).upload_blob(content,
                                              blob_type=BlobType.BlockBlob)
        except AzureError:
            logger.exception("Exception when trying to put path %s", path)
            raise IOError("Exception when trying to put path")

    def stream_read(self, path):
        with self.stream_read_file(path) as f:
            while True:
                buf = f.read(self.buffer_size)
                if not buf:
                    break
                yield buf

    def stream_read_file(self, path):
        blob_name = self._blob_name_from_path(path)

        try:
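            # Buffer the whole blob into memory and rewind, so callers can
            # read the result like a local file object.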
            output_stream = io.BytesIO()
            self._blob(blob_name).download_blob().download_to_stream(
                output_stream)
            output_stream.seek(0)
        except AzureError:
            logger.exception(
                "Exception when trying to stream_file_read path %s", path)
            raise IOError("Exception when trying to stream_file_read path")

        return output_stream

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        blob_name = self._blob_name_from_path(path)
        content_settings = ContentSettings(
            content_type=content_type,
            content_encoding=content_encoding,
        )

        try:
            self._blob(blob_name).upload_blob(
                fp, content_settings=content_settings)
        except AzureError as ae:
            logger.exception("Exception when trying to stream_write path %s",
                             path)
            raise IOError("Exception when trying to stream_write path", ae)

    def exists(self, path):
        blob_name = self._blob_name_from_path(path)

        try:
            self._blob(blob_name).get_blob_properties()
        except ResourceNotFoundError:
            return False
        except AzureError:
            logger.exception("Exception when trying to check exists path %s",
                             path)
            raise IOError("Exception when trying to check exists path")

        return True

    def remove(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob(blob_name).delete_blob()
        except AzureError:
            logger.exception("Exception when trying to remove path %s", path)
            raise IOError("Exception when trying to remove path")

    def get_checksum(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob_properties = self._blob(blob_name).get_blob_properties()
        except AzureError:
            logger.exception(
                "Exception when trying to get_checksum for path %s", path)
            raise IOError("Exception when trying to get_checksum path")
        return blob_properties.etag

    def initiate_chunked_upload(self):
        random_uuid = str(uuid.uuid4())
        metadata = {
            _BLOCKS_KEY: [],
            _CONTENT_TYPE_KEY: None,
        }
        return random_uuid, metadata

    def stream_upload_chunk(self,
                            uuid,
                            offset,
                            length,
                            in_fp,
                            storage_metadata,
                            content_type=None):
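        # Stage the incoming stream as uncommitted blocks of at most
        # self._max_block_size bytes each, recording the block IDs in the
        # upload metadata so complete_chunked_upload() can commit them later.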
        if length == 0:
            return 0, storage_metadata, None

        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        new_metadata = copy.deepcopy(storage_metadata)

        total_bytes_written = 0

        while True:
            current_length = length - total_bytes_written
            max_length = (min(current_length, self._max_block_size) if
                          length != READ_UNTIL_END else self._max_block_size)
            if max_length <= 0:
                break

            limited = LimitingStream(in_fp, max_length, seekable=False)

            # Note: Azure fails if a zero-length block is uploaded, so we read all the data here,
            # and, if there is none, terminate early.
            block_data = b""
            for chunk in iter(lambda: limited.read(31457280), b""):
                block_data += chunk

            if len(block_data) == 0:
                break

            block_index = len(new_metadata[_BLOCKS_KEY])
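            # Block IDs within a blob must all be the same length, so
            # zero-pad the index to five digits.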
            block_id = format(block_index, "05")
            new_metadata[_BLOCKS_KEY].append(block_id)

            try:
                self._blob(upload_blob_path).stage_block(block_id,
                                                         block_data,
                                                         validate_content=True)
            except AzureError as ae:
                logger.exception(
                    "Exception when trying to stream_upload_chunk block %s for %s",
                    block_id, uuid)
                return total_bytes_written, new_metadata, ae

            bytes_written = len(block_data)
            total_bytes_written += bytes_written
            if bytes_written == 0 or bytes_written < max_length:
                break

        if content_type is not None:
            new_metadata[_CONTENT_TYPE_KEY] = content_type

        return total_bytes_written, new_metadata, None

    def complete_chunked_upload(self, uuid, final_path, storage_metadata):
        """
        Complete the chunked upload and store the final results in the path indicated.

        Returns nothing.
        """
        # Commit the blob's blocks.
        upload_blob_name = self._upload_blob_name_from_uuid(
            uuid)  # upload/<uuid>
        upload_blob_path = self._upload_blob_path_from_uuid(
            uuid)  # storage/path/upload/<uuid>
        block_list = [
            BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]
        ]

        try:
            if storage_metadata[_CONTENT_TYPE_KEY] is not None:
                content_settings = ContentSettings(
                    content_type=storage_metadata[_CONTENT_TYPE_KEY])
                self._blob(upload_blob_path).commit_block_list(
                    block_list, content_settings=content_settings)
            else:
                self._blob(upload_blob_path).commit_block_list(block_list)
        except AzureError:
            logger.exception(
                "Exception when trying to put block list for path %s from upload %s",
                final_path,
                uuid,
            )
            raise IOError("Exception when trying to put block list")

        # Copy the blob to its final location.
        copy_source_url = self.get_direct_download_url(upload_blob_name,
                                                       expires_in=300)

        try:
            final_blob_name = self._blob_name_from_path(final_path)
            cp = self._blob(final_blob_name).start_copy_from_url(
                copy_source_url)
        except AzureError:
            logger.exception(
                "Exception when trying to set copy uploaded blob %s to path %s",
                uuid, final_path)
            raise IOError("Exception when trying to copy uploaded blob")

        self._await_copy(final_blob_name)

        # Delete the original blob.
        logger.debug("Deleting chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except AzureError:
            logger.exception(
                "Exception when trying to set delete uploaded blob %s", uuid)
            raise IOError("Exception when trying to delete uploaded blob")

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """
        Cancel the chunked upload and clean up any outstanding partially uploaded data.

        Returns nothing.
        """
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        logger.debug("Canceling chunked upload %s at path %s", uuid,
                     upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except ResourceNotFoundError:
            pass

    def _await_copy(self, blob_name):
        # Poll for copy completion.
        blob = self._blob(blob_name)
        copy_prop = blob.get_blob_properties().copy

        count = 0
        while copy_prop.status == "pending":
            props = blob.get_blob_properties()
            copy_prop = props.copy

            if copy_prop.status == "success":
                return

            if copy_prop.status == "failed" or copy_prop.status == "aborted":
                raise IOError("Copy of blob %s failed with status %s" %
                              (blob_name, copy_prop.status))

            count = count + 1
            if count > _MAX_COPY_POLL_COUNT:
                raise IOError("Timed out waiting for copy to complete")

            time.sleep(_COPY_POLL_SLEEP)

    def copy_to(self, destination, path):
        if self.__class__ == destination.__class__:
            logger.debug(
                "Starting copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            copy_source_url = self.get_direct_download_url(path)
            blob_name = destination._blob_name_from_path(path)
            dest_blob = destination._blob(blob_name)

            dest_blob.start_copy_from_url(copy_source_url)
            destination._await_copy(blob_name)
            logger.debug(
                "Finished copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            return

        # Fallback to a slower, default copy.
        logger.debug(
            "Copying file from Azure container %s to %s via a streamed copy",
            self._azure_container,
            destination,
        )
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)

    def setup(self):
        # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
        cors = [
            CorsRule(
                allowed_origins="*",
                allowed_methods=["GET", "PUT"],
                max_age_in_seconds=3000,
                exposed_headers=["x-ms-meta-*"],
                allowed_headers=[
                    "x-ms-meta-data*",
                    "x-ms-meta-target*",
                    "x-ms-meta-abc",
                    "Content-Type",
                ],
            )
        ]

        self._blob_service_client.set_service_properties(cors=cors)
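
A minimal usage sketch of the chunked-upload flow above (the context value,
container, account name, and paths are hypothetical; READ_UNTIL_END is the
module-level constant referenced in stream_upload_chunk):

storage = AzureStorage(
    context=None,                     # hypothetical placeholder
    azure_container="mycontainer",
    storage_path="/storage",
    azure_account_name="myaccount",
    azure_account_key="<account key>",
)
upload_id, metadata = storage.initiate_chunked_upload()
with open("payload.bin", "rb") as fp:
    written, metadata, err = storage.stream_upload_chunk(
        upload_id, 0, READ_UNTIL_END, fp, metadata)
if err is not None:
    storage.cancel_chunked_upload(upload_id, metadata)
    raise err
storage.complete_chunked_upload(upload_id, "final/payload.bin", metadata)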
Example #3
class StorageAppendBlobTest(StorageTestCase):
    def setUp(self):
        super(StorageAppendBlobTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        self.bsc = BlobServiceClient(url,
                                     credential=credential,
                                     max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.source_container_name = self.get_resource_name(
            'utcontainersource')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)
            self.bsc.create_container(self.source_container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                pass
            try:
                self.bsc.delete_container(self.source_container_name)
            except Exception:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except Exception:
                pass

        return super(StorageAppendBlobTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_append_blob()
        return blob

    def _create_source_blob(self, data):
        blob_client = self.bsc.get_blob_client(
            self.source_container_name,
            self.get_resource_name(TEST_BLOB_PREFIX))
        blob_client.create_append_blob()
        blob_client.append_block(data)
        return blob_client

    def assertBlobEqual(self, blob, expected_data):
        stream = blob.download_blob()
        actual_data = b"".join(list(stream))
        self.assertEqual(actual_data, expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    # --Test cases for append blobs -------------------------------------------

    @record
    def test_create_blob(self):
        # Arrange
        blob_name = self._get_blob_reference()

        # Act
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        create_resp = blob.create_append_blob()

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_with_lease_id(self):
        # Arrange
        blob = self._create_blob()

        # Act
        lease = blob.acquire_lease()
        create_resp = blob.create_append_blob(lease=lease)

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_with_metadata(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.create_append_blob(metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    @record
    def test_append_block(self):
        # Arrange
        blob = self._create_blob()

        # Act
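        # Each appended block is the 7-byte string 'block {i}', so the append
        # offset advances by 7 on each call.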
        for i in range(5):
            resp = blob.append_block(u'block {0}'.format(i).encode('utf-8'))
            self.assertEqual(int(resp['blob_append_offset']), 7 * i)
            self.assertEqual(resp['blob_committed_block_count'], i + 1)
            self.assertIsNotNone(resp['etag'])
            self.assertIsNotNone(resp['last_modified'])

        # Assert
        self.assertBlobEqual(blob, b'block 0block 1block 2block 3block 4')

    @record
    def test_append_block_unicode(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.append_block(u'啊齄丂狛狜', encoding='utf-16')
        self.assertEqual(int(resp['blob_append_offset']), 0)
        self.assertEqual(resp['blob_committed_block_count'], 1)
        self.assertIsNotNone(resp['etag'])
        self.assertIsNotNone(resp['last_modified'])

        # Assert

    @record
    def test_append_block_with_md5(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.append_block(b'block', validate_content=True)
        self.assertEqual(int(resp['blob_append_offset']), 0)
        self.assertEqual(resp['blob_committed_block_count'], 1)
        self.assertIsNotNone(resp['etag'])
        self.assertIsNotNone(resp['last_modified'])

        # Assert

    @record
    def test_append_block_from_url(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act: make append block from url calls
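        # copy the source in two pieces: the first 4 KiB, then the remainder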
        split = 4 * 1024
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=split)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=split,
            source_length=LARGE_BLOB_SIZE - split)
        self.assertEqual(resp.get('blob_append_offset'), str(4 * 1024))
        self.assertEqual(resp.get('blob_committed_block_count'), 2)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Missing start range shouldn't pass the validation
        with self.assertRaises(ValueError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_length=LARGE_BLOB_SIZE)

    @record
    def test_append_block_from_url_and_validate_content_md5(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        src_md5 = StorageContentValidation.get_content_md5(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls with correct md5
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas, source_content_md5=src_md5)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))

        # Act part 2: put block from url with wrong md5
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_content_md5=StorageContentValidation.get_content_md5(
                    b"POTATO"))

    @record
    def test_append_block_from_url_with_source_if_modified(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_blob_properties = source_blob_client.get_blob_properties()
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            source_if_modified_since=source_blob_properties.get(
                'last_modified') - timedelta(hours=15))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceNotFoundError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                source_if_modified_since=source_blob_properties.get(
                    'last_modified'))

    @record
    def test_append_block_from_url_with_source_if_unmodified(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_blob_properties = source_blob_client.get_blob_properties()
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            source_if_unmodified_since=source_blob_properties.get(
                'last_modified'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                source_if_unmodified_since=source_blob_properties.get(
                    'last_modified') - timedelta(hours=15))

    @record
    def test_append_block_from_url_with_source_if_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_blob_properties = source_blob_client.get_blob_properties()
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            source_if_match=source_blob_properties.get('etag'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceNotFoundError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                source_if_match='0x111111111111111')

    @record
    def test_append_block_from_url_with_source_if_none_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_blob_properties = source_blob_client.get_blob_properties()
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            source_if_none_match='0x111111111111111')
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceNotFoundError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                source_if_none_match=source_blob_properties.get('etag'))

    @record
    def test_append_block_from_url_with_if_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_name = self._get_blob_reference()
        destination_blob_client = self.bsc.get_blob_client(
            self.container_name, destination_blob_name)
        destination_blob_properties_on_creation = destination_blob_client.create_append_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            if_match=destination_blob_properties_on_creation.get('etag'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                if_match='0x111111111111111')

    @record
    def test_append_block_from_url_with_if_none_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            if_none_match='0x111111111111111')
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                if_none_match=destination_blob_properties.get('etag'))

    @record
    def test_append_block_from_url_with_maxsize_condition(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            maxsize_condition=LARGE_BLOB_SIZE + 1)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
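        # the destination already holds LARGE_BLOB_SIZE bytes, so another append would exceed maxsize_condition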
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                maxsize_condition=LARGE_BLOB_SIZE + 1)

    @record
    def test_append_block_from_url_with_appendpos_condition(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            appendpos_condition=0)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
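        # the append position is now LARGE_BLOB_SIZE, so appendpos_condition=0 no longer holds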
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                appendpos_condition=0)

    @record
    def test_append_block_from_url_with_if_modified(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_properties = source_blob_client.get_blob_properties()
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            if_modified_since=source_properties.get(
                'last_modified') - timedelta(minutes=15))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
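        # the destination has not been modified since its own last_modified timestamp, so the precondition fails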
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                if_modified_since=destination_blob_properties.get(
                    'last_modified'))

    @record
    def test_append_block_from_url_with_if_unmodified(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_properties = source_blob_client.append_block(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0,
            source_length=LARGE_BLOB_SIZE,
            if_unmodified_since=source_properties.get('last_modified'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'),
                         resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'),
                         resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'),
                         LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0,
                source_length=LARGE_BLOB_SIZE,
                if_unmodified_since=source_properties.get('last_modified') -
                timedelta(minutes=15))

    @record
    def test_create_append_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data1'})

        update_resp = blob.upload_blob(data2,
                                       overwrite=False,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        appended_data = data1 + data2
        self.assertBlobEqual(blob, appended_data)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + LARGE_BLOB_SIZE + 512)

    @record
    def test_create_append_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(data2,
                                       overwrite=True,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_append_blob_from_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified,
                         append_resp['last_modified'])

    @record
    def test_append_blob_from_0_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b''
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        # appending nothing should not make any network call
        self.assertIsNone(append_resp.get('etag'))
        self.assertIsNone(append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress(self):
        # Arrange
        blob = self._create_blob()
        data = b'abcdefghijklmnopqrstuvwxyz'

        # Act
        progress = []

        def progress_gen(upload):
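            # record a single progress tick, then hand the whole payload to upload_blob as a generator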
            progress.append((0, len(upload)))
            yield upload

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_bytes_with_index(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:])

    @record
    def test_append_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], length=5, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:8])

    @record
    def test_append_blob_from_bytes_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def progress_gen(upload):
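            # yield the payload in max_block_size slices, recording (bytes_sent, total) before each one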
            n = self.config.max_block_size
            total = len(upload)
            current = 0
            while upload:
                progress.append((current, total))
                yield upload[:n]
                current += len(upload[:n])
                upload = upload[n:]

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_bytes_chunked_upload_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 33
        blob_size = len(data) - 66

        # Act
        blob.upload_blob(data[index:],
                         length=blob_size,
                         blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[index:index + blob_size])

    @record
    def test_append_blob_from_path_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream,
                                           blob_type=BlobType.AppendBlob)

        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_path_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def progress_gen(upload):
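            # read the file in max_block_size chunks, recording progress before yielding each one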
            n = self.config.max_block_size
            total = LARGE_BLOB_SIZE
            current = 0
            while upload:
                chunk = upload.read(n)
                if not chunk:
                    break
                progress.append((current, total))
                yield chunk
                current += len(chunk)

        with open(FILE_PATH, 'rb') as stream:
            upload_data = progress_gen(stream)
            blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_stream_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream,
                                           blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_known_size(
            self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_size = len(data) - 66

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file,
                             length=blob_size,
                             blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_unknown_size(
            self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_with_multiple_appends(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream1:
            stream1.write(data)
        with open(FILE_PATH, 'wb') as stream2:
            stream2.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream1:
            blob.upload_blob(stream1, blob_type=BlobType.AppendBlob)
        with open(FILE_PATH, 'rb') as stream2:
            blob.upload_blob(stream2, blob_type=BlobType.AppendBlob)

        # Assert
        data = data * 2
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_chunked_upload_with_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    def test_append_blob_from_stream_chunked_upload_with_count_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream,
                                           length=blob_size,
                                           blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        append_resp = blob.upload_blob(text, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text_with_encoding(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text,
                         encoding='utf-16',
                         blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_text_with_encoding_and_progress(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def progress_gen(upload):
            progress.append((0, len(data)))
            yield upload

        upload_data = progress_gen(text)
        blob.upload_blob(upload_data,
                         encoding='utf-16',
                         blob_type=BlobType.AppendBlob)

        # Assert
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_text_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, encoded_data)

    @record
    def test_append_blob_with_md5(self):
        # Arrange
        blob = self._create_blob()
        data = b'hello world'

        # Act
        blob.append_block(data, validate_content=True)

        # Assert

Example #4
class StorageGetBlobTest(StorageTestCase):
    def setUp(self):
        super(StorageGetBlobTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(url,
                                     credential=credential,
                                     max_single_get_size=1024,
                                     max_chunk_get_size=1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            container = self.bsc.get_container_client(self.container_name)
            container.create_container()

        self.byte_blob = self.get_resource_name('byteblob')
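        # 64 KiB + 5 bytes: larger than max_single_get_size, so downloads are chunked and the final chunk is a partial one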
        self.byte_data = self.get_random_bytes(64 * 1024 + 5)

        if not self.is_playback():
            blob = self.bsc.get_blob_client(self.container_name,
                                            self.byte_blob)
            blob.upload_blob(self.byte_data)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass

        return super(StorageGetBlobTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    class NonSeekableFile(object):
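        # a file-like wrapper that reports itself as non-seekable, forcing
        # the SDK down the one-pass streaming path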
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

        def seekable(self):
            return False

    # -- Get test cases for blobs ----------------------------------------------

    @record
    def test_unicode_get_blob_unicode_data(self):
        # Arrange
        blob_data = u'hello world啊齄丂狛狜'.encode('utf-8')
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertIsInstance(content.properties, BlobProperties)
        self.assertEqual(content.readall(), blob_data)

    @record
    def test_unicode_get_blob_binary_data(self):
        # Arrange
        base64_data = 'AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wABAgMEBQYHCAkKCwwNDg8QERITFBUWFxgZGhscHR4fICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ent8fX5/gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5ufo6err7O3u7/Dx8vP09fb3+Pn6+/z9/v8AAQIDBAUGBwgJCgsMDQ4PEBESExQVFhcYGRobHB0eHyAhIiMkJSYnKCkqKywtLi8wMTIzNDU2Nzg5Ojs8PT4/QEFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaW1xdXl9gYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4CBgoOEhYaHiImKi4yNjo+QkZKTlJWWl5iZmpucnZ6foKGio6SlpqeoqaqrrK2ur7CxsrO0tba3uLm6u7y9vr/AwcLDxMXGx8jJysvMzc7P0NHS09TV1tfY2drb3N3e3+Dh4uPk5ebn6Onq6+zt7u/w8fLz9PX29/j5+vv8/f7/AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/w=='
        binary_data = base64.b64decode(base64_data)

        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(binary_data)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertIsInstance(content.properties, BlobProperties)
        self.assertEqual(content.readall(), binary_data)

    @record
    def test_get_blob_no_content(self):
        # Arrange
        blob_data = b''
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(blob_data, content.readall())
        self.assertEqual(0, content.properties.size)

    def test_get_blob_to_bytes(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        content = blob.download_blob(max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)

    def test_ranged_get_blob_to_bytes_with_single_byte(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        content = blob.download_blob(offset=0, length=1).readall()

        # Assert
        self.assertEqual(1, len(content))
        self.assertEqual(self.byte_data[0], content[0])

        # Act
        content = blob.download_blob(offset=5, length=1).readall()

        # Assert
        self.assertEqual(1, len(content))
        self.assertEqual(self.byte_data[5], content[0])

    @record
    def test_ranged_get_blob_to_bytes_with_zero_byte(self):
        blob_data = b''
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        # the get request should fail in this case since the blob is empty and yet there is a range specified
        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob(offset=0, length=5)
        self.assertEqual(StorageErrorCode.invalid_range,
                         e.exception.error_code)

        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob(offset=3, length=5)
        self.assertEqual(StorageErrorCode.invalid_range,
                         e.exception.error_code)

    @record
    def test_ranged_get_blob_with_missing_start_range(self):
        blob_data = b'foobar'
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        # the get request should fail fast in this case since start_range is missing while end_range is specified
        with self.assertRaises(ValueError):
            blob.download_blob(length=3)

    def test_get_blob_to_bytes_snapshot(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        snapshot_ref = blob.create_snapshot()
        snapshot = self.bsc.get_blob_client(self.container_name,
                                            self.byte_blob,
                                            snapshot=snapshot_ref)

        blob.upload_blob(
            self.byte_data,
            overwrite=True)  # Modify the blob so the Etag no longer matches

        # Act
        content = snapshot.download_blob(max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)

    def test_get_blob_to_bytes_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
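            # the pipeline exposes download progress on each response's context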
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback,
                                     max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)
        self.assert_download_progress(len(self.byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_bytes_non_parallel(self):
        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback,
                                     max_concurrency=1).readall()

        # Assert
        self.assertEqual(self.byte_data, content)
        self.assert_download_progress(len(self.byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_bytes_small(self):
        # Arrange
        blob_data = self.get_random_bytes(1024)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback).readall()

        # Assert
        self.assertEqual(blob_data, content)
        self.assert_download_progress(len(blob_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    def test_get_blob_to_stream(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)

    def test_get_blob_to_stream_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)
        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)
        self.assert_download_progress(len(self.byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_stream_non_parallel(self):
        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback,
                                            max_concurrency=1)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)
        self.assert_download_progress(len(self.byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_stream_small(self):
        # Arrange
        blob_data = self.get_random_bytes(1024)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, 1024)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(blob_data, actual)
        self.assert_download_progress(len(blob_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    def test_ranged_get_blob_to_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        end_range = self.config.max_single_get_size
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1,
                                            length=end_range - 1,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, end_range - 1)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[1:end_range], actual)

    def test_ranged_get_blob_to_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        start_range = 3
        end_range = self.config.max_single_get_size + 1024
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=start_range,
                                            length=end_range,
                                            raw_response_hook=callback,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, end_range)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(
                self.byte_data[start_range:end_range + start_range], actual)
        self.assert_download_progress(end_range,
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_ranged_get_blob_to_path_small(self):
        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1,
                                            length=4,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, 4)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[1:5], actual)

    @record
    def test_ranged_get_blob_to_path_non_parallel(self):
        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1,
                                            length=3,
                                            max_concurrency=1)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, 3)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[1:4], actual)

    @record
    def test_ranged_get_blob_to_path_invalid_range_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_size = self.config.max_single_get_size + 1
        blob_data = self.get_random_bytes(blob_size)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
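        # Ask for more bytes than the blob holds; the service truncates the
        # range at the end of the blob instead of returning an error.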
        end_range = 2 * self.config.max_single_get_size
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1,
                                            length=end_range,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, blob_size)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(blob_data[1:blob_size], actual)

    @record
    def test_ranged_get_blob_to_path_invalid_range_non_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_size = 1024
        blob_data = self.get_random_bytes(blob_size)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        end_range = 2 * self.config.max_single_get_size
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1,
                                            length=end_range,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, blob_size)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(blob_data[1:blob_size], actual)

    def test_get_blob_to_text(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        text_blob = self.get_resource_name('textblob')
        text_data = self.get_random_text_data(self.config.max_single_get_size +
                                              1)
        blob = self.bsc.get_blob_client(self.container_name, text_blob)
        blob.upload_blob(text_data)

        # Act
        stream = blob.download_blob(max_concurrency=2, encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(text_data, content)

    def test_get_blob_to_text_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        text_blob = self.get_resource_name('textblob')
        text_data = self.get_random_text_data(self.config.max_single_get_size +
                                              1)
        blob = self.bsc.get_blob_client(self.container_name, text_blob)
        blob.upload_blob(text_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        stream = blob.download_blob(raw_response_hook=callback,
                                    max_concurrency=2,
                                    encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(text_data, content)
        self.assert_download_progress(len(text_data.encode('utf-8')),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_text_non_parallel(self):
        # Arrange
        text_blob = self._get_blob_reference()
        text_data = self.get_random_text_data(self.config.max_single_get_size +
                                              1)
        blob = self.bsc.get_blob_client(self.container_name, text_blob)
        blob.upload_blob(text_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        stream = blob.download_blob(raw_response_hook=callback,
                                    max_concurrency=1,
                                    encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(text_data, content)
        self.assert_download_progress(len(text_data.encode('utf-8')),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_text_small(self):
        # Arrange
        blob_data = self.get_random_text_data(1024)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        stream = blob.download_blob(raw_response_hook=callback,
                                    encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(blob_data, content)
        self.assert_download_progress(len(blob_data.encode('utf-8')),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_to_text_with_encoding(self):
        # Arrange
        text = u'hello 啊齄丂狛狜 world'
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(text, encoding='utf-16')

        # Act
        stream = blob.download_blob(encoding='UTF-16')
        content = stream.readall()

        # Assert
        self.assertEqual(text, content)

    @record
    def test_get_blob_to_text_with_encoding_and_progress(self):
        # Arrange
        text = u'hello 啊齄丂狛狜 world'
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(text, encoding='utf-16')

        # Act
        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        stream = blob.download_blob(raw_response_hook=callback,
                                    encoding='UTF-16')
        content = stream.readall()

        # Assert
        self.assertEqual(text, content)
        # the blob was uploaded as utf-16 bytes, so compare against that size
        self.assert_download_progress(len(text.encode('utf-16')),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_non_seekable(self):
        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            non_seekable_stream = StorageGetBlobTest.NonSeekableFile(stream)
            downloader = blob.download_blob(max_concurrency=1)
            read_bytes = downloader.readinto(non_seekable_stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)

    def test_get_blob_non_seekable_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            non_seekable_stream = StorageGetBlobTest.NonSeekableFile(stream)

            with self.assertRaises(ValueError):
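                # Parallel download writes chunks out of order, so the target
                # stream must be seekable; non-seekable streams raise ValueError.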
                downloader = blob.download_blob(max_concurrency=2)
                downloader.readinto(non_seekable_stream)

    @record
    def test_get_blob_to_stream_exact_get_size(self):
        # Arrange
        blob_name = self._get_blob_reference()
        byte_data = self.get_random_bytes(self.config.max_single_get_size)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(byte_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(byte_data, actual)
        self.assert_download_progress(len(byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    @record
    def test_get_blob_exact_get_size(self):
        # Arrange
        blob_name = self._get_blob_reference()
        byte_data = self.get_random_bytes(self.config.max_single_get_size)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(byte_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback).readall()

        # Assert
        self.assertEqual(byte_data, content)
        self.assert_download_progress(len(byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    def test_get_blob_exact_chunk_size(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        byte_data = self.get_random_bytes(self.config.max_single_get_size +
                                          self.config.max_chunk_get_size)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(byte_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback).readall()

        # Assert
        self.assertEqual(byte_data, content)
        self.assert_download_progress(len(byte_data),
                                      self.config.max_chunk_get_size,
                                      self.config.max_single_get_size,
                                      progress)

    def test_get_blob_to_stream_with_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
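        # validate_content=True requests an MD5 for each ranged GET and
        # verifies the returned chunks client-side.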
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(validate_content=True,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)

    def test_get_blob_with_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        content = blob.download_blob(validate_content=True,
                                     max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)

    def test_get_blob_range_to_stream_with_overall_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
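        # Stamp a known (dummy) base64 MD5 onto the blob's HTTP headers so the
        # ranged download below can assert it is surfaced on the properties.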
        props = blob.get_blob_properties()
        props.content_settings.content_md5 = b'MDAwMDAwMDA='
        blob.set_http_headers(props.content_settings)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=0,
                                            length=1024,
                                            validate_content=True,
                                            max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(downloader.size, 1024)
        self.assertEqual(read_bytes, 1024)
        self.assertEqual(b'MDAwMDAwMDA=',
                         downloader.properties.content_settings.content_md5)

    def test_get_blob_range_with_overall_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        content = blob.download_blob(offset=0,
                                     length=1024,
                                     validate_content=True)

        # Arrange
        props = blob.get_blob_properties()
        props.content_settings.content_md5 = b'MDAwMDAwMDA='
        blob.set_http_headers(props.content_settings)

        # Act
        content = blob.download_blob(offset=0,
                                     length=1024,
                                     validate_content=True)

        # Assert
        self.assertEqual(content.properties.size, 1024)
        self.assertEqual(b'MDAwMDAwMDA=',
                         content.properties.content_settings.content_md5)

    def test_get_blob_range_with_range_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        content = blob.download_blob(offset=0,
                                     length=1024,
                                     validate_content=True)

        # Arrange
        props = blob.get_blob_properties()
        props.content_settings.content_md5 = None
        blob.set_http_headers(props.content_settings)

        # Act
        content = blob.download_blob(offset=0,
                                     length=1024,
                                     validate_content=True)

        # Assert
        self.assertIsNotNone(content.properties.content_settings.content_type)
        self.assertIsNone(content.properties.content_settings.content_md5)
Example #5
class StorageBlobRetryTest(StorageTestCase):
    def setUp(self):
        super(StorageBlobRetryTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()
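        # Backoff grows roughly as initial_backoff + increment_base ** n
        # seconds on the n-th retry, for at most retry_total retries (inferred
        # from the parameter names; exact jitter behavior may differ).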
        retry = ExponentialRetry(initial_backoff=1,
                                 increment_base=2,
                                 retry_total=3)

        self.bs = BlobServiceClient(url,
                                    credential=credential,
                                    retry_policy=retry)
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            try:
                self.bs.create_container(self.container_name)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bs.delete_container(self.container_name)
            except HttpResponseError:
                pass

        return super(StorageBlobRetryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    class NonSeekableStream(object):
        def __init__(self, wrapped_stream):
            self.wrapped_stream = wrapped_stream

        def write(self, data):
            self.wrapped_stream.write(data)

        def read(self, count):
            return self.wrapped_stream.read(count)

        def seek(self, *args, **kwargs):
            raise UnsupportedOperation("boom!")

        def tell(self):
            return self.wrapped_stream.tell()
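
    # NOTE: The tests below use a ResponseCallback helper imported from the
    # shared test utilities; it is not shown on this page. A minimal sketch
    # that matches the way it is called here (an assumption, not the real
    # implementation) would be:
    class ResponseCallbackSketch(object):
        def __init__(self, status, new_status=None):
            self.status = status
            self.new_status = new_status
            self.first = True

        def override_first_status(self, response):
            # Rewrite only the first response whose status matches, forcing
            # exactly one retry; later responses pass through unchanged.
            if self.first and response.http_response.status_code == self.status:
                response.http_response.status_code = self.new_status
                self.first = False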

    @record
    def test_retry_put_block_with_seekable_stream(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self.get_resource_name('blob')
        data = self.get_random_bytes(PUT_BLOCK_SIZE)
        data_stream = BytesIO(data)

        # rig the response so that it fails exactly once
        responder = ResponseCallback(status=201, new_status=408)

        # Act
        blob = self.bs.get_blob_client(self.container_name, blob_name)
        blob.stage_block(1,
                         data_stream,
                         raw_response_hook=responder.override_first_status)

        # Assert
        _, uncommitted_blocks = blob.get_block_list(
            block_list_type="uncommitted",
            raw_response_hook=responder.override_first_status)
        self.assertEqual(len(uncommitted_blocks), 1)
        self.assertEqual(uncommitted_blocks[0].size, PUT_BLOCK_SIZE)

        # Commit block and verify content
        blob.commit_block_list(
            ['1'], raw_response_hook=responder.override_first_status)

        # Assert
        content = blob.download_blob().readall()
        self.assertEqual(content, data)

    @record
    def test_retry_put_block_with_non_seekable_stream(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self.get_resource_name('blob')
        data = self.get_random_bytes(PUT_BLOCK_SIZE)
        data_stream = self.NonSeekableStream(BytesIO(data))

        # rig the response so that it fails exactly once
        responder = ResponseCallback(status=201, new_status=408)

        # Act
        blob = self.bs.get_blob_client(self.container_name, blob_name)
        # Note: put_block transforms non-seekable streams into byte arrays before handing them off to the executor
        blob.stage_block(1,
                         data_stream,
                         raw_response_hook=responder.override_first_status)

        # Assert
        _, uncommitted_blocks = blob.get_block_list(
            block_list_type="uncommitted",
            raw_response_hook=responder.override_first_status)
        self.assertEqual(len(uncommitted_blocks), 1)
        self.assertEqual(uncommitted_blocks[0].size, PUT_BLOCK_SIZE)

        # Commit block and verify content
        blob.commit_block_list(
            ['1'], raw_response_hook=responder.override_first_status)

        # Assert
        content = blob.download_blob().readall()
        self.assertEqual(content, data)

    @record
    def test_retry_put_block_with_non_seekable_stream_fail(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self.get_resource_name('blob')
        data = self.get_random_bytes(PUT_BLOCK_SIZE)
        data_stream = self.NonSeekableStream(BytesIO(data))

        # rig the response so that it fails exactly once
        responder = ResponseCallback(status=201, new_status=408)

        # Act
        blob = self.bs.get_blob_client(self.container_name, blob_name)

        with self.assertRaises(HttpResponseError) as error:
            blob.stage_block(1,
                             data_stream,
                             length=PUT_BLOCK_SIZE,
                             raw_response_hook=responder.override_first_status)

        # Assert
        self.assertEqual(error.exception.response.status_code, 408)
Example #6
# create an output Asset
print("Creating output asset " + out_asset_name)
# From SDK
# create_or_update(resource_group_name, account_name, asset_name, parameters, custom_headers=None, raw=False, **operation_config)
outputAsset = client.assets.create_or_update(resource_group_name, account_name,
                                             out_asset_name, output_asset)

### Use the Storage SDK to upload the video ###
print("Uploading the file " + source_file)
# From SDK
# BlobServiceClient(account_url, credential=None, **kwargs)
blob_service_client = BlobServiceClient(account_url=storage_blob_url,
                                        credential=storage_account_key)
# From SDK
# get_blob_client(container, blob, snapshot=None)
blob_client = blob_service_client.get_blob_client(in_container, source_file)
# Upload the video to storage as a block blob
with open(source_file, "rb") as data:
    # From SDK
    # upload_blob(data, blob_type=<BlobType.BlockBlob: 'BlockBlob'>, length=None, metadata=None, **kwargs)
    blob_client.upload_blob(data, blob_type="BlockBlob")

### Create a Transform ###
transform_name = 'MyTrans' + str(thisRandom)
# From SDK
# TransformOutput(*, preset, on_error=None, relative_priority=None, **kwargs) -> None
transform_output = TransformOutput(preset=BuiltInStandardEncoderPreset(
    preset_name="AdaptiveStreaming"))
print("Creating transform " + transform_name)
# From SDK
# create_or_update(resource_group_name, account_name, transform_name, outputs, description=None, custom_headers=None, raw=False, **operation_config)
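# The snippet is truncated at this point. Based on the signature quoted above,
# the call that follows would plausibly be (a sketch, not verified output):
transform = client.transforms.create_or_update(resource_group_name, account_name,
                                               transform_name, [transform_output])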
Example #7
class StorageBlockBlobTest(StorageTestCase):
    def _setup(self, name, key):
        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(self.account_url(name, "blob"),
                                     credential=key,
                                     connection_data_block_size=4 * 1024,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=4 * 1024)
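        # With max_single_put_size=32KB, any larger upload is split into
        # staged blocks of max_block_size and committed as a block list.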
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if self.is_live:
            self.bsc.create_container(self.container_name)

    def _teardown(self, FILE_PATH):
        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(actual_data.readall(), expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for block blobs --------------------------------------------

    @GlobalStorageAccountPreparer()
    def test_put_block(self, resource_group, location, storage_account,
                       storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8'))
            self.assertIsNone(resp)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_unicode(self, resource_group, location, storage_account,
                               storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        resp = blob.stage_block('1', u'啊齄丂狛狜')
        self.assertIsNone(resp)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_with_md5(self, resource_group, location,
                                storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        blob.stage_block(1, b'block', validate_content=True)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_list(self, resource_group, location, storage_account,
                            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        put_block_list_resp = blob.commit_block_list(block_list)

        # Assert
        content = blob.download_blob()
        self.assertEqual(content.readall(), b'AAABBBCCC')
        self.assertEqual(content.properties.etag,
                         put_block_list_resp.get('etag'))
        self.assertEqual(content.properties.last_modified,
                         put_block_list_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_put_block_list_invalid_block_id(self, resource_group, location,
                                             storage_account,
                                             storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        try:
            block_list = [
                BlobBlock(block_id='1'),
                BlobBlock(block_id='2'),
                BlobBlock(block_id='4')
            ]
            blob.commit_block_list(block_list)
            self.fail()
        except HttpResponseError as e:
            self.assertGreaterEqual(
                str(e).find('specified block list is invalid'), 0)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_list_with_md5(self, resource_group, location,
                                     storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob.commit_block_list(block_list, validate_content=True)

        # Assert

    @GlobalStorageAccountPreparer()
    def test_put_block_list_with_blob_tier_specified(self, resource_group,
                                                     location, storage_account,
                                                     storage_account_key):

        # Arrange
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob_client = self.bsc.get_blob_client(self.container_name, blob_name)
        blob_client.stage_block('1', b'AAA')
        blob_client.stage_block('2', b'BBB')
        blob_client.stage_block('3', b'CCC')
        blob_tier = StandardBlobTier.Cool

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob_client.commit_block_list(block_list, standard_blob_tier=blob_tier)

        # Assert
        blob_properties = blob_client.get_blob_properties()
        self.assertEqual(blob_properties.blob_tier, blob_tier)

    @GlobalStorageAccountPreparer()
    def test_get_block_list_no_blocks(self, resource_group, location,
                                      storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
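        # get_block_list returns a (committed_blocks, uncommitted_blocks) tuple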
        block_list = blob.get_block_list('all')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 0)

    @GlobalStorageAccountPreparer()
    def test_get_block_list_uncommitted_blocks(self, resource_group, location,
                                               storage_account,
                                               storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = blob.get_block_list('uncommitted')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 3)
        self.assertEqual(len(block_list[0]), 0)
        self.assertEqual(block_list[1][0].id, '1')
        self.assertEqual(block_list[1][0].size, 3)
        self.assertEqual(block_list[1][1].id, '2')
        self.assertEqual(block_list[1][1].size, 3)
        self.assertEqual(block_list[1][2].id, '3')
        self.assertEqual(block_list[1][2].size, 3)

    @GlobalStorageAccountPreparer()
    def test_get_block_list_committed_blocks(self, resource_group, location,
                                             storage_account,
                                             storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        blob.commit_block_list(block_list)

        # Act
        block_list = blob.get_block_list('committed')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 3)
        self.assertEqual(block_list[0][0].id, '1')
        self.assertEqual(block_list[0][0].size, 3)
        self.assertEqual(block_list[0][1].id, '2')
        self.assertEqual(block_list[0][1].size, 3)
        self.assertEqual(block_list[0][2].id, '3')
        self.assertEqual(block_list[0][2].size, 3)

    @GlobalStorageAccountPreparer()
    def test_create_small_block_blob_with_no_overwrite(self, resource_group,
                                                       location,
                                                       storage_account,
                                                       storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
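        # overwrite=False (the default) raises ResourceExistsError when the
        # blob already exists; overwrite=True replaces the existing blob.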
        create_resp = blob.upload_blob(data1, overwrite=True)

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_create_small_block_blob_with_overwrite(self, resource_group,
                                                    location, storage_account,
                                                    storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)
        update_resp = blob.upload_blob(data2, overwrite=True)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @GlobalStorageAccountPreparer()
    def test_create_large_block_blob_with_no_overwrite(self, resource_group,
                                                       location,
                                                       storage_account,
                                                       storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       metadata={'blobdata': 'data1'})

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2,
                             overwrite=False,
                             metadata={'blobdata': 'data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'blobdata': 'data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE)

    @GlobalStorageAccountPreparer()
    def test_create_large_block_blob_with_overwrite(self, resource_group,
                                                    location, storage_account,
                                                    storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       metadata={'blobdata': 'data1'})
        update_resp = blob.upload_blob(data2,
                                       overwrite=True,
                                       metadata={'blobdata': 'data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'blobdata': 'data2'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_single_put(self, resource_group, location,
                                               storage_account,
                                               storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_0_bytes(self, resource_group, location,
                                      storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b''

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_from_bytes_blob_unicode(self, resource_group, location,
                                            storage_account,
                                            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        data = u'hello world'
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name,
                             data.encode('utf-8'))
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_from_bytes_blob_with_lease_id(self, resource_group,
                                                  location, storage_account,
                                                  storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        lease = blob.acquire_lease()

        # Act
        create_resp = blob.upload_blob(data, lease=lease)

        # Assert
        output = blob.download_blob(lease=lease)
        self.assertEqual(output.readall(), data)
        self.assertEqual(output.properties.etag, create_resp.get('etag'))
        self.assertEqual(output.properties.last_modified,
                         create_resp.get('last_modified'))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_metadata(self, resource_group,
                                                  location, storage_account,
                                                  storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        blob.upload_blob(data, metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_properties(self, resource_group,
                                                    location, storage_account,
                                                    storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob.upload_blob(data, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_progress(self, resource_group,
                                                  location, storage_account,
                                                  storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def callback(response):
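            # upload_stream_current is None for responses that carry no upload
            # progress (for example the final commit request), hence the guard.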
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        create_resp = blob.upload_blob(data, raw_response_hook=callback)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_index(self, resource_group, location,
                                               storage_account,
                                               storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:])

        # Assert
        self.assertEqual(data[3:], blob.download_blob().readall())

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_index_and_count(
            self, resource_group, location, storage_account,
            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:], length=5)

        # Assert
        self.assertEqual(data[3:8], blob.download_blob().readall())

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_index_and_count_and_properties(
            self, resource_group, location, storage_account,
            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob.upload_blob(data[3:], length=5, content_settings=content_settings)

        # Assert
        self.assertEqual(data[3:8], blob.download_blob().readall())
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_non_parallel(self, resource_group,
                                                 location, storage_account,
                                                 storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_bytes_with_blob_tier_specified(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob_client = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'
        blob_tier = StandardBlobTier.Cool

        # Act
        blob_client.upload_blob(data, standard_blob_tier=blob_tier)
        blob_properties = blob_client.get_blob_properties()

        # Assert
        self.assertEqual(blob_properties.blob_tier, blob_tier)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path(self, resource_group, location,
                                   storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'create_blob_from_input.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self._teardown(FILE_PATH)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path_non_parallel(self, resource_group, location,
                                                storage_account,
                                                storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        FILE_PATH = 'create_blob_from_path_non_par.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream,
                                           length=100,
                                           max_concurrency=1)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self._teardown(FILE_PATH)

    @GlobalStorageAccountPreparer()
    def test_upload_blob_from_path_non_parallel_with_standard_blob_tier(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        FILE_PATH = '_path_non_parallel_with_standard_blob.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_tier = StandardBlobTier.Cool
        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=100,
                             max_concurrency=1,
                             standard_blob_tier=blob_tier)
        props = blob.get_blob_properties()

        # Assert
        self.assertEqual(props.blob_tier, blob_tier)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path_with_progress(self, resource_group,
                                                 location, storage_account,
                                                 storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'create_blob_from_path_with_progr.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_path_with_properties(self, resource_group,
                                                   location, storage_account,
                                                   storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'blob_from_path_with_properties.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chunked_upload(self, resource_group,
                                                    location, storage_account,
                                                    storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'blob_from_stream_chunked_up.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_frm_stream_nonseek_chunk_upld_knwn_size(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        blob_size = len(data) - 66
        FILE_PATH = 'stream_nonseek_chunk_upld_knwn_size.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
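            # A non-seekable stream must be uploaded with max_concurrency=1,
            # since parallel chunking would require seeking within the stream.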
            blob.upload_blob(non_seekable_file,
                             length=blob_size,
                             max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_from_stream_nonseek_chunk_upld_unkwn_size(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'stream_nonseek_chunk_upld.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_with_progress_chunked_upload(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'stream_with_progress_chunked.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chunked_upload_with_count(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'chunked_upload_with_count.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            resp = blob.upload_blob(stream, length=blob_size)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_from_stream_chunk_upload_with_cntandrops(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'from_stream_chunk_upload_with_cntandrops.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chnked_upload_with_properties(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'chnked_upload_with_properti.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_stream_chunked_upload_with_properties(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        # Arrange
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        FILE_PATH = 'blob_from_stream_chunked_upload.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_tier = StandardBlobTier.Cool

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             content_settings=content_settings,
                             max_concurrency=2,
                             standard_blob_tier=blob_tier)

        properties = blob.get_blob_properties()

        # Assert
        self.assertEqual(properties.blob_tier, blob_tier)
        self._teardown(FILE_PATH)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text(self, resource_group, location,
                                   storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        create_resp = blob.upload_blob(text)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text_with_encoding(self, resource_group,
                                                 location, storage_account,
                                                 storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16')

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text_with_encoding_and_progress(
            self, resource_group, location, storage_account,
            storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_from_text_chunked_upload(self, resource_group,
                                                  location, storage_account,
                                                  storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, encoded_data)

    @GlobalStorageAccountPreparer()
    def test_create_blob_with_md5(self, resource_group, location,
                                  storage_account, storage_account_key):
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        blob.upload_blob(data, validate_content=True)

        # Assert
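        # validate_content=True sends an MD5 of the payload for the service to
        # verify; completing without an exception is the assertion here.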

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_blob_with_md5_chunked(self, resource_group, location,
                                          storage_account,
                                          storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, validate_content=True)
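
        # Assert
        # For chunked uploads, validate_content=True computes an MD5 per chunk;
        # finishing without an exception means each chunk passed verification.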
Example #8
    conn_str="DefaultEndpointsProtocol=https;AccountName=ebhdevstorage"
    "001;AccountKey=QYSKZ1suXASpD3Cy67U7pkFHOOWPB0Jtl4MEOFF+CNn"
    "PDp72j4uDVEv9p5X7HTvpafiJpbakvBsyWiSHEqFDOQ==;EndpointSuff"
    "ix=core.windows.net",
    container_name="dailyinsightsmailing")
blob_service_client = BlobServiceClient(
    account_url="https://ebhdevstorage001.blob.core.windows.net/",
    credential=
    "QYSKZ1suXASpD3Cy67U7pkFHOOWPB0Jtl4MEOFF+CNnPDp72j4uDVEv9p5X7HTvpafi"
    "JpbakvBsyWiSHEqFDOQ==")
data_list = []
blob_list = container.list_blobs()
for blob in blob_list:
    if date_string in blob.name:
        name = blob.name
        blob_client = blob_service_client.get_blob_client(
            blob=name, container="dailyinsightsmailing")
        stream = blob_client.download_blob().content_as_text()
        stream = stream.split("}}}")
        stream = stream[:-1]
        for ele in stream:
            ele = ele + "}}}"
            data_list.append(json.loads(ele))
no_of_clicks = 0
no_of_searches = 0
id_list = []
os_list = []
city_list = []
rank_list = []
tag_list = []
for ele in data_list:
    if ele['event'][0]['name'] == 'Click':
Example #9
class AzureBlobClient(Client):
    """Client class for Azure Blob Storage which handles authentication with Azure for
    [`AzureBlobPath`](../azblobpath/) instances. See documentation for the
    [`__init__` method][cloudpathlib.azure.azblobclient.AzureBlobClient.__init__] for detailed
    authentication options.
    """
    def __init__(
        self,
        account_url: Optional[str] = None,
        credential: Optional[Any] = None,
        connection_string: Optional[str] = None,
        blob_service_client: Optional["BlobServiceClient"] = None,
        local_cache_dir: Optional[Union[str, os.PathLike]] = None,
    ):
        """Class constructor. Sets up a [`BlobServiceClient`](
        https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).
        Supports the following authentication methods of `BlobServiceClient`.

        - Environment variable `AZURE_STORAGE_CONNECTION_STRING` containing a connection string
        with account credentials. See [Azure Storage SDK documentation](
        https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal).
        - Account URL via `account_url`, authenticated either with an embedded SAS token, or with
        credentials passed to `credential`.
        - Connection string via `connection_string`, authenticated either with an embedded SAS
        token or with credentials passed to `credential`.
        - Instantiated and already authenticated [`BlobServiceClient`](
        https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).

        If multiple methods are used, the priority order is the reverse of the list above (later
        items take priority). If no method is used, a [`MissingCredentialsError`][cloudpathlib.exceptions.MissingCredentialsError]
        exception will be raised.

        Args:
            account_url (Optional[str]): The URL to the blob storage account, optionally
                authenticated with a SAS token. See documentation for [`BlobServiceClient`](
                https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).
            credential (Optional[Any]): Credentials with which to authenticate. Can be used with
                `account_url` or `connection_string`, but is unnecessary if the other already has
                an SAS token. See documentation for [`BlobServiceClient`](
                https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python)
                or [`BlobServiceClient.from_connection_string`](
                https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python#from-connection-string-conn-str--credential-none----kwargs-).
            connection_string (Optional[str]): A connection string to an Azure Storage account. See
                [Azure Storage SDK documentation](
                https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal).
            blob_service_client (Optional[BlobServiceClient]): Instantiated [`BlobServiceClient`](
                https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python).
            local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache
                for downloaded files. If None, will use a temporary directory.
        """
        if connection_string is None:
            connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING",
                                          None)

        if blob_service_client is not None:
            self.service_client = blob_service_client
        elif connection_string is not None:
            self.service_client = BlobServiceClient.from_connection_string(
                conn_str=connection_string, credential=credential)
        elif account_url is not None:
            self.service_client = BlobServiceClient(account_url=account_url,
                                                    credential=credential)
        else:
            raise MissingCredentialsError(
                "AzureBlobClient does not support anonymous instantiation. "
                "Credentials are required; see docs for options.")

        super().__init__(local_cache_dir=local_cache_dir)

    def _get_metadata(self, cloud_path: AzureBlobPath) -> Dict[str, Any]:
        blob = self.service_client.get_blob_client(
            container=cloud_path.container, blob=cloud_path.blob)
        properties = blob.get_blob_properties()

        return properties

    def _download_file(self, cloud_path: AzureBlobPath,
                       local_path: Union[str, os.PathLike]) -> Path:
        blob = self.service_client.get_blob_client(
            container=cloud_path.container, blob=cloud_path.blob)

        download_stream = blob.download_blob()

        local_path = Path(local_path)

        local_path.parent.mkdir(exist_ok=True, parents=True)

        local_path.write_bytes(download_stream.readall())

        return local_path

    def _is_file_or_dir(self, cloud_path: AzureBlobPath) -> Optional[str]:
        # short-circuit the root-level container
        if not cloud_path.blob:
            return "dir"

        try:
            self._get_metadata(cloud_path)
            return "file"
        except ResourceNotFoundError:
            prefix = cloud_path.blob
            if prefix and not prefix.endswith("/"):
                prefix += "/"

            # not a file, see if it is a directory
            container_client = self.service_client.get_container_client(
                cloud_path.container)

            try:
                next(container_client.list_blobs(name_starts_with=prefix))
                return "dir"
            except StopIteration:
                return None

    def _exists(self, cloud_path: AzureBlobPath) -> bool:
        return self._is_file_or_dir(cloud_path) in ["file", "dir"]

    def _list_dir(self,
                  cloud_path: AzureBlobPath,
                  recursive: bool = False) -> Iterable[AzureBlobPath]:
        container_client = self.service_client.get_container_client(
            cloud_path.container)

        prefix = cloud_path.blob
        if prefix and not prefix.endswith("/"):
            prefix += "/"

        yielded_dirs = set()

        # NOTE: the non-recursive case may be slower than necessary since it
        #   simply filters the output of the recursive implementation
        for o in container_client.list_blobs(name_starts_with=prefix):
            # get directory from this path
            for parent in PurePosixPath(o.name[len(prefix):]).parents:

                # if we haven't surfaced this directory already
                if parent not in yielded_dirs and str(parent) != ".":

                    # skip if not recursive and this is beyond our depth
                    if not recursive and "/" in str(parent):
                        continue

                    yield self.CloudPath(
                        f"az://{cloud_path.container}/{prefix}{parent}")
                    yielded_dirs.add(parent)

            # skip file if not recursive and this is beyond our depth
            if not recursive and "/" in o.name[len(prefix):]:
                continue

            yield self.CloudPath(f"az://{cloud_path.container}/{o.name}")

    def _move_file(self,
                   src: AzureBlobPath,
                   dst: AzureBlobPath,
                   remove_src: bool = True) -> AzureBlobPath:
        # same path: this is just a "touch", so replace the metadata to bump last_modified
        if src == dst:
            blob_client = self.service_client.get_blob_client(
                container=src.container, blob=src.blob)

            blob_client.set_blob_metadata(metadata=dict(
                last_modified=str(datetime.utcnow().timestamp())))

        else:
            target = self.service_client.get_blob_client(
                container=dst.container, blob=dst.blob)

            source = self.service_client.get_blob_client(
                container=src.container, blob=src.blob)

            target.start_copy_from_url(source.url)

            if remove_src:
                self._remove(src)

        return dst

    def _remove(self, cloud_path: AzureBlobPath) -> None:
        if self._is_file_or_dir(cloud_path) == "dir":
            blobs = [
                b.blob for b in self._list_dir(cloud_path, recursive=True)
            ]
            container_client = self.service_client.get_container_client(
                cloud_path.container)
            container_client.delete_blobs(*blobs)
        elif self._is_file_or_dir(cloud_path) == "file":
            blob = self.service_client.get_blob_client(
                container=cloud_path.container, blob=cloud_path.blob)

            blob.delete_blob()

    def _upload_file(self, local_path: Union[str, os.PathLike],
                     cloud_path: AzureBlobPath) -> AzureBlobPath:
        blob = self.service_client.get_blob_client(
            container=cloud_path.container, blob=cloud_path.blob)

        blob.upload_blob(Path(local_path).read_bytes(), overwrite=True)

        return cloud_path
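
A minimal usage sketch for the client above, assuming the cloudpathlib package it is taken from; the account URL, credential, and container path below are hypothetical placeholders:

# Hypothetical usage of AzureBlobClient; all names below are placeholders.
# Passing only account_url + credential reaches the account_url branch of
# __init__ (the lowest-priority explicit method).
from cloudpathlib import AzureBlobClient

client = AzureBlobClient(
    account_url="https://myaccount.blob.core.windows.net",
    credential="<account-key-or-sas-token>",
)
path = client.CloudPath("az://mycontainer/some/file.txt")
print(path.exists())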
Example #10
class AzureBlobStorage:
    """Class for interacting with Azure Blob Storage."""
    def __init__(self, abs_name: str, connect: bool = False):
        """Initialize connector for Azure Python SDK."""
        self.connected = False
        self.abs_site = f"{abs_name}.blob.core.windows.net"
        self.credentials: Optional[AzCredentials] = None
        self.abs_client: Optional[BlobServiceClient] = None
        if connect is True:
            self.connect()

    def connect(
        self,
        auth_methods: Optional[List] = None,
        silent: bool = False,
    ):
        """Authenticate with the SDK."""
        self.credentials = az_connect(auth_methods=auth_methods, silent=silent)
        if not self.credentials:
            raise CloudError("Could not obtain credentials.")
        self.abs_client = BlobServiceClient(self.abs_site,
                                            self.credentials.modern)
        if not self.abs_client:
            raise CloudError("Could not create a Blob Storage client.")
        self.connected = True

    def containers(self) -> pd.DataFrame:
        """Return containers in the Azure Blob Storage Account."""
        try:
            container_list = self.abs_client.list_containers()  # type:ignore
        except ServiceRequestError as err:
            raise CloudError(
                "Unable to connect; check the Azure Blob Storage account name."
            ) from err
        if container_list:
            containers_df = _parse_returned_items(
                container_list, remove_list=["lease", "encryption_scope"])
        else:
            containers_df = None
        return containers_df

    def create_container(self, container_name: str, **kwargs) -> pd.DataFrame:
        """
        Create a new container within the Azure Blob Storage account.

        Parameters
        ----------
        container_name : str
            The name for the new container.
        Additional container parameters can be passed as kwargs

        Returns
        -------
        pd.DataFrame
            Details of the created container.

        """
        try:
            new_container = self.abs_client.create_container(  # type: ignore
                container_name, **kwargs)  # type:ignore
        except ResourceExistsError as err:
            raise CloudError(
                f"Container {container_name} already exists.") from err
        properties = new_container.get_container_properties()
        container_df = _parse_returned_items([properties],
                                             ["encryption_scope", "lease"])
        return container_df

    def blobs(self, container_name: str) -> Optional[pd.DataFrame]:
        """
        Get a list of blobs in a container.

        Parameters
        ----------
        container_name : str
            The name of the container to get blobs from.

        Returns
        -------
        pd.DataFrame
            Details of the blobs.

        """
        container_client = self.abs_client.get_container_client(
            container_name)  # type: ignore
        blobs = list(container_client.list_blobs())
        return _parse_returned_items(blobs) if blobs else None

    def upload_to_blob(self,
                       blob: Any,
                       container_name: str,
                       blob_name: str,
                       overwrite: bool = True):
        """
        Upload a blob of data.

        Parameters
        ----------
        blob : Any
            The data to upload.
        container_name : str
            The name of the container to upload the blob to.
        blob_name : str
            The name to give the blob.
        overwrite : bool, optional
            Whether or not you want to overwrite the blob if it exists, by default True.

        """
        try:
            blob_client = self.abs_client.get_blob_client(  # type:ignore
                container=container_name, blob=blob_name)
            upload = blob_client.upload_blob(blob, overwrite=overwrite)
        except ResourceNotFoundError as err:
            raise CloudError(
                "Unknown container, check container name or create it first."
            ) from err
        if not upload["error_code"]:
            print("Upload complete")
        else:
            raise CloudError(
                f"There was a problem uploading the blob: {upload['error_code']}"
            )
        return True

    def get_blob(self, container_name: str, blob_name: str) -> bytes:
        """
        Get a blob from the Azure Blob Storage account.

        Parameters
        ----------
        container_name : str
            The name of the container that holds the blob.
        blob_name : str
            The name of the blob to download.

        Returns
        -------
        bytes
            The content of the blob in bytes.

        """
        blob_client = self.abs_client.get_blob_client(  # type: ignore
            container=container_name, blob=blob_name)
        if blob_client.exists():
            data_stream = blob_client.download_blob()
            data = data_stream.content_as_bytes()
        else:
            raise CloudError(
                f"The blob {blob_name} does not exist in {container_name}")
        return data

    def delete_blob(self, container_name: str, blob_name: str) -> bool:
        """
        Delete a blob from the Azure Blob Storage account.

        Parameters
        ----------
        container_name : str
            The container name that has the blob.
        blob_name : str
            The name of the blob to delete. Note that deleting a blob also
            deletes any associated snapshots.

        Returns
        -------
        bool
            True if the blob was successfully deleted.

        """
        blob_client = self.abs_client.get_blob_client(  # type: ignore
            container=container_name, blob=blob_name)
        if blob_client.exists():
            blob_client.delete_blob(delete_snapshots="include")
        else:
            raise CloudError(
                f"The blob {blob_name} does not exist in {container_name}")

        return True

    def get_sas_token(
        self,
        container_name: str,
        blob_name: str,
        end: Optional[datetime.datetime] = None,
        permission: str = "r",
    ) -> str:
        """
        Generate a shared access string (SAS) token for a blob.

        Parameters
        ----------
        container_name : str
            The name of the Azure Blob Storage container that holds the blob.
        blob_name : str
            The name of the blob to generate the SAS token for.
        end : datetime.datetime, optional
            The datetime the SAS token should expire, by default this is 7 days from now.
        permission : str, optional
            The permissions to give the SAS token, by default 'r' for read.

        Returns
        -------
        str
            A URI of the blob with SAS token.

        """
        start = datetime.datetime.now()
        if not end:
            end = start + datetime.timedelta(days=7)
        key = self.abs_client.get_user_delegation_key(start,
                                                      end)  # type: ignore
        abs_name = self.abs_client.account_name  # type: ignore
        sast = generate_blob_sas(
            abs_name,
            container_name,
            blob_name,
            user_delegation_key=key,
            permission=permission,
            expiry=end,
            start=start,
        )
        full_path = f"https://{abs_name}.blob.core.windows.net/{container_name}/{blob_name}?{sast}"
        return full_path
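
A short usage sketch of the wrapper above; the account, container, and blob names are placeholders, and connect=True assumes az_connect can pick up default credentials:

# Hypothetical usage of AzureBlobStorage; names are placeholders.
abs_store = AzureBlobStorage("myaccount", connect=True)
abs_store.upload_to_blob(b"hello", "mycontainer", "greeting.txt")
content = abs_store.get_blob("mycontainer", "greeting.txt")
sas_url = abs_store.get_sas_token("mycontainer", "greeting.txt")  # read-only, 7-day expiry by default
abs_store.delete_blob("mycontainer", "greeting.txt")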
Example #11
class StorageCPKTest(StorageTestCase):
    def setUp(self):
        super(StorageCPKTest, self).setUp()

        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=self.settings.STORAGE_ACCOUNT_KEY,
            connection_data_block_size=1024,
            max_single_put_size=1024,
            min_large_block_upload_threshold=1024,
            max_block_size=1024,
            max_page_size=1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        # prep some test data so that they can be used in upload tests
        self.byte_data = self.get_random_bytes(64 * 1024)

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass

        return super(StorageCPKTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    def _get_blob_reference(self):
        return self.get_resource_name("cpk")

    def _create_block_blob(self,
                           blob_name=None,
                           data=None,
                           cpk=None,
                           max_concurrency=1):
        blob_name = blob_name if blob_name else self._get_blob_reference()
        blob_client = self.bsc.get_blob_client(self.container_name, blob_name)
        data = data if data else b''
        resp = blob_client.upload_blob(data,
                                       cpk=cpk,
                                       max_concurrency=max_concurrency)
        return blob_client, resp

    def _create_append_blob(self, cpk=None):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_append_blob(cpk=cpk)
        return blob

    def _create_page_blob(self, cpk=None):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_page_blob(1024 * 1024, cpk=cpk)
        return blob

    # -- Test cases for APIs supporting CPK ----------------------------------------------

    @record
    def test_put_block_and_put_block_list(self):
        # Arrange
        blob_client, _ = self._create_block_blob()
        blob_client.stage_block('1', b'AAA', cpk=TEST_ENCRYPTION_KEY)
        blob_client.stage_block('2', b'BBB', cpk=TEST_ENCRYPTION_KEY)
        blob_client.stage_block('3', b'CCC', cpk=TEST_ENCRYPTION_KEY)

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3')
        ]
        put_block_list_resp = blob_client.commit_block_list(
            block_list, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(put_block_list_resp['etag'])
        self.assertIsNotNone(put_block_list_resp['last_modified'])
        self.assertTrue(put_block_list_resp['request_server_encrypted'])
        self.assertEqual(put_block_list_resp['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), b'AAABBBCCC')
        self.assertEqual(blob.properties.etag, put_block_list_resp['etag'])
        self.assertEqual(blob.properties.last_modified,
                         put_block_list_resp['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    def test_create_block_blob_with_chunks(self):
        # parallel operation
        if TestMode.need_recording_file(self.test_mode):
            return
        # Arrange
        #  to force the in-memory chunks to be used
        self.config.use_byte_buffer = True

        # Act
        # create_blob_from_bytes forces the in-memory chunks to be used
        blob_client, upload_response = self._create_block_blob(
            data=self.byte_data, cpk=TEST_ENCRYPTION_KEY, max_concurrency=2)

        # Assert
        self.assertIsNotNone(upload_response['etag'])
        self.assertIsNotNone(upload_response['last_modified'])
        self.assertTrue(upload_response['request_server_encrypted'])
        self.assertEqual(upload_response['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data)
        self.assertEqual(blob.properties.etag, upload_response['etag'])
        self.assertEqual(blob.properties.last_modified,
                         upload_response['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    def test_create_block_blob_with_sub_streams(self):
        # problem with the recording framework can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Act
        # create_blob_from_bytes forces the in-memory chunks to be used
        blob_client, upload_response = self._create_block_blob(
            data=self.byte_data, cpk=TEST_ENCRYPTION_KEY, max_concurrency=2)

        # Assert
        self.assertIsNotNone(upload_response['etag'])
        self.assertIsNotNone(upload_response['last_modified'])
        self.assertTrue(upload_response['request_server_encrypted'])
        self.assertEqual(upload_response['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data)
        self.assertEqual(blob.properties.etag, upload_response['etag'])
        self.assertEqual(blob.properties.last_modified,
                         upload_response['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_create_block_blob_with_single_chunk(self):
        # Act
        data = b'AAABBBCCC'
        # create_blob_from_bytes forces the in-memory chunks to be used
        blob_client, upload_response = self._create_block_blob(
            data=data, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(upload_response['etag'])
        self.assertIsNotNone(upload_response['last_modified'])
        self.assertTrue(upload_response['request_server_encrypted'])
        self.assertEqual(upload_response['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), data)
        self.assertEqual(blob.properties.etag, upload_response['etag'])
        self.assertEqual(blob.properties.last_modified,
                         upload_response['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_put_block_from_url_and_commit_with_cpk(self):
        # Arrange
        # create source blob and get source blob url
        source_blob_name = self.get_resource_name("sourceblob")
        self.config.use_byte_buffer = True  # Make sure using chunk upload, then we can record the request
        source_blob_client, _ = self._create_block_blob(
            blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))
        source_blob_url = source_blob_client.url + "?" + source_blob_sas

        # create destination blob
        self.config.use_byte_buffer = False
        destination_blob_client, _ = self._create_block_blob(
            cpk=TEST_ENCRYPTION_KEY)

        # Act part 1: make put block from url calls
        destination_blob_client.stage_block_from_url(
            block_id=1,
            source_url=source_blob_url,
            source_offset=0,
            source_length=4 * 1024,
            cpk=TEST_ENCRYPTION_KEY)
        destination_blob_client.stage_block_from_url(
            block_id=2,
            source_url=source_blob_url,
            source_offset=4 * 1024,
            source_length=4 * 1024,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert blocks
        committed, uncommitted = destination_blob_client.get_block_list('all')
        self.assertEqual(len(uncommitted), 2)
        self.assertEqual(len(committed), 0)

        # commit the blocks without cpk should fail
        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2')]
        with self.assertRaises(HttpResponseError):
            destination_blob_client.commit_block_list(block_list)

        # Act commit the blocks with cpk should succeed
        put_block_list_resp = destination_blob_client.commit_block_list(
            block_list, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(put_block_list_resp['etag'])
        self.assertIsNotNone(put_block_list_resp['last_modified'])
        self.assertTrue(put_block_list_resp['request_server_encrypted'])
        self.assertEqual(put_block_list_resp['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content
        blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data[0:8 * 1024])
        self.assertEqual(blob.properties.etag, put_block_list_resp['etag'])
        self.assertEqual(blob.properties.last_modified,
                         put_block_list_resp['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_append_block(self):
        # Arrange
        blob_client = self._create_append_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        for content in [b'AAA', b'BBB', b'CCC']:
            append_blob_prop = blob_client.append_block(
                content, cpk=TEST_ENCRYPTION_KEY)

            # Assert
            self.assertIsNotNone(append_blob_prop['etag'])
            self.assertIsNotNone(append_blob_prop['last_modified'])
            self.assertTrue(append_blob_prop['request_server_encrypted'])
            self.assertEqual(append_blob_prop['encryption_key_sha256'],
                             TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), b'AAABBBCCC')
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_append_block_from_url(self):
        # Arrange
        source_blob_name = self.get_resource_name("sourceblob")
        self.config.use_byte_buffer = True  # chunk upload
        source_blob_client, _ = self._create_block_blob(
            blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))
        source_blob_url = source_blob_client.url + "?" + source_blob_sas

        self.config.use_byte_buffer = False
        destination_blob_client = self._create_append_blob(
            cpk=TEST_ENCRYPTION_KEY)

        # Act
        append_blob_prop = destination_blob_client.append_block_from_url(
            source_blob_url,
            source_offset=0,
            source_length=4 * 1024,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(append_blob_prop['etag'])
        self.assertIsNotNone(append_blob_prop['last_modified'])
        # TODO: verify that the swagger is correct, header wasn't added for the response
        # self.assertTrue(append_blob_prop['request_server_encrypted'])
        self.assertEqual(append_blob_prop['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            destination_blob_client.download_blob()

        # Act get the blob content
        blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data[0:4 * 1024])
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_create_append_blob_with_chunks(self):
        # Arrange
        blob_client = self._create_append_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        append_blob_prop = blob_client.upload_blob(
            self.byte_data,
            blob_type=BlobType.AppendBlob,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(append_blob_prop['etag'])
        self.assertIsNotNone(append_blob_prop['last_modified'])
        self.assertTrue(append_blob_prop['request_server_encrypted'])
        self.assertEqual(append_blob_prop['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_update_page(self):
        # Arrange
        blob_client = self._create_page_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        page_blob_prop = blob_client.upload_page(self.byte_data,
                                                 offset=0,
                                                 length=len(self.byte_data),
                                                 cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        self.assertEqual(page_blob_prop['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(
            offset=0,
            length=len(self.byte_data),
            cpk=TEST_ENCRYPTION_KEY,
        )

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_update_page_from_url(self):
        # Arrange
        source_blob_name = self.get_resource_name("sourceblob")
        self.config.use_byte_buffer = True  # Make sure using chunk upload, then we can record the request
        source_blob_client, _ = self._create_block_blob(
            blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))
        source_blob_url = source_blob_client.url + "?" + source_blob_sas

        self.config.use_byte_buffer = False
        blob_client = self._create_page_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        page_blob_prop = blob_client.upload_pages_from_url(
            source_blob_url,
            offset=0,
            length=len(self.byte_data),
            source_offset=0,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        # TODO: FIX SWAGGER
        # self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(
            offset=0,
            length=len(self.byte_data),
            cpk=TEST_ENCRYPTION_KEY,
        )

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    def test_create_page_blob_with_chunks(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Act
        blob_client = self.bsc.get_blob_client(self.container_name,
                                               self._get_blob_reference())
        page_blob_prop = blob_client.upload_blob(self.byte_data,
                                                 blob_type=BlobType.PageBlob,
                                                 max_concurrency=2,
                                                 cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        self.assertEqual(page_blob_prop['encryption_key_sha256'],
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.content_as_bytes(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

    # TODO: verify why clear page works without providing cpk
    # @record
    # def test_clear_page(self):
    #     # Arrange
    #     blob_client = self.bsc.get_blob_client(self.container_name, self._get_blob_reference())
    #     data = self.get_random_bytes(1024)
    #     blob_client.upload_blob(data, blob_type=BlobType.PageBlob, cpk=TEST_ENCRYPTION_KEY)
    #
    #     # Act
    #     blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)
    #     self.assertEquals(blob.content_as_bytes(), data)
    #
    #     # with self.assertRaises(HttpResponseError):
    #     #     blob_client.clear_page(0, 511)
    #
    #     resp = blob_client.clear_page(0, 511, cpk=TEST_ENCRYPTION_KEY)
    #     blob = blob_client.download_blob(0, 511, cpk=TEST_ENCRYPTION_KEY)
    #
    #     # Assert
    #     self.assertIsNotNone(resp.get('etag'))
    #     self.assertIsNotNone(resp.get('last_modified'))
    #     self.assertIsNotNone(resp.get('blob_sequence_number'))
    #     self.assertEquals(blob.content_as_bytes(), b'\x00' * 512)
    #
    #     blob = blob_client.download_blob(512, 1023, cpk=TEST_ENCRYPTION_KEY)
    #     self.assertEquals(blob.content_as_bytes(), data[512:])

    @record
    def test_get_set_blob_metadata(self):
        # Arrange
        blob_client, _ = self._create_block_blob(data=b'AAABBBCCC',
                                                 cpk=TEST_ENCRYPTION_KEY)

        # Act without the encryption key should fail
        with self.assertRaises(HttpResponseError):
            blob_client.get_blob_properties()

        # Act
        blob_props = blob_client.get_blob_properties(cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertTrue(blob_props.server_encrypted)
        self.assertEqual(blob_props.encryption_key_sha256,
                         TEST_ENCRYPTION_KEY.key_hash)

        # Act set blob properties
        metadata = {'hello': 'world', 'number': '42', 'UP': 'UPval'}
        with self.assertRaises(HttpResponseError):
            blob_client.set_blob_metadata(metadata=metadata)

        blob_client.set_blob_metadata(metadata=metadata,
                                      cpk=TEST_ENCRYPTION_KEY)

        # Assert
        blob_props = blob_client.get_blob_properties(cpk=TEST_ENCRYPTION_KEY)
        md = blob_props.metadata
        self.assertEqual(3, len(md))
        self.assertEqual(md['hello'], 'world')
        self.assertEqual(md['number'], '42')
        self.assertEqual(md['UP'], 'UPval')
        self.assertFalse('up' in md)

    @record
    def test_snapshot_blob(self):
        # Arrange
        blob_client, _ = self._create_block_blob(data=b'AAABBBCCC',
                                                 cpk=TEST_ENCRYPTION_KEY)

        # Act without cpk should not work
        with self.assertRaises(HttpResponseError):
            blob_client.create_snapshot()

        # Act with cpk should work
        blob_snapshot = blob_client.create_snapshot(cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(blob_snapshot)
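
The TEST_ENCRYPTION_KEY used throughout these tests is a customer-provided key (CPK). As a hedged sketch of how such a key can be constructed with azure-storage-blob (the key bytes here are freshly generated, not the suite's actual value):

# Sketch: building a customer-provided encryption key. The SDK expects the
# base64-encoded AES-256 key and the base64-encoded SHA-256 of the raw key.
import base64
import hashlib
import os

from azure.storage.blob import CustomerProvidedEncryptionKey

raw_key = os.urandom(32)
TEST_ENCRYPTION_KEY = CustomerProvidedEncryptionKey(
    key_value=base64.b64encode(raw_key).decode(),
    key_hash=base64.b64encode(hashlib.sha256(raw_key).digest()).decode(),
)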
Example #12
# Class to hold API functions
class get_flightdata():
    def __init__(self):
        return

    def states_all(self):
        url = "https://opensky-network.org/api/states/all"
        response = requests.get(url).json()
        return response


# JSON response needs to be parsed and inserted into a table
json_data = get_flightdata().states_all()
# print(json_data)
flights = []
timepulled = json_data['time']
for i in json_data['states']:
    if i[1].startswith('JBU'):
        flights.append(i)

# Create a pandas DataFrame from the response
headers = ['icao24','callsign','origin_country','time_position','last_contact','longitude','latitude','baro_altitude','on_ground','velocity','true_track','vertical_rate','sensors','geo_altitude','squawk','spi','position_source']
data = pd.DataFrame(flights, columns=headers)

# Connect to the Azure Blob Storage account and upload the data as CSV
credential = "8g2Fqc9sTpHKfwmew7A54I182vyVmBnQM6Z9lHf9V0fvxj5A0oq5WsagpRrR/Dtas8+a/2m7jwMMFoqq8Qk7Qw=="
service = BlobServiceClient(account_url="https://openskystorage.blob.core.windows.net/", credential=credential)
theblob = service.get_blob_client(container="statesall", blob=str(time.time()))
theblob.upload_blob(data.to_csv(index=False))
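
A hedged companion sketch, reusing the `service` client from the script above, showing how the most recent CSV dump could be read back into a DataFrame (blob names are str(time.time()) values, so the lexicographic max approximates the latest):

# Read the newest states CSV back out of the "statesall" container.
from io import StringIO

container_client = service.get_container_client("statesall")
latest = max(container_client.list_blobs(), key=lambda b: b.name)
csv_text = container_client.get_blob_client(latest).download_blob().content_as_text()
df = pd.read_csv(StringIO(csv_text))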
Example #13
class StorageLargestBlockBlobTest(StorageTestCase):
    def _setup(self,
               storage_account,
               key,
               additional_policies=None,
               min_large_block_upload_threshold=1 * 1024 * 1024,
               max_single_put_size=32 * 1024):
        self.bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=key,
            max_single_put_size=max_single_put_size,
            max_block_size=LARGEST_BLOCK_SIZE,
            min_large_block_upload_threshold=min_large_block_upload_threshold,
            _additional_pipeline_policies=additional_policies)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.container_name = self.container_name + str(uuid.uuid4())

        if self.is_live:
            self.bsc.create_container(self.container_name)

    def _teardown(self, file_name):
        if path.isfile(file_name):
            try:
                remove(file_name)
            except:
                pass

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    # --Test cases for block blobs --------------------------------------------
    @pytest.mark.live_test_only
    @pytest.mark.skip(reason="This takes really long time")
    @GlobalStorageAccountPreparer()
    def test_put_block_bytes_largest(self, resource_group, location,
                                     storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()

        # Act
        data = urandom(LARGEST_BLOCK_SIZE)
        blockId = str(uuid.uuid4()).encode('utf-8')
        resp = blob.stage_block(blockId, data, length=LARGEST_BLOCK_SIZE)
        blob.commit_block_list([BlobBlock(blockId)])
        block_list = blob.get_block_list()

        # Assert
        self.assertIsNotNone(resp)
        assert 'content_md5' in resp
        assert 'content_crc64' in resp
        assert 'request_id' in resp
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 1)
        self.assertEqual(block_list[0][0].size, LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_block_bytes_largest_without_network(self, resource_group,
                                                     location, storage_account,
                                                     storage_account_key):
        payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            storage_account.name, storage_account_key)
        self._setup(storage_account, storage_account_key,
                    [payload_dropping_policy, credential_policy])
        blob = self._create_blob()

        # Act
        data = urandom(LARGEST_BLOCK_SIZE)
        blockId = str(uuid.uuid4()).encode('utf-8')
        resp = blob.stage_block(blockId, data, length=LARGEST_BLOCK_SIZE)
        blob.commit_block_list([BlobBlock(blockId)])
        block_list = blob.get_block_list()

        # Assert
        self.assertIsNotNone(resp)
        assert 'content_md5' in resp
        assert 'content_crc64' in resp
        assert 'request_id' in resp
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 1)
        self.assertEqual(payload_dropping_policy.put_block_counter, 1)
        self.assertEqual(payload_dropping_policy.put_block_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @pytest.mark.skip(reason="This takes really long time")
    @GlobalStorageAccountPreparer()
    def test_put_block_stream_largest(self, resource_group, location,
                                      storage_account, storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob = self._create_blob()

        # Act
        stream = LargeStream(LARGEST_BLOCK_SIZE)
        blockId = str(uuid.uuid4())
        requestId = str(uuid.uuid4())
        resp = blob.stage_block(blockId,
                                stream,
                                length=LARGEST_BLOCK_SIZE,
                                client_request_id=requestId)
        blob.commit_block_list([BlobBlock(blockId)])
        block_list = blob.get_block_list()

        # Assert
        self.assertIsNotNone(resp)
        assert 'content_md5' in resp
        assert 'content_crc64' in resp
        assert 'request_id' in resp
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 1)
        self.assertEqual(block_list[0][0].size, LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_block_stream_largest_without_network(self, resource_group,
                                                      location,
                                                      storage_account,
                                                      storage_account_key):
        payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            storage_account.name, storage_account_key)
        self._setup(storage_account, storage_account_key,
                    [payload_dropping_policy, credential_policy])
        blob = self._create_blob()

        # Act
        stream = LargeStream(LARGEST_BLOCK_SIZE)
        blockId = str(uuid.uuid4())
        requestId = str(uuid.uuid4())
        resp = blob.stage_block(blockId,
                                stream,
                                length=LARGEST_BLOCK_SIZE,
                                client_request_id=requestId)
        blob.commit_block_list([BlobBlock(blockId)])
        block_list = blob.get_block_list()

        # Assert
        self.assertIsNotNone(resp)
        assert 'content_md5' in resp
        assert 'content_crc64' in resp
        assert 'request_id' in resp
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 1)
        self.assertEqual(payload_dropping_policy.put_block_counter, 1)
        self.assertEqual(payload_dropping_policy.put_block_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @pytest.mark.skip(reason="This takes really long time")
    @GlobalStorageAccountPreparer()
    def test_create_largest_blob_from_path(self, resource_group, location,
                                           storage_account,
                                           storage_account_key):
        self._setup(storage_account, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        FILE_PATH = 'largest_blob_from_path.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            largeStream = LargeStream(LARGEST_BLOCK_SIZE, 100 * 1024 * 1024)
            chunk = largeStream.read()
            while chunk:
                stream.write(chunk)
                chunk = largeStream.read()

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=2)

        # Assert
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_largest_blob_from_path_without_network(
            self, resource_group, location, storage_account,
            storage_account_key):
        payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            storage_account.name, storage_account_key)
        self._setup(storage_account, storage_account_key,
                    [payload_dropping_policy, credential_policy])
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        FILE_PATH = 'largest_blob_from_path.temp.{}.dat'.format(
            str(uuid.uuid4()))
        with open(FILE_PATH, 'wb') as stream:
            largeStream = LargeStream(LARGEST_BLOCK_SIZE, 100 * 1024 * 1024)
            chunk = largeStream.read()
            while chunk:
                stream.write(chunk)
                chunk = largeStream.read()

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=2)

        # Assert
        self._teardown(FILE_PATH)
        self.assertEqual(payload_dropping_policy.put_block_counter, 1)
        self.assertEqual(payload_dropping_policy.put_block_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.skip(reason="This takes really long time")
    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_largest_blob_from_stream_without_network(
            self, resource_group, location, storage_account,
            storage_account_key):
        payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            storage_account.name, storage_account_key)
        self._setup(storage_account, storage_account_key,
                    [payload_dropping_policy, credential_policy])
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        number_of_blocks = 50000

        stream = LargeStream(LARGEST_BLOCK_SIZE * number_of_blocks)

        # Act
        blob.upload_blob(stream, max_concurrency=1)

        # Assert
        self.assertEqual(payload_dropping_policy.put_block_counter,
                         number_of_blocks)
        self.assertEqual(payload_dropping_policy.put_block_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_largest_blob_from_stream_single_upload_without_network(
            self, resource_group, location, storage_account,
            storage_account_key):
        payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            storage_account.name, storage_account_key)
        self._setup(storage_account,
                    storage_account_key,
                    [payload_dropping_policy, credential_policy],
                    max_single_put_size=LARGEST_SINGLE_UPLOAD_SIZE + 1)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        stream = LargeStream(LARGEST_SINGLE_UPLOAD_SIZE)

        # Act
        blob.upload_blob(stream,
                         length=LARGEST_SINGLE_UPLOAD_SIZE,
                         max_concurrency=1)

        # Assert
        self.assertEqual(payload_dropping_policy.put_block_counter, 0)
        self.assertEqual(payload_dropping_policy.put_blob_counter, 1)
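
The without-network tests above depend on a PayloadDroppingPolicy defined elsewhere in the test suite. A rough sketch of what such a pipeline policy could look like, assuming azure-core's SansIOHTTPPolicy hook; the request-matching heuristics here are illustrative, not the suite's actual implementation:

from azure.core.pipeline.policies import SansIOHTTPPolicy

class PayloadDroppingPolicy(SansIOHTTPPolicy):
    """Counts and discards upload bodies so terabyte-scale uploads can be
    exercised without actually transferring the payload."""

    def __init__(self):
        self.put_block_counter = 0
        self.put_block_sizes = []
        self.put_blob_counter = 0

    def on_request(self, request):
        req = request.http_request
        if req.method != "PUT":
            return
        body_size = (len(req.body) if isinstance(req.body, bytes)
                     else int(req.headers.get("Content-Length", 0)))
        if "comp=block" in req.url:                      # Put Block
            self.put_block_counter += 1
            self.put_block_sizes.append(body_size)
        elif req.headers.get("x-ms-blob-type"):          # Put Blob
            self.put_blob_counter += 1
        else:
            return
        req.body = b""                                   # drop the payload
        req.headers["Content-Length"] = "0"

Because the body is rewritten before signing, the tests inject the shared-key credential policy (via _format_shared_key_credential) into the same pipeline so the mutated request is still signed correctly.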
Example #14
0
class StorageLargeBlockBlobTest(StorageTestCase):
    def _setup(self, name, key):
        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(self.account_url(name, "blob"),
                                     credential=key,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=2 * 1024 * 1024,
                                     min_large_block_upload_threshold=1 * 1024 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if self.is_live:
            self.bsc.create_container(self.container_name)

    def _teardown(self, file_name):
        if path.isfile(file_name):
            try:
                remove(file_name)
            except:
                pass

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data.chunks())), expected_data)

    # --Test cases for block blobs --------------------------------------------
    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_block_bytes_large(self, resource_group, location,
                                   storage_account, storage_account_key):

        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block('block {0}'.format(i).encode('utf-8'),
                                    urandom(LARGE_BLOCK_SIZE))
            self.assertIsNone(resp)

            # Assert

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_block_bytes_large_with_md5(self, resource_group, location,
                                            storage_account,
                                            storage_account_key):

        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block('block {0}'.format(i).encode('utf-8'),
                                    urandom(LARGE_BLOCK_SIZE),
                                    validate_content=True)
            self.assertIsNone(resp)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_block_stream_large(self, resource_group, location,
                                    storage_account, storage_account_key):

        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        for i in range(5):
            stream = BytesIO(bytearray(LARGE_BLOCK_SIZE))
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                stream,
                length=LARGE_BLOCK_SIZE)
            self.assertIsNone(resp)

            # Assert

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_put_block_stream_large_with_md5(self, resource_group, location,
                                             storage_account,
                                             storage_account_key):

        self._setup(storage_account.name, storage_account_key)
        blob = self._create_blob()

        # Act
        for i in range(5):
            stream = BytesIO(bytearray(LARGE_BLOCK_SIZE))
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                stream,
                length=LARGE_BLOCK_SIZE,
                validate_content=True)
            self.assertIsNone(resp)

        # Assert

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_path(self, resource_group, location,
                                         storage_account, storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'large_blob_from_path.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_path_with_md5(self, resource_group,
                                                  location, storage_account,
                                                  storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = "blob_from_path_with_md5.temp.dat"
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, validate_content=True, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_path_non_parallel(self, resource_group,
                                                      location,
                                                      storage_account,
                                                      storage_account_key):

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(self.get_random_bytes(100))
        FILE_PATH = "blob_from_path_non_parallel.temp.dat"
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_path_with_progress(self, resource_group,
                                                       location,
                                                       storage_account,
                                                       storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = "blob_from_path_with_progress.temp.dat"
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             max_concurrency=2,
                             raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_path_with_properties(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'blob_from_path_with_properties.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             content_settings=content_settings,
                             max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_stream_chunked_upload(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'blob_from_stream_chunked_upload.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_creat_lrgblob_frm_stream_w_progress_chnkd_upload(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'stream_w_progress_chnkd_upload.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             max_concurrency=2,
                             raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_create_large_blob_from_stream_chunked_upload_with_count(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live
        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'chunked_upload_with_count.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_creat_lrgblob_frm_strm_chnkd_uplod_w_count_n_props(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'plod_w_count_n_props.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             content_settings=content_settings,
                             max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
        self._teardown(FILE_PATH)

    @pytest.mark.live_test_only
    @GlobalStorageAccountPreparer()
    def test_creat_lrg_blob_frm_stream_chnked_upload_w_props(
            self, resource_group, location, storage_account,
            storage_account_key):
        # parallel tests introduce random order of requests, can only run live

        self._setup(storage_account.name, storage_account_key)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(urandom(LARGE_BLOB_SIZE))
        FILE_PATH = 'creat_lrg_blob.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             content_settings=content_settings,
                             max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
        self._teardown(FILE_PATH)
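
To make the chunking thresholds in _setup concrete: an upload goes out as a single Put Blob only when it is at most max_single_put_size (32 KiB here); anything larger is split into Put Block calls of at most max_block_size (2 MiB here) followed by one Put Block List. A quick back-of-the-envelope sketch with a hypothetical payload just over 12 MiB:

import math

max_single_put_size = 32 * 1024
max_block_size = 2 * 1024 * 1024
blob_size = 12 * 1024 * 1024 + 5  # hypothetical payload, just over 12 MiB

if blob_size <= max_single_put_size:
    requests_needed = 1  # single Put Blob
else:
    # one Put Block per chunk, plus the final Put Block List commit
    requests_needed = math.ceil(blob_size / max_block_size) + 1

print(requests_needed)  # 8: six full 2 MiB blocks, one 5-byte block, one commit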
Example #15
0
def test_adfv2_dataflows_adlsgen2_delete_piicolumns(pytestconfig):

	# https://docs.microsoft.com/en-us/samples/azure-samples/data-lake-analytics-python-auth-options/authenticating-your-python-application-against-azure-active-directory/
	# access_token = credentials.token["access_token"]
	adfv2name = pytestconfig.getoption('adfv2name')
	adlsgen2stor = pytestconfig.getoption('adlsgen2stor')
	accesskeyadls = pytestconfig.getoption('accesskeyadls')
	subscriptionid = pytestconfig.getoption('subscriptionid')
	rg = pytestconfig.getoption('rg')
	#
	# Since the Azure DevOps SPN created the ADFv2 instance, it has Owner rights and can run the pipeline via REST (Contributor is the minimum required)
	tokenadf = pytestconfig.getoption('tokenadf')
	adfv2namepipeline = "adlsgen2-dataflows-delete-piicolumns"
	url = "https://management.azure.com/subscriptions/{}/resourceGroups/{}/providers/Microsoft.DataFactory/factories/{}/pipelines/{}/createRun?api-version=2018-06-01".format(subscriptionid, rg, adfv2name, adfv2namepipeline)
	response = requests.post(url, 
		headers={'Authorization': "Bearer " + tokenadf},
		json={
			"outputfolder": "curated"
		}
	)
	#
	assert response.status_code == 200, "test failed, pipeline not started, " + str(response.content)
	#
	runid = response.json()['runId']
	#
	count = 0
	while True:
		response = requests.get(
			"https://management.azure.com/subscriptions/{}/resourceGroups/{}/providers/Microsoft.DataFactory/factories/{}/pipelineruns/{}?api-version=2018-06-01".format(subscriptionid, rg, adfv2name, runid),
			headers={'Authorization': "Bearer " + tokenadf}
		)
		status = response.json()['status']
		if status == "InProgress" or status == "Queued":
			count += 1
			if count < 30:
				time.sleep(30) # wait 30 seconds before next status update
			else:
				# timeout
				break
		else:
			# pipeline has end state, script has finished
			print("hier2")
			break
	#
	assert count < 30, "test failed, timed out"
	#credential = CustomTokenCredential(tokenadls)
	credential = accesskeyadls
	storage_account_source_url = "https://" + adlsgen2stor + ".blob.core.windows.net"
	#
	client_source = BlobServiceClient(account_url=storage_account_source_url, credential=credential)
	container_source = client_source.get_container_client("curated")
	#
	blob_list = container_source.list_blobs(include=['snapshots'])
	for blob in blob_list:
		bottled_file = blob.name
	assert bottled_file == "AdultCensusIncomePIIremoved.parquet", "parquet file not found"
	#
	blob_client = client_source.get_blob_client(container="curated", blob="AdultCensusIncomePIIremoved.parquet")
	with open("AdultCensusIncomePIIremoved.parquet", "wb") as my_blob:
		download_stream = blob_client.download_blob()
		my_blob.write(download_stream.readall())
	#
	parquet_file = pq.ParquetFile('AdultCensusIncomePIIremoved.parquet')
	i = 0
	while i < parquet_file.metadata.row_group(0).num_columns:
		print(parquet_file.metadata.row_group(0).column(i).path_in_schema)
		if parquet_file.metadata.row_group(0).column(i).path_in_schema == "age":
			break
		i+=1
	# 
	assert i == parquet_file.metadata.row_group(0).num_columns, "PII age data still present"
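
The commented-out CustomTokenCredential above hints at a small adapter that wraps an already-acquired bearer token in the SDK's TokenCredential protocol. A minimal sketch, assuming azure-core's AccessToken model; the fixed expiry window is illustrative:

import time

from azure.core.credentials import AccessToken

class CustomTokenCredential(object):
    """Presents a pre-fetched bearer token as a TokenCredential."""

    def __init__(self, token, expires_in=3600):
        self._token = token
        self._expires_on = int(time.time()) + expires_in

    def get_token(self, *scopes, **kwargs):
        # The SDK calls this before each request; hand back the same token.
        return AccessToken(self._token, self._expires_on)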
Example #16
0
class StorageAppendBlobTest(StorageTestCase):
    def setUp(self):
        super(StorageAppendBlobTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        self.bsc = BlobServiceClient(url,
                                     credential=credential,
                                     max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass

        return super(StorageAppendBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_append_blob()
        return blob

    def assertBlobEqual(self, blob, expected_data):
        stream = blob.download_blob()
        actual_data = b"".join(list(stream))
        self.assertEqual(actual_data, expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for block blobs --------------------------------------------

    @record
    def test_create_blob(self):
        # Arrange
        blob_name = self._get_blob_reference()

        # Act
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        create_resp = blob.create_append_blob()

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_with_lease_id(self):
        # Arrange
        blob = self._create_blob()

        # Act
        lease = blob.acquire_lease()
        create_resp = blob.create_append_blob(lease=lease)

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_blob_with_metadata(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.create_append_blob(metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    @record
    def test_append_block(self):
        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.append_block(u'block {0}'.format(i).encode('utf-8'))
            self.assertEqual(int(resp['blob_append_offset']), 7 * i)
            self.assertEqual(resp['blob_committed_block_count'], i + 1)
            self.assertIsNotNone(resp['etag'])
            self.assertIsNotNone(resp['last_modified'])

        # Assert
        self.assertBlobEqual(blob, b'block 0block 1block 2block 3block 4')

    @record
    def test_append_block_unicode(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.append_block(u'啊齄丂狛狜', encoding='utf-16')
        self.assertEqual(int(resp['blob_append_offset']), 0)
        self.assertEqual(resp['blob_committed_block_count'], 1)
        self.assertIsNotNone(resp['etag'])
        self.assertIsNotNone(resp['last_modified'])

        # Assert

    @record
    def test_append_block_with_md5(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.append_block(b'block', validate_content=True)
        self.assertEqual(int(resp['blob_append_offset']), 0)
        self.assertEqual(resp['blob_committed_block_count'], 1)
        self.assertIsNotNone(resp['etag'])
        self.assertIsNotNone(resp['last_modified'])

        # Assert

    @record
    def test_create_append_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data1'})

        update_resp = blob.upload_blob(data2,
                                       overwrite=False,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        appended_data = data1 + data2
        self.assertBlobEqual(blob, appended_data)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + LARGE_BLOB_SIZE + 512)

    @record
    def test_create_append_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1,
                                       overwrite=True,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(data2,
                                       overwrite=True,
                                       blob_type=BlobType.AppendBlob,
                                       metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_append_blob_from_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified,
                         append_resp['last_modified'])

    @record
    def test_append_blob_from_0_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b''
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        # appending nothing should not make any network call
        self.assertIsNone(append_resp.get('etag'))
        self.assertIsNone(append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress(self):
        # Arrange
        blob = self._create_blob()
        data = b'abcdefghijklmnopqrstuvwxyz'

        # Act
        progress = []

        def progress_gen(upload):
            progress.append((0, len(upload)))
            yield upload

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_bytes_with_index(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:])

    @record
    def test_append_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], length=5, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:8])

    @record
    def test_append_blob_from_bytes_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def progress_gen(upload):
            n = self.config.max_block_size
            total = len(upload)
            current = 0
            while upload:
                progress.append((current, total))
                yield upload[:n]
                current += len(upload[:n])
                upload = upload[n:]

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_bytes_chunked_upload_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 33
        blob_size = len(data) - 66

        # Act
        blob.upload_blob(data[index:],
                         length=blob_size,
                         blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[index:index + blob_size])

    @record
    def test_append_blob_from_path_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream,
                                           blob_type=BlobType.AppendBlob)

        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_path_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def progress_gen(upload):
            n = self.config.max_block_size
            total = LARGE_BLOB_SIZE
            current = 0
            while upload:
                chunk = upload.read(n)
                if not chunk:
                    break
                progress.append((current, total))
                yield chunk
                current += len(chunk)

        with open(FILE_PATH, 'rb') as stream:
            upload_data = progress_gen(stream)
            blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_stream_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream,
                                           blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_known_size(
            self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_size = len(data) - 66

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file,
                             length=blob_size,
                             blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_unknown_size(
            self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_with_multiple_appends(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream1:
            stream1.write(data)
        with open(FILE_PATH, 'wb') as stream2:
            stream2.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream1:
            blob.upload_blob(stream1, blob_type=BlobType.AppendBlob)
        with open(FILE_PATH, 'rb') as stream2:
            blob.upload_blob(stream2, blob_type=BlobType.AppendBlob)

        # Assert
        data = data * 2
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_chunked_upload_with_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    def test_append_blob_from_stream_chunked_upload_with_count_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream,
                                           length=blob_size,
                                           blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        append_resp = blob.upload_blob(text, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified,
                         append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text_with_encoding(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text,
                         encoding='utf-16',
                         blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_text_with_encoding_and_progress(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def progress_gen(upload):
            progress.append((0, len(data)))
            yield upload

        upload_data = progress_gen(text)
        blob.upload_blob(upload_data,
                         encoding='utf-16',
                         blob_type=BlobType.AppendBlob)

        # Assert
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    @record
    def test_append_blob_from_text_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, encoded_data)

    @record
    def test_append_blob_with_md5(self):
        # Arrange
        blob = self._create_blob()
        data = b'hello world'

        # Act
        blob.append_block(data, validate_content=True)
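
Beyond the test matrix above: append blobs target append-only workloads such as logging, where each append_block call is atomic and the service tracks the committed block count. A minimal sketch of guarded appends, assuming the v12 appendpos_condition/maxsize_condition keyword arguments; the connection string and names are placeholders:

from azure.storage.blob import BlobClient

blob = BlobClient.from_connection_string(
    "<connection-string>", container_name="logs", blob_name="app.log")
blob.create_append_blob()

offset = 0
for line in (b'started\n', b'working\n', b'done\n'):
    # appendpos_condition fails the write instead of duplicating data if a
    # concurrent writer appended first; maxsize_condition caps blob growth.
    resp = blob.append_block(line,
                             appendpos_condition=offset,
                             maxsize_condition=4 * 1024 * 1024)
    offset = int(resp['blob_append_offset']) + len(line)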
Example #17
0
class AzureBlobFileSystem(AbstractFileSystem):
    def __init__(
        self,
        account_name: str,
        account_key: str = None,
        connection_string: str = None,
        credential: str = None,
        sas_token: str = None,
        request_session=None,
        socket_timeout: int = None,
        client_id: str = None,
        client_secret: str = None,
        tenant_id: str = None,
    ):

        super().__init__()
        self.account_name = account_name
        self.account_key = account_key
        self.connection_string = connection_string
        self.credential = credential
        self.sas_token = sas_token
        self.request_session = request_session
        self.socket_timeout = socket_timeout
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id
        if (self.credential is None and self.account_key is None
                and self.sas_token is None and self.client_id is not None):
            self.credential = self._get_credential_from_service_principal()
        self.do_connect()

    def _get_credential_from_service_principal(self):
        """
        Create a Credential for authentication.  This can include a TokenCredential
        client_id, client_secret and tenant_id

        Returns
        -------
        Credential
        """
        from azure.identity import ClientSecretCredential

        sp_token = ClientSecretCredential(
            tenant_id=self.tenant_id,
            client_id=self.client_id,
            client_secret=self.client_secret,
        )
        return sp_token

    def do_connect(self):
        """Connect to the BlobServiceClient, using user-specified connection details.
        Tries a credential first, then a connection string, an account key, and
        finally a SAS token; otherwise falls back to anonymous access.

        Raises
        ------
        ValueError if none of the connection details are available
        """
        try:
            self.account_url: str = f"https://{self.account_name}.blob.core.windows.net"
            if self.credential is not None:
                self.service_client = BlobServiceClient(
                    account_url=self.account_url, credential=self.credential)
            elif self.connection_string is not None:
                self.service_client = BlobServiceClient.from_connection_string(
                    conn_str=self.connection_string)
            elif self.account_key is not None:
                self.service_client = BlobServiceClient(
                    account_url=self.account_url, credential=self.account_key)
            elif self.sas_token is not None:
                self.service_client = BlobServiceClient(
                    account_url=self.account_url + self.sas_token,
                    credential=None)
            else:
                self.service_client = BlobServiceClient(
                    account_url=self.account_url)

        except Exception as e:
            raise ValueError(f"unable to connect to account for {e}")

    def exists(self, path):
        """
        Checks whether the given path exists in the File System

        Returns
        -------
        Boolean
        """
        split_path = path.split("/")
        container_name = split_path[0]
        sub_path = "/".join(split_path[1:])
        container = self.service_client.get_container_client(container_name)
        it = container.list_blobs(name_starts_with=sub_path)
        return len(list(it)) > 0

    def ls(self, path, refresh=True):
        """
        Finds all the files in the given path in the File System
        Returns
        -------
        List of full paths of all files found in given path
        """
        return self.find(path)

    def isfile(self, path):
        """Is this entry file-like?
        Blob storage only stores paths to files, not folders, so this is always True.
        """
        return True

    def find(self, path):
        """
        Finds all the files in the given path in the File System

        Returns
        -------
        List of full paths of all files found in given path
        """
        split_path = path.split("/")
        container_name = split_path[0]
        sub_path = "/".join(split_path[1:])
        container = self.service_client.get_container_client(container_name)
        it = container.list_blobs(name_starts_with=sub_path)
        return [f"{container_name}/{item['name']}" for item in it]

    def rm(self, path, recursive=False, maxdepth=None):
        """Removes all the files in the given path"""
        split_path = path.split("/")
        container_name = split_path[0]
        sub_path = "/".join(split_path[1:])
        container = self.service_client.get_container_client(container_name)
        it = container.list_blobs(name_starts_with=sub_path)
        for item in it:
            container.delete_blob(item)

    def makedirs(self, path, exist_ok=False):
        """Recursively creates directories in path"""
        # in azure empty directories have no meaning, so makedirs not needed
        return

    def get_mapper(self, root, check=False, create=False):
        """Create key-value interface for given root"""
        return FSMap(root, self)

    def upload(self, path, value):
        """Uploads value to the given path"""
        split_path = path.split("/")
        container_name = split_path[0]
        sub_path = "/".join(split_path[1:])
        blob_client = self.service_client.get_blob_client(
            container_name, sub_path)
        blob_client.upload_blob(value, overwrite=True)

    def download(self, path):
        """Downloads the value from the given path"""
        if not self.exists(path):
            raise KeyError()
        split_path = path.split("/")
        container_name = split_path[0]
        sub_path = "/".join(split_path[1:])
        blob_client = self.service_client.get_blob_client(
            container_name, sub_path)
        return blob_client.download_blob().readall()

    def cat_file(self, path):
        return self.download(path)

    def pipe_file(self, path, value):
        return self.upload(path, value)
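
A short usage sketch for the class above; the account name and key are placeholders, and paths follow the '<container>/<blob-prefix>' convention the methods split on:

fs = AzureBlobFileSystem(account_name="<account>", account_key="<key>")

fs.upload("mycontainer/data/hello.txt", b"hello world")
print(fs.ls("mycontainer/data"))                   # ['mycontainer/data/hello.txt']
print(fs.cat_file("mycontainer/data/hello.txt"))   # b'hello world'
fs.rm("mycontainer/data")                          # deletes everything under the prefix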
Example #18
0
class StorageBlobEncryptionTest(StorageTestCase):

    def setUp(self):
        super(StorageBlobEncryptionTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=credential,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024,
            max_page_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.blob_types = (BlobType.BlockBlob, BlobType.PageBlob, BlobType.AppendBlob)

        self.bytes = b'Foo'

        if not self.is_playback():
            container = self.bsc.get_container_client(self.container_name)
            container.create_container()


    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass
        if path.isfile(FILE_PATH):
            try:
                remove(FILE_PATH)
            except:
                pass

        return super(StorageBlobEncryptionTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_container_reference(self):
        return self.get_resource_name(TEST_CONTAINER_PREFIX)

    def _get_blob_reference(self, blob_type):
        return self.get_resource_name(TEST_BLOB_PREFIXES[blob_type.value])

    def _create_small_blob(self, blob_type):
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(self.bytes, blob_type=blob_type)
        return blob
        
    #--Test cases for blob encryption ----------------------------------------

    @record
    def test_missing_attribute_kek_wrap(self):
        # In the shared method _generate_blob_encryption_key
        # Arrange
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')

        # Act
        invalid_key_1 = lambda: None #functions are objects, so this effectively creates an empty object
        invalid_key_1.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute wrap_key
        self.bsc.key_encryption_key = invalid_key_1
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_2 = lambda: None #functions are objects, so this effectively creates an empty object
        invalid_key_2.wrap_key = valid_key.wrap_key
        invalid_key_2.get_kid = valid_key.get_kid
        # No attribute get_key_wrap_algorithm
        self.bsc.key_encryption_key = invalid_key_2
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)
        
        invalid_key_3 = lambda: None #functions are objects, so this effectively creates an empty object
        invalid_key_3.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_3.wrap_key = valid_key.wrap_key
        # No attribute get_kid
        self.bsc.key_encryption_key = invalid_key_3
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @record
    def test_invalid_value_kek_wrap(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')

        self.bsc.key_encryption_key.get_key_wrap_algorithm = None
        try:
            self._create_small_blob(BlobType.BlockBlob)
            self.fail()
        except AttributeError as e:
            self.assertEqual(str(e), _ERROR_OBJECT_INVALID.format('key encryption key', 'get_key_wrap_algorithm'))

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.get_kid = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.wrap_key = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @record
    def test_missing_attribute_kek_unwrap(self):
        # Shared between all services in decrypt_blob
        # Arrange
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')
        self.bsc.key_encryption_key = valid_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        # Note that KeyWrapper has a default value for key_id, so these exceptions
        # are not due to non-matching kids.
        invalid_key_1 = lambda: None #functions are objects, so this effectively creates an empty object
        invalid_key_1.get_kid = valid_key.get_kid
        #No attribute unwrap_key
        blob.key_encryption_key = invalid_key_1
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

        invalid_key_2 = lambda: None #functions are objects, so this effectively creates an empty object
        invalid_key_2.unwrap_key = valid_key.unwrap_key
        blob.key_encryption_key = invalid_key_2
        #No attribute get_kid
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @record
    def test_invalid_value_kek_unwrap(self):
        if TestMode.need_recording_file(self.test_mode):
            return 
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = KeyWrapper('key1')
        blob.key_encryption_key.unwrap_key = None

        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @record
    def test_get_blob_kek(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content)), self.bytes)
        

    @record
    def test_get_blob_resolver(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        key_resolver = KeyResolver()
        key_resolver.put_key(self.bsc.key_encryption_key)
        self.bsc.key_resolver_function = key_resolver.resolve_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key = None
        content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, self.bytes)

    def test_get_blob_kek_RSA(self):
        # We can only generate random RSA keys, so this must be run live or 
        # the playback test will fail due to a change in kek values.
        if TestMode.need_recording_file(self.test_mode):
            return 

        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = RSAKeyWrapper('key2')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content)), self.bytes)

    @record
    def test_get_blob_nonmatching_kid(self):
        if TestMode.need_recording_file(self.test_mode):
            return 
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key.kid = 'Invalid'

        # Assert
        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @record
    def test_put_blob_invalid_stream_type(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        small_stream = StringIO(u'small')
        large_stream = StringIO(u'large' * self.config.max_single_put_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        # Block blob specific single shot
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(small_stream, length=5)
        self.assertTrue('Blob data should be of type bytes.' in str(e.exception))

        # Generic blob chunked
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(large_stream)
        self.assertTrue('Blob data should be of type bytes.' in str(e.exception))

    def test_put_blob_chunking_required_mult_of_block_size(self):
        # parallel tests introduce a random order of requests, so this can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(
            self.config.max_single_put_size + self.config.max_block_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content, blob_content)

    def test_put_blob_chunking_required_non_mult_of_block_size(self):
        # parallel tests introduce a random order of requests, so this can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = urandom(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content, blob_content)

    def test_put_blob_chunking_required_range_specified(self):
        # parallel tests introduce a random order of requests, so this can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size * 2)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(
            content,
            length=self.config.max_single_put_size + 53,
            max_concurrency=3)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=3)

        # Assert
        self.assertEqual(content[:self.config.max_single_put_size+53], blob_content)

    @record
    def test_put_block_blob_single_shot(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b'small'
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, blob_content)

    @record
    def test_put_blob_range(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        content = b'Random repeats' * self.config.max_single_put_size * 5

        # Uploading with an explicit length routes through the chunked upload
        # path, so this tests its ability to handle ranges even on a small blob
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(
            content[2:],
            length=self.config.max_single_put_size + 5,
            max_concurrency=1)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content[2:2 + self.config.max_single_put_size + 5], blob_content)

    @record
    def test_put_blob_empty(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b''
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=2)

        # Assert
        self.assertEqual(content, blob_content)

    @record
    def test_put_blob_serial_upload_chunking(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob().content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content, blob_content)

    @record
    def test_get_blob_range_beginning_to_middle(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob(offset=0, length=50).content_as_bytes(max_concurrency=1)

        # Assert
        self.assertEqual(content[:50], blob_content)

    @record
    def test_get_blob_range_middle_to_end(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_concurrency=1)
        blob_content = blob.download_blob(offset=100, length=28).content_as_bytes()
        blob_content2 = blob.download_blob(offset=100).content_as_bytes()

        # Assert
        self.assertEqual(content[100:], blob_content)
        self.assertEqual(content[100:], blob_content2)

    @record
    def test_get_blob_range_middle_to_middle(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5, length=93).content_as_bytes()

        # Assert
        self.assertEqual(content[5:98], blob_content)

    @record
    def test_get_blob_range_aligns_on_16_byte_block(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=48, length=16).content_as_bytes()

        # Assert
        self.assertEqual(content[48:64], blob_content)

    @record
    def test_get_blob_range_expanded_to_beginning_block_align(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5, length=50).content_as_bytes()

        # Assert
        self.assertEqual(content[5:55], blob_content)

    @record
    def test_get_blob_range_expanded_to_beginning_iv(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=22, length=20).content_as_bytes()

        # Assert
        self.assertEqual(content[22:42], blob_content)

    @record
    def test_put_blob_strict_mode(self):
        # Arrange
        self.bsc.require_encryption = True
        content = urandom(512)

        # Assert
        for service in self.blob_types:
            blob_name = self._get_blob_reference(service)
            blob = self.bsc.get_blob_client(self.container_name, blob_name)

            with self.assertRaises(ValueError):
                blob.upload_blob(content, blob_type=service)

            stream = BytesIO(content)
            with self.assertRaises(ValueError):
                blob.upload_blob(stream, length=512, blob_type=service)

            FILE_PATH = 'blob_input.temp.dat'
            with open(FILE_PATH, 'wb') as stream:
                stream.write(content)
            with open(FILE_PATH, 'rb') as stream:
                with self.assertRaises(ValueError):
                    blob.upload_blob(stream, blob_type=service)

            with self.assertRaises(ValueError):
                blob.upload_blob('To encrypt', blob_type=service)

    @record
    def test_get_blob_strict_mode_no_policy(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = None

        # Assert
        with self.assertRaises(ValueError):
            blob.download_blob().content_as_bytes()


    @record
    def test_get_blob_strict_mode_unencrypted_blob(self):
        # Arrange
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = True
        blob.key_encryption_key = KeyWrapper('key1')

        # Assert
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @record
    def test_invalid_methods_fail_block(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.stage_block('block1', urandom(32))
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.commit_block_list(['block1'])
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @record
    def test_invalid_methods_fail_append(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.AppendBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.append_block(urandom(32))
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_append_blob()
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        # All append_from operations funnel into append_from_stream, so testing one is sufficient
        with self.assertRaises(ValueError) as e:
            blob.upload_blob(b'To encrypt', blob_type=BlobType.AppendBlob)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @record
    def test_invalid_methods_fail_page(self):
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.PageBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.upload_page(urandom(512), offset=0, length=512)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_page_blob(512)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @record
    def test_validate_encryption(self):
        # Arrange
        self.bsc.require_encryption = True
        kek = KeyWrapper('key1')
        self.bsc.key_encryption_key = kek
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = False
        blob.key_encryption_key = None
        content = blob.download_blob()
        data = content.content_as_bytes()

        encryption_data = _dict_to_encryption_data(loads(content.properties.metadata['encryptiondata']))
        iv = encryption_data.content_encryption_IV
        content_encryption_key = _validate_and_unwrap_cek(encryption_data, kek, None)
        cipher = _generate_AES_CBC_cipher(content_encryption_key, iv)
        decryptor = cipher.decryptor()
        unpadder = PKCS7(128).unpadder()

        content = decryptor.update(data) + decryptor.finalize()
        content = unpadder.update(content) + unpadder.finalize()
        
        self.assertEqual(self.bytes, content)

    @record
    def test_create_block_blob_from_star(self):
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, self.bytes)

        stream = BytesIO(self.bytes)
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        FILE_PATH = 'blob_input.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(self.bytes)
        with open(FILE_PATH, 'rb') as stream:
            self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        self._create_blob_from_star(BlobType.BlockBlob, b'To encrypt', 'To encrypt')

    @record
    def test_create_page_blob_from_star(self):
        content = self.get_random_bytes(512)
        self._create_blob_from_star(BlobType.PageBlob, content, content)

        stream = BytesIO(content)
        self._create_blob_from_star(BlobType.PageBlob, content, stream, length=512)

        FILE_PATH = 'blob_input.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(content)

        with open(FILE_PATH, 'rb') as stream:
            self._create_blob_from_star(BlobType.PageBlob, content, stream)

    def _create_blob_from_star(self, blob_type, content, data, **kwargs):
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.key_encryption_key = KeyWrapper('key1')
        blob.require_encryption = True
        blob.upload_blob(data, blob_type=blob_type, **kwargs)

        blob_content = blob.download_blob().content_as_bytes()
        self.assertEqual(content, blob_content)
        blob.delete_blob()

    @record
    def test_get_blob_to_star(self):
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        iter_blob = b"".join(list(blob.download_blob()))
        bytes_blob = blob.download_blob().content_as_bytes()
        stream_blob = BytesIO()
        blob.download_blob().download_to_stream(stream_blob)
        stream_blob.seek(0)
        text_blob = blob.download_blob().content_as_text()

        # Assert
        self.assertEqual(self.bytes, iter_blob)
        self.assertEqual(self.bytes, bytes_blob)
        self.assertEqual(self.bytes, stream_blob.read())
        self.assertEqual(self.bytes.decode(), text_blob)
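
These tests exercise the client-side encryption hooks: for upload the key-encryption key must expose wrap_key, get_key_wrap_algorithm and get_kid, and for download unwrap_key and get_kid. Below is a minimal sketch of such a KEK; the XOR "wrapping" is a placeholder for a real key-wrap algorithm and must not be used in production.

import os

class DemoKeyWrapper:
    """Illustrative key-encryption key implementing the interface the tests rely on."""

    def __init__(self, kid):
        self.kid = kid
        self.secret = os.urandom(32)

    def wrap_key(self, key):
        # placeholder "wrap": XOR the content-encryption key with our secret
        return bytes(a ^ b for a, b in zip(key, self.secret))

    def unwrap_key(self, key, algorithm):
        # symmetric with wrap_key above
        return bytes(a ^ b for a, b in zip(key, self.secret))

    def get_key_wrap_algorithm(self):
        return 'demo_xor'

    def get_kid(self):
        return self.kid

# Configuration mirrors the tests above:
#   bsc.require_encryption = True
#   bsc.key_encryption_key = DemoKeyWrapper('demo-key-1')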
예제 #19
0
        datetime.now().strftime("Time:%H:%M:%S-Azure Connection - OK"))
except:
    logging.info(
        datetime.now().strftime("Time:%H:%M:%S-Azure Connection - FAILED"))

try:
    #Local File
    local_path = os.path.abspath(r"../Files/")
    csvfile = os.listdir(local_path)
    local_file_name = "/" + csvfile[0]
    full_path_to_file = local_path + local_file_name

    logging.info(datetime.now().strftime("Time:%H:%M:%S-Local File - OK"))
except:
    logging.info(datetime.now().strftime("Time:%H:%M:%S-Local File - ERROR"))

try:
    #Upload Local File to Container
    logging.info(
        datetime.now().strftime("Time:%H:%M:%S-CSV to Azure-Uploading"))
    blob_client = blob_service.get_blob_client(container=container_name,
                                               blob=local_file_name)
    with open(full_path_to_file, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)

    logging.info(
        datetime.now().strftime("Time:%H:%M:%S-CSV to Azure-Finished"))
except:
    logging.info(
        datetime.now().strftime("Time:%H:%M:%S-Upload CSV to Azure - FAILED"))
예제 #20
0
def traverse_and_create_index(dir,
                              sas_url=None,
                              overwrite_files=False,
                              template_fun=create_plain_index,
                              basepath=None):
    '''
    Recursively traverses the local directory *dir* and generates an index
    file for each folder using *template_fun* to generate the HTML output.
    Excludes hidden files.

    Args:
        dir: string, path to directory
        template_fun: function taking four arguments (string, list of string,
            list of string, string) representing the current root, the list of
            folders, the list of files, and the directory name relative to
            *basepath* (or None if *basepath* is not given).
            Should return the HTML source of the index file

    Return:
        None
    '''

    print("Traversing {}".format(dir))

    # Make sure we remove the trailing /
    dir = os.path.normpath(dir)

    # If we want to set the content type in blob storage using a SAS URL
    if sas_url:

        # Example: sas_url = 'https://accname.blob.core.windows.net/bname/path/to/folder?st=...&se=...&sp=...&...'
        if '?' in sas_url:
            # 'https://accname.blob.core.windows.net/bname/path/to/folder' and 'st=...&se=...&sp=...&...'
            base_url, sas_token = sas_url.split('?', 1)
        else:
            # 'https://accname.blob.core.windows.net/bname/path/to/folder' and None
            base_url, sas_token = sas_url, None
        # Remove https:// from base url
        # 'accname.blob.core.windows.net/bname/path/to/folder'
        base_url = base_url.split("//", 1)[1]
        # Everything up to the first dot is the account name
        # 'accname'
        account_name = base_url.split(".", 1)[0]
        # get everything after the first /
        # 'bname/path/to/folder'
        query_string = base_url.split("/", 1)[1]
        # Get container name and subfolder
        if '/' in query_string:
            # 'bname', 'path/to/folder'
            container_name, container_folder = query_string.split("/", 1)
        else:
            container_name, container_folder = query_string, ''

        # Prepare the storage access
        target_settings = ContentSettings(content_type='text/html')
        blob_service = BlobServiceClient(
            account_url=f'https://{account_name}.blob.core.windows.net',
            credential=sas_token)

    # Traverse directory and all sub directories, excluding hidden files
    for root, dirs, files in os.walk(dir):

        # Exclude files and folders that are hidden
        files = [f for f in files if not f[0] == '.']
        dirs[:] = [d for d in dirs if not d[0] == '.']

        # Output is written to file *root*/index.html
        output_file = os.path.join(root, "index.html")

        if not overwrite_files and os.path.isfile(output_file):
            print('Skipping {}, file exists'.format(output_file))
            continue

        print("Generating {}".format(output_file))

        # Generate HTML with template function
        dirname = None
        if basepath is not None:
            dirname = os.path.relpath(root, basepath)
        html = template_fun(root[len(dir):], dirs, files, dirname)

        # Write to file
        with open(output_file, 'wt') as fi:
            fi.write(html)

        # Set content type in blob storage
        if sas_url:
            if container_folder:
                output_blob_path = container_folder + '/' + output_file[
                    len(dir) + 1:]
            else:
                output_blob_path = output_file[len(dir) + 1:]
            try:
                blob_client = blob_service.get_blob_client(
                    container_name, output_blob_path)
                blob_client.set_http_headers(content_settings=target_settings)
            except azure.core.exceptions.HttpResponseError:
                print(
                    'ERROR: It seems the SAS URL is incorrect or does not allow setting properties.'
                )
                return
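
A short usage sketch for the traversal above; the directory and SAS URL are placeholders in the format the function's own comments describe.

# Generate index.html files locally only:
traverse_and_create_index('/data/site')

# Also set the text/html content type on the uploaded copies via a SAS URL:
traverse_and_create_index(
    '/data/site',
    sas_url='https://accname.blob.core.windows.net/bname/path/to/folder?st=...&se=...&sp=...',
    overwrite_files=True)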
예제 #21
0
class AzureStorage:

    # Initialize the Azure Storage client
    def __init__(self, storage_url, container_name):

        self.account_url = storage_url
        self.container_name = container_name

        # Acquire a credential object for the app identity. When running in the cloud,
        # DefaultAzureCredential uses the app's managed identity or a service principal.
        # When run locally, DefaultAzureCredential relies on environment variables named
        # AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, and AZURE_TENANT_ID.
        credential = DefaultAzureCredential()

        # Create the BlobServiceClient and connect to the storage container
        try:
            self.blob_service_client = BlobServiceClient(
                account_url=self.account_url, credential=credential)
            self.container_client = self.blob_service_client.get_container_client(
                self.container_name)
        except Exception as e:
            logger.error(e)

    # Upload blob to Azure Storage
    def upload_blob(self, file, subfolder=''):
        if subfolder == '':
            target_blob = os.path.basename(file)
        else:
            target_blob = subfolder + "/" + os.path.basename(file)

        try:
            # Create a blob client using the local file name as the name for the blob
            blob_client = self.blob_service_client.get_blob_client(
                container=self.container_name, blob=(target_blob))
            try:
                # get_blob_properties raises ResourceNotFoundError if the
                # blob does not exist, so any successful call means the blob
                # is already present (even if it is empty)
                blob_client.get_blob_properties()
                logger.warning(
                    f"{target_blob} already exists in the selected path. Skipping upload."
                )
                return None
            except ResourceNotFoundError:
                # the blob does not exist, so we are good to upload the file
                pass
            logger.info(f"Uploading {target_blob} to Azure Storage")

            # Upload the file and measure upload time
            elapsed_time = time.time()
            with open(file, "rb") as data:
                blob_client.upload_blob(data)
            elapsed_time = round(time.time() - elapsed_time, 2)
            logger.info(
                f"Upload succeeded after {str(elapsed_time)} seconds for: {target_blob}"
            )

        except Exception as e:
            logger.error(e)
            return None

        # Build the blob URL, tolerating a trailing slash on the account URL
        blob_url = self.account_url.rstrip('/') + '/' + self.container_name + '/' + target_blob

        return blob_url

    # Download blob from Azure Storage
    def download_blob(self,
                      destination_file,
                      source_file,
                      destination_folder='',
                      source_folder=''):
        # If no source file name was provided, log a warning and bail out
        filename = secure_filename(source_file)
        if not filename:
            logger.warning("Must select a file to download first!")
            return None

        if source_folder == '':
            target_blob = filename
        else:
            target_blob = source_folder + '/' + filename

        if destination_folder == '':
            out_file = os.path.join(os.getcwd(), destination_file)
        else:
            out_file = os.path.join(destination_folder, destination_file)

        try:
            # Create a blob client for the source blob
            blob_client = self.blob_service_client.get_blob_client(
                container=self.container_name, blob=target_blob)
            try:
                # Attempt download of blob to local storage
                with open(out_file, "wb") as my_blob:
                    blob_data = blob_client.download_blob()
                    blob_data.readinto(my_blob)
            except ResourceNotFoundError as e:
                logger.error(f"Download file failed. {target_blob} not found")
                return None

            logger.info(f"Downloaded {target_blob} to {out_file}")

        except Exception as e:
            logger.error(e)
            return None

        return out_file

    # Delete specified blob
    def delete_blob(self, blob_name):
        if blob_name is None:
            logger.warning("Sent delete request without specified blob name")
        else:
            try:
                blob_client = self.blob_service_client.get_blob_client(
                    container=self.container_name, blob=blob_name)
                logger.info(f"Deleting blob: {blob_name}")
                blob_client.delete_blob(delete_snapshots=False)
            except ResourceNotFoundError:
                logger.warning(
                    f"Sent delete request for: { blob_name } but blob was not found"
                )

    # Return list of blobs in the container
    def list_blobs(self):
        try:
            blob_list = self.container_client.list_blobs()
        except Exception:
            logger.error(
                f"Failed to list Blobs in container {self.container_name}")
            return None

        return blob_list

    # Delete all blobs in the storage container
    def clear_storage(self):
        blob_list = self.list_blobs()
        for blob in blob_list:
            self.delete_blob(blob['name'])
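
A minimal usage sketch of the AzureStorage wrapper above; the account URL and container name are placeholders, and DefaultAzureCredential resolves as described in the constructor comment.

storage = AzureStorage("https://myaccount.blob.core.windows.net/", "uploads")

url = storage.upload_blob("report.csv", subfolder="2024")  # blob URL, or None on failure
for blob in storage.list_blobs() or []:
    print(blob.name)
storage.download_blob("report_copy.csv", "report.csv", source_folder="2024")
storage.delete_blob("2024/report.csv")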
예제 #22
0
class AzureStorageHelper(object):
    def __init__(self, *args, **kwargs):
        if "stay_on_remote" in kwargs:
            del kwargs["stay_on_remote"]

        # if not handed down explicitly, try to read credentials from
        # environment variables.
        for (csavar, envvar) in [
            ("account_url", "AZ_BLOB_ACCOUNT_URL"),
            ("credential", "AZ_BLOB_CREDENTIAL"),
        ]:
            if csavar not in kwargs and envvar in os.environ:
                kwargs[csavar] = os.environ.get(envvar)
        assert (
            "account_url" in kwargs
        ), "Missing AZ_BLOB_ACCOUNT_URL env var (and possibly AZ_BLOB_CREDENTIAL)"
        # remove leading '?' from SAS if needed
        # if kwargs.get("sas_token", "").startswith("?"):
        #    kwargs["sas_token"] = kwargs["sas_token"][1:]

        # by right only account_key or sas_token should be set, but we let
        # BlobServiceClient deal with the ambiguity
        self.blob_service_client = BlobServiceClient(**kwargs)

    def container_exists(self, container_name):
        # prefix match via name_starts_with; any hit counts as existing
        return any(
            True for _ in self.blob_service_client.list_containers(
                name_starts_with=container_name))

    def upload_to_azure_storage(
        self,
        container_name,
        file_path,
        blob_name=None,
        use_relative_path_for_blob_name=True,
        relative_start_dir=None,
        extra_args=None,
    ):
        """ Upload a file to Azure Storage
            This function uploads a file to an Azure Storage Container as a blob.
            Args:
                container_name: the name of the Azure container to use
                file_path: The path to the file to upload.
                blob_name: The name to set for the blob on Azure. If not specified, this will default to the
                    name of the file.
            Returns: The blob_name of the file on Azure if written, None otherwise
        """
        file_path = os.path.realpath(os.path.expanduser(file_path))

        assert container_name, "container_name must be specified"
        assert os.path.exists(file_path), (
            "The file path specified does not exist: %s" % file_path)
        assert os.path.isfile(file_path), (
            "The file path specified does not appear to be a file: %s" %
            file_path)

        container_client = self.blob_service_client.get_container_client(
            container_name)
        try:
            container_client.create_container()
        except azure.core.exceptions.ResourceExistsError:
            pass

        if not blob_name:
            if use_relative_path_for_blob_name:
                if relative_start_dir:
                    path_blob_name = os.path.relpath(file_path,
                                                     relative_start_dir)
                else:
                    path_blob_name = os.path.relpath(file_path)
            else:
                path_blob_name = os.path.basename(file_path)
            blob_name = path_blob_name
        blob_client = container_client.get_blob_client(blob_name)

        # upload_blob fails if the blob already exists, so delete it first
        if self.exists_in_container(container_name, blob_name):
            blob_client.delete_blob()
        try:
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data, blob_type="BlockBlob")
            return blob_client.get_blob_properties().name
        except Exception as e:
            raise WorkflowError("Error in creating blob. %s" % str(e))

    def download_from_azure_storage(
        self,
        container_name,
        blob_name,
        destination_path=None,
        expandBlobNameIntoDirs=True,
        make_dest_dirs=True,
        create_stub_only=False,
    ):
        """ Download a file from Azure Storage
            This function downloads an object from a specified Azure Storage container.
            Args:
                container_name: the name of the Azure Storage container to use (container name only)
                destination_path: If specified, the file will be saved to this path, otherwise cwd.
                expandBlobNameIntoDirs: Since Azure blob names can include slashes, if this is True (default)
                    then Azure blob names with slashes are expanded into directories on the receiving end.
                    If it is False, the blob name is passed to os.path.basename() to get the substring
                    following the last slash.
                make_dest_dirs: If this is True (default) and the destination path includes directories
                    that do not exist, they will be created.
            Returns:
                The destination path of the downloaded file on the receiving end, or None if the destination_path
                could not be downloaded
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"
        if destination_path:
            destination_path = os.path.realpath(
                os.path.expanduser(destination_path))
        else:
            if expandBlobNameIntoDirs:
                destination_path = os.path.join(os.getcwd(), blob_name)
            else:
                destination_path = os.path.join(os.getcwd(),
                                                os.path.basename(blob_name))
        # create any missing parent directories of the destination path
        if make_dest_dirs:
            os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        if not create_stub_only:
            with open(destination_path, "wb") as my_blob:
                blob_data = b.download_blob()
                blob_data.readinto(my_blob)
        else:
            # just create an empty file with the right timestamps
            ts = b.get_blob_properties().last_modified.timestamp()
            with open(destination_path, "wb") as fp:
                os.utime(
                    fp.name,
                    (ts, ts),
                )
        return destination_path

    def delete_from_container(self, container_name, blob_name):
        """ Delete a file from Azure Storage container

            This function deletes an object from a specified Azure Storage container.

            Args:
                container_name: the name of the Azure Storage container to use (container name only, not endpoint)
                blob_name: the name of the blob to delete from the container

            Returns:
                nothing
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"
        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        b.delete_blob()

    def exists_in_container(self, container_name, blob_name):
        """ Returns whether the blob exists in the container

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)
                blob_name: the blob_name of the object to check

            Returns:
                True | False
        """

        assert (
            container_name
        ), 'container_name must be specified (did you try to write to "root" or forgot to set --default-remote-prefix?)'
        assert blob_name, "blob_name must be specified"
        cc = self.blob_service_client.get_container_client(container_name)
        return any(True for _ in cc.list_blobs(name_starts_with=blob_name))

    def blob_size(self, container_name, blob_name):
        """ Returns the size of a blob

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)
                blob_name: the blob_name of the object to measure

            Returns:
                Size in KiB (integer division of the byte size by 1024)
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"

        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        return b.get_blob_properties().size // 1024

    def blob_last_modified(self, container_name, blob_name):
        """ Returns a timestamp of a blob

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)
                blob_name: the blob_name of the object to inspect

            Returns:
                timestamp
        """
        assert container_name, "container_name must be specified"
        assert blob_name, "blob_name must be specified"
        b = self.blob_service_client.get_blob_client(container_name, blob_name)
        return b.get_blob_properties().last_modified.timestamp()

    def list_blobs(self, container_name):
        """ Returns a list of blobs from the container

            Args:
                container_name: the name of the Azure Storage container (container name only, not endpoint)

            Returns:
                list of blobs
        """
        assert container_name, "container_name must be specified"
        c = self.blob_service_client.get_container_client(container_name)
        return [b.name for b in c.list_blobs()]
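
A brief usage sketch of AzureStorageHelper; the account URL and credential are placeholders, and both may instead come from the AZ_BLOB_ACCOUNT_URL / AZ_BLOB_CREDENTIAL environment variables as the constructor documents.

helper = AzureStorageHelper(
    account_url="https://myaccount.blob.core.windows.net",
    credential="<account-key-or-sas-token>")

blob_name = helper.upload_to_azure_storage("results-container", "results/plot.png")
print(helper.exists_in_container("results-container", blob_name))  # True
print(helper.blob_size("results-container", blob_name))            # size in KiB
helper.download_from_azure_storage(
    "results-container", blob_name, destination_path="/tmp/plot.png")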
예제 #23
0
class StorageBlockBlobTest(StorageTestCase):
    def _setup(self,
               storage_account_name,
               key,
               container_prefix='utcontainer'):
        account_url = self.account_url(storage_account_name, "blob")
        if not isinstance(account_url, str):
            account_url = account_url.encode('utf-8')
            key = key.encode('utf-8')
        self.bsc = BlobServiceClient(account_url,
                                     credential=key,
                                     connection_data_block_size=4 * 1024,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name(container_prefix)

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_name_with_special_chars = 'भारत¥test/testsubÐirÍ/' + self.get_resource_name(
            'srcÆblob')
        self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)
        self.source_blob_with_special_chars_data = self.get_random_bytes(
            SOURCE_BLOB_SIZE)

        blob = self.bsc.get_blob_client(self.container_name,
                                        self.source_blob_name)
        blob_with_special_chars = self.bsc.get_blob_client(
            self.container_name, self.source_blob_name_with_special_chars)

        if self.is_live:
            self.bsc.create_container(self.container_name)
            blob.upload_blob(self.source_blob_data)
            blob_with_special_chars.upload_blob(
                self.source_blob_with_special_chars_data)

        # generate a SAS so that it is accessible with a URL
        sas_token = generate_blob_sas(
            blob.account_name,
            blob.container_name,
            blob.blob_name,
            snapshot=blob.snapshot,
            account_key=blob.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        # generate a SAS so that it is accessible with a URL
        sas_token_for_special_chars = generate_blob_sas(
            blob_with_special_chars.account_name,
            blob_with_special_chars.container_name,
            blob_with_special_chars.blob_name,
            snapshot=blob_with_special_chars.snapshot,
            account_key=blob_with_special_chars.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        self.source_blob_url_without_sas = blob.url
        self.source_blob_url = BlobClient.from_blob_url(
            blob.url, credential=sas_token).url
        self.source_blob_url_with_special_chars = BlobClient.from_blob_url(
            blob_with_special_chars.url,
            credential=sas_token_for_special_chars).url

    @BlobPreparer()
    def test_put_block_from_url_with_oauth(self, storage_account_name,
                                           storage_account_key):
        # Arrange
        self._setup(storage_account_name,
                    storage_account_key,
                    container_prefix="container1")
        split = 4 * 1024
        destination_blob_name = self.get_resource_name('destblob')
        destination_blob_client = self.bsc.get_blob_client(
            self.container_name, destination_blob_name)
        token = "Bearer {}".format(self.generate_oauth_token().get_token(
            "https://storage.azure.com/.default").token)

        # Assert this operation fails without a credential
        with self.assertRaises(HttpResponseError):
            destination_blob_client.stage_block_from_url(
                block_id=1,
                source_url=self.source_blob_url_without_sas,
                source_offset=0,
                source_length=split)
        # Assert it passes after passing an oauth credential
        destination_blob_client.stage_block_from_url(
            block_id=1,
            source_url=self.source_blob_url_without_sas,
            source_offset=0,
            source_length=split,
            source_authorization=token)
        destination_blob_client.stage_block_from_url(
            block_id=2,
            source_url=self.source_blob_url_without_sas,
            source_offset=split,
            source_length=split,
            source_authorization=token)

        committed, uncommitted = destination_blob_client.get_block_list('all')
        self.assertEqual(len(uncommitted), 2)
        self.assertEqual(len(committed), 0)

        # Act part 2: commit the blocks
        destination_blob_client.commit_block_list(['1', '2'])

        # Assert destination blob has right content
        destination_blob_data = destination_blob_client.download_blob(
        ).readall()
        self.assertEqual(len(destination_blob_data), 8 * 1024)
        self.assertEqual(destination_blob_data, self.source_blob_data)

    @BlobPreparer()
    def test_put_block_from_url_and_commit(self, storage_account_name,
                                           storage_account_key):
        self._setup(storage_account_name, storage_account_key)
        dest_blob_name = self.get_resource_name('destblob')
        dest_blob = self.bsc.get_blob_client(self.container_name,
                                             dest_blob_name)

        # Act part 1: make put block from url calls
        split = 4 * 1024
        dest_blob.stage_block_from_url(block_id=1,
                                       source_url=self.source_blob_url,
                                       source_offset=0,
                                       source_length=split)
        dest_blob.stage_block_from_url(block_id=2,
                                       source_url=self.source_blob_url,
                                       source_offset=split,
                                       source_length=split)

        # Assert blocks
        committed, uncommitted = dest_blob.get_block_list('all')
        self.assertEqual(len(uncommitted), 2)
        self.assertEqual(len(committed), 0)

        # Act part 2: commit the blocks
        dest_blob.commit_block_list(['1', '2'])

        # Assert destination blob has right content
        content = dest_blob.download_blob().readall()
        self.assertEqual(len(content), 8 * 1024)
        self.assertEqual(content, self.source_blob_data)

        dest_blob.stage_block_from_url(
            block_id=3,
            source_url=self.source_blob_url_with_special_chars,
            source_offset=0,
            source_length=split)
        dest_blob.stage_block_from_url(
            block_id=4,
            source_url=self.source_blob_url_with_special_chars,
            source_offset=split,
            source_length=split)

        # Assert blocks
        committed, uncommitted = dest_blob.get_block_list('all')
        self.assertEqual(len(uncommitted), 2)
        self.assertEqual(len(committed), 2)

        # Act part 3: commit the new blocks
        dest_blob.commit_block_list(['3', '4'])

        # Assert destination blob has right content
        content = dest_blob.download_blob().readall()
        self.assertEqual(len(content), 8 * 1024)
        self.assertEqual(content, self.source_blob_with_special_chars_data)

    @BlobPreparer()
    def test_put_block_from_url_and_validate_content_md5(
            self, storage_account_name, storage_account_key):
        self._setup(storage_account_name, storage_account_key)
        dest_blob_name = self.get_resource_name('destblob')
        dest_blob = self.bsc.get_blob_client(self.container_name,
                                             dest_blob_name)
        src_md5 = StorageContentValidation.get_content_md5(
            self.source_blob_data)

        # Act part 1: put block from url with md5 validation
        dest_blob.stage_block_from_url(block_id=1,
                                       source_url=self.source_blob_url,
                                       source_content_md5=src_md5,
                                       source_offset=0,
                                       source_length=8 * 1024)

        # Assert block was staged
        committed, uncommitted = dest_blob.get_block_list('all')
        self.assertEqual(len(uncommitted), 1)
        self.assertEqual(len(committed), 0)

        # Act part 2: put block from url with wrong md5
        fake_md5 = StorageContentValidation.get_content_md5(b"POTATO")
        with self.assertRaises(HttpResponseError) as error:
            dest_blob.stage_block_from_url(block_id=2,
                                           source_url=self.source_blob_url,
                                           source_content_md5=fake_md5,
                                           source_offset=0,
                                           source_length=8 * 1024)
        self.assertEqual(error.exception.error_code,
                         StorageErrorCode.md5_mismatch)

        # Assert block was not staged
        committed, uncommitted = dest_blob.get_block_list('all')
        self.assertEqual(len(uncommitted), 1)
        self.assertEqual(len(committed), 0)

    @BlobPreparer()
    def test_copy_blob_sync(self, storage_account_name, storage_account_key):
        self._setup(storage_account_name, storage_account_key)
        dest_blob_name = self.get_resource_name('destblob')
        dest_blob = self.bsc.get_blob_client(self.container_name,
                                             dest_blob_name)

        # Act
        copy_props = dest_blob.start_copy_from_url(self.source_blob_url,
                                                   requires_sync=True)

        # Assert
        self.assertIsNotNone(copy_props)
        self.assertIsNotNone(copy_props['copy_id'])
        self.assertEqual('success', copy_props['copy_status'])

        # Verify content
        content = dest_blob.download_blob().readall()
        self.assertEqual(self.source_blob_data, content)

        copy_props_with_special_chars = dest_blob.start_copy_from_url(
            self.source_blob_url_with_special_chars, requires_sync=True)

        # Assert
        self.assertIsNotNone(copy_props_with_special_chars)
        self.assertIsNotNone(copy_props_with_special_chars['copy_id'])
        self.assertEqual('success',
                         copy_props_with_special_chars['copy_status'])

        # Verify content
        content = dest_blob.download_blob().readall()
        self.assertEqual(self.source_blob_with_special_chars_data, content)

    @pytest.mark.playback_test_only
    @BlobPreparer()
    def test_sync_copy_blob_returns_vid(self, storage_account_name,
                                        storage_account_key):
        self._setup(storage_account_name, storage_account_key)
        dest_blob_name = self.get_resource_name('destblob')
        dest_blob = self.bsc.get_blob_client(self.container_name,
                                             dest_blob_name)

        # Act
        copy_props = dest_blob.start_copy_from_url(self.source_blob_url,
                                                   requires_sync=True)

        # Assert
        self.assertIsNotNone(copy_props['version_id'])
        self.assertIsNotNone(copy_props)
        self.assertIsNotNone(copy_props['copy_id'])
        self.assertEqual('success', copy_props['copy_status'])

        # Verify content
        content = dest_blob.download_blob().readall()
        self.assertEqual(self.source_blob_data, content)
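
test_put_block_from_url_with_oauth builds its bearer token through a test helper; outside the test framework the same token can come from any azure-identity credential. A sketch under that assumption:

from azure.identity import DefaultAzureCredential

# Acquire a token for Azure Storage and format it the way source_authorization
# expects ("Bearer <token>"), mirroring the OAuth test above.
credential = DefaultAzureCredential()
access_token = credential.get_token("https://storage.azure.com/.default")
source_authorization = "Bearer {}".format(access_token.token)

# dest_blob is an already-created BlobClient and source_url a source blob URL:
# dest_blob.stage_block_from_url(
#     block_id=1, source_url=source_url, source_offset=0,
#     source_length=4 * 1024, source_authorization=source_authorization)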
예제 #24
0
def main(message: func.ServiceBusMessage):
    # Log the Service Bus Message as plaintext

    message_body = message.get_body().decode("utf-8")

    logging.info('Python ServiceBus topic trigger processed message.')
    logging.info(f'Message Body: {message_body}')

    quarantine_storage_connection_string = os.environ.get(
        'QUARANTINE_STORAGE_CONNECTION_STRING')
    promote_storage_connection_string = os.environ.get(
        'PROMOTE_STORAGE_CONNECTION_STRING')

    promote_mode = get_promote_mode()
    quarantine_mode = get_quarantine_mode()

    # Parse the JSON body into a new name to avoid shadowing the
    # ServiceBusMessage parameter
    payload = json.loads(message_body)
    file_url = payload['file_url']
    (_, blob_container, blob_name) = parse_blob_information(file_url)

    credential = DefaultAzureCredential(exclude_environment_credential=True)
    source_blob_service_client = BlobServiceClient(
        account_url=get_blob_account_url(file_url), credential=credential)
    blob_url_sas = get_blob_url_sas(source_blob_service_client, file_url)

    protecting_blob_client = source_blob_service_client.get_blob_client(
        blob_container, blob_name)
    existing_metadata = get_existing_metadata(protecting_blob_client)
    existing_tags = get_existing_tags(protecting_blob_client)

    result = payload['scanning_result']
    findings = result['Findings']
    logging.info(f'findings: {json.dumps(findings)}')

    operation = 'quarantine' if findings else 'promotion'
    mode = quarantine_mode if findings else promote_mode
    dest_storage_connection_string = (quarantine_storage_connection_string
                                      if findings else
                                      promote_storage_connection_string)
    scan_result = 'malicious' if findings else 'no issues found'
    scan_date = time.strftime('%Y/%m/%d %H:%M:%S',
                              time.localtime(payload['timestamp']))

    if not dest_storage_connection_string:
        logging.info(
            f'Skip: No storage connection string specified for {operation}')
        return

    codes = result['Codes']
    code = CODE_EMPTY
    if len(codes) > 0:
        code = CODE_SKIP_MULTIPLE if len(codes) > 1 else codes[0]
    fss_tags = {
        'scanned': 'true',
        'scanDate': scan_date,
        'scanResult': scan_result,
        'scanDetailCode': str(code),
        'scanDetailMessage': CODE_MESSAGES.get(code, CODE_MESSAGES[CODE_MISC])
    }
    logging.info(f'FSS tags: {fss_tags}')
    metadata = compose_metadata(existing_metadata, fss_tags)
    tags = compose_tags(existing_tags, fss_tags)

    dest_blob_service_client = BlobServiceClient.from_connection_string(
        dest_storage_connection_string)
    copy_object(
        source_blob_url=blob_url_sas,
        container=blob_container,
        blob_name=blob_name,
        metadata=metadata,
        tags=tags,
        dest_blob_service_client=dest_blob_service_client,
    )

    if mode == 'move':
        protecting_blob_client.delete_blob()

    logging.info(f'File {operation} is successful (mode: {mode})')
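
parse_blob_information and get_blob_account_url above are project helpers
that are not shown here. A minimal sketch of what they might look like,
assuming a standard blob URL of the form
https://<account>.blob.core.windows.net/<container>/<blob path> (the names
and the exact return shape are assumptions):

from urllib.parse import urlparse

def parse_blob_information(file_url):
    # Split 'https://acct.blob.core.windows.net/container/dir/name'
    # into (account, container, blob_name).
    parsed = urlparse(file_url)
    account = parsed.netloc.split('.')[0]
    container, _, blob_name = parsed.path.lstrip('/').partition('/')
    return account, container, blob_name

def get_blob_account_url(file_url):
    parsed = urlparse(file_url)
    return '{0}://{1}'.format(parsed.scheme, parsed.netloc)
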
Example #25
class StorageLargeBlockBlobTest(StorageTestCase):
    def setUp(self):
        super(StorageLargeBlockBlobTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(url,
                                     credential=credential,
                                     max_single_put_size=32 * 1024,
                                     max_block_size=2 * 1024 * 1024,
                                     min_large_block_upload_threshold=1 *
                                     1024 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass

        return super(StorageLargeBlockBlobTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data.chunks())), expected_data)

    # --Test cases for block blobs --------------------------------------------

    def test_put_block_bytes_large(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block('block {0}'.format(i).encode('utf-8'),
                                    os.urandom(LARGE_BLOCK_SIZE))
            # Assert
            self.assertIsNone(resp)

    def test_put_block_bytes_large_with_md5(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block('block {0}'.format(i).encode('utf-8'),
                                    os.urandom(LARGE_BLOCK_SIZE),
                                    validate_content=True)
            self.assertIsNone(resp)

    def test_put_block_stream_large(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            stream = BytesIO(bytearray(LARGE_BLOCK_SIZE))
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                stream,
                length=LARGE_BLOCK_SIZE)
            # Assert
            self.assertIsNone(resp)

    def test_put_block_stream_large_with_md5(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            stream = BytesIO(bytearray(LARGE_BLOCK_SIZE))
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                stream,
                length=LARGE_BLOCK_SIZE,
                validate_content=True)
            # Assert
            self.assertIsNone(resp)

    def test_create_large_blob_from_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_path_with_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, validate_content=True, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_path_non_parallel(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(self.get_random_bytes(100))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             max_concurrency=2,
                             raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_large_blob_from_path_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             content_settings=content_settings,
                             max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_large_blob_from_stream_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_stream_with_progress_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             max_concurrency=2,
                             raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size,
                                    progress)

    def test_create_large_blob_from_stream_chunked_upload_with_count(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_large_blob_from_stream_chunked_upload_with_count_and_properties(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             length=blob_size,
                             content_settings=content_settings,
                             max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    def test_create_large_blob_from_stream_chunked_upload_with_properties(
            self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(content_type='image/png',
                                           content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream,
                             content_settings=content_settings,
                             max_concurrency=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type,
                         content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)
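
Note that the staging tests above never commit the blocks they upload: the
data stays in the uncommitted block list and the blob content is unchanged
until commit_block_list is called. A minimal sketch of committing the five
staged blocks (the blob client and block ids are assumed to match the
staging calls above):

from azure.storage.blob import BlobBlock

block_ids = ['block {0}'.format(i).encode('utf-8') for i in range(5)]
blob.commit_block_list([BlobBlock(block_id=block_id)
                        for block_id in block_ids])
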
Example #26
    def create_blob_client(self, blob_service_client, container_name,
                           blob_name):
        """Create a blob client using the given BlobServiceClient."""

        client = blob_service_client.get_blob_client(container=container_name,
                                                     blob=blob_name)
        return client
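
A hypothetical usage of the helper above, from an instance that holds it
(the connection string, container, and blob names are placeholders):

from azure.storage.blob import BlobServiceClient

service_client = BlobServiceClient.from_connection_string(
    '<your-connection-string>')  # placeholder
blob_client = obj.create_blob_client(service_client, 'mycontainer',
                                     'myblob.txt')
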
class BlobStorageAccountTest(StorageTestCase):
    def setUp(self):
        super(BlobStorageAccountTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()
        self.bsc = BlobServiceClient(url, credential=credential)
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass

        return super(BlobStorageAccountTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        blob_name = self.get_resource_name(TEST_BLOB_PREFIX)
        return self.bsc.get_blob_client(self.container_name, blob_name)

    def _create_blob(self):
        blob = self._get_blob_reference()
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob().content_as_bytes()
        self.assertEqual(actual_data, expected_data)

    # --Tests specific to Blob Storage Accounts (not general purpose)------------

    @record
    def test_standard_blob_tier_set_tier_api(self):
        container = self.bsc.get_container_client(self.container_name)
        tiers = [
            StandardBlobTier.Archive, StandardBlobTier.Cool,
            StandardBlobTier.Hot
        ]

        for tier in tiers:
            blob = self._get_blob_reference()
            data = b'hello world'
            blob.upload_blob(data)

            blob_ref = blob.get_blob_properties()
            self.assertIsNotNone(blob_ref.blob_tier)
            self.assertTrue(blob_ref.blob_tier_inferred)
            self.assertIsNone(blob_ref.blob_tier_change_time)

            blobs = list(container.list_blobs())

            # Assert
            self.assertIsNotNone(blobs)
            self.assertGreaterEqual(len(blobs), 1)
            self.assertIsNotNone(blobs[0])
            self.assertNamedItemInContainer(blobs, blob.blob_name)
            self.assertIsNotNone(blobs[0].blob_tier)
            self.assertTrue(blobs[0].blob_tier_inferred)
            self.assertIsNone(blobs[0].blob_tier_change_time)

            blob.set_standard_blob_tier(tier)

            blob_ref2 = blob.get_blob_properties()
            self.assertEqual(tier, blob_ref2.blob_tier)
            self.assertFalse(blob_ref2.blob_tier_inferred)
            self.assertIsNotNone(blob_ref2.blob_tier_change_time)

            blobs = list(container.list_blobs())

            # Assert
            self.assertIsNotNone(blobs)
            self.assertGreaterEqual(len(blobs), 1)
            self.assertIsNotNone(blobs[0])
            self.assertNamedItemInContainer(blobs, blob.blob_name)
            self.assertEqual(blobs[0].blob_tier, tier)
            self.assertFalse(blobs[0].blob_tier_inferred)
            self.assertIsNotNone(blobs[0].blob_tier_change_time)

            blob.delete_blob()

    @record
    def test_rehydration_status(self):
        blob_name = 'rehydration_test_blob_1'
        blob_name2 = 'rehydration_test_blob_2'
        container = self.bsc.get_container_client(self.container_name)

        data = b'hello world'
        blob = container.upload_blob(blob_name, data)
        blob.set_standard_blob_tier(StandardBlobTier.Archive)
        blob.set_standard_blob_tier(StandardBlobTier.Cool)

        blob_ref = blob.get_blob_properties()
        self.assertEqual(StandardBlobTier.Archive, blob_ref.blob_tier)
        self.assertEqual("rehydrate-pending-to-cool", blob_ref.archive_status)
        self.assertFalse(blob_ref.blob_tier_inferred)

        blobs = list(container.list_blobs())
        blob.delete_blob()

        # Assert
        self.assertIsNotNone(blobs)
        self.assertGreaterEqual(len(blobs), 1)
        self.assertIsNotNone(blobs[0])
        self.assertNamedItemInContainer(blobs, blob.blob_name)
        self.assertEqual(StandardBlobTier.Archive, blobs[0].blob_tier)
        self.assertEqual("rehydrate-pending-to-cool", blobs[0].archive_status)
        self.assertFalse(blobs[0].blob_tier_inferred)

        blob2 = container.upload_blob(blob_name2, data)
        blob2.set_standard_blob_tier(StandardBlobTier.Archive)
        blob2.set_standard_blob_tier(StandardBlobTier.Hot)

        blob_ref2 = blob2.get_blob_properties()
        self.assertEqual(StandardBlobTier.Archive, blob_ref2.blob_tier)
        self.assertEqual("rehydrate-pending-to-hot", blob_ref2.archive_status)
        self.assertFalse(blob_ref2.blob_tier_inferred)

        blobs = list(container.list_blobs())

        # Assert
        self.assertIsNotNone(blobs)
        self.assertGreaterEqual(len(blobs), 1)
        self.assertIsNotNone(blobs[0])
        self.assertNamedItemInContainer(blobs, blob2.blob_name)
        self.assertEqual(StandardBlobTier.Archive, blobs[0].blob_tier)
        self.assertEqual("rehydrate-pending-to-hot", blobs[0].archive_status)
        self.assertFalse(blobs[0].blob_tier_inferred)
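
Rehydration out of the Archive tier is asynchronous: archive_status stays at
a rehydrate-pending-* value until the service finishes, which can take
hours. A minimal polling sketch (the blob client and the poll interval are
assumptions):

import time

props = blob.get_blob_properties()
while props.archive_status and \
        props.archive_status.startswith('rehydrate-pending'):
    time.sleep(30)  # arbitrary poll interval
    props = blob.get_blob_properties()
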
Example #28
    def test_blob_tier_copy_blob(self):
        url = self._get_premium_account_url()
        credential = self._get_premium_shared_key_credential()
        pbs = BlobServiceClient(url, credential=credential)

        try:
            container_name = self.get_resource_name('utpremiumcontainer')
            container = pbs.get_container_client(container_name)

            if not self.is_playback():
                try:
                    container.create_container()
                except ResourceExistsError:
                    pass

            # Arrange
            source_blob = pbs.get_blob_client(
                container_name,
                self.get_resource_name(TEST_BLOB_PREFIX))
            source_blob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P10)

            # Act
            source_blob_url = '{0}/{1}/{2}'.format(
                self._get_premium_account_url(), container_name, source_blob.blob_name)

            copy_blob = pbs.get_blob_client(container_name, 'blob1copy')
            copy = copy_blob.start_copy_from_url(source_blob_url, premium_page_blob_tier=PremiumPageBlobTier.P30)

            # Assert
            self.assertIsNotNone(copy)
            self.assertEqual(copy['copy_status'], 'success')
            self.assertIsNotNone(copy['copy_id'])

            copy_ref = copy_blob.get_blob_properties()
            self.assertEqual(copy_ref.blob_tier, PremiumPageBlobTier.P30)

            source_blob2 = pbs.get_blob_client(
                container_name,
                self.get_resource_name(TEST_BLOB_PREFIX))

            source_blob2.create_page_blob(1024)
            source_blob2_url = '{0}/{1}/{2}'.format(
                self._get_premium_account_url(), source_blob2.container_name, source_blob2.blob_name)

            copy_blob2 = pbs.get_blob_client(container_name, 'blob2copy')
            copy2 = copy_blob2.start_copy_from_url(source_blob2_url, premium_page_blob_tier=PremiumPageBlobTier.P60)
            self.assertIsNotNone(copy2)
            self.assertEqual(copy2['copy_status'], 'success')
            self.assertIsNotNone(copy2['copy_id'])

            copy_ref2 = copy_blob2.get_blob_properties()
            self.assertEqual(copy_ref2.blob_tier, PremiumPageBlobTier.P60)
            self.assertFalse(copy_ref2.blob_tier_inferred)

            copy_blob3 = pbs.get_blob_client(container_name, 'blob3copy')
            copy3 = copy_blob3.start_copy_from_url(source_blob2_url)
            self.assertIsNotNone(copy3)
            self.assertEqual(copy3['copy_status'], 'success')
            self.assertIsNotNone(copy3['copy_id'])

            copy_ref3 = copy_blob3.get_blob_properties()
            self.assertEqual(copy_ref3.blob_tier, PremiumPageBlobTier.P10)
            self.assertTrue(copy_ref3.blob_tier_inferred)
        finally:
            container.delete_container()
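
As the last assertions show, a copy started without an explicit tier
inherits the source tier and reports blob_tier_inferred as True. Setting a
tier afterwards clears the inferred flag; a minimal sketch (assuming
set_premium_page_blob_tier is available on this client version):

copy_blob3.set_premium_page_blob_tier(PremiumPageBlobTier.P20)
props = copy_blob3.get_blob_properties()
assert props.blob_tier == PremiumPageBlobTier.P20
assert not props.blob_tier_inferred
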
Example #29
class AzureStorageClient(object):
    """Connects to an Azure Blob Storage service account."""

    def __init__(
        self,
        container: str,
        connection_string: Optional[str] = None,
        account_url: Optional[str] = None,
        credential: Optional[str] = None,
    ) -> None:
        if connection_string:
            self.client = BlobServiceClient.from_connection_string(connection_string)
        elif account_url:
            self.client = BlobServiceClient(account_url, credential)
        else:
            raise ValueError(
                "Either connection_string or account_url must be provided."
            )

        logging.info("Trying to create Azure Blob Storage Container: {}.".format(container))
        try:
            self.client.create_container(container.split("/")[0])
            logging.info("Successfully created container {}.".format(container))
        except ResourceExistsError:
            logging.info(
                "Container {} already exists, and will be used to store checkpoints.".format(
                    container
                )
            )
        except HttpResponseError as e:
            if e.error_code == StorageErrorCode.invalid_uri:  # type: ignore
                logging.warning(
                    (
                        "The storage client raised the following HttpResponseError:\n{}\nPlease "
                        "ignore this warning if this is because the account url provided points "
                        "to a container instead of a storage account; otherwise, it may be "
                        "necessary to fix your config.yaml."
                    ).format(e)
                )
            else:
                logging.error("Failed while trying to create container {}.".format(container))
                raise e

    @util.preserve_random_state
    def put(self, container_name: str, blob_name: str, filename: Union[str, Path]) -> None:
        """Upload a file to the specified blob in the specified container."""
        with open(filename, "rb") as file:
            self.client.get_blob_client(container_name, blob_name).upload_blob(file)

    @util.preserve_random_state
    def get(self, container_name: str, blob_name: str, filename: str) -> None:
        """Download the specified blob in the specified container to a file."""
        with open(filename, "wb") as file:
            stream = self.client.get_blob_client(container_name, blob_name).download_blob()
            stream.readinto(file)

    @util.preserve_random_state
    def delete_files(self, container_name: str, files: List[str]) -> None:
        """Deletes the specified files from the specified container."""
        for file in files:
            self.client.get_blob_client(container_name, file).delete_blob()

    @util.preserve_random_state
    def list_files(
        self, container_name: str, file_prefix: Optional[Union[str, Path]] = None
    ) -> List[str]:
        """Lists files within the specified container that have the specified file prefix.
        Lists all files if file_prefix is None.
        """
        container = self.client.get_container_client(container_name)
        files = [blob["name"] for blob in container.list_blobs(name_starts_with=file_prefix)]
        return files
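
A hypothetical usage of the client above (the account URL, credential,
container, and file names are placeholders):

client = AzureStorageClient(
    container='checkpoints',
    account_url='https://myaccount.blob.core.windows.net',
    credential='<account-key>',  # placeholder
)
client.put('checkpoints', 'model.pt', 'local/model.pt')
print(client.list_files('checkpoints', file_prefix='model'))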