def test_batch_insert_merge(self):
    """Commit a one-operation insert-or-merge batch and check what was stored.

    The entity read back must carry the submitted ``test2``/``test4`` values
    and its etag must equal the first element of the commit response.
    """
    partition_key, row_key = '001', 'batch_insert_merge'

    # Arrange / Act: build the entity and send it through a single-op batch.
    source = Entity()
    source.PartitionKey = partition_key
    source.RowKey = row_key
    source.test = EntityProperty(EdmType.BOOLEAN, 'true')
    source.test2 = 'value'
    source.test3 = 3
    source.test4 = EntityProperty(EdmType.INT64, '1234567890')
    source.test5 = datetime.utcnow()

    operations = TableBatch()
    operations.insert_or_merge_entity(source)
    commit_result = self.ts.commit_batch(self.table_name, operations)

    # Assert
    self.assertIsNotNone(commit_result)
    stored = self.ts.get_entity(self.table_name, partition_key, row_key)
    self.assertIsNotNone(stored)
    self.assertEqual('value', stored.test2)
    self.assertEqual(1234567890, stored.test4)
    self.assertEqual(commit_result[0], stored.etag)
def batch(self):
    """Sample: insert entities in batches, once per supported calling style.

    Creates a scratch table, inserts five entities through a batch used as a
    context manager, five more through an explicitly committed ``TableBatch``,
    and finally deletes the table.
    """
    table_name = self._create_table()

    template = Entity()
    template.PartitionKey = 'batch'
    template.test = True

    # All operations in the same batch must have the same partition key but
    # different row keys.
    # Batches can hold from 1 to 100 entities.
    # Batches are atomic. All operations completed simultaneously. If one
    # operation fails, they all fail.
    # Insert, update, merge, insert or merge, insert or replace, and delete
    # entity operations are supported.

    # Context manager style
    with self.service.batch(table_name) as context_batch:
        for index in range(5):
            template.RowKey = 'context_{}'.format(index)
            context_batch.insert_entity(template)

    # Commit style
    explicit_batch = TableBatch()
    for index in range(5):
        template.RowKey = 'commit_{}'.format(index)
        explicit_batch.insert_entity(template)
    self.service.commit_batch(table_name, explicit_batch)

    self.service.delete_table(table_name)
def test_query_entities_large(self):
    """Load 1000 entities in batches of 50, then query them all back.

    The query duration is printed for information; the assertion is only on
    the number of entities returned.
    """
    # Arrange
    table_name = self._create_query_table(0)
    total_entities_count = 1000
    entities_per_batch = 50
    batch_count = total_entities_count // entities_per_batch

    for batch_index in range(batch_count):
        pending = TableBatch()
        for item_index in range(entities_per_batch):
            row = Entity()
            row.PartitionKey = 'large'
            row.RowKey = 'batch{0}-item{1}'.format(batch_index, item_index)
            row.test = EntityProperty(EdmType.BOOLEAN, 'true')
            row.test2 = 'hello world;' * 100
            row.test3 = 3
            row.test4 = EntityProperty(EdmType.INT64, '1234567890')
            row.test5 = datetime(2016, 12, 31, 11, 59, 59, 0)
            pending.insert_entity(row)
        self.ts.commit_batch(table_name, pending)

    # Act
    started_at = datetime.now()
    fetched = list(self.ts.query_entities(table_name))
    duration = datetime.now() - started_at

    # Assert
    seconds = duration.total_seconds()
    print('query_entities took {0} secs.'.format(seconds))
    # azure allocates 5 seconds to execute a query
    # if it runs slowly, it will return fewer results and make the test fail
    self.assertEqual(len(fetched), total_entities_count)
def test_batch_update_if_doesnt_match(self):
    """A batched update with a non-matching ``if_match`` etag must be rejected.

    Expects ``AzureBatchOperationError`` with code
    ``UpdateConditionNotSatisfied`` on commit, and the stored entity must
    still look like the default one afterwards.
    """
    # Arrange
    original = self._create_default_entity_dict()
    self.ts.insert_entity(self.table_name, original)
    partition_key = original['PartitionKey']
    row_key = original['RowKey']

    # Act
    replacement = self._create_updated_entity_dict(partition_key, row_key)
    operations = TableBatch()
    operations.update_entity(
        replacement,
        if_match=u'W/"datetime\'2012-06-15T22%3A51%3A44.9662825Z\'"')

    # Capture the error first, then inspect it outside the handler.
    raised = None
    try:
        self.ts.commit_batch(self.table_name, operations)
    except AzureBatchOperationError as error:
        raised = error

    if raised is None:
        self.fail('AzureBatchOperationError was expected')
    self.assertEqual(raised.code, 'UpdateConditionNotSatisfied')
    self.assertTrue(
        'The update condition specified in the request was not satisfied.'
        in str(raised))

    # Assert
    received_entity = self.ts.get_entity(
        self.table_name, partition_key, row_key)
    self._assert_default_entity(received_entity)
def test_batch_strict_mode(self):
    """Inserting into a batch that requires encryption must raise ValueError.

    The batch is created with ``require_encryption=True`` and no key
    encryption key is passed to it.
    """
    # Arrange
    self.ts.require_encryption = True
    candidate = self._create_default_entity_for_encryption()

    # Act
    strict_batch = TableBatch(require_encryption=True)

    # Assert
    self.assertRaises(ValueError, strict_batch.insert_entity, candidate)
def _process_batch(self, entity_list):
    """
    Upsert ``entity_list`` in consecutive chunks of ``self.max_batch_size``.

    Each chunk is queued into its own ``TableBatch`` with
    ``insert_or_replace_entity`` and committed to ``self.table_name``; the
    last chunk may be shorter than the others.
    """
    chunk_size = self.max_batch_size
    # Ceiling division: number of chunks needed to cover the whole list.
    chunk_count = (len(entity_list) + chunk_size - 1) // chunk_size

    for chunk_index in range(chunk_count):
        start = chunk_index * chunk_size
        stop = (chunk_index + 1) * chunk_size
        chunk = entity_list[start:stop]

        pending = TableBatch()
        for item in chunk:
            pending.insert_or_replace_entity(item)

        logging.info('Committing batch size %i', len(chunk))
        self.table_service.commit_batch(self.table_name, pending)
def test_batch_too_many_ops(self):
    """Queueing 101 inserts and committing must raise AzureBatchValidationError."""
    # Arrange
    seed = self._create_default_entity_dict('001', 'batch_negative_1')
    self.ts.insert_entity(self.table_name, seed)

    # Act
    operation_count = 101
    with self.assertRaises(AzureBatchValidationError):
        oversized = TableBatch()
        for number in range(operation_count):
            extra = Entity()
            extra.PartitionKey = 'large'
            extra.RowKey = 'item{0}'.format(number)
            oversized.insert_entity(extra)
        self.ts.commit_batch(self.table_name, oversized)
def test_batch_update_if_match(self):
    """A batched update using the etag returned by the insert must succeed.

    The stored entity must then look like the updated one, and its etag must
    equal the first element of the commit response.
    """
    # Arrange
    original = self._create_default_entity_dict()
    current_etag = self.ts.insert_entity(self.table_name, original)
    partition_key = original['PartitionKey']
    row_key = original['RowKey']

    # Act
    replacement = self._create_updated_entity_dict(partition_key, row_key)
    operations = TableBatch()
    operations.update_entity(replacement, current_etag)
    commit_result = self.ts.commit_batch(self.table_name, operations)

    # Assert
    self.assertIsNotNone(commit_result)
    stored = self.ts.get_entity(self.table_name, partition_key, row_key)
    self._assert_updated_entity(stored)
    self.assertEqual(commit_result[0], stored.etag)
def batch_encrypted_entities(self):
    """Sample: insert encrypted entities through both batch styles.

    The first pair goes through an explicitly created ``TableBatch`` that is
    given its own encryption settings; the second pair goes through a batch
    obtained from the service as a context manager. The scratch table is
    deleted at the end.
    """
    def make_pair():
        # Two fresh entities forced into the same partition so that they can
        # travel in a single batch.
        first = self._create_entity_for_encryption()
        second = self._create_entity_for_encryption()
        second['PartitionKey'] = first['PartitionKey']
        return first, second

    table_name = self._create_table()
    entity1, entity2 = make_pair()

    # Batches will encrypt the entities at the time of inserting into the batch, not
    # committing the batch to the service, so the encryption policy must be
    # passed in at the time of batch creation.
    kek = KeyWrapper('key1')
    explicit_batch = TableBatch(require_encryption=True, key_encryption_key=kek)
    for item in (entity1, entity2):
        explicit_batch.insert_entity(item)
    self.service.commit_batch(table_name, explicit_batch)

    # When using the batch as a context manager, the tableservice object will
    # automatically apply its encryption policy to the batch.
    entity3, entity4 = make_pair()
    self.service.key_encryption_key = KeyWrapper('key1')
    with self.service.batch(table_name) as context_batch:
        for item in (entity3, entity4):
            context_batch.insert_entity(item)

    # Note that batches follow all the same client-side-encryption behavior as
    # the corresponding individual table operations.

    self.service.delete_table(table_name)
def test_batch_inserts(self):
    """Insert 100 entities in one batch and query them back by partition key."""
    insert_count = 100

    # Arrange / Act: one entity object is reused, only its RowKey changes.
    template = Entity()
    template.PartitionKey = 'batch_inserts'
    template.test = EntityProperty(EdmType.BOOLEAN, 'true')
    template.test2 = 'value'
    template.test3 = 3
    template.test4 = EntityProperty(EdmType.INT64, '1234567890')

    operations = TableBatch()
    for number in range(insert_count):
        template.RowKey = str(number)
        operations.insert_entity(template)
    self.ts.commit_batch(self.table_name, operations)

    query_result = self.ts.query_entities(
        self.table_name, "PartitionKey eq 'batch_inserts'", '')
    fetched = list(query_result)

    # Assert
    self.assertIsNotNone(fetched)
    self.assertEqual(100, len(fetched))
def put(runData, testData):
    """Save one benchmark run and its per-test results to Azure Table Storage.

    The run itself becomes the entity with row key ``<rowkey>_0``; each entry
    of ``testData`` becomes ``<rowkey>_1``, ``<rowkey>_2``, ... in the same
    partition. Every value other than the keys is stored as ``str(value)``.

    Args:
        runData: mapping describing the run. ``'partitionkey'`` and
            ``'rowkey'`` are read explicitly; every other item is copied onto
            the run entity.
        testData: mapping of test name to a mapping of metric name to value.

    A summary is printed to stdout once everything has been committed.
    """
    # Azure Table only allows batches of 100 entities.
    maxBatchSize = 100

    azureTable = getTableConnection()
    batch = TableBatch()

    # Add the run data to the batch.
    entity = {}
    for key, value in runData.items():
        if key == 'partitionkey':
            entity['PartitionKey'] = value
        elif key == 'rowkey':
            entity['RowKey'] = '{0}_{1}'.format(value, 0)
        else:
            entity[key] = str(value)
    batch.insert_entity(entity)

    # Add the test data to the batch.
    rowNo = 1
    for testName, testResults in testData.items():
        entity = {}
        entity['PartitionKey'] = runData['partitionkey']
        entity['RowKey'] = '{0}_{1}'.format(runData['rowkey'], rowNo)
        entity['test_name'] = testName
        for metric, value in testResults.items():
            entity[metric] = str(value)

        # The run entity occupies one slot, so the current batch is full
        # exactly when rowNo reaches a multiple of the limit: flush it before
        # queueing this entity.
        if rowNo % maxBatchSize == 0:
            azureTable.commitBatch(batch)
            batch = TableBatch()

        batch.insert_entity(entity)
        rowNo += 1

    # Commit any leftovers in the batch (it always holds at least one entity).
    azureTable.commitBatch(batch)

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('======================================================================')
    print('Benchmark data successfully saved to Azure Table Storage')
    print('======================================================================')
    print('PartitionKey: {0}'.format(runData['partitionkey']))
    print('RowKey: {0}'.format(runData['rowkey']))
    # rowNo ends at 1 + len(testData), i.e. the run entity plus one per test.
    print('# of records inserted: {0}'.format(rowNo))
def test_batch_delete(self):
    """Delete a previously inserted entity through a one-operation batch.

    The commit response must be non-None and its first element must be None.
    """
    partition_key, row_key = '001', 'batch_delete'

    # Arrange / Act: insert the entity directly and confirm it is readable.
    doomed = Entity()
    doomed.PartitionKey = partition_key
    doomed.RowKey = row_key
    doomed.test = EntityProperty(EdmType.BOOLEAN, 'true')
    doomed.test2 = 'value'
    doomed.test3 = 3
    doomed.test4 = EntityProperty(EdmType.INT64, '1234567890')
    doomed.test5 = datetime.utcnow()
    self.ts.insert_entity(self.table_name, doomed)

    stored = self.ts.get_entity(self.table_name, partition_key, row_key)
    self.assertEqual(3, stored.test3)

    operations = TableBatch()
    operations.delete_entity(partition_key, row_key)
    commit_result = self.ts.commit_batch(self.table_name, operations)

    # Assert
    self.assertIsNotNone(commit_result)
    self.assertIsNone(commit_result[0])
def test_invalid_encryption_operations_fail_batch(self):
    """Merge-style operations on an encrypting batch must raise ValueError.

    Both ``merge_entity`` and ``insert_or_merge_entity`` are checked against
    a batch created with ``require_encryption=True`` and a key encryption key.
    """
    # Arrange
    candidate = self._create_default_entity_for_encryption()
    self.ts.key_encryption_key = KeyWrapper('key1')
    self.ts.insert_entity(self.table_name, candidate)

    # Act
    encrypting_batch = TableBatch(
        require_encryption=True,
        key_encryption_key=self.ts.key_encryption_key)

    # Assert
    rejected_operations = (
        encrypting_batch.merge_entity,
        encrypting_batch.insert_or_merge_entity,
    )
    for operation in rejected_operations:
        self.assertRaises(ValueError, operation, candidate)
def test_batch_different_partition_operations_fail(self):
    """Queueing operations for partitions '001' and '002' in one batch must
    raise AzureBatchValidationError."""
    row_key = 'batch_negative_1'

    # Arrange
    seed = self._create_default_entity_dict('001', row_key)
    self.ts.insert_entity(self.table_name, seed)

    # Act
    with self.assertRaises(AzureBatchValidationError):
        mixed = TableBatch()

        same_partition = self._create_updated_entity_dict('001', row_key)
        mixed.update_entity(same_partition)

        other_partition = self._create_default_entity_dict('002', row_key)
        mixed.insert_entity(other_partition)
def wait_for_tasks_to_complete(
        table_service, batch_client, entity_pk, entity_rk, job_id):
    """
    Returns when all tasks in the specified job reach the Completed state.

    On every polling pass (5 seconds apart) the function:

    * re-reads the ``SearchEntity`` row identified by ``entity_pk`` /
      ``entity_rk``;
    * lists the job's tasks, ignoring the one whose id is ``"JobManager"``;
    * copies state, ``StartTime`` and ``EndTime`` changes onto the matching
      ``SearchQueryEntity`` rows (matched by ``RowKey == task.id``), writing
      them back in batches of at most 99 updates;
    * stores the completed-task count and overall state on the search entity,
      plus ``EndTime`` once nothing is incomplete.

    Args:
        table_service: table client providing ``get_entity``,
            ``query_entities``, ``commit_batch`` and ``update_entity``.
        batch_client: client whose ``task.list(job_id)`` yields the tasks.
        entity_pk: partition key of the search entity.
        entity_rk: row key of the search entity.
        job_id: id of the job whose tasks are monitored.
    """
    while True:
        entity = table_service.get_entity(
            'SearchEntity', entity_pk, entity_rk)

        tasks = [task for task in batch_client.task.list(job_id)
                 if task.id != "JobManager"]
        incomplete_tasks = [task for task in tasks
                            if task.state != batchmodels.TaskState.completed]
        complete_tasks = [task for task in tasks
                          if task.state == batchmodels.TaskState.completed]
        failed_tasks = [
            task for task in complete_tasks
            if task.execution_info.exit_code != 0
            or task.execution_info.result
            is batchmodels.TaskExecutionResult.failure]

        queries = table_service.query_entities(
            'SearchQueryEntity',
            filter="PartitionKey eq '{}'".format(entity.RowKey))

        # Index the query rows once instead of re-scanning the whole result
        # for every task. setdefault keeps the first row seen for a RowKey,
        # matching the previous "first match wins" lookup.
        # NOTE(review): this also stops iterating the query result object
        # once per task; whether that object can safely be iterated
        # repeatedly is not visible here — confirm.
        queries_by_row_key = {}
        for query in queries:
            queries_by_row_key.setdefault(query.RowKey, query)

        current_batch_count = 0
        updateBatch = TableBatch()
        for task in tasks:
            query = queries_by_row_key.get(task.id)
            if query is None:
                print('Could not find query {}'.format(task.id))
                continue

            update = False
            state = get_query_state(task)
            if query._State != state:
                query._State = state
                update = True

            # Timestamps are only recorded the first time they are observed.
            if task.state == batchmodels.TaskState.running:
                if not hasattr(query, 'StartTime'):
                    query.StartTime = task.execution_info.start_time
                    update = True

            if task.state == batchmodels.TaskState.completed:
                if not hasattr(query, 'EndTime'):
                    query.EndTime = task.execution_info.end_time
                    update = True

            if update:
                updateBatch.update_entity(query)
                current_batch_count += 1

            # Flush once 99 updates are queued and start a fresh batch.
            if current_batch_count == 99:
                table_service.commit_batch('SearchQueryEntity', updateBatch)
                current_batch_count = 0
                updateBatch = TableBatch()

        if current_batch_count > 0:
            table_service.commit_batch('SearchQueryEntity', updateBatch)

        all_tasks_complete = not incomplete_tasks
        any_failures = len(failed_tasks) > 0

        entity.CompletedTasks = len(complete_tasks)
        entity._State = get_search_state(all_tasks_complete, any_failures)
        if all_tasks_complete:
            entity.EndTime = datetime.datetime.utcnow()

        # The search entity is written back on every pass, finished or not.
        table_service.update_entity('SearchEntity', entity)
        if all_tasks_complete:
            return

        time.sleep(5)
def test_batch_reuse(self):
    """Commit the same ``TableBatch`` object against two different tables.

    A first batch of four inserts and a second batch mixing six operation
    types are each committed to the default table and to ``table2``. The
    second commit on ``table2`` must return six results, and the default
    table must end up with five entities in partition '003'.
    """
    # Arrange
    table2 = self._get_table_reference('table2')
    self.ts.create_table(table2)

    # Act
    shared = Entity()
    shared.PartitionKey = '003'
    shared.RowKey = 'batch_all_operations_together-1'
    shared.test = EntityProperty(EdmType.BOOLEAN, 'true')
    shared.test2 = 'value'
    shared.test3 = 3
    shared.test4 = EntityProperty(EdmType.INT64, '1234567890')
    shared.test5 = datetime.utcnow()

    # First batch: four plain inserts, replayed on both tables.
    inserts = TableBatch()
    initial_row_keys = (
        'batch_all_operations_together-1',
        'batch_all_operations_together-2',
        'batch_all_operations_together-3',
        'batch_all_operations_together-4',
    )
    for row_key in initial_row_keys:
        shared.RowKey = row_key
        inserts.insert_entity(shared)
    for target_table in (self.table_name, table2):
        self.ts.commit_batch(target_table, inserts)

    # Second batch: one operation of each kind, again replayed on both tables.
    mixed = TableBatch()

    shared.RowKey = 'batch_all_operations_together'
    mixed.insert_entity(shared)

    shared.RowKey = 'batch_all_operations_together-1'
    mixed.delete_entity(shared.PartitionKey, shared.RowKey)

    shared.RowKey = 'batch_all_operations_together-2'
    shared.test3 = 10
    mixed.update_entity(shared)

    shared.RowKey = 'batch_all_operations_together-3'
    shared.test3 = 100
    mixed.merge_entity(shared)

    shared.RowKey = 'batch_all_operations_together-4'
    shared.test3 = 10
    mixed.insert_or_replace_entity(shared)

    shared.RowKey = 'batch_all_operations_together-5'
    mixed.insert_or_merge_entity(shared)

    self.ts.commit_batch(self.table_name, mixed)
    second_table_result = self.ts.commit_batch(table2, mixed)

    # Assert
    self.assertEqual(6, len(second_table_result))
    query_result = self.ts.query_entities(
        self.table_name, "PartitionKey eq '003'", '')
    remaining = list(query_result)
    self.assertEqual(5, len(remaining))