def test_query_entities_large(self):
    """Check that one unfiltered query returns all 1000 inserted entities."""
    # Arrange
    table_name = self._create_query_table(0)
    total_entities_count = 1000
    entities_per_batch = 50
    batch_count = total_entities_count // entities_per_batch

    # Fill the table one batch at a time; every row uses the 'large' partition.
    for batch_no in range(batch_count):
        batch = TableBatch()
        for item_no in range(entities_per_batch):
            row = Entity()
            row.PartitionKey = 'large'
            row.RowKey = 'batch{0}-item{1}'.format(batch_no, item_no)
            row.test = EntityProperty(EdmType.BOOLEAN, 'true')
            row.test2 = 'hello world;' * 100
            row.test3 = 3
            row.test4 = EntityProperty(EdmType.INT64, '1234567890')
            row.test5 = datetime(2016, 12, 31, 11, 59, 59, 0)
            batch.insert_entity(row)
        self.ts.commit_batch(table_name, batch)

    # Act
    started = datetime.now()
    fetched = list(self.ts.query_entities(table_name))
    elapsed = datetime.now() - started

    # Assert
    print('query_entities took {0} secs.'.format(elapsed.total_seconds()))
    # azure allocates 5 seconds to execute a query
    # if it runs slowly, it will return fewer results and make the test fail
    self.assertEqual(len(fetched), total_entities_count)
def _create_random_entity_class(self, pk=None, rk=None):
    '''
    Build an Entity object populated with a fixed set of sample values.

    The partition and row keys default to names obtained from
    ``self.get_resource_name`` when ``pk`` / ``rk`` are not given.
    Note that ``optional`` and ``binary`` are both set to None.
    '''
    partition_key = self.get_resource_name('pk') if pk is None else pk
    row_key = self.get_resource_name('rk') if rk is None else rk

    # (attribute, value) pairs, applied in this order after the two keys.
    fixed_values = (
        ('age', 39),
        ('sex', 'male'),
        ('married', True),
        ('deceased', False),
        ('optional', None),
        ('evenratio', 3.0),
        ('ratio', 3.1),
        ('large', 933311100),
        ('Birthday', datetime(1973, 10, 4)),
        ('birthday', datetime(1970, 10, 4)),
        ('binary', None),
        ('other', EntityProperty(EdmType.INT32, 20)),
        ('clsid', EntityProperty(EdmType.GUID,
                                 'c9da6455-213d-42c9-9a79-3e9149a57833')),
    )

    result = Entity()
    result.PartitionKey = partition_key
    result.RowKey = row_key
    for attribute, value in fixed_values:
        setattr(result, attribute, value)
    return result
def put_encrypted_entity_properties(self):
    """Sample: insert entities whose ``foo`` property is marked for encryption.

    Creates a scratch table, inserts one dict-based and one class-based
    entity, and always deletes the table afterwards, even if an insert
    raises.
    """
    table_name = self._create_table()
    try:
        # Can use a dict or the Entity class to encrypt entities.
        # The EntityProperty object takes an optional parameter, 'encrypt',
        # that marks the property for encryption when set to true.
        entity1 = self._create_base_entity_dict()
        entity1['foo'] = EntityProperty(EdmType.STRING, 'bar', True)
        entity2 = self._create_base_entity_class()
        entity2.foo = EntityProperty(EdmType.STRING, 'bar', True)

        # KeyWrapper implements the key encryption key interface outlined
        # in the insert/get entity documentation.
        # Setting this property will tell these APIs to encrypt the entity.
        self.service.key_encryption_key = KeyWrapper('key1')
        self.service.insert_entity(table_name, entity1)
        self.service.insert_entity(table_name, entity2)

        # Note: The internal encryption process requires two properties, so
        # there are only 250 custom properties available when encrypting.
        # Note: str is the only type valid for encryption. Trying to encrypt
        # other properties, e.g. EntityProperty(EdmType.INT64, 12, True),
        # will throw.
    finally:
        # Clean up the scratch table whether or not the inserts succeeded.
        self.service.delete_table(table_name)
def test_batch_insert_merge(self):
    """Commit a single insert-or-merge through a batch and read it back."""
    # Arrange
    partition_key = '001'
    row_key = 'batch_insert_merge'

    # Act
    pending = Entity()
    pending.PartitionKey = partition_key
    pending.RowKey = row_key
    pending.test = EntityProperty(EdmType.BOOLEAN, 'true')
    pending.test2 = 'value'
    pending.test3 = 3
    pending.test4 = EntityProperty(EdmType.INT64, '1234567890')
    pending.test5 = datetime.utcnow()

    batch = TableBatch()
    batch.insert_or_merge_entity(pending)
    resp = self.ts.commit_batch(self.table_name, batch)

    # Assert
    self.assertIsNotNone(resp)
    stored = self.ts.get_entity(self.table_name, partition_key, row_key)
    self.assertIsNotNone(stored)
    self.assertEqual('value', stored.test2)
    self.assertEqual(1234567890, stored.test4)
    # The first element of the batch response is compared to the stored etag.
    self.assertEqual(resp[0], stored.etag)
def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None): if jobStoreID is not None: self.jobFileIDs.insert_entity( entity={ 'PartitionKey': EntityProperty('Edm.String', jobStoreID), 'RowKey': EntityProperty('Edm.String', jobStoreFileID) })
def test_batch_reuse(self):
    """Commit the same batch objects against two tables in turn."""
    # Arrange
    table2 = self._get_table_reference('table2')
    self.ts.create_table(table2)

    # Act
    entity = Entity()
    entity.PartitionKey = '003'
    entity.RowKey = 'batch_all_operations_together-1'
    entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
    entity.test2 = 'value'
    entity.test3 = 3
    entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
    entity.test5 = datetime.utcnow()

    # First batch: seed rows -1 .. -4, reusing the one entity object and
    # only changing its RowKey between inserts.
    seed_batch = TableBatch()
    seed_batch.insert_entity(entity)
    for row_key in ('batch_all_operations_together-2',
                    'batch_all_operations_together-3',
                    'batch_all_operations_together-4'):
        entity.RowKey = row_key
        seed_batch.insert_entity(entity)

    for target in (self.table_name, table2):
        self.ts.commit_batch(target, seed_batch)

    # Second batch: one of each operation kind.
    mixed_batch = TableBatch()

    entity.RowKey = 'batch_all_operations_together'
    mixed_batch.insert_entity(entity)

    entity.RowKey = 'batch_all_operations_together-1'
    mixed_batch.delete_entity(entity.PartitionKey, entity.RowKey)

    entity.RowKey = 'batch_all_operations_together-2'
    entity.test3 = 10
    mixed_batch.update_entity(entity)

    entity.RowKey = 'batch_all_operations_together-3'
    entity.test3 = 100
    mixed_batch.merge_entity(entity)

    entity.RowKey = 'batch_all_operations_together-4'
    entity.test3 = 10
    mixed_batch.insert_or_replace_entity(entity)

    entity.RowKey = 'batch_all_operations_together-5'
    mixed_batch.insert_or_merge_entity(entity)

    self.ts.commit_batch(self.table_name, mixed_batch)
    resp = self.ts.commit_batch(table2, mixed_batch)

    # Assert
    self.assertEqual(6, len(resp))
    remaining = list(
        self.ts.query_entities(self.table_name, "PartitionKey eq '003'", ''))
    self.assertEqual(5, len(remaining))
def test_insert_entity_with_large_int64_value_throws(self):
    """Values outside the signed 64-bit range must be rejected with TypeError."""
    # Arrange
    payload = self._create_random_base_entity_dict()
    # One value just above and one just below the signed 64-bit range.
    out_of_range = (2**63, -(2**63 + 1))

    # Act / Assert
    for value in out_of_range:
        payload['large'] = EntityProperty(EdmType.INT64, value)
        expected = '{0} is too large to be cast to type Edm.Int64.'.format(
            value)
        with self.assertRaisesRegexp(TypeError, expected):
            self.ts.insert_entity(self.table_name, payload)
def create(self, jobNode):
    """
    Create a job from ``jobNode`` under a newly generated job store ID,
    insert its serialized form into ``self.jobItems`` and return the job.
    """
    newID = self._newJobID()
    job = AzureJob.fromJobNode(jobNode, newID, self._defaultTryCount())

    # The item's row key is the job store ID itself.
    item = job.toItem(chunkSize=self.jobChunkSize)
    item['RowKey'] = EntityProperty('Edm.String', newID)
    self.jobItems.insert_entity(entity=item)
    return job
def test_batch_all_operations_together_context_manager(self):
    """Run one of each batch operation through ``self.ts.batch`` as a context manager."""
    # Arrange
    # Act
    entity = Entity()
    entity.PartitionKey = '003'
    entity.RowKey = 'batch_all_operations_together-1'
    entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
    entity.test2 = 'value'
    entity.test3 = 3
    entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
    entity.test5 = datetime.utcnow()

    # Seed rows -1 .. -4 individually, reusing the one entity object.
    self.ts.insert_entity(self.table_name, entity)
    for row_key in ('batch_all_operations_together-2',
                    'batch_all_operations_together-3',
                    'batch_all_operations_together-4'):
        entity.RowKey = row_key
        self.ts.insert_entity(self.table_name, entity)

    with self.ts.batch(self.table_name) as ops:
        entity.RowKey = 'batch_all_operations_together'
        ops.insert_entity(entity)

        entity.RowKey = 'batch_all_operations_together-1'
        ops.delete_entity(entity.PartitionKey, entity.RowKey)

        entity.RowKey = 'batch_all_operations_together-2'
        entity.test3 = 10
        ops.update_entity(entity)

        entity.RowKey = 'batch_all_operations_together-3'
        entity.test3 = 100
        ops.merge_entity(entity)

        entity.RowKey = 'batch_all_operations_together-4'
        entity.test3 = 10
        ops.insert_or_replace_entity(entity)

        entity.RowKey = 'batch_all_operations_together-5'
        ops.insert_or_merge_entity(entity)

    # Assert
    remaining = list(
        self.ts.query_entities(self.table_name, "PartitionKey eq '003'", ''))
    self.assertEqual(5, len(remaining))
def _create_default_entity_dict(self, pk=None, rk=None):
    '''
    Build a dict-based entity populated with a fixed set of sample values.

    The partition and row keys default to names obtained from
    ``self.get_resource_name`` when ``pk`` / ``rk`` are not given.
    '''
    partition_key = self.get_resource_name('pk') if pk is None else pk
    row_key = self.get_resource_name('rk') if rk is None else rk

    result = {'PartitionKey': partition_key, 'RowKey': row_key}

    # Plain Python values.
    result['age'] = 39
    result['sex'] = 'male'
    result['name'] = 'John Doe'
    result['married'] = True
    result['deceased'] = False
    result['optional'] = None
    result['ratio'] = 3.1
    result['evenratio'] = 3.0
    result['large'] = 933311100
    result['Birthday'] = datetime(1973, 10, 4)
    result['birthday'] = datetime(1970, 10, 4)

    # Values wrapped with an explicit EDM type.
    result['binary'] = EntityProperty(EdmType.BINARY, b'binary')
    result['other'] = EntityProperty(EdmType.INT32, 20)
    result['clsid'] = EntityProperty(
        EdmType.GUID, 'c9da6455-213d-42c9-9a79-3e9149a57833')
    return result
def date_for_azure(dt):
    """
    Wrap the calendar date of ``dt`` as a DATETIME EntityProperty.

    The time-of-day is dropped so that values from the same day compare
    equal, and the result is localized to US/Eastern so it is
    timezone-aware.

    :param dt: object exposing ``year``, ``month`` and ``day``
    :return: EntityProperty(EdmType.DATETIME, <aware midnight datetime>)
    """
    # Midnight of the same calendar day, still naive at this point.
    midnight = datetime.datetime(dt.year, dt.month, dt.day)

    # Azure Table Storage requires that the date have a time zone component
    # (i.e., is "aware").
    eastern = pytz.timezone('US/Eastern')
    aware_midnight = eastern.localize(midnight)

    return EntityProperty(EdmType.DATETIME, aware_midnight)
def test_insert_entity_with_large_int32_value_throws(self):
    """Inserting 2**15 typed as Edm.Int32 is expected to raise TypeError."""
    # Arrange
    # NOTE(review): 2**15 fits in a signed 32-bit integer, so this
    # expectation depends on the bound the client library enforces for
    # Edm.Int32 - confirm against the SDK version under test.
    too_large = 2**15
    payload = self._create_random_base_entity_dict()

    # Act
    payload['large'] = EntityProperty(EdmType.INT32, too_large)

    # Assert
    expected = '{0} is too large to be cast to type Edm.Int32.'.format(
        too_large)
    with self.assertRaisesRegexp(TypeError, expected):
        self.ts.insert_entity(self.table_name, payload)
def toItem(self, chunkSize=maxAzureTablePropertySize):
    """
    Serialize this job into a dict of binary table properties.

    The job is pickled, bz2-compressed and cut into consecutive pieces of at
    most ``chunkSize`` bytes. Piece number ``i`` is stored under the key
    ``'_'`` followed by ``i`` zero-padded to three digits.

    :param chunkSize: maximum size of each piece; must not exceed
           maxAzureTablePropertySize

    :rtype: dict
    """
    assert chunkSize <= maxAzureTablePropertySize
    payload = bz2.compress(cPickle.dumps(self))
    starts = range(0, len(payload), chunkSize)
    return {
        '_' + str(index).zfill(3):
            EntityProperty('Edm.Binary', payload[start:start + chunkSize])
        for index, start in enumerate(starts)
    }
def test_binary_property_value(self):
    """A binary property must round-trip with its type and bytes intact."""
    # Arrange
    binary_data = b'\x01\x02\x03\x04\x05\x06\x07\x08\t\n'
    source = self._create_random_base_entity_dict()
    source.update({'binary': EntityProperty(EdmType.BINARY, binary_data)})

    # Act
    self.ts.insert_entity(self.table_name, source)
    partition_key = source['PartitionKey']
    row_key = source['RowKey']
    fetched = self.ts.get_entity(self.table_name, partition_key, row_key)

    # Assert
    self.assertIsNotNone(fetched)
    stored_binary = fetched.binary
    self.assertEqual(stored_binary.type, EdmType.BINARY)
    self.assertEqual(stored_binary.value, binary_data)
def test_batch_inserts(self):
    """Insert 100 rows in one batch and check that all of them are stored."""
    # Arrange
    row_count = 100

    # Act
    template = Entity()
    template.PartitionKey = 'batch_inserts'
    template.test = EntityProperty(EdmType.BOOLEAN, 'true')
    template.test2 = 'value'
    template.test3 = 3
    template.test4 = EntityProperty(EdmType.INT64, '1234567890')

    # The same entity object is queued repeatedly; only RowKey changes.
    batch = TableBatch()
    for index in range(row_count):
        template.RowKey = str(index)
        batch.insert_entity(template)
    self.ts.commit_batch(self.table_name, batch)

    stored = list(
        self.ts.query_entities(self.table_name,
                               "PartitionKey eq 'batch_inserts'", ''))

    # Assert
    self.assertIsNotNone(stored)
    self.assertEqual(100, len(stored))
def test_batch_delete(self):
    """Delete a previously inserted entity through a batch."""
    # Arrange
    partition_key = '001'
    row_key = 'batch_delete'

    # Act
    seed = Entity()
    seed.PartitionKey = partition_key
    seed.RowKey = row_key
    seed.test = EntityProperty(EdmType.BOOLEAN, 'true')
    seed.test2 = 'value'
    seed.test3 = 3
    seed.test4 = EntityProperty(EdmType.INT64, '1234567890')
    seed.test5 = datetime.utcnow()
    self.ts.insert_entity(self.table_name, seed)

    # Sanity check: the row is readable before it is deleted.
    stored = self.ts.get_entity(self.table_name, partition_key, row_key)
    self.assertEqual(3, stored.test3)

    batch = TableBatch()
    batch.delete_entity(partition_key, row_key)
    resp = self.ts.commit_batch(self.table_name, batch)

    # Assert
    self.assertIsNotNone(resp)
    self.assertIsNone(resp[0])
def test_get_encrypted_dict(self):
    """A dict entity with an encrypted 'sex' property reads back as the default entity."""
    # Arrange
    self.ts.require_encryption = True
    source = self._create_default_entity_dict()
    # Re-wrap the existing value with the encrypt flag set.
    plain_sex = source['sex']
    source['sex'] = EntityProperty(EdmType.STRING, plain_sex, True)
    self.ts.key_encryption_key = KeyWrapper('key1')
    self.ts.insert_entity(self.table_name, source)

    # Act
    partition_key = source['PartitionKey']
    row_key = source['RowKey']
    fetched = self.ts.get_entity(self.table_name, partition_key, row_key)

    # Assert
    self._assert_default_entity(fetched)
def test_replace_entity(self):
    """Updating a stored entity with an encrypted 'sex' property reads back the new value."""
    # Arrange
    # The entity is first inserted before any encryption settings are applied.
    original = self._create_random_entity_class()
    self.ts.insert_entity(self.table_name, original)

    encrypted_sex = EntityProperty(EdmType.STRING, 'female', True)
    original['sex'] = encrypted_sex
    self.ts.key_encryption_key = KeyWrapper('key1')

    # Act
    self.ts.require_encryption = True
    self.ts.update_entity(self.table_name, original)
    fetched = self.ts.get_entity(self.table_name,
                                 original['PartitionKey'],
                                 original['RowKey'])

    # Assert
    self.assertEqual(fetched['sex'], original['sex'].value)
def test_insert_encrypt_invalid_types(self):
    """Inserting an entity with an encrypt-flagged non-string (or None) value must raise ValueError.

    Covers binary, boolean, datetime, double, GUID, int32, int64 and a
    string-typed property whose value is None. For the binary case the
    error text is also compared to _ERROR_UNSUPPORTED_TYPE_FOR_ENCRYPTION.
    """
    # Arrange
    self.ts.require_encryption = True

    entity_binary = self._create_random_entity_class()
    entity_binary['bytes'] = EntityProperty(EdmType.BINARY, urandom(10), True)

    entity_boolean = self._create_random_entity_class()
    entity_boolean['married'] = EntityProperty(EdmType.BOOLEAN, True, True)

    entity_date_time = self._create_random_entity_class()
    entity_date_time['birthday'] = EntityProperty(
        EdmType.DATETIME, entity_date_time['birthday'], True)

    entity_double = self._create_random_entity_class()
    # Tag the float as Edm.Double (not DATETIME) so this case really
    # exercises the double type.
    entity_double['ratio'] = EntityProperty(
        EdmType.DOUBLE, entity_double['ratio'], True)

    # GUID and int32 are already EntityProperty objects; just flag them.
    entity_guid = self._create_random_entity_class()
    entity_guid['clsid'].encrypt = True

    entity_int32 = self._create_random_entity_class()
    entity_int32['other'].encrypt = True

    entity_int64 = self._create_random_entity_class()
    entity_int64['large'] = EntityProperty(
        EdmType.INT64, entity_int64['large'], True)

    self.ts.key_encryption_key = KeyWrapper('key1')

    entity_none_str = self._create_random_entity_class()
    entity_none_str['none_str'] = EntityProperty(EdmType.STRING, None, True)

    # Act
    # Assert
    with self.assertRaises(ValueError) as raised:
        self.ts.insert_entity(self.table_name, entity_binary)
    self.assertEqual(str(raised.exception),
                     _ERROR_UNSUPPORTED_TYPE_FOR_ENCRYPTION)

    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_boolean)
    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_date_time)
    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_double)
    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_guid)
    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_int32)
    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_int64)
    with self.assertRaises(ValueError):
        self.ts.insert_entity(self.table_name, entity_none_str)
def log_to_table(table_service, table_name, message, status, result=None,
                 exception=None, create=False):
    """
    Record the status (and optional outcome) of a task in an Azure table.

    The row is written with insert-or-merge and retried up to 60 times,
    sleeping 2 seconds after each failed attempt except the last.

    :param table_service: azure.storage.table.TableService
    :param table_name: Name of the Azure table to use.
    :param message: Dict from Azure queue or azure.storage.table.Entity
    :param status: Status of the task. Ex: STARTED, FAILED etc...
    :param result: Result if any; stored as its ``repr``.
    :param exception: Exception, if any; stored as its ``repr``.
    :param create: Bool. Adds the dequeue time. Used to keep it even after
                   updating an existing row.
    :raises PawError: if the message lacks a partition/row key, or if every
                      write attempt fails.
    """
    create_table_if_missing(table_service, table_name)

    # To support both an Entity or message from queue
    partition_key = message.get('task_name') or message.get('PartitionKey')
    row_key = message.get('job_id') or message.get('RowKey')
    if not (partition_key and row_key):
        raise PawError('message did not contained all required information. '
                       '"task_name" {}, "job_id" {}'.format(
                           partition_key, row_key))

    entity = Entity()
    additional_log = message.get('additional_log')
    if additional_log:
        entity.update(additional_log)

    # Set after the update so these fields win over additional_log entries.
    entity.PartitionKey = partition_key
    entity.RowKey = row_key
    entity.status = status

    # Added in this manner because Azure SDK's serializer fails
    # when results are repr(list).
    if result:
        # noinspection PyTypeChecker
        entity.result = EntityProperty(type=EdmType.STRING,
                                       value=repr(result))
    if exception:
        # noinspection PyTypeChecker
        entity.exception = EntityProperty(type=EdmType.STRING,
                                          value=repr(exception))
    if create:
        entity.dequeue_time = datetime.datetime.utcnow()

    max_attempts = 60
    for attempts_left in range(max_attempts - 1, -1, -1):
        try:
            table_service.insert_or_merge_entity(table_name, entity)
        except AzureException as e:
            LOGGER.warning("Error from Azure table service: "
                           "{}".format(traceback.format_exc()))
            if attempts_left == 0:
                # Out of attempts: escalate the log level and give up.
                LOGGER.error("Error from Azure table service: "
                             "{}".format(traceback.format_exc()))
                raise PawError(e)
            time.sleep(2)
        else:
            break
def _create_default_entity_for_encryption(self):
    """Return a random class entity whose 'sex' and 'name' values are flagged for encryption."""
    # NOTE(review): assumes _create_random_entity_class() provides both a
    # 'sex' and a 'name' property - confirm against that factory.
    encrypted_entity = self._create_random_entity_class()
    for field in ('sex', 'name'):
        plain_value = encrypted_entity[field]
        encrypted_entity[field] = EntityProperty(EdmType.STRING, plain_value,
                                                 True)
    return encrypted_entity
def create_entity(self, instagram_post_entity, pk=None, rk=None):
    """Build a table-entity dict from an Instagram post record.

    Only properties that are present in ``instagram_post_entity`` with a
    non-None value are copied; each is wrapped in an EntityProperty with
    the EDM type listed below. TAGS, IMAGE_CONTEXT and DIMENSIONS are first
    passed through ``self.serialize_entity_attribute_value``.

    :param instagram_post_entity: mapping of post property name to value
    :param pk: partition key; defaults to ``self.get_pk()``
    :param rk: row key; defaults to ``self.get_pk()``
    :return: dict with 'PartitionKey', 'RowKey' and the wrapped properties
    """
    entity = {
        'PartitionKey': pk if pk is not None else self.get_pk(),
        # NOTE(review): the row key also falls back to get_pk(); kept as in
        # the original - confirm this is intended.
        'RowKey': rk if rk is not None else self.get_pk(),
    }

    # (property name, EDM type, needs serialization), in output order.
    property_specs = (
        (PICTURE_ID, EdmType.INT64, False),
        (OWNER_ID, EdmType.INT64, False),
        (LOGO_NAME, EdmType.STRING, False),
        # This is a unix epoch timestamp (we will do conversion from utc to
        # epoch for search)
        (TIME, EdmType.INT64, False),
        (CAPTION, EdmType.STRING, False),
        (TAGS, EdmType.STRING, True),
        (HAS_LOGO, EdmType.BOOLEAN, False),
        (ACCURACY, EdmType.DOUBLE, False),
        (IMAGE_CONTEXT, EdmType.STRING, True),
        (IMAGE_PATH, EdmType.STRING, False),
        (DIMENSIONS, EdmType.STRING, True),
    )

    for name, edm_type, needs_serialization in property_specs:
        if name not in instagram_post_entity:
            continue
        value = instagram_post_entity[name]
        if value is None:
            continue
        if needs_serialization:
            value = self.serialize_entity_attribute_value(value)
        entity[name] = EntityProperty(edm_type, value)

    return entity
def _create_entity_for_encryption(self):
    """Return a base class entity with an extra 'foo' property flagged for encryption."""
    # The third positional argument (True) sets the encrypt flag.
    encrypted_foo = EntityProperty(EdmType.STRING, 'bar', True)
    result = self._create_base_entity_class()
    result['foo'] = encrypted_foo
    return result