Exemple #1
0
    def test_query_entities_large(self):
        """Insert 1000 entities in batches of 50 and check that one
        query_entities call over the table yields all of them."""
        # Arrange
        table_name = self._create_query_table(0)
        total_entities_count = 1000
        entities_per_batch = 50
        batch_count = total_entities_count // entities_per_batch

        for batch_index in range(batch_count):
            batch = TableBatch()
            for item_index in range(entities_per_batch):
                row = Entity()
                row.PartitionKey = 'large'
                row.RowKey = 'batch{0}-item{1}'.format(batch_index, item_index)
                row.test = EntityProperty(EdmType.BOOLEAN, 'true')
                row.test2 = 'hello world;' * 100
                row.test3 = 3
                row.test4 = EntityProperty(EdmType.INT64, '1234567890')
                row.test5 = datetime(2016, 12, 31, 11, 59, 59, 0)
                batch.insert_entity(row)
            self.ts.commit_batch(table_name, batch)

        # Act
        started_at = datetime.now()
        results = list(self.ts.query_entities(table_name))
        duration = datetime.now() - started_at

        # Assert
        timing_report = 'query_entities took {0} secs.'.format(
            duration.total_seconds())
        print(timing_report)
        # Azure allocates 5 seconds to execute a query; a query that runs
        # slowly returns fewer results, which makes the count check fail.
        self.assertEqual(len(results), total_entities_count)
Exemple #2
0
 def _create_random_entity_class(self, pk=None, rk=None):
     '''
     Build an Entity object carrying a fixed set of sample properties
     (ints, strings, booleans, None, floats, datetimes and explicitly
     typed EntityProperty values).

     pk / rk are used as the partition and row key when given; a key
     left as None is taken from self.get_resource_name('pk') or
     self.get_resource_name('rk') instead.
     '''
     if pk is None:
         partition_key = self.get_resource_name('pk')
     else:
         partition_key = pk
     if rk is None:
         row_key = self.get_resource_name('rk')
     else:
         row_key = rk

     entity = Entity()
     # Applied in order, exactly as individual attribute assignments would be.
     assignments = (
         ('PartitionKey', partition_key),
         ('RowKey', row_key),
         ('age', 39),
         ('sex', 'male'),
         ('married', True),
         ('deceased', False),
         ('optional', None),
         ('evenratio', 3.0),
         ('ratio', 3.1),
         ('large', 933311100),
         ('Birthday', datetime(1973, 10, 4)),
         ('birthday', datetime(1970, 10, 4)),
         ('binary', None),
         ('other', EntityProperty(EdmType.INT32, 20)),
         ('clsid', EntityProperty(EdmType.GUID,
                                  'c9da6455-213d-42c9-9a79-3e9149a57833')),
     )
     for attribute, value in assignments:
         setattr(entity, attribute, value)
     return entity
Exemple #3
0
    def put_encrypted_entity_properties(self):
        """Sample: insert entities whose 'foo' property is marked for encryption.

        Creates a table, inserts one dict-based and one class-based entity
        with self.service.key_encryption_key set, and deletes the table
        again, including when one of the inserts raises.
        """
        table_name = self._create_table()

        try:
            # Can use a dict or the Entity class to encrypt entities.
            # The EntityProperty object takes an optional parameter, 'encrypt',
            # that marks the property for encryption when set to true.
            entity1 = self._create_base_entity_dict()
            entity1['foo'] = EntityProperty(EdmType.STRING, 'bar', True)
            entity2 = self._create_base_entity_class()
            entity2.foo = EntityProperty(EdmType.STRING, 'bar', True)

            # Illustration only: entity3 marks a non-str property for
            # encryption and is deliberately never inserted (see the note
            # on valid types below).
            entity3 = self._create_base_entity_class()
            entity3['badValue'] = EntityProperty(EdmType.INT64, 12, True)

            # KeyWrapper implements the key encryption key interface outlined
            # in the insert/get entity documentation.
            # Setting this property will tell these APIs to encrypt the entity.
            self.service.key_encryption_key = KeyWrapper('key1')
            self.service.insert_entity(table_name, entity1)
            self.service.insert_entity(table_name, entity2)

            # Note: The internal encryption process requires two properties,
            # so there are only 250 custom properties available when
            # encrypting.
            # Note: str is the only type valid for encryption. Trying to
            # encrypt other properties will throw.
        finally:
            # Always drop the sample table so a failed insert does not leak it.
            self.service.delete_table(table_name)
Exemple #4
0
    def test_batch_insert_merge(self):
        """Commit one insert-or-merge operation in a batch and verify the
        entity read back from the table."""
        # Arrange
        partition_key = '001'
        row_key = 'batch_insert_merge'

        # Act
        source = Entity()
        source.PartitionKey = partition_key
        source.RowKey = row_key
        source.test = EntityProperty(EdmType.BOOLEAN, 'true')
        source.test2 = 'value'
        source.test3 = 3
        source.test4 = EntityProperty(EdmType.INT64, '1234567890')
        source.test5 = datetime.utcnow()

        batch = TableBatch()
        batch.insert_or_merge_entity(source)
        resp = self.ts.commit_batch(self.table_name, batch)

        # Assert
        self.assertIsNotNone(resp)
        stored = self.ts.get_entity(self.table_name, partition_key, row_key)
        self.assertIsNotNone(stored)
        self.assertEqual('value', stored.test2)
        self.assertEqual(1234567890, stored.test4)
        # The first batch response entry is compared with the entity's etag.
        self.assertEqual(resp[0], stored.etag)
Exemple #5
0
 def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
     """
     Insert a row into self.jobFileIDs whose PartitionKey is jobStoreID
     and whose RowKey is jobStoreFileID.

     Does nothing when jobStoreID is None.
     """
     if jobStoreID is None:
         return
     row = {
         'PartitionKey': EntityProperty('Edm.String', jobStoreID),
         'RowKey': EntityProperty('Edm.String', jobStoreFileID),
     }
     self.jobFileIDs.insert_entity(entity=row)
Exemple #6
0
    def test_batch_reuse(self):
        """Commit the same TableBatch objects against two tables in turn
        and check the response size and the resulting entity count."""
        # Arrange
        second_table = self._get_table_reference('table2')
        self.ts.create_table(second_table)

        # Act
        entity = Entity()
        entity.PartitionKey = '003'
        entity.RowKey = 'batch_all_operations_together-1'
        entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
        entity.test2 = 'value'
        entity.test3 = 3
        entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
        entity.test5 = datetime.utcnow()

        # First batch: four inserts that differ only in their RowKey.
        seed_batch = TableBatch()
        seed_batch.insert_entity(entity)
        for row_key in ('batch_all_operations_together-2',
                        'batch_all_operations_together-3',
                        'batch_all_operations_together-4'):
            entity.RowKey = row_key
            seed_batch.insert_entity(entity)

        self.ts.commit_batch(self.table_name, seed_batch)
        self.ts.commit_batch(second_table, seed_batch)

        # Second batch: one operation of each kind, reusing the same entity
        # object and mutating it between operations.
        mixed_batch = TableBatch()

        entity.RowKey = 'batch_all_operations_together'
        mixed_batch.insert_entity(entity)

        entity.RowKey = 'batch_all_operations_together-1'
        mixed_batch.delete_entity(entity.PartitionKey, entity.RowKey)

        entity.RowKey = 'batch_all_operations_together-2'
        entity.test3 = 10
        mixed_batch.update_entity(entity)

        entity.RowKey = 'batch_all_operations_together-3'
        entity.test3 = 100
        mixed_batch.merge_entity(entity)

        entity.RowKey = 'batch_all_operations_together-4'
        entity.test3 = 10
        mixed_batch.insert_or_replace_entity(entity)

        entity.RowKey = 'batch_all_operations_together-5'
        mixed_batch.insert_or_merge_entity(entity)

        self.ts.commit_batch(self.table_name, mixed_batch)
        resp = self.ts.commit_batch(second_table, mixed_batch)

        # Assert
        self.assertEqual(6, len(resp))
        partition_filter = "PartitionKey eq '003'"
        remaining = list(
            self.ts.query_entities(self.table_name, partition_filter, ''))
        self.assertEqual(5, len(remaining))
    def test_insert_entity_with_large_int64_value_throws(self):
        """Inserting an Edm.Int64 property just outside the signed 64-bit
        range (2**63 and -(2**63 + 1)) is expected to raise TypeError."""
        # unittest removed the assertRaisesRegexp alias in Python 3.12, while
        # Python 2.7 only has that spelling, so use whichever one exists.
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)

        # Arrange
        dict64 = self._create_random_base_entity_dict()

        # Act / Assert
        for out_of_range in (2**63, -(2**63 + 1)):
            dict64['large'] = EntityProperty(EdmType.INT64, out_of_range)
            expected_message = (
                '{0} is too large to be cast to type Edm.Int64.'.format(
                    out_of_range))
            with assert_raises_regex(TypeError, expected_message):
                self.ts.insert_entity(self.table_name, dict64)
Exemple #8
0
 def create(self, jobNode):
     """
     Build an AzureJob from jobNode under a freshly generated job ID,
     insert its serialized item into self.jobItems keyed by that ID,
     and return the job.
     """
     newJobID = self._newJobID()
     tryCount = self._defaultTryCount()
     job = AzureJob.fromJobNode(jobNode, newJobID, tryCount)

     # The serialized job becomes the row; its RowKey is the job ID.
     row = job.toItem(chunkSize=self.jobChunkSize)
     row['RowKey'] = EntityProperty('Edm.String', newJobID)
     self.jobItems.insert_entity(entity=row)
     return job
Exemple #9
0
    def test_batch_all_operations_together_context_manager(self):
        """Seed four entities, then run one operation of each kind through
        the self.ts.batch context manager and check the entity count."""
        # Arrange

        # Act
        entity = Entity()
        entity.PartitionKey = '003'
        entity.RowKey = 'batch_all_operations_together-1'
        entity.test = EntityProperty(EdmType.BOOLEAN, 'true')
        entity.test2 = 'value'
        entity.test3 = 3
        entity.test4 = EntityProperty(EdmType.INT64, '1234567890')
        entity.test5 = datetime.utcnow()

        # Seed rows -1 .. -4 individually; only the RowKey changes.
        self.ts.insert_entity(self.table_name, entity)
        for row_key in ('batch_all_operations_together-2',
                        'batch_all_operations_together-3',
                        'batch_all_operations_together-4'):
            entity.RowKey = row_key
            self.ts.insert_entity(self.table_name, entity)

        with self.ts.batch(self.table_name) as batch:
            entity.RowKey = 'batch_all_operations_together'
            batch.insert_entity(entity)

            entity.RowKey = 'batch_all_operations_together-1'
            batch.delete_entity(entity.PartitionKey, entity.RowKey)

            entity.RowKey = 'batch_all_operations_together-2'
            entity.test3 = 10
            batch.update_entity(entity)

            entity.RowKey = 'batch_all_operations_together-3'
            entity.test3 = 100
            batch.merge_entity(entity)

            entity.RowKey = 'batch_all_operations_together-4'
            entity.test3 = 10
            batch.insert_or_replace_entity(entity)

            entity.RowKey = 'batch_all_operations_together-5'
            batch.insert_or_merge_entity(entity)

        # Assert
        partition_filter = "PartitionKey eq '003'"
        remaining = list(
            self.ts.query_entities(self.table_name, partition_filter, ''))
        self.assertEqual(5, len(remaining))
Exemple #10
0
 def _create_default_entity_dict(self, pk=None, rk=None):
     '''
     Return a plain dict entity carrying a fixed set of sample
     properties (ints, strings, booleans, None, floats, datetimes and
     explicitly typed EntityProperty values).

     pk / rk are used as the partition and row key when given; a key
     left as None is taken from self.get_resource_name('pk') or
     self.get_resource_name('rk') instead.
     '''
     partition_key = self.get_resource_name('pk') if pk is None else pk
     row_key = self.get_resource_name('rk') if rk is None else rk

     clsid = EntityProperty(EdmType.GUID,
                            'c9da6455-213d-42c9-9a79-3e9149a57833')
     entity = {
         'PartitionKey': partition_key,
         'RowKey': row_key,
         'age': 39,
         'sex': 'male',
         'name': 'John Doe',
         'married': True,
         'deceased': False,
         'optional': None,
         'ratio': 3.1,
         'evenratio': 3.0,
         'large': 933311100,
         'Birthday': datetime(1973, 10, 4),
         'birthday': datetime(1970, 10, 4),
         'binary': EntityProperty(EdmType.BINARY, b'binary'),
         'other': EntityProperty(EdmType.INT32, 20),
         'clsid': clsid,
     }
     return entity
def date_for_azure(dt):
    """Wrap the calendar day of ``dt`` in an Edm.DateTime EntityProperty.

    The time of day is discarded and the resulting midnight value is
    localized to the 'US/Eastern' time zone before being wrapped.

    :param dt: object exposing ``year``, ``month`` and ``day``
    :return: EntityProperty of type EdmType.DATETIME
    """
    # Dropping the time component makes values from the same day match.
    midnight = datetime.datetime(dt.year, dt.month, dt.day)

    # Azure Table Storage requires the date to carry a time zone (to be
    # "aware"), so attach one before wrapping the value.
    eastern = pytz.timezone('US/Eastern')
    return EntityProperty(EdmType.DATETIME, eastern.localize(midnight))
Exemple #12
0
    def test_insert_entity_with_large_int32_value_throws(self):
        """Inserting an Edm.Int32 property holding 2**15 is expected to
        raise TypeError with a 'too large' message."""
        # unittest removed the assertRaisesRegexp alias in Python 3.12, while
        # Python 2.7 only has that spelling, so use whichever one exists.
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)

        # Arrange
        # NOTE(review): 2**15 fits in a signed 32-bit integer, so this only
        # throws if the serializer's Edm.Int32 limit is 2**15 -- confirm that
        # limit is intended.
        too_large = 2**15

        # Act
        dict32 = self._create_random_base_entity_dict()
        dict32['large'] = EntityProperty(EdmType.INT32, too_large)

        # Assert
        expected_message = (
            '{0} is too large to be cast to type Edm.Int32.'.format(too_large))
        with assert_raises_regex(TypeError, expected_message):
            self.ts.insert_entity(self.table_name, dict32)
Exemple #13
0
 def toItem(self, chunkSize=maxAzureTablePropertySize):
     """
     Serialize this job into a dict of 'Edm.Binary' entity properties.

     The job is pickled and bz2-compressed, then cut into consecutive
     pieces of at most chunkSize each. Piece number N is stored under the
     key '_' followed by N zero-padded to three digits.

     :param chunkSize: the size of a chunk for splitting up the serialized
            job; must not exceed maxAzureTablePropertySize
     :rtype: dict
     """
     assert chunkSize <= maxAzureTablePropertySize
     payload = bz2.compress(cPickle.dumps(self))
     item = {}
     for index, start in enumerate(range(0, len(payload), chunkSize)):
         key = '_' + str(index).zfill(3)
         piece = payload[start:start + chunkSize]
         item[key] = EntityProperty('Edm.Binary', piece)
     return item
    def test_binary_property_value(self):
        """Insert an entity with an explicitly typed binary property and
        check the type and value read back."""
        # Arrange
        payload = b'\x01\x02\x03\x04\x05\x06\x07\x08\t\n'
        entity = self._create_random_base_entity_dict()
        entity['binary'] = EntityProperty(EdmType.BINARY, payload)

        # Act
        self.ts.insert_entity(self.table_name, entity)
        partition_key = entity['PartitionKey']
        row_key = entity['RowKey']
        fetched = self.ts.get_entity(self.table_name, partition_key, row_key)

        # Assert
        self.assertIsNotNone(fetched)
        binary_property = fetched.binary
        self.assertEqual(binary_property.type, EdmType.BINARY)
        self.assertEqual(fetched.binary.value, payload)
Exemple #15
0
    def test_batch_inserts(self):
        """Insert 100 entities through one batch and check that all of
        them are returned by a query on their partition."""
        # Arrange
        insert_count = 100

        # Act
        template = Entity()
        template.PartitionKey = 'batch_inserts'
        template.test = EntityProperty(EdmType.BOOLEAN, 'true')
        template.test2 = 'value'
        template.test3 = 3
        template.test4 = EntityProperty(EdmType.INT64, '1234567890')

        batch = TableBatch()
        for row_number in range(insert_count):
            # Same entity object each time; only the RowKey changes.
            template.RowKey = str(row_number)
            batch.insert_entity(template)
        self.ts.commit_batch(self.table_name, batch)

        partition_filter = "PartitionKey eq 'batch_inserts'"
        stored = list(
            self.ts.query_entities(self.table_name, partition_filter, ''))

        # Assert
        self.assertIsNotNone(stored)
        self.assertEqual(insert_count, len(stored))
Exemple #16
0
    def test_batch_delete(self):
        """Insert an entity, delete it through a batch, and check the
        batch response."""
        # Arrange
        partition_key = '001'
        row_key = 'batch_delete'

        # Act
        seed = Entity()
        seed.PartitionKey = partition_key
        seed.RowKey = row_key
        seed.test = EntityProperty(EdmType.BOOLEAN, 'true')
        seed.test2 = 'value'
        seed.test3 = 3
        seed.test4 = EntityProperty(EdmType.INT64, '1234567890')
        seed.test5 = datetime.utcnow()
        self.ts.insert_entity(self.table_name, seed)

        # Confirm the entity is readable before deleting it.
        fetched = self.ts.get_entity(self.table_name, partition_key, row_key)
        self.assertEqual(3, fetched.test3)

        batch = TableBatch()
        batch.delete_entity(partition_key, row_key)
        resp = self.ts.commit_batch(self.table_name, batch)

        # Assert
        self.assertIsNotNone(resp)
        self.assertIsNone(resp[0])
Exemple #17
0
    def test_get_encrypted_dict(self):
        """Insert a dict entity whose 'sex' property is flagged for
        encryption, read it back, and check it with
        self._assert_default_entity."""
        # Arrange
        self.ts.require_encryption = True
        source = self._create_default_entity_dict()
        # Third EntityProperty argument flags the property for encryption.
        encrypted_sex = EntityProperty(EdmType.STRING, source['sex'], True)
        source['sex'] = encrypted_sex
        self.ts.key_encryption_key = KeyWrapper('key1')
        self.ts.insert_entity(self.table_name, source)

        # Act
        partition_key = source['PartitionKey']
        row_key = source['RowKey']
        fetched = self.ts.get_entity(self.table_name, partition_key, row_key)

        # Assert
        self._assert_default_entity(fetched)
Exemple #18
0
    def test_replace_entity(self):
        """Insert an entity, update it with an encryption-flagged 'sex'
        property, and check the value read back."""
        # Arrange
        source = self._create_random_entity_class()
        # Inserted before the key encryption key is set on self.ts.
        self.ts.insert_entity(self.table_name, source)
        source['sex'] = EntityProperty(EdmType.STRING, 'female', True)
        self.ts.key_encryption_key = KeyWrapper('key1')

        # Act
        self.ts.require_encryption = True
        self.ts.update_entity(self.table_name, source)
        partition_key = source['PartitionKey']
        row_key = source['RowKey']
        fetched = self.ts.get_entity(self.table_name, partition_key, row_key)

        # Assert
        expected_sex = source['sex'].value
        self.assertEqual(fetched['sex'], expected_sex)
Exemple #19
0
    def test_insert_encrypt_invalid_types(self):
        """Each entity built here flags a non-string property (or a string
        property whose value is None) for encryption; inserting any of
        them is expected to raise ValueError."""
        # Arrange
        self.ts.require_encryption = True
        entity_binary = self._create_random_entity_class()
        entity_binary['bytes'] = EntityProperty(EdmType.BINARY, urandom(10),
                                                True)
        entity_boolean = self._create_random_entity_class()
        entity_boolean['married'] = EntityProperty(EdmType.BOOLEAN, True, True)
        entity_date_time = self._create_random_entity_class()
        entity_date_time['birthday'] = EntityProperty(
            EdmType.DATETIME, entity_date_time['birthday'], True)
        entity_double = self._create_random_entity_class()
        # Typed as DOUBLE so this case really exercises a double property
        # (it was previously built as a second DATETIME property).
        entity_double['ratio'] = EntityProperty(EdmType.DOUBLE,
                                                entity_double['ratio'], True)
        entity_guid = self._create_random_entity_class()
        entity_guid['clsid'].encrypt = True
        entity_int32 = self._create_random_entity_class()
        entity_int32['other'].encrypt = True
        entity_int64 = self._create_random_entity_class()
        entity_int64['large'] = EntityProperty(EdmType.INT64,
                                               entity_int64['large'], True)
        self.ts.key_encryption_key = KeyWrapper('key1')
        entity_none_str = self._create_random_entity_class()
        entity_none_str['none_str'] = EntityProperty(EdmType.STRING, None,
                                                     True)

        # Act

        # Assert
        # The binary case also pins the exact error message.
        with self.assertRaises(ValueError) as caught:
            self.ts.insert_entity(self.table_name, entity_binary)
        self.assertEqual(str(caught.exception),
                         _ERROR_UNSUPPORTED_TYPE_FOR_ENCRYPTION)

        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_boolean)
        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_date_time)
        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_double)
        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_guid)
        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_int32)
        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_int64)
        with self.assertRaises(ValueError):
            self.ts.insert_entity(self.table_name, entity_none_str)
Exemple #20
0
def log_to_table(table_service,
                 table_name,
                 message,
                 status,
                 result=None,
                 exception=None,
                 create=False):
    """
    Record the status/result of a task as a row in a table.

    The row is written with insert_or_merge_entity. A failing write is
    retried every 2 seconds, up to 60 attempts in total.

    :param table_service: azure.storage.table.TableService
    :param table_name: Name of the Azure table to use.
    :param message: Dict from Azure queue or azure.storage.table.Entity.
                    The partition key is read from 'task_name' or
                    'PartitionKey', the row key from 'job_id' or 'RowKey'.
    :param status: Status of the task. Ex: STARTED, FAILED etc...
    :param result: Result if any; stored as its repr() when truthy.
    :param exception: Exception, if any; stored as its repr() when truthy.
    :param create: Bool. Adds the created date. Used to keep it even after
                   updating an existing row.
    :raises PawError: when either key is missing from message, or when the
                      last write attempt fails.
    """
    create_table_if_missing(table_service, table_name)
    entity = Entity()

    # Accept both queue messages and Entity objects as the key source.
    partition_key = message.get('task_name') or message.get('PartitionKey')
    row_key = message.get('job_id') or message.get('RowKey')
    if not (partition_key and row_key):
        raise PawError('message did not contained all required information. '
                       '"task_name" {}, "job_id" {}'.format(
                           partition_key, row_key))

    additional_log = message.get('additional_log')
    if additional_log:
        entity.update(additional_log)

    # Set after the additional log so these fields always win.
    entity.PartitionKey = partition_key
    entity.RowKey = row_key
    entity.status = status

    # Stored as explicit string properties because the Azure SDK's
    # serializer fails when results are repr(list).
    if result:
        # noinspection PyTypeChecker
        entity.result = EntityProperty(type=EdmType.STRING, value=repr(result))
    if exception:
        # noinspection PyTypeChecker
        entity.exception = EntityProperty(type=EdmType.STRING,
                                          value=repr(exception))

    if create:
        entity.dequeue_time = datetime.datetime.utcnow()

    attempts_left = 60
    while True:
        try:
            table_service.insert_or_merge_entity(table_name, entity)
        except AzureException as e:
            LOGGER.warning("Error from Azure table service: "
                           "{}".format(traceback.format_exc()))
            attempts_left -= 1
            if attempts_left == 0:
                # Out of attempts: log at error level and give up.
                LOGGER.error("Error from Azure table service: "
                             "{}".format(traceback.format_exc()))
                raise PawError(e)
            time.sleep(2)
        else:
            return
Exemple #21
0
 def _create_default_entity_for_encryption(self):
     '''
     Return the entity from self._create_random_entity_class() with its
     'sex' and 'name' values re-wrapped as string EntityProperty objects
     flagged for encryption.
     '''
     entity = self._create_random_entity_class()
     for property_name in ('sex', 'name'):
         current_value = entity[property_name]
         entity[property_name] = EntityProperty(EdmType.STRING,
                                                current_value, True)
     return entity
    def create_entity(self, instagram_post_entity, pk=None, rk=None):
        """Build a table entity dict from an Instagram post mapping.

        Each known field that is present in instagram_post_entity with a
        value other than None is copied into the result as a typed
        EntityProperty. TAGS, IMAGE_CONTEXT and DIMENSIONS are first passed
        through self.serialize_entity_attribute_value and stored as strings.

        :param instagram_post_entity: mapping holding the post's fields
        :param pk: partition key; self.get_pk() is used when None
        :param rk: row key; self.get_pk() is used when None
        :return: dict with 'PartitionKey', 'RowKey' and the copied fields
        """
        # (field name, Edm type, serialize before storing?), listed in the
        # order the properties are added to the entity.
        field_specs = (
            (PICTURE_ID, EdmType.INT64, False),
            (OWNER_ID, EdmType.INT64, False),
            (LOGO_NAME, EdmType.STRING, False),
            # This is a unix epoch timestamp (we will do conversion from utc
            # to epoch for search).
            (TIME, EdmType.INT64, False),
            (CAPTION, EdmType.STRING, False),
            (TAGS, EdmType.STRING, True),
            (HAS_LOGO, EdmType.BOOLEAN, False),
            (ACCURACY, EdmType.DOUBLE, False),
            (IMAGE_CONTEXT, EdmType.STRING, True),
            (IMAGE_PATH, EdmType.STRING, False),
            (DIMENSIONS, EdmType.STRING, True),
        )

        # NOTE(review): the RowKey fallback also calls get_pk(); confirm this
        # is intended rather than a dedicated row-key generator.
        entity = {
            'PartitionKey': pk if pk is not None else self.get_pk(),
            'RowKey': rk if rk is not None else self.get_pk(),
        }

        for field, edm_type, needs_serialization in field_specs:
            if field not in instagram_post_entity:
                continue
            value = instagram_post_entity[field]
            if value is None:
                continue
            if needs_serialization:
                value = self.serialize_entity_attribute_value(value)
            entity[field] = EntityProperty(edm_type, value)
        return entity
Exemple #23
0
 def _create_entity_for_encryption(self):
     '''
     Return the entity from self._create_base_entity_class() with an added
     string property 'foo' = 'bar' that is flagged for encryption.
     '''
     entity = self._create_base_entity_class()
     encrypted_foo = EntityProperty(EdmType.STRING, 'bar', True)
     entity['foo'] = encrypted_foo
     return entity