def db_log_step(userID, recipe, step):
    """Record the step a user was just given.

    Two writes are issued through the client returned by ``db_connect()``:

    1. A new timestamped item is put into the table named by the
       ``STEP_HISTORY_TABLE`` environment variable.
    2. The ``step`` and ``recipe`` attributes of the user's row in the table
       named by ``STEP_LAST_TABLE`` are overwritten.

    Returns:
        tuple: ``(put_item response, update_item response)``.
    """
    client = db_connect()
    serializer = TypeSerializer()

    # History table: one item per logged step, keyed by user and time.
    history_table = os.environ['STEP_HISTORY_TABLE']
    history_item = {
        'userID': {'S': userID},
        'time': {'N': str(time.time())},
        'step': serializer.serialize(step),
        'recipe': serializer.serialize(recipe),
    }
    put_resp = client.put_item(TableName=history_table, Item=history_item)

    # "Last step" table: a single row per user that is overwritten each time.
    last_table = os.environ['STEP_LAST_TABLE']
    user_key = {'userID': {'S': userID}}
    overwrites = {
        'step': {'Action': 'PUT', 'Value': serializer.serialize(step)},
        'recipe': {'Action': 'PUT', 'Value': serializer.serialize(recipe)},
    }
    upd_resp = client.update_item(
        TableName=last_table,
        Key=user_key,
        AttributeUpdates=overwrites,
    )

    return (put_resp, upd_resp)
def updateData(self, numRows, start_key, field_key, field_value):
    """Set one field to a fixed value on generated items of every expected table.

    For each table config returned by ``self.expected_table_config()``, the
    config's ``generator`` callable is asked for ``numRows`` items starting at
    ``start_key``. Each generated item is deserialized to recover its hash-key
    value, and an ``update_item`` call sets ``field_key`` to ``field_value``
    on the row with that key.

    Args:
        numRows: Number of items to request from each table's generator.
        start_key: Start key forwarded to each table's generator.
        field_key: Attribute name, interpolated into the update expression.
        field_value: Native Python value to store under ``field_key``.
    """
    client = self.dynamodb_client()
    deserializer = TypeDeserializer()
    serializer = TypeSerializer()

    # Both of these depend only on the arguments, so build them once instead
    # of once per item.
    serialized_field_value = serializer.serialize(field_value)
    update_expression = 'set {}=:v'.format(field_key)

    for table in self.expected_table_config():
        table_name = table['TableName']
        hash_key = table['HashKey']
        LOGGER.info('Updating %s Items by setting field with key %s to the value %s, with start_key %s, for table %s', numRows, field_key, field_value, start_key, table_name)

        for item in table['generator'](numRows, start_key):
            record = deserializer.deserialize(item)
            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.update_item
            client.update_item(
                TableName=table_name,
                Key={hash_key: serializer.serialize(record[hash_key])},
                UpdateExpression=update_expression,
                ExpressionAttributeValues={
                    ':v': serialized_field_value,
                },
            )
def lambda_handler(event, context):
    """Write an account record built from ``event`` into the ``accounts`` table.

    Required event keys: ``accountId``, ``description``, ``email``,
    ``accountemail``, ``enabledregions`` (comma-separated string) and
    ``ouname``. Optional keys: ``customermasteraccountid`` (falls back to the
    ``accountid`` environment variable), ``config`` and ``support`` (each
    defaults to ``"disabled"``).

    The serialized item is printed as JSON before it is stored.

    Args:
        event: Mapping carrying the account attributes described above.
        context: Lambda context object; not used.

    Returns:
        The ``put_item`` response from the DynamoDB client.

    Raises:
        KeyError: If a required event key is missing, or if neither
            ``customermasteraccountid`` nor the ``accountid`` environment
            variable is available.
    """
    account_id = str(event['accountId'])
    account_name = str(event['description'])

    # Only consult the environment when the event does not carry the value,
    # so a missing environment variable is not an error in that case.
    if 'customermasteraccountid' in event:
        master_account_id = event['customermasteraccountid']
    else:
        master_account_id = os.environ['accountid']

    security_email = str(event['email'])
    account_email = str(event['accountemail'])
    enabled_regions = event['enabledregions'].split(',')

    config = event.get('config', "disabled")
    # The original fallback assigned to `config` here, which left `support`
    # unbound (NameError below) and overwrote a supplied `config`.
    support = event.get('support', "disabled")

    ouname = event['ouname']

    dynamoentry = {
        'accountId': account_id,
        'accountName': account_name,
        'config': config,
        'support': support,
        'ouname': ouname,
        'accountRole': "customer",
        'confidentialKMSKey': 'alias/TSI_Base_ConfidentialS3Key',
        'internalKMSKey': 'alias/TSI_Base_InternalS3Key',
        'masteraccountId': master_account_id,
        'readonlyRole': 'TSI_Base_ReadOnlySwitchRole',
        'securityEmail': security_email,
        'accountemail': account_email,
        'enabledregions': enabled_regions,
        'supportenabled': 'false',
        'awsconfigenabled': 'false',
        'terraformVersion': '1.0',
        'writeRole': 'TSI_Base_FullAccess',
        'featureLevel': 'full',
    }

    # Serializing the whole dict yields {'M': {...}}; the inner mapping is the
    # item in the form put_item expects. Serialize once and reuse it.
    item = TypeSerializer().serialize(dynamoentry)['M']
    print(json.dumps(item))

    dynamoclient = boto3.client('dynamodb')
    return dynamoclient.put_item(TableName='accounts', Item=item)
def _save_dice_pools(self):
    """Store ``self.pools`` as a new item in the ``DicePools`` table.

    The item carries a fixed ``game`` of ``'Shadowrun'``, the current time as
    ``timestamp`` and the serialized pools.

    Raises:
        Exception: If ``self.pools`` is still ``None``.
    """
    if self.pools is None:
        raise Exception("Tried to save dice pools before loading them.")

    to_dynamo = TypeSerializer().serialize
    item = dict(
        game=to_dynamo('Shadowrun'),
        timestamp=to_dynamo(Decimal(time.time())),
        pools=to_dynamo(self.pools),
    )

    logger.debug(f"Item before put_item()ing: {item}")
    self._client.put_item(TableName='DicePools', Item=item)
def boto3_serializer(python_dict):
    """Serialize each value of ``python_dict`` into DynamoDB typed form.

    Top-level ``float`` values are converted to ``Decimal`` through their
    string form before serialization; all other values are passed to the
    serializer unchanged.

    Returns:
        dict: The same keys mapped to their serialized values.
    """
    serializer = TypeSerializer()
    serialized = {}
    for name, value in python_dict.items():
        if isinstance(value, float):
            value = Decimal(str(value))
        serialized[name] = serializer.serialize(value)
    return serialized
def getSpecificProductInformation(self, keyValue):
    """Fetch one row from the ``Product`` table in DynamoDB typed form.

    Args:
        keyValue: Key mapping passed straight to ``Table.get_item``.

    Returns:
        dict: Attribute name -> serialized attribute value, or ``{}`` when no
        row matches the key.

    Raises:
        Exception: If the response is empty or its HTTP status is not 200.
            Any error raised by the lookup itself is re-raised after a
            message is printed.
    """
    print("In getSpecificProductInformation() method !")
    try:
        table = self.dynamodb.Table('Product')
        resp = table.get_item(Key=keyValue)
        # Example response:
        # {'Item': {'Id': Decimal('1'), 'ProductCategoryId': Decimal('1'), 'Name': 'IPhone 8\n'},
        #  'ResponseMetadata': {'RequestId': '...', 'HTTPStatusCode': 200, ...}}
        print(resp)

        if not resp or resp["ResponseMetadata"]["HTTPStatusCode"] != 200:
            raise Exception

        # GetItem leaves 'Item' out of the response when nothing matches, so
        # index access would raise KeyError instead of yielding an empty result.
        item = resp.get("Item")
        if not item:
            return {}

        # e.g. {'Id': Decimal('1'), 'ProductCategoryId': Decimal('1'), 'Name': 'IPhone 8\n'}
        print(item)
        serializer = TypeSerializer()
        return {k: serializer.serialize(v) for k, v in item.items()}
    except Exception:
        print("Not able to fetch item !")
        raise
def construct_dynamo_type_dict(d: dict):
    """
    Convert a plain dict into the typed attribute map used by DynamoDB
    transactions.

    Transaction requests describe every attribute value with an explicit type
    tag, recursively:

        'string': {
            'S': 'string',
            'N': 'string',
            'B': b'bytes',
            'SS': ['string', ],
            'NS': ['string', ],
            'BS': [b'bytes', ],
            'M': {'string': {'... recursive ...'}},
            'L': [{'... recursive ...'}, ],
            'NULL': True|False,
            'BOOL': True|False
        }

    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.transact_write_items

    TypeSerializer produces this representation. Serializing the whole dict
    wraps it as {'M': {...}}, so the inner mapping under 'M' is what gets
    returned.
    """
    wrapped = TypeSerializer().serialize(d)
    return wrapped['M']
def generate_items(self, num_items):
    """Yield ``num_items`` serialized sample records.

    Record ``i`` (counting from 0) holds an integer id of ``int(i / 10.0)``,
    a decimal, a string, bytes, an int list and int set built from
    ``i, i + 1, i + 2``, a fixed map, three random strings, two booleans and
    a null. Each record is passed through ``TypeSerializer`` before it is
    yielded.
    """
    to_dynamo = TypeSerializer().serialize
    for index in range(num_items):
        triple = [index, index + 1, index + 2]
        yield to_dynamo({
            'int_id': int(index / 10.0),
            'decimal_field': decimal.Decimal(f'{index}.00000000001'),
            'string_field': str(index),
            'byte_field': b'some_bytes',
            'int_list_field': triple,
            'int_set_field': set(triple),
            'map_field': {
                'map_entry_1': 'map_value_1',
                'map_entry_2': 'map_value_2',
            },
            'string_list': [self.random_string_generator() for _ in range(3)],
            'boolean_field': True,
            'other_boolean_field': False,
            'null_field': None,
        })
def as_dynamo_flat_dict(self):
    """
    Collapse the cleaned user dict into plain attribute values and serialize
    them for DynamoDB.

    Every attribute that carries a "value" or "values" entry is replaced by
    that entry; attributes without one are flattened recursively. String
    entries are skipped. The output looks like:
    ```{'uuid': '11c8a5c8-0305-4524-8b41-95970baba84c', 'user_id': 'email|c3cbf9f5830f1358e28d6b68a3e4bf15', ...```

    Note that this form cannot be verified or validated back since signature
    and metadata are dropped.

    Return: dynamodb serialized low level dict of the user in flattened form
    """
    user = self._clean_dict()

    def flatten(attrs):
        collapsed = {}
        for name in attrs:
            entry = attrs[name]
            # Plain strings (e.g. "schema") carry no value wrapper; skip them.
            if isinstance(entry, str):
                continue
            entry_keys = set(entry)
            if "value" in entry_keys or "values" in entry_keys:
                payload = entry.get("value", entry.get("values"))
                # None and empty-string payloads are left out of the result.
                if payload is not None and payload != "":
                    collapsed[name] = payload
            else:
                collapsed[name] = flatten(entry)
        return collapsed

    serializer = TypeSerializer()
    serialized = {}
    for name, payload in flatten(user).items():
        serialized[name] = serializer.serialize(payload)
    return serialized
def convert_json_to_dynamo_json(input_json: list) -> list:
    """
    Re-serialize plain JSON records into DynamoDB typed JSON suitable for
    putting items into a table.

    Args:
        input_json: List of dicts as loaded from JSON. It is not modified.

    Returns:
        A new list with one dict per input record, each value replaced by its
        DynamoDB typed representation.
    """
    # HACK: JSON has no set type, so values that should be DynamoDB sets
    # arrive as Python lists and would be serialized as DynamoDB lists.
    # Every top-level list value is therefore converted to a set first:
    # sets avoid duplicate entries (e.g. for periods) and are easier to read.
    # The drawback is that sets are unordered, which is acceptable as long as
    # the consumer does not depend on element order.
    serializer = TypeSerializer()
    logger.info("Converting JSON config to DynamoDB compatible JSON.")

    # Build fresh dicts instead of rewriting the caller's records in place.
    return [
        {
            key: serializer.serialize(set(value) if isinstance(value, list) else value)
            for key, value in record.items()
        }
        for record in input_json
    ]
def serialize_output(value):
    """Best-effort, in-place conversion of a mapping's values to DynamoDB types.

    Every value of ``value`` is replaced by its ``TypeSerializer`` form. If
    anything goes wrong (``value`` is not mapping-like, a value has an
    unsupported type, ...) ``value`` is returned untouched.

    Args:
        value: Mapping-like object to convert.

    Returns:
        The same ``value`` object, converted when possible.
    """
    try:
        serializer = TypeSerializer()
        # Serialize everything before touching `value`, so a failure half-way
        # through cannot leave it with a mix of converted and raw entries.
        converted = {k: serializer.serialize(v) for k, v in dict(value).items()}
        for k, v in converted.items():
            value[k] = v
    except Exception:
        # Deliberately best-effort: unconvertible input is handed back as-is.
        # Only Exception is caught, so KeyboardInterrupt and SystemExit still
        # propagate.
        pass
    return value
def dynamodb_put_item(ddb_client, table_name: str, item: dict):
    """Serialize ``item`` and put it into ``table_name``.

    Raises:
        TyphoonResourceNotFoundError: If the client reports that the table
            does not exist.
    """
    # Serializing a dict yields {'M': {...}}; the inner mapping is the item.
    wire_item = TypeSerializer().serialize(item)['M']
    try:
        ddb_client.put_item(TableName=table_name, Item=wire_item)
    except ddb_client.exceptions.ResourceNotFoundException:
        message = f'Table {table_name} does not exist in DynamoDB'
        raise TyphoonResourceNotFoundError(message)
def generate_items(self, num_items, start_key=0):
    """Yield ``num_items`` serialized records with consecutive ids.

    Ids run from ``start_key`` to ``start_key + num_items - 1``. Each record
    also carries a random string and a ``True`` boolean, and is passed through
    ``TypeSerializer`` before it is yielded.
    """
    to_dynamo = TypeSerializer().serialize
    stop = start_key + num_items
    for record_id in range(start_key, stop):
        yield to_dynamo({
            'int_id': record_id,
            'string_field': self.random_string_generator(),
            'boolean_field': True,
        })
def update_item_from_dict(table_name, key, dictionary, client):
    """
    Set every attribute in `dictionary` on the item identified by `key` in the
    DynamoDB table `table_name`.

    Each attribute name is reduced through the module-level `pattern` to form
    the `#name` / `:value` placeholders of a single `SET` update expression.

    Args:
        table_name (str): Table holding the item.
        key (dict): Native key attributes of the item; serialized before use.
        dictionary (dict): Attribute names mapped to native values to set.
        client: DynamoDB client used for the `update_item` call.

    Returns:
        dict: The deserialized attributes of the updated item (`ALL_NEW`), or
        None if the client returned a falsy response.
    """
    serializer = TypeSerializer()
    deserializer = TypeDeserializer()

    # One placeholder token per attribute, derived from the attribute name.
    tokens = [(name, pattern.sub("", name), value) for name, value in dictionary.items()]

    assignments = [f'#{token} = :{token}' for _, token, _ in tokens]
    update_expression = 'SET ' + ', '.join(assignments)

    attribute_names = {}
    for name, token, _ in tokens:
        attribute_names[f'#{token}'] = name

    attribute_values = {}
    for _, token, value in tokens:
        attribute_values[f':{token}'] = serializer.serialize(value)

    response = client.update_item(
        TableName=table_name,
        Key={name: serializer.serialize(value) for name, value in key.items()},
        UpdateExpression=update_expression,
        ExpressionAttributeNames=attribute_names,
        ExpressionAttributeValues=attribute_values,
        ReturnValues='ALL_NEW',
    )

    if not response:
        return None

    updated = response.get('Attributes', {})
    return {name: deserializer.deserialize(raw) for name, raw in updated.items()}
def fetchItems(hashes, projections, l18n = 'en'):
    """Batch-fetch the current version of items and index them by hash.

    One key is built per ``(component, hash)`` pair with partition key
    ``'{l18n}#{component}#{hash}'`` and sort key ``'version#current'``. Keys
    are requested through ``batch_get_item`` in chunks of 100.

    Args:
        hashes: Mapping of component name to an iterable of hashes.
        projections: Attribute names to project. The ``hash`` attribute is
            always added; the caller's list is not modified.
        l18n: Language prefix of the partition key.

    Returns:
        dict: ``int(item['hash'])`` mapped to the deserialized item.
    """
    # NOTE(review): the original requested 'mywarmind-table' but read the
    # results under 'mywarmind', which raises KeyError. The requested name is
    # used for both here; confirm it is the real table name.
    table_name = 'mywarmind-table'

    # Work on a copy so repeated calls with the same list do not keep
    # appending "#h" to the caller's projections.
    projections = [*projections, "#h"]
    print(projections)

    serializer = TypeSerializer()
    deserializer = TypeDeserializer()

    keys = [
        {
            'partition': serializer.serialize(f'{l18n}#{component}#{item_hash}'),
            'sort': serializer.serialize('version#current')
        }
        for component in hashes
        for item_hash in hashes[component]
    ]
    print(keys)

    projection_expression = ", ".join(projections)
    result = []
    while keys:
        # The loop sends at most 100 keys per batch_get_item request.
        bulk, keys = keys[:100], keys[100:]
        response = dynamodb.batch_get_item(
            ReturnConsumedCapacity = 'TOTAL',
            RequestItems = {
                table_name: {
                    'Keys': bulk,
                    'ProjectionExpression': projection_expression,
                    'ExpressionAttributeNames': {
                        '#h': 'hash'
                    }
                }
            }
        )
        # NOTE: keys reported under 'UnprocessedKeys' are not retried here.
        result.extend(
            {k: deserializer.deserialize(v) for k, v in item.items()}
            for item in response['Responses'].get(table_name, [])
        )
        # Drop the payload so only the request metadata is printed.
        del response['Responses']
        print(json.dumps(response, cls=DecimalEncoder))

    return {int(item['hash']): item for item in result}
def dict_to_ddb(item):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    # TODO: narrow these types down
    """Serialize every value of a native dictionary into DynamoDB typed form.

    :param dict item: Native item
    :returns: DynamoDB item with the same keys
    :rtype: dict
    """
    to_dynamo = TypeSerializer().serialize
    ddb_item = {}
    for name, native_value in item.items():
        ddb_item[name] = to_dynamo(native_value)
    return ddb_item
def _get_dict(self):
    '''
    Collects the instance attributes that should be stored in the database.

    An attribute is kept when its name does not appear in ``dir(type(self))``,
    does not start with an underscore, its value is neither a method nor a
    function, and the value either is an ``Object`` or can be serialized by
    ``TypeSerializer``. Attributes that raise during any of these checks are
    silently left out.

    :rtype: dict
    '''
    ts = TypeSerializer()
    cls = type(self)
    # Computed once: the original rebuilt dir(type(self)) for every name.
    class_level_names = set(dir(cls))

    d = {}
    for a in dir(self):
        # Limited to instance attributes only, not class attributes.
        if a in class_level_names:
            continue
        try:
            value = getattr(self, a)
            if inspect.ismethod(value) or inspect.isfunction(value):
                continue
            if a[0] == '_':
                continue
            if hasattr(cls, a) and isinstance(getattr(cls, a), property):
                continue
            if not isinstance(value, Object):
                # Dry-run serialization: if DDB would choke on the data type
                # this raises and the attribute is not added. Objects are
                # converted to foreign-key references before saving, so they
                # are exempt from this check.
                ts.serialize(value)
            d[a] = value
        except Exception:
            # Deliberate best-effort: an attribute that cannot be read or
            # serialized is simply not stored.
            pass
    return d
def dict_to_ddb(item):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    # narrow these types down
    # https://github.com/aws/aws-dynamodb-encryption-python/issues/66
    """Turn a native Python dictionary into a raw DynamoDB item.

    Each value is passed through ``TypeSerializer``; keys are kept as they are.

    :param dict item: Native item
    :returns: DynamoDB item
    :rtype: dict
    """
    serializer = TypeSerializer()
    raw_item = {}
    for attribute, native in item.items():
        raw_item[attribute] = serializer.serialize(native)
    return raw_item
def as_dynamo_flat_dict(self):
    """
    Collapse the cleaned user dict into plain attribute values and serialize
    them for DynamoDB.

    The output looks like:
    ```{'uuid': '11c8a5c8-0305-4524-8b41-95970baba84c', 'user_id': 'email|c3cbf9f5830f1358e28d6b68a3e4bf15', ...```

    Sanitizing is recursive: empty strings are dropped from lists and dicts,
    and a dict holding exactly one of "value" / "values" is replaced by that
    entry. Note that this form cannot be verified or validated back since
    signature and metadata are dropped.

    Return: dynamodb serialized low level dict of the user in flattened form
    """
    user = self._clean_dict()

    # Values of exactly these types are returned as they are.
    passthrough_types = (type(None), bool, int, float)

    def is_kept(candidate):
        # Everything except the empty string survives filtering.
        return not (isinstance(candidate, str) and len(candidate) == 0)

    def sanitize(attrs):
        if type(attrs) in passthrough_types:
            return attrs
        if isinstance(attrs, str) and len(attrs) > 0:
            return attrs

        if isinstance(attrs, list):
            return [sanitize(entry) for entry in attrs if is_kept(entry)]

        # Anything left is treated as a dictionary.
        cleaned = {}
        for key, value in attrs.items():
            if is_kept(key) and is_kept(value):
                cleaned[key] = sanitize(value)

        # Unwrap only when exactly one of "value" / "values" is present.
        if ("value" in cleaned) != ("values" in cleaned):
            return cleaned.get("value", cleaned.get("values"))
        return cleaned

    serializer = TypeSerializer()
    serialized = {}
    for name, payload in sanitize(user).items():
        serialized[name] = serializer.serialize(payload)
    return serialized
def build_shopify_url(self):
    """Build one query descriptor per product page of the shop.

    The page count is read from ``self.params['pages']`` by serializing it
    and converting the resulting ``'N'`` entry to ``int``. Pages are numbered
    from 1.

    Returns:
        list: One dict per page with the ``products.json`` URL, the country,
        the current datetime and the shop id.
    """
    # Get page number:
    serialized_pages = TypeSerializer().serialize(self.params['pages'])
    page_count = int(serialized_pages['N'])

    query_params = []
    for page in range(1, page_count + 1):
        query_params.append({
            "url": "{}products.json?page={}".format(self.params['url'], page),
            "country": self.params['country'],
            "current_datetime": self.current_datetime,
            "id_shop": self.idTable,
        })
    return query_params
class DdbDeserializer(ff.DomainService):
    """Domain service wrapping boto3's DynamoDB type serializer and deserializer."""

    # Both are replaced by real instances in __init__.
    _serializer: TypeSerializer = None
    _deserializer: TypeDeserializer = None

    def __init__(self):
        self._serializer, self._deserializer = TypeSerializer(), TypeDeserializer()

    def serialize(self, data):
        """Return ``data`` in DynamoDB typed form."""
        typed = self._serializer.serialize(data)
        return typed

    def deserialize(self, data: dict):
        """Return a dict with every typed value of ``data`` converted to a native value."""
        native = {}
        for name, typed_value in data.items():
            native[name] = self._deserializer.deserialize(typed_value)
        return native
def generate_items(self, num_items, start_key=0):
    """Yield ``num_items`` serialized sample records with consecutive ids.

    Ids run from ``start_key`` to ``start_key + num_items - 1``. Each record
    carries a decimal, a random string, bytes, an int list and int set built
    from ``id, id + 1, id + 2``, a nested map, a list of maps, three random
    strings, two booleans and a null. Each record is passed through
    ``TypeSerializer`` before it is yielded.
    """
    to_dynamo = TypeSerializer().serialize
    stop = start_key + num_items
    for record_id in range(start_key, stop):
        triple = [record_id, record_id + 1, record_id + 2]
        yield to_dynamo({
            'int_id': record_id,
            'decimal_field': decimal.Decimal(f'{record_id}.00000000001'),
            'string_field': self.random_string_generator(),
            'byte_field': b'some_bytes',
            'int_list_field': triple,
            'int_set_field': set(triple),
            'map_field': {
                'map_entry_1': 'map_value_1',
                'map_entry_2': 'map_value_2',
                'list_entry': list(triple),
            },
            'list_map': [
                {'a': 1, 'b': 2},
                {'a': 100, 'b': 200},
            ],
            'string_list': [self.random_string_generator() for _ in range(3)],
            'boolean_field': True,
            'other_boolean_field': False,
            'null_field': None,
        })
class DynamoDbClient: """ Has default methods for different types of DynamoDB tables. The current implementation supports only one fixed table during initialization, but you are free to initialize multiple simultaneous dynamo_clients in your Lambda with different configs. Config should have a mapping for the field types and required fields. Config example: .. code-block:: python { 'row_mapper': { 'col_name_1': 'N', # Number 'col_name_2': 'S', # String }, 'required_fields': ['col_name_1'] 'table_name': 'some_table_name', # If a table is not specified, this table will be used. 'hash_key': 'the_hash_key', 'dont_json_loads_results': True # Use this if you don't want to convert json strings into json } """ def __init__(self, config): assert isinstance(config, dict), "Config must be provided during DynamoDbClient initialization" # If this is a test, make sure the table is a test table if os.environ.get('STAGE') == 'test' and 'table_name' in config: assert config['table_name'].startswith('autotest_') or config['table_name'] == 'config', \ f"Bad table name {config['table_name']} in autotest" self.config = config if not str(config.get('table_name')).startswith('autotest_mock_'): self.dynamo_client = boto3.client('dynamodb') else: logger.info(f"Initialized DynamoClient without boto3 client for table {config.get('table_name')}") # storage for table description(s) self._table_descriptions: Optional[Dict[str, Dict]] = {} # initialize table store self._table_capacity = {} self.identify_dynamo_capacity(table_name=self.config['table_name']) self.stats = defaultdict(int) if not hasattr(self, 'row_mapper'): self.row_mapper = self.config.get('row_mapper') self.type_serializer = TypeSerializer() self.type_deserializer = TypeDeserializer() def identify_dynamo_capacity(self, table_name=None): """Identify and store the table capacity for a given table on the object Arguments: table_name {str} -- short name of the dynamo db table to analyze """ # Use the config value if not provided if 
table_name is None: table_name = self.config['table_name'] logging.debug("Got `table_name` from config: {table_name}") logging.debug(f"DynamoDB table name identified as {table_name}") # Fetch the actual configuration of the dynamodb table directly for table_description = self._describe_table(table_name) # Hash to the capacity table_capacity = table_description["Table"]["ProvisionedThroughput"] self._table_capacity[table_name] = { 'read': int(table_capacity["ReadCapacityUnits"]), 'write': int(table_capacity["WriteCapacityUnits"]), } def _describe_table(self, table_name: Optional[str] = None) -> Dict: """ Returns description of the table from AWS. Response like: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.describe_table :return: Description of the table """ table_name = self._get_validate_table_name(table_name) if self._table_descriptions and table_name in self._table_descriptions: return self._table_descriptions[table_name] else: table_description = self.dynamo_client.describe_table(TableName=table_name) self._table_descriptions[table_name] = table_description return table_description def get_table_keys(self, table_name: Optional[str] = None) -> Tuple[str, Optional[str]]: """ Returns table's hash key name and range key name :param table_name: :return: hash key and range key names """ table_description = self._describe_table(table_name) key_schema: List[Dict[str, str]] = table_description['Table']['KeySchema'] hash_key = range_key = None for key in key_schema: if key['KeyType'] == 'HASH': hash_key = key['AttributeName'] elif key['KeyType'] == 'RANGE': range_key = key['AttributeName'] return hash_key, range_key def get_table_indexes(self, table_name: Optional[str] = None) -> Dict: """ Returns **active** indexes of the table: their hash key, range key, and projection type. .. 
code-block:: python { 'index_1_name': { 'projection_type': 'ALL', # One of: 'ALL'|'KEYS_ONLY'|'INCLUDE' 'hash_key': 'the_hash_key_column_name', 'range_key': 'the_range_key_column_name', # Can be None if the index has no range key 'provisioned_throughput': { 'write_capacity': 5, 'read_capacity': 10 } }, 'index_2_name': ... } """ indexes = {} table_description = self._describe_table(table_name) local_secondary_indexes = table_description['Table'].get('LocalSecondaryIndexes', []) global_secondary_indexes = table_description['Table'].get('GlobalSecondaryIndexes', []) for index in local_secondary_indexes + global_secondary_indexes: if index.get('IndexStatus') is not None and index.get('IndexStatus') != 'ACTIVE': # Only global sec. indexes has IndexStatus, and if it's not ready for use, we don't return it continue name = index['IndexName'] projection_type = index['Projection']['ProjectionType'] # 'ALL'|'KEYS_ONLY'|'INCLUDE' key_schema = index['KeySchema'] hash_key = range_key = None for key in key_schema: if key['KeyType'] == 'HASH': hash_key = key['AttributeName'] elif key['KeyType'] == 'RANGE': range_key = key['AttributeName'] # Get write & read capacity. # global sec. indexes have their own capacities, while a local sec. index uses the capacity of the table. write_capacity = index.get('ProvisionedThroughput', {}).get('WriteCapacityUnits') or \ table_description['ProvisionedThroughput']['WriteCapacityUnits'] read_capacity = index.get('ProvisionedThroughput', {}).get('ReadCapacityUnits') or \ table_description['ProvisionedThroughput']['ReadCapacityUnits'] indexes[name] = { 'projection_type': projection_type, 'hash_key': hash_key, 'range_key': range_key, 'provisioned_throughput': { 'write_capacity': write_capacity, 'read_capacity': read_capacity } } return indexes def dynamo_to_dict(self, dynamo_row: Dict, strict: bool = None, fetch_all_fields: Optional[bool] = None) -> Dict: """ Convert the ugly DynamoDB syntax of the row, to regular dictionary. 
We currently support only String or Numeric values. Latest ones are converted to int or float. Takes settings from row_mapper. e.g.: {'key1': {'N': '3'}, 'key2': {'S': 'value2'}} will convert to: {'key1': 3, 'key2': 'value2'} :param dict dynamo_row: DynamoDB row item :param bool strict: DEPRECATED. :param bool fetch_all_fields: If False only row_mapper fields will be extracted from dynamo_row, else, all fields will be extracted from dynamo_row. :return: The row in a key-value format :rtype: dict """ if strict is not None: logging.warning(f"dynamo_to_dict `strict` variable is deprecated in sosw 0.7.13+. " f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)") fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict result = {} # Get fields from dynamo_row which are present in row mapper if not fetch_all_fields: for key, key_type in self.row_mapper.items(): val_dict = dynamo_row.get(key) # Ex: {'N': "1234"} or {'S': "myvalue"} if val_dict: val = val_dict.get(key_type) # Ex: 1234 or "myvalue" # type_deserializer.deserialize() parses 'N' to `Decimal` type but it cant be parsed to a datetime # so we cast it to either an integer or a float. if key_type == 'N': result[key] = float(val) if '.' in val else int(val) elif key_type == 'M': result[key] = self.dynamo_to_dict(val, fetch_all_fields=True) elif key_type == 'S': # Try to load to a dictionary if looks like JSON. 
if val.startswith('{') and val.endswith('}') and \ not self.config.get('dont_json_loads_results'): try: result[key] = json.loads(val) except ValueError: logger.warning(f"A JSON-looking string failed to parse: {val}") result[key] = val else: result[key] = val else: result[key] = self.type_deserializer.deserialize(val_dict) # Get all fields from dynamo_row else: for key, val_dict in dynamo_row.items(): for val_type, val in val_dict.items(): # type_deserializer.deserialize() parses 'N' to `Decimal` type but it cant be parsed to a datetime # so we cast it to either an integer or a float. if val_type == 'N': result[key] = float(val) if '.' in val else int(val) elif val_type == 'M': result[key] = self.dynamo_to_dict(val, fetch_all_fields=True) elif val_type == 'S': # Try to load to a dictionary if looks like JSON. if val.startswith('{') and val.endswith('}') and \ not self.config.get('dont_json_loads_results'): try: result[key] = json.loads(val) except ValueError: logger.warning(f"A JSON-looking string failed to parse: {val}") result[key] = val else: result[key] = val else: result[key] = self.type_deserializer.deserialize(val_dict) assert all(True for x in self.config['required_fields'] if result.get(x)), "Some `required_fields` are missing" return result def dict_to_dynamo(self, row_dict, add_prefix=None, strict=True): """ Convert the row from regular dictionary to the ugly DynamoDB syntax. Takes settings from row_mapper. e.g. {'key1': 'value1', 'key2': 'value2'} will convert to: {'key1': {'Type1': 'value1'}, 'key2': {'Type2': 'value2'}} :param dict row_dict: A row we want to convert to dynamo syntax. :param str add_prefix: A string prefix to add to the key in the result dict. Useful for queries like update. :param bool strict: If False, will get the type from the value in the dict (this works for numbers and strings). If True, won't add them if they're not in the required_fields, and if they are, will raise an error. 
:return: DynamoDB Task item :rtype: dict """ if add_prefix is None: add_prefix = '' result = {} # Keys from row mapper for key, key_type in self.row_mapper.items(): val = row_dict.get(key) if val is not None: key_with_prefix = f"{add_prefix}{key}" if key_type == 'BOOL': result[key_with_prefix] = {'BOOL': to_bool(val)} elif key_type == 'N': result[key_with_prefix] = {'N': str(val)} elif key_type == 'S': result[key_with_prefix] = {'S': str(val)} elif key_type == 'M': result[key_with_prefix] = {'M': self.dict_to_dynamo(val, strict=False)} else: result[key_with_prefix] = self.type_serializer.serialize(val) result_keys = result.keys() if add_prefix: result_keys = [x[len(add_prefix):] for x in result.keys()] # Keys which are not in row mapper for key in list(set(row_dict.keys()) - set(result_keys)): if not strict: val = row_dict.get(key) key_with_prefix = f"{add_prefix}{key}" if isinstance(val, bool): result[key_with_prefix] = {'BOOL': to_bool(val)} elif isinstance(val, (int, float)) or (isinstance(val, str) and val.isnumeric()): result[key_with_prefix] = {'N': str(val)} elif isinstance(val, str): result[key_with_prefix] = {'S': str(val)} elif isinstance(val, dict): result[key_with_prefix] = {'M': self.dict_to_dynamo(val, strict=False)} else: result[key_with_prefix] = self.type_serializer.serialize(val) else: if key not in self.config.get('required_fields', []): logger.warning(f"Field {key} is missing from row_mapper, so we can't convert it to DynamoDB " f"syntax. This is not a required field, so we continue, but please investigate " f"row: {row_dict}") else: raise ValueError(f"Field {key} is missing from row_mapper, so we can't convert it to DynamoDB " f"syntax. This is a required field, so we can not continue. 
Row: {row_dict}") logger.debug(f"dict_to_dynamo result: {result}") return result def get_by_query(self, keys: Dict, table_name: Optional[str] = None, index_name: Optional[str] = None, comparisons: Optional[Dict] = None, max_items: Optional[int] = None, filter_expression: Optional[str] = None, strict: bool = None, return_count: bool = False, desc: bool = False, fetch_all_fields: bool = None) -> Union[List[Dict], int]: """ Get an item from a table, by some keys. Can specify an index. If an index is not specified, will query the table. IMPORTANT: You must specify the rows you expect to be converted in row mapper in config, otherwise you won't get them in the result. If you want to get items from dynamo by non-key attributes, this method is not for you. :param dict keys: Keys and values of the items we get. You must specify the hash key, and can optionally also add the range key. Example, in a table where the hash key is 'hk' and the range key is 'rk': * {'hk': 'cat', 'rk': '123'} * {'hk': 'cat'} Optional :param str table_name: Name of the dynamo table. If not specified, will use table_name from the config. :param str index_name: Name of the secondary index in the table. If not specified, will query the table itself. :param dict comparisons: Type of comparison for each key. If a key is not mentioned, comparison type will be =. Valid values: `=`, `<`, `<=`, `>`, `>=`, `begins_with`. Comparisons only work for the range key. Example: if keys={'hk': 'cat', 'rk': 100} and comparisons={'rk': '<='} -> will get items where rk <= 100 :param int max_items: Limit the number of items to fetch. :param str filter_expression: Supports regular comparisons and `between`. Input must be a regular human string e.g. 'key <= 42', 'name = marta', 'foo between 10 and 20', etc. :param bool strict: DEPRECATED. 
:param bool return_count: If True, will return the number of items in the result instead of the items themselves :param bool desc: By default (False) the the values will be sorted ascending by the SortKey. To reverse the order set the argument `desc = True`. :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper. If True, will get all attributes. Default is False. :return: List of items from the table, each item in key-value format OR the count if `return_count` is True """ if strict is not None: logging.warning(f"get_by_query `strict` variable is deprecated in sosw 0.7.13+. " f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)") fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict table_name = self._get_validate_table_name(table_name) filter_values = self.dict_to_dynamo(keys, add_prefix=':', strict=False) cond_expr_parts = [] for key_attr_name in keys: # Find comparison for key. The formatting of conditions could be different, so a little spaghetti. 
if key_attr_name.startswith('st_between_'): # This is just a marker to construct a custom expression later compr = 'between' elif key_attr_name.startswith('en_between_'): # This attribute is used in the expression with st_between continue elif comparisons: compr = comparisons.get(key_attr_name) or '=' else: compr = '=' if compr == 'begins_with': cond_expr_parts.append(f"begins_with ({key_attr_name}, :{key_attr_name})") elif compr == 'between': key = key_attr_name[11:] cond_expr_parts.append(f"{key} between :st_between_{key} and :en_between_{key}") else: assert compr in ('=', '<', '<=', '>', '>='), f"Comparison not valid: {compr} for {key_attr_name}" cond_expr_parts.append(f"{key_attr_name} {compr} :{key_attr_name}") cond_expr = " AND ".join(cond_expr_parts) select = ('ALL_ATTRIBUTES' if index_name is None else 'ALL_PROJECTED_ATTRIBUTES') if not return_count else 'COUNT' logger.debug(cond_expr, filter_values) query_args = { 'TableName': table_name, 'Select': select, 'ExpressionAttributeValues': filter_values, # Ex: {':key1_name': 'key1_value', ...} 'KeyConditionExpression': cond_expr # Ex: "key1_name = :key1_name AND ..." } # In case we have a filter expression, we parse it and add variables (values) to the ExpressionAttributeValues # Expression is also transformed to use these variables. 
if filter_expression: expr, values = self._parse_filter_expression(filter_expression) query_args['FilterExpression'] = expr query_args['ExpressionAttributeValues'].update(values) if index_name: query_args['IndexName'] = index_name if max_items: query_args['PaginationConfig'] = {'MaxItems': max_items} if return_count: raise Exception(f"DynamoDbCLient.get_by_query does not support `max_items` and `return_count` together") if desc: query_args['ScanIndexForward'] = False logger.debug(f"Querying dynamo: {query_args}") paginator = self.dynamo_client.get_paginator('query') response_iterator = paginator.paginate(**query_args) result = [] if return_count: return sum([page['Count'] for page in response_iterator]) for page in response_iterator: result += [self.dynamo_to_dict(x, fetch_all_fields=fetch_all_fields) for x in page['Items']] self.stats['dynamo_get_queries'] += 1 if max_items and len(result) >= max_items: break return result[:max_items] if max_items else result def _parse_filter_expression(self, expression: str) -> Tuple[str, Dict]: """ Converts FilterExpression to Dynamo syntax. We still do not support some operators. Feel free to implement: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.OperatorsAndFunctions.html Supported: regular comparators, between, attribute_[not_]exists :return: Returns a tuple of the transformed expression and extracted variables already Dynamo formatted. """ assert isinstance(expression, str), f"Filter expression must be a string: {expression}" words = [x.strip() for x in expression.split()] # Filter Expression should be 2, 3 or 5 words. See doc for more details. 
# This must be a function if len(words) == 2: operator, key = words assert operator.lower() in ('attribute_exists', 'attribute_not_exists') result_expr, result_values = f"{operator} ({key})", {} # This must be a regular comparison elif len(words) == 3: key, operator, value = words assert operator in ('=', '<>', '<', '<=', '>', '>='), f"Unsupported operator for filtering: {expression}" # It is important to add prefix to value here to avoid attribute naming conflicts for example # in conditional_update expressions. e.g you update some field only if it's value is matching condition. result_expr = f"{key} {operator} :filter_{key}" result_values = self.dict_to_dynamo({f"filter_{key}": words[-1]}, add_prefix=':', strict=False) # This must be `between` statement. elif len(words) == 5: assert (words[1].lower(), words[3].lower()) == ('between', 'and'), \ f"Unsupported expression for Filtering: {expression}" key = words[0] result_expr = f"{key} between :st_between_{key} and :en_between_{key}" result_values = self.dict_to_dynamo({f"st_between_{key}": words[2], f"en_between_{key}": words[4]}, add_prefix=':', strict=False) else: raise ValueError(f"Unsupported expression for Filtering: {expression}") return result_expr, result_values def get_by_scan(self, attrs=None, table_name=None, strict=None, fetch_all_fields=None): """ Scans a table. Don't use this method if you want to select by keys. It is SLOW compared to get_by_query. Careful - don't make queries of too many items, this could run for a long time. Optional: :param dict attrs: Attribute names and values of the items we get. Can be empty to get the whole table. :param str table_name: Name of the dynamo table. If not specified, will use table_name from the config. :param bool strict: DEPRECATED. :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper. If True, will get all attributes. Default is False. 
:return: List of items from the table, each item in key-value format :rtype: list """ if strict is not None: logging.warning(f"get_by_query `strict` variable is deprecated in sosw 0.7.13+. " f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)") fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict response_iterator = self._build_scan_iterator(attrs, table_name) result = [] for page in response_iterator: result += [self.dynamo_to_dict(x, fetch_all_fields=fetch_all_fields) for x in page['Items']] self.stats['dynamo_scan_queries'] += 1 return result def get_by_scan_generator(self, attrs=None, table_name=None, strict=None, fetch_all_fields=None): """ Scans a table. Don't use this method if you want to select by keys. It is SLOW compared to get_by_query. Careful - don't make queries of too many items, this could run for a long time. Same as get_by_scan, but yields parts of the results. Optional: :param dict attrs: Attribute names and values of the items we get. Can be empty to get the whole table. :param str table_name: Name of the dynamo table. If not specified, will use table_name from the config. :param bool strict: DEPRECATED. :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper. If false, will get all attributes. Default is True. :return: List of items from the table, each item in key-value format :rtype: list """ if strict is not None: logging.warning(f"get_by_query `strict` variable is deprecated in sosw 0.7.13+. 
" f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)") fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict response_iterator = self._build_scan_iterator(attrs, table_name) for page in response_iterator: self.stats['dynamo_scan_queries'] += 1 yield [self.dynamo_to_dict(x, fetch_all_fields=fetch_all_fields) for x in page['Items']] def _build_scan_iterator(self, attrs=None, table_name=None): table_name = self._get_validate_table_name(table_name) filter_values = None cond_expr = None if attrs: filter_values = self.dict_to_dynamo(attrs, add_prefix=':', strict=False) cond_expr_parts = [] for key_attr_name in attrs: cond_expr_parts.append(f"{key_attr_name} = :{key_attr_name}") cond_expr = " AND ".join(cond_expr_parts) query_args = { 'TableName': table_name, 'Select': 'ALL_ATTRIBUTES', } if cond_expr: query_args['FilterExpression'] = cond_expr if filter_values: query_args['ExpressionAttributeValues'] = filter_values logger.debug(f"Scanning dynamo: {query_args}") paginator = self.dynamo_client.get_paginator('scan') response_iterator = paginator.paginate(**query_args) return response_iterator def batch_get_items_one_table(self, keys_list, table_name=None, max_retries=0, retry_wait_base_time=0.2, strict=None, fetch_all_fields=None): """ Gets a batch of items from a single dynamo table. Only accepts keys, can't query by other columns. :param list keys_list: A list of the keys of the items we want to get. Gets the items that match the given keys. If some key doesn't exist - it just skips it and gets the others. e.g. [{'hash_col': '1, 'range_col': 2}, {'hash_col': 3}] - will get a row where `hash_col` is 1 and `range_col` is 2, and also all rows where `hash_col` is 3. Optional :param str table_name: :param int max_retries: If failed to get some items, retry this many times. Waiting between retries is multiplied by 2 after each retry, so `retries` shouldn't be a big number. Default is 1. 
:param int retry_wait_base_time: Wait this much time after first retry. Will wait twice longer in each retry. :param bool strict: DEPRECATED. :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper. If True, will get all attributes. Default is False. :return: List of items from the table :rtype: list """ if strict is not None: logging.warning(f"batch_get_items_one_table `strict` variable is deprecated in sosw 0.7.13+. " f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)") fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict table_name = self._get_validate_table_name(table_name) # Convert given keys to dynamo syntax query_keys = [self.dict_to_dynamo(item) for item in keys_list] # Check if we skipped something - if we did, try again. def get_unprocessed_keys(db_result): return 'UnprocessedKeys' in db_result and db_result['UnprocessedKeys'] \ and table_name in db_result['UnprocessedKeys'] and db_result['UnprocessedKeys'][table_name]['Keys'] all_items = [] for query_keys_chunk in chunks(query_keys, 100): batch_get_item_query = { 'RequestItems': { table_name: { 'Keys': query_keys_chunk } } } logger.debug(f"batch_get_item query: {batch_get_item_query}") latest_result = self.dynamo_client.batch_get_item(**batch_get_item_query) logger.debug(f"latest_result: {latest_result}") unprocessed_keys = get_unprocessed_keys(latest_result) all_items += latest_result['Responses'][table_name] logger.debug(f"batch_get_items_one_table response: {latest_result}") if unprocessed_keys: # Retry several times retry_num = 0 wait_time = retry_wait_base_time while unprocessed_keys and retry_num < max_retries: logger.warning(f"batch_get_item action did NOT finish successfully.") time.sleep(wait_time) batch_get_item_query['RequestItems'][table_name]['Keys'] = unprocessed_keys latest_result = self.dynamo_client.batch_get_item(**batch_get_item_query) 
logger.debug(f"latest_result: {latest_result}") all_items += latest_result['Responses'][table_name] retry_num += 1 wait_time *= 2 unprocessed_keys = get_unprocessed_keys(latest_result) # After the retries still we have a bad result... then raise Exception if get_unprocessed_keys(latest_result): raise Exception(f"batch_get_items action failed for table {table_name}, keys_list {keys_list}") result = [] for item in all_items: result.append(self.dynamo_to_dict(item, fetch_all_fields=fetch_all_fields)) return result def build_put_query(self, row, table_name=None, overwrite_existing=True): table_name = self._get_validate_table_name(table_name) dynamo_formatted_row = self.dict_to_dynamo(row, strict=False) query = { 'TableName': table_name, 'Item': dynamo_formatted_row } if not overwrite_existing: hash_key = self.config['hash_key'] query['ConditionExpression'] = f"attribute_not_exists({hash_key})" return query def build_delete_query(self, delete_keys: Dict, table_name: str = None): table_name = self._get_validate_table_name(table_name) dynamo_formatted_row = self.dict_to_dynamo(delete_keys, strict=False) query = { 'TableName': table_name, 'Key': dynamo_formatted_row } return query def put(self, row, table_name=None, overwrite_existing=True): """ Adds a row to the database :param dict row: The row to add to the table. key is column name, value is value. :param string table_name: Name of the dynamo table to add the row to. :param bool overwrite_existing: Overwrite the existing row if True, otherwise will raise an exception if exists. 
""" table_name = self._get_validate_table_name(table_name) put_query = self.build_put_query(row, table_name, overwrite_existing) logger.debug(f"Put to DB: {put_query}") dynamo_response = self.dynamo_client.put_item(**put_query) logger.debug(f"Response from dynamo {dynamo_response}") self.stats['dynamo_put_queries'] += 1 def create(self, row, table_name=None): self.put(row, table_name, overwrite_existing=False) # @benchmark def update(self, keys: Dict, attributes_to_update: Optional[Dict] = None, attributes_to_increment: Optional[Dict] = None, table_name: Optional[str] = None, condition_expression: Optional[str] = None): """ Updates an item in DynamoDB. Will create a new item if doesn't exist. If you want to make sure it exists, use ``patch`` method :param dict keys: Keys and values of the row we update. Example, in a table where the hash key is 'hk' and the range key is 'rk': {'hk': 'cat', 'rk': '123'} :param dict attributes_to_update: Dict of the attributes to be updated. Can contain both existing attributes and new attributes. Will update existing, and create new attributes. Example: {'col_name': 'some_value'} :param dict attributes_to_increment: Attribute names to increment, and the value to increment by. If the attribute doesn't exist, will create it. Example: {'some_counter': '3'} :param str condition_expression: Condition Expression that must be fulfilled on the object to update. 
:param str table_name: Name of the table """ table_name = self._get_validate_table_name(table_name) if not attributes_to_update and not attributes_to_increment: raise ValueError(f"In dynamodb.update, please specify either attributes_to_update " f"or attributes_to_increment") expression_attributes = {} update_expr_parts = [] attribute_values = {} if attributes_to_update: for col in attributes_to_update: update_expr_parts.append(f"#{col} = :{col}") expression_attributes[f"#{col}"] = col if attributes_to_increment: for col in attributes_to_increment: update_expr_parts.append(f"#{col} = if_not_exists(#{col}, :zero) + :{col}") expression_attributes[f"#{col}"] = col attribute_values.update({'zero': '0'}) keys = self.dict_to_dynamo(keys, strict=False) attribute_values.update((attributes_to_update or {})) attribute_values.update(attributes_to_increment or {}) attribute_values = self.dict_to_dynamo(attribute_values.copy(), add_prefix=":", strict=False) update_expr = "SET " + ", ".join(update_expr_parts) update_item_query = { 'ExpressionAttributeNames': expression_attributes, # Ex. {'#attr_name': 'attr_name', ...} 'ExpressionAttributeValues': attribute_values, # Ex. {':attr_name': 'some_value', ...} 'Key': keys, # Ex. {'key_name': 'key_value', ...} 'TableName': table_name, 'UpdateExpression': update_expr # Ex. "SET #attr_name = :attr_name AND ..." } if condition_expression: expr, values = self._parse_filter_expression(condition_expression) update_item_query['ConditionExpression'] = expr update_item_query['ExpressionAttributeValues'].update(values) logger.debug(f"Updating an item, query: {update_item_query}") response = self.dynamo_client.update_item(**update_item_query) logger.debug(f"Update result: {response}") self.stats['dynamo_update_queries'] += 1 def patch(self, keys: Dict, attributes_to_update: Optional[Dict] = None, attributes_to_increment: Optional[Dict] = None, table_name: Optional[str] = None): """ Updates an item in DynamoDB. 
Will fail if an item with these keys does not exist. """ hash_key = self.config['hash_key'] condition_expression = f'attribute_exists {hash_key}' self.update(keys, attributes_to_update, attributes_to_increment, table_name, condition_expression) def delete(self, keys: Dict, table_name: Optional[str] = None): """ :param dict keys: Keys and values of the row we delete. :param table_name: """ query = self.build_delete_query(keys, table_name) self.dynamo_client.delete_item(**query) def make_put_transaction_item(self, row, table_name=None): return {'Put': self.build_put_query(row, table_name)} def make_delete_transaction_item(self, row, table_name): return {'Delete': self.build_delete_query(row, table_name)} def transact_write(self, *transactions: Dict): """ Executes many write transaction. Can execute operations on different tables. Will split transactions to chunks - because transact_write_items accepts up to 10 actions. WARNING: If you're expecting a transaction on more than 10 operations - AWS DynamoDB doesn't support it. .. code-block:: python dynamo_db_client = DynamoDbClient(config) t1 = dynamo_db_client.make_put_transaction_item(row, table_name='table1') t2 = dynamo_db_client.make_delete_transaction_item(row, table_name='table2') dynamo_db_client.transact_write(t1, t2) """ supported_actions = ['Put', 'Delete'] for t in transactions: assert isinstance(t, dict), "transaction must be a dictionary" assert len(t) == 1, "one transaction must contain only one operation" action = list(t.keys())[0] assert action in supported_actions, f"Bad action '{action}'. " \ f"Supported actions: {', '.join(supported_actions)}" assert isinstance(t[action], dict), f"transaction[{action}] must be a dictionary. 
bad type: " \ f"{type(t[action])}" for t_chunk in chunks(transactions, 10): logger.debug(f"Transactions: \n{pprint.pformat(t_chunk)}") response = self.dynamo_client.transact_write_items(TransactItems=t_chunk) self.stats['dynamo_transact_write_operations'] += 1 logger.debug(f"Response from transact_write_items: {response}") def _get_validate_table_name(self, table_name=None): if table_name is None: table_name = self.config.get('table_name') if table_name is None: raise RuntimeError("Failed to dynamo action. no 'table_name' in config and table_name wasn't " "specified in the arguments.") if os.environ.get('STAGE') == 'test': assert table_name.startswith('autotest_') or table_name == 'config', f"Bad table name in test: {table_name}" return table_name def get_stats(self): """ Return statistics of operations performed by current instance of the Class. :return: - dict - key: int statistics. """ return self.stats def get_capacity(self, table_name=None): """Fetches capacity for data tables Keyword Arguments: table_name {str} -- DynamoDB (default: {None}) Returns: dict -- read/write capacity for the table requested """ if table_name is None: logging.debug(self.config) table_name = self.config['table_name'] logging.debug(f"DynamoDB table name identified as {table_name}") if table_name in self._table_capacity.keys(): return self._table_capacity[table_name] else: self.identify_dynamo_capacity(table_name=table_name) return self._table_capacity[table_name] def reset_stats(self): """ Cleans statistics. """ self.stats = defaultdict(int)
class TestSerializer(unittest.TestCase):
    """Checks the DynamoDB attribute-value output of ``TypeSerializer.serialize``.

    Every test feeds one kind of Python value to the serializer and asserts
    either the exact typed structure it returns or the ``TypeError`` it raises.
    """

    def setUp(self):
        self.serializer = TypeSerializer()

    def test_serialize_unsupported_type(self):
        # six.assertRaisesRegex dispatches to assertRaisesRegexp on Python 2
        # and assertRaisesRegex on Python 3; the old alias no longer exists
        # on Python 3.12+.
        with six.assertRaisesRegex(self, TypeError, 'Unsupported type'):
            self.serializer.serialize(object())

    def test_serialize_null(self):
        self.assertEqual(self.serializer.serialize(None), {'NULL': True})

    def test_serialize_boolean(self):
        self.assertEqual(self.serializer.serialize(False), {'BOOL': False})

    def test_serialize_integer(self):
        self.assertEqual(self.serializer.serialize(1), {'N': '1'})

    def test_serialize_decimal(self):
        self.assertEqual(
            self.serializer.serialize(Decimal('1.25')), {'N': '1.25'})

    def test_serialize_float_error(self):
        with six.assertRaisesRegex(
                self,
                TypeError,
                'Float types are not supported. Use Decimal types instead'):
            self.serializer.serialize(1.25)

    def test_serialize_NaN_error(self):
        with six.assertRaisesRegex(
                self,
                TypeError,
                'Infinity and NaN not supported'):
            self.serializer.serialize(Decimal('NaN'))

    def test_serialize_string(self):
        self.assertEqual(self.serializer.serialize('foo'), {'S': 'foo'})

    def test_serialize_binary(self):
        self.assertEqual(
            self.serializer.serialize(Binary(b'\x01')), {'B': b'\x01'})

    def test_serialize_bytearray(self):
        self.assertEqual(
            self.serializer.serialize(bytearray([1])), {'B': b'\x01'})

    @unittest.skipIf(six.PY2,
                     'This is a test when using python3 version of bytes')
    def test_serialize_bytes(self):
        self.assertEqual(self.serializer.serialize(b'\x01'), {'B': b'\x01'})

    def test_serialize_number_set(self):
        serialized_value = self.serializer.serialize({1, 2, 3})
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('NS', serialized_value)
        # Sets are unordered, so only the members are compared.
        self.assertCountEqual(serialized_value['NS'], ['1', '2', '3'])

    def test_serialize_string_set(self):
        serialized_value = self.serializer.serialize({'foo', 'bar'})
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('SS', serialized_value)
        self.assertCountEqual(serialized_value['SS'], ['foo', 'bar'])

    def test_serialize_binary_set(self):
        serialized_value = self.serializer.serialize(
            {Binary(b'\x01'), Binary(b'\x02')})
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('BS', serialized_value)
        self.assertCountEqual(serialized_value['BS'], [b'\x01', b'\x02'])

    def test_serialize_list(self):
        serialized_value = self.serializer.serialize(['foo', 1, [1]])
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('L', serialized_value)
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        )

    def test_serialize_map(self):
        serialized_value = self.serializer.serialize(
            {'foo': 'bar', 'baz': {'biz': 1}})
        self.assertEqual(
            serialized_value,
            {'M': {'foo': {'S': 'bar'}, 'baz': {'M': {'biz': {'N': '1'}}}}})
def serialize_input(value):
    """Serialize every value of a mapping with ``TypeSerializer``.

    :param value: mapping whose values are each passed to
        ``TypeSerializer.serialize``; the keys are left untouched.
    :return: a new dict with the same keys and the serialized values.
    """
    serializer = TypeSerializer()
    return {name: serializer.serialize(item) for name, item in value.items()}
def converterToDynamodbFormat(data):
    """Render ``data`` as a JSON string of serialized DynamoDB attributes.

    ``data`` is serialized with ``TypeSerializer``, the content of the
    resulting top-level ``'M'`` entry is dumped to JSON, and that text is
    handed to ``replaceObjectName``, whose result is returned.
    """
    attribute_map = TypeSerializer().serialize(data)['M']
    return replaceObjectName(json.dumps(attribute_map))
# The first two components of the sentiment result are stored with the row.
polarity, subjectivity = blob.sentiment[0], blob.sentiment[1]

td = {
    "Id": count,
    "Username": ef(row['username']),
    "Following": int(row['following']),
    "Followers": int(row['followers']),
    "Totaltweets": int(row['totaltweets']),
    "Tweetcreatedts": row['tweetcreatedts'],
    "Query": row['query'],
    "Text": text,
    "Retweet": row['retweet'],
    "Party": row['party'],
    "Targeted": row['targeted'],
    "Polarity": polarity,
    "Subjectivity": subjectivity,
}

# Round-trip through JSON so that every float is re-read as a Decimal
# before the row is handed to the serializer.
t = json.loads(json.dumps(td), parse_float=Decimal)
dbb = json.dumps(typer.serialize(t)['M'])

# Lower-case the type tags emitted by the serializer, in the same order
# as before: M, L, S, N.
for upper_tag, lower_tag in (('"M"', '"m"'),
                             ('"L"', '"l"'),
                             ('"S"', '"s"'),
                             ('"N"', '"n"')):
    dbb = dbb.replace(upper_tag, lower_tag)

# One serialized row per output line.
f.write(dbb)
f.write('\n')
count += 1
def formatReport(report):
    """Return ``report`` serialized by a fresh ``TypeSerializer``."""
    return TypeSerializer().serialize(report)
class TestSerializer(unittest.TestCase):
    """Checks the DynamoDB attribute-value output of ``TypeSerializer.serialize``.

    Every test feeds one kind of Python value to the serializer and asserts
    either the exact typed structure it returns or the ``TypeError`` it raises.
    """

    def setUp(self):
        self.serializer = TypeSerializer()

    def test_serialize_unsupported_type(self):
        # six.assertRaisesRegex dispatches to assertRaisesRegexp on Python 2
        # and assertRaisesRegex on Python 3; the old alias no longer exists
        # on Python 3.12+.
        with six.assertRaisesRegex(self, TypeError, 'Unsupported type'):
            self.serializer.serialize(object())

    def test_serialize_null(self):
        self.assertEqual(self.serializer.serialize(None), {'NULL': True})

    def test_serialize_boolean(self):
        self.assertEqual(self.serializer.serialize(False), {'BOOL': False})

    def test_serialize_integer(self):
        self.assertEqual(self.serializer.serialize(1), {'N': '1'})

    def test_serialize_decimal(self):
        self.assertEqual(
            self.serializer.serialize(Decimal('1.25')), {'N': '1.25'})

    def test_serialize_float_error(self):
        with six.assertRaisesRegex(
                self,
                TypeError,
                'Float types are not supported. Use Decimal types instead'):
            self.serializer.serialize(1.25)

    def test_serialize_NaN_error(self):
        with six.assertRaisesRegex(
                self,
                TypeError,
                'Infinity and NaN not supported'):
            self.serializer.serialize(Decimal('NaN'))

    def test_serialize_string(self):
        self.assertEqual(self.serializer.serialize('foo'), {'S': 'foo'})

    def test_serialize_binary(self):
        self.assertEqual(
            self.serializer.serialize(Binary(b'\x01')), {'B': b'\x01'})

    def test_serialize_bytearray(self):
        self.assertEqual(
            self.serializer.serialize(bytearray([1])), {'B': b'\x01'})

    @unittest.skipIf(six.PY2,
                     'This is a test when using python3 version of bytes')
    def test_serialize_bytes(self):
        self.assertEqual(self.serializer.serialize(b'\x01'), {'B': b'\x01'})

    def test_serialize_number_set(self):
        serialized_value = self.serializer.serialize({1, 2, 3})
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('NS', serialized_value)
        # Sets are unordered, so only the members are compared.
        self.assertCountEqual(serialized_value['NS'], ['1', '2', '3'])

    def test_serialize_string_set(self):
        serialized_value = self.serializer.serialize({'foo', 'bar'})
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('SS', serialized_value)
        self.assertCountEqual(serialized_value['SS'], ['foo', 'bar'])

    def test_serialize_binary_set(self):
        serialized_value = self.serializer.serialize(
            {Binary(b'\x01'), Binary(b'\x02')})
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('BS', serialized_value)
        self.assertCountEqual(serialized_value['BS'], [b'\x01', b'\x02'])

    def test_serialize_list(self):
        serialized_value = self.serializer.serialize(['foo', 1, [1]])
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('L', serialized_value)
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        )

    def test_serialize_tuple(self):
        # Tuples, including nested ones, are expected to come out as 'L'.
        serialized_value = self.serializer.serialize(('foo', 1, (1,)))
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('L', serialized_value)
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        )

    def test_serialize_map(self):
        serialized_value = self.serializer.serialize(
            {'foo': 'bar', 'baz': {'biz': 1}})
        self.assertEqual(
            serialized_value,
            {'M': {'foo': {'S': 'bar'}, 'baz': {'M': {'biz': {'N': '1'}}}}})
class DynamoDBExporter(AbstractExporter):
    """Exporter that collects values in memory and writes them to DynamoDB.

    The table name is read from the ``DYNAMODB_TABLENAME`` environment
    variable. When ``DYNAMODB_ENDPOINT_URL`` is set it is used as the endpoint
    of the boto3 resource.
    """

    def __init__(self) -> None:
        """Connect to DynamoDB and make sure the target table is available.

        When ``settings.DYNAMODB_COLLECTION_OVERRIDE`` is truthy the table is
        deleted first. A table keyed by the string attribute ``id`` is then
        created; an already existing table is left as it is.

        :raises KeyError: if ``DYNAMODB_TABLENAME`` is not set.
        """
        super().__init__()
        if 'DYNAMODB_ENDPOINT_URL' in os.environ:
            self.dynamodb = boto3.resource(
                'dynamodb', endpoint_url=os.environ['DYNAMODB_ENDPOINT_URL'])
        else:
            self.dynamodb = boto3.resource('dynamodb')

        table_name = os.environ['DYNAMODB_TABLENAME']
        self.table = self.dynamodb.Table(table_name)

        # Only the two "expected" service errors are tolerated below; anything
        # else (credentials, network, interrupts) is allowed to propagate
        # instead of being silently swallowed.
        service_errors = self.dynamodb.meta.client.exceptions

        if settings.DYNAMODB_COLLECTION_OVERRIDE:
            try:
                self.table.delete()
            except service_errors.ResourceNotFoundException:
                # Nothing to drop: the table does not exist.
                pass

        try:
            self.dynamodb.create_table(
                TableName=table_name,
                KeySchema=[
                    {
                        'AttributeName': 'id',
                        'KeyType': 'HASH'  # Partition key
                    },
                ],
                AttributeDefinitions=[
                    {
                        'AttributeName': 'id',
                        'AttributeType': 'S'
                    },
                ],
                ProvisionedThroughput={
                    'ReadCapacityUnits': 10,
                    'WriteCapacityUnits': 10
                })
        except service_errors.ResourceInUseException:
            # The table already exists, so there is nothing to create.
            pass

        self.values = rec_dd()
        self.serializer = TypeSerializer()
        self.deserializer = TypeDeserializer()

    def bulk_insert(self):
        """Write every accumulated entry of ``self.values`` to the table.

        Each entry becomes one item; its key in ``self.values`` is stored in
        the item's ``id`` attribute before it is handed to the batch writer.
        """
        with self.table.batch_writer() as batch:
            for key, record in self.values.items():
                record['id'] = key
                batch.put_item(Item=record)

    def online_upsert(self, keys, item, x):
        """Merge ``x`` into the stored item ``keys[0]``, or insert ``item``.

        :param keys: path of the value; ``keys[0]`` is the item id and
            ``keys[1:]`` is the nested attribute path inside the item.
        :param item: the in-memory content held for ``keys[0]``.
        :param x: the new value; it must provide ``to_dict()`` and, when a
            previous value is found, support ``x.__class__(previous) + x``.
        """
        resp = self.table.get_item(Key={'id': keys[0]})
        if 'Item' in resp:
            # The item is already stored: combine with the previous value
            # found under the nested path, if there is one.
            nested_obj = nested_get(item, keys[1:])
            if nested_obj:
                nested_obj = self.deserializer.deserialize(nested_obj)
                new_obj = x.__class__(nested_obj) + x
            else:
                new_obj = x

            # One '#keyN' placeholder per path segment, so attribute names
            # are never written literally into the update expression.
            placeholders = ['#key' + str(i) for i in range(len(keys) - 1)]
            response = self.table.update_item(
                Key={'id': keys[0]},
                UpdateExpression="set " + '.'.join(placeholders) + ' =:r',
                ExpressionAttributeNames=dict(zip(placeholders, keys[1:])),
                ExpressionAttributeValues={':r': new_obj.to_dict()},
                ReturnValues="UPDATED_NEW")
            logger.debug('Update: %s', response)
        else:
            # The item is not stored yet: insert it as a whole.
            item = {**item, 'id': keys[0]}
            self.table.put_item(Item=item)
            logger.debug(f'Insert: {dict(item)}')

    def __call__(self, item, *args: Any, **kwds: Any) -> Any:
        """Record one ``(keys, x, value)`` triple.

        The serialized content of ``x`` is stored in ``self.values`` under
        the nested path ``keys``. When ``settings.DYNAMO_DB_ONLINE`` is truthy
        the value is also pushed to the table through ``online_upsert``.

        :param item: a three-element sequence ``(keys, x, value)``; the third
            element is not used here.
        """
        keys, x, _value = item

        # Walk down to the container that holds the last path segment.
        d = self.values[keys[0]]
        for _key in keys[1:-1]:
            d = d[_key]

        new_x = {}
        for k, v in x.to_dict().items():
            if isinstance(v, float):
                # Go through str(): Decimal(float) keeps the full binary
                # expansion (Decimal(0.1) has 55 digits), which the serializer
                # refuses because DynamoDB numbers are limited to 38 digits.
                v = Decimal(str(v))
            new_x[k] = v
        d[keys[-1]] = self.serializer.serialize(new_x)

        if settings.DYNAMO_DB_ONLINE:
            # Rebuild an object of x's class from the Decimal-converted
            # values before sending it online.
            new_x = x.__class__(new_x)
            self.online_upsert(keys, self.values[keys[0]], new_x)
class TestSerializer(unittest.TestCase):
    """Exercises ``TypeSerializer.serialize`` for each kind of input value.

    Scalars are compared against the exact typed structure; collections are
    checked for their single type tag and then for their members.
    """

    def setUp(self):
        self.serializer = TypeSerializer()

    # -- inputs that must be rejected ------------------------------------

    def test_serialize_unsupported_type(self):
        with pytest.raises(TypeError, match=r'Unsupported type'):
            self.serializer.serialize(object())

    # -- scalars ----------------------------------------------------------

    def test_serialize_null(self):
        result = self.serializer.serialize(None)
        assert result == {'NULL': True}

    def test_serialize_boolean(self):
        result = self.serializer.serialize(False)
        assert result == {'BOOL': False}

    def test_serialize_integer(self):
        result = self.serializer.serialize(1)
        assert result == {'N': '1'}

    def test_serialize_decimal(self):
        result = self.serializer.serialize(Decimal('1.25'))
        assert result == {'N': '1.25'}

    def test_serialize_float_error(self):
        with pytest.raises(
                TypeError,
                match=r'Float types are not supported. Use Decimal types instead'):
            self.serializer.serialize(1.25)

    def test_serialize_NaN_error(self):
        not_a_number = Decimal('NaN')
        with pytest.raises(TypeError, match=r'Infinity and NaN not supported'):
            self.serializer.serialize(not_a_number)

    def test_serialize_string(self):
        result = self.serializer.serialize('foo')
        assert result == {'S': 'foo'}

    # -- binary -----------------------------------------------------------

    def test_serialize_binary(self):
        result = self.serializer.serialize(Binary(b'\x01'))
        assert result == {'B': b'\x01'}

    def test_serialize_bytearray(self):
        result = self.serializer.serialize(bytearray([1]))
        assert result == {'B': b'\x01'}

    def test_serialize_bytes(self):
        result = self.serializer.serialize(b'\x01')
        assert result == {'B': b'\x01'}

    # -- sets: unordered, so members are compared without order -----------

    def test_serialize_number_set(self):
        result = self.serializer.serialize({1, 2, 3})
        assert len(result) == 1
        assert 'NS' in result
        self.assertCountEqual(result['NS'], ['1', '2', '3'])

    def test_serialize_string_set(self):
        result = self.serializer.serialize({'foo', 'bar'})
        assert len(result) == 1
        assert 'SS' in result
        self.assertCountEqual(result['SS'], ['foo', 'bar'])

    def test_serialize_binary_set(self):
        members = {Binary(b'\x01'), Binary(b'\x02')}
        result = self.serializer.serialize(members)
        assert len(result) == 1
        assert 'BS' in result
        self.assertCountEqual(result['BS'], [b'\x01', b'\x02'])

    # -- sequences and maps -----------------------------------------------

    def test_serialize_list(self):
        result = self.serializer.serialize(['foo', 1, [1]])
        assert len(result) == 1
        assert 'L' in result
        expected_members = [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        self.assertCountEqual(result['L'], expected_members)

    def test_serialize_tuple(self):
        result = self.serializer.serialize(('foo', 1, (1, )))
        self.assertEqual(len(result), 1)
        self.assertIn('L', result)
        expected_members = [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        self.assertCountEqual(result['L'], expected_members)

    def test_serialize_map(self):
        result = self.serializer.serialize({'foo': 'bar', 'baz': {'biz': 1}})
        expected = {
            'M': {
                'foo': {'S': 'bar'},
                'baz': {'M': {'biz': {'N': '1'}}},
            }
        }
        assert result == expected
def serialize(a_dict):
    """Return a copy of ``a_dict`` with each value run through ``TypeSerializer``.

    :param a_dict: mapping to convert; keys are kept as they are.
    :return: a new dict mapping every key to its serialized value.
    """
    to_attribute = TypeSerializer().serialize
    serialized = {}
    for name, item in a_dict.items():
        serialized[name] = to_attribute(item)
    return serialized