Example #1
def db_log_step(userID, recipe, step):
    """Log the most recent step that a user has been given
    """
    dynamo = db_connect()
    ts = TypeSerializer()

    put_resp = dynamo.put_item(TableName=os.environ['STEP_HISTORY_TABLE'],
                               Item={
                                   'userID': {
                                       'S': userID
                                   },
                                   'time': {
                                       'N': str(time.time())
                                   },
                                   'step': ts.serialize(step),
                                   'recipe': ts.serialize(recipe)
                               })

    upd_resp = dynamo.update_item(TableName=os.environ['STEP_LAST_TABLE'],
                                  Key={'userID': {
                                      'S': userID
                                  }},
                                  AttributeUpdates={
                                      'step': {
                                          'Action': 'PUT',
                                          'Value': ts.serialize(step)
                                      },
                                      'recipe': {
                                          'Action': 'PUT',
                                          'Value': ts.serialize(recipe)
                                      }
                                  })

    return (put_resp, upd_resp)
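
A hedged sketch (not from the original source): AttributeUpdates is a legacy parameter, and the same write can be expressed with UpdateExpression. This reuses `dynamo`, `ts`, `userID`, `step` and `recipe` from the function above; '#s' aliases the attribute name to stay clear of reserved words.

    upd_resp = dynamo.update_item(
        TableName=os.environ['STEP_LAST_TABLE'],
        Key={'userID': {'S': userID}},
        UpdateExpression='SET #s = :step, recipe = :recipe',
        ExpressionAttributeNames={'#s': 'step'},
        ExpressionAttributeValues={':step': ts.serialize(step),
                                   ':recipe': ts.serialize(recipe)})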
Example #2
    def updateData(self, numRows, start_key, field_key, field_value):
        client = self.dynamodb_client()
        deserializer = TypeDeserializer()
        serializer = TypeSerializer()

        table_configs = self.expected_table_config()

        for table in table_configs:
            LOGGER.info('Updating %s items by setting field with key %s to the value %s, '
                        'with start_key %s, for table %s', numRows, field_key,
                        field_value, start_key, table['TableName'])
            for item in table['generator'](numRows, start_key):
                record = deserializer.deserialize(item)
                hashKey = table['HashKey']
                key = {
                    hashKey: serializer.serialize(record[hashKey])
                }
                serializedFieldValue = serializer.serialize(field_value)
                # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.update_item
                client.update_item(
                    TableName=table['TableName'],
                    Key=key,
                    UpdateExpression='set {}=:v'.format(field_key),
                    ExpressionAttributeValues={
                        ':v': serializedFieldValue,
                    },
                )
Example #3
def lambda_handler(event, context):
    accountId = str(event['accountId'])
    accountName = str(event['description'])
    accountRole = "customer"
    confidentialKMSKey = 'alias/TSI_Base_ConfidentialS3Key'
    internalKMSKey = 'alias/TSI_Base_InternalS3Key'
    if 'customermasteraccountid' in event:
        masteraccountId = event['customermasteraccountid']
    else:
        masteraccountId = os.environ['accountid']
    readonlyRole = 'TSI_Base_ReadOnlySwitchRole'
    securityEmail = str(event['email'])
    accountemail = str(event['accountemail'])
    enabledregions = event['enabledregions'].split(',')
    supportenabled = 'false'
    awsconfigenabled = 'false'
    if 'config' in event:
        config = event['config']
    else:
        config = "disabled"
    if 'support' in event:
        support = event['support']
    else:
        support = "disabled"
    ouname = event['ouname']
    terraformVersion = '1.0'
    writeRole = 'TSI_Base_FullAccess'
    featureLevel = 'full'
    dynamoentry = {
        'accountId': accountId,
        'accountName': accountName,
        'config': config,
        'support': support,
        'ouname': ouname,
        'accountRole': accountRole,
        'confidentialKMSKey': confidentialKMSKey,
        'internalKMSKey': internalKMSKey,
        'masteraccountId': masteraccountId,
        'readonlyRole': readonlyRole,
        'securityEmail': securityEmail,
        'accountemail': accountemail,
        'enabledregions': enabledregions,
        'supportenabled': supportenabled,
        'awsconfigenabled': awsconfigenabled,
        'terraformVersion': terraformVersion,
        'writeRole': writeRole,
        'featureLevel': featureLevel
    }
    serializer = TypeSerializer()
    print(json.dumps(serializer.serialize(dynamoentry)['M']))
    dynamoclient = boto3.client('dynamodb')
    return (dynamoclient.put_item(TableName='accounts',
                                  Item=serializer.serialize(dynamoentry)['M']))
Example #4
    def _save_dice_pools(self):
        if self.pools is None:
            raise Exception("Tried to save dice pools before loading them.")

        ser = TypeSerializer()
        item = {
            'game': ser.serialize('Shadowrun'),
            # Build the Decimal from a string: Decimal(time.time()) carries ~50 digits
            # of float noise, which can exceed DynamoDB's 38-digit precision and raise.
            'timestamp': ser.serialize(Decimal(str(time.time()))),
            'pools': ser.serialize(self.pools)
        }
        logger.debug("Item before put_item()ing: {}".format(item))
        self._client.put_item(TableName='DicePools', Item=item)
Example #5
def boto3_serializer(python_dict):
    serializer = TypeSerializer()
    return {
        k: serializer.serialize(
            v if not isinstance(v, float) else Decimal(str(v)))
        for k, v in python_dict.items()
    }
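
A hypothetical round trip (names invented for illustration): floats are re-cast through Decimal so TypeSerializer accepts them, and TypeDeserializer recovers plain Python values.

    from boto3.dynamodb.types import TypeDeserializer

    item = boto3_serializer({'id': 'abc', 'score': 0.75})
    # item == {'id': {'S': 'abc'}, 'score': {'N': '0.75'}}

    plain = {k: TypeDeserializer().deserialize(v) for k, v in item.items()}
    # plain == {'id': 'abc', 'score': Decimal('0.75')}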
Example #6
    def getSpecificProductInformation(self, keyValue):

        print("In getSpecificProductInformation() method !")

        try:

            table = self.dynamodb.Table('Product')
            resp = table.get_item(Key=keyValue)

            # resp returns following value.
            print(resp)
            # {'Item': {'Id': Decimal('1'), 'ProductCategoryId': Decimal('1'), 'Name': 'IPhone 8\n'}, 'ResponseMetadata': {'RequestId': 'NMDA30M1HHF514MJVVUIBCELEFVV4KQNSO5AEMVJF66Q9ASUAAJG', 'HTTPStatusCode': 200, 'HTTPHeaders': {'server': 'Server', 'date': 'Mon, 05 Aug 2019 17:12:19 GMT', 'content-type': 'application/x-amz-json-1.0', 'content-length': '81', 'connection': 'keep-alive', 'x-amzn-requestid': 'NMDA30M1HHF514MJVVUIBCELEFVV4KQNSO5AEMVJF66Q9ASUAAJG', 'x-amz-crc32': '3394591035'}, 'RetryAttempts': 0}}

            if resp and resp["ResponseMetadata"]["HTTPStatusCode"] == 200:

                serializer = TypeSerializer()

                if resp["Item"]:
                    print(resp["Item"])
                    # {'Id': Decimal('1'), 'ProductCategoryId': Decimal('1'), 'Name': 'IPhone 8\n'}
                    data = {k: serializer.serialize(v) for k, v in resp["Item"].items()}

                else:
                    data = {}

                return data
            else:
                raise Exception("get_item returned a non-200 response")

        except Exception as error:
            print("Not able to fetch item !")
            raise error
Example #7
def construct_dynamo_type_dict(d: dict):
    """
    DynamoDB transactions need a different way of specifying transaction.
    The structure has to be recursively implemented as:
                    'string': {
                        'S': 'string',
                        'N': 'string',
                        'B': b'bytes',
                        'SS': [
                            'string',
                        ],
                        'NS': [
                            'string',
                        ],
                        'BS': [
                            b'bytes',
                        ],
                        'M': {
                            'string': {'... recursive ...'}
                        },
                        'L': [
                            {'... recursive ...'},
                        ],
                        'NULL': True|False,
                        'BOOL': True|False
                    }
    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.transact_write_items
    TypeSerializer and TypeDeserializer need to be used to convert to appropriate representations for DynamoDB.
    serialize(d)['M'] has been used in line with the documentation.
    """
    serializer = TypeSerializer()
    return serializer.serialize(d)['M']
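
For illustration (input invented), the helper maps a nested dict to the recursive structure from the docstring; note that a Python set of strings becomes an unordered 'SS' string set.

    construct_dynamo_type_dict({'pk': 'user#1', 'count': 3, 'tags': {'a', 'b'}})
    # -> {'pk': {'S': 'user#1'}, 'count': {'N': '3'}, 'tags': {'SS': ['a', 'b']}}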
Example #8
    def generate_items(self, num_items):
        serializer = TypeSerializer()
        for i in range(num_items):
            record = {
                'int_id': int(i / 10.0),
                'decimal_field': decimal.Decimal(str(i) + '.00000000001'),
                'string_field': str(i),
                'byte_field': b'some_bytes',
                'int_list_field': [i, i + 1, i + 2],
                'int_set_field': {i, i + 1, i + 2},
                'map_field': {
                    'map_entry_1': 'map_value_1',
                    'map_entry_2': 'map_value_2'
                },
                'string_list': [
                    self.random_string_generator(),
                    self.random_string_generator(),
                    self.random_string_generator()
                ],
                'boolean_field': True,
                'other_boolean_field': False,
                'null_field': None
            }
            yield serializer.serialize(record)
Example #9
    def as_dynamo_flat_dict(self):
        """
        Flattens out User.as_dict() output into a simple structure without any signature or metadata.
        Effectively, this outputs something like this:
        ```{'uuid': '11c8a5c8-0305-4524-8b41-95970baba84c', 'user_id': 'email|c3cbf9f5830f1358e28d6b68a3e4bf15', ...```
        `flatten()` is recursive.
        Note that this form cannot be verified or validated back since it's missing all attributes!

        Return: dynamodb serialized low level dict of user in a "flattened" form for dynamodb consumption in particular
        """
        user = self._clean_dict()

        def flatten(attrs):
            flat = {}
            for f in attrs:
                # Skip "schema"
                if isinstance(attrs[f], str):
                    continue
                if not set(["value", "values"]).isdisjoint(set(attrs[f])):
                    res = attrs[f].get("value", attrs[f].get("values"))
                    if res is not None and res != "":
                        flat[f] = res
                else:
                    flat[f] = flatten(attrs[f])

            return flat

        serializer = TypeSerializer()
        return {k: serializer.serialize(v) for k, v in flatten(user).items()}
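
For illustration (input invented), flatten() collapses the {'value': ...} / {'values': ...} wrappers and skips plain-string attributes such as the schema marker:

    flatten({'uuid': {'value': '11c8a5c8'}, 'ids': {'values': ['a', 'b']}, 'schema': 'v2'})
    # -> {'uuid': '11c8a5c8', 'ids': ['a', 'b']}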
Example #10
def convert_json_to_dynamo_json(input_json: list) -> list:
    """
    Re-Serializes the data into DynamoDB compatible JSON that can be used to put items into the dynamo table
    """
    # HACK: When serializing the JSON without dynamodb attribute types included, it wants to convert
    # The DynamoDB 'String Set' objects to DynamoDB List objects because python loads the data as lists and not sets.
    # I am choosing to go with DynamoDB attribute string sets, because I do not want duplicate entries for periods,
    # and it is easier to parse visually. The only drawback I have seen so far is that sets are unordered,
    # but since we are not evaluating the period string set responses in any particular order that should not matter.
    # TODO: could this use a custom decoder class (cls) instead of for loops?

    serializer = TypeSerializer()
    py_data, json_data = [], []

    logger.info(f"Converting JSON config to DynamoDB compatible JSON.")

    # Loop through JSON file data looking for Python object type list
    # Convert the list object into a set of strings
    # Store new data types as python object
    for data in input_json:
        for k, v in data.items():
            if isinstance(v, list):
                data[k] = set(v)
        py_data.append(data)

    # Serialize previously modified python object data into DynamoDB JSON
    for data in py_data:
        dynamo_data = {k: serializer.serialize(v) for k, v in data.items()}
        json_data.append(dynamo_data)

    return json_data
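
For illustration (input invented), the list-to-set conversion above is what makes the serializer emit a DynamoDB String Set ('SS', unordered) instead of a List ('L'):

    convert_json_to_dynamo_json([{'name': 'job1', 'periods': ['daily', 'weekly']}])
    # -> [{'name': {'S': 'job1'}, 'periods': {'SS': ['daily', 'weekly']}}]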
Example #11
    def serialize_output(value):
        try:
            serializer = TypeSerializer()
            for k, v in dict(value).items():
                value[k] = serializer.serialize(v)
        except BaseException:
            # Best effort: if `value` is not dict-like, return it unchanged.
            pass
        return value
Example #12
def dynamodb_put_item(ddb_client, table_name: str, item: dict):
    serializer = TypeSerializer()
    serialized_item = serializer.serialize(item)['M']
    try:
        ddb_client.put_item(TableName=table_name, Item=serialized_item)
    except ddb_client.exceptions.ResourceNotFoundException:
        raise TyphoonResourceNotFoundError(
            f'Table {table_name} does not exist in DynamoDB')
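
A hypothetical call (table name invented; assumes TyphoonResourceNotFoundError is defined elsewhere in the source project):

    import boto3

    dynamodb_put_item(boto3.client('dynamodb'), 'my-table',
                      {'pk': 'user#1', 'count': 3})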
Example #13
    def generate_items(self, num_items, start_key=0):
        serializer = TypeSerializer()
        for i in range(start_key, start_key + num_items):
            record = {
                'int_id': i,
                'string_field': self.random_string_generator(),
                'boolean_field': True,
            }
            yield serializer.serialize(record)
Example #14
def update_item_from_dict(table_name, key, dictionary, client):
    """
    Update the item identified by `key` in the DynamoDB `table` by adding
    all of the attributes in the `dictionary`.
    Args:
        table_name (str): Name of the DynamoDB table.
        key (dict): Primary key of the item to update.
        dictionary (dict): Attribute names and values to set on the item.
        client: Low-level DynamoDB client, e.g. boto3.client('dynamodb').

    Returns:
        dict: The updated attributes ('ALL_NEW'), or None if no response.
    """
    serializer = TypeSerializer()
    deserializer = TypeDeserializer()

    # Prepare data by generating an alphanumeric version of each key.
    # `pattern` is assumed to be a module-level compiled regex (not shown here)
    # that strips characters invalid in expression placeholder names.
    working_data = {k: [pattern.sub("", k), v] for k, v in dictionary.items()}

    updates_string = ', '.join(
        [f'#{v[0]} = :{v[0]}' for v in working_data.values()])
    update_expression = f'SET {updates_string}'
    attribute_names = {f'#{v[0]}': k for k, v in working_data.items()}
    attribute_values = {
        f':{v[0]}': serializer.serialize(v[1])
        for k, v in working_data.items()
    }
    item = client.update_item(
        TableName=table_name,
        Key={k: serializer.serialize(v)
             for k, v in key.items()},
        UpdateExpression=update_expression,
        ExpressionAttributeNames=attribute_names,
        ExpressionAttributeValues=attribute_values,
        ReturnValues='ALL_NEW',
    )
    if item:
        result_data = item.get('Attributes', {})
        output_data = {}
        for k, v in result_data.items():
            output_data[k] = deserializer.deserialize(v)
        return output_data
    else:
        return None
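
For illustration (call invented): update_item_from_dict('t', {'id': 'a1'}, {'my-field': 5}, client) would send UpdateExpression 'SET #myfield = :myfield' with ExpressionAttributeNames {'#myfield': 'my-field'} and ExpressionAttributeValues {':myfield': {'N': '5'}}, assuming `pattern` strips the hyphen.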
Example #15
    def fetchItems(hashes, projections, l18n='en'):
        projections.append("#h")
        print(projections)

        serializer = TypeSerializer()
        keys = [
            {
                'partition': serializer.serialize(f'{l18n}#{component}#{hash}'),
                'sort': serializer.serialize('version#current')
            }
            for component in hashes
            for hash in hashes[component]
        ]
        print(keys)

        result = []

        while len(keys) > 0:
            bulk = keys[:100]
            keys = keys[100:]

            # call bulk get
            response = dynamodb.batch_get_item(
                ReturnConsumedCapacity = 'TOTAL',
                RequestItems = {
                    'mywarmind-table': {
                        'Keys': bulk,
                        'ProjectionExpression': ", ".join(projections),
                        'ExpressionAttributeNames': {
                            '#h': 'hash'
                        }
                    }
                }
            )

            def deserializeItem(item):
                deserializer = TypeDeserializer()
                return {k: deserializer.deserialize(v) for k,v in item.items()}

            # 'Responses' is keyed by the table name used in RequestItems.
            result.extend(list(map(deserializeItem, response['Responses']['mywarmind-table'])))
            del response['Responses']
            print(json.dumps(response, cls=DecimalEncoder))

        return dict((int(item['hash']), item) for item in result)
Example #16
def dict_to_ddb(item):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    # TODO: narrow these types down
    """Converts a native Python dictionary to a raw DynamoDB item.

    :param dict item: Native item
    :returns: DynamoDB item
    :rtype: dict
    """
    serializer = TypeSerializer()
    return {key: serializer.serialize(value) for key, value in item.items()}
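
A hypothetical round trip (item invented for illustration):

    dict_to_ddb({'pk': 'a', 'n': 1, 'flag': True})
    # -> {'pk': {'S': 'a'}, 'n': {'N': '1'}, 'flag': {'BOOL': True}}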
Example #17
    def _get_dict(self):
        '''
        Parses self.__dict__ and returns only those objects that should be stored in the database.

        :rtype: dict
        '''
        ts = TypeSerializer()
        d = {}
        for a in [k for k in dir(self) if k not in dir(type(self))]:
            # limited to only instance attributes, not class attributes
            try:
                if (not inspect.ismethod(getattr(self, a))
                        and not inspect.isfunction(getattr(self, a))
                        and not a[0] == '_'
                        and not (hasattr(type(self), a)
                                 and isinstance(getattr(type(self), a), property))):
                    if not isinstance(getattr(self, a), Object):
                        # if DDB will choke on the data type, this will throw an error and prevent it from getting added to the dict
                        # however, we convert Objects to foreign-key references before saving, so don't do this if it's one of ours.
                        ts.serialize(getattr(self, a))
                    d[a] = getattr(self, a)
            except Exception:
                pass
                # logger.exception("Exception occurred while parsing attr {} of object {}.  NOT STORING.".format(str(a), str(self)))
        return d
Example #18
def dict_to_ddb(item):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    # narrow these types down
    # https://github.com/aws/aws-dynamodb-encryption-python/issues/66
    """Converts a native Python dictionary to a raw DynamoDB item.

    :param dict item: Native item
    :returns: DynamoDB item
    :rtype: dict
    """
    serializer = TypeSerializer()
    return {key: serializer.serialize(value) for key, value in item.items()}
Example #19
    def as_dynamo_flat_dict(self):
        """
        Flattens out User.as_dict() output into a simple structure without any signature or metadata.
        Effectively, this outputs something like this:
        ```{'uuid': '11c8a5c8-0305-4524-8b41-95970baba84c', 'user_id': 'email|c3cbf9f5830f1358e28d6b68a3e4bf15', ...```
        `flatten()` is recursive.
        Note that this form cannot be verified or validated back since it's missing all attributes!

        Return: dynamodb serialized low level dict of user in a "flattened" form for dynamodb consumption in particular
        """
        user = self._clean_dict()

        def sanitize(attrs):
            # Types whose values need no sanitization to serialize.
            supported_base_types = [type(None), bool, int, float]

            # Empty strings cannot be sanitized.
            def is_nonempty_str(s):
                return isinstance(s, str) and len(s) > 0

            def not_empty_str(v):
                return not isinstance(v, str) or is_nonempty_str(v)

            if type(attrs) in supported_base_types or is_nonempty_str(attrs):
                return attrs

            # We want to remove empty strings from lists and sanitize everything else.
            if isinstance(attrs, list):
                cleaned = filter(not_empty_str, attrs)

                return list(map(sanitize, cleaned))

            # We are dealing with a dictionary.
            cleaned = {
                key: sanitize(value)
                for key, value in attrs.items()
                if not_empty_str(key) and not_empty_str(value)
            }

            # If we have a dictionary, we want to ensure it only has one of either
            # the "value" key or "values" key.
            has_value = "value" in cleaned
            has_values = "values" in cleaned

            if has_value != has_values:
                return cleaned.get("value", cleaned.get("values"))

            return cleaned

        serializer = TypeSerializer()
        return {k: serializer.serialize(v) for k, v in sanitize(user).items()}
Example #20
    def build_shopify_url(self):
        # Get page number
        ts = TypeSerializer()
        pageNumber = int(ts.serialize(self.params['pages'])['N'])
        query_params = [{
            "url": "{}products.json?page={}".format(self.params['url'], p),
            "country": self.params['country'],
            "current_datetime": self.current_datetime,
            "id_shop": self.idTable
        } for p in range(1, pageNumber + 1)]
        return query_params
Example #21
class DdbDeserializer(ff.DomainService):
    _serializer: TypeSerializer = None
    _deserializer: TypeDeserializer = None

    def __init__(self):
        self._serializer = TypeSerializer()
        self._deserializer = TypeDeserializer()

    def serialize(self, data):
        return self._serializer.serialize(data)

    def deserialize(self, data: dict):
        return {
            k: self._deserializer.deserialize(v)
            for k, v in data.items()
        }
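
Hypothetical usage (values invented; assumes ff.DomainService allows direct construction). Note the asymmetry: serialize() wraps a single value, while deserialize() expects a whole item map.

    svc = DdbDeserializer()
    svc.serialize({'a': 1})             # -> {'M': {'a': {'N': '1'}}}
    svc.deserialize({'a': {'N': '1'}})  # -> {'a': Decimal('1')}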
Example #22
    def generate_items(self, num_items, start_key=0):
        serializer = TypeSerializer()
        for i in range(start_key, start_key + num_items):
            record = {
                'int_id': i,
                'decimal_field': decimal.Decimal(str(i) + '.00000000001'),
                'string_field': self.random_string_generator(),
                'byte_field': b'some_bytes',
                'int_list_field': [i, i + 1, i + 2],
                'int_set_field': {i, i + 1, i + 2},
                'map_field': {
                    'map_entry_1': 'map_value_1',
                    'map_entry_2': 'map_value_2',
                    'list_entry': [i, i + 1, i + 2]
                },
                'list_map': [{'a': 1, 'b': 2}, {'a': 100, 'b': 200}],
                'string_list': [
                    self.random_string_generator(),
                    self.random_string_generator(),
                    self.random_string_generator()
                ],
                'boolean_field': True,
                'other_boolean_field': False,
                'null_field': None
            }
            yield serializer.serialize(record)
Example #23
class DynamoDbClient:
    """
    Has default methods for different types of DynamoDB tables.

    The current implementation supports only one fixed table during initialization,
    but you are free to initialize multiple simultaneous dynamo_clients in your Lambda with different configs.

    Config should have a mapping for the field types and required fields.
    Config example:

    .. code-block:: python

        {
            'row_mapper':     {
                'col_name_1':      'N', # Number
                'col_name_2':      'S', # String
            },
            'required_fields': ['col_name_1'],
            'table_name': 'some_table_name',  # If a table is not specified, this table will be used.
            'hash_key': 'the_hash_key',
            'dont_json_loads_results': True  # Use this if you don't want JSON-looking strings parsed into dicts
        }

    """
    def __init__(self, config):
        assert isinstance(config, dict), "Config must be provided during DynamoDbClient initialization"

        # If this is a test, make sure the table is a test table
        if os.environ.get('STAGE') == 'test' and 'table_name' in config:
            assert config['table_name'].startswith('autotest_') or config['table_name'] == 'config', \
                f"Bad table name {config['table_name']} in autotest"

        self.config = config

        if not str(config.get('table_name')).startswith('autotest_mock_'):
            self.dynamo_client = boto3.client('dynamodb')
        else:
            logger.info(f"Initialized DynamoClient without boto3 client for table {config.get('table_name')}")

        # storage for table description(s)
        self._table_descriptions: Optional[Dict[str, Dict]] = {}

        # initialize table store
        self._table_capacity = {}
        self.identify_dynamo_capacity(table_name=self.config['table_name'])

        self.stats = defaultdict(int)
        if not hasattr(self, 'row_mapper'):
            self.row_mapper = self.config.get('row_mapper')

        self.type_serializer = TypeSerializer()
        self.type_deserializer = TypeDeserializer()


    def identify_dynamo_capacity(self, table_name=None):
        """Identify and store the table capacity for a given table on the object

        Arguments:
            table_name {str} -- short name of the dynamo db table to analyze
        """
        # Use the config value if not provided
        if table_name is None:
            table_name = self.config['table_name']
            logging.debug("Got `table_name` from config: {table_name}")

        logging.debug(f"DynamoDB table name identified as {table_name}")

        # Fetch the actual configuration of the dynamodb table directly from AWS
        table_description = self._describe_table(table_name)
        # Extract the provisioned capacity from the description
        table_capacity = table_description["Table"]["ProvisionedThroughput"]

        self._table_capacity[table_name] = {
            'read': int(table_capacity["ReadCapacityUnits"]),
            'write': int(table_capacity["WriteCapacityUnits"]),
        }


    def _describe_table(self, table_name: Optional[str] = None) -> Dict:
        """
        Returns description of the table from AWS. Response like:
        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.describe_table

        :return: Description of the table
        """

        table_name = self._get_validate_table_name(table_name)

        if self._table_descriptions and table_name in self._table_descriptions:
            return self._table_descriptions[table_name]
        else:
            table_description = self.dynamo_client.describe_table(TableName=table_name)
            self._table_descriptions[table_name] = table_description
            return table_description


    def get_table_keys(self, table_name: Optional[str] = None) -> Tuple[str, Optional[str]]:
        """
        Returns table's hash key name and range key name

        :param table_name:
        :return: hash key and range key names
        """

        table_description = self._describe_table(table_name)
        key_schema: List[Dict[str, str]] = table_description['Table']['KeySchema']
        hash_key = range_key = None

        for key in key_schema:
            if key['KeyType'] == 'HASH':
                hash_key = key['AttributeName']
            elif key['KeyType'] == 'RANGE':
                range_key = key['AttributeName']

        return hash_key, range_key


    def get_table_indexes(self, table_name: Optional[str] = None) -> Dict:
        """
        Returns **active** indexes of the table: their hash key, range key, and projection type.

        .. code-block:: python

           {
               'index_1_name': {
                   'projection_type': 'ALL',  # One of: 'ALL'|'KEYS_ONLY'|'INCLUDE'
                   'hash_key': 'the_hash_key_column_name',
                   'range_key': 'the_range_key_column_name',  # Can be None if the index has no range key
                   'provisioned_throughput': {
                       'write_capacity': 5,
                       'read_capacity': 10
                   }
               },
               'index_2_name': ...
           }

        """

        indexes = {}

        table_description = self._describe_table(table_name)
        local_secondary_indexes = table_description['Table'].get('LocalSecondaryIndexes', [])
        global_secondary_indexes = table_description['Table'].get('GlobalSecondaryIndexes', [])

        for index in local_secondary_indexes + global_secondary_indexes:

            if index.get('IndexStatus') is not None and index.get('IndexStatus') != 'ACTIVE':
                # Only global sec. indexes have IndexStatus, and if it's not ready for use, we don't return it
                continue

            name = index['IndexName']
            projection_type = index['Projection']['ProjectionType']  # 'ALL'|'KEYS_ONLY'|'INCLUDE'

            key_schema = index['KeySchema']
            hash_key = range_key = None

            for key in key_schema:
                if key['KeyType'] == 'HASH':
                    hash_key = key['AttributeName']
                elif key['KeyType'] == 'RANGE':
                    range_key = key['AttributeName']

            # Get write & read capacity.
            # global sec. indexes have their own capacities, while a local sec. index uses the capacity of the table.
            write_capacity = index.get('ProvisionedThroughput', {}).get('WriteCapacityUnits') or \
                             table_description['Table']['ProvisionedThroughput']['WriteCapacityUnits']
            read_capacity = index.get('ProvisionedThroughput', {}).get('ReadCapacityUnits') or \
                            table_description['Table']['ProvisionedThroughput']['ReadCapacityUnits']

            indexes[name] = {
                'projection_type': projection_type,
                'hash_key': hash_key,
                'range_key': range_key,
                'provisioned_throughput': {
                    'write_capacity': write_capacity,
                    'read_capacity': read_capacity
                }
            }

        return indexes


    def dynamo_to_dict(self, dynamo_row: Dict, strict: bool = None, fetch_all_fields: Optional[bool] = None) -> Dict:
        """
        Convert the ugly DynamoDB syntax of the row to a regular dictionary.
        We currently support only String or Numeric values. The latter are converted to int or float.
        Takes settings from row_mapper.

        e.g.:               {'key1': {'N': '3'}, 'key2': {'S': 'value2'}}
        will convert to:    {'key1': 3, 'key2': 'value2'}

        :param dict dynamo_row:       DynamoDB row item
        :param bool strict:           DEPRECATED.
        :param bool fetch_all_fields: If False only row_mapper fields will be extracted from dynamo_row, else, all
                                      fields will be extracted from dynamo_row.
        :return: The row in a key-value format
        :rtype: dict
        """

        if strict is not None:
            logging.warning(f"dynamo_to_dict `strict` variable is deprecated in sosw 0.7.13+. "
                            f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)")
        fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict
        result = {}

        # Get fields from dynamo_row which are present in row mapper
        if not fetch_all_fields:
            for key, key_type in self.row_mapper.items():
                val_dict = dynamo_row.get(key)  # Ex: {'N': "1234"} or {'S': "myvalue"}
                if val_dict:
                    val = val_dict.get(key_type)  # Ex: 1234 or "myvalue"

                    # type_deserializer.deserialize() parses 'N' to `Decimal` type but it can't be parsed to a datetime
                    # so we cast it to either an integer or a float.
                    if key_type == 'N':
                        result[key] = float(val) if '.' in val else int(val)
                    elif key_type == 'M':
                        result[key] = self.dynamo_to_dict(val, fetch_all_fields=True)
                    elif key_type == 'S':
                        # Try to load to a dictionary if looks like JSON.
                        if val.startswith('{') and val.endswith('}') and \
                                not self.config.get('dont_json_loads_results'):
                            try:
                                result[key] = json.loads(val)
                            except ValueError:
                                logger.warning(f"A JSON-looking string failed to parse: {val}")
                                result[key] = val
                        else:
                            result[key] = val
                    else:
                        result[key] = self.type_deserializer.deserialize(val_dict)

        # Get all fields from dynamo_row
        else:
            for key, val_dict in dynamo_row.items():
                for val_type, val in val_dict.items():

                    # type_deserializer.deserialize() parses 'N' to `Decimal` type but it can't be parsed to a datetime
                    # so we cast it to either an integer or a float.
                    if val_type == 'N':
                        result[key] = float(val) if '.' in val else int(val)
                    elif val_type == 'M':
                        result[key] = self.dynamo_to_dict(val, fetch_all_fields=True)
                    elif val_type == 'S':
                        # Try to load to a dictionary if looks like JSON.
                        if val.startswith('{') and val.endswith('}') and \
                                not self.config.get('dont_json_loads_results'):
                            try:
                                result[key] = json.loads(val)
                            except ValueError:
                                logger.warning(f"A JSON-looking string failed to parse: {val}")
                                result[key] = val
                        else:
                            result[key] = val
                    else:
                        result[key] = self.type_deserializer.deserialize(val_dict)

        assert all(result.get(x) for x in self.config['required_fields']), "Some `required_fields` are missing"
        return result
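
    # Illustration (not in the original source): with row_mapper
    # {'col_name_1': 'N', 'col_name_2': 'S'} and fetch_all_fields=False,
    #     self.dynamo_to_dict({'col_name_1': {'N': '3'}, 'col_name_2': {'S': 'v'}, 'extra': {'S': 'x'}})
    # returns {'col_name_1': 3, 'col_name_2': 'v'} -- 'extra' is dropped because
    # it is not in the row_mapper.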


    def dict_to_dynamo(self, row_dict, add_prefix=None, strict=True):
        """
        Convert the row from regular dictionary to the ugly DynamoDB syntax. Takes settings from row_mapper.

        e.g.                {'key1': 'value1', 'key2': 'value2'}
        will convert to:    {'key1': {'Type1': 'value1'}, 'key2': {'Type2': 'value2'}}

        :param dict row_dict:   A row we want to convert to dynamo syntax.
        :param str add_prefix:  A string prefix to add to the key in the result dict. Useful for queries like update.
        :param bool strict:     If False, will get the type from the value in the dict (this works for numbers and
                                strings). If True, won't add them if they're not in the required_fields, and if they
                                are, will raise an error.

        :return:                DynamoDB Task item
        :rtype:                 dict
        """

        if add_prefix is None:
            add_prefix = ''

        result = {}

        # Keys from row mapper
        for key, key_type in self.row_mapper.items():
            val = row_dict.get(key)
            if val is not None:
                key_with_prefix = f"{add_prefix}{key}"
                if key_type == 'BOOL':
                    result[key_with_prefix] = {'BOOL': to_bool(val)}
                elif key_type == 'N':
                    result[key_with_prefix] = {'N': str(val)}
                elif key_type == 'S':
                    result[key_with_prefix] = {'S': str(val)}
                elif key_type == 'M':
                    result[key_with_prefix] = {'M': self.dict_to_dynamo(val, strict=False)}
                else:
                    result[key_with_prefix] = self.type_serializer.serialize(val)

        result_keys = result.keys()
        if add_prefix:
            result_keys = [x[len(add_prefix):] for x in result.keys()]

        # Keys which are not in row mapper
        for key in list(set(row_dict.keys()) - set(result_keys)):
            if not strict:
                val = row_dict.get(key)
                key_with_prefix = f"{add_prefix}{key}"
                if isinstance(val, bool):
                    result[key_with_prefix] = {'BOOL': to_bool(val)}
                elif isinstance(val, (int, float)) or (isinstance(val, str) and val.isnumeric()):
                    result[key_with_prefix] = {'N': str(val)}
                elif isinstance(val, str):
                    result[key_with_prefix] = {'S': str(val)}
                elif isinstance(val, dict):
                    result[key_with_prefix] = {'M': self.dict_to_dynamo(val, strict=False)}
                else:
                    result[key_with_prefix] = self.type_serializer.serialize(val)
            else:
                if key not in self.config.get('required_fields', []):
                    logger.warning(f"Field {key} is missing from row_mapper, so we can't convert it to DynamoDB "
                                   f"syntax. This is not a required field, so we continue, but please investigate "
                                   f"row: {row_dict}")
                else:
                    raise ValueError(f"Field {key} is missing from row_mapper, so we can't convert it to DynamoDB "
                                     f"syntax. This is a required field, so we can not continue. Row: {row_dict}")

        logger.debug(f"dict_to_dynamo result: {result}")
        return result
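
    # Illustration (not in the original source): with the same row_mapper,
    #     self.dict_to_dynamo({'col_name_1': 3, 'col_name_2': 'v'}, add_prefix=':')
    # returns {':col_name_1': {'N': '3'}, ':col_name_2': {'S': 'v'}} -- the ':'
    # prefix makes the result directly usable as ExpressionAttributeValues.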


    def get_by_query(self, keys: Dict, table_name: Optional[str] = None, index_name: Optional[str] = None,
                     comparisons: Optional[Dict] = None, max_items: Optional[int] = None,
                     filter_expression: Optional[str] = None, strict: bool = None, return_count: bool = False,
                     desc: bool = False, fetch_all_fields: bool = None) -> Union[List[Dict], int]:
        """
        Get an item from a table, by some keys. Can specify an index.
        If an index is not specified, will query the table.
        IMPORTANT: You must specify the rows you expect to be converted in row mapper in config, otherwise you won't
        get them in the result.
        If you want to get items from dynamo by non-key attributes, this method is not for you.

        :param dict keys: Keys and values of the items we get.
            You must specify the hash key, and can optionally also add the range key.
            Example, in a table where the hash key is 'hk' and the range key is 'rk':
            * {'hk': 'cat', 'rk': '123'}
            * {'hk': 'cat'}

        Optional

        :param str table_name:  Name of the dynamo table. If not specified, will use table_name from the config.
        :param str index_name:  Name of the secondary index in the table. If not specified, will query the table itself.
        :param dict comparisons: Type of comparison for each key. If a key is not mentioned, comparison type will be =.
            Valid values: `=`, `<`, `<=`, `>`, `>=`, `begins_with`.
            Comparisons only work for the range key.
            Example: if keys={'hk': 'cat', 'rk': 100} and comparisons={'rk': '<='} -> will get items where rk <= 100

        :param int max_items:   Limit the number of items to fetch.
        :param str filter_expression:  Supports regular comparisons and `between`. Input must be a regular human string
            e.g. 'key <= 42', 'name = marta', 'foo between 10 and 20', etc.
        :param bool strict: DEPRECATED.
        :param bool return_count: If True, will return the number of items in the result instead of the items themselves
        :param bool desc:    By default (False) the values will be sorted ascending by the SortKey.
                             To reverse the order set the argument `desc = True`.
        :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper.
                                      If True, will get all attributes. Default is False.

        :return: List of items from the table, each item in key-value format
            OR the count if `return_count` is True
        """

        if strict is not None:
            logging.warning(f"get_by_query `strict` variable is deprecated in sosw 0.7.13+. "
                            f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)")
        fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict

        table_name = self._get_validate_table_name(table_name)

        filter_values = self.dict_to_dynamo(keys, add_prefix=':', strict=False)
        cond_expr_parts = []

        for key_attr_name in keys:
            # Find comparison for key. The formatting of conditions could be different, so a little spaghetti.
            if key_attr_name.startswith('st_between_'):  # This is just a marker to construct a custom expression later
                compr = 'between'
            elif key_attr_name.startswith('en_between_'):  # This attribute is used in the expression with st_between
                continue
            elif comparisons:
                compr = comparisons.get(key_attr_name) or '='
            else:
                compr = '='

            if compr == 'begins_with':
                cond_expr_parts.append(f"begins_with ({key_attr_name}, :{key_attr_name})")

            elif compr == 'between':
                key = key_attr_name[11:]
                cond_expr_parts.append(f"{key} between :st_between_{key} and :en_between_{key}")
            else:
                assert compr in ('=', '<', '<=', '>', '>='), f"Comparison not valid: {compr} for {key_attr_name}"
                cond_expr_parts.append(f"{key_attr_name} {compr} :{key_attr_name}")

        cond_expr = " AND ".join(cond_expr_parts)

        select = ('ALL_ATTRIBUTES' if index_name is None else 'ALL_PROJECTED_ATTRIBUTES') if not return_count else 'COUNT'

        logger.debug("Condition expression: %s, filter values: %s", cond_expr, filter_values)
        query_args = {
            'TableName':                 table_name,
            'Select':                    select,
            'ExpressionAttributeValues': filter_values,  # Ex: {':key1_name': 'key1_value', ...}
            'KeyConditionExpression':    cond_expr  # Ex: "key1_name = :key1_name AND ..."
        }

        # In case we have a filter expression, we parse it and add variables (values) to the ExpressionAttributeValues
        # Expression is also transformed to use these variables.
        if filter_expression:
            expr, values = self._parse_filter_expression(filter_expression)
            query_args['FilterExpression'] = expr
            query_args['ExpressionAttributeValues'].update(values)

        if index_name:
            query_args['IndexName'] = index_name

        if max_items:
            query_args['PaginationConfig'] = {'MaxItems': max_items}
            if return_count:
                raise Exception(f"DynamoDbCLient.get_by_query does not support `max_items` and `return_count` together")

        if desc:
            query_args['ScanIndexForward'] = False

        logger.debug(f"Querying dynamo: {query_args}")

        paginator = self.dynamo_client.get_paginator('query')
        response_iterator = paginator.paginate(**query_args)
        result = []

        if return_count:
            return sum([page['Count'] for page in response_iterator])

        for page in response_iterator:
            result += [self.dynamo_to_dict(x, fetch_all_fields=fetch_all_fields) for x in page['Items']]
            self.stats['dynamo_get_queries'] += 1
            if max_items and len(result) >= max_items:
                break

        return result[:max_items] if max_items else result
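
    # Illustration (not in the original source): for a table whose hash key is
    # 'hk' and range key is 'rk',
    #     self.get_by_query({'hk': 'cat', 'rk': 100}, comparisons={'rk': '<='})
    # builds KeyConditionExpression "hk = :hk AND rk <= :rk" and returns the
    # matching rows converted through dynamo_to_dict().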


    def _parse_filter_expression(self, expression: str) -> Tuple[str, Dict]:
        """
        Converts FilterExpression to Dynamo syntax. We still do not support some operators. Feel free to implement:
        https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.OperatorsAndFunctions.html

        Supported: regular comparators, between, attribute_[not_]exists

        :return:  Returns a tuple of the transformed expression and extracted variables already Dynamo formatted.
        """

        assert isinstance(expression, str), f"Filter expression must be a string: {expression}"

        words = [x.strip() for x in expression.split()]

        # Filter Expression should be 2, 3 or 5 words. See doc for more details.
        # This must be a function
        if len(words) == 2:
            operator, key = words
            assert operator.lower() in ('attribute_exists', 'attribute_not_exists')
            result_expr, result_values = f"{operator} ({key})", {}

        # This must be a regular comparison
        elif len(words) == 3:
            key, operator, value = words
            assert operator in ('=', '<>', '<', '<=', '>', '>='), f"Unsupported operator for filtering: {expression}"

            # It is important to add prefix to value here to avoid attribute naming conflicts for example
            # in conditional_update expressions. e.g you update some field only if it's value is matching condition.
            result_expr = f"{key} {operator} :filter_{key}"
            result_values = self.dict_to_dynamo({f"filter_{key}": words[-1]}, add_prefix=':', strict=False)

        # This must be `between` statement.
        elif len(words) == 5:
            assert (words[1].lower(), words[3].lower()) == ('between', 'and'), \
                f"Unsupported expression for Filtering: {expression}"
            key = words[0]
            result_expr = f"{key} between :st_between_{key} and :en_between_{key}"
            result_values = self.dict_to_dynamo({f"st_between_{key}": words[2],
                                                 f"en_between_{key}": words[4]}, add_prefix=':', strict=False)
        else:
            raise ValueError(f"Unsupported expression for Filtering: {expression}")

        return result_expr, result_values
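
    # Illustration (not in the original source):
    #     self._parse_filter_expression('foo between 10 and 20')
    # returns ("foo between :st_between_foo and :en_between_foo",
    #          {':st_between_foo': {'N': '10'}, ':en_between_foo': {'N': '20'}}).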


    def get_by_scan(self, attrs=None, table_name=None, strict=None, fetch_all_fields=None):
        """
        Scans a table. Don't use this method if you want to select by keys. It is SLOW compared to get_by_query.
        Careful - don't make queries of too many items, this could run for a long time.

        Optional:

        :param dict attrs: Attribute names and values of the items we get. Can be empty to get the whole table.
        :param str table_name: Name of the dynamo table. If not specified, will use table_name from the config.
        :param bool strict: DEPRECATED.
        :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper.
            If True, will get all attributes. Default is False.
        :return: List of items from the table, each item in key-value format
        :rtype: list
        """

        if strict is not None:
            logging.warning(f"get_by_query `strict` variable is deprecated in sosw 0.7.13+. "
                            f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)")
        fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict

        response_iterator = self._build_scan_iterator(attrs, table_name)

        result = []
        for page in response_iterator:
            result += [self.dynamo_to_dict(x, fetch_all_fields=fetch_all_fields) for x in page['Items']]
            self.stats['dynamo_scan_queries'] += 1

        return result


    def get_by_scan_generator(self, attrs=None, table_name=None, strict=None, fetch_all_fields=None):
        """
        Scans a table. Don't use this method if you want to select by keys. It is SLOW compared to get_by_query.
        Careful - don't make queries of too many items, this could run for a long time.
        Same as get_by_scan, but yields parts of the results.

        Optional:

        :param dict attrs: Attribute names and values of the items we get. Can be empty to get the whole table.
        :param str table_name: Name of the dynamo table. If not specified, will use table_name from the config.
        :param bool strict: DEPRECATED.
        :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper.
            If True, will get all attributes. Default is False.
        :return: List of items from the table, each item in key-value format
        :rtype: list
        """

        if strict is not None:
            logging.warning(f"get_by_query `strict` variable is deprecated in sosw 0.7.13+. "
                            f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)")
        fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict

        response_iterator = self._build_scan_iterator(attrs, table_name)
        for page in response_iterator:
            self.stats['dynamo_scan_queries'] += 1
            yield [self.dynamo_to_dict(x, fetch_all_fields=fetch_all_fields) for x in page['Items']]


    def _build_scan_iterator(self, attrs=None, table_name=None):
        table_name = self._get_validate_table_name(table_name)

        filter_values = None
        cond_expr = None
        if attrs:
            filter_values = self.dict_to_dynamo(attrs, add_prefix=':', strict=False)

            cond_expr_parts = []

            for key_attr_name in attrs:
                cond_expr_parts.append(f"{key_attr_name} = :{key_attr_name}")

            cond_expr = " AND ".join(cond_expr_parts)

        query_args = {
            'TableName': table_name,
            'Select':    'ALL_ATTRIBUTES',
        }
        if cond_expr:
            query_args['FilterExpression'] = cond_expr
        if filter_values:
            query_args['ExpressionAttributeValues'] = filter_values

        logger.debug(f"Scanning dynamo: {query_args}")

        paginator = self.dynamo_client.get_paginator('scan')
        response_iterator = paginator.paginate(**query_args)
        return response_iterator


    def batch_get_items_one_table(self, keys_list, table_name=None, max_retries=0, retry_wait_base_time=0.2,
                                  strict=None, fetch_all_fields=None):
        """
        Gets a batch of items from a single dynamo table.
        Only accepts keys, can't query by other columns.

        :param list keys_list: A list of the keys of the items we want to get. Gets the items that match the given keys.
                               If some key doesn't exist - it just skips it and gets the others.
                               e.g. [{'hash_col': 1, 'range_col': 2}, {'hash_col': 3}]
                               - will get a row where `hash_col` is 1 and `range_col` is 2, and also all rows where
                               `hash_col` is 3.

        Optional

        :param str table_name:
        :param int max_retries: If failed to get some items, retry this many times. Waiting between retries is
                                multiplied by 2 after each retry, so `max_retries` shouldn't be a big number.
                                Default is 0.
        :param int retry_wait_base_time: Wait this much time after first retry. Will wait twice longer in each retry.
        :param bool strict: DEPRECATED.
        :param bool fetch_all_fields: If False, will only get the attributes specified in the row mapper.
                                      If True, will get all attributes. Default is False.
        :return: List of items from the table
        :rtype: list
        """

        if strict is not None:
            logging.warning(f"batch_get_items_one_table `strict` variable is deprecated in sosw 0.7.13+. "
                            f"Please replace it's usage with `fetch_all_fields` (and reverse the boolean value)")
        fetch_all_fields = fetch_all_fields if fetch_all_fields is not None else False if strict is None else not strict

        table_name = self._get_validate_table_name(table_name)

        # Convert given keys to dynamo syntax
        query_keys = [self.dict_to_dynamo(item) for item in keys_list]

        # Check if we skipped something - if we did, try again.
        def get_unprocessed_keys(db_result):
            return 'UnprocessedKeys' in db_result and db_result['UnprocessedKeys'] \
                   and table_name in db_result['UnprocessedKeys'] and db_result['UnprocessedKeys'][table_name]['Keys']

        all_items = []

        for query_keys_chunk in chunks(query_keys, 100):

            batch_get_item_query = {
                'RequestItems': {
                    table_name: {
                        'Keys': query_keys_chunk
                    }
                }
            }

            logger.debug(f"batch_get_item query: {batch_get_item_query}")
            latest_result = self.dynamo_client.batch_get_item(**batch_get_item_query)
            logger.debug(f"latest_result: {latest_result}")
            unprocessed_keys = get_unprocessed_keys(latest_result)
            all_items += latest_result['Responses'][table_name]
            logger.debug(f"batch_get_items_one_table response: {latest_result}")

            if unprocessed_keys:
                # Retry several times
                retry_num = 0
                wait_time = retry_wait_base_time
                while unprocessed_keys and retry_num < max_retries:
                    logger.warning(f"batch_get_item action did NOT finish successfully.")
                    time.sleep(wait_time)
                    batch_get_item_query['RequestItems'][table_name]['Keys'] = unprocessed_keys
                    latest_result = self.dynamo_client.batch_get_item(**batch_get_item_query)
                    logger.debug(f"latest_result: {latest_result}")
                    all_items += latest_result['Responses'][table_name]
                    retry_num += 1
                    wait_time *= 2
                    unprocessed_keys = get_unprocessed_keys(latest_result)

            # If we still have unprocessed keys after the retries, raise an exception
            if get_unprocessed_keys(latest_result):
                raise Exception(f"batch_get_items action failed for table {table_name}, keys_list {keys_list}")

        result = []
        for item in all_items:
            result.append(self.dynamo_to_dict(item, fetch_all_fields=fetch_all_fields))

        return result


    def build_put_query(self, row, table_name=None, overwrite_existing=True):
        table_name = self._get_validate_table_name(table_name)
        dynamo_formatted_row = self.dict_to_dynamo(row, strict=False)
        query = {
            'TableName': table_name,
            'Item':      dynamo_formatted_row
        }
        if not overwrite_existing:
            hash_key = self.config['hash_key']
            query['ConditionExpression'] = f"attribute_not_exists({hash_key})"
        return query


    def build_delete_query(self, delete_keys: Dict, table_name: str = None):
        table_name = self._get_validate_table_name(table_name)
        dynamo_formatted_row = self.dict_to_dynamo(delete_keys, strict=False)
        query = {
            'TableName': table_name,
            'Key':       dynamo_formatted_row
        }
        return query


    def put(self, row, table_name=None, overwrite_existing=True):
        """
        Adds a row to the database

        :param dict row:                The row to add to the table. key is column name, value is value.
        :param string table_name:       Name of the dynamo table to add the row to.
        :param bool overwrite_existing: Overwrite the existing row if True, otherwise raise an exception if the row exists.
        """

        table_name = self._get_validate_table_name(table_name)

        put_query = self.build_put_query(row, table_name, overwrite_existing)
        logger.debug(f"Put to DB: {put_query}")

        dynamo_response = self.dynamo_client.put_item(**put_query)

        logger.debug(f"Response from dynamo {dynamo_response}")

        self.stats['dynamo_put_queries'] += 1


    def create(self, row, table_name=None):
        self.put(row, table_name, overwrite_existing=False)


    # @benchmark
    def update(self, keys: Dict, attributes_to_update: Optional[Dict] = None,
               attributes_to_increment: Optional[Dict] = None, table_name: Optional[str] = None,
               condition_expression: Optional[str] = None):
        """
        Updates an item in DynamoDB. Will create a new item if it doesn't exist.
        If you want to make sure it exists, use the ``patch`` method.

        :param dict keys:
            Keys and values of the row we update.
            Example, in a table where the hash key is 'hk' and the range key is 'rk':
            {'hk': 'cat', 'rk': '123'}
        :param dict attributes_to_update:
            Dict of the attributes to be updated.
            Can contain both existing attributes and new attributes.
            Will update existing, and create new attributes.
            Example: {'col_name': 'some_value'}
        :param dict attributes_to_increment:
            Attribute names to increment, and the value to increment by. If the attribute doesn't exist, will create it.
            Example: {'some_counter': '3'}
        :param str condition_expression: Condition Expression that must be fulfilled on the object to update.
        :param str table_name: Name of the table
        """

        table_name = self._get_validate_table_name(table_name)

        if not attributes_to_update and not attributes_to_increment:
            raise ValueError(f"In dynamodb.update, please specify either attributes_to_update "
                             f"or attributes_to_increment")

        expression_attributes = {}
        update_expr_parts = []
        attribute_values = {}
        if attributes_to_update:
            for col in attributes_to_update:
                update_expr_parts.append(f"#{col} = :{col}")
                expression_attributes[f"#{col}"] = col

        if attributes_to_increment:
            for col in attributes_to_increment:
                update_expr_parts.append(f"#{col} = if_not_exists(#{col}, :zero) + :{col}")
                expression_attributes[f"#{col}"] = col
                attribute_values.update({'zero': '0'})

        keys = self.dict_to_dynamo(keys, strict=False)

        attribute_values.update((attributes_to_update or {}))
        attribute_values.update(attributes_to_increment or {})
        attribute_values = self.dict_to_dynamo(attribute_values.copy(), add_prefix=":", strict=False)

        update_expr = "SET " + ", ".join(update_expr_parts)

        update_item_query = {
            'ExpressionAttributeNames':  expression_attributes,  # Ex. {'#attr_name': 'attr_name', ...}
            'ExpressionAttributeValues': attribute_values,  # Ex. {':attr_name': 'some_value', ...}
            'Key':                       keys,  # Ex. {'key_name':   'key_value', ...}
            'TableName':                 table_name,
            'UpdateExpression':          update_expr  # Ex. "SET #attr_name = :attr_name, ..."
        }

        if condition_expression:
            expr, values = self._parse_filter_expression(condition_expression)
            update_item_query['ConditionExpression'] = expr
            update_item_query['ExpressionAttributeValues'].update(values)

        logger.debug(f"Updating an item, query: {update_item_query}")
        response = self.dynamo_client.update_item(**update_item_query)
        logger.debug(f"Update result: {response}")
        self.stats['dynamo_update_queries'] += 1
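
    # Usage sketch (assumes hash key 'hk' and range key 'rk'), incrementing a
    # counter while updating a regular attribute:
    #
    #     client.update({'hk': 'cat', 'rk': '123'},
    #                   attributes_to_update={'status': 'fed'},
    #                   attributes_to_increment={'meals': '1'})
    #
    # This builds an expression like:
    #     "SET #status = :status, #meals = if_not_exists(#meals, :zero) + :meals"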


    def patch(self, keys: Dict, attributes_to_update: Optional[Dict] = None,
              attributes_to_increment: Optional[Dict] = None, table_name: Optional[str] = None):
        """
        Updates an item in DynamoDB. Will fail if an item with these keys does not exist.
        """

        hash_key = self.config['hash_key']
        condition_expression = f'attribute_exists {hash_key}'
        self.update(keys, attributes_to_update, attributes_to_increment, table_name, condition_expression)


    def delete(self, keys: Dict, table_name: Optional[str] = None):
        """

        :param dict keys: Keys and values of the row we delete.
        :param table_name:
        """

        query = self.build_delete_query(keys, table_name)
        self.dynamo_client.delete_item(**query)


    def make_put_transaction_item(self, row, table_name=None):
        return {'Put': self.build_put_query(row, table_name)}


    def make_delete_transaction_item(self, keys, table_name):
        return {'Delete': self.build_delete_query(keys, table_name)}


    def transact_write(self, *transactions: Dict):
        """
        Executes multiple write operations as transactions. Can execute operations on different tables.
        Splits the operations into chunks, because transact_write_items accepts at most 10 actions per call.
        WARNING: operations beyond a single chunk of 10 run in separate transactions, so atomicity
        is not guaranteed across more than 10 operations.

        .. code-block:: python

            dynamo_db_client = DynamoDbClient(config)
            t1 = dynamo_db_client.make_put_transaction_item(row, table_name='table1')
            t2 = dynamo_db_client.make_delete_transaction_item(row, table_name='table2')
            dynamo_db_client.transact_write(t1, t2)

        """

        supported_actions = ['Put', 'Delete']
        for t in transactions:
            assert isinstance(t, dict), "transaction must be a dictionary"
            assert len(t) == 1, "one transaction must contain only one operation"
            action = list(t.keys())[0]
            assert action in supported_actions, f"Bad action '{action}'. " \
                                                f"Supported actions: {', '.join(supported_actions)}"
            assert isinstance(t[action], dict), f"transaction[{action}] must be a dictionary. bad type: " \
                                                f"{type(t[action])}"

        for t_chunk in chunks(transactions, 10):
            logger.debug(f"Transactions: \n{pprint.pformat(t_chunk)}")

            response = self.dynamo_client.transact_write_items(TransactItems=t_chunk)

            self.stats['dynamo_transact_write_operations'] += 1
            logger.debug(f"Response from transact_write_items: {response}")


    def _get_validate_table_name(self, table_name=None):
        if table_name is None:
            table_name = self.config.get('table_name')

            if table_name is None:
                raise RuntimeError("Failed to dynamo action. no 'table_name' in config  and table_name wasn't "
                                   "specified in the arguments.")
        if os.environ.get('STAGE') == 'test':
            assert table_name.startswith('autotest_') or table_name == 'config', f"Bad table name in test: {table_name}"

        return table_name


    def get_stats(self):
        """
        Return statistics of operations performed by current instance of the Class.

        :return:    -   dict    - key: int statistics.
        """
        return self.stats

    def get_capacity(self, table_name=None):
        """Fetches capacity for data tables

        Keyword Arguments:
            table_name {str} -- Name of the DynamoDB table (default: {None})

        Returns:
            dict -- read/write capacity for the table requested
        """

        if table_name is None:
            logger.debug(self.config)
            table_name = self.config['table_name']

        logger.debug(f"DynamoDB table name identified as {table_name}")

        if table_name not in self._table_capacity:
            self.identify_dynamo_capacity(table_name=table_name)
        return self._table_capacity[table_name]


    def reset_stats(self):
        """
        Cleans statistics.
        """
        self.stats = defaultdict(int)
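
    # Usage sketch: stats accumulate per client instance and can be reset between runs.
    #
    #     client.put(row); client.put(row)
    #     client.get_stats()     # -> {'dynamo_put_queries': 2}
    #     client.reset_stats()   # counters back to zero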
Example No. 25
from boto3.dynamodb.types import TypeSerializer


def serialize_input(value):
    output = {}
    ty = TypeSerializer()
    for k, v in value.items():
        output[k] = ty.serialize(v)
    return output
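
# Usage sketch for serialize_input:
#
#     serialize_input({'name': 'alice', 'age': 30})
#     # -> {'name': {'S': 'alice'}, 'age': {'N': '30'}}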
Example No. 26
import json

from boto3.dynamodb.types import TypeSerializer


def converterToDynamodbFormat(data):
    typer = TypeSerializer()
    dynamodbJsonData = json.dumps(typer.serialize(data)['M'])
    return replaceObjectName(dynamodbJsonData)
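
# `replaceObjectName` is not shown in this example. Judging by the lowercase
# type descriptors produced in Example No. 27 below, it plausibly rewrites the
# serialized JSON string; a hypothetical sketch, not the original code:
def replaceObjectName(dynamodbJsonData):
    # Assumption: lowercase the DynamoDB type descriptor keys in the JSON string
    for old, new in (('"M"', '"m"'), ('"L"', '"l"'), ('"S"', '"s"'), ('"N"', '"n"')):
        dynamodbJsonData = dynamodbJsonData.replace(old, new)
    return dynamodbJsonData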
Example No. 27
            polarity = blob.sentiment[0]
            subjectivity = blob.sentiment[1]

            td = {
                "Id": count,
                "Username": ef(row['username']),
                "Following": int(row['following']),
                "Followers": int(row['followers']),
                "Totaltweets": int(row['totaltweets']),
                "Tweetcreatedts": row['tweetcreatedts'],
                "Query": row['query'],
                "Text": text,
                "Retweet": row['retweet'],
                "Party": row['party'],
                "Targeted": row['targeted'],
                "Polarity": polarity,
                "Subjectivity": subjectivity,
            }

            # Convert rows to workable values for DynamoDB
            t = json.loads(json.dumps(td), parse_float=Decimal)
            dbb = json.dumps(typer.serialize(t)['M'])
            dbb = dbb.replace('"M"', '"m"')
            dbb = dbb.replace('"L"', '"l"')
            dbb = dbb.replace('"S"', '"s"')
            dbb = dbb.replace('"N"', '"n"')

            f.write(dbb)
            f.write('\n')
            count += 1
Example No. 28
from boto3.dynamodb.types import TypeSerializer


def formatReport(report):
    seri = TypeSerializer()

    report = seri.serialize(report)
    return report
Example No. 29
import unittest
from decimal import Decimal

import six
from boto3.dynamodb.types import Binary, TypeSerializer


class TestSerializer(unittest.TestCase):
    def setUp(self):
        self.serializer = TypeSerializer()

    def test_serialize_unsupported_type(self):
        with self.assertRaisesRegex(TypeError, 'Unsupported type'):
            self.serializer.serialize(object())

    def test_serialize_null(self):
        self.assertEqual(self.serializer.serialize(None), {'NULL': True})

    def test_serialize_boolean(self):
        self.assertEqual(self.serializer.serialize(False), {'BOOL': False})

    def test_serialize_integer(self):
        self.assertEqual(self.serializer.serialize(1), {'N': '1'})

    def test_serialize_decimal(self):
        self.assertEqual(
            self.serializer.serialize(Decimal('1.25')), {'N': '1.25'})

    def test_serialize_float_error(self):
        with self.assertRaisesRegex(
                TypeError,
                'Float types are not supported. Use Decimal types instead'):
            self.serializer.serialize(1.25)

    def test_serialize_NaN_error(self):
        with self.assertRaisesRegex(
                TypeError,
                'Infinity and NaN not supported'):
            self.serializer.serialize(Decimal('NaN'))

    def test_serialize_string(self):
        self.assertEqual(self.serializer.serialize('foo'), {'S': 'foo'})

    def test_serialize_binary(self):
        self.assertEqual(self.serializer.serialize(
            Binary(b'\x01')), {'B': b'\x01'})

    def test_serialize_bytearray(self):
        self.assertEqual(self.serializer.serialize(bytearray([1])),
                         {'B': b'\x01'})

    @unittest.skipIf(six.PY2,
                     'This is a test when using python3 version of bytes')
    def test_serialize_bytes(self):
        self.assertEqual(self.serializer.serialize(b'\x01'), {'B': b'\x01'})

    def test_serialize_number_set(self):
        serialized_value = self.serializer.serialize(set([1, 2, 3]))
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('NS', serialized_value)
        self.assertCountEqual(serialized_value['NS'], ['1', '2', '3'])

    def test_serialize_string_set(self):
        serialized_value = self.serializer.serialize(set(['foo', 'bar']))
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('SS', serialized_value)
        self.assertCountEqual(serialized_value['SS'], ['foo', 'bar'])

    def test_serialize_binary_set(self):
        serialized_value = self.serializer.serialize(
            set([Binary(b'\x01'), Binary(b'\x02')]))
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('BS', serialized_value)
        self.assertCountEqual(serialized_value['BS'], [b'\x01', b'\x02'])

    def test_serialize_list(self):
        serialized_value = self.serializer.serialize(['foo', 1, [1]])
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('L', serialized_value)
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        )

    def test_serialize_tuple(self):
        serialized_value = self.serializer.serialize(('foo', 1, (1,)))
        self.assertEqual(len(serialized_value), 1)
        self.assertIn('L', serialized_value)
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}]
        )

    def test_serialize_map(self):
        serialized_value = self.serializer.serialize(
            {'foo': 'bar', 'baz': {'biz': 1}})
        self.assertEqual(
            serialized_value,
            {'M': {'foo': {'S': 'bar'}, 'baz': {'M': {'biz': {'N': '1'}}}}})
Example No. 30
class DynamoDBExporter(AbstractExporter):
    def __init__(self) -> None:
        super().__init__()
        if 'DYNAMODB_ENDPOINT_URL' in os.environ:
            self.dynamodb = boto3.resource(
                'dynamodb', endpoint_url=os.environ['DYNAMODB_ENDPOINT_URL'])
        else:
            self.dynamodb = boto3.resource('dynamodb')

        self.table = self.dynamodb.Table(os.environ['DYNAMODB_TABLENAME'])

        if settings.DYNAMODB_COLLECTION_OVERRIDE:
            try:
                self.table.delete()
            except Exception:
                # ignore errors in case the table does not exist
                pass
            try:
                self.dynamodb.create_table(
                    TableName=os.environ['DYNAMODB_TABLENAME'],
                    KeySchema=[
                        {
                            'AttributeName': 'id',
                            'KeyType': 'HASH'  # Partition key
                        },
                    ],
                    AttributeDefinitions=[
                        {
                            'AttributeName': 'id',
                            'AttributeType': 'S'
                        },
                    ],
                    ProvisionedThroughput={
                        'ReadCapacityUnits': 10,
                        'WriteCapacityUnits': 10
                    })
            except Exception:
                # ignore errors in case the table already exists
                pass

        self.values = rec_dd()
        self.serializer = TypeSerializer()
        self.deserializer = TypeDeserializer()

    def bulk_insert(self):
        to_insert = []

        for k, item in self.values.items():
            record = item
            record['id'] = k
            to_insert.append(record)
        with self.table.batch_writer() as batch:
            for r in to_insert:
                batch.put_item(Item=r)

    def online_upsert(self, keys, item, x):
        resp = self.table.get_item(Key={'id': keys[0]})
        if 'Item' in resp:
            # object is already present
            nested_obj = nested_get(item, keys[1:])
            if nested_obj:
                nested_obj = self.deserializer.deserialize(nested_obj)
                new_obj = x.__class__(nested_obj) + x
            else:
                new_obj = x

            keys_dot_notation = '.'.join(
                ['#key' + str(i) for i in range(len(keys) - 1)])
            response = self.table.update_item(
                Key={'id': keys[0]},
                UpdateExpression="set " + keys_dot_notation + ' =:r',
                ExpressionAttributeNames={
                    k: key
                    for k, key in zip(keys_dot_notation.split('.'), keys[1:])
                },
                ExpressionAttributeValues={':r': new_obj.to_dict()},
                ReturnValues="UPDATED_NEW")
            logger.debug(f'Update response: {response}')
        else:
            # insert the new object
            item = {**item, 'id': keys[0]}
            self.table.put_item(Item=item)
            logger.debug(f'Insert: {dict(item)}')

    def __call__(self, item, *args: Any, **kwds: Any) -> Any:
        keys, x, value = item

        # accumulate serialized values for bulk_insert; optionally upsert online below
        d = self.values[keys[0]]
        for _key in keys[1:-1]:
            d = d[_key]
        new_x = {}
        for k, v in x.to_dict().items():
            if isinstance(v, float):
                # DynamoDB rejects floats; convert via str to avoid binary float artifacts
                v = Decimal(str(v))
            new_x[k] = v
        d[keys[-1]] = self.serializer.serialize(new_x)
        if settings.DYNAMO_DB_ONLINE:
            # recast it to the acc point
            new_x = x.__class__(new_x)
            self.online_upsert(keys, self.values[keys[0]], new_x)
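
# Usage sketch (heavily hedged: the env vars DYNAMODB_TABLENAME and optionally
# DYNAMODB_ENDPOINT_URL must be set, and the shape of the `item` tuple is
# inferred from __call__ above, not documented by the original):
#
#     exporter = DynamoDBExporter()
#     exporter((['doc-1', 'stats', 'views'], counter_obj, None))  # accumulate one value
#     exporter.bulk_insert()                                      # flush everything to DynamoDB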
Example No. 31
import unittest
from decimal import Decimal

import pytest
from boto3.dynamodb.types import Binary, TypeSerializer


class TestSerializer(unittest.TestCase):
    def setUp(self):
        self.serializer = TypeSerializer()

    def test_serialize_unsupported_type(self):
        with pytest.raises(TypeError, match=r'Unsupported type'):
            self.serializer.serialize(object())

    def test_serialize_null(self):
        assert self.serializer.serialize(None) == {'NULL': True}

    def test_serialize_boolean(self):
        assert self.serializer.serialize(False) == {'BOOL': False}

    def test_serialize_integer(self):
        assert self.serializer.serialize(1) == {'N': '1'}

    def test_serialize_decimal(self):
        assert self.serializer.serialize(Decimal('1.25')) == {'N': '1.25'}

    def test_serialize_float_error(self):
        error_msg = r'Float types are not supported. Use Decimal types instead'
        with pytest.raises(TypeError, match=error_msg):
            self.serializer.serialize(1.25)

    def test_serialize_NaN_error(self):
        with pytest.raises(TypeError, match=r'Infinity and NaN not supported'):
            self.serializer.serialize(Decimal('NaN'))

    def test_serialize_string(self):
        assert self.serializer.serialize('foo') == {'S': 'foo'}

    def test_serialize_binary(self):
        assert self.serializer.serialize(Binary(b'\x01')) == {'B': b'\x01'}

    def test_serialize_bytearray(self):
        assert self.serializer.serialize(bytearray([1])) == {'B': b'\x01'}

    def test_serialize_bytes(self):
        assert self.serializer.serialize(b'\x01') == {'B': b'\x01'}

    def test_serialize_number_set(self):
        serialized_value = self.serializer.serialize({1, 2, 3})
        assert len(serialized_value) == 1
        assert 'NS' in serialized_value
        self.assertCountEqual(serialized_value['NS'], ['1', '2', '3'])

    def test_serialize_string_set(self):
        serialized_value = self.serializer.serialize({'foo', 'bar'})
        assert len(serialized_value) == 1
        assert 'SS' in serialized_value
        self.assertCountEqual(serialized_value['SS'], ['foo', 'bar'])

    def test_serialize_binary_set(self):
        serialized_value = self.serializer.serialize(
            {Binary(b'\x01'), Binary(b'\x02')})
        assert len(serialized_value) == 1
        assert 'BS' in serialized_value
        self.assertCountEqual(serialized_value['BS'], [b'\x01', b'\x02'])

    def test_serialize_list(self):
        serialized_value = self.serializer.serialize(['foo', 1, [1]])
        assert len(serialized_value) == 1
        assert 'L' in serialized_value
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}],
        )

    def test_serialize_tuple(self):
        serialized_value = self.serializer.serialize(('foo', 1, (1,)))
        assert len(serialized_value) == 1
        assert 'L' in serialized_value
        self.assertCountEqual(
            serialized_value['L'],
            [{'S': 'foo'}, {'N': '1'}, {'L': [{'N': '1'}]}],
        )

    def test_serialize_map(self):
        serialized_value = self.serializer.serialize(
            {'foo': 'bar', 'baz': {'biz': 1}})
        assert serialized_value == {
            'M': {'foo': {'S': 'bar'}, 'baz': {'M': {'biz': {'N': '1'}}}}}
Example No. 32
from boto3.dynamodb.types import TypeSerializer


def serialize(a_dict):
    serializer = TypeSerializer()
    return {k: serializer.serialize(v) for k, v in a_dict.items()}
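
# The inverse operation, using boto3's TypeDeserializer, round-trips the wire
# format back to plain Python values (a companion sketch, not part of the
# original example):
from boto3.dynamodb.types import TypeDeserializer


def deserialize(a_dict):
    deserializer = TypeDeserializer()
    return {k: deserializer.deserialize(v) for k, v in a_dict.items()}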