def _wipe_dynamodb_table(table: Table) -> None:
    """
    Delete every item in `table`, leaving the table itself in place.

    Based off https://stackoverflow.com/a/61641725

    The table is scanned for its key attributes only (to minimize data
    transfer), following `LastEvaluatedKey` until the scan is exhausted. The
    collected keys are then deleted through the table's batch writer.
    """
    # get the table keys - assume AttributeName exists
    table_key_names = [str(key["AttributeName"]) for key in table.key_schema]

    # Key attribute names may collide with DynamoDB reserved words, see
    # https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ReservedWords.html
    # Referring to them through ExpressionAttributeNames placeholders keeps
    # the projection valid whether or not they are reserved.
    attribute_names = {
        f"#k{index}": name for index, name in enumerate(table_key_names)
    }
    projection_expression = ", ".join(attribute_names)

    response = table.scan(
        ProjectionExpression=projection_expression,
        ExpressionAttributeNames=attribute_names,
    )
    data = response.get("Items")
    assert data is not None, f"Expected items, got {data}"

    # A single scan call returns one page; keep going until no cursor is left.
    while "LastEvaluatedKey" in response:
        response = table.scan(
            ProjectionExpression=projection_expression,
            ExpressionAttributeNames=attribute_names,
            ExclusiveStartKey=response["LastEvaluatedKey"],
        )
        data.extend(response["Items"])

    with table.batch_writer() as batch:
        for each in data:
            batch.delete_item(Key={key: each[key] for key in table_key_names})
def _update_field_in_table_if_exists(
    self, table: Table, field_value: t.Any, field_name: str
) -> bool:
    """
    Set one field on this object's row, but only when a row with the matching
    PK and SK already exists (UpdatedAt is refreshed in the same write).

    Returns True when the row existed and was updated, False when the
    conditional check failed. Any other ClientError is re-raised.
    """
    item_key = {
        "PK": self.get_dynamodb_signal_key(
            self.SIGNAL_SOURCE_SHORTCODE, self.signal_id
        ),
        "SK": self.get_sort_key(self.privacy_group_id),
    }
    row_exists = And(Attr("PK").exists(), Attr("SK").exists())

    try:
        table.update_item(
            Key=item_key,
            # service_resource.Table.update_item's ConditionExpression param
            # is not typed to use its own objects here...
            ConditionExpression=row_exists,  # type: ignore
            UpdateExpression="SET #F = :f, #U = :u",
            ExpressionAttributeNames={
                "#F": field_name,
                "#U": "UpdatedAt",
            },
            ExpressionAttributeValues={
                ":f": field_value,
                ":u": self.updated_at.isoformat(),
            },
        )
    except ClientError as e:
        if e.response["Error"]["Code"] == "ConditionalCheckFailedException":
            return False
        raise e
    return True
def get_recent_items_page(
    cls, table: Table, exclusive_start_key: t.Optional[DynamoDBCursorKey] = None
) -> RecentItems:
    """
    Fetch one page (at most 100 entries) of recent records of this type.

    The "GSI-2" index is queried with ScanIndexForward=False. To continue
    from a previous page, pass that page's `return_value.last_evaluated_key`
    as `exclusive_start_key`.
    """
    query_kwargs: t.Dict[str, t.Any] = {
        "IndexName": "GSI-2",
        "Limit": 100,
        "ScanIndexForward": False,
        "KeyConditionExpression": Key("GSI2-PK").eq(
            DynamoDBItem.get_dynamodb_type_key(cls.__name__)
        ),
    }
    # boto distinguishes fun(Parameter=None) from fun()
    # (https://github.com/boto/boto3/issues/2813), so the cursor is only
    # passed along when one was actually supplied.
    if exclusive_start_key:
        query_kwargs["ExclusiveStartKey"] = exclusive_start_key

    result = table.query(**query_kwargs)
    next_cursor = t.cast(DynamoDBCursorKey, result.get("LastEvaluatedKey", None))
    return RecentItems(
        next_cursor,
        cls._result_items_to_records(result["Items"]),
    )
def store_schema_properties(table: Table, schema: Schema) -> None:
    """
    Write the property and forward-edge definitions of `schema` to `table`
    as a single item keyed by the schema's node type.
    """
    properties: List[SchemaPropertyDict] = []
    for prop_name, prop_type in schema.get_properties().items():
        # Special case: treat uids as int
        primitive = "Int" if prop_name == "uid" else prop_type.primitive.name
        properties.append(
            {
                "name": prop_name,
                "primitive": primitive,
                "is_set": prop_type.is_set,
            }
        )

    # Don't send over these edges
    denylist_edges = ("in_scope", )
    edges: List[SchemaPropertyDict] = []
    for edge_name, edge_tuple in schema.forward_edges.items():
        if edge_name in denylist_edges:
            continue
        forward_edge = edge_tuple[0]
        edges.append(
            {
                "name": edge_name,
                # Forward edge goes to this type
                "primitive": forward_edge.dest.self_type(),
                "is_set": forward_edge.is_to_many(),
            }
        )

    type_definition: SchemaDict = {"properties": properties + edges}
    item = {
        "node_type": schema.self_type(),
        # Dynamodb doesn't like my fancy typedict
        "type_definition": cast(Dict[str, Any], type_definition),
        "display_property": schema.get_display_property(),
    }
    table.put_item(Item=item)
def write_to_table(self, table: Table):
    """
    Upsert this object into `table` without clobbering an existing row.

    Writes are special cased (to avoid overwriting), which is why
    `to_dynamodb_item` is not implemented; the equivalent item body lives in
    this class's `write_to_table_if_not_found`.

    The update only goes through when the row has no ContentRef yet or its
    ContentRef equals this object's. Submission times are appended to the
    stored list, CreatedAt is only set when absent, and the additional
    fields are ADDed to the stored AdditionalFields.
    """
    item_key = {
        "PK": self.get_dynamodb_content_key(self.content_id),
        "SK": self.get_dynamodb_content_type_key(),
    }
    # If ContentRef exists it needs to match or BAD THING(tm) can happen...
    content_ref_matches = Or(
        Attr("ContentRef").not_exists(),
        Attr("ContentRef").eq(self.content_ref),
    )
    # Unfortunately while prod is happy with this on multiple lines pytest breaks...
    update_expression = """SET ContentType = :ct, ContentRef = :cr, ContentRefType = :crt, SubmissionTimes = list_append(if_not_exists(SubmissionTimes, :empty_list), :s), CreatedAt = if_not_exists(CreatedAt, :c), UpdatedAt = :u ADD AdditionalFields :af"""
    expression_values = {
        ":ct": self.content_type.get_name(),
        ":cr": self.content_ref,
        ":crt": self.content_ref_type.value,
        # Fall back to a one-element placeholder set when there are no fields.
        ":af": self.additional_fields or {self.ADDITIONAL_FIELDS_PLACE_HOLDER},
        ":s": [s.isoformat() for s in self.submission_times],
        ":c": self.created_at.isoformat(),
        ":u": self.updated_at.isoformat(),
        ":empty_list": [],
    }

    # put_item does not support UpdateExpression
    table.update_item(
        Key=item_key,
        ConditionExpression=content_ref_matches,  # type: ignore
        UpdateExpression=update_expression,
        ExpressionAttributeValues=expression_values,
    )
def dec(self, table: Table, by: int = 1) -> None:
    """
    Decrement count. Default by 1, unless specified.

    CurrentCount is treated as 0 when the attribute does not exist yet, so
    the first decrement of a fresh counter stores `-by`.
    """
    table.update_item(
        Key={"PK": self.get_pkey(), "SK": self.get_skey()},
        UpdateExpression="SET CurrentCount = if_not_exists(CurrentCount, :zero) - :by",
        ExpressionAttributeValues={":by": by, ":zero": 0},
    )
def write_to_table_if_not_found(self, table: Table) -> bool:
    """
    Write this object to `table` only if no item with its PK/SK exists yet.

    Returns True when the item was written, False when the conditional check
    failed because the PK/SK combination already existed. Any other
    ClientError is re-raised unchanged.
    """
    try:
        table.put_item(
            Item=self.to_dynamodb_item(),
            ConditionExpression="attribute_not_exists(PK) AND attribute_not_exists(SK)",
        )
    except ClientError as client_error:
        # boto3 exception handling https://imgflip.com/i/5f5zfj
        # The fallback must be a dict: a set has no .get(), so a response
        # without "Error" would otherwise hide the real failure behind an
        # AttributeError.
        error_code = client_error.response.get("Error", {}).get("Code", "Unknown")
        if error_code == "ConditionalCheckFailedException":
            return False
        raise
    return True
def get_all_counts(cls, table: Table) -> t.Dict[str, int]:
    """
    Return the WriteCount of every item whose PK equals `cls._get_pkey()`.

    The result is keyed by the part of each item's SK after the first "#"
    (presumably the privacy group used in `_get_skey` -- confirm there).

    The scan is followed through every page: DynamoDB applies the filter
    after reading each page, so the first response alone can miss matches.
    """
    pkey_filter = Attr("PK").eq(cls._get_pkey())

    response = table.scan(FilterExpression=pkey_filter)
    items = list(response["Items"])
    while "LastEvaluatedKey" in response:
        response = table.scan(
            FilterExpression=pkey_filter,
            ExclusiveStartKey=response["LastEvaluatedKey"],
        )
        items.extend(response["Items"])

    return {
        t.cast(str, item["SK"]).split("#", 1)[1]: int(
            t.cast(Decimal, item["WriteCount"])
        )
        for item in items
    }
def get_from_content_id(
    cls,
    table: Table,
    content_id: str,
    signal_type: t.Optional[t.Type[SignalType]] = None,
) -> t.List["PipelineHashRecord"]:
    """
    Look up the PipelineHashRecords stored for a content_id.

    Without `signal_type`, every item whose SK starts with the type prefix is
    matched; with it, only the item for that signal type is matched.
    """
    pk_matches = Key("PK").eq(cls.get_dynamodb_content_key(content_id))

    if signal_type is not None:
        sk_matches = Key("SK").eq(
            DynamoDBItem.get_dynamodb_type_key(signal_type.get_name())
        )
    else:
        sk_matches = Key("SK").begins_with(DynamoDBItem.TYPE_PREFIX)

    response = table.query(KeyConditionExpression=pk_matches & sk_matches)
    return cls._result_items_to_records(response.get("Items", []))
def get_count(cls, table: Table, privacy_group: str) -> int:
    """
    Return the stored WriteCount for `privacy_group`, or 0 when there is no
    item for it.
    """
    response = table.get_item(
        Key={"PK": cls._get_pkey(), "SK": cls._get_skey(privacy_group)}
    )
    # Explicit branch instead of the fragile `cond and value or default` idiom.
    if "Item" not in response:
        return 0
    return int(t.cast(Decimal, response["Item"]["WriteCount"]))
def increment_counts(cls, table: Table, counts: t.Dict[str, int]):
    """
    Add each value in `counts` to the WriteCount of the item for its key.

    A missing WriteCount is treated as 0 before the addition.
    """
    # Couldn't find a way to do update_item in batch. Can optimize
    # if found.
    for privacy_group, increment in counts.items():
        item_key = {"PK": cls._get_pkey(), "SK": cls._get_skey(privacy_group)}
        table.update_item(
            Key=item_key,
            UpdateExpression="SET WriteCount = if_not_exists(WriteCount, :zero) + :by",
            ExpressionAttributeValues={":by": increment, ":zero": 0},
        )
def store_schema(table: Table, schema: Schema) -> None:
    """
    Store every edge of `schema` in `table`, once per direction.

    For each (forward edge, reverse edge) pair from `schema.get_edges()`, two
    items are written: forward -> reverse with the edge's relationship, and
    reverse -> forward with the reversed relationship. Pairs with a missing
    forward or reverse name are logged and skipped.
    """
    for f_edge, (edge_t, r_edge) in schema.get_edges().items():
        if not (f_edge and r_edge):
            # `warn` is a deprecated alias of `warning` on logging.Logger.
            LOGGER.warning(f"missing {f_edge} {r_edge} for {schema.self_type()}")
            continue

        table.put_item(
            Item={
                "f_edge": f_edge,
                "r_edge": r_edge,
                "relationship": int(edge_t.rel),
            }
        )
        table.put_item(
            Item={
                "f_edge": r_edge,
                "r_edge": f_edge,
                "relationship": int(edge_t.rel.reverse()),
            }
        )
def get_item_exists(table: Table, wizard: str) -> bool:
    """
    Report whether an item with username `wizard` exists in `table`.

    Returns True when the lookup response contains an item, False when it
    does not. A failed lookup is printed and also reported as False.
    """
    try:
        db_response = table.get_item(Key={'username': wizard})
    except Exception as e:
        # Best-effort lookup: keep reporting failures as "not found".
        print("Something went wrong: ", e)
        return False
    # A missing item is a normal outcome, not an error worth printing.
    return 'Item' in db_response
def scan_info(table: Table, house: str) -> Union[Any, bool]:
    """
    Return every item in `table` whose 'house' attribute equals `house`
    lower-cased.

    All scan pages are followed, since the filter is applied per page and the
    first response may not hold every match. On any failure the error is
    printed and False is returned.
    """
    try:
        house_filter = Attr('house').eq(house.lower())
        db_response = table.scan(FilterExpression=house_filter)
        items = list(db_response['Items'])
        while 'LastEvaluatedKey' in db_response:
            db_response = table.scan(
                FilterExpression=house_filter,
                ExclusiveStartKey=db_response['LastEvaluatedKey'],
            )
            items.extend(db_response['Items'])
        return items
    except Exception as e:
        print("Something went wrong: ", e)
        return False
def _ensure_indexes(
    table: Table, desired_indexes: List[GlobalSecondaryIndexTypeDef]
) -> None:
    """
    Create whichever of `desired_indexes` the table does not have yet.

    Nothing is sent when every desired index is already present.
    """
    missing: List[GlobalSecondaryIndexTypeDef] = [
        index
        for index in desired_indexes
        if not _has_index(table, index["IndexName"])
    ]
    if not missing:
        return

    attribute_definitions = []
    for index in missing:
        attribute_definitions.extend(_attributes_from_index(index))

    # NOTE(review): the DynamoDB UpdateTable docs describe a limit of one
    # global secondary index creation per call -- confirm this is only ever
    # asked to add a single index at a time.
    table.update(
        GlobalSecondaryIndexUpdates=[{"Create": index} for index in missing],
        AttributeDefinitions=attribute_definitions,
    )
def _table_is_empty(table: Table) -> bool:
    """
    Return True when `table` holds no items.

    fun fact: some_table.item_count? It's only updated every 6 hours.
    you have to do a scan.

    The scan asks for a single item (Limit=1) instead of a whole page, and
    follows the cursor if a page comes back empty, so a verdict needs at
    most one item to be transferred.
    """
    response = table.scan(Limit=1)
    while not response["Items"] and "LastEvaluatedKey" in response:
        response = table.scan(
            Limit=1, ExclusiveStartKey=response["LastEvaluatedKey"]
        )
    return not response["Items"]
def _dump_dynamodb_table(table: Table) -> Optional[str]:
    """
    Outputs a nicely-formatted Python list of all the items in the table.
    (you may need a `from decimal import Decimal` to interact with it, though.)

    Every scan page is collected, so larger tables are dumped in full rather
    than truncated to the first response. Returns None for an empty table.
    """
    response = table.scan()
    items = list(response["Items"])
    while "LastEvaluatedKey" in response:
        response = table.scan(ExclusiveStartKey=response["LastEvaluatedKey"])
        items.extend(response["Items"])

    if not items:
        return None
    return pretty_format(items)
def get_from_signal_and_ds_id(
    cls,
    table: Table,
    signal_id: t.Union[str, int],
    signal_source: str,
    ds_id: str,
) -> t.Optional["PDQSignalMetadata"]:
    """
    Load the metadata stored for one signal / dataset pair.

    Returns None when the lookup yields no item.
    """
    item_key = {
        "PK": cls.get_dynamodb_signal_key(signal_source, signal_id),
        "SK": cls.DATASET_PREFIX + ds_id,
    }
    item = table.get_item(Key=item_key).get("Item")
    if not item:
        return None
    return cls._result_item_to_metadata(item)
def write_to_table_if_not_found(self, table: Table) -> bool:
    """
    Write this content object to `table` unless one with the same PK/SK is
    already stored.

    Write operations for this object need to be special cased (to avoid
    overwriting), so `to_dynamodb_item` is not implemented; the item that
    method would build is assembled here instead.

    Returns False, without writing, when a content object with that id is
    already present; True when the write was successful. Any other
    ClientError is re-raised unchanged.
    """
    item = {
        "PK": self.get_dynamodb_content_key(self.content_id),
        "SK": self.get_dynamodb_content_type_key(),
        "ContentType": self.content_type.get_name(),
        "ContentRef": self.content_ref,
        "ContentRefType": self.content_ref_type.value,
        # Fall back to a one-element placeholder set when there are no fields.
        "AdditionalFields": self.additional_fields
        if self.additional_fields
        else {self.ADDITIONAL_FIELDS_PLACE_HOLDER},
        "SubmissionTimes": [s.isoformat() for s in self.submission_times],
        "CreatedAt": self.created_at.isoformat(),
        "UpdatedAt": self.updated_at.isoformat(),
    }
    try:
        table.put_item(
            Item=item,
            ConditionExpression="attribute_not_exists(PK) AND attribute_not_exists(SK)",
        )
    except ClientError as client_error:
        # boto3 exception handling https://imgflip.com/i/5f5zfj
        # The fallback must be a dict: a set has no .get(), so a response
        # without "Error" would otherwise hide the real failure behind an
        # AttributeError.
        error_code = client_error.response.get("Error", {}).get("Code", "Unknown")
        if error_code == "ConditionalCheckFailedException":
            return False
        raise
    return True
def seed_fake_data_if_needed(pac_table: Table):
    """
    Fill `pac_table` with 100 fake entries when a scan reports a Count of 0.

    Each entry uses a random signer address as its id, a randomly chosen
    price, the Available status and a fixed funder.
    """
    already_seeded = pac_table.scan()['Count'] != 0
    if already_seeded:
        return

    print("Seeding data")
    price_options = [499, 999, 1999]
    status = Status.Available
    funder = '0x1234'

    for _ in range(100):
        price = random.choice(price_options)
        signer = random_eth_address()
        entry = {
            'id': signer,  # using signer as the unique id
            'price': str(price),
            'status': status.name,
            'price_status': "{}-{}".format(price, status.name),
            'funder': funder,
            'signer': signer,
            'updated': datetime.now().isoformat(),
        }
        pac_table.put_item(Item=entry)
def get_value(self, table: Table) -> int:
    """
    Read the counter's current value; 0 when the item or its CurrentCount
    attribute is absent.
    """
    counter_key = {"PK": self.get_pkey(), "SK": self.get_skey()}
    response = table.get_item(Key=counter_key)
    item = response.get("Item", {})
    return t.cast(int, item.get("CurrentCount", 0))
def write_to_table(self, table: Table):
    """
    Upsert this object into `table` without clobbering an existing row.

    Writes are special cased (to avoid overwriting), which is why
    `to_dynamodb_item` is not implemented. The attributes written here are
    PK, SK, ContentRef, ContentRefType, AdditionalFields, SubmissionTimes,
    CreatedAt and UpdatedAt.

    The update only goes through when the row has no ContentRef yet or its
    ContentRef equals this object's. Submission times are appended to the
    stored list, CreatedAt is only set when absent, and the additional
    fields are ADDed to the stored AdditionalFields.
    """
    item_key = {
        "PK": self.get_dynamodb_content_key(self.content_id),
        "SK": self.get_dynamodb_content_type_key(self.content_type),
    }
    # If ContentRef exists it needs to match or BAD THING(tm) can happen...
    content_ref_matches = Or(
        Attr("ContentRef").not_exists(),
        Attr("ContentRef").eq(self.content_ref),
    )
    # Unfortunately while prod is happy with this on multiple lines pytest breaks...
    update_expression = """SET ContentRef = :cr, ContentRefType = :crt, SubmissionTimes = list_append(if_not_exists(SubmissionTimes, :empty_list), :s), CreatedAt = if_not_exists(CreatedAt, :c), UpdatedAt = :u ADD AdditionalFields :af"""
    expression_values = {
        ":cr": self.content_ref,
        ":crt": self.content_ref_type.value,
        # Fall back to a one-element placeholder set when there are no fields.
        ":af": self.additional_fields or {"Placeholder"},
        ":s": [s.isoformat() for s in self.submission_times],
        ":c": self.created_at.isoformat(),
        ":u": self.updated_at.isoformat(),
        ":empty_list": [],
    }

    # put_item does not support UpdateExpression
    table.update_item(
        Key=item_key,
        ConditionExpression=content_ref_matches,  # type: ignore
        UpdateExpression=update_expression,
        ExpressionAttributeValues=expression_values,
    )
def get_from_signal_and_privacy_group(
    cls, table: Table, signal_id: t.Union[str, int], privacy_group_id: str
) -> t.Optional["ThreatExchangeSignalMetadata"]:
    """
    Load object for this signal and privacy_group combination.

    Returns None only when the lookup response contains no item.
    """
    pk = cls.get_dynamodb_signal_key(cls.SIGNAL_SOURCE_SHORTCODE, signal_id)
    sk = cls.get_sort_key(privacy_group_id)
    response = table.get_item(Key={"PK": pk, "SK": sk})
    # Explicit branch instead of `cond and value or None`, which would also
    # return None whenever the converted object happened to be falsy.
    if "Item" not in response:
        return None
    return cls._result_item_to_metadata(response["Item"])
def update_item(
    table: Table,
    wizard: str,
    update_expression: str,
    attributes: dict,
    return_values: Any,
) -> Union[Any, bool]:
    """
    Apply `update_expression` to the item whose username is `wizard`.

    Returns the update_item response, or False (after printing the error)
    when the call raises.
    """
    try:
        return table.update_item(
            Key={'username': wizard},
            UpdateExpression=update_expression,
            ExpressionAttributeValues=attributes,
            ReturnValues=return_values,
        )
    except Exception as e:
        print("Something went wrong: ", e)
    return False
def get_item_info(table: Table, wizard: str) -> Dict[Any, Any]:
    """
    Fetch the item stored for username `wizard`.

    When the lookup fails or the response has no item, the error is printed
    and a dict with a 'text' message about `wizard` is returned instead.
    """
    try:
        return table.get_item(Key={'username': wizard})['Item']
    except Exception as e:
        print("Something went wrong: ", e)
    return {
        'text': f'Witch/wizard _*{wizard}*_ is not listed as a *Hogwarts Alumni*, most likely to be enrolled in _Beauxbatons_ or _Durmstrang_'
    }
def get_from_content_id(cls, table: Table, content_id: str) -> t.List["MatchRecord"]:
    """
    Fetch the match records stored under a content_id: items with the
    content's PK whose SK starts with the signal key prefix.
    """
    pk_matches = Key("PK").eq(DynamoDBItem.get_dynamodb_content_key(content_id))
    sk_matches = Key("SK").begins_with(DynamoDBItem.SIGNAL_KEY_PREFIX)
    response = table.query(KeyConditionExpression=pk_matches & sk_matches)
    return cls._result_items_to_records(response.get("Items", []))
def write_to_table_if_not_found(self, table: Table) -> bool:
    """
    Write record to DDB if the PK/SK combination does not exist.

    Returns:
    * True when record was written (did not exist)
    * False when record could not be written (PK/SK combo existed)

    Any ClientError other than a failed conditional check is re-raised
    unchanged.
    """
    try:
        table.put_item(
            Item=self.to_dynamodb_item(),
            ConditionExpression="attribute_not_exists(PK) AND attribute_not_exists(SK)",
        )
    except ClientError as client_error:
        # boto3 exception handling https://imgflip.com/i/5f5zfj
        # The fallback must be a dict: a set has no .get(), so a response
        # without "Error" would otherwise hide the real failure behind an
        # AttributeError.
        error_code = client_error.response.get("Error", {}).get("Code", "Unknown")
        if error_code == "ConditionalCheckFailedException":
            return False
        raise
    return True
def get_from_content_id(
    cls, table: Table, content_id: str, signal_type_mapping: HMASignalTypeMapping
) -> t.Optional["ContentObject"]:
    """
    Load the content object stored for `content_id`.

    Returns None for an empty `content_id` or when no item is found.
    """
    if not content_id:
        return None

    item_key = {
        "PK": cls.get_dynamodb_content_key(content_id),
        "SK": cls.get_dynamodb_content_type_key(),
    }
    item = table.get_item(Key=item_key).get("Item", None)
    return cls._result_item_to_object(item, signal_type_mapping) if item else None
def get_from_content_id(
    cls,
    table: Table,
    content_id: str,
) -> t.List["ActionEvent"]:
    """
    Fetch the action events stored under a content_id: items with the
    content's PK whose SK starts with the action time prefix, projected with
    the class's default projection expression.
    """
    pk_matches = Key("PK").eq(cls.get_dynamodb_content_key(content_id))
    sk_matches = Key("SK").begins_with(cls.ACTION_TIME_PREFIX)
    response = table.query(
        KeyConditionExpression=pk_matches & sk_matches,
        ProjectionExpression=cls.DEFAULT_PROJ_EXP,
    )
    return cls._result_item_to_action_event(response.get("Items", []))
def get_from_signal(
    cls, table: Table, signal_id: t.Union[str, int], signal_source: str
) -> t.List["MatchRecord"]:
    """
    Fetch the match records for one signal through the "GSI-1" index.

    Both the source and the id are needed to uniquely identify a signal.
    """
    gsi_key = DynamoDBItem.get_dynamodb_signal_key(signal_source, signal_id)
    response = table.query(
        IndexName="GSI-1",
        KeyConditionExpression=Key("GSI1-PK").eq(gsi_key),
    )
    return cls._result_items_to_records(response.get("Items", []))