def test_drop_duplicates(self):
    """drop_duplicates removes repeated rows, optionally keyed by a subset
    of columns, and preserves the table's record_class."""

    class MyRecord(DictClass):
        first_name: str
        last_name: str
        sex: str

    data_table = DataTable(
        record_class=MyRecord,
        base_dict={
            "first_name": ["ABC", "ABC", "DEF", "DEF", "DEF"],
            "last_name": ["XYZ", "XYZ", "MNO", "MNO", "MNO"],
            "sex": ["M", "M", "F", "M", "F"],
        },
    )
    assert isinstance(data_table.get_record(0), MyRecord)

    # full-row deduplication: rows 0/1 and rows 2/4 collapse
    deduplicated_data_table = data_table.drop_duplicates()
    assert isinstance(deduplicated_data_table.get_record(0), MyRecord)
    assert {
        "first_name": ["ABC", "DEF", "DEF"],
        "last_name": ["XYZ", "MNO", "MNO"],
        "sex": ["M", "F", "M"],
    } == deduplicated_data_table

    # deduplicate on a single column only
    deduplicated_data_table = data_table.drop_duplicates(
        subset=("first_name", ))
    assert isinstance(deduplicated_data_table.get_record(0), MyRecord)
    assert {
        "first_name": ["ABC", "DEF"],
        "last_name": ["XYZ", "MNO"],
        "sex": ["M", "F"],
    } == deduplicated_data_table

    # deduplicate on a multi-column subset
    deduplicated_data_table = data_table.drop_duplicates(
        subset=("last_name", "sex"))
    assert isinstance(deduplicated_data_table.get_record(0), MyRecord)
    assert {
        "first_name": ["ABC", "DEF", "DEF"],
        "last_name": ["XYZ", "MNO", "MNO"],
        "sex": ["M", "F", "M"],
    } == deduplicated_data_table

    # it should throw an error as the column name provided is invalid
    with pytest.raises(DataTableError):
        _ = data_table.drop_duplicates(subset=("invalid_column", ))

    # not normalized table: "last_name" is one value short
    data_table = DataTable(
        record_class=MyRecord,
        base_dict={
            "first_name": ["ABC", "ABC", "DEF", "DEF", "DEF"],
            "last_name": ["XYZ", "XYZ", "MNO", "MNO"],
            "sex": ["M", "M", "F", "M", "F"],
        },
    )
    # It should throw an error as table is not normalized
    with pytest.raises(DataTableError):
        _ = data_table.drop_duplicates()
def test_cached_batch_get(self):
    """A repeated cached_batch_get with the same keys is served from cache."""
    self.client_mock.batch_get_item.return_value = {
        "Responses": {
            "my_table_name": [{"pk": "my_pk", "sk": "my_sk", "data": "value"}]
        }
    }
    request_table = DataTable().add_record({"pk": "my_pk", "sk": "my_sk"})

    # first call goes to DynamoDB
    records = list(self.result.cached_batch_get(request_table).get_records())
    assert records == [{"pk": "my_pk", "sk": "my_sk", "data": "value"}]
    self.client_mock.batch_get_item.assert_called_with(
        RequestItems={
            "my_table_name": {"Keys": [{"pk": "my_pk", "sk": "my_sk"}]}
        },
        ReturnConsumedCapacity="NONE",
    )

    # second call with the same keys must not hit the client again
    self.client_mock.batch_get_item.reset_mock()
    records = list(self.result.cached_batch_get(request_table).get_records())
    self.client_mock.batch_get_item.assert_not_called()

    # an empty request table yields no records
    assert list(self.result.cached_batch_get(DataTable()).get_records()) == []
def test_batch_delete(self):
    """batch_delete issues DeleteRequest items and returns the key records."""
    self.client_mock.batch_write_item.return_value = {
        "Responses": {
            "my_table_name": [{"pk": "my_pk", "sk": "my_sk", "data": "value"}]
        }
    }
    request_table = DataTable().add_record({"pk": "my_pk", "sk": "my_sk"})

    deleted = list(self.result.batch_delete(request_table).get_records())
    # only the key attributes are echoed back for deleted records
    assert deleted == [{"pk": "my_pk", "sk": "my_sk"}]
    self.client_mock.batch_write_item.assert_called_with(
        RequestItems={
            "my_table_name": [
                {"DeleteRequest": {"Key": {"pk": "my_pk", "sk": "my_sk"}}}
            ]
        },
        ReturnConsumedCapacity="NONE",
        ReturnItemCollectionMetrics="NONE",
    )
    # deleting nothing is a no-op
    assert list(self.result.batch_delete(DataTable()).get_records()) == []
def test_get_records() -> None:
    """Records iterator yields one dict per row and then stops."""
    table = DataTable({"a": [1, 2], "b": [3, 4]})
    record_iter = table.get_records()
    for expected in ({"a": 1, "b": 3}, {"a": 2, "b": 4}):
        assert next(record_iter) == expected
    # the iterator is exhausted after the last row
    with pytest.raises(StopIteration):
        next(record_iter)
def test_get_column() -> None:
    """NOT_SET and missing columns surface as None values."""
    table = DataTable({"a": [1, 2], "b": [3, DataTable.NOT_SET]})
    # a fully-set column is returned verbatim
    assert table.get_column("a") == [1, 2]
    # the NOT_SET placeholder becomes None
    assert table.get_column("b") == [3, None]
    # an unknown column yields None for every row
    assert table.get_column("c") == [None, None]
    # a non-normalized table cannot produce a column
    with pytest.raises(DataTableError):
        DataTable({"a": [1], "b": []}).get_column("a")
def test_is_normalized() -> None:
    """A table is normalized iff every column has the same length."""
    assert DataTable({"a": [1, 2], "b": [3, 4]}).is_normalized()
    assert not DataTable({"a": [1, 2], "b": [3]}).is_normalized()
    uneven = DataTable({"a": [1, 2], "b": [3, 4], "c": []})
    assert not uneven.is_normalized()
def test_get_lenghts() -> None:
    """Column access and length helpers on a non-normalized table."""
    # NOTE(review): "lenghts" is a typo in the test name; renaming would be
    # an interface change, so it is only flagged here.
    table = DataTable({"a": [1, 2, 3], "b": [1, 2]})
    assert table["a"] == [1, 2, 3]
    assert table["b"] == [1, 2]
    assert (table.max_length, table.min_length) == (3, 2)
    assert table.get_lengths() == [3, 2]
    # edge cases: empty table, and a column holding a single None
    assert DataTable().get_lengths() == []
    assert DataTable({"a": [None]}).get_lengths() == [1]
def batch_get(self, data_table: DataTable[_RecordType],
              consistent_read: bool = False) -> DataTable[_RecordType]:
    """
    Get multiple records as a DataTable from DB.

    `data_table` must have all columns to calculate table keys.

    Example:

        ```python
        # UserTable is a subclass of a DynamoTable
        user_table = UserTable()

        # we should provide table keys or fields to calculate them
        # in our case, PK is calculated from `email` field.
        users_table = DataTable[UserRecord]().add_record(
            {
                "email": "*****@*****.**",
            },
            {
                "email": "*****@*****.**",
            },
        )
        user_records = user_table.batch_get(users_table)

        for user_record in user_records:
            # print found records
            # if record was not found - it will still be returned
            # but only with the data you provided
            print(user_record)
        ```

    Arguments:
        data_table -- Request data table.
        consistent_read -- `ConsistentRead` boto3 parameter.

    Returns:
        DataTable with existing records.
    """
    # nothing requested -- return an empty copy of the same shape
    if not data_table:
        return data_table.copy()

    # build a request table where each record has its table keys filled in
    get_data_table = DataTable()
    for record in data_table.get_records():
        record = self._convert_record(record)
        record = self.normalize_record(record)
        record.update(self._get_record_keys(record))
        get_data_table.add_record(record)

    results: DataTable[Any] = (
        self.dynamo_query_class.build_batch_get_item(
            consistent_read=consistent_read,
            logger=self._logger).table(
                table_keys=self.table_keys,
                table=self.table).execute(data_table=get_data_table))
    # re-wrap results so records are typed with this table's record_class
    return DataTable(record_class=self.record_class).add_table(results)
def test_add_record() -> None:
    """add_record pads missing columns with NOT_SET and is chainable."""
    table = DataTable({"a": [1], "b": [3]})
    returned = table.add_record({"a": 5, "c": 4}, {"c": 5})
    # fluent API: the same table instance comes back
    assert returned is table
    not_set = table.NOT_SET
    assert table == {
        "a": [1, 5, not_set],
        "b": [3, not_set, not_set],
        "c": [not_set, 4, 5],
    }
    # adding a record to a non-normalized table is an error
    with pytest.raises(DataTableError):
        DataTable({"a": [1], "b": []}).add_record({"a": 1})
def _validate_data_table_has_table_keys(self, data_table: DataTable) -> None:
    """Ensure every table key column exists in `data_table` and is fully set.

    Arguments:
        data_table -- Input request data table.

    Raises:
        DynamoQueryError -- If a table key column is missing entirely or
            contains unset values.
    """
    for table_key in self.table_keys:
        if data_table.has_set_column(table_key):
            continue
        # distinguish "present but partially set" from "absent entirely"
        if data_table.has_column(table_key):
            raise DynamoQueryError(
                f'Column "{table_key}" has missing values in input data,'
                f" but present in table keys {self.table_keys}")
        raise DynamoQueryError(
            f'Column "{table_key}" is missing in input data,'
            f" but present in table keys {self.table_keys}")
def _validate_required_value_keys(self, data_table: DataTable) -> None:
    """Ensure every value key referenced by the query expressions exists in
    `data_table` and is fully set.

    Arguments:
        data_table -- Input request data table.

    Raises:
        DynamoQueryError -- If a required column is missing entirely or
            contains unset values.
    """
    for name, expression in self._expressions.items():
        required_value_keys = expression.get_format_values()
        for required_value_key in required_value_keys:
            if data_table.has_set_column(required_value_key):
                continue
            if data_table.has_column(required_value_key):
                # FIX: removed a stray doubled quote after the column name
                # (was '"{...}""'), matching the message style of
                # _validate_data_table_has_table_keys.
                raise DynamoQueryError(
                    f'Column "{required_value_key}" has missing values in input data,'
                    f' but present in {name} = "{expression}"')
            raise DynamoQueryError(
                f'Column "{required_value_key}" is missing in input data,'
                f' but present in {name} = "{expression}"')
def test_batch_get_records(self):
    """batch_get_records returns full records fetched by their keys."""
    self.client_mock.batch_get_item.return_value = {
        "Responses": {
            "my_table_name": [{
                "pk": "my_pk",
                "sk": "my_sk",
                "data": "value"
            }]
        }
    }
    assert list(
        self.result.batch_get_records([{
            "pk": "my_pk",
            "sk": "my_sk"
        }])) == [{
            "pk": "my_pk",
            "sk": "my_sk",
            "data": "value"
        }]
    self.client_mock.batch_get_item.assert_called_with(
        RequestItems={
            "my_table_name": {
                "Keys": [{
                    "pk": "my_pk",
                    "sk": "my_sk"
                }]
            }
        },
        ReturnConsumedCapacity="NONE",
    )
    # FIX: the empty-input assertion previously exercised `batch_get`,
    # which has its own test -- assert on `batch_get_records` instead.
    assert list(self.result.batch_get_records([])) == []
def test_has_set_column() -> None:
    """has_set_column is True only when every queried column exists and
    contains no NOT_SET placeholder."""
    data_table = DataTable({"a": [1, 2], "b": [DataTable.NOT_SET, 3]})
    # no arguments: vacuously true
    assert data_table.has_set_column()
    assert data_table.has_set_column("a")
    # "b" contains a NOT_SET placeholder
    assert not data_table.has_set_column("b")
    # "c" does not exist at all
    # FIX: an exact duplicate of this assertion was removed.
    assert not data_table.has_set_column("c")
    # any unset or missing column in the query makes the result False
    assert not data_table.has_set_column("b", "a")
    assert not data_table.has_set_column("a", "c")
def test_normalize() -> None:
    """normalize() pads short columns with NOT_SET up to the longest one."""
    table = DataTable({"a": [1, 2, 3], "b": [3, 4], "c": []})
    table.normalize()
    not_set = table.NOT_SET
    assert table == {
        "a": [1, 2, 3],
        "b": [3, 4, not_set],
        "c": [not_set, not_set, not_set],
    }
    # NOT_SET renders as None when a record is read back
    assert table.get_record(0) == {"a": 1, "b": 3, "c": None}
    # an already-normalized table is left untouched
    table = DataTable({"a": [1, 2], "b": [3, 4]})
    table.normalize()
    assert table == {"a": [1, 2], "b": [3, 4]}
def test_set() -> None:
    """set() replaces a cell in place; invalid targets raise DataTableError."""
    table = DataTable({"a": [1, 2], "b": [DataTable.NOT_SET]})
    chained = table.set("a", 1, "value_a").set("b", 0, "value_b")
    # fluent API: the same table instance comes back
    assert chained is table
    assert table == DataTable({"a": [1, "value_a"], "b": ["value_b"]})
    # index out of range for the column
    with pytest.raises(DataTableError):
        table.set("b", 1, "value_b")
    # unknown column
    with pytest.raises(DataTableError):
        table.set("c", 0, "value_c")
def test_errors() -> None:
    """Invalid queries and malformed input data raise DynamoQueryError."""
    filter_expression_mock = MagicMock()
    projection_expression_mock = MagicMock()
    table_resource_mock = MagicMock()
    # "contains" is not valid for a key condition expression
    query = DynamoQuery.build_query(
        key_condition_expression=ConditionExpression("key", "contains"),
        index_name="my_index",
        filter_expression=filter_expression_mock,
        projection_expression=projection_expression_mock,
        limit=100,
    ).table(table=table_resource_mock, table_keys=("pk", "sk"))
    with pytest.raises(DynamoQueryError):
        query.execute_dict({"key": "value"})

    query = DynamoQuery.build_query(
        key_condition_expression=ConditionExpression("key"),
        index_name="my_index",
        filter_expression=filter_expression_mock,
        projection_expression=projection_expression_mock,
        limit=100,
    ).table(table=table_resource_mock, table_keys=("pk", "sk"))
    # input data lacks the "key" column required by the expression
    with pytest.raises(DynamoQueryError):
        query.execute_dict({"key1": "value"})
    # input data table is not normalized
    with pytest.raises(DynamoQueryError):
        query.execute(DataTable({"key": [1, 2], "b": [3]}))
    # required column contains a NOT_SET value
    with pytest.raises(DynamoQueryError):
        query.execute(DataTable({"key": [3, DataTable.NOT_SET]}))
    # batch_get_item: table key column contains a NOT_SET value
    with pytest.raises(DynamoQueryError):
        DynamoQuery.build_batch_get_item().table(
            table=table_resource_mock, table_keys=("pk", "sk")).execute(
                DataTable({
                    "pk": ["test"],
                    "sk": [DataTable.NOT_SET]
                }))
    # batch_get_item: table key column is missing entirely
    with pytest.raises(DynamoQueryError):
        DynamoQuery.build_batch_get_item().table(
            table=table_resource_mock,
            table_keys=("pk", "sk")).execute(DataTable({"pk": ["test"]}))
def test_has_column() -> None:
    """has_column is True only when every queried column exists."""
    table = DataTable({"a": [1, 2], "b": [3, 4]})
    # no arguments: vacuously true
    assert table.has_column()
    for present in ("a", "b"):
        assert table.has_column(present)
    assert table.has_column("b", "a")
    # any unknown column makes the result False
    assert not table.has_column("c")
    assert not table.has_column("a", "c")
def _execute_method_scan(
    self,
    data_table: DataTable,
) -> DataTable:
    """Run a paginated Scan for every record in `data_table` and merge all
    pages into a single result table.

    Arguments:
        data_table -- Input request data table.

    Returns:
        A DataTable with all scanned records.
    """
    self._validate_last_evaluated_key()
    self._validate_required_value_keys(data_table)
    result = DataTable[Dict[str, Any]]()
    for record in data_table.get_records():
        result.add_table(self._execute_paginated_query(data=record))
    return result
def _execute_method_batch_get_item(self, data_table: DataTable) -> DataTable:
    """Fetch records with BatchGetItem in MAX_BATCH_SIZE chunks and merge
    responses back into the request records.

    Records not found in DB are still returned, containing only the data
    the caller provided.

    Arguments:
        data_table -- Input request data table with table keys set.

    Returns:
        A DataTable with one record per input record.
    """
    self._validate_data_table_has_table_keys(data_table)
    record_chunks = chunkify(data_table.get_records(), self.MAX_BATCH_SIZE)
    table_name = self.table_resource.name
    response_table = DataTable[Dict[str, Any]]()
    for record_chunk in record_chunks:
        key_data_list = []
        for record in record_chunk:
            # only table key attributes go into the BatchGetItem request
            key_data = {
                k: v
                for k, v in record.items() if k in self.table_keys
            }
            key_data_list.append(key_data)
        request_items = {
            table_name: {
                "Keys": key_data_list,
                "ConsistentRead": self._consistent_read
            }
        }
        response = self._batch_get_item(
            RequestItems=request_items,
            **self._extra_params,
        )
        # collect found items across all chunks for the merge step below
        if response.get("Responses", {}).get(table_name):
            response_table.add_record(*response["Responses"][table_name])
    result = DataTable[Dict[str, Any]]()
    for record in data_table.get_records():
        key_data = {
            k: v
            for k, v in record.items() if k in self.table_keys
        }
        # match responses back to the request record by key attributes
        response_records = response_table.filter_records(
            key_data).get_records()
        for response_record in response_records:
            record.update(response_record)
        result.add_record(record)
    return result
def test_filter_records_not_equals() -> None:
    """NOT_EQUALS keeps records that differ from the query in any key."""
    table = DataTable({"a": [1, 2, 1], "b": [3, 4, 5]})
    filtered = table.filter_records({"a": 1}, operand=Filter.NOT_EQUALS)
    assert filtered == {"a": [2], "b": [4]}
    filtered = table.filter_records({"a": 2, "b": 4},
                                    operand=Filter.NOT_EQUALS)
    assert filtered == {"a": [1, 1], "b": [3, 5]}
    # no record matches both keys at once, so nothing is dropped
    filtered = table.filter_records({"a": 1, "b": 4},
                                    operand=Filter.NOT_EQUALS)
    assert filtered == {"a": [1, 2, 1], "b": [3, 4, 5]}
    # filtering a non-normalized table is an error
    with pytest.raises(DataTableError):
        DataTable({
            "a": [1, 2, 1],
            "b": [3, 4]
        }).filter_records({"a": 1}, operand=Filter.NOT_EQUALS)
def test_add_table() -> None:
    """add_table appends the rows of other normalized tables in order."""
    extra = DataTable({"a": [5], "b": [6]})
    base = DataTable({"a": [1, 2], "b": [3, 4]})
    assert base.add_table(extra) == {"a": [1, 2, 5], "b": [3, 4, 6]}
    # the same table may be appended more than once in one call
    base = DataTable({"a": [1, 2], "b": [3, 4]})
    assert base.add_table(extra, extra) == {
        "a": [1, 2, 5, 5],
        "b": [3, 4, 6, 6],
    }
    # appending a non-normalized table is an error
    with pytest.raises(DataTableError):
        DataTable({
            "a": [1, 2],
            "b": [3, 4]
        }).add_table(DataTable({"a": [5], "b": []}))
    # appending onto a non-normalized table is an error too
    with pytest.raises(DataTableError):
        DataTable({"a": [5], "b": []}).add_table(extra)
def _execute_method_get_item(self, data_table: DataTable) -> DataTable:
    """Run GetItem for every record in `data_table`.

    Found items are merged into the request record; records not found in DB
    are returned with only the data the caller provided.

    Arguments:
        data_table -- Input request data table with table keys set.

    Returns:
        A DataTable with one record per input record.
    """
    self._validate_data_table_has_table_keys(data_table)
    result = DataTable[Dict[str, Any]]()
    for record in data_table.get_records():
        # only table key attributes identify the item
        key_data = {
            k: v
            for k, v in record.items() if k in self.table_keys
        }
        result_record = self._execute_item_query(key_data=key_data,
                                                 item_data=record)
        if result_record is not None:
            record.update(result_record)
        result.add_record(record)
    return result
def test_builtin_copy() -> None:
    """Shallow copy shares nested values; deepcopy duplicates them."""
    source = {"a": [[1, 2, 3]]}
    table = DataTable(source)

    shallow = copy(table)
    assert isinstance(shallow, DataTable)
    assert shallow is not table
    # the column list itself is duplicated...
    assert shallow["a"] is not table["a"]
    # ...but the nested row value is still shared with the source dict
    assert shallow["a"][0] is source["a"][0]

    deep = deepcopy(table)
    assert isinstance(deep, DataTable)
    assert deep is not table
    assert deep["a"] is not table["a"]
    # deepcopy duplicates even the nested value
    assert deep["a"][0] is not source["a"][0]
def test_get_record() -> None:
    """get_record returns a row dict; bad indexes raise DataTableError."""
    table = DataTable({"a": [1, 2], "b": [3, 4]})
    for index, expected in enumerate(({"a": 1, "b": 3}, {"a": 2, "b": 4})):
        assert table.get_record(index) == expected
    # index past the last row
    with pytest.raises(DataTableError):
        table.get_record(2)
    # non-normalized tables cannot produce records
    with pytest.raises(DataTableError):
        DataTable({"a": [1, 2], "b": [3]}).get_record(2)
def _execute_method_batch_update_item(self, data_table: DataTable) -> DataTable:
    """Write full records with BatchWriteItem (PutRequest) in
    MAX_BATCH_SIZE chunks.

    Arguments:
        data_table -- Input request data table with table keys set.

    Returns:
        The input `data_table` unchanged.
    """
    self._validate_data_table_has_table_keys(data_table)
    record_chunks = chunkify(data_table.get_records(), self.MAX_BATCH_SIZE)
    table_name = self.table_resource.name
    for record_chunk in record_chunks:
        request_list = []
        for record in record_chunk:
            # PutRequest replaces the whole item with the record data
            request_list.append({"PutRequest": {"Item": dict(record)}})
        request_items = {table_name: request_list}
        self._batch_write_item(
            RequestItems=request_items,
            **self._extra_params,
        )
    return data_table
def batch_upsert_records(
    self,
    records: Iterable[_RecordType],
    set_if_not_exists_keys: Iterable[str] = (),
) -> None:
    """
    Upsert records to DB.

    See `DynamoTable.batch_upsert`.

    Arguments:
        records -- Full or partial records data.
        set_if_not_exists_keys -- List of keys to set only if they do not
            exist in DB.
    """
    # process records in chunks so each upsert stays within max_batch_size
    for records_chunk in chunkify(records, self.max_batch_size):
        upsert_data_table = DataTable(
            record_class=self.record_class).add_record(*records_chunk)
        self.batch_upsert(upsert_data_table,
                          set_if_not_exists_keys=set_if_not_exists_keys)
def _execute_method_update_item(self, data_table: DataTable) -> DataTable:
    """Run UpdateItem for every record in `data_table` and collect the
    returned items.

    Arguments:
        data_table -- Input request data table with table keys set.

    Returns:
        A DataTable with the records returned by each update.

    Raises:
        DynamoQueryError -- If the query has no update expression.
    """
    self._validate_data_table_has_table_keys(data_table)
    self._validate_required_value_keys(data_table)
    result = DataTable[Dict[str, Any]]()
    for record in data_table.get_records():
        # NOTE(review): this check is loop-invariant; as written it only
        # fires when the input table is non-empty -- hoisting it would
        # change behavior for an empty table.
        if self.UPDATE_EXPRESSION not in self._expressions:
            raise DynamoQueryError(
                f"{self} must have {self.UPDATE_EXPRESSION} or `update` method."
            )
        # only table key attributes identify the item to update
        key_data = {
            k: v
            for k, v in record.items() if k in self.table_keys
        }
        result_record = self._execute_item_query(
            key_data=key_data,
            item_data=record,
        )
        if result_record is not None:
            result.add_record(result_record)
    return result
def _execute_method_batch_delete_item(self, data_table: DataTable) -> DataTable:
    """Delete records with BatchWriteItem (DeleteRequest) in
    MAX_BATCH_SIZE chunks.

    Arguments:
        data_table -- Input request data table with table keys set.

    Returns:
        The input `data_table` unchanged.
    """
    self._validate_data_table_has_table_keys(data_table)
    record_chunks = chunkify(data_table.get_records(), self.MAX_BATCH_SIZE)
    table_name = self.table_resource.name
    for record_chunk in record_chunks:
        request_list = []
        for record in record_chunk:
            # only table key attributes identify the item to delete
            key_data = {
                k: v
                for k, v in record.items() if k in self.table_keys
            }
            request_item = {"DeleteRequest": {"Key": key_data}}
            # skip duplicate delete requests within the same batch
            if request_item not in request_list:
                request_list.append(request_item)
        request_items = {table_name: request_list}
        self._batch_write_item(
            RequestItems=request_items,
            **self._extra_params,
        )
    return data_table
def _execute_method_query(self, data_table: DataTable) -> DataTable:
    """Run a paginated Query for every record in `data_table` and merge all
    pages into a single result table.

    Arguments:
        data_table -- Input request data table.

    Returns:
        A DataTable with all queried records.

    Raises:
        DynamoQueryError -- If the key condition expression uses an
            unsupported operator.
    """
    self._validate_last_evaluated_key()
    self._validate_required_value_keys(data_table)
    # key condition expressions only allow comparison/range operators
    for operator in self._expressions[
            self.KEY_CONDITION_EXPRESSION].get_operators():
        if operator not in (
                Operator.EQ.value,
                Operator.LT.value,
                Operator.GT.value,
                Operator.LTE.value,
                Operator.GTE.value,
                Operator.BETWEEN.value,
                Operator.BEGINS_WITH.value,
        ):
            raise DynamoQueryError(
                f"{self.KEY_CONDITION_EXPRESSION} does not support operator"
                f' "{operator}".')
    result = DataTable[Dict[str, Any]]()
    for record in data_table.get_records():
        result.add_table(self._execute_paginated_query(data=record))
    return result
def batch_get_records(
        self,
        records: Iterable[_RecordType],
        consistent_read: bool = False) -> Iterator[_RecordType]:
    """
    Get records as an iterator from DB.

    See `DynamoTable.batch_get`.

    Arguments:
        records -- Full or partial records data.
        consistent_read -- `ConsistentRead` boto3 parameter.

    Yields:
        Found or not found record data.
    """
    # fetch in chunks so each request stays within max_batch_size keys
    for records_chunk in chunkify(records, self.max_batch_size):
        get_data_table = DataTable(
            record_class=self.record_class).add_record(*records_chunk)
        result_data_table = self.batch_get(get_data_table,
                                           consistent_read=consistent_read)
        for record in result_data_table.get_records():
            yield self._convert_record(record)