def identity_aggregate_schema(
        identity_aggregate_schema_spec: Dict[str, Any],
        store_spec: Dict[str, Any]) -> IdentityAggregateSchema:
    """Build an IdentityAggregateSchema from an aggregate spec and its store spec."""
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(identity_aggregate_schema_spec)
    # Nest the store spec under the aggregate so the schema can resolve it.
    loader.add_schema_spec(store_spec, fq_name)
    return IdentityAggregateSchema(fq_name, loader)
def window_aggregate_schema(schema_loader_with_mem_store: SchemaLoader, mem_store_name: str,
                            stream_dtc_name: str) -> WindowAggregateSchema:
    """Register a `session` block aggregate under the stream DTC, then build a
    one-day window aggregate sourced from it."""
    block_spec = {
        'Type': Type.BLURR_AGGREGATE_BLOCK,
        'Name': 'session',
        'Store': mem_store_name,
        'Fields': [{
            'Name': 'events',
            'Type': Type.INTEGER,
            'Value': 'session.events + 1',
        }],
    }
    schema_loader_with_mem_store.add_schema_spec(block_spec, stream_dtc_name)

    window_spec = {
        'Type': Type.BLURR_AGGREGATE_WINDOW,
        'Name': 'test_window_name',
        'WindowType': Type.DAY,
        'WindowValue': 1,
        'Source': stream_dtc_name + '.session',
        'Fields': [{
            'Name': 'total_events',
            'Type': Type.INTEGER,
            'Value': 'sum(source.events)'
        }],
    }
    window_name = schema_loader_with_mem_store.add_schema_spec(window_spec)
    return WindowAggregateSchema(window_name, schema_loader_with_mem_store)
def activity_aggregate_schema(
        activity_aggregate_schema_spec: Dict[str, Any],
        store_spec: Dict[str, Any]) -> ActivityAggregateSchema:
    """Build an ActivityAggregateSchema from an aggregate spec and its store spec."""
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(activity_aggregate_schema_spec)
    # Nest the store spec under the aggregate so the schema can resolve it.
    loader.add_schema_spec(store_spec, fq_name)
    return ActivityAggregateSchema(fq_name, loader)
def test_get_store_success(nested_schema_spec: Dict) -> None:
    """A nested `Store` spec should be retrievable as a MemoryStore instance."""
    nested_schema_spec['Store'] = {
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY,
    }
    loader = SchemaLoader()
    loader.add_schema_spec(nested_schema_spec)

    store = loader.get_store('test.memstore')
    assert isinstance(store, MemoryStore)
def test_block_aggregate_schema_fields_initialization(schema_spec, store_spec):
    """Field specs survive BlockAggregateSchema construction unchanged.

    NOTE(review): renamed from `test_block_aggregate_schema_initialization` —
    a later test in this module reuses that name, which shadows this one so
    pytest never collects it (flake8 F811). The later test's assertions are a
    superset of these, so re-collecting this one is safe.
    """
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(schema_spec)
    schema_loader.add_schema_spec(store_spec, name)

    schema = BlockAggregateSchema(name, schema_loader)

    # Fields must match both on the constructed schema and the loader's copy.
    assert match_fields(schema._spec['Fields'])
    loader_spec = schema_loader.get_schema_spec(name)
    assert match_fields(loader_spec['Fields'])
def test_get_schema_object_error(nested_schema_spec_bad_type: Dict) -> None:
    """Unknown or missing `Type` values surface as GenericSchemaError."""
    loader = SchemaLoader()
    loader.add_schema_spec(nested_schema_spec_bad_type)

    # Top-level schema declares a type that does not exist.
    with raises(GenericSchemaError, match='Type `Blurr:Unknown` not found.'):
        loader.get_schema_object('test')

    # Nested group omits `Type` entirely.
    with raises(GenericSchemaError, match='`Type` not defined in schema `test.test_group`'):
        loader.get_schema_object('test.test_group')
def test_block_aggregate_schema_missing_split_attribute_adds_error(
        schema_spec, store_spec):
    """Dropping `Split` from the spec records a RequiredAttributeError."""
    del schema_spec[BlockAggregateSchema.ATTRIBUTE_SPLIT]
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(schema_spec)
    loader.add_schema_spec(store_spec, fq_name)

    schema = BlockAggregateSchema(fq_name, loader)

    assert len(schema.errors) == 1
    error = schema.errors[0]
    assert isinstance(error, RequiredAttributeError)
    assert error.attribute == BlockAggregateSchema.ATTRIBUTE_SPLIT
def test_block_aggregate_schema_with_split_initialization(
        schema_spec, store_spec):
    """A valid `Split` expression is compiled into an Expression object."""
    schema_spec['Split'] = '4 > 2'
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(schema_spec)
    loader.add_schema_spec(store_spec, fq_name)

    schema = BlockAggregateSchema(fq_name, loader)

    assert isinstance(schema.split, Expression)
    # Fields remain intact on the input spec and the loader's stored copy.
    assert match_fields(schema_spec['Fields'])
    assert match_fields(loader.get_schema_spec(fq_name)['Fields'])
def test_get_all_stores(nested_schema_spec: Dict) -> None:
    """get_all_stores() reports a store only after it has been instantiated."""
    nested_schema_spec['Store'] = {
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY,
    }
    loader = SchemaLoader()
    loader.add_schema_spec(nested_schema_spec)

    # Nothing instantiated yet, so the registry is empty.
    assert loader.get_all_stores() == []

    # Touching the store instantiates it.
    assert isinstance(loader.get_store('test.memstore'), MemoryStore)

    stores = loader.get_all_stores()
    assert len(stores) == 1
    assert isinstance(stores[0], MemoryStore)
def test_initialization_with_valid_source(
        schema_loader_with_mem_store: SchemaLoader,
        aggregate_block_schema_spec: Dict[str, Any],
        window_schema_spec: Dict[str, Any], stream_dtc_name: str):
    """A window schema pointing at a registered block aggregate resolves its source."""
    schema_loader_with_mem_store.add_schema_spec(aggregate_block_schema_spec, stream_dtc_name)
    fq_name = schema_loader_with_mem_store.add_schema_spec(window_schema_spec)

    schema = WindowAggregateSchema(fq_name, schema_loader_with_mem_store)

    assert Type.is_type_equal(schema.window_type, Type.DAY)
    assert schema.window_value == 1
    assert isinstance(schema.source, BlockAggregateSchema)
    assert schema.source.name == 'session'
def test_streaming_transformer_schema_get_time_datetime_not_defined(
        schema_loader: SchemaLoader, schema_spec: Dict[str, Any]) -> None:
    """Without the `Import` section, the Time expression cannot see `datetime`."""
    del schema_spec['Import']
    fq_name = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(fq_name, schema_loader)

    with pytest.raises(NameError, match='name \'datetime\' is not defined'):
        assert transformer_schema.get_time(Context())
def test_streaming_transformer_schema_schema_init(
        schema_loader: SchemaLoader, schema_spec: Dict[str, Any]) -> None:
    """Identity and Time expressions are captured verbatim from the spec."""
    fq_name = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(fq_name, schema_loader)

    assert transformer_schema.identity.code_string == '\'user1\''
    assert transformer_schema.time.code_string == 'datetime(2016,10,10)'
def get_per_identity_records(
        self, events: Iterable, data_processor: DataProcessor
) -> Generator[Tuple[str, TimeAndRecord], None, None]:
    """
    Uses the given iterable of events and the data processor to convert each
    event into Records, yielding each Record with its identity and time.

    :param events: Iterable of raw events.
    :param data_processor: DataProcessor used to turn each event into Records.
    :return: Yields Tuple[Identity, TimeAndRecord] for all Records in events.
    """
    schema_loader = SchemaLoader()
    stream_bts_name = schema_loader.add_schema_spec(self._stream_bts)
    stream_transformer_schema: StreamingTransformerSchema = schema_loader.get_schema_object(
        stream_bts_name)
    for event in events:
        try:
            for record in data_processor.process_data(event):
                try:
                    # `identity`/`record_time` chosen so the builtin `id` and
                    # the stdlib `time` module are not shadowed.
                    identity = stream_transformer_schema.get_identity(record)
                    record_time = stream_transformer_schema.get_time(record)
                    yield (identity, (record_time, record))
                except Exception as err:
                    # Best-effort: a single bad record must not abort the stream.
                    logging.error('%s in parsing Record %s.', err, record)
        except Exception as err:
            # Likewise, a malformed event is logged and skipped.
            logging.error('%s in parsing Event %s.', err, event)
def _execute_stream_bts(
        self,
        identity_events: List[TimeAndRecord],
        identity: str,
        schema_loader: SchemaLoader,
        old_state: Optional[Dict] = None) -> Dict[Key, Any]:
    """Replay the identity's events through the streaming BTS and return the
    resulting per-identity state from the store.

    ``old_state`` (if given) is written into the store first so the replay
    continues from previously persisted state.
    """
    if self._stream_bts is None:
        return {}

    stream_bts_name = schema_loader.add_schema_spec(self._stream_bts)
    transformer_schema = schema_loader.get_schema_object(stream_bts_name)

    store = self._get_store(schema_loader)
    # Seed the store with any previously persisted state before replaying.
    if old_state:
        for key, value in old_state.items():
            store.save(key, value)

    if identity_events:
        transformer = StreamingTransformer(transformer_schema, identity)
        # Events arrive as (time, record); only the record is evaluated here.
        for _, event in identity_events:
            transformer.run_evaluate(event)
        transformer.run_finalize()

    return self._get_store(schema_loader).get_all(identity)
def test_streaming_transformer_schema_get_time_type_error(schema_loader: SchemaLoader,
                                                          schema_spec: Dict[str, Any]) -> None:
    """A Time expression that yields a non-datetime value raises TimeError."""
    schema_spec['Time'] = '1'
    fq_name = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(fq_name, schema_loader)

    with pytest.raises(TimeError, match='Could not determine time using 1'):
        assert transformer_schema.get_time(Context())
def test_aggregate_schema_unresolved_store_raises(aggregate_schema_spec, store_spec):
    """A string `Store` reference raises until the store spec is registered.

    NOTE(review): renamed from `test_aggregate_schema_initialization_with_store`
    — a later test in this module reuses that name, which shadows this one so
    pytest never collects it (flake8 F811). The later variant expects errors to
    be *collected* rather than raised; this one may be a stale older version —
    confirm which contract is current and delete the other.
    """
    aggregate_schema_spec['Store'] = 'memory'
    schema_loader = SchemaLoader()
    name = schema_loader.add_schema_spec(aggregate_schema_spec)

    # The referenced store does not exist yet.
    with pytest.raises(GenericSchemaError, match="user.memory not declared in schema"):
        MockAggregateSchema(name, schema_loader)

    # Register the store under `user` and retry: the reference now resolves.
    schema_loader.add_schema_spec(store_spec, 'user')
    aggregate_schema = MockAggregateSchema(name, schema_loader)
    store = schema_loader.get_store(
        aggregate_schema.store_schema.fully_qualified_name)
    assert store is not None
    assert store._schema.name == 'memory'
    assert aggregate_schema.store_schema.name == 'memory'
def test_aggregate_schema_contains_identity_field(aggregate_schema_spec):
    """Aggregates implicitly add an `_identity` entry to their nested schema."""
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(aggregate_schema_spec)

    schema = MockAggregateSchema(fq_name, loader)

    assert len(schema.nested_schema) == 2
    assert '_identity' in schema.nested_schema
def empty_memory_store() -> MemoryStore:
    """Create a fresh, empty in-memory store."""
    loader = SchemaLoader()
    spec = {
        'Name': 'memstore',
        'Type': Type.BLURR_STORE_MEMORY,
    }
    return loader.get_store(loader.add_schema_spec(spec))
def test_streaming_transformer_schema_get_identity_error(schema_loader: SchemaLoader,
                                                         schema_spec: Dict[str, Any]) -> None:
    """A record missing the identity field raises IdentityError."""
    schema_spec['Identity'] = 'source.user'
    fq_name = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(fq_name, schema_loader)

    with pytest.raises(IdentityError, match='Could not determine identity using source.user'):
        assert transformer_schema.get_identity(Record())
def aggregate_schema_without_store():
    """Build a MockAggregateSchema whose spec has no `Store` section."""
    loader = SchemaLoader()
    spec = get_aggregate_schema_spec()
    del spec['Store']
    fq_name = loader.add_schema_spec(spec)
    return MockAggregateSchema(fully_qualified_name=fq_name, schema_loader=loader)
def _execute_window_dtc(self, identity: str,
                        schema_loader: SchemaLoader) -> List[Dict]:
    """Run the window DTC over the restored streaming state for one identity.

    Restores the streaming transformer's state from the store, locates its
    single BlockAggregate, then evaluates the window transformer against each
    stored block for this identity.

    :param identity: Identity whose blocks are windowed.
    :param schema_loader: Loader holding the streaming schema; the window DTC
        spec is added to it here.
    :return: List of flattened window snapshots (one per anchored block);
        empty if no window DTC was provided.
    :raises Exception: If the streaming DTC has zero or more than one
        BlockAggregate.
    """
    if self._window_dtc is None:
        logging.debug('Window DTC not provided')
        return []

    stream_transformer = StreamingTransformer(
        self._get_streaming_transformer_schema(schema_loader), identity)
    all_data = self._get_store(schema_loader).get_all(identity)
    stream_transformer.run_restore(all_data)

    exec_context = Context()
    exec_context.add(stream_transformer._schema.name, stream_transformer)

    # Exactly one BlockAggregate must exist in the streaming DTC.
    block_obj = None
    for aggregate in stream_transformer._nested_items.values():
        if not isinstance(aggregate, BlockAggregate):
            continue
        if block_obj is not None:
            # BUG FIX: the message was previously a tuple of two strings
            # (a stray comma instead of concatenation), producing a mangled
            # exception message.
            raise Exception('Window operation is supported against Streaming '
                            'DTC with only one BlockAggregate')
        block_obj = aggregate

    if block_obj is None:
        raise Exception(
            'No BlockAggregate found in the Streaming DTC file')

    window_data = []

    window_dtc_name = schema_loader.add_schema_spec(self._window_dtc)
    window_transformer_schema = schema_loader.get_schema_object(
        window_dtc_name)
    window_transformer = WindowTransformer(window_transformer_schema,
                                           identity, exec_context)

    logging.debug('Running Window DTC for identity {}'.format(identity))

    anchors = 0
    blocks = 0
    for key, data in all_data.items():
        # Only keys belonging to the block aggregate's group are windowed.
        if key.group != block_obj._schema.name:
            continue
        try:
            blocks += 1
            if window_transformer.run_evaluate(
                    block_obj.run_restore(data)):
                anchors += 1
                window_data.append(
                    window_transformer.run_flattened_snapshot)
        except PrepareWindowMissingBlocksError as err:
            # Not enough surrounding blocks for this anchor — skip quietly.
            logging.debug('{} with {}'.format(err, key))

    if anchors == 0:
        logging.debug(
            'No anchors found for identity {} out of {} blocks'.format(
                identity, blocks))

    return window_data
def test_aggregate_schema_initialization_with_store(aggregate_schema_spec, store_spec):
    """A string `Store` reference is an error until the store spec is registered."""
    aggregate_schema_spec['Store'] = 'memory'
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(aggregate_schema_spec)

    # The referenced store does not exist yet, so construction records an error.
    MockAggregateSchema(fq_name, loader)
    assert isinstance(loader.get_errors(fq_name, True)[0], SpecNotFoundError)

    # Register the store under `user` and retry: the reference now resolves.
    loader.add_schema_spec(store_spec, 'user')
    schema = MockAggregateSchema(fq_name, loader)

    store = loader.get_store(schema.store_schema.fully_qualified_name)
    assert store is not None
    assert store._schema.name == 'memory'
    assert schema.store_schema.name == 'memory'
def test_field_schema() -> MockFieldSchema:
    """Build a simple integer field schema fixture."""
    loader = SchemaLoader()
    spec = {
        'Name': 'max_attempts',
        'Type': Type.INTEGER,
        'Value': 5,
    }
    return MockFieldSchema(loader.add_schema_spec(spec), loader)
def test_streaming_transformer_schema_get_identity_from_record(
        schema_loader: SchemaLoader, schema_spec: Dict[str, Any]) -> None:
    """`source.user` identity expression pulls the user value out of a Record.

    BUG FIX: the record value had been replaced with the placeholder '******'
    (likely a credential-scrubbing artifact), which contradicts the assertion
    that the resolved identity equals 'user1'. Restored the matching value.
    """
    schema_spec['Identity'] = 'source.user'
    streaming_dtc = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_dtc, schema_loader)

    assert transformer_schema.get_identity(Record({'user': 'user1'})) == 'user1'
def test_initialization_with_invalid_source(
        schema_loader_with_mem_store: SchemaLoader,
        window_schema_spec: Dict[str, Any], stream_dtc_name: str):
    """A window whose Source aggregate was never registered fails to build."""
    fq_name = schema_loader_with_mem_store.add_schema_spec(window_schema_spec)

    with raises(GenericSchemaError, match=stream_dtc_name + '.session not declared in schema'):
        WindowAggregateSchema(fq_name, schema_loader_with_mem_store)
def test_block_aggregate_schema_initialization(schema_spec, store_spec):
    """Dimensions and fields survive BlockAggregateSchema construction unchanged."""
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(schema_spec)
    loader.add_schema_spec(store_spec, fq_name)

    schema = BlockAggregateSchema(fq_name, loader)

    expected_dimensions = [{
        'Name': 'label',
        'Type': Type.STRING,
        'Value': 'source.label'
    }]
    assert schema._spec[BlockAggregateSchema.ATTRIBUTE_DIMENSIONS] == expected_dimensions
    assert match_fields(schema._spec['Fields'])
    assert match_fields(loader.get_schema_spec(fq_name)['Fields'])
def test_initialization_with_valid_source_reports_no_errors(
        schema_loader_with_mem_store: SchemaLoader,
        window_schema_spec: Dict[str, Any]):
    """Building a window schema against the mem-store loader yields no errors.

    NOTE(review): renamed from `test_initialization_with_invalid_source` — that
    name duplicated an earlier test in this module (shadowing it in pytest
    collection, flake8 F811) and misdescribed the scenario: this test asserts
    there are NO errors. Confirm the un-shadowed earlier test (which expects a
    raise) still reflects current behavior.
    """
    name = schema_loader_with_mem_store.add_schema_spec(window_schema_spec)
    schema = WindowAggregateSchema(name, schema_loader_with_mem_store)
    assert len(schema.errors) == 0
    assert len(schema_loader_with_mem_store.get_errors()) == 0
def anchor_schema_max_two(schema_loader: SchemaLoader) -> AnchorSchema:
    """Anchor schema fixture that fires at most twice."""
    spec = {
        'Condition': True,
        'Max': 2,
        'Name': 'anchor',
        'Type': Type.ANCHOR,
    }
    return AnchorSchema(schema_loader.add_schema_spec(spec), schema_loader)
def test_variable_aggregate_initialization(schema_spec):
    """Fields are preserved both on the schema and in the loader's stored spec."""
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(schema_spec)

    schema = VariableAggregateSchema(fq_name, loader)

    assert match_fields(schema._spec['Fields'])
    assert match_fields(loader.get_schema_spec(fq_name)['Fields'])
def test_schema_init(dynamo_store_spec: Dict[str, Any]) -> None:
    """Dynamo store schema picks up name/table from the spec plus default capacities."""
    loader = SchemaLoader()
    fq_name = loader.add_schema_spec(dynamo_store_spec)

    store_schema = loader.get_schema_object(fq_name)

    assert store_schema.name == dynamo_store_spec['Name']
    assert store_schema.table_name == dynamo_store_spec['Table']
    # Default read/write capacity units.
    assert store_schema.rcu == 5
    assert store_schema.wcu == 5