def activity_events() -> List[Record]:
    return [
        Record({
            'id': 'user1',
            'event_value': 10,
            'event_time': '2018-01-01T01:01:01+00:00'
        }),
        Record({
            'id': 'user1',
            'event_value': 100,
            'event_time': '2018-01-01T01:01:05+00:00'
        }),
        Record({
            'id': 'user1',
            'event_value': 1,
            'event_time': '2018-01-01T01:02:01+00:00'
        }),
        Record({
            'id': 'user1',
            'event_value': 1000,
            'event_time': '2018-01-01T03:01:01+00:00'
        }),
        Record({
            'id': 'user1',
            'event_value': 10000,
            'event_time': '2018-01-02T01:01:01+00:00'
        })
    ]
def records() -> List[Record]:
    return [
        Record({
            'id': 'user1',
            'label': 'a',
            'event_value': 10,
            'event_time': '2018-01-01T01:01:01+00:00'
        }),
        Record({
            'id': 'user1',
            'label': 'b',
            'event_value': 1,
            'event_time': '2018-01-01T01:02:01+00:00'
        }),
        Record({
            'id': 'user1',
            'label': 'a',
            'event_value': 100,
            'event_time': '2018-01-01T01:01:05+00:00'
        }),
        Record({
            'id': 'user1',
            'label': 'c',
            'event_value': 10000,
            'event_time': '2018-01-02T01:01:01+00:00'
        }),
        Record({
            'id': 'user1',
            'label': 'c',
            'event_value': 1000,
            'event_time': '2018-01-01T03:01:01+00:00'
        }),
    ]
def test_block_aggregate_schema_evaluate_with_dimensions(block_aggregate_schema_spec,
                                                         schema_loader):
    block_aggregate_schema_spec['Dimensions'] = [{
        'Name': 'label',
        'Type': Type.STRING,
        'Value': 'source.label'
    }]
    name = schema_loader.add_schema_spec(block_aggregate_schema_spec)
    block_aggregate_schema = BlockAggregateSchema(name, schema_loader)

    identity = 'userA'
    time = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)
    block_aggregate = create_block_aggregate(block_aggregate_schema, time, identity)
    block_aggregate._evaluation_context.global_add('source', Record({'label': 'label1'}))
    block_aggregate.run_evaluate()
    block_aggregate.run_evaluate()

    # Check eval results of various fields before split
    assert check_fields(block_aggregate._nested_items, {
        '_identity': identity,
        'event_count': 2,
        '_start_time': time,
        '_end_time': time,
        'label': 'label1',
    })

    current_snapshot = block_aggregate._snapshot
    # A new dimension value triggers a split: the 'label1' block is snapshotted
    # to the store and the aggregate starts a fresh block for 'label2'.
    block_aggregate._evaluation_context.global_add('source', Record({'label': 'label2'}))
    block_aggregate.run_evaluate()

    # Check eval results of various fields after split
    assert check_fields(block_aggregate._nested_items, {
        '_identity': identity,
        'event_count': 1,
        '_start_time': time,
        '_end_time': time,
        'label': 'label2',
    })

    # Check aggregate snapshot present in store
    assert block_aggregate._store.get(
        Key(key_type=KeyType.DIMENSION,
            identity=block_aggregate._identity,
            group=block_aggregate._name,
            dimensions=['label1'])) == current_snapshot
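# `check_fields` is a test helper that is not shown in this file. The sketch
# below is a hypothetical implementation consistent with how it is called
# above; it assumes each entry in `_nested_items` exposes its evaluated value
# via a `.value` attribute.
def check_fields(nested_items: Dict[str, Any], expected: Dict[str, Any]) -> bool:
    """Return True when every expected field evaluates to its expected value."""
    for name, expected_value in expected.items():
        field = nested_items.get(name)
        if field is None or field.value != expected_value:
            return False
    return True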
def test_streaming_transformer_schema_get_identity_error(schema_loader: SchemaLoader,
                                                         schema_spec: Dict[str, Any]) -> None:
    schema_spec['Identity'] = 'source.user'
    streaming_bts = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_bts, schema_loader)

    with pytest.raises(IdentityError, match='Could not determine identity using source.user'):
        assert transformer_schema.get_identity(Record())
def test_streaming_transformer_evaluate(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    streaming_bts = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_bts, schema_loader)
    transformer = StreamingTransformer(transformer_schema, 'user1')
    transformer.run_evaluate(Record())

    assert transformer._snapshot == {'test_group': {'_identity': 'user1', 'events': 1}}
def test_streaming_transformer_evaluate_time_error(schema_loader: SchemaLoader,
                                                   schema_spec: Dict[str, Any]) -> None:
    del schema_spec['Import']
    streaming_bts = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_bts, schema_loader)
    transformer = StreamingTransformer(transformer_schema, 'user1')

    # Without the `Import` section, `datetime` is never brought into the
    # evaluation context, so evaluating the time expression fails.
    with pytest.raises(NameError, match="name 'datetime' is not defined"):
        assert transformer.run_evaluate(Record())
def test_streaming_transformer_schema_get_identity_from_record(
        schema_loader: SchemaLoader, schema_spec: Dict[str, Any]) -> None:
    schema_spec['Identity'] = 'source.user'
    streaming_dtc = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_dtc, schema_loader)

    assert transformer_schema.get_identity(Record({'user': 'user1'})) == 'user1'
def test_getattr():
    record = Record({'test': 1})

    # Defined class attributes work as expected.
    assert record.__class__ == Record

    # Undefined class attributes raise an exception.
    with pytest.raises(AttributeError, match='Record object has no __test__ attribute.'):
        record.__test__
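# A hypothetical sketch of the lookup behavior exercised by test_getattr above
# and test_invalid_field below, assuming Record subclasses dict: dunder lookups
# raise AttributeError, while ordinary missing fields resolve to None.
class RecordSketch(dict):
    def __getattr__(self, name):
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError('Record object has no {} attribute.'.format(name))
        return self.get(name, None)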
def test_streaming_transformer_evaluate_user_mismatch(schema_loader: SchemaLoader,
                                                      schema_spec: Dict[str, Any]) -> None:
    streaming_bts = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_bts, schema_loader)
    transformer = StreamingTransformer(transformer_schema, 'user2')

    # Use a raw string so the escaped parentheses reach the regex engine intact.
    with pytest.raises(
            IdentityError,
            match=r'Identity in transformer \(user2\) and new record \(user1\) do not match'):
        assert transformer.run_evaluate(Record())
def test_complex_array() -> None:
    record = Record({
        'complex_array_field': [{
            'field_1': 'one',
            'field_2': 2
        }, ['one', 'two'], 1]
    })

    assert len(record.complex_array_field) == 3
    assert record.complex_array_field[0].field_1 == 'one'
    assert record.complex_array_field[1][0] == 'one'
    assert record.complex_array_field[2] == 1
def test_ipfix_data_processor_success():
    data_processor = IpfixDataProcessor()
    test_data = ('{"Other":1,"DataSets":['
                 '[{"I":2,"E":1230,"V":"test1"}],'
                 '[{"I":56,"V":"aa:aa:aa:aa:aa:aa"},{"I":12,"V":"0.0.0.0"},'
                 '{"I":182,"V":80},{"I":183,"V":81},{"I":4,"V":6},{"I":150,"V":1522385684}],'
                 '[{"I":56,"V":"bb:bb:bb:bb:bb:bb"},{"I":123,"V":321}]'
                 ']}')

    assert data_processor.process_data(test_data) == [
        Record({
            'source_mac': 'aa:aa:aa:aa:aa:aa',
            'dest_ip': '0.0.0.0',
            'source_port': 80,
            'dest_port': 81,
            'protocol': 6,
            'timestamp': 1522385684
        }),
        Record({
            'source_mac': 'bb:bb:bb:bb:bb:bb',
            123: 321
        })
    ]
def process_data(self, data_string: str) -> List[Record]:
    data = json.loads(data_string)
    if not isinstance(data, dict):
        return []

    record_list = []
    for data_row in data.get('DataSets', []):
        record = {}
        for event_dict in data_row:
            # Map the IPFIX information element id to a friendly name where one
            # is known; otherwise keep the numeric id itself as the key.
            i = event_dict.get('I', 0)
            record[self.IPFIX_EVENT_MAPPER.get(i, i)] = event_dict['V']
        # Only rows that carry a source MAC (information element 56) are kept.
        if self.IPFIX_EVENT_MAPPER[56] in record:
            record_list.append(Record(record))

    return record_list
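# Usage sketch, consistent with the test above: a data set row without
# information element 56 is dropped, and unmapped ids stay as numeric keys.
# The 56 -> 'source_mac' and 4 -> 'protocol' mappings are taken from the
# expected records in test_ipfix_data_processor_success.
processor = IpfixDataProcessor()
records = processor.process_data(
    '{"DataSets": [[{"I": 56, "V": "aa:aa:aa:aa:aa:aa"}, {"I": 4, "V": 6}]]}')
# -> [Record({'source_mac': 'aa:aa:aa:aa:aa:aa', 'protocol': 6})]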
def test_streaming_transformer_finalize(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    streaming_bts = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_bts, schema_loader)
    transformer = StreamingTransformer(transformer_schema, 'user1')
    store = schema_loader.get_store('test.memstore')

    # Finalizing before any evaluation should not write anything to the store.
    transformer.run_finalize()
    assert store.get(Key(KeyType.DIMENSION, 'user1', 'test_group')) is None

    transformer.run_evaluate(Record())
    transformer.run_finalize()
    assert store.get(Key(KeyType.DIMENSION, 'user1', 'test_group')) == {
        '_identity': 'user1',
        'events': 1
    }
def test_invalid_field() -> None:
    record = Record({})
    assert record.missing_field is None
def test_simple_json_processor_success():
    data_processor = SimpleJsonDataProcessor()
    assert data_processor.process_data('{"test": 1}') == [Record({'test': 1})]
def test_streaming_transformer_schema_get_time_constant(
        schema_loader: SchemaLoader, schema_spec: Dict[str, Any]) -> None:
    streaming_dtc = schema_loader.add_schema_spec(schema_spec)
    transformer_schema = StreamingTransformerSchema(streaming_dtc, schema_loader)

    assert transformer_schema.get_time(Record()) == datetime(2016, 10, 10)
def process_data(self, data_string: str) -> List[Record]:
    return [Record(json.loads(data_string))]
def test_simple_fields() -> None:
    record = Record({'string_field': 'string value', 'int_field': 1})

    assert record.string_field == 'string value'
    assert record.int_field == 1
def test_pickle():
    record = Record({'test': 1})
    pickled_record = pickle.dumps(record)
    assert record == pickle.loads(pickled_record)
def test_array() -> None:
    record = Record({'array_field': ['one', 'two', 'three']})

    assert len(record.array_field) == 3
    assert record.array_field[0] == 'one'
def test_dict() -> None:
    record = Record({'dict_field': {'field_1': 'one', 'field_2': 2}})

    assert record.dict_field.field_1 == 'one'
    assert record.dict_field.field_2 == 2
def process_data(self, data_dict: Dict) -> List[Record]:
    return [Record(data_dict)]
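# Usage sketch. The name SimpleDictDataProcessor is an assumption for the
# dict-based processor above; both simple processors normalize their input
# into a single-element Record list, so downstream code can treat them
# uniformly.
assert (SimpleJsonDataProcessor().process_data('{"test": 1}') ==
        SimpleDictDataProcessor().process_data({'test': 1}))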