def update_template(template_json, updates_list: list):
    """Apply a list of field updates to a template and validate the result.

    Each entry in ``updates_list`` is a dict with keys ``field``, ``type``,
    ``args`` and ``kwargs``; the matching updater function is looked up in
    ``update_guide[field][type]``.

    Returns:
        ('ok', updated_template) when the updated template validates, with
        its version published; otherwise
        ('invalid update', original_template, validation_errors) so the
        caller keeps the pre-update template plus the reasons it failed.
    """
    template = DStream()
    template.load_from_json(template_json)
    # load_from_json leaves stream_token to the new DStream; carry the
    # original token over so the template keeps its identity.
    template['stream_token'] = template_json['stream_token']
    old_template = deepcopy(template)  # snapshot to return if validation fails
    for update in updates_list:
        # Hoist the guide entry: it is consulted for both the function and
        # the field_key_arg flag.
        guide_entry = update_guide[update['field']][update['type']]
        update_fn = guide_entry['function']
        args = [template]
        if guide_entry['field_key_arg']:
            args.append(update['field'])
        args.extend(update['args'])
        update_fn(*args, **update['kwargs'])
    # Run validation exactly once and reuse the result; the original called
    # valid_update twice, re-running validation (and potentially returning a
    # result that differed from the one that was checked).
    validation = valid_update(template)
    if validation is True:
        template.publish_version()
        return 'ok', template
    return 'invalid update', old_template, validation
class TestFunFactory(unittest.TestCase):
    """Tests for the per-field template helpers (update_*, modify_*, remove_transform)."""

    def setUp(self):
        """Load the demo template and stamp transform_ids so helpers can address transforms by id."""
        demo_data_dir = "demo_data/"
        # NOTE(review): the file handle from open() is never closed — a
        # with-block would be safer; left as-is here.
        self.dstream_dict = json.load(
            open(demo_data_dir + "demo_template_unit_test.txt"))
        self.dstream = DStream()
        self.dstream.load_from_json(self.dstream_dict)
        # Give the two demo filters fixed ids 1 and 2.
        self.dstream['filters'][0]['transform_id'] = 1
        self.dstream['filters'][1]['transform_id'] = 2
        # Number the derived-param rules 1..n in order.
        counter = 1
        for dparam in self.dstream['dparam_rules']:
            dparam['transform_id'] = counter
            counter += 1
        # Sample DetectThreshold event definition (not used by every test).
        self.test_event_rules = {
            "partition_list": [],
            "measure_list": ["timestamp", "head1"],
            "transform_type": "detect_event",
            "transform_name": "DetectThreshold",
            "param_dict": {
                "event_rules": {
                    "measure": "head1",
                    "threshold_value": 69.2,
                    "comparison_operator": ">=",
                    "absolute_compare": True
                },
                "event_name": "nice_event",
                "stream_id": "abc123",
            },
            "logical_comparison": "AND"
        }
        # One sample rule per Derive* transform type.
        self.test_dparam_rules_list = [{
            "partition_list": [("timestamp", 1510603551106, ">"),
                               ("timestamp", 1510603551391, "<")],
            "measure_list": ["timestamp", "timestamp_winning"],
            "transform_type": "derive_param",
            "transform_name": "DeriveSlope",
            "param_dict": {
                "func_params": {"window_len": 1},
                "measure_rules": {
                    "rise_measure": "timestamp_winning",
                    "run_measure": "timestamp",
                    "output_name": "time_slope"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["timestamp", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveChange",
            "param_dict": {
                "func_params": {"window_len": 1, "angle_change": False},
                "measure_rules": {
                    "target_measure": "timestamp",
                    "output_name": "time_change"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["timestamp", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveCumsum",
            "param_dict": {
                "func_params": {"offset": 0},
                "measure_rules": {
                    "target_measure": "timestamp",
                    "output_name": "time_sum"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["timestamp", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveWindowSum",
            "param_dict": {
                "func_params": {"window_len": 3},
                "measure_rules": {
                    "target_measure": "timestamp",
                    "output_name": "time_window_sum"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["timestamp", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveScaled",
            "param_dict": {
                "func_params": {"scalar": -1},
                "measure_rules": {
                    "target_measure": "timestamp",
                    "output_name": "negatime"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["location", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveDistance",
            "param_dict": {
                "func_params": {
                    "window_len": 1,
                    "distance_func": "euclidean",
                    "swap_lon_lat": True
                },
                "measure_rules": {
                    "spatial_measure": "location",
                    "output_name": "dist1"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["location", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveDistance",
            "param_dict": {
                "func_params": {
                    "window_len": 1,
                    "distance_func": "great_circle",
                    "swap_lon_lat": True
                },
                "measure_rules": {
                    "spatial_measure": "location",
                    "output_name": "dist2"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["location", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveHeading",
            "param_dict": {
                "func_params": {
                    "window_len": 1,
                    "units": "deg",
                    "heading_type": "bearing",
                    "swap_lon_lat": True
                },
                "measure_rules": {
                    "spatial_measure": "location",
                    "output_name": "head1"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["location", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveHeading",
            "param_dict": {
                "func_params": {
                    "window_len": 1,
                    "units": "deg",
                    "heading_type": "flat_angle",
                    "swap_lon_lat": True
                },
                "measure_rules": {
                    "spatial_measure": "location",
                    "output_name": "head2"
                }
            },
            "logical_comparison": "AND"
        }, {
            "partition_list": [],
            "measure_list": ["location", ],
            "transform_type": "derive_param",
            "transform_name": "DeriveInBox",
            "param_dict": {
                "func_params": {
                    "upper_left_corner": (-122.6835826856399, 45.515814287782455),
                    "lower_right_corner": (-122.678529, 45.511597)
                },
                "measure_rules": {
                    "spatial_measure": "location",
                    "output_name": "boxy"
                }
            },
            "logical_comparison": "AND"
        }]

    def test_update_stream_name(self):
        """update_stream_name replaces the stream_name value."""
        update_stream_name(self.dstream, 'chadwick')
        self.assertEqual(self.dstream['stream_name'], 'chadwick')

    def test_update_source_key(self):
        """update_source_key replaces the source_key value."""
        update_source_key(self.dstream, 'idd')
        self.assertEqual(self.dstream['source_key'], 'idd')

    def test_update_description(self):
        """update_description replaces the user_description value."""
        update_description(self.dstream, 'lmao')
        self.assertEqual(self.dstream['user_description'], 'lmao')

    def test_update_user_id(self):
        """Passing old_id renames an existing user id key."""
        update_user_id(self.dstream, 'galactic_id', old_id='driver-id')
        self.assertIn('galactic_id', self.dstream['user_ids'].keys())
        self.assertNotIn('driver-id', self.dstream['user_ids'].keys())

    def test_update_field(self):
        """update_field adds a field; with old_field it renames one."""
        update_field(self.dstream, 'dumb_field')
        self.assertIn('dumb_field', self.dstream['fields'])
        update_field(self.dstream, 'asteroid_field', old_field='dumb_field')
        self.assertNotIn('dumb_field', self.dstream['fields'].keys())
        self.assertIn('asteroid_field', self.dstream['fields'].keys())

    def test_update_tag(self):
        """update_tag adds a tag; with old_tag it renames one."""
        update_tag(self.dstream, 'not_poodles')
        self.assertIn('not_poodles', self.dstream['tags'].keys())
        update_tag(self.dstream, 'poodles', old_tag='not_poodles')
        self.assertIn('poodles', self.dstream['tags'].keys())
        self.assertNotIn('not_poodles', self.dstream['tags'].keys())

    def test_update_fk(self):
        """update_foreign_key adds a {key: None} entry; with old_fk it renames one."""
        update_foreign_key(self.dstream, 'ugh')
        self.assertIn({'ugh': None}, self.dstream['foreign_keys'])
        update_foreign_key(self.dstream, 'wah', old_fk='ugh')
        self.assertIn({'wah': None}, self.dstream['foreign_keys'])
        self.assertNotIn({'ugh': None}, self.dstream['foreign_keys'])

    def test_update_rules(self):
        """update_rules applies (key, value) pairs to the named rules dict."""
        update_rules(self.dstream, 'storage_rules',
                     [('store_raw', False), ('store_filtered', False)])
        self.assertDictEqual(
            {
                "store_raw": False,
                "store_filtered": False,
                "store_derived": True
            },
            self.dstream['storage_rules'])
        update_rules(self.dstream, 'ingest_rules', [('is_this_all_fake', True)])
        self.assertIn('is_this_all_fake', self.dstream['ingest_rules'].keys())
        self.assertTrue(self.dstream['ingest_rules']['is_this_all_fake'])

    def test_modify_filter(self):
        """modify_filter updates param_dict keys, partition list and flips AND->OR."""
        modify_filter(self.dstream, 1, [('order', 3), ('nyquist', 0.69)],
                      new_partition_list=['whatever'], change_comparison=True)
        self.assertEqual(self.dstream['filters'][0]['param_dict']['order'], 3)
        self.assertEqual(self.dstream['filters'][0]['param_dict']['nyquist'], 0.69)
        self.assertEqual(self.dstream['filters'][0]['logical_comparison'], 'OR')
        self.assertIn('whatever', self.dstream['filters'][0]['partition_list'])

    def test_modify_dparam(self):
        """modify_dparam updates func_params by transform_id, leaving other keys intact."""
        modify_dparam(self.dstream, 2, [('window_len', 2)])
        self.assertEqual(
            self.dstream['dparam_rules'][1]['param_dict']['func_params']
            ['window_len'], 2)
        self.assertEqual(
            len(self.dstream['dparam_rules'][1]['partition_list']), 0)
        modify_dparam(self.dstream, 2, [], new_partition_list=['new'])
        self.assertEqual(
            list(self.dstream['dparam_rules'][1]['param_dict']
                 ['func_params'].items()),
            [('window_len', 2), ('angle_change', False)])
        self.assertEqual(self.dstream['dparam_rules'][1]['partition_list'],
                         ['new'])

    def test_modify_event(self):
        """modify_event updates the named event's event_rules values."""
        modify_event(self.dstream, 'test_event',
                     [('threshold_value', 70), ('absolute_compare', False)])
        self.assertEqual(
            self.dstream['event_rules']['test_event']['param_dict']
            ['event_rules']['threshold_value'], 70)
        self.assertFalse(self.dstream['event_rules']['test_event']
                         ['param_dict']['event_rules']['absolute_compare'])

    def test_remove_transform(self):
        """remove_transform deletes the transform with the given id from the list."""
        remove_transform(self.dstream, 'filters', 2)
        self.assertEqual(len(self.dstream['filters']), 1)
        self.assertEqual(self.dstream['filters'][0]['transform_name'],
                         'ButterLowpass')
class TestDStream(unittest.TestCase):
    """Exercise the DStream mapping: default keys, add_* helpers, versioning, JSON loading."""

    def setUp(self):
        # Fresh DStream per test so mutations never leak between cases.
        self.dstream = DStream()

    def test_init(self):
        """A newly constructed DStream exposes every expected top-level key."""
        expected_keys = [
            'stream_name', 'user_description', 'version', 'stream_token',
            'source_key', 'template_id', 'storage_rules', 'ingest_rules',
            'engine_rules', 'timestamp', 'measures', 'fields', 'user_ids',
            'tags', 'foreign_keys', 'filters', 'dparam_rules', 'event_rules',
            'data_rules',
        ]
        for expected in expected_keys:
            print(self.dstream.keys())
            self.assertIn(expected, self.dstream.keys())

    def testadd_methods(self):
        """Each add_* helper stores its payload under the matching key."""
        self.assertIsInstance(self.dstream["stream_token"], str)

        measure_name, measure_dtype = "viscosity", "float"
        self.dstream.add_measure(measure_name, measure_dtype)
        self.assertTrue(measure_name in self.dstream["measures"].keys())
        self.assertEqual(self.dstream["measures"][measure_name]["dtype"],
                         measure_dtype)

        field_name = "strawberry"
        self.dstream.add_field(field_name)
        self.assertTrue(field_name in self.dstream["fields"].keys())

        user_id = "my_id"
        self.dstream.add_user_id(user_id)
        self.assertTrue(user_id in self.dstream["user_ids"].keys())

        tag = "Really good sensor"
        self.dstream.add_tag(tag)
        self.assertIn(tag, self.dstream["tags"])

        foreign_key = "key to the city"
        self.dstream.add_fk(foreign_key)
        self.assertTrue({foreign_key: None} in self.dstream["foreign_keys"])

        dummy_filter = {"func_name": "Make all values 0"}
        self.dstream.add_filter(dummy_filter)
        self.assertEqual(dummy_filter, self.dstream["filters"][0])

        dummy_dparam = {"measure": "viscosity", "drule": "max of mins"}
        self.dstream.add_derived_param(dummy_dparam)
        self.assertEqual(dummy_dparam, self.dstream["dparam_rules"][0])

        event_body = {"param": "viscosity", "threshold": "too viscous"}
        event_name = "My birthday"
        self.dstream.add_event(event_name, event_body)
        self.assertEqual(event_body, self.dstream["event_rules"][event_name])

        # publish_version bumps the integer version counter by exactly one.
        version_before = self.dstream["version"]
        self.dstream.publish_version()
        self.assertEqual(version_before + 1, self.dstream["version"])

        mapping = ["fake", "mapping", "list"]
        self.dstream.add_data_rules(mapping)
        self.assertEqual(mapping, self.dstream["data_rules"])

    def test_load_from_json(self):
        """Loading JSON copies values in while stream_token remains a string."""
        source = {"stream_token": "foo", "version": 900}
        self.dstream.load_from_json(source)
        self.assertEqual(source["version"], self.dstream["version"])
        self.assertIsInstance(self.dstream["stream_token"], str)

    def test_filter(self):
        """add_filter accepts an arbitrary dict without raising."""
        self.dstream.add_filter({"test": "filterdict"})
class TestFunFactory(unittest.TestCase):
    """Tests for the template builders and updaters (create_template_dstream, build_*, update_template, update).

    NOTE(review): a class also named TestFunFactory is defined earlier in this
    file; if both definitions really live in the same module, this one shadows
    the earlier one and its tests never run — confirm and rename one of them.
    """

    def setUp(self):
        """Load the demo template and stamp transform_ids; build sample event/dparam fixtures."""
        demo_data_dir = "demo_data/"
        # NOTE(review): file handle from open() is never closed — a with-block
        # would be safer; left as-is here.
        self.dstream_dict = json.load(open(demo_data_dir + "demo_template_unit_test.txt"))
        self.dstream = DStream()
        self.dstream.load_from_json(self.dstream_dict)
        self.dstream['filters'][0]['transform_id'] = 1
        self.dstream['filters'][1]['transform_id'] = 2
        counter = 1
        for dparam in self.dstream['dparam_rules']:
            dparam['transform_id'] = counter
            counter += 1
        # Sample DetectThreshold event definition used by test_create_template.
        self.test_event_rules = {
            "partition_list": [],
            "measure_list": ["timestamp", "head1"],
            "transform_type": "detect_event",
            "transform_name": "DetectThreshold",
            "param_dict": {
                "event_rules": {
                    "measure": "head1",
                    "threshold_value": 69.2,
                    "comparison_operator": ">=",
                    "absolute_compare": True
                },
                "event_name": "nice_event",
                "stream_id": "abc123",
            },
            "logical_comparison": "AND"
        }
        # One sample rule per Derive* transform type.
        self.test_dparam_rules_list = [
            {
                "partition_list": [("timestamp", 1510603551106, ">"),
                                   ("timestamp", 1510603551391, "<")],
                "measure_list": ["timestamp", "timestamp_winning"],
                "transform_type": "derive_param",
                "transform_name": "DeriveSlope",
                "param_dict": {
                    "func_params": {"window_len": 1},
                    "measure_rules": {"rise_measure": "timestamp_winning",
                                      "run_measure": "timestamp",
                                      "output_name": "time_slope"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["timestamp", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveChange",
                "param_dict": {
                    "func_params": {"window_len": 1, "angle_change": False},
                    "measure_rules": {"target_measure": "timestamp",
                                      "output_name": "time_change"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["timestamp", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveCumsum",
                "param_dict": {
                    "func_params": {"offset": 0},
                    "measure_rules": {"target_measure": "timestamp",
                                      "output_name": "time_sum"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["timestamp", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveWindowSum",
                "param_dict": {
                    "func_params": {"window_len": 3},
                    "measure_rules": {"target_measure": "timestamp",
                                      "output_name": "time_window_sum"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["timestamp", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveScaled",
                "param_dict": {
                    "func_params": {"scalar": -1},
                    "measure_rules": {"target_measure": "timestamp",
                                      "output_name": "negatime"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["location", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveDistance",
                "param_dict": {
                    "func_params": {"window_len": 1,
                                    "distance_func": "euclidean",
                                    "swap_lon_lat": True},
                    "measure_rules": {"spatial_measure": "location",
                                      "output_name": "dist1"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["location", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveDistance",
                "param_dict": {
                    "func_params": {"window_len": 1,
                                    "distance_func": "great_circle",
                                    "swap_lon_lat": True},
                    "measure_rules": {"spatial_measure": "location",
                                      "output_name": "dist2"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["location", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveHeading",
                "param_dict": {
                    "func_params": {"window_len": 1, "units": "deg",
                                    "heading_type": "bearing",
                                    "swap_lon_lat": True},
                    "measure_rules": {"spatial_measure": "location",
                                      "output_name": "head1"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["location", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveHeading",
                "param_dict": {
                    "func_params": {"window_len": 1, "units": "deg",
                                    "heading_type": "flat_angle",
                                    "swap_lon_lat": True},
                    "measure_rules": {"spatial_measure": "location",
                                      "output_name": "head2"}
                },
                "logical_comparison": "AND"
            },
            {
                "partition_list": [],
                "measure_list": ["location", ],
                "transform_type": "derive_param",
                "transform_name": "DeriveInBox",
                "param_dict": {
                    "func_params": {"upper_left_corner": (-122.6835826856399, 45.515814287782455),
                                    "lower_right_corner": (-122.678529, 45.511597)},
                    "measure_rules": {"spatial_measure": "location",
                                      "output_name": "boxy"}
                },
                "logical_comparison": "AND"
            }
        ]

    def test_create_template(self):
        """create_template_dstream populates name, source key, measures, ids, defaults and events."""
        t1 = create_template_dstream('tester', 'driver_id', [('location', 'geo')],
                                     ['driver-id', 'idd'],
                                     [('test_event', self.test_event_rules)],
                                     self.test_dparam_rules_list, [], {})
        self.assertEqual(t1['stream_name'], 'tester')
        self.assertEqual(t1['source_key'], 'driver_id')
        self.assertIn('location', t1['measures'].keys())
        self.assertEqual(t1['measures']['location']['dtype'], 'geo')
        self.assertIn('driver-id', t1['user_ids'])
        self.assertIn('idd', t1['user_ids'])
        self.assertDictEqual(t1['storage_rules'],
                             {"store_raw": True, "store_filtered": True,
                              "store_derived": True})
        self.assertIn('test_event', t1['event_rules'].keys())

    def test_build_rules_from_event(self):
        """build_rules_from_event returns rule groups, and raises ValueError on bad/missing kwargs."""
        k1 = {'partition_list': [], 'turn_value': 45, 'stream_id': 'abc123'}
        r1 = build_rules_from_event('turn', [('location', 'geo')], **k1)
        for i in ['event_rules', 'dparam_rules', 'filter_rules']:
            self.assertIn(i, r1)
        self.assertEqual(r1['event_rules'][0], 'turn_45.000000_location')
        self.assertEqual(len(r1['dparam_rules']), 2)
        # with self.assertRaises(ValueError):
        #     build_rules_from_event('turn', [('smokeation', 'smokey')], **k1)
        # misspelled kwarg ('urn_value') must be rejected
        k2 = {'partition_list': [], 'urn_value': 45, 'stream_id': 'abc123'}
        with self.assertRaises(ValueError):
            build_rules_from_event('turn', [('location', 'geo')], **k2)
        # missing stream_id must be rejected
        k3 = {'partition_list': [], 'turn_value': 45}
        with self.assertRaises(ValueError):
            build_rules_from_event('turn', [('location', 'geo')], **k3)
        # missing partition_list must be rejected
        k4 = {'turn_value': 45, 'stream_id': 'abc123'}
        with self.assertRaises(ValueError):
            build_rules_from_event('turn', [('location', 'geo')], **k4)

    def test_build_temp_event_filters(self):
        """Event names built from filtered measures pick up the filtered measure name."""
        skey = 'driver_id'
        uids = ['driver-id', 'idd']
        k1 = {'partition_list': [], 'turn_value': 45, 'stream_id': 'abc123'}
        f1 = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})
        f2 = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})
        m = [([('location', 'geo')], [], [], [('turn', k1, ['location'])])]
        m2 = [([('location', 'geo')], [], [], [('turn', k1, ['location_buttered']), ])]
        t = build_template('test', skey, m, uids, [f1])
        self.assertIn('turn_45.000000_location', t['event_rules'])
        t2 = build_template('test', skey, m2, uids, [f2])
        self.assertIn('turn_45.000000_location_buttered', t2['event_rules'])

    def test_build_temp_dparam(self):
        """Derived-param specs in the measure tuple become dparam_rules entries."""
        f = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})
        m = [([('location', 'geo')], [],
              [('heading', {'partition_list': [], 'measure_list': ['location'], },
                {'spatial_measure': 'location'})], [])]
        t = build_template('test', 'driver_id', m, ['driver-id', 'idd'], [])
        self.assertEqual(len(t['dparam_rules']), 1)
        self.assertEqual(t['dparam_rules'][0]['transform_name'], 'DeriveHeading')
        m2 = [([('location', 'geo')], [],
               [('heading', {'partition_list': [], 'measure_list': ['location_buttered'], },
                 {'spatial_measure': 'location_buttered'})], [])]
        # NOTE(review): t2 is built but never asserted on — presumably this
        # just checks that building with a filtered measure does not raise.
        t2 = build_template('test', 'driver_id', m2, ['driver-id', 'idd'], [f])

    def test_build_template_event(self):
        """build_template aggregates events/measures/dparams across measure groups."""
        skey = 'driver_id'
        uids = ['driver-id', 'idd']
        k1 = {'partition_list': [], 'turn_value': 45, 'stream_id': 'abc123'}
        k2 = {'partition_list': [], 'turn_value': 66, 'stream_id': 'abc123'}
        k3 = {'partition_list': [], 'turn_value': 30, 'stream_id': 'abc123'}
        k4 = {'partition_list': [], 'turn_value': 57, 'stream_id': 'abc123'}
        m = [([('location', 'geo')], [], [],
              [('turn', k1, ['location']), ('turn', k2, ['location'])])]
        m2 = [([('location', 'geo'), ('smokeation', 'geo')], [], [],
               [('turn', k1, ['location']), ('turn', k2, ['smokeation'])])]
        m3 = [([('location', 'geo')], [], [],
               [('turn', k1, ['location']), ('turn', k2, ['location'])]),
              ([('smokeation', 'geo')], [], [],
               [('turn', k3, ['smokeation']), ('turn', k4, ['smokeation'])])]
        # m4 references a measure ('smokeation') that is not declared: invalid.
        m4 = [([('location', 'geo')], [], [], [('turn', k1, ['smokeation'])])]
        t = build_template('test', skey, m, uids, [])
        self.assertEqual(len(t['event_rules']), 2)
        self.assertEqual(len(t['measures']), 1)
        self.assertEqual(len(t['dparam_rules']), 4)
        for i in ['turn_45.000000_location', 'turn_66.000000_location']:
            self.assertIn(i, t['event_rules'])
        t2 = build_template('test', skey, m2, uids, [])
        self.assertEqual(len(t2['event_rules']), 2)
        self.assertEqual(len(t2['measures']), 2)
        self.assertEqual(len(t2['dparam_rules']), 4)
        for i in ['turn_45.000000_location', 'turn_66.000000_smokeation']:
            self.assertIn(i, t2['event_rules'])
        t3 = build_template('test', skey, m3, uids, [])
        self.assertEqual(len(t3['event_rules']), 4)
        self.assertEqual(len(t3['measures']), 2)
        self.assertEqual(len(t3['dparam_rules']), 8)
        for i in ['turn_45.000000_location', 'turn_66.000000_location',
                  'turn_30.000000_smokeation', 'turn_57.000000_smokeation']:
            self.assertIn(i, t3['event_rules'])
        with self.assertRaises(ValueError):
            build_template('test', skey, m4, uids, [])
        #
        # with self.assertRaises(TypeError):
        #     build_template('test', skey, [1,2,3,4], uids, [])
        #
        # with self.assertRaises(TypeError):
        #     build_template('test', skey,(1,2,3), uids, [])

    def test_update_template(self):
        """End-to-end update_template: add/modify/remove rounds, then an invalid round."""
        # --- individual update descriptors -------------------------------
        name_update = {'field': 'stream_name', 'type': 'new',
                       'args': ['shit'], 'kwargs': {}}
        desc_update = {'field': 'user_description', 'type': 'new',
                       'args': ['i hate this shit'], 'kwargs': {}}
        source_key_update = {'field': 'source_key', 'type': 'new',
                             'args': ['vom_id'], 'kwargs': {}}
        user_id_update1 = {'field': 'user_ids', 'type': 'new',
                           'args': ['bananas'], 'kwargs': {}}
        user_id_update2 = {'field': 'user_ids', 'type': 'new',
                           'args': ['shit_kiwis'], 'kwargs': {'old_id': 'id'}}
        user_id_update3 = {'field': 'user_ids', 'type': 'remove',
                           'args': ['driver-id'], 'kwargs': {}}
        field_update1 = {'field': 'fields', 'type': 'new',
                         'args': ['field_of_garbage'], 'kwargs': {}}
        field_update2 = {'field': 'fields', 'type': 'new',
                         'args': ['field_of_trash'],
                         'kwargs': {'old_field': 'region-code'}}
        field_update3 = {'field': 'fields', 'type': 'remove',
                         'args': ['field_of_garbage'], 'kwargs': {}}
        tag_update1 = {'field': 'tags', 'type': 'new',
                       'args': ['hash'], 'kwargs': {}}
        tag_update2 = {'field': 'tags', 'type': 'new',
                       'args': ['price'], 'kwargs': {}}
        tag_update3 = {'field': 'tags', 'type': 'new',
                       'args': ['toe', ], 'kwargs': {'old_tag': 'hash'}}
        tag_update4 = {'field': 'tags', 'type': 'remove',
                       'args': ['price'], 'kwargs': {}}
        fk_update1 = {'field': 'foreign_keys', 'type': 'new',
                      'args': ['romania'], 'kwargs': {}}
        fk_update2 = {'field': 'foreign_keys', 'type': 'new',
                      'args': ['slovakia'], 'kwargs': {}}
        fk_update3 = {'field': 'foreign_keys', 'type': 'new',
                      'args': ['lithuania', ], 'kwargs': {'old_fk': 'romania'}}
        fk_update4 = {'field': 'foreign_keys', 'type': 'remove',
                      'args': ['slovakia'], 'kwargs': {}}
        storage_update = {'field': 'storage_rules', 'type': 'modify',
                          'args': [[('store_raw', False), ('store_derived', False)]],
                          'kwargs': {}}
        ingest_update = {'field': 'ingest_rules', 'type': 'modify',
                         'args': [[('im_real', False), ]], 'kwargs': {}}
        engine_update = {'field': 'engine_rules', 'type': 'modify',
                         'args': [[('fuck_buffer', True), ('notfuck_buffer', False)]],
                         'kwargs': {}}
        measure_update_add = {'field': 'measures', 'type': 'new',
                              'args': [('poodles', 'poodle')], 'kwargs': {}}
        measure_update_remove_ok = {'field': 'measures', 'type': 'remove',
                                    'args': ['poodles'], 'kwargs': {}}
        # removing 'location' breaks rules that depend on it -> invalid update
        measure_update_remove_bad = {'field': 'measures', 'type': 'remove',
                                     'args': ['location'], 'kwargs': {}}
        filter_update_add = {'field': 'filters', 'type': 'new',
                             'args': [{'transform_id': 3, 'fake_filter': 'yes',
                                       'param_dict': {'filter_name': '_fake'},
                                       'measure_list': []}],
                             'kwargs': {}}
        filter_update_modify = {'field': 'filters', 'type': 'modify',
                                'args': [1, [('order', 1)]], 'kwargs': {}}
        filter_update_remove_ok = {'field': 'filters', 'type': 'remove',
                                   'args': [3], 'kwargs': {}}
        filter_update_remove_bad = {'field': 'filters', 'type': 'remove',
                                    'args': [2], 'kwargs': {}}
        dparam_update_add = {'field': 'dparam_rules', 'type': 'new',
                             'args': [{'transform_id': 15, 'fake_param': 'yes',
                                       'measure_list': [],
                                       'param_dict': {'measure_rules': {'output_name': 'new'}}}],
                             'kwargs': {}}
        dparam_update_modify = {'field': 'dparam_rules', 'type': 'modify',
                                'args': [9, [('window_len', 2)]],
                                'kwargs': {'new_partition_list': ['dumb']}}
        dparam_update_remove_ok = {'field': 'dparam_rules', 'type': 'remove',
                                   'args': [15], 'kwargs': {}}
        dparam_update_remove_bad = {'field': 'dparam_rules', 'type': 'remove',
                                    'args': [8], 'kwargs': {}}
        event_update_add = {'field': 'event_rules', 'type': 'new',
                            'args': ['fuck_this_event', {
                                "partition_list": [],
                                "measure_list": ["timestamp", "head1"],
                                "transform_type": "detect_event",
                                "transform_name": "DetectThreshold",
                                "param_dict": {
                                    "event_rules": {
                                        "measure": "head1",
                                        "threshold_value": 69.2,
                                        "comparison_operator": ">=",
                                        "absolute_compare": True
                                    },
                                    "event_name": "nice_event",
                                    "stream_id": "abc123",
                                },
                                "logical_comparison": "AND"
                            }],
                            'kwargs': {}}
        event_update_modify = {'field': 'event_rules', 'type': 'modify',
                               'args': ['fuck_this_event', [('threshold_value', 70)]],
                               'kwargs': {}}
        # --- update rounds ------------------------------------------------
        # NOTE(review): user_id_update1 appears twice in update1 — the second
        # occurrence looks accidental; verify whether a different descriptor
        # was intended.
        update1 = [name_update, desc_update, source_key_update,
                   user_id_update1, user_id_update1, field_update1, fk_update1,
                   tag_update1, storage_update, engine_update, ingest_update,
                   measure_update_add, filter_update_add, dparam_update_add,
                   event_update_add]
        update2 = [user_id_update2, field_update2, tag_update2, tag_update3,
                   fk_update2, fk_update3, filter_update_modify,
                   dparam_update_modify, event_update_modify]
        update3 = [user_id_update3, field_update3, fk_update4, tag_update4,
                   measure_update_remove_ok, filter_update_remove_ok,
                   dparam_update_remove_ok]
        update4 = [measure_update_remove_bad, filter_update_remove_bad,
                   dparam_update_remove_bad]
        # update 1
        result1 = update_template(self.dstream, update1)
        updated_template = result1[1]
        self.assertEqual(result1[0], 'ok')
        self.assertEqual(updated_template['stream_name'], 'shit')
        self.assertEqual(updated_template['user_description'], 'i hate this shit')
        self.assertEqual(updated_template['source_key'], 'vom_id')
        self.assertIn('bananas', updated_template['user_ids'])
        self.assertIn('field_of_garbage', updated_template['fields'])
        self.assertIn({'romania': None}, updated_template['foreign_keys'])
        self.assertIn('hash', updated_template['tags'])
        self.assertDictEqual(updated_template['storage_rules'],
                             {"store_raw": False, "store_filtered": True,
                              "store_derived": False})
        self.assertDictEqual(updated_template['engine_rules'],
                             {'kafka': 'test', 'fuck_buffer': True,
                              'notfuck_buffer': False})
        self.assertDictEqual(updated_template['ingest_rules'], {'im_real': False})
        self.assertEqual(len(updated_template['measures'].keys()), 2)
        self.assertEqual(len(updated_template['filters']), 3)
        self.assertEqual(len(updated_template['dparam_rules']), 11)
        self.assertIn('fuck_this_event', updated_template['event_rules'])
        # update 2
        result2 = update_template(updated_template, update2)
        updated_template2 = result2[1]
        self.assertEqual(result2[0], 'ok')
        self.assertEqual(len(updated_template2['user_ids']), 3)
        self.assertIn('shit_kiwis', updated_template2['user_ids'])
        self.assertNotIn('id', updated_template2['user_ids'])
        self.assertEqual(len(updated_template2['fields']), 2)
        self.assertIn('field_of_trash', updated_template2['fields'])
        self.assertNotIn('region-code', updated_template2['fields'])
        self.assertEqual(len(updated_template2['tags']), 2)
        self.assertIn('price', updated_template2['tags'])
        self.assertIn('toe', updated_template2['tags'])
        self.assertNotIn('hash', updated_template2['tags'])
        self.assertEqual(len(updated_template2['foreign_keys']), 2)
        self.assertIn({'slovakia': None}, updated_template2['foreign_keys'])
        self.assertIn({'lithuania': None}, updated_template2['foreign_keys'])
        self.assertNotIn({'romania': None}, updated_template2['foreign_keys'])
        self.assertEqual(updated_template2['filters'][0]['param_dict']['order'], 1)
        self.assertEqual(updated_template2['dparam_rules'][8]['param_dict']['func_params']['window_len'], 2)
        self.assertEqual(updated_template2['dparam_rules'][8]['partition_list'], ['dumb'])
        self.assertEqual(updated_template2['event_rules']['fuck_this_event']['param_dict']['event_rules']['threshold_value'], 70)
        # update 3
        result3 = update_template(updated_template2, update3)
        updated_template3 = result3[1]
        self.assertEqual(result3[0], 'ok')
        self.assertEqual(len(updated_template3['user_ids']), 2)
        self.assertNotIn('driver-id', updated_template3['user_ids'])
        self.assertEqual(len(updated_template3['fields']), 1)
        self.assertNotIn('field_of_garbage', updated_template3['fields'])
        self.assertEqual(len(updated_template3['tags']), 1)
        self.assertNotIn('price', updated_template3['tags'])
        self.assertEqual(len(updated_template3['foreign_keys']), 1)
        self.assertNotIn({'slovakia': None}, updated_template3['foreign_keys'])
        self.assertEqual(len(updated_template3['measures'].keys()), 1)
        self.assertEqual(len(updated_template3['filters']), 2)
        self.assertEqual(len(updated_template3['dparam_rules']), 10)
        # update 4: removals that break dependent rules must be rejected,
        # returning the list of violated dependencies in slot 2.
        result4 = update_template(updated_template3, update4)
        self.assertEqual(result4[0], 'invalid update')
        bad_guys = result4[2]
        self.assertEqual(len(bad_guys), 7)
        self.assertIn(('derived param', 'DeriveSlope', 'measure', 'timestamp_winning'), bad_guys)
        self.assertIn(('derived param', 'DeriveDistance', 'measure', 'location'), bad_guys)
        # NOTE(review): the DeriveDistance assertion is duplicated — presumably
        # meant to cover both DeriveDistance rules; assertIn cannot distinguish
        # duplicates, so the repeat is a no-op.
        self.assertIn(('derived param', 'DeriveDistance', 'measure', 'location'), bad_guys)
        self.assertIn(('derived param', 'DeriveHeading', 'measure', 'location'), bad_guys)
        self.assertIn(('derived param', 'DeriveInBox', 'measure', 'location'), bad_guys)
        self.assertIn(('event', 'test_event', 'derived param', 'head1'), bad_guys)
        self.assertIn(('event', 'fuck_this_event', 'derived param', 'head1'), bad_guys)

    def test_build_data_rules(self):
        """build_data_rules zips source indices with template keys; puller config toggles pull."""
        source_inds = [0, 1, 2, 4, 6, 8, 3]
        t_keys = [["user_ids", "sex"], ["measures", "length", "val"],
                  ["measures", "diameter", "val"],
                  ["measures", "whole_weight", "val"],
                  ["measures", "viscera_weight", "val"],
                  ["fields", "rings"], ["timestamp"]]
        d = build_data_rules(source_inds, t_keys)
        self.assertDictEqual(d, {'mapping_list': [(0, ['user_ids', 'sex']),
                                                  (1, ['measures', 'length', 'val']),
                                                  (2, ['measures', 'diameter', 'val']),
                                                  (4, ['measures', 'whole_weight', 'val']),
                                                  (6, ['measures', 'viscera_weight', 'val']),
                                                  (8, ['fields', 'rings']),
                                                  (3, ['timestamp'])],
                                 'date_format': None, 'puller': {}, 'pull': False})
        # Supplying a puller spec populates 'puller' and flips 'pull' to True.
        d2 = build_data_rules(source_inds, t_keys,
                              puller=['dir', [['path', 'strom/data_puller/test/'],
                                              ['file_type', 'csv']]])
        self.assertDictEqual(d2, {'mapping_list': [(0, ['user_ids', 'sex']),
                                                   (1, ['measures', 'length', 'val']),
                                                   (2, ['measures', 'diameter', 'val']),
                                                   (4, ['measures', 'whole_weight', 'val']),
                                                   (6, ['measures', 'viscera_weight', 'val']),
                                                   (8, ['fields', 'rings']),
                                                   (3, ['timestamp'])],
                                  'date_format': None,
                                  'puller': {'type': 'dir',
                                             'inputs': {'path': 'strom/data_puller/test/',
                                                        'file_type': 'csv'}},
                                  'pull': True})
        d3 = build_data_rules(source_inds, t_keys,
                              puller=['dir', [['path', 'strom/data_puller/test/'],
                                              ['file_type', 'csv'],
                                              ['delimiter', ',']]])
        self.assertDictEqual(d3, {'mapping_list': [(0, ['user_ids', 'sex']),
                                                   (1, ['measures', 'length', 'val']),
                                                   (2, ['measures', 'diameter', 'val']),
                                                   (4, ['measures', 'whole_weight', 'val']),
                                                   (6, ['measures', 'viscera_weight', 'val']),
                                                   (8, ['fields', 'rings']),
                                                   (3, ['timestamp'])],
                                  'date_format': None,
                                  'puller': {'type': 'dir',
                                             'inputs': {'path': 'strom/data_puller/test/',
                                                        'file_type': 'csv',
                                                        'delimiter': ','}},
                                  'pull': True})

    def test_build_new_rules_updates(self):
        """update() combines field updates with new measure/event/filter rule groups."""
        k = {'partition_list': [], 'turn_value': 99, 'stream_id': 'abc123'}
        f = ('butter_lowpass', {"partition_list": [], "measure_list": ["where"]})
        m = [([('location', 'geo')], [], [], [('turn', k, ['location'])])]
        m2 = [([('where', 'geo')], [], [], [('turn', k, ['where'])])]
        f2 = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})
        ff = ('butter_lowpass', {"partition_list": [], "measure_list": ["where"]})
        r = update(self.dstream,
                   [{'field': 'user_description', 'type': 'new',
                     'args': ['new shit'], 'kwargs': {}}], m, [])
        self.assertEqual(len(r), 2)
        self.assertEqual(r[0], 'ok')
        self.assertEqual(len(r[1]['measures']), 1)
        self.assertIn('turn_99.000000_location', r[1]['event_rules'])
        rawr = deepcopy(r[1])
        r2 = update(rawr, [], m2, [ff])
        self.assertEqual(len(r2), 2)
        # NOTE(review): this re-checks r[0] — presumably r2[0] was intended;
        # confirm.
        self.assertEqual(r[0], 'ok')
        self.assertEqual(len(r2[1]['measures']), 2)
        self.assertEqual(len(r2[1]['filters']), 3)
        self.assertIn('turn_99.000000_location', r2[1]['event_rules'])
        self.assertIn('turn_99.000000_where', r2[1]['event_rules'])
        # A filter over a measure the template lacks ('where') must fail.
        d = DStream()
        d['measures'] = {'location': {'val': None, 'dtype': 'geo'}}
        t = update(d, [], [], [f2])
        t2 = update(d, [], [], [f])
        self.assertEqual(t[0], 'ok')
        self.assertEqual(t2[0], 'invalid update')
class TestEngineThread(unittest.TestCase):
    """Integration tests for Engine buffering behaviour.

    Each test wires an Engine (in test mode, writing batches to files under
    engine_test_output/) to one end of a multiprocessing Pipe, feeds messages
    through the other end, then reads the outfiles back and checks how the
    messages were batched, grouped by stream token, rolled, and flushed.
    """

    # ---------------------------------------------------------------- helpers

    @staticmethod
    def _batch(token, prefix, start, stop):
        """Return [{'stream_token': token, 'message': prefix+str(i)} for i in [start, stop)]."""
        return [{"stream_token": token, "message": prefix + str(i)}
                for i in range(start, stop)]

    @staticmethod
    def _send_all(conn, messages):
        """Send every message down conn as a (message, 'load') work item."""
        for msg in messages:
            conn.send((msg, 'load'))

    @staticmethod
    def _collect(outfiles):
        """Concatenate the parsed contents of every outfile into one list."""
        rows = []
        for path in outfiles:
            rows.extend(read_outfile(path))
        return rows

    def _make_engine(self, conn, outfile, **kwargs):
        """Build a test-mode Engine writing to engine_test_output/<outfile>.

        processors / buffer_max_batch / buffer_max_seconds default to the
        values shared by most tests; callers may override them or pass extra
        options (e.g. buffer_roll). Options the caller does not supply are
        omitted entirely so Engine's own defaults still apply.
        """
        kwargs.setdefault('processors', 2)
        kwargs.setdefault('buffer_max_batch', 4)
        kwargs.setdefault('buffer_max_seconds', 5)
        return Engine(conn, test_mode=True,
                      test_outfile='engine_test_output/' + outfile, **kwargs)

    # ---------------------------------------------------------------- fixture

    def setUp(self):
        # One pipe per engine: the test holds conN, the engine gets conNb.
        self.con1, self.con1b = Pipe()
        self.con2, self.con2b = Pipe()
        self.con3, self.con3b = Pipe()
        self.con4, self.con4b = Pipe()
        self.con5, self.con5b = Pipe()
        self.con6, self.con6b = Pipe()
        self.engine = self._make_engine(self.con1b, 'engine_test1')
        self.engine2 = self._make_engine(self.con2b, 'engine_test2')
        self.engine3 = self._make_engine(self.con3b, 'engine_test3')
        # Engines 4-6 carry a rolling window of 1 message between batches.
        self.engine4 = self._make_engine(self.con4b, 'engine_test4', buffer_roll=1)
        self.engine5 = self._make_engine(self.con5b, 'engine_test5', buffer_roll=1)
        self.engine6 = self._make_engine(self.con6b, 'engine_test6', buffer_roll=1)
        self.test_batch1 = self._batch("abc123", "hi", 1, 5)       # hi1..hi4
        self.test_batch2 = self._batch("abc1234", "hello", 1, 5)   # hello1..hello4
        self.test_batch3 = self._batch("abc123", "hi", 5, 9)       # hi5..hi8
        self.test_batch4 = self._batch("abc1234", "hello", 5, 9)   # hello5..hello8
        # Expected roll result: last message of batch1 + first three of batch3.
        self.test_batch5 = self._batch("abc123", "hi", 4, 8)       # hi4..hi7
        # Second round of sends for the row-reset test...
        self.test_batch6 = self._batch("abc123", "hi", 9, 13)      # hi9..hi12
        self.test_batch7 = self._batch("abc123", "hi", 13, 17)     # hi13..hi16
        # ...and the roll result they should produce.
        self.test_batch8 = self._batch("abc123", "hi", 12, 16)     # hi12..hi15
        # batch1 and batch2 interleaved message-by-message.
        self.test_batch_mix = [msg
                               for pair in zip(self.test_batch1, self.test_batch2)
                               for msg in pair]
        self.test_batch_1to4 = (self.test_batch1 + self.test_batch2
                                + self.test_batch3 + self.test_batch4)
        self.outfiles = []
        self.abalone_con, self.abalone_conb = Pipe()
        # demo_data_dir is presumably a module-level constant -- TODO confirm.
        # Context manager closes the template file; the original
        # json.load(open(...)) leaked the handle.
        with open(demo_data_dir + "demo_template_dir.txt") as template_file:
            self.abalone = json.load(template_file)
        self.abalone_temp = DStream()
        self.abalone_temp.load_from_json(self.abalone)
        self.abalone_engine = self._make_engine(self.abalone_conb,
                                                'engine_test_abalone',
                                                buffer_max_batch=10)

    def tearDown(self):
        print("Tear it all down")
        sleep(1)
        for o in self.outfiles:
            print(o)
            try:
                remove_outfile(o)
            except OSError as oserr:
                # Best-effort cleanup: a missing outfile is not a failure.
                print(oserr)
        self.outfiles = []

    # ------------------------------------------------------------------ tests

    def test_buffer1(self):
        """All messages reach the processors in batches of 4."""
        outfiles = [
            'engine_test_output/engine_test1_abc123_1.txt',
            'engine_test_output/engine_test1_abc123_2.txt',
            'engine_test_output/engine_test1_abc1234_1.txt',
            'engine_test_output/engine_test1_abc1234_2.txt',
        ]
        self.engine.start()
        self._send_all(self.con1, self.test_batch_1to4)
        sleep(5)
        result = self._collect(outfiles)
        self.assertEqual(len(result), 4)
        self.assertIn(self.test_batch1, result)
        self.assertIn(self.test_batch2, result)
        self.assertIn(self.test_batch3, result)
        self.assertIn(self.test_batch4, result)
        self.outfiles.extend(outfiles)
        self.con1.send("stop_poison_pill")

    def test_buffer2(self):
        """Mixed-token input is grouped by stream token into separate buffers."""
        outfiles = [
            'engine_test_output/engine_test2_abc123_1.txt',
            'engine_test_output/engine_test2_abc1234_1.txt',
        ]
        self.engine2.start()
        sleep(5)
        self._send_all(self.con2, self.test_batch_mix)
        sleep(5)
        result2 = self._collect(outfiles)
        self.assertEqual(len(result2), 2)
        self.assertIn(self.test_batch1, result2)
        self.assertIn(self.test_batch2, result2)
        self.outfiles.extend(outfiles)
        self.con2.send("stop_poison_pill")

    def test_buffer3(self):
        """Partial batches ("leftovers") are flushed after buffer_max_seconds."""
        outfiles = [
            'engine_test_output/engine_test3_abc123_1.txt',
            'engine_test_output/engine_test3_abc1234_1.txt',
        ]
        self.engine3.start()
        sleep(5)
        self._send_all(self.con3, self.test_batch1[:2])
        self._send_all(self.con3, self.test_batch2[:2])
        sleep(7)
        result3 = self._collect(outfiles)
        self.assertEqual(len(result3), 2)
        for flushed in result3:
            self.assertEqual(len(flushed), 2)
        self.outfiles.extend(outfiles)
        self.con3.send("stop_poison_pill")

    def test_buffer4(self):
        """With buffer_roll=1 each batch's last message is carried into the next."""
        outfiles = [
            'engine_test_output/engine_test4_abc123_1.txt',
            'engine_test_output/engine_test4_abc123_2.txt',
            'engine_test_output/engine_test4_abc123_3.txt',
        ]
        self.engine4.start()
        sleep(5)
        self._send_all(self.con4, self.test_batch1 + self.test_batch3)
        sleep(7)
        result4 = self._collect(outfiles)
        self.assertEqual(len(result4), 3)
        self.assertIn(self.test_batch1, result4)
        self.assertIn(self.test_batch5, result4)       # rolled batch
        self.assertIn(self.test_batch3[-2:], result4)  # leftover flush
        self.outfiles.extend(outfiles)
        self.con4.send("stop_poison_pill")

    def test_buffer5(self):
        """A leftover consisting only of the rolled message is not flushed."""
        outfiles = ['engine_test_output/engine_test5_abc123_1.txt']
        self.engine5.start()
        sleep(5)
        self._send_all(self.con5, self.test_batch1)
        sleep(7)
        result5 = self._collect(outfiles)
        self.assertEqual(len(result5), 1)
        self.assertIn(self.test_batch1, result5)
        self.outfiles.extend(outfiles)
        self.con5.send("stop_poison_pill")

    def test_buffer6(self):
        """The buffer row index resets correctly after a leftover flush."""
        outfiles = ['engine_test_output/engine_test6_abc123_%d.txt' % n
                    for n in range(1, 7)]
        self.engine6.start()
        sleep(5)
        self._send_all(self.con6, self.test_batch1)
        self._send_all(self.con6, self.test_batch3)
        sleep(7)
        self._send_all(self.con6, self.test_batch6)
        self._send_all(self.con6, self.test_batch7)
        sleep(7)
        result6 = self._collect(outfiles)
        self.assertEqual(len(result6), 6)
        # First round: full batch, rolled batch, leftover flush...
        self.assertIn(self.test_batch1, result6)
        self.assertIn(self.test_batch5, result6)
        self.assertIn(self.test_batch3[-2:], result6)
        # ...and the same pattern again after the reset.
        self.assertIn(self.test_batch6, result6)
        self.assertIn(self.test_batch8, result6)
        self.assertIn(self.test_batch7[-2:], result6)
        self.outfiles.extend(outfiles)
        self.con6.send("stop_poison_pill")

    def test_new_with_puller(self):
        """A 'new' template with a puller config produces pulled output files."""
        self.abalone_engine.start()
        sleep(3)
        self.abalone_con.send((self.abalone_temp, 'new'))
        sleep(5)
        outfiles = glob.glob('engine_test_output/engine_test_abalone*')
        result = self._collect(outfiles)
        self.assertEqual(len(result), 2)
        self.outfiles.extend(outfiles)
        self.abalone_con.send("stop_poison_pill")