Beispiel #1
0
def update_template(template_json, updates_list: list):
    """Apply a batch of updates to a template and publish it when valid.

    A ``DStream`` is loaded from ``template_json`` and a deep copy of it is
    kept as the pre-update snapshot. Every entry of ``updates_list`` is then
    dispatched through ``update_guide`` and applied to the working template.

    Args:
        template_json: Mapping the template is loaded from. It must contain
            a ``'stream_token'`` key, which is copied onto the loaded
            template explicitly.
        updates_list: Update descriptors. Each one is a mapping with the
            keys ``'field'`` and ``'type'`` (used to select the handler in
            ``update_guide``), ``'args'`` (positional arguments for the
            handler) and ``'kwargs'`` (keyword arguments for the handler).

    Returns:
        ``('ok', template)`` when ``valid_update(template)`` is exactly
        ``True``; the template has had ``publish_version()`` called on it.
        Otherwise ``('invalid update', old_template, result)`` where
        ``old_template`` is the snapshot taken before any update ran and
        ``result`` is whatever ``valid_update`` returned. Note that the two
        outcomes have different tuple lengths.

    Raises:
        KeyError: If an update names a field/type pair missing from
            ``update_guide`` or lacks one of the required keys.
    """
    template = DStream()
    template.load_from_json(template_json)
    template['stream_token'] = template_json['stream_token']
    old_template = deepcopy(template)

    for update in updates_list:
        # Resolve the guide entry once instead of indexing twice per update.
        guide_entry = update_guide[update['field']][update['type']]
        update_fn = guide_entry['function']

        # Handlers always receive the template first; some also need to be
        # told which field they are operating on.
        args = [template]
        if guide_entry['field_key_arg']:
            args.append(update['field'])
        args.extend(update['args'])

        update_fn(*args, **update['kwargs'])

    # Validate exactly once and reuse the outcome for both the branch
    # decision and the value reported back to the caller.
    validation_result = valid_update(template)
    if validation_result is True:
        template.publish_version()
        return 'ok', template
    return 'invalid update', old_template, validation_result
Beispiel #2
0
class TestFunFactory(unittest.TestCase):
    """Tests for the template update helpers, run against a demo template.

    Every test starts from a ``DStream`` loaded from
    ``demo_data/demo_template_unit_test.txt`` whose filters and derived
    parameter rules have been given explicit ``transform_id`` values.
    """

    @staticmethod
    def _derive_param_rule(transform_name, measure_list, func_params,
                           measure_rules, partition_list=None):
        # Build one "derive_param" rule dict; only the varying parts are
        # passed in so the fixture list below stays readable.
        return {
            "partition_list": [] if partition_list is None else partition_list,
            "measure_list": measure_list,
            "transform_type": "derive_param",
            "transform_name": transform_name,
            "param_dict": {
                "func_params": func_params,
                "measure_rules": measure_rules,
            },
            "logical_comparison": "AND",
        }

    def setUp(self):
        demo_data_dir = "demo_data/"
        # Use a context manager so the file handle is closed right away
        # instead of lingering until garbage collection.
        with open(demo_data_dir + "demo_template_unit_test.txt") as demo_file:
            self.dstream_dict = json.load(demo_file)
        self.dstream = DStream()
        self.dstream.load_from_json(self.dstream_dict)

        # The tests address transforms by id, so assign ids explicitly.
        self.dstream['filters'][0]['transform_id'] = 1
        self.dstream['filters'][1]['transform_id'] = 2
        for transform_id, dparam in enumerate(self.dstream['dparam_rules'],
                                              start=1):
            dparam['transform_id'] = transform_id

        self.test_event_rules = {
            "partition_list": [],
            "measure_list": ["timestamp", "head1"],
            "transform_type": "detect_event",
            "transform_name": "DetectThreshold",
            "param_dict": {
                "event_rules": {
                    "measure": "head1",
                    "threshold_value": 69.2,
                    "comparison_operator": ">=",
                    "absolute_compare": True
                },
                "event_name": "nice_event",
                "stream_id": "abc123",
            },
            "logical_comparison": "AND"
        }

        rule = self._derive_param_rule
        self.test_dparam_rules_list = [
            rule("DeriveSlope", ["timestamp", "timestamp_winning"],
                 {"window_len": 1},
                 {"rise_measure": "timestamp_winning",
                  "run_measure": "timestamp",
                  "output_name": "time_slope"},
                 partition_list=[("timestamp", 1510603551106, ">"),
                                 ("timestamp", 1510603551391, "<")]),
            rule("DeriveChange", ["timestamp"],
                 {"window_len": 1, "angle_change": False},
                 {"target_measure": "timestamp",
                  "output_name": "time_change"}),
            rule("DeriveCumsum", ["timestamp"],
                 {"offset": 0},
                 {"target_measure": "timestamp",
                  "output_name": "time_sum"}),
            rule("DeriveWindowSum", ["timestamp"],
                 {"window_len": 3},
                 {"target_measure": "timestamp",
                  "output_name": "time_window_sum"}),
            rule("DeriveScaled", ["timestamp"],
                 {"scalar": -1},
                 {"target_measure": "timestamp",
                  "output_name": "negatime"}),
            rule("DeriveDistance", ["location"],
                 {"window_len": 1,
                  "distance_func": "euclidean",
                  "swap_lon_lat": True},
                 {"spatial_measure": "location",
                  "output_name": "dist1"}),
            rule("DeriveDistance", ["location"],
                 {"window_len": 1,
                  "distance_func": "great_circle",
                  "swap_lon_lat": True},
                 {"spatial_measure": "location",
                  "output_name": "dist2"}),
            rule("DeriveHeading", ["location"],
                 {"window_len": 1,
                  "units": "deg",
                  "heading_type": "bearing",
                  "swap_lon_lat": True},
                 {"spatial_measure": "location",
                  "output_name": "head1"}),
            rule("DeriveHeading", ["location"],
                 {"window_len": 1,
                  "units": "deg",
                  "heading_type": "flat_angle",
                  "swap_lon_lat": True},
                 {"spatial_measure": "location",
                  "output_name": "head2"}),
            rule("DeriveInBox", ["location"],
                 {"upper_left_corner":
                  (-122.6835826856399, 45.515814287782455),
                  "lower_right_corner": (-122.678529, 45.511597)},
                 {"spatial_measure": "location",
                  "output_name": "boxy"}),
        ]

    def test_update_stream_name(self):
        # The new name must end up under 'stream_name'.
        update_stream_name(self.dstream, 'chadwick')
        self.assertEqual(self.dstream['stream_name'], 'chadwick')

    def test_update_source_key(self):
        # The new key must end up under 'source_key'.
        update_source_key(self.dstream, 'idd')
        self.assertEqual(self.dstream['source_key'], 'idd')

    def test_update_description(self):
        # The new text must end up under 'user_description'.
        update_description(self.dstream, 'lmao')
        self.assertEqual(self.dstream['user_description'], 'lmao')

    def test_update_user_id(self):
        # Passing old_id swaps the old entry for the new one.
        update_user_id(self.dstream, 'galactic_id', old_id='driver-id')
        self.assertIn('galactic_id', self.dstream['user_ids'].keys())
        self.assertNotIn('driver-id', self.dstream['user_ids'].keys())

    def test_update_field(self):
        # Without old_field the field is simply added.
        update_field(self.dstream, 'dumb_field')
        self.assertIn('dumb_field', self.dstream['fields'])

        # With old_field the named field is replaced.
        update_field(self.dstream, 'asteroid_field', old_field='dumb_field')
        self.assertNotIn('dumb_field', self.dstream['fields'].keys())
        self.assertIn('asteroid_field', self.dstream['fields'].keys())

    def test_update_tag(self):
        # Without old_tag the tag is simply added.
        update_tag(self.dstream, 'not_poodles')
        self.assertIn('not_poodles', self.dstream['tags'].keys())

        # With old_tag the named tag is replaced.
        update_tag(self.dstream, 'poodles', old_tag='not_poodles')
        self.assertIn('poodles', self.dstream['tags'].keys())
        self.assertNotIn('not_poodles', self.dstream['tags'].keys())

    def test_update_fk(self):
        # Foreign keys are stored as single-entry {name: None} dicts.
        update_foreign_key(self.dstream, 'ugh')
        self.assertIn({'ugh': None}, self.dstream['foreign_keys'])

        update_foreign_key(self.dstream, 'wah', old_fk='ugh')
        self.assertIn({'wah': None}, self.dstream['foreign_keys'])
        self.assertNotIn({'ugh': None}, self.dstream['foreign_keys'])

    def test_update_rules(self):
        # Only the listed storage rules change; 'store_derived' stays True.
        update_rules(self.dstream, 'storage_rules',
                     [('store_raw', False), ('store_filtered', False)])
        expected_storage_rules = {
            "store_raw": False,
            "store_filtered": False,
            "store_derived": True
        }
        self.assertDictEqual(expected_storage_rules,
                             self.dstream['storage_rules'])

        # A rule that did not exist before is created.
        update_rules(self.dstream, 'ingest_rules',
                     [('is_this_all_fake', True)])
        self.assertIn('is_this_all_fake', self.dstream['ingest_rules'].keys())
        self.assertTrue(self.dstream['ingest_rules']['is_this_all_fake'])

    def test_modify_filter(self):
        # Change two parameters, the partition list and the comparison of
        # the filter whose transform_id is 1 (the first filter, see setUp).
        modify_filter(self.dstream,
                      1, [('order', 3), ('nyquist', 0.69)],
                      new_partition_list=['whatever'],
                      change_comparison=True)
        first_filter = self.dstream['filters'][0]
        self.assertEqual(first_filter['param_dict']['order'], 3)
        self.assertEqual(first_filter['param_dict']['nyquist'], 0.69)
        self.assertEqual(first_filter['logical_comparison'], 'OR')
        self.assertIn('whatever', first_filter['partition_list'])

    def test_modify_dparam(self):
        # transform_id 2 is the second derived-parameter rule (see setUp).
        modify_dparam(self.dstream, 2, [('window_len', 2)])
        second_rule = self.dstream['dparam_rules'][1]
        self.assertEqual(
            second_rule['param_dict']['func_params']['window_len'], 2)
        self.assertEqual(len(second_rule['partition_list']), 0)

        # An empty parameter list must leave func_params untouched while
        # the partition list is replaced.
        modify_dparam(self.dstream, 2, [], new_partition_list=['new'])
        second_rule = self.dstream['dparam_rules'][1]
        self.assertEqual(
            list(second_rule['param_dict']['func_params'].items()),
            [('window_len', 2), ('angle_change', False)])
        self.assertEqual(second_rule['partition_list'], ['new'])

    def test_modify_event(self):
        modify_event(self.dstream, 'test_event', [('threshold_value', 70),
                                                  ('absolute_compare', False)])
        event_rules = (self.dstream['event_rules']['test_event']
                       ['param_dict']['event_rules'])
        self.assertEqual(event_rules['threshold_value'], 70)
        self.assertFalse(event_rules['absolute_compare'])

    def test_remove_transform(self):
        # Removing the filter with transform_id 2 leaves only the first one.
        remove_transform(self.dstream, 'filters', 2)
        self.assertEqual(len(self.dstream['filters']), 1)
        self.assertEqual(self.dstream['filters'][0]['transform_name'],
                         'ButterLowpass')
Beispiel #3
0
class TestDStream(unittest.TestCase):
    """Tests for ``DStream`` construction, its ``add_*`` methods and loading."""

    def setUp(self):
        self.dstream = DStream()

    def test_init(self):
        # A fresh DStream must expose every one of these top-level keys.
        init_keys = [
            'stream_name', 'user_description', 'version', 'stream_token',
            'source_key', 'template_id', 'storage_rules', 'ingest_rules',
            'engine_rules', 'timestamp', 'measures', 'fields', 'user_ids',
            'tags', 'foreign_keys', 'filters', 'dparam_rules', 'event_rules',
            'data_rules'
        ]
        for key in init_keys:
            # assertIn already reports the missing key and the container,
            # so no debug printing is needed here.
            self.assertIn(key, self.dstream.keys())

    def testadd_methods(self):
        # Method name kept as-is so existing test ids do not change.
        self.assertIsInstance(self.dstream["stream_token"], str)

        m_name = "viscosity"
        m_dtype = "float"
        self.dstream.add_measure(m_name, m_dtype)
        self.assertIn(m_name, self.dstream["measures"].keys())
        self.assertEqual(self.dstream["measures"][m_name]["dtype"], m_dtype)

        f_name = "strawberry"
        self.dstream.add_field(f_name)
        self.assertIn(f_name, self.dstream["fields"].keys())

        uid_name = "my_id"
        self.dstream.add_user_id(uid_name)
        self.assertIn(uid_name, self.dstream["user_ids"].keys())

        tag_name = "Really good sensor"
        self.dstream.add_tag(tag_name)
        self.assertIn(tag_name, self.dstream["tags"])

        # Foreign keys are stored as single-entry {name: None} dicts.
        fk = "key to the city"
        self.dstream.add_fk(fk)
        self.assertIn({fk: None}, self.dstream["foreign_keys"])

        fake_filter = {"func_name": "Make all values 0"}
        self.dstream.add_filter(fake_filter)
        self.assertEqual(fake_filter, self.dstream["filters"][0])

        fake_dparam = {"measure": "viscosity", "drule": "max of mins"}
        self.dstream.add_derived_param(fake_dparam)
        self.assertEqual(fake_dparam, self.dstream["dparam_rules"][0])

        fake_event_name = "My birthday"
        fake_event = {"param": "viscosity", "threshold": "too viscous"}
        self.dstream.add_event(fake_event_name, fake_event)
        self.assertEqual(fake_event,
                         self.dstream["event_rules"][fake_event_name])

        # Publishing bumps the version by exactly one.
        old_version = self.dstream["version"]
        self.dstream.publish_version()
        self.assertEqual(old_version + 1, self.dstream["version"])

        fake_mapping = ["fake", "mapping", "list"]
        self.dstream.add_data_rules(fake_mapping)
        self.assertEqual(fake_mapping, self.dstream["data_rules"])

    def test_load_from_json(self):
        test_dict = {"stream_token": "foo", "version": 900}
        self.dstream.load_from_json(test_dict)
        self.assertEqual(test_dict["version"], self.dstream["version"])
        self.assertIsInstance(self.dstream["stream_token"], str)

    def test_filter(self):
        # Previously this only called add_filter without checking anything;
        # assert the outcome the same way testadd_methods does.
        filter_dict = {"test": "filterdict"}
        self.dstream.add_filter(filter_dict)
        self.assertEqual(filter_dict, self.dstream["filters"][0])
Beispiel #4
0
class TestFunFactory(unittest.TestCase):
    def setUp(self):
        # Load the demo template, give its transforms explicit ids, and
        # prepare the event / derived-parameter rule fixtures used below.
        demo_data_dir = "demo_data/"
        # Context manager closes the file immediately rather than leaving
        # the handle open until garbage collection.
        with open(demo_data_dir + "demo_template_unit_test.txt") as demo_file:
            self.dstream_dict = json.load(demo_file)
        self.dstream = DStream()
        self.dstream.load_from_json(self.dstream_dict)
        self.dstream['filters'][0]['transform_id'] = 1
        self.dstream['filters'][1]['transform_id'] = 2
        for transform_id, dparam in enumerate(self.dstream['dparam_rules'], start=1):
            dparam['transform_id'] = transform_id

        self.test_event_rules = {
            "partition_list": [],
            "measure_list": ["timestamp", "head1"],
            "transform_type": "detect_event",
            "transform_name": "DetectThreshold",
            "param_dict": {
                "event_rules": {
                    "measure": "head1",
                    "threshold_value": 69.2,
                    "comparison_operator": ">=",
                    "absolute_compare": True,
                },
                "event_name": "nice_event",
                "stream_id": "abc123",
            },
            "logical_comparison": "AND",
        }

        def derive_rule(transform_name, measure_list, func_params, measure_rules, partition_list=None):
            # One "derive_param" rule dict; only the varying parts are arguments.
            return {
                "partition_list": [] if partition_list is None else partition_list,
                "measure_list": measure_list,
                "transform_type": "derive_param",
                "transform_name": transform_name,
                "param_dict": {
                    "func_params": func_params,
                    "measure_rules": measure_rules,
                },
                "logical_comparison": "AND",
            }

        self.test_dparam_rules_list = [
            derive_rule(
                "DeriveSlope",
                ["timestamp", "timestamp_winning"],
                {"window_len": 1},
                {"rise_measure": "timestamp_winning", "run_measure": "timestamp", "output_name": "time_slope"},
                partition_list=[("timestamp", 1510603551106, ">"), ("timestamp", 1510603551391, "<")],
            ),
            derive_rule(
                "DeriveChange",
                ["timestamp"],
                {"window_len": 1, "angle_change": False},
                {"target_measure": "timestamp", "output_name": "time_change"},
            ),
            derive_rule(
                "DeriveCumsum",
                ["timestamp"],
                {"offset": 0},
                {"target_measure": "timestamp", "output_name": "time_sum"},
            ),
            derive_rule(
                "DeriveWindowSum",
                ["timestamp"],
                {"window_len": 3},
                {"target_measure": "timestamp", "output_name": "time_window_sum"},
            ),
            derive_rule(
                "DeriveScaled",
                ["timestamp"],
                {"scalar": -1},
                {"target_measure": "timestamp", "output_name": "negatime"},
            ),
            derive_rule(
                "DeriveDistance",
                ["location"],
                {"window_len": 1, "distance_func": "euclidean", "swap_lon_lat": True},
                {"spatial_measure": "location", "output_name": "dist1"},
            ),
            derive_rule(
                "DeriveDistance",
                ["location"],
                {"window_len": 1, "distance_func": "great_circle", "swap_lon_lat": True},
                {"spatial_measure": "location", "output_name": "dist2"},
            ),
            derive_rule(
                "DeriveHeading",
                ["location"],
                {"window_len": 1, "units": "deg", "heading_type": "bearing", "swap_lon_lat": True},
                {"spatial_measure": "location", "output_name": "head1"},
            ),
            derive_rule(
                "DeriveHeading",
                ["location"],
                {"window_len": 1, "units": "deg", "heading_type": "flat_angle", "swap_lon_lat": True},
                {"spatial_measure": "location", "output_name": "head2"},
            ),
            derive_rule(
                "DeriveInBox",
                ["location"],
                {"upper_left_corner": (-122.6835826856399, 45.515814287782455), "lower_right_corner": (-122.678529, 45.511597)},
                {"spatial_measure": "location", "output_name": "boxy"},
            ),
        ]

    def test_create_template(self):
        # Create a template from the setUp fixtures and spot-check the
        # values that were handed to the factory.
        events = [('test_event', self.test_event_rules)]
        template = create_template_dstream(
            'tester',
            'driver_id',
            [('location', 'geo')],
            ['driver-id', 'idd'],
            events,
            self.test_dparam_rules_list,
            [],
            {},
        )

        self.assertEqual(template['stream_name'], 'tester')
        self.assertEqual(template['source_key'], 'driver_id')

        measures = template['measures']
        self.assertIn('location', measures.keys())
        self.assertEqual(measures['location']['dtype'], 'geo')

        for expected_id in ('driver-id', 'idd'):
            self.assertIn(expected_id, template['user_ids'])

        # All three storage flags are expected to be switched on.
        expected_storage = {"store_raw": True, "store_filtered": True, "store_derived": True}
        self.assertDictEqual(template['storage_rules'], expected_storage)
        self.assertIn('test_event', template['event_rules'].keys())

    def test_build_rules_from_event(self):
        def build_turn_rules(event_kwargs):
            # Every call in this test uses the same event type and measure.
            return build_rules_from_event('turn', [('location', 'geo')], **event_kwargs)

        # Happy path: all three keyword arguments are supplied.
        complete_kwargs = {'partition_list': [], 'turn_value': 45, 'stream_id': 'abc123'}
        rules = build_turn_rules(complete_kwargs)
        for section in ('event_rules', 'dparam_rules', 'filter_rules'):
            self.assertIn(section, rules)

        self.assertEqual(rules['event_rules'][0], 'turn_45.000000_location')
        self.assertEqual(len(rules['dparam_rules']), 2)

        # Disabled in the original test (unknown measure name/type):
        # with self.assertRaises(ValueError):
        #     build_rules_from_event('turn', [('smokeation', 'smokey')], **k1)

        # Each of these is rejected with ValueError: a misspelled
        # 'turn_value' key, a missing 'stream_id', a missing 'partition_list'.
        malformed_kwargs = (
            {'partition_list': [], 'urn_value': 45, 'stream_id': 'abc123'},
            {'partition_list': [], 'turn_value': 45},
            {'turn_value': 45, 'stream_id': 'abc123'},
        )
        for bad_kwargs in malformed_kwargs:
            with self.assertRaises(ValueError):
                build_turn_rules(bad_kwargs)

    def test_build_temp_event_filters(self):
        # A 'turn' event may reference either the raw measure or the
        # '<measure>_buttered' name when a butter_lowpass filter is supplied.
        source_key = 'driver_id'
        user_ids = ['driver-id', 'idd']
        turn_kwargs = {'partition_list': [], 'turn_value': 45, 'stream_id': 'abc123'}

        scenarios = (
            ('location', 'turn_45.000000_location'),
            ('location_buttered', 'turn_45.000000_location_buttered'),
        )
        for event_measure, expected_event in scenarios:
            # Fresh filter and measure structures for every build.
            lowpass = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})
            measures = [([('location', 'geo')], [], [], [('turn', turn_kwargs, [event_measure])])]

            template = build_template('test', source_key, measures, user_ids, [lowpass])
            self.assertIn(expected_event, template['event_rules'])

    def test_build_temp_dparam(self):
        def heading_measures(measure_name):
            # One 'location' measure carrying a single 'heading' derived
            # parameter computed from ``measure_name``.
            heading = (
                'heading',
                {'partition_list': [], 'measure_list': [measure_name]},
                {'spatial_measure': measure_name},
            )
            return [([('location', 'geo')], [], [heading], [])]

        lowpass = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})

        # Without filters: exactly one DeriveHeading rule is produced.
        template = build_template('test', 'driver_id', heading_measures('location'), ['driver-id', 'idd'], [])
        derived = template['dparam_rules']
        self.assertEqual(len(derived), 1)
        self.assertEqual(derived[0]['transform_name'], 'DeriveHeading')

        # With a lowpass filter the derived parameter may target the
        # '_buttered' measure; this only checks that the build does not raise.
        build_template('test', 'driver_id', heading_measures('location_buttered'), ['driver-id', 'idd'], [lowpass])

    def test_build_template_event(self):
        source_key = 'driver_id'
        user_ids = ['driver-id', 'idd']
        turn_45 = {'partition_list': [], 'turn_value': 45, 'stream_id': 'abc123'}
        turn_66 = {'partition_list': [], 'turn_value': 66, 'stream_id': 'abc123'}
        turn_30 = {'partition_list': [], 'turn_value': 30, 'stream_id': 'abc123'}
        turn_57 = {'partition_list': [], 'turn_value': 57, 'stream_id': 'abc123'}

        def turn(event_kwargs, measure_name):
            # Event spec for a 'turn' event evaluated on one measure.
            return ('turn', event_kwargs, [measure_name])

        # (measure groups, expected event count, expected measure count,
        #  expected derived-parameter count, expected event names)
        scenarios = [
            # Two events on a single measure.
            (
                [([('location', 'geo')], [], [], [turn(turn_45, 'location'), turn(turn_66, 'location')])],
                2, 1, 4,
                ['turn_45.000000_location', 'turn_66.000000_location'],
            ),
            # Two measures in one group, one event each.
            (
                [([('location', 'geo'), ('smokeation', 'geo')], [], [], [turn(turn_45, 'location'), turn(turn_66, 'smokeation')])],
                2, 2, 4,
                ['turn_45.000000_location', 'turn_66.000000_smokeation'],
            ),
            # Two groups, each with one measure and two events.
            (
                [
                    ([('location', 'geo')], [], [], [turn(turn_45, 'location'), turn(turn_66, 'location')]),
                    ([('smokeation', 'geo')], [], [], [turn(turn_30, 'smokeation'), turn(turn_57, 'smokeation')]),
                ],
                4, 2, 8,
                ['turn_45.000000_location', 'turn_66.000000_location', 'turn_30.000000_smokeation', 'turn_57.000000_smokeation'],
            ),
        ]
        # An event that references a measure its group does not declare.
        undeclared_measure = [([('location', 'geo')], [], [], [turn(turn_45, 'smokeation')])]

        for measures, event_count, measure_count, dparam_count, event_names in scenarios:
            template = build_template('test', source_key, measures, user_ids, [])
            self.assertEqual(len(template['event_rules']), event_count)
            self.assertEqual(len(template['measures']), measure_count)
            self.assertEqual(len(template['dparam_rules']), dparam_count)
            for event_name in event_names:
                self.assertIn(event_name, template['event_rules'])

        with self.assertRaises(ValueError):
            build_template('test', source_key, undeclared_measure, user_ids, [])
    #
        # with self.assertRaises(TypeError):
        #     build_template('test', skey, [1,2,3,4], uids, [])
        #
        # with self.assertRaises(TypeError):
        #     build_template('test', skey,(1,2,3), uids, [])

    def test_update_template(self):
        name_update = {'field': 'stream_name', 'type': 'new', 'args': ['shit'], 'kwargs': {}}
        desc_update = {'field': 'user_description', 'type': 'new', 'args': ['i hate this shit'], 'kwargs': {}}
        source_key_update = {'field': 'source_key', 'type': 'new', 'args': ['vom_id'], 'kwargs': {}}
        user_id_update1 = {'field': 'user_ids', 'type': 'new', 'args': ['bananas'], 'kwargs': {}}
        user_id_update2 = {'field': 'user_ids', 'type': 'new', 'args': ['shit_kiwis'], 'kwargs': {'old_id': 'id'}}
        user_id_update3 = {'field': 'user_ids', 'type': 'remove', 'args': ['driver-id'], 'kwargs': {}}
        field_update1 = {'field': 'fields', 'type': 'new', 'args': ['field_of_garbage'], 'kwargs': {}}
        field_update2 = {'field': 'fields', 'type': 'new', 'args': ['field_of_trash'], 'kwargs': {'old_field': 'region-code'}}
        field_update3 = {'field': 'fields', 'type': 'remove', 'args': ['field_of_garbage'], 'kwargs': {}}
        tag_update1 = {'field': 'tags', 'type': 'new', 'args': ['hash'], 'kwargs': {}}
        tag_update2 = {'field': 'tags', 'type': 'new', 'args': ['price'], 'kwargs': {}}
        tag_update3 = {'field': 'tags', 'type': 'new', 'args': ['toe',], 'kwargs': {'old_tag': 'hash'}}
        tag_update4 = {'field': 'tags', 'type': 'remove', 'args': ['price'], 'kwargs': {}}
        fk_update1 = {'field': 'foreign_keys', 'type': 'new', 'args': ['romania'], 'kwargs': {}}
        fk_update2 = {'field': 'foreign_keys', 'type': 'new', 'args': ['slovakia'], 'kwargs': {}}
        fk_update3 = {'field': 'foreign_keys', 'type': 'new', 'args': ['lithuania',], 'kwargs': {'old_fk': 'romania'}}
        fk_update4 = {'field': 'foreign_keys', 'type': 'remove', 'args': ['slovakia'], 'kwargs': {}}
        storage_update = {'field': 'storage_rules', 'type': 'modify', 'args': [[('store_raw', False), ('store_derived', False)]], 'kwargs': {}}
        ingest_update = {'field': 'ingest_rules', 'type': 'modify', 'args': [[('im_real', False),]], 'kwargs': {}}
        engine_update = {'field': 'engine_rules', 'type': 'modify',
                          'args': [[('fuck_buffer', True), ('notfuck_buffer', False)]], 'kwargs': {}}
        measure_update_add = {'field': 'measures', 'type': 'new', 'args': [('poodles', 'poodle')], 'kwargs': {}}
        measure_update_remove_ok = {'field': 'measures', 'type': 'remove', 'args': ['poodles'], 'kwargs': {}}
        measure_update_remove_bad = {'field': 'measures', 'type': 'remove', 'args': ['location'], 'kwargs': {}}
        filter_update_add = {'field': 'filters', 'type': 'new', 'args': [{ 'transform_id': 3,'fake_filter': 'yes', 'param_dict': {'filter_name': '_fake'}, 'measure_list': []}], 'kwargs': {}}
        filter_update_modify = {'field': 'filters', 'type': 'modify', 'args': [1, [('order', 1)]], 'kwargs': {}}
        filter_update_remove_ok = {'field': 'filters', 'type': 'remove', 'args': [3], 'kwargs': {}}
        filter_update_remove_bad = {'field': 'filters', 'type': 'remove', 'args': [2], 'kwargs': {}}
        dparam_update_add = {'field': 'dparam_rules', 'type': 'new', 'args': [{'transform_id': 15, 'fake_param': 'yes', 'measure_list': [], 'param_dict': {'measure_rules': {'output_name': 'new'}}}], 'kwargs': {}}
        dparam_update_modify = {'field': 'dparam_rules', 'type': 'modify', 'args': [9, [('window_len', 2)]], 'kwargs': {'new_partition_list': ['dumb']}}
        dparam_update_remove_ok = {'field': 'dparam_rules', 'type': 'remove', 'args': [15], 'kwargs': {}}
        dparam_update_remove_bad = {'field': 'dparam_rules', 'type': 'remove', 'args': [8], 'kwargs': {}}
        event_update_add = {'field': 'event_rules', 'type': 'new', 'args': ['fuck_this_event', {
        "partition_list": [],
        "measure_list":["timestamp", "head1"],
        "transform_type":"detect_event",
        "transform_name":"DetectThreshold",
        "param_dict":{
            "event_rules":{
                "measure":"head1",
                "threshold_value":69.2,
                "comparison_operator":">=",
                "absolute_compare":True
            },
            "event_name":"nice_event",
            "stream_id":"abc123",
        },
        "logical_comparison": "AND"
        }], 'kwargs': {}}
        event_update_modify = {'field': 'event_rules', 'type': 'modify', 'args': ['fuck_this_event', [('threshold_value', 70)]], 'kwargs': {}}

        update1 = [name_update, desc_update, source_key_update, user_id_update1, user_id_update1, field_update1, fk_update1, tag_update1, storage_update, engine_update, ingest_update, measure_update_add, filter_update_add, dparam_update_add, event_update_add]

        update2 = [user_id_update2, field_update2, tag_update2, tag_update3, fk_update2, fk_update3, filter_update_modify, dparam_update_modify, event_update_modify]

        update3 = [user_id_update3, field_update3, fk_update4, tag_update4, measure_update_remove_ok, filter_update_remove_ok, dparam_update_remove_ok]

        update4 = [measure_update_remove_bad, filter_update_remove_bad, dparam_update_remove_bad]

        # update 1
        result1 = update_template(self.dstream, update1)
        updated_template = result1[1]
        self.assertEqual(result1[0], 'ok')
        self.assertEqual(updated_template['stream_name'], 'shit')
        self.assertEqual(updated_template['user_description'], 'i hate this shit')
        self.assertEqual(updated_template['source_key'], 'vom_id')
        self.assertIn('bananas', updated_template['user_ids'])
        self.assertIn('field_of_garbage', updated_template['fields'])
        self.assertIn({'romania': None}, updated_template['foreign_keys'])
        self.assertIn('hash', updated_template['tags'])
        self.assertDictEqual(updated_template['storage_rules'], {"store_raw":False, "store_filtered":True, "store_derived":False})
        self.assertDictEqual(updated_template['engine_rules'], {'kafka': 'test', 'fuck_buffer': True, 'notfuck_buffer': False})
        self.assertDictEqual(updated_template['ingest_rules'], {'im_real': False})
        self.assertEqual(len(updated_template['measures'].keys()), 2)
        self.assertEqual(len(updated_template['filters']), 3)
        self.assertEqual(len(updated_template['dparam_rules']), 11)
        self.assertIn('fuck_this_event', updated_template['event_rules'])

        # update 2
        result2 = update_template(updated_template, update2)
        updated_template2 = result2[1]
        self.assertEqual(result2[0], 'ok')
        self.assertEqual(len(updated_template2['user_ids']), 3)
        self.assertIn('shit_kiwis', updated_template2['user_ids'])
        self.assertNotIn('id', updated_template2['user_ids'])
        self.assertEqual(len(updated_template2['fields']), 2)
        self.assertIn('field_of_trash', updated_template2['fields'])
        self.assertNotIn('region-code', updated_template2['fields'])
        self.assertEqual(len(updated_template2['tags']), 2)
        self.assertIn('price', updated_template2['tags'])
        self.assertIn('toe', updated_template2['tags'])
        self.assertNotIn('hash', updated_template2['tags'])
        self.assertEqual(len(updated_template2['foreign_keys']), 2)
        self.assertIn({'slovakia': None}, updated_template2['foreign_keys'])
        self.assertIn({'lithuania': None}, updated_template2['foreign_keys'])
        self.assertNotIn({'romania': None}, updated_template2['foreign_keys'])
        self.assertEqual(updated_template2['filters'][0]['param_dict']['order'], 1)
        self.assertEqual(updated_template2['dparam_rules'][8]['param_dict']['func_params']['window_len'], 2)
        self.assertEqual(updated_template2['dparam_rules'][8]['partition_list'], ['dumb'])
        self.assertEqual(updated_template2['event_rules']['fuck_this_event']['param_dict']['event_rules']['threshold_value'], 70)

        # update 3
        result3 = update_template(updated_template2, update3)
        updated_template3 = result3[1]
        self.assertEqual(result3[0], 'ok')
        self.assertEqual(len(updated_template3['user_ids']), 2)
        self.assertNotIn('driver-id', updated_template3['user_ids'])
        self.assertEqual(len(updated_template3['fields']), 1)
        self.assertNotIn('field_of_garbage', updated_template3['fields'])
        self.assertEqual(len(updated_template3['tags']), 1)
        self.assertNotIn('price', updated_template3['tags'])
        self.assertEqual(len(updated_template3['foreign_keys']), 1)
        self.assertNotIn({'slovakia': None}, updated_template3['foreign_keys'])
        self.assertEqual(len(updated_template3['measures'].keys()), 1)
        self.assertEqual(len(updated_template3['filters']), 2)
        self.assertEqual(len(updated_template3['dparam_rules']), 10)

        # update 4
        result4 = update_template(updated_template3, update4)
        self.assertEqual(result4[0], 'invalid update')
        bad_guys = result4[2]
        self.assertEqual(len(bad_guys), 7)

        self.assertIn(('derived param', 'DeriveSlope', 'measure', 'timestamp_winning'), bad_guys)
        self.assertIn(('derived param', 'DeriveDistance', 'measure', 'location'), bad_guys)
        self.assertIn(('derived param', 'DeriveDistance', 'measure', 'location'), bad_guys)
        self.assertIn(('derived param', 'DeriveHeading', 'measure', 'location'), bad_guys)
        self.assertIn(('derived param', 'DeriveInBox', 'measure', 'location'), bad_guys)
        self.assertIn(('event', 'test_event', 'derived param', 'head1'), bad_guys)
        self.assertIn(('event', 'fuck_this_event', 'derived param', 'head1'), bad_guys)
    #
    def test_build_data_rules(self):
        """Check the dict produced by ``build_data_rules`` with and without a puller.

        Without a ``puller`` argument the result carries an empty ``puller``
        dict and ``pull`` is False; with one, the ``[type, [[key, value], ...]]``
        argument shows up as ``{'type': ..., 'inputs': {...}}`` and ``pull`` is True.
        """
        source_inds = [0, 1, 2, 4, 6, 8, 3]
        t_keys = [
            ["user_ids", "sex"],
            ["measures", "length", "val"],
            ["measures", "diameter", "val"],
            ["measures", "whole_weight", "val"],
            ["measures", "viscera_weight", "val"],
            ["fields", "rings"],
            ["timestamp"],
        ]

        def expected_rules(puller, pull):
            # Fresh literals on every call so no expectation shares state with another.
            return {
                'mapping_list': [
                    (0, ['user_ids', 'sex']),
                    (1, ['measures', 'length', 'val']),
                    (2, ['measures', 'diameter', 'val']),
                    (4, ['measures', 'whole_weight', 'val']),
                    (6, ['measures', 'viscera_weight', 'val']),
                    (8, ['fields', 'rings']),
                    (3, ['timestamp']),
                ],
                'date_format': None,
                'puller': puller,
                'pull': pull,
            }

        # No puller supplied.
        rules_plain = build_data_rules(source_inds, t_keys)
        self.assertDictEqual(rules_plain, expected_rules({}, False))

        # Directory puller with two inputs.
        rules_dir = build_data_rules(
            source_inds,
            t_keys,
            puller=['dir', [['path', 'strom/data_puller/test/'], ['file_type', 'csv']]],
        )
        self.assertDictEqual(
            rules_dir,
            expected_rules(
                {'type': 'dir',
                 'inputs': {'path': 'strom/data_puller/test/', 'file_type': 'csv'}},
                True,
            ),
        )

        # Directory puller with an additional delimiter input.
        rules_delim = build_data_rules(
            source_inds,
            t_keys,
            puller=['dir', [['path', 'strom/data_puller/test/'], ['file_type', 'csv'], ['delimiter', ',']]],
        )
        self.assertDictEqual(
            rules_delim,
            expected_rules(
                {'type': 'dir',
                 'inputs': {'path': 'strom/data_puller/test/', 'file_type': 'csv', 'delimiter': ','}},
                True,
            ),
        )

    def test_build_new_rules_updates(self):
        """Exercise ``update`` with field updates, new measure/event rules and new filters.

        The third and fourth positional arguments of ``update`` are passed the
        measure/event tuples and the filter tuples built below (their exact
        schema is defined by ``update`` itself, which is not shown here).
        A successful call is expected to return ``('ok', template)``.
        """
        # Shared on purpose: both measure specs reference the same parameter dict.
        k = {'partition_list': [], 'turn_value': 99, 'stream_id': 'abc123'}
        f = ('butter_lowpass', {"partition_list": [], "measure_list": ["where"]})
        m = [([('location', 'geo')], [], [], [('turn', k, ['location'])])]
        m2 = [([('where', 'geo')], [], [], [('turn', k, ['where'])])]
        f2 = ('butter_lowpass', {"partition_list": [], "measure_list": ["location"]})

        ff = ('butter_lowpass', {"partition_list": [], "measure_list": ["where"]})

        # First update: one field change plus a new 'location' measure and turn event.
        r = update(self.dstream, [{'field': 'user_description', 'type': 'new', 'args': ['new shit'], 'kwargs': {}}], m, [])
        self.assertEqual(len(r), 2)
        self.assertEqual(r[0], 'ok')
        self.assertEqual(len(r[1]['measures']), 1)
        self.assertIn('turn_99.000000_location', r[1]['event_rules'])

        # Second update on a copy of the first result: add 'where' and a filter on it.
        rawr = deepcopy(r[1])
        r2 = update(rawr, [], m2, [ff])
        self.assertEqual(len(r2), 2)
        # Check the status of the *second* update (the original re-checked r[0]).
        self.assertEqual(r2[0], 'ok')
        self.assertEqual(len(r2[1]['measures']), 2)
        self.assertEqual(len(r2[1]['filters']), 3)
        self.assertIn('turn_99.000000_location', r2[1]['event_rules'])
        self.assertIn('turn_99.000000_where', r2[1]['event_rules'])

        # A filter on an existing measure is accepted; one on an unknown
        # measure ('where' is not in this template) is rejected.
        d = DStream()
        d['measures'] = {'location': {'val': None, 'dtype': 'geo'}}
        t = update(d, [], [], [f2])
        t2 = update(d, [], [], [f])
        self.assertEqual(t[0], 'ok')
        self.assertEqual(t2[0], 'invalid update')
Beispiel #5
0
class TestEngineThread(unittest.TestCase):
    def setUp(self):
        """Build the pipes, engines and message fixtures used by the engine tests.

        Creates six ``Pipe`` pairs with one ``Engine`` per pair (engines 4-6 are
        additionally given ``buffer_roll=1``), the message batches the tests
        send and expect back, and a separate pipe/engine/template trio loaded
        from ``demo_template_dir.txt``. Nothing is started here; each test
        starts the engine it needs.
        """
        def make_batch(token, word, numbers):
            # One message per number, e.g. {"stream_token": token, "message": "hi1"}.
            return [{"stream_token": token, "message": word + str(n)} for n in numbers]

        # The test keeps ``conN``; the matching engine is handed ``conNb``.
        self.con1, self.con1b = Pipe()
        self.con2, self.con2b = Pipe()
        self.con3, self.con3b = Pipe()
        self.con4, self.con4b = Pipe()
        self.con5, self.con5b = Pipe()
        self.con6, self.con6b = Pipe()

        # Settings shared by all six buffer-test engines.
        engine_kwargs = dict(processors=2,
                             buffer_max_batch=4,
                             buffer_max_seconds=5,
                             test_mode=True)
        outfile_prefix = 'engine_test_output/engine_test'
        self.engine = Engine(self.con1b, test_outfile=outfile_prefix + '1', **engine_kwargs)
        self.engine2 = Engine(self.con2b, test_outfile=outfile_prefix + '2', **engine_kwargs)
        self.engine3 = Engine(self.con3b, test_outfile=outfile_prefix + '3', **engine_kwargs)
        # Engines 4-6 additionally use a buffer roll of one message.
        self.engine4 = Engine(self.con4b, buffer_roll=1, test_outfile=outfile_prefix + '4', **engine_kwargs)
        self.engine5 = Engine(self.con5b, buffer_roll=1, test_outfile=outfile_prefix + '5', **engine_kwargs)
        self.engine6 = Engine(self.con6b, buffer_roll=1, test_outfile=outfile_prefix + '6', **engine_kwargs)

        self.test_batch1 = make_batch("abc123", "hi", range(1, 5))
        self.test_batch2 = make_batch("abc1234", "hello", range(1, 5))
        self.test_batch3 = make_batch("abc123", "hi", range(5, 9))
        self.test_batch4 = make_batch("abc1234", "hello", range(5, 9))
        # roll results: last message of batch1 followed by the first three of batch3
        self.test_batch5 = make_batch("abc123", "hi", range(4, 8))
        # send batches
        self.test_batch6 = make_batch("abc123", "hi", range(9, 13))
        self.test_batch7 = make_batch("abc123", "hi", range(13, 17))
        # roll results: last message of batch6 followed by the first three of batch7
        self.test_batch8 = make_batch("abc123", "hi", range(12, 16))
        # hi1, hello1, hi2, hello2, ... with the two tokens interleaved
        self.test_batch_mix = [
            message
            for pair in zip(make_batch("abc123", "hi", range(1, 5)),
                            make_batch("abc1234", "hello", range(1, 5)))
            for message in pair
        ]
        self.test_batch_1to4 = self.test_batch1 + self.test_batch2 + self.test_batch3 + self.test_batch4
        # Paths registered here are deleted by tearDown.
        self.outfiles = []

        self.abalone_con, self.abalone_conb = Pipe()
        # Close the template file deterministically instead of leaking the handle.
        with open(demo_data_dir + "demo_template_dir.txt") as template_file:
            self.abalone = json.load(template_file)
        self.abalone_temp = DStream()
        self.abalone_temp.load_from_json(self.abalone)
        self.abalone_engine = Engine(
            self.abalone_conb,
            processors=2,
            buffer_max_batch=10,
            buffer_max_seconds=5,
            test_mode=True,
            test_outfile='engine_test_output/engine_test_abalone')

    def tearDown(self):
        """Remove every output file the finished test registered in ``self.outfiles``.

        Waits one second before deleting. An ``OSError`` raised while removing
        a file is printed and otherwise ignored, so cleanup continues with the
        remaining paths.
        """
        print("Tear it all down")
        sleep(1)
        for registered_path in self.outfiles:
            print(registered_path)
            try:
                remove_outfile(registered_path)
            except OSError as removal_error:
                print(removal_error)
        # Start the next test with an empty registry.
        self.outfiles = []

    def test_buffer1(self):
        """All sixteen messages of batches 1-4 come back as four batches of four.

        The messages are sent token by token (batch1, batch2, batch3, batch4)
        and each original batch is expected to appear unchanged in the output
        files read back with ``read_outfile``.
        """
        outfiles = [
            'engine_test_output/engine_test1_abc123_1.txt',
            'engine_test_output/engine_test1_abc123_2.txt',
            'engine_test_output/engine_test1_abc1234_1.txt',
            'engine_test_output/engine_test1_abc1234_2.txt',
        ]
        # Register up front so tearDown cleans up even when an assertion fails.
        self.outfiles.extend(outfiles)
        self.engine.start()
        try:
            # Unlike the other buffer tests, no warm-up sleep follows start() here.
            # test all sent to processor in batches of 4
            for message in self.test_batch_1to4:
                self.con1.send((message, 'load'))
            sleep(5)
            result = []
            for path in outfiles:
                result.extend(read_outfile(path))

            self.assertEqual(len(result), 4)
            self.assertIn(self.test_batch1, result)
            self.assertIn(self.test_batch2, result)
            self.assertIn(self.test_batch3, result)
            self.assertIn(self.test_batch4, result)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.con1.send("stop_poison_pill")

    def test_buffer2(self):
        """Interleaved messages for two tokens come back grouped by token.

        ``test_batch_mix`` alternates between the two stream tokens; the
        expected output is one batch of four per token, equal to
        ``test_batch1`` and ``test_batch2``.
        """
        # test all sent to processor in batches of 4 - GROUPED BY TOKEN (2 buffs)
        outfiles = [
            'engine_test_output/engine_test2_abc123_1.txt',
            'engine_test_output/engine_test2_abc1234_1.txt'
        ]
        # Register up front so tearDown cleans up even when an assertion fails.
        self.outfiles.extend(outfiles)
        self.engine2.start()
        try:
            sleep(5)
            for message in self.test_batch_mix:
                self.con2.send((message, 'load'))
            sleep(5)
            result2 = []
            for path in outfiles:
                result2.extend(read_outfile(path))

            self.assertEqual(len(result2), 2)
            self.assertIn(self.test_batch1, result2)
            self.assertIn(self.test_batch2, result2)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.con2.send("stop_poison_pill")

    def test_buffer3(self):
        """Leftover messages that never fill a batch are still written out.

        Only two messages per token are sent (the engine's batch size is
        four), then the test waits seven seconds - presumably long enough for
        the engine's ``buffer_max_seconds=5`` timeout to flush them - and
        expects one two-message batch per token.
        """
        # leftovers
        outfiles = [
            'engine_test_output/engine_test3_abc123_1.txt',
            'engine_test_output/engine_test3_abc1234_1.txt'
        ]
        # Register up front so tearDown cleans up even when an assertion fails.
        self.outfiles.extend(outfiles)
        self.engine3.start()
        try:
            sleep(5)
            for message in self.test_batch1[:2]:
                self.con3.send((message, 'load'))
            for message in self.test_batch2[:2]:
                self.con3.send((message, 'load'))
            sleep(7)
            result3 = []
            for path in outfiles:
                result3.extend(read_outfile(path))
            self.assertEqual(len(result3), 2)
            for batch in result3:
                self.assertEqual(len(batch), 2)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.con3.send("stop_poison_pill")

    def test_buffer4(self):
        """With ``buffer_roll=1`` consecutive batches overlap by one message.

        Eight messages (batch1 then batch3) are sent. The expected output is
        batch1, then ``test_batch5`` (last message of batch1 plus the next
        three), then the final two messages of batch3 as a leftover batch.
        """
        # w rolling window
        outfiles = [
            'engine_test_output/engine_test4_abc123_1.txt',
            'engine_test_output/engine_test4_abc123_2.txt',
            'engine_test_output/engine_test4_abc123_3.txt',
        ]
        # Register up front so tearDown cleans up even when an assertion fails.
        self.outfiles.extend(outfiles)
        self.engine4.start()
        try:
            sleep(5)

            for message in self.test_batch1 + self.test_batch3:
                self.con4.send((message, 'load'))
            sleep(7)
            result4 = []
            for path in outfiles:
                result4.extend(read_outfile(path))

            self.assertEqual(len(result4), 3)
            self.assertIn(self.test_batch1, result4)
            self.assertIn(self.test_batch5, result4)
            self.assertIn(self.test_batch3[-2:], result4)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.con4.send("stop_poison_pill")

    def test_buffer5(self):
        """A buffer holding only the rolled-over message is not written out.

        Exactly one full batch is sent to an engine with ``buffer_roll=1``;
        after waiting seven seconds only that single batch is expected in the
        one listed output file.
        """
        # no leftovers that are just buffer roll
        outfiles = [
            'engine_test_output/engine_test5_abc123_1.txt',
        ]
        # Register up front so tearDown cleans up even when an assertion fails.
        self.outfiles.extend(outfiles)
        self.engine5.start()
        try:
            sleep(5)
            for message in self.test_batch1:
                self.con5.send((message, 'load'))
            sleep(7)
            result5 = []
            for path in outfiles:
                result5.extend(read_outfile(path))
            self.assertEqual(len(result5), 1)
            self.assertIn(self.test_batch1, result5)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.con5.send("stop_poison_pill")

    def test_buffer6(self):
        """The rolling buffer starts over cleanly after a leftover flush.

        Two rounds of eight messages are sent with a seven second pause after
        each. Every round is expected to yield a full batch, a rolled batch
        (``test_batch5`` / ``test_batch8``) and a two-message leftover batch,
        for six batches in total.
        """
        # row resets correctly after leftovers
        outfiles = [
            'engine_test_output/engine_test6_abc123_1.txt',
            'engine_test_output/engine_test6_abc123_2.txt',
            'engine_test_output/engine_test6_abc123_3.txt',
            'engine_test_output/engine_test6_abc123_4.txt',
            'engine_test_output/engine_test6_abc123_5.txt',
            'engine_test_output/engine_test6_abc123_6.txt',
        ]
        # Register up front so tearDown cleans up even when an assertion fails.
        self.outfiles.extend(outfiles)
        self.engine6.start()
        try:
            sleep(5)
            # First round: hi1 .. hi8.
            for message in self.test_batch1:
                self.con6.send((message, 'load'))
            for message in self.test_batch3:
                self.con6.send((message, 'load'))
            sleep(7)
            # Second round: hi9 .. hi16.
            for message in self.test_batch6:
                self.con6.send((message, 'load'))
            for message in self.test_batch7:
                self.con6.send((message, 'load'))
            sleep(7)
            result6 = []
            for path in outfiles:
                result6.extend(read_outfile(path))

            self.assertEqual(len(result6), 6)
            self.assertIn(self.test_batch1, result6)
            self.assertIn(self.test_batch5, result6)
            self.assertIn(self.test_batch3[-2:], result6)
            self.assertIn(self.test_batch6, result6)
            self.assertIn(self.test_batch8, result6)
            self.assertIn(self.test_batch7[-2:], result6)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.con6.send("stop_poison_pill")

    def test_new_with_puller(self):
        """Sending a template with the ``'new'`` command produces two output batches.

        The template is the one loaded from ``demo_template_dir.txt`` in
        ``setUp`` (presumably it configures a directory puller - confirm
        against that file). Output files are discovered by glob rather than
        listed explicitly.
        """
        self.abalone_engine.start()
        try:
            sleep(3)
            self.abalone_con.send((self.abalone_temp, 'new'))
            sleep(5)
            outfiles = glob.glob('engine_test_output/engine_test_abalone*')
            # Register before asserting so tearDown removes the files on failure too.
            self.outfiles.extend(outfiles)
            result = []
            for path in outfiles:
                result.extend(read_outfile(path))
            self.assertEqual(len(result), 2)
        finally:
            # Always stop the engine; otherwise a failed check leaves it running.
            self.abalone_con.send("stop_poison_pill")