Ejemplo n.º 1
0
 def setUp(self):
     """Build the DStream template, mapping list and MQTT reader fixture."""
     # (integer key, measure name) pairs; they drive both the template's
     # measures and the "measures" entries of the mapping list.
     measure_columns = (
         (1, "length"),
         (2, "diameter"),
         (4, "whole_weight"),
         (6, "viscera_weight"),
     )

     template = DStream()
     for _, measure_name in measure_columns:
         template.add_measure(measure_name, "float")
     template.add_user_id("sex")
     template.add_field("rings")
     self.template = template

     # Each entry pairs an integer with a key path into the template
     # (presumably the CSV column index -- confirm against the formatter).
     self.mapping_list = [
         (0, ["user_ids", "sex"]),
         *((column, ["measures", measure_name, "val"])
           for column, measure_name in measure_columns),
         (8, ["fields", "rings"]),
         (3, ["timestamp"]),
     ]

     self.endpoint = "http://localhost:5000/api/load"
     self.data_format = "csv"
     self.uid = "abaloneID"
     self.test_data = "strom/data_puller/test/abalone0.csv"

     # NOTE: `config` is not local to this method, so this write is visible
     # to anything else that shares the same object.
     config["walltime"] = 10
     context_options = {
         "uid": self.uid,
         "data_format": self.data_format,
         "endpoint": self.endpoint,
         "userdata": config,
     }
     self.mqtt_context = MQTTContext(self.mapping_list,
                                     self.template,
                                     **context_options)
     self.mqtt_reader = MQTTReader(self.mqtt_context)
Ejemplo n.º 2
0
    def setUp(self):
        """Build the DStream template, mapping list and Kafka reader fixture."""
        # (integer key, measure name) pairs; they drive both the template's
        # measures and the "measures" entries of the mapping list.
        measure_columns = (
            (1, "length"),
            (2, "diameter"),
            (4, "whole_weight"),
            (6, "viscera_weight"),
        )

        template = DStream()
        for _, measure_name in measure_columns:
            template.add_measure(measure_name, "float")
        template.add_user_id("sex")
        template.add_field("rings")
        self.template = template

        # Each entry pairs an integer with a key path into the template
        # (presumably the CSV column index -- confirm against the formatter).
        self.mapping_list = [
            (0, ["user_ids", "sex"]),
            *((column, ["measures", measure_name, "val"])
              for column, measure_name in measure_columns),
            (8, ["fields", "rings"]),
            (3, ["timestamp"]),
        ]

        # Connection settings handed to KafkaContext below.
        self.url = "localhost:9092"
        self.topic = b"data_pulling"
        self.offset = 28
        self.data_format = "csv"
        self.timeout = 1000
        self.test_data = "strom/data_puller/test/abalone0.csv"

        context_options = {
            "url": self.url,
            "topic": self.topic,
            "offset": self.offset,
            "data_format": self.data_format,
            "timeout": self.timeout,
        }
        self.kc = KafkaContext(self.mapping_list,
                               self.template,
                               **context_options)
        self.kafka_reader = KafkaReader(self.kc)
Ejemplo n.º 3
0
 def setUp(self):
     """Build the DStream template, mapping list and directory reader fixture."""
     self.dir = "strom/data_puller/test/"
     self.file_type = "csv"

     # (integer key, measure name) pairs; they drive both the template's
     # measures and the "measures" entries of the mapping list.
     measure_columns = (
         (1, "length"),
         (2, "diameter"),
         (4, "whole_weight"),
         (6, "viscera_weight"),
     )

     template = DStream()
     for _, measure_name in measure_columns:
         template.add_measure(measure_name, "float")
     template.add_user_id("sex")
     template.add_field("rings")
     self.template = template

     # Each entry pairs an integer with a key path into the template
     # (presumably the CSV column index -- confirm against the formatter).
     self.mapping_list = [
         (0, ["user_ids", "sex"]),
         *((column, ["measures", measure_name, "val"])
           for column, measure_name in measure_columns),
         (8, ["fields", "rings"]),
         (3, ["timestamp"]),
     ]

     self.delimiter = ","
     self.endpoint = "http://localhost:5000/api/load"

     context_options = {
         "path": self.dir,
         "file_type": self.file_type,
         "delimiter": self.delimiter,
         "endpoint": self.endpoint,
     }
     self.dc = DirectoryContext(self.mapping_list,
                                self.template,
                                **context_options)
     self.source_reader = DirectoryReader(self.dc)
Ejemplo n.º 4
0
    def setUp(self):
        """Build the DStream template, mapping list and CSVFormatter fixture."""
        # (integer key, measure name) pairs; they drive both the template's
        # measures and the "measures" entries of the mapping list.
        measure_columns = (
            (1, "length"),
            (2, "diameter"),
            (4, "whole_weight"),
            (6, "viscera_weight"),
        )

        template = DStream()
        for _, measure_name in measure_columns:
            template.add_measure(measure_name, "float")
        template.add_user_id("sex")
        template.add_field("rings")
        self.template = template

        # Each entry pairs an integer with a key path into the template
        # (presumably the CSV column index -- confirm against the formatter).
        self.mapping_list = [
            (0, ["user_ids", "sex"]),
            *((column, ["measures", measure_name, "val"])
              for column, measure_name in measure_columns),
            (8, ["fields", "rings"]),
            (3, ["timestamp"]),
        ]

        self.csvf = CSVFormatter(self.mapping_list, self.template)
Ejemplo n.º 5
0
 def setUp(self):
     """Build a DStream template with directory pull rules and a DataPuller."""
     # (integer key, measure name) pairs; they drive both the template's
     # measures and the "measures" entries of the mapping list.
     measure_columns = (
         (1, "length"),
         (2, "diameter"),
         (4, "whole_weight"),
         (6, "viscera_weight"),
     )

     # Each entry pairs an integer with a key path into the template
     # (presumably the CSV column index -- confirm against the formatter).
     mapping_list = [
         (0, ["user_ids", "sex"]),
         *((column, ["measures", measure_name, "val"])
           for column, measure_name in measure_columns),
         (8, ["fields", "rings"]),
         (3, ["timestamp"]),
     ]
     directory_puller = {
         "type": "dir",
         "inputs": {
             "path": "strom/data_puller/test/",
             "file_type": "csv",
             "delimiter": ",",
         },
     }

     template = DStream()
     # The data rules are installed before any measure/user id/field is
     # added, same as the statement order this fixture has always used.
     template['data_rules'] = {
         "pull": True,
         "puller": directory_puller,
         "mapping_list": mapping_list,
     }
     for _, measure_name in measure_columns:
         template.add_measure(measure_name, "float")
     template.add_user_id("sex")
     template.add_field("rings")

     self.template = template
     self.q = Queue()
     self.puller = DataPuller(self.template, self.q)
Ejemplo n.º 6
0
def update_user_id(template: DStream, new_id: str, old_id=None):
    """Register ``new_id`` as a user id on ``template``.

    When ``old_id`` is given, ``prune_key(template, 'user_ids', old_id)`` is
    called first (presumably dropping the old entry -- see ``prune_key``).
    The template is modified in place; nothing is returned.
    """
    replaces_existing_id = old_id is not None
    if replaces_existing_id:
        prune_key(template, 'user_ids', old_id)

    template.add_user_id(new_id)
Ejemplo n.º 7
0
class TestDStream(unittest.TestCase):
    def setUp(self) -> None:
        """Create a default-constructed DStream for the test methods to use."""
        self.dstream = DStream()

    def test_init(self):
        """Check that a default-constructed DStream exposes every expected key."""
        init_keys = [
            'stream_name', 'user_description', 'version', 'stream_token',
            'source_key', 'template_id', 'storage_rules', 'ingest_rules',
            'engine_rules', 'timestamp', 'measures', 'fields', 'user_ids',
            'tags', 'foreign_keys', 'filters', 'dparam_rules', 'event_rules',
            'data_rules'
        ]
        for key in init_keys:
            # subTest lets the loop continue after a miss, so one run reports
            # every absent key instead of stopping at the first.
            with self.subTest(key=key):
                self.assertIn(key, self.dstream.keys())

    def testadd_methods(self):
        """Exercise each DStream ``add_*`` method and ``publish_version``.

        Every call is followed by an assertion that the value landed under
        the corresponding top-level key of the DStream.
        """
        self.assertIsInstance(self.dstream["stream_token"], str)

        # Measures are stored by name with their dtype.
        m_name = "viscosity"
        m_dtype = "float"
        self.dstream.add_measure(m_name, m_dtype)
        self.assertIn(m_name, self.dstream["measures"].keys())
        self.assertEqual(self.dstream["measures"][m_name]["dtype"], m_dtype)

        # Fields and user ids are keyed by name.
        f_name = "strawberry"
        self.dstream.add_field(f_name)
        self.assertIn(f_name, self.dstream["fields"].keys())

        uid_name = "my_id"
        self.dstream.add_user_id(uid_name)
        self.assertIn(uid_name, self.dstream["user_ids"].keys())

        tag_name = "Really good sensor"
        self.dstream.add_tag(tag_name)
        self.assertIn(tag_name, self.dstream["tags"])

        # A foreign key is expected to be stored as a {key: None} entry.
        fk = "key to the city"
        self.dstream.add_fk(fk)
        self.assertIn({fk: None}, self.dstream["foreign_keys"])

        # Filters and derived params are expected at index 0 once added.
        fake_filter = {"func_name": "Make all values 0"}
        self.dstream.add_filter(fake_filter)
        self.assertEqual(fake_filter, self.dstream["filters"][0])

        fake_dparam = {"measure": "viscosity", "drule": "max of mins"}
        self.dstream.add_derived_param(fake_dparam)
        self.assertEqual(fake_dparam, self.dstream["dparam_rules"][0])

        # Events are keyed by their name.
        fake_event_name = "My birthday"
        fake_event = {"param": "viscosity", "threshold": "too viscous"}
        self.dstream.add_event(fake_event_name, fake_event)
        self.assertEqual(fake_event,
                         self.dstream["event_rules"][fake_event_name])

        # Publishing is expected to bump the version by exactly one.
        old_version = self.dstream["version"]
        self.dstream.publish_version()
        self.assertEqual(old_version + 1, self.dstream["version"])

        # add_data_rules is expected to store its argument as "data_rules".
        fake_mapping = ["fake", "mapping", "list"]
        self.dstream.add_data_rules(fake_mapping)
        self.assertEqual(fake_mapping, self.dstream["data_rules"])

    def test_load_from_json(self):
        """Load a small dict into the DStream and check version and token."""
        payload = {"stream_token": "foo", "version": 900}
        self.dstream.load_from_json(payload)

        expected_version = payload["version"]
        self.assertEqual(expected_version, self.dstream["version"])

        loaded_token = self.dstream["stream_token"]
        self.assertIsInstance(loaded_token, str)

    def test_filter(self):
        self.dstream.add_filter({"test": "filterdict"})