    def store_stream(self, datastream: DataStream):
        """
        :param datastream:
        """

        ownerID = datastream.owner
        name = datastream.name
        data_descriptor = datastream.data_descriptor
        execution_context = datastream.execution_context
        annotations = datastream.annotations
        stream_type = datastream.datastream_type
        data = datastream.data

        if data:
            if isinstance(data, list):
                # derive the stream's time bounds from the first and last datapoints
                total_dp = len(data) - 1
                new_start_time = data[0].start_time
                new_end_time = data[total_dp].start_time
            else:
                # a single datapoint: its start time bounds the stream on both ends
                new_start_time = data.start_time
                new_end_time = data.start_time

            result = Metadata(self.CC_obj).is_id_created(
                ownerID, name, execution_context)

            stream_identifier = result["id"]
            Metadata(self.CC_obj).store_stream_info(
                stream_identifier, ownerID, name, data_descriptor,
                execution_context, annotations, stream_type, new_start_time,
                new_end_time, result["status"])

            dataframe = self.map_datapoint_to_dataframe(
                stream_identifier, data)

            self.store_data(dataframe, self.datapointTable)
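A minimal usage sketch for the store_stream example above; the IDs, sample payload, and the `data_store` instance name are hypothetical:

import datetime
from pytz import timezone

# hypothetical IDs and sample; data_store is an instance of the class above
start_time = timezone('US/Central').localize(datetime.datetime(2017, 4, 24, 0, 0, 1))
dp = DataPoint(start_time=start_time, end_time=start_time, sample={'value': 1})
ds = DataStream("6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                "06634264-56bc-4c92-abd7-377dbbad79dd",
                "data-store-test", {}, {}, {}, "datastream",
                start_time, start_time, [dp])
data_store.store_stream(ds)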
    def store_stream(self, datastream: DataStream, type):
        """
        :param datastream:
        :param type: support types are formatted json object or CC Datastream objects
        """
        if (type == "json"):
            datastream = self.json_to_datastream(datastream)
        elif (type != "json" and type != "datastream"):
            raise ValueError(type + " is not supported data type")

        ownerID = datastream.owner
        name = datastream.name
        data_descriptor = datastream.data_descriptor
        execution_context = datastream.execution_context
        annotations = datastream.annotations
        stream_type = datastream.datastream_type
        data = datastream.data

        if data:
            if isinstance(data, list):
                # prefer explicit stream bounds; otherwise derive them from the
                # first and last datapoints
                total_dp = len(data) - 1
                if not datastream._start_time:
                    new_start_time = data[0].start_time
                else:
                    new_start_time = datastream._start_time
                if not datastream._end_time:
                    new_end_time = data[total_dp].start_time
                else:
                    new_end_time = datastream._end_time
            else:
                if not datastream._start_time:
                    new_start_time = data.start_time
                else:
                    new_start_time = datastream._start_time
                if not datastream._end_time:
                    new_end_time = data.start_time
                else:
                    new_end_time = datastream._end_time

            stream_identifier = datastream.identifier
            result = Metadata(self.CC_obj).is_id_created(stream_identifier)

            Metadata(self.CC_obj).store_stream_info(stream_identifier, ownerID, name,
                                                    data_descriptor, execution_context,
                                                    annotations,
                                                    stream_type, new_start_time, new_end_time, result["status"])

            self.add_to_cassandra(stream_identifier, data)
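A hedged usage sketch for the typed variant above; `engine`, `ds`, and `json_payload` are hypothetical names:

# engine is an instance of the class defining store_stream above;
# ds is a DataStream built as in the first example
engine.store_stream(ds, "datastream")      # store a CC DataStream object directly
engine.store_stream(json_payload, "json")  # converted via json_to_datastream first
# any other type string raises ValueError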
    def get_participant_streams(self, participant_id: uuid) -> dict:
        """

        :param participant_id:
        :return:
        """
        return Metadata(self).get_participant_streams(participant_id)
    def get_all_participants(self, study_name: str) -> dict:
        """

        :param study_name:
        :return:
        """
        return Metadata(self).get_all_participants(study_name)
    def login_user(self, user_name: str, password: str) -> bool:
        """

        :param user_name:
        :param password:
        :return:
        """
        return Metadata(self).login_user(user_name, password)
    def get_stream_start_end_time(self, stream_id: uuid) -> dict:
        """

        :param stream_id:
        :param time_type: acceptable parameters are start_time OR end_time
        :return:
        """
        return Metadata(self).get_stream_start_end_time(stream_id)
    def is_auth_token_valid(self, token_owner: str, auth_token: str, auth_token_expiry_time: datetime) -> bool:
        """

        :param token_owner:
        :param auth_token:
        :param auth_token_expiry_time:
        :return:
        """
        return Metadata(self).is_auth_token_valid(token_owner, auth_token, auth_token_expiry_time)
    def store_or_update_Kafka_offset(self, topic: str, topic_partition: str, offset_start: str, offset_until: str):
        """
        Persists the processed Kafka offsets for a topic partition
        :param topic:
        :param topic_partition:
        :param offset_start:
        :param offset_until:
        """
        Metadata(self).store_or_update_Kafka_offset(topic, topic_partition, offset_start, offset_until)
    def get_stream_ids_of_owner(self, owner_id: uuid, stream_name: str = None, start_time: datetime = None,
                                end_time: datetime = None) -> List:
        """
        Returns a list of all stream IDs owned by an owner
        :param owner_id:
        :param stream_name: optional filter by stream name
        :param start_time: optional lower bound on stream time
        :param end_time: optional upper bound on stream time
        :return: list of stream IDs
        """
        return Metadata(self).get_stream_ids_of_owner(owner_id, stream_name, start_time, end_time)
    def get_stream_ids_by_name(self, stream_name: str, owner_id: uuid = None, start_time: datetime = None,
                               end_time: datetime = None) -> List:
        """
        Returns a list of all the stream IDs that match the provided name
        :param stream_name:
        :param owner_id:
        :param start_time:
        :param end_time:
        :return: list of stream IDs
        """
        return Metadata(self).get_stream_ids_by_name(stream_name, owner_id, start_time, end_time)
    def update_auth_token(self, username: str, auth_token: str, auth_token_issued_time: datetime,
                          auth_token_expiry_time: datetime)->str:
        """

        :param username:
        :param auth_token:
        :param auth_token_issued_time:
        :param auth_token_expiry_time:
        :return: uuid of the current user
        """
        user_uuid = Metadata(self).update_auth_token(username, auth_token, auth_token_issued_time, auth_token_expiry_time)
        return user_uuid
    def filter_stream(self, data_stream_id: uuid, annotation_stream_name: str, annotation: str,
                      start_time: datetime = None, end_time: datetime = None) -> List[DataPoint]:
        """
        Maps a derived annotation stream onto a data stream and returns a list of the mapped DataPoints
        :param data_stream_id:
        :param annotation_stream_name:
        :param annotation: annotation value to filter on (e.g., "good")
        :param start_time:
        :param end_time:
        :return: list of DataPoints
        """
        annotation_stream_id = Metadata(self).get_annotation_id(data_stream_id, annotation_stream_name)
        return Data(self).get_annotation_stream(data_stream_id, annotation_stream_id, annotation, start_time, end_time)
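A usage sketch mirroring test_07 below; assumes CC is a configured CerebralCortex instance:

# keep only datapoints covered by 'good' annotations in the mapped annotation stream
good_points = CC.filter_stream("6db98dfb-d6e8-4b27-8d55-95b20fa0f751",
                               "test-case", "good")
for dp in good_points:
    print(dp.start_time, dp.end_time, dp.sample)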
    def update_auth_token(self, user_name: str, auth_token: str,
                          auth_token_issued_time: datetime,
                          auth_token_expiry_time: datetime):
        """

        :param user_name:
        :param auth_token:
        :param auth_token_issued_time:
        :param auth_token_expiry_time:
        :return:
        """
        return Metadata(self).update_auth_token(user_name, auth_token,
                                                auth_token_issued_time,
                                                auth_token_expiry_time)
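A hedged sketch tying the auth helpers above together; the credentials and token value are placeholders, and CC is assumed to be a CerebralCortex instance:

import datetime

if CC.login_user("demo_user", "demo_pass"):  # hypothetical credentials
    issued = datetime.datetime.now()
    expiry = issued + datetime.timedelta(hours=12)
    user_uuid = CC.update_auth_token("demo_user", "token-abc123", issued, expiry)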
    def store_datastream(self, datastream):
        datastream_identifier = datastream.get_identifier()
        # TO-DO: only add study-ids if they exist
        study_ids = datastream.getStudyIDs()
        user_id = datastream.userObj.getID()

        processing_module_id = datastream.processingModuleObj.getID()
        datastream_type = datastream.get_datastream_type()
        metadata = datastream.getMetadata().getMetadata()
        source_ids = datastream.get_source_ids()
        data = datastream.data

        # if datastream_identifier is empty, create a new one in the MySQL
        # database and return the newly added identifier
        lastAddedRecordID = Metadata(self.configuration).storeDatastrem(
            datastream_identifier, study_ids, user_id, processing_module_id,
            source_ids, datastream_type, metadata)

        if datastream_identifier == "":
            datastream_identifier = lastAddedRecordID

        dataframe = self.map_datapoint_to_dataframe(datastream_identifier,
                                                    data)

        self.save_datapoint(dataframe)
    def map_datapoint_and_metadata_to_datastream(self, stream_id: int,
                                                 data: list) -> DataStream:
        """
        This method will map the datapoint and metadata to datastream object
        :param stream_id:
        :param data: list
        :return: datastream object
        """

        # query datastream(mysql) for metadata
        datastream_info = Metadata(self.CC_obj).get_stream_info(stream_id)

        ownerID = datastream_info[0]["owner"]
        name = datastream_info[0]["name"]
        data_descriptor = json.loads(datastream_info[0]["data_descriptor"])
        execution_context = json.loads(datastream_info[0]["execution_context"])
        annotations = json.loads(datastream_info[0]["annotations"])
        stream_type = datastream_info[0]["type"]
        start_time = datastream_info[0]["start_time"]
        end_time = datastream_info[0]["end_time"]

        return DataStream(stream_id, ownerID, name, data_descriptor,
                          execution_context, annotations, stream_type,
                          start_time, end_time, data)
class DataExporter():
    def __init__(self,
                 CC_obj: CerebralCortex,
                 export_dir_path: str,
                 owner_ids: List = None,
                 owner_user_names: List = None,
                 owner_name_regex: str = None,
                 start_time: str = None,
                 end_time: str = None):
        """
        :param CC_obj:
        :param export_dir_path:
        :param owner_ids: owner_user_names and owner_name_regex must be None if using owner_ids
        :param owner_user_names: owner_ids and owner_name_regex must be None if using owner_user_names
        :param owner_name_regex: owner_ids and owner_user_names must be None if using owner_name_regex
        :param start_time:
        :param end_time:
        """

        self.streamData = Data(CC_obj)
        self.export_dir_path = export_dir_path
        self.metadata = Metadata(CC_obj)
        self.owner_ids = owner_ids
        self.owner_user_names = owner_user_names
        self.owner_name_regex = str(owner_name_regex)
        self.start_time = start_time
        self.end_time = end_time

    def start(self):
        if self.owner_ids and self.owner_ids != 'None':
            for owner_id in self.owner_ids:
                owner_name = self.metadata.owner_id_to_name(owner_id)
                self.export_data(owner_id=owner_id, owner_name=owner_name)
        elif self.owner_user_names and self.owner_user_names != 'None':
            for owner_user_name in self.owner_user_names:
                owner_id = self.metadata.owner_name_to_id(owner_user_name)
                self.export_data(owner_id=owner_id, owner_name=owner_user_name)
        elif self.owner_name_regex and self.owner_name_regex != 'None':
            owner_idz = self.metadata.get_owner_ids_by_owner_name_regex(
                self.owner_name_regex)
            for owner_id in owner_idz:
                owner_name = self.metadata.owner_id_to_name(
                    owner_id["identifier"])
                self.export_data(owner_id=owner_id["identifier"],
                                 owner_name=owner_name)

    @calculate_time
    def export_data(self, owner_id=None, owner_name=None):

        rows = self.metadata.get_stream_metadata_by_owner_id(owner_id)
        if rows == "NULL":
            print("No data found for => owner-id: " + owner_id +
                  " - owner-name: " + owner_name)
            return

        for row in rows:
            stream_id = row["identifier"]
            data_start_time = row["start_time"]
            data_end_time = row["end_time"]
            stream_metadata = {
                "identifier": stream_id,
                "owner_id": row["owner"],
                "name": row["name"],
                "data_available": {
                    "start_time": str(data_start_time),
                    "end_time": str(data_end_time)
                }
            }

            data_descriptor = json.loads(row["data_descriptor"])
            execution_context = json.loads(row["execution_context"])
            annotations = json.loads(row["annotations"])

            stream_metadata.update({"data_descriptor": data_descriptor})
            stream_metadata.update({"execution_context": execution_context})
            stream_metadata.update({"annotations": annotations})

            file_path = self.export_dir_path + owner_name
            if not os.path.exists(file_path):
                os.mkdir(file_path)

            # write metadata to json file
            self.write_to_file(file_path + "/" + stream_id + ".json",
                               json.dumps(stream_metadata))

            # load and write stream raw data, one gzipped CSV per day
            delta = data_end_time - data_start_time

            for i in range(delta.days + 1):
                day = data_start_time + timedelta(days=i)
                day = datetime.strftime(day, "%Y%m%d")
                self.writeStreamDataToZipFile(stream_id, day, file_path)

    def writeStreamDataToZipFile(self, stream_id: uuid, day, file_path: str):
        """

        :param stream_id:
        :param file_path:
        """
        if stream_id:
            where_clause = "identifier='" + stream_id + "' and day='" + str(
                day) + "'"
        else:
            raise ValueError("Missing owner ID.")

        if self.start_time and self.end_time:
            where_clause += " and start_time>=cast('" + str(
                self.start_time
            ) + "' as timestamp) and start_time<=cast('" + str(
                self.end_time) + "' as timestamp)"
        elif self.start_time and not self.end_time:
            where_clause += " and start_time>=cast('" + str(
                self.start_time) + "' as timestamp)"
        elif not self.start_time and self.end_time:
            where_clause += " and start_time<=cast('" + str(
                self.end_time) + "' as timestamp)"

        df = self.streamData.load_data_from_cassandra(
            self.streamData.datapointTable, where_clause, 1)
        df.write \
            .format("csv") \
            .option("codec", "org.apache.hadoop.io.compress.GzipCodec") \
            .save(file_path + "/" + stream_id)

        os.system("cat " + file_path + "/" + stream_id + "/p* > " + file_path +
                  "/" + stream_id + ".gz")
        if os.path.exists(file_path + "/" + stream_id + "/"):
            shutil.rmtree(file_path + "/" + stream_id + "/",
                          ignore_errors=True)

    def write_to_bz2(self, file_name, data):
        """
        :param file_name:
        :param data: bytes to compress and write
        """
        with open(file_name, 'wb+') as outfile:
            compressed_data = bz2.compress(data, 9)
            outfile.write(compressed_data)

    def write_to_file(self, file_name: str, data: str):
        """
        :param file_name:
        :param data:
        """
        with open(file_name, 'w+') as outfile:
            outfile.write(data)
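A minimal, hypothetical invocation of the exporter; the export directory and owner ID are placeholders, and CC is assumed to be a configured CerebralCortex instance:

exporter = DataExporter(CC, "/tmp/cc_export/",
                        owner_ids=["06634264-56bc-4c92-abd7-377dbbad79dd"])
exporter.start()  # writes <owner>/<stream_id>.json metadata plus gzipped CSV data per stream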
class TestDataStoreEngine(unittest.TestCase):
    testConfigFile = os.path.join(os.path.dirname(__file__),
                                  'res/test_configuration.yml')
    CC = CerebralCortex(testConfigFile,
                        master="local[*]",
                        name="Cerebral Cortex DataStoreEngine Tests",
                        time_zone="US/Central",
                        load_spark=True)
    configuration = CC.configuration
    meta_obj = Metadata(CC)

    def test_01_setup_data(self):
        data_descriptor = {}
        execution_context = json.loads(
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        annotations = {}
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        result = Metadata(self.CC).is_id_created(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
            execution_context)

        if result["status"] == "new":
            stream_identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        else:
            stream_identifier = result["id"]

        self.assertEqual(stream_identifier,
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        Metadata(self.CC).store_stream_info(
            stream_identifier, "06634264-56bc-4c92-abd7-377dbbad79dd",
            "data-store-test", data_descriptor, execution_context, annotations,
            stream_type, start_time, end_time, result["status"])

    def test_02_get_stream_info(self):

        stream_info = Metadata(
            self.CC).get_stream_info("6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        self.assertEqual(stream_info[0]["identifier"],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")
        self.assertEqual(stream_info[0]["owner"],
                         "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertEqual(stream_info[0]["name"], "data-store-test")
        self.assertEqual(stream_info[0]["data_descriptor"], "{}")
        self.assertEqual(
            stream_info[0]["execution_context"],
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        self.assertEqual(stream_info[0]["annotations"], "{}")
        self.assertEqual(stream_info[0]["type"], "datastream")

    def test_03_append_annotations(self):
        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {}, {}, "datastream1")

        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {"some": "none"}, {},
                          "datastream1")

        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {"a": "b"}, {}, {}, "datastream1")

        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-diagnostic_diff", {}, {}, {}, "datastream1")

        annotations_unchanged = Metadata(self.CC).append_annotations(
            "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", {},
            json.loads(
                '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
            ), {}, "datastream")
        self.assertEqual(annotations_unchanged, "unchanged")

    def test_04_get_stream_ids_by_name(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_name = Metadata(self.CC).get_stream_ids_by_name("data-store-test")
        self.assertIsInstance(by_name, list)
        self.assertEqual(by_name[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd",
            start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(
            self.CC).get_stream_ids_by_name(
                "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd",
                start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_05_get_stream_ids_of_owner(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_id, list)
        self.assertEqual(by_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
            start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(
            self.CC).get_stream_ids_of_owner(
                "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
                start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_06_store_stream(self):
        identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        owner = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name = "data-store-test"
        data_descriptor = {}
        execution_context = json.loads(
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        annotations = {}
        datapoints = []
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)
        localtz = timezone('US/Central')
        start_time = localtz.localize(start_time)
        end_time = localtz.localize(end_time)
        sample = {'Foo3': 123}

        dp1 = DataPoint(start_time=start_time,
                        end_time=end_time,
                        sample=sample)

        datapoints.append(dp1)

        ds = DataStream(identifier, owner, name, data_descriptor,
                        execution_context, annotations, stream_type,
                        start_time, end_time, datapoints)

        self.CC.save_datastream(ds)
        stream = self.CC.get_datastream(identifier, data_type=DataSet.COMPLETE)
        self.assertEqual(stream._identifier, identifier)
        self.assertEqual(stream._owner, owner)
        self.assertEqual(stream._name, name)
        self.assertEqual(stream._data_descriptor, data_descriptor)
        self.assertEqual(stream._execution_context, execution_context)
        self.assertEqual(stream._annotations, annotations)
        self.assertEqual(stream._datastream_type, stream_type)

        self.assertEqual(stream.data[0].start_time, start_time)
        self.assertEqual(stream.data[0].end_time, end_time)
        self.assertEqual(stream.data[0].sample, sample)

    def test_07_stream_filter(self):
        identifier_anno = "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"
        identifier_data = "6db98dfb-d6e8-4b27-8d55-95b20fa0f751"
        owner_id = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name_anno = "data-store-test-annotation"
        name_data = "data-store-test-data"
        data_descriptor = {}
        execution_context_anno = json.loads(
            '{"execution_context": {"algorithm": {"method": "test.data_store.annotation.filter"}}}'
        )
        execution_context_data = json.loads(
            '{"execution_context": {"algorithm": {"method": "test.data_store.data.filter"}}}'
        )
        annotations_data = json.loads(
            '[{"name": "test-case","identifier": "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"}]'
        )
        annotations_anno = {}
        datapoints_anno = []
        datapoints_data = []

        result_data = Metadata(self.CC).is_id_created(owner_id, name_data,
                                                      execution_context_data)
        if result_data["status"] != "new":
            identifier_data = result_data["id"]

        Metadata(self.CC).store_stream_info(
            identifier_anno, owner_id, name_anno, data_descriptor,
            execution_context_anno, annotations_anno, "annotations",
            datetime.datetime(2017, 4, 24, 0, 0, 1),
            datetime.datetime(2017, 4, 24, 0, 0, 5), result_data["status"])

        result_anno = Metadata(self.CC).is_id_created(owner_id, name_data,
                                                      execution_context_data)
        if result_anno["status"] != "new":
            identifier_anno = result_anno["id"]

        Metadata(self.CC).store_stream_info(
            identifier_data, owner_id, name_data, data_descriptor,
            execution_context_data, annotations_data, "datastream",
            datetime.datetime(2017, 4, 24, 0, 0, 1),
            datetime.datetime(2017, 4, 24, 0, 0, 5), result_anno["status"])

        for i in range(0, 5):
            if (i % 2 == 0):
                sample_anno = 'good'
            else:
                sample_anno = 'bad'
            sample_data = i, i + 2, i + 3
            start_time_anno = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_anno = datetime.datetime(2017, 4, 24, 0, 0, (5 + i))

            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))

            localtz = timezone('US/Central')
            start_time_anno = localtz.localize(start_time_anno)
            end_time_anno = localtz.localize(end_time_anno)
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)

            datapoints_anno.append(
                DataPoint(start_time=start_time_anno,
                          end_time=end_time_anno,
                          sample=sample_anno))
            datapoints_data.append(
                DataPoint(start_time=start_time_data,
                          end_time=end_time_data,
                          sample=sample_data))

        ds_anno = DataStream(uuid.UUID(identifier_anno), owner_id, name_anno,
                             data_descriptor, execution_context_anno,
                             annotations_data, "annotations", start_time_anno,
                             end_time_anno, datapoints_anno)

        ds_data = DataStream(uuid.UUID(identifier_data), owner_id, name_data,
                             data_descriptor, execution_context_data,
                             annotations_anno, "datastream", start_time_anno,
                             end_time_anno, datapoints_data)

        self.CC.save_datastream(ds_anno)
        self.CC.save_datastream(ds_data)

        filtered_stream = self.CC.filter_stream(identifier_data, "test-case",
                                                "good")

        self.assertEqual(len(filtered_stream), 5)

        for i in range(0, 5):
            sample_data = [i, i + 2, i + 3]
            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)

            self.assertEqual(filtered_stream[i].start_time, start_time_data)
            self.assertEqual(filtered_stream[i].end_time, end_time_data)
            self.assertEqual(filtered_stream[i].sample, sample_data)
    def store_data_to_influxdb(self, datastream: DataStream):

        """
        :param datastream:
        """
        st = datetime.now()
        client = InfluxDBClient(host=self.influxdbIP, port=self.influxdbPort, username=self.influxdbUser, password=self.influxdbPassword, database=self.influxdbDatabase)
        datapoints = datastream.data
        stream_identifier = datastream.identifier
        stream_owner_id = datastream.owner
        stream_owner_name = Metadata(self.CC_obj).owner_id_to_name(stream_owner_id)
        stream_name = datastream.name

        if datastream.data_descriptor:
            total_dd_columns = len(datastream.data_descriptor)
            data_descriptor = datastream.data_descriptor
        else:
            data_descriptor = []
            total_dd_columns = 0

        influx_data = []
        for datapoint in datapoints:
            point = {}
            point['measurement'] = stream_name
            point['tags'] = {'stream_id': stream_identifier, 'owner_id': stream_owner_id, 'owner_name': stream_owner_name}

            point['time'] = datapoint.start_time
            values = datapoint.sample

            # normalize the sample into a list of floats where possible
            if isinstance(values, tuple):
                values = list(values)
            else:
                try:
                    values = [float(values)]
                except:
                    try:
                        values = list(map(float, values.split(',')))
                    except:
                        pass

            try:
                point['fields'] = {}
                if isinstance(values, list):
                    for i, sample_val in enumerate(values):
                        # use data-descriptor column names when they line up with the sample columns
                        if len(values) == total_dd_columns:
                            dd = data_descriptor[i]
                            if "NAME" in dd:
                                point['fields'][dd["NAME"]] = sample_val
                            else:
                                point['fields']['value_' + str(i)] = sample_val
                        else:
                            point['fields']['value_' + str(i)] = sample_val
                else:
                    dd = data_descriptor[0]
                    if not values:
                        values = "NULL"
                    try:
                        values = float(values)
                    except:
                        pass
                    if "NAME" in dd:
                        point['fields'][dd["NAME"]] = values
                    else:
                        point['fields']['value_0'] = values
            except:
                try:
                    values = json.dumps(values)
                    point['fields']['value_0'] = values
                except:
                    cc_log("Datapoint sample values conversion: " + str(values), "WARNING")
                    point['fields']['value_0'] = str(values)

            influx_data.append(point)
        et = datetime.now()

        #print('InfluxDB - Yielding:', stream_owner_id, len(influx_data), stream_identifier)

        try:
            client.write_points(influx_data)
            et2 = datetime.now()
            #print("Influx Time BreakDown: Processing: ", et-st, " Inserting: ",et2-et, " Size: ",len(influx_data))
        except:
            cc_log("Failed to write datapoints to InfluxDB: " + str(stream_identifier), "ERROR")
    def get_kafka_offsets(self, topic: str) -> dict:
        """
        :param topic:
        :return: stored offsets for the topic
        """
        return Metadata(self).get_kafka_offsets(topic)
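The two Kafka-offset wrappers pair up; a hedged sketch with placeholder topic and offset values, assuming CC is a CerebralCortex instance:

CC.store_or_update_Kafka_offset("stream-data", "0", "100", "200")
offsets = CC.get_kafka_offsets("stream-data")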