def store_stream(self, datastream: DataStream):
    """
    :param datastream:
    """
    ownerID = datastream.owner
    name = datastream.name
    data_descriptor = datastream.data_descriptor
    execution_context = datastream.execution_context
    annotations = datastream.annotations
    stream_type = datastream.datastream_type
    data = datastream.data

    if data:
        if isinstance(data, list):
            total_dp = len(data) - 1
            new_start_time = data[0].start_time
            new_end_time = data[total_dp].start_time
        else:
            new_start_time = data.start_time
            new_end_time = data.start_time

        result = Metadata(self.CC_obj).is_id_created(ownerID, name, execution_context)
        stream_identifier = result["id"]

        Metadata(self.CC_obj).store_stream_info(
            stream_identifier, ownerID, name, data_descriptor,
            execution_context, annotations, stream_type,
            new_start_time, new_end_time, result["status"])

        dataframe = self.map_datapoint_to_dataframe(stream_identifier, data)
        self.store_data(dataframe, self.datapointTable)
def store_stream(self, datastream: DataStream, type):
    """
    :param datastream:
    :param type: supported types are a formatted JSON object or a CC DataStream object
    """
    if type == "json":
        datastream = self.json_to_datastream(datastream)
    elif type != "datastream":
        raise ValueError(type + " is not a supported data type")

    ownerID = datastream.owner
    name = datastream.name
    data_descriptor = datastream.data_descriptor
    execution_context = datastream.execution_context
    annotations = datastream.annotations
    stream_type = datastream.datastream_type
    data = datastream.data

    if data:
        if isinstance(data, list):
            total_dp = len(data) - 1
            if not datastream._start_time:
                new_start_time = data[0].start_time
            else:
                new_start_time = datastream._start_time
            if not datastream._end_time:
                new_end_time = data[total_dp].start_time
            else:
                new_end_time = datastream._end_time
        else:
            if not datastream._start_time:
                new_start_time = data.start_time
            else:
                new_start_time = datastream._start_time
            if not datastream._end_time:
                new_end_time = data.start_time
            else:
                new_end_time = datastream._end_time

        stream_identifier = datastream.identifier
        result = Metadata(self.CC_obj).is_id_created(stream_identifier)

        Metadata(self.CC_obj).store_stream_info(
            stream_identifier, ownerID, name, data_descriptor,
            execution_context, annotations, stream_type,
            new_start_time, new_end_time, result["status"])

        self.add_to_cassandra(stream_identifier, data)
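# Usage sketch: a minimal, hedged example of how store_stream might be invoked.
# It mirrors the objects built in test_06_store_stream further below; the
# Data(CC_obj) handle and the surrounding configuration are assumptions.
localtz = timezone('US/Central')
start = localtz.localize(datetime.datetime(2017, 4, 24, 0, 0, 1))
end = localtz.localize(datetime.datetime(2017, 4, 24, 0, 0, 2))
dp = DataPoint(start_time=start, end_time=end, sample={'Foo3': 123})
ds = DataStream("6db98dfb-d6e8-4b27-8d55-95b20fa0f754",   # stream id
                "06634264-56bc-4c92-abd7-377dbbad79dd",   # owner id
                "data-store-test", {}, {}, {}, "datastream", start, end, [dp])
Data(CC_obj).store_stream(ds, "datastream")  # or pass a formatted JSON object with type="json"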
def get_participant_streams(self, participant_id: uuid) -> dict:
    """
    :param participant_id:
    :return:
    """
    return Metadata(self).get_participant_streams(participant_id)
def get_all_participants(self, study_name: str) -> dict:
    """
    :param study_name:
    :return:
    """
    return Metadata(self).get_all_participants(study_name)
def login_user(self, user_name: str, password: str) -> bool:
    """
    :param user_name:
    :param password:
    :return:
    """
    return Metadata(self).login_user(user_name, password)
def get_stream_start_end_time(self, stream_id: uuid) -> dict:
    """
    Returns the start and end time of a stream.

    :param stream_id:
    :return:
    """
    return Metadata(self).get_stream_start_end_time(stream_id)
def is_auth_token_valid(self, token_owner: str, auth_token: str,
                        auth_token_expiry_time: datetime) -> bool:
    """
    :param token_owner:
    :param auth_token:
    :param auth_token_expiry_time:
    :return:
    """
    return Metadata(self).is_auth_token_valid(token_owner, auth_token, auth_token_expiry_time)
def store_or_update_Kafka_offset(self, topic: str, topic_partition: str,
                                 offset_start: str, offset_until: str):
    """
    :param topic:
    :param topic_partition:
    :param offset_start:
    :param offset_until:
    """
    Metadata(self).store_or_update_Kafka_offset(topic, topic_partition, offset_start, offset_until)
def get_stream_ids_of_owner(self, owner_id: uuid, stream_name: str = None,
                            start_time: datetime = None, end_time: datetime = None) -> List:
    """
    Returns a list of all stream IDs owned by an owner.

    :param owner_id:
    :param stream_name:
    :param start_time:
    :param end_time:
    :return:
    """
    return Metadata(self).get_stream_ids_of_owner(owner_id, stream_name, start_time, end_time)
def get_stream_ids_by_name(self, stream_name: str, owner_id: uuid = None,
                           start_time: datetime = None, end_time: datetime = None) -> List:
    """
    Returns a list of all stream IDs that match the provided stream name.

    :param stream_name:
    :param owner_id:
    :param start_time:
    :param end_time:
    :return:
    """
    return Metadata(self).get_stream_ids_by_name(stream_name, owner_id, start_time, end_time)
def update_auth_token(self, username: str, auth_token: str,
                      auth_token_issued_time: datetime,
                      auth_token_expiry_time: datetime) -> str:
    """
    :param username:
    :param auth_token:
    :param auth_token_issued_time:
    :param auth_token_expiry_time:
    :return: UUID of the current user
    """
    user_uuid = Metadata(self).update_auth_token(username, auth_token,
                                                 auth_token_issued_time,
                                                 auth_token_expiry_time)
    return user_uuid
def filter_stream(self, data_stream_id: uuid, annotation_stream_name: str, annotation: str,
                  start_time: datetime = None, end_time: datetime = None) -> List[DataPoint]:
    """
    Maps a derived annotation stream onto a data stream and returns a list of the
    matching DataPoints.

    :param data_stream_id:
    :param annotation_stream_name:
    :param annotation:
    :param start_time:
    :param end_time:
    :return:
    """
    annotation_stream_id = Metadata(self).get_annotation_id(data_stream_id, annotation_stream_name)
    return Data(self).get_annotation_stream(data_stream_id, annotation_stream_id, annotation,
                                            start_time, end_time)
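# Usage sketch: a hedged example of filter_stream, mirroring the call made in
# test_07_stream_filter below. The stream id and annotation values come from
# those test fixtures, and CC is assumed to be a CerebralCortex instance.
good_points = CC.filter_stream("6db98dfb-d6e8-4b27-8d55-95b20fa0f751",  # data stream id
                               "test-case",                             # annotation stream name
                               "good")                                  # annotation value
for dp in good_points:
    print(dp.start_time, dp.end_time, dp.sample)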
def update_auth_token(self, user_name: str, auth_token: str,
                      auth_token_issued_time: datetime,
                      auth_token_expiry_time: datetime):
    """
    :param user_name:
    :param auth_token:
    :param auth_token_issued_time:
    :param auth_token_expiry_time:
    :return:
    """
    return Metadata(self).update_auth_token(user_name, auth_token,
                                            auth_token_issued_time,
                                            auth_token_expiry_time)
def store_datastream(self, datastream):
    datastream_identifier = datastream.get_identifier()
    study_ids = datastream.getStudyIDs()  # TO-DO: only add study-ids if they exist
    user_id = datastream.userObj.getID()
    processing_module_id = datastream.processingModuleObj.getID()
    datastream_type = datastream.get_datastream_type()
    metadata = datastream.getMetadata().getMetadata()
    source_ids = datastream.get_source_ids()
    data = datastream.data

    # If datastream_identifier is empty then create a new datastream_identifier in the
    # MySQL database and return the newly added datastream_identifier.
    lastAddedRecordID = Metadata(self.configuration).storeDatastrem(
        datastream_identifier, study_ids, user_id, processing_module_id,
        source_ids, datastream_type, metadata)

    if datastream_identifier == "":
        datastream_identifier = lastAddedRecordID

    dataframe = self.map_datapoint_to_dataframe(datastream_identifier, data)
    self.save_datapoint(dataframe)
def map_datapoint_and_metadata_to_datastream(self, stream_id: int, data: list) -> DataStream:
    """
    Maps the data points and metadata to a DataStream object.

    :param stream_id:
    :param data: list
    :return: DataStream object
    """
    # Query the datastream metadata (MySQL).
    datastream_info = Metadata(self.CC_obj).get_stream_info(stream_id)

    ownerID = datastream_info[0]["owner"]
    name = datastream_info[0]["name"]
    data_descriptor = json.loads(datastream_info[0]["data_descriptor"])
    execution_context = json.loads(datastream_info[0]["execution_context"])
    annotations = json.loads(datastream_info[0]["annotations"])
    stream_type = datastream_info[0]["type"]
    start_time = datastream_info[0]["start_time"]
    end_time = datastream_info[0]["end_time"]

    return DataStream(stream_id, ownerID, name, data_descriptor, execution_context,
                      annotations, stream_type, start_time, end_time, data)
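# Context sketch: the DataStream assembled above is what callers receive from
# get_datastream; a hedged example mirroring test_06_store_stream, assuming CC
# is a CerebralCortex instance.
stream = CC.get_datastream("6db98dfb-d6e8-4b27-8d55-95b20fa0f754", data_type=DataSet.COMPLETE)
print(stream._name, stream._owner, len(stream.data))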
class DataExporter():
    def __init__(self, CC_obj: CerebralCortex, export_dir_path: str,
                 owner_ids: List = None, owner_user_names: List = None,
                 owner_name_regex: str = None, start_time: str = None,
                 end_time: str = None):
        """
        :param CC_obj:
        :param export_dir_path:
        :param owner_ids: owner_user_names and owner_name_regex must be None if using owner_ids
        :param owner_user_names: owner_ids and owner_name_regex must be None if using owner_user_names
        :param owner_name_regex: owner_ids and owner_user_names must be None if using owner_name_regex
        :param start_time:
        :param end_time:
        """
        self.streamData = Data(CC_obj)
        self.export_dir_path = export_dir_path
        self.metadata = Metadata(CC_obj)
        self.owner_ids = owner_ids
        self.owner_user_names = owner_user_names
        self.owner_name_regex = str(owner_name_regex)
        self.start_time = start_time
        self.end_time = end_time

    def start(self):
        if self.owner_ids and self.owner_ids != 'None':
            for owner_id in self.owner_ids:
                owner_name = self.metadata.owner_id_to_name(owner_id)
                self.export_data(owner_id=owner_id, owner_name=owner_name)
        elif self.owner_user_names and self.owner_user_names != 'None':
            for owner_user_name in self.owner_user_names:
                owner_id = self.metadata.owner_name_to_id(owner_user_name)
                self.export_data(owner_id=owner_id, owner_name=owner_user_name)
        elif self.owner_name_regex and self.owner_name_regex != 'None':
            owner_idz = self.metadata.get_owner_ids_by_owner_name_regex(self.owner_name_regex)
            for owner_id in owner_idz:
                owner_name = self.metadata.owner_id_to_name(owner_id["identifier"])
                self.export_data(owner_id=owner_id["identifier"], owner_name=owner_name)

    @calculate_time
    def export_data(self, owner_id=None, owner_name=None):
        rows = self.metadata.get_stream_metadata_by_owner_id(owner_id)
        if rows == "NULL":
            print("No data found for => owner-id: " + owner_id + " - owner-name: " + owner_name)
            return

        for row in rows:
            stream_id = row["identifier"]
            data_start_time = row["start_time"]
            data_end_time = row["end_time"]
            stream_metadata = {
                "identifier": stream_id,
                "owner_id": row["owner"],
                "name": row["name"],
                "data_available": {
                    "start_time": str(data_start_time),
                    "end_time": str(data_end_time)
                }
            }
            data_descriptor = json.loads(row["data_descriptor"])
            execution_context = json.loads(row["execution_context"])
            annotations = json.loads(row["annotations"])

            stream_metadata.update({"data_descriptor": data_descriptor})
            stream_metadata.update({"execution_context": execution_context})
            stream_metadata.update({"annotations": annotations})

            file_path = self.export_dir_path + owner_name
            if not os.path.exists(file_path):
                os.mkdir(file_path)

            # write metadata to json file
            self.write_to_file(file_path + "/" + stream_id + ".json", json.dumps(stream_metadata))

            # load and write stream raw data, one day at a time
            delta = data_end_time - data_start_time
            for i in range(delta.days + 1):
                day = data_start_time + timedelta(days=i)
                day = datetime.strftime(day, "%Y%m%d")
                self.writeStreamDataToZipFile(stream_id, day, file_path)

    def writeStreamDataToZipFile(self, stream_id: uuid, day, file_path: str):
        """
        :param stream_id:
        :param day:
        :param file_path:
        """
        if stream_id:
            where_clause = "identifier='" + stream_id + "' and day='" + str(day) + "'"
        else:
            raise ValueError("Missing stream ID.")

        if self.start_time and self.end_time:
            where_clause += " and start_time>=cast('" + str(self.start_time) + \
                            "' as timestamp) and start_time<=cast('" + str(self.end_time) + "' as timestamp)"
        elif self.start_time and not self.end_time:
            where_clause += " and start_time>=cast('" + str(self.start_time) + "' as timestamp)"
        elif not self.start_time and self.end_time:
            where_clause += " and start_time<=cast('" + str(self.end_time) + "' as timestamp)"

        df = self.streamData.load_data_from_cassandra(self.streamData.datapointTable, where_clause, 1)

        df.write \
            .format("csv") \
            .option("codec", "org.apache.hadoop.io.compress.GzipCodec") \
            .save(file_path + "/" + stream_id)

        os.system("cat " + file_path + "/" + stream_id + "/p* > " + file_path + "/" + stream_id + ".gz")

        if os.path.exists(file_path + "/" + stream_id + "/"):
            shutil.rmtree(file_path + "/" + stream_id + "/", ignore_errors=True)

    def write_to_bz2(self, file_name, data):
        with open(file_name, 'wb+') as outfile:
            compressed_data = bz2.compress(data, 9)
            outfile.write(compressed_data)

    def write_to_file(self, file_name: str, data: str):
        """
        :param file_name:
        :param data:
        """
        with open(file_name, 'w+') as outfile:
            outfile.write(data)
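# Usage sketch: a hedged example of driving DataExporter. The export directory
# is a placeholder assumption; the owner id comes from the test fixtures used
# elsewhere in this code base.
exporter = DataExporter(CC_obj,
                        export_dir_path="/tmp/cc_export/",
                        owner_ids=["06634264-56bc-4c92-abd7-377dbbad79dd"])
exporter.start()  # resolves owner names, then writes <owner>/<stream_id>.json plus gzipped CSV data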
class TestDataStoreEngine(unittest.TestCase):
    testConfigFile = os.path.join(os.path.dirname(__file__), 'res/test_configuration.yml')
    CC = CerebralCortex(testConfigFile, master="local[*]",
                        name="Cerebral Cortex DataStoreEngine Tests",
                        time_zone="US/Central", load_spark=True)
    configuration = CC.configuration
    meta_obj = Metadata(CC)

    def test_01_setup_data(self):
        data_descriptor = {}
        execution_context = json.loads(
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        annotations = {}
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        result = Metadata(self.CC).is_id_created(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", execution_context)
        if result["status"] == "new":
            stream_identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        else:
            stream_identifier = result["id"]

        self.assertEqual(stream_identifier, "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        Metadata(self.CC).store_stream_info(
            stream_identifier, "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
            data_descriptor, execution_context, annotations, stream_type,
            start_time, end_time, result["status"])

    def test_02_get_stream_info(self):
        stream_info = Metadata(self.CC).get_stream_info("6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        self.assertEqual(stream_info[0]["identifier"], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")
        self.assertEqual(stream_info[0]["owner"], "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertEqual(stream_info[0]["name"], "data-store-test")
        self.assertEqual(stream_info[0]["data_descriptor"], "{}")
        self.assertEqual(
            stream_info[0]["execution_context"],
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        self.assertEqual(stream_info[0]["annotations"], "{}")
        self.assertEqual(stream_info[0]["type"], "datastream")

    def test_03_append_annotations(self):
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {}, {}, "datastream1")
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {"some": "none"}, {}, "datastream1")
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {"a": "b"}, {}, {}, "datastream1")
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-diagnostic_diff", {}, {}, {}, "datastream1")

        annotations_unchanged = Metadata(self.CC).append_annotations(
            "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
            "06634264-56bc-4c92-abd7-377dbbad79dd",
            "data-store-test", {},
            json.loads(
                '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
            ),
            {}, "datastream")
        self.assertEqual(annotations_unchanged, "unchanged")

    def test_04_get_stream_ids_by_name(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_name = Metadata(self.CC).get_stream_ids_by_name("data-store-test")
        self.assertIsInstance(by_name, list)
        self.assertEqual(by_name[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd", start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd", start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_05_get_stream_ids_of_owner(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_id = Metadata(self.CC).get_stream_ids_of_owner("06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_id, list)
        self.assertEqual(by_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_06_store_stream(self):
        identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        owner = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name = "data-store-test"
        data_descriptor = {}
        execution_context = json.loads(
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        annotations = {}
        datapoints = []
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)
        localtz = timezone('US/Central')
        start_time = localtz.localize(start_time)
        end_time = localtz.localize(end_time)
        sample = {'Foo3': 123}

        dp1 = DataPoint(start_time=start_time, end_time=end_time, sample=sample)
        datapoints.append(dp1)

        ds = DataStream(identifier, owner, name, data_descriptor, execution_context,
                        annotations, stream_type, start_time, end_time, datapoints)
        self.CC.save_datastream(ds)

        stream = self.CC.get_datastream(identifier, data_type=DataSet.COMPLETE)
        self.assertEqual(stream._identifier, identifier)
        self.assertEqual(stream._owner, owner)
        self.assertEqual(stream._name, name)
        self.assertEqual(stream._data_descriptor, data_descriptor)
        self.assertEqual(stream._execution_context, execution_context)
        self.assertEqual(stream._annotations, annotations)
        self.assertEqual(stream._datastream_type, stream_type)
        self.assertEqual(stream.data[0].start_time, start_time)
        self.assertEqual(stream.data[0].end_time, end_time)
        self.assertEqual(stream.data[0].sample, sample)

    def test_07_stream_filter(self):
        identifier_anno = "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"
        identifier_data = "6db98dfb-d6e8-4b27-8d55-95b20fa0f751"
        owner_id = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name_anno = "data-store-test-annotation"
        name_data = "data-store-test-data"
        data_descriptor = {}
        execution_context_anno = json.loads(
            '{"execution_context": {"algorithm": {"method": "test.data_store.annotation.filter"}}}'
        )
        execution_context_data = json.loads(
            '{"execution_context": {"algorithm": {"method": "test.data_store.data.filter"}}}'
        )
        annotations_data = json.loads(
            '[{"name": "test-case","identifier": "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"}]'
        )
        annotations_anno = {}
        datapoints_anno = []
        datapoints_data = []

        result_data = Metadata(self.CC).is_id_created(owner_id, name_data, execution_context_data)
        if result_data["status"] != "new":
            identifier_data = result_data["id"]
        Metadata(self.CC).store_stream_info(
            identifier_anno, owner_id, name_anno, data_descriptor, execution_context_anno,
            annotations_anno, "annotations",
            datetime.datetime(2017, 4, 24, 0, 0, 1), datetime.datetime(2017, 4, 24, 0, 0, 5),
            result_data["status"])

        result_anno = Metadata(self.CC).is_id_created(owner_id, name_data, execution_context_data)
        if result_anno["status"] != "new":
            identifier_anno = result_anno["id"]
        Metadata(self.CC).store_stream_info(
            identifier_data, owner_id, name_data, data_descriptor, execution_context_data,
            annotations_data, "datastream",
            datetime.datetime(2017, 4, 24, 0, 0, 1), datetime.datetime(2017, 4, 24, 0, 0, 5),
            result_anno["status"])

        for i in range(0, 5):
            if i % 2 == 0:
                sample_anno = 'good'
            else:
                sample_anno = 'bad'
            sample_data = i, i + 2, i + 3

            start_time_anno = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_anno = datetime.datetime(2017, 4, 24, 0, 0, (5 + i))
            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))

            localtz = timezone('US/Central')
            start_time_anno = localtz.localize(start_time_anno)
            end_time_anno = localtz.localize(end_time_anno)
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)

            datapoints_anno.append(
                DataPoint(start_time=start_time_anno, end_time=end_time_anno, sample=sample_anno))
            datapoints_data.append(
                DataPoint(start_time=start_time_data, end_time=end_time_data, sample=sample_data))

        ds_anno = DataStream(uuid.UUID(identifier_anno), owner_id, name_anno, data_descriptor,
                             execution_context_anno, annotations_data, "annotations",
                             start_time_anno, end_time_anno, datapoints_anno)
        ds_data = DataStream(uuid.UUID(identifier_data), owner_id, name_data, data_descriptor,
                             execution_context_data, annotations_anno, "datastream",
                             start_time_anno, end_time_anno, datapoints_data)

        self.CC.save_datastream(ds_anno)
        self.CC.save_datastream(ds_data)

        filtered_stream = self.CC.filter_stream(identifier_data, "test-case", "good")
        self.assertEqual(len(filtered_stream), 5)

        for i in range(0, 5):
            sample_data = [i, i + 2, i + 3]
            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)
            self.assertEqual(filtered_stream[i].start_time, start_time_data)
            self.assertEqual(filtered_stream[i].end_time, end_time_data)
            self.assertEqual(filtered_stream[i].sample, sample_data)
def store_data_to_influxdb(self, datastream: DataStream):
    """
    :param datastream:
    """
    st = datetime.now()
    client = InfluxDBClient(host=self.influxdbIP, port=self.influxdbPort,
                            username=self.influxdbUser, password=self.influxdbPassword,
                            database=self.influxdbDatabase)

    datapoints = datastream.data
    stream_identifier = datastream.identifier
    stream_owner_id = datastream.owner
    stream_owner_name = Metadata(self.CC_obj).owner_id_to_name(stream_owner_id)
    stream_name = datastream.name

    if datastream.data_descriptor:
        total_dd_columns = len(datastream.data_descriptor)
        data_descriptor = datastream.data_descriptor
    else:
        data_descriptor = []
        total_dd_columns = 0

    influx_data = []
    for datapoint in datapoints:
        object = {}
        object['measurement'] = stream_name
        object['tags'] = {'stream_id': stream_identifier,
                          'owner_id': stream_owner_id,
                          'owner_name': stream_owner_name}
        object['time'] = datapoint.start_time

        values = datapoint.sample
        if isinstance(values, tuple):
            values = list(values)
        else:
            try:
                values = [float(values)]
            except:
                try:
                    values = list(map(float, values.split(',')))
                except:
                    values = values

        try:
            object['fields'] = {}
            if isinstance(values, list):
                for i, sample_val in enumerate(values):
                    if len(values) == total_dd_columns:
                        dd = data_descriptor[i]
                        if "NAME" in dd:
                            object['fields'][dd["NAME"]] = sample_val
                        else:
                            object['fields']['value_' + str(i)] = sample_val
                    else:
                        object['fields']['value_' + str(i)] = sample_val
            else:
                dd = data_descriptor[0]
                if not values:
                    values = "NULL"
                try:
                    values = float(values)
                except:
                    values = values
                if "NAME" in dd:
                    object['fields'][dd["NAME"]] = values
                else:
                    object['fields']['value_0'] = values
        except:
            try:
                values = json.dumps(values)
                object['fields']['value_0'] = values
            except:
                cc_log("Datapoint sample values conversion: " + str(values), "WARNING")
                object['fields']['value_0'] = str(values)

        influx_data.append(object)

    et = datetime.now()
    # print('InfluxDB - Yielding:', stream_owner_id, len(influx_data), stream_identifier)
    try:
        client.write_points(influx_data)
        et2 = datetime.now()
        # print("Influx Time BreakDown: Processing: ", et - st, " Inserting: ", et2 - et, " Size: ", len(influx_data))
    except:
        cc_log("Failed to write data points to InfluxDB for stream: " + str(stream_identifier), "WARNING")
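# Illustrative sketch of the point layout store_data_to_influxdb builds before
# calling client.write_points(): field names fall back to value_<i> when the
# data descriptor has no NAME entry. Owner name and timestamp shown here are
# placeholder assumptions.
example_point = {
    "measurement": "data-store-test",
    "tags": {"stream_id": "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
             "owner_id": "06634264-56bc-4c92-abd7-377dbbad79dd",
             "owner_name": "example-owner"},
    "time": "2017-04-24T00:00:01Z",
    "fields": {"value_0": 123.0},
}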
def get_kafka_offsets(self, topic: str) -> dict:
    """
    :param topic:
    :return:
    """
    return Metadata(self).get_kafka_offsets(topic)
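# Usage sketch: a hedged pairing of the two Kafka offset helpers above; the
# topic name, partition, and offset values are illustrative only, and CC is
# assumed to be a CerebralCortex instance.
CC.store_or_update_Kafka_offset("filequeue", "0", "100", "200")
print(CC.get_kafka_offsets("filequeue"))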