def store(data: OrderedDict, input_streams: dict, output_streams: dict, metadata,
          CC_obj: CerebralCortex, config: dict):
    """
    Store diagnostic results with their metadata in the data-store
    :param data:
    :param input_streams:
    :param output_streams:
    :param metadata:
    :param CC_obj:
    :param config:
    """
    if data:
        # basic output stream info
        owner = input_streams[0]["owner_id"]
        dd_stream_id = output_streams["id"]
        dd_stream_name = output_streams["name"]
        stream_type = "ds"

        data_descriptor = metadata["dd"]
        execution_context = metadata["ec"]
        annotations = metadata["anno"]

        ds = DataStream(identifier=dd_stream_id, owner=owner, name=dd_stream_name,
                        data_descriptor=data_descriptor,
                        execution_context=execution_context, annotations=annotations,
                        stream_type=stream_type, data=data)

        CC_obj.save_datastream(ds, "datastream")
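# Typical call site (see the marker functions below): merged, labeled windows
# plus the stream bookkeeping dictionaries are handed straight to store(), e.g.
#
#   store(merged_windows, input_streams, output_stream, metadata, CC, config)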
def filter_battery_off_windows(stream_id: uuid, stream_name: str, main_stream_windows: dict,
                               owner_id: uuid, config: dict, CC_obj: CerebralCortex) -> dict:
    """
    Remove main-stream windows that are fully covered by a battery-off marker window.
    :param stream_id:
    :param stream_name:
    :param main_stream_windows:
    :param owner_id:
    :param config:
    :param CC_obj:
    :return: windows that survived the filter
    """
    start_time = ""
    end_time = ""

    # load phone battery data
    phone_battery_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(str(stream_id) + config["stream_names"]["phone_battery"] + str(owner_id)))
    phone_battery_marker_stream = CC_obj.get_datastream(
        phone_battery_marker_stream_id, data_type=DataSet.ONLY_DATA,
        start_time=start_time, end_time=end_time)

    # load sensor battery data
    # NOTE: every branch in the original loaded the same phone-battery marker
    # stream id; sensor-specific battery stream ids are presumably intended here
    sensor_battery_marker_stream = {}
    if stream_name in (config["stream_names"]["autosense_ecg"],
                       config["stream_names"]["autosense_rip"],
                       config["stream_names"]["motionsense_hrv_accel_right"],
                       config["stream_names"]["motionsense_hrv_accel_left"]):
        sensor_battery_marker_stream = CC_obj.get_datastream(
            phone_battery_marker_stream_id, data_type=DataSet.ONLY_DATA,
            start_time=start_time, end_time=end_time)

    results = OrderedDict()
    for key, data in main_stream_windows.items():
        # reset the flag per window; a window is dropped only if a battery-off
        # marker window fully covers it
        battery_marker = 0
        for phone_key, phone_data in phone_battery_marker_stream.items():
            if phone_key.start_time <= key.start_time and phone_key.end_time >= key.end_time:
                battery_marker = 1
        for sensor_key, sensor_data in sensor_battery_marker_stream.items():
            if sensor_key.start_time <= key.start_time and sensor_key.end_time >= key.end_time:
                battery_marker = 1

        if battery_marker != 1:
            results[key] = data

    return results
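# A minimal illustration of the containment test used above, with plain
# datetimes standing in for the window keys (hypothetical values):
from datetime import datetime

window_start, window_end = datetime(2017, 4, 24, 0, 1), datetime(2017, 4, 24, 0, 2)
marker_start, marker_end = datetime(2017, 4, 24, 0, 0), datetime(2017, 4, 24, 0, 5)

# the main-stream window is dropped only when a battery-off marker window
# fully covers it
covered = marker_start <= window_start and marker_end >= window_end
assert covered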
def store(input_streams: dict, data: OrderedDict, CC_obj: CerebralCortex, config: dict,
          algo_type: str):
    """
    Store diagnostic results with their metadata in the data-store
    :param input_streams:
    :param data:
    :param CC_obj:
    :param config:
    :param algo_type:
    """
    parent_stream_id = input_streams[0]["id"]
    stream_name = input_streams[0]["name"]

    result = process_data(stream_name, input_streams, algo_type, config)
    data_descriptor = json.loads(result["dd"])
    execution_context = json.loads(result["ec"])
    annotations = json.loads(result["anno"])

    metadata = CC_obj.get_datastream(parent_stream_id, data_type=DataSet.ONLY_METADATA)
    owner = metadata.owner
    name = metadata.name
    stream_type = "datastream"

    ds = DataStream(owner=owner, name=name, data_descriptor=data_descriptor,
                    execution_context=execution_context, annotations=annotations,
                    stream_type=stream_type, data=data)
    CC_obj.save_datastream(ds)
def sensor_failure_marker(attachment_marker_stream_id: uuid, mshrv_accel_id: uuid,
                          mshrv_gyro_id: uuid, wrist: str, owner_id: uuid,
                          dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label a window as a sensor failure when the accelerometer and gyroscope disagree
    (one produces on-body marker results while the other produces none). All the labeled
    data (st, et, label) with their metadata are then stored in a datastore.
    :param attachment_marker_stream_id:
    :param mshrv_accel_id:
    :param mshrv_gyro_id:
    :param wrist:
    :param owner_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    # using stream_id, data-diagnostic-stream-id, and owner id to generate
    # a unique stream ID for the sensor-failure marker
    sensor_failure_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, str(
        str(attachment_marker_stream_id) + dd_stream_name + str(owner_id) + "SENSOR FAILURE MARKER"))

    stream_days = get_stream_days(attachment_marker_stream_id, sensor_failure_stream_id, CC)

    try:
        for day in stream_days:
            # load attachment marker stream data to be diagnosed
            attachment_marker_stream = CC.get_datastream(attachment_marker_stream_id, day,
                                                         data_type=DataSet.COMPLETE)
            results = OrderedDict()

            if attachment_marker_stream.data:
                for marker_window in attachment_marker_stream.data:
                    if "MOTIONSENSE-ON-BODY" in marker_window.sample:
                        mshrv_accel_stream = CC.get_datastream(
                            mshrv_accel_id, day, data_type=DataSet.ONLY_DATA,
                            start_time=marker_window.start_time,
                            end_time=marker_window.end_time)
                        mshrv_gyro_stream = CC.get_datastream(
                            mshrv_gyro_id, day, data_type=DataSet.ONLY_DATA,
                            start_time=marker_window.start_time,
                            end_time=marker_window.end_time)

                        results_accel = process_windows(mshrv_accel_stream, config)
                        results_gyro = process_windows(mshrv_gyro_stream, config)

                        key = marker_window.start_time, marker_window.end_time

                        # mark a window as a failure when exactly one of the two
                        # sensors produced marker results
                        if results_accel > 0 and results_gyro < 1:
                            sample = "MOTIONSENSE-HRV-" + str(wrist) + "-ACCELEROMETER-FAILURE"
                            results.setdefault(key, []).append(
                                DataPoint(marker_window.start_time, marker_window.end_time, sample))
                        elif results_accel < 1 and results_gyro > 0:
                            sample = "MOTIONSENSE-HRV-" + str(wrist) + "-GYRO-FAILURE"
                            results.setdefault(key, []).append(
                                DataPoint(marker_window.start_time, marker_window.end_time, sample))

            merged_windows = merge_consective_windows(results)
            if len(results) > 0:
                input_streams = [{"owner_id": owner_id,
                                  "id": str(attachment_marker_stream_id),
                                  "name": attachment_marker_stream.name}]
                output_stream = {"id": sensor_failure_stream_id,
                                 "name": dd_stream_name,
                                 "algo_type": config["algo_type"]["sensor_failure"]}
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def battery_marker(raw_stream_id: uuid, stream_name: str, owner_id, dd_stream_name,
                   CC: CerebralCortex, config: dict, start_time=None, end_time=None):
    """
    This algorithm uses battery percentages to decide whether the device was powered off
    or the battery was low. All the labeled data (st, et, label) with their metadata are
    then stored in a datastore.
    :param raw_stream_id:
    :param CC:
    :param config:
    """
    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the battery marker
        battery_marker_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS, str(str(raw_stream_id) + dd_stream_name + str(owner_id)))

        stream_days = get_stream_days(raw_stream_id, battery_marker_stream_id, CC)

        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id, data_type=DataSet.COMPLETE, day=day)

            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data, stream_name, config)
                merged_windows = merge_consective_windows(results)

                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id),
                                      "name": stream_name}]
                    output_stream = {"id": battery_marker_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["battery_marker"]}
                    labelled_windows = mark_windows(battery_marker_stream_id,
                                                    merged_windows, CC, config)
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(labelled_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def setUpClass(cls):
    configuration_file = os.path.join(os.path.dirname(__file__),
                                      '../../../cerebralcortex.yml')
    cls.CC = CerebralCortex(configuration_file, master="local[*]",
                            name="Data Diagnostic App", time_zone="US/Central")
    cls.config = Configuration(
        filepath="../data_diagnostic/data_diagnostic_config.yml").config

    cls.sample_battery_data = []
    for row in range(1, 481):
        # synthetic battery profile; the elif chain covers every row, including
        # the boundary rows (60, 120, 240, 300, 360) that the original open
        # intervals skipped
        if row < 61:
            battery = 87.0
        elif row < 120:
            battery = 0.0
        elif row < 240:
            battery = 87.0
        elif row < 300:
            battery = 7.0
        elif row < 360:
            battery = 0.0
        else:
            battery = 60.0

        tz = pytz.timezone("US/Central")
        start_time = tz.localize(
            datetime.fromtimestamp(int(round((time.time() + row) * 1000)) / 1e3))
        dp = DataPoint(start_time=start_time, sample=battery)
        cls.sample_battery_data.append(dp)

    cls.window_size = 60
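# The fixture above produces 480 datapoints, one second apart, tracing a battery
# profile of roughly 87% -> 0% (powered off) -> 87% -> 7% (low) -> 0% -> 60%,
# which the battery-marker tests can then window and label.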
def mobile_app_availability_marker(raw_stream_id: uuid, stream_name: str, owner_id,
                                   dd_stream_name, CC: CerebralCortex, config: dict,
                                   start_time=None, end_time=None):
    """
    This algorithm uses phone battery percentages to decide whether the mobile app was
    available or unavailable. Theoretically, phone battery data shall be collected 24/7.
    :param raw_stream_id:
    :param CC:
    :param config:
    """
    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the app-availability marker
        app_availability_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, str(
            str(raw_stream_id) + dd_stream_name + str(owner_id) + "mobile app availability marker"))

        stream_days = get_stream_days(raw_stream_id, app_availability_marker_stream_id, CC)

        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id, data_type=DataSet.COMPLETE, day=day)

            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data, config)
                merged_windows = merge_consective_windows(results)

                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id),
                                      "name": stream_name}]
                    output_stream = {"id": app_availability_marker_stream_id,
                                     "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["app_availability_marker"]}
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(merged_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def run():
    parser = argparse.ArgumentParser(description='CerebralCortex Data Exporter.')
    parser.add_argument("-o", "--output_dir",
                        help="Directory path where exported data will be stored",
                        required=True)
    parser.add_argument("-idz", "--owner_ids", help="Comma separated users' UUIDs",
                        required=False)
    parser.add_argument("-namez", "--owner_user_names", help="Comma separated user-names",
                        required=False)
    parser.add_argument("-nr", "--owner_name_regex",
                        help="User name pattern. For example, '-nr ali' will export all "
                             "users' data that start with user-name 'ali'",
                        required=False)
    args = vars(parser.parse_args())

    if args["owner_ids"] and (args["owner_user_names"] or args["owner_name_regex"]):
        raise ValueError("Expecting owner_ids: got owner_user_names and/or owner_name_regex too.")
    elif args["owner_user_names"] and (args["owner_ids"] or args["owner_name_regex"]):
        raise ValueError("Expecting owner_user_names: got owner_ids and/or owner_name_regex too.")
    elif args["owner_name_regex"] and (args["owner_ids"] or args["owner_user_names"]):
        raise ValueError("Expecting owner_name_regex: got owner_ids and owner_user_names too.")

    testConfigFile = os.path.join(os.path.dirname(__file__), '../../cerebralcortex.yml')

    CC_obj = CerebralCortex(testConfigFile, master="local[*]",
                            name="Cerebral Cortex Data Importer and Exporter",
                            time_zone="US/Central", load_spark=True)
    CC_obj.sc.setLogLevel("warn")

    if args["owner_ids"]:
        DataExporter(CC_obj, args["output_dir"],
                     owner_ids=args["owner_ids"].split(",")).start()
    elif args["owner_user_names"]:
        DataExporter(CC_obj, args["output_dir"],
                     owner_user_names=args["owner_user_names"].split(",")).start()
    elif args["owner_name_regex"]:
        DataExporter(CC_obj, args["output_dir"],
                     owner_name_regex=args["owner_name_regex"]).start()
    else:
        parser.print_help()
        print("Please provide at least one of these: comma separated owner-ids OR "
              "comma separated owner-names OR owner-name pattern")
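# Example invocations (the script name is hypothetical; paths are for illustration):
#
#   python data_exporter.py -o /tmp/cc_export -namez ali,bob
#   python data_exporter.py -o /tmp/cc_export -nr ali
#
# Exactly one of -idz / -namez / -nr should be given; mixing them raises ValueError.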
def gsr_response(stream_id: uuid, start_time: datetime, end_time: datetime,
                 label_attachment: str, label_off: str, CC_obj: CerebralCortex,
                 config: dict) -> str:
    """
    This method analyzes galvanic skin response to label a window as improper attachment
    or sensor-off-body.
    :param stream_id: UUID
    :param start_time:
    :param end_time:
    :param label_attachment:
    :param label_off:
    :param CC_obj:
    :param config:
    :return: string
    """
    datapoints = CC_obj.get_datastream(stream_id, start_time=start_time, end_time=end_time,
                                       data_type=DataSet.COMPLETE)

    vals = []
    # iterate over the stream's datapoints, not the stream object itself
    for dp in datapoints.data:
        vals.append(dp.sample)

    # values between the two thresholds yield no label (implicit None)
    if stat.median(stat.array(vals)) < config["attachment_marker"]["improper_attachment"]:
        return label_attachment
    elif stat.median(stat.array(vals)) > config["attachment_marker"]["gsr_off_body"]:
        return label_off
def get_stream_days(raw_stream_id: uuid, dd_stream_id: uuid, CC: CerebralCortex) -> List:
    """
    Returns a list of days (YYYYMMDD) that still need to be diagnosed for a participant.
    :param raw_stream_id:
    :param dd_stream_id:
    :param CC:
    """
    dd_stream_days = CC.get_stream_start_end_time(dd_stream_id)["end_time"]

    if not dd_stream_days:
        # no diagnostic output yet: diagnose every day of the raw stream
        stream_duration = CC.get_stream_start_end_time(raw_stream_id)
        days = stream_duration["end_time"] - stream_duration["start_time"]
        stream_days = []
        for day in range(days.days + 1):
            stream_days.append(
                (stream_duration["start_time"] + timedelta(days=day)).strftime('%Y%m%d'))
    else:
        # diagnostics exist: only the day after the last diagnosed day is pending
        stream_days = [(dd_stream_days + timedelta(days=1)).strftime('%Y%m%d')]

    return stream_days
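# A minimal sketch of the day-list computation above, with hypothetical
# start/end times standing in for get_stream_start_end_time() results:
from datetime import datetime, timedelta

start, end = datetime(2017, 4, 24), datetime(2017, 4, 26)
days = [(start + timedelta(days=d)).strftime('%Y%m%d')
        for d in range((end - start).days + 1)]
assert days == ['20170424', '20170425', '20170426']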
def phone_screen_touch_marker(raw_stream_id: uuid, raw_stream_name: str, owner_id,
                              dd_stream_name, CC: CerebralCortex, config: dict,
                              start_time=None, end_time=None):
    """
    This is not part of the core data diagnostic suite. It only calculates how many
    screen touches there are.
    :param raw_stream_id:
    :param CC:
    :param config:
    """
    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the screen-touch marker
        screen_touch_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, str(
            str(raw_stream_id) + dd_stream_name + str(owner_id) + "mobile phone screen touch marker"))

        stream_days = get_stream_days(raw_stream_id, screen_touch_stream_id, CC)

        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id, data_type=DataSet.COMPLETE, day=day,
                                       start_time=start_time, end_time=end_time)

            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data)
                merged_windows = merge_consective_windows(results)

                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id),
                                      "name": raw_stream_name}]
                    output_stream = {"id": screen_touch_stream_id, "name": dd_stream_name,
                                     # NOTE: reuses the app-availability algo type; the
                                     # config has no dedicated screen-touch entry
                                     "algo_type": config["algo_type"]["app_availability_marker"]}
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(merged_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def migrate(folder_path: str, data_block_size):
    """
    Migrate data from the old CerebralCortex structure to the new CerebralCortex structure
    :param folder_path:
    :param data_block_size:
    """
    configuration_file = os.path.join(os.path.dirname(__file__), '../../cerebralcortex.yml')
    CC = CerebralCortex(configuration_file, master="local[*]", name="Data Migrator API",
                        time_zone="US/Central", load_spark=True)

    if not folder_path:
        raise ValueError("Path to the data directory cannot be empty.")

    for filename in glob.iglob(folder_path + '/**/*.json', recursive=True):
        print(str(datetime.datetime.now()) + " -- Started processing file " + filename)

        # file name segments are '+'-separated: tmp[0] is the owner, tmp[2] the
        # algorithm name, and tmp[1] plus tmp[3:] form the stream name
        tmp = filename.split("/")[-1].split("+")
        owner_id = tmp[0]
        stream_id = str(uuid.uuid3(uuid.NAMESPACE_DNS, str(tmp[0] + " " + tmp[1])))
        name = " ".join(tmp[3:]).strip().replace(".json", "")
        name = tmp[1] + " " + name
        pm_algo_name = tmp[2]
        data_filename = filename.replace(".json", ".csv.bz2")

        old_schema = read_file(filename)
        execution_context = get_execution_context(pm_algo_name, old_schema)
        data_descriptor = get_data_descriptor(old_schema)
        annotations = get_annotations()
        print(str(datetime.datetime.now()) + " -- Schema building is complete")
        print(str(datetime.datetime.now()) +
              " -- Started unzipping file and adding records in Cassandra")

        for data_block in bz2file_to_datapoints(data_filename, data_block_size):
            persist_data(execution_context, data_descriptor, annotations, stream_id,
                         name, owner_id, data_block, CC)

        print(str(datetime.datetime.now()) + " -- Completed processing file " + filename)
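# A small illustration of the filename parsing above, using a made-up
# (hypothetical) file name:
parts = "owner-uuid+left-wrist+cStress+data+quality.json".split("+")
assert parts[0] == "owner-uuid"   # owner_id
assert parts[2] == "cStress"      # pm_algo_name
# stream name = tmp[1] + " " + joined remainder, with the extension stripped
assert "left-wrist " + " ".join(parts[3:]).replace(".json", "") == "left-wrist data quality"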
def attachment_marker(stream_id: uuid, CC_obj: CerebralCortex, config: dict,
                      start_time=None, end_time=None):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param stream_id: UUID
    :param CC_obj: CerebralCortex object
    :param config: Data diagnostics configurations
    """
    stream = CC_obj.get_datastream(stream_id, data_type=DataSet.COMPLETE,
                                   start_time=start_time, end_time=end_time)

    results = OrderedDict()
    threshold_val = None
    stream_name = stream._name

    if stream_name == config["stream_names"]["autosense_ecg"]:
        threshold_val = config['attachment_marker']['ecg_on_body']
        label_on = config['labels']['ecg_on_body']
        label_off = config['labels']['ecg_off_body']
    elif stream_name == config["stream_names"]["autosense_rip"]:
        threshold_val = config['attachment_marker']['rip_on_body']
        label_on = config['labels']['rip_on_body']
        label_off = config['labels']['rip_off_body']
    else:
        raise ValueError("Incorrect sensor type.")

    windowed_data = window(stream.data, config['general']['window_size'], False)

    for key, data in windowed_data.items():
        # remove outliers from a window's data
        normal_values = outlier_detection(data)

        if stat.variance(normal_values) < threshold_val:
            results[key] = label_off
        else:
            results[key] = label_on

    merged_windows = merge_consective_windows(results)
    input_streams = [{"id": str(stream_id), "name": stream_name}]
    store(input_streams, merged_windows, CC_obj, config,
          config["algo_names"]["attachment_marker"])
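# A minimal sketch of the variance test above. The threshold value is
# hypothetical, and outlier_detection is assumed to return the window's
# samples with outliers removed:
import statistics

window_samples = [0.01, 0.02, 0.015, 0.012]   # low variance => sensor off body
threshold_val = 0.5
label = "off-body" if statistics.variance(window_samples) < threshold_val else "on-body"
assert label == "off-body"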
def sensor_availability(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                        dd_stream_name, phone_physical_activity, CC: CerebralCortex,
                        config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away from the
    phone or the sensor.
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param phone_physical_activity:
    :param CC:
    :param config:
    """
    # using stream_id, data-diagnostic-stream-id, and owner id to generate
    # a unique stream ID for the wireless marker
    wireless_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(str(raw_stream_id) + dd_stream_name + str(owner_id)))

    stream_days = get_stream_days(raw_stream_id, wireless_marker_stream_id, CC)

    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_datastream(raw_stream_id, day, data_type=DataSet.COMPLETE)

        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, day, CC, phone_physical_activity, config)
            merged_windows = merge_consective_windows(results)

            if len(merged_windows) > 0:
                input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id),
                                  "name": stream_name}]
                output_stream = {"id": wireless_marker_stream_id, "name": dd_stream_name,
                                 "algo_type": config["algo_type"]["sensor_unavailable_marker"]}
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata, CC, config)
def packet_loss_marker(stream_id: uuid, CC_obj: CerebralCortex, config: dict,
                       start_time=None, end_time=None):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param stream_id:
    :param CC_obj:
    :param config:
    """
    stream = CC_obj.get_datastream(stream_id, data_type=DataSet.COMPLETE,
                                   start_time=start_time, end_time=end_time)

    name = stream._name
    results = OrderedDict()

    if name == config["sensor_types"]["autosense_ecg"]:
        sampling_rate = config["sampling_rate"]["ecg"]
        threshold_val = config["packet_loss_marker"]["ecg_acceptable_packet_loss"]
        label = config["labels"]["ecg_packet_loss"]
        windowed_data = window(stream.data, config['general']['window_size'], False)
    elif name == config["sensor_types"]["autosense_rip"]:
        sampling_rate = config["sampling_rate"]["rip"]
        threshold_val = config["packet_loss_marker"]["rip_acceptable_packet_loss"]
        label = config["labels"]["rip_packet_loss"]
        windowed_data = window(stream.data, config['general']['window_size'], False)
    elif name == config["sensor_types"]["motionsense_accel"]:
        sampling_rate = config["sampling_rate"]["motionsense"]
        threshold_val = config["packet_loss_marker"]["motionsense_acceptable_packet_loss"]
        label = config["labels"]["motionsense_packet_loss"]
        motionsense_accel_magni = magnitude(stream)
        windowed_data = window(motionsense_accel_magni.data,
                               config['general']['window_size'], False)
    else:
        raise ValueError("Incorrect sensor type.")

    for key, data in windowed_data.items():
        available_packets = len(data)
        expected_packets = sampling_rate * config['general']['window_size']

        if (available_packets / expected_packets) < threshold_val:
            results[key] = label

    merged_windows = merge_consective_windows(results)
    input_streams = [{"id": str(stream_id), "name": name}]
    store(input_streams, merged_windows, CC_obj, config,
          config["algo_names"]["packet_loss_marker"])
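# Worked example of the packet-loss test above, with hypothetical numbers:
# at 64 Hz over a 60 s window, 3840 packets are expected. If only 3500 arrive,
# 3500 / 3840 ~= 0.911, which is below an acceptable-loss threshold of 0.95,
# so the window would be labeled as packet loss.
sampling_rate, window_size, threshold_val = 64, 60, 0.95
available_packets = 3500
expected_packets = sampling_rate * window_size   # 3840
assert (available_packets / expected_packets) < threshold_val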
def battery_marker(stream_id: uuid, CC_obj: CerebralCortex, config: dict,
                   start_time=None, end_time=None):
    """
    This algorithm uses battery percentages to decide whether the phone was powered off
    or the battery was low. All the labeled data (st, et, label) with its metadata are
    then stored in a datastore.
    :param stream_id:
    :param CC_obj:
    :param config:
    """
    results = OrderedDict()
    stream = CC_obj.get_datastream(stream_id, data_type=DataSet.COMPLETE,
                                   start_time=start_time, end_time=end_time)

    windowed_data = window(stream.data, config['general']['window_size'], True)
    name = stream._name

    for key, data in windowed_data.items():
        samples = [float(k.sample) for k in data]

        if name == config["sensor_types"]["phone_battery"]:
            results[key] = phone_battery(samples, config)
        elif name == config["sensor_types"]["motionsense_battery"]:
            results[key] = motionsense_battery(samples, config)
        elif name == config["sensor_types"]["autosense_battery"]:
            results[key] = autosense_battery(samples, config)
        else:
            raise ValueError("Incorrect sensor type.")

    merged_windows = merge_consective_windows(results)
    input_streams = [{"id": str(stream_id), "name": name}]
    store(input_streams, merged_windows, CC_obj, config,
          config["algo_names"]["battery_marker"])
def attachment_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                      dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    """
    # TODO: quality streams could be multiple so find the one computed with CC
    # using stream_id, data-diagnostic-stream-id, and owner id to generate
    # a unique stream ID for the attachment marker
    attachment_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(str(raw_stream_id) + dd_stream_name + str(owner_id)))

    stream_days = get_stream_days(raw_stream_id, attachment_marker_stream_id, CC)

    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_datastream(raw_stream_id, day, data_type=DataSet.COMPLETE)

        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, config)
            merged_windows = merge_consective_windows(results)

            input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id),
                              "name": stream_name}]
            output_stream = {"id": attachment_marker_stream_id, "name": dd_stream_name,
                             "algo_type": config["algo_type"]["attachment_marker"]}
            metadata = get_metadata(dd_stream_name, input_streams, config)
            store(merged_windows, input_streams, output_stream, metadata, CC, config)
def diagnose_pipeline(participant_id: uuid, CC: CerebralCortex, config: dict):
    """
    Contains pipeline execution of all the diagnosis algorithms
    :param participant_id:
    :param CC:
    :param config:
    """
    # get all the streams belonging to a participant
    streams = CC.get_participant_streams(participant_id)
    if streams and len(streams) > 0:

        # phone battery
        if config["stream_names"]["phone_battery"] in streams:
            battery_marker(
                streams[config["stream_names"]["phone_battery"]]["identifier"],
                streams[config["stream_names"]["phone_battery"]]["name"],
                participant_id,
                config["stream_names"]["phone_battery_marker"], CC, config)

            # mobile phone availability marker
            mobile_app_availability_marker(
                streams[config["stream_names"]["phone_battery"]]["identifier"],
                streams[config["stream_names"]["phone_battery"]]["name"],
                participant_id,
                config["stream_names"]["app_availability_marker"], CC, config)

        # autosense battery
        if config["stream_names"]["autosense_battery"] in streams:
            battery_marker(
                streams[config["stream_names"]["autosense_battery"]]["identifier"],
                streams[config["stream_names"]["autosense_battery"]]["name"],
                participant_id,
                config["stream_names"]["autosense_battery_marker"], CC, config)

        # TODO: Motionsense battery values are not available.
        # TODO: Uncomment the following code when the motionsense battery values are available.
        # if config["stream_names"]["motionsense_hrv_battery_right"] in streams:
        #     battery_marker(streams[config["stream_names"]["motionsense_hrv_battery_right"]]["identifier"],
        #                    streams[config["stream_names"]["motionsense_hrv_battery_right"]]["name"],
        #                    participant_id,
        #                    config["stream_names"]["motionsense_hrv_battery_right_marker"], CC, config)
        # if config["stream_names"]["motionsense_hrv_battery_left"] in streams:
        #     battery_marker(streams[config["stream_names"]["motionsense_hrv_battery_left"]]["identifier"],
        #                    streams[config["stream_names"]["motionsense_hrv_battery_left"]]["name"],
        #                    participant_id,
        #                    config["stream_names"]["motionsense_hrv_battery_left_marker"], CC, config)

        ### Sensor unavailable - wireless disconnection
        if config["stream_names"]["phone_physical_activity"] in streams:
            phone_physical_activity = streams[
                config["stream_names"]["phone_physical_activity"]]["identifier"]
        else:
            phone_physical_activity = None

        if config["stream_names"]["motionsense_hrv_accel_right"] in streams:
            # NOTE: the original only tested the truthiness of the config value here;
            # testing stream presence is presumably what was intended
            if config["stream_names"]["motionsense_hrv_gyro_right"] in streams:
                sensor_failure_marker(
                    streams[config["stream_names"]["motionsense_hrv_right_attachment_marker"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_accel_right"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_gyro_right"]]["identifier"],
                    "right", participant_id,
                    config["stream_names"]["motionsense_hrv_right_sensor_failure_marker"],
                    CC, config)

            ms_wd(
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_right_wireless_marker"],
                phone_physical_activity, CC, config)

        if config["stream_names"]["motionsense_hrv_accel_left"] in streams:
            if config["stream_names"]["motionsense_hrv_gyro_left"] in streams:
                sensor_failure_marker(
                    streams[config["stream_names"]["motionsense_hrv_left_attachment_marker"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_accel_left"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_gyro_left"]]["identifier"],
                    "left", participant_id,
                    config["stream_names"]["motionsense_hrv_left_sensor_failure_marker"],
                    CC, config)

            ms_wd(
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_left_wireless_marker"],
                phone_physical_activity, CC, config)

        ### Attachment marker
        if config["stream_names"]["motionsense_hrv_led_quality_right"] in streams:
            ms_attachment_marker(
                streams[config["stream_names"]["motionsense_hrv_led_quality_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_led_quality_right"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_right_attachment_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_led_quality_left"] in streams:
            ms_attachment_marker(
                streams[config["stream_names"]["motionsense_hrv_led_quality_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_led_quality_left"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_left_attachment_marker"],
                CC, config)

        ### Packet-loss marker
        if config["stream_names"]["motionsense_hrv_accel_right"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_accel_right_packetloss_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_accel_left"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_accel_left_packetloss_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_gyro_right"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_gyro_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_gyro_right"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_gyro_right_packetloss_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_gyro_left"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_gyro_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_gyro_left"]]["name"],
                participant_id,
                config["stream_names"]["motionsense_hrv_gyro_left_packetloss_marker"],
                CC, config)

        if config["stream_names"]["phone_screen_touch"] in streams:
            phone_screen_touch_marker(
                streams[config["stream_names"]["phone_screen_touch"]]["identifier"],
                streams[config["stream_names"]["phone_screen_touch"]]["name"],
                participant_id,
                config["stream_names"]["phone_screen_touch_marker"],
                CC, config)
import os
import imp
from datetime import datetime
from threading import Thread
from importlib import import_module

from cerebralcortex.kernel.datatypes.datastream import DataStream
from cerebralcortex.kernel.utils.logging import cc_log

###################################
from cerebralcortex.CerebralCortex import CerebralCortex

# Sandeep: Give path to .yml file of APIServer
configuration_file = os.path.join(os.path.dirname(__file__),
                                  'cerebralcortex_apiserver.yml')
CC = CerebralCortex(configuration_file, time_zone="America/Los_Angeles",
                    load_spark=False)

################################## Global variables
# filelist = []
# cur_time = 1513236910  # hard coded, should use datetime.now() in the future
#                        # let user define start time
###################################
from pyspark.streaming.kafka import KafkaDStream
# from core.kafka_offset import storeOffsetRanges


def verify_fields(msg):
    # the original body is truncated here; a minimal completion (assumed):
    # a message is valid only if it carries both metadata and data fields
    if "metadata" in msg and "data" in msg:
        return True
    return False
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import shutil

from cerebralcortex.CerebralCortex import CerebralCortex
from cerebralcortex.configuration import Configuration
from datetime import timedelta
from pytz import timezone

# create and load CerebralCortex object and configs
configuration_file = os.path.join(os.path.dirname(__file__), '../../cerebralcortex.yml')
CC_driver = CerebralCortex(configuration_file, master="local[*]",
                           name="Data Diagnostic App", load_spark=True)
CC_worker = CerebralCortex(configuration_file, master="local[*]",
                           name="Data Diagnostic App", load_spark=False)

# output folder path
output_folder = "/home/ali/Desktop/DUMP/data/tmp/"
shutil.rmtree(output_folder)

# load data diagnostic configs
config = Configuration(
    filepath="../data_processor/data_diagnostic/data_diagnostic_config.yml").config
def wireless_disconnection(stream_id: uuid, stream_name: str, owner_id: uuid,
                           CC_obj: CerebralCortex, config: dict):
    """
    Analyze whether a sensor was unavailable due to a wireless disconnection or due to
    the sensor being powered off. This method automatically loads related accelerometer
    streams of an owner. All the labeled data (st, et, label) with its metadata are then
    stored in a datastore.
    Note: if an owner owns more than one accelerometer (for example, more than one
    motionsense accelerometer) then this might not work.
    :param stream_id: stream_id should be of "battery-powered-off"
    :param CC_obj:
    :param config:
    """
    results = OrderedDict()

    stream_end_time = CC_obj.get_stream_start_end_time(stream_id)["end_time"]
    day = stream_end_time

    # load stream data to be diagnosed
    stream = CC_obj.get_datastream(stream_id, day, data_type=DataSet.COMPLETE)
    windowed_data = window(stream.data, config['general']['window_size'], True)

    owner_id = stream._owner
    stream_name = stream._name

    windowed_data = filter_battery_off_windows(stream_id, stream_name, windowed_data,
                                               owner_id, config, CC_obj)

    threshold = config['sensor_unavailable_marker']['autosense']
    label = config['labels']['autosense_unavailable']

    if windowed_data:
        # prepare input streams metadata
        # NOTE: all_stream_ids_names is assumed to be a module-level mapping of
        # stream names to stream ids; it is not defined in this function
        x = all_stream_ids_names[config["stream_names"]["autosense_accel_x"]]
        y = all_stream_ids_names[config["stream_names"]["autosense_accel_y"]]
        z = all_stream_ids_names[config["stream_names"]["autosense_accel_z"]]

        input_streams = [{"id": str(stream_id), "name": stream_name},
                         {"id": str(x), "name": config["stream_names"]["autosense_accel_x"]},
                         {"id": str(y), "name": config["stream_names"]["autosense_accel_y"]},
                         {"id": str(z), "name": config["stream_names"]["autosense_accel_z"]}]

        for dp in windowed_data:
            if not dp.data and dp.start_time != "" and dp.end_time != "":
                # examine the window immediately before the data gap
                start_time = dp.start_time - timedelta(
                    seconds=config['general']['window_size'])
                end_time = dp.start_time

                autosense_accel_x = CC_obj.get_datastream(
                    x, start_time=start_time, end_time=end_time,
                    data_type=DataSet.ONLY_DATA)
                autosense_accel_y = CC_obj.get_datastream(
                    y, start_time=start_time, end_time=end_time,
                    data_type=DataSet.ONLY_DATA)
                autosense_accel_z = CC_obj.get_datastream(
                    z, start_time=start_time, end_time=end_time,
                    data_type=DataSet.ONLY_DATA)

                magnitudeVals = magnitude_autosense_v1(autosense_accel_x,
                                                       autosense_accel_y,
                                                       autosense_accel_z)

                # high motion right before the gap suggests a wireless
                # disconnection rather than a powered-off sensor
                if np.var(magnitudeVals) > threshold:
                    key = (dp.start_time, dp.end_time)
                    results[key] = label

        merged_windows = merge_consective_windows(results)
        store(input_streams, merged_windows, CC_obj, config,
              config["algo_names"]["sensor_unavailable_marker"])
import argparse
import gzip
import os

from cerebralcortex.model_development.model_development import cstress_model
# (CerebralCortex, DataPoint, and the line parser are imported elsewhere in this module)

argparser = argparse.ArgumentParser(description="Cerebral Cortex Test Application")
argparser.add_argument('--base_directory')
args = argparser.parse_args()

# To run this program, please specify a program argument for base_directory that is
# the path to the test data files, e.g. --base_directory /Users/hnat/data/
basedir = args.base_directory

configuration_file = os.path.join(os.path.dirname(__file__), 'cerebralcortex.yml')
CC = CerebralCortex(configuration_file, master="local[*]",
                    name="Memphis cStress Development App")


def readfile(filename):
    data = []
    with gzip.open(filename, 'rt') as f:
        count = 0
        for l in f:
            dp = parser.data_processor(l)
            if isinstance(dp, DataPoint):
                data.append(dp)
                count += 1
            if count > 200000:
                break
    return data
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

from cerebralcortex.CerebralCortex import CerebralCortex

configuration_file = os.path.join(os.path.dirname(__file__),
                                  '../cerebralcortex_apiserver.yml')
CC = CerebralCortex(configuration_file, time_zone="US/Central", load_spark=False)

debug_mode = os.environ.get('FLASK_DEBUG')
if debug_mode:
    CC.configuration['apiserver']['debug'] = debug_mode

minio_host = os.environ.get('MINIO_HOST')
if minio_host:
    CC.configuration['minio']['host'] = minio_host

minio_access_key = os.environ.get('MINIO_ACCESS_KEY')
if minio_access_key:
    CC.configuration['minio']['access_key'] = minio_access_key

minio_secret_key = os.environ.get('MINIO_SECRET_KEY')
if minio_secret_key:
    CC.configuration['minio']['secret_key'] = minio_secret_key
def store(data: OrderedDict, input_streams: dict, output_streams: dict,
          CC_obj: CerebralCortex):
    """
    Store diagnostic results with their metadata in the data-store
    :param data:
    :param input_streams:
    :param output_streams:
    :param CC_obj:
    """
    if data:
        # basic output stream info
        owner = input_streams[0]["owner_id"]
        dd_stream_id = output_streams["id"]
        dd_stream_name = output_streams["name"]
        stream_type = "ds"

        data_descriptor = [{
            "NAME": "Data Quality (LED)",
            "DATA_TYPE": "int",
            "FREQUENCY": "0.33",
            "MAX_VALUE": "4",
            "MIN_VALUE": "0",
            "DESCRIPTION": "measures the Data Quality of LED. Values= GOOD(0), BAND_OFF(1), NOT_WORN(2), BAND_LOOSE(3), NOISE(4)"
        }]
        execution_context = {
            "platform_metadata": {
                "NAME": "MotionSense HRV",
                "DEVICE_ID": ""
            },
            "processing_module": {
                "name": "",
                "environment": "cerebralcortex",
                "algorithm": [{
                    "method": "",
                    "authors": ["Nasir Ali", "Md Azim Ullah"],
                    "version": "0.0.1",
                    "reference": {"url": "http://md2k.org/"},
                    "description": ""
                }],
                "description": "",
                "input_streams": input_streams,
                "output_streams": output_streams,
                "input_parameters": {}
            },
            "datasource_metadata": {
                "NAME": "Data Quality (LED)",
                "DATA_TYPE": "org.md2k.datakitapi.datatype.DataTypeInt",
                "FREQUENCY": "0.33",
                "DESCRIPTION": "measures the Data Quality of LED. Values= GOOD(0), BAND_OFF(1), NOT_WORN(2), BAND_LOOSE(3), NOISE(4)"
            },
            "application_metadata": {
                "NAME": "MotionSense",
                "DESCRIPTION": "Collects data from the motion sense. Sensors supported: [Accelerometer, Gyroscope, Battery, LED, DataQuality]",
                "VERSION_NAME": "0.0.1",
                "VERSION_NUMBER": "2000500"
            }
        }
        annotations = []

        ds = DataStream(identifier=dd_stream_id, owner=owner, name=dd_stream_name,
                        data_descriptor=data_descriptor,
                        execution_context=execution_context, annotations=annotations,
                        stream_type=stream_type, data=data)

        CC_obj.save_datastream(ds, "datastream")
def wireless_disconnection(stream_id: uuid, CC_obj: CerebralCortex, config: dict,
                           start_time=None, end_time=None):
    """
    Analyze whether a sensor was unavailable due to a wireless disconnection or due to
    the sensor being powered off. This method automatically loads related accelerometer
    streams of an owner. All the labeled data (st, et, label) with its metadata are then
    stored in a datastore.
    Note: if an owner owns more than one accelerometer (for example, more than one
    motionsense accelerometer) then this might not work.
    :param stream_id: stream_id should be of "battery-powered-off"
    :param CC_obj:
    :param config:
    """
    results = OrderedDict()
    threshold = 0

    stream_info = CC_obj.get_datastream(stream_id, data_type=DataSet.ONLY_METADATA,
                                        start_time=start_time, end_time=end_time)
    owner_id = stream_info._owner
    name = stream_info._name
    stream_name = stream_info._name

    if name == config["sensor_types"]["autosense_ecg"]:
        threshold = config['sensor_unavailable_marker']['ecg']
        label = config['labels']['autosense_unavailable']
    elif name == config["sensor_types"]["autosense_rip"]:
        threshold = config['sensor_unavailable_marker']['rip']
        label = config['labels']['autosense_unavailable']
    elif name == config["sensor_types"]["motionsense_accel"]:
        threshold = config['sensor_unavailable_marker']['motionsense']
        label = config['labels']['motionsense_unavailable']
    else:
        # an unmatched sensor type would leave label unbound below
        raise ValueError("Incorrect sensor type.")

    battery_off_data = CC_obj.get_datastream(stream_id, data_type=DataSet.ONLY_DATA,
                                             start_time=start_time, end_time=end_time)

    if battery_off_data:
        if name == config["sensor_types"]["motionsense_accel"]:
            motionsense_accel_stream_id = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["motionsense_accel"], "id")
            input_streams = [{"id": str(stream_id), "name": str(stream_name)},
                             {"id": str(motionsense_accel_stream_id),
                              "name": config["sensor_types"]["motionsense_accel"]}]
        else:
            x = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["autosense_accel_x"])
            y = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["autosense_accel_y"])
            z = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["autosense_accel_z"])
            input_streams = [{"id": str(stream_id), "name": stream_name},
                             {"id": str(x), "name": config["sensor_types"]["autosense_accel_x"]},
                             {"id": str(y), "name": config["sensor_types"]["autosense_accel_y"]},
                             {"id": str(z), "name": config["sensor_types"]["autosense_accel_z"]}]

        for dp in battery_off_data:
            if dp.start_time != "" and dp.end_time != "":
                # get a window prior to a battery powered-off event
                window_start = dp.start_time - timedelta(
                    seconds=config['general']['window_size'])
                window_end = dp.start_time

                if name == config["sensor_types"]["motionsense_accel"]:
                    motionsense_accel_xyz = CC_obj.get_datastream(
                        motionsense_accel_stream_id, start_time=window_start,
                        end_time=window_end, data_type=DataSet.COMPLETE)

                    magnitudeValStream = magnitude(motionsense_accel_xyz)
                    magnitudeVals = [mv.sample for mv in magnitudeValStream.data]
                else:
                    autosense_acc_x = CC_obj.get_datastream(
                        x, start_time=window_start, end_time=window_end,
                        data_type=DataSet.ONLY_DATA)
                    autosense_acc_y = CC_obj.get_datastream(
                        y, start_time=window_start, end_time=window_end,
                        data_type=DataSet.ONLY_DATA)
                    autosense_acc_z = CC_obj.get_datastream(
                        z, start_time=window_start, end_time=window_end,
                        data_type=DataSet.ONLY_DATA)

                    magnitudeVals = autosense_calculate_magnitude(
                        autosense_acc_x, autosense_acc_y, autosense_acc_z)

                # high motion right before the gap suggests a wireless
                # disconnection rather than a powered-off sensor
                if np.var(magnitudeVals) > threshold:
                    key = (dp.start_time, dp.end_time)
                    results[key] = label

        merged_windows = merge_consective_windows(results)
        store(input_streams, merged_windows, CC_obj, config,
              config["algo_names"]["sensor_unavailable_marker"])
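# The magnitude helpers used above are external to this module; a minimal
# sketch of what a per-sample accelerometer magnitude computation typically
# looks like (an assumption, not the project's actual implementation):
import math

def accel_magnitude(xs, ys, zs):
    """Euclidean norm of each (x, y, z) accelerometer sample."""
    return [math.sqrt(x * x + y * y + z * z) for x, y, z in zip(xs, ys, zs)]

assert accel_magnitude([3.0], [4.0], [0.0]) == [5.0]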
class TestDataStoreEngine(unittest.TestCase):
    testConfigFile = os.path.join(os.path.dirname(__file__), 'res/test_configuration.yml')
    CC = CerebralCortex(testConfigFile, master="local[*]",
                        name="Cerebral Cortex DataStoreEngine Tests",
                        time_zone="US/Central", load_spark=True)
    configuration = CC.configuration
    meta_obj = Metadata(CC)

    def test_01_setup_data(self):
        data_descriptor = {}
        execution_context = json.loads('{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}')
        annotations = {}
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        result = Metadata(self.CC).is_id_created(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", execution_context)
        if result["status"] == "new":
            stream_identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        else:
            stream_identifier = result["id"]
        self.assertEqual(stream_identifier, "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        Metadata(self.CC).store_stream_info(
            stream_identifier, "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
            data_descriptor, execution_context, annotations, stream_type,
            start_time, end_time, result["status"])

    def test_02_get_stream_info(self):
        stream_info = Metadata(self.CC).get_stream_info("6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        self.assertEqual(stream_info[0]["identifier"], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")
        self.assertEqual(stream_info[0]["owner"], "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertEqual(stream_info[0]["name"], "data-store-test")
        self.assertEqual(stream_info[0]["data_descriptor"], "{}")
        self.assertEqual(
            stream_info[0]["execution_context"],
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}')
        self.assertEqual(stream_info[0]["annotations"], "{}")
        self.assertEqual(stream_info[0]["type"], "datastream")

    def test_03_append_annotations(self):
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {}, {}, "datastream1")
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {"some": "none"}, {}, "datastream1")
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {"a": "b"}, {}, {}, "datastream1")
        self.assertRaises(Exception, Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-diagnostic_diff", {}, {}, {}, "datastream1")

        annotations_unchanged = Metadata(self.CC).append_annotations(
            "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
            "06634264-56bc-4c92-abd7-377dbbad79dd",
            "data-store-test", {},
            json.loads('{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'),
            {}, "datastream")
        self.assertEqual(annotations_unchanged, "unchanged")

    def test_04_get_stream_ids_by_name(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_name = Metadata(self.CC).get_stream_ids_by_name("data-store-test")
        self.assertIsInstance(by_name, list)
        self.assertEqual(by_name[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd", start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd", start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_05_get_stream_ids_of_owner(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_id, list)
        self.assertEqual(by_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_06_store_stream(self):
        identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        owner = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name = "data-store-test"
        data_descriptor = {}
        execution_context = json.loads('{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}')
        annotations = {}
        datapoints = []
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)
        localtz = timezone('US/Central')
        start_time = localtz.localize(start_time)
        end_time = localtz.localize(end_time)
        sample = {'Foo3': 123}

        dp1 = DataPoint(start_time=start_time, end_time=end_time, sample=sample)
        datapoints.append(dp1)

        ds = DataStream(identifier, owner, name, data_descriptor, execution_context,
                        annotations, stream_type, start_time, end_time, datapoints)
        self.CC.save_datastream(ds)

        stream = self.CC.get_datastream(identifier, data_type=DataSet.COMPLETE)
        self.assertEqual(stream._identifier, identifier)
        self.assertEqual(stream._owner, owner)
        self.assertEqual(stream._name, name)
        self.assertEqual(stream._data_descriptor, data_descriptor)
        self.assertEqual(stream._execution_context, execution_context)
        self.assertEqual(stream._annotations, annotations)
        self.assertEqual(stream._datastream_type, stream_type)
        self.assertEqual(stream.data[0].start_time, start_time)
        self.assertEqual(stream.data[0].end_time, end_time)
        self.assertEqual(stream.data[0].sample, sample)

    def test_07_stream_filter(self):
        identifier_anno = "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"
        identifier_data = "6db98dfb-d6e8-4b27-8d55-95b20fa0f751"
        owner_id = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name_anno = "data-store-test-annotation"
        name_data = "data-store-test-data"
        data_descriptor = {}
        execution_context_anno = json.loads('{"execution_context": {"algorithm": {"method": "test.data_store.annotation.filter"}}}')
        execution_context_data = json.loads('{"execution_context": {"algorithm": {"method": "test.data_store.data.filter"}}}')
        annotations_data = json.loads('[{"name": "test-case","identifier": "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"}]')
        annotations_anno = {}
        datapoints_anno = []
        datapoints_data = []

        result_data = Metadata(self.CC).is_id_created(owner_id, name_data,
                                                      execution_context_data)
        if result_data["status"] != "new":
            identifier_data = result_data["id"]
        Metadata(self.CC).store_stream_info(
            identifier_anno, owner_id, name_anno, data_descriptor,
            execution_context_anno, annotations_anno, "annotations",
            datetime.datetime(2017, 4, 24, 0, 0, 1),
            datetime.datetime(2017, 4, 24, 0, 0, 5), result_data["status"])

        result_anno = Metadata(self.CC).is_id_created(owner_id, name_data,
                                                      execution_context_data)
        if result_anno["status"] != "new":
            identifier_anno = result_anno["id"]
        Metadata(self.CC).store_stream_info(
            identifier_data, owner_id, name_data, data_descriptor,
            execution_context_data, annotations_data, "datastream",
            datetime.datetime(2017, 4, 24, 0, 0, 1),
            datetime.datetime(2017, 4, 24, 0, 0, 5), result_anno["status"])

        for i in range(0, 5):
            if i % 2 == 0:
                sample_anno = 'good'
            else:
                sample_anno = 'bad'
            sample_data = i, i + 2, i + 3

            start_time_anno = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_anno = datetime.datetime(2017, 4, 24, 0, 0, (5 + i))
            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))

            localtz = timezone('US/Central')
            start_time_anno = localtz.localize(start_time_anno)
            end_time_anno = localtz.localize(end_time_anno)
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)

            datapoints_anno.append(DataPoint(start_time=start_time_anno,
                                             end_time=end_time_anno, sample=sample_anno))
            datapoints_data.append(DataPoint(start_time=start_time_data,
                                             end_time=end_time_data, sample=sample_data))

        ds_anno = DataStream(uuid.UUID(identifier_anno), owner_id, name_anno,
                             data_descriptor, execution_context_anno, annotations_data,
                             "annotations", start_time_anno, end_time_anno, datapoints_anno)
        ds_data = DataStream(uuid.UUID(identifier_data), owner_id, name_data,
                             data_descriptor, execution_context_data, annotations_anno,
                             "datastream", start_time_anno, end_time_anno, datapoints_data)

        self.CC.save_datastream(ds_anno)
        self.CC.save_datastream(ds_data)

        filtered_stream = self.CC.filter_stream(identifier_data, "test-case", "good")
        self.assertEqual(len(filtered_stream), 5)

        for i in range(0, 5):
            sample_data = [i, i + 2, i + 3]
            start_time_data = localtz.localize(datetime.datetime(2017, 4, 24, 0, 0, i))
            end_time_data = localtz.localize(datetime.datetime(2017, 4, 24, 0, 0, (3 + i)))

            self.assertEqual(filtered_stream[i].start_time, start_time_data)
            self.assertEqual(filtered_stream[i].end_time, end_time_data)
            self.assertEqual(filtered_stream[i].sample, sample_data)
def packet_loss_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                       dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    # using stream_id, data-diagnostic-stream-id, and owner id to generate
    # a unique stream ID for the packet-loss marker
    packetloss_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(str(raw_stream_id) + dd_stream_name + str(owner_id)))

    stream_days = get_stream_days(raw_stream_id, packetloss_marker_stream_id, CC)

    if stream_name == config["stream_names"]["autosense_ecg"]:
        sampling_rate = config["sampling_rate"]["ecg"]
        threshold_val = config["packet_loss_marker"]["ecg_acceptable_packet_loss"]
        label = config["labels"]["ecg_packet_loss"]
    elif stream_name == config["stream_names"]["autosense_rip"]:
        sampling_rate = config["sampling_rate"]["rip"]
        threshold_val = config["packet_loss_marker"]["rip_acceptable_packet_loss"]
        label = config["labels"]["rip_packet_loss"]
    elif stream_name in (config["stream_names"]["motionsense_hrv_accel_right"],
                         config["stream_names"]["motionsense_hrv_accel_left"]):
        sampling_rate = config["sampling_rate"]["motionsense_accel"]
        threshold_val = config["packet_loss_marker"]["motionsense_accel_acceptable_packet_loss"]
        # NOTE: reuses the gyro packet-loss label; a dedicated accel label may be intended
        label = config["labels"]["motionsense_gyro_packet_loss"]
    elif stream_name in (config["stream_names"]["motionsense_hrv_gyro_right"],
                         config["stream_names"]["motionsense_hrv_gyro_left"]):
        sampling_rate = config["sampling_rate"]["motionsense_gyro"]
        threshold_val = config["packet_loss_marker"]["motionsense_gyro_acceptable_packet_loss"]
        label = config["labels"]["motionsense_gyro_packet_loss"]
    else:
        # an unmatched stream name would leave sampling_rate and label unbound below
        raise ValueError("Incorrect sensor type.")

    for day in stream_days:
        # load stream data to be diagnosed
        stream = CC.get_datastream(raw_stream_id, day, data_type=DataSet.COMPLETE)

        if len(stream.data) > 0:
            windowed_data = window(stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, sampling_rate, threshold_val,
                                      label, config)
            merged_windows = merge_consective_windows(results)

            if len(merged_windows) > 0:
                input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id),
                                  "name": stream_name}]
                output_stream = {"id": packetloss_marker_stream_id, "name": dd_stream_name,
                                 "algo_type": config["algo_type"]["packet_loss_marker"]}
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata, CC, config)