def store(data: OrderedDict, input_streams: dict, output_streams: dict, metadata, CC_obj: CerebralCortex, config: dict):
    """
    Store diagnostic results with its metadata in the data-store
    :param input_streams:
    :param data:
    :param CC_obj:
    :param config:
    :param algo_type:
    """
    if data:
        # basic output stream info
        owner = input_streams[0]["owner_id"]
        dd_stream_id = output_streams["id"]
        dd_stream_name = output_streams["name"]
        stream_type = "ds"

        data_descriptor = metadata["dd"]
        execution_context = metadata["ec"]
        annotations = metadata["anno"]

        ds = DataStream(identifier=dd_stream_id, owner=owner, name=dd_stream_name, data_descriptor=data_descriptor,
                        execution_context=execution_context, annotations=annotations,
                        stream_type=stream_type, data=data)

        CC_obj.save_datastream(ds, "datastream")
def filter_battery_off_windows(stream_id: uuid, stream_name: str,
                               main_stream_windows: dict, owner_id: uuid,
                               config: dict, CC_obj: CerebralCortex) -> dict:
    """

    :param stream_id:
    :param stream_name:
    :param main_stream_windows:
    :param owner_id:
    :param config:
    :param CC_obj:
    :return:
    """

    start_time = ""
    end_time = ""
    # load phone battery data
    phone_battery_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(stream_id + config["stream_names"]["phone_battery"] + owner_id))
    phone_battery_marker_stream = CC_obj.get_datastream(
        phone_battery_marker_stream_id,
        data_type=DataSet.ONLY_DATA,
        start_time=start_time,
        end_time=end_time)

    # load sensor battery data for supported sensor streams; default to an
    # empty dict so the filtering loop below is safe for other stream types
    sensor_battery_marker_stream = {}
    if stream_name in (config["stream_names"]["autosense_ecg"],
                       config["stream_names"]["autosense_rip"],
                       config["stream_names"]["motionsense_hrv_accel_right"],
                       config["stream_names"]["motionsense_hrv_accel_left"]):
        sensor_battery_marker_stream = CC_obj.get_datastream(
            phone_battery_marker_stream_id,
            data_type=DataSet.ONLY_DATA,
            start_time=start_time,
            end_time=end_time)
    results = OrderedDict()
    for key, data in main_stream_windows.items():
        # reset the marker for each window; only windows that fall entirely
        # inside a battery-off period are dropped
        battery_marker = 0
        for phone_key, phone_data in phone_battery_marker_stream.items():
            if phone_key.start_time <= key.start_time and phone_key.end_time >= key.end_time:
                battery_marker = 1
        for sensor_key, sensor_data in sensor_battery_marker_stream.items():
            if sensor_key.start_time <= key.start_time and sensor_key.end_time >= key.end_time:
                battery_marker = 1

        if battery_marker != 1:
            results[key] = data
    return results
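
# A self-contained sketch of the containment rule used above: a main-stream
# window is dropped whenever a battery-off window fully covers it. The
# namedtuple stands in for the project's window-key type (an assumption made
# for illustration only).
from collections import namedtuple

Window = namedtuple("Window", ["start_time", "end_time"])

def drop_contained_windows(main_windows: dict, off_windows: list) -> dict:
    kept = {}
    for key, data in main_windows.items():
        contained = any(off.start_time <= key.start_time and
                        off.end_time >= key.end_time for off in off_windows)
        if not contained:
            kept[key] = data
    return kept

# a window spanning (5, 10) is dropped when a battery-off period covers (0, 20)
_main = {Window(5, 10): "w1", Window(25, 30): "w2"}
_off = [Window(0, 20)]
assert drop_contained_windows(_main, _off) == {Window(25, 30): "w2"}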
def store(input_streams: dict, data: OrderedDict, CC_obj: CerebralCortex,
          config: dict, algo_type: str):
    """
    Store diagnostic results with its metadata in the data-store
    :param input_streams:
    :param data:
    :param CC_obj:
    :param config:
    :param algo_type:
    """
    parent_stream_id = input_streams[0]["id"]
    stream_name = input_streams[0]["name"]

    result = process_data(stream_name, input_streams, algo_type, config)

    data_descriptor = json.loads(result["dd"])
    execution_context = json.loads(result["ec"])
    annotations = json.loads(result["anno"])

    metadata = CC_obj.get_datastream(parent_stream_id,
                                     data_type=DataSet.ONLY_METADATA)

    owner = metadata.owner
    name = metadata.name
    stream_type = "datastream"

    ds = DataStream(owner=owner,
                    name=name,
                    data_descriptor=data_descriptor,
                    execution_context=execution_context,
                    annotations=annotations,
                    stream_type=stream_type,
                    data=data)

    CC_obj.save_datastream(ds)
def sensor_failure_marker(attachment_marker_stream_id: uuid, mshrv_accel_id: uuid, mshrv_gyro_id: uuid, wrist: str,
                          owner_id: uuid, dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param stream_id:
    :param CC_obj:
    :param config:
    """

    # using attachment-marker stream id, data-diagnostic stream name, and owner id to generate a unique stream ID for the sensor-failure marker
    sensor_failure_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, str(
        attachment_marker_stream_id + dd_stream_name + owner_id + "SENSOR FAILURE MARKER"))

    stream_days = get_stream_days(attachment_marker_stream_id, sensor_failure_stream_id, CC)

    try:
        for day in stream_days:
            # load stream data to be diagnosed
            attachment_marker_stream = CC.get_datastream(attachment_marker_stream_id, day, data_type=DataSet.COMPLETE)
            results = OrderedDict()
            if attachment_marker_stream.data:
                for marker_window in attachment_marker_stream.data:
                    if "MOTIONSENSE-ON-BODY" in marker_window.sample:
                        mshrv_accel_stream = CC.get_datastream(mshrv_accel_id, day, data_type=DataSet.ONLY_DATA,
                                                               start_time=marker_window.start_time,
                                                               end_time=marker_window.end_time)
                        mshrv_gyro_stream = CC.get_datastream(mshrv_gyro_id, day, data_type=DataSet.ONLY_DATA,
                                                              start_time=marker_window.start_time,
                                                              end_time=marker_window.end_time)

                        results_accel = process_windows(mshrv_accel_stream, config)
                        results_gyro = process_windows(mshrv_gyro_stream, config)

                        key = (marker_window.start_time, marker_window.end_time)

                        # mark a failure when one sensor produced data while the other did not
                        if results_accel > 0 and results_gyro < 1:
                            sample = "MOTIONSENSE-HRV-" + str(wrist) + "-ACCELEROMETER-FAILURE"
                            results.setdefault(key, []).append(
                                DataPoint(marker_window.start_time, marker_window.end_time, sample))
                        elif results_accel < 1 and results_gyro > 0:
                            sample = "MOTIONSENSE-HRV-" + str(wrist) + "-GYRO-FAILURE"
                            results.setdefault(key, []).append(
                                DataPoint(marker_window.start_time, marker_window.end_time, sample))

                merged_windows = merge_consective_windows(results)

                if len(results) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(attachment_marker_stream_id),
                                      "name": attachment_marker_stream.name}]
                    output_stream = {"id": sensor_failure_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["sensor_failure"]}
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(merged_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def battery_marker(raw_stream_id: uuid, stream_name: str, owner_id, dd_stream_name, CC: CerebralCortex, config: dict,
                   start_time=None, end_time=None):
    """
    This algorithm uses battery percentages to decide whether device was powered-off or battery was low.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param raw_stream_id:
    :param CC:
    :param config:
    """

    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker
        battery_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, str(raw_stream_id + dd_stream_name + owner_id))

        stream_days = get_stream_days(raw_stream_id, battery_marker_stream_id, CC)

        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id, data_type=DataSet.COMPLETE, day=day)

            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data, stream_name, config)

                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
                    output_stream = {"id": battery_marker_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["battery_marker"]}
                    labelled_windows = mark_windows(battery_marker_stream_id, merged_windows, CC, config)
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(labelled_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
    @classmethod
    def setUpClass(cls):
        configuration_file = os.path.join(os.path.dirname(__file__),
                                          '../../../cerebralcortex.yml')
        cls.CC = CerebralCortex(configuration_file,
                                master="local[*]",
                                name="Data Diagnostic App",
                                time_zone="US/Central")
        cls.config = Configuration(
            filepath="../data_diagnostic/data_diagnostic_config.yml").config

        cls.sample_battery_data = []
        for row in range(1, 481):
            if row < 61:
                battery = 87.0
            elif row < 120:
                battery = 0.0
            elif row < 240:
                battery = 87.0
            elif row < 300:
                battery = 7.0
            elif row < 360:
                battery = 0.0
            else:
                battery = 60.0

            tz = pytz.timezone("US/Central")
            start_time = tz.localize(
                datetime.fromtimestamp(
                    int(round((time.time() + row) * 1000)) / 1e3))

            dp = DataPoint(start_time=start_time, sample=battery)
            cls.sample_battery_data.append(dp)
        cls.window_size = 60
def mobile_app_availability_marker(raw_stream_id: uuid, stream_name: str, owner_id, dd_stream_name, CC: CerebralCortex,
                                   config: dict, start_time=None, end_time=None):
    """
    This algorithm uses phone battery percentages to decide whether the mobile app was available or unavailable.
    Phone battery data is expected to be collected 24/7, so gaps in it indicate app unavailability.
    :param raw_stream_id:
    :param CC:
    :param config:
    """

    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for the app-availability marker
        app_availability_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, str(
            raw_stream_id + dd_stream_name + owner_id + "mobile app availability marker"))

        stream_days = get_stream_days(raw_stream_id, app_availability_marker_stream_id, CC)

        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id, data_type=DataSet.COMPLETE, day=day)
            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data, config)

                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
                    output_stream = {"id": app_availability_marker_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["app_availability_marker"]}
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(merged_windows, input_streams, output_stream, metadata, CC, config)

    except Exception as e:
        print(e)
def run():
    parser = argparse.ArgumentParser(description='CerebralCortex Data Exporter.')
    parser.add_argument("-o", "--output_dir", help="Directory path where exported data will be stored", required=True)
    parser.add_argument("-idz", "--owner_ids", help="Comma separated users' UUIDs", required=False)
    parser.add_argument("-namez", "--owner_user_names", help="Comma separated user-names", required=False)
    parser.add_argument("-nr", "--owner_name_regex", help="User name pattern. For example, '-nr ali' will export all users' data that start with user-name 'ali'", required=False)
    args = vars(parser.parse_args())

    if args["owner_ids"] and (args["owner_user_names"] or args["owner_name_regex"]):
        raise ValueError("Expecting owner_ids: got owner_user_names and/or owner_name_regex too.")
    elif args["owner_user_names"] and (args["owner_ids"] or args["owner_name_regex"]):
        raise ValueError("Expecting owner_user_names: got owner_ids and/or owner_name_regex too.")
    elif args["owner_name_regex"] and (args["owner_ids"] or args["owner_user_names"]):
        raise ValueError("Expecting owner_name_regex: got owner_ids and owner_user_names too.")

    testConfigFile = os.path.join(os.path.dirname(__file__), '../../cerebralcortex.yml')
    CC_obj = CerebralCortex(testConfigFile, master="local[*]", name="Cerebral Cortex Data Importer and Exporter",
                            time_zone="US/Central", load_spark=True)
    CC_obj.sc.setLogLevel("warn")

    if args["owner_ids"]:
        DataExporter(CC_obj, args["output_dir"], owner_ids=args["owner_ids"].split(",")).start()
    elif args["owner_user_names"]:
        DataExporter(CC_obj, args["output_dir"], owner_user_names=args["owner_user_names"].split(",")).start()
    elif args["owner_name_regex"]:
        DataExporter(CC_obj, args["output_dir"], owner_name_regex=args["owner_name_regex"]).start()
    else:
        parser.print_help()
        print("Please provide at least one of these: comma separated owner-ids OR comma separated owner-names OR owner-name pattern")
def gsr_response(stream_id: uuid, start_time: datetime, end_time: datetime,
                 label_attachment: str, label_off: str, CC_obj: CerebralCortex,
                 config: dict) -> str:
    """
    This method analyzes Galvanic skin response to label a window as improper attachment or sensor-off-body
    :param stream_id: UUID
    :param start_time:
    :param end_time:
    :param label_attachment:
    :param label_off:
    :param CC_obj:
    :param config:
    :return: string
    """
    datapoints = CC_obj.get_datastream(stream_id,
                                       start_time=start_time,
                                       end_time=end_time,
                                       data_type=DataSet.COMPLETE)

    vals = []
    for dp in datapoints:
        vals.append(dp.sample)

    # compute the median once and compare it against both thresholds
    median_val = stat.median(stat.array(vals))
    if median_val < config["attachment_marker"]["improper_attachment"]:
        return label_attachment
    elif median_val > config["attachment_marker"]["gsr_off_body"]:
        return label_off
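
# The decision in gsr_response() reduces a window to its median and compares it
# against two thresholds. A self-contained sketch in plain Python; the
# thresholds and labels are illustrative, not the project's configured values.
import statistics

def label_gsr_window(samples, improper_attachment=1.0, gsr_off_body=3.0):
    median_val = statistics.median(samples)
    if median_val < improper_attachment:
        return "improper-attachment"
    elif median_val > gsr_off_body:
        return "sensor-off-body"
    return None  # in-between medians are left unlabeled, as in gsr_response()

assert label_gsr_window([0.2, 0.4, 0.3]) == "improper-attachment"
assert label_gsr_window([4.0, 5.0, 6.0]) == "sensor-off-body"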
def get_stream_days(raw_stream_id: uuid, dd_stream_id: uuid,
                    CC: CerebralCortex) -> List:
    """
    Returns a list of days that needs be diagnosed for a participant
    :param raw_stream_id:
    :param dd_stream_id:
    """
    dd_stream_days = CC.get_stream_start_end_time(dd_stream_id)["end_time"]

    if not dd_stream_days:
        stream_times = CC.get_stream_start_end_time(raw_stream_id)
        days = stream_times["end_time"] - stream_times["start_time"]
        stream_days = []
        for day in range(days.days + 1):
            stream_days.append((stream_times["start_time"] +
                                timedelta(days=day)).strftime('%Y%m%d'))
    else:
        stream_days = [(dd_stream_days + timedelta(days=1)).strftime('%Y%m%d')]
    return stream_days
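
# A worked example of the day enumeration in get_stream_days(): one 'YYYYMMDD'
# string per day between a stream's start and end time, inclusive of both
# endpoints (dates below are made up).
from datetime import datetime, timedelta

_start = datetime(2017, 4, 24)
_end = datetime(2017, 4, 26)
_days = [(_start + timedelta(days=d)).strftime('%Y%m%d')
         for d in range((_end - _start).days + 1)]
assert _days == ['20170424', '20170425', '20170426']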
def phone_screen_touch_marker(raw_stream_id: uuid,
                              raw_stream_name: str,
                              owner_id,
                              dd_stream_name,
                              CC: CerebralCortex,
                              config: dict,
                              start_time=None,
                              end_time=None):
    """
    This is not part of core data diagnostic suite.
    It only calculates how many screen touches are there.
    :param raw_stream_id:
    :param CC:
    :param config:
    """

    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for the screen-touch marker
        screen_touch_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS,
            str(raw_stream_id + dd_stream_name + owner_id +
                "mobile phone screen touch marker"))

        stream_days = get_stream_days(raw_stream_id, screen_touch_stream_id,
                                      CC)

        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id,
                                       data_type=DataSet.COMPLETE,
                                       day=day,
                                       start_time=start_time,
                                       end_time=end_time)
            if len(stream.data) > 0:
                windowed_data = window(stream.data,
                                       config['general']['window_size'], True)
                results = process_windows(windowed_data)

                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{
                        "owner_id": owner_id,
                        "id": str(raw_stream_id),
                        "name": raw_stream_name
                    }]
                    output_stream = {
                        "id": screen_touch_stream_id,
                        "name": dd_stream_name,
                        "algo_type":
                        config["algo_type"]["app_availability_marker"]
                    }
                    metadata = get_metadata(dd_stream_name, input_streams,
                                            config)
                    store(merged_windows, input_streams, output_stream,
                          metadata, CC, config)

    except Exception as e:
        print(e)
def migrate(folder_path: str, data_block_size):
    """
    Migrate data from old CerebralCortex structure to new CerebralCortex structure
    :param folder_path:
    """

    configuration_file = os.path.join(os.path.dirname(__file__),
                                      '../../cerebralcortex.yml')
    CC = CerebralCortex(configuration_file,
                        master="local[*]",
                        name="Data Migrator API",
                        time_zone="US/Central",
                        load_spark=True)

    if not folder_path:
        raise ValueError("Path to the data directory cannot be empty.")

    for filename in glob.iglob(folder_path + '/**/*.json', recursive=True):
        print(
            str(datetime.datetime.now()) + " -- Started processing file " +
            filename)

        tmp = filename.split("/")
        tmp = tmp[len(tmp) - 1].split("+")
        owner_id = tmp[0]
        stream_id = str(
            uuid.uuid3(uuid.NAMESPACE_DNS, str(tmp[0] + " " + tmp[1])))

        name = ''
        for i in tmp[3:]:
            name += i + " "

        name = name.strip().replace(".json", "")
        name = tmp[1] + " " + name

        pm_algo_name = tmp[2]

        data_filename = filename.replace(".json", ".csv.bz2")
        old_schema = read_file(filename)
        execution_context = get_execution_context(pm_algo_name, old_schema)
        data_descriptor = get_data_descriptor(old_schema)
        annotations = get_annotations()
        print(
            str(datetime.datetime.now()) + " -- Schema building is complete ")
        print(
            str(datetime.datetime.now()) +
            " -- Started unzipping file and adding records in Cassandra ")
        for data_block in bz2file_to_datapoints(data_filename,
                                                data_block_size):
            persist_data(execution_context, data_descriptor, annotations,
                         stream_id, name, owner_id, data_block, CC)
        print(
            str(datetime.datetime.now()) + " -- Completed processing file " +
            filename)
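
# migrate() infers owner, stream id, algorithm, and stream name from the file
# name. Judging from the splitting logic above, the expected layout appears to
# be "<owner>+<stream>+<algo>+<name parts...>.json"; this sketch demonstrates
# that parsing on a made-up file name.
_fname = "owner-uuid+wrist+cstress+feature+vector.json"
_tmp = _fname.split("+")
assert _tmp[0] == "owner-uuid"  # owner_id
assert _tmp[2] == "cstress"     # pm_algo_name
_name = " ".join(_tmp[3:]).strip().replace(".json", "")
_name = _tmp[1] + " " + _name
assert _name == "wrist feature vector"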
def attachment_marker(stream_id: uuid,
                      CC_obj: CerebralCortex,
                      config: dict,
                      start_time=None,
                      end_time=None):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore
    :param stream_id: UUID
    :param CC_obj: CerebralCortex object
    :param config: Data diagnostics configurations
    """

    stream = CC_obj.get_datastream(stream_id,
                                   data_type=DataSet.COMPLETE,
                                   start_time=start_time,
                                   end_time=end_time)

    results = OrderedDict()
    threshold_val = None
    stream_name = stream._name

    if stream_name == config["stream_names"]["autosense_ecg"]:
        threshold_val = config['attachment_marker']['ecg_on_body']
        label_on = config['labels']['ecg_on_body']
        label_off = config['labels']['ecg_off_body']
    elif stream_name == config["stream_names"]["autosense_rip"]:
        threshold_val = config['attachment_marker']['rip_on_body']
        label_on = config['labels']['rip_on_body']
        label_off = config['labels']['rip_off_body']
    else:
        raise ValueError("Incorrect sensor type.")

    windowed_data = window(stream.data, config['general']['window_size'],
                           False)

    for key, data in windowed_data.items():
        # remove outliers from a window data
        normal_values = outlier_detection(data)

        if stat.variance(normal_values) < threshold_val:
            results[key] = label_off
        else:
            results[key] = label_on

    merged_windows = merge_consective_windows(results)
    input_streams = [{"id": str(stream_id), "name": stream_name}]
    store(input_streams, merged_windows, CC_obj, config,
          config["algo_names"]["attachment_marker"])
def sensor_availability(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                        dd_stream_name, phone_physical_activity,
                        CC: CerebralCortex, config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away from the phone or sensor
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param phone_physical_activity:
    :param CC:
    :param config:
    """

    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for the wireless-disconnection marker
    wireless_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id + dd_stream_name + owner_id))

    stream_days = get_stream_days(raw_stream_id, wireless_marker_stream_id, CC)

    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_datastream(raw_stream_id,
                                       day,
                                       data_type=DataSet.COMPLETE)
        if len(raw_stream.data) > 0:

            windowed_data = window(raw_stream.data,
                                   config['general']['window_size'], True)
            results = process_windows(windowed_data, day, CC,
                                      phone_physical_activity, config)
            merged_windows = merge_consective_windows(results)

            if len(merged_windows) > 0:
                input_streams = [{
                    "owner_id": owner_id,
                    "id": str(raw_stream_id),
                    "name": stream_name
                }]
                output_stream = {
                    "id": wireless_marker_stream_id,
                    "name": dd_stream_name,
                    "algo_type":
                    config["algo_type"]["sensor_unavailable_marker"]
                }
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata,
                      CC, config)
def packet_loss_marker(stream_id: uuid, CC_obj: CerebralCortex, config: dict, start_time=None, end_time=None):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param stream_id:
    :param CC_obj:
    :param config:
    """
    stream = CC_obj.get_datastream(stream_id, data_type=DataSet.COMPLETE, start_time=start_time, end_time=end_time)
    name = stream._name
    results = OrderedDict()

    if name == config["sensor_types"]["autosense_ecg"]:
        sampling_rate = config["sampling_rate"]["ecg"]
        threshold_val = config["packet_loss_marker"]["ecg_acceptable_packet_loss"]
        label = config["labels"]["ecg_packet_loss"]
        windowed_data = window(stream.data, config['general']['window_size'], False)
    elif name == config["sensor_types"]["autosense_rip"]:
        sampling_rate = config["sampling_rate"]["rip"]
        threshold_val = config["packet_loss_marker"]["rip_acceptable_packet_loss"]
        label = config["labels"]["rip_packet_loss"]
        windowed_data = window(stream.data, config['general']['window_size'], False)
    elif name == config["sensor_types"]["motionsense_accel"]:
        sampling_rate = config["sampling_rate"]["motionsense"]
        threshold_val = config["packet_loss_marker"]["motionsense_acceptable_packet_loss"]
        label = config["labels"]["motionsense_packet_loss"]
        motionsense_accel_magni = magnitude(stream)
        windowed_data = window(motionsense_accel_magni.data, config['general']['window_size'], False)
    else:
        raise ValueError("Incorrect sensor type.")

    for key, data in windowed_data.items():

        available_packets = len(data)
        expected_packets = sampling_rate * config['general']['window_size']

        if (available_packets / expected_packets) < threshold_val:
            results[key] = label

    merged_windows = merge_consective_windows(results)
    input_streams = [{"id": str(stream_id), "name": name}]
    store(input_streams, merged_windows, CC_obj, config, config["algo_names"]["packet_loss_marker"])
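
# The packet-loss test in packet_loss_marker() marks a window when the ratio of
# received to expected packets falls below a threshold, where expected packets
# are sampling_rate * window_size. A self-contained sketch with made-up numbers:
def is_packet_loss(received: int, sampling_rate: float, window_size: float,
                   acceptable_ratio: float) -> bool:
    expected = sampling_rate * window_size
    return (received / expected) < acceptable_ratio

# a 60 s window at 25 Hz expects 1500 packets; 1200 received is a 0.8 ratio,
# which a 0.9 acceptance threshold flags as packet loss
assert is_packet_loss(1200, 25, 60, 0.9) is True
assert is_packet_loss(1450, 25, 60, 0.9) is False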
def battery_marker(stream_id: uuid,
                   CC_obj: CerebralCortex,
                   config: dict,
                   start_time=None,
                   end_time=None):
    """
    This algorithm uses battery percentages to decide whether phone was powered-off or battery was low.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param stream_id:
    :param CC_obj:
    :param config:
    """
    results = OrderedDict()

    # stream = CC_obj.get_datastream(stream_id, data_type="all")

    stream = CC_obj.get_datastream(stream_id,
                                   data_type=DataSet.COMPLETE,
                                   start_time=start_time,
                                   end_time=end_time)
    windowed_data = window(stream.data, config['general']['window_size'], True)

    name = stream._name

    for key, data in windowed_data.items():
        dp = []
        for k in data:
            dp.append(float(k.sample))

        if name == config["sensor_types"]["phone_battery"]:
            results[key] = phone_battery(dp, config)
        elif name == config["sensor_types"]["motionsense_battery"]:
            results[key] = motionsense_battery(dp, config)
        elif name == config["sensor_types"]["autosense_battery"]:
            results[key] = autosense_battery(dp, config)
        else:
            raise ValueError("Incorrect sensor type.")

    merged_windows = merge_consective_windows(results)
    input_streams = [{"id": str(stream_id), "name": name}]
    store(input_streams, merged_windows, CC_obj, config,
          config["algo_names"]["battery_marker"])
def attachment_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                      dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore

    """
    # TODO: quality streams could be multiple so find the one computed with CC
    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for the attachment marker
    attachment_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id + dd_stream_name + owner_id))

    stream_days = get_stream_days(raw_stream_id, attachment_marker_stream_id,
                                  CC)

    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_datastream(raw_stream_id,
                                       day,
                                       data_type=DataSet.COMPLETE)

        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data,
                                   config['general']['window_size'], True)
            results = process_windows(windowed_data, config)
            merged_windows = merge_consective_windows(results)

            input_streams = [{
                "owner_id": owner_id,
                "id": str(raw_stream_id),
                "name": stream_name
            }]
            output_stream = {
                "id": attachment_marker_stream_id,
                "name": dd_stream_name,
                "algo_type": config["algo_type"]["attachment_marker"]
            }
            metadata = get_metadata(dd_stream_name, input_streams, config)
            store(merged_windows, input_streams, output_stream, metadata, CC,
                  config)
def diagnose_pipeline(participant_id: uuid, CC: CerebralCortex, config: dict):
    """
    Contains pipeline execution of all the diagnosis algorithms
    :param participant_id:
    :param CC:
    :param config:
    """

    # get all the streams that belong to a participant
    streams = CC.get_participant_streams(participant_id)
    if streams:

        # phone battery
        if config["stream_names"]["phone_battery"] in streams:
            battery_marker(
                streams[config["stream_names"]["phone_battery"]]["identifier"],
                streams[config["stream_names"]["phone_battery"]]["name"],
                participant_id, config["stream_names"]["phone_battery_marker"],
                CC, config)

            # mobile phone availability marker
            mobile_app_availability_marker(
                streams[config["stream_names"]["phone_battery"]]["identifier"],
                streams[config["stream_names"]["phone_battery"]]["name"],
                participant_id,
                config["stream_names"]["app_availability_marker"], CC, config)

        # autosense battery
        if config["stream_names"]["autosense_battery"] in streams:
            battery_marker(
                streams[config["stream_names"]
                        ["autosense_battery"]]["identifier"],
                streams[config["stream_names"]["autosense_battery"]]["name"],
                participant_id,
                config["stream_names"]["autosense_battery_marker"], CC, config)

        # TODO: Motionsense battery values are not available.
        # TODO: Uncomment following code when the motionsense battery values are available
        # if config["stream_names"]["motionsense_hrv_battery_right"] in streams:
        #     battery_marker(streams[config["stream_names"]["motionsense_hrv_battery_right"]]["identifier"], streams[config["stream_names"]["motionsense_hrv_battery_right"]]["name"], participant_id,  config["stream_names"]["motionsense_hrv_battery_right_marker"], CC, config)
        # if config["stream_names"]["motionsense_hrv_battery_left"] in streams:
        #     battery_marker(streams[config["stream_names"]["motionsense_hrv_battery_left"]]["identifier"], streams[config["stream_names"]["motionsense_hrv_battery_left"]]["name"], participant_id,  config["stream_names"]["motionsense_hrv_battery_left_marker"], CC, config)

        ### Sensor unavailable - wireless disconnection
        if config["stream_names"]["phone_physical_activity"] in streams:
            phone_physical_activity = streams[config["stream_names"][
                "phone_physical_activity"]]["identifier"]
        else:
            phone_physical_activity = None

        if config["stream_names"]["motionsense_hrv_accel_right"] in streams:
            if config["stream_names"]["motionsense_hrv_gyro_right"]:
                sensor_failure_marker(
                    streams[config["stream_names"]
                            ["motionsense_hrv_right_attachment_marker"]]
                    ["identifier"],
                    streams[config["stream_names"]
                            ["motionsense_hrv_accel_right"]]["identifier"],
                    streams[config["stream_names"]
                            ["motionsense_hrv_gyro_right"]]["identifier"],
                    "right", participant_id, config["stream_names"]
                    ["motionsense_hrv_right_sensor_failure_marker"], CC,
                    config)

            ms_wd(
                streams[config["stream_names"]
                        ["motionsense_hrv_accel_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]
                ["name"], participant_id, config["stream_names"]
                ["motionsense_hrv_right_wireless_marker"],
                phone_physical_activity, CC, config)

        if config["stream_names"]["motionsense_hrv_accel_left"] in streams:
            if config["stream_names"]["motionsense_hrv_gyro_left"]:
                sensor_failure_marker(
                    streams[config["stream_names"]
                            ["motionsense_hrv_left_attachment_marker"]]
                    ["identifier"],
                    streams[config["stream_names"]
                            ["motionsense_hrv_accel_left"]]["identifier"],
                    streams[config["stream_names"]
                            ["motionsense_hrv_gyro_left"]]["identifier"],
                    "left", participant_id, config["stream_names"]
                    ["motionsense_hrv_left_sensor_failure_marker"], CC, config)

            ms_wd(
                streams[config["stream_names"]
                        ["motionsense_hrv_accel_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]
                ["name"], participant_id,
                config["stream_names"]["motionsense_hrv_left_wireless_marker"],
                phone_physical_activity, CC, config)

        ### Attachment marker
        if config["stream_names"][
                "motionsense_hrv_led_quality_right"] in streams:
            ms_attachment_marker(
                streams[config["stream_names"]
                        ["motionsense_hrv_led_quality_right"]]["identifier"],
                streams[config["stream_names"]
                        ["motionsense_hrv_led_quality_right"]]["name"],
                participant_id, config["stream_names"]
                ["motionsense_hrv_right_attachment_marker"], CC, config)
        if config["stream_names"][
                "motionsense_hrv_led_quality_left"] in streams:
            ms_attachment_marker(
                streams[config["stream_names"]
                        ["motionsense_hrv_led_quality_left"]]["identifier"],
                streams[config["stream_names"]
                        ["motionsense_hrv_led_quality_left"]]["name"],
                participant_id, config["stream_names"]
                ["motionsense_hrv_left_attachment_marker"], CC, config)

        ### Packet-loss marker
        if config["stream_names"]["motionsense_hrv_accel_right"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]
                        ["motionsense_hrv_accel_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]
                ["name"], participant_id, config["stream_names"]
                ["motionsense_hrv_accel_right_packetloss_marker"], CC, config)
        if config["stream_names"]["motionsense_hrv_accel_left"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]
                        ["motionsense_hrv_accel_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]
                ["name"], participant_id, config["stream_names"]
                ["motionsense_hrv_accel_left_packetloss_marker"], CC, config)
        if config["stream_names"]["motionsense_hrv_gyro_right"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]
                        ["motionsense_hrv_gyro_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_gyro_right"]]
                ["name"], participant_id, config["stream_names"]
                ["motionsense_hrv_gyro_right_packetloss_marker"], CC, config)

        if config["stream_names"]["motionsense_hrv_gyro_left"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_gyro_left"]]
                ["identifier"], streams[config["stream_names"]
                                        ["motionsense_hrv_gyro_left"]]["name"],
                participant_id, config["stream_names"]
                ["motionsense_hrv_gyro_left_packetloss_marker"], CC, config)

        if config["stream_names"]["phone_screen_touch"] in streams:
            phone_screen_touch_marker(
                streams[config["stream_names"]
                        ["phone_screen_touch"]]["identifier"],
                streams[config["stream_names"]["phone_screen_touch"]]["name"],
                participant_id,
                config["stream_names"]["phone_screen_touch_marker"], CC,
                config)
import os
import imp
from cerebralcortex.kernel.datatypes.datastream import DataStream
from datetime import datetime
from cerebralcortex.kernel.utils.logging import cc_log
from threading import Thread
from importlib import import_module

###################################
from cerebralcortex.CerebralCortex import CerebralCortex

#Sandeep: Give path to .yml file of APIServer
configuration_file = os.path.join(os.path.dirname(__file__),
                                  'cerebralcortex_apiserver.yml')
CC = CerebralCortex(configuration_file,
                    time_zone="America/Los_Angeles",
                    load_spark=False)

################################## Global variables
# filelist = []
# cur_time = 1513236910 #hard coded, should use datetime.now() in the future
# let user define start time

###################################
from pyspark.streaming.kafka import KafkaDStream
#from core.kafka_offset import storeOffsetRanges
from cerebralcortex.kernel.utils.logging import cc_log


def verify_fields(msg):
    # the original snippet is truncated here; a minimal boolean check is
    # assumed for the missing body
    if "metadata" in msg and "data" in msg:
        return True
    return False

import os, shutil
from cerebralcortex.CerebralCortex import CerebralCortex
from cerebralcortex.configuration import Configuration
from datetime import timedelta
from pytz import timezone

#create and load CerebralCortex object and configs
configuration_file = os.path.join(os.path.dirname(__file__),
                                  '../../cerebralcortex.yml')
CC_driver = CerebralCortex(configuration_file,
                           master="local[*]",
                           name="Data Diagnostic App",
                           load_spark=True)
CC_worker = CerebralCortex(configuration_file,
                           master="local[*]",
                           name="Data Diagnostic App",
                           load_spark=False)

# output folder path
output_folder = "/home/ali/Desktop/DUMP/data/tmp/"

# guard against a missing folder so the cleanup does not raise
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)

# load data diagnostic configs
config = Configuration(
    filepath="../data_processor/data_diagnostic/data_diagnostic_config.yml"
).config
def wireless_disconnection(stream_id: uuid, stream_name: str, owner_id: uuid,
                           CC_obj: CerebralCortex, config: dict):
    """
    Analyze whether a sensor was unavailable due to a wireless disconnection
    or due to sensor powered off. This method automatically loads related
    accelerometer streams of an owner. All the labeled data (st, et, label)
    with its metadata are then stored in a datastore.
    Note: If an owner owns more than one accelerometer (for example, more
    than one motionsense accelerometer) then this might not work.
    :param stream_id: stream_id should be of "battery-powered-off"
    :param CC_obj:
    :param config:
    """

    results = OrderedDict()

    stream_end_time = CC_obj.get_stream_start_end_time(stream_id)["end_time"]
    day = stream_end_time

    # load stream data to be diagnosed
    stream = CC_obj.get_datastream(stream_id, day, data_type=DataSet.COMPLETE)
    windowed_data = window(stream.data, config['general']['window_size'], True)

    owner_id = stream._owner
    stream_name = stream._name

    windowed_data = filter_battery_off_windows(stream_id, stream_name,
                                               windowed_data, owner_id, config,
                                               CC_obj)

    threshold = config['sensor_unavailable_marker']['autosense']
    label = config['labels']['autosense_unavailable']

    if windowed_data:
        # prepare input streams metadata
        x = all_stream_ids_names[config["stream_names"]["autosense_accel_x"]]
        y = all_stream_ids_names[config["stream_names"]["autosense_accel_y"]]
        z = all_stream_ids_names[config["stream_names"]["autosense_accel_z"]]

        input_streams = [{
            "id": str(stream_id),
            "name": stream_name
        }, {
            "id": str(x),
            "name": config["stream_names"]["autosense_accel_x"]
        }, {
            "id": str(y),
            "name": config["stream_names"]["autosense_accel_y"]
        }, {
            "id": str(z),
            "name": config["stream_names"]["autosense_accel_z"]
        }]

        for dp in windowed_data:
            if not dp.data and dp.start_time != "" and dp.end_time != "":
                start_time = dp.start_time - timedelta(
                    seconds=config['general']['window_size'])
                end_time = dp.start_time

                autosense_accel_x = CC_obj.get_datastream(
                    x,
                    start_time=start_time,
                    end_time=end_time,
                    data_type=DataSet.ONLY_DATA)
                autosense_accel_y = CC_obj.get_datastream(
                    y,
                    start_time=start_time,
                    end_time=end_time,
                    data_type=DataSet.ONLY_DATA)
                autosense_accel_z = CC_obj.get_datastream(
                    z,
                    start_time=start_time,
                    end_time=end_time,
                    data_type=DataSet.ONLY_DATA)

                magnitudeVals = magnitude_autosense_v1(autosense_accel_x,
                                                       autosense_accel_y,
                                                       autosense_accel_z)

                if np.var(magnitudeVals) > threshold:
                    key = (dp.start_time, dp.end_time)
                    results[key] = label

        merged_windows = merge_consective_windows(results)
        store(input_streams, merged_windows, CC_obj, config,
              config["algo_names"]["sensor_unavailable_marker"])
import argparse
import gzip
import os

from cerebralcortex.model_development.model_development import cstress_model

argparser = argparse.ArgumentParser(
    description="Cerebral Cortex Test Application")
argparser.add_argument('--base_directory')
args = argparser.parse_args()

# To run this program, please specify a program argument for base_directory that is the path to the test data files.
# e.g. --base_directory /Users/hnat/data/
basedir = args.base_directory

configuration_file = os.path.join(os.path.dirname(__file__),
                                  'cerebralcortex.yml')

CC = CerebralCortex(configuration_file,
                    master="local[*]",
                    name="Memphis cStress Development App")


def readfile(filename):
    data = []
    with gzip.open(filename, 'rt') as f:
        count = 0
        for l in f:
            dp = parser.data_processor(l)
            if isinstance(dp, DataPoint):
                data.append(dp)
                count += 1
            if count > 200000:
                break
    return data

import os

from cerebralcortex.CerebralCortex import CerebralCortex

configuration_file = os.path.join(os.path.dirname(__file__),
                                  '../cerebralcortex_apiserver.yml')

CC = CerebralCortex(configuration_file,
                    time_zone="US/Central",
                    load_spark=False)

debug_mode = os.environ.get('FLASK_DEBUG')
if debug_mode:
    CC.configuration['apiserver']['debug'] = debug_mode

minio_host = os.environ.get('MINIO_HOST')
if minio_host:
    CC.configuration['minio']['host'] = minio_host
minio_access_key = os.environ.get('MINIO_ACCESS_KEY')
if minio_access_key:
    CC.configuration['minio']['access_key'] = minio_access_key
minio_secret_key = os.environ.get('MINIO_SECRET_KEY')
if minio_secret_key:
    CC.configuration['minio']['secret_key'] = minio_secret_key
def store(data: OrderedDict, input_streams: dict, output_streams: dict,
          CC_obj: CerebralCortex):
    """
    Store diagnostic results with its metadata in the data-store
    :param input_streams:
    :param data:
    :param CC_obj:
    :param config:
    :param algo_type:
    """
    if data:
        # basic output stream info
        owner = input_streams[0]["owner_id"]
        dd_stream_id = output_streams["id"]
        dd_stream_name = output_streams["name"]
        stream_type = "ds"

        data_descriptor = [{
            "NAME": "Data Quality (LED)",
            "DATA_TYPE": "int",
            "FREQUENCY": "0.33",
            "MAX_VALUE": "4",
            "MIN_VALUE": "0",
            "DESCRIPTION": "measures the Data Quality of LED. Values= GOOD(0), BAND_OFF(1), NOT_WORN(2), BAND_LOOSE(3), NOISE(4)"
        }]
        execution_context = {
            "platform_metadata": {
                "NAME": "MotionSense HRV",
                "DEVICE_ID": ""
            },
            "processing_module": {
                "name": "",
                "environment": "cerebralcortex",
                "algorithm": [{
                    "method": "",
                    "authors": ["Nasir Ali", " Md Azim Ullah"],
                    "version": "0.0.1",
                    "reference": {
                        "url": "http://md2k.org/"
                    },
                    "description": ""
                }],
                "description": "",
                "input_streams": input_streams,
                "output_streams": output_streams,
                "input_parameters": {}
            },
            "datasource_metadata": {
                "NAME": "Data Quality (LED)",
                "DATA_TYPE": "org.md2k.datakitapi.datatype.DataTypeInt",
                "FREQUENCY": "0.33",
                "DESCRIPTION": "measures the Data Quality of LED. Values= GOOD(0), BAND_OFF(1), NOT_WORN(2), BAND_LOOSE(3), NOISE(4)"
            },
            "application_metadata": {
                "NAME": "MotionSense",
                "DESCRIPTION": "Collects data from the motion sense. Sensors supported: [Accelerometer, Gyroscope, Battery, LED, DataQuality]",
                "VERSION_NAME": "0.0.1",
                "VERSION_NUMBER": "2000500"
            }
        }
        annotations = []

        ds = DataStream(identifier=dd_stream_id,
                        owner=owner,
                        name=dd_stream_name,
                        data_descriptor=data_descriptor,
                        execution_context=execution_context,
                        annotations=annotations,
                        stream_type=stream_type,
                        data=data)

        CC_obj.save_datastream(ds, "datastream")
def wireless_disconnection(stream_id: uuid,
                           CC_obj: CerebralCortex,
                           config: dict,
                           start_time=None,
                           end_time=None):
    """
    Analyze whether a sensor was unavailable due to a wireless disconnection
    or due to sensor powered off. This method automatically loads related
    accelerometer streams of an owner. All the labeled data (st, et, label)
    with its metadata are then stored in a datastore.
    Note: If an owner owns more than one accelerometer (for example, more
    than one motionsense accelerometer) then this might not work.
    :param stream_id: stream_id should be of "battery-powered-off"
    :param CC_obj:
    :param config:
    """

    results = OrderedDict()
    threshold = 0

    stream_info = CC_obj.get_datastream(stream_id,
                                        data_type=DataSet.ONLY_METADATA,
                                        start_time=start_time,
                                        end_time=end_time)

    owner_id = stream_info._owner
    name = stream_info._name
    stream_name = name

    if name == config["sensor_types"]["autosense_ecg"]:
        threshold = config['sensor_unavailable_marker']['ecg']
        label = config['labels']['autosense_unavailable']
    if name == config["sensor_types"]["autosense_rip"]:
        threshold = config['sensor_unavailable_marker']['rip']
        label = config['labels']['autosense_unavailable']
    elif name == config["sensor_types"]["motionsense_accel"]:
        threshold = config['sensor_unavailable_marker']['motionsense']
        label = config['labels']['motionsense_unavailable']

    battery_off_data = CC_obj.get_datastream(stream_id,
                                             data_type=DataSet.ONLY_DATA,
                                             start_time=start_time,
                                             end_time=end_time)

    if battery_off_data:
        if name == config["sensor_types"]["motionsense_accel"]:
            motionsense_accel_stream_id = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["motionsense_accel"], "id")
            input_streams = [{
                "id": str(stream_id),
                "name": str(stream_name)
            }, {
                "id": str(motionsense_accel_stream_id),
                "name": config["sensor_types"]["motionsense_accel"]
            }]
        else:
            x = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["autosense_accel_x"])
            y = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["autosense_accel_y"])
            z = CC_obj.get_stream_id_by_owner_id(
                owner_id, config["sensor_types"]["autosense_accel_z"])
            input_streams = [{
                "id": str(stream_id),
                "name": stream_name
            }, {
                "id": str(x),
                "name": config["sensor_types"]["autosense_accel_x"]
            }, {
                "id": str(y),
                "name": config["sensor_types"]["autosense_accel_y"]
            }, {
                "id": str(z),
                "name": config["sensor_types"]["autosense_accel_z"]
            }]

        for dp in battery_off_data:
            if dp.start_time != "" and dp.end_time != "":
                # get a window prior to a battery powered off
                start_time = dp.start_time - timedelta(
                    seconds=config['general']['window_size'])
                end_time = dp.start_time
                if name == config["sensor_types"]["motionsense_accel"]:
                    motionsense_accel_xyz = CC_obj.get_datastream(
                        motionsense_accel_stream_id,
                        start_time=start_time,
                        end_time=end_time,
                        data_type=DataSet.COMPLETE)
                    magnitudeValStream = magnitude(motionsense_accel_xyz)
                    magnitudeVals = []
                    for mv in magnitudeValStream.data:
                        magnitudeVals.append(mv.sample)

                else:
                    autosense_acc_x = CC_obj.get_datastream(
                        x,
                        start_time=start_time,
                        end_time=end_time,
                        data_type=DataSet.ONLY_DATA)
                    autosense_acc_y = CC_obj.get_datastream(
                        y,
                        start_time=start_time,
                        end_time=end_time,
                        data_type=DataSet.ONLY_DATA)
                    autosense_acc_z = CC_obj.get_datastream(
                        z,
                        start_time=start_time,
                        end_time=end_time,
                        data_type=DataSet.ONLY_DATA)

                    magnitudeVals = autosense_calculate_magnitude(
                        autosense_acc_x, autosense_acc_y, autosense_acc_z)

                if np.var(magnitudeVals) > threshold:
                    key = (dp.start_time, dp.end_time)
                    results[key] = label

        merged_windows = merge_consective_windows(results)
        store(input_streams, merged_windows, CC_obj, config,
              config["algo_names"]["sensor_unavailable_marker"])
class TestDataStoreEngine(unittest.TestCase):
    testConfigFile = os.path.join(os.path.dirname(__file__),
                                  'res/test_configuration.yml')
    CC = CerebralCortex(testConfigFile,
                        master="local[*]",
                        name="Cerebral Cortex DataStoreEngine Tests",
                        time_zone="US/Central",
                        load_spark=True)
    configuration = CC.configuration
    meta_obj = Metadata(CC)

    def test_01_setup_data(self):
        data_descriptor = {}
        execution_context = json.loads(
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        annotations = {}
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        result = Metadata(self.CC).is_id_created(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
            execution_context)

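        # reuse the existing stream id when this owner/name/execution-context
        # combination has already been registered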
        if result["status"] == "new":
            stream_identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        else:
            stream_identifier = result["id"]

        self.assertEqual(stream_identifier,
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        Metadata(self.CC).store_stream_info(
            stream_identifier, "06634264-56bc-4c92-abd7-377dbbad79dd",
            "data-store-test", data_descriptor, execution_context, annotations,
            stream_type, start_time, end_time, result["status"])

    def test_02_get_stream_info(self):

        stream_info = Metadata(
            self.CC).get_stream_info("6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        self.assertEqual(stream_info[0]["identifier"],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")
        self.assertEqual(stream_info[0]["owner"],
                         "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertEqual(stream_info[0]["name"], "data-store-test")
        self.assertEqual(stream_info[0]["data_descriptor"], "{}")
        self.assertEqual(
            stream_info[0]["execution_context"],
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        self.assertEqual(stream_info[0]["annotations"], "{}")
        self.assertEqual(stream_info[0]["type"], "datastream")

    def test_03_append_annotations(self):
        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {}, {}, "datastream1")

        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {}, {"some": "none"}, {},
                          "datastream1")

        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-store-test", {"a": "b"}, {}, {}, "datastream1")

        self.assertRaises(Exception,
                          Metadata(self.CC).append_annotations,
                          "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
                          "06634264-56bc-4c92-abd7-377dbbad79dd",
                          "data-diagnostic_diff", {}, {}, {}, "datastream1")

        annotations_unchanged = Metadata(self.CC).append_annotations(
            "6db98dfb-d6e8-4b27-8d55-95b20fa0f754",
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test", {},
            json.loads(
                '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
            ), {}, "datastream")
        self.assertEqual(annotations_unchanged, "unchanged")

    def test_04_get_stream_ids_by_name(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_name = Metadata(self.CC).get_stream_ids_by_name("data-store-test")
        self.assertIsInstance(by_name, list)
        self.assertEqual(by_name[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_by_name(
            "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd",
            start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(
            self.CC).get_stream_ids_by_name(
                "data-store-test", "06634264-56bc-4c92-abd7-377dbbad79dd",
                start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_05_get_stream_ids_of_owner(self):
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)

        by_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd")
        self.assertIsInstance(by_id, list)
        self.assertEqual(by_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test")
        self.assertIsInstance(by_name_id, list)
        self.assertEqual(by_name_id[0], "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time = Metadata(self.CC).get_stream_ids_of_owner(
            "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
            start_time)
        self.assertIsInstance(by_name_id_start_time, list)
        self.assertEqual(by_name_id_start_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

        by_name_id_start_time_end_time = Metadata(
            self.CC).get_stream_ids_of_owner(
                "06634264-56bc-4c92-abd7-377dbbad79dd", "data-store-test",
                start_time, end_time)
        self.assertIsInstance(by_name_id_start_time_end_time, list)
        self.assertEqual(by_name_id_start_time_end_time[0],
                         "6db98dfb-d6e8-4b27-8d55-95b20fa0f754")

    def test_06_store_stream(self):
        identifier = "6db98dfb-d6e8-4b27-8d55-95b20fa0f754"
        owner = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name = "data-store-test"
        data_descriptor = {}
        execution_context = json.loads(
            '{"execution_context": {"algorithm": {"method": "cerebralcortex.data_processor.data_diagnostic.BatteryDataMarker"}}}'
        )
        annotations = {}
        datapoints = []
        stream_type = "datastream"
        start_time = datetime.datetime(2017, 4, 24, 0, 0, 1)
        end_time = datetime.datetime(2017, 4, 24, 0, 0, 2)
        localtz = timezone('US/Central')
        start_time = localtz.localize(start_time)
        end_time = localtz.localize(end_time)
        sample = {'Foo3': 123}

        dp1 = DataPoint(start_time=start_time,
                        end_time=end_time,
                        sample=sample)

        datapoints.append(dp1)

        ds = DataStream(identifier, owner, name, data_descriptor,
                        execution_context, annotations, stream_type,
                        start_time, end_time, datapoints)

        self.CC.save_datastream(ds)
        stream = self.CC.get_datastream(identifier, data_type=DataSet.COMPLETE)
        self.assertEqual(stream._identifier, identifier)
        self.assertEqual(stream._owner, owner)
        self.assertEqual(stream._name, name)
        self.assertEqual(stream._data_descriptor, data_descriptor)
        self.assertEqual(stream._execution_context, execution_context)
        self.assertEqual(stream._annotations, annotations)
        self.assertEqual(stream._datastream_type, stream_type)

        self.assertEqual(stream.data[0].start_time, start_time)
        self.assertEqual(stream.data[0].end_time, end_time)
        self.assertEqual(stream.data[0].sample, sample)

    def test_07_stream_filter(self):
        identifier_anno = "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"
        identifier_data = "6db98dfb-d6e8-4b27-8d55-95b20fa0f751"
        owner_id = "06634264-56bc-4c92-abd7-377dbbad79dd"
        name_anno = "data-store-test-annotation"
        name_data = "data-store-test-data"
        data_descriptor = {}
        execution_context_anno = json.loads(
            '{"execution_context": {"algorithm": {"method": "test.data_store.annotation.filter"}}}'
        )
        execution_context_data = json.loads(
            '{"execution_context": {"algorithm": {"method": "test.data_store.data.filter"}}}'
        )
        annotations_data = json.loads(
            '[{"name": "test-case","identifier": "6db98dfb-d6e8-4b27-8d55-95b20fa0f750"}]'
        )
        annotations_anno = {}
        datapoints_anno = []
        datapoints_data = []

        result_anno = Metadata(self.CC).is_id_created(owner_id, name_anno,
                                                      execution_context_anno)
        if result_anno["status"] != "new":
            identifier_anno = result_anno["id"]

        Metadata(self.CC).store_stream_info(
            identifier_anno, owner_id, name_anno, data_descriptor,
            execution_context_anno, annotations_anno, "annotations",
            datetime.datetime(2017, 4, 24, 0, 0, 1),
            datetime.datetime(2017, 4, 24, 0, 0, 5), result_anno["status"])

        result_data = Metadata(self.CC).is_id_created(owner_id, name_data,
                                                      execution_context_data)
        if result_data["status"] != "new":
            identifier_data = result_data["id"]

        Metadata(self.CC).store_stream_info(
            identifier_data, owner_id, name_data, data_descriptor,
            execution_context_data, annotations_data, "datastream",
            datetime.datetime(2017, 4, 24, 0, 0, 1),
            datetime.datetime(2017, 4, 24, 0, 0, 5), result_data["status"])

        for i in range(0, 5):
            if i % 2 == 0:
                sample_anno = 'good'
            else:
                sample_anno = 'bad'
            sample_data = [i, i + 2, i + 3]
            start_time_anno = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_anno = datetime.datetime(2017, 4, 24, 0, 0, (5 + i))

            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))

            localtz = timezone('US/Central')
            start_time_anno = localtz.localize(start_time_anno)
            end_time_anno = localtz.localize(end_time_anno)
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)

            datapoints_anno.append(
                DataPoint(start_time=start_time_anno,
                          end_time=end_time_anno,
                          sample=sample_anno))
            datapoints_data.append(
                DataPoint(start_time=start_time_data,
                          end_time=end_time_data,
                          sample=sample_data))

        # the data stream carries the link to its annotation stream
        # (annotations_data); the annotation stream itself has none
        ds_anno = DataStream(uuid.UUID(identifier_anno), owner_id, name_anno,
                             data_descriptor, execution_context_anno,
                             annotations_anno, "annotations", start_time_anno,
                             end_time_anno, datapoints_anno)

        ds_data = DataStream(uuid.UUID(identifier_data), owner_id, name_data,
                             data_descriptor, execution_context_data,
                             annotations_data, "datastream", start_time_data,
                             end_time_data, datapoints_data)

        self.CC.save_datastream(ds_anno)
        self.CC.save_datastream(ds_data)

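        # filter the data stream by its linked "test-case" annotation stream,
        # keeping only the windows annotated "good"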
        filtered_stream = self.CC.filter_stream(identifier_data, "test-case",
                                                "good")

        self.assertEqual(len(filtered_stream), 5)

        for i in range(0, 5):
            sample_data = [i, i + 2, i + 3]
            start_time_data = datetime.datetime(2017, 4, 24, 0, 0, i)
            end_time_data = datetime.datetime(2017, 4, 24, 0, 0, (3 + i))
            start_time_data = localtz.localize(start_time_data)
            end_time_data = localtz.localize(end_time_data)

            self.assertEqual(filtered_stream[i].start_time, start_time_data)
            self.assertEqual(filtered_stream[i].end_time, end_time_data)
            self.assertEqual(filtered_stream[i].sample, sample_data)

def packet_loss_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                       dd_stream_name: str, CC: CerebralCortex, config: dict):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param raw_stream_id: id of the raw stream to diagnose
    :param stream_name: name of the raw stream
    :param owner_id: id of the stream's owner
    :param dd_stream_name: name of the data-diagnostic output stream
    :param CC: CerebralCortex instance
    :param config: data-diagnostic configuration
    """

    # combine the raw stream id, data-diagnostic stream name, and owner id to
    # derive a deterministic, unique stream id for the packet-loss marker
    packetloss_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id) + dd_stream_name + str(owner_id))

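    # days of raw data still to be diagnosed (presumably those not yet present
    # in the packet-loss marker stream)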
    stream_days = get_stream_days(raw_stream_id, packetloss_marker_stream_id,
                                  CC)

    if stream_name == config["stream_names"]["autosense_ecg"]:
        sampling_rate = config["sampling_rate"]["ecg"]
        threshold_val = config["packet_loss_marker"][
            "ecg_acceptable_packet_loss"]
        label = config["labels"]["ecg_packet_loss"]
    elif stream_name == config["stream_names"]["autosense_rip"]:
        sampling_rate = config["sampling_rate"]["rip"]
        threshold_val = config["packet_loss_marker"][
            "rip_acceptable_packet_loss"]
        label = config["labels"]["rip_packet_loss"]
    elif stream_name == config["stream_names"][
            "motionsense_hrv_accel_right"] or stream_name == config[
                "stream_names"]["motionsense_hrv_accel_left"]:
        sampling_rate = config["sampling_rate"]["motionsense_accel"]
        threshold_val = config["packet_loss_marker"][
            "motionsense_accel_acceptable_packet_loss"]
        # NOTE: assuming the config defines an accel-specific label here; the
        # original used the gyro label, which looks like a copy-paste slip
        label = config["labels"]["motionsense_accel_packet_loss"]
    elif stream_name == config["stream_names"][
            "motionsense_hrv_gyro_right"] or stream_name == config[
                "stream_names"]["motionsense_hrv_gyro_left"]:
        sampling_rate = config["sampling_rate"]["motionsense_gyro"]
        threshold_val = config["packet_loss_marker"][
            "motionsense_gyro_acceptable_packet_loss"]
        label = config["labels"]["motionsense_gyro_packet_loss"]
    else:
        # fail fast instead of hitting a NameError on the unset variables below
        raise ValueError("unsupported stream name for packet-loss marking: %s" %
                         stream_name)

    for day in stream_days:
        # load stream data to be diagnosed
        stream = CC.get_datastream(raw_stream_id,
                                   day,
                                   data_type=DataSet.COMPLETE)

        if len(stream.data) > 0:

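            # chunk the day's data into fixed-size windows for per-window diagnosis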
            windowed_data = window(stream.data,
                                   config['general']['window_size'], True)

            results = process_windows(windowed_data, sampling_rate,
                                      threshold_val, label, config)
            merged_windows = merge_consective_windows(results)
            if len(merged_windows) > 0:
                input_streams = [{
                    "owner_id": owner_id,
                    "id": str(raw_stream_id),
                    "name": stream_name
                }]
                output_stream = {
                    "id": packetloss_marker_stream_id,
                    "name": dd_stream_name,
                    "algo_type": config["algo_type"]["packet_loss_marker"]
                }
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata,
                      CC, config)
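
For reference, a minimal sketch of the per-window check the docstring above
describes (received vs. expected packets). This is an assumption about what
process_windows computes, not the project's implementation; packet_loss_fraction,
window_data, and window_key are hypothetical names.

def packet_loss_fraction(window_data: list, sampling_rate: float,
                         window_size: float) -> float:
    """Fraction of the expected samples that were actually received in one window."""
    expected = sampling_rate * window_size  # samples expected at the nominal rate
    return len(window_data) / expected

# a window would be labeled as packet loss when the received fraction drops
# below the acceptable threshold, e.g.:
#   if packet_loss_fraction(samples, sampling_rate=64, window_size=60) < threshold_val:
#       results[window_key] = label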