def mobile_app_availability_marker(raw_stream_id: uuid, stream_name: str, owner_id, dd_stream_name, CC: CerebralCortex,
                                   config: dict):
    """
    Decide, from phone battery percentages, whether the mobile app was available or unavailable.
    Theoretically, phone battery data shall be collected 24/7.

    For every day returned by get_stream_days the raw stream is loaded, windowed, processed and
    merged; non-empty merged windows are stored under a deterministic marker stream ID.

    :param raw_stream_id: ID of the raw stream to diagnose (UUID object or its string form)
    :param stream_name: name of the raw stream, recorded in the input-stream description
    :param owner_id: owner of the stream (UUID object or its string form)
    :param dd_stream_name: name of the data-diagnostic (output) stream
    :param CC: CerebralCortex object used to load and store streams
    :param config: configuration; reads config['general']['window_size'] and
                   config['algo_type']['app_availability_marker']
    :return: None. Any exception is printed and swallowed (best-effort processing).
    """

    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker.
        # Each part is converted to str *before* concatenating: adding a uuid.UUID to a str raises
        # TypeError, which the handler below would silently swallow.
        marker_name = (str(raw_stream_id) + str(dd_stream_name) + str(owner_id)
                       + "MOBILE APP AVAILABILITY MARKER")
        app_availability_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, marker_name)

        stream_days = get_stream_days(raw_stream_id, app_availability_marker_stream_id, CC)

        for day in stream_days:
            stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
            if len(stream.data) == 0:
                continue

            windowed_data = window(stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, config)

            merged_windows = merge_consective_windows(results)
            if len(merged_windows) == 0:
                continue

            input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
            output_stream = {"id": app_availability_marker_stream_id, "name": dd_stream_name,
                             "algo_type": config["algo_type"]["app_availability_marker"]}
            metadata = get_metadata(dd_stream_name, input_streams, config)
            store(merged_windows, input_streams, output_stream, metadata, CC, config)

    except Exception as e:
        # Best-effort: a failure must not abort the caller's processing run.
        print(e)
Exemple #2
0
def compute_data_yield(stream_id: uuid, username: str, report_stream_name: str,
                       CC: CerebralCortex, config: dict):
    """
    Write a per-day data-yield CSV report for one stream.

    Per the original author's note, this uses the LED quality stream (quality is available for
    3 second windows) to calculate the total good quality data for each day.

    The report is written to
    <config["output"]["folder_path"]>/<config["reports"]["data_yield_per_day"]>/<username>_<report_stream_name>.csv
    and is overwritten on every call.

    :param stream_id: ID of the stream to summarise
    :param username: user name, used as the report file-name prefix
    :param report_stream_name: stream label, used in the file name and the first header row
    :param CC: CerebralCortex object used to load the stream
    :param config: configuration holding the output folder and report sub-folder
    """
    data_dir = config["output"]["folder_path"] + "/" + config["reports"][
        "data_yield_per_day"] + "/"
    data_yield_report = data_dir + username + "_" + report_stream_name + ".csv"
    # makedirs also creates missing parent folders and tolerates an existing one.
    # No os.mknod: open(..., "w") below creates the file, and mknod is not portable.
    os.makedirs(data_dir, exist_ok=True)

    stream_days = get_stream_days(stream_id, CC)

    with open(data_yield_report, "w") as report:
        report.write(report_stream_name + ",,,,\n")
        report.write("day, good hours, total_hours,, \n")
        for day in stream_days:
            # load stream data
            raw_stream = CC.get_stream(stream_id,
                                       day=day,
                                       data_type=DataSet.ONLY_DATA)

            if len(raw_stream) > 0:
                # process_stream is expected to return the remaining CSV columns as text
                report.write(str(day) + "," + process_stream(raw_stream))
def get_streams(stream_id: uuid, username: str, wrist: str, CC: CerebralCortex,
                config: dict):
    """
    Write a per-day quality-breakdown CSV report for one stream.

    Per the original author's note, this uses the LED quality stream (quality is available for
    3 second windows) to calculate the total good quality data for each day.

    The report is written to
    <config["output"]["folder_path"]>/<config["reports"]["data_yield_per_day"]>/<username>_<wrist>.csv
    and is overwritten on every call.

    :param stream_id: ID of the stream to summarise
    :param username: user name, used as the report file-name prefix
    :param wrist: wrist label, used as the report file-name suffix
    :param CC: CerebralCortex object used to load the stream
    :param config: configuration holding the output folder and report sub-folder
    """
    data_dir = config["output"]["folder_path"] + "/" + config["reports"][
        "data_yield_per_day"] + "/"
    data_yield_report = data_dir + username + "_" + wrist + ".csv"
    # makedirs also creates missing parent folders and tolerates an existing one.
    # No os.mknod: open(..., "w") below creates the file, and mknod is not portable.
    os.makedirs(data_dir, exist_ok=True)

    stream_days = get_stream_days(stream_id, CC)

    with open(data_yield_report, "w") as report:
        report.write(
            "day, good, noise, bad, band_off, missing, not_worn, band_loose \n"
        )
        for day in stream_days:
            # load stream data
            raw_stream = CC.get_stream(stream_id,
                                       day=day,
                                       data_type=DataSet.COMPLETE)

            if len(raw_stream.data) > 0:
                # process_stream is expected to return the remaining CSV columns as text
                report.write(str(day) + "," + process_stream(raw_stream.data))
Exemple #4
0
def audit_user_streams(user_id, all_days, cc_config):
    """
    Count the datapoints of every 'analysis' stream of a user, per day, and write a TSV audit file.

    Only streams whose name contains 'analysis' are audited. For each such stream and each day,
    the datapoint counts of all its stream IDs are summed into one row
    ``user_id <TAB> stream name <TAB> day <TAB> count``.
    The rows are written to /tmp/data_audit/<user_id>, overwriting any previous file.

    :param user_id: user identifier; also used as the output file name
    :param all_days: iterable of days to audit
    :param cc_config: configuration passed to the CerebralCortex constructor
    """
    print('X' * 100, cc_config)
    CC = CerebralCortex(cc_config)
    all_user_streams = CC.get_user_streams(user_id)

    # Collect rows in a list and join once instead of growing a string in the loop.
    rows = []
    for user_stream_key in all_user_streams:
        user_stream = all_user_streams[user_stream_key]

        if 'analysis' not in user_stream['name']:
            continue

        for day in all_days:
            data_points = sum(
                len(CC.get_stream(stream_id, user_id, day).data)
                for stream_id in user_stream['stream_ids'])
            rows.append('%s\t%s\t%s\t%d\n' % (user_id, user_stream['name'],
                                              str(day), data_points))

    out_dir = '/tmp/data_audit'
    # exist_ok avoids the check-then-create race when several users are audited in parallel.
    os.makedirs(out_dir, exist_ok=True)

    # The context manager closes the file even if the write fails.
    with open(os.path.join(out_dir, user_id), 'w') as f:
        f.write(''.join(rows))
Exemple #5
0
def mobile_app_availability_marker(all_streams, stream_name: str, owner_id,
                                   CC: CerebralCortex, config: dict):
    """
    Decide, from phone battery percentages, whether the mobile app was available or unavailable.
    Theoretically, phone battery data shall be collected 24/7.

    Does nothing when the phone-battery stream (config["stream_names"]["phone_battery"]) is not
    present in all_streams, or when its "stream_ids" entry is not a list.

    :param all_streams: mapping of stream name to a dict that holds at least "stream_ids"
    :param stream_name: name recorded in the input-stream description
    :param owner_id: owner of the streams
    :param CC: CerebralCortex object used to load streams, store results and log errors
    :param config: configuration; reads "stream_names", "algo_type" and
                   config['general']['window_size']
    """
    marker_version = "0.0.1"

    battery_stream_name = config["stream_names"]["phone_battery"]
    if battery_stream_name not in all_streams:
        return

    raw_stream_ids = all_streams[battery_stream_name]["stream_ids"]
    dd_stream_name = config["stream_names"]["app_availability_marker"]

    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker
    app_availability_marker_stream_id = generate_dd_stream_uuid(
        dd_stream_name, marker_version, owner_id,
        "MOBILE APP AVAILABILITY MARKER")
    input_streams = [{
        "owner_id": owner_id,
        "id": raw_stream_ids,
        "name": stream_name
    }]
    output_stream = {
        "id": app_availability_marker_stream_id,
        "name": dd_stream_name,
        "algo_type": config["algo_type"]["app_availability_marker"]
    }
    metadata = get_metadata(dd_stream_name, input_streams, config)

    if not isinstance(raw_stream_ids, list):
        return

    for raw_stream_id in raw_stream_ids:
        stream_days = CC.get_stream_days(
            raw_stream_id, app_availability_marker_stream_id, CC)

        for day in stream_days:
            # One failing day is logged and skipped so the remaining days are still processed.
            try:
                stream = CC.get_stream(raw_stream_id,
                                       day=day,
                                       data_type=DataSet.COMPLETE)
                if len(stream.data) > 0:
                    windowed_data = window(
                        stream.data, config['general']['window_size'],
                        True)
                    results = process_windows(windowed_data, config)

                    merged_windows = merge_consective_windows(results)
                    if len(merged_windows) > 0:
                        store(merged_windows, input_streams,
                              output_stream, metadata, CC, config)

            except Exception as e:
                # The message needs five placeholders for its five arguments; with only four,
                # the % formatting itself raised TypeError inside this handler.
                CC.logging.log(
                    "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                    % (str(owner_id), str(raw_stream_id),
                       str(stream_name), str(day), str(e)))
def get_datastream(CC:CerebralCortex,
                   identifier:str,
                   day:str,
                   user_id:str,
                   localtime:bool)->List[DataPoint]:
    """
    Collect the datapoints of every stream of a user that matches an identifier, for one day.

    :param CC: CerebralCortex object used to resolve stream IDs and load data
    :param identifier: value passed to CC.get_stream_id to look up the user's streams
    :param day: day to load
    :param user_id: owner of the streams
    :param localtime: forwarded unchanged to CC.get_stream
    :return: datapoints of all matching streams, concatenated in lookup order
    """
    collected = []
    for stream_entry in CC.get_stream_id(user_id, identifier):
        fetched = CC.get_stream(stream_id=stream_entry['identifier'],
                                user_id=user_id,
                                day=day,
                                localtime=localtime)
        # Streams without data for this day contribute nothing.
        if len(fetched.data) == 0:
            continue
        collected.extend(fetched.data)
    return collected
def sensor_availability(all_streams, wrist: str, owner_id: uuid, CC: CerebralCortex, config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away from phone or sensor.

    The raw input is the accelerometer stream named by
    config["stream_names"]["motionsense_hrv_accel_" + wrist]. Nothing is processed unless the
    phone physical-activity stream is present in all_streams and the raw "stream_ids" entry is a list.

    :param all_streams: mapping of stream name to a dict that holds "stream_ids" and "name"
    :param wrist: wrist label used to build the config stream-name keys
    :param owner_id: owner of the streams
    :param CC: CerebralCortex object used to load streams, store results and log errors
    :param config: configuration; reads "stream_names", "algo_type" and
                   config['general']['window_size']
    :raises KeyError: if the accelerometer stream is missing from all_streams or config
    """
    marker_version = "0.0.1"

    activity_stream_name = config["stream_names"]["phone_physical_activity"]
    has_phone_activity = activity_stream_name in all_streams
    if has_phone_activity:
        phone_physical_activity = all_streams[activity_stream_name]["stream_ids"]
    else:
        phone_physical_activity = None

    key0 = "motionsense_hrv_accel_"+wrist
    key1 = "motionsense_hrv_"+wrist+"_wireless_marker"

    # No trailing comma after this lookup: a stray comma wrapped the list in a one-element tuple,
    # so the isinstance(..., list) check below never passed and no stream was ever processed.
    raw_stream_ids = all_streams[config["stream_names"][key0]]["stream_ids"]
    stream_name = all_streams[config["stream_names"][key0]]["name"]
    dd_stream_name = config["stream_names"][key1]

    if not has_phone_activity:
        return

    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker
    wireless_marker_stream_id = generate_dd_stream_uuid(dd_stream_name, marker_version, owner_id, "WIRELESS DISCONNECTION MARKER")
    input_streams = [{"owner_id": owner_id, "id": raw_stream_ids, "name": stream_name}]
    output_stream = {"id": wireless_marker_stream_id, "name": dd_stream_name,
                     "algo_type": config["algo_type"]["sensor_unavailable_marker"]}
    metadata = get_metadata(dd_stream_name, input_streams, config)

    if not isinstance(raw_stream_ids, list):
        return

    for raw_stream_id in raw_stream_ids:
        stream_days = CC.get_stream_days(raw_stream_id, wireless_marker_stream_id, CC)

        for day in stream_days:
            # One failing day is logged and skipped so the remaining days are still processed.
            try:
                # load stream data to be diagnosed
                raw_stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
                if len(raw_stream.data) > 0:
                    windowed_data = window(raw_stream.data, config['general']['window_size'], True)
                    results = process_windows(windowed_data, day, CC, phone_physical_activity, config)
                    merged_windows = merge_consective_windows(results)

                    if len(merged_windows) > 0:
                        store(merged_windows, input_streams, output_stream, metadata, CC, config)
            except Exception as e:
                # Five placeholders for five arguments; with four, the formatting itself raised TypeError.
                CC.logging.log("Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                               % (str(owner_id), str(raw_stream_id), str(stream_name), str(day), str(e)))
Exemple #8
0
def battery_marker(raw_stream_id: uuid, stream_name: str, user_id,
                   dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Use battery percentages to decide whether the device was powered-off or the battery was low.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.

    :param raw_stream_id: ID of the raw battery stream (UUID object or its string form)
    :param stream_name: name of the raw stream; also passed to process_windows
    :param user_id: owner of the stream (UUID object or its string form)
    :param dd_stream_name: name of the data-diagnostic (output) stream
    :param CC: CerebralCortex object used to load and store streams
    :param config: configuration; reads config['general']['window_size'] and
                   config['algo_type']['battery_marker']
    :return: None. Any exception is printed and swallowed (best-effort processing).
    """

    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker.
        # Each part is converted to str *before* concatenating: adding a uuid.UUID to a str raises
        # TypeError, which the handler below would silently swallow.
        marker_name = (str(raw_stream_id) + str(dd_stream_name) + str(user_id)
                       + "BATTERY MARKER")
        battery_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, marker_name)

        stream_days = get_stream_days(raw_stream_id, battery_marker_stream_id,
                                      CC)

        for day in stream_days:
            stream = CC.get_stream(raw_stream_id,
                                   day=day,
                                   data_type=DataSet.COMPLETE)
            if len(stream.data) == 0:
                continue

            windowed_data = window(stream.data,
                                   config['general']['window_size'], True)
            results = process_windows(windowed_data, stream_name, config)

            merged_windows = merge_consective_windows(results)
            if len(merged_windows) == 0:
                continue

            input_streams = [{
                "owner_id": user_id,
                "id": str(raw_stream_id),
                "name": stream_name
            }]
            output_stream = {
                "id": battery_marker_stream_id,
                "name": dd_stream_name,
                "algo_type": config["algo_type"]["battery_marker"]
            }
            labelled_windows = mark_windows(battery_marker_stream_id,
                                            merged_windows, CC, config)
            metadata = get_metadata(dd_stream_name, input_streams,
                                    config)
            store(labelled_windows, input_streams, output_stream,
                  metadata, CC, config)
    except Exception as e:
        # Best-effort: a failure must not abort the caller's processing run.
        print(e)
def sensor_availability(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                        dd_stream_name, phone_physical_activity,
                        CC: CerebralCortex, config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away from phone or sensor.

    :param raw_stream_id: ID of the raw stream to diagnose (UUID object or its string form)
    :param stream_name: name of the raw stream, recorded in the input-stream description
    :param owner_id: owner of the stream (UUID object or its string form)
    :param dd_stream_name: name of the data-diagnostic (output) stream
    :param phone_physical_activity: forwarded unchanged to process_windows
    :param CC: CerebralCortex object used to load and store streams
    :param config: configuration; reads config['general']['window_size'] and
                   config['algo_type']['sensor_unavailable_marker']
    """

    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker.
    # Each part is converted to str *before* concatenating: adding a uuid.UUID to a str raises TypeError.
    marker_name = str(raw_stream_id) + str(dd_stream_name) + str(owner_id)
    wireless_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, marker_name)

    stream_days = get_stream_days(raw_stream_id, wireless_marker_stream_id, CC)

    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_stream(raw_stream_id,
                                   day=day,
                                   data_type=DataSet.COMPLETE)
        if len(raw_stream.data) == 0:
            continue

        windowed_data = window(raw_stream.data,
                               config['general']['window_size'], True)
        results = process_windows(windowed_data, day, CC,
                                  phone_physical_activity, config)
        merged_windows = merge_consective_windows(results)
        if len(merged_windows) == 0:
            continue

        input_streams = [{
            "owner_id": owner_id,
            "id": str(raw_stream_id),
            "name": stream_name
        }]
        output_stream = {
            "id": wireless_marker_stream_id,
            "name": dd_stream_name,
            "algo_type":
            config["algo_type"]["sensor_unavailable_marker"]
        }
        metadata = get_metadata(dd_stream_name, input_streams, config)
        store(merged_windows, input_streams, output_stream, metadata,
              CC, config)
def attachment_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                      dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.

    Unlike the sibling markers, the merged windows of a day are stored even when they are empty.

    :param raw_stream_id: ID of the raw stream to diagnose (UUID object or its string form)
    :param stream_name: name of the raw stream, recorded in the input-stream description
    :param owner_id: owner of the stream (UUID object or its string form)
    :param dd_stream_name: name of the data-diagnostic (output) stream
    :param CC: CerebralCortex object used to load and store streams
    :param config: configuration; reads config['general']['window_size'] and
                   config['algo_type']['attachment_marker']
    """
    # TODO: quality streams could be multiple so find the one computed with CC
    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker.
    # Each part is converted to str *before* concatenating: adding a uuid.UUID to a str raises TypeError.
    marker_name = (str(raw_stream_id) + str(dd_stream_name) + str(owner_id)
                   + "ATTACHMENT MARKER")
    attachment_marker_stream_id = uuid.uuid3(uuid.NAMESPACE_DNS, marker_name)

    stream_days = get_stream_days(raw_stream_id, attachment_marker_stream_id,
                                  CC)

    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_stream(raw_stream_id,
                                   day=day,
                                   data_type=DataSet.COMPLETE)
        if len(raw_stream.data) == 0:
            continue

        windowed_data = window(raw_stream.data,
                               config['general']['window_size'], True)
        results = process_windows(windowed_data, config)
        merged_windows = merge_consective_windows(results)

        input_streams = [{
            "owner_id": owner_id,
            "id": str(raw_stream_id),
            "name": stream_name
        }]
        output_stream = {
            "id": attachment_marker_stream_id,
            "name": dd_stream_name,
            "algo_type": config["algo_type"]["attachment_marker"]
        }
        metadata = get_metadata(dd_stream_name, input_streams, config)
        store(merged_windows, input_streams, output_stream, metadata, CC,
              config)
def write_data_file(file, streams, user, s):
    """
    Export every datapoint of streams[s] to a sorted, gzip-compressed CSV at ``file + '.gz'``.

    Each row is ``start_us,end_us,offset,sample...`` where the times are microsecond timestamps
    and end_us is -1 when the datapoint has no datetime end time. Rows are first written to
    ``file + '_temp'``, then piped through the shell ``sort`` and ``gzip`` commands, and the
    temporary file is removed. If ``file + '.gz'`` already exists nothing is done.

    :param file: output path without the '.gz' suffix
    :param streams: mapping whose entry ``s`` holds 'stream_ids' and 'name'
    :param user: dict holding the user's 'identifier'
    :param s: key of the stream to export in ``streams``
    :return: always True
    """
    import shlex  # local import: only needed to quote the paths handed to the shell

    cc = CerebralCortex(
        "/cerebralcortex/code/config/cc_starwars_configuration.yml")

    if os.path.isfile(file + '.gz'):
        print("Already Processed %s" % file + '.gz')
        return True

    temp_path = file + '_temp'
    with open(temp_path, 'wt') as output_file:
        for stream_id in streams[s]['stream_ids']:
            logger.info('Processing %s' % streams[s]['name'])
            print('Processing %s' % streams[s]['name'])
            days = get_stream_days(cc, stream_id, streams[s])
            for day in days:
                st = datetime.datetime.now()

                print("XXXXXXXXXX", streams[s]['name'], user['identifier'],
                      stream_id, day)

                datastream = cc.get_stream(stream_id,
                                           user['identifier'],
                                           day,
                                           localtime=False)
                et = datetime.datetime.now()
                if len(datastream.data) == 0:
                    continue

                # Report the load time of unusually large days.
                if len(datastream.data) > 100000:
                    timing = '%s %s %d %s' % (streams[s]['name'], day,
                                              len(datastream.data),
                                              str(et - st))
                    logger.info(timing)
                    print(timing)

                try:
                    for d in datastream.data:
                        fields = [str(int(d.start_time.timestamp() * 1e6))]

                        # `datetime` is the module in this function (see datetime.datetime.now()
                        # above), so `type(x) is datetime` was never true and every end time
                        # was written as -1. Compare against the class instead.
                        if isinstance(d.end_time, datetime.datetime):
                            fields.append(
                                str(int(d.end_time.timestamp() * 1e6)))
                        else:
                            fields.append('-1')

                        fields.append(str(int(d.offset)))

                        if type(d.sample) is list:
                            fields.append(','.join(map(str, d.sample)))
                        else:
                            fields.append(str(d.sample))

                        output_file.write(','.join(fields) + '\n')
                except Exception as e:
                    # A malformed datapoint aborts this day only; the export continues.
                    parse_error = ("Stream %s has had a parsing error" %
                                   streams[s]['name'])
                    logger.error(parse_error)
                    print(parse_error)
                    logger.error(str(e))
                    print(str(e))

    # Quote the paths so spaces or shell metacharacters in `file` cannot break the pipeline.
    os.system('sort ' + shlex.quote(temp_path) + ' | gzip > ' +
              shlex.quote(file + '.gz'))
    os.remove(temp_path)

    return True
# Script fragment: scans one user's activity corrupt-data stream day by day and prints the
# first non-empty list of corrupt datapoints it finds.
# NOTE(review): start_date, date_format, activity_stream_name and CC are not defined in this
# excerpt; presumably they are set earlier in the script - confirm.

# End of the date range, parsed with the same format that is used to render each day below.
end_date = '20180530'
end_date = datetime.strptime(end_date, date_format)

# Build the list of day strings from start_date onwards, one day at a time.
# The append runs before the bound check, so the initial start_date is always included.
all_days = []
while True:
    all_days.append(start_date.strftime(date_format))
    start_date += timedelta(days = 1)
    if start_date > end_date : break

usr = '******'

stream_ids = get_latest_stream_id(usr, activity_stream_name)

print(stream_ids)
if not len(stream_ids):
    # NOTE(review): `strm` is not assigned anywhere in this excerpt; unless it is defined
    # earlier in the script this line raises NameError - confirm.
    print('Y'*100,usr, strm)

# NOTE(review): there is no early exit above, so an empty stream_ids raises IndexError here.
strm_id = stream_ids[0]['identifier']
# NOTE(review): the two counters below are never updated in this excerpt.
stream_dps_count = 0
stream_corrupt_dps_count = 0
for day in all_days:
    ds = CC.get_stream(strm_id, usr, day)
    if len(ds.data):
        # Only the first datapoint of the day is inspected; judging by the variable names,
        # sample[0] is the day's datapoint count and sample[1] the corrupt datapoints - TODO confirm.
        dp = ds.data[0]
        num_day_dps = dp.sample[0]
        num_day_corrupt_dps = len(dp.sample[1])
        if num_day_corrupt_dps:
            # Stop at the first day that has corrupt datapoints and show them.
            print(dp.sample[1])
            break
            
Exemple #13
0
def attachment_marker(all_streams, wrist, owner_id: uuid, CC: CerebralCortex,
                      config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.

    The raw input is the LED quality stream named by
    config["stream_names"]["motionsense_hrv_led_quality_" + wrist]. Does nothing when that
    stream is not present in all_streams or when its "stream_ids" entry is not a list.
    The merged windows of a day are stored even when they are empty.

    :param all_streams: mapping of stream name to a dict that holds "stream_ids" and "name"
    :param wrist: wrist label used to build the config stream-name keys
    :param owner_id: owner of the streams
    :param CC: CerebralCortex object used to load streams, store results and log errors
    :param config: configuration; reads "stream_names", "algo_type" and
                   config['general']['window_size']
    """
    marker_version = "0.0.1"

    # TODO: quality streams could be multiple so find the one computed with CC
    # using stream_id, data-diagnostic-stream-id, and owner id to generate a unique stream ID for battery-marker

    key0 = "motionsense_hrv_led_quality_" + wrist
    key1 = "motionsense_hrv_" + wrist + "_attachment_marker"
    quality_stream_name = config["stream_names"][key0]
    dd_stream_name = config["stream_names"][key1]

    # Check for the stream *before* indexing all_streams: the lookups used to run first, so a
    # missing stream raised KeyError and this guard could never take effect.
    if quality_stream_name not in all_streams:
        return

    raw_stream_ids = all_streams[quality_stream_name]["stream_ids"]
    stream_name = all_streams[quality_stream_name]["name"]

    attachment_marker_stream_id = generate_dd_stream_uuid(
        dd_stream_name, marker_version, owner_id, "ATTACHMENT MARKER")
    input_streams = [{
        "owner_id": owner_id,
        "id": raw_stream_ids,
        "name": stream_name
    }]
    output_stream = {
        "id": attachment_marker_stream_id,
        "name": dd_stream_name,
        "algo_type": config["algo_type"]["attachment_marker"]
    }
    metadata = get_metadata(dd_stream_name, input_streams, config)

    if not isinstance(raw_stream_ids, list):
        return

    for raw_stream_id in raw_stream_ids:
        stream_days = CC.get_stream_days(raw_stream_id,
                                         attachment_marker_stream_id,
                                         CC)

        for day in stream_days:
            # One failing day is logged and skipped so the remaining days are still processed.
            try:
                # load stream data to be diagnosed
                raw_stream = CC.get_stream(raw_stream_id,
                                           day=day,
                                           data_type=DataSet.COMPLETE)

                if len(raw_stream.data) > 0:
                    windowed_data = window(
                        raw_stream.data,
                        config['general']['window_size'], True)
                    results = process_windows(windowed_data, config)
                    merged_windows = merge_consective_windows(results)

                    store(merged_windows, input_streams, output_stream,
                          metadata, CC, config)
            except Exception as e:
                # Five placeholders for five arguments; with four, the formatting itself
                # raised TypeError inside this handler.
                CC.logging.log(
                    "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                    % (str(owner_id), str(raw_stream_id),
                       str(stream_name), str(day), str(e)))
Exemple #14
0
def _count_possible_accl_samples(corrupt_dps):
    """Count corrupt datapoints whose sample is a 3-element list with all values in the accelerometer range."""
    possible = 0
    for corrupt_dp in corrupt_dps:
        if type(corrupt_dp.sample) is list and len(corrupt_dp.sample) == 3:
            try:
                if all(value >= MIN_ACCL_VAL and value <= MAX_ACCL_VAL
                       for value in corrupt_dp.sample):
                    possible += 1
            except Exception as e:
                # Values that cannot be compared are reported and not counted.
                print(corrupt_dp)
                print(str(e))
    return possible


def get_corrupt_data_count(userid, all_days, cc_config_path):
    """
    Summarise the corrupt-data streams of one user over the given days.

    For each of a fixed list of ``*_corrupt_data`` streams, the first datapoint of every day is
    read: sample[0] is added to the total datapoint count and len(sample[1]) to the corrupt
    count. Corrupt datapoints that look like 3-axis values within
    [MIN_ACCL_VAL, MAX_ACCL_VAL] are counted separately as possible accelerometer/gyroscope
    samples. A stream for which no stream ID is found is reported with zero counts.

    Side effects:
      * /tmp/corruption_count/<userid>.pickle   - pickled result dictionary
      * /tmp/corruption_per_day/<userid>.pickle - tab-separated per-day rows (plain text,
        despite the extension)

    :param userid: user identifier; also used to build the output file names
    :param all_days: iterable of days to inspect
    :param cc_config_path: configuration passed to the CerebralCortex constructor
    :return: dict mapping stream name to [total datapoints, corrupt datapoints,
             possible accel/gyro datapoints]
    """
    stream_names = [
        'CU_SMS_LENGTH--edu.dartmouth.eureka_corrupt_data',
        'CU_CALL_DURATION--edu.dartmouth.eureka_corrupt_data',
        'PROXIMITY--org.md2k.phonesensor--PHONE_corrupt_data',
        'CU_APPUSAGE--edu.dartmouth.eureka_corrupt_data',
        'AMBIENT_LIGHT--org.md2k.phonesensor--PHONE_corrupt_data',
        'CU_CALL_NUMBER--edu.dartmouth.eureka_corrupt_data',
        'CU_SMS_NUMBER--edu.dartmouth.eureka_corrupt_data',
        'ACTIVITY_TYPE--org.md2k.phonesensor--PHONE_corrupt_data',
        'CU_CALL_TYPE--edu.dartmouth.eureka_corrupt_data',
        'CU_SMS_TYPE--edu.dartmouth.eureka_corrupt_data',
        'LOCATION--org.md2k.phonesensor--PHONE_corrupt_data',
        'GEOFENCE--LIST--org.md2k.phonesensor--PHONE_corrupt_data',
    ]

    CC = CerebralCortex(cc_config_path)

    all_stream_quality = {}

    usr = userid
    # Progress line kept for log compatibility; exactly one user is processed per call.
    print('processing %d of %d' % (0, 1))

    output_per_day_dir = '/tmp/corruption_per_day/'
    # exist_ok avoids the check-then-create race when several users are processed in parallel.
    os.makedirs(output_per_day_dir, exist_ok=True)

    # Per-day rows are collected in a list and joined once at the end.
    day_rows = []
    for strm in stream_names:
        totals = all_stream_quality.setdefault(strm, [0, 0, 0])

        stream_ids = get_latest_stream_id(usr, strm, CC)
        # A user may not have every stream; indexing [0] on an empty result raised IndexError
        # and aborted the whole audit. Report such streams with zero counts instead.
        if len(stream_ids) == 0:
            continue

        strm_id = stream_ids[0]['identifier']

        for day in all_days:
            ds = CC.get_stream(strm_id, usr, day)
            if not len(ds.data):
                continue

            # Only the first datapoint of the day is read.
            dp = ds.data[0]
            num_day_dps = dp.sample[0]
            num_day_corrupt_dps = len(dp.sample[1])
            # check if the corrupted datapoints could be accl or gyro samples
            num_possible_accl_sample = _count_possible_accl_samples(
                dp.sample[1])

            day_rows.append('\t'.join([
                str(usr), str(strm), str(day), str(num_day_dps),
                str(num_day_corrupt_dps), str(num_possible_accl_sample)
            ]) + '\n')

            totals[0] += num_day_dps
            totals[1] += num_day_corrupt_dps
            totals[2] += num_possible_accl_sample

    print(all_stream_quality)

    output_dir = '/tmp/corruption_count/'
    os.makedirs(output_dir, exist_ok=True)
    file_name = usr + '.pickle'

    # Context managers close the files even if a write fails.
    with open(os.path.join(output_dir, file_name), 'wb') as f:
        pickle.dump(all_stream_quality, f)

    with open(os.path.join(output_per_day_dir, file_name), 'w') as f:
        f.write(''.join(day_rows))

    return all_stream_quality
Exemple #15
0
def sensor_failure_marker(all_streams, wrist: str, owner_id: uuid,
                          CC: CerebralCortex, config: dict):
    """
    Flag on-body windows in which exactly one of the MotionSense HRV
    accelerometer / gyroscope streams fails the ``process_windows`` check,
    and store the labelled windows (st, et, label) with their metadata.

    Nothing is done when the attachment-marker, accelerometer or gyroscope
    stream for the given wrist is missing from ``all_streams``.

    :param all_streams: mapping of stream name -> dict holding "name" and "stream_ids"
    :param wrist: wrist suffix used to build the ``config["stream_names"]`` keys
    :param owner_id: owner of the streams being diagnosed
    :param CC: CerebralCortex object used to read stream days/data and to log errors
    :param config: configuration with (at least) "stream_names" and "algo_type" sections
    """
    marker_version = "0.0.1"
    stream_names = config["stream_names"]

    attachment_name = stream_names["motionsense_hrv_" + wrist + "_attachment_marker"]
    accel_name = stream_names["motionsense_hrv_accel_" + wrist]
    gyro_name = stream_names["motionsense_hrv_gyro_" + wrist]
    dd_stream_name = stream_names["motionsense_hrv_" + wrist + "_sensor_failure_marker"]

    # Check availability BEFORE indexing all_streams; the original looked the
    # streams up first, so its guard could never prevent the KeyError.
    required = (attachment_name, accel_name, gyro_name)
    if any(name not in all_streams for name in required):
        return

    stream_name = all_streams[attachment_name]["name"]
    raw_stream_ids = all_streams[attachment_name]["stream_ids"]
    mshrv_accel_id = all_streams[accel_name]["stream_ids"]
    mshrv_gyro_id = all_streams[gyro_name]["stream_ids"]

    # marker name, version and owner id produce a stable ID for the output stream
    sensor_failure_stream_id = generate_dd_stream_uuid(
        dd_stream_name, marker_version, owner_id, "SENSOR FAILURE MARKER")
    input_streams = [{
        "owner_id": owner_id,
        "id": raw_stream_ids,
        "name": stream_name
    }]
    output_stream = {
        "id": sensor_failure_stream_id,
        "name": dd_stream_name,
        "algo_type": config["algo_type"]["sensor_failure"]
    }
    metadata = get_metadata(dd_stream_name, input_streams, config)

    if not isinstance(raw_stream_ids, list):
        return

    for raw_stream_id in raw_stream_ids:
        stream_days = CC.get_stream_days(raw_stream_id,
                                         sensor_failure_stream_id, CC)
        for day in stream_days:
            try:
                # load the attachment-marker windows to be diagnosed
                attachment_marker_stream = CC.get_stream(
                    raw_stream_id, day, data_type=DataSet.COMPLETE)
                if not attachment_marker_stream.data:
                    continue

                results = OrderedDict()
                for marker_window in attachment_marker_stream.data:
                    # Only on-body windows are diagnosed. Skipping the rest
                    # avoids reusing the sensor data of a previous window
                    # (or hitting an unbound name on the first window).
                    if "MOTIONSENSE-ON-BODY" not in marker_window.sample:
                        continue

                    window_start = marker_window.start_time
                    window_end = marker_window.end_time
                    mshrv_accel_stream = CC.get_stream(
                        mshrv_accel_id,
                        day,
                        start_time=window_start,
                        end_time=window_end,
                        data_type=DataSet.ONLY_DATA)
                    mshrv_gyro_stream = CC.get_stream(
                        mshrv_gyro_id,
                        day,
                        start_time=window_start,
                        end_time=window_end,
                        data_type=DataSet.ONLY_DATA)

                    results_accel = process_windows(mshrv_accel_stream,
                                                    config)
                    results_gyro = process_windows(mshrv_gyro_stream, config)

                    # NOTE(review): the meaning of the process_windows score
                    # is defined elsewhere; the thresholds and the label
                    # assignment are kept exactly as they were - confirm.
                    if results_accel > 0 and results_gyro < 1:
                        sample = "MOTIONSENE-HRV-" + str(
                            wrist) + "ACCELEROMETER-FAILURE"
                    elif results_accel < 1 and results_gyro > 0:
                        sample = "MOTIONSENE-HRV-" + str(
                            wrist) + "GYRO-FAILURE"
                    else:
                        continue

                    # setdefault: a plain results[key].append(...) raised
                    # KeyError because the key was never initialised.
                    # NOTE(review): confirm merge_consective_windows accepts
                    # a list of DataPoints per key.
                    key = window_start, window_end
                    results.setdefault(key, []).append(
                        DataPoint(window_start, window_end, sample))

                if len(results) > 0:
                    merged_windows = merge_consective_windows(results)
                    store(merged_windows, input_streams, output_stream,
                          metadata, CC, config)
            except Exception as e:
                # five placeholders for five values; the original had four and
                # therefore raised TypeError inside the handler itself
                CC.logging.log(
                    "Error processing: owner-id: %s, stream-id: %s, Algo-name: %s, day: %s. Error: %s"
                    % (str(owner_id), str(raw_stream_id),
                       "sensor_failure_marker", str(day), str(e)))
def sensor_failure_marker(attachment_marker_stream_id: uuid,
                          mshrv_accel_id: uuid, mshrv_gyro_id: uuid,
                          wrist: str, owner_id: uuid, dd_stream_name,
                          CC: CerebralCortex, config: dict):
    """
    Flag on-body windows in which exactly one of the MotionSense HRV
    accelerometer / gyroscope streams fails the ``process_windows`` check,
    and store the labelled windows (st, et, label) with their metadata.

    Each day is processed independently: an error on one day is printed and
    the remaining days are still processed.

    :param attachment_marker_stream_id: id of the attachment-marker stream to diagnose
    :param mshrv_accel_id: id of the MotionSense HRV accelerometer stream
    :param mshrv_gyro_id: id of the MotionSense HRV gyroscope stream
    :param wrist: wrist name embedded in the stored label
    :param owner_id: owner of the streams being diagnosed
    :param dd_stream_name: name of the output (data-diagnostic) stream
    :param CC: CerebralCortex object used to read stream data
    :param config: configuration with (at least) an "algo_type" section
    """

    # Input stream id, output stream name and owner id seed a stable ID for
    # the sensor-failure stream. Every part is converted with str() first so
    # uuid.UUID arguments work too; for plain strings the seed is unchanged.
    sensor_failure_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(attachment_marker_stream_id) + str(dd_stream_name) +
        str(owner_id) + "SENSOR FAILURE MARKER")

    stream_days = get_stream_days(attachment_marker_stream_id,
                                  sensor_failure_stream_id, CC)

    for day in stream_days:
        try:
            # load the attachment-marker windows to be diagnosed
            attachment_marker_stream = CC.get_stream(
                attachment_marker_stream_id, day, data_type=DataSet.COMPLETE)
            if not attachment_marker_stream.data:
                continue

            results = OrderedDict()
            for marker_window in attachment_marker_stream.data:
                # Only on-body windows are diagnosed. Skipping the rest avoids
                # reusing the sensor data of a previous window (or hitting an
                # unbound name on the first window).
                if "MOTIONSENSE-ON-BODY" not in marker_window.sample:
                    continue

                window_start = marker_window.start_time
                window_end = marker_window.end_time
                mshrv_accel_stream = CC.get_stream(
                    mshrv_accel_id,
                    day,
                    start_time=window_start,
                    end_time=window_end,
                    data_type=DataSet.ONLY_DATA)
                mshrv_gyro_stream = CC.get_stream(
                    mshrv_gyro_id,
                    day,
                    start_time=window_start,
                    end_time=window_end,
                    data_type=DataSet.ONLY_DATA)

                results_accel = process_windows(mshrv_accel_stream, config)
                results_gyro = process_windows(mshrv_gyro_stream, config)

                # NOTE(review): the meaning of the process_windows score is
                # defined elsewhere; the thresholds and the label assignment
                # are kept exactly as they were - confirm.
                if results_accel > 0 and results_gyro < 1:
                    sample = "MOTIONSENE-HRV-" + str(
                        wrist) + "ACCELEROMETER-FAILURE"
                elif results_accel < 1 and results_gyro > 0:
                    sample = "MOTIONSENE-HRV-" + str(wrist) + "GYRO-FAILURE"
                else:
                    continue

                # setdefault: a plain results[key].append(...) raised KeyError
                # because the key was never initialised.
                # NOTE(review): confirm merge_consective_windows accepts a
                # list of DataPoints per key.
                key = window_start, window_end
                results.setdefault(key, []).append(
                    DataPoint(window_start, window_end, sample))

            if len(results) > 0:
                # merge once per day instead of after every single window
                merged_windows = merge_consective_windows(results)
                input_streams = [{
                    "owner_id": owner_id,
                    "id": str(attachment_marker_stream_id),
                    "name": attachment_marker_stream.name
                }]
                output_stream = {
                    "id": sensor_failure_stream_id,
                    "name": dd_stream_name,
                    "algo_type": config["algo_type"]["sensor_failure"]
                }
                metadata = get_metadata(dd_stream_name, input_streams,
                                        config)
                store(merged_windows, input_streams, output_stream, metadata,
                      CC, config)
        except Exception as e:
            # best effort: report the failure and carry on with the next day
            print(e)
def battery_marker(all_streams, owner_id, stream_name, CC: CerebralCortex,
                   config: dict):
    """
    This algorithm uses battery percentages to decide whether device was powered-off or battery was low.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.

    Nothing is done when ``stream_name`` is not present in ``all_streams`` or
    when the battery stream ids are not a list. Errors raised while processing
    a day are logged through ``CC.logging`` and the remaining days are still
    processed.

    :param all_streams: mapping of stream name -> dict holding "stream_ids"
    :param owner_id: owner of the battery stream
    :param stream_name: name of the battery stream to diagnose
    :param CC: CerebralCortex object used to read stream days/data and to log errors
    :param config: configuration with "stream_names", "algo_type" and "general" sections
    """
    marker_version = "0.0.1"

    if stream_name not in all_streams:
        return

    raw_stream_ids = all_streams[config["stream_names"]
                                 ["phone_battery"]]["stream_ids"]
    dd_stream_name = config["stream_names"]["phone_battery_marker"]

    # marker name, version and owner id produce a stable ID for the battery-marker stream
    battery_marker_stream_id = generate_dd_stream_uuid(
        dd_stream_name, marker_version, owner_id, "BATTERY MARKER")

    input_streams = [{
        "owner_id": owner_id,
        "id": raw_stream_ids,
        "name": stream_name
    }]
    output_stream = {
        "id": battery_marker_stream_id,
        "name": dd_stream_name,
        "algo_type": config["algo_type"]["battery_marker"]
    }
    metadata = get_metadata(dd_stream_name, input_streams, config)

    if not isinstance(raw_stream_ids, list):
        return

    for raw_stream_id in raw_stream_ids:
        stream_days = CC.get_stream_days(raw_stream_id,
                                         battery_marker_stream_id, CC)

        for day in stream_days:
            try:
                stream = CC.get_stream(raw_stream_id,
                                       day=day,
                                       data_type=DataSet.COMPLETE)
                if not stream.data:
                    continue

                windowed_data = window(stream.data,
                                       config['general']['window_size'], True)
                results = process_windows(windowed_data, stream_name, config)

                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    labelled_windows = mark_windows(battery_marker_stream_id,
                                                    merged_windows, CC,
                                                    config)
                    store(labelled_windows, input_streams, output_stream,
                          metadata, CC, config)
            except Exception as e:
                # five placeholders for five values; the original had four and
                # therefore raised TypeError inside the handler itself,
                # hiding the real error
                CC.logging.log(
                    "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                    % (str(owner_id), str(raw_stream_id), str(stream_name),
                       str(day), str(e)))
class SqlToCCStream():
    """
    Build per-user weather data streams from weather rows stored in SQL.

    For every active user and every day of their phone location stream, the
    median latitude/longitude is matched to the closest known weather
    location and that location's weather rows are saved as a DataStream.
    """

    def __init__(self, config):
        """
        Create the CerebralCortex and SQL handles and immediately run
        :meth:`process`.

        :param config: configuration passed to ``CerebralCortex``
        """
        self.CC = CerebralCortex(config)
        self.config = self.CC.config
        self.sqlData = SqlData(self.config,
                               dbName="environmental_data_collection")
        self.process()

    def process(self):
        """
        Generate and store a weather stream for every (user, location
        stream, day) combination that has usable location data.

        Stream metadata is read from ``weather_data.json`` in the current
        working directory.
        """
        user_ids = self.filter_user_ids()
        # get all locations lats/longs
        all_locations = self.sqlData.get_latitude_llongitude()
        with open("weather_data.json", "r") as metadata_file:
            metadata = json.load(metadata_file)
        input_stream_name = 'LOCATION--org.md2k.phonesensor--PHONE'

        for uid in user_ids:
            stream_ids = self.CC.get_stream_id(uid, input_stream_name)
            if not stream_ids:
                continue
            print("Processing:", uid)
            for stream_info in stream_ids:
                sid = stream_info["identifier"]
                for day in self.CC.get_stream_days(sid):
                    print("User ID, Stream ID, Day", uid, sid, day)
                    self._process_day(uid, sid, day, input_stream_name,
                                      all_locations, metadata)

    def _process_day(self, uid, sid, day, input_stream_name, all_locations,
                     metadata):
        """Store the weather stream for one user/location-stream/day, if any data matches."""
        # get gps data from stream-name 'LOCATION--org.md2k.phonesensor--PHONE'
        location_stream = self.CC.get_stream(stream_id=sid, day=day)
        if not location_stream.data:
            return

        # compute median on lat. and long. vals; (0, 0) means "no valid sample"
        user_loc = self.compute_lat_long_median(location_stream.data)
        if user_loc == (0, 0):
            return
        offset = location_stream.data[0].offset

        # get weather data for the matching lat/long values
        location_id = self.get_location_id(user_loc, all_locations)
        if location_id is None:
            return

        formated_day = datetime.strptime(day, "%Y%m%d").strftime("%Y-%m-%d")
        weather_data = self.sqlData.get_weather_data_by_city_id(
            location_id, formated_day)

        # record which input stream the weather stream was derived from; this
        # must happen before the UUID seed is built because the execution
        # context is part of that seed
        execution_context = metadata["execution_context"]
        execution_context["processing_module"]["input_streams"] = [{
            "id": sid,
            "name": input_stream_name
        }]

        dps = [
            DataPoint(wd["start_time"], None, offset,
                      self._weather_sample(wd)) for wd in weather_data
        ]
        if not dps:
            return

        # The seed deliberately keeps the doubled "weather-data-stream" token:
        # removing it would change the UUID of every stream generated so far.
        seed = "".join([
            str(metadata["data_descriptor"]),
            str(execution_context),
            str(metadata["annotations"]),
            "weather-data-stream",
            "weather-data-stream",
            str(uid),
            str(sid),
        ])
        output_stream_id = str(uuid.uuid3(uuid.NAMESPACE_DNS, seed))

        ds = DataStream(identifier=output_stream_id,
                        owner=uid,
                        name=metadata["name"],
                        data_descriptor=metadata["data_descriptor"],
                        execution_context=execution_context,
                        annotations=metadata["annotations"],
                        stream_type=metadata["type"],
                        data=dps)

        # store data stream
        self.CC.save_stream(ds)

    @staticmethod
    def _weather_sample(wd):
        """Turn one weather row into the flat sample list stored in a DataPoint."""
        temperature = json.loads(wd["temperature"])
        # "or {}" keeps the .get() calls below safe if the JSON value is null
        wind = json.loads(wd["wind"]) or {}
        # difference in hours
        day_light_duration = (wd["sunset"] - wd["sunrise"]).seconds / 3600
        return [
            wd["sunrise"],
            wd["sunset"],
            day_light_duration,
            wind.get("deg", float('nan')),
            wind.get("speed", float('nan')),
            temperature["temp"],
            temperature["temp_max"],
            temperature["temp_min"],
            int(wd["humidity"]),
            int(wd["clouds"]),
            wd["other"],
        ]

    def compute_lat_long_median(self, data):
        """
        Return the median (latitude, longitude) over all data points whose
        sample is a 6-element list, or ``(0, 0)`` when there is none.

        :param data: iterable of data points exposing a ``sample`` attribute
        """
        samples = [
            dp.sample for dp in data
            if isinstance(dp.sample, list) and len(dp.sample) == 6
        ]
        if not samples:
            return 0, 0
        latitude = statistics.median(sample[0] for sample in samples)
        longitude = statistics.median(sample[1] for sample in samples)
        return latitude, longitude

    def get_location_id(self, user_loc, all_locations):
        """
        Return the id of the weather location closest to ``user_loc``, or
        ``None`` when there is no location within 30 miles (including the
        case of an empty ``all_locations``).

        :param user_loc: (latitude, longitude) of the user
        :param all_locations: iterable of dicts with "id", "latitude" and "longitude"
        """
        # find distance between user location and weather lat/long
        closest = None
        location_id = None
        for loc in all_locations:
            distance = haversine(
                user_loc, (float(loc["latitude"]), float(loc["longitude"])),
                miles=True)
            if closest is None or distance < closest:
                closest = distance
                location_id = loc["id"]

        # with no candidates "closest" stays None; comparing it to 30 used to
        # raise TypeError
        if closest is not None and closest <= 30:
            return location_id
        return None

    def filter_user_ids(self):
        """
        Return the "mperf" user ids that also have a directory of the same
        name under ``config["data_replay"]["data_dir"]``.
        """
        all_users = {
            user["identifier"] for user in self.CC.get_all_users("mperf")
        }
        data_dir = self.config["data_replay"]["data_dir"]
        return [
            owner_dir.name for owner_dir in os.scandir(data_dir)
            if owner_dir.name in all_users
        ]
def packet_loss_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                       dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.

    Nothing is done when ``stream_name`` is not one of the ECG, RIP or
    MotionSense HRV accelerometer/gyroscope streams named in ``config``.

    :param raw_stream_id: id of the raw stream to diagnose
    :param stream_name: name of the raw stream; selects sampling rate, threshold and label
    :param owner_id: owner of the raw stream
    :param dd_stream_name: name of the output (data-diagnostic) stream
    :param CC: CerebralCortex object used to read stream data
    :param config: configuration with "stream_names", "sampling_rate",
        "packet_loss_marker", "labels", "general" and "algo_type" sections
    """
    stream_names = config["stream_names"]

    if stream_name == stream_names["autosense_ecg"]:
        sampling_rate = config["sampling_rate"]["ecg"]
        threshold_val = config["packet_loss_marker"][
            "ecg_acceptable_packet_loss"]
        label = config["labels"]["ecg_packet_loss"]
    elif stream_name == stream_names["autosense_rip"]:
        sampling_rate = config["sampling_rate"]["rip"]
        threshold_val = config["packet_loss_marker"][
            "rip_acceptable_packet_loss"]
        label = config["labels"]["rip_packet_loss"]
    elif (stream_name == stream_names["motionsense_hrv_accel_right"]
          or stream_name == stream_names["motionsense_hrv_accel_left"]):
        sampling_rate = config["sampling_rate"]["motionsense_accel"]
        threshold_val = config["packet_loss_marker"][
            "motionsense_accel_acceptable_packet_loss"]
        # The accelerometer branch used the gyro label (copy-paste). Prefer a
        # dedicated accelerometer label when the config defines one and fall
        # back to the previous value otherwise.
        labels = config["labels"]
        label = labels.get("motionsense_accel_packet_loss",
                           labels["motionsense_gyro_packet_loss"])
    elif (stream_name == stream_names["motionsense_hrv_gyro_right"]
          or stream_name == stream_names["motionsense_hrv_gyro_left"]):
        sampling_rate = config["sampling_rate"]["motionsense_gyro"]
        threshold_val = config["packet_loss_marker"][
            "motionsense_gyro_acceptable_packet_loss"]
        label = config["labels"]["motionsense_gyro_packet_loss"]
    else:
        # Unsupported stream: without a sampling rate/threshold/label there is
        # nothing to diagnose (previously this surfaced as an
        # UnboundLocalError on the first day that had data).
        return

    # Raw stream id, output stream name and owner id seed a stable ID for the
    # packet-loss stream. Every part is converted with str() first so
    # uuid.UUID arguments work too; for plain strings the seed is unchanged.
    packetloss_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(raw_stream_id) + str(dd_stream_name) + str(owner_id) +
        "PACKET LOSS MARKER")

    stream_days = get_stream_days(raw_stream_id, packetloss_marker_stream_id,
                                  CC)

    for day in stream_days:
        # load stream data to be diagnosed
        stream = CC.get_stream(raw_stream_id,
                               day=day,
                               data_type=DataSet.COMPLETE)
        if not stream.data:
            continue

        windowed_data = window(stream.data, config['general']['window_size'],
                               True)
        results = process_windows(windowed_data, sampling_rate, threshold_val,
                                  label, config)
        merged_windows = merge_consective_windows(results)
        if len(merged_windows) > 0:
            input_streams = [{
                "owner_id": owner_id,
                "id": str(raw_stream_id),
                "name": stream_name
            }]
            output_stream = {
                "id": packetloss_marker_stream_id,
                "name": dd_stream_name,
                "algo_type": config["algo_type"]["packet_loss_marker"]
            }
            metadata = get_metadata(dd_stream_name, input_streams, config)
            store(merged_windows, input_streams, output_stream, metadata, CC,
                  config)