def mobile_app_availability_marker(raw_stream_id: uuid, stream_name: str, owner_id, dd_stream_name,
                                   CC: CerebralCortex, config: dict):
    """
    This algorithm uses phone battery percentages to decide whether the mobile app was
    available or unavailable. Theoretically, phone battery data shall be collected 24/7.
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    try:
        # using stream-id, data-diagnostic stream name, and owner id to generate a unique stream ID for the marker
        app_availability_marker_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS,
            str(raw_stream_id) + dd_stream_name + str(owner_id) + "MOBILE APP AVAILABILITY MARKER")

        stream_days = get_stream_days(raw_stream_id, app_availability_marker_stream_id, CC)
        for day in stream_days:
            stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data, config)
                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
                    output_stream = {"id": app_availability_marker_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["app_availability_marker"]}
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(merged_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
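# The process_windows helper used above lives outside this file. Below is a
# minimal illustrative sketch, assuming window() returns an OrderedDict keyed by
# (start_time, end_time) with each window's battery DataPoints as the value, and
# assuming the config "labels" keys shown here exist; both are assumptions, not
# the project's actual implementation.
from collections import OrderedDict

def process_windows_sketch(windowed_data: OrderedDict, config: dict) -> OrderedDict:
    results = OrderedDict()
    for key, data_points in windowed_data.items():
        if len(data_points) > 0:
            # battery samples arrived in this window, so the app was running
            results[key] = config["labels"]["app_available"]
        else:
            results[key] = config["labels"]["app_unavailable"]
    return results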
def compute_data_yield(stream_id: uuid, username: str, report_stream_name: str,
                       CC: CerebralCortex, config: dict):
    """
    Uses the LED quality stream to calculate the total hours of good-quality data per day.
    The LED quality stream carries a data-quality label for every 3-second window.
    """
    data_dir = config["output"]["folder_path"] + "/" + config["reports"]["data_yield_per_day"] + "/"
    data_yield_report = data_dir + username + "_" + report_stream_name + ".csv"
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
        os.mknod(data_yield_report)

    stream_days = get_stream_days(stream_id, CC)
    with open(data_yield_report, "w") as report:
        report.write(report_stream_name + ",,,,\n")
        report.write("day, good hours, total_hours,, \n")
        for day in stream_days:
            # load stream data
            raw_stream = CC.get_stream(stream_id, day=day, data_type=DataSet.ONLY_DATA)
            if len(raw_stream) > 0:
                results = process_stream(raw_stream)
                results = str(day) + "," + results
                report.write(results)
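# process_stream is also external to this file. An illustrative stand-in under
# the docstring's assumption that every DataPoint covers a 3-second quality
# window; the "good" label value is an assumption, not the project's constant.
def process_stream_sketch(data: list) -> str:
    good_windows = sum(1 for dp in data if dp.sample == "good")
    good_hours = good_windows * 3 / 3600.0
    total_hours = len(data) * 3 / 3600.0
    # matches the "day, good hours, total_hours,, " header written above
    return "%0.2f,%0.2f,,\n" % (good_hours, total_hours)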
def get_streams(stream_id: uuid, username: str, wrist: str, CC: CerebralCortex, config: dict):
    """
    Uses the LED quality stream to calculate the per-day breakdown of data quality labels.
    The LED quality stream carries a data-quality label for every 3-second window.
    """
    data_dir = config["output"]["folder_path"] + "/" + config["reports"]["data_yield_per_day"] + "/"
    data_yield_report = data_dir + username + "_" + wrist + ".csv"
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
        os.mknod(data_yield_report)

    stream_days = get_stream_days(stream_id, CC)
    with open(data_yield_report, "w") as report:
        report.write("day, good, noise, bad, band_off, missing, not_worn, band_loose \n")
        for day in stream_days:
            # load stream data
            raw_stream = CC.get_stream(stream_id, day=day, data_type=DataSet.COMPLETE)
            if len(raw_stream.data) > 0:
                results = process_stream(raw_stream.data)
                results = str(day) + "," + results
                report.write(results)
def audit_user_streams(user_id, all_days, cc_config):
    CC = CerebralCortex(cc_config)
    all_user_streams = CC.get_user_streams(user_id)
    userbuf = ''
    for user_stream_key in all_user_streams:
        user_stream = all_user_streams[user_stream_key]
        if 'analysis' not in user_stream['name']:
            continue
        for day in all_days:
            data_points = 0
            for stream_id in user_stream['stream_ids']:
                ds = CC.get_stream(stream_id, user_id, day)
                data_points += len(ds.data)
            userbuf += '%s\t%s\t%s\t%d\n' % (user_id, user_stream['name'], str(day), data_points)

    out_dir = '/tmp/data_audit'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    file_path = os.path.join(out_dir, user_id)
    with open(file_path, 'w') as f:
        f.write(userbuf)
def mobile_app_availability_marker(all_streams, stream_name: str, owner_id, CC: CerebralCortex, config: dict):
    """
    This algorithm uses phone battery percentages to decide whether the mobile app was
    available or unavailable. Theoretically, phone battery data shall be collected 24/7.
    :param all_streams:
    :param stream_name:
    :param owner_id:
    :param CC:
    :param config:
    """
    marker_version = "0.0.1"

    if config["stream_names"]["phone_battery"] in all_streams:
        raw_stream_ids = all_streams[config["stream_names"]["phone_battery"]]["stream_ids"]
        dd_stream_name = config["stream_names"]["app_availability_marker"]

        # using the data-diagnostic stream name, marker version, and owner id to generate a unique stream ID for the marker
        app_availability_marker_stream_id = generate_dd_stream_uuid(
            dd_stream_name, marker_version, owner_id, "MOBILE APP AVAILABILITY MARKER")

        input_streams = [{"owner_id": owner_id, "id": raw_stream_ids, "name": stream_name}]
        output_stream = {"id": app_availability_marker_stream_id, "name": dd_stream_name,
                         "algo_type": config["algo_type"]["app_availability_marker"]}
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(raw_stream_id, app_availability_marker_stream_id, CC)
                for day in stream_days:
                    try:
                        stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
                        if len(stream.data) > 0:
                            windowed_data = window(stream.data, config['general']['window_size'], True)
                            results = process_windows(windowed_data, config)
                            merged_windows = merge_consective_windows(results)
                            if len(merged_windows) > 0:
                                store(merged_windows, input_streams, output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id), str(stream_name), str(day), str(e)))
def get_datastream(CC: CerebralCortex, identifier: str, day: str, user_id: str,
                   localtime: bool) -> List[DataPoint]:
    stream_ids = CC.get_stream_id(user_id, identifier)
    data = []
    for stream_id in stream_ids:
        temp_data = CC.get_stream(stream_id=stream_id['identifier'], user_id=user_id,
                                  day=day, localtime=localtime)
        if len(temp_data.data) > 0:
            data.extend(temp_data.data)
    return data
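# Example call for get_datastream; the config path, user id, and day below are
# placeholders, and the stream name is the phone location stream used elsewhere
# in this codebase.
if __name__ == "__main__":
    cc = CerebralCortex("/path/to/cc_configuration.yml")
    points = get_datastream(cc,
                            identifier="LOCATION--org.md2k.phonesensor--PHONE",
                            day="20180530",
                            user_id="00000000-0000-0000-0000-000000000000",
                            localtime=False)
    print("fetched %d data points" % len(points))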
def sensor_availability(all_streams, wrist: str, owner_id: uuid, CC: CerebralCortex, config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away from phone or sensor.
    :param all_streams:
    :param wrist:
    :param owner_id:
    :param CC:
    :param config:
    """
    marker_version = "0.0.1"

    if config["stream_names"]["phone_physical_activity"] in all_streams:
        phone_physical_activity = all_streams[config["stream_names"]["phone_physical_activity"]]["stream_ids"]
    else:
        phone_physical_activity = None

    key0 = "motionsense_hrv_accel_" + wrist
    key1 = "motionsense_hrv_" + wrist + "_wireless_marker"

    raw_stream_ids = all_streams[config["stream_names"][key0]]["stream_ids"]
    stream_name = all_streams[config["stream_names"][key0]]["name"]
    dd_stream_name = config["stream_names"][key1]

    if config["stream_names"]["phone_physical_activity"] in all_streams:
        # using the data-diagnostic stream name, marker version, and owner id to generate a unique stream ID for the marker
        wireless_marker_stream_id = generate_dd_stream_uuid(dd_stream_name, marker_version, owner_id,
                                                            "WIRELESS DISCONNECTION MARKER")
        input_streams = [{"owner_id": owner_id, "id": raw_stream_ids, "name": stream_name}]
        output_stream = {"id": wireless_marker_stream_id, "name": dd_stream_name,
                         "algo_type": config["algo_type"]["sensor_unavailable_marker"]}
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(raw_stream_id, wireless_marker_stream_id, CC)
                for day in stream_days:
                    try:
                        # load stream data to be diagnosed
                        raw_stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
                        if len(raw_stream.data) > 0:
                            windowed_data = window(raw_stream.data, config['general']['window_size'], True)
                            results = process_windows(windowed_data, day, CC, phone_physical_activity, config)
                            merged_windows = merge_consective_windows(results)
                            if len(merged_windows) > 0:
                                store(merged_windows, input_streams, output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id), str(stream_name), str(day), str(e)))
def battery_marker(raw_stream_id: uuid, stream_name: str, user_id, dd_stream_name,
                   CC: CerebralCortex, config: dict):
    """
    This algorithm uses battery percentages to decide whether the device was powered off
    or the battery was low. All the labeled data (st, et, label) with its metadata are
    then stored in a datastore.
    :param raw_stream_id:
    :param stream_name:
    :param user_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    try:
        # using stream-id, data-diagnostic stream name, and user id to generate a unique stream ID for the battery marker
        battery_marker_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS, str(raw_stream_id) + dd_stream_name + str(user_id) + "BATTERY MARKER")

        stream_days = get_stream_days(raw_stream_id, battery_marker_stream_id, CC)
        for day in stream_days:
            stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
            if len(stream.data) > 0:
                windowed_data = window(stream.data, config['general']['window_size'], True)
                results = process_windows(windowed_data, stream_name, config)
                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": user_id, "id": str(raw_stream_id), "name": stream_name}]
                    output_stream = {"id": battery_marker_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["battery_marker"]}
                    labelled_windows = mark_windows(battery_marker_stream_id, merged_windows, CC, config)
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(labelled_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def sensor_availability(raw_stream_id: uuid, stream_name: str, owner_id: uuid, dd_stream_name,
                        phone_physical_activity, CC: CerebralCortex, config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away from phone or sensor.
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param phone_physical_activity:
    :param CC:
    :param config:
    """
    # using stream-id, data-diagnostic stream name, and owner id to generate a unique stream ID for the marker
    wireless_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id) + dd_stream_name + str(owner_id))

    stream_days = get_stream_days(raw_stream_id, wireless_marker_stream_id, CC)
    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, day, CC, phone_physical_activity, config)
            merged_windows = merge_consective_windows(results)
            if len(merged_windows) > 0:
                input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
                output_stream = {"id": wireless_marker_stream_id, "name": dd_stream_name,
                                 "algo_type": config["algo_type"]["sensor_unavailable_marker"]}
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata, CC, config)
def attachment_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid, dd_stream_name,
                      CC: CerebralCortex, config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    """
    # TODO: quality streams could be multiple so find the one computed with CC
    # using stream-id, data-diagnostic stream name, and owner id to generate a unique stream ID for the marker
    attachment_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id) + dd_stream_name + str(owner_id) + "ATTACHMENT MARKER")

    stream_days = get_stream_days(raw_stream_id, attachment_marker_stream_id, CC)
    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, config)
            merged_windows = merge_consective_windows(results)
            input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
            output_stream = {"id": attachment_marker_stream_id, "name": dd_stream_name,
                             "algo_type": config["algo_type"]["attachment_marker"]}
            metadata = get_metadata(dd_stream_name, input_streams, config)
            store(merged_windows, input_streams, output_stream, metadata, CC, config)
def write_data_file(file, streams, user, s):
    cc = CerebralCortex("/cerebralcortex/code/config/cc_starwars_configuration.yml")

    if os.path.isfile(file + '.gz'):
        print("Already processed %s" % file + '.gz')
        return True

    with open(file + '_temp', 'wt') as output_file:
        for stream_id in streams[s]['stream_ids']:
            logger.info('Processing %s' % streams[s]['name'])
            print('Processing %s' % streams[s]['name'])
            days = get_stream_days(cc, stream_id, streams[s])
            for day in days:
                st = datetime.datetime.now()
                print(streams[s]['name'], user['identifier'], stream_id, day)
                datastream = cc.get_stream(stream_id, user['identifier'], day, localtime=False)
                et = datetime.datetime.now()
                if len(datastream.data) > 0:
                    if len(datastream.data) > 100000:
                        logger.info('%s %s %d %s' % (streams[s]['name'], day, len(datastream.data), str(et - st)))
                        print('%s %s %d %s' % (streams[s]['name'], day, len(datastream.data), str(et - st)))
                    try:
                        for d in datastream.data:
                            output_string = str(int(d.start_time.timestamp() * 1e6))
                            if isinstance(d.end_time, datetime.datetime):
                                output_string += ',' + str(int(d.end_time.timestamp() * 1e6))
                            else:
                                output_string += ',-1'
                            output_string += ',' + str(int(d.offset))
                            if isinstance(d.sample, list):
                                output_string += ',' + ','.join(map(str, d.sample))
                            else:
                                output_string += ',' + str(d.sample)
                            output_file.write(output_string + '\n')
                    except Exception as e:
                        logger.error("Stream %s has had a parsing error" % streams[s]['name'])
                        print("Stream %s has had a parsing error" % streams[s]['name'])
                        logger.error(str(e))
                        print(str(e))

    os.system('sort ' + file + '_temp | gzip > ' + file + '.gz')
    os.system('rm ' + file + '_temp')
    return True
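# A minimal reader for the files produced above. The field layout mirrors the
# writer: start time in microseconds, end time in microseconds (-1 when absent),
# offset, then one or more sample fields; samples are kept as strings here since
# their types vary per stream.
import gzip

def read_data_file(path):
    rows = []
    with gzip.open(path, 'rt') as f:
        for line in f:
            parts = line.rstrip('\n').split(',')
            start_us, end_us, offset = int(parts[0]), int(parts[1]), int(parts[2])
            rows.append((start_us, end_us, offset, parts[3:]))
    return rows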
end_date = '20180530'
end_date = datetime.strptime(end_date, date_format)
all_days = []
while True:
    all_days.append(start_date.strftime(date_format))
    start_date += timedelta(days=1)
    if start_date > end_date:
        break

usr = '******'
stream_ids = get_latest_stream_id(usr, activity_stream_name)
print(stream_ids)
if not len(stream_ids):
    print('Y' * 100, usr, activity_stream_name)

strm_id = stream_ids[0]['identifier']
stream_dps_count = 0
stream_corrupt_dps_count = 0
for day in all_days:
    ds = CC.get_stream(strm_id, usr, day)
    if len(ds.data):
        dp = ds.data[0]
        num_day_dps = dp.sample[0]
        num_day_corrupt_dps = len(dp.sample[1])
        if num_day_corrupt_dps:
            print(dp.sample[1])
            break
def attachment_marker(all_streams, wrist, owner_id: uuid, CC: CerebralCortex, config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or improper-attachment.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    """
    marker_version = "0.0.1"

    # TODO: quality streams could be multiple so find the one computed with CC
    key0 = "motionsense_hrv_led_quality_" + wrist
    key1 = "motionsense_hrv_" + wrist + "_attachment_marker"

    if config["stream_names"][key0] in all_streams:
        raw_stream_ids = all_streams[config["stream_names"][key0]]["stream_ids"]
        stream_name = all_streams[config["stream_names"][key0]]["name"]
        dd_stream_name = config["stream_names"][key1]

        # using the data-diagnostic stream name, marker version, and owner id to generate a unique stream ID for the marker
        attachment_marker_stream_id = generate_dd_stream_uuid(dd_stream_name, marker_version,
                                                              owner_id, "ATTACHMENT MARKER")
        input_streams = [{"owner_id": owner_id, "id": raw_stream_ids, "name": stream_name}]
        output_stream = {"id": attachment_marker_stream_id, "name": dd_stream_name,
                         "algo_type": config["algo_type"]["attachment_marker"]}
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(raw_stream_id, attachment_marker_stream_id, CC)
                for day in stream_days:
                    try:
                        # load stream data to be diagnosed
                        raw_stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
                        if len(raw_stream.data) > 0:
                            windowed_data = window(raw_stream.data, config['general']['window_size'], True)
                            results = process_windows(windowed_data, config)
                            merged_windows = merge_consective_windows(results)
                            store(merged_windows, input_streams, output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id), str(stream_name), str(day), str(e)))
def get_corrupt_data_count(userid, all_days, cc_config_path):
    stream_names = []
    sms_stream_name = 'CU_SMS_LENGTH--edu.dartmouth.eureka_corrupt_data'
    stream_names.append(sms_stream_name)
    call_stream_name = 'CU_CALL_DURATION--edu.dartmouth.eureka_corrupt_data'
    stream_names.append(call_stream_name)
    proximity_stream_name = 'PROXIMITY--org.md2k.phonesensor--PHONE_corrupt_data'
    stream_names.append(proximity_stream_name)
    cu_appusage_stream_name = 'CU_APPUSAGE--edu.dartmouth.eureka_corrupt_data'
    stream_names.append(cu_appusage_stream_name)
    light_stream_name = 'AMBIENT_LIGHT--org.md2k.phonesensor--PHONE_corrupt_data'
    stream_names.append(light_stream_name)
    call_number_stream_name = "CU_CALL_NUMBER--edu.dartmouth.eureka_corrupt_data"
    stream_names.append(call_number_stream_name)
    sms_number_stream_name = "CU_SMS_NUMBER--edu.dartmouth.eureka_corrupt_data"
    stream_names.append(sms_number_stream_name)
    activity_stream_name = "ACTIVITY_TYPE--org.md2k.phonesensor--PHONE_corrupt_data"
    stream_names.append(activity_stream_name)
    call_type_stream_name = "CU_CALL_TYPE--edu.dartmouth.eureka_corrupt_data"
    stream_names.append(call_type_stream_name)
    sms_type_stream_name = "CU_SMS_TYPE--edu.dartmouth.eureka_corrupt_data"
    stream_names.append(sms_type_stream_name)
    location_stream = 'LOCATION--org.md2k.phonesensor--PHONE_corrupt_data'
    stream_names.append(location_stream)
    geofence_list_stream = 'GEOFENCE--LIST--org.md2k.phonesensor--PHONE_corrupt_data'
    stream_names.append(geofence_list_stream)

    CC = CerebralCortex(cc_config_path)
    all_stream_quality = {}
    count = 0
    started_time = datetime.now()
    userids = [userid]
    for usr in userids[:1]:
        print('processing %d of %d' % (count, len(userids)))
        count += 1
        output_per_day_dir = '/tmp/corruption_per_day/'
        if not os.path.exists(output_per_day_dir):
            os.mkdir(output_per_day_dir)
        buf_day = ''
        for strm in stream_names:
            if strm not in all_stream_quality:
                all_stream_quality[strm] = [0, 0, 0]
            stream_ids = get_latest_stream_id(usr, strm, CC)
            strm_id = stream_ids[0]['identifier']
            stream_dps_count = 0
            stream_corrupt_dps_count = 0
            stream_possible_accl_gyro_dps = 0
            for day in all_days:
                ds = CC.get_stream(strm_id, usr, day)
                if len(ds.data):
                    dp = ds.data[0]
                    num_day_dps = dp.sample[0]
                    num_day_corrupt_dps = len(dp.sample[1])
                    num_possible_accl_sample = 0
                    # check if the corrupted datapoints could be accl or gyro samples
                    if num_day_corrupt_dps:
                        for corrupt_dp in dp.sample[1]:
                            if isinstance(corrupt_dp.sample, list) and len(corrupt_dp.sample) == 3:
                                try:
                                    if (MIN_ACCL_VAL <= corrupt_dp.sample[0] <= MAX_ACCL_VAL and
                                            MIN_ACCL_VAL <= corrupt_dp.sample[1] <= MAX_ACCL_VAL and
                                            MIN_ACCL_VAL <= corrupt_dp.sample[2] <= MAX_ACCL_VAL):
                                        num_possible_accl_sample += 1
                                except Exception as e:
                                    print(corrupt_dp)
                                    print(str(e))
                    buf_day += (str(usr) + '\t' + str(strm) + '\t' + str(day) + '\t' +
                                str(num_day_dps) + '\t' + str(num_day_corrupt_dps) + '\t' +
                                str(num_possible_accl_sample) + '\n')
                    stream_dps_count += num_day_dps
                    stream_corrupt_dps_count += num_day_corrupt_dps
                    stream_possible_accl_gyro_dps += num_possible_accl_sample
            all_stream_quality[strm][0] += stream_dps_count
            all_stream_quality[strm][1] += stream_corrupt_dps_count
            all_stream_quality[strm][2] += stream_possible_accl_gyro_dps
        print(all_stream_quality)

        output_dir = '/tmp/corruption_count/'
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        file_name = usr + '.pickle'
        with open(os.path.join(output_dir, file_name), 'wb') as f:
            pickle.dump(all_stream_quality, f)
        with open(os.path.join(output_per_day_dir, file_name), 'w') as f:
            f.write(buf_day)
    return all_stream_quality
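# The pickle written above maps each corrupt-data stream name to
# [total_dps, corrupt_dps, possible_accl_gyro_dps]. A small loader for
# inspecting those counts; the file path is a placeholder.
import pickle

def print_corruption_summary(pickle_path):
    with open(pickle_path, 'rb') as f:
        per_stream = pickle.load(f)
    for name, (total, corrupt, accl_like) in per_stream.items():
        print('%s\t%d\t%d\t%d' % (name, total, corrupt, accl_like))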
def sensor_failure_marker(all_streams, wrist: str, owner_id: uuid, CC: CerebralCortex, config: dict):
    """
    Label an on-body window as a sensor failure when only one of the accelerometer/gyroscope
    streams produced data. All the labeled data (st, et, label) with its metadata are then
    stored in a datastore.
    :param all_streams:
    :param wrist:
    :param owner_id:
    :param CC:
    :param config:
    """
    marker_version = "0.0.1"

    key0 = "motionsense_hrv_" + wrist + "_attachment_marker"
    key2 = "motionsense_hrv_accel_" + wrist
    key3 = "motionsense_hrv_gyro_" + wrist
    key4 = "motionsense_hrv_" + wrist + "_sensor_failure_marker"

    if config["stream_names"][key2] in all_streams:
        stream_name = all_streams[config["stream_names"][key0]]["name"]
        raw_stream_ids = all_streams[config["stream_names"][key0]]["stream_ids"]
        mshrv_accel_id = all_streams[config["stream_names"][key2]]["stream_ids"]
        mshrv_gyro_id = all_streams[config["stream_names"][key3]]["stream_ids"]
        dd_stream_name = config["stream_names"][key4]

        # using the data-diagnostic stream name, marker version, and owner id to generate a unique stream ID for the marker
        sensor_failure_stream_id = generate_dd_stream_uuid(dd_stream_name, marker_version,
                                                           owner_id, "SENSOR FAILURE MARKER")
        input_streams = [{"owner_id": owner_id, "id": raw_stream_ids, "name": stream_name}]
        output_stream = {"id": sensor_failure_stream_id, "name": dd_stream_name,
                         "algo_type": config["algo_type"]["sensor_failure"]}
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(raw_stream_id, sensor_failure_stream_id, CC)
                for day in stream_days:
                    try:
                        # load the attachment marker stream to find on-body periods
                        attachment_marker_stream = CC.get_stream(raw_stream_id, day, data_type=DataSet.COMPLETE)
                        results = OrderedDict()
                        if attachment_marker_stream.data:
                            for marker_window in attachment_marker_stream.data:
                                if "MOTIONSENSE-ON-BODY" in marker_window.sample:
                                    mshrv_accel_stream = CC.get_stream(
                                        mshrv_accel_id, day,
                                        start_time=marker_window.start_time,
                                        end_time=marker_window.end_time,
                                        data_type=DataSet.ONLY_DATA)
                                    mshrv_gyro_stream = CC.get_stream(
                                        mshrv_gyro_id, day,
                                        start_time=marker_window.start_time,
                                        end_time=marker_window.end_time,
                                        data_type=DataSet.ONLY_DATA)

                                    results_accel = process_windows(mshrv_accel_stream, config)
                                    results_gyro = process_windows(mshrv_gyro_stream, config)

                                    key = marker_window.start_time, marker_window.end_time
                                    # one sensor reporting while the other is silent indicates a sensor failure
                                    if results_accel > 0 and results_gyro < 1:
                                        sample = "MOTIONSENSE-HRV-" + str(wrist) + "-ACCELEROMETER-FAILURE"
                                        results.setdefault(key, []).append(
                                            DataPoint(marker_window.start_time, marker_window.end_time, sample))
                                    elif results_accel < 1 and results_gyro > 0:
                                        sample = "MOTIONSENSE-HRV-" + str(wrist) + "-GYRO-FAILURE"
                                        results.setdefault(key, []).append(
                                            DataPoint(marker_window.start_time, marker_window.end_time, sample))
                            if len(results) > 0:
                                merged_windows = merge_consective_windows(results)
                                store(merged_windows, input_streams, output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, Algo-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id), "sensor_failure_marker", str(day), str(e)))
def sensor_failure_marker(attachment_marker_stream_id: uuid, mshrv_accel_id: uuid, mshrv_gyro_id: uuid,
                          wrist: str, owner_id: uuid, dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label an on-body window as a sensor failure when only one of the accelerometer/gyroscope
    streams produced data. All the labeled data (st, et, label) with its metadata are then
    stored in a datastore.
    :param attachment_marker_stream_id:
    :param mshrv_accel_id:
    :param mshrv_gyro_id:
    :param wrist:
    :param owner_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    # using stream-id, data-diagnostic stream name, and owner id to generate a unique stream ID for the marker
    sensor_failure_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(attachment_marker_stream_id) + dd_stream_name + str(owner_id) + "SENSOR FAILURE MARKER")

    stream_days = get_stream_days(attachment_marker_stream_id, sensor_failure_stream_id, CC)
    try:
        for day in stream_days:
            # load the attachment marker stream to find on-body periods
            attachment_marker_stream = CC.get_stream(attachment_marker_stream_id, day,
                                                     data_type=DataSet.COMPLETE)
            results = OrderedDict()
            if attachment_marker_stream.data:
                for marker_window in attachment_marker_stream.data:
                    if "MOTIONSENSE-ON-BODY" in marker_window.sample:
                        mshrv_accel_stream = CC.get_stream(mshrv_accel_id, day,
                                                           start_time=marker_window.start_time,
                                                           end_time=marker_window.end_time,
                                                           data_type=DataSet.ONLY_DATA)
                        mshrv_gyro_stream = CC.get_stream(mshrv_gyro_id, day,
                                                          start_time=marker_window.start_time,
                                                          end_time=marker_window.end_time,
                                                          data_type=DataSet.ONLY_DATA)

                        results_accel = process_windows(mshrv_accel_stream, config)
                        results_gyro = process_windows(mshrv_gyro_stream, config)

                        key = marker_window.start_time, marker_window.end_time
                        # one sensor reporting while the other is silent indicates a sensor failure
                        if results_accel > 0 and results_gyro < 1:
                            sample = "MOTIONSENSE-HRV-" + str(wrist) + "-ACCELEROMETER-FAILURE"
                            results.setdefault(key, []).append(
                                DataPoint(marker_window.start_time, marker_window.end_time, sample))
                        elif results_accel < 1 and results_gyro > 0:
                            sample = "MOTIONSENSE-HRV-" + str(wrist) + "-GYRO-FAILURE"
                            results.setdefault(key, []).append(
                                DataPoint(marker_window.start_time, marker_window.end_time, sample))

                merged_windows = merge_consective_windows(results)
                if len(results) > 0:
                    input_streams = [{"owner_id": owner_id, "id": str(attachment_marker_stream_id),
                                      "name": attachment_marker_stream.name}]
                    output_stream = {"id": sensor_failure_stream_id, "name": dd_stream_name,
                                     "algo_type": config["algo_type"]["sensor_failure"]}
                    metadata = get_metadata(dd_stream_name, input_streams, config)
                    store(merged_windows, input_streams, output_stream, metadata, CC, config)
    except Exception as e:
        print(e)
def battery_marker(all_streams, owner_id, stream_name, CC: CerebralCortex, config: dict):
    """
    This algorithm uses battery percentages to decide whether the device was powered off
    or the battery was low. All the labeled data (st, et, label) with its metadata are
    then stored in a datastore.
    :param all_streams:
    :param owner_id:
    :param stream_name:
    :param CC:
    :param config:
    """
    marker_version = "0.0.1"

    if stream_name in all_streams:
        raw_stream_ids = all_streams[config["stream_names"]["phone_battery"]]["stream_ids"]
        dd_stream_name = config["stream_names"]["phone_battery_marker"]

        # using the data-diagnostic stream name, marker version, and owner id to generate a unique stream ID for the battery marker
        battery_marker_stream_id = generate_dd_stream_uuid(dd_stream_name, marker_version,
                                                           owner_id, "BATTERY MARKER")
        input_streams = [{"owner_id": owner_id, "id": raw_stream_ids, "name": stream_name}]
        output_stream = {"id": battery_marker_stream_id, "name": dd_stream_name,
                         "algo_type": config["algo_type"]["battery_marker"]}
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(raw_stream_id, battery_marker_stream_id, CC)
                for day in stream_days:
                    try:
                        stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
                        if len(stream.data) > 0:
                            windowed_data = window(stream.data, config['general']['window_size'], True)
                            results = process_windows(windowed_data, stream_name, config)
                            merged_windows = merge_consective_windows(results)
                            if len(merged_windows) > 0:
                                labelled_windows = mark_windows(battery_marker_stream_id,
                                                                merged_windows, CC, config)
                                store(labelled_windows, input_streams, output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id), str(stream_name), str(day), str(e)))
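# Illustrative shape of the config dict the markers above read. Only the key
# paths are taken from the code; every value below is a placeholder, and the
# real deployment would keep this in a configuration file.
example_config = {
    "general": {"window_size": 60},
    "stream_names": {
        "phone_battery": "BATTERY--org.md2k.phonesensor--PHONE",
        "phone_battery_marker": "DATA_DIAGNOSTIC--BATTERY_MARKER",
        "app_availability_marker": "DATA_DIAGNOSTIC--APP_AVAILABILITY_MARKER",
    },
    "algo_type": {
        "battery_marker": "battery_marker",
        "app_availability_marker": "app_availability_marker",
    },
}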
class SqlToCCStream():
    def __init__(self, config):
        self.CC = CerebralCortex(config)
        self.config = self.CC.config
        self.sqlData = SqlData(self.config, dbName="environmental_data_collection")
        self.process()

    def process(self):
        user_ids = self.filter_user_ids()
        # get all locations' latitudes/longitudes
        all_locations = self.sqlData.get_latitude_llongitude()
        with open("weather_data.json", "r") as wd:
            metadata = wd.read()
        metadata = json.loads(metadata)
        input_stream_name = 'LOCATION--org.md2k.phonesensor--PHONE'

        for uid in user_ids:
            stream_ids = self.CC.get_stream_id(uid, input_stream_name)
            if len(stream_ids) > 0:
                print("Processing:", uid)
                for sid in stream_ids:
                    sid = sid["identifier"]
                    days = self.CC.get_stream_days(sid)
                    for day in days:
                        print("User ID, Stream ID, Day", uid, sid, day)
                        output_stream_id = ""
                        # get gps data from stream-name 'LOCATION--org.md2k.phonesensor--PHONE'
                        location_stream = self.CC.get_stream(stream_id=sid, day=day)
                        if len(location_stream.data) > 0:
                            # compute median of latitude and longitude values
                            user_loc = self.compute_lat_long_median(location_stream.data)
                            if user_loc != (0, 0):
                                offset = location_stream.data[0].offset
                                # get weather data for matching lat/long values
                                location_id = self.get_location_id(user_loc, all_locations)
                                if location_id is not None:
                                    formated_day = datetime.strptime(day, "%Y%m%d").strftime("%Y-%m-%d")
                                    weather_data = self.sqlData.get_weather_data_by_city_id(location_id,
                                                                                            formated_day)

                                    # convert data into a datastream
                                    execution_context = metadata["execution_context"]
                                    input_streams_metadata = [{"id": sid, "name": input_stream_name}]
                                    metadata["execution_context"]["processing_module"]["input_streams"] = \
                                        input_streams_metadata
                                    dps = []
                                    for wd in weather_data:
                                        wd["temperature"] = json.loads(wd["temperature"])
                                        wd["wind"] = json.loads(wd["wind"])
                                        # difference between sunset and sunrise, in hours
                                        day_light_duration = ((wd["sunset"] - wd["sunrise"]).seconds) / 3600
                                        dp_sample = [
                                            wd["sunrise"], wd["sunset"], day_light_duration,
                                            wd.get("wind", {}).get("deg", float('nan')),
                                            wd.get("wind", {}).get("speed", float('nan')),
                                            wd["temperature"]["temp"],
                                            wd["temperature"]["temp_max"],
                                            wd["temperature"]["temp_min"],
                                            int(wd["humidity"]),
                                            int(wd["clouds"]),
                                            wd["other"]
                                        ]
                                        dps.append(DataPoint(wd["start_time"], None, offset, dp_sample))

                                    if len(dps) > 0:
                                        # generate a deterministic UUID for the output stream
                                        output_stream_id = str(metadata["data_descriptor"]) + \
                                            str(execution_context) + str(metadata["annotations"])
                                        output_stream_id += "weather-data-stream"
                                        output_stream_id += str(uid)
                                        output_stream_id += str(sid)
                                        output_stream_id = str(uuid.uuid3(uuid.NAMESPACE_DNS, output_stream_id))
                                        ds = DataStream(identifier=output_stream_id,
                                                        owner=uid,
                                                        name=metadata["name"],
                                                        data_descriptor=metadata["data_descriptor"],
                                                        execution_context=execution_context,
                                                        annotations=metadata["annotations"],
                                                        stream_type=metadata["type"],
                                                        data=dps)
                                        # store data stream
                                        self.CC.save_stream(ds)

    def compute_lat_long_median(self, data):
        latitude = []
        longitude = []
        valid_data = False
        for dp in data:
            if isinstance(dp.sample, list) and len(dp.sample) == 6:
                latitude.append(dp.sample[0])
                longitude.append(dp.sample[1])
                valid_data = True
        if valid_data:
            return statistics.median(latitude), statistics.median(longitude)
        return 0, 0

    def get_location_id(self, user_loc, all_locations):
        # find the weather location closest to the user's median location
        closest = None
        location_id = None
        for loc in all_locations:
            distance = haversine(user_loc, (float(loc["latitude"]), float(loc["longitude"])), miles=True)
            if closest is None or distance < closest:
                closest = distance
                location_id = loc["id"]
        if closest is not None and closest <= 30:  # accept only weather locations within 30 miles
            return location_id
        return None

    def filter_user_ids(self):
        active_users = []
        all_users = []
        for uid in self.CC.get_all_users("mperf"):
            all_users.append(uid["identifier"])

        data_dir = self.config["data_replay"]["data_dir"]
        for owner_dir in os.scandir(data_dir):
            if owner_dir.name in all_users:
                active_users.append(owner_dir.name)
        return active_users
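# Constructing the class runs the whole pipeline, since __init__ calls
# self.process(); the config path below is a placeholder.
if __name__ == "__main__":
    SqlToCCStream("/path/to/cc_configuration.yml")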
def packet_loss_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid, dd_stream_name,
                       CC: CerebralCortex, config: dict):
    """
    Label a window as packet-loss if received packets are less than the expected packets.
    All the labeled data (st, et, label) with its metadata are then stored in a datastore.
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    # using stream-id, data-diagnostic stream name, and owner id to generate a unique stream ID for the marker
    packetloss_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id) + dd_stream_name + str(owner_id) + "PACKET LOSS MARKER")

    stream_days = get_stream_days(raw_stream_id, packetloss_marker_stream_id, CC)

    if stream_name == config["stream_names"]["autosense_ecg"]:
        sampling_rate = config["sampling_rate"]["ecg"]
        threshold_val = config["packet_loss_marker"]["ecg_acceptable_packet_loss"]
        label = config["labels"]["ecg_packet_loss"]
    elif stream_name == config["stream_names"]["autosense_rip"]:
        sampling_rate = config["sampling_rate"]["rip"]
        threshold_val = config["packet_loss_marker"]["rip_acceptable_packet_loss"]
        label = config["labels"]["rip_packet_loss"]
    elif stream_name in (config["stream_names"]["motionsense_hrv_accel_right"],
                         config["stream_names"]["motionsense_hrv_accel_left"]):
        sampling_rate = config["sampling_rate"]["motionsense_accel"]
        threshold_val = config["packet_loss_marker"]["motionsense_accel_acceptable_packet_loss"]
        # NOTE: the source reuses the gyro packet-loss label for the accel streams
        label = config["labels"]["motionsense_gyro_packet_loss"]
    elif stream_name in (config["stream_names"]["motionsense_hrv_gyro_right"],
                         config["stream_names"]["motionsense_hrv_gyro_left"]):
        sampling_rate = config["sampling_rate"]["motionsense_gyro"]
        threshold_val = config["packet_loss_marker"]["motionsense_gyro_acceptable_packet_loss"]
        label = config["labels"]["motionsense_gyro_packet_loss"]

    for day in stream_days:
        # load stream data to be diagnosed
        stream = CC.get_stream(raw_stream_id, day=day, data_type=DataSet.COMPLETE)
        if len(stream.data) > 0:
            windowed_data = window(stream.data, config['general']['window_size'], True)
            results = process_windows(windowed_data, sampling_rate, threshold_val, label, config)
            merged_windows = merge_consective_windows(results)
            if len(merged_windows) > 0:
                input_streams = [{"owner_id": owner_id, "id": str(raw_stream_id), "name": stream_name}]
                output_stream = {"id": packetloss_marker_stream_id, "name": dd_stream_name,
                                 "algo_type": config["algo_type"]["packet_loss_marker"]}
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata, CC, config)
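# The per-window test implied by the docstring, as a minimal sketch: compare the
# received sample count against what the sampling rate predicts for the window.
# The exact comparison inside process_windows, and the threshold semantics
# (acceptable fraction of expected samples), are assumptions.
def window_has_packet_loss(data_points, window_size, sampling_rate, threshold_val):
    expected = window_size * sampling_rate  # window_size in seconds
    received = len(data_points)
    return (received / expected) < threshold_val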