def label_appusage_intervals(self, data: List[DataPoint], intervals: List, interval_label: List[str]) -> List[DataPoint]:
    """
    Label the gap between consecutive screen touches and merge adjacent gaps
    that received the same label.

    :param List(DataPoint) data: Phone touch screen data stream
    :param intervals: ``intervals[j]`` is an inclusive ``(low, high)`` range;
        the gap between two consecutive touches (in seconds) is compared
        against it to decide whether ``interval_label[j]`` applies
    :param interval_label: Labels, matched position by position with
        ``intervals``; the first matching range wins
    :return: Labelled touch intervals
    :rtype: List(DataPoint)
    """
    labelled = []
    for idx in range(1, len(data)):
        gap_start = data[idx - 1].start_time
        gap_end = data[idx].start_time
        gap_seconds = (gap_end - gap_start).total_seconds()

        for pos, label in enumerate(interval_label):
            if not (intervals[pos][0] <= gap_seconds <= intervals[pos][1]):
                continue

            if (labelled
                    and labelled[-1].end_time == gap_start
                    and labelled[-1].sample == label):
                # Contiguous gap with the same label: stretch the previous entry.
                previous = labelled[-1]
                labelled[-1] = DataPoint(start_time=previous.start_time,
                                         end_time=gap_end,
                                         offset=previous.offset,
                                         sample=previous.sample)
            else:
                labelled.append(DataPoint(start_time=gap_start,
                                          end_time=gap_end,
                                          offset=data[idx].offset,
                                          sample=label))
            break
    return labelled
def transition_counter(self, semanticdata: object) -> object:
    """
    Count the transitions from one type of place to another.

    Entries whose sample is ``"transit"`` are ignored; a transition is counted
    whenever two consecutive remaining entries carry different samples.

    :param semanticdata: DataPoint array of semantic stream
    :return: ``[]`` when ``semanticdata`` is empty, otherwise three DataPoints
        spanning the whole input: a dictionary mapping ``"<from> <to>"`` to
        its count, the number of transitions into ``work`` and the number of
        transitions into ``home`` (both matched case-insensitively)
    :rtype: List(DataPoint)
    """
    # Nothing to summarise; also avoids indexing an empty sequence below.
    if len(semanticdata) == 0:
        return []

    visits = [dp for dp in semanticdata if str(dp.sample) != "transit"]

    number_of_trans_dict = {}
    to_work_transitions = 0
    to_home_transitions = 0
    for pre_loc, post_loc in zip(visits, visits[1:]):
        if pre_loc.sample != post_loc.sample:
            key_string = pre_loc.sample + " " + post_loc.sample
            destination = post_loc.sample.lower()
            if destination == 'work':
                to_work_transitions += 1
            if destination == 'home':
                to_home_transitions += 1
            number_of_trans_dict[key_string] = \
                number_of_trans_dict.get(key_string, 0) + 1

    # The output spans the full input, transit entries included.
    start_time = semanticdata[0].start_time
    end_time = semanticdata[-1].end_time
    offset = semanticdata[0].offset
    return [
        DataPoint(start_time, end_time, offset, number_of_trans_dict),
        DataPoint(start_time, end_time, offset, to_work_transitions),
        DataPoint(start_time, end_time, offset, to_home_transitions),
    ]
def json_to_datapoints(json_obj):
    """
    Build a DataPoint from a decoded JSON record.

    :param json_obj: mapping with ``"value"`` and ``"starttime"`` keys and an
        optional ``"endtime"`` key
    :return: DataPoint whose sample is the value itself when it is a string,
        otherwise the value serialised with ``json.dumps``
    """
    value = json_obj["value"]
    sample = value if isinstance(value, str) else json.dumps(value)
    start_time = parse(json_obj["starttime"])

    if "endtime" not in json_obj:
        return DataPoint(start_time=start_time, sample=sample)

    # Test-code path, not expected to run: the end time is passed through
    # as found in the record, without parsing.
    return DataPoint(start_time=start_time, end_time=json_obj["endtime"], sample=sample)
def split_datapoint_array_by_day(self, data: object) -> object:
    """
    Split DataPoints that cross midnight into one DataPoint per calendar day.

    Day boundaries are midnights expressed in the timezone of the DataPoint's
    ``start_time``. Every piece keeps the original offset and sample.

    :param data: Iterable of DataPoints
    :return: DataPoints in input order; a DataPoint that starts and ends on
        the same date (or ends on an earlier date than it starts) is passed
        through unchanged
    :rtype: List(DataPoint)
    """
    data_by_day = []
    for dp in data:
        end_date = dp.end_time.date()
        # '>=' also passes through malformed spans that end on an earlier
        # date, which would otherwise never reach the end date.
        if dp.start_time.date() >= end_date:
            data_by_day.append(dp)
            continue

        timezoneinfo = dp.start_time.tzinfo
        piece_start = dp.start_time
        while piece_start.date() < end_date:
            next_day = piece_start.date() + timedelta(days=1)
            midnight = datetime(next_day.year, next_day.month, next_day.day,
                                tzinfo=timezoneinfo)
            data_by_day.append(
                DataPoint(piece_start, midnight, dp.offset, dp.sample))
            # Continue from the boundary itself so that intermediate days are
            # covered from midnight to midnight.
            piece_start = midnight

        data_by_day.append(
            DataPoint(piece_start, dp.end_time, dp.offset, dp.sample))
    return data_by_day
def get_screen_touch_rate(self, data: List[DataPoint], typing_episodes: List) -> List[DataPoint]:
    """
    Compute the screen touch rate (touches per second of typing) over all
    typing episodes of a day.

    :param List(DataPoint) data: screen touch stream data points
    :param List(Tuple) typing_episodes: ``(start_time, end_time)`` of each
        typing episode
    :return: ``None`` when there is no data, no typing time or no touch inside
        an episode; otherwise a single DataPoint covering the calendar day of
        the first touch, whose sample is touch count divided by typing seconds
    :rtype: List(DataPoint)
    """
    if not data:
        return None

    touches = 0
    typing_seconds = 0
    for episode in typing_episodes:
        begin, finish = episode[0], episode[1]
        typing_seconds += (finish - begin).total_seconds()
        touches += sum(1 for point in data
                       if begin <= point.start_time <= finish)

    if typing_seconds == 0 or touches == 0:
        return None

    first = data[0]
    day_begin = copy.deepcopy(first.start_time).replace(
        hour=0, minute=0, second=0, microsecond=0)
    day_finish = datetime.datetime.combine(
        day_begin.date(), datetime.time.max).replace(
        tzinfo=first.start_time.tzinfo)

    return [DataPoint(start_time=day_begin, end_time=day_finish,
                      offset=first.offset,
                      sample=touches / typing_seconds)]
def row_to_datapoint(row: str) -> "DataPoint":
    """
    Convert one row of mCerebrum's GZ-CSV format into a DataPoint.

    The row has the form ``<timestamp ms>,<offset ms>,<values>``. The values
    part is decoded with the first strategy that succeeds: JSON, a single
    float, a comma-separated list of floats; if none applies the raw text is
    kept as the sample.

    :param row: one CSV row
    :return: DataPoint whose start time is the timestamp expressed in a
        fixed-offset timezone built from the row's offset
    :raises ValueError: if the row has fewer than three comma-separated
        fields, or the timestamp/offset are not integers
    """
    ts, offset, values = row.split(',', 2)
    ts = int(ts) / 1000.0
    offset = int(offset)

    parsers = (
        json.loads,
        lambda text: [float(text)],
        lambda text: [float(part) for part in text.split(',')],
    )
    for parser in parsers:
        try:
            values = parser(values)
            break
        except ValueError:
            # json.JSONDecodeError is a ValueError too; on failure ``values``
            # is still the raw text, so the next strategy sees it unchanged.
            continue

    timezone = datetime.timezone(datetime.timedelta(milliseconds=offset))
    ts = datetime.datetime.fromtimestamp(ts, timezone)
    return DataPoint(start_time=ts, sample=values)
def save_point(user, value, start_time, end_time, offset, metadata, stream_name_suffix):
    """
    Wrap a single value into a one-point DataStream and save it through ``CC``.

    The stream name is ``metadata['name']`` followed by ``stream_name_suffix``;
    the stream identifier is a UUID3 derived from that name, the user and the
    string form of ``metadata``. Any exception raised while saving is printed
    and not propagated.

    :param user: owner of the stream
    :param value: value stored as the single-element sample
    :param start_time: start time of the data point
    :param end_time: end time of the data point
    :param offset: offset of the data point
    :param metadata: mapping providing ``name``, ``data_descriptor``,
        ``execution_context`` and ``annotations``
    :param stream_name_suffix: text appended to the metadata name
    """
    stream_name = metadata['name'] + stream_name_suffix
    stream_id = str(uuid.uuid3(uuid.NAMESPACE_DNS,
                               str(stream_name + user + str(metadata))))

    point = DataPoint(start_time=start_time, end_time=end_time,
                      offset=offset, sample=[value])
    stream = DataStream(identifier=stream_id,
                        owner=user,
                        name=stream_name,
                        data_descriptor=metadata['data_descriptor'],
                        execution_context=metadata['execution_context'],
                        annotations=metadata['annotations'],
                        stream_type=1,
                        data=[point])
    try:
        CC.save_stream(stream, localtime=True)
    except Exception as e:
        print(e)
def get_data_yield(self, data: List[DataPoint], max_data_gap_threshold_seconds: float = 300) \
        -> Tuple[List[DataPoint], float]:
    """
    Produce a series of data points describing when data is present during
    the calendar day of the first data point. A sample of 0 means data is not
    present in the interval, 1 means data is present. A second list with a
    single data point carries the total hours of present data for that day.

    :param List(DataPoint) data: list of data points
    :param float max_data_gap_threshold_seconds: maximum allowed gap in seconds
        between two consecutive data points
    :return: Intervals of present/absent data and a one-element list whose
        sample is the total duration of present data in hours (rounded to two
        decimals); ``(None, None)`` when ``data`` is empty
    :rtype: Tuple(List(DataPoint), List(DataPoint)) or Tuple(None, None)
    """
    if not data:
        return None, None

    # Day boundaries (00:00:00 .. 23:59:59.999999) in the first point's timezone.
    start_time = datetime.datetime.combine(data[0].start_time.date(), datetime.time.min)
    start_time = start_time.replace(tzinfo=data[0].start_time.tzinfo)
    end_time = datetime.datetime.combine(data[0].start_time.date(), datetime.time.max)
    end_time = end_time.replace(tzinfo=data[0].start_time.tzinfo)

    L = len(data)
    # `last` marks where the currently open "data present" interval begins.
    last = start_time
    yield_data = []
    data_duration = datetime.timedelta()

    # Leading gap between midnight and the first data point.
    if (data[0].start_time - start_time).total_seconds() > max_data_gap_threshold_seconds:
        yield_data.append(DataPoint(start_time, data[0].start_time, data[0].offset, 0))
        last = self.get_end_time(data[0])

    i = 1
    s = None
    t = None
    while i < L:
        # s / t: values returned by get_end_time for the previous and the
        # current point (presumably their end times — confirm in helper).
        s = self.get_end_time(data[i - 1])
        t = self.get_end_time(data[i])
        # Skip forward while consecutive points are within the threshold.
        while i < L and (t - s).total_seconds() <= max_data_gap_threshold_seconds:
            i += 1
            if i < L:
                s = t
                t = self.get_end_time(data[i])

        # Stopped before the end of the list: (s, t) is a gap above the threshold.
        if i < len(data):
            yield_data.append(DataPoint(last, s, data[0].offset, 1))
            yield_data.append(DataPoint(s, t, data[0].offset, 0))
            data_duration += (s - last)
            last = data[i].start_time
            i += 1

    # Trailing part of the day: either a final gap up to end of day, or the
    # open interval is extended to the end of the day.
    if t and (end_time - t).total_seconds() > max_data_gap_threshold_seconds:
        yield_data.append(DataPoint(last, t, data[0].offset, 1))
        yield_data.append(DataPoint(t, end_time, data[0].offset, 0))
        data_duration += (t - last)
    else:
        yield_data.append(DataPoint(last, end_time, data[0].offset, 1))
        data_duration += (end_time - last)

    total_duration_data = [
        DataPoint(start_time, end_time, data[0].offset, round(data_duration.total_seconds() / (60 * 60), 2))]
    return yield_data, total_duration_data
def get_screen_touch_variance_hourly(self, data: List[DataPoint], typing_episodes: List) -> List[DataPoint]:
    """
    Compute, for each hour of the day, the variance of the time between
    consecutive touches inside typing episodes.

    Touches of an hour are grouped per typing episode; the per-episode
    variances are combined with the standard pooled-variance formula
    ``sum(n_k * (var_k + (mean_k - mean)^2)) / sum(n_k)``.

    :param List(DataPoint) data: screen touch stream data points
    :param List(Tuple) typing_episodes: ``(start_time, end_time)`` of each
        typing episode
    :return: ``None`` when fewer than two touches are given, otherwise one
        DataPoint per hour for which a variance could be computed
    :rtype: List(DataPoint)
    """
    if len(data) <= 1:
        return None

    # Work on copies so that collapsing each touch to an instant does not
    # modify the caller's data points.
    combined_data = copy.deepcopy(data)
    for s in combined_data:
        s.end_time = s.start_time

    new_data = []
    day_start = combined_data[0].start_time.replace(
        hour=0, minute=0, second=0, microsecond=0)

    for h in range(24):
        start = day_start.replace(hour=h)
        # NOTE(review): the window ends at hh:59:00, so touches in the last
        # minute of an hour are not assigned to any hour — confirm intent.
        end = start + datetime.timedelta(minutes=59)

        datalist = [d for d in combined_data if start <= d.start_time <= end]
        if len(datalist) <= 1:
            continue

        # One independent list per episode. ``[[]] * n`` would make every
        # entry the same list object and mix all episodes together.
        splitted_data = [
            [d for d in datalist if ep[0] <= d.start_time and d.end_time <= ep[1]]
            for ep in typing_episodes
        ]
        splitted_data = [bucket for bucket in splitted_data if len(bucket) > 1]
        if not splitted_data:
            continue

        episode_data = [self.inter_event_time_list(bucket)
                        for bucket in splitted_data]

        # Combined mean over all gaps; pooling first also works when the
        # episodes contain different numbers of gaps.
        pooled = [gap for gaps in episode_data for gap in gaps]
        if not pooled:
            continue
        Xc = np.mean(pooled)

        var = 0
        n = 0
        for gaps in episode_data:
            X = np.mean(gaps)
            V = np.var(gaps)
            var += len(gaps) * (V + (X - Xc) * (X - Xc))
            n += len(gaps)
        var /= n
        if np.isnan(var):
            continue

        new_data.append(DataPoint(start_time=start, end_time=end,
                                  offset=combined_data[0].offset, sample=var))
    return new_data
def radius_of_gyration(self, centroiddatapoints: object) -> object:
    """
    Compute the time-weighted radius of gyration of a participant in a day.

    Entries whose ``sample[0]`` equals -1 are skipped. The radius is the
    square root of the duration-weighted mean squared haversine distance of
    the remaining entries from their mean latitude/longitude.

    :param centroiddatapoints: DataPoint array of centroid stream, samples of
        the form ``[id, latitude, longitude, ...]``
    :return: ``[]`` when no entry remains after filtering, otherwise a single
        DataPoint spanning the whole input
    :rtype: List(DataPoint) with a single element.
    """
    stays = [dp for dp in centroiddatapoints if float(dp.sample[0]) != -1.0]
    if not stays:
        return []

    lat_total = 0
    lon_total = 0
    for stay in stays:
        lat_total = lat_total + float(stay.sample[1])
        lon_total = lon_total + float(stay.sample[2])
    centre_lat = lat_total / len(stays)
    centre_lon = lon_total / len(stays)

    elapsed = 0
    weighted_squares = 0
    for stay in stays:
        duration = (stay.end_time - stay.start_time).total_seconds()
        distance = self.haversine(float(stay.sample[2]), float(stay.sample[1]),
                                  centre_lon, centre_lat)
        elapsed = elapsed + duration
        weighted_squares = weighted_squares + duration * distance * distance

    radius = sqrt(weighted_squares / elapsed) if elapsed > 0 else 0

    first, final = centroiddatapoints[0], centroiddatapoints[-1]
    return [DataPoint(first.start_time, final.end_time, first.offset, radius)]
def classify_activity(features: List[DataPoint], is_gravity) -> List[DataPoint]:
    """
    Label each feature DataPoint with the prediction of the activity model.

    :param features: DataPoints whose samples are fed to the model
    :param is_gravity: passed to ``get_activity_model`` to select the model
    :return: one DataPoint per input, same times and offset, with the
        prediction converted to ``str`` as sample
    """
    model = get_activity_model(is_gravity)
    predictions = model.predict([point.sample for point in features])
    return [
        DataPoint(start_time=point.start_time, end_time=point.end_time,
                  offset=point.offset, sample=str(predictions[position]))
        for position, point in enumerate(features)
    ]
def fill_missing_values(datapoints: List[DataPoint], freq: float) -> List[DataPoint]:
    """
    Resample the datapoints onto a regular time grid by forward-filling.

    The grid starts at the first datapoint and advances by ``1 / freq``
    seconds up to the start time of the last datapoint. Every grid timestamp
    takes the offset and sample of the most recent datapoint that starts at
    or before it. ``datapoints`` is expected to be ordered by start time.

    :param datapoints: input datapoints
    :param freq: target frequency in Hz
    :return: the resampled datapoints (``end_time`` is ``None``); the input is
        returned unchanged when it is empty or when ``freq`` does not give a
        positive time step
    """
    if not datapoints or freq <= 0:
        return datapoints

    step = timedelta(seconds=1.0 / freq)
    if step <= timedelta(0):
        # Interval below timedelta resolution: the grid would never advance.
        return datapoints

    end_t = datapoints[-1].start_time
    last_index = len(datapoints) - 1

    new_datapoints = []
    t = datapoints[0].start_time
    j = 0
    while t <= end_t:
        # Advance to the latest datapoint not after t. Moving more than one
        # step keeps the pointer from lagging when the input is denser than
        # the target grid.
        while j < last_index and datapoints[j + 1].start_time <= t:
            j += 1
        new_datapoints.append(
            DataPoint(t, None, datapoints[j].offset, datapoints[j].sample))
        t += step

    return new_datapoints
def process_day_data(self, user_id: str, day: str, streams: dict):
    """
    Compute a stress time series from one day of raw wrist sensor streams and
    store it via ``self.store_stream`` (filepath ``stress-from-wrist.json``).

    Any exception raised during processing is logged through
    ``self.CC.logging.log`` and not propagated.

    :param user_id: id of the user whose data is processed
    :param day: day to process
    :param streams: mapping of stream name to stream metadata; only the raw
        stream names listed below are looked up
    :return: None
    """
    raw_led_hrvp_lw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV_PLUS--LEFT_WRIST"
    raw_led_hrvp_rw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV_PLUS--RIGHT_WRIST"
    raw_led_hrv_lw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV--LEFT_WRIST"
    raw_led_hrv_rw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV--RIGHT_WRIST"
    raw_hrv_lw = "RAW--org.md2k.motionsense--MOTION_SENSE_HRV--LEFT_WRIST"
    raw_hrv_wr = "RAW--org.md2k.motionsense--MOTION_SENSE_HRV--RIGHT_WRIST"
    raw_stream_names = [raw_led_hrvp_lw, raw_led_hrvp_rw, raw_led_hrv_lw, raw_led_hrv_rw, raw_hrv_lw, raw_hrv_wr]

    ppg_data = []
    input_streams = []
    try:
        for rs in raw_stream_names:
            if rs not in streams:
                continue
            data = utils.get_raw_data_by_stream_name(rs, user_id, day, self.CC, localtime=False)
            raw_data = []
            for d in data:
                # Only samples with exactly 20 values are used, whether they
                # arrive as a list or as a comma-separated string.
                if type(d.sample) is list and len(d.sample)!=20:
                    continue
                if type(d.sample) is str:
                    st = list(map(float, d.sample.strip().split(",")))
                    if len(st)!=20:
                        continue
                    tmp = [tm.mktime(d.start_time.timetuple()), d.offset]
                    tmp += st
                else:
                    tmp = [tm.mktime(d.start_time.timetuple()), d.offset]
                    tmp += d.sample
                # Row layout: [epoch seconds, offset, 20 sample values].
                raw_data.append(tmp)
            # NOTE(review): this returns from the whole method as soon as one
            # present stream has no usable rows, discarding data already
            # collected from earlier streams — confirm `continue` was not
            # intended.
            if not raw_data:
                return None
            data = get_realigned_data(np.array(raw_data)).tolist()
            input_streams.append(streams[rs])
            ppg_data += data

        if not ppg_data:
            return

        # Rows from all streams are merged and sorted (first column first).
        ppg_data = np.array(sorted(ppg_data))
        offset = ppg_data[0, 1]
        stress_data = get_stress_time_series(ppg_data)
        data = []
        for d in stress_data:
            # d[0] is divided by 1000 before conversion, i.e. treated as
            # milliseconds — presumably produced that way by the helpers above.
            data.append(DataPoint(start_time=datetime.datetime.fromtimestamp(d[0]/1000), offset=offset, sample=[d[1]]))
        self.store_stream(filepath="stress-from-wrist.json", input_streams=input_streams, user_id=user_id, data=data, localtime=False)
    except Exception as e:
        self.CC.logging.log("user_id: "+ user_id + " day: " + day)
        self.CC.logging.log("Exception:", str(e))
        self.CC.logging.log(str(traceback.format_exc()))
def standard_deviation_of_displacements(self, datawithtransit: object) -> object:
    """
    Compute the standard deviation of the displacements of a user in a day.

    A displacement is the haversine distance between two consecutive entries
    after dropping entries whose ``sample[0]`` equals -1.

    :param datawithtransit: DataPoint array of centroid stream
    :return: ``[]`` when fewer than two entries remain after filtering,
        otherwise a single DataPoint spanning the remaining entries
    :rtype: List(DataPoint) with a single element.
    """
    places = [dp for dp in datawithtransit if float(dp.sample[0]) != -1.0]
    if len(places) < 2:
        return []

    hops = []
    for here, there in zip(places, places[1:]):
        hops.append(self.haversine(here.sample[2], here.sample[1],
                                   there.sample[2], there.sample[1]))

    total = 0
    for hop in hops:
        total = total + hop
    mean_distance = total / len(hops)

    squared_spread = 0
    for hop in hops:
        squared_spread = squared_spread + (hop - mean_distance) ** 2
    standard_deviation = sqrt(squared_spread / len(hops))

    return [DataPoint(places[0].start_time, places[-1].end_time,
                      places[0].offset, standard_deviation)]
def maximum_distance_from_home(self, home_lattitude: object, home_longitude: object, centroiddata: object) -> object:
    """
    Find the largest haversine distance between home and any visited centroid.

    Entries whose ``sample[1]`` equals -1 are skipped.

    :param home_lattitude: latitude of home's location
    :param home_longitude: longitude of home's location
    :param centroiddata: list of centroid datapoints
    :return: ``[]`` when no entry remains after filtering, otherwise a single
        DataPoint spanning the whole input
    :rtype: List(DataPoint) with a single element.
    """
    visited = [dp for dp in centroiddata if float(dp.sample[1]) != -1.0]
    if not visited:
        return []

    farthest = 0
    for dp in visited:
        distance = self.haversine(float(home_longitude), float(home_lattitude),
                                  float(dp.sample[2]), float(dp.sample[1]))
        if farthest < distance:
            farthest = distance

    return [DataPoint(centroiddata[0].start_time, centroiddata[-1].end_time,
                      centroiddata[0].offset, farthest)]
def number_of_different_places(self, data: object) -> object:
    """
    Count the distinct places the participant visited in a day.

    A place is identified by the concatenated string forms of ``sample[1]``
    and ``sample[2]``; entries whose ``sample[1]`` equals -1 are skipped.

    :param data: DataPoint array of centroid stream
    :return: ``[]`` when no entry remains after filtering, otherwise a single
        DataPoint spanning the whole input
    :rtype: List(DataPoint) with a single element.
    """
    place_keys = [str(dp.sample[1]) + str(dp.sample[2])
                  for dp in data
                  if float(dp.sample[1]) != -1.0]
    if not place_keys:
        return []

    distinct_places = len(set(place_keys))
    return [DataPoint(data[0].start_time, data[-1].end_time,
                      data[0].offset, distinct_places)]
def mobility_places(self, data: object) -> object:
    """
    Build the day's place timeline in 15-minute slots.

    The day is divided into 96 slots initialised to ``"MISSING"``. For each
    entry, the slots from the first one starting at or after its start time
    to the one containing its end time are set to the concatenated string
    forms of ``sample[1]`` and ``sample[2]``, unless ``sample[0]`` equals -1.

    :param data: DataPoint array of centroid stream (must not be empty)
    :return: a single DataPoint (not a list) spanning the whole input whose
        sample is the list of 96 slot values
    :rtype: DataPoint
    """
    data_window_min = 15
    hours_in_day = 24
    mob_places = ["MISSING"] * int(hours_in_day * 60 / data_window_min)

    for dp in data:
        begin_minutes = dp.start_time.hour * 60 + dp.start_time.minute
        finish_minutes = dp.end_time.hour * 60 + dp.end_time.minute
        first_slot = ceil(begin_minutes / data_window_min)
        last_slot = floor(finish_minutes / data_window_min)

        for slot in range(first_slot, last_slot + 1):
            if float(dp.sample[0]) != -1.0:
                mob_places[slot] = str(dp.sample[1]) + str(dp.sample[2])

    return DataPoint(data[0].start_time, data[-1].end_time,
                     data[0].offset, mob_places)
def maximum_distance_between_two_locations(self, data: object) -> object:
    """
    Find the maximum haversine distance between any two locations visited by
    the participant in a day.

    Entries whose ``sample[1]`` equals -1 are skipped. Every unordered pair
    of the remaining locations is compared.

    :param data: DataPoint array of centroid stream
    :return: ``[]`` when no entry remains after filtering, otherwise a single
        DataPoint spanning the whole input (sample is 0 for one location)
    :rtype: List(DataPoint) with a single element.
    """
    coordinates = [(float(dp.sample[2]), float(dp.sample[1]))
                   for dp in data
                   if float(dp.sample[1]) != -1.0]
    if not coordinates:
        return []

    max_dist_bet_two_locations = 0
    for i in range(len(coordinates)):
        longitude_i, lattitude_i = coordinates[i]
        # The inner index restarts for every i; a single shared counter
        # would only ever compare the first location against the others.
        for j in range(i + 1, len(coordinates)):
            longitude_j, lattitude_j = coordinates[j]
            dist_bet_i_j = self.haversine(longitude_i, lattitude_i,
                                          longitude_j, lattitude_j)
            if dist_bet_i_j > max_dist_bet_two_locations:
                max_dist_bet_two_locations = dist_bet_i_j

    return [DataPoint(data[0].start_time, data[-1].end_time,
                      data[0].offset, max_dist_bet_two_locations)]
def setUpClass(self):
    """
    Prepare the shared fixtures: a PhoneFeatures instance, nine DataPoints
    placed between roughly ten and one hours before now, and two empty
    DataStreams (phone and SMS) owned by the same random owner.
    """
    self.pp = PhoneFeatures()

    self.data = []
    for hours_back in range(10, 1, -1):
        now = datetime.datetime.now()
        begins = now - datetime.timedelta(hours=hours_back - .1)
        finishes = now - datetime.timedelta(hours=hours_back - .9)
        self.data.append(DataPoint(begins, finishes, hours_back))

    owner = uuid.uuid4()

    self.phoneDataStream = DataStream(identifier=uuid.uuid4(), owner=owner)
    self.phoneDataStream.data = []

    self.smsDataStream = DataStream(identifier=uuid.uuid4(), owner=owner)
    self.smsDataStream.data = []
def total_distance_covered(self, data: object) -> object:
    """
    Sum the haversine distances between consecutive locations of a day.

    Entries whose ``sample[1]`` equals -1 are skipped before pairing.

    :param data: DataPoint array of centroid stream
    :return: ``[]`` when no entry remains after filtering, otherwise a single
        DataPoint spanning the whole input whose sample is the total distance
    :rtype: List(DataPoint) with a single element.
    """
    stops = [dp for dp in data if float(dp.sample[1]) != -1.0]
    if not stops:
        return []

    travelled = 0
    for origin, destination in zip(stops, stops[1:]):
        leg = self.haversine(float(origin.sample[2]), float(origin.sample[1]),
                             float(destination.sample[2]), float(destination.sample[1]))
        travelled = travelled + leg

    return [DataPoint(data[0].start_time, data[-1].end_time,
                      data[0].offset, travelled)]
def available_data_in_time(self, data: object) -> object:
    """
    Sum the durations of all data points, in seconds.

    :param data: DataPoint array of centroid stream
    :return: ``[]`` when ``data`` is empty or the summed duration is
        negative, otherwise a single DataPoint spanning the whole input whose
        sample is the total duration in seconds
    :rtype: List(DataPoint) with a single element.
    """
    # No data points: nothing to report, and nothing to take the bounds from.
    if len(data) == 0:
        return []

    total_time = 0
    for dp in data:
        total_time += (dp.end_time - dp.start_time).total_seconds()

    if total_time < 0:
        return []

    return [DataPoint(data[0].start_time, data[-1].end_time,
                      data[0].offset, total_time)]
def classify_activity(features: List[DataPoint], is_gravity) -> List[DataPoint]:
    """
    Run the predefined activity model over a set of feature DataPoints.

    :type features: List[DataPoint]
    :type is_gravity: bool
    :rtype: List[DataPoint]
    :param features: feature DataPoints; their samples are the model input
    :param is_gravity: flag forwarded to ``get_activity_model``
    :return: one DataPoint per input with the same times and offset and the
        predicted label (as ``str``) as sample
    """
    classifier = get_activity_model(is_gravity)
    samples = [feature.sample for feature in features]
    predicted = classifier.predict(samples)

    labelled = []
    for position in range(len(features)):
        feature = features[position]
        labelled.append(DataPoint(start_time=feature.start_time,
                                  end_time=feature.end_time,
                                  offset=feature.offset,
                                  sample=str(predicted[position])))
    return labelled
def routine_index(self, places: object) -> object:
    """
    Compute the routine index of every day of the participant.

    The routine index of a day is the mean of ``self.average_difference``
    between that day and each of the other days.

    :param places: one DataPoint per day
    :return: ``[]`` when fewer than two days are given, otherwise one
        DataPoint per day carrying that day's times, offset and routine index
    :rtype: List(DataPoint)
    """
    day_count = len(places)
    if day_count <= 1:
        return []

    routine_ind_datapoints = []
    for i, day in enumerate(places):
        summed_diff = 0
        for j, other_day in enumerate(places):
            if i == j:
                continue
            summed_diff = summed_diff + self.average_difference(day, other_day)

        routine_ind_datapoints.append(
            DataPoint(day.start_time, day.end_time, day.offset,
                      summed_diff / (day_count - 1)))
    return routine_ind_datapoints
def listing_all_staying_times(self, user_id: str, all_days: List[str]):
    """
    Label each working day's staying time at the office against the user's
    usual staying time and store the result.

    Staying time is the difference, in minutes of the day, between the end
    and the start of each data point of the ``Working_Days_STREAM`` stream.
    Outliers (by median absolute deviation) are left out when computing the
    mean and standard deviation; each day is then marked ``more_than_usual``,
    ``less_than_usual`` or ``usual_staying_time``. Every stored sample is
    ``[staying_time, label, deviation, flag]``.

    :param str user_id: UUID of the stream owner
    :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))

    stream_ids = self.CC.get_stream_id(user_id, Working_Days_STREAM)
    staying_time_data = []
    office_staying_times = []
    for stream_id in stream_ids:
        for day in all_days:
            work_data_stream = self.CC.get_stream(
                stream_id["identifier"], user_id, day, localtime=True)
            for data in work_data_stream.data:
                arrival_time = data.start_time.hour * 60 + data.start_time.minute
                leave_time = data.end_time.hour * 60 + data.end_time.minute
                staying_time = leave_time - arrival_time
                office_staying_times.append(staying_time)
                staying_time_data.append(
                    DataPoint(data.start_time, data.end_time, data.offset,
                              [staying_time]))

    if not office_staying_times:
        return

    # Outlier rejection based on the median absolute deviation.
    median = np.median(office_staying_times)
    absolute_deviations = [abs(minutes - median)
                           for minutes in office_staying_times]
    mad_value = np.median(absolute_deviations) * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
    outlier_border = mad_value * OUTLIER_DETECTION_MULTIPLIER
    usual_times = [minutes for minutes in office_staying_times
                   if (median - outlier_border) < minutes < (median + outlier_border)]
    if not usual_times:
        usual_times = office_staying_times

    mean = np.mean(usual_times)
    standard_deviation = np.std(usual_times)

    for data in staying_time_data:
        staying_time = data.sample[0]
        if staying_time > mean + standard_deviation:
            data.sample.extend([
                "more_than_usual",
                math.ceil(staying_time - (mean + standard_deviation)),
                1,
            ])
        elif staying_time < mean - standard_deviation:
            data.sample.extend([
                "less_than_usual",
                math.ceil(mean - standard_deviation - staying_time),
                0,
            ])
        else:
            data.sample.extend(["usual_staying_time", 0, 1])

    try:
        if len(staying_time_data):
            streams = self.CC.get_user_streams(user_id)
            for stream_name, stream_metadata in streams.items():
                if stream_name != Working_Days_STREAM:
                    continue
                print("Going to pickle the file: ", staying_time_data)
                self.store_stream(filepath="staying_time.json",
                                  input_streams=[stream_metadata],
                                  user_id=user_id,
                                  data=staying_time_data, localtime=True)
                break
    except Exception as e:
        print("Exception:", str(e))
        print(traceback.format_exc())

    self.CC.logging.log(
        '%s finished processing for user_id %s saved %d data points' %
        (self.__class__.__name__, str(user_id), len(staying_time_data)))
def listing_all_sleep_duration_analysis(self, user_id: str, all_days: List[str]):
    """
    Label each day's sleep duration against the user's usual sleep duration
    and store the result in one stream.

    Sleep durations are read from the ``Sleep_Durations_STREAM`` stream.
    Outliers (by median absolute deviation) are left out when computing the
    mean and standard deviation; each day is then marked ``more_than_usual``,
    ``less_than_usual`` or ``usual_sleep_duration``. Every stored sample is
    ``[sleep_duration, label, deviation, flag]``, with the deviation in the
    same unit as the input durations.

    :param str user_id: UUID of the stream owner
    :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))

    stream_ids = self.CC.get_stream_id(user_id, Sleep_Durations_STREAM)
    sleep_duration_data = []
    sleep_durations = []
    for stream_id in stream_ids:
        for day in all_days:
            sleep_duration_stream = self.CC.get_stream(
                stream_id["identifier"], user_id, day)
            for data in sleep_duration_stream.data:
                sleep_duration = data.sample
                sleep_durations.append(sleep_duration)
                sleep_duration_data.append(
                    DataPoint(data.start_time, data.end_time, data.offset,
                              [sleep_duration]))

    if not sleep_durations:
        return

    # Outlier rejection based on the median absolute deviation.
    median = np.median(sleep_durations)
    absolute_deviations = [abs(duration - median)
                           for duration in sleep_durations]
    mad_value = np.median(absolute_deviations) * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
    outlier_border = mad_value * OUTLIER_DETECTION_MULTIPLIER
    usual_durations = [
        duration for duration in sleep_durations
        if duration > (median - outlier_border) and duration < (median + outlier_border)
    ]
    if not usual_durations:
        usual_durations = sleep_durations

    mean = np.mean(usual_durations)
    standard_deviation = np.std(usual_durations)

    for data in sleep_duration_data:
        sleep_duration = data.sample[0]
        if sleep_duration > mean + standard_deviation:
            data.sample.extend([
                "more_than_usual",
                sleep_duration - (mean + standard_deviation),
                1,
            ])
        elif sleep_duration < mean - standard_deviation:
            data.sample.extend([
                "less_than_usual",
                mean - standard_deviation - sleep_duration,
                0,
            ])
        else:
            data.sample.extend(["usual_sleep_duration", 0, 1])

    try:
        if len(sleep_duration_data) > 0:
            streams = self.CC.get_user_streams(user_id)
            if streams:
                for stream_name, stream_metadata in streams.items():
                    if stream_name != Sleep_Durations_STREAM:
                        continue
                    self.store_stream(filepath="sleep_duration_analysis.json",
                                      input_streams=[stream_metadata],
                                      user_id=user_id,
                                      data=sleep_duration_data)
                    break
    except Exception as e:
        print("Exception:", str(e))
        print(traceback.format_exc())

    self.CC.logging.log(
        '%s finished processing for user_id %s saved %d data points' %
        (self.__class__.__name__, str(user_id), len(sleep_duration_data)))
def listing_all_expected_staying_times(self, user_id, all_days):
    """
    Label every work day's staying time against an expected staying time
    and store the result as two streams.

    For each data point of the ``Working_Days_STREAM`` streams the staying
    time is ``end_time`` minus ``start_time``, both reduced to minutes of the
    day. After a median-absolute-deviation outlier filter the mean staying
    time is turned into two half-hour aligned thresholds:

    * conservative: the next half-hour mark above the mean's hour/minute;
    * liberal: the half-hour mark at or below it.

    The conservative output sample is
    ``[staying_time, label, difference]`` with label
    ``more_than_expected_conservative_time``,
    ``less_than_expected_conservative_time`` or
    ``in_expected_conservative_time``; the liberal output has the same
    layout with the ``..._liberal_time`` labels.

    Nothing is stored when no staying time is found.

    :param user_id: UUID of the stream owner
    :param all_days: days to read, passed unchanged to ``self.CC.get_stream``
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))

    stay_minutes = []
    conservative_points = []
    for stream_id in self.CC.get_stream_id(user_id, Working_Days_STREAM):
        # NOTE(review): one specific stream is skipped by hard-coded id;
        # the reason is not visible in this file.
        if stream_id["identifier"] == '11e0934a-05fd-36d7-903a-9123a2e9f19b':
            print("This is that stream:")
            continue
        for day in all_days:
            day_stream = self.CC.get_stream(stream_id["identifier"],
                                            user_id, day)
            for point in day_stream.data:
                arrived = point.start_time.hour * 60 + point.start_time.minute
                left = point.end_time.hour * 60 + point.end_time.minute
                stayed = left - arrived
                stay_minutes.append(stayed)
                labelled = DataPoint(point.start_time, point.end_time,
                                     point.offset, [])
                labelled.sample.append(stayed)
                conservative_points.append(labelled)

    if not stay_minutes:
        return

    # Median absolute deviation (MAD) based outlier border around the median.
    center = np.median(stay_minutes)
    spread = np.median([abs(value - center) for value in stay_minutes])
    border = (spread * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
              * OUTLIER_DETECTION_MULTIPLIER)
    inliers = [value for value in stay_minutes
               if center - border < value < center + border]
    if not inliers:
        inliers = stay_minutes

    typical_stay = np.mean(inliers)
    typical_minute = int(typical_stay % 60)
    typical_hour = int(typical_stay / 60)
    if typical_minute < 30:
        conservative_time = typical_hour * 60 + 30
        liberal_time = typical_hour * 60
    else:
        conservative_time = (typical_hour + 1) * 60
        liberal_time = typical_hour * 60 + 30

    liberal_points = []
    for point in conservative_points:
        stayed = point.sample[0]
        liberal = DataPoint(point.start_time, point.end_time,
                            point.offset, [])

        if stayed > conservative_time:
            point.sample.extend(["more_than_expected_conservative_time",
                                 math.ceil(stayed - conservative_time)])
        elif stayed < conservative_time:
            point.sample.extend(["less_than_expected_conservative_time",
                                 math.ceil(conservative_time - stayed)])
        elif stayed == conservative_time:
            point.sample.extend(["in_expected_conservative_time", 0])

        liberal.sample.append(stayed)
        if stayed > liberal_time:
            liberal.sample.extend(["more_than_expected_liberal_time",
                                   math.ceil(stayed - liberal_time)])
        elif stayed < liberal_time:
            liberal.sample.extend(["less_than_expected_liberal_time",
                                   math.ceil(liberal_time - stayed)])
        elif stayed == liberal_time:
            liberal.sample.extend(["in_expected_liberal_time", 0])
        liberal_points.append(liberal)

    # Store the conservative stream first, then the liberal one; each store
    # is followed by its own "finished" log entry.
    outputs = (
        ("expected_conservative_staying_time.json", conservative_points),
        ("expected_liberal_staying_time.json", liberal_points),
    )
    for filepath, points in outputs:
        try:
            if len(points):
                streams = self.CC.get_user_streams(user_id)
                for stream_name, stream_metadata in streams.items():
                    if stream_name != Working_Days_STREAM:
                        continue
                    self.store_stream(filepath=filepath,
                                      input_streams=[stream_metadata],
                                      user_id=user_id,
                                      data=points)
                    break
        except Exception as e:
            print("Exception:", str(e))
            print(traceback.format_exc())
        self.CC.logging.log('%s finished processing for user_id %s saved %d '
                            'data points' %
                            (self.__class__.__name__, str(user_id),
                             len(points)))
def listing_all_expected_arrival_times_from_beacon(self, user_id: str,
                                                   all_days: List[str]):
    """
    Label every work day's arrival time against an expected arrival time
    and store the result as two streams.

    The arrival time of a data point is its ``start_time`` reduced to minutes
    of the day. Data is read from the latest ``Working_Days_STREAM`` stream
    ids with ``localtime=True``. After a median-absolute-deviation outlier
    filter the mean arrival time is turned into two half-hour aligned
    thresholds:

    * conservative: the half-hour mark at or below the mean's hour/minute;
    * liberal: the next half-hour mark above it.

    The conservative output sample is
    ``[time_of_day, label, difference, flag]`` with label
    ``after_expected_conservative_time`` (flag 0),
    ``before_expected_conservative_time`` (flag 1) or
    ``in_expected_conservative_time`` (flag 1); the liberal output has the
    same layout with the ``..._liberal_time`` labels.

    Nothing is stored when no arrival time is found.

    :param str user_id: UUID of the stream owner
    :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))

    arrival_minutes = []
    conservative_points = []
    for stream_id in self.get_latest_stream_id(user_id, Working_Days_STREAM):
        for day in all_days:
            day_stream = self.CC.get_stream(stream_id["identifier"],
                                            user_id, day, localtime=True)
            for point in day_stream.data:
                arrival_minutes.append(
                    point.start_time.hour * 60 + point.start_time.minute)
                conservative_points.append(
                    DataPoint(point.start_time, point.end_time,
                              point.offset, []))

    if not arrival_minutes:
        return

    # Median absolute deviation (MAD) based outlier border around the median.
    center = np.median(arrival_minutes)
    spread = np.median([abs(value - center) for value in arrival_minutes])
    border = (spread * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
              * OUTLIER_DETECTION_MULTIPLIER)
    inliers = [value for value in arrival_minutes
               if (center - border) < value < (center + border)]
    if not inliers:
        inliers = arrival_minutes

    typical_arrival = np.mean(inliers)
    typical_minute = int(typical_arrival % 60)
    typical_hour = int(typical_arrival / 60)
    if typical_minute < 30:
        conservative_time = typical_hour * 60
        liberal_time = typical_hour * 60 + 30
    else:
        conservative_time = typical_hour * 60 + 30
        liberal_time = (typical_hour + 1) * 60

    liberal_points = []
    for point in conservative_points:
        liberal = DataPoint(point.start_time, point.end_time,
                            point.offset, [])
        arrived = point.start_time.hour * 60 + point.start_time.minute

        point.sample.append(point.start_time.time())
        if arrived > conservative_time:
            point.sample.extend(["after_expected_conservative_time",
                                 math.ceil(arrived - conservative_time), 0])
        elif arrived < conservative_time:
            point.sample.extend(["before_expected_conservative_time",
                                 math.ceil(conservative_time - arrived), 1])
        elif arrived == conservative_time:
            point.sample.extend(["in_expected_conservative_time", 0, 1])

        liberal.sample.append(point.start_time.time())
        if arrived > liberal_time:
            liberal.sample.extend(["after_expected_liberal_time",
                                   math.ceil(arrived - liberal_time), 0])
        elif arrived < liberal_time:
            liberal.sample.extend(["before_expected_liberal_time",
                                   math.ceil(liberal_time - arrived), 1])
        elif arrived == liberal_time:
            liberal.sample.extend(["in_expected_liberal_time", 0, 1])
        liberal_points.append(liberal)

    # Store the conservative stream first, then the liberal one; each store
    # is followed by its own "finished" log entry.
    outputs = (
        ("expected_conservative_arrival_time_from_beacon.json",
         conservative_points),
        ("expected_liberal_arrival_time_from_beacon.json", liberal_points),
    )
    for filepath, points in outputs:
        try:
            if len(points):
                streams = self.CC.get_user_streams(user_id)
                for stream_name, stream_metadata in streams.items():
                    if stream_name != Working_Days_STREAM:
                        continue
                    self.store_stream(filepath=filepath,
                                      input_streams=[stream_metadata],
                                      user_id=user_id,
                                      data=points, localtime=True)
                    break
        except Exception as e:
            print("Exception:", str(e))
            print(traceback.format_exc())
        self.CC.logging.log('%s finished processing for user_id %s saved %d '
                            'data points' %
                            (self.__class__.__name__, str(user_id),
                             len(points)))
def listing_all_arrival_times_from_beacon(self, user_id: str,
                                          all_days: List[str]):
    """
    Label every work day's arrival time against the user's usual arrival
    time and store the labelled data points.

    The arrival time of a data point of the ``Working_Days_STREAM`` streams
    is its ``start_time`` reduced to minutes of the day. Outliers are
    discarded with a median-absolute-deviation filter; the mean and standard
    deviation of the remaining values define the usual band
    ``[mean - std, mean + std]``. Each stored DataPoint has the sample
    ``[time_of_day, label, difference]`` where ``label`` is
    ``after_usual_time``, ``before_usual_time`` or ``usual_time`` and
    ``difference`` is the distance in minutes (rounded up) to the nearest
    edge of the band, 0 inside the band.

    Nothing is stored when no arrival time is found.

    :param str user_id: UUID of the stream owner
    :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))

    stream_ids = self.CC.get_stream_id(user_id, Working_Days_STREAM)
    arrival_data = []
    office_arrival_times = []
    for stream_id in stream_ids:
        for day in all_days:
            work_data_stream = self.CC.get_stream(stream_id["identifier"],
                                                  user_id, day)
            for data in work_data_stream.data:
                office_arrival_times.append(
                    data.start_time.hour * 60 + data.start_time.minute)
                arrival_data.append(
                    DataPoint(data.start_time, data.end_time,
                              data.offset, []))

    if not office_arrival_times:
        return

    # Median absolute deviation (MAD) based outlier border around the median.
    median = np.median(office_arrival_times)
    mad = np.median([abs(arrival_time - median)
                     for arrival_time in office_arrival_times])
    outlier_border = (mad * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
                      * OUTLIER_DETECTION_MULTIPLIER)
    usual_arrival_times = [
        arrival_time for arrival_time in office_arrival_times
        if median - outlier_border < arrival_time < median + outlier_border]
    if not usual_arrival_times:
        # The strict comparison rejects everything when the border is 0;
        # fall back to the unfiltered values in that case.
        usual_arrival_times = office_arrival_times

    mean = np.mean(usual_arrival_times)
    standard_deviation = np.std(usual_arrival_times)
    latest_usual = mean + standard_deviation
    earliest_usual = mean - standard_deviation

    for data in arrival_data:
        arrival_time = data.start_time.hour * 60 + data.start_time.minute
        data.sample.append(data.start_time.time())
        if arrival_time > latest_usual:
            data.sample.append("after_usual_time")
            data.sample.append(math.ceil(arrival_time - latest_usual))
        elif arrival_time < earliest_usual:
            data.sample.append("before_usual_time")
            data.sample.append(math.ceil(earliest_usual - arrival_time))
        else:
            data.sample.append("usual_time")
            data.sample.append(0)

    try:
        if arrival_data:
            streams = self.CC.get_user_streams(user_id)
            # Guard against a missing/empty stream map instead of relying
            # on the except clause to swallow the resulting error.
            if streams:
                for stream_name, stream_metadata in streams.items():
                    if stream_name == Working_Days_STREAM:
                        self.store_stream(
                            filepath="arrival_time_from_beacon.json",
                            input_streams=[stream_metadata],
                            user_id=user_id,
                            data=arrival_data)
                        break
    except Exception as e:
        # Best effort: a storage failure is reported but not propagated.
        print("Exception:", str(e))
        print(traceback.format_exc())

    self.CC.logging.log('%s finished processing for user_id %s saved %d '
                        'data points' %
                        (self.__class__.__name__, str(user_id),
                         len(arrival_data)))
def listing_all_work_days(self, user_id: str, all_days: List[str]):
    """
    Produce and store one work interval per calendar day.

    Location data points are read (``localtime=True``) from the
    ``GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM`` streams, de-duplicated and
    sorted by ``start_time``. Only points whose sample equals ``"work"``
    (case-insensitive) are used. For every calendar day (taken from
    ``start_time``) the first such point provides the interval start and the
    offset, and the last such point provides the interval end.

    Two streams are stored with identical intervals: one with the sample
    ``'Office'`` (``working_days.json``) and one with the sample ``1``
    (``working_days_ems.json``).

    :param str user_id: UUID of the stream owner
    :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))
    work_data = []
    work_data_ems = []
    location_data = []
    stream_ids = self.CC.get_stream_id(
        user_id, GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM)
    for stream_id in stream_ids:
        for day in all_days:
            location_data_stream = self.CC.get_stream(
                stream_id["identifier"], user_id, day, localtime=True)
            location_data += location_data_stream.data

    # De-duplicate once, then order chronologically so that the first/last
    # work point of each day can be picked up in a single pass.
    location_data = sorted(set(location_data), key=lambda x: x.start_time)

    def close_work_day(start_time, end_time, offset):
        # Emit the finished day into both output lists.
        work_data.append(DataPoint(start_time, end_time, offset, 'Office'))
        work_data_ems.append(DataPoint(start_time, end_time, offset, 1))

    current_day = None  # no work day is open at the beginning
    work_start_time = None
    work_end_time = None
    work_offset = None
    for data in location_data:
        if data.sample.lower() != "work":
            # only the data marked as Work are needed
            continue
        day_of_point = data.start_time.date()
        if day_of_point != current_day:
            # A new day begins: close the previous one (if any) first.
            if current_day is not None:
                close_work_day(work_start_time, work_end_time, work_offset)
            current_day = day_of_point
            work_start_time = data.start_time
            # Keep the offset of the day's own first work point, so a closed
            # day never inherits the offset of a later, unrelated point.
            work_offset = data.offset
        work_end_time = data.end_time

    if current_day is not None:
        close_work_day(work_start_time, work_end_time, work_offset)

    try:
        if work_data:
            streams = self.CC.get_user_streams(user_id)
            if streams:
                for stream_name, stream_metadata in streams.items():
                    if stream_name == \
                            GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM:
                        self.store_stream(filepath="working_days.json",
                                          input_streams=[stream_metadata],
                                          user_id=user_id,
                                          data=work_data, localtime=True)
                        self.store_stream(filepath="working_days_ems.json",
                                          input_streams=[stream_metadata],
                                          user_id=user_id,
                                          data=work_data_ems,
                                          localtime=True)
                        break
    except Exception as e:
        # Best effort: a storage failure is reported but not propagated.
        print("Exception:", str(e))
        print(traceback.format_exc())

    self.CC.logging.log(
        '%s finished processing for user_id %s saved %d '
        'data points' %
        (self.__class__.__name__, str(user_id),
         len(work_data)))
def listing_all_gps_location_daywise(self, user_id: str, all_days: List[str]):
    """
    Produce and store the user's GPS location episodes split on day
    boundaries.

    Data points are read (``localtime=False``) from the
    ``GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM`` streams and de-duplicated
    per stream and day. A point whose ``end_time`` falls on a later calendar
    date than its ``start_time`` is replaced by one point per covered day:

    * the first runs from the original start to the end of that day;
    * every day in between is covered from ``time.min`` to ``time.max``;
    * the last runs from the start of the end day to the original end.

    The generated boundaries carry the ``tzinfo`` of the original
    ``start_time``; offset and sample are copied unchanged. All other points
    are stored as they are, including malformed ones whose end date precedes
    the start date.

    :param str user_id: UUID of the stream owner
    :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
    :return: None
    """
    self.CC.logging.log('%s started processing for user_id %s' %
                        (self.__class__.__name__, str(user_id)))
    gps_data = []
    stream_ids = self.CC.get_stream_id(
        user_id, GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM)

    one_day = timedelta(days=1)
    for stream_id in stream_ids:
        for day in all_days:
            location_data_stream = self.CC.get_stream(
                stream_id["identifier"], user_id, day, localtime=False)
            # dict.fromkeys removes duplicates like set() does, but keeps
            # the order of the stream so the output is deterministic.
            for data in dict.fromkeys(location_data_stream.data):
                first_day = data.start_time.date()
                last_day = data.end_time.date()
                if last_day <= first_day:
                    # Single-day point. An end date before the start date is
                    # also passed through untouched: stepping day by day
                    # towards it would never arrive.
                    gps_data.append(data)
                    continue

                tzinfo = data.start_time.tzinfo
                segment_start = data.start_time
                current_day = first_day
                while current_day < last_day:
                    segment_end = datetime.combine(
                        current_day, time.max).replace(tzinfo=tzinfo)
                    gps_data.append(DataPoint(segment_start, segment_end,
                                              data.offset, data.sample))
                    current_day += one_day
                    segment_start = datetime.combine(
                        current_day, time.min).replace(tzinfo=tzinfo)
                # Remainder on the final day keeps the original end time.
                gps_data.append(DataPoint(segment_start, data.end_time,
                                          data.offset, data.sample))

    try:
        if gps_data:
            streams = self.CC.get_user_streams(user_id)
            if streams:
                for stream_name, stream_metadata in streams.items():
                    if stream_name == \
                            GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM:
                        self.store_stream(
                            filepath="gps_location_daywise.json",
                            input_streams=[stream_metadata],
                            user_id=user_id,
                            data=gps_data)
                        break
    except Exception as e:
        # Best effort: a storage failure is logged but not propagated.
        # The text is passed as one message, like every other log call here.
        self.CC.logging.log("Exception: " + str(e))
        self.CC.logging.log(traceback.format_exc())

    self.CC.logging.log(
        '%s finished processing for user_id %s saved %d '
        'data points' %
        (self.__class__.__name__, str(user_id),
         len(gps_data)))