def get_circadian_mvmt():
    """Return the log of the combined spectral power of the location columns.

    The ``loc_x`` and ``loc_y`` columns are each passed through a real FFT,
    the squared magnitudes of all coefficients are summed per column, and the
    two totals are added together. The result is ``log(total) - log(24)``.

    NOTE(review): the constant 24 presumably normalizes by a 24-hour
    period — confirm against the feature definition.

    Returns:
        The natural log of the summed power minus ``log(24)``.
    """
    # Fetch both columns up front, x first, before any transform is applied.
    columns = [data_utils.get_column(name) for name in ("loc_x", "loc_y")]

    # Total squared magnitude of the real-FFT coefficients, per column.
    power_x, power_y = (
        np.sum(np.abs(np.fft.rfft(values)) ** 2) for values in columns
    )

    return math.log(power_x + power_y) - math.log(24)
def _get_phone_use_duration():
    """Return accumulated phone-on time relative to the recording span.

    Walks the ``time_recorded`` / ``phone_on`` columns in lockstep. The gap
    between a sample and the one before it is added to the total whenever the
    current sample equals 1 and the previous sample is truthy. The total is
    then divided by the overall span (last timestamp minus first) expressed
    in units of ``60 * 60 * 24``.

    NOTE(review): this yields "time on per day" only if timestamps are in
    seconds — confirm the unit of ``time_recorded``.

    Returns:
        Accumulated on-time divided by the recording span in 86400-unit
        blocks.
    """
    timestamps = data_utils.get_column("time_recorded")
    phone_states = data_utils.get_column("phone_on")

    units_per_day = 60 * 60 * 24
    total_on = 0.0

    # The first sample is compared against itself, so its interval is zero.
    prev_stamp = timestamps[0]
    prev_state = phone_states[0]
    for stamp, state in zip(timestamps, phone_states):
        if state == 1 and prev_state:
            total_on += stamp - prev_stamp
        prev_stamp, prev_state = stamp, state

    span_in_days = (timestamps[-1] - timestamps[0]) / units_per_day
    return total_on / span_in_days
def _get_phone_use_freq(threshold=3):
    """Count phone-on samples preceded by an unbroken run of phone-on samples.

    A sample at index ``i`` of the flattened ``phone_on`` column is counted
    when it equals 1 and every sample in the look-back window
    ``[max(0, i - threshold + 1), i)`` also equals 1. Near the start of the
    series the window is clipped at index 0, so it is shorter there.

    NOTE(review): every qualifying sample is counted, so a long unbroken run
    contributes once per sample rather than once per run — confirm this is
    the intended definition of "frequency".

    Args:
        threshold: Required run length of ones, including the current
            sample. With ``threshold <= 1`` the look-back window is empty
            and every sample equal to 1 is counted.

    Returns:
        The number of qualifying samples, as an ``int``.
    """
    phone_on = data_utils.get_column("phone_on").flatten()

    count = 0
    # Number of consecutive ones immediately before the current sample.
    streak = 0
    for index, status in enumerate(phone_on):
        is_on = status == 1
        # The look-back window holds min(index, threshold - 1) samples; it is
        # all ones exactly when the preceding streak is at least that long.
        if is_on and streak >= min(index, threshold - 1):
            count += 1
        streak = streak + 1 if is_on else 0
    return count
def compute_optimal_location_clusters(max_clusters=10, return_labels=False):
    """Fit KMeans for a range of cluster counts and keep the lowest inertia.

    KMeans (``random_state=42``) is fitted on the ``"loc_x, loc_y"`` column
    data for every cluster count in ``range(1, max_clusters)``. The fit with
    the strictly smallest ``inertia_`` wins, and the earliest one is kept on
    ties.

    NOTE(review): the range excludes ``max_clusters`` itself — confirm
    whether the upper bound is meant to be inclusive.

    Args:
        max_clusters: Exclusive upper bound on the cluster counts tried.
        return_labels: When true, also return the data and per-sample labels.

    Returns:
        The winning cluster centers, or ``(X, labels, centers)`` when
        ``return_labels`` is true. If no model was fitted (or none beat the
        initial infinite inertia), centers and labels are empty lists.
    """
    points = data_utils.get_column("loc_x, loc_y")

    winner = None
    lowest_inertia = float('inf')
    for n_clusters in range(1, max_clusters):
        model = KMeans(n_clusters=n_clusters, random_state=42)
        model.fit(points)
        if model.inertia_ < lowest_inertia:
            lowest_inertia = model.inertia_
            winner = model

    if winner is None:
        centers, labels = [], []
    else:
        centers, labels = winner.cluster_centers_, winner.labels_

    if return_labels:
        return points, labels, centers
    return centers
def _compute_location_variance():
    """Return the log of the summed squared standard deviations of location.

    Computes ``log(std(loc_x)**2 + std(loc_y)**2)`` over the ``loc_x`` and
    ``loc_y`` columns.

    Returns:
        The natural log of the combined spread as a ``float``.

    Raises:
        ValueError: From ``math.log`` when the combined spread is zero.
    """
    # Fetch both columns first, x before y, then reduce each to its spread.
    xs, ys = (data_utils.get_column(name) for name in ("loc_x", "loc_y"))
    combined_spread = np.std(xs) ** 2 + np.std(ys) ** 2
    return math.log(combined_spread)