def min_dist(p, positions):
    '''
    Return the smallest distance between a point and a set of points.

    :param p: [$lon, $lat]
    :param positions: [[$lon, $lat]]; must hold at least one point because
        the first element is used to seed the search.
    :return: The minimum of cal_distance(p, q) over every q in positions.
    '''
    nearest = cal_distance(p, positions[0])
    for idx in range(1, len(positions)):
        candidate = cal_distance(p, positions[idx])
        # Strict comparison: on ties the earlier value is kept.
        if candidate < nearest:
            nearest = candidate
    return nearest
def plot_trajectories(signal_set):
    '''
    Animate every trajectory of a signal set, one segment at a time.

    Each trajectory is drawn in its own colour (cycling through C0..C9).
    The newest point is marked in red, and after the first segment the
    earlier points are overdrawn in black.

    :param signal_set: A list of trajectories. Each one is indexed by
        'lon' / 'lat' and exposes a ``dates`` attribute.
    :return: None
    '''
    # close() closes the window (clf() would clear the figure, cla() the axes).
    plt.close()
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    plt.grid(True)  # add grid lines
    plt.ion()  # interactive mode on
    for i, signal in enumerate(signal_set):
        max_time = max(signal.dates)
        min_time = min(signal.dates)
        max_time_str = datetime_tostr(max_time, '%H:%M')
        min_time_str = datetime_tostr(min_time, '%H:%M')
        day = datetime_tostr(min_time, '%m/%d')
        delta_time = (max_time - min_time).total_seconds() / 60
        colour = 'C' + str(i % 10)
        for j in range(len(signal) - 1):
            # Two consecutive samples make up the segment drawn in this step.
            seg_lon = signal['lon'][j:(j + 2)]
            seg_lat = signal['lat'][j:(j + 2)]
            lon1, lon2 = seg_lon
            lat1, lat2 = seg_lat
            dist = cal_distance([lon1, lat1], [lon2, lat2])
            plt.title('[Trace %d %s(%.2f min)]%s to %s. T=%d, D=%.2fm.'
                      % (i, day, delta_time, min_time_str, max_time_str, j, dist))
            ax.plot(seg_lon, seg_lat, colour)
            if j != 0:
                # TODO: make the forward points to be black
                ax.scatter(signal.lon[:j], signal.lat[:j], color='black', s=10)
                ax.scatter([lon2], [lat2], s=10, color='red')
            else:
                ax.scatter([lon1, lon2], [lat1, lat2], s=10, color='red')
            plt.pause(0.5)
        # Longer pause once a whole trajectory has been drawn.
        plt.pause(1)
def dtw_boundary(self):
    '''
    Get a rough upper boundary by walking greedily through the grid.

    Starting at cell [0, 0], the walk repeatedly moves to the in-bound
    neighbour (one step along each entry of ``self.directions``) whose
    cal_distance(ts1[i], ts2[j]) is smallest, until it reaches
    [m - 1, n - 1]. The cost of the start cell itself is not counted.

    :return: Sum of the distances of all cells stepped onto.
    '''
    total = 0
    node = [0, 0]
    goal = [self.m - 1, self.n - 1]
    while node != goal:
        best = np.inf
        for step in self.directions:
            cand = [node[0] + step[0], node[1] + step[1]]
            if self.is_in_bound(cand[0], cand[1]):
                cost = cal_distance(self.ts1[cand[0]], self.ts2[cand[1]])
                if cost < best:
                    nxt, best = cand, cost
        total += best
        node = nxt
    return total
def dtw(self):
    '''
    Compute accumulated DTW costs layer by layer from cell (0, 0).

    Every node of the current layer is expanded along ``self.directions``.
    A successor is recorded the first time it is reached, or whenever a
    cheaper accumulated cost is found. On an exact tie the extra
    predecessor is appended. Progress is printed after every layer.

    :return: (mat, mat_ind)
        mat: (m, n) array of the best accumulated cost found per cell.
            The start cell is 0 and unreached cells stay at inf.
        mat_ind: dict mapping a reached cell (i, j) to the list of
            predecessor cells that achieve its best cost.
    '''
    mat = np.full((self.m, self.n), np.inf)
    mat_ind = {}
    frontier = [(0, 0)]
    mat[frontier[0]] = 0
    layer_cnt = 0
    end_node = (self.m - 1, self.n - 1)
    print('m = %d, n = %d' % (self.m, self.n))
    while frontier:
        next_nodes = []
        for node in frontier:
            for step in self.directions:
                succ = (node[0] + step[0], node[1] + step[1])
                if not (0 <= succ[0] < self.m and 0 <= succ[1] < self.n):
                    continue
                new_dist = mat[node] + cal_distance(
                    self.ts1[succ[0]], self.ts2[succ[1]])
                if succ not in mat_ind or new_dist < mat[succ]:
                    # First visit or strictly better path: restart the list.
                    mat_ind[succ] = [node]
                    mat[succ] = new_dist
                elif new_dist == mat[succ]:
                    mat_ind[succ].append(node)
                # Re-expand the successor in the next layer, but only once.
                if succ not in next_nodes:
                    next_nodes.append(succ)
        frontier = next_nodes
        layer_cnt += 1
        print('Layer %d is finished' % layer_cnt)
    print('Minimum distance = %f' % mat[end_node])
    return mat, mat_ind
def get_median_dist(self, mat_ind=None):
    '''
    Median point-to-point distance of the two series built from a path.

    :param mat_ind: Path information passed to ``self.get_new_ts``. When
        None, the second value returned by ``self.shortest_path_dtw()``
        is used.
    :return: np.median of cal_distance(nts1[t], nts2[t]) over all t.
    '''
    if mat_ind is None:
        _unused, mat_ind = self.shortest_path_dtw()
    nts1, nts2 = self.get_new_ts(mat_ind)
    pair_dists = [cal_distance(nts1[t], nts2[t]) for t in range(len(nts1))]
    return np.median(pair_dists)
def get_average_dist(self, mat_ind=None):
    '''
    Mean point-to-point distance of the two series built from a path.

    :param mat_ind: Path information passed to ``self.get_new_ts``. When
        None, the second value returned by ``self.shortest_path_dtw()``
        is used.
    :return: Sum of cal_distance(nts1[t], nts2[t]) divided by len(nts1).
        An empty series makes the division fail with ZeroDivisionError.
    '''
    if mat_ind is None:
        _unused, mat_ind = self.shortest_path_dtw()
    nts1, nts2 = self.get_new_ts(mat_ind)
    total = sum(cal_distance(nts1[t], nts2[t]) for t in range(len(nts1)))
    return total / len(nts1)
def deal_pingpong(obs, tol=10):
    '''
    Deal with the ping pong problem:
        1. p1 -> p2 -> p1 => Replace by p1.
        2. p1 -> p2 -> p1 -> p2 => Replace by mean(p1, p2).

    The scan keeps a window of the two most recent reference points together
    with their row labels. The labels move with the positions, so the rows
    dropped on a bounce are always the rows of the points that bounced.

    :param obs: A DataFrame with ['dates', 'lon', 'lat']
    :param tol: If dist(p1, p1') < tol, take p1 and p1' as the same point.
        Unit: Meter(m).
    :return: A dealed obs DataFrame with a fresh 0..k-1 index. Inputs with
        fewer than three rows cannot contain a bounce and are returned
        unchanged (apart from the index reset).
    '''
    obs = obs.reset_index(drop=True)
    if len(obs) < 3:
        return obs

    prev_pos, last_pos = [obs.lon[0], obs.lat[0]], [obs.lon[1], obs.lat[1]]
    prev_idx, last_idx = 0, 1
    res = obs
    i = 2
    while i < len(obs):
        pos = [obs.lon[i], obs.lat[i]]

        if cal_distance(prev_pos, pos) >= tol:
            # No bounce: slide the window, moving the row labels along with
            # the positions so later drops hit the right rows.
            prev_pos, prev_idx = last_pos, last_idx
            last_pos, last_idx = pos, i
            i += 1
            continue

        if i + 1 >= len(obs):
            # Pattern 1 at the very end of the data: keep only the last p1.
            res = res.drop([prev_idx, last_idx])
            i += 1
            continue

        nxt_pos = [obs.lon[i + 1], obs.lat[i + 1]]
        if cal_distance(last_pos, nxt_pos) < tol:
            # Pattern 2: collapse the four points into row i, set to the mean.
            res = res.drop([prev_idx, last_idx, i + 1])
            # .loc writes into res itself; chained `res.lon[i] = ...` may
            # write into a temporary copy instead.
            res.loc[i, 'lon'] = (prev_pos[0] + last_pos[0]) / 2
            res.loc[i, 'lat'] = (prev_pos[1] + last_pos[1]) / 2
            prev_pos, last_pos = pos, pos
            prev_idx = last_idx = i
        else:
            # Pattern 1: drop the first p1 and p2, continue from (p1', next).
            res = res.drop([prev_idx, last_idx])
            prev_pos, last_pos = pos, nxt_pos
            prev_idx, last_idx = i, i + 1
        i += 2

    return res.reset_index(drop=True)
def fill_with_dist(self, num):
    '''
    Fill the observation with distance.

    Walks along the observed polyline and emits a new point every
    ``total_dist / (num - 1)``, interpolating lon, lat and dates linearly
    inside each original segment.

    :param num: The num of final observation. Must not be 1, since the
        spacing is ``total_dist / (num - 1)``.
    :return: A new observation (DataFrame with 'lon', 'lat', 'dates').
        NOTE(review): floating-point rounding in the distance comparison
        may leave the result one point short of ``num`` - confirm with
        callers.
    '''
    total_dist = self.get_total_dist()
    each_dist = total_dist / (num - 1)
    dist = 0  # distance walked since the last emitted point
    obs = self.obs
    lon = [obs.lon[0]]
    lat = [obs.lat[0]]
    dates = [obs.dates[0]]
    for i in range(1, len(obs)):
        p1 = [obs.lon[i - 1], obs.lat[i - 1]]
        p2 = [obs.lon[i], obs.lat[i]]
        seg_start = obs.dates[i - 1]
        seg_span = obs.dates[i] - obs.dates[i - 1]
        # Fraction of the ORIGINAL segment already consumed. p_need below is
        # relative to the remaining part (p1 moves forward), so it has to be
        # accumulated before it can be applied to the segment's start date.
        frac = 0
        d_have = cal_distance(p1, p2)
        d_need = each_dist - dist
        while d_have >= d_need:
            p_need = d_need / d_have
            p1 = [(1 - p_need) * p1[0] + p_need * p2[0],
                  (1 - p_need) * p1[1] + p_need * p2[1]]
            frac += p_need * (1 - frac)
            lon.append(p1[0])
            lat.append(p1[1])
            dates.append(seg_start + seg_span * frac)
            dist = 0
            d_have = cal_distance(p1, p2)
            d_need = each_dist
        # Whatever is left of this segment counts towards the next point.
        dist += d_have
    return pd.DataFrame({'lon': lon, 'lat': lat, 'dates': dates})
def density_clustering(points, eps=600):
    '''
    Cluster points with sklearn's DBSCAN on a precomputed distance matrix.

    :param points: [[lon, lat]]
    :param eps: Neighbourhood radius handed to DBSCAN, expressed in
        whatever unit cal_distance returns.
    :return: labels of all points.
        If label == -1: Means the point is a single point.
        If label >= 0: Means the point is in the group label.
    '''
    n = len(points)
    dist_mat = np.zeros((n, n))
    for a in range(n):
        # Only the upper triangle is computed; each value is mirrored.
        for b in range(a + 1, n):
            pair_dist = cal_distance(points[a], points[b])
            dist_mat[a, b] = pair_dist
            dist_mat[b, a] = pair_dist
    model = DBSCAN(eps=eps, metric='precomputed', min_samples=2)
    return model.fit_predict(dist_mat)
def cal_multi_distance(self, ind):
    '''
    Average pairwise distance between one sample from each time series.

    :param ind: A tuple with one time index per series in ``self.tses``.
        e.x: ind = (1, 2, 4) compares ts0 at 1, ts1 at 2 and ts2 at 4.
    :return: Mean of cal_distance over every unordered pair of series.
        Fewer than two indices leave no pairs and the division fails.
    '''
    ind = list(ind)
    pairs = list(combinations(range(len(ind)), 2))
    total = sum(
        cal_distance(self.tses[a][ind[a]], self.tses[b][ind[b]])
        for a, b in pairs
    )
    return total / len(pairs)
def emission_score(self, road: Vertex, lon, lat):
    '''
    Score how well an observed position matches a road vertex.

    :param road: Vertex whose ``lon`` / ``lat`` are compared to the position.
    :param lon: Observed longitude.
    :param lat: Observed latitude.
    :return: 2 + exp(-d ** 2), where d is cal_distance / 1000
        (presumably kilometres - confirm the unit of cal_distance).
    '''
    scaled = cal_distance([road.lon, road.lat], [lon, lat]) / 1000
    # An earlier variant returned 0 beyond scaled >= 2; that cut-off is
    # currently disabled.
    decay = exp(-scaled ** 2)
    return 2 + decay
def trajectories_tojson(signal_set, user_id, save_path, is_json=None):
    '''
    Summarise every usable trajectory and save the result.

    Trajectories with fewer than 5 points or with zero duration are skipped.

    :param signal_set: A list of trajectory DataFrame.
    :param user_id: String.
    :param save_path: The path to save the output.
    :param is_json: Whether to save as json. If truthy, save_json is used;
        otherwise (including the default None) save_js is used.
    :return: None. The saved object has the form
        {
            user_id: String,
            trajectories: [{lon, lat, start, end, start_time, end_time,
                            day, delta_time, speed, dist, dates}],
            lon: mean of the per-trajectory mean longitudes,
            lat: mean of the per-trajectory mean latitudes
        }
    '''
    trajectories = []
    mean_lon = []
    mean_lat = []
    for i, signal in enumerate(signal_set):
        signal = signal.reset_index(drop=True)
        if len(signal) < 5:
            continue
        max_time = max(signal.dates)
        min_time = min(signal.dates)
        start_time = datetime_tostr(min_time, '%H:%M')
        end_time = datetime_tostr(max_time, '%H:%M')
        day = datetime_tostr(min_time, '%m/%d')
        delta_time = (max_time - min_time).total_seconds() / 60  # min
        last = len(signal) - 1
        # Straight-line distance between the first and last sample, in m.
        dist = cal_distance([signal.lon[0], signal.lat[0]],
                            [signal.lon[last], signal.lat[last]])
        if delta_time == 0:
            continue
        rough_speed = dist / delta_time / 60  # m/s
        trajectories.append({
            'lon': signal.lon.tolist(),
            'lat': signal.lat.tolist(),
            'start': '[%d-S]%s(%.1fmin)%s' % (i, day, delta_time, start_time),
            'end': '[%d-E]%s(%.1fm)%s' % (i, day, dist, end_time),
            'start_time': start_time,
            'end_time': end_time,
            'day': day,
            'delta_time': '%.1f' % delta_time,
            'speed': '%.2f' % rough_speed,
            'dist': '%.1f' % dist,
            'dates': signal.dates.apply(datetime_tostr).tolist(),
        })
        mean_lon.append(np.mean(signal.lon))
        mean_lat.append(np.mean(signal.lat))
    res = {
        'user_id': user_id,
        'trajectories': trajectories,
        'lon': np.mean(mean_lon),
        'lat': np.mean(mean_lat),
    }
    writer = save_json if is_json else save_js
    writer(res, save_path)
# Match each cell to its nearest link with a KD-tree and report the distance.
links = get_link()
cells = get_cellSheet()
# Only the first 10 cells are processed. Copy the slice so the columns added
# below are written to an independent frame.
cells = cells[0:10].copy()

start = time.time()
# NOTE(review): the coordinate columns are 'blon' / 'clat' - confirm these
# are the intended longitude / latitude pair.
link_coords = np.array(links[['blon', 'clat']])
tree = KDTree(link_coords, leaf_size=1000)
end = time.time()

this_link = []
dists = []
for i in range(len(cells)):
    cell = cells[i:(i + 1)]
    # DataFrame.as_matrix() was removed from pandas; .values is equivalent.
    query = cell[['lon', 'lat']].values[0]
    # KDTree.query expects a 2-D (n_samples, n_features) array.
    _, ind = tree.query(query.reshape(1, -1))
    ind = ind[0][0]
    # ind is a position in the array the tree was built from, so the
    # coordinates are looked up there instead of by index label.
    dist = cal_distance(query, link_coords[ind])
    dists.append(dist)
    this_link.append(links[ind:(ind + 1)].values[0])
    print(i)

# Transpose so that each row of this_link is one column of the links table.
this_link = np.array(this_link).T
cells['link'] = this_link[0]
cells['link_lon'] = this_link[1]
cells['link_lat'] = this_link[2]
cells['link_dist'] = dists
print(cells.head(10))
#cells.to_csv('../../res/cells_process.csv', index=False)
print('prepare time', end - start)
def dist_trajectories(ts1, ts2):
    '''
    Mean point-to-point distance between two equally long trajectories.

    :param ts1: First trajectory, indexable by position.
    :param ts2: Second trajectory; must have the same length as ts1.
    :return: Sum of cal_distance(ts1[k], ts2[k]) divided by len(ts1).
    '''
    assert len(ts2) == len(ts1)
    count = len(ts1)
    total = sum(cal_distance(ts1[k], ts2[k]) for k in range(count))
    return total / count
def get_total_dist(self):
    '''
    Length of the observed path.

    :return: Sum of cal_distance between every pair of consecutive
        (lon, lat) points in ``self.obs``; 0 when there are fewer than
        two points.
    '''
    obs = self.obs
    return sum(
        cal_distance([obs.lon[k - 1], obs.lat[k - 1]],
                     [obs.lon[k], obs.lat[k]])
        for k in range(1, len(obs))
    )
def get_dist_matrix(self, ts1, ts2):
    '''
    Build the full pairwise distance matrix of two series.

    :param ts1: First series; indices 0..self.m - 1 are read.
    :param ts2: Second series; indices 0..self.n - 1 are read.
    :return: (self.m, self.n) array whose entry [i, j] is
        cal_distance(ts1[i], ts2[j]).
    '''
    dist_mat = np.zeros((self.m, self.n))
    for row in range(self.m):
        # Fill one row at a time, keeping the original evaluation order.
        dist_mat[row] = [cal_distance(ts1[row], ts2[col])
                         for col in range(self.n)]
    return dist_mat