def get_linkage(self, metric="sspd", method="ward", type_d="euclidean"):
    """Compute pairwise trajectory distances and their hierarchical linkage.

    One trajectory is assembled per column of ``self.lat`` by pairing that
    column with the same-named column of ``self.lon``. Results are stored on
    the instance: the output of ``tdist.pdist`` in ``self.p_dist`` and the
    output of ``fc.linkage`` in ``self.link``.

    Parameters
    ----------
    metric : str
        Forwarded to ``tdist.pdist`` as ``metric``.
    method : str
        Forwarded to ``fc.linkage`` as ``method``.
    type_d : str
        Forwarded to ``tdist.pdist`` as ``type_d``.

    Returns
    -------
    self
        The instance itself, so calls can be chained.
    """

    def _as_points(column):
        # Column assignment (rather than a dict constructor) keeps the
        # frame indexed like the "lat" series, with "lon" aligned to it.
        frame = pd.DataFrame([])
        frame["lat"] = self.lat[column]
        frame["lon"] = self.lon[column]
        return frame.values

    trajectories = [_as_points(column) for column in self.lat.columns]
    self.p_dist = tdist.pdist(trajectories, metric=metric, type_d=type_d)
    self.link = fc.linkage(self.p_dist, method=method)
    return self
def get_space_sim(d):
    """Return a spatial similarity matrix for the trajectories in ``d``.

    Each item of ``d`` is indexed with ``xy_cols``, simplified with ``rdp``
    (epsilon ``1e-4``) and compared pairwise via ``dist.pdist`` using
    ``dist_measure``. The pairwise distances are min-max normalised, written
    symmetrically into an ``n`` x ``n`` matrix and mapped through
    ``1 / (1 + x)``.

    ``n``, ``xy_cols`` and ``dist_measure`` are not parameters; they are
    resolved from the surrounding scope.

    NOTE(review): the matrix starts as the identity, so diagonal entries stay
    at 1 and come out as 0.5 after the final transform, while a pair at
    normalised distance 0 comes out as 1.0. Confirm this is intended.
    """
    simplified = [rdp(traj[xy_cols].values, 1e-4) for traj in d]
    spatial_mat = np.identity(n)

    condensed = dist.pdist(simplified, dist_measure)
    lowest = condensed.min()
    condensed = np.divide(condensed - lowest, condensed.max() - lowest)

    # Walk the strict upper triangle row by row, consuming the pairwise
    # distances in that same order, and mirror each value below the diagonal.
    upper_pairs = ((row, col)
                   for row in range(n)
                   for col in range(row + 1, n))
    for k, (row, col) in enumerate(upper_pairs):
        spatial_mat[row, col] = condensed[k]
        spatial_mat[col, row] = condensed[k]

    return np.divide(1, 1 + spatial_mat)
import pandas as pd
import numpy as np

# Benchmark set: the first 100 trajectories of the pickled list.
# NOTE: pickle can execute arbitrary code while loading; only point this at a
# trusted file. The context manager closes the handle once loading is done.
with open(
        "/Users/bguillouet/These/trajectory_distance/data/benchmark_trajectories.pkl",
        "rb") as traj_file:
    traj_list = pickle.load(traj_file)[:100]

# time_dict[metric] -> {"Euclidean": seconds, "Spherical": seconds or -1}
time_dict = collections.defaultdict(dict)
for distance in [
        "sspd", "frechet", "discret_frechet", "hausdorff", "dtw", "lcss",
        "edr", "erp"
]:
    # One timed run of the full pairwise computation per metric.
    t_euclidean = timeit.timeit(
        lambda: tdist.pdist(traj_list, metric=distance), number=1)
    if distance not in ("frechet", "discret_frechet"):
        t_spherical = timeit.timeit(
            lambda: tdist.pdist(
                traj_list, metric=distance, type_d="spherical"),
            number=1)
    else:
        # The Frechet variants are not timed with type_d="spherical";
        # -1 marks the missing measurement.
        t_spherical = -1
    time_dict[distance] = {"Euclidean": t_euclidean, "Spherical": t_spherical}

t_cells_conversion_dic = collections.defaultdict(int)
for precision in [5, 6, 7]:
    cells_list_, _, _, _, _ = trajectory_set_grid(traj_list,
                                                  precision=precision)
    # Keep only the first two columns of each cell sequence. Built as a list
    # so it can be sized and iterated more than once; on Python 3 a bare
    # map() would be exhausted after a single pass.
    cells_list = [np.array(cells)[:, :2] for cells in cells_list_]
# Build one pairwise distance matrix per metric for the entry at index ``i``.
# ``i``, ``namelist``, ``label``, ``data``, ``resample`` and ``dms_euc`` are
# defined outside this fragment.
print('--', i)
name = namelist[i]
# Only entries with at least one label equal to 1 are processed.
if (label[name] == 1).sum() > 0:
    trajs = resample(data[name])
    trajs_norm = []
    for t in trajs:
        # Scale column 0 by 960 and column 1 by 540, in place
        # (presumably the frame width and height in pixels -- TODO confirm).
        t[:, 0] /= 960.
        t[:, 1] /= 540.
        trajs_norm.append(t)
    # Stack into a single 3-D array; the transpose below relies on three axes.
    trajs = np.array(trajs_norm)
    traj_num = len(trajs)
    # Baseline: Euclidean distance between trajectories flattened to one row
    # each (axes 1 and 2 are swapped before flattening).
    y = sd.pdist(
        trajs.transpose(0, 2, 1).reshape(traj_num, -1), 'euclidean')
    dm_euc = sd.squareform(y)
    # Trajectory metrics from tdist; every result goes through sd.squareform
    # (presumably scipy.spatial.distance -- verify the import) to produce the
    # corresponding dm_* matrix.
    y = tdist.pdist(trajs, metric='dtw')
    dm_dtw = sd.squareform(y)
    y = tdist.pdist(trajs, metric='sspd')
    dm_sspd = sd.squareform(y)
    # lcss and edr are both called with eps=0.05.
    y = tdist.pdist(trajs, metric='lcss', eps=0.05)
    dm_lcss = sd.squareform(y)
    y = tdist.pdist(trajs, metric='edr', eps=0.05)
    dm_edr = sd.squareform(y)
    # erp is called with g set to a two-element zero vector.
    y = tdist.pdist(trajs, metric='erp', g=np.zeros(2, dtype=float))
    dm_erp = sd.squareform(y)
    y = tdist.pdist(trajs, metric='frechet')
    dm_fre = sd.squareform(y)
    y = tdist.pdist(trajs, metric='hausdorff')
    dm_hau = sd.squareform(y)
    # Collect the Euclidean matrix for this entry.
    dms_euc.append(dm_euc)
import numpy as np
import traj_dist.distance as tdist
import pickle

# Load the first 10 benchmark trajectories. The context manager closes the
# file handle as soon as unpickling is done.
# NOTE: pickle can execute arbitrary code while loading; only use a trusted
# file here.
with open(
        "/Users/bguillouet/These/trajectory_distance/data/benchmark_trajectories.pkl",
        "rb") as traj_file:
    traj_list = pickle.load(traj_file)[:10]

traj_A = traj_list[0]
traj_B = traj_list[1]

# Simple distance
dist = tdist.sspd(traj_A, traj_B)
print(dist)

# Pairwise distance
pdist = tdist.pdist(traj_list, metric="sspd")
print(pdist)

# Distance between two list of trajectories
cdist = tdist.cdist(traj_list, traj_list, metric="sspd")
print(cdist)
def __initial_match(self,
                    candidate_list: (np.ndarray, np.generic),
                    min_pts=2,
                    t=50,
                    criterion='distance'):
    """Cluster candidates by last position, then refine by trajectory.

    Step 1 runs HDBSCAN with the haversine metric on the last time step of
    every candidate. Step 2 revisits each resulting cluster: pairwise
    ``sspd`` trajectory distances (``type_d="spherical"``) are linked with
    Ward's method and cut into flat sub-clusters with ``fcluster``. When a
    cluster splits, every sub-cluster with more than one member is moved to
    a newly numbered group, and single-member sub-clusters are treated as
    noise and removed from all groups.

    Parameters
    ----------
    candidate_list : np.ndarray
        3-D array indexed as (sample, time step, coordinate). The
        coordinates of the last time step are converted from degrees to
        radians for the haversine metric, so they are presumably
        (latitude, longitude) pairs in degrees -- confirm with the caller.
    min_pts : int
        Passed to HDBSCAN as ``min_cluster_size``.
    t : scalar
        Passed to ``fcluster`` as its threshold. Its meaning depends on
        ``criterion``; see the SciPy ``fcluster`` documentation.
    criterion : str, optional
        Passed to ``fcluster``. The default here is ``'distance'``.

    Returns
    -------
    groups : list of list of int
        ``groups[k]`` holds the sample indices labelled ``k``. A cluster
        that was split leaves an empty list at its original position, and
        unused placeholder lists may remain at the end. An empty
        ``candidate_list`` yields ``[]``.

    Examples
    --------
    Five candidates labelled ``[0, 1, 0, 1, 0]`` with no cluster split give
    ``[[0, 2, 4], [1, 3]]``.
    """
    # TODO group matching for non-grouped user
    # 1 : dbscan algorithm + gps based movement vector alignment -> clear!
    # 2 : acceleration -> let's discuss
    assert isinstance(candidate_list, (np.ndarray, np.generic))
    # Unpacking also rejects anything that is not exactly 3-D.
    num_of_data, num_time_steps, _ = candidate_list.shape
    if num_of_data == 0:
        # Nothing to cluster.
        return []

    # Clustering with gps-data of 1-time step (the last one).
    # 'haversine' clusters on the distance derived from (lat, long).
    last_positions = candidate_list[:, num_time_steps - 1, :]
    rads = np.radians(last_positions)  # [N,2]
    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_pts,
                                min_samples=2,
                                metric='haversine')
    labels = clusterer.fit_predict(rads)
    print('Before trajectory clustering, labels are ', labels)

    # Label -1 marks noise and does not get a group of its own.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    groups = [[] for _ in range(n_clusters_)]
    for i, lb in enumerate(labels):
        if lb != -1:
            groups[lb].append(i)

    # Next free group index; refined sub-clusters are numbered from here on,
    # so they can never collide with an original label still to be visited.
    total_n_clusters = n_clusters_

    # Group refinement considering user's trajectory
    for nc in range(n_clusters_):
        group_member_mask = (labels == nc)
        group_members = candidate_list[group_member_mask]
        # NOTE(review): the transpose hands tdist arrays indexed as
        # (coordinate, time step) per member. Confirm this is the layout
        # tdist.pdist expects for type_d="spherical".
        pdist = tdist.pdist(group_members.transpose([0, 2, 1]),
                            metric="sspd",
                            type_d="spherical")
        Z = fc.linkage(pdist, method="ward")
        # Shift the flat cluster ids down by one so they start at 0.
        sub_labels = sch.fcluster(Z, t, criterion=criterion) - 1
        unique_sub_labels = len(set(sub_labels))
        if unique_sub_labels == 1:
            # The cluster is coherent; keep it as it is.
            continue

        # Kept from the original so the returned list has the same length
        # as before whenever the original did not raise.
        groups.extend([] for _ in range(unique_sub_labels - 1))

        # Global sample indices of this cluster's members, as Python ints.
        member_indices = np.flatnonzero(group_member_mask).tolist()
        for sb in range(unique_sub_labels):
            sub_members = [
                member_indices[pos]
                for pos in np.flatnonzero(sub_labels == sb)
            ]

            # Noise case: a lone member leaves the cluster entirely.
            if len(sub_members) == 1:
                groups[nc].remove(sub_members[0])
                labels[sub_members[0]] = -1
                continue

            if sub_members:
                # Bug fix: only ``unique_sub_labels - 1`` slots were
                # reserved above, yet every multi-member sub-cluster gets
                # its own new group. With no singleton sub-cluster the last
                # write used to raise IndexError.
                while len(groups) <= total_n_clusters:
                    groups.append([])
                for member in sub_members:
                    # remove from wrong group
                    groups[nc].remove(member)
                    # add to refined group
                    groups[total_n_clusters].append(member)
                    labels[member] = total_n_clusters
            total_n_clusters += 1

    print('After trajectory clustering, labels are ', labels)
    return groups.copy()
import numpy as np
import traj_dist.distance as tdist

# Three 2-D trajectories, one point per row.
traj_A = np.array([
    [-122.39534, 37.77678],
    [-122.3992, 37.77631],
    [-122.40235, 37.77594],
    [-122.40553, 37.77848],
    [-122.40801, 37.78043],
    [-122.40837, 37.78066],
    [-122.41103, 37.78463],
    [-122.41207, 37.78954],
    [-122.41252, 37.79232],
    [-122.41316, 37.7951],
    [-122.41392, 37.7989],
    [-122.41435, 37.80129],
    [-122.41434, 37.80129],
])
traj_B = np.array([
    [-122.39472, 37.77672],
    [-122.3946, 37.77679],
    [-122.39314, 37.77846],
    [-122.39566, 37.78113],
    [-122.39978, 37.78438],
    [-122.40301, 37.78708],
    [-122.4048, 37.78666],
    [-122.40584, 37.78564],
    [-122.40826, 37.78385],
    [-122.41061, 37.78321],
    [-122.41252, 37.78299],
])
traj_C = np.array([
    [-122.39542, 37.77665],
    [-122.3988, 37.77417],
    [-122.41042, 37.76944],
    [-122.41459, 37.77016],
    [-122.41462, 37.77013],
])
traj_list = [traj_A, traj_B, traj_C]

# Distance between a single pair of trajectories.
dist = tdist.sspd(traj_A, traj_B)
print(dist)

# Distances between every pair within one list.
pdist = tdist.pdist(traj_list, metric="sspd")
print(pdist)

# Distances between every trajectory of one list and every one of another.
cdist = tdist.cdist(traj_list, traj_list, metric="sspd")
print(cdist)