Example 1
def get_linkage(self, metric="sspd", method="ward", type_d="euclidean"):
    """Compute pairwise trajectory distances and a hierarchical linkage."""
    # Rebuild one (n_points, 2) array per trajectory from the lat/lon frames.
    traj_list = []
    for traj in self.lat.columns:
        ds = pd.DataFrame([])
        ds["lat"] = self.lat[traj]
        ds["lon"] = self.lon[traj]
        traj_list.append(ds.values)
    # Condensed pairwise distance vector (SSPD by default) from traj_dist.
    self.p_dist = tdist.pdist(traj_list, metric=metric, type_d=type_d)
    # Hierarchical linkage; fc is e.g. fastcluster or scipy.cluster.hierarchy.
    self.link = fc.linkage(self.p_dist, method=method)
    return self
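Because `fc.linkage` returns a SciPy-compatible linkage matrix, the result can be cut into flat clusters afterwards. A minimal sketch, assuming for illustration an instance `tc` of the class this method belongs to; the threshold `t=0.5` is illustrative only:

import scipy.cluster.hierarchy as sch

tc.get_linkage(metric="sspd", method="ward")
# One flat-cluster label per trajectory, cut at a cophenetic-distance threshold.
labels = sch.fcluster(tc.link, t=0.5, criterion="distance")
print(labels)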
Example 2
def get_space_sim(d):
    """Turn pairwise spatial distances between trajectories into similarities.

    Assumes the caller provides: `xy_cols` (names of the coordinate columns),
    `rdp` (Ramer-Douglas-Peucker simplification), `dist` (the trajectory
    distance module, e.g. traj_dist.distance) and `dist_measure` (metric name).
    """
    # Simplify each trajectory before computing distances.
    coors = [rdp(t[xy_cols].values, 1e-4) for t in d]
    n = len(coors)
    # Start from zero distances (self-distance is 0, so self-similarity is 1).
    spatial_mat = np.zeros((n, n))
    # Condensed pairwise distances, min-max normalised to [0, 1].
    spatial_dist = dist.pdist(coors, dist_measure)
    d_min = spatial_dist.min()
    spatial_dist = np.divide(spatial_dist - d_min, spatial_dist.max() - d_min)
    # Unpack the condensed vector into the symmetric matrix.
    k = 0
    for i in range(n):
        for j in range(i + 1, n):
            spatial_mat[i, j] = spatial_mat[j, i] = spatial_dist[k]
            k += 1
    # Map distances in [0, 1] to similarities in [0.5, 1].
    return np.divide(1, 1 + spatial_mat)
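The double loop above just expands the condensed distance vector into a square symmetric matrix; SciPy ships a helper for exactly that step. A minimal sketch of the equivalent code, assuming `spatial_dist` is the normalised condensed vector from above:

from scipy.spatial.distance import squareform

spatial_mat = squareform(spatial_dist)      # (n, n) symmetric, zeros on the diagonal
similarity = np.divide(1, 1 + spatial_mat)  # same mapping as in get_space_sim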
Example 3
import collections
import pickle
import timeit

import numpy as np
import pandas as pd

import traj_dist.distance as tdist
# trajectory_set_grid is assumed to come from the traj_dist package's
# grid/cell helpers; it converts trajectories to grid cells below.

traj_list = pickle.load(
    open(
        "/Users/bguillouet/These/trajectory_distance/data/benchmark_trajectories.pkl",
        "rb"))[:100]

time_dict = collections.defaultdict(dict)

for distance in [
        "sspd", "frechet", "discret_frechet", "hausdorff", "dtw", "lcss",
        "edr", "erp"
]:
    # Time one full pairwise-distance computation with planar coordinates.
    t_euclidean = timeit.timeit(
        lambda: tdist.pdist(traj_list, metric=distance), number=1)

    # The Frechet distances are not benchmarked with spherical coordinates.
    if distance not in ("frechet", "discret_frechet"):
        t_spherical = timeit.timeit(
            lambda: tdist.pdist(traj_list, metric=distance, type_d="spherical"),
            number=1)
    else:
        t_spherical = -1
    time_dict[distance] = {"Euclidean": t_euclidean, "Spherical": t_spherical}
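# The timings collected above can be viewed as a small table; a hedged
# sketch using pandas (already imported), one row per metric:
timing_table = pd.DataFrame(time_dict).T
print(timing_table)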

t_cells_conversion_dic = collections.defaultdict(int)
for precision in [5, 6, 7]:
    # Convert each trajectory to a sequence of grid cells at this precision.
    cells_list_, _, _, _, _ = trajectory_set_grid(traj_list, precision=precision)
    cells_list = [np.array(x)[:, :2] for x in cells_list_]
Example 4
# Fragment: the enclosing loop header is reconstructed here; `namelist`,
# `label`, `data`, `resample` and the `dms_*` lists are assumed to be
# defined earlier in the original script.
for i in range(len(namelist)):
    print('--', i)
    name = namelist[i]
    # Only keep sequences that contain at least one positive label.
    if (label[name] == 1).sum() > 0:
        trajs = resample(data[name])
        # Normalise pixel coordinates to [0, 1] (frame size 960 x 540).
        trajs_norm = []
        for t in trajs:
            t[:, 0] /= 960.
            t[:, 1] /= 540.
            trajs_norm.append(t)
        trajs = np.array(trajs_norm)
        traj_num = len(trajs)

        # Plain euclidean distance on flattened trajectories (sd: scipy.spatial.distance),
        # then one square distance matrix per trajectory metric from traj_dist.
        y = sd.pdist(trajs.transpose(0, 2, 1).reshape(traj_num, -1), 'euclidean')
        dm_euc = sd.squareform(y)
        y = tdist.pdist(trajs, metric='dtw')
        dm_dtw = sd.squareform(y)
        y = tdist.pdist(trajs, metric='sspd')
        dm_sspd = sd.squareform(y)
        y = tdist.pdist(trajs, metric='lcss', eps=0.05)
        dm_lcss = sd.squareform(y)
        y = tdist.pdist(trajs, metric='edr', eps=0.05)
        dm_edr = sd.squareform(y)
        y = tdist.pdist(trajs, metric='erp', g=np.zeros(2, dtype=float))
        dm_erp = sd.squareform(y)
        y = tdist.pdist(trajs, metric='frechet')
        dm_fre = sd.squareform(y)
        y = tdist.pdist(trajs, metric='hausdorff')
        dm_hau = sd.squareform(y)

        dms_euc.append(dm_euc)
Example 5
import numpy as np
import traj_dist.distance as tdist
import pickle

traj_list = pickle.load(open("/Users/bguillouet/These/trajectory_distance/data/benchmark_trajectories.pkl", "rb"))[:10]
traj_A = traj_list[0]
traj_B = traj_list[1]



# Simple distance

dist = tdist.sspd(traj_A, traj_B)
print(dist)

# Pairwise distance

pdist = tdist.pdist(traj_list, metric="sspd")
print(pdist)

# Distance between two list of trajectories

cdist = tdist.cdist(traj_list, traj_list, metric="sspd")
print(cdist)
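`tdist.pdist` returns the distances in SciPy's condensed form (one value per unordered pair of trajectories). A minimal sketch of converting the `pdist` result from above to a square matrix:

from scipy.spatial.distance import squareform

dist_matrix = squareform(pdist)  # shape (10, 10), zeros on the diagonal
print(dist_matrix[0, 1])         # equals tdist.sspd(traj_A, traj_B)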
Example 6
    def __initial_match(self,
                        candidate_list: (np.ndarray, np.generic),
                        min_pts=2,
                        t=50,
                        criterion='distance'):
        # TODO group matching for non-grouped user
        # 1 : dbscan algorithm + gps based movement vector alignment -> clear!
        # 2 : acceleration -> let's discuss
        """Performs initial-clustering on cn candidate_list(nT x 2 numpy array) and returns group lists.
        Parameters
        ----------
        candidate_list : array of shape (n_samples, n_of_time_steps, pair of latitude and longitude
        min_pts : minimum members of a group for HDBSCAN-algorithm
        t : scalar
            For criteria 'inconsistent', 'distance' or 'monocrit',
            this is the threshold to apply when forming flat clusters.
            For 'maxclust' or 'maxclust_monocrit' criteria,
            this would be max number of clusters requested.
        criterion : str, optional
        The criterion to use in forming flat clusters. This can
        be any of the following values:

          ``inconsistent`` :
              If a cluster node and all its
              descendants have an inconsistent value less than or equal
              to `t` then all its leaf descendants belong to the
              same flat cluster. When no non-singleton cluster meets
              this criterion, every node is assigned to its own
              cluster. (Default)

          ``distance`` :
              Forms flat clusters so that the original
              observations in each flat cluster have no greater a
              cophenetic distance than `t`.

          ``maxclust`` :
              Finds a minimum threshold ``r`` so that
              the cophenetic distance between any two original
              observations in the same flat cluster is no more than
              ``r`` and no more than `t` flat clusters are formed.

          ``monocrit`` :
              Forms a flat cluster from a cluster node c
              with index ``i`` when ``monocrit[i] <= t``.

              For example, to threshold on the maximum mean distance
              as computed in the inconsistency matrix R with a
              threshold of 0.8 do::

                  MR = maxRstat(Z, R, 3)
                  cluster(Z, t=0.8, criterion='monocrit', monocrit=MR)

          ``maxclust_monocrit`` :
              Forms a flat cluster from a
              non-singleton cluster node ``c`` when ``monocrit[i] <=
              r`` for all cluster indices ``i`` below and including
              ``c``. ``r`` is minimized such that no more than ``t``
              flat clusters are formed. monocrit must be
              monotonic. For example, to minimize the threshold t on
              maximum inconsistency values so that no more than 3 flat
              clusters are formed, do::

                  MI = maxinconsts(Z, R)
                  cluster(Z, t=3, criterion='maxclust_monocrit', monocrit=MI)
        Returns
        ----------
        groups : list of shape (n_clusters, n_members)

        Examples
        ----------
        For example, a candidate_list of shape (5, 3, 2) whose position labels
        come out as [0, 1, 0, 1, 0] yields groups == [[0, 2, 4], [1, 3]].
        """
        assert isinstance(candidate_list, (np.ndarray, np.generic))
        num_of_data, num_time_steps, _ = candidate_list.shape
        X = np.array([
            candidate_list[i, num_time_steps - 1, :]
            for i in range(num_of_data)
        ])
        rads = np.radians(X)  # [N,2]
        # Cluster on the GPS position of the last time step only.
        # 'haversine' works on (lat, lon) in radians and uses great-circle distance.
        clusterer = hdbscan.HDBSCAN(min_cluster_size=min_pts,
                                    min_samples=2,
                                    metric='haversine')
        labels = clusterer.fit_predict(rads)
        print('Before trajectory clustering, labels are ', labels)
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        groups = []
        for ulb in range(n_clusters_):
            groups.append([])
        for i, lb in enumerate(labels):
            if lb == -1:
                continue
            groups[lb].append(i)
        total_n_clusters = n_clusters_
        # Group refinement considering user's trajectory
        for nc in range(n_clusters_):
            group_member_mask = (labels == nc)
            group_members = candidate_list[group_member_mask]
            pdist = tdist.pdist(group_members.transpose([0, 2, 1]),
                                metric="sspd",
                                type_d="spherical")
            Z = fc.linkage(pdist, method="ward")
            sub_labels = sch.fcluster(Z, t, criterion=criterion) - 1
            unique_sub_labels = len(set(sub_labels))
            if unique_sub_labels == 1:
                continue
            for ad in range(unique_sub_labels - 1):
                groups.append([])
            member_indices = list(
                compress(range(len(group_member_mask)), group_member_mask))
            for sb in range(unique_sub_labels):
                sub_group_mask = (sub_labels == sb)
                sub_member_indices = list(
                    compress(range(len(sub_group_mask)), sub_group_mask))
                # Noise case
                if len(sub_member_indices) == 1:
                    groups[nc].remove(member_indices[sub_member_indices[0]])
                    labels[member_indices[sub_member_indices[0]]] = -1
                    continue
                for m in range(len(sub_member_indices)):
                    # remove from wrong group
                    groups[nc].remove(member_indices[sub_member_indices[m]])
                    # add to refined group
                    groups[total_n_clusters].append(
                        member_indices[sub_member_indices[m]])
                    labels[member_indices[
                        sub_member_indices[m]]] = total_n_clusters
                total_n_clusters += 1
        print('After trajectory clustering, labels are ', labels)
        return groups.copy()
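The first stage of the method above is HDBSCAN on each candidate's last known position with the haversine metric, which expects (lat, lon) in radians. A minimal standalone sketch of just that step; the coordinates and parameters are illustrative only:

import numpy as np
import hdbscan

positions = np.array([[37.776, -122.395],   # (lat, lon) in degrees, made up
                      [37.777, -122.396],
                      [37.790, -122.412],
                      [37.791, -122.413]])
clusterer = hdbscan.HDBSCAN(min_cluster_size=2, min_samples=2, metric='haversine')
labels = clusterer.fit_predict(np.radians(positions))  # -1 would mark noise
print(labels)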
Example 7
import numpy as np

# Three 2-D trajectories
traj_A = np.array([[-122.39534, 37.77678],[-122.3992 , 37.77631],[-122.40235, 37.77594],[-122.40553, 37.77848],
                   [-122.40801, 37.78043],[-122.40837, 37.78066],[-122.41103, 37.78463],[-122.41207, 37.78954],
                   [-122.41252, 37.79232],[-122.41316, 37.7951 ],[-122.41392, 37.7989 ],[-122.41435, 37.80129],
                   [-122.41434, 37.80129]])
traj_B = np.array([[-122.39472, 37.77672],[-122.3946 , 37.77679],[-122.39314, 37.77846],[-122.39566, 37.78113],
                   [-122.39978, 37.78438],[-122.40301, 37.78708],[-122.4048 , 37.78666],[-122.40584, 37.78564],
                   [-122.40826, 37.78385],[-122.41061, 37.78321],[-122.41252, 37.78299]])
traj_C = np.array([[-122.39542, 37.77665],[-122.3988 , 37.77417],[-122.41042, 37.76944],[-122.41459, 37.77016],
                   [-122.41462, 37.77013]])
traj_list = [traj_A, traj_B, traj_C]

import traj_dist.distance as tdist

# Simple distance

dist = tdist.sspd(traj_A, traj_B)
print(dist)

# Pairwise distance

pdist = tdist.pdist(traj_list, metric="sspd")
print(pdist)

# Distance between two list of trajectories

cdist = tdist.cdist(traj_list, traj_list, metric="sspd")
print(cdist)
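Because these are longitude/latitude tracks, the same pairwise computation can also be done on the sphere rather than in the plane; a minimal sketch (SSPD supports type_d="spherical", as in the benchmark of Example 3):

# Great-circle (spherical) variant of the same pairwise computation.
pdist_spherical = tdist.pdist(traj_list, metric="sspd", type_d="spherical")
print(pdist_spherical)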