Example #1
0
def _calculate_haversine_length_single(linestring):
    """
    Sum the haversine distances between consecutive vertices of one linestring.

    Parameters
    ----------
    linestring : 'shapely.geometry.linestring.LineString'
        Coordinates of the linestring are expected to be in WGS84

    Returns
    -------
    numeric scalar
        ``np.sum`` over the per-segment results of ``haversine_dist``
        (documented as the length of the linestring in meter).

    Examples
    --------
    >>> from shapely.geometry import LineString
    >>> from trackintel.geogr.distances import _calculate_haversine_length_single
    >>> ls = LineString([(13.476808430, 48.573711823), (11.5675446, 48.1485459), (8.5067847, 47.4084269)])
    >>> _calculate_haversine_length_single(ls)
    """
    # One row per vertex; x_0 / y_0 are the start coordinates of a segment.
    segments = pd.DataFrame(linestring.xy, index=["x_0", "y_0"]).T

    # The end of each segment is the following vertex, i.e. the next row.
    segments["x_1"] = segments["x_0"].shift(-1)
    segments["y_1"] = segments["y_0"].shift(-1)

    # Rows containing NaN are discarded; after the shift this removes the last
    # vertex, which has no successor and therefore starts no segment.
    segments = segments.dropna(axis=0)

    segment_lengths = haversine_dist(
        segments["x_0"], segments["y_0"], segments["x_1"], segments["y_1"]
    )
    return np.sum(segment_lengths)
Example #2
0
def _distance_pair_indices(nx, ny, symmetric):
    """Return (row, column) index arrays of the matrix cells to be computed.

    For a symmetric problem (X compared with itself) only the strict upper
    triangle is returned; otherwise every combination of the ``nx`` rows and
    ``ny`` columns is returned in row-major order.
    """
    if symmetric:
        return np.triu_indices(nx, k=1)
    rows, cols = np.indices((nx, ny))
    return rows.ravel(), cols.ravel()


def _assemble_distance_matrix(nx, ny, ix_1, ix_2, d, symmetric):
    """Write the distances ``d`` of the cells (ix_1, ix_2) into an (nx, ny) matrix.

    In the symmetric case the values are mirrored into the lower triangle and
    the diagonal stays zero.
    """
    D = np.zeros((nx, ny))
    D[ix_1, ix_2] = d
    if symmetric:
        D[ix_2, ix_1] = d
    return D


def calculate_distance_matrix(X,
                              Y=None,
                              dist_metric="haversine",
                              n_jobs=0,
                              **kwds):
    """
    Calculate a distance matrix based on a specific distance metric.

    If only X is given, the pair-wise distances between all elements in X are calculated. If X and Y are given, the
    distances between all combinations of X and Y are calculated. Distances between elements of X and X, and distances
    between elements of Y and Y are not calculated.

    Parameters
    ----------
    X : GeoDataFrame (as trackintel staypoints or triplegs)

    Y : GeoDataFrame (as trackintel staypoints or triplegs), optional

    dist_metric: {'haversine', 'euclidean', 'dtw', 'frechet'}
        The distance metric to be used for calculating the matrix.

        For staypoints, common choice is 'haversine' or 'euclidean'. This function wraps around
        the ``pairwise_distance`` function from scikit-learn if only `X` is given and wraps around the
        ``scipy.spatial.distance.cdist`` function if X and Y are given.
        Therefore the following metrics are also accepted:

        via ``scikit-learn``: `['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan']`

        via ``scipy.spatial.distance``: `['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming',
        'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']`

        For triplegs, common choice is 'dtw' or 'frechet'. This function uses the implementation
        from similaritymeasures.

    n_jobs: int
        Number of cores to use: 'dtw', 'frechet' and all distance metrics from `pairwise_distance` (only available
        if only X is given) are parallelized. ``-1`` uses all cores for 'dtw' and 'frechet'; ``0`` (the default)
        or ``1`` runs sequentially.

    **kwds:
        optional keywords passed to the distance functions.

    Returns
    -------
    D: np.array
        matrix of shape (len(X), len(X)) or of shape (len(X), len(Y)) if Y is provided.
        ``D[i, j]`` is the distance between element ``i`` of X and element ``j`` of Y (or of X if Y is omitted).

    Raises
    ------
    AssertionError
        If the first geometries of X and Y have different geometry types.
    AttributeError
        If the geometry type is neither 'Point' nor 'LineString', or if an unsupported metric is requested for
        LineStrings.
    """
    geom_type = X.geometry.iat[0].geom_type

    # Remember whether Y was supplied before aliasing it to X: the symmetric case can reuse half of the
    # computations, while an explicit Y needs every (X[i], Y[j]) combination.
    symmetric = Y is None
    if symmetric:
        Y = X
    elif Y.geometry.iat[0].geom_type != geom_type:
        # explicit raise instead of `assert` so the check is not stripped under `python -O`
        raise AssertionError("x and y need same geometry type "
                             "(only first column checked)")

    if geom_type == "Point":
        x1 = X.geometry.x.values
        y1 = X.geometry.y.values
        x2 = Y.geometry.x.values
        y2 = Y.geometry.y.values

        if dist_metric == "haversine":
            nx = len(X)
            ny = len(Y)

            # create point pairs for distance calculation
            ix_1, ix_2 = _distance_pair_indices(nx, ny, symmetric)
            d = haversine_dist(x1[ix_1], y1[ix_1], x2[ix_2], y2[ix_2])

            return _assemble_distance_matrix(nx, ny, ix_1, ix_2, d, symmetric)

        xy1 = np.column_stack((x1, y1))
        if symmetric:
            # n_jobs == 0 means "no parallelism" here; scikit-learn/joblib expresses that as None
            # (joblib rejects an n_jobs of 0).
            return pairwise_distances(xy1,
                                      metric=dist_metric,
                                      n_jobs=n_jobs or None,
                                      **kwds)

        xy2 = np.column_stack((x2, y2))
        return cdist(xy1, xy2, metric=dist_metric, **kwds)

    elif geom_type == "LineString":

        if dist_metric not in ("dtw", "frechet"):
            raise AttributeError(
                "Metric unknown. We only support ['dtw', 'frechet'] for LineStrings. "
                f"You passed {dist_metric}")

        # these are the preparation steps for all distance functions based only on coordinates
        if dist_metric == "dtw":
            d_fun = partial(similaritymeasures.dtw, **kwds)
        else:
            d_fun = partial(similaritymeasures.frechet_dist, **kwds)

        # get combinations of distances that have to be calculated
        nx = len(X)
        ny = len(Y)
        ix_1, ix_2 = _distance_pair_indices(nx, ny, symmetric)

        # extract the coordinates of every LineString once, then pair them up by index
        x_coords = [geom.coords for geom in X.geometry]
        y_coords = x_coords if symmetric else [geom.coords for geom in Y.geometry]
        left = [x_coords[i] for i in ix_1]
        right = [y_coords[j] for j in ix_2]

        # map the combinations to the distance function
        if n_jobs is not None and (n_jobs == -1 or n_jobs > 1):
            processes = multiprocessing.cpu_count() if n_jobs == -1 else n_jobs
            with multiprocessing.Pool(processes=processes) as pool:
                res = pool.starmap(d_fun, zip(left, right))
        else:
            res = [d_fun(a, b) for a, b in zip(left, right)]

        if dist_metric == "dtw":
            # the first return is the dtw distance, see docs of similaritymeasures.dtw
            d = [dist[0] for dist in res]
        else:
            d = res

        # write results to the distance matrix (mirrored if only X was given)
        return _assemble_distance_matrix(nx, ny, ix_1, ix_2, d, symmetric)

    else:
        raise AttributeError(
            f"We only support 'Point' and 'LineString'. Your geometry is {geom_type}"
        )