def test_haversine_dist(self):
        """
        input_latlng maps haversine distances in meters to the longitudes and latitudes of
        two different points in WGS84.

        References
        ----------
        https://community.esri.com/groups/coordinate-reference-systems/blog/2017/10/05/haversine-formula
        """

        # {haversine distance in meters: [longitude_P1, latitude_P1, longitude_P2, latitude_P2]}
        input_latlng = {
            18749: [8.5, 47.3, 8.7, 47.2],  # Source: see reference in the docstring above
            5897658.289: [-0.116773, 51.510357, -77.009003, 38.889931],
            3780627: [0.0, 4.0, 0.0, 38],
            # Source for next lines: self-computation with formula from link above
            2306879.363: [-7.345, -7.345, 7.345, 7.345],
            13222121.519: [-0.118746, 73.998, -120.947783, -21.4783],
            785767.221: [50, 0, 45, 5]
        }

        for haversine, latlng in input_latlng.items():
            haversine_output = haversine_dist(latlng[0], latlng[1], latlng[2],
                                              latlng[3])
            assert np.isclose(haversine_output, haversine, atol=0.1)
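For reference, a minimal standalone sketch of the haversine formula that the expected values above follow (Earth radius 6371000 m). haversine_reference is an illustrative helper, not trackintel's haversine_dist implementation:

import numpy as np


def haversine_reference(lon_1, lat_1, lon_2, lat_2, r=6371000):
    """Great-circle distance in meters between two WGS84 points (illustrative sketch)."""
    lon_1, lat_1, lon_2, lat_2 = map(np.radians, (lon_1, lat_1, lon_2, lat_2))
    a = (np.sin((lat_2 - lat_1) / 2) ** 2
         + np.cos(lat_1) * np.cos(lat_2) * np.sin((lon_2 - lon_1) / 2) ** 2)
    return 2 * r * np.arcsin(np.sqrt(a))


# e.g. haversine_reference(8.5, 47.3, 8.7, 47.2) is roughly 18749 m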
    def test_haversine_vectorized(self):
        spts = ti.read_staypoints_csv(
            os.path.join('tests', 'data', 'geolife', 'geolife_staypoints.csv'))
        x = spts.geometry.x.values
        y = spts.geometry.y.values

        n = len(x)
        # our distance
        ix_1, ix_2 = np.triu_indices(n, k=1)

        x1 = x[ix_1]
        y1 = y[ix_1]
        x2 = x[ix_2]
        y2 = y[ix_2]

        d_ours = haversine_dist(x1, y1, x2, y2)

        # their distance
        x_rad = np.asarray([radians(_) for _ in x])
        y_rad = np.asarray([radians(_) for _ in y])
        yx = np.concatenate((y_rad.reshape(-1, 1), x_rad.reshape(-1, 1)),
                            axis=1)

        D_theirs = haversine_distances(yx, yx) * 6371000
        d_theirs = D_theirs[ix_1, ix_2]
        # 1 cm summed over all pairs of the 58 staypoints should be good enough
        assert np.sum(np.abs(d_ours - d_theirs)) < 0.01
Example #3
    def identify_mode(tripleg, wgs, categories):
        """
        Identify the mode based on the (overall) tripleg speed.

        Parameters
        ----------
        tripleg : trackintel triplegs GeoDataFrame
            the tripleg to analyse
        wgs : bool
            whether the tripleg is in WGS84 or not.
        categories : dict
            the upper boundaries (as keys) and the names of the categories as values.

        Returns
        -------
        str
            the identified mode.
        """
        # Computes distance over whole tripleg geometry (using the Haversine distance).
        if wgs:
            distance = sum([haversine_dist(pt1[0], pt1[1], pt2[0], pt2[1]) for pt1, pt2
                            in zip(tripleg.geom.coords[:-1], tripleg.geom.coords[1:])])
        else:
            distance = tripleg.geom.length

        duration = (tripleg['finished_at'] - tripleg['started_at']).total_seconds()
        speed = distance / duration  # The unit of the speed is m/s

        # `categories` is expected to be ordered by increasing bound; the first
        # upper bound that exceeds the speed determines the returned mode
        for bound in categories:
            if speed < bound:
                return categories[bound]
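A minimal usage sketch of identify_mode; speed_categories and tl are hypothetical names, and the speed bounds (in m/s) are illustrative, not values from the source:

# upper speed bounds in m/s mapped to mode names; the keys must be in increasing
# order because the first bound exceeding the tripleg speed determines the mode
speed_categories = {
    2.0: 'slow_mobility',          # roughly walking speed
    8.0: 'motorized_mobility',     # roughly urban driving speed
    float('inf'): 'fast_mobility',
}

# tl is assumed to be one row of a trackintel triplegs GeoDataFrame in WGS84
# mode = identify_mode(tl, wgs=True, categories=speed_categories)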
    def test_example_from_sklearn(self):

        bsas = [-34.83333, -58.5166646]
        paris = [49.0083899664, 2.53844117956]
        bsas_in_radians = [radians(_) for _ in bsas]
        paris_in_radians = [radians(_) for _ in paris]
        d_theirs = haversine_distances([bsas_in_radians, paris_in_radians]) * 6371000

        d_ours = haversine_dist(bsas[1], bsas[0], paris[1], paris[0])

        assert np.abs(d_theirs[1][0] - d_ours) < 0.01
Example #5
def findStayPoints(locs, dataName, accuracy_threshold, dist_threshold,
                   time_threshold, timemax_threshold):
    """
    Finds the staypoints from the raw locations, following Li's algorithm with the help of trackintel   

    Parameters
    ----------
    locs : gdf - raw points as a geopandas df
    dataName : str - ID of participant
    accuracy_threshold, dist_threshold, time_threshold, timemax_threshold : float - Different thresholds needed in the analysis

    Returns
    -------
    pfs : gdf - positionfixes, the raw locations in the trackintel format
    stps : gdf - staypoints found by the trackintel algorithm

    """
    # Calculate the distance difference between consecutive fixes and filter by accuracy
    locs['d_diff'] = np.append(
        haversine_dist(locs.longitudeE7[1:], locs.latitudeE7[1:],
                       locs.longitudeE7[:-1], locs.latitudeE7[:-1]), 0)

    locs = locs[locs['accuracy'] < accuracy_threshold]
    # locs = locs[locs['accuracy'] < locs['d_diff']]

    if not os.path.exists('../data/results/shp/' + dataName + '/'):
        os.makedirs('../data/results/shp/' + dataName + '/')

    hlp.loc2csv4ti(locs, dataName)
    pfs = ti.read_positionfixes_csv('../data/results/csv/' + dataName + '/' +
                                    dataName + '.csv',
                                    sep=';')

    # Find staypoints using a slightly modified version of the trackintel script
    stps = tim.extract_staypoints_ipa(pfs,
                                      method='sliding',
                                      dist_threshold=dist_threshold,
                                      time_threshold=time_threshold,
                                      timemax_threshold=timemax_threshold)
    return pfs, stps
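A usage sketch of findStayPoints with illustrative threshold values; locs is assumed to be the parsed raw-location GeoDataFrame and 'participant_01' is a placeholder ID:

# illustrative thresholds (meters / seconds), not values prescribed by the source
accuracy_threshold = 70          # drop fixes whose reported accuracy is worse than this
dist_threshold = 50              # radius within which consecutive fixes count as one stay
time_threshold = 5 * 60          # minimum stay duration
timemax_threshold = 12 * 3600    # split stays that last longer than this

# pfs, stps = findStayPoints(locs, 'participant_01', accuracy_threshold,
#                            dist_threshold, time_threshold, timemax_threshold)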
Example #6
def stydiffstat(dataNameList, SELECT_RANGE, dateStart, dateEnd):
    """
    Return the place name of input places   

    Parameters
    ----------
    dataNameList : list - list of strings of all participant id with shared data
    SELECT_RANGE: var - flag to define if select certain period
    dateStart: str - the start date of the period if selecting certain period
    dateEnd: str - the end date of the period if selecting certain period
        
    Returns
    -------
    staythredstat: useful statistics to semi-automatically choose thresholds

    """
    ddiff_max = []
    ddiff_min = []
    ddiff_mean = []
    ddiff_median = []
    ddiff_quar = []

    tdiff_max = []
    tdiff_min = []
    tdiff_mean = []
    tdiff_median = []
    tdiff_quar = []

    for dataName in dataNameList:
        dataPathLocs, dataPathTrips = hlp.getDataPaths(dataName)

        if SELECT_RANGE:
            dataPathLocs, dataPathTrips = hlp.selectRange(
                dataPathLocs, dataPathTrips, dateStart, dateEnd)

        locs, locsgdf = hlp.parseLocs(dataPathLocs)

        locs['d_diff'] = np.append(
            haversine_dist(locs.longitudeE7[1:], locs.latitudeE7[1:],
                           locs.longitudeE7[:-1], locs.latitudeE7[:-1]), 0)
        accuracy_threshold = np.quantile(locs['d_diff'], .95)

        locs['t_diff'] = np.append(
            (locs.index[1:] - locs.index[:-1]).total_seconds(), 0)

        maxi = max(locs['d_diff'])
        ddiff_max.append(maxi)
        mini = min(locs['d_diff'])
        ddiff_min.append(mini)
        meani = np.mean(locs['d_diff'])
        ddiff_mean.append(meani)
        mediani = np.median(locs['d_diff'])
        ddiff_median.append(mediani)
        quari = np.quantile(locs['d_diff'], .25)
        ddiff_quar.append(quari)

        maxi = max(locs['t_diff'])
        tdiff_max.append(maxi)
        mini = min(locs['t_diff'])
        tdiff_min.append(mini)
        meani = np.mean(locs['t_diff'])
        tdiff_mean.append(meani)
        mediani = np.median(locs['t_diff'])
        tdiff_median.append(mediani)
        quari = np.quantile(locs['t_diff'], .25)
        tdiff_quar.append(quari)

    # np.transpose is a no-op on 1-D arrays, so a plain array conversion is sufficient here
    ddiff_max = np.array(ddiff_max)
    ddiff_min = np.array(ddiff_min)
    ddiff_mean = np.array(ddiff_mean)
    ddiff_median = np.array(ddiff_median)
    ddiff_quar = np.array(ddiff_quar)

    tdiff_max = np.array(tdiff_max)
    tdiff_min = np.array(tdiff_min)
    tdiff_mean = np.array(tdiff_mean)
    tdiff_median = np.array(tdiff_median)
    tdiff_quar = np.array(tdiff_quar)

    thredstat = {
        'dataName': np.array(dataNameList),
        'dist_max': ddiff_max,
        'dist_min': ddiff_min,
        'dist_range': ddiff_max - ddiff_min,
        'dist_mean': ddiff_mean,
        'dist_median': ddiff_median,
        'dist_quarter': ddiff_quar,
        'time_max': tdiff_max,
        'time_min': tdiff_min,
        'time_range': tdiff_max - tdiff_min,
        'time_mean': tdiff_mean,
        'time_median': tdiff_median,
        'time_quarter': tdiff_quar
    }

    staythredstat = pd.DataFrame(thredstat)

    return staythredstat
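A usage sketch of stydiffstat; the participant IDs and date range below are placeholders:

stats = stydiffstat(['participant_01', 'participant_02'],
                    SELECT_RANGE=True,
                    dateStart='2020-01-01',
                    dateEnd='2020-03-31')
# inspect, e.g., the median jumps per participant as a starting point for the thresholds
print(stats.set_index('dataName')[['dist_median', 'time_median']])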
Example #7
    dataPathLocs, dataPathTrips = hlp.selectRange(
        dataPathLocs,
        dataPathTrips,
        mac,
        dateStart=thresholds["dateStart"],
        dateEnd=thresholds["dateEnd"],
    )

locs, locsgdf = hlp.parseLocs(dataPathLocs)
trips, tripdf, tripsgdf = hlp.parseTrips(dataPathTrips)

# add location data to the trips file (not used for now; only needed to visualize the Google results)
# tripsgdf = hlp.parseTripsWithLocs(dataPathTrips, locsgdf)

locs['d_diff'] = np.append(
    haversine_dist(locs.longitudeE7[1:], locs.latitudeE7[1:],
                   locs.longitudeE7[:-1], locs.latitudeE7[:-1]), 0)
if CHOOSE_THRES:
    thresholds['accuracy_threshold'] = np.quantile(locs['d_diff'], .95)

# export to shapefile
if exportShp:
    hlp.loc2shp(locsgdf, dataName)
    hlp.trip2shp(tripsgdf, dataName)

#%% FIND STAY POINTS
if FIND_STAY_POINTS:
    print("-> Finding stay points ")
    pfs, stps = main.findStayPoints(locs, dataName,
                                    thresholds["accuracy_threshold"],
                                    thresholds["dist_threshold"],
                                    thresholds["time_threshold"],
                                    thresholds["timemax_threshold"])
Example #8
def extract_staypoints_ipa(positionfixes,
                           method='sliding',
                           dist_threshold=50,
                           time_threshold=5 * 60,
                           timemax_threshold=12 * 3600,
                           epsilon=100,
                           dist_func=haversine_dist,
                           eps=None,
                           num_samples=None):
    """Extract staypoints from positionfixes.
    This function modifies the positionfixes and adds staypoint_ids.
    Parameters
    ----------
    num_samples
    eps
    positionfixes : GeoDataFrame
        The positionfixes have to follow the standard definition for positionfixes DataFrames.
    method : {'sliding' or 'dbscan'}
        The following methods are available to extract staypoints from positionfixes:
        'sliding' : Applies a sliding window over the data.
        'dbscan' : Uses the DBSCAN algorithm to find clusters of staypoints.
    dist_threshold : float
        The distance threshold for the 'sliding' method, i.e., how far someone has to travel to
        generate a new staypoint.
    time_threshold : float
        The time threshold for the 'sliding' method in seconds, i.e., how long someone has to 
        stay within an area to consider it as a staypoint.
    epsilon : float
        The epsilon for the 'dbscan' method.
    dist_func : function
        A function that expects (lon_1, lat_1, lon_2, lat_2) and computes a distance in meters.
    Returns
    -------
    GeoDataFrame
        A new GeoDataFrame containing points where a person spent some time.
    Examples
    --------
    >>> psfs.as_positionfixes.extract_staypoints('sliding', dist_threshold=100)
    References
    ----------
    Zheng, Y. (2015). Trajectory data mining: an overview. ACM Transactions on Intelligent Systems 
    and Technology (TIST), 6(3), 29.
    Li, Q., Zheng, Y., Xie, X., Chen, Y., Liu, W., & Ma, W. Y. (2008, November). Mining user 
    similarity based on location history. In Proceedings of the 16th ACM SIGSPATIAL international 
    conference on Advances in geographic information systems (p. 34). ACM.
    """
    if 'id' not in positionfixes.columns:
        positionfixes['id'] = positionfixes.index

    ret_staypoints = pd.DataFrame(
        columns=['started_at', 'finished_at', 'geom', 'id'])

    if method == 'sliding':
        # Algorithm from Li et al. (2008). For details, please refer to the paper.
        staypoint_id_counter = 0
        positionfixes['staypoint_id'] = -1  # marks all positionfixes that are not part of a staypoint

        for user_id_this in positionfixes['user_id'].unique():

            positionfixes_user_this = positionfixes.loc[
                positionfixes['user_id'] == user_id_this]  # this is not a copy

            pfs = positionfixes_user_this.sort_values('tracked_at').to_dict(
                'records')
            num_pfs = len(pfs)

            posfix_staypoint_matching = {}

            i = 0
            j = 0  # starts at zero because it is set to i + 1 at the top of each pass
            while i < num_pfs:
                if j == num_pfs:
                    # We're at the end, this can happen if in the last "bin",
                    # the dist_threshold is never crossed anymore.
                    break
                else:
                    j = i + 1
                while j < num_pfs:

                    dist = haversine_dist(pfs[i]['geom'].x, pfs[i]['geom'].y,
                                          pfs[j]['geom'].x, pfs[j]['geom'].y)

                    if (dist > dist_threshold):
                        delta_t = (pfs[j]['tracked_at'] -
                                   pfs[i]['tracked_at']).total_seconds()

                        # Compare with the maximum time threshold
                        if (delta_t > time_threshold):
                            if (delta_t > timemax_threshold):
                                hrdiff = []
                                hrsum = 0
                                for x in range(i, j):
                                    hrdiff.append(
                                        (pfs[x + 1]['tracked_at'] -
                                         pfs[x]['tracked_at']).total_seconds())
                                i0 = i
                                for mid in range(0, j - i0):
                                    hrsum += hrdiff[mid]
                                    if (hrsum > timemax_threshold
                                            or mid == j - i0 - 1):
                                        staypoint = {}
                                        staypoint['user_id'] = pfs[i][
                                            'user_id']
                                        staypoint['geom'] = Point(
                                            np.mean([
                                                pfs[k]['geom'].x
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ]),
                                            np.mean([
                                                pfs[k]['geom'].y
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ]))
                                        if 'elevation' in pfs[i].keys():
                                            staypoint['elevation'] = np.mean([
                                                pfs[k]['elevation']
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ])
                                        if 'velocity' in pfs[i].keys():
                                            staypoint['velocity'] = np.mean([
                                                pfs[k]['velocity']
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ])
                                        staypoint['started_at'] = pfs[i][
                                            'tracked_at']
                                        staypoint['finished_at'] = pfs[
                                            i0 + mid +
                                            1]['tracked_at']  # TODO: should this not be j-1? because j is not part of the staypoint. DB: Changed
                                        staypoint['id'] = staypoint_id_counter
                                        # store matching
                                        posfix_staypoint_matching[
                                            staypoint_id_counter] = [
                                                pfs[k]['id'] for k in range(
                                                    i, i0 + mid + 1)
                                            ]
                                        staypoint_id_counter += 1
                                        # add staypoint
                                        ret_staypoints = ret_staypoints.append(
                                            staypoint, ignore_index=True)

                                        i = i0 + mid + 1
                                        hrsum = 0
                            else:
                                staypoint = {}
                                staypoint['user_id'] = pfs[i]['user_id']
                                staypoint['geom'] = Point(
                                    np.mean([
                                        pfs[k]['geom'].x for k in range(i, j)
                                    ]),
                                    np.mean([
                                        pfs[k]['geom'].y for k in range(i, j)
                                    ]))
                                if 'elevation' in pfs[i].keys():
                                    staypoint['elevation'] = np.mean([
                                        pfs[k]['elevation']
                                        for k in range(i, j)
                                    ])
                                if 'velocity' in pfs[i].keys():
                                    staypoint['velocity'] = np.mean([
                                        pfs[k]['velocity']
                                        for k in range(i, j)
                                    ])
                                staypoint['started_at'] = pfs[i]['tracked_at']
                                staypoint['finished_at'] = pfs[j][
                                    'tracked_at']  # TODO: should this not be j-1? because j is not part of the staypoint. DB: Changed
                                staypoint['id'] = staypoint_id_counter
                                # store matching
                                posfix_staypoint_matching[
                                    staypoint_id_counter] = [
                                        pfs[k]['id'] for k in range(i, j)
                                    ]
                                staypoint_id_counter += 1
                                # add staypoint
                                ret_staypoints = ret_staypoints.append(
                                    staypoint, ignore_index=True)

                            # TODO Discussion: Is this last point really a staypoint? As we don't know if the
                            #      person "moves on" afterwards...
                        i = j
                        break

                    # If the last point meets the minimum time threshold, then it is added to the stay point
                    if (j == num_pfs - 1):

                        delta_t = (pfs[j]['tracked_at'] -
                                   pfs[i]['tracked_at']).total_seconds()
                        if (delta_t > time_threshold):
                            if (delta_t > timemax_threshold):
                                hrdiff = []
                                hrsum = 0
                                for x in range(i, j):
                                    hrdiff.append(
                                        (pfs[x + 1]['tracked_at'] -
                                         pfs[x]['tracked_at']).total_seconds())

                                i0 = i
                                for mid in range(0, j - i0):
                                    hrsum += hrdiff[mid]

                                    if (hrsum > timemax_threshold
                                            or mid == j - i0 - 1):
                                        staypoint = {}
                                        staypoint['user_id'] = pfs[i][
                                            'user_id']
                                        staypoint['geom'] = Point(
                                            np.mean([
                                                pfs[k]['geom'].x
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ]),
                                            np.mean([
                                                pfs[k]['geom'].y
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ]))
                                        if 'elevation' in pfs[i].keys():
                                            staypoint['elevation'] = np.mean([
                                                pfs[k]['elevation']
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ])
                                        if 'velocity' in pfs[i].keys():
                                            staypoint['velocity'] = np.mean([
                                                pfs[k]['velocity']
                                                for k in range(
                                                    i, i0 + mid + 1)
                                            ])
                                        staypoint['started_at'] = pfs[i][
                                            'tracked_at']
                                        staypoint['finished_at'] = pfs[
                                            i0 + mid +
                                            1]['tracked_at']  # TODO: should this not be j-1? because j is not part of the staypoint. DB: Changed
                                        staypoint['id'] = staypoint_id_counter
                                        # store matching
                                        posfix_staypoint_matching[
                                            staypoint_id_counter] = [
                                                pfs[k]['id'] for k in range(
                                                    i, i0 + mid + 1)
                                            ]
                                        staypoint_id_counter += 1
                                        # add staypoint
                                        ret_staypoints = ret_staypoints.append(
                                            staypoint, ignore_index=True)

                                        i = i0 + mid + 1
                                        hrsum = 0
                            else:
                                staypoint = {}
                                staypoint['user_id'] = pfs[i]['user_id']
                                staypoint['geom'] = Point(
                                    np.mean([
                                        pfs[k]['geom'].x
                                        for k in range(i, j + 1)
                                    ]),
                                    np.mean([
                                        pfs[k]['geom'].y
                                        for k in range(i, j + 1)
                                    ]))
                                if 'elevation' in pfs[i].keys():
                                    staypoint['elevation'] = np.mean([
                                        pfs[k]['elevation']
                                        for k in range(i, j + 1)
                                    ])
                                if 'velocity' in pfs[i].keys():
                                    staypoint['velocity'] = np.mean([
                                        pfs[k]['velocity']
                                        for k in range(i, j + 1)
                                    ])
                                staypoint['started_at'] = pfs[i]['tracked_at']
                                staypoint['finished_at'] = pfs[j][
                                    'tracked_at']  # TODO: should this not be j-1? because j is not part of the staypoint. DB: Changed.
                                staypoint['id'] = staypoint_id_counter

                                # store matching
                                posfix_staypoint_matching[
                                    staypoint_id_counter] = [
                                        pfs[k]['id'] for k in range(i, j + 1)
                                    ]

                                # posfix_staypoint_matching[staypoint_id_counter] = [
                                #     j]  # rather [k for k in range(i, j)]?

                                staypoint_id_counter += 1
                                ret_staypoints = ret_staypoints.append(
                                    staypoint, ignore_index=True)

                    j = j + 1

            # add matching to original positionfixes (for every user)

            for staypoints_id, posfix_idlist in posfix_staypoint_matching.items():
                # note that we use .loc because above we saved the ids of the
                # positionfixes, not their absolute positions
                positionfixes.loc[posfix_idlist,
                                  'staypoint_id'] = staypoints_id

    elif method == 'dbscan':
        # TODO: Make sure time information is included in the clustering!
        # Time information is in the column 'tracked_at'; however, the user should be able to
        # adjust the distance metric, e.g., Chebyshev.

        db = DBSCAN(eps=epsilon, min_samples=num_samples)
        for user_id_this in positionfixes['user_id'].unique():

            user_positionfixes = positionfixes[positionfixes[
                'user_id'] == user_id_this]  # this is not a copy!

            # TODO: enable transformations to temporary (metric) system
            transform_crs = None
            if transform_crs is not None:
                pass

            # get staypoint matching
            coordinates = np.array([[g.x, g.y]
                                    for g in user_positionfixes['geom']])
            labels = db.fit_predict(coordinates)

            # add positionfixes - staypoint matching to original positionfixes
            positionfixes.loc[user_positionfixes.index,
                              'staypoint_id'] = labels

        # create staypoints as the center of the grouped positionfixes
        grouped_df = positionfixes.groupby(['user_id', 'staypoint_id'])
        for combined_id, group in grouped_df:
            user_id, staypoint_id = combined_id

            if int(staypoint_id) != -1:
                staypoint = {}
                staypoint['user_id'] = user_id
                staypoint['id'] = staypoint_id

                # point geometry of staypoint
                staypoint['geom'] = Point(group.geometry.x.mean(),
                                          group.geometry.y.mean())

                ret_staypoints = ret_staypoints.append(staypoint,
                                                       ignore_index=True)

    ret_staypoints = gpd.GeoDataFrame(ret_staypoints,
                                      geometry='geom',
                                      crs=positionfixes.crs)
    ret_staypoints['id'] = ret_staypoints['id'].astype('int')

    return ret_staypoints
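A minimal usage sketch for the 'sliding' method, assuming a positionfixes file in the trackintel CSV format (the path is a placeholder; the thresholds are the function's defaults):

import trackintel as ti

# read positionfixes with trackintel (placeholder path)
pfs = ti.read_positionfixes_csv('data/positionfixes.csv', sep=';')

stps = extract_staypoints_ipa(pfs,
                              method='sliding',
                              dist_threshold=50,           # meters
                              time_threshold=5 * 60,       # seconds
                              timemax_threshold=12 * 3600)

# pfs now also carries a 'staypoint_id' column linking each fix to its staypoint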