Example #1
0
    def test_keyword_combinations(self):
        """Call the distance matrix with several keyword combinations.

        The euclidean and haversine calls with ``n_jobs=-1`` are only
        executed. The Minkowski results for p=1 and p=2 must differ, and the
        p=2 result must equal the euclidean result.
        """
        csv_path = os.path.join('tests', 'data', 'geolife',
                                'geolife_staypoints.csv')
        staypoints = ti.read_staypoints_csv(csv_path, tz='utc', index_col='id')

        # two disjoint slices of different length
        first, second = staypoints.iloc[0:5], staypoints.iloc[5:15]

        # smoke calls: the return values are not inspected
        calculate_distance_matrix(X=first, Y=second,
                                  dist_metric='euclidean', n_jobs=-1)
        calculate_distance_matrix(X=second, Y=first,
                                  dist_metric='haversine', n_jobs=-1)

        # Minkowski matrices keyed by their exponent p
        minkowski = {
            p: calculate_distance_matrix(X=first, Y=first,
                                         dist_metric='minkowski', p=p)
            for p in (1, 2)
        }
        euclidean = calculate_distance_matrix(X=first, Y=first,
                                              dist_metric='euclidean')

        assert not np.array_equal(minkowski[1], minkowski[2])
        assert np.array_equal(euclidean, minkowski[2])
Example #2
0
    def test_keyword_combinations(self):
        """Exercise different keyword combinations of the distance matrix call.

        Euclidean and haversine are run with ``n_jobs=-1`` without checking
        the output. Minkowski with p=1 has to differ from p=2, and p=2 has to
        match the euclidean matrix exactly.
        """
        data_file = os.path.join("tests", "data", "geolife",
                                 "geolife_staypoints.csv")
        all_stps = ti.read_staypoints_csv(data_file, tz="utc", index_col="id")

        head = all_stps.iloc[0:5]
        tail = all_stps.iloc[5:15]

        # only make sure these combinations run through
        smoke_calls = (
            dict(X=head, Y=tail, dist_metric="euclidean", n_jobs=-1),
            dict(X=tail, Y=head, dist_metric="haversine", n_jobs=-1),
        )
        for kwargs in smoke_calls:
            calculate_distance_matrix(**kwargs)

        manhattan = calculate_distance_matrix(X=head, Y=head,
                                              dist_metric="minkowski", p=1)
        minkowski_p2 = calculate_distance_matrix(X=head, Y=head,
                                                 dist_metric="minkowski", p=2)
        euclidean = calculate_distance_matrix(X=head, Y=head,
                                              dist_metric="euclidean")

        assert not np.array_equal(manhattan, minkowski_p2)
        assert np.array_equal(euclidean, minkowski_p2)
    def test_trajectory_distance(self):
        """Check that the dtw distance matrix is the same for 1 and 4 jobs."""
        tpls_file = os.path.join('tests', 'data', 'geolife',
                                 'geolife_triplegs.csv')
        triplegs = ti.read_triplegs_csv(tpls_file)

        # one matrix per n_jobs setting, each computed on the first four rows
        matrices = {
            jobs: calculate_distance_matrix(X=triplegs.iloc[0:4],
                                            dist_metric='dtw',
                                            n_jobs=jobs)
            for jobs in (1, 4)
        }

        total_abs_diff = np.sum(np.abs(matrices[1] - matrices[4]))
        assert np.isclose(total_abs_diff, 0)
Example #4
0
    def test_trajectory_distance_frechet(self, geolife_tpls):
        """Compare the frechet distance matrix computed with one and with four jobs."""
        single_core, multi_core = [
            calculate_distance_matrix(X=geolife_tpls.iloc[0:4],
                                      dist_metric="frechet",
                                      n_jobs=jobs)
            for jobs in (1, 4)
        ]

        # summed absolute deviation between both results must vanish
        deviation = np.abs(single_core - multi_core)
        assert np.isclose(np.sum(deviation), 0)
Example #5
0
    def test_distance_error(self, single_linestring):
        """Test that an AttributeError is raised for MultiLineString geometries."""
        # build a GeoDataFrame whose two rows hold the same MultiLineString
        multi_line = MultiLineString([single_linestring, single_linestring])
        rows = [(row_id, multi_line) for row_id in (0, 1)]
        frame = gpd.GeoDataFrame(rows, columns=["id", "geometry"])
        frame = frame.set_geometry("geometry")
        frame = frame.set_crs("wgs84")

        with pytest.raises(AttributeError):
            calculate_distance_matrix(X=frame, dist_metric="dtw", n_jobs=1)
    def test_shape_for_different_array_length(self):
        """Check shapes and transpose symmetry for X and Y of unequal length."""
        csv_path = os.path.join('tests', 'data', 'geolife',
                                'geolife_staypoints.csv')
        staypoints = ti.read_staypoints_csv(csv_path)

        short = staypoints.iloc[0:5]
        long_ = staypoints.iloc[5:15]

        # per metric: (short vs. long, long vs. short)
        results = {}
        for metric in ('euclidean', 'haversine'):
            forward = calculate_distance_matrix(X=short, Y=long_,
                                                dist_metric=metric)
            backward = calculate_distance_matrix(X=long_, Y=short,
                                                 dist_metric=metric)
            results[metric] = (forward, backward)

        euc_fwd, euc_bwd = results['euclidean']
        hav_fwd, hav_bwd = results['haversine']

        assert euc_fwd.shape == hav_fwd.shape == (5, 10)
        assert euc_bwd.shape == hav_bwd.shape == (10, 5)
        # swapping X and Y must give the transposed matrix
        assert np.isclose(0, np.sum(np.abs(euc_fwd - euc_bwd.T)))
        assert np.isclose(0, np.sum(np.abs(hav_fwd - hav_bwd.T)))
Example #7
0
    def test_shape_for_different_array_length(self):
        """Verify matrix shapes and transpose symmetry when X and Y differ in length."""
        data_file = os.path.join("tests", "data", "geolife",
                                 "geolife_staypoints.csv")
        all_stps = ti.read_staypoints_csv(data_file, tz="utc", index_col="id")

        five = all_stps.iloc[0:5]
        ten = all_stps.iloc[5:15]

        def both_directions(metric):
            """Return the (five x ten, ten x five) matrices for ``metric``."""
            forward = calculate_distance_matrix(X=five, Y=ten,
                                                dist_metric=metric)
            backward = calculate_distance_matrix(X=ten, Y=five,
                                                 dist_metric=metric)
            return forward, backward

        euc_5x10, euc_10x5 = both_directions("euclidean")
        hav_5x10, hav_10x5 = both_directions("haversine")

        assert euc_5x10.shape == hav_5x10.shape == (5, 10)
        assert euc_10x5.shape == hav_10x5.shape == (10, 5)
        # exchanging X and Y has to yield the transposed result
        assert np.isclose(0, np.sum(np.abs(euc_5x10 - euc_10x5.T)))
        assert np.isclose(0, np.sum(np.abs(hav_5x10 - hav_10x5.T)))
Example #8
0
    def test_trajectory_distance_dtw(self, geolife_tpls):
        """Compare dtw distance matrices computed with different ``n_jobs`` values."""
        # matrices keyed by the n_jobs value they were computed with
        by_jobs = {
            jobs: calculate_distance_matrix(X=geolife_tpls.iloc[0:4],
                                            dist_metric="dtw",
                                            n_jobs=jobs)
            for jobs in (-1, 0, 1, 4)
        }

        reference = by_jobs[4]
        for jobs in (1, -1, 0):
            total_abs_diff = np.sum(np.abs(by_jobs[jobs] - reference))
            assert np.isclose(total_abs_diff, 0)
Example #9
0
    def test_dbscan_haversine(self):
        """Compare haversine dbscan locations with a manual DBSCAN run on a precomputed matrix."""
        csv_path = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv")
        staypoints = ti.read_staypoints_csv(csv_path, tz="utc", index_col="id")

        # let generate_locations do the clustering with the haversine metric
        staypoints, locations = staypoints.as_staypoints.generate_locations(
            method="dbscan",
            epsilon=10,
            num_samples=0,
            distance_metric="haversine",
            agg_level="dataset",
        )

        # repeat by hand: pairwise haversine matrix fed into DBSCAN
        distances = calculate_distance_matrix(staypoints, dist_metric="haversine")
        manual_labels = DBSCAN(eps=10, min_samples=0, metric="precomputed").fit_predict(distances)

        n_locations = len(set(locations.index))
        n_clusters = len(set(manual_labels))
        assert n_locations == n_clusters
    def test_compare_haversine_to_scikit_xy(self):
        """Compare our haversine matrix with sklearn's pairwise_distances result."""
        csv_path = os.path.join('tests', 'data', 'geolife',
                                'geolife_staypoints.csv')
        staypoints = ti.read_staypoints_csv(csv_path)
        ours = calculate_distance_matrix(X=staypoints,
                                         Y=staypoints,
                                         dist_metric='haversine')

        geometry_x = staypoints.geometry.x.values
        geometry_y = staypoints.geometry.y.values

        x_in_rad = np.asarray([radians(value) for value in geometry_x])
        y_in_rad = np.asarray([radians(value) for value in geometry_y])
        # two columns, y first and x second
        yx_rad = np.column_stack((y_in_rad, x_in_rad))

        # scale sklearn's result by 6371000 before comparing
        theirs = pairwise_distances(yx_rad, metric='haversine') * 6371000

        abs_diff = np.abs(ours - theirs)
        assert np.allclose(abs_diff, 0, atol=0.001)  # atol = 1mm
Example #11
0
    def test_cluster_staypoints_dbscan_haversine(self):
        """Compare extract_locations (dbscan, haversine) with a manual DBSCAN run."""
        csv_path = os.path.join('tests', 'data', 'geolife',
                                'geolife_staypoints.csv')
        staypoints = ti.read_staypoints_csv(csv_path)

        # clustering through the staypoints accessor
        extract_kwargs = dict(method='dbscan',
                              epsilon=10,
                              num_samples=0,
                              distance_matrix_metric='haversine',
                              agg_level='dataset')
        staypoints, locations = staypoints.as_staypoints.extract_locations(
            **extract_kwargs)

        # clustering by hand on the precomputed pairwise haversine matrix
        distances = calculate_distance_matrix(staypoints,
                                              dist_metric="haversine")
        clusterer = DBSCAN(eps=10, min_samples=0, metric="precomputed")
        manual_labels = clusterer.fit_predict(distances)

        n_locations = len(set(locations['location_id']))
        n_clusters = len(set(manual_labels))
        assert n_locations == n_clusters, "The #location should be the same"
Example #12
0
    def test_compare_haversine_to_scikit_xy(self):
        """Check our haversine distance matrix against sklearn's pairwise_distances."""
        data_file = os.path.join("tests", "data", "geolife",
                                 "geolife_staypoints.csv")
        all_stps = ti.read_staypoints_csv(data_file, tz="utc", index_col="id")
        ours = calculate_distance_matrix(X=all_stps,
                                         Y=all_stps,
                                         dist_metric="haversine")

        def to_radians(values):
            """Convert an iterable of degree values into an array of radians."""
            return np.asarray([radians(value) for value in values])

        x_in_rad = to_radians(all_stps.geometry.x.values)
        y_in_rad = to_radians(all_stps.geometry.y.values)
        # two columns, y first and x second
        yx_rad = np.column_stack((y_in_rad, x_in_rad))

        # scale sklearn's result by 6371000 before comparing
        theirs = pairwise_distances(yx_rad, metric="haversine") * 6371000

        abs_diff = np.abs(ours - theirs)
        assert np.allclose(abs_diff, 0, atol=0.001)  # atol = 1mm
Example #13
0
    def test_generate_locations_dbscan_haversine(self):
        """Compare generate_locations (dbscan, haversine) with a manual DBSCAN run."""
        csv_path = os.path.join('tests', 'data', 'geolife',
                                'geolife_staypoints.csv')
        staypoints = ti.read_staypoints_csv(csv_path, tz='utc', index_col='id')

        # clustering through the staypoints accessor
        generate_kwargs = dict(method='dbscan',
                               epsilon=10,
                               num_samples=0,
                               distance_matrix_metric='haversine',
                               agg_level='dataset')
        staypoints, locations = staypoints.as_staypoints.generate_locations(
            **generate_kwargs)

        # clustering by hand on the precomputed pairwise haversine matrix
        distances = calculate_distance_matrix(staypoints,
                                              dist_metric="haversine")
        clusterer = DBSCAN(eps=10, min_samples=0, metric="precomputed")
        manual_labels = clusterer.fit_predict(distances)

        n_locations = len(set(locations.index))
        n_clusters = len(set(manual_labels))
        assert n_locations == n_clusters, "The number of locations should be the same"
def weights_delaunay(locations,
                     to_crs=None,
                     distance_matrix_metric='haversine',
                     adjacency_dict=None):
    """
    Calculate graph weights along the edges of a Delaunay triangulation.

    For every user, the user's locations are triangulated with
    scipy.spatial.Delaunay. Every edge of the triangulation gets the inverse
    distance between its two locations as weight, so that close locations
    have a high weight. The result is stored as a symmetric sparse matrix.

    Parameters
    ----------
    locations: GeoDataFrame
        Must have the columns 'user_id' and 'location_id' and either the
        columns 'long' and 'lat' or a geometry column 'center'.

    to_crs: optional
        If given, the 'center' geometries are transformed with
        ``to_crs(to_crs)`` and the transformed x/y coordinates are
        triangulated. Otherwise 'long'/'lat' are used, falling back to the
        untransformed 'center' geometries if these columns are missing.

    distance_matrix_metric: str, default 'haversine'
        Passed as ``dist_metric`` to calculate_distance_matrix().

    adjacency_dict: dict (optional)
        Dictionary from a previous call. New results are appended to the
        lists of users that are already present. A new dictionary is created
        if None is given.

    Returns
    -------
    dict
        ``adjacency_dict[user_id]`` is a dictionary with the keys 'A' (list of
        scipy coo_matrix), 'loc_id_order' (list of arrays with the location
        ids in row/column order) and 'edge_name' (list of str). If the
        triangulation fails with a QhullError, an empty (0, 0) matrix and an
        empty location id array are stored for that user.
    """
    all_users = locations["user_id"].unique()
    if adjacency_dict is None:
        adjacency_dict = {}

    sorted_locs = locations.set_index('user_id', drop=False)
    sorted_locs.index.name = 'user_id_ix'
    sorted_locs.sort_index(inplace=True)

    edge_name = 'delaunay'

    for user_id_this in all_users:

        user_locs = sorted_locs[sorted_locs.index == user_id_this]
        loc_id_order = user_locs['location_id'].values

        if to_crs is not None:
            geometry = user_locs['center'].to_crs(to_crs)
            points = list(zip(geometry.x, geometry.y))
        else:
            try:
                # Use the coordinates of this user only. The point order has
                # to match the rows of the per-user distance matrix below.
                points = list(zip(user_locs['long'], user_locs['lat']))
            except KeyError:
                geometry = user_locs['center']
                points = list(zip(geometry.x, geometry.y))

        # nx graph from scipy.spatial.Delaunay:
        # https://groups.google.com/forum/#!topic/networkx-discuss/D7fMmuzVBAw
        try:
            triangulation = scipy.spatial.Delaunay(points)
        except QhullError:
            # no triangulation possible for this user
            A = coo_matrix((0, 0))
            loc_id_order = np.asarray([])
        else:
            # Collect the three edges of every triangle as index pairs.
            # Sorting the pair avoids adding an edge twice in both directions.
            edges = set()
            for simplex in triangulation.simplices:
                for first, second in ((0, 1), (0, 2), (1, 2)):
                    low, high = sorted([simplex[first], simplex[second]])
                    edges.add((low, high))

            locs_distance_matrix = np.asarray(
                calculate_distance_matrix(user_locs,
                                          dist_metric=distance_matrix_metric),
                dtype=float)

            # Invert the distances so that close places have a high weight.
            # `out` is pre-filled with zeros because entries excluded by
            # `where` are otherwise left uninitialized.
            weights = np.reciprocal(locs_distance_matrix,
                                    out=np.zeros_like(locs_distance_matrix),
                                    where=locs_distance_matrix != 0)

            weighted_edges = [(u, v, weights[u, v]) for u, v in edges]
            row_ixs, col_ixs, values = map(list, zip(*weighted_edges))

            # enforce symmetry: add every edge in both directions
            A = coo_matrix((values + values,
                            (row_ixs + col_ixs, col_ixs + row_ixs)),
                           shape=weights.shape)

        if user_id_this not in adjacency_dict:
            adjacency_dict[user_id_this] = {
                'A': [A],
                'loc_id_order': [loc_id_order],
                'edge_name': [edge_name]
            }
        else:
            adjacency_dict[user_id_this]['A'].append(A)
            adjacency_dict[user_id_this]['loc_id_order'].append(loc_id_order)
            adjacency_dict[user_id_this]['edge_name'].append(edge_name)

    return adjacency_dict
def weights_n_neighbors(locations,
                        n=None,
                        distance_matrix_metric='haversine',
                        adjacency_dict=None):
    """
    Calculate the distance of the n nearest locations as graph weights.

    Graphs based on the activity locations (trackintel locations) can have several
    types of weighted edges. This function calculates the edge weight based
    on the distance to the n closest neighbors (locations) of the same user.
    The weights are the inverted distances, such that close locations have a
    high weight.

    Parameters
    ----------
    locations: GeoDataFrame
        Must have the columns 'user_id' and 'location_id'.

    n: int or 'fconn' (optional)
        Number of nearest locations to take into account. If None or
        'fconn', the full inverted distance matrix is used.

    distance_matrix_metric: string
        The distance metric used to calculate the distance between locations.
        Uses the Trackintel.geogr.distances.calculate_distance_matrix()
        function. Possible metrics are: {'haversine', 'euclidean'} or any
        mentioned in:
            https://scikit-learn.org/stable/modules/generated/
            sklearn.metrics.pairwise_distances.html

    adjacency_dict: dict (optional)
        Dictionary from a previous call. New results are appended to the
        lists of users that are already present. A new dictionary is created
        if None is given.

    Returns
    -------
    dict
        ``adjacency_dict[user_id]`` is a dictionary with the keys 'A' (list of
        scipy coo_matrix), 'loc_id_order' (list of arrays with the location
        ids in row/column order) and 'edge_name' (list of str, either
        'fconn_distant' or '<n>_distant').
    """
    # todo: check if cluster id is missing
    # todo: check if adjacency matrix is symmetric?
    # todo: What if n is too large?

    all_users = locations["user_id"].unique()
    if adjacency_dict is None:
        adjacency_dict = {}

    sorted_locs = locations.set_index('user_id', drop=False)
    sorted_locs.index.name = 'user_id_ix'
    sorted_locs.sort_index(inplace=True)

    for user_id_this in all_users:
        user_locs = sorted_locs[sorted_locs.index == user_id_this]

        locs_distance_matrix = np.asarray(
            calculate_distance_matrix(user_locs,
                                      dist_metric=distance_matrix_metric),
            dtype=float)
        # Invert such that close nodes have a high weight. `out` is
        # pre-filled with zeros because entries excluded by `where` (zero
        # distance, e.g. the diagonal) are otherwise left uninitialized.
        weights = np.reciprocal(locs_distance_matrix,
                                out=np.zeros_like(locs_distance_matrix),
                                where=locs_distance_matrix != 0)

        loc_id_order = user_locs['location_id'].values
        shape = weights.shape

        if n is None or n == 'fconn':
            A = coo_matrix(weights)
            edge_name = 'fconn_distant'

        else:
            row_ixs = []
            col_ixs = []
            values = []

            # for every row, keep only the largest elements
            # NOTE(review): n + 1 entries are kept per row; presumably the
            # extra slot was meant for the location itself. TODO confirm.
            for row_ix_this, row_this in enumerate(weights):
                max_ixs = np.argsort(row_this)[::-1][0:n + 1]

                col_ixs.extend(max_ixs)
                row_ixs.extend([row_ix_this] * len(max_ixs))
                values.extend(row_this[max_ixs])

            # enforce symmetry: add every entry in both directions
            # NOTE(review): scipy sums duplicate COO entries on conversion,
            # so pairs selected from both rows get twice the weight.
            # TODO confirm this is intended.
            A = coo_matrix((values + values,
                            (row_ixs + col_ixs, col_ixs + row_ixs)),
                           shape=shape)
            edge_name = '{}_distant'.format(n)

        if user_id_this not in adjacency_dict:
            adjacency_dict[user_id_this] = {
                'A': [A],
                'loc_id_order': [loc_id_order],
                'edge_name': [edge_name]
            }
        else:
            adjacency_dict[user_id_this]['A'].append(A)
            adjacency_dict[user_id_this]['loc_id_order'].append(loc_id_order)
            adjacency_dict[user_id_this]['edge_name'].append(edge_name)

    return adjacency_dict
Example #16
0
def _dbscan_labels(db, staypoints, distance_matrix_metric):
    """Run ``db`` on ``staypoints`` and return the cluster label per staypoint."""
    if distance_matrix_metric is not None:
        sp_distance_matrix = calculate_distance_matrix(
            staypoints, dist_metric=distance_matrix_metric)
        return db.fit_predict(sp_distance_matrix)

    coordinates = np.array([[g.x, g.y] for g in staypoints.geometry])
    return db.fit_predict(coordinates)


def _location_center(geom):
    """Return ``geom`` if it is a Point, else the mean of its member points."""
    if geom.geom_type == 'Point':
        return geom
    coords = np.array([[pt.x, pt.y] for pt in geom.geoms])
    return Point(coords[:, 0].mean(), coords[:, 1].mean())


def cluster_staypoints(staypoints,
                       method='dbscan',
                       epsilon=100,
                       num_samples=1,
                       distance_matrix_metric=None,
                       agg_level='user'):
    """Clusters staypoints to get locations.

    Parameters
    ----------
    staypoints : GeoDataFrame
        The staypoints have to follow the standard definition for staypoints DataFrames.

    method : str, {'dbscan'}, default 'dbscan'
        The following methods are available to cluster staypoints into locations:
        'dbscan' : Uses the DBSCAN algorithm to cluster staypoints.

    epsilon : float, default 100
        The epsilon for the 'dbscan' method.

    num_samples : int, default 1
        The minimal number of samples in a cluster.

    distance_matrix_metric: str (optional)
        When given, dbscan will work on a precomputed distance matrix that is
        created using the staypoints based on the given metric. Possible metrics
        are: {'haversine', 'euclidean'} or any mentioned in:
        https://scikit-learn.org/stable/modules/generated/
        sklearn.metrics.pairwise_distances.html

    agg_level: str, {'user' or 'dataset'}, default 'user'
        The level of aggregation when generating locations:
        'user'      : locations are generated independently per-user.
        'dataset'   : shared locations are generated for all users.

    Returns
    -------
    ret_sp: GeoDataFrame
        A copy of the staypoints with the additional column 'location_id'.
        Staypoints labelled as noise by dbscan have the location_id -1.

    ret_loc: GeoDataFrame
        The locations with the columns 'user_id', 'location_id', 'center'
        (geometry) and 'extent'. Noise staypoints do not form a location.

    Raises
    ------
    AttributeError
        If agg_level or method has an unsupported value.

    Examples
    --------
    >>> spts.as_staypoints.cluster_staypoints(method='dbscan', epsilon=100, num_samples=1)
    """

    if agg_level not in ['user', 'dataset']:
        raise AttributeError(
            "The parameter agg_level must be one of ['user', 'dataset'].")
    # fail early instead of running into an undefined result at the return
    if method != 'dbscan':
        raise AttributeError("The parameter method must be one of ['dbscan'].")

    ret_sp = staypoints.copy()
    geom_col = ret_sp.geometry.name

    if distance_matrix_metric is not None:
        db = DBSCAN(eps=epsilon,
                    min_samples=num_samples,
                    metric='precomputed')
    else:
        db = DBSCAN(eps=epsilon, min_samples=num_samples)

    if agg_level == 'user':
        location_id_counter = 0
        for user_id_this in ret_sp["user_id"].unique():
            user_staypoints = ret_sp[ret_sp["user_id"] == user_id_this]
            labels = _dbscan_labels(db, user_staypoints,
                                    distance_matrix_metric)

            # enforce unique labels across all users without changing noise labels
            max_label = np.max(labels)
            is_cluster = labels != -1
            labels[is_cluster] = labels[is_cluster] + location_id_counter + 1
            if max_label > -1:
                location_id_counter = location_id_counter + max_label + 1

            # add staypoint - location matching to original staypoints
            ret_sp.loc[user_staypoints.index, 'location_id'] = labels
    else:
        labels = _dbscan_labels(db, ret_sp, distance_matrix_metric)

        # Add 1 to match the 'user' level result. Noise (-1) has to stay -1,
        # otherwise it would become location 0 and survive the filter below.
        is_cluster = labels != -1
        labels[is_cluster] = labels[is_cluster] + 1
        ret_sp['location_id'] = labels

    # create locations as grouped staypoints
    temp_sp = ret_sp[['user_id', 'location_id', geom_col]]
    ret_loc = temp_sp.dissolve(by=['user_id', 'location_id'], as_index=False)
    # filter outlier; copy so that the following assignments hit a real frame
    ret_loc = ret_loc.loc[ret_loc['location_id'] != -1].copy()

    # Locations with only one staypoint are of type "Point" and are their own
    # center. Locations with multiple staypoints are of type "MultiPoint" and
    # get the mean of their member points as center.
    ret_loc['center'] = ret_loc[geom_col].apply(_location_center)

    # extent is the convex hull of the geometry
    ret_loc['extent'] = ret_loc[geom_col].apply(lambda p: p.convex_hull)
    # convex_hull of one point would be a Point and two points a Linestring,
    # we change them into Polygon by creating a buffer of epsilon around them.
    pointLine_idx = ret_loc['extent'].geom_type.isin(['LineString', 'Point'])

    if pointLine_idx.any():
        # Perform meter to decimal conversion if the distance metric is haversine
        if distance_matrix_metric == 'haversine':
            ret_loc.loc[pointLine_idx, 'extent'] = ret_loc.loc[
                pointLine_idx].apply(lambda p: p['extent'].buffer(
                    meters_to_decimal_degrees(epsilon, p['center'].y)),
                                     axis=1)
        else:
            ret_loc.loc[pointLine_idx, 'extent'] = ret_loc.loc[
                pointLine_idx].apply(lambda p: p['extent'].buffer(epsilon),
                                     axis=1)

    ret_loc = ret_loc.set_geometry('center')
    ret_loc = ret_loc[['user_id', 'location_id', 'center', 'extent']]
    ret_loc['location_id'] = ret_loc['location_id'].astype('int')

    return ret_sp, ret_loc
Example #17
0
def cluster_staypoints(staypoints, method='dbscan',
                       epsilon=100, num_samples=3, distance_matrix_metric=None):
    """Clusters staypoints to get places.

    Note that the staypoints are modified in place: the column 'place_id' is
    added to (or overwritten in) the passed GeoDataFrame. Staypoints labelled
    as noise by dbscan get the place_id -1.

    Parameters
    ----------
    staypoints : GeoDataFrame
        The staypoints have to follow the standard definition for staypoints DataFrames.

    method : str, {'dbscan'}, default 'dbscan'
        The following methods are available to cluster staypoints into places:
        'dbscan' : Uses the DBSCAN algorithm to cluster staypoints.
        For any other value an empty DataFrame is returned.

    epsilon : float
        The epsilon for the 'dbscan' method.

    num_samples : int
        The minimal number of samples in a cluster.

    distance_matrix_metric: string (optional)
        When given, dbscan will work on a precomputed distance matrix that is
        created using the staypoints based on the given metric. Possible metrics
        are: {'haversine', 'euclidean'} or any mentioned in:
        https://scikit-learn.org/stable/modules/generated/
        sklearn.metrics.pairwise_distances.html

    Returns
    -------
    GeoDataFrame
        A new GeoDataFrame containing places that a person visited multiple times.
        It has the columns 'user_id', 'place_id', 'center' (geometry) and 'extent'.

    Examples
    --------
    >>> spts.as_staypoints.cluster_staypoints(method='dbscan', epsilon=50, num_samples=3)
    """
    place_columns = ['user_id', 'place_id', 'center', 'extent']
    ret_places = pd.DataFrame(columns=place_columns)

    if method == 'dbscan':

        if distance_matrix_metric is not None:
            db = DBSCAN(eps=epsilon, min_samples=num_samples,
                        metric='precomputed')
        else:
            db = DBSCAN(eps=epsilon, min_samples=num_samples)

        place_id_counter = 0

        for user_id_this in staypoints["user_id"].unique():
            user_staypoints = staypoints[staypoints["user_id"] == user_id_this]

            if distance_matrix_metric is not None:
                sp_distance_matrix = calculate_distance_matrix(
                    user_staypoints, dist_metric=distance_matrix_metric)
                labels = db.fit_predict(sp_distance_matrix)
            else:
                # use the active geometry column instead of a fixed name
                coordinates = np.array(
                    [[g.x, g.y] for g in user_staypoints.geometry])
                labels = db.fit_predict(coordinates)

            # enforce unique labels across all users without changing noise
            # labels
            max_label = np.max(labels)
            is_cluster = labels != -1
            labels[is_cluster] = labels[is_cluster] + place_id_counter + 1
            if max_label > -1:
                place_id_counter = place_id_counter + max_label + 1

            # add staypoint - place matching to original staypoints
            staypoints.loc[user_staypoints.index, 'place_id'] = labels

        # Create places as grouped staypoints. The rows are collected in a
        # list and turned into a DataFrame once, which avoids the quadratic
        # (and in pandas 2 removed) DataFrame.append.
        place_rows = []
        grouped_df = staypoints.groupby(['user_id', 'place_id'])
        for (user_id, place_id), group in grouped_df:
            if int(place_id) == -1:
                # noise does not form a place
                continue

            place_rows.append({
                'user_id': user_id,
                'place_id': place_id,
                # point geometry of place
                'center': Point(group.geometry.x.mean(),
                                group.geometry.y.mean()),
                # polygon geometry of place
                'extent': MultiPoint(points=list(group.geometry)).convex_hull,
            })

        ret_places = pd.DataFrame(place_rows, columns=place_columns)
        ret_places = gpd.GeoDataFrame(ret_places, geometry='center',
                                      crs=staypoints.crs)
        ret_places['place_id'] = ret_places['place_id'].astype('int')

    return ret_places