Exemple #1
0
    def create_all_polygons_on_grid(self):
        """
        Build one square polygon for every cell of the virtual grid.

        Cells are generated row by row, starting at
        (`lon_min_x`, `lat_min_y`) and advancing `cell_size_by_degree`
        per step. The resulting 2D array, addressed as
        [lat index][lon index], is stored in `self.grid_polygon`.

        """
        operation = begin_operation('create_all_polygons_on_grid')

        logger.debug('\nCreating all polygons on virtual grid')
        n_rows = self.grid_size_lat_y
        n_cols = self.grid_size_lon_x
        step = self.cell_size_by_degree
        cells = np.array([[None] * n_cols for _ in range(n_rows)])

        south = self.lat_min_y
        for row in progress_bar(range(n_rows), desc='Creating polygons'):
            north = south + step
            west = self.lon_min_x
            for col in range(n_cols):
                east = west + step
                # Build the polygon of this cell; corners run
                # SW -> NW -> NE -> SE.
                cells[row][col] = Polygon((
                    (west, south),
                    (west, north),
                    (east, north),
                    (east, south),
                ))
                west = east
            south = north

        self.grid_polygon = cells
        logger.debug('...geometries saved on Grid grid_polygon property')
        self.last_operation = end_operation(operation)
Exemple #2
0
def _prepare_segmentation(move_data: DataFrame, label_id: str,
                          label_new_tid: str):
    """
    Prepares a dataframe for segmentation.

    If the index has no name, `label_id` is set as the index (in place).
    The `label_new_tid` column is created, filled with the initial tid,
    when it does not exist yet.

    Parameters
    ----------
    move_data : dataframe
        Dataframe to be segmented; it is modified in place
    label_id : str
        label of the id column used as index
    label_new_tid : str
        label of the column that will hold the new segment ids

    Returns
    -------
    int
        initial curr_tid (always 0)
    pandas.Index
        unique values of the dataframe index
    int
        initial count (always 0)

    """
    initial_tid, initial_count = 0, 0

    index_is_unnamed = move_data.index.name is None
    if index_is_unnamed:
        logger.debug(f'...setting {label_id} as index')
        move_data.set_index(label_id, inplace=True)

    if label_new_tid not in move_data:
        move_data[label_new_tid] = initial_tid

    unique_ids = move_data.index.unique()
    return initial_tid, unique_ids, initial_count
Exemple #3
0
    def create_all_polygons_to_all_point_on_grid(self,
                                                 data: DataFrame) -> DataFrame:
        """
        Create the grid-cell polygon of every distinct (id, cell) pair.

        When the lat/lon grid index columns are missing from `data` they
        are generated first (this writes to `data`).

        Parameters
        ----------
        data : DataFrame
            Represents the dataset with contains lat, long and datetime

        Returns
        -------
        DataFrame
            The de-duplicated TRAJ_ID / INDEX_GRID_LAT / INDEX_GRID_LON
            rows of `data` with an extra 'polygon' column holding the
            polygon created for each row.

        """
        operation = begin_operation('create_all_polygons_to_all_point_on_grid')

        has_grid_indexes = INDEX_GRID_LAT in data and INDEX_GRID_LON in data
        if not has_grid_indexes:
            self.create_update_index_grid_feature(data, unique_index=False)

        def _cell_polygon(row):
            # One polygon per row, built from its lat/lon grid indexes.
            return self.create_one_polygon_to_point_on_grid(
                row[INDEX_GRID_LAT], row[INDEX_GRID_LON])

        wanted_columns = [TRAJ_ID, INDEX_GRID_LAT, INDEX_GRID_LON]
        unique_cells = data[wanted_columns].drop_duplicates()
        cell_polygons = unique_cells.apply(_cell_polygon, axis=1)

        logger.debug('...polygons were created')
        unique_cells['polygon'] = cell_polygons
        self.last_operation = end_operation(operation)
        return unique_cells
Exemple #4
0
    def point_to_index_grid(self, event_lat: float,
                            event_lon: float) -> tuple[int, int]:
        """
        Locate the coordinates x and y in a grid of point (lat, long).

        Each coordinate is offset by the grid origin
        (`lat_min_y` / `lon_min_x`), divided by `cell_size_by_degree`
        and floored.

        Parameters
        ----------
        event_lat : float
            Represents the latitude of a point
        event_lon : float
            Represents the longitude of a point

        Returns
        -------
        Tuple[int, int]
            Represents the index y in a grid of a point (lat, long)
            Represents the index x in a grid of a point (lat, long)

        Notes
        -----
        The values come straight from `np.floor`, so they are floored
        but not cast to an integer dtype; callers that need integers
        must convert them.

        """
        # Register the operation under this method's own name (it used to
        # carry a label copied from another method).
        operation = begin_operation('point_to_index_grid')

        indexes_lat_y = np.floor((np.float64(event_lat) - self.lat_min_y) /
                                 self.cell_size_by_degree)
        indexes_lon_x = np.floor((np.float64(event_lon) - self.lon_min_x) /
                                 self.cell_size_by_degree)
        logger.debug('...[%s,%s] indexes were created to lat and lon',
                     indexes_lat_y.size, indexes_lon_x.size)
        self.last_operation = end_operation(operation)

        return indexes_lat_y, indexes_lon_x
Exemple #5
0
def by_max_speed(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    max_speed_between_adj_points: float = 50.0,
    drop_single_points: bool = True,
    label_new_tid: str = TID_SPEED,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Segments trajectories wherever the speed exceeds a maximum.

    Parameters
    ----------
    move_data : dataframe.
       The input trajectory data.
    label_id : str, optional
         Label of the id column in the users dataframe, by default TRAJ_ID
    max_speed_between_adj_points : float, optional
        Maximum speed allowed between two adjacent points, by default 50
    drop_single_points : boolean, optional
        If set to True, drops the trajectories with only one point,
        by default True
    label_new_tid : str, optional
        Label of the column that receives the ids of the formed segments,
        by default TID_SPEED
    inplace : boolean, optional
        If True the original dataframe is used as the working object and
        None is returned; otherwise a copy is processed and returned,
        by default False

    Returns
    -------
    DataFrame
        The segmented dataframe, or None when `inplace` is True

    Note
    ----
    Speed features must be updated after split.

    """
    working = move_data if inplace else move_data.copy()

    message = 'Split trajectories by max_speed_between_adj_points: {}'
    logger.debug(message.format(max_speed_between_adj_points))

    # The split relies on the speed-to-previous-point feature.
    speed_missing = SPEED_TO_PREV not in working
    if speed_missing:
        working.generate_dist_time_speed_features()

    segmented = _filter_by(
        working,
        label_id,
        label_new_tid,
        drop_single_points,
        feature=SPEED_TO_PREV,
        max_between_adj_points=max_speed_between_adj_points,
        all=False,
    )
    return None if inplace else segmented
Exemple #6
0
def clean_gps_speed_max_radius(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    speed_max: float = 50.0,
    label_dtype: Callable = np.float64,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Removes trajectory points whose speed exceeds a maximum.

    The speed features are generated when missing, then the cleaning is
    delegated to `_clean_gps` using the `_filter_speed_max_radius` filter
    on the SPEED_TO_PREV feature with `speed_max` as limit.

    Parameters
    ----------
    move_data : dataframe
        The input trajectory data
    label_id : str, optional
        Label of the id column in the user dataframe, by default TRAJ_ID
    speed_max : float, optional
        Maximum speed a point may have in order not to be dropped,
        by default 50
    label_dtype : type, optional
        Represents column id type, by default np.float64.
    inplace : boolean, optional
        If True the original dataframe is used as the working object and
        None is returned, by default False

    Returns
    -------
    DataFrame
        The filtered trajectories, or None when `inplace` is True

    """
    working = move_data if inplace else move_data.copy()

    speed_missing = SPEED_TO_PREV not in working
    if speed_missing:
        working.generate_dist_time_speed_features(
            label_id=label_id, label_dtype=label_dtype)

    message = '\nClean gps points with speed max > %s meters by seconds'
    logger.debug(message % speed_max)

    cleaned = _clean_gps(
        working,
        _filter_speed_max_radius,
        arg1=SPEED_TO_PREV,
        arg2=speed_max,
        outliers=False,
    )
    return None if inplace else cleaned
Exemple #7
0
def outliers(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    jump_coefficient: float = 3.0,
    threshold: float = 1,
    new_label: str = OUTLIER,
    inplace: bool = False
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Create or update a boolean feature to detect outliers.

    A point is flagged when its distances to the previous point, to the
    next point and between its neighbours are all above `threshold`, and
    both distances from the point exceed `jump_coefficient` times the
    distance between its neighbours.

    Parameters
    ----------
    move_data : dataframe
        The input trajectory data
    jump_coefficient : float, optional
        Multiplier applied to the prev-to-next distance; a point is only
        an outlier if its distances to both neighbours exceed that
        product, by default 3
    threshold : float, optional
        Minimum value that the distance features must have
        in order to be considered outliers, by default 1
    new_label: string, optional
        The name of the new boolean feature, by default OUTLIER
    inplace : bool, optional
        if set to true the original dataframe will be altered to contain
        the new feature, otherwise a copy will be returned, by default False

    Returns
    -------
    DataFrame
        The dataframe with the outlier feature, or None when `inplace`
        is True

    """
    if not inplace:
        move_data = move_data.copy()

    if DIST_TO_PREV not in move_data:
        move_data.generate_dist_features()

    if move_data.index.name is not None:
        logger.debug('...Reset index for filtering\n')
        move_data.reset_index(inplace=True)

    # Every distance feature must be present. The previous check tested the
    # truthiness of the DIST_TO_NEXT constant instead of its membership.
    required_features = (DIST_TO_PREV, DIST_TO_NEXT, DIST_PREV_TO_NEXT)
    if all(feature in move_data for feature in required_features):
        jump = jump_coefficient * move_data[DIST_PREV_TO_NEXT]
        filter_ = ((move_data[DIST_TO_NEXT] > threshold)
                   & (move_data[DIST_TO_PREV] > threshold)
                   & (move_data[DIST_PREV_TO_NEXT] > threshold)
                   & (jump < move_data[DIST_TO_NEXT])
                   & (jump < move_data[DIST_TO_PREV]))
        move_data[new_label] = filter_
    else:
        logger.warning('...Distances features were not created')

    if not inplace:
        return move_data
    return None
Exemple #8
0
def create_or_update_gps_block_signal(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    max_time_stop: float = 7200,
    new_label: str = BLOCK,
    label_tid: str = TID_PART,
    inplace: bool = False
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Creates a boolean feature marking segments that stay still too long.

    The data is segmented with `segmentation.by_max_dist` using a maximum
    distance of 0.0, the dist/time/speed features are regenerated per
    segment, and every segment whose summed TIME_TO_PREV reaches
    `max_time_stop` is flagged.

    Parameters
    ----------
    move_data: dataFrame
        The input trajectories data.
    max_time_stop: float, optional
        Maximum time allowed with speed 0, by default 7200
    new_label: string, optional
        The name of the new boolean feature, by default BLOCK
    label_tid : str, optional
        The label of the column containing the ids of the formed segments,
        by default TID_PART
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the new feature, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        DataFrame with the additional features, or None when `inplace`
        is True

    """
    working = move_data if inplace else move_data.copy()

    logger.debug(
        'Create or update block_signal if max time stop > %s seconds\n' %
        max_time_stop)
    segmentation.by_max_dist(
        working,
        max_dist_between_adj_points=0.0,
        label_new_tid=label_tid,
        inplace=True,
    )

    logger.debug('Updating dist time speed values')
    working.generate_dist_time_speed_features(label_id=label_tid)

    # Start with nothing flagged, then mark the long-lasting segments.
    working[new_label] = False

    time_per_segment = working.groupby(by=label_tid).agg(
        {TIME_TO_PREV: 'sum'})
    lasts_too_long = time_per_segment[TIME_TO_PREV] >= max_time_stop
    blocked_tids = time_per_segment[lasts_too_long].index
    in_blocked_segment = working[label_tid].isin(blocked_tids)
    working.loc[in_blocked_segment, new_label] = True

    return _end_create_operation(working, new_label, inplace)
Exemple #9
0
def elbow_method(move_data: DataFrame,
                 k_initial: int = 1,
                 max_clusters: int = 15,
                 k_iteration: int = 1,
                 random_state: int | None = None) -> dict:
    """
    Computes the KMeans inertia for a range of cluster counts.

    The returned values are the input of the elbow method: the caller
    inspects them to pick the optimal number of clusters.

    Parameters
    ----------
    move_data : dataframe
        The input trajectory data.
    k_initial: int, optional
        First number of clusters to test, by default 1
    max_clusters: int, optional
        Largest number of clusters to test (inclusive), by default 15
    k_iteration: int, optional
        Increment between consecutive tested values, by default 1
    random_state: int, RandomState instance
        Determines random number generation for centroid initialization.
        Use an int to make the randomness deterministic, by default None

    Returns
    -------
    dict
        The inertia value for each tested number of clusters

    Example
    -------
    clustering.elbow_method(move_data=move_df, k_iteration=3)
        {
            1: 55084.15957839036,
            4: 245.68365592382938,
            7: 92.31472644640075,
            10: 62.618599956870355,
            13: 45.59653757292055,
        }

    """
    logger.debug(
        'Executing Elbow Method for {} to {} clusters at {} steps\n'.format(
            k_initial, max_clusters, k_iteration))

    candidate_ks = range(k_initial, max_clusters + 1, k_iteration)
    return {
        n_clusters: KMeans(
            n_clusters=n_clusters, random_state=random_state,
        ).fit(move_data[[LATITUDE, LONGITUDE]]).inertia_
        for n_clusters in progress_bar(candidate_ks, desc='Running KMeans')
    }
Exemple #10
0
def clean_gps_jumps_by_distance(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    jump_coefficient: float = 3.0,
    threshold: float = 1,
    label_dtype: Callable = np.float64,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Removes the trajectory points detected as outliers (gps jumps).

    The distance features are generated when missing, then the cleaning
    is delegated to `_clean_gps` using the `outliers` filter.

    Parameters
    ----------
    move_data : dataframe
        The input trajectory data
    label_id : str, optional
         Label of the id column in the user dataframe, by default TRAJ_ID
    jump_coefficient : float, optional
        Coefficient forwarded to the `outliers` filter, by default 3
    threshold : float, optional
        Minimum value that the distance features must have
        in order to be considered outliers, by default 1
    label_dtype : type, optional
        Represents column id type, by default np.float64.
    inplace : boolean, optional
        If True the original dataframe is used as the working object and
        None is returned, by default False

    Returns
    -------
    DataFrame
        The filtered trajectories without the gps jumps, or None when
        `inplace` is True

    """
    working = move_data if inplace else move_data.copy()

    distances_missing = DIST_TO_PREV not in working
    if distances_missing:
        working.generate_dist_features(
            label_id=label_id, label_dtype=label_dtype)

    message = '\nCleaning gps jumps by distance to jump_coefficient %s...\n'
    logger.debug(message % jump_coefficient)

    cleaned = _clean_gps(
        working,
        outliers,
        arg1=jump_coefficient,
        arg2=threshold,
        outliers=True,
    )
    return None if inplace else cleaned
Exemple #11
0
def _drop_single_point(move_data: DataFrame, label_new_tid: str,
                       label_id: str):
    """
    Drops, in place, the rows whose segment id equals -1.

    Rows are removed by index label, and the number of distinct ids and
    the dataframe shape are logged before and after the drop.

    Parameters
    ----------
    move_data: dataframe
        dataframe with trajectories; modified in place
    label_new_tid : str
        The label of the column containing the ids of the formed segments;
        a value of -1 marks the rows to remove.
    label_id : str
         Label of the id column in the user dataframe, only used for logging

    """
    shape_before_drop = move_data.shape
    is_single_point = move_data[label_new_tid] == -1
    rows_to_drop = move_data.loc[is_single_point].index

    if len(rows_to_drop) == 0:
        logger.debug('...No trajectories with only one point.')
        return

    logger.debug('...Drop Trajectory with a unique GPS point\n')
    ids_before_drop = len(move_data[label_id].unique())
    move_data.drop(index=rows_to_drop, inplace=True)
    ids_after_drop = len(move_data[label_id].unique())

    logger.debug('...Object - before drop: {} - after drop: {}'.format(
        ids_before_drop, ids_after_drop))
    logger.debug('...Shape - before drop: {} - after drop: {}'.format(
        shape_before_drop, move_data.shape))
Exemple #12
0
    def discretize_based_grid(self, region_size: int = 1000):
        """
        Discretize the space into equally sized grid cells.

        A `Grid` is built over this object with `region_size` as cell
        size and asked to create or update the grid index feature on
        this object; the index is then reset (old index dropped).

        Parameters
        ----------
        region_size: int, optional
            Size of grid cell, by default 1000
        """
        op = begin_operation('discretize based on grid')
        logger.debug('\nDiscretizing dataframe...')

        Grid(self, cell_size=region_size).create_update_index_grid_feature(
            self)
        self.reset_index(drop=True, inplace=True)

        self.last_operation = end_operation(op)
Exemple #13
0
def clean_gps_nearby_points_by_distances(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    radius_area: float = 10.0,
    label_dtype: Callable = np.float64,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Removes points that are too close to their previous point.

    The distance features are generated when missing, then the cleaning
    is delegated to `_clean_gps` using the `_filter_single_by_max` filter
    on the DIST_TO_PREV feature with `radius_area` as limit.

    Parameters
    ----------
    move_data : dataframe
        The input trajectory data
    label_id : str, optional
         Label of the id column in the user dataframe, by default TRAJ_ID
    radius_area : float, optional
        Minimum distance a point must have to its previous point
        in order not to be dropped, by default 10
    label_dtype : type, optional
        Represents column id type, by default np.float64.
    inplace : boolean, optional
        If True the original dataframe is used as the working object and
        None is returned, by default False

    Returns
    -------
    DataFrame
        The filtered trajectories without the gps nearby points by
        distance, or None when `inplace` is True

    """
    working = move_data if inplace else move_data.copy()

    distances_missing = DIST_TO_PREV not in working
    if distances_missing:
        working.generate_dist_features(
            label_id=label_id, label_dtype=label_dtype)

    message = '\nCleaning gps points from radius of %s meters\n'
    logger.debug(message % radius_area)

    cleaned = _clean_gps(
        working,
        _filter_single_by_max,
        arg1=DIST_TO_PREV,
        arg2=radius_area,
        outliers=False,
    )
    return None if inplace else cleaned
Exemple #14
0
def clean_gps_nearby_points_by_speed(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    speed_radius: float = 0.0,
    label_dtype: Callable = np.float64,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Removes points reached with too low a travel speed.

    The speed features are generated when missing, then the cleaning is
    delegated to `_clean_gps` using the `_filter_single_by_max` filter
    on the SPEED_TO_PREV feature with `speed_radius` as limit.

    Parameters
    ----------
    move_data : dataframe
        The input trajectory data
    label_id : str, optional
         Label of the id column in the user dataframe, by default TRAJ_ID
    speed_radius : float, optional
        Minimum speed a point must have from its previous point
        in order not to be dropped, by default 0
    label_dtype : type, optional
        Represents column id type, by default np.float64.
    inplace : boolean, optional
        If True the original dataframe is used as the working object and
        None is returned, by default False

    Returns
    -------
    DataFrame
        The filtered trajectories without the gps nearby points by
        speed, or None when `inplace` is True

    """
    working = move_data if inplace else move_data.copy()

    speed_missing = SPEED_TO_PREV not in working
    if speed_missing:
        working.generate_dist_time_speed_features(
            label_id=label_id, label_dtype=label_dtype)

    message = '\nCleaning gps points using %s speed radius\n'
    logger.debug(message % speed_radius)

    cleaned = _clean_gps(
        working,
        _filter_single_by_max,
        arg1=SPEED_TO_PREV,
        arg2=speed_radius,
        outliers=False,
    )
    return None if inplace else cleaned
Exemple #15
0
    def _create_virtual_grid(self, data: DataFrame, cell_size: float,
                             meters_by_degree: float):
        """
        Compute the parameters of a virtual grid over the data bounding box.

        The cell size is converted to degrees, the upper bounds of the
        bounding box are pushed out to a whole number of cells, and the
        grid origin, grid dimensions and cell size are stored on `self`.

        Parameters
        ----------
        data : DataFrame
            Represents the dataset with contains lat, long and datetime;
            only its `get_bbox()` result is used
        cell_size : float
            Size of grid cell
        meters_by_degree : float
            Number of meters in one degree, used to convert `cell_size`

        """
        operation = begin_operation('_create_virtual_grid')

        bbox = data.get_bbox()
        logger.debug('\nCreating a virtual grid without polygons')

        cell_size_by_degree = cell_size / meters_by_degree
        logger.debug('...cell size by degree: %s' % cell_size_by_degree)

        def _snap_upper(lower, upper):
            # If the span is not a whole number of cells, extend the upper
            # bound to the next cell boundary.
            span = upper - lower
            if math.fmod(span, cell_size_by_degree) == 0:
                return upper
            return lower + cell_size_by_degree * (
                math.floor(span / cell_size_by_degree) + 1)

        def _count_cells(lower, upper):
            return int(round((upper - lower) / cell_size_by_degree))

        # bbox layout used here: (lat_min, lon_min, lat_max, lon_max)
        lat_min_y, lon_min_x = bbox[0], bbox[1]
        lat_max_y = _snap_upper(lat_min_y, bbox[2])
        lon_max_x = _snap_upper(lon_min_x, bbox[3])

        # adjust grid size to lat and lon
        grid_size_lat_y = _count_cells(lat_min_y, lat_max_y)
        grid_size_lon_x = _count_cells(lon_min_x, lon_max_x)

        logger.debug('...grid_size_lat_y:%s\ngrid_size_lon_x:%s' %
                     (grid_size_lat_y, grid_size_lon_x))

        self.lon_min_x, self.lat_min_y = lon_min_x, lat_min_y
        self.grid_size_lat_y = grid_size_lat_y
        self.grid_size_lon_x = grid_size_lon_x
        self.cell_size_by_degree = cell_size_by_degree
        logger.debug('\n..A virtual grid was created')

        self.last_operation = end_operation(operation)
Exemple #16
0
def _update_curr_tid_count(filter_: ndarray, move_data: DataFrame, idx: int,
                           label_new_tid: str, curr_tid: int,
                           count: int) -> tuple[int, int]:
    """
    Updates the tid.

    A new tid is started for the id `idx`; then, for every truthy entry
    of `filter_`, the tid is incremented again from that position on.
    The resulting tids are written to `move_data` at (`idx`,
    `label_new_tid`).

    Parameters
    ----------
    filter_ : numpy.ndarray
        Split flags of the rows of `idx`; a 0-d value means the id has a
        single row
    move_data : dataframe
        Dataframe to be filtered; modified in place
    idx : int
        row to compare
    label_new_tid : str
        label of the new feature
    curr_tid : int
        current tid
    count : int
        count of rows processed so far

    Returns
    -------
    int
        updated current tid
    int
        updated count of processed rows

    """
    curr_tid += 1
    if filter_.shape == ():
        logger.debug(f'id: {idx} has no point to split')
        move_data.at[idx, label_new_tid] = curr_tid
        count += 1
    else:
        # Each truthy flag opens a new segment from its own position on, so
        # the tid of row i is the base tid plus the number of flags seen up
        # to and including i. A cumulative sum yields that in linear time,
        # instead of rewriting the tail slice on every split.
        split_flags = np.asarray(filter_, dtype=bool)
        tids = curr_tid + np.cumsum(split_flags, dtype=np.int64)
        curr_tid += int(np.count_nonzero(split_flags))
        count += tids.shape[0]
        move_data.at[idx, label_new_tid] = tids
    return curr_tid, count
Exemple #17
0
def bbox_split(bbox: tuple[int, int, int, int],
               number_grids: int) -> DataFrame:
    """
    Splits the bounding box in N grids of the same size.

    The box is cut along the longitude axis only: every grid keeps the
    full latitude range and takes one slice of the longitude range.

    Parameters
    ----------
    bbox: tuple
        Tuple of 4 elements, containing the minimum and maximum values
        of latitude and longitude of the bounding box, in the order
        (lat_min, lon_min, lat_max, lon_max).
    number_grids: int
        Determines the number of grids to split the bounding box.

    Returns
    -------
    DataFrame
        Returns the latitude and longitude coordinates of
        the grids after the split, one row per grid with the columns
        'lat_min', 'lon_min', 'lat_max' and 'lon_max'.

    Raises
    ------
    ZeroDivisionError
        If `number_grids` is 0

    """
    lat_min, lon_min, lat_max, lon_max = bbox[0], bbox[1], bbox[2], bbox[3]

    # Use the real extent of each axis. Subtracting absolute values gave a
    # wrong (possibly zero) step when the box spans the equator or the
    # prime meridian, e.g. lon from -10 to 10.
    const_lat = abs(lat_max - lat_min) / number_grids
    const_lon = abs(lon_max - lon_min) / number_grids
    logger.debug(f'const_lat: {const_lat}\nconst_lon: {const_lon}')

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and was quadratic when called in a loop.
    rows = [
        {
            'lat_min': lat_min,
            'lon_min': lon_min + (const_lon * i),
            'lat_max': lat_max,
            'lon_max': lon_min + (const_lon * (i + 1)),
        }
        for i in range(number_grids)
    ]
    return pd.DataFrame(
        rows, columns=['lat_min', 'lon_min', 'lat_max', 'lon_max'])
Exemple #18
0
def create_or_update_gps_deactivated_signal(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    max_time_between_adj_points: float = 7200,
    new_label: str = DEACTIVATED,
    inplace: bool = False
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Creates a feature flagging points after a long gap without signal.

    The time features are (re)generated and the flag itself is produced
    by `_process_simple_filter` from the TIME_TO_PREV feature and
    `max_time_between_adj_points`.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    max_time_between_adj_points: float, optional
        The max time between adjacent points, by default 7200
    new_label: string, optional
        The name of the new feature with detected deactivated signals,
        by default DEACTIVATED
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the new feature, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        Whatever `_process_simple_filter` returns: presumably the
        dataframe with the additional features, or None when `inplace`
        is True

    """
    working = move_data if inplace else move_data.copy()

    logger.debug(
        'Create or update deactivated signal if time max > %s seconds\n' %
        max_time_between_adj_points)
    working.generate_time_features()

    return _process_simple_filter(
        working,
        new_label,
        TIME_TO_PREV,
        max_time_between_adj_points,
        inplace,
    )
Exemple #19
0
def create_or_update_gps_jump(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    max_dist_between_adj_points: float = 3000,
    new_label: str = JUMP,
    inplace: bool = False
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Creates a feature flagging points that are gps jumps.

    The distance features are (re)generated and the flag itself is
    produced by `_process_simple_filter` from the DIST_TO_PREV feature
    and `max_dist_between_adj_points`.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    max_dist_between_adj_points: float, optional
        The maximum distance between adjacent points, by default 3000
    new_label: string, optional
        The name of the new feature with detected jumps, by default JUMP
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the new feature, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        Whatever `_process_simple_filter` returns: presumably the
        dataframe with the additional features, or None when `inplace`
        is True

    """
    working = move_data if inplace else move_data.copy()

    logger.debug(
        'Create or update jump if dist max > %s meters\n' %
        max_dist_between_adj_points)
    working.generate_dist_features()

    return _process_simple_filter(
        working,
        new_label,
        DIST_TO_PREV,
        max_dist_between_adj_points,
        inplace,
    )
Exemple #20
0
def clean_id_by_time_max(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    time_max: float = 3600,
    label_dtype: Callable = np.float64,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Drops the ids whose total time is below a user-defined limit.

    TIME_TO_PREV is summed per `label_id`; every id whose sum is lower
    than `time_max` has its rows dropped.

    Parameters
    ----------
    move_data: dataframe.
        The input data.
    label_id: str, optional
        The label of the column which contains the id of the trajectories,
        by default TRAJ_ID
    time_max: float, optional
        Total time a set of points with the same id must reach in order
        not to be dropped, by default 3600
    label_dtype : type, optional
        Represents column id type, by default np.float64.
    inplace : boolean, optional
        if set to true the operation is done in place, the original
        dataframe will be altered and None is returned, by default False

    Returns
    -------
    dataframe or None
        The filtered trajectories, or None when `inplace` is True

    """
    working = move_data if inplace else move_data.copy()

    if TIME_TO_PREV not in working:
        working.generate_dist_time_speed_features(
            label_id=label_id, label_dtype=label_dtype)

    message = '\nClean gps points with time max by id < %s seconds'
    logger.debug(message % time_max)

    time_per_id = working.groupby([label_id], as_index=False).agg(
        {TIME_TO_PREV: 'sum'})
    ids_to_drop = time_per_id.query(f'{TIME_TO_PREV} < {time_max}')

    total_ids = working[label_id].nunique()
    dropped_ids = ids_to_drop[label_id].nunique()
    logger.debug(
        '...Ids total: %s\nIds to drop:%s' % (total_ids, dropped_ids))

    if ids_to_drop.shape[0] > 0:
        rows_before = working.shape[0]
        belongs_to_dropped_id = working[label_id].isin(ids_to_drop[label_id])
        working.drop(working[belongs_to_dropped_id].index, inplace=True)
        logger.debug('...Rows before drop: %s\n Rows after drop: %s' %
                     (rows_before, working.shape[0]))

    return None if inplace else working
Exemple #21
0
    def create_update_index_grid_feature(self,
                                         data: DataFrame,
                                         unique_index: bool = True,
                                         label_dtype: Callable = np.int64,
                                         sort: bool = True):
        """
        Create or update the grid index feature(s) of a dataset.

        The lat/lon columns of `data` are mapped to grid indexes and
        written back to `data` in place, either as one combined index
        (INDEX_GRID) or as two columns (INDEX_GRID_LAT, INDEX_GRID_LON).

        Parameters
        ----------
        data : DataFrame
            Represents the dataset with contains lat, long and datetime.
        unique_index: bool, optional
            If True a single combined index column is written; otherwise
            separate lat and lon index columns are written, by default True
        label_dtype : Callable, optional
            Represents the type of a value of new column in dataframe,
            by default np.int64
        sort : bool, optional
            If True `data` is first sorted in place by TRAJ_ID and
            DATETIME, by default True

        """
        operation = begin_operation('create_update_index_grid_feature')

        logger.debug('\nCreating or updating index of the grid feature..\n')
        if sort:
            data.sort_values([TRAJ_ID, DATETIME], inplace=True)

        raw_lat, raw_lon = self.point_to_index_grid(
            data[LATITUDE], data[LONGITUDE])
        lat_index = label_dtype(raw_lat)
        lon_index = label_dtype(raw_lon)

        grid_info = self.get_grid()
        if not unique_index:
            data[INDEX_GRID_LAT] = lat_index
            data[INDEX_GRID_LON] = lon_index
        else:
            # Combine both indexes into one id: lon index times the number
            # of lat rows, plus the lat index.
            data[INDEX_GRID] = (
                lon_index * grid_info['grid_size_lat_y'] + lat_index)

        self.last_operation = end_operation(operation)
Exemple #22
0
def _end_create_operation(move_data: DataFrame, new_label: str,
                          inplace: bool) -> DataFrame | None:
    """
    Finishes a feature-creation operation.

    Logs the value counts of the newly created column and decides
    what should be handed back to the caller.

    Parameters
    ----------
    move_data: dataframe
        The trajectories data already containing the new feature.
    new_label: string
        The name of the column that was created or updated.
    inplace : boolean
        If True the caller modified its own dataframe, so nothing is
        returned; otherwise `move_data` is returned.

    Returns
    -------
    DataFrame
        `move_data` when `inplace` is False, otherwise None

    """
    label_counts = move_data[new_label].value_counts()
    logger.debug(label_counts)
    return None if inplace else move_data
Exemple #23
0
def create_or_update_out_of_the_bbox(
        move_data: DataFrame,
        bbox: tuple[int, int, int, int],
        new_label: str = OUT_BBOX,
        inplace: bool = False) -> DataFrame | None:
    """
    Create or update a boolean feature to detect points out of the bbox.

    Every row starts as False; the rows returned by
    `filters.by_bbox(move_data, bbox, filter_out=True)` are then set to True.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    bbox : tuple
        Tuple of 4 elements, containing the minimum and maximum values
        of latitude and longitude of the bounding box.
        NOTE(review): the element order is defined by `filters.by_bbox`,
        which is not visible here - confirm before relying on it.
    new_label: string, optional
        The name of the new feature with detected points out of the bbox,
        by default OUT_BBOX
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the new feature, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        Returns dataframe with a boolean feature with detected
        points out of the bbox, or None when `inplace` is True

    Raises
    ------
    ValueError
        If the bbox filter returns None
    """
    if not inplace:
        move_data = move_data.copy()

    logger.debug(
        '\nCreate or update boolean feature to detect points out of the bbox')
    filtered_ = filters.by_bbox(move_data, bbox, filter_out=True)

    if filtered_ is None:
        raise ValueError('Filter bbox failed!')

    logger.debug('...Creating a new label named as %s', new_label)
    move_data[new_label] = False

    if filtered_.shape[0] > 0:
        # '%s' (not '% a') so the label is actually interpolated as text
        logger.debug('...Setting %s as True\n', new_label)
        # .loc is the label-based accessor for several rows at once;
        # .at is restricted to a single scalar cell
        move_data.loc[filtered_.index, new_label] = True

    return _end_create_operation(move_data, new_label, inplace)
Exemple #24
0
def _clean_gps(move_data: DataFrame, f: Callable, **kwargs):
    """
    Repeatedly drops the gps points selected by a filtering function.

    `_filter_data` is evaluated on the dataframe; while it reports rows
    to drop, those rows are removed in place and the filter is evaluated
    again on what is left.

    Parameters
    ----------
    move_data : dataframe
        Dataframe to be filtered. It is modified in place; a named index
        is reset before filtering.
    f : function
        Filtering function forwarded to `_filter_data`
    **kwargs : arguments
        Forwarded to `_filter_data` as a single dict
        - arg1 : feature
        - arg2 : value
        - outliers : special behavior if cleaning by outliers

    Returns
    -------
    dataframe
        The same (filtered) dataframe object.

    """
    if move_data.index.name is not None:
        logger.debug('...Reset index for filtering\n')
        move_data.reset_index(inplace=True)

    total_dropped = 0
    while True:
        candidates, n_candidates = _filter_data(move_data, f, kwargs)
        if not n_candidates > 0:
            # nothing left to remove, the dataframe is clean
            break

        logger.debug('...Dropping %s rows of gps points\n' % n_candidates)
        n_rows_before = move_data.shape[0]
        move_data.drop(index=candidates.index, inplace=True)
        total_dropped = total_dropped + n_candidates
        logger.debug('...Rows before: %s, Rows after:%s, Sum drop:%s\n' %
                     (n_rows_before, move_data.shape[0], total_dropped))

    logger.debug('%s GPS points were dropped' % total_dropped)

    return move_data
Exemple #25
0
def create_or_update_move_stop_by_dist_time(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    dist_radius: float = 30,
    time_radius: float = 900,
    label_id: str = TRAJ_ID,
    new_label: str = SEGMENT_STOP,
    inplace: bool = False
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Determines the stops and moves points of the dataframe.

    If these points already exist, they will be updated.

    The trajectories are first segmented with `by_max_dist`, then the
    TIME_TO_PREV values of each segment are summed; every point of a
    segment whose total exceeds `time_radius` is flagged as a stop.

    Parameters
    ----------
    move_data : dataframe
       The input trajectory data
    dist_radius : float, optional
        The first step in this function is segmenting the trajectory
        The segments are used to find the stop points
        The dist_radius defines the distance used in the segmentation,
        by default 30
    time_radius :  float, optional
        The time_radius used to determine if a segment is a stop
        If the user stayed in the segment for a time
        greater than time_radius, than the segment is a stop,
        by default 900
    label_id : str, optional
         Indicates the label of the id column in the user dataframe, by default TRAJ_ID
    new_label : str, optional
        Is the name of the column that receives the segment ids,
        by default SEGMENT_STOP
    inplace : bool, optional
        if set to true the original dataframe will be altered to
        contain the new features, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        DataFrame with 2 aditional features: `new_label` and STOP, or None
        when `inplace` is True.
        `new_label` indicates the trajectory segment to which the point belongs
        STOP indicates if the point represents a stop.

    """
    if not inplace:
        move_data = move_data.copy()

    # segment ids are written into `new_label` directly on move_data
    by_max_dist(
        move_data,
        label_id=label_id,
        max_dist_between_adj_points=dist_radius,
        label_new_tid=new_label,
        inplace=True
    )

    # recompute the dist/time/speed features per segment
    move_data.generate_dist_time_speed_features(
        label_id=new_label
    )

    logger.debug('Create or update stop as True or False')
    logger.debug(
        '...Creating stop features as True or False using %s to time in seconds',
        time_radius
    )
    move_data[STOP] = False
    stop_segments = (
        move_data.groupby(by=new_label)
        .agg({TIME_TO_PREV: 'sum'})
        .query(f'{TIME_TO_PREV} > {time_radius}')
        .index
    )
    # boolean-mask .loc flags exactly the rows of the stop segments;
    # .at only addresses a single scalar cell and must not receive an Index
    move_data.loc[move_data[new_label].isin(stop_segments), STOP] = True
    logger.debug(move_data[STOP].value_counts())

    if not inplace:
        return move_data
    return None
Exemple #26
0
def create_or_update_move_and_stop_by_radius(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    radius: float = 0,
    target_label: str = DIST_TO_PREV,
    new_label: str = SITUATION,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Labels every point of the dataframe as a move or as a stop.

    If the label column already exists, it will be overwritten.

    Parameters
    ----------
    move_data : dataframe
       The input trajectory data
    radius :  float, optional
        Threshold applied to the values of `target_label`.
        A point whose value is greater than radius is labelled MOVE,
        a point whose value is lower or equal to radius is labelled STOP,
        by default 0
    target_label : String, optional
        The feature compared against `radius`, by default DIST_TO_PREV
    new_label : String, optional
        Is the name of the column that receives the MOVE / STOP label,
        by default SITUATION
    inplace : bool, optional
        if set to true the original dataframe will be altered to
        contain the new feature, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        dataframe with the aditional feature `new_label`, or None when
        `inplace` is True. Points matching neither condition (e.g. a
        missing `target_label` value) receive the np.nan default.

    """
    logger.debug('\nCreating or updating features MOVE and STOPS...\n')

    if not inplace:
        move_data = move_data.copy()

    # the distance features are generated whenever DIST_TO_PREV is absent,
    # independently of the chosen target_label
    if DIST_TO_PREV not in move_data:
        move_data.generate_dist_features()

    above_radius = move_data[target_label] > radius
    within_radius = move_data[target_label] <= radius
    move_data[new_label] = np.select(
        (above_radius, within_radius), [MOVE, STOP], np.nan
    )

    stop_points = move_data[move_data[new_label] == STOP]
    logger.debug(
        '\n....There are %s stops to this parameters\n'
        % (stop_points.shape[0])
    )

    return None if inplace else move_data
Exemple #27
0
def by_dist_time_speed(
    move_data: 'PandasMoveDataFrame' | 'DaskMoveDataFrame',
    label_id: str = TRAJ_ID,
    max_dist_between_adj_points: float = 3000,
    max_time_between_adj_points: float = 900,
    max_speed_between_adj_points: float = 50.0,
    drop_single_points: bool = True,
    label_new_tid: str = TID_PART,
    inplace: bool = False,
) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame' | None:
    """
    Segments the trajectories using distance, time and speed thresholds.

    The three thresholds are forwarded to `_filter_by` with `all=True`,
    i.e. the segmentation that combines all three features.

    Parameters
    ----------
    move_data : dataframe
       The input trajectory data
    label_id : str, optional
         Indicates the label of the id column in the user dataframe, by default TRAJ_ID
    max_dist_between_adj_points : float, optional
        Maximum distance allowed between two adjacent points,
        by default 3000
    max_time_between_adj_points : float, optional
        Maximum travel time allowed between two adjacent points, by default 900
    max_speed_between_adj_points : float, optional
        Maximum speed of travel allowed between two adjacent points, by default 50
    drop_single_points : boolean, optional
        If set to True, drops the trajectories with only one point, by default True
    label_new_tid : str, optional
        The label of the column containing the ids of the formed segments.
        Is the new splitted id, by default TID_PART
    inplace : boolean, optional
        if set to true the original dataframe will be altered,
        otherwise a copy will be segmented and returned,
        by default False

    Returns
    -------
    DataFrame
        DataFrame with the aditional feature `label_new_tid`,
        that indicates the trajectory segment to which the point belongs to,
        or None when `inplace` is True

    Note
    ----
    Time, distance and speed features must be updated after split.

    """
    if not inplace:
        move_data = move_data.copy()

    thresholds = {
        'max_dist': max_dist_between_adj_points,
        'max_time': max_time_between_adj_points,
        'max_speed': max_speed_between_adj_points,
    }

    logger.debug('\nSplit trajectories')
    logger.debug('...max_dist_between_adj_points: {}'.format(
        thresholds['max_dist']))
    logger.debug('...max_time_between_adj_points: {}'.format(
        thresholds['max_time']))
    logger.debug('...max_speed_between_adj_points: {}'.format(
        thresholds['max_speed']))

    # the segmentation relies on the dist/time/speed features being present
    if TIME_TO_PREV not in move_data:
        move_data.generate_dist_time_speed_features()

    segmented = _filter_by(
        move_data, label_id, label_new_tid, drop_single_points,
        **thresholds, all=True
    )
    return None if inplace else segmented
Exemple #28
0
def _filter_by(move_data: DataFrame, label_id: str, label_new_tid: str,
               drop_single_points: bool, **kwargs) -> DataFrame:
    """
    Segments the trajectories, id by id, writing the segment ids.

    For every id returned by `_prepare_segmentation` a filter is built
    (combined dist/time/speed filter or single-feature filter, depending
    on `kwargs['all']`) and handed to `_update_curr_tid_count`, which
    keeps the running segment id and counter. The index is reset at the end.

    Parameters
    ----------
    move_data : dataframe
       The input trajectory data, modified in place
    label_id : str
         Indicates the label of the id column in the user dataframe
    label_new_tid : str
        The label of the column containing the ids of the formed segments.
        Is the new splitted id.
    drop_single_points : boolean
        If set to True, drops the trajectories with only one point and
        regenerates the dist/time/speed features afterwards.
    **kwargs : arguments
        depends on the type of segmentation
        - all : if is a segmentation by all features (required key)
        - max_dist : maximum dist between adjacent points
        - max_time : maximum time between adjacent points
        - max_speed : maximum speed between adjacent points
        - feature : feature to use for segmentation
        - max_between_adj_points : maximum value for feature

    Returns
    -------
    dataframe
        DataFrame with the aditional features: label_new_tid,
        that indicates the trajectory segment to which the point belongs to.

    Note
    ----
    Time, distance and speed features must be updated after split.

    """
    curr_tid, ids, count = _prepare_segmentation(
        move_data, label_id, label_new_tid
    )

    for traj_id in progress_bar(ids, desc='Generating %s' % label_new_tid):
        if not kwargs['all']:
            split_filter = _filter_or_dist_time_speed(
                move_data,
                traj_id,
                kwargs['feature'],
                kwargs['max_between_adj_points'],
            )
        else:
            split_filter = _filter_and_dist_time_speed(
                move_data,
                traj_id,
                kwargs['max_dist'],
                kwargs['max_time'],
                kwargs['max_speed'],
            )

        curr_tid, count = _update_curr_tid_count(
            split_filter, move_data, traj_id, label_new_tid, curr_tid, count
        )

    # when the new ids overwrite the id column itself, the old index
    # (the previous ids) is discarded instead of restored as a column
    discard_index = label_id == label_new_tid
    move_data.reset_index(drop=discard_index, inplace=True)
    if discard_index:
        logger.debug(
            '... label_tid = label_new_id, then reseting and drop index')
    else:
        logger.debug('... Reseting index\n')

    if drop_single_points:
        _drop_single_point(move_data, label_new_tid, label_id)
        move_data.generate_dist_time_speed_features()

    return move_data
Exemple #29
0
def compress_segment_stop_to_point(
    move_data: DataFrame,
    label_segment: str = SEGMENT_STOP,
    label_stop: str = STOP,
    point_mean: str = 'default',
    drop_moves: bool = False,
    label_id: str = TRAJ_ID,
    dist_radius: float = 30,
    time_radius: float = 900,
    inplace: bool = False,
) -> DataFrame | None:
    """
    Compress the trajectories using the stop points in the dataframe.

    Compress a segment to point setting lat_mean e lon_mean to each segment.
    Only the first and the last point of every stop segment are kept,
    both carrying the representative coordinates of the segment.

    Parameters
    ----------
    move_data : dataframe
       The input trajectory data
    label_segment : String, optional
        The label of the column containing the ids of the formed segments.
        Is the new splitted id, by default SEGMENT_STOP
    label_stop : String, optional
        Is the name of the column that indicates if a point is a stop, by default STOP
    point_mean : String, optional
        Indicates whether the mean points should be calculated using
        centroids ('centroid') or the point that repeat the most
        ('default'), by default 'default'.
        Any other value leaves the stop segments without mean
        coordinates, so their points are dropped.
    drop_moves : Boolean, optional
        If set to true, the moving points will be dropped from the dataframe,
        by default False
    label_id : String, optional
         Used to create the stay points used in the compression.
         If the dataset already has the stop move, this
         parameter should be ignored.
         Indicates the label of the id column in the user dataframe, by default TRAJ_ID
    dist_radius : Double, optional
        Used to create the stay points used in the compression, by default 30
        If the dataset already has the stop move, this
        parameter should be ignored.
        The first step in this function is segmenting the trajectory.
        The segments are used to find the stop points.
        The dist_radius defines the distance used in the segmentation.
    time_radius :  Double, optional
        Used to create the stay points used in the compression, by default 900
        If the dataset already has the stop move, this
        parameter should be ignored.
        The time_radius used to determine if a segment is a stop.
        If the user stayed in the segment for a time
        greater than time_radius, than the segment is a stop.
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the result of the filtering, otherwise a copy will be returned, by default False

    Returns
    -------
    DataFrame
        Data with 3 additional features: segment_stop, lat_mean and lon_mean,
        or None when `inplace` is True.
        segment_stop indicates the trajectory segment to which the point belongs
        lat_mean and lon_mean:
            if the default option is used, lat_mean and lon_mean are defined
            based on point that repeats most within the segment
            On the other hand, if centroid option is used,
            lat_mean and lon_mean are defined by centroid of
            the all points into segment

    """
    if not inplace:
        move_data = move_data.copy()

    if label_segment not in move_data and label_stop not in move_data:
        create_or_update_move_stop_by_dist_time(move_data,
                                                dist_radius,
                                                time_radius,
                                                label_id,
                                                inplace=True)

    logger.debug('...setting mean to lat and lon...')
    # -1.0 is the sentinel for "row to be dropped"
    # NOTE(review): the arrays below are addressed with index *labels*,
    # which presumably requires a default 0..n-1 integer index - confirm.
    n_rows = move_data.shape[0]
    lat_mean = np.full(n_rows, -1.0, dtype=np.float64)
    lon_mean = np.full(n_rows, -1.0, dtype=np.float64)

    if drop_moves is False:
        # moving points get NaN instead of the sentinel, so they survive
        move_idx = move_data[~move_data[label_stop]].index
        lat_mean[move_idx] = np.nan
        lon_mean[move_idx] = np.nan
    else:
        logger.debug('...move segments will be dropped...')

    logger.debug('...get only segments stop...')
    segments = move_data[move_data[label_stop]][label_segment].unique()

    for idx in progress_bar(
            segments, desc=f'Generating {label_segment} and {label_stop}'):
        segment = move_data[move_data[label_segment] == idx]

        if segment.shape[0] <= 1:
            logger.debug(f'There are segments with only one point: {idx}')
            continue

        if point_mean == 'default':
            # most repeated (lat, lon) pair of the segment
            p = (segment.groupby([LATITUDE, LONGITUDE],
                                 as_index=False)
                 .agg({'id': 'count'})
                 .sort_values(['id'])
                 .tail(1))
            lat_value, lon_value = p.iloc[0, 0], p.iloc[0, 1]
        elif point_mean == 'centroid':
            lat_value = segment[LATITUDE].mean()
            lon_value = segment[LONGITUDE].mean()
        else:
            continue

        # only the first and last point of the stop segment are kept
        endpoints = segment.index[[0, -1]]
        lat_mean[endpoints] = lat_value
        lon_mean[endpoints] = lon_value

    move_data[LAT_MEAN] = lat_mean
    move_data[LON_MEAN] = lon_mean

    shape_before = move_data.shape[0]
    # filter points to drop
    filter_drop = ((move_data[LAT_MEAN] == -1.0)
                   & (move_data[LON_MEAN] == -1.0))
    rows_to_drop = move_data[filter_drop].index

    if rows_to_drop.shape[0] > 0:
        logger.debug('...Dropping %s points...', rows_to_drop.shape[0])
        move_data.drop(rows_to_drop, inplace=True)

    logger.debug('...Shape_before: %s\n...Current shape: %s',
                 shape_before, move_data.shape[0])

    if not inplace:
        return move_data
    return None
Exemple #30
0
def knn_query(
    traj: DataFrame,
    move_df: DataFrame,
    k: int = 5,
    id_: str = TRAJ_ID,
    distance: str = MEDP,
    latitude: str = LATITUDE,
    longitude: str = LONGITUDE,
    datetime: str = DATETIME
) -> DataFrame:
    """
    Returns the k neighboring trajectories closest to the trajectory.

    Given a k, a trajectory and a DataFrame with multiple paths.

    The distance from `traj` to every other trajectory of `move_df` is
    computed once; the k trajectories with the smallest distances are
    appended to a copy of `traj`, nearest first.

    Parameters
    ----------
    traj: dataframe
        The input of one trajectory.
    move_df: dataframe
        The input trajectory data.
    k: int, optional
        neighboring trajectories, by default 5
    id_: str, optional
        Label of the trajectories dataframe user id, by default TRAJ_ID
    distance: string, optional
        Distance measure type, by default MEDP
    latitude: string, optional
        Label of the trajectories dataframe referring to the latitude,
        by default LATITUDE
    longitude: string, optional
        Label of the trajectories dataframe referring to the longitude,
        by default LONGITUDE
    datetime: string, optional
        Label of the trajectories dataframe referring to the timestamp,
        by default DATETIME

    Returns
    -------
    DataFrame
        dataframe with the query trajectory followed by the near
        trajectories. Fewer than k trajectories are appended when
        `move_df` holds fewer than k other trajectories with a finite
        distance.

    Raises
    ------
    ValueError
        if distance measure is invalid

    Examples
    --------
    >>> from pymove.query.query import knn_query
    >>> traj_df
         lat      lon              datetime  id
    0   16.4    -54.9   2014-10-11 18:00:00   1
    1   16.4    -55.9   2014-10-12 00:00:00   1
    2   16.4    -56.9   2014-10-12 06:00:00   1
    >>> move_df
         lat      lon              datetime  id
    0   33.1    -77.0   2012-05-19 00:00:00   2
    1   32.8    -77.1   2012-05-19 06:00:00   3
    2   32.5    -77.3   2012-05-19 12:00:00   4
    >>> knn_query(
    ...    traj_df, move_df, k=1
    ... )
         lat      lon              datetime  id
    0   16.4    -54.9   2014-10-11 18:00:00   1
    1   16.4    -55.9   2014-10-12 00:00:00   1
    2   16.4    -56.9   2014-10-12 06:00:00   1
    2   32.5    -77.3   2012-05-19 12:00:00   4
    """
    if distance == MEDP:
        def dist_measure(traj, this, latitude, longitude, datetime):
            return distances.medp(
                traj, this, latitude, longitude
            )
    elif distance == MEDT:
        def dist_measure(traj, this, latitude, longitude, datetime):
            return distances.medt(
                traj, this, latitude, longitude, datetime
            )
    else:
        raise ValueError('Unknown distance measure. Use MEDP or MEDT')

    query_id = traj[id_].values[0]
    candidates = []
    for traj_id in progress_bar(
        move_df[id_].unique(), desc=f'Querying knn by {distance}'
    ):
        if traj_id == query_id:
            # the query trajectory is never its own neighbour
            continue
        this = move_df.loc[move_df[id_] == traj_id]
        this_distance = dist_measure(
            traj, this, latitude, longitude, datetime
        )
        # infinite / NaN distances can never be a nearest neighbour
        if this_distance < np.inf:
            candidates.append((this_distance, traj_id))

    # sort on the distance only (stable), so ties keep the order in which
    # the trajectories appear in move_df and ids are never compared
    candidates.sort(key=lambda candidate: candidate[0])
    nearest = candidates[:max(k, 0)]

    logger.debug('Generating DataFrame with k nearest trajectories.')
    frames = [traj.copy()]
    frames.extend(
        move_df.loc[move_df[id_] == neighbor_id] for _, neighbor_id in nearest
    )
    return pd.concat(frames)