Esempio n. 1
0
def create_or_update_out_of_the_bbox(
        move_data: DataFrame,
        bbox: tuple[int, int, int, int],
        new_label: str = OUT_BBOX,
        inplace: bool = False) -> DataFrame | None:
    """
    Create or update a boolean feature to detect points out of the bbox.

    The column ``new_label`` is first set to ``False`` for every row and
    then set to ``True`` for the rows returned by
    ``filters.by_bbox(move_data, bbox, filter_out=True)``.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    bbox : tuple
        Tuple of 4 elements, containing the minimum and maximum values
        of latitude and longitude of the bounding box.
    new_label: string, optional
        The name of the new feature with detected points out of the bbox,
        by default OUT_BBOX
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the result of the filtering, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        Returns dataframe with a boolean feature with detected
        points out of the bbox, or None

    Raises
    ------
    ValueError
        If feature generation fails, i.e. the bbox filter returns None
    """
    if not inplace:
        # Work on a copy so the caller's dataframe is left untouched.
        move_data = move_data.copy()

    logger.debug(
        '\nCreate or update boolean feature to detect points out of the bbox')
    filtered_ = filters.by_bbox(move_data, bbox, filter_out=True)

    if filtered_ is None:
        raise ValueError('Filter bbox failed!')

    # Lazy %-args: the message is only formatted if DEBUG is enabled.
    logger.debug('...Creating a new label named as %s', new_label)
    move_data[new_label] = False

    if filtered_.shape[0] > 0:
        logger.debug('...Setting %s as True\n', new_label)
        # `.at` is a scalar accessor; flagging many rows at once by their
        # index labels requires `.loc`.
        move_data.loc[filtered_.index, new_label] = True

    return _end_create_operation(move_data, new_label, inplace)
Esempio n. 2
0
def create_or_update_out_of_the_bbox(
        move_data, bbox, new_label=OUT_BBOX, inplace=True
):
    """
    Create or update a boolean feature to detect points out of the bbox.

    The column ``new_label`` is first set to ``False`` for every row and
    then set to ``True`` for the rows returned by
    ``filters.by_bbox(move_data, bbox, filter_out=True)``.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    bbox : tuple
        Tuple of 4 elements, containing the minimum and maximum values
        of latitude and longitude of the bounding box.
    new_label: string, optional, default OUT_BBOX
        The name of the new feature with detected points out of the bbox.
    inplace : boolean, optional, default True
        if set to true the original dataframe will be altered to contain
        the result of the filtering, otherwise a copy will be returned.

    Returns
    -------
    dataframe
        The result of ``_end_create_operation(move_data, new_label,
        inplace)``; presumably the dataframe with the boolean feature
        when ``inplace`` is False (verify against that helper).

    """
    if not inplace:
        # A plain slice (`move_data[:]`) is not an independent copy;
        # take a real copy so the caller's dataframe is left untouched.
        move_data = move_data.copy()

    print('\nCreate or update boolean feature to detect points out of the bbox')
    filtered_ = filters.by_bbox(move_data, bbox, filter_out=True)

    print('...Creating a new label named as %s' % new_label)
    move_data[new_label] = False

    if filtered_.shape[0] > 0:
        print('...Setting %s as True\n' % new_label)
        # `.at` is a scalar accessor; flagging many rows at once by their
        # index labels requires `.loc`.
        move_data.loc[filtered_.index, new_label] = True

    return _end_create_operation(move_data, new_label, inplace)
Esempio n. 3
0
def join_with_pois_by_dist_and_datetime(
    data: DataFrame,
    df_pois: DataFrame,
    label_date: Optional[Text] = DATETIME,
    label_event_id: Optional[Text] = EVENT_ID,
    label_event_type: Optional[Text] = EVENT_TYPE,
    time_window: Optional[float] = 3600,
    radius: Optional[float] = 1000,
):
    """
    Attach to each trajectory point the events/POIs close in space and time.

    For every row of ``data`` the POIs are narrowed down first by a
    bounding box built around the point from ``radius`` and then by the
    per-row datetime window. The ids, types and haversine distances of the
    POIs that remain are stored, as arrays, in three columns of ``data``:
    ``label_event_id``, ``DIST_EVENT`` and ``label_event_type``.
    ``data`` is modified in place and nothing is returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_pois : DataFrame
        The input events / points of interest data.
    label_date : str, optional
        Name of the datetime column; it must be present in ``df_pois``,
        by default DATETIME
    label_event_id : str, optional
        Name of the column of ``df_pois`` holding the event id,
        by default EVENT_ID
    label_event_type : str, optional
        Name of the column of ``df_pois`` holding the event type,
        by default EVENT_TYPE
    time_window : float, optional
        Tolerable length of the time range for assigning an event to a
        trajectory point, by default 3600
    radius: float, optional
        Maximum radius around the point in which POIs are searched,
        by default 1000

    Raises
    ------
    KeyError
        If ``df_pois`` has no ``label_date`` column.

    """
    print('Integration with Events...')

    if label_date not in df_pois:
        raise KeyError("POI's DataFrame must contain a %s column" % label_date)

    # Per-row time windows plus the result containers filled in the loop.
    (window_start, window_end, current_distances,
     event_id, event_type) = _reset_set_window_and_creates_event_id_type_all(
        data, df_pois, label_date, time_window)

    for row_idx, point in progress_bar(data.iterrows(), total=data.shape[0]):
        user_lat = point[LATITUDE]
        user_lon = point[LONGITUDE]

        # Spatial pre-filter: keep POIs inside the box around the point.
        search_box = filters.get_bbox_by_radius((user_lat, user_lon), radius)
        nearby_pois = filters.by_bbox(df_pois, search_box)

        # Temporal filter, applied in place on the spatial subset.
        filters.by_datetime(
            nearby_pois,
            start_datetime=window_start[row_idx],
            end_datetime=window_end[row_idx],
            inplace=True,
        )

        n_nearby = nearby_pois.shape[0]
        if n_nearby <= 0:
            # Nothing matched; the containers keep their initial value.
            continue

        nearby_pois.reset_index(drop=True, inplace=True)

        # Repeat the user position so it lines up with the POI arrays.
        user_lats = np.full(n_nearby, user_lat, dtype=np.float64)
        user_lons = np.full(n_nearby, user_lon, dtype=np.float64)
        poi_lats = nearby_pois[LATITUDE].to_numpy()
        poi_lons = nearby_pois[LONGITUDE].to_numpy()

        current_distances[row_idx] = haversine(
            user_lats, user_lons, poi_lats, poi_lons)
        event_type[row_idx] = nearby_pois[label_event_type].to_numpy(
            dtype=np.ndarray)
        event_id[row_idx] = nearby_pois[label_event_id].to_numpy(
            dtype=np.ndarray)

    # Column order is kept: id, distance, type.
    data[label_event_id] = event_id
    data[DIST_EVENT] = current_distances
    data[label_event_type] = event_type
    print('Integration with event was completed')