Beispiel #1
0
def join_with_pois(data: DataFrame,
                   df_pois: DataFrame,
                   label_id: Optional[Text] = TRAJ_ID,
                   label_poi_name: Optional[Text] = NAME_POI,
                   reset_index: Optional[bool] = True):
    """
    Performs the integration between trajectories and points
    of interest, generating new columns referring to the id,
    the name and the distance from the point of interest closest
    to each point of the trajectory.

    The result is written in place into ``data`` (columns ID_POI,
    DIST_POI and NAME_POI); nothing is returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_pois : DataFrame
        The input point of interest data.
    label_id : str, optional
        Label of df_pois referring to the Point of Interest id, by default TRAJ_ID
    label_poi_name : str, optional
        Label of df_pois referring to the Point of Interest name, by default NAME_POI
    reset_index : bool, optional
        Flag for reset index of the df_pois and data dataframes before the join,
        by default True

    """

    print('Integration with POIs...')

    values = _reset_and_creates_id_and_lat_lon(data, df_pois, True,
                                               reset_index)
    # assumes the helper returns result arrays with one slot per row of
    # `data` and lat/lon buffers with one slot per POI -- TODO confirm
    current_distances, ids_POIs, tag_POIs, lat_user, lon_user = values

    # the POI columns never change inside the loop: extract them once
    lat_pois = df_pois[LATITUDE].values
    lon_pois = df_pois[LONGITUDE].values
    poi_ids = df_pois[label_id].values
    poi_names = df_pois[label_poi_name].values

    # Work with row positions instead of index labels: the result arrays
    # are assigned to `data` positionally and np.argmin is positional too,
    # so this stays correct even when reset_index is False.
    points = zip(data[LATITUDE].values, data[LONGITUDE].values)
    for pos, (lat, lon) in enumerate(progress_bar(points, total=len(data))):
        # broadcast the current point over the reusable buffers
        lat_user.fill(lat)
        lon_user.fill(lon)

        # distances from the current point to every POI
        distances = np.float64(
            haversine(lat_user, lon_user, lat_pois, lon_pois))

        # position of the nearest POI and its distance
        index_min = np.argmin(distances)
        current_distances[pos] = np.min(distances)

        # id and name of the nearest POI
        ids_POIs[pos] = poi_ids[index_min]
        tag_POIs[pos] = poi_names[index_min]

    data[ID_POI] = ids_POIs
    data[DIST_POI] = current_distances
    data[NAME_POI] = tag_POIs

    print('Integration with POI was finalized')
Beispiel #2
0
def join_with_home_by_id(
    data: DataFrame,
    df_home: DataFrame,
    label_id: Optional[Text] = TRAJ_ID,
    label_address: Optional[Text] = ADDRESS,
    label_city: Optional[Text] = CITY,
    drop_id_without_home: Optional[bool] = False,
):
    """
    It performs the integration between trajectories and home points,
    generating new columns referring to the distance to the home point,
    the address and the city for each trajectory point.

    ``data`` is modified in place: if its index has no name, ``label_id``
    is set as index for the lookup; the index is reset before returning.
    Nothing is returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_home : DataFrame
        The input home points data.
    label_id : str, optional
        Label of df_home referring to the home point id, by default TRAJ_ID
    label_address : str, optional
        Label of df_home referring to the home point address, by default ADDRESS
    label_city : str, optional
        Label of df_home referring to the point city, by default CITY
    drop_id_without_home : bool, optional
        flag as an option to drop id's that don't have houses, by default FALSE

    """

    print('Integration with Home...')
    ids_without_home = []

    if data.index.name is None:
        print('...setting {} as index'.format(label_id))
        data.set_index(label_id, inplace=True)

    # name of the column that `reset_index` recreates at the end
    id_column = data.index.name

    for idx in progress_bar(data.index.unique()):
        homes = df_home[df_home[label_id] == idx]

        if homes.shape[0] == 0:
            print('...id: {} has not HOME'.format(idx))
            ids_without_home.append(idx)
            continue

        # only the first home registered for the id is used
        home = homes.iloc[0]
        lat_user = data.at[idx, LATITUDE]
        lon_user = data.at[idx, LONGITUDE]

        if np.ndim(lat_user) == 0:
            # The id has a single tuple: `.at` yields plain scalars, and so
            # does `home[...]`, so neither of them has a `.values` attribute.
            data.at[idx, DIST_HOME] = haversine(lat_user, lon_user,
                                                home[LATITUDE],
                                                home[LONGITUDE])
            data.at[idx, HOME] = home[label_address]
            data.at[idx, label_city] = home[label_city]
        else:
            # The id has several tuples: normalise to arrays and repeat the
            # home coordinates once per tuple.
            lat_user = np.asarray(lat_user)
            lon_user = np.asarray(lon_user)
            n_points = lat_user.shape[0]
            lat_home = np.full(n_points, home[LATITUDE], dtype=np.float64)
            lon_home = np.full(n_points, home[LONGITUDE], dtype=np.float64)
            data.at[idx, DIST_HOME] = haversine(lat_user, lon_user,
                                                lat_home, lon_home)
            data.at[idx, HOME] = np.array(home[label_address])
            data.at[idx, label_city] = np.array(home[label_city])

    data.reset_index(inplace=True)
    print('... Resetting index')

    if drop_id_without_home:
        # filter on the column that was just restored from the index rather
        # than on a hard-coded label
        without_home = data[id_column].isin(ids_without_home)
        data.drop(data.loc[without_home].index, inplace=True)
Beispiel #3
0
def join_with_pois_by_dist_and_datetime(
    data: DataFrame,
    df_pois: DataFrame,
    label_date: Optional[Text] = DATETIME,
    label_event_id: Optional[Text] = EVENT_ID,
    label_event_type: Optional[Text] = EVENT_TYPE,
    time_window: Optional[float] = 3600,
    radius: Optional[float] = 1000,
):
    """
    Attach to every trajectory point all the points of interest (events)
    that are close to it both in space and in time.

    For each point, the POIs are filtered by a bounding box built from
    ``radius`` around the point and then by the point's datetime window.
    The ids, types and distances of the remaining POIs are stored, as
    arrays, in the columns ``label_event_id``, ``label_event_type`` and
    DIST_EVENT of ``data``. ``data`` is modified in place and nothing is
    returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_pois : DataFrame
        The input events points of interest data.
    label_date : str, optional
        Label of the datetime column; it must also be present in df_pois,
        by default DATETIME
    label_event_id : str, optional
        Label of df_pois referring to the id of the event, by default EVENT_ID
    label_event_type : str, optional
        Label of df_pois referring to the type of the event, by default EVENT_TYPE
    time_window : float, optional
        tolerable length of time range for assigning the event's
        point of interest to the trajectory point, by default 3600
    radius: float, optional
        maximum radius of pois, by default 1000

    Raises
    ------
    KeyError
        If df_pois has no ``label_date`` column.

    """

    print('Integration with Events...')

    if label_date not in df_pois:
        raise KeyError("POI's DataFrame must contain a %s column" % label_date)

    # window bounds and output buffers are prepared by the helper
    (window_start, window_end, current_distances,
     event_id, event_type) = _reset_set_window_and_creates_event_id_type_all(
         data, df_pois, label_date, time_window)

    for idx, row in progress_bar(data.iterrows(), total=data.shape[0]):
        point_lat = row[LATITUDE]
        point_lon = row[LONGITUDE]

        # spatial filter: POIs inside the bounding box around the point
        nearby = filters.by_bbox(
            df_pois,
            filters.get_bbox_by_radius((point_lat, point_lon), radius))

        # temporal filter, applied in place on the spatial selection
        filters.by_datetime(nearby,
                            start_datetime=window_start[idx],
                            end_datetime=window_end[idx],
                            inplace=True)

        n_events = nearby.shape[0]
        if n_events == 0:
            # nothing near this point: keep the helper's initial values
            continue

        nearby.reset_index(drop=True, inplace=True)

        # repeat the point coordinates once per selected POI
        lat_user = np.full(n_events, point_lat, dtype=np.float64)
        lon_user = np.full(n_events, point_lon, dtype=np.float64)

        current_distances[idx] = haversine(lat_user, lon_user,
                                           nearby[LATITUDE].to_numpy(),
                                           nearby[LONGITUDE].to_numpy())
        event_type[idx] = nearby[label_event_type].to_numpy(
            dtype=np.ndarray)
        event_id[idx] = nearby[label_event_id].to_numpy(
            dtype=np.ndarray)

    data[label_event_id] = event_id
    data[DIST_EVENT] = current_distances
    data[label_event_type] = event_type
    print('Integration with event was completed')
Beispiel #4
0
def join_with_poi_datetime_optimizer(
        data: DataFrame,
        df_events: DataFrame,
        label_date: Optional[Text] = DATETIME,
        time_window: Optional[int] = 900,
        label_event_id: Optional[Text] = EVENT_ID,
        label_event_type: Optional[Text] = EVENT_TYPE):
    """
    It performs a optimized integration between trajectories and points
    of interest of events, generating new columns referring to
    the id and the category of the nearest event and the distance
    to it, for each point of the trajectories.

    The loop runs over the events instead of over the trajectory points:
    each event updates, in one vectorised step, every trajectory point
    selected by the datetime filter for which it is closer than the best
    event seen so far. ``data`` is modified in place (columns
    ``label_event_id``, DIST_EVENT and ``label_event_type``); nothing is
    returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_events : DataFrame
        The input events points of interest data.
    label_date : str, optional
        Label of data referring to the datetime of the input trajectory data,
        by default DATETIME
    time_window : float, optional
        tolerable length of time range for assigning the event's
        point of interest to the trajectory point, by default 900
    label_event_id : str, optional
        Label of df_events referring to the id of the event, by default EVENT_ID
    label_event_type : str, optional
        Label of df_events referring to the type of the event, by default EVENT_TYPE

    """

    print('Integration with Events...')

    values = _reset_set_window__and_creates_event_id_type(
        data, df_events, label_date, time_window)
    # the helper's scratch distance buffer is not needed here
    window_starts, window_ends, _, event_id, event_type = values

    # best distance found so far for every trajectory point
    minimum_distances = np.full(data.shape[0], np.inf, dtype=np.float64)

    for idx, row in progress_bar(df_events.iterrows(), total=len(df_events)):
        # NOTE(review): the windows are indexed with the event's index
        # label; confirm the helper builds them per event and not per
        # trajectory point.
        df_filtered = filters.by_datetime(data, window_starts[idx],
                                          window_ends[idx])

        size_filter = df_filtered.shape[0]
        if size_filter == 0:
            continue

        # assumes the helper left `data` with a positional (0..n-1) index,
        # so labels can be used to address the numpy buffers -- TODO confirm
        indexes = df_filtered.index.values

        lat_event = np.full(size_filter, row[LATITUDE], dtype=np.float64)
        lon_event = np.full(size_filter, row[LONGITUDE], dtype=np.float64)

        distances = np.asarray(
            haversine(
                lat_event,
                lon_event,
                df_filtered[LATITUDE].values,
                df_filtered[LONGITUDE].values,
            ),
            dtype=np.float64)

        # Use the boolean mask itself: `mask is True` is an identity test
        # on the array object and is always False.
        closer = distances < minimum_distances[indexes]
        improved = indexes[closer]

        minimum_distances[improved] = distances[closer]
        # read the configured columns instead of hard-coded attribute names
        event_id[improved] = row[label_event_id]
        event_type[improved] = row[label_event_type]

    data[label_event_id] = event_id
    data[DIST_EVENT] = minimum_distances
    data[label_event_type] = event_type
    print('Integration with events was completed')
Beispiel #5
0
def join_with_poi_datetime(data: DataFrame,
                           df_events: DataFrame,
                           label_date: Optional[Text] = DATETIME,
                           time_window: Optional[int] = 900,
                           label_event_id: Optional[Text] = EVENT_ID,
                           label_event_type: Optional[Text] = EVENT_TYPE):
    """
    Attach to every trajectory point the nearest event among those that
    fall inside the point's datetime window.

    For each point the events are filtered by datetime, the distance to
    each remaining event is computed and the id, type and distance of the
    closest one are kept. ``data`` is modified in place (columns
    ``label_event_id``, DIST_EVENT and ``label_event_type``); nothing is
    returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_events : DataFrame
        The input events points of interest data.
    label_date : str, optional
        Label of data referring to the datetime of the input trajectory data,
        by default DATETIME
    time_window : float, optional
        tolerable length of time range for assigning the event's
        point of interest to the trajectory point, by default 900
    label_event_id : str, optional
        Label of df_events referring to the id of the event, by default EVENT_ID
    label_event_type : str, optional
        Label of df_events referring to the type of the event, by default EVENT_TYPE

    """

    print('Integration with Events...')

    # window bounds and output buffers are prepared by the helper
    (window_starts, window_ends, current_distances,
     event_id, event_type) = _reset_set_window__and_creates_event_id_type(
         data, df_events, label_date, time_window)

    for idx in progress_bar(data.index):
        # events whose datetime falls inside this point's window
        in_window = filters.by_datetime(df_events, window_starts[idx],
                                        window_ends[idx])
        n_events = in_window.shape[0]
        if n_events == 0:
            # no candidate: keep the helper's initial values for this point
            continue

        # positional labels, so the argmin below can be used with `.at`
        in_window.reset_index(drop=True, inplace=True)

        # repeat the point coordinates once per candidate event
        lat_user = np.full(n_events, data.at[idx, LATITUDE],
                           dtype=np.float64)
        lon_user = np.full(n_events, data.at[idx, LONGITUDE],
                           dtype=np.float64)

        distances = haversine(
            lat_user,
            lon_user,
            in_window[LATITUDE].values,
            in_window[LONGITUDE].values,
        )

        nearest = np.argmin(distances)

        current_distances[idx] = np.min(distances)
        event_type[idx] = in_window.at[nearest, label_event_type]
        event_id[idx] = in_window.at[nearest, label_event_id]

    data[label_event_id] = event_id
    data[DIST_EVENT] = current_distances
    data[label_event_type] = event_type
    print('Integration with event was completed')
Beispiel #6
0
def join_with_pois_by_category(data: DataFrame,
                               df_pois: DataFrame,
                               label_category: Optional[Text] = TYPE_POI,
                               label_id: Optional[Text] = TRAJ_ID):
    """
    It performs the integration between trajectories and points
    of interest, generating new columns referring to the
    id and distance of the nearest point of interest
    of each category, for each point of the trajectory.

    For every distinct value ``c`` of ``label_category`` two columns are
    added to ``data``: ``id_<c>`` and ``dist_<c>``. Both ``data`` and
    ``df_pois`` have their index reset in place; nothing is returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_pois : DataFrame
        The input point of interest data.
    label_category : str, optional
        Label of df_pois referring to the point of interest category, by default TYPE_POI
    label_id : str, optional
        Label of df_pois referring to the point of interest id, by default TRAJ_ID

    """

    print('Integration with POIs...')

    # positional indexes, so row positions and index labels coincide
    data.reset_index(drop=True, inplace=True)
    df_pois.reset_index(drop=True, inplace=True)

    n_points = data.shape[0]

    # the trajectory coordinates are the same for every category
    lat_points = data[LATITUDE].values
    lon_points = data[LONGITUDE].values

    unique_categories = df_pois[label_category].unique()
    size_categories = len(unique_categories)
    print('There are %s categories' % size_categories)

    for i, c in enumerate(unique_categories, start=1):
        # POIs of the current category, extracted once per category
        df_category = df_pois[df_pois[label_category] == c]
        lat_pois = df_category[LATITUDE].values
        lon_pois = df_category[LONGITUDE].values
        poi_ids = df_category[label_id].values
        n_pois = df_category.shape[0]

        # Fresh result buffers, so each output column owns its own data.
        # np.inf / np.nan are used because the np.Infinity / np.NAN
        # aliases no longer exist in NumPy 2.0.
        current_distances = np.full(n_points, np.inf, dtype=np.float64)
        ids_POIs = np.full(n_points, np.nan, dtype='object_')

        desc = 'computing dist to {} category ({}/{})'.format(
            c, i, size_categories)
        points = zip(lat_points, lon_points)
        for pos, (lat, lon) in enumerate(progress_bar(points,
                                                      total=n_points,
                                                      desc=desc)):
            # repeat the point coordinates once per POI of the category
            lat_user = np.full(n_pois, lat, dtype=np.float64)
            lon_user = np.full(n_pois, lon, dtype=np.float64)

            distances = haversine(lat_user, lon_user, lat_pois, lon_pois)

            # nearest POI of this category and its distance
            index_min = np.argmin(distances)
            current_distances[pos] = np.min(distances)
            ids_POIs[pos] = poi_ids[index_min]

        data['id_%s' % c] = ids_POIs
        data['dist_%s' % c] = current_distances
    print('Integration with POI was finalized')
Beispiel #7
0
def join_with_pois_optimizer(data: DataFrame,
                             df_pois: DataFrame,
                             label_id: Optional[Text] = TRAJ_ID,
                             label_poi_name: Optional[Text] = NAME_POI,
                             dist_poi: Optional[List] = None,
                             reset_index: Optional[bool] = True):
    """
    Performs the integration between trajectories and points
    of interest, generating new columns referring to the id, the
    name and the distance of the nearest point of interest
    of each point in the trajectory.

    The loop runs over the points of interest: each one is compared, in a
    single vectorised step, against every trajectory point. ``data`` is
    modified in place (columns ID_POI, DIST_POI and NAME_POI) and the
    ``label_id`` / ``label_poi_name`` columns of ``df_pois`` are renamed in
    place to TRAJ_ID / NAME_POI. Nothing is returned.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data.
    df_pois : DataFrame
        The input point of interest data.
    label_id : str, optional
        Label of df_pois referring to the Point of Interest id, by default TRAJ_ID
    label_poi_name : str, optional
        Label of df_pois referring to the Point of Interest name, by default NAME_POI
    dist_poi : list, optional
        List with one distance limit per distinct value of
        ``label_poi_name``. Only its length is checked: when it does not
        match, a message is printed and nothing is computed. When None
        (the default) the check is skipped.
    reset_index : bool, optional
        Flag for reset index of the df_pois and data dataframes before the join,
        by default True

    """

    print('Integration with POIs optimized...')

    # `dist_poi` defaults to None, which has no len(): only validate the
    # size when a list was actually given.
    if dist_poi is not None:
        if len(df_pois[label_poi_name].unique()) != len(dist_poi):
            print('the size of the dist_poi is different from the size of pois')
            return

    values = _reset_and_creates_id_and_lat_lon(data, df_pois, False,
                                               reset_index)
    minimum_distances, ids_POIs, tag_POIs, lat_POI, lon_POI = values

    df_pois.rename(columns={
        label_id: TRAJ_ID,
        label_poi_name: NAME_POI
    },
                   inplace=True)

    # the trajectory coordinates never change inside the loop
    lat_points = data[LATITUDE].values
    lon_points = data[LONGITUDE].values

    # Track the first iteration explicitly instead of testing `idx == 0`,
    # which depends on the index labels of df_pois.
    first_poi = True
    for _, row in progress_bar(df_pois.iterrows(), total=len(df_pois)):
        # broadcast the current POI over the reusable buffers
        lat_POI.fill(row[LATITUDE])
        lon_POI.fill(row[LONGITUDE])

        # distance between this POI and ALL trajectory points
        current_distances = np.float64(
            haversine(lat_POI, lon_POI, lat_points, lon_points))

        if first_poi:
            # the first POI is, so far, the nearest one for every point
            minimum_distances = current_distances
            ids_POIs.fill(row[TRAJ_ID])
            tag_POIs.fill(row[NAME_POI])
            first_poi = False
            continue

        # Use the boolean mask itself: `mask is True` is an identity test
        # on the array object and is always False.
        closer = current_distances < minimum_distances
        minimum_distances = np.minimum(current_distances,
                                       minimum_distances,
                                       dtype=np.float64)

        # read the columns produced by the rename above
        ids_POIs[closer] = row[TRAJ_ID]
        tag_POIs[closer] = row[NAME_POI]

    data[ID_POI] = ids_POIs
    data[DIST_POI] = minimum_distances
    data[NAME_POI] = tag_POIs
    print('Integration with POI was finalized')
Beispiel #8
0
def join_with_home_by_id(
    df_,
    df_home,
    label_id=TRAJ_ID,
    label_address=ADDRESS,
    label_city=CITY,
    drop_id_without_home=False,
):
    """
    It performs the integration between trajectories and home points,
    generating new columns referring to the distance to the home point,
    the address and the city for each trajectory point.

    ``df_`` is modified in place: if its index has no name, ``label_id``
    is set as index for the lookup; the index is reset before returning.
    Nothing is returned.

    Parameters
    ----------
    df_ : dataframe
        The input trajectory data.

    df_home : dataframe
        The input home points data.

    label_id : String, optional(TRAJ_ID by default)
        Label of df_home referring to the home point id.

    label_address : String, optional(ADDRESS by default)
        Label of df_home referring to the home point address.

    label_city : String, optional(CITY by default)
        Label of df_home referring to the point city.

    drop_id_without_home : Boolean, optional(False by default)
        flag as an option to drop id's that don't have houses.

    """

    print('Integration with Home...')
    ids_without_home = []

    if df_.index.name is None:
        print('...setting {} as index'.format(label_id))
        df_.set_index(label_id, inplace=True)

    # name of the column that `reset_index` recreates at the end
    id_column = df_.index.name

    for idx in progress_bar(df_.index.unique()):
        homes = df_home[df_home[label_id] == idx]

        if homes.shape[0] == 0:
            print('...id: {} has not HOME'.format(idx))
            ids_without_home.append(idx)
            continue

        # only the first home registered for the id is used
        home = homes.iloc[0]
        lat_user = df_.at[idx, LATITUDE]
        lon_user = df_.at[idx, LONGITUDE]

        # `.at` gives a scalar for an id with a single tuple and an
        # array-like (ndarray or Series, depending on the pandas version)
        # otherwise, so test the dimensionality rather than the exact type.
        if np.ndim(lat_user) == 0:
            df_.at[idx, DIST_HOME] = haversine(lat_user, lon_user,
                                               home[LATITUDE],
                                               home[LONGITUDE])
            df_.at[idx, HOME] = home[label_address]
            df_.at[idx, label_city] = home[label_city]
        else:
            lat_user = np.asarray(lat_user)
            lon_user = np.asarray(lon_user)
            # repeat the home coordinates once per tuple of the id
            n_points = lat_user.shape[0]
            lat_home = np.full(n_points, home[LATITUDE], dtype=np.float64)
            lon_home = np.full(n_points, home[LONGITUDE], dtype=np.float64)
            df_.at[idx, DIST_HOME] = haversine(lat_user, lon_user,
                                               lat_home, lon_home)
            df_.at[idx, HOME] = np.array(home[label_address])
            df_.at[idx, label_city] = np.array(home[label_city])

    df_.reset_index(inplace=True)
    print('... Resetting index')

    if drop_id_without_home:
        # one pass over the restored id column instead of one drop per id
        without_home = df_[id_column].isin(ids_without_home)
        df_.drop(df_.loc[without_home].index, inplace=True)
Beispiel #9
0
def join_with_pois(df_,
                   df_pois,
                   label_id=TRAJ_ID,
                   label_poi_name=NAME_POI,
                   reset_index=True):
    """
    Performs the integration between trajectories and points
    of interest, generating new columns referring to the id,
    the name and the distance from the point of interest closest
    to each point of the trajectory.

    The result is written in place into ``df_`` (columns ID_POI,
    DIST_POI and NAME_POI); nothing is returned.

    Parameters
    ----------
    df_ : dataframe
        The input trajectory data.

    df_pois : dataframe
        The input point of interest data.

    label_id : String, optional(TRAJ_ID by default)
        Label of df_pois referring to the Point of Interest id.

    label_poi_name : String, optional(NAME_POI by default)
        Label of df_pois referring to the Point of Interest name.

    reset_index : Boolean, optional(True by default)
        Flag for reset index of the df_pois and df_ dataframes before the join.

    """

    print('Integration with POIs...')

    values = _reset_and_creates_id_and_lat_lon(df_, df_pois, True,
                                               reset_index)
    # assumes the helper returns result arrays with one slot per row of
    # `df_` and lat/lon buffers with one slot per POI -- TODO confirm
    current_distances, ids_POIs, tag_POIs, lat_user, lon_user = values

    # the POI columns never change inside the loop: extract them once
    lat_pois = df_pois[LATITUDE].values
    lon_pois = df_pois[LONGITUDE].values
    poi_ids = df_pois[label_id].values
    poi_names = df_pois[label_poi_name].values

    # Work with row positions instead of index labels: the result arrays
    # are assigned to `df_` positionally and np.argmin is positional too,
    # so this stays correct even when reset_index is False.
    points = zip(df_[LATITUDE].values, df_[LONGITUDE].values)
    for pos, (lat, lon) in enumerate(progress_bar(points, total=len(df_))):
        # broadcast the current point over the reusable buffers
        lat_user.fill(lat)
        lon_user.fill(lon)

        # distances from the current point to every POI
        distances = np.float64(
            haversine(lat_user, lon_user, lat_pois, lon_pois))

        # position of the nearest POI and its distance
        index_min = np.argmin(distances)
        current_distances[pos] = np.min(distances)

        # id and name of the nearest POI
        ids_POIs[pos] = poi_ids[index_min]
        tag_POIs[pos] = poi_names[index_min]

    df_[ID_POI] = ids_POIs
    df_[DIST_POI] = current_distances
    df_[NAME_POI] = tag_POIs

    print('Integration with POI was finalized')