def create_or_update_out_of_the_bbox(
    move_data: DataFrame,
    bbox: tuple[int, int, int, int],
    new_label: str = OUT_BBOX,
    inplace: bool = False
) -> DataFrame | None:
    """
    Create or update a boolean feature to detect points out of the bbox.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    bbox : tuple
        Tuple of 4 elements, containing the minimum and maximum values
        of latitude and longitude of the bounding box.
    new_label: string, optional
        The name of the new feature with detected points out of the bbox,
        by default OUT_BBOX
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the result of the filtering, otherwise a copy will be returned,
        by default False

    Returns
    -------
    DataFrame
        Whatever ``_end_create_operation`` returns for the labelled
        dataframe; per the annotation, a DataFrame or None.

    Raises
    ------
    ValueError
        If the bbox filter returns None.

    """
    if not inplace:
        # Work on an independent copy so the caller's frame is untouched.
        move_data = move_data.copy()

    logger.debug(
        '\nCreate or update boolean feature to detect points out of the bbox'
    )
    # filter_out=True is passed so the filter yields the rows to be flagged.
    filtered_ = filters.by_bbox(move_data, bbox, filter_out=True)

    if filtered_ is None:
        raise ValueError('Filter bbox failed!')

    logger.debug('...Creating a new label named as %s', new_label)
    # Every row starts as False; only the filtered rows are flipped below.
    move_data[new_label] = False

    if filtered_.shape[0] > 0:
        logger.debug('...Setting %s as True\n', new_label)
        # .loc (not the scalar-only .at) is the label-based accessor that
        # accepts a whole Index of row labels.
        move_data.loc[filtered_.index, new_label] = True

    return _end_create_operation(move_data, new_label, inplace)
def create_or_update_out_of_the_bbox(
    move_data, bbox, new_label=OUT_BBOX, inplace=True
):
    """
    Create or update a boolean feature to detect points out of the bbox.

    Parameters
    ----------
    move_data: dataframe
        The input trajectories data.
    bbox : tuple
        Tuple of 4 elements, containing the minimum and maximum values
        of latitude and longitude of the bounding box.
    new_label: string, optional
        The name of the new feature with detected points out of the bbox,
        by default OUT_BBOX
    inplace : boolean, optional
        if set to true the original dataframe will be altered to contain
        the result of the filtering, otherwise a copy will be returned,
        by default True

    Returns
    -------
    dataframe
        Whatever ``_end_create_operation`` returns for the dataframe with
        the boolean feature marking points out of the bbox.

    """
    if not inplace:
        # A plain slice (move_data[:]) is not an independent copy; use an
        # explicit copy so the caller's frame is never written through.
        move_data = move_data.copy()

    print('\nCreate or update boolean feature to detect points out of the bbox')
    # filter_out=True is passed so the filter yields the rows to be flagged.
    filtered_ = filters.by_bbox(move_data, bbox, filter_out=True)

    print('...Creating a new label named as %s' % new_label)
    # Every row starts as False; only the filtered rows are flipped below.
    move_data[new_label] = False

    if filtered_.shape[0] > 0:
        print('...Setting %s as True\n' % new_label)
        # .loc (not the scalar-only .at) is the label-based accessor that
        # accepts a whole Index of row labels.
        move_data.loc[filtered_.index, new_label] = True

    return _end_create_operation(move_data, new_label, inplace)
def join_with_pois_by_dist_and_datetime(
    data: DataFrame,
    df_pois: DataFrame,
    label_date: Optional[Text] = DATETIME,
    label_event_id: Optional[Text] = EVENT_ID,
    label_event_type: Optional[Text] = EVENT_TYPE,
    time_window: Optional[float] = 3600,
    radius: Optional[float] = 1000,
):
    """
    Integrate trajectories with event points of interest by space and time.

    For every trajectory point, the events are narrowed down first to a
    bounding box built from ``radius`` around the point and then to the
    datetime window computed for that point. For the events that remain,
    the ids, the types and the haversine distances to the point are
    stored. The results are written to ``data`` as three columns:
    ``label_event_id``, ``DIST_EVENT`` and ``label_event_type``.

    Parameters
    ----------
    data : DataFrame
        The input trajectory data. It is modified in place.
    df_pois : DataFrame
        The input events points of interest data.
    label_date : str, optional
        Label of data referring to the datetime of the input trajectory
        data, by default DATETIME
    label_event_id : str, optional
        Label of df_pois referring to the id of the event,
        by default EVENT_ID
    label_event_type : str, optional
        Label of df_pois referring to the type of the event,
        by default EVENT_TYPE
    time_window : float, optional
        tolerable length of time range for assigning the event's
        point of interest to the trajectory point, by default 3600
    radius: float, optional
        maximum radius of pois, by default 1000

    Raises
    ------
    KeyError
        If ``label_date`` is not a column of ``df_pois``.

    """
    print('Integration with Events...')

    if label_date not in df_pois:
        raise KeyError("POI's DataFrame must contain a %s column" % label_date)

    # The helper supplies the per-point time windows and the containers
    # that the loop below fills in.
    (
        window_start, window_end, current_distances, event_id, event_type
    ) = _reset_set_window_and_creates_event_id_type_all(
        data, df_pois, label_date, time_window
    )

    for idx, row in progress_bar(data.iterrows(), total=data.shape[0]):
        point_lat, point_lon = row[LATITUDE], row[LONGITUDE]

        # spatial filter: bounding box derived from the radius
        search_bbox = filters.get_bbox_by_radius((point_lat, point_lon), radius)
        nearby = filters.by_bbox(df_pois, search_bbox)

        # temporal filter: keep events inside this point's window
        filters.by_datetime(
            nearby,
            start_datetime=window_start[idx],
            end_datetime=window_end[idx],
            inplace=True
        )

        n_hits = nearby.shape[0]
        if n_hits <= 0:
            # nothing matched; leave the values prepared by the helper
            continue

        nearby.reset_index(drop=True, inplace=True)

        # repeat the point's coordinates once per matching event
        lat_user = np.full(n_hits, point_lat, dtype=np.float64)
        lon_user = np.full(n_hits, point_lon, dtype=np.float64)

        current_distances[idx] = haversine(
            lat_user,
            lon_user,
            nearby[LATITUDE].to_numpy(),
            nearby[LONGITUDE].to_numpy()
        )
        event_type[idx] = nearby[label_event_type].to_numpy(dtype=np.ndarray)
        event_id[idx] = nearby[label_event_id].to_numpy(dtype=np.ndarray)

    data[label_event_id] = event_id
    data[DIST_EVENT] = current_distances
    data[label_event_type] = event_type
    print('Integration with event was completed')