def generate_prev_local_features(
    self,
    label_id: Optional[Text] = TRAJ_ID,
    local_label: Optional[Text] = LOCAL_LABEL,
    sort: Optional[bool] = True,
    inplace: Optional[bool] = True
) -> Optional['PandasDiscreteMoveDataFrame']:
    """
    Create a feature prev_local with the label of previous local to current point.

    Parameters
    ----------
    label_id : str, optional
        Represents name of column of trajectory id, by default TRAJ_ID
    local_label : str, optional
        Indicates name of column of place labels on symbolic trajectory,
        by default LOCAL_LABEL
    sort : bool, optional
        Whether the dataframe will be sorted, by default True
    inplace : bool, optional
        Represents whether the operation will be performed on
        the data provided or in a copy, by default True

    Returns
    -------
    PandasDiscreteMoveDataFrame
        Object with new features or None
        (whatever ``self._return_generated_data`` returns for ``inplace``)

    Raises
    ------
    Exception
        Any error raised while generating the feature is re-raised after
        the loop state has been printed and the operation has been closed.

    """
    operation = begin_operation('generate_prev_equ_feature')
    columns = set(self.columns)
    ids, sum_size_id, size_id, idx = self._prepare_generate_data(
        self, sort, label_id)

    try:
        message = '\nCreating generate_prev_equ_feature'
        message += ' in previous equ\n'
        print(message)

        if (self[local_label].dtype == 'int'):
            # Integer labels are widened to float (presumably so the shifted
            # column can hold NaN - TODO confirm against `shift`). float64 is
            # used because float16 represents integers exactly only up to
            # 2048 and overflows above 65504, which corrupts larger place ids.
            self[local_label] = self[local_label].astype(np.float64)

        for idx in progress_bar(
                ids, desc='Generating previous {}'.format(local_label)):
            current_local = self.at[idx, local_label]
            current_local = np.array(current_local)
            size_id = current_local.size

            if size_id <= 1:
                # a single-point trajectory has no previous local
                self.at[idx, PREV_LOCAL] = np.nan
            else:
                prev_local = shift(current_local, 1)

                # previous to current point
                self.at[idx, PREV_LOCAL] = prev_local

        return self._return_generated_data(self, columns, operation, inplace)

    except Exception:
        # dump the loop state to help locate the failing trajectory
        print('label_tid:%s\nidx:%s\nsize_id:%s\nsum_size_id:%s'
              % (label_id, idx, size_id, sum_size_id))
        self.last_operation = end_operation(operation)
        raise
def interpolate_add_deltatime_speed_features(
    move_data: DataFrame,
    label_tid: Optional[Text] = TID,
    max_dist_between_adj_points: Optional[float] = 5000,
    max_time_between_adj_points: Optional[float] = 900,
    max_speed: Optional[float] = 30,
    inplace: Optional[bool] = True
) -> Optional[DataFrame]:
    """
    Use to interpolate distances (x) to find times (y).

    For every trajectory, the rows whose 'datetime' is null get a 'time'
    value interpolated from the rows whose 'datetime' is known, using
    'distFromTrajStartToCurrPoint' as the x axis. 'delta_time', 'speed' and
    'datetime' are then filled in. Trajectories with fewer than two usable
    known points are removed from the dataframe.

    Parameters
    ----------
    move_data : dataframe
        The input trajectories data
    label_tid: str, optional("tid" by default)
        The name of the column to set as the new index during function
        execution. Indicates the tid column.
    max_dist_between_adj_points: float, optional
        The maximum distance between two adjacent points, by default 5000
    max_time_between_adj_points: float, optional
        The maximum time interval between two adjacent points, by default 900
    max_speed: float, optional
        The maximum speed between two adjacent points, by default 30
    inplace: boolean, optional
        if set to true the original dataframe will be altered,
        otherwise the alteration will be made in a copy, that will be returned,
        by default True

    Returns
    -------
    DataFrame
        The altered copy when ``inplace`` is False, otherwise None

    Raises
    ------
    AssertionError
        If times or distances are not in ascending order, if a step exceeds
        one of the ``max_*`` limits, or if the interpolation yields np.inf.

    """
    if not inplace:
        move_data = move_data.copy()

    if TID not in move_data:
        move_data.generate_tid_based_on_id_datetime()

    if move_data.index.name is not None:
        print('reseting index...')
        move_data.reset_index(inplace=True)

    tids = move_data[label_tid].unique()
    # rows without a datetime are the ones that receive interpolated times
    move_data['isNone'] = move_data['datetime'].isnull()

    if move_data.index.name is None:
        print('creating index...')
        move_data.set_index(label_tid, inplace=True)

    drop_trajectories = []
    size = move_data.shape[0]
    count = 0

    move_data['delta_time'] = np.nan
    move_data['speed'] = np.nan

    for tid in progress_bar(tids):
        filter_nodes = move_data.at[tid, 'isNone']
        # `.at` yields a 0-d value when the trajectory has a single row
        size_id = 1 if filter_nodes.shape == () else filter_nodes.shape[0]
        count += size_id

        # y: time of the rows with a known datetime
        y_ = move_data.at[tid, 'time'][~filter_nodes]
        if y_.shape[0] < 2:
            # not enough known points to interpolate from
            drop_trajectories.append(tid)
            continue

        assert np.all(
            y_[1:] >= y_[:-1]), 'time feature is not in ascending order'

        # x: distance from trajectory start of the rows with a known datetime
        x_ = move_data.at[tid, 'distFromTrajStartToCurrPoint'][~filter_nodes]
        assert np.all(
            x_[1:] >= x_[:-1]), 'distance feature is not in ascending order'

        # repeated distances are removed before building the interpolator
        idx_duplicates = np.where(x_[1:] == x_[:-1])[0]
        if idx_duplicates.shape[0] > 0:
            x_ = np.delete(x_, idx_duplicates)
            y_ = np.delete(y_, idx_duplicates)

        if y_.shape[0] < 2:
            drop_trajectories.append(tid)
            continue

        # one value per consecutive pair; the last shifted element has no
        # successor and is discarded
        delta_time = ((shift(y_.astype(np.float64), -1) - y_) / 1000.0)[:-1]
        dist_curr_to_next = (shift(x_, -1) - x_)[:-1]
        # Both operands are already trimmed to one value per pair, so the
        # quotient must not be sliced again: doing so left the speed of the
        # final pair unchecked against max_speed.
        speed = dist_curr_to_next / delta_time

        assert np.all(
            delta_time <= max_time_between_adj_points
        ), 'delta_time between points cannot be more than {}'.format(
            max_time_between_adj_points)
        assert np.all(
            dist_curr_to_next <= max_dist_between_adj_points
        ), 'distance between points cannot be more than {}'.format(
            max_dist_between_adj_points)
        assert np.all(
            speed <= max_speed
        ), 'speed between points cannot be more than {}'.format(max_speed)

        assert np.all(
            x_[1:] >= x_[:-1]), 'distance feature is not in ascending order'

        f_intp = interp1d(x_, y_, fill_value='extrapolate')

        x2_ = move_data.at[tid, 'distFromTrajStartToCurrPoint'][filter_nodes]
        assert np.all(x2_[1:] >= x2_[:-1]
                      ), 'distances in nodes are not in ascending order'

        intp_result = f_intp(x2_)
        assert np.all(intp_result[1:] >= intp_result[:-1]
                      ), 'resulting times are not in ascending order'

        assert ~np.isin(
            np.inf, intp_result), 'interpolation results with np.inf value(srs)'

        # update time features for nodes. initially they are empty.
        values = intp_result.astype(np.int64)
        feature_values_using_filter(move_data, tid, 'time', filter_nodes, values)

        # create delta_time feature
        values = (shift(
            move_data.at[tid, 'time'][filter_nodes].astype(np.float64), -1,
        ) - move_data.at[tid, 'time'][filter_nodes]) / 1000
        feature_values_using_filter(move_data, tid, 'delta_time',
                                    filter_nodes, values)

        # NOTE(review): the whole 'datetime' column is rebuilt from 'time' on
        # every trajectory iteration, and 'time' is read with unit='s' here
        # although it is divided by 1000 above - confirm both are intended.
        move_data['datetime'] = None
        datetime_strs = [
            # drops the last 6 characters of the rendered timestamp
            # (presumably the UTC offset - TODO confirm)
            str(Timestamp(int(d), unit='s', tz='America/Fortaleza'))[:-6]
            for d in move_data['time'].values
        ]
        move_data['datetime'] = datetime_strs

        # create speed feature
        values = (move_data.at[tid, 'edgeDistance'][filter_nodes]
                  / move_data.at[tid, 'delta_time'][filter_nodes])
        feature_values_using_filter(move_data, tid, 'speed', filter_nodes, values)

    print(count, size)
    print('we still need to drop {} trajectories with only 1 gps point'.format(
        len(drop_trajectories)))
    move_data.reset_index(inplace=True)
    idxs_drop = move_data[move_data[label_tid].isin(
        drop_trajectories)].index.values
    print('dropping {} rows in {} trajectories with only 1 gps point'.format(
        idxs_drop.shape[0], len(drop_trajectories)))
    if idxs_drop.shape[0] > 0:
        print('shape before dropping: {}'.format(move_data.shape))
        move_data.drop(index=idxs_drop, inplace=True)
        print('shape after dropping: {}'.format(move_data.shape))

    if not inplace:
        return move_data
def check_time_dist(
    move_data: DataFrame,
    index_name: Optional[Text] = TID,
    tids: Optional[Text] = None,
    max_dist_between_adj_points: Optional[float] = 5000,
    max_time_between_adj_points: Optional[float] = 900,
    max_speed: Optional[float] = 30
):
    """
    Check ordering and step limits of trajectory points after map matching.

    The dataframe is indexed by ``index_name`` while the checks run and its
    index is reset before returning normally. A boolean 'isNone' column
    (True where 'datetime' is null) is added to ``move_data`` and is used as
    the row mask for every check.

    Parameters
    ----------
    move_data : dataframe
        The input trajectories data
    index_name: str, optional
        The name of the column to set as the new index during function
        execution, by default TID
    tids: array, optional
        The list of the unique keys of the index_name column; when None the
        unique values of that column are used, by default None
    max_dist_between_adj_points: float, optional
        The maximum distance between two adjacent points, by default 5000
    max_time_between_adj_points: float, optional
        The maximum time interval between two adjacent points, by default 900
    max_speed: float, optional
        The maximum speed between two adjacent points, by default 30

    Raises
    ------
    ValueError
        if distances or times are not strictly ascending, if a step exceeds
        one of the ``max_*`` limits, or if a time step is not positive

    """
    dist_col = 'distFromTrajStartToCurrPoint'
    time_col = 'datetime'
    mask_col = 'isNone'

    # a named index is turned back into a column before anything else
    if move_data.index.name is not None:
        print('reseting index...')
        move_data.reset_index(inplace=True)

    if tids is None:
        tids = move_data[index_name].unique()

    if move_data.index.name is None:
        print('creating index...')
        move_data.set_index(index_name, inplace=True)

    move_data[mask_col] = move_data[time_col].isnull()

    # NOTE(review): the mask keeps the rows whose 'datetime' IS null and then
    # reads 'datetime' for those same rows - confirm whether the negated mask
    # was intended.

    # first pass: strict monotonicity of distance and time per trajectory
    for traj_id in progress_bar(
            tids, desc='checking ascending distance and time'):
        row_mask = move_data.at[traj_id, mask_col]

        cum_dists = move_data.at[traj_id, dist_col][row_mask]
        dists_ascending = np.all(cum_dists[:-1] < cum_dists[1:])
        if not dists_ascending:
            raise ValueError('distance feature is not in ascending order')

        stamps = move_data.at[traj_id, time_col][row_mask].astype(int)
        stamps_ascending = np.all(stamps[:-1] < stamps[1:])
        if not stamps_ascending:
            raise ValueError('time feature is not in ascending order')

    count = 0

    # second pass: per-step distance, time and speed limits
    for traj_id in progress_bar(
            tids, desc='checking delta_times, delta_dists and speeds'):
        row_mask = move_data.at[traj_id, mask_col]

        cum_dists = move_data.at[traj_id, dist_col][row_mask]
        # difference to the next point; the trailing element is discarded
        step_dists = (shift(cum_dists, -1) - cum_dists)[:-1]
        if not np.all(step_dists <= max_dist_between_adj_points):
            raise ValueError('delta_dists must be <= {}'.format(
                max_dist_between_adj_points))

        stamps = move_data.at[traj_id, time_col][row_mask].astype(int)
        step_times = ((shift(stamps, -1) - stamps) / 1000.0)[:-1]
        if not np.all(step_times <= max_time_between_adj_points):
            raise ValueError('delta_times must be <= {}'.format(
                max_time_between_adj_points))
        if not np.all(step_times > 0):
            raise ValueError('delta_times must be > 0')

        step_speeds = step_dists / step_times
        if not np.all(step_speeds <= max_speed):
            raise ValueError('speeds > {}'.format(max_speed))

        # a 0-d mask means the trajectory holds a single row
        count += 1 if row_mask.shape == () else row_mask.shape[0]

    move_data.reset_index(inplace=True)
def generate_prev_local_features(
    self,
    label_id: str = TRAJ_ID,
    local_label: str = LOCAL_LABEL,
    sort: bool = True,
    inplace: bool = True
) -> 'PandasDiscreteMoveDataFrame | None':
    """
    Create a feature prev_local with the label of previous local to current point.

    Parameters
    ----------
    label_id : str, optional
        Represents name of column of trajectory id, by default TRAJ_ID
    local_label : str, optional
        Indicates name of column of place labels on symbolic trajectory,
        by default LOCAL_LABEL
    sort : bool, optional
        Whether the dataframe will be sorted, by default True
    inplace : bool, optional
        Represents whether the operation will be performed on
        the data provided or in a copy, by default True

    Returns
    -------
    PandasDiscreteMoveDataFrame
        The copy with the new feature when ``inplace`` is False,
        otherwise None

    """
    operation = begin_operation('generate_prev_equ_feature')

    data_ = self if inplace else self.copy()

    # Prepare the frame that is actually processed. Passing `self` here would
    # leave a copy unprepared while `data_.at[idx, ...]` below relies on the
    # prepared index, and would alter the caller's frame although
    # inplace=False was requested.
    ids, _, _ = self._prepare_generate_data(data_, sort, label_id)

    message = '\nCreating generate_prev_equ_feature in previous equ\n'
    logger.debug(message)

    if (data_[local_label].dtype == 'int'):
        # Integer labels are widened to float (presumably so the shifted
        # column can hold NaN - TODO confirm against `shift`). float64 is used
        # because float16 represents integers exactly only up to 2048 and
        # overflows above 65504, which corrupts larger place ids.
        data_[local_label] = data_[local_label].astype(np.float64)

    for idx in progress_bar(ids, desc=f'Generating previous {local_label}'):
        current_local = data_.at[idx, local_label]
        current_local = np.array(current_local)
        size_id = current_local.size

        if size_id <= 1:
            # a single-point trajectory has no previous local
            data_.at[idx, PREV_LOCAL] = np.nan
        else:
            prev_local = shift(current_local, 1)

            # previous to current point
            data_.at[idx, PREV_LOCAL] = prev_local

    data_.reset_index(inplace=True)
    data_.last_operation = end_operation(operation)

    if not inplace:
        return data_
    return None
def interpolate_add_deltatime_speed_features(
    move_data,
    label_id="tid",
    max_time_between_adj_points=900,
    max_dist_between_adj_points=5000,
    max_speed=30,
    inplace=True,
):
    """Use to interpolate distances (x) to find times (y).

    For every trajectory, the rows flagged with ``isNode == 1`` get a 'time'
    value interpolated from the remaining rows, using
    'distFromTrajStartToCurrPoint' as the x axis. 'delta_time' and 'speed'
    are then filled in for those rows. Trajectories with fewer than two
    usable non-node points are removed from the dataframe.

    Parameters
    ----------
    move_data : dataframe
       The input trajectories data
    label_id: String, optional("tid" by default)
        The name of the column to set as the new index during function
        execution. Indicates the tid column.
    max_dist_between_adj_points: double, optional(5000 by default)
        The maximum distance between two adjacent points.
        Used only for verification.
    max_time_between_adj_points: double, optional(900 by default)
        The maximum time interval between two adjacent points.
        Used only for verification.
    max_speed: double, optional(30 by default)
        The maximum speed between two adjacent points.
        Used only for verification.
    inplace: boolean, optional(True by default)
        if set to true the original dataframe will be altered,
        otherwise the alteration will be made in a copy, that will be returned.

    Returns
    -------
    move_data : dataframe
        A copy of the original dataframe, with the alterations done
        by the function. (When inplace is False)
    None
        When inplace is True

    Raises
    ------
    AssertionError
        If times or distances are not in ascending order, if a step exceeds
        one of the ``max_*`` limits, or if the interpolation yields np.inf.

    """
    if not inplace:
        move_data = PandasMoveDataFrame(data=move_data.to_DataFrame())

    if TID not in move_data:
        move_data.generate_tid_based_on_id_datatime()

    if move_data.index.name is not None:
        print("reseting index...")
        move_data.reset_index(inplace=True)

    tids = move_data[label_id].unique()

    if move_data.index.name is None:
        print("creating index...")
        move_data.set_index(label_id, inplace=True)

    drop_trajectories = []
    size = move_data.shape[0]
    count = 0

    move_data["delta_time"] = np.nan
    move_data["speed"] = np.nan

    for tid in progress_bar(tids):
        filter_nodes = move_data.at[tid, "isNode"] == 1
        # `.at` yields a 0-d value when the trajectory has a single row
        size_id = 1 if filter_nodes.shape == () else filter_nodes.shape[0]
        count += size_id

        # y - time of snapped points
        y_ = move_data.at[tid, "time"][~filter_nodes]
        if y_.shape[0] < 2:
            # insufficient points for interpolation: schedule for dropping
            drop_trajectories.append(tid)
            continue

        assert np.all(
            y_[1:] >= y_[:-1]), "time feature is not in ascending order"

        # x - distance from traj start to snapped points
        x_ = move_data.at[tid, "distFromTrajStartToCurrPoint"][~filter_nodes]
        assert np.all(x_[1:] >= x_[:-1]
                      ), "distance feature is not in ascending order"

        # remove duplicates in distances to avoid np.inf in future
        # interpolation results
        idx_duplicates = np.where(x_[1:] == x_[:-1])[0]
        if idx_duplicates.shape[0] > 0:
            x_ = np.delete(x_, idx_duplicates)
            y_ = np.delete(y_, idx_duplicates)

        if y_.shape[0] < 2:
            # insufficient points for interpolation: schedule for dropping
            drop_trajectories.append(tid)
            continue

        # compute delta_time and distance between points; one value per
        # consecutive pair, the last shifted element is discarded
        delta_time = ((shift(y_.astype(np.float64), -1) - y_) / 1000.0)[:-1]
        dist_curr_to_next = (shift(x_, -1) - x_)[:-1]
        # Both operands are already trimmed to one value per pair, so the
        # quotient must not be sliced again: doing so left the speed of the
        # final pair unchecked against max_speed.
        speed = dist_curr_to_next / delta_time

        assert np.all(
            delta_time <= max_time_between_adj_points
        ), "delta_time between points cannot be more than {}".format(
            max_time_between_adj_points)
        assert np.all(
            dist_curr_to_next <= max_dist_between_adj_points
        ), "distance between points cannot be more than {}".format(
            max_dist_between_adj_points)
        assert np.all(
            speed <= max_speed
        ), "speed between points cannot be more than {}".format(max_speed)

        assert np.all(x_[1:] >= x_[:-1]
                      ), "distance feature is not in ascending order"

        f_intp = interp1d(x_, y_, fill_value="extrapolate")

        x2_ = move_data.at[tid, "distFromTrajStartToCurrPoint"][filter_nodes]
        assert np.all(x2_[1:] >= x2_[:-1]
                      ), "distances in nodes are not in ascending order"

        intp_result = f_intp(x2_)
        assert np.all(intp_result[1:] >= intp_result[:-1]
                      ), "resulting times are not in ascending order"

        assert ~np.isin(
            np.inf, intp_result), "interpolation results with np.inf value(s)"

        # update time features for nodes. initially they are empty.
        values = intp_result.astype(np.int64)
        feature_values_using_filter(move_data, tid, "time", filter_nodes, values)

        # create delta_time feature
        values = (shift(
            move_data.at[tid, "time"][filter_nodes].astype(np.float64), -1,
        ) - move_data.at[tid, "time"][filter_nodes]) / 1000
        feature_values_using_filter(move_data, tid, "delta_time",
                                    filter_nodes, values)

        # create speed feature
        values = (move_data.at[tid, "edgeDistance"][filter_nodes]
                  / move_data.at[tid, "delta_time"][filter_nodes])
        feature_values_using_filter(move_data, tid, "speed", filter_nodes, values)

    print(count, size)
    print("we still need to drop {} trajectories with only 1 gps point".format(
        len(drop_trajectories)))
    move_data.reset_index(inplace=True)
    idxs_drop = move_data[move_data[label_id].isin(
        drop_trajectories)].index.values
    print("dropping {} rows in {} trajectories with only 1 gps point".format(
        idxs_drop.shape[0], len(drop_trajectories)))
    if idxs_drop.shape[0] > 0:
        print("shape before dropping: {}".format(move_data.shape))
        move_data.drop(index=idxs_drop, inplace=True)
        print("shape after dropping: {}".format(move_data.shape))

    if not inplace:
        return move_data
def check_time_dist(
    move_data,
    index_name="tid",
    tids=None,
    max_dist_between_adj_points=5000,
    max_time_between_adj_points=900,
    max_speed=30,
    inplace=True,
):
    """
    Used to verify that the trajectories points are in the correct order after
    map matching, considering time and distance.

    Only the rows whose 'isNode' value is not 1 are checked. The dataframe is
    indexed by ``index_name`` while the checks run and its index is reset
    before returning normally.

    Parameters
    ----------
    move_data : dataframe
       The input trajectories data
    index_name: String, optional("tid" by default)
        The name of the column to set as the new index during function
        execution. Indicates the tid column.
    tids: array, optional(None by default)
        The list of the unique keys of the index_name column.
    max_dist_between_adj_points: double, optional(5000 by default)
        The maximum distance between two adjacent points.
    max_time_between_adj_points: double, optional(900 by default)
        The maximum time interval between two adjacent points.
    max_speed: double, optional(30 by default)
        The maximum speed between two adjacent points.
    inplace: boolean, optional(True by default)
        if set to true the original dataframe will be altered,
        otherwise the alteration will be made in a copy, that will be returned.

    Returns
    -------
    move_data : dataframe
        A copy of the original dataframe, with the alterations done
        by the function. (When inplace is False)
    None
        When inplace is True

    Raises
    ------
    AssertionError
        If distances or times are not strictly ascending, or if a step
        violates one of the ``max_*`` limits. Raised explicitly rather than
        through ``assert`` so the checks still run under ``python -O``.

    """
    if not inplace:
        move_data = PandasMoveDataFrame(data=move_data.to_DataFrame())

    if TID not in move_data:
        move_data.generate_tid_based_on_id_datatime()

    if move_data.index.name is not None:
        print("reseting index...")
        move_data.reset_index(inplace=True)

    if tids is None:
        tids = move_data[index_name].unique()

    if move_data.index.name is None:
        print("creating index...")
        move_data.set_index(index_name, inplace=True)

    for tid in progress_bar(tids, desc="checking ascending distance and time"):
        filter_ = move_data.at[tid, "isNode"] != 1

        # be sure that distances are in ascending order
        dists = move_data.at[tid, "distFromTrajStartToCurrPoint"][filter_]
        if not np.all(dists[:-1] < dists[1:]):
            raise AssertionError("distance feature is not in ascending order")

        # be sure that times are in ascending order
        times = move_data.at[tid, "time"][filter_].astype(np.float64)
        if not np.all(times[:-1] < times[1:]):
            raise AssertionError("time feature is not in ascending order")

    for tid in progress_bar(
            tids, desc="checking delta_times, delta_dists and speeds"):
        filter_ = move_data.at[tid, "isNode"] != 1

        dists = move_data.at[tid, "distFromTrajStartToCurrPoint"][filter_]
        # do not use last element (np.nan)
        delta_dists = (shift(dists, -1) - dists)[:-1]
        if not np.all(delta_dists <= max_dist_between_adj_points):
            raise AssertionError(
                "delta_dists must be <= {}".format(max_dist_between_adj_points))

        times = move_data.at[tid, "time"][filter_].astype(np.float64)
        # do not use last element (np.nan)
        delta_times = ((shift(times, -1) - times) / 1000.0)[:-1]
        if not np.all(delta_times <= max_time_between_adj_points):
            raise AssertionError(
                "delta_times must be <= {}".format(max_time_between_adj_points))

        if not np.all(delta_times > 0):
            raise AssertionError("delta_times must be > 0")

        if not np.all(delta_dists > 0):
            raise AssertionError("delta_dists must be > 0")

        speeds = delta_dists / delta_times
        if not np.all(speeds <= max_speed):
            raise AssertionError("speeds > {}".format(max_speed))

    move_data.reset_index(inplace=True)
    if not inplace:
        return move_data