Example #1
0
    def generate_prev_local_features(
        self,
        label_id: Optional[Text] = TRAJ_ID,
        local_label: Optional[Text] = LOCAL_LABEL,
        sort: Optional[bool] = True,
        inplace: Optional[bool] = True
    ) -> Optional['PandasDiscreteMoveDataFrame']:
        """
        Add a PREV_LOCAL feature with the place label that precedes each point.

        Parameters
        ----------
        label_id : str, optional
            Name of the trajectory id column, by default TRAJ_ID
        local_label : str, optional
            Name of the column holding the place labels of the symbolic
            trajectory, by default LOCAL_LABEL
        sort : bool, optional
            Whether the dataframe will be sorted (forwarded to
            ``_prepare_generate_data``), by default True
        inplace : bool, optional
            Whether the operation is performed on the data provided or on
            a copy (forwarded to ``_return_generated_data``), by default True

        Returns
        -------
        PandasDiscreteMoveDataFrame
            Object with new features or None

        Raises
        ------
        Exception
            Any error raised while generating the feature is re-raised after
            the current state is printed and the operation is closed.

        """
        operation = begin_operation('generate_prev_equ_feature')
        columns = set(self.columns)
        ids, sum_size_id, size_id, idx = self._prepare_generate_data(
            self, sort, label_id)

        try:
            print('\nCreating generate_prev_equ_feature in previous equ\n')

            # NOTE(review): integer labels are cast to float, presumably so
            # the shifted values can carry NaN - confirm.
            if self[local_label].dtype == 'int':
                self[local_label] = self[local_label].astype(np.float16)

            for idx in progress_bar(
                    ids, desc=f'Generating previous {local_label}'):
                labels_of_id = np.array(self.at[idx, local_label])
                size_id = labels_of_id.size

                if size_id > 1:
                    # previous to current point
                    self.at[idx, PREV_LOCAL] = shift(labels_of_id, 1)
                else:
                    # a single point has no predecessor
                    self.at[idx, PREV_LOCAL] = np.nan

            return self._return_generated_data(
                self, columns, operation, inplace)

        except Exception as e:
            # Print the loop state for debugging, close the operation and
            # propagate the very same exception.
            print(
                f'label_tid:{label_id!s}\nidx:{idx!s}'
                f'\nsize_id:{size_id!s}\nsum_size_id:{sum_size_id!s}'
            )
            self.last_operation = end_operation(operation)
            raise e
Example #2
0
def interpolate_add_deltatime_speed_features(
        move_data: DataFrame,
        label_tid: Optional[Text] = TID,
        max_dist_between_adj_points: Optional[float] = 5000,
        max_time_between_adj_points: Optional[float] = 900,
        max_speed: Optional[float] = 30,
        inplace: Optional[bool] = True) -> Optional[DataFrame]:
    """
    Use to interpolate distances (x) to find times (y).

    Rows whose 'datetime' is null are treated as nodes. For every trajectory
    the 'time' of the node rows is interpolated from the remaining rows over
    'distFromTrajStartToCurrPoint'; 'delta_time' and 'speed' are then written
    for the node rows through ``feature_values_using_filter``. Trajectories
    left with fewer than two usable rows are dropped at the end.

    Parameters
    ----------
    move_data : dataframe
        The input trajectories data
    label_tid : str, optional
        The name of the column to set as the new index during function
        execution. Indicates the tid column, by default TID
    max_dist_between_adj_points : float, optional
        The maximum distance between two adjacent points, by default 5000
    max_time_between_adj_points : float, optional
        The maximum time interval between two adjacent points, by default 900
    max_speed : float, optional
        The maximum speed between two adjacent points, by default 30
    inplace : boolean, optional
        if set to true the original dataframe will be altered,
        otherwise the alteration will be made in a copy, that will be returned,
        by default True

    Returns
    -------
    DataFrame
        The altered copy when ``inplace`` is False, otherwise None

    Raises
    ------
    AssertionError
        If times or distances are not in ascending order, if a delta time,
        distance or speed between adjacent points exceeds its maximum, or if
        the interpolation yields np.inf. These checks are ``assert``
        statements, so they are skipped when Python runs with ``-O``.
    """

    if not inplace:
        move_data = move_data.copy()

    if TID not in move_data:
        move_data.generate_tid_based_on_id_datetime()

    if move_data.index.name is not None:
        print('reseting index...')
        move_data.reset_index(inplace=True)

    tids = move_data[label_tid].unique()
    # Rows without a datetime are the ones whose time is interpolated below.
    move_data['isNone'] = move_data['datetime'].isnull()

    if move_data.index.name is None:
        print('creating index...')
        move_data.set_index(label_tid, inplace=True)

    drop_trajectories = []
    size = move_data.shape[0]
    count = 0

    move_data['delta_time'] = np.nan
    move_data['speed'] = np.nan

    for tid in progress_bar(tids):
        filter_nodes = move_data.at[tid, 'isNone']
        size_id = 1 if filter_nodes.shape == () else filter_nodes.shape[0]
        count += size_id

        # y - time of the rows that already have a datetime
        y_ = move_data.at[tid, 'time'][~filter_nodes]
        if y_.shape[0] < 2:
            # not enough points to interpolate from
            drop_trajectories.append(tid)
            continue

        assert np.all(
            y_[1:] >= y_[:-1]), 'time feature is not in ascending order'

        # x - distance from trajectory start of those same rows
        x_ = move_data.at[tid, 'distFromTrajStartToCurrPoint'][~filter_nodes]

        assert np.all(
            x_[1:] >= x_[:-1]), 'distance feature is not in ascending order'

        # remove repeated distances before building the interpolator
        idx_duplicates = np.where(x_[1:] == x_[:-1])[0]
        if idx_duplicates.shape[0] > 0:
            x_ = np.delete(x_, idx_duplicates)
            y_ = np.delete(y_, idx_duplicates)

        if y_.shape[0] < 2:
            drop_trajectories.append(tid)
            continue

        # The shifted difference has no valid value in its last position,
        # so that element is dropped once from each delta.
        delta_time = ((shift(y_.astype(np.float64), -1) - y_) / 1000.0)[:-1]
        dist_curr_to_next = (shift(x_, -1) - x_)[:-1]
        # Both operands are already trimmed; trimming the quotient again
        # would leave the last segment's speed unchecked.
        speed = dist_curr_to_next / delta_time

        assert np.all(
            delta_time <= max_time_between_adj_points
        ), 'delta_time between points cannot be more than {}'.format(
            max_time_between_adj_points)
        assert np.all(
            dist_curr_to_next <= max_dist_between_adj_points
        ), 'distance between points cannot be more than {}'.format(
            max_dist_between_adj_points)
        assert np.all(
            speed <= max_speed
        ), 'speed between points cannot be more than {}'.format(max_speed)

        assert np.all(
            x_[1:] >= x_[:-1]), 'distance feature is not in ascending order'

        f_intp = interp1d(x_, y_, fill_value='extrapolate')

        # distances of the node rows, where times must be estimated
        x2_ = move_data.at[tid, 'distFromTrajStartToCurrPoint'][filter_nodes]
        assert np.all(x2_[1:] >= x2_[:-1]
                      ), 'distances in nodes are not in ascending order'

        intp_result = f_intp(x2_)
        assert np.all(intp_result[1:] >= intp_result[:-1]
                      ), 'resulting times are not in ascending order'

        assert ~np.isin(
            np.inf,
            intp_result), 'interpolation results with np.inf value(srs)'

        # update time features for nodes. initially they are empty.
        values = intp_result.astype(np.int64)
        feature_values_using_filter(move_data, tid, 'time', filter_nodes,
                                    values)

        # create delta_time feature
        values = (shift(
            move_data.at[tid, 'time'][filter_nodes].astype(np.float64),
            -1,
        ) - move_data.at[tid, 'time'][filter_nodes]) / 1000
        feature_values_using_filter(move_data, tid, 'delta_time', filter_nodes,
                                    values)

        # NOTE(review): this rebuilds 'datetime' for the WHOLE dataframe on
        # every trajectory iteration, and int(d) is applied to every 'time'
        # value, including rows of trajectories not processed yet. It looks
        # like it belongs after the loop - confirm before moving it.
        move_data['datetime'] = None
        datetime = []
        for d in move_data['time'].values:
            data = Timestamp(int(d), unit='s', tz='America/Fortaleza')
            # drop the last 6 characters (presumably the UTC offset)
            datetime.append(str(data)[:-6])
        move_data['datetime'] = datetime

        # create speed feature
        values = (move_data.at[tid, 'edgeDistance'][filter_nodes] /
                  move_data.at[tid, 'delta_time'][filter_nodes])
        feature_values_using_filter(move_data, tid, 'speed', filter_nodes,
                                    values)

    print(count, size)
    print('we still need to drop {} trajectories with only 1 gps point'.format(
        len(drop_trajectories)))
    move_data.reset_index(inplace=True)
    idxs_drop = move_data[move_data[label_tid].isin(
        drop_trajectories)].index.values
    print('dropping {} rows in {} trajectories with only 1 gps point'.format(
        idxs_drop.shape[0], len(drop_trajectories)))
    if idxs_drop.shape[0] > 0:
        print('shape before dropping: {}'.format(move_data.shape))
        move_data.drop(index=idxs_drop, inplace=True)
        print('shape after dropping: {}'.format(move_data.shape))

    if not inplace:
        return move_data
Example #3
0
def check_time_dist(move_data: DataFrame,
                    index_name: Optional[Text] = TID,
                    tids: Optional[Text] = None,
                    max_dist_between_adj_points: Optional[float] = 5000,
                    max_time_between_adj_points: Optional[float] = 900,
                    max_speed: Optional[float] = 30):
    """
    Used to verify that the trajectories points are in the correct order after
    map matching, considering time and distance.

    The dataframe is modified in place: its index is reset, set to
    ``index_name`` while checking and reset again at the end, and a boolean
    'isNone' column (``datetime`` is null) is added.

    Parameters
    ----------
    move_data : dataframe
        The input trajectories data
    index_name : str, optional
        The name of the column to set as the new index during function
        execution, by default TID
    tids : array, optional
        The list of the unique keys of the index_name column; when None the
        unique values of that column are used, by default None
    max_dist_between_adj_points : float, optional
        The maximum distance between two adjacent points, by default 5000
    max_time_between_adj_points : float, optional
        The maximum time interval between two adjacent points, by default 900
    max_speed : float, optional
        The maximum speed between two adjacent points, by default 30

    Raises
    ------
    ValueError
        if distances or times are not strictly ascending, if a delta
        distance, delta time or speed exceeds its maximum, or if a delta time
        is not positive
    """
    if move_data.index.name is not None:
        print('reseting index...')
        move_data.reset_index(inplace=True)

    if tids is None:
        tids = move_data[index_name].unique()

    if move_data.index.name is None:
        print('creating index...')
        move_data.set_index(index_name, inplace=True)

    # NOTE(review): the mask is True where 'datetime' is null, so every check
    # below runs on the rows WITHOUT a datetime, whose 'datetime' is then cast
    # to int. Confirm this is the intended selection and not its negation.
    move_data['isNone'] = move_data['datetime'].isnull()

    # First pass: ordering of every trajectory.
    for tid in progress_bar(tids, desc='checking ascending distance and time'):
        filter_ = move_data.at[tid, 'isNone']

        # be sure that distances are in ascending order
        dists = move_data.at[tid, 'distFromTrajStartToCurrPoint'][filter_]
        if not np.all(dists[:-1] < dists[1:]):
            raise ValueError('distance feature is not in ascending order')

        # be sure that times are in ascending order
        times = move_data.at[tid, 'datetime'][filter_].astype(int)
        if not np.all(times[:-1] < times[1:]):
            raise ValueError('time feature is not in ascending order')

    # Second pass: limits on the deltas between adjacent points.
    for tid in progress_bar(
            tids, desc='checking delta_times, delta_dists and speeds'):
        filter_ = move_data.at[tid, 'isNone']

        dists = move_data.at[tid, 'distFromTrajStartToCurrPoint'][filter_]
        # the last element of a shifted difference has no successor
        delta_dists = (shift(dists, -1) - dists)[:-1]

        if not np.all(delta_dists <= max_dist_between_adj_points):
            raise ValueError('delta_dists must be <= {}'.format(
                max_dist_between_adj_points))

        times = move_data.at[tid, 'datetime'][filter_].astype(int)
        delta_times = ((shift(times, -1) - times) / 1000.0)[:-1]

        if not np.all(delta_times <= max_time_between_adj_points):
            raise ValueError('delta_times must be <= {}'.format(
                max_time_between_adj_points))

        if not np.all(delta_times > 0):
            raise ValueError('delta_times must be > 0')

        speeds = delta_dists / delta_times
        if not np.all(speeds <= max_speed):
            raise ValueError('speeds > {}'.format(max_speed))

    move_data.reset_index(inplace=True)
Example #4
0
    def generate_prev_local_features(
            self,
            label_id: str = TRAJ_ID,
            local_label: str = LOCAL_LABEL,
            sort: bool = True,
            inplace: bool = True) -> 'PandasDiscreteMoveDataFrame | None':
        """
        Create a feature prev_local with the label of previous local to current point.

        Parameters
        ----------
        label_id : str, optional
            Represents name of column of trajectory id, by default TRAJ_ID
        local_label : str, optional
            Indicates name of column of place labels on symbolic trajectory,
            by default LOCAL_LABEL
        sort : bool, optional
            Whether the dataframe will be sorted, by default True
        inplace : bool, optional
            Represents whether the operation will be performed on
            the data provided or in a copy, by default True

        Returns
        -------
        PandasDiscreteMoveDataFrame
            The modified copy when ``inplace`` is False, otherwise None

        """
        operation = begin_operation('generate_prev_equ_feature')
        data_ = self if inplace else self.copy()

        # Prepare the frame that is actually modified below. Preparing `self`
        # while writing to a copy would alter the caller's object and leave
        # the copy unprepared (assumes `_prepare_generate_data` indexes the
        # frame it receives - the `.at[idx, ...]` lookups and the final
        # `reset_index` rely on that).
        ids, _, _ = self._prepare_generate_data(data_, sort, label_id)

        message = '\nCreating generate_prev_equ_feature in previous equ\n'
        logger.debug(message)

        # NOTE(review): integer labels are cast to float, presumably so the
        # shifted values can carry NaN - confirm.
        if (data_[local_label].dtype == 'int'):
            data_[local_label] = data_[local_label].astype(np.float16)

        for idx in progress_bar(ids,
                                desc=f'Generating previous {local_label}'):
            current_local = np.array(data_.at[idx, local_label])

            if current_local.size <= 1:
                # a single point has no previous local
                data_.at[idx, PREV_LOCAL] = np.nan
            else:
                # previous to current point
                data_.at[idx, PREV_LOCAL] = shift(current_local, 1)

        data_.reset_index(inplace=True)
        data_.last_operation = end_operation(operation)

        if not inplace:
            return data_
Example #5
0
def interpolate_add_deltatime_speed_features(
    move_data,
    label_id="tid",
    max_time_between_adj_points=900,
    max_dist_between_adj_points=5000,
    max_speed=30,
    inplace=True,
):
    """Use to interpolate distances (x) to find times (y).

    Rows whose "isNode" equals 1 are treated as nodes. For every trajectory
    the "time" of the node rows is interpolated from the remaining rows over
    "distFromTrajStartToCurrPoint"; "delta_time" and "speed" are then written
    for the node rows through ``feature_values_using_filter``. Trajectories
    left with fewer than two usable rows are dropped at the end.

    Parameters
    ----------
    move_data : dataframe
        The input trajectories data
    label_id: String, optional("tid" by default)
        The name of the column to set as the new index during function
        execution. Indicates the tid column.
    max_time_between_adj_points: double, optional(900 by default)
        The maximum time interval between two adjacent points.
        Used only for verification.
    max_dist_between_adj_points: double, optional(5000 by default)
        The maximum distance between two adjacent points.
        Used only for verification.
    max_speed: double, optional(30 by default)
        The maximum speed between two adjacent points.
        Used only for verification.
    inplace: boolean, optional(True by default)
        if set to true the original dataframe will be altered,
        otherwise the alteration will be made in a copy, that will be returned.

    Returns
    -------
    move_data : dataframe
        A copy of the original dataframe, with the alterations done by the
        function. (When inplace is False)
    None
        When inplace is True

    Raises
    ------
    AssertionError
        If times or distances are not in ascending order, if a delta time,
        distance or speed between adjacent points exceeds its maximum, or if
        the interpolation yields np.inf. These checks are ``assert``
        statements, so they are skipped when Python runs with ``-O``.
    """

    if not inplace:
        move_data = PandasMoveDataFrame(data=move_data.to_DataFrame())

    if TID not in move_data:
        move_data.generate_tid_based_on_id_datatime()

    if move_data.index.name is not None:
        print("reseting index...")
        move_data.reset_index(inplace=True)

    tids = move_data[label_id].unique()

    if move_data.index.name is None:
        print("creating index...")
        move_data.set_index(label_id, inplace=True)

    drop_trajectories = []
    size = move_data.shape[0]
    count = 0

    move_data["delta_time"] = np.nan
    move_data["speed"] = np.nan

    for tid in progress_bar(tids):
        filter_nodes = move_data.at[tid, "isNode"] == 1
        size_id = 1 if filter_nodes.shape == () else filter_nodes.shape[0]
        count += size_id

        # y - time of snapped points
        y_ = move_data.at[tid, "time"][~filter_nodes]
        if y_.shape[0] < 2:
            # not enough points to interpolate from
            drop_trajectories.append(tid)
            continue

        assert np.all(
            y_[1:] >= y_[:-1]), "time feature is not in ascending order"

        # x - distance from traj start to snapped points
        x_ = move_data.at[tid,
                          "distFromTrajStartToCurrPoint"][~filter_nodes]

        assert np.all(x_[1:] >= x_[:-1]
                      ), "distance feature is not in ascending order"

        # remove duplicates in distances to avoid np.inf in future
        # interpolation results
        idx_duplicates = np.where(x_[1:] == x_[:-1])[0]
        if idx_duplicates.shape[0] > 0:
            x_ = np.delete(x_, idx_duplicates)
            y_ = np.delete(y_, idx_duplicates)

        if y_.shape[0] < 2:
            drop_trajectories.append(tid)
            continue

        # compute delta_time and distance between points; the shifted
        # difference has no valid value in its last position, so that
        # element is dropped once from each delta
        delta_time = ((shift(y_.astype(np.float64), -1) - y_) /
                      1000.0)[:-1]
        dist_curr_to_next = (shift(x_, -1) - x_)[:-1]
        # Both operands are already trimmed; trimming the quotient again
        # would leave the last segment's speed unchecked.
        speed = dist_curr_to_next / delta_time

        assert np.all(
            delta_time <= max_time_between_adj_points
        ), "delta_time between points cannot be more than {}".format(
            max_time_between_adj_points)
        assert np.all(
            dist_curr_to_next <= max_dist_between_adj_points
        ), "distance between points cannot be more than {}".format(
            max_dist_between_adj_points)
        assert np.all(
            speed <= max_speed
        ), "speed between points cannot be more than {}".format(max_speed)

        assert np.all(x_[1:] >= x_[:-1]
                      ), "distance feature is not in ascending order"

        f_intp = interp1d(x_, y_, fill_value="extrapolate")

        # distances of the node rows, where times must be estimated
        x2_ = move_data.at[tid,
                           "distFromTrajStartToCurrPoint"][filter_nodes]
        assert np.all(x2_[1:] >= x2_[:-1]
                      ), "distances in nodes are not in ascending order"

        intp_result = f_intp(x2_)
        assert np.all(intp_result[1:] >= intp_result[:-1]
                      ), "resulting times are not in ascending order"

        assert ~np.isin(
            np.inf,
            intp_result), "interpolation results with np.inf value(s)"

        # update time features for nodes. initially they are empty.
        values = intp_result.astype(np.int64)
        feature_values_using_filter(move_data, tid, "time", filter_nodes,
                                    values)

        # create delta_time feature
        values = (shift(
            move_data.at[tid, "time"][filter_nodes].astype(np.float64),
            -1,
        ) - move_data.at[tid, "time"][filter_nodes]) / 1000
        feature_values_using_filter(move_data, tid, "delta_time",
                                    filter_nodes, values)

        # create speed feature
        values = (move_data.at[tid, "edgeDistance"][filter_nodes] /
                  move_data.at[tid, "delta_time"][filter_nodes])
        feature_values_using_filter(move_data, tid, "speed", filter_nodes,
                                    values)

    print(count, size)
    print("we still need to drop {} trajectories with only 1 gps point".format(
        len(drop_trajectories)))
    move_data.reset_index(inplace=True)
    idxs_drop = move_data[move_data[label_id].isin(
        drop_trajectories)].index.values
    print("dropping {} rows in {} trajectories with only 1 gps point".format(
        idxs_drop.shape[0], len(drop_trajectories)))
    if idxs_drop.shape[0] > 0:
        print("shape before dropping: {}".format(move_data.shape))
        move_data.drop(index=idxs_drop, inplace=True)
        print("shape after dropping: {}".format(move_data.shape))

    if not inplace:
        return move_data
Example #6
0
def check_time_dist(
    move_data,
    index_name="tid",
    tids=None,
    max_dist_between_adj_points=5000,
    max_time_between_adj_points=900,
    max_speed=30,
    inplace=True,
):
    """
    Used to verify that the trajectories points are in the correct order after
    map matching, considering time and distance.

    Only the rows whose "isNode" differs from 1 are checked.

    Parameters
    ----------
    move_data : dataframe
        The input trajectories data
    index_name: String, optional("tid" by default)
        The name of the column to set as the new index during function
        execution. Indicates the tid column.
    tids: array, optional(None by default)
        The list of the unique keys of the index_name column. When None, the
        unique values of that column are used.
    max_dist_between_adj_points: double, optional(5000 by default)
        The maximum distance between two adjacent points.
    max_time_between_adj_points: double, optional(900 by default)
        The maximum time interval between two adjacent points.
    max_speed: double, optional(30 by default)
        The maximum speed between two adjacent points.
    inplace: boolean, optional(True by default)
        if set to true the original dataframe will be altered,
        otherwise the alteration will be made in a copy, that will be returned.

    Returns
    -------
    move_data : dataframe
        A copy of the original dataframe, with the alterations done by the
        function. (When inplace is False)
    None
        When inplace is True

    Raises
    ------
    AssertionError
        If distances or times are not strictly ascending, if a delta
        distance, delta time or speed exceeds its maximum, or if a delta is
        not positive. These checks are ``assert`` statements, so they are
        skipped when Python runs with ``-O``.
    """

    if not inplace:
        move_data = PandasMoveDataFrame(data=move_data.to_DataFrame())

    if TID not in move_data:
        move_data.generate_tid_based_on_id_datatime()

    if move_data.index.name is not None:
        print("reseting index...")
        move_data.reset_index(inplace=True)

    if tids is None:
        tids = move_data[index_name].unique()

    if move_data.index.name is None:
        print("creating index...")
        move_data.set_index(index_name, inplace=True)

    # First pass: ordering of every trajectory.
    for tid in progress_bar(tids,
                            desc="checking ascending distance and time"):
        filter_ = move_data.at[tid, "isNode"] != 1

        # be sure that distances are in ascending order
        dists = move_data.at[tid, "distFromTrajStartToCurrPoint"][filter_]
        assert np.all(dists[:-1] < dists[1:]
                      ), "distance feature is not in ascending order"

        # be sure that times are in ascending order
        times = move_data.at[tid, "time"][filter_].astype(np.float64)
        assert np.all(times[:-1] < times[1:]
                      ), "time feature is not in ascending order"

    # Second pass: limits on the deltas between adjacent points.
    for tid in progress_bar(
            tids, desc="checking delta_times, delta_dists and speeds"):
        filter_ = move_data.at[tid, "isNode"] != 1

        dists = move_data.at[tid, "distFromTrajStartToCurrPoint"][filter_]
        delta_dists = (shift(dists, -1) -
                       dists)[:-1]  # do not use last element (np.nan)

        assert np.all(
            delta_dists <= max_dist_between_adj_points
        ), "delta_dists must be <= {}".format(max_dist_between_adj_points)

        times = move_data.at[tid, "time"][filter_].astype(np.float64)
        delta_times = ((shift(times, -1) - times) /
                       1000.0)[:-1]  # do not use last element (np.nan)

        assert np.all(
            delta_times <= max_time_between_adj_points
        ), "delta_times must be <= {}".format(max_time_between_adj_points)

        assert np.all(delta_times > 0), "delta_times must be > 0"

        assert np.all(delta_dists > 0), "delta_dists must be > 0"

        speeds = delta_dists / delta_times
        assert np.all(speeds <= max_speed), "speeds > {}".format(max_speed)

    move_data.reset_index(inplace=True)
    if not inplace:
        return move_data