Example #1
    def _bounding_boxes_greedy_(pd_trajectory, duration_min, diam_max1,
                                duration_max1, diam_max2):
        """ Takes pandas data frame of trajectory data and splits it so that each
        frame contains consequentive data where location is contained within
        a bounding box. Greedy approach is used where a new bounding box is created if the a trajectory point
        does not satisfy any of the bounding box constraints specified by the parameters.
        :params pd_trajectory: pandas frame with trajectory data
        :params duration_min:  float, skip trajectories whose duration is below this value
        :params diam_max1:  float, maximum diameter of first resulting bounding box
        :params duration_max1:  float, maximum duration of first resulting bounding box
        :params diam_max2:  float, maximum diameter of second resulting bounding box
        """
        trajectories = []

        if len(pd_trajectory) == 0:
            return trajectories

        start_idx = 0  # start of current segment
        time_min = pd_trajectory.iloc[0]['time']
        time_max = time_min
        lat_min = pd_trajectory.iloc[0]['latitude']
        lat_max = lat_min
        long_min = pd_trajectory.iloc[0]['longitude']
        long_max = long_min
        idx = 1
        while idx < len(pd_trajectory):
            row = pd_trajectory.iloc[idx]
            time, latitude, longitude = row.time, row.latitude, row.longitude
            time_min = min(time_min, time)
            time_max = max(time_max, time)
            lat_min = min(lat_min, latitude)
            lat_max = max(lat_max, latitude)
            long_min = min(long_min, longitude)
            long_max = max(long_max, longitude)
            if ((haversine_distance(lat_min, long_min, lat_max, long_max) >
                 diam_max1 and time_max - time_min <= duration_max1) or
                (haversine_distance(lat_min, long_min, lat_max, long_max) >
                 diam_max2 and time_max - time_min > duration_max1)):
                # segment found

                # Skip segment if duration is too short
                if time_max - time_min < duration_min:
                    logger.warning(
                        f"Skipping trajectory segment due to short duration {time_max-time_min}"
                    )
                else:
                    trajectories.append(pd_trajectory.iloc[start_idx:idx, :])

                start_idx = idx
                time_min, time_max = time, time
                lat_min, lat_max = latitude, latitude
                long_min, long_max = longitude, longitude
            idx += 1
        if start_idx < len(pd_trajectory) - 1:
            trajectories.append(pd_trajectory.iloc[start_idx:, :])
        return trajectories
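Every example on this page calls a haversine_distance helper that is not shown. Below is a minimal sketch, assuming the arguments are latitudes and longitudes in decimal degrees (in the lat1, lon1, lat2, lon2 order used above) and that the result is in meters; the actual helper in the source project may differ.

import math

def haversine_distance(lat1, lon1, lat2, lon2, radius_m=6371000.0):
    """Great-circle distance in meters between two points given in degrees."""
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    return 2 * radius_m * math.asin(math.sqrt(a))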
Example #2
def conseq_distance_meters(frame):
    '''Frame[col=["latitude", "longitude", ...]] -> distances between consecutive rows'''
    lat, lon = frame['latitude'].to_numpy(), frame['longitude'].to_numpy()
    return np.array([
        haversine_distance(lat[i], lon[i], lat[i + 1], lon[i + 1])
        for i in range(len(lat) - 1)
    ])
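For long trajectories the per-row Python loop above can be replaced by a vectorized computation. This is a sketch under the same assumptions (latitude/longitude columns in degrees, distances in meters) and is not part of the original module.

import numpy as np

def conseq_distance_meters_vectorized(frame, radius_m=6371000.0):
    """Distances in meters between consecutive rows, computed with NumPy."""
    lat = np.radians(frame['latitude'].to_numpy())
    lon = np.radians(frame['longitude'].to_numpy())
    dlat, dlon = np.diff(lat), np.diff(lon)
    a = np.sin(dlat / 2) ** 2 + np.cos(lat[:-1]) * np.cos(lat[1:]) * np.sin(dlon / 2) ** 2
    return 2 * radius_m * np.arcsin(np.sqrt(a))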
Example #3
 def _diam_(pd_trajectory):
     """ Computes the diameter of a trajectory """
     lat_min = pd_trajectory['latitude'].min()
     lat_max = pd_trajectory['latitude'].max()
     long_min = pd_trajectory['longitude'].min()
     long_max = pd_trajectory['longitude'].max()
     return haversine_distance(lat_min, long_min, lat_max, long_max)
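Note that this returns the diagonal of the coordinate bounding box rather than the true maximum pairwise distance between trajectory points; get_max_dist_meters in Example #7 below computes the exact pairwise maximum at quadratic cost.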
Example #4
 def _find_sequence_startpoints(self, allowed_jump, hard_time_gap):
     """ Returns ordered list of indices with time sequence start points. A
     time sequence satisfies that there are no two time points further than
     hard_time_gap away. Furthermore in a time gap more than soft_time_gap,
     the travelled distance must be less than allowed_jump to keep a connected
     time sequence (otherwise we add a break point)
     Note that index 0 is always contained as a start point.
     """
     time_steps = np.diff(self.data[:, 0])
     sequence_startpoints = [0]
     mask = [True for i in range(self.get_n_time_stamps())]
     for i in range(self.get_n_time_stamps() - 1):
         if time_steps[i] >= hard_time_gap:
             continue
         distance_jumped = haversine_distance(self.data[i, 1],
                                              self.data[i, 2],
                                              self.data[i + 1, 1],
                                              self.data[i + 1, 2])
         if distance_jumped > allowed_jump:
             continue
         mask[i + 1] = False
     sequence_startpoints.extend(
         [i for i, val in enumerate(mask) if (val and i != 0)])
     return sequence_startpoints
Example #5
def convolution_filtre(path_1, path_2, timestep, weight_dist_max, weight_dist_min,
                       weight_min_val, filtre_size):
    """ Estimates the distance between path_1 and path_2 around the given
    timestep by averaging accuracy-weighted haversine distances over a window
    of filtre_size time steps. Returns (estimate, lower bound, upper bound). """
    distance = 0
    count_weights = 0
    total_accuracy = 0
    # loop on nearby time-steps
    for ti in range(timestep - int(filtre_size/2), timestep + int(filtre_size/2) + 1):
        # there is no padding, so if we encounter values outside of boundaries, we pass
        if ti < 0 or ti >= path_1.shape[0] or ti >= path_2.shape[0]:
            continue
        # similarly we pass if there are some undefined values
        if path_1[ti,0]==0 or path_1[ti,1]==0 or path_2[ti,0]==0 or path_2[ti,1]==0:
            continue
        # compute weights based on accuracy
        w1 = weight_accuracy(path_1[ti,2], weight_dist_max, weight_dist_min, weight_min_val)
        w2 = weight_accuracy(path_2[ti,2], weight_dist_max, weight_dist_min, weight_min_val)
        # try to replace with nearby values in case of strong inaccuracy
        ti_path_1 = ti
        ti_path_2 = ti
        # check for w1
        if w1==weight_min_val:
            if ti>=1:
                w1_new = weight_accuracy(path_1[ti-1,2], weight_dist_max, weight_dist_min, weight_min_val)
                if w1_new>w1 and path_1[ti-1,0]!=0 and path_1[ti-1,1]!=0:
                    w1 = w1_new
                    ti_path_1 = ti - 1
            if ti<path_1.shape[0]-1:
                w1_new = weight_accuracy(path_1[ti+1,2], weight_dist_max, weight_dist_min, weight_min_val)
                if w1_new>w1 and path_1[ti+1,0]!=0 and path_1[ti+1,1]!=0:
                    w1 = w1_new
                    ti_path_1 = ti + 1
        # check for w2
        if w2==weight_min_val:
            if ti>=1:
                w2_new = weight_accuracy(path_2[ti-1,2], weight_dist_max, weight_dist_min, weight_min_val)
                if w2_new>w2 and path_2[ti-1,0]!=0 and path_2[ti-1,1]!=0:
                    w2 = w2_new
                    ti_path_2 = ti - 1
            if ti<path_2.shape[0]-1:
                w2_new = weight_accuracy(path_2[ti+1,2], weight_dist_max, weight_dist_min, weight_min_val)
                if w2_new>w2 and path_2[ti+1,0]!=0 and path_2[ti+1,1]!=0:
                    w2 = w2_new
                    ti_path_2 = ti + 1
        # compute weights and distances
        count_weights += w1 * w2
        distance += w1*w2*haversine_distance(path_1[ti_path_1,0],path_1[ti_path_1,1],path_2[ti_path_2,0],path_2[ti_path_2,1])
        total_accuracy += w1*w2*(path_1[ti_path_1,2] + path_2[ti_path_2,2])
    # return distance estimations
    if count_weights != 0:
        dist_estimate = distance/count_weights
        dist_min = (distance-total_accuracy)/count_weights
        dist_max = (distance+total_accuracy)/count_weights
        return (dist_estimate, max(0,dist_min), dist_max)
    else:
        return (1e9,1e9,1e9)
Example #6
def add_distance_to_df(df):
    """ Adds distance as a column to the supplied pd.DataFrame"""
    lats1 = np.array(df["latitude"].iloc[:-1].values, dtype=np.float64)
    lats2 = np.array(df["latitude"].iloc[1:].values, dtype=np.float64)
    lons1 = np.array(df["longitude"].iloc[:-1].values, dtype=np.float64)
    lons2 = np.array(df["longitude"].iloc[1:].values, dtype=np.float64)
    distance = [0]
    for lat1, lat2, lon1, lon2 in zip(lats1, lats2, lons1, lons2):
        distance.append(
            haversine_distance(lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2))
    df.insert(len(df.columns), "distance", distance)
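A hypothetical usage, assuming a frame with latitude and longitude columns and a haversine_distance helper like the sketch after Example #1; the function mutates the frame in place.

import pandas as pd

df = pd.DataFrame({
    "latitude": [59.9139, 59.9145, 59.9160],
    "longitude": [10.7522, 10.7530, 10.7550],
})
add_distance_to_df(df)
# df now has a "distance" column: 0 for the first row, then the
# haversine distance in meters from the previous row.
print(df["distance"])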
Example #7
def get_max_dist_meters(frame):
    '''Frame[col=["latitude", "longitude", ...]] -> max distance of rows'''
    if not len(frame): return 0

    res = 0
    lat, lon = frame['latitude'].to_numpy(), frame['longitude'].to_numpy()
    for i, (ai, oi) in enumerate(zip(lat, lon)):
        for (aj, oj) in zip(lat[i + 1:], lon[i + 1:]):
            res = max(res, haversine_distance(ai, oi, aj, oj))

    return res
Example #8
def add_mode_of_transport_to_df(df,
                                stop_duration_threshold=30,
                                distance_threshold=30,
                                pt_search_radius=10):
    """ Adds transport mode as a column to the supplied pd.DataFrame"""

    transport = [mode_of_transport_from_speed(0)]

    center_point = df.iloc[0]
    last_point = df.iloc[0]

    potential_still_points = []
    potential_move_points = []

    stop_points = []
    stop_duration = 0

    for index, current_point in df.iloc[1:].iterrows():

        time_since_last_point = current_point.timefrom - last_point.timeto

        distance_from_center = haversine_distance(lat1=center_point.latitude,
                                                  lon1=center_point.longitude,
                                                  lat2=current_point.latitude,
                                                  lon2=current_point.longitude)

        predicted = mode_of_transport_from_speed(current_point.speed * 3.6)
        potential_move_points.append(predicted)

        if distance_from_center < distance_threshold:
            potential_still_points.append(_TRANSPORT_TYPES[0])
            stop_duration += time_since_last_point

        else:
            if stop_duration > stop_duration_threshold:
                stop_points.append(
                    (index - len(potential_still_points), index))
                potential_still_points.append(_TRANSPORT_TYPES[0])
                transport.extend(potential_still_points)
            else:
                transport.extend(potential_move_points)

            potential_move_points = []
            potential_still_points = []
            center_point = current_point
            stop_duration = 0

        last_point = current_point

    transport.extend(potential_still_points)

    df.insert(len(df.columns), "transport", transport)
Example #9
 def inspect(self, allowed_jump, time_gap):
     """
     Method for inspecting given trajectory data. Outputs:
         - time span
         - all 'gaps' in the data where either the time gap is surpassed
         or the distance moved is more than allowed_jump
     """
     print("\n")
     print(self)
     if self._empty_():
         print("Data array is empty - no inspection possible")
         return
     print(
         "Data covers period {0} - {1}         Time Delta = {2} (h,m,s)        n_timestamps = {3}"
         .format(
             datetime.utcfromtimestamp(
                 self.get_min_time()).strftime('%Y-%m-%d %H:%M:%S'),
             datetime.utcfromtimestamp(
                 self.get_max_time()).strftime('%Y-%m-%d %H:%M:%S'),
             convert_seconds(self.get_max_time() - self.get_min_time()),
             self.get_n_time_stamps()))
     print("GPS Gaps:")
     time_gap_s = time_gap * 60 * 60
     gaps = self._find_sequence_startpoints(allowed_jump, time_gap_s)[1:]
     for gap in gaps:
         distance = haversine_distance(self.data[gap - 1,
                                                 1], self.data[gap - 1, 2],
                                       self.data[gap, 1], self.data[gap, 2])
         print(
             " * {0}   -   {1}        Time Delta = {2} (h,m,s)       Distance {3}m       GPS accuracy {4}m"
             .format(
                 datetime.utcfromtimestamp(
                     self.data[gap - 1, 0]).strftime('%Y-%m-%d %H:%M:%S'),
                 datetime.utcfromtimestamp(
                     self.data[gap, 0]).strftime('%Y-%m-%d %H:%M:%S'),
                 convert_seconds(self.data[gap, 0] - self.data[gap - 1, 0]),
                 round(distance),
                 max(self.data[gap - 1, 3], self.data[gap, 3])))
Example #10
def load_azure_data(query,
                    outlier_threshold=100,
                    include_attributes=_DEFAULT_INCLUDE_ATTRIBUTES,
                    dt_threshold=None,
                    dx_threshold=None):
    """ Loads data from the Azure database and returns a dictionary of
    uuids and user events.

    dt_threshold is None or number. None keeps original data. With number
    data is filter such that 2 conseq events are at least dt_threshold
    apart. NOTE: dt_threshold value is in seconds

    dx_threshold is None or number. None keeps original data. With number
    a distance threshold (in meters) in data is applied such that kepts
    consecutive events have distance > dx_threshold.
    """

    with timer("db connect"):
        db = connect_to_azure_database()

    db_func = re.search(r'(FROM|from) (\w*)', query).group(2)
    with timer(f"db query {db_func}"):
        df = pd.read_sql(
            query,
            con=db,
            parse_dates=["timeto", "timefrom"],
        )
    db.close()

    df = df.sort_values(by='timefrom')
    df = df.reset_index(drop=True)

    # Time coarsening
    if dt_threshold is not None:
        # NOTE: here we get timestamps and timedelta as their diff so convert
        # threshold for comparison
        assert dt_threshold > 0
        dt_threshold = timedelta(days=0, seconds=dt_threshold)
        # Setup for recreating a valid (with correct columns) but empty
        # frame
        keys = list(df.keys())
        df = df[sparsify_mask(df['timefrom'], dt_threshold)]

        if not len(df):
            print('GPS time coarsening yielded empty frame')
            df = pd.DataFrame(columns=keys)

    # Space coarsening
    if dx_threshold is not None:
        assert dx_threshold > 0
        position = np.c_[df['latitude'].to_numpy(), df['longitude'].to_numpy()]
        # Inside filter we want to get distance between rows x, y
        distance = lambda x, y: haversine_distance(x[0], x[1], y[0], y[1])

        keys = list(df.keys())
        df = df[sparsify_mask(position,
                              threshold=dx_threshold,
                              distance=distance)]

        if not len(df):
            print('GPS distance coarsening yielded empty frame')
            df = pd.DataFrame(columns=keys)

    df = df.loc[:, include_attributes]

    data_dict = {}

    for uuid in df.uuid.unique():
        user_data = df.loc[df["uuid"] == uuid]
        data_dict[uuid.lower()] = process_data_frame(user_data,
                                                     outlier_threshold)

    return data_dict
Example #11
 def sqm(self) -> float:
     """ Approximate area of the bounding box in square meters: the
     north-south edge length times the east-west edge length. """
     return haversine_distance(self.minlat, self.minlon, self.maxlat, self.minlon) * \
            haversine_distance(self.minlat, self.minlon, self.minlat, self.maxlon)
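Since only the method is shown, here is a hypothetical, self-contained wrapper (the Box class name is an assumption; the attribute names come from the snippet) that reuses the haversine_distance sketch after Example #1 to show how such an area estimate could be obtained.

from dataclasses import dataclass

@dataclass
class Box:
    minlat: float
    minlon: float
    maxlat: float
    maxlon: float

    def sqm(self) -> float:
        # north-south edge length times east-west edge length, in square meters
        return haversine_distance(self.minlat, self.minlon, self.maxlat, self.minlon) * \
               haversine_distance(self.minlat, self.minlon, self.minlat, self.maxlon)

# e.g. a small box around central Oslo
print(Box(minlat=59.90, minlon=10.70, maxlat=59.95, maxlon=10.80).sqm())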