def process(self, point):
    """Feed the next trajectory point into the stationary-endpoint detector.

    The method keeps a "hypothesis point": a candidate location that is
    promoted to ``self._endpoints`` once the time accumulated near it
    exceeds ``self._stationary_threshold``.

    :param point: the next point of the trajectory; ``point[0]`` is handed
        to ``delta_float_time`` (presumably a timestamp -- confirm against
        the data loader).
    :return: ``None``; results are accumulated on ``self``.
    """
    previous = self._prev_point

    # Very first point: nothing to compare against yet.
    if previous is None:
        self._prev_point = point
        return

    # Without a hypothesis, the previous point becomes one unless an
    # endpoint is already registered close to it.
    if self._hypothesis_point is None:
        if self._endpoint_exists(previous):
            self._prev_point = point
            return
        self._hypothesis_point = previous

    separation = distance(self._hypothesis_point, previous)
    if separation < self._stationary_distance_threshold:
        elapsed = delta_float_time(previous[0], point[0])
        # A single interval longer than everything accumulated so far moves
        # the hypothesis to the most recent point.
        if elapsed > self._current_cumulative_dt:
            self._hypothesis_point = previous
        self._current_cumulative_dt += elapsed

        # Enough time spent near the hypothesis: record it as an endpoint.
        if self._current_cumulative_dt > self._stationary_threshold:
            self._endpoints.append(self._hypothesis_point)
            self._hypothesis_point = None
            self._current_cumulative_dt = 0
    else:
        # Moved too far from the hypothesis point: discard it.
        self._hypothesis_point = None
        self._current_cumulative_dt = 0

    self._prev_point = point
def trajectory_point_to_str(data, index, with_address=True):
    """Render one trajectory point as a single human-readable line.

    The line contains the index, the point's date in the US/Pacific
    timezone, an optional reverse-geocoded address, the coordinates, and the
    elapsed time / distance / speed to the following point.

    :param data: indexable sequence of points. Element 0 of a point is passed
        to ``num2date``; the remaining two elements are printed as the
        coordinates.
    :param index: position of the point inside ``data``.
    :param with_address: when true, resolve the address through ``Geocoder``;
        otherwise the address is reported as ``None``.
    :return: the formatted description. The metrics part reads ``NO DATA``
        when an ``IndexError`` is raised while computing it (for example when
        there is no following point).
    """
    point = data[index]
    coords = "%s, %s" % tuple(point[1:])

    address = None
    if with_address:
        address = Geocoder().reverse(coords, exactly_one=True).address

    pacific = pytz.timezone('US/Pacific')
    date = num2date(point[0], tz=pacific)

    try:
        following = data[index + 1]
        elapsed = (num2date(following[0]) - date).total_seconds()
        travelled = distance(point, following)

        # No elapsed time means no meaningful speed; report zero.
        if elapsed != 0:
            speed = ms_to_mph * travelled / elapsed
        else:
            speed = 0

        # Pick the most readable unit for the elapsed time.
        if elapsed < 60:
            elapsed_str = "%ds" % elapsed
        elif elapsed < 60 * 60:
            elapsed_str = "%dmin" % (elapsed / 60,)
        else:
            elapsed_str = "%.1fh" % (elapsed / 60 / 60,)

        metrics = "%s; %.2fm; %.fmph" % (elapsed_str, travelled, speed)
    except IndexError:
        metrics = "NO DATA"

    fields = (index, date, address, coords, metrics)
    return "Index:%s; Date:%s; Address:%s; Coords: %s; dt,ds,v:%s" % fields
def process(self, point):
    """Feed the next trajectory point into the route builder.

    While no route is in progress, a route is started as soon as a point lies
    close to a known endpoint. While a route is in progress, the point is
    appended to it, and the route is finished, restarted or dropped depending
    on the time gap to the previous point and on which endpoint (if any) the
    new point is close to.

    :param point: the next point of the trajectory; ``point[0]`` is handed
        to ``delta_float_time`` (presumably a timestamp -- confirm).
    :return: ``None``; completed routes are appended to
        ``self._AtoB_routes`` / ``self._BtoA_routes``.
    """
    if self._from_endpoint_index is None:
        # No route in progress: start one only if we are near an endpoint.
        index = self._get_closest_endpoint_index(point)
        if index is not None:
            self._start_route(point, index)
        return
    else:
        # A route is in progress: check the time gap to its last point.
        dt = delta_float_time(self._current_route[-1][0], point[0])
        if dt > self._continuity_threshold:
            if self._verbose:
                # NOTE: trajectory_point_to_str is called with its default
                # with_address=True, so verbose output performs geocoder lookups.
                dist = distance(point, self._endpoints[self._from_endpoint_index])
                print('Continuity ruined from %s\n to %s\n dt=%smin, dist=%s' %
                      (trajectory_point_to_str([self._current_route[-1]], 0),
                       trajectory_point_to_str([point], 0),
                       dt/60.,
                       dist))
            index = self._get_closest_endpoint_index(point)
            if index == self._from_endpoint_index:
                # Still near the endpoint the route started from: restart the
                # route from this point instead of just dropping it.
                if self._verbose:
                    print(' !BUT Didnt move too far from beginning though')
                self._stop_route()
                self._start_route(point, index)
            else:
                self._stop_route()
            return

    # The point continues the current route.
    self._current_route.append(point)
    index = self._get_closest_endpoint_index(point)
    if index == self._from_endpoint_index:
        # Back at (or never left) the starting endpoint: restart the route.
        if self._verbose:
            print('made a loop or didnt move')
        self._stop_route()
        self._start_route(point, index)
    elif index == self._to_endpoint_index():
        # Reached the destination endpoint: store the finished route.
        # A route that started at endpoint index 0 is recorded as A-to-B,
        # any other start index as B-to-A.
        if self._from_endpoint_index == 0:
            self._AtoB_routes.append(np.array(self._current_route))
            if self._verbose:
                print('A to B found: %s TO %s' %
                      (trajectory_point_to_str(self._current_route, 0),
                       trajectory_point_to_str(self._current_route, len(self._current_route)-1)))
            self._stop_route()
        else:
            self._BtoA_routes.append(np.array(self._current_route))
            if self._verbose:
                print('B to A found: %s TO %s' %
                      (trajectory_point_to_str(self._current_route, 0),
                       trajectory_point_to_str(self._current_route, len(self._current_route)-1)))
            self._stop_route()
def test_delta_dist():
    """Check ``distance`` between consecutive points against known values."""
    track = [
        [0, 32.936004, -117.23537],
        [0, 32.934912, -117.236338],
        [0, 32.935667, -117.235796],
        [0, 32.935667, -117.235796],
        [0, 32.936034, -117.23537],
    ]
    expected_ds = [151.20243391843636,
                   97.87941457631524,
                   0.0,
                   56.95460850285275]

    # Pair every point with its successor.
    ds = [distance(start, end) for start, end in zip(track, track[1:])]

    np.testing.assert_array_almost_equal(expected_ds, ds)
def test_remove_stationary_noise():
    """
    The selected slice of test data is very noisy: the position flips
    between SD and LA every 10 seconds. It begins in SD, goes through the
    noise and eventually comes back to SD. The filter is expected to drop
    the LA points.
    """
    noisy_slice = remove_duplicate_points(_get_test_data())[561:576]
    filtered = apply_filter(noisy_slice, VelocityOutliersFilter())
    print(len(filtered))
    assert len(filtered) == 11

    # Every surviving point must stay close to the stationary location.
    anchor = [0, 33.004964, -117.060207]
    offsets = np.array([distance(anchor, p) for p in filtered])
    assert np.all(offsets < 246.6)
def allow(self, current_p, next_p):
    """Decide whether ``next_p`` is an acceptable successor of ``current_p``.

    A step is suspicious when its speed (``ms_to_mph * dist / dt``) exceeds
    ``self._speed_threshold`` or its length exceeds
    ``self._distance_threshold``. Suspicious steps are rejected while
    ``self._outliers_counter`` is positive, and each rejection decrements the
    counter. Once the counter reaches zero the step is accepted anyway, so a
    long run of "outliers" eventually becomes the new accepted position. Any
    accepted step resets the counter to ``self._max_number_outliers``.

    :param current_p: last accepted point; ``current_p[0]`` is handed to
        ``delta_float_time`` (presumably a timestamp -- confirm).
    :param next_p: candidate point, same layout as ``current_p``.
    :return: ``True`` if the candidate is accepted, ``False`` if rejected.
    """
    dist = distance(current_p, next_p)
    dt = delta_float_time(current_p[0], next_p[0])

    # Two points may share a timestamp. Dividing by a zero ``dt`` would raise
    # ZeroDivisionError for plain floats, so handle it explicitly: a jump in
    # zero time is infinitely fast, no jump in zero time is no motion at all.
    if dt != 0:
        v = ms_to_mph * dist / dt
    elif dist > 0:
        v = float('inf')
    else:
        v = 0

    too_fast = v > self._speed_threshold
    too_far = dist > self._distance_threshold
    if (too_fast or too_far) and self._outliers_counter > 0:
        self._outliers_counter -= 1
        return False

    # Accepted (either a normal step, or the outlier budget is exhausted).
    self._outliers_counter = self._max_number_outliers
    return True
def test_remove_stationary_noise_return_to_stable():
    """
    The selected slice of test data is very noisy: the position flips
    between SD and LA every 10 seconds. This slice begins on a noisy point
    and only later returns to SD. The test verifies that the filter still
    settles on the stable point even though its first input is noise.
    """
    noisy_slice = remove_duplicate_points(_get_test_data())[563:576]
    filtered = apply_filter(noisy_slice, VelocityOutliersFilter(85))

    anchor = [0, 33.004964, -117.060207]
    offsets = np.array([distance(anchor, p) for p in filtered])
    print(filtered)

    assert len(filtered) == 7
    # The filter converges after a few steps: the leading points are still
    # far away, the rest are near the stationary location.
    far_part, near_part = offsets[:4], offsets[4:]
    assert np.all(far_part > 157000)
    assert np.all(near_part < 246.6)
def find_endpoints_batch(data, stationary_threshold=3 * 60 * 60, max_distance=1000):
    """Find indices of distinct locations where the trajectory stayed still.

    A point is "stationary" when the time delta reported for it by
    ``extract_delta_time`` exceeds ``stationary_threshold``. Stationary
    points that lie within ``max_distance`` of an already selected location
    are treated as the same location and skipped.

    :param data: trajectory accepted by ``extract_delta_time`` and indexable
        by integer position.
    :param stationary_threshold: minimum time delta, in seconds, for a point
        to count as stationary (default: 3 hours).
    :param max_distance: two stationary points closer than this are merged
        (default: 1000, i.e. 1 km driving distance).
    :return: list of indices into ``data``, in order of first appearance;
        empty when no point is stationary.
    """
    # Time deltas along the trajectory, in seconds.
    delta_time = extract_delta_time(data)
    stationary_points = np.where(delta_time > stationary_threshold)[0]

    # Start from an empty result instead of seeding it with
    # ``stationary_points[0]``: the first point is added by the loop anyway,
    # and a trajectory without stationary points no longer raises IndexError.
    unique_locations = []
    for s in stationary_points:
        already_known = any(
            distance(data[s], data[u]) < max_distance
            for u in unique_locations
        )
        if not already_known:
            unique_locations.append(s)
    return unique_locations
def _get_closest_endpoint_index(self, point):
    """Return the index of an endpoint that ``point`` is close to.

    Endpoints are scanned in order and the first one whose distance to
    ``point`` is below ``self._distance_to_start_route`` wins; despite the
    name, no search for the nearest endpoint is performed.

    :param point: point to test against ``self._endpoints``.
    :return: index into ``self._endpoints``, or ``None`` if no endpoint is
        close enough.
    """
    nearby = (
        position
        for position, endpoint in enumerate(self._endpoints)
        if distance(point, endpoint) < self._distance_to_start_route
    )
    return next(nearby, None)
def _endpoint_exists(self, endpoint):
    """Tell whether an already known endpoint lies near ``endpoint``.

    :param endpoint: candidate point to compare with ``self._endpoints``.
    :return: ``True`` if some known endpoint is closer than
        ``self._endpoints_distance``, ``False`` otherwise.
    """
    return any(
        distance(endpoint, known) < self._endpoints_distance
        for known in self._endpoints
    )
def extract_delta_dist(data):
    """Compute the distance between every pair of consecutive points.

    :param data: array whose first axis enumerates the trajectory points.
    :return: NumPy array with one entry fewer than ``data`` has points; entry
        ``i`` is ``distance(data[i + 1], data[i])``.
    """
    # Walk the trajectory as (next, current) pairs.
    steps = zip(data[1:], data[:-1])
    return np.array([distance(later, earlier) for later, earlier in steps])