def process(self, point):
    """Feed one trajectory point into the stationary-endpoint detector.

    The detector always works one step behind: each call evaluates the
    previously stored point against the current hypothesis, using the
    time gap between that stored point and ``point``.

    While the stored point stays within ``_stationary_distance_threshold``
    of the hypothesis, the time gaps are accumulated; once the accumulated
    time exceeds ``_stationary_threshold`` the hypothesis is appended to
    ``_endpoints`` and the detector state is reset.

    :param point: indexable record; ``point[0]`` is handed to
        ``delta_float_time`` (presumably a timestamp -- confirm with caller).
    :return: None; all results are stored on ``self``.
    """
    previous = self._prev_point
    if previous is None:
        # Very first point: nothing to compare against yet.
        self._prev_point = point
        return

    if self._hypothesis_point is None:
        if self._endpoint_exists(previous):
            # Still near an already known endpoint: do not open a hypothesis.
            self._prev_point = point
            return
        self._hypothesis_point = previous

    drift = distance(self._hypothesis_point, previous)
    if drift < self._stationary_distance_threshold:
        elapsed = delta_float_time(previous[0], point[0])
        # A single gap larger than everything accumulated so far dominates,
        # so the hypothesis is moved to the point where that gap started.
        if elapsed > self._current_cumulative_dt:
            self._hypothesis_point = previous
        self._current_cumulative_dt += elapsed
        if self._current_cumulative_dt > self._stationary_threshold:
            self._endpoints.append(self._hypothesis_point)
            self._hypothesis_point = None
            self._current_cumulative_dt = 0
    else:
        # Drifted too far away from the hypothesis: start over.
        self._hypothesis_point = None
        self._current_cumulative_dt = 0

    self._prev_point = point
def trajectory_point_to_str(data, index, with_address=True):
    """Render ``data[index]`` as a one-line, human-readable description.

    The line contains the index, the date of the point converted to the
    'US/Pacific' timezone, an optional reverse-geocoded address, the
    coordinates, and the time/distance/speed to the following point.

    :param data: indexable sequence of points; element 0 of a point is
        passed to ``num2date`` and the remaining elements are formatted as
        the two coordinates.
    :param index: position of the point to describe.
    :param with_address: when true, look the address up through
        ``Geocoder().reverse``; otherwise the address is reported as None.
    :return: the formatted string. The metrics part is "NO DATA" when an
        IndexError occurs while computing them (e.g. no following point).
    """
    point = data[index]
    coords = "%s, %s" % tuple(point[1:])

    address = None
    if with_address:
        address = Geocoder().reverse(coords, exactly_one = True).address

    date = num2date(point[0], tz=pytz.timezone('US/Pacific'))

    try:
        following = data[index+1]
        dt = (num2date(following[0]) - date).total_seconds()
        dist = distance(point, following)

        # Speed is reported as 0 when both points share the same time.
        v = 0
        if dt != 0:
            v = ms_to_mph*dist/dt

        # Pick the coarsest time unit that keeps the number readable.
        if dt >= 60*60:
            dt_str = "%.1fh" % (dt/60/60,)
        elif dt >= 60:
            dt_str = "%dmin" % (dt/60,)
        else:
            dt_str = "%ds" % dt

        metrics = "%s; %.2fm; %.fmph" % (dt_str, dist, v)
    except IndexError:
        metrics = "NO DATA"

    summary = (index, date, address, coords, metrics)
    return "Index:%s; Date:%s; Address:%s; Coords: %s; dt,ds,v:%s" % summary
    def process(self, point):
        """Feed one trajectory point into the route detector.

        When no route is in progress, a route is started only if ``point``
        lies near a known endpoint. Otherwise the point extends the current
        route, unless the time gap to the last route point exceeds
        ``_continuity_threshold``, in which case the route is dropped (and
        restarted if the point is still near the starting endpoint).

        A route that reaches the endpoint given by ``_to_endpoint_index()``
        is stored in ``_AtoB_routes`` when it started from endpoint 0 and in
        ``_BtoA_routes`` otherwise.

        :param point: indexable record; ``point[0]`` is handed to
            ``delta_float_time`` (presumably a timestamp -- confirm).
        :return: None; all results are stored on ``self``.
        """
        if self._from_endpoint_index is None:
            # Idle: a route may only begin close to a known endpoint.
            start_index = self._get_closest_endpoint_index(point)
            if start_index is not None:
                self._start_route(point, start_index)
            return

        gap = delta_float_time(self._current_route[-1][0], point[0])
        if gap > self._continuity_threshold:
            if self._verbose:
                dist = distance(point, self._endpoints[self._from_endpoint_index])
                print('Continuity ruined from %s\n    to %s\n   dt=%smin, dist=%s' %
                      (trajectory_point_to_str([self._current_route[-1]], 0),
                       trajectory_point_to_str([point], 0),
                       gap/60., dist))
            nearest = self._get_closest_endpoint_index(point)
            # Decide before stopping: the comparison must use the endpoint
            # the interrupted route started from.
            still_at_start = nearest == self._from_endpoint_index
            if still_at_start and self._verbose:
                print(' !BUT Didnt move too far from beginning though')
            self._stop_route()
            if still_at_start:
                self._start_route(point, nearest)
            return

        self._current_route.append(point)
        nearest = self._get_closest_endpoint_index(point)

        if nearest == self._from_endpoint_index:
            if self._verbose:
                print('made a loop or didnt move')
            self._stop_route()
            self._start_route(point, nearest)
            return

        if nearest == self._to_endpoint_index():
            # Destination reached: file the route under its direction.
            if self._from_endpoint_index == 0:
                routes, message = self._AtoB_routes, 'A to B found: %s TO %s'
            else:
                routes, message = self._BtoA_routes, 'B to A found: %s TO %s'
            routes.append(np.array(self._current_route))
            if self._verbose:
                print(message %
                      (trajectory_point_to_str(self._current_route, 0),
                       trajectory_point_to_str(self._current_route, len(self._current_route)-1)))
            self._stop_route()
def test_delta_dist():
    """Check ``distance`` between consecutive points against known values.

    The fixture includes a repeated point, so one expected distance is 0.
    """
    data_points = [
        [0, 32.936004, -117.23537],
        [0, 32.934912, -117.236338],
        [0, 32.935667, -117.235796],
        [0, 32.935667, -117.235796],
        [0, 32.936034, -117.23537],
    ]
    expected_ds = [151.20243391843636, 97.87941457631524, 0.0, 56.95460850285275]

    # Pair every point with its successor.
    ds = [distance(current, following)
          for current, following in zip(data_points, data_points[1:])]

    np.testing.assert_array_almost_equal(expected_ds, ds)
def test_remove_stationary_noise():
    """
    The slice is very noisy: the position flips between SD and LA every
    10 seconds. It begins in SD, then the noise starts, and it ends in SD.
    The filter is expected to drop the LA samples.
    """
    noisy_slice = remove_duplicate_points(_get_test_data())[561:576]

    fixed_data = apply_filter(noisy_slice, VelocityOutliersFilter())
    kept = len(fixed_data)
    print(kept)
    assert kept == 11

    # Every surviving sample must stay close to the stationary location.
    stationary_point = [0, 33.004964, -117.060207]
    distances = np.array([distance(stationary_point, sample)
                          for sample in fixed_data])

    assert np.all(distances < 246.6)
   def allow(self, current_p, next_p):
       """Decide whether ``next_p`` may follow ``current_p``.

       A point is suspicious when the speed needed to reach it exceeds
       ``_speed_threshold`` or when it lies further than
       ``_distance_threshold`` from ``current_p``. Suspicious points are
       rejected while ``_outliers_counter`` is positive; each rejection
       uses up one unit of that budget. Once the budget is exhausted the
       point is accepted anyway. Every acceptance resets the budget to
       ``_max_number_outliers``.

       :param current_p: last accepted point; element 0 is handed to
           ``delta_float_time`` (presumably a timestamp -- confirm).
       :param next_p: candidate point, same layout as ``current_p``.
       :return: True when the candidate is accepted, False when rejected.
       """
       dist = distance(current_p, next_p)
       dt = delta_float_time(current_p[0], next_p[0])

       # Two samples may share a timestamp; avoid dividing by zero. A real
       # displacement in zero time is treated as infinitely fast, while a
       # repeated sample has zero speed.
       if dt != 0:
           v = ms_to_mph*dist/dt
       elif dist > 0:
           v = float('inf')
       else:
           v = 0

       suspicious = v > self._speed_threshold or dist > self._distance_threshold
       if suspicious and self._outliers_counter > 0:
           self._outliers_counter -= 1
           return False

       self._outliers_counter = self._max_number_outliers
       return True
def test_remove_stationary_noise_return_to_stable():
    """
    The slice is very noisy: the position flips between SD and LA every
    10 seconds. This time it begins on a noisy sample and later comes
    back to SD. The filter must still settle on the stable location even
    though its very first input is noise.
    """
    noisy_slice = remove_duplicate_points(_get_test_data())[563:576]

    fixed_data = apply_filter(noisy_slice, VelocityOutliersFilter(85))

    stationary_point = [0, 33.004964, -117.060207]
    distances = np.array([distance(stationary_point, sample)
                          for sample in fixed_data])

    print(fixed_data)
    assert len(fixed_data) == 7
    # The first samples are still far away; afterwards the filter has converged.
    far_part, near_part = distances[:4], distances[4:]
    assert np.all(far_part > 157000)
    assert np.all(near_part < 246.6)
def find_endpoints_batch(data, stationary_threshold=(60*60) * 3, min_separation=1000):
    """Find indices of distinct long-stay locations in a whole trajectory.

    An index is a stationary candidate when the corresponding entry of
    ``extract_delta_time(data)`` exceeds ``stationary_threshold``.
    Candidates are scanned in order and kept only when they are at least
    ``min_separation`` away from every candidate kept so far.

    :param data: trajectory accepted by ``extract_delta_time`` and indexable
        by integer position.
    :param stationary_threshold: minimum time gap for a stay; the default
        is three hours expressed in seconds.
    :param min_separation: minimum ``distance`` between two kept locations;
        the default of 1000 is meant as 1 km -- assumes ``distance``
        returns metres, confirm against its definition.
    :return: list of indices into ``data``; empty when no gap exceeds the
        threshold.
    """
    delta_time = extract_delta_time(data)

    # Indices where a long time passes before the next sample.
    stationary_points = np.where(delta_time > stationary_threshold)[0]

    # Start from an empty list so that a trajectory without any long stay
    # yields [] instead of failing on stationary_points[0]; the first
    # candidate is always kept because nothing precedes it.
    unique_locations = []
    for candidate in stationary_points:
        near_known = any(
            distance(data[candidate], data[known]) < min_separation
            for known in unique_locations
        )
        if not near_known:
            unique_locations.append(candidate)

    return unique_locations
 def _get_closest_endpoint_index(self, point):
     """Return the index of an endpoint that ``point`` is near, or None.

     Note: despite the name, this yields the first endpoint (in list
     order) closer than ``_distance_to_start_route``, not the nearest one.
     """
     nearby = (position
               for position, endpoint in enumerate(self._endpoints)
               if distance(point, endpoint) < self._distance_to_start_route)
     return next(nearby, None)
 def _endpoint_exists(self, endpoint):
     """Tell whether a known endpoint lies within ``_endpoints_distance``
     of ``endpoint``.

     :return: True if at least one stored endpoint is that close,
         False otherwise (including when no endpoints are stored).
     """
     return any(distance(endpoint, known) < self._endpoints_distance
                for known in self._endpoints)
def extract_delta_dist(data):
    """Return the ``distance`` between each pair of consecutive rows.

    :param data: array whose first axis enumerates the trajectory points.
    :return: numpy array with one entry fewer than ``data`` has rows
        (empty when there are fewer than two rows).
    """
    following_rows = data[1:]
    leading_rows = data[:-1]
    steps = [distance(following, leading)
             for following, leading in zip(following_rows, leading_rows)]
    return np.array(steps)