예제 #1
0
파일: geocode.py 프로젝트: amjadm61/ichnaea
    def region(self, lat, lon):
        """
        Return the code of the region matching the given position.

        Candidates are narrowed in three passes: the R-tree lookup, the
        buffered shapes and finally the prepared precise shapes. Any
        remaining ambiguity is resolved by comparing distances from the
        position to the boundary coordinates of the candidate regions.
        Returns None if no region matches.
        """
        def boundary_distances(candidates):
            # Map each distance (position -> boundary coordinate) to the
            # region code owning that coordinate. Equal distances overwrite
            # earlier entries, so the last candidate wins such ties.
            found = {}
            for candidate in candidates:
                boundary = self._shapes[candidate].boundary
                if isinstance(boundary,
                              geometry.base.BaseMultipartGeometry):
                    vertices = []
                    for part in boundary.geoms:
                        vertices.extend(part.coords)
                else:
                    vertices = boundary.coords
                for vertex in vertices:
                    # Coordinates are stored (lon, lat), matching the
                    # Point(lon, lat) construction below.
                    separation = geocalc.distance(
                        vertex[1], vertex[0], lat, lon)
                    found[separation] = candidate
            return found

        # Coarse but fast: query the R-tree with the point's bounds.
        point = geometry.Point(lon, lat)
        candidates = [self._tree_ids[id_]
                      for id_ in self._tree.intersection(point.bounds)]
        if not candidates:
            return None

        # Second pass: the buffered polygon shapes.
        in_buffer = [candidate for candidate in candidates
                     if self._buffered_shapes[candidate].contains(point)]
        if not in_buffer:
            return None
        if len(in_buffer) == 1:
            return in_buffer[0]

        # Third pass: the precise polygon shapes.
        exact = [candidate for candidate in in_buffer
                 if self._prepared_shapes[candidate].contains(point)]
        if len(exact) == 1:
            return exact[0]

        if exact:
            # Several overlapping precise regions contain the point: pick
            # the one owning the largest recorded boundary distance.
            by_distance = boundary_distances(exact)
            return by_distance[max(by_distance)]

        # No precise region contains the point: pick the buffered region
        # owning the closest boundary coordinate.
        by_distance = boundary_distances(in_buffer)
        return by_distance[min(by_distance)]
예제 #2
0
def cluster_wifis(networks):
    """
    Group networks into clusters based on their pairwise distance.

    Only clusters holding at least MIN_WIFIS_IN_CLUSTER networks are
    returned. Otherwise someone could combine one real network with a
    fake (and therefore not found) one to learn the position of the
    real network.
    """
    count = len(networks)
    if count < MIN_WIFIS_IN_CLUSTER:
        # Too few networks to ever form a valid cluster.
        return []

    positions = networks[['lat', 'lon']]
    if count == 2:
        first, second = positions[0], positions[1]
        apart = distance(first[0], first[1], second[0], second[1])
        if apart <= MAX_WIFI_CLUSTER_METERS:
            # The two networks agree, so they form a single cluster.
            return [networks]
        # They disagree: two clusters of size one, neither large enough.
        return []

    # Build the condensed distance matrix (one entry per unordered pair).
    # This skips the square form, which would hold every value twice plus
    # a diagonal of zeros. Lengths below 2 were handled above.
    # See scipy.spatial.distance.squareform and
    # https://stackoverflow.com/questions/13079563
    pair_count = count * (count - 1) // 2
    dist_matrix = numpy.zeros(pair_count, dtype=numpy.double)
    pairs = itertools.combinations(positions, 2)
    for index, (left, right) in enumerate(pairs):
        dist_matrix[index] = distance(left[0], left[1], right[0], right[1])

    linkage = hierarchy.linkage(dist_matrix, method='complete')
    labels = hierarchy.fcluster(
        linkage, MAX_WIFI_CLUSTER_METERS, criterion='distance', depth=2)

    # Collect the networks under their assigned cluster label.
    grouped = defaultdict(list)
    for label, network in zip(labels, networks):
        grouped[label].append(network)

    return [numpy.array(members, dtype=NETWORK_DTYPE)
            for members in grouped.values()
            if len(members) >= MIN_WIFIS_IN_CLUSTER]
예제 #3
0
파일: cell.py 프로젝트: mate1983/ichnaea
def aggregate_cell_position(networks, min_accuracy, max_accuracy):
    """
    Estimate a position from the given cluster of networks.

    Returns a tuple of floats (lat, lon, accuracy, score). The accuracy
    is clamped to the range [min_accuracy, max_accuracy].
    """
    def clamp(value):
        # Bound the value by the requested accuracy limits.
        return min(max(value, min_accuracy), max_accuracy)

    if len(networks) == 1:
        # A single network is reported as-is, using its own radius.
        only = networks[0]
        lat = only['lat']
        lon = only['lon']
        radius = clamp(only['radius'])
        return (float(lat), float(lon), float(radius), float(only['score']))

    coordinates = numpy.array(
        [(entry['lat'], entry['lon']) for entry in networks],
        dtype=numpy.double)

    # Weight every network by its score over the squared signal.
    weighting = numpy.array(
        [entry['score'] / math.pow(entry['signal'], 2) for entry in networks],
        dtype=numpy.double)

    lat, lon = numpy.average(coordinates, axis=0, weights=weighting)
    total_score = networks['score'].sum()

    # The accuracy guess is the 95th percentile of the distances from
    # the averaged position to each network position.
    spread = numpy.array(
        [distance(lat, lon, entry['lat'], entry['lon'])
         for entry in networks],
        dtype=numpy.double)
    accuracy = clamp(numpy.percentile(spread, 95))

    return (float(lat), float(lon), float(accuracy), float(total_score))
예제 #4
0
 def test_circle_radius(self):
     # Two circles of radius 100 whose centers are slightly apart.
     first = (1.0, 1.0, 100.0)
     second = (1.001, 1.001, 100.0)
     circles = numpy.array([first, second], dtype=numpy.double)
     lat, lon, radius = aggregate_position(circles, 10.0)
     self.assertEqual((lat, lon), (1.0005, 1.0005))
     # The radius must reach from the result to a center plus its radius.
     expected_radius = distance(lat, lon, 1.0, 1.0) + 100.0
     self.assertAlmostEqual(expected_radius, radius, 7)
예제 #5
0
    def best_cluster(self):
        """
        Return the best cluster from this collection.

        With at most one entry the collection itself is returned.
        Otherwise one candidate cluster is built per result and the
        candidate with the highest summed score wins, ties being broken
        by the highest individual score.
        """
        if len(self) <= 1:
            return self

        ordered = sorted(self, key=operator.attrgetter('accuracy'))

        def ranking(members):
            # (cumulative score, best individual score)
            scores = [member.score for member in members]
            return (sum(scores), max(scores))

        candidates = []
        for index, anchor in enumerate(ordered):
            # Allow a 50% buffer zone around each result.
            anchor_reach = anchor.accuracy * 1.5
            members = [anchor]
            # Only later results are compared (upper triangle).
            for other in ordered[index + 1:]:
                reach = max(anchor_reach, other.accuracy * 1.5)
                apart = distance(anchor.lat, anchor.lon,
                                 other.lat, other.lon)
                if apart <= reach:
                    members.append(other)
            candidates.append(members)

        candidates.sort(key=ranking, reverse=True)
        return candidates[0]
예제 #6
0
    def aggregate_obs(self):
        """
        Aggregate the observations into a weighted position summary.

        Returns None if the diagonal of the observations' bounding box
        is longer than MAX_DIST_METERS, otherwise a dict describing the
        weighted average position, bounding box, radius, region and the
        bounded samples / weight values.
        """
        positions = numpy.array(
            [(item.lat, item.lon) for item in self.observations],
            dtype=numpy.double)

        # Column-wise extremes give the bounding box of all observations.
        max_lat, max_lon = positions.max(axis=0)
        min_lat, min_lon = positions.min(axis=0)

        diagonal = distance(min_lat, min_lon, max_lat, max_lon)
        if diagonal > self.MAX_DIST_METERS:
            return None

        weights = numpy.array(
            [item.weight for item in self.observations],
            dtype=numpy.double)

        center = numpy.average(positions, axis=0, weights=weights)
        lat = float(center[0])
        lon = float(center[1])
        radius = circle_radius(lat, lon, max_lat, max_lon, min_lat, min_lon)
        region = GEOCODER.region(lat, lon)

        samples, weight = self.bounded_samples_weight(
            len(self.observations), float(weights.sum()))

        summary = {}
        summary['positions'] = positions
        summary['weights'] = weights
        summary['lat'] = lat
        summary['lon'] = lon
        summary['max_lat'] = float(max_lat)
        summary['min_lat'] = float(min_lat)
        summary['max_lon'] = float(max_lon)
        summary['min_lon'] = float(min_lon)
        summary['radius'] = radius
        summary['region'] = region
        summary['samples'] = samples
        summary['weight'] = weight
        return summary
예제 #7
0
파일: cell.py 프로젝트: amolk4games/ichnaea
def aggregate_cell_position(networks, min_accuracy, max_accuracy):
    """
    Compute the aggregate user position for a cluster of networks.

    The result is a tuple of floats: latitude, longitude, an accuracy
    estimate bounded by min_accuracy / max_accuracy and a combined score.
    """
    def bounded(value):
        # Keep the value within [min_accuracy, max_accuracy].
        return min(max(value, min_accuracy), max_accuracy)

    if len(networks) == 1:
        # One network only: return its own position, radius and score.
        single = networks[0]
        lat = single['lat']
        lon = single['lon']
        radius = bounded(single['radius'])
        score = single['score']
        return (float(lat), float(lon), float(radius), float(score))

    positions = numpy.array(
        [(item['lat'], item['lon']) for item in networks],
        dtype=numpy.double)

    # Each network counts with its score divided by the squared signal.
    factors = numpy.array(
        [item['score'] / math.pow(item['signal'], 2) for item in networks],
        dtype=numpy.double)

    lat, lon = numpy.average(positions, axis=0, weights=factors)
    score = networks['score'].sum()

    # Use the 95th percentile of the distances between the averaged
    # position and every network position as the accuracy guess.
    offsets = numpy.array(
        [distance(lat, lon, item['lat'], item['lon']) for item in networks],
        dtype=numpy.double)
    accuracy = bounded(numpy.percentile(offsets, 95))

    return (float(lat), float(lon), float(accuracy), float(score))
예제 #8
0
파일: wifi.py 프로젝트: cemoulto/ichnaea
def cluster_wifis(networks):
    """
    Split the networks into distance-based clusters.

    Clusters with fewer than MIN_WIFIS_IN_CLUSTER networks are dropped.
    Otherwise a query mixing one real network with a fake (and therefore
    not found) one would reveal the position of the real network.
    """
    total = len(networks)
    if total < MIN_WIFIS_IN_CLUSTER:
        # Not enough networks for any valid cluster.
        return []

    positions = networks[['lat', 'lon']]
    if total == 2:
        one, two = positions[0], positions[1]
        separation = distance(one[0], one[1], two[0], two[1])
        if separation <= MAX_WIFI_CLUSTER_METERS:
            # Both networks agree and form the only cluster.
            return [networks]
        # Two clusters of size one; neither is large enough to return.
        return []

    # Fill the condensed distance matrix with one value per unordered
    # pair, avoiding the redundant square form and its zero diagonal.
    # The special cases for fewer than 2 networks were handled above.
    # See scipy.spatial.distance.squareform and
    # https://stackoverflow.com/questions/13079563
    condensed_size = total * (total - 1) // 2
    dist_matrix = numpy.zeros(condensed_size, dtype=numpy.double)
    for slot, pair in enumerate(itertools.combinations(positions, 2)):
        start, end = pair
        dist_matrix[slot] = distance(start[0], start[1], end[0], end[1])

    link_matrix = hierarchy.linkage(dist_matrix, method='complete')
    cluster_ids = hierarchy.fcluster(
        link_matrix, MAX_WIFI_CLUSTER_METERS, criterion='distance', depth=2)

    # Bucket the networks by the cluster id assigned to each of them.
    buckets = defaultdict(list)
    for cluster_id, network in zip(cluster_ids, networks):
        buckets[cluster_id].append(network)

    return [numpy.array(bucket, dtype=NETWORK_DTYPE)
            for bucket in buckets.values()
            if len(bucket) >= MIN_WIFIS_IN_CLUSTER]
예제 #9
0
 def test_simple_distance(self):
     # Two nearby points; the result is checked to 4 decimal places.
     start = (44.0337065, -79.4908184)
     end = (44.0347065, -79.4918184)
     delta = distance(start[0], start[1], end[0], end[1])
     self.assertAlmostEqual(delta, 136.9483, 4)
예제 #10
0
 def test_simple_distance(self):
     # Simple case: both points lie close to each other.
     origin_lat, origin_lon = 44.0337065, -79.4908184
     target_lat, target_lon = 44.0347065, -79.4918184
     measured = distance(origin_lat, origin_lon, target_lat, target_lon)
     self.assertAlmostEqual(measured, 136.9483, 4)
예제 #11
0
    def test_simple_distance(self):
        # Two nearby points; the result is compared as a string
        # formatted to 4 decimal places.
        start = (44.0337065, -79.4908184)
        end = (44.0347065, -79.4918184)
        delta = distance(start[0], start[1], end[0], end[1])
        self.assertEqual('%0.4f' % delta, '0.1369')
예제 #12
0
    def test_simple_distance(self):
        # Simple case: both points lie close to each other. The result
        # is compared after formatting it to 4 decimal places.
        origin_lat, origin_lon = 44.0337065, -79.4908184
        target_lat, target_lon = 44.0347065, -79.4918184
        measured = distance(origin_lat, origin_lon, target_lat, target_lon)
        self.assertEqual("%0.4f" % measured, '0.1369')
예제 #13
0
파일: provider.py 프로젝트: awoland/ichnaea
    def _get_clusters(self, wifi_signals, queried_wifis):
        """
        Cluster the queried WiFi networks by their position.

        BSSIDs that are numerically very similar are filtered out first,
        assuming they're multiple interfaces on the same base station or
        such. The remaining networks are ordered by the signal strength
        found in wifi_signals (highest value first) and clustered.

        Returns the clusters containing at least MIN_WIFIS_IN_CLUSTER
        networks.
        """
        dissimilar_keys = set(self._filter_bssids_by_similarity(
            [w.key for w in queried_wifis]))

        if len(dissimilar_keys) < len(queried_wifis):
            # Record how many networks were dropped as too similar.
            self.stat_time(
                'wifi.provided_too_similar',
                len(queried_wifis) - len(dissimilar_keys))

        wifi_networks = [
            Network(w.key, w.lat, w.lon, w.range)
            for w in queried_wifis if w.key in dissimilar_keys]

        if len(wifi_networks) < MIN_WIFIS_IN_QUERY:
            # We didn't get enough matches. Only a stat is emitted here;
            # clustering still proceeds.
            self.stat_count('wifi.found_too_few')

        # Sort networks by signal strengths in query, highest first.
        # A key function is used instead of a cmp-style comparator: the
        # positional comparator argument and the cmp() builtin only exist
        # on Python 2. The sort stays stable with reverse=True, so
        # networks with equal signals keep their relative order.
        wifi_networks.sort(
            key=lambda network: wifi_signals[network.key], reverse=True)

        clusters = self._cluster_elements(
            wifi_networks,
            lambda a, b: distance(a.lat, a.lon, b.lat, b.lon),
            MAX_WIFI_CLUSTER_KM)

        # NOTE(review): the rationale below is inherited from the original
        # code; the cluster selection and position estimate it describes
        # are not performed in this method.
        #
        # The selected cluster is the one with the most points, larger
        # than MIN_WIFIS_IN_CLUSTER; its position is estimated taking up-to
        # MAX_WIFIS_IN_CLUSTER worth of points from the cluster, which is
        # pre-sorted in signal-strength order due to the way we built the
        # clusters.
        #
        # The reasoning here is that if we have >1 cluster at all, we probably
        # have some bad data -- likely an AP or set of APs associated with a
        # single antenna that moved -- since a user shouldn't be able to hear
        # multiple groups 500m apart.
        #
        # So we're trying to select a cluster that's most-likely good data,
        # which we assume to be the one with the most points in it.
        #
        # The reason we take a subset of those points when estimating location
        # is that we're doing a (non-weighted) centroid calculation, which is
        # itself unbalanced by distant elements. Even if we did a weighted
        # centroid here, using radio intensity as a proxy for distance has an
        # error that increases significantly with distance, so we'd have to
        # underweight pretty heavily.

        return [c for c in clusters if len(c) >= MIN_WIFIS_IN_CLUSTER]
예제 #14
0
    def test_out_of_range(self):
        # Incoming data isn't always sanitized, so invalid coordinates
        # can reach the distance function. It must not error out on
        # them; the result is compared formatted to 4 decimal places.
        invalid = (-100.0, -186.0)
        origin = (0.0, 0.0)
        delta = distance(invalid[0], invalid[1], origin[0], origin[1])
        self.assertEqual('%0.4f' % delta, '8901.7476')
예제 #15
0
    def test_out_of_range(self):
        # The distance function has to cope with invalid coordinates,
        # because the incoming data is not always sanitized. Make sure
        # it returns a value instead of raising.
        bad_lat, bad_lon = -100.0, -186.0
        zero_lat, zero_lon = 0.0, 0.0
        measured = distance(bad_lat, bad_lon, zero_lat, zero_lon)
        self.assertEqual("%0.4f" % measured, '8901.7476')
예제 #16
0
    def _get_clusters(self, wifi_signals, queried_wifis):
        """
        Cluster the queried WiFi networks by their position.

        BSSIDs that are numerically very similar are filtered out first,
        assuming they're multiple interfaces on the same base station or
        such. The remaining networks are ordered by the signal strength
        found in wifi_signals (highest value first) and clustered.

        Returns the clusters containing at least MIN_WIFIS_IN_CLUSTER
        networks.
        """
        dissimilar_keys = set(
            self._filter_bssids_by_similarity([w.key for w in queried_wifis]))

        if len(dissimilar_keys) < len(queried_wifis):
            # Record how many networks were dropped as too similar.
            self.stat_time('wifi.provided_too_similar',
                           len(queried_wifis) - len(dissimilar_keys))

        wifi_networks = [
            Network(w.key, w.lat, w.lon, w.range) for w in queried_wifis
            if w.key in dissimilar_keys
        ]

        if len(wifi_networks) < MIN_WIFIS_IN_QUERY:
            # We didn't get enough matches. Only a stat is emitted here;
            # clustering still proceeds.
            self.stat_count('wifi.found_too_few')

        # Sort networks by signal strengths in query, highest first.
        # list.sort() no longer accepts a positional comparator and cmp()
        # is gone on Python 3, so a key function is used. reverse=True
        # keeps the sort stable: equal signals retain their input order.
        wifi_networks.sort(
            key=lambda network: wifi_signals[network.key], reverse=True)

        clusters = self._cluster_elements(
            wifi_networks, lambda a, b: distance(a.lat, a.lon, b.lat, b.lon),
            MAX_WIFI_CLUSTER_KM)

        # NOTE(review): the rationale below is inherited from the original
        # code; the cluster selection and position estimate it describes
        # are not performed in this method.
        #
        # The selected cluster is the one with the most points, larger
        # than MIN_WIFIS_IN_CLUSTER; its position is estimated taking up-to
        # MAX_WIFIS_IN_CLUSTER worth of points from the cluster, which is
        # pre-sorted in signal-strength order due to the way we built the
        # clusters.
        #
        # The reasoning here is that if we have >1 cluster at all, we probably
        # have some bad data -- likely an AP or set of APs associated with a
        # single antenna that moved -- since a user shouldn't be able to hear
        # multiple groups 500m apart.
        #
        # So we're trying to select a cluster that's most-likely good data,
        # which we assume to be the one with the most points in it.
        #
        # The reason we take a subset of those points when estimating location
        # is that we're doing a (non-weighted) centroid calculation, which is
        # itself unbalanced by distant elements. Even if we did a weighted
        # centroid here, using radio intensity as a proxy for distance has an
        # error that increases significantly with distance, so we'd have to
        # underweight pretty heavily.

        return [c for c in clusters if len(c) >= MIN_WIFIS_IN_CLUSTER]
예제 #17
0
    def test_antipodal(self):
        # Antipodal points (opposite sides of the planet) suffer from a
        # round-off error in the standard haversine calculation, which
        # is extremely old and assumes fixed precision math instead of
        # IEEE floats.
        north = (90.0, 0.0)
        south = (-90.0, 0)
        delta = distance(north[0], north[1], south[0], south[1])
        self.assertEqual("%0.4f" % delta, '20015.0868')
예제 #18
0
    def test_antipodal(self):
        # Points on opposite sides of the planet (antipodal) expose a
        # round-off error of the standard haversine calculation, which
        # is extremely old and assumes fixed precision math rather than
        # IEEE floats.
        pole_lat, pole_lon = 90.0, 0.0
        opposite_lat, opposite_lon = -90.0, 0
        measured = distance(pole_lat, pole_lon, opposite_lat, opposite_lon)
        self.assertEqual('%0.4f' % measured, '20015.0868')
    def confirm_station_obs(self):
        """
        Return True if the station has a position and no observation
        lies farther than MAX_DIST_METERS away from it, False otherwise.
        """
        if not self.has_position():
            # A station without a position is never confirmed.
            return False

        for item in self.observations:
            apart = distance(item.lat, item.lon,
                             self.station.lat, self.station.lon)
            if apart > self.MAX_DIST_METERS:
                # One observation too far away is enough to reject.
                return False
        return True
예제 #20
0
    def confirm_station_obs(self):
        """
        Tell whether the observations confirm the station's position.

        False is returned when the station has no position or when any
        observation is more than MAX_DIST_METERS away from the station.
        """
        if not self.has_position():
            return False

        # Stops at the first observation found to be too far away.
        too_far = any(
            distance(entry.lat, entry.lon,
                     self.station.lat, self.station.lon) >
            self.MAX_DIST_METERS
            for entry in self.observations)
        return not too_far
예제 #21
0
def estimate_accuracy(lat, lon, points, minimum):
    """
    Return an accuracy estimate for the position, at least `minimum`.

    A single point contributes its own range. Otherwise the largest
    distance between the position and any of the points is used,
    multiplied by 1000.
    """
    if len(points) == 1:
        return max(points[0].range, minimum)

    # Terrible approximation, but hopefully better than the old
    # "worst-case range" one: take the maximum distance from the
    # location to any of the provided points.
    farthest = max(
        distance(to_degrees(lat), to_degrees(lon),
                 to_degrees(entry.lat), to_degrees(entry.lon)) * 1000
        for entry in points)
    return max(farthest, minimum)
예제 #22
0
파일: tasks.py 프로젝트: elkos/ichnaea
def _nearest_tower(missing_lat, missing_lon, centroids):
    """
    We just need the closest cell, so we can approximate
    using the haversine formula.
    """
    lat1 = missing_lat
    lon1 = missing_lon

    min_dist = None
    for pt in centroids:
        lat2 = pt['lat']
        lon2 = pt['lon']
        dist = distance(lat1, lon1, lat2, lon2)
        if min_dist is None or min_dist['dist'] > dist:
            min_dist = {'dist': dist, 'pt': pt}
    if min_dist['dist'] <= NEAREST_DISTANCE:
        return min_dist
예제 #23
0
 def _estimate_accuracy(self, lat, lon, points, minimum):
     """
     Return the maximum range between a position (lat/lon) and a
     list of secondary positions (points). But at least use the
     specified minimum value.
     """
     if len(points) == 1:
         accuracy = points[0].range
     else:
         # Terrible approximation, but hopefully better
         # than the old approximation, "worst-case range":
         # this one takes the maximum distance from location
         # to any of the provided points.
         accuracy = max([distance(lat, lon, p.lat, p.lon) * 1000 for p in points])
     if accuracy is not None:
         accuracy = float(accuracy)
     return max(accuracy, minimum)
예제 #24
0
def _nearest_tower(missing_lat, missing_lon, centroids):
    """
    Find the centroid closest to the given position.

    We just need the closest cell, so the distance may be approximate
    (the original note refers to the haversine formula). All
    coordinates are passed through to_degrees() before measuring.

    Returns a dict with the keys 'dist' and 'pt' for the closest
    centroid if it lies within NEAREST_DISTANCE. Returns None if the
    closest centroid is farther away or if there are no centroids.
    """
    lat1 = to_degrees(missing_lat)
    lon1 = to_degrees(missing_lon)

    nearest = None
    for pt in centroids:
        lat2 = to_degrees(pt['lat'])
        lon2 = to_degrees(pt['lon'])
        dist = distance(lat1, lon1, lat2, lon2)
        # Strict comparison: on equal distances the first centroid wins.
        if nearest is None or nearest['dist'] > dist:
            nearest = {'dist': dist, 'pt': pt}

    if nearest is None:
        # No centroids at all; indexing None here would raise TypeError.
        return None
    if nearest['dist'] <= NEAREST_DISTANCE:
        return nearest
    return None
예제 #25
0
def _nearest_tower(missing_lat, missing_lon, centroids):
    """
    We just need the closest cell, so we can approximate
    using the haversine formula.
    """
    FLOAT_CONST = 10000000.0
    lat1 = missing_lat / FLOAT_CONST
    lon1 = missing_lon / FLOAT_CONST

    min_dist = None
    for pt in centroids:
        lat2 = float(pt['lat']) / FLOAT_CONST
        lon2 = float(pt['lon']) / FLOAT_CONST
        dist = distance(lat1, lon1, lat2, lon2)
        if min_dist is None or min_dist['dist'] > dist:
            min_dist = {'dist': dist, 'pt': pt}
    if min_dist['dist'] <= NEAREST_DISTANCE:
        return min_dist
예제 #26
0
 def _estimate_accuracy(self, lat, lon, points, minimum):
     """
     Return the maximum range between a position (lat/lon) and a
     list of secondary positions (points). But at least use the
     specified minimum value.
     """
     if len(points) == 1:
         accuracy = points[0].range
     else:
         # Terrible approximation, but hopefully better
         # than the old approximation, "worst-case range":
         # this one takes the maximum distance from location
         # to any of the provided points.
         accuracy = max(
             [distance(lat, lon, p.lat, p.lon) * 1000 for p in points])
     if accuracy is not None:
         accuracy = float(accuracy)
     return max(accuracy, minimum)
예제 #27
0
파일: mac.py 프로젝트: amjadm61/ichnaea
def aggregate_mac_position(networks, minimum_accuracy):
    """
    Estimate a position from the networks with a least-squares fit.

    The fit starts from the weighted mean of all network positions and
    falls back to that mean if no solution is found. Returns a tuple of
    floats (lat, lon, accuracy), the accuracy being at least
    minimum_accuracy.

    Idea based on https://gis.stackexchange.com/questions/40660
    """
    def age_factor(entry):
        # Scale by sqrt(2000 / age), capped at 1.0.
        return min(math.sqrt(2000.0 / entry['age']), 1.0)

    def func(point, points):
        # One residual per network: the distance to the candidate point
        # scaled by the age factor over the squared signal strength.
        residuals = []
        for p in points:
            apart = distance(p['lat'], p['lon'], point[0], point[1])
            residuals.append(
                apart * age_factor(p) / math.pow(p['signalStrength'], 2))
        return numpy.array(residuals)

    # Guess the initial position as the weighted mean over all networks.
    coordinates = numpy.array(
        [(entry['lat'], entry['lon']) for entry in networks],
        dtype=numpy.double)

    weighting = numpy.array(
        [entry['score'] * age_factor(entry) /
         math.pow(entry['signalStrength'], 2)
         for entry in networks],
        dtype=numpy.double)

    initial = numpy.average(coordinates, axis=0, weights=weighting)

    solution, cov_x, info, mesg, ier = leastsq(
        func, initial, args=networks, full_output=True)
    lat, lon = solution

    if ier not in (1, 2, 3, 4):  # pragma: no cover
        # No solution found, use the initial estimate.
        lat, lon = initial

    # The accuracy guess is the 95th percentile of the distances from
    # the fitted position to each network position.
    spread = numpy.array(
        [distance(lat, lon, entry['lat'], entry['lon'])
         for entry in networks],
        dtype=numpy.double)
    accuracy = max(numpy.percentile(spread, 95), minimum_accuracy)

    return (float(lat), float(lon), float(accuracy))
예제 #28
0
def aggregate_mac_position(networks, minimum_accuracy):
    """
    Compute a position for the networks via a least-squares fit.

    The weighted mean of the network positions serves as the initial
    guess and as the fallback when the fit finds no solution. The
    result is a tuple of floats (lat, lon, accuracy) whose accuracy is
    never below minimum_accuracy.

    Idea based on https://gis.stackexchange.com/questions/40660
    """
    def recency(item):
        # sqrt(2000 / age), limited to at most 1.0.
        return min(math.sqrt(2000.0 / item['age']), 1.0)

    def func(point, points):
        # Residual per network: distance to the candidate point times
        # the recency factor, divided by the squared signal strength.
        values = []
        for p in points:
            offset = distance(p['lat'], p['lon'], point[0], point[1])
            values.append(
                offset * recency(p) / math.pow(p['signalStrength'], 2))
        return numpy.array(values)

    # The initial position is the weighted mean over all networks.
    positions = numpy.array(
        [(item['lat'], item['lon']) for item in networks],
        dtype=numpy.double)

    factors = numpy.array(
        [item['score'] * recency(item) /
         math.pow(item['signalStrength'], 2)
         for item in networks],
        dtype=numpy.double)

    initial = numpy.average(positions, axis=0, weights=factors)

    fitted, cov_x, info, mesg, ier = leastsq(
        func, initial, args=networks, full_output=True)
    lat, lon = fitted

    if ier not in (1, 2, 3, 4):  # pragma: no cover
        # The fit found no solution; keep the initial estimate.
        lat, lon = initial

    # Use the 95th percentile of the distances between the fitted
    # position and every network position as the accuracy guess.
    offsets = numpy.array(
        [distance(lat, lon, item['lat'], item['lon']) for item in networks],
        dtype=numpy.double)
    accuracy = max(numpy.percentile(offsets, 95), minimum_accuracy)

    return (float(lat), float(lon), float(accuracy))
    def aggregate_obs(self):
        """
        Summarize the observations as a weighted position.

        Returns None when the diagonal of the bounding box around all
        observations exceeds MAX_DIST_METERS. Otherwise returns a dict
        with the weighted average position, the bounding box, a radius,
        the region and the bounded samples / weight values.
        """
        positions = numpy.array(
            [(entry.lat, entry.lon) for entry in self.observations],
            dtype=numpy.double)

        # Column-wise extremes describe the bounding box.
        max_lat, max_lon = positions.max(axis=0)
        min_lat, min_lon = positions.min(axis=0)

        if distance(min_lat, min_lon,
                    max_lat, max_lon) > self.MAX_DIST_METERS:
            return None

        weights = numpy.array(
            [entry.weight for entry in self.observations],
            dtype=numpy.double)

        average = numpy.average(positions, axis=0, weights=weights)
        lat = float(average[0])
        lon = float(average[1])
        radius = circle_radius(lat, lon, max_lat, max_lon, min_lat, min_lon)
        region = GEOCODER.region(lat, lon)

        observation_count = len(self.observations)
        samples, weight = self.bounded_samples_weight(
            observation_count, float(weights.sum()))

        result = {}
        result['positions'] = positions
        result['weights'] = weights
        result['lat'] = lat
        result['lon'] = lon
        result['max_lat'] = float(max_lat)
        result['min_lat'] = float(min_lat)
        result['max_lon'] = float(max_lon)
        result['min_lon'] = float(min_lon)
        result['radius'] = radius
        result['region'] = region
        result['samples'] = samples
        result['weight'] = weight
        return result
예제 #30
0
 def func(point, points):
     # One value per entry: the distance between the entry's position
     # and the given point, divided by the squared signal.
     values = []
     for p in points:
         apart = distance(p['lat'], p['lon'], point[0], point[1])
         values.append(apart / math.pow(p['signal'], 2))
     return numpy.array(values)
예제 #31
0
 def test_circle_radius(self):
     # Aggregate two circles of radius 100 with nearby centers.
     rows = [(1.0, 1.0, 100.0), (1.001, 1.001, 100.0)]
     circles = numpy.array(rows, dtype=numpy.double)
     lat, lon, radius = aggregate_position(circles, 10.0)
     self.assertEqual((lat, lon), (1.0005, 1.0005))
     # Expect the distance to one center plus that circle's radius.
     reach = distance(lat, lon, 1.0, 1.0) + 100.0
     self.assertAlmostEqual(reach, radius, 7)
예제 #32
0
 def wifi_distance(one, two):
     # distance() between the lat/lon attributes of the two arguments.
     start = (one.lat, one.lon)
     end = (two.lat, two.lon)
     return distance(start[0], start[1], end[0], end[1])
예제 #33
0
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :returns: A dict with rounded 'lat', 'lon' and 'accuracy' values,
              or None if none of the data sources produced a result.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Best estimate found so far and the name of the source it came from.
    result = None
    result_metric = None

    # Per-source inputs: the *_network entries hold the database matches
    # for the corresponding validated keys.
    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pass-through wifi data
    validated['wifi'] = data.get('wifi', [])

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            # The matching location area is the same key with the cell id
            # replaced by the CELLID_LAC marker; the set de-duplicates them.
            validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Merge all possible cell and lac keys into one list
    all_cell_keys = []
    all_cell_keys.extend(validated['cell'])
    for key in validated['cell_lac']:
        all_cell_keys.append(key)

    # Do a single query for all cells and lacs at the same time
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        # A failed query is reported but treated like "no networks found".
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    # Split the combined query result back into lac and cell networks.
    for network in all_networks:
        if network.key == CELLID_LAC:
            validated['cell_lac_network'].append(network)
        else:
            validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res, countries) = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    for (data_field, object_field, metric_name, search_fn) in [
            ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
            ('cell', 'cell_network', 'cell', search_cell),
            ('wifi', 'wifi', 'wifi', search_wifi)]:

        # Only run a search if the query contained data for this source.
        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field])
            except Exception:
                # A failing search is reported and then handled like a
                # search that found nothing.
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' %
                                  (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' %
                                  (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' %
                                  (api_name, metric_name))

                # Check whether the hit lies in any of the possible
                # countries. A mismatch is only recorded as an anomaly
                # metric; the hit is still considered below.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # NOTE(review): the `* 1000` suggests distance() returns
                # kilometers while accuracy is in meters - confirm.
                elif (distance(float(result['lat']),
                               float(result['lon']), lat, lon) * 1000
                      <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    # The new hit lies outside the accuracy of the current
                    # best guess: keep the old result, record the conflict.
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    # All three values, including the accuracy, are rounded to
    # DEGREE_DECIMAL_PLACES digits.
    rounded_result = {
        'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result
예제 #34
0
def calculate_new_position(station,
                           measures,
                           moving_stations,
                           max_dist_km,
                           backfill=True):
    """
    Update a station in place with a position derived from new measures.

    :param station: The station object to update; its lat/lon, min/max,
                    counter and range attributes are read and written.
    :param measures: A sequence of items whose first two elements are
                     the latitude and longitude of a measure.
    :param moving_stations: A set; the station is added to it (and left
                            otherwise untouched) if it appears to move.
    :param max_dist_km: Largest allowed diagonal of the bounding box
                        around the old position and the new measures.
    :param backfill: Selects which of the station's measure counters is
                     adjusted, see the comments below.
    :returns: None; results are stored on ``station`` or in
              ``moving_stations``.
    """
    # if backfill is true, we work on older measures for which
    # the new/total counters were never updated
    length = len(measures)
    latitudes = [w[0] for w in measures]
    longitudes = [w[1] for w in measures]
    # Floor division: the averages are truncated, not rounded.
    # NOTE(review): raises ZeroDivisionError for an empty measures list.
    new_lat = sum(latitudes) // length
    new_lon = sum(longitudes) // length

    # NOTE(review): truthiness check - a stored lat or lon of exactly 0 is
    # treated the same as a missing position; confirm this is intended.
    if station.lat and station.lon:
        # Include the current estimate when computing the extremes below.
        latitudes.append(station.lat)
        longitudes.append(station.lon)
        existing_station = True
    else:
        station.lat = new_lat
        station.lon = new_lon
        existing_station = False

    # calculate extremes of measures, existing location estimate
    # and existing extreme values
    def extreme(vals, attr, function):
        # Apply min/max to the values, then fold in the extreme already
        # stored on the station under `attr`, if there is one.
        new = function(vals)
        old = getattr(station, attr, None)
        if old is not None:
            return function(new, old)
        else:
            return new

    min_lat = extreme(latitudes, 'min_lat', min)
    min_lon = extreme(longitudes, 'min_lon', min)
    max_lat = extreme(latitudes, 'max_lat', max)
    max_lon = extreme(longitudes, 'max_lon', max)

    # calculate sphere-distance from opposite corners of
    # bounding box containing current location estimate
    # and new measurements; if too big, station is moving
    box_dist = distance(to_degrees(min_lat), to_degrees(min_lon),
                        to_degrees(max_lat), to_degrees(max_lon))

    if existing_station:

        if box_dist > max_dist_km:
            # add to moving list, return early without updating
            # station since it will be deleted by caller momentarily
            moving_stations.add(station)
            return

        if backfill:
            new_total = station.total_measures + length
            old_length = station.total_measures
            # update total to account for new measures
            # new counter never got updated to include the measures
            station.total_measures = new_total
        else:
            # The total already includes the new measures.
            new_total = station.total_measures
            old_length = new_total - length

        # Average of old and new position, weighted by the number of
        # measures behind each; floor division truncates the result.
        station.lat = ((station.lat * old_length) +
                       (new_lat * length)) // new_total
        station.lon = ((station.lon * old_length) +
                       (new_lon * length)) // new_total

    if not backfill:
        # decrease new counter, total is already correct
        # in the backfill case new counter was never increased
        station.new_measures = station.new_measures - length

    # update max/min lat/lon columns
    station.min_lat = min_lat
    station.min_lon = min_lon
    station.max_lat = max_lat
    station.max_lon = max_lon

    # give radio-range estimate between extreme values and centroid
    ctr = (to_degrees(station.lat), to_degrees(station.lon))
    # The four corners of the bounding box.
    points = [(to_degrees(min_lat), to_degrees(min_lon)),
              (to_degrees(min_lat), to_degrees(max_lon)),
              (to_degrees(max_lat), to_degrees(min_lon)),
              (to_degrees(max_lat), to_degrees(max_lon))]

    # NOTE(review): the factor 1000.0 presumably converts km to meters -
    # confirm against range_to_points.
    station.range = range_to_points(ctr, points) * 1000.0
예제 #35
0
 def test_out_of_range(self):
     # A latitude of -100 and a longitude of -186 are expected to still
     # yield a distance instead of raising.
     result = distance(-100.0, -186.0, 0.0, 0.0)
     self.assertAlmostEqual(result, 8901747.5973, 4)
예제 #36
0
    def new_station_values(self, station, station_key,
                           first_blocked, observations):
        """
        Compute the column values for a station from new observations.

        :param station: The existing station row or None.
        :param station_key: The station's key; its ``__dict__`` is copied
                            into the returned values.
        :param first_blocked: Used as the creation date of a new station
                              when truthy.
        :param observations: The new observations, each with lat and lon.
        :returns: A 3-tuple ``(moving, insert_values, update_values)``,
                  see the comment below.
        """
        # This function returns a 3-tuple, the first element is True,
        # if the station was found to be moving.
        # The second element is either None or a dict of values,
        # if the station is new and should result in a table insert
        # The third element is either None or a dict of values
        # if the station did exist and should be updated

        obs_length = len(observations)
        obs_positions = numpy.array(
            [(obs.lat, obs.lon) for obs in observations],
            dtype=numpy.double)
        obs_lat, obs_lon = centroid(obs_positions)

        values = {
            'modified': self.utcnow,
        }
        values.update(station_key.__dict__)
        if self.station_type == 'cell':
            # pass on extra psc column which is not actually part
            # of the stations hash key
            values['psc'] = observations[-1].psc

        created = self.utcnow
        if station is None:
            if first_blocked:
                # if the station did previously exist, retain at least the
                # time it was first put on a blocklist as the creation date
                created = first_blocked
            # Defaults for a new row; range and total_measures are
            # overwritten further down.
            values.update({
                'created': created,
                'range': 0,
                'total_measures': 0,
            })

        if (station is not None and
                station.lat is not None and station.lon is not None):
            # Add the station's position and stored extremes to the
            # positions; missing extremes become NaN, which the
            # nanmax/nanmin calls below ignore.
            obs_positions = numpy.append(obs_positions, [
                (station.lat, station.lon),
                (numpy.nan if station.max_lat is None else station.max_lat,
                 numpy.nan if station.max_lon is None else station.max_lon),
                (numpy.nan if station.min_lat is None else station.min_lat,
                 numpy.nan if station.min_lon is None else station.min_lon),
            ], axis=0)
            existing_station = True
        else:
            # No usable old position: start from the observation centroid.
            values['lat'] = obs_lat
            values['lon'] = obs_lon
            existing_station = False

        max_lat, max_lon = numpy.nanmax(obs_positions, axis=0)
        min_lat, min_lon = numpy.nanmin(obs_positions, axis=0)

        # calculate sphere-distance from opposite corners of
        # bounding box containing current location estimate
        # and new observations; if too big, station is moving
        box_dist = distance(min_lat, min_lon, max_lat, max_lon)

        # TODO: If we get a too large box_dist, we should not create
        # a new station record with the impossibly big distance,
        # so moving the box_dist > self.max_dist_meters here

        if existing_station:
            if box_dist > self.max_dist_meters:
                # Signal a moving station and return early without updating
                # the station since it will be deleted by caller momentarily
                return (True, None, None)
            # limit the maximum weight of the old station estimate
            old_weight = min(station.total_measures,
                             self.MAX_OLD_OBSERVATIONS)
            new_weight = old_weight + obs_length

            # Weighted average of the old position and the new centroid.
            values['lat'] = ((station.lat * old_weight) +
                             (obs_lat * obs_length)) / new_weight
            values['lon'] = ((station.lon * old_weight) +
                             (obs_lon * obs_length)) / new_weight

        # increase total counter
        if station is not None:
            values['total_measures'] = station.total_measures + obs_length
        else:
            values['total_measures'] = obs_length

        # update max/min lat/lon columns
        values['min_lat'] = float(min_lat)
        values['min_lon'] = float(min_lon)
        values['max_lat'] = float(max_lat)
        values['max_lon'] = float(max_lon)

        # give radio-range estimate between extreme values and centroid
        values['range'] = circle_radius(
            values['lat'], values['lon'],
            max_lat, max_lon, min_lat, min_lon)

        if station is None:
            # return new values
            return (False, values, None)
        else:
            # return updated values, remove station from session
            self.session.expunge(station)
            return (False, None, values)
예제 #37
0
def search_wifi(session, data):
    """
    Estimate a position from the wifi networks contained in a query.

    :param session: A database session used to look up known networks.
    :param data: A dict with a 'wifi' list; every entry has a 'key' and
                 optionally a 'signal'.
    :returns: A dict with 'lat', 'lon' and 'accuracy' keys, or None if
              fewer than three networks are known or no cluster of at
              least three nearby networks could be formed.
    """

    # estimate signal strength at -100 dBm if none is provided,
    # which is worse than the 99th percentile of wifi dBms we
    # see in practice (-98).
    def signal_strength(w):
        if 'signal' in w:
            return int(w['signal'])
        return -100

    wifi_signals = dict((normalize_wifi_key(w['key']), signal_strength(w))
                        for w in data['wifi'])
    wifi_keys = set(wifi_signals)

    if not any(wifi_keys):
        # no valid normalized keys
        return None
    if len(wifi_keys) < 3:
        # we didn't even get three keys, bail out
        return None
    sql_null = None  # avoid pep8 warning
    query = session.query(Wifi.key, Wifi.lat, Wifi.lon).filter(
        Wifi.key.in_(wifi_keys)).filter(
        Wifi.lat != sql_null).filter(
        Wifi.lon != sql_null)
    wifis = query.all()
    if len(wifis) < 3:
        # we got fewer than three actual matches
        return None

    wifis = [Network(normalize_wifi_key(w[0]), w[1], w[2]) for w in wifis]

    # Sort networks by the signal strengths in the query, strongest first.
    # A key function is used instead of a cmp-style comparator, which no
    # longer exists in Python 3; reverse=True keeps the sort stable, so
    # networks with equal signal strength stay in their original order.
    wifis.sort(key=lambda net: wifi_signals[net.key], reverse=True)

    clusters = []

    for w in wifis:
        # try to assign w to a cluster (but at most one)
        for c in clusters:
            for n in c:
                if distance(quantize(n.lat), quantize(n.lon),
                            quantize(w.lat), quantize(w.lon)) <= MAX_DIST:
                    c.append(w)
                    w = None
                    break

            if len(c) >= 3:
                # as soon as a cluster has at least 3
                # networks in it, return its centroid.
                length = len(c)
                avg_lat = sum([n.lat for n in c]) / length
                avg_lon = sum([n.lon for n in c]) / length
                return {
                    'lat': quantize(avg_lat),
                    'lon': quantize(avg_lon),
                    'accuracy': 500,
                }

            if w is None:
                break

        # if w didn't adhere to any cluster, make a new one
        if w is not None:
            clusters.append([w])

    # if we didn't get any cluster with at least 3 networks,
    # the query is a bunch of outliers; give up and
    # let the next location method try.
    return None
예제 #38
0
파일: geocode.py 프로젝트: ingle/ichnaea
    def region(self, lat, lon):
        """
        Return the code of the region containing the given position,
        or None if the position is not inside any region.
        """
        point = geometry.Point(lon, lat)

        # Coarse but very fast first pass: RTree lookup over the
        # buffered region envelopes.
        codes = []
        for id_ in self._tree.intersection(point.bounds):
            codes.append(self._tree_ids[id_])
        if not codes:
            return None

        # Second pass: the buffered polygon shapes.
        buffered_codes = [
            code for code in codes
            if self._buffered_shapes[code].contains(point)]
        if not buffered_codes:
            return None
        if len(buffered_codes) == 1:
            return buffered_codes[0]

        # Third pass: the precise polygon shapes.
        precise_codes = [
            code for code in buffered_codes
            if self._prepared_shapes[code].contains(point)]
        if len(precise_codes) == 1:
            return precise_codes[0]

        def border_distances(candidates):
            # Map the distance between the position and every boundary
            # coordinate of the candidate regions to the region code.
            by_distance = {}
            for code in candidates:
                boundary = self._shapes[code].boundary
                if isinstance(boundary,
                              geometry.base.BaseMultipartGeometry):
                    coords = []
                    for geom in boundary.geoms:
                        coords.extend(geom.coords)
                else:
                    coords = boundary.coords
                for coord in coords:
                    dist = geocalc.distance(coord[1], coord[0], lat, lon)
                    by_distance[dist] = code
            return by_distance

        # The distance from the border of each region is the tie-breaker.
        if not precise_codes:
            # The point is in none of the precise regions: pick the
            # buffered region whose border is closest.
            distances = border_distances(buffered_codes)
            return distances[min(distances)]

        # The point is in several overlapping regions: pick the one with
        # the farthest border coordinate, i.e. where it is the most inside.
        distances = border_distances(precise_codes)
        return distances[max(distances)]
예제 #39
0
 def test_non_float(self):
     # An integer coordinate is accepted alongside floats ...
     result = distance(1.0, 1.0, 1, 1.1)
     self.assertAlmostEqual(result, 11117.7991, 4)
     # ... while None and string coordinates must raise a TypeError.
     self.assertRaises(TypeError, distance, None, '0.1', 1, 1.1)
예제 #40
0
 def test_out_of_max_bounds(self):
     # A latitude of -100 and a longitude of -186 are expected to still
     # yield a distance instead of raising.
     result = distance(-100.0, -186.0, 0.0, 0.0)
     self.assertAlmostEqual(result, 8901747.5973, 4)
예제 #41
0
 def test_antipodal(self):
     # The classic haversine formula suffers from a round-off error for
     # antipodal points (opposite sides of the planet): it is very old
     # and assumes fixed precision math rather than IEEE floats.
     pole_to_pole = distance(90.0, 0.0, -90.0, 0)
     self.assertAlmostEqual(pole_to_pole, 20015086.796, 4)
예제 #42
0
    def get(self, query):
        """
        Get a cached result for the query.

        All cache entries found for the query are grouped into grid
        cells by rounded latitude, rounded longitude and fallback name.
        A result is only returned if all entries agree, i.e. fall into
        a single grid cell, or if the only entry is a cached "not found".

        :param query: The query for which to look for a cached value.
        :type query: :class:`ichnaea.api.locate.query.Query`

        :returns: The cache result or None.
        :rtype: :class:`~ichnaea.api.locate.fallback.ExternalResult`
        """
        fallback_name = query.api_key.fallback_name

        if not self._should_cache(query):
            self._stat_count(fallback_name, 'bypassed')
            return None

        cache_keys = self._cache_keys(query)
        # dict of (lat, lon, fallback) tuples to ExternalResult list
        # lat/lon clustered into ~100x100 meter grid cells
        clustered_results = defaultdict(list)
        not_found_cluster = (None, None, None)
        try:
            for value in self.redis_client.mget(cache_keys):
                if not value:
                    continue

                value = simplejson.loads(value)
                if value == LOCATION_NOT_FOUND:
                    value = ExternalResult(None, None, None, None)
                    clustered_results[not_found_cluster] = [value]
                else:
                    value = ExternalResult(**value)
                    # ~100x100m clusters; the key must contain both the
                    # latitude and the longitude, otherwise results at
                    # the same latitude but far apart in longitude end
                    # up in the same cluster.
                    cluster_key = (round(value.lat, 3),
                                   round(value.lon, 3),
                                   value.fallback)
                    clustered_results[cluster_key].append(value)
        except (simplejson.JSONDecodeError, RedisError):
            self.raven_client.captureException()
            self._stat_count(fallback_name, 'failure')
            return None

        if not clustered_results:
            self._stat_count(fallback_name, 'miss')
            return None

        if list(clustered_results.keys()) == [not_found_cluster]:
            # the only match was for not found results
            self._stat_count(fallback_name, 'hit')
            return clustered_results[not_found_cluster][0]

        if len(clustered_results) == 1:
            # all the cached values agree with each other
            self._stat_count(fallback_name, 'hit')
            results = list(clustered_results.values())[0]

            circles = numpy.array([(res.lat, res.lon, res.accuracy)
                                   for res in results],
                                  dtype=numpy.double)
            # Split the (lat, lon) columns from the accuracy column.
            points, _accuracies = numpy.hsplit(circles, [2])

            lat, lon = points.mean(axis=0)
            lat = float(lat)
            lon = float(lon)

            # The combined radius has to cover every cached circle: take
            # the largest distance from the mean position to a circle's
            # center plus that circle's own accuracy.
            radius = 0.0
            for circle in circles:
                p_dist = distance(lat, lon, circle[0], circle[1]) + circle[2]
                radius = max(radius, p_dist)

            return ExternalResult(
                lat=lat,
                lon=lon,
                accuracy=float(radius),
                fallback=results[0].fallback,
            )

        # inconsistent results
        self._stat_count(fallback_name, 'inconsistent')
        return None
예제 #43
0
def cluster_networks(models,
                     lookups,
                     min_radius=None,
                     min_signal=None,
                     max_distance=None):
    """
    Group the found networks into clusters of nearby networks.

    Takes a list of database models and a list of lookups and returns
    a list of clusters, each holding at least two networks.
    """
    now = util.utcnow()

    # Signal strength per mac, falling back to min_signal.
    signals = {}
    for lookup in lookups:
        signals[lookup.mac] = lookup.signal or min_signal

    rows = []
    for model in models:
        rows.append((model.lat, model.lon, model.radius or min_radius,
                     signals[model.mac], model.score(now)))
    networks = numpy.array(rows, dtype=NETWORK_DTYPE)

    # A cluster needs at least 2 found networks. Otherwise a query that
    # combines one real network with one fake (and therefore not found)
    # network would reveal the position of the real network.
    length = len(networks)
    if length < 2:
        return []

    positions = networks[['lat', 'lon']]
    if length == 2:
        first = positions[0]
        second = positions[1]
        apart = distance(first[0], first[1], second[0], second[1])
        if apart <= max_distance:
            # Both networks agree and form the single cluster.
            return [networks]
        # Two clusters of size one, neither is large enough.
        return []

    # Build the condensed distance matrix (in meters) directly. This
    # skips the square form, which would hold every value twice plus a
    # diagonal of zeros. The length < 2 special cases are handled above.
    # See scipy.spatial.distance.squareform and
    # https://stackoverflow.com/questions/13079563
    pair_count = length * (length - 1) // 2
    dist_matrix = numpy.zeros(pair_count, dtype=numpy.double)
    pairs = itertools.combinations(positions, 2)
    for index, (left, right) in enumerate(pairs):
        dist_matrix[index] = distance(left[0], left[1], right[0], right[1])

    link_matrix = hierarchy.linkage(dist_matrix, method='complete')
    assignments = hierarchy.fcluster(
        link_matrix, max_distance, criterion='distance', depth=2)

    # Collect the networks per assigned cluster id.
    grouped = defaultdict(list)
    for cluster_id, network in zip(assignments, networks):
        grouped[cluster_id].append(network)

    # Only clusters with at least two networks are returned.
    return [numpy.array(members, dtype=NETWORK_DTYPE)
            for members in grouped.values()
            if len(members) >= 2]
예제 #44
0
 def test_antipodal(self):
     # The classic haversine formula suffers from a round-off error for
     # antipodal points (opposite sides of the planet): it is very old
     # and assumes fixed precision math rather than IEEE floats.
     pole_to_pole = distance(90.0, 0.0, -90.0, 0)
     self.assertAlmostEqual(pole_to_pole, 20015086.796, 4)
예제 #45
0
    def station_values(self, station_key, shard_station, observations):
        """
        Return two-tuple of status, value dict where status is one of:
        `new`, `new_moving`, `moving`, `changed`.

        :param station_key: Stored under the 'mac' key of the values.
        :param shard_station: The existing station row or None.
        :param observations: The new observations, each with lat and lon.
        """
        # cases:
        # we always get a station key and observations
        # 0. observations disagree
        # 0.a. no shard station, return new_moving
        # 0.b. shard station, return moving
        # 1. no shard station
        # 1.a. obs agree -> return new
        # 2. shard station
        # 2.a. obs disagree -> return moving
        # 2.b. obs agree -> return changed
        created = self.utcnow
        values = {
            'mac': station_key,
            'modified': self.utcnow,
        }

        # Centroid and bounding box of the new observations alone.
        obs_length = len(observations)
        obs_positions = numpy.array(
            [(obs.lat, obs.lon) for obs in observations],
            dtype=numpy.double)
        obs_new_lat, obs_new_lon = centroid(obs_positions)
        obs_max_lat, obs_max_lon = numpy.nanmax(obs_positions, axis=0)
        obs_min_lat, obs_min_lon = numpy.nanmin(obs_positions, axis=0)
        obs_box_dist = distance(obs_min_lat, obs_min_lon,
                                obs_max_lat, obs_max_lon)

        if obs_box_dist > self.max_dist_meters:
            # the new observations are already too far apart
            if not shard_station:
                # case 0.a: create the station directly as blocked
                values.update({
                    'created': created,
                    'block_first': self.today,
                    'block_last': self.today,
                    'block_count': 1,
                })
                return ('new_moving', values)
            else:
                # case 0.b: clear the position data, keep the country
                # and bump the block counter
                block_count = shard_station.block_count or 0
                values.update({
                    'lat': None,
                    'lon': None,
                    'max_lat': None,
                    'min_lat': None,
                    'max_lon': None,
                    'min_lon': None,
                    'country': shard_station.country,
                    'radius': None,
                    'samples': None,
                    'source': None,
                    'block_last': self.today,
                    'block_count': block_count + 1,
                })
                return ('moving', values)

        if shard_station is None:
            # totally new station, only agreeing observations
            radius = circle_radius(
                obs_new_lat, obs_new_lon,
                obs_max_lat, obs_max_lon, obs_min_lat, obs_min_lon)
            values.update({
                'created': created,
                'lat': obs_new_lat,
                'lon': obs_new_lon,
                'max_lat': float(obs_max_lat),
                'min_lat': float(obs_min_lat),
                'max_lon': float(obs_max_lon),
                'min_lon': float(obs_min_lon),
                'country': country_for_location(obs_new_lat, obs_new_lon),
                'radius': radius,
                'samples': obs_length,
                'source': None,
            })
            return ('new', values)
        else:
            # shard_station + new observations
            # Add the station's position and stored extremes to the
            # observed positions; missing values become NaN, which the
            # nanmax/nanmin calls below ignore.
            positions = numpy.append(obs_positions, [
                (numpy.nan if shard_station.lat is None
                    else shard_station.lat,
                 numpy.nan if shard_station.lon is None
                    else shard_station.lon),
                (numpy.nan if shard_station.max_lat is None
                    else shard_station.max_lat,
                 numpy.nan if shard_station.max_lon is None
                    else shard_station.max_lon),
                (numpy.nan if shard_station.min_lat is None
                    else shard_station.min_lat,
                 numpy.nan if shard_station.min_lon is None
                    else shard_station.min_lon),
            ], axis=0)
            max_lat, max_lon = numpy.nanmax(positions, axis=0)
            min_lat, min_lon = numpy.nanmin(positions, axis=0)
            box_dist = distance(min_lat, min_lon, max_lat, max_lon)
            if box_dist > self.max_dist_meters:
                # shard_station + disagreeing observations
                block_count = shard_station.block_count or 0
                values.update({
                    'lat': None,
                    'lon': None,
                    'max_lat': None,
                    'min_lat': None,
                    'max_lon': None,
                    'min_lon': None,
                    'country': shard_station.country,
                    'radius': None,
                    'samples': None,
                    'source': None,
                    'block_last': self.today,
                    'block_count': block_count + 1,
                })
                return ('moving', values)
            else:
                # shard_station + agreeing observations
                if shard_station.lat is None or shard_station.lon is None:
                    # without an old position only the new
                    # observations determine the result
                    old_weight = 0
                else:
                    # limit the maximum weight of the old position
                    old_weight = min((shard_station.samples or 0),
                                     self.MAX_OLD_OBSERVATIONS)
                # weighted average of the observation centroid and
                # the old position
                new_lat = ((obs_new_lat * obs_length +
                            (shard_station.lat or 0.0) * old_weight) /
                           (obs_length + old_weight))
                new_lon = ((obs_new_lon * obs_length +
                            (shard_station.lon or 0.0) * old_weight) /
                           (obs_length + old_weight))
                # the sample counter is not capped, unlike old_weight
                samples = (shard_station.samples or 0) + obs_length
                radius = circle_radius(
                    new_lat, new_lon, max_lat, max_lon, min_lat, min_lon)
                country = shard_station.country
                if (country and not country_matches_location(
                        new_lat, new_lon, country)):
                    # reset country if it no longer matches
                    country = None
                if not country:
                    country = country_for_location(new_lat, new_lon)
                values.update({
                    'lat': new_lat,
                    'lon': new_lon,
                    'max_lat': float(max_lat),
                    'min_lat': float(min_lat),
                    'max_lon': float(max_lon),
                    'min_lon': float(min_lon),
                    'country': country,
                    'radius': radius,
                    'samples': samples,
                    'source': None,
                    # use the exact same keys as in the moving case
                    'block_last': shard_station.block_last,
                    'block_count': shard_station.block_count,
                })
                return ('changed', values)

        # every branch above returns, this line is never reached
        return (None, None)  # pragma: no cover
예제 #46
0
 def test_non_float(self):
     # An integer coordinate is accepted alongside floats ...
     result = distance(1.0, 1.0, 1, 1.1)
     self.assertAlmostEqual(result, 11117.7991, 4)
     # ... while None and string coordinates must raise a TypeError.
     self.assertRaises(TypeError, distance, None, '0.1', 1, 1.1)
예제 #47
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None):
    """
    Shared lookup code-path for all search APIs, combining the
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: If true (and `api_key_name` is set), emit an
        additional per-API-key hit/miss metric.
    :param api_key_name: The API key name used in that extra metric.
    :returns: A dict with rounded `lat`, `lon` and `accuracy` values,
        or None if none of the sources produced a result.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Normalize the wifi entries, keeping only those which survive.
    for raw_wifi in data.get('wifi', ()):
        clean_wifi = normalized_wifi_dict(raw_wifi)
        if clean_wifi:
            validated['wifi'].append(clean_wifi)

    # Normalize the cell entries; each valid cell also contributes the
    # key of its enclosing location area.
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for raw_cell in data.get('cell', ()):
        clean_cell = normalized_cell_dict(raw_cell, default_radio=radio)
        if not clean_cell:
            continue
        cell_key = to_cellkey(clean_cell)
        validated['cell'].append(cell_key)
        validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # One combined key list, so cells and lacs need only a single query.
    all_cell_keys = list(validated['cell'])
    all_cell_keys.extend(validated['cell_lac'])

    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        # Report the failure and carry on as if nothing was found.
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    for network in all_networks:
        bucket = ('cell_lac_network' if network.key == CELLID_LAC
                  else 'cell_network')
        validated[bucket].append(network)

    # The GeoIP lookup always happens: it is cheap, it lets us report
    # mismatches between GeoIP and the other sources, and its city-level
    # estimate is the last resort if everything else fails.
    geoip_res, countries = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    # "Zoom in" from cell-lac over cell to wifi. Each step may tighten
    # the estimate, but only while it does not contradict the current
    # best estimate.
    search_steps = (
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi),
    )
    for data_field, object_field, metric_name, search_fn in search_steps:
        if not validated[data_field]:
            continue

        found = None
        try:
            found = search_fn(session, validated[object_field])
        except Exception:
            heka_client.raven(RAVEN_ERROR)
            stats_client.incr('%s.%s_error' % (api_name, metric_name))

        if found is None:
            stats_client.incr('%s.no_%s_found' % (api_name, metric_name))
            continue

        lat = float(found['lat'])
        lon = float(found['lon'])

        stats_client.incr('%s.%s_found' % (api_name, metric_name))

        # Record an anomaly if the hit lies in none of the candidate
        # countries. The hit itself is still considered below.
        in_candidate_country = any(
            location_is_in_country(lat, lon, country, 1)
            for country in countries)
        if countries and not in_candidate_country:
            stats_client.incr('%s.anomaly.%s_country_mismatch' %
                              (api_name, metric_name))

        # Accept the first hit unconditionally, and any later hit whose
        # scaled distance to the current best guess is within that
        # guess's accuracy.
        if result is None or (
                distance(float(result['lat']), float(result['lon']),
                         lat, lon) * 1000 <= result['accuracy']):
            result = found
            result_metric = metric_name
        else:
            stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                              (api_name, metric_name, result_metric))

    # GeoIP is deliberately kept out of the zoom-in loop: it is
    # frequently _wrong_ at the city level, so it is only used when
    # neither cell nor wifi gave us anything.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Detailed hit/miss logging for selected api keys.
    if api_key_log and api_key_name:
        wifi_keys = {w['key'] for w in validated['wifi']}
        # A request only counts as WiFi-based if it contains enough
        # distinct WiFi networks to pass our filters.
        enough_wifis = bool(wifi_keys) and (
            len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY)
        if enough_wifis:
            api_log_metric = (
                'wifi_hit' if result_metric == 'wifi' else 'wifi_miss')
        elif validated['cell']:
            api_log_metric = {
                'cell': 'cell_hit',
                'cell_lac': 'cell_lac_hit',
            }.get(result_metric, 'cell_miss')
        else:
            api_log_metric = 'geoip_hit' if geoip_res else 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    rounded_result = dict(
        (field, round(result[field], DEGREE_DECIMAL_PLACES))
        for field in ('lat', 'lon', 'accuracy'))

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result
예제 #48
0
def search_wifi(session, data):
    """
    Estimate a position from the WiFi networks given in a query.

    The known networks matching the query are grouped into clusters by
    distance. The cluster with the most networks is selected and the
    centroid of its strongest networks is returned.

    :param session: A database session for queries.
    :param data: A dict with a `wifi` list; each entry has a `key` and
        optionally a `signal` value.
    :returns: A dict with `lat`, `lon` and `accuracy` keys, or None if
        there were too few usable networks.
    """

    # Estimate signal strength at -100 dBm if none is provided,
    # which is worse than the 99th percentile of wifi dBms we
    # see in practice (-98).
    def signal_strength(w):
        if 'signal' in w:
            return int(w['signal'])
        return -100

    wifi_signals = dict((normalized_wifi_key(w['key']), signal_strength(w))
                        for w in data['wifi'])
    wifi_keys = set(wifi_signals)

    if not any(wifi_keys):
        # No valid normalized keys.
        return None
    if len(wifi_keys) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough keys.
        return None
    query = session.query(Wifi.key, Wifi.lat, Wifi.lon, Wifi.range).filter(
        Wifi.key.in_(wifi_keys)).filter(
        Wifi.lat.isnot(None)).filter(
        Wifi.lon.isnot(None))
    wifis = query.all()
    if len(wifis) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough matches.
        return None

    wifis = [Network(normalized_wifi_key(w[0]), w[1], w[2], w[3])
             for w in wifis]

    # Sort networks by signal strengths in query, strongest first.
    # A key function is used instead of a cmp-style comparator, which
    # list.sort() no longer accepts on Python 3. With reverse=True the
    # sort stays stable, so equally strong networks keep their order.
    wifis.sort(key=lambda network: wifi_signals[network.key], reverse=True)

    clusters = []

    # The first loop forms a set of clusters by distance,
    # preferring the cluster with the stronger signal strength
    # if there's a tie.
    for w in wifis:

        # Try to assign w to a cluster (but at most one).
        for c in clusters:
            for n in c:
                if distance(quantize(n.lat),
                            quantize(n.lon),
                            quantize(w.lat),
                            quantize(w.lon)) <= MAX_WIFI_CLUSTER_KM:
                    c.append(w)
                    # mark w as assigned, to leave both loops
                    w = None
                    break

            if w is None:
                break

        # If w didn't adhere to any cluster, make a new one.
        if w is not None:
            clusters.append([w])

    # The second loop selects a cluster and estimates the position of that
    # cluster. The selected cluster is the one with the most points, larger
    # than MIN_WIFIS_IN_CLUSTER; its position is estimated taking up-to
    # MAX_WIFIS_IN_CLUSTER worth of points from the cluster, which is
    # pre-sorted in signal-strength order due to the way we built the
    # clusters.
    #
    # The reasoning here is that if we have >1 cluster at all, we probably
    # have some bad data -- likely an AP or set of APs associated with a
    # single antenna that moved -- since a user shouldn't be able to hear
    # multiple groups 500m apart.
    #
    # So we're trying to select a cluster that's most-likely good data,
    # which we assume to be the one with the most points in it.
    #
    # The reason we take a subset of those points when estimating location
    # is that we're doing a (non-weighted) centroid calculation, which is
    # itself unbalanced by distant elements. Even if we did a weighted
    # centroid here, using radio intensity as a proxy for distance has an
    # error that increases significantly with distance, so we'd have to
    # underweight pretty heavily.

    clusters = [c for c in clusters if len(c) > MIN_WIFIS_IN_CLUSTER]

    if not clusters:
        return None

    # Largest cluster first; the stable sort keeps the earlier-built
    # cluster in front if two clusters have the same size.
    clusters.sort(key=len, reverse=True)
    cluster = clusters[0]
    sample = cluster[:min(len(cluster), MAX_WIFIS_IN_CLUSTER)]
    length = len(sample)
    avg_lat = sum([n.lat for n in sample]) / length
    avg_lon = sum([n.lon for n in sample]) / length
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': estimate_accuracy(avg_lat, avg_lon,
                                      sample, WIFI_MIN_ACCURACY),
    }
예제 #49
0
파일: locate.py 프로젝트: boostrack/ichnaea
 def agrees_with(self, result):
     """
     Return whether the scaled distance between this position and
     `result` is no larger than the accuracy of `result`.
     """
     separation = distance(result.lat, result.lon, self.lat, self.lon)
     # The factor of 1000 presumably converts kilometers to meters;
     # confirm against the units returned by distance().
     return separation * 1000 <= result.accuracy
예제 #50
0
 def agrees_with(self, other):
     """
     Return whether the scaled distance between this position and
     `other` is no larger than the accuracy of `other`.
     """
     separation = distance(other.lat, other.lon, self.lat, self.lon)
     # The factor of 1000 presumably converts kilometers to meters;
     # confirm against the units returned by distance().
     return separation * 1000 <= other.accuracy
예제 #51
0
    def calculate_new_position(self, station, observations):
        """
        Merge new observations into the position estimate of a station.

        The station is updated in place: its position, min/max bounding
        box, range, `new_measures` counter and modified time.

        :param station: The station model to update.
        :param observations: A non-empty list of observations with
            `lat` and `lon` attributes.
        :returns: True if the station was found to be moving, in which
            case it is returned without being updated; None otherwise.
        """
        length = len(observations)
        latitudes = [obs.lat for obs in observations]
        longitudes = [obs.lon for obs in observations]
        new_lat = sum(latitudes) / length
        new_lon = sum(longitudes) / length

        # Compare against None explicitly: 0.0 is a valid coordinate and
        # must not be mistaken for a station without a position.
        existing_station = (station.lat is not None and
                            station.lon is not None)
        if existing_station:
            latitudes.append(station.lat)
            longitudes.append(station.lon)
        else:
            station.lat = new_lat
            station.lon = new_lon

        # calculate extremes of observations, existing location estimate
        # and existing extreme values
        def extreme(vals, attr, function):
            new = function(vals)
            old = getattr(station, attr, None)
            if old is not None:
                return function(new, old)
            else:
                return new

        min_lat = extreme(latitudes, 'min_lat', min)
        min_lon = extreme(longitudes, 'min_lon', min)
        max_lat = extreme(latitudes, 'max_lat', max)
        max_lon = extreme(longitudes, 'max_lon', max)

        # calculate sphere-distance from opposite corners of
        # bounding box containing current location estimate
        # and new observations; if too big, station is moving
        box_dist = distance(min_lat, min_lon, max_lat, max_lon)

        if existing_station:

            if box_dist > self.max_dist_km:
                # Signal a moving station and return early without updating
                # the station since it will be deleted by caller momentarily
                return True

            # limit the maximum weight of the old station estimate and
            # never let it become negative: inconsistent counters would
            # otherwise extrapolate away from the old position or even
            # produce a zero total weight
            old_weight = max(
                min(station.total_measures - length,
                    self.MAX_OLD_OBSERVATIONS),
                0)
            new_weight = old_weight + length

            station.lat = ((station.lat * old_weight) +
                           (new_lat * length)) / new_weight
            station.lon = ((station.lon * old_weight) +
                           (new_lon * length)) / new_weight

        # decrease new counter, total is already correct
        station.new_measures = station.new_measures - length

        # update max/min lat/lon columns
        station.min_lat = min_lat
        station.min_lon = min_lon
        station.max_lat = max_lat
        station.max_lon = max_lon

        # give radio-range estimate between extreme values and centroid
        ctr = (station.lat, station.lon)
        points = [(min_lat, min_lon), (min_lat, max_lon), (max_lat, min_lon),
                  (max_lat, max_lon)]

        station.range = range_to_points(ctr, points) * 1000.0
        station.modified = util.utcnow()
예제 #52
0
def calculate_new_position(station, measures, moving_stations,
                           max_dist_km, backfill=True):
    """
    Merge new measures into the position estimate of a station.

    The station is updated in place, unless it is found to be moving,
    in which case it is only added to `moving_stations`.

    :param station: The station model to update.
    :param measures: A non-empty list of pairs, with the latitude at
        index 0 and the longitude at index 1.
    :param moving_stations: A collection with an `add` method, which
        collects the stations found to be moving.
    :param max_dist_km: The bounding box diagonal above which a station
        counts as moving.
    :param backfill: If true, we work on older measures for which
        the new/total counters were never updated.
    """
    length = len(measures)
    latitudes = [w[0] for w in measures]
    longitudes = [w[1] for w in measures]
    new_lat = sum(latitudes) // length
    new_lon = sum(longitudes) // length

    # Compare against None explicitly: a stored value of 0 is a valid
    # coordinate and must not be mistaken for a missing position.
    existing_station = (station.lat is not None and
                        station.lon is not None)
    if existing_station:
        latitudes.append(station.lat)
        longitudes.append(station.lon)
    else:
        station.lat = new_lat
        station.lon = new_lon

    # calculate extremes of measures, existing location estimate
    # and existing extreme values
    def extreme(vals, attr, function):
        new = function(vals)
        old = getattr(station, attr, None)
        if old is not None:
            return function(new, old)
        else:
            return new

    min_lat = extreme(latitudes, 'min_lat', min)
    min_lon = extreme(longitudes, 'min_lon', min)
    max_lat = extreme(latitudes, 'max_lat', max)
    max_lon = extreme(longitudes, 'max_lon', max)

    # calculate sphere-distance from opposite corners of
    # bounding box containing current location estimate
    # and new measurements; if too big, station is moving
    box_dist = distance(to_degrees(min_lat), to_degrees(min_lon),
                        to_degrees(max_lat), to_degrees(max_lon))

    if existing_station:

        if box_dist > max_dist_km:
            # add to moving list, return early without updating
            # station since it will be deleted by caller momentarily
            moving_stations.add(station)
            return

        if backfill:
            new_total = station.total_measures + length
            old_length = station.total_measures
            # update total to account for new measures
            # new counter never got updated to include the measures
            station.total_measures = new_total
        else:
            new_total = station.total_measures
            old_length = new_total - length

        # weighted average of the old estimate and the new measures
        station.lat = ((station.lat * old_length) +
                       (new_lat * length)) // new_total
        station.lon = ((station.lon * old_length) +
                       (new_lon * length)) // new_total

    if not backfill:
        # decrease new counter, total is already correct
        # in the backfill case new counter was never increased
        station.new_measures = station.new_measures - length

    # update max/min lat/lon columns
    station.min_lat = min_lat
    station.min_lon = min_lon
    station.max_lat = max_lat
    station.max_lon = max_lon

    # give radio-range estimate between extreme values and centroid
    ctr = (to_degrees(station.lat), to_degrees(station.lon))
    points = [(to_degrees(min_lat), to_degrees(min_lon)),
              (to_degrees(min_lat), to_degrees(max_lon)),
              (to_degrees(max_lat), to_degrees(min_lon)),
              (to_degrees(max_lat), to_degrees(max_lon))]

    station.range = range_to_points(ctr, points) * 1000.0
예제 #53
0
    def station_values(self, station_key, shard_station, observations):
        """
        Compute the new column values for a station from observations.

        Returns a two-tuple of status and value dict, where the status
        is one of: `new`, `new_moving`, `moving`, `changed`.
        """
        # We always get a station key and observations, which leaves
        # these outcomes:
        #
        # observations disagree among themselves:
        #     no shard station -> new_moving
        #     shard station    -> moving
        # observations agree among themselves:
        #     no shard station                       -> new
        #     shard station, disagreeing with it     -> moving
        #     shard station, agreeing with it        -> changed
        created = self.utcnow
        values = self._base_station_values(station_key, observations)

        def moving_values():
            # Column values which blank out the position of the existing
            # station and register one more block.
            block_count = shard_station.block_count or 0
            return {
                'lat': None,
                'lon': None,
                'max_lat': None,
                'min_lat': None,
                'max_lon': None,
                'min_lon': None,
                'radius': None,
                'region': shard_station.region,
                'samples': None,
                'source': None,
                'weight': None,
                'block_first': shard_station.block_first or self.today,
                'block_last': self.today,
                'block_count': block_count + 1,
            }

        def or_nan(value):
            # Missing values become NaN, which nanmax/nanmin ignore.
            return numpy.nan if value is None else value

        obs_length = len(observations)
        obs_positions = numpy.array(
            [(obs.lat, obs.lon) for obs in observations],
            dtype=numpy.double)
        obs_weights = numpy.array(
            [obs.weight for obs in observations],
            dtype=numpy.double)
        obs_weight = float(obs_weights.sum())

        # weighted centroid of the new observations
        obs_new_lat, obs_new_lon = [
            float(coord) for coord in numpy.average(
                obs_positions, axis=0, weights=obs_weights)]

        # diagonal of the bounding box around the new observations
        obs_max_lat, obs_max_lon = obs_positions.max(axis=0)
        obs_min_lat, obs_min_lon = obs_positions.min(axis=0)
        obs_box_dist = distance(obs_min_lat, obs_min_lon,
                                obs_max_lat, obs_max_lon)

        if obs_box_dist > self.max_dist_meters:
            # the new observations are already too far apart
            if shard_station:
                values.update(moving_values())
                return ('moving', values)
            values.update({
                'created': created,
                'block_first': self.today,
                'block_last': self.today,
                'block_count': 1,
            })
            return ('new_moving', values)

        if shard_station is None:
            # totally new station, only agreeing observations
            radius = circle_radius(
                obs_new_lat, obs_new_lon,
                obs_max_lat, obs_max_lon, obs_min_lat, obs_min_lon)
            values.update({
                'created': created,
                'lat': obs_new_lat,
                'lon': obs_new_lon,
                'max_lat': float(obs_max_lat),
                'min_lat': float(obs_min_lat),
                'max_lon': float(obs_max_lon),
                'min_lon': float(obs_min_lon),
                'radius': radius,
                'region': GEOCODER.region(obs_new_lat, obs_new_lon),
                'samples': obs_length,
                'source': None,
                'weight': obs_weight,
            })
            return ('new', values)

        # shard_station + new observations: extend the bounding box by
        # the station position and its previous extreme values
        station_points = [
            (or_nan(shard_station.lat), or_nan(shard_station.lon)),
            (or_nan(shard_station.max_lat), or_nan(shard_station.max_lon)),
            (or_nan(shard_station.min_lat), or_nan(shard_station.min_lon)),
        ]
        positions = numpy.append(obs_positions, station_points, axis=0)
        max_lat, max_lon = numpy.nanmax(positions, axis=0)
        min_lat, min_lon = numpy.nanmin(positions, axis=0)
        box_dist = distance(min_lat, min_lon, max_lat, max_lon)

        if box_dist > self.max_dist_meters:
            # shard_station + disagreeing observations
            values.update(moving_values())
            return ('moving', values)

        # shard_station + agreeing observations
        if shard_station.lat is None or shard_station.lon is None:
            # without an old position there is nothing to weigh in
            old_weight = 0
        else:
            old_weight = min((shard_station.weight or 0.0),
                             self.MAX_OLD_WEIGHT)

        total_weight = obs_weight + old_weight
        new_lat = ((obs_new_lat * obs_weight +
                    (shard_station.lat or 0.0) * old_weight) /
                   total_weight)
        new_lon = ((obs_new_lon * obs_weight +
                    (shard_station.lon or 0.0) * old_weight) /
                   total_weight)

        # put in maximum value to avoid overflow of DB column
        samples = min((shard_station.samples or 0) + obs_length,
                      4294967295)
        weight = min((shard_station.weight or 0.0) + obs_weight,
                     1000000000.0)

        radius = circle_radius(
            new_lat, new_lon, max_lat, max_lon, min_lat, min_lon)

        # keep the old region while it still matches the new position,
        # otherwise look the region up again
        region = shard_station.region
        if not region or not GEOCODER.in_region(new_lat, new_lon, region):
            region = GEOCODER.region(new_lat, new_lon)

        values.update({
            'lat': new_lat,
            'lon': new_lon,
            'max_lat': float(max_lat),
            'min_lat': float(min_lat),
            'max_lon': float(max_lon),
            'min_lon': float(min_lon),
            'radius': radius,
            'region': region,
            'samples': samples,
            'source': None,
            'weight': weight,
            # use the exact same keys as in the moving case
            'block_first': shard_station.block_first,
            'block_last': shard_station.block_last,
            'block_count': shard_station.block_count,
        })
        return ('changed', values)
예제 #54
0
    def get(self, query):
        """
        Get a cached result for the query.

        All cached values found for the query are clustered by their
        rounded position and fallback value. A result is only returned
        if all of them fall into a single cluster.

        :param query: The query for which to look for a cached value.
        :type query: :class:`ichnaea.api.locate.query.Query`

        :returns: The cache result or None.
        :rtype: :class:`~ichnaea.api.locate.fallback.ExternalResult`
        """
        if not self._should_cache(query):
            self._stat_count('cache', tags=['status:bypassed'])
            return None

        cache_keys = self._cache_keys(query)
        # dict of (lat, lon, fallback) tuples to ExternalResult list
        # lat/lon clustered into ~100x100 meter grid cells
        clustered_results = defaultdict(list)
        not_found_cluster = (None, None, None)
        try:
            for value in self.redis_client.mget(cache_keys):
                if not value:
                    continue

                value = simplejson.loads(value)
                if value == LOCATION_NOT_FOUND:
                    value = ExternalResult(None, None, None, None)
                    clustered_results[not_found_cluster] = [value]
                else:
                    value = ExternalResult(**value)
                    # ~100x100m clusters; the key has to contain both
                    # the latitude and the longitude, otherwise results
                    # with the same latitude but different longitudes
                    # would be treated as agreeing with each other
                    clustered_results[(round(value.lat, 3),
                                       round(value.lon, 3),
                                       value.fallback)].append(value)
        except (simplejson.JSONDecodeError, RedisError):
            self.raven_client.captureException()
            self._stat_count('cache', tags=['status:failure'])
            return None

        if not clustered_results:
            self._stat_count('cache', tags=['status:miss'])
            return None

        if list(clustered_results.keys()) == [not_found_cluster]:
            # the only match was for not found results
            self._stat_count('cache', tags=['status:hit'])
            return clustered_results[not_found_cluster][0]

        if len(clustered_results) == 1:
            # all the cached values agree with each other
            self._stat_count('cache', tags=['status:hit'])
            results = list(clustered_results.values())[0]

            # one (lat, lon, accuracy) row per cached result
            circles = numpy.array(
                [(res.lat, res.lon, res.accuracy) for res in results],
                dtype=numpy.double)
            points, accuracies = numpy.hsplit(circles, [2])

            # the combined position is the mean of all cached positions
            lat, lon = points.mean(axis=0)
            lat = float(lat)
            lon = float(lon)

            # the combined accuracy is the largest value of distance to
            # a cached position plus the accuracy of that position
            radius = 0.0
            for circle in circles:
                p_dist = distance(lat, lon, circle[0], circle[1]) + circle[2]
                radius = max(radius, p_dist)

            return ExternalResult(
                lat=lat,
                lon=lon,
                accuracy=float(radius),
                fallback=results[0].fallback,
            )

        # inconsistent results
        self._stat_count('cache', tags=['status:inconsistent'])
        return None
예제 #55
0
파일: station.py 프로젝트: awoland/ichnaea
    def calculate_new_position(self, station, observations):
        """
        Merge new observations into the position estimate of a station.

        The station is updated in place: its position, min/max bounding
        box, range, `new_measures` counter and modified time.

        :param station: The station model to update.
        :param observations: A non-empty list of observations with
            `lat` and `lon` attributes.
        :returns: True if the station was found to be moving, in which
            case it is returned without being updated; None otherwise.
        """
        length = len(observations)
        latitudes = [obs.lat for obs in observations]
        longitudes = [obs.lon for obs in observations]
        new_lat = sum(latitudes) / length
        new_lon = sum(longitudes) / length

        # Compare against None explicitly: 0.0 is a valid coordinate and
        # must not be mistaken for a station without a position.
        existing_station = (station.lat is not None and
                            station.lon is not None)
        if existing_station:
            latitudes.append(station.lat)
            longitudes.append(station.lon)
        else:
            station.lat = new_lat
            station.lon = new_lon

        # calculate extremes of observations, existing location estimate
        # and existing extreme values
        def extreme(vals, attr, function):
            new = function(vals)
            old = getattr(station, attr, None)
            if old is not None:
                return function(new, old)
            else:
                return new

        min_lat = extreme(latitudes, 'min_lat', min)
        min_lon = extreme(longitudes, 'min_lon', min)
        max_lat = extreme(latitudes, 'max_lat', max)
        max_lon = extreme(longitudes, 'max_lon', max)

        # calculate sphere-distance from opposite corners of
        # bounding box containing current location estimate
        # and new observations; if too big, station is moving
        box_dist = distance(min_lat, min_lon, max_lat, max_lon)

        if existing_station:

            if box_dist > self.max_dist_km:
                # Signal a moving station and return early without updating
                # the station since it will be deleted by caller momentarily
                return True

            # limit the maximum weight of the old station estimate and
            # never let it become negative: inconsistent counters would
            # otherwise extrapolate away from the old position or even
            # produce a zero total weight
            old_weight = max(
                min(station.total_measures - length,
                    self.MAX_OLD_OBSERVATIONS),
                0)
            new_weight = old_weight + length

            station.lat = ((station.lat * old_weight) +
                           (new_lat * length)) / new_weight
            station.lon = ((station.lon * old_weight) +
                           (new_lon * length)) / new_weight

        # decrease new counter, total is already correct
        station.new_measures = station.new_measures - length

        # update max/min lat/lon columns
        station.min_lat = min_lat
        station.min_lon = min_lon
        station.max_lat = max_lat
        station.max_lon = max_lon

        # give radio-range estimate between extreme values and centroid
        ctr = (station.lat, station.lon)
        points = [(min_lat, min_lon),
                  (min_lat, max_lon),
                  (max_lat, min_lon),
                  (max_lat, max_lon)]

        station.range = range_to_points(ctr, points) * 1000.0
        station.modified = util.utcnow()
예제 #56
0
def search_wifi(session, wifis, stats_client, api_name):
    """
    Estimate a position from the wifi networks contained in a query.

    :param session: A database session used to look up ``Wifi`` rows.
    :param wifis: An iterable of dicts, each having a ``key`` and a
                  ``signal`` entry.
    :param stats_client: Client used to emit ``incr`` / ``timing`` metrics.
    :param api_name: A string used as the prefix of all metric names.
    :returns: A dict with ``lat``, ``lon`` and ``accuracy`` keys, or None
              if too few networks were provided, found or clustered.
    """
    # Estimate signal strength at -100 dBm if none is provided,
    # which is worse than the 99th percentile of wifi dBms we
    # see in practice (-98).

    def signal_strength(w):
        signal = w['signal']
        if signal == 0:
            return -100
        return signal

    wifi_signals = {w['key']: signal_strength(w) for w in wifis}
    wifi_keys = set(wifi_signals)

    if len(wifi_keys) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough keys.
        if len(wifi_keys) >= 1:
            stats_client.incr('%s.wifi.provided_too_few' % api_name)
        return None

    stats_client.timing('%s.wifi.provided' % api_name, len(wifi_keys))

    # Only networks with a known position are of any use here.
    query = session.query(Wifi.key, Wifi.lat, Wifi.lon, Wifi.range).filter(
        Wifi.key.in_(wifi_keys)).filter(Wifi.lat.isnot(None)).filter(
            Wifi.lon.isnot(None))
    wifis = query.all()

    if len(wifis) < len(wifi_keys):
        stats_client.incr('%s.wifi.partial_match' % api_name)
        stats_client.timing('%s.wifi.provided_not_known' % api_name,
                            len(wifi_keys) - len(wifis))

    # Filter out BSSIDs that are numerically very similar, assuming they're
    # multiple interfaces on the same base station or such.
    dissimilar_keys = set(filter_bssids_by_similarity([w.key for w in wifis]))

    if len(dissimilar_keys) < len(wifis):
        stats_client.timing('%s.wifi.provided_too_similar' % api_name,
                            len(wifis) - len(dissimilar_keys))

    wifis = [
        Network(w.key, w.lat, w.lon, w.range) for w in wifis
        if w.key in dissimilar_keys
    ]

    if len(wifis) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough matches.
        stats_client.incr('%s.wifi.found_too_few' % api_name)
        return None

    # Sort networks by signal strengths in query, strongest first.
    # A key function is used instead of a cmp-style comparator, as the
    # latter is not available on Python 3. The sort is stable, so networks
    # with equal signal strength keep their relative order.
    wifis.sort(key=lambda network: wifi_signals[network.key], reverse=True)

    clusters = cluster_elements(
        wifis, lambda a, b: distance(a.lat, a.lon, b.lat, b.lon),
        MAX_WIFI_CLUSTER_KM)

    # The second loop selects a cluster and estimates the position of that
    # cluster. The selected cluster is the one with the most points, larger
    # than MIN_WIFIS_IN_CLUSTER; its position is estimated taking up-to
    # MAX_WIFIS_IN_CLUSTER worth of points from the cluster, which is
    # pre-sorted in signal-strength order due to the way we built the
    # clusters.
    #
    # The reasoning here is that if we have >1 cluster at all, we probably
    # have some bad data -- likely an AP or set of APs associated with a
    # single antenna that moved -- since a user shouldn't be able to hear
    # multiple groups 500m apart.
    #
    # So we're trying to select a cluster that's most-likely good data,
    # which we assume to be the one with the most points in it.
    #
    # The reason we take a subset of those points when estimating location
    # is that we're doing a (non-weighted) centroid calculation, which is
    # itself unbalanced by distant elements. Even if we did a weighted
    # centroid here, using radio intensity as a proxy for distance has an
    # error that increases significantly with distance, so we'd have to
    # underweight pretty heavily.

    clusters = [c for c in clusters if len(c) >= MIN_WIFIS_IN_CLUSTER]

    if not clusters:
        stats_client.incr('%s.wifi.found_no_cluster' % api_name)
        return None

    # max() returns the first of several equally large clusters, which is
    # the same one a stable descending sort by length would put first.
    cluster = max(clusters, key=len)
    sample = cluster[:MAX_WIFIS_IN_CLUSTER]
    length = len(sample)
    avg_lat = sum([n.lat for n in sample]) / length
    avg_lon = sum([n.lon for n in sample]) / length
    return {
        'lat': avg_lat,
        'lon': avg_lon,
        'accuracy': estimate_accuracy(avg_lat, avg_lon, sample,
                                      WIFI_MIN_ACCURACY),
    }
예제 #57
0
def search_wifi(session, wifis):
    """
    Estimate a position from the wifi networks contained in a query.

    :param session: A database session used to look up ``Wifi`` rows.
    :param wifis: An iterable of dicts, each having a ``key`` and a
                  ``signal`` entry.
    :returns: A dict with ``lat``, ``lon`` and ``accuracy`` keys, or None
              if too few networks were provided, found or clustered.
    """
    # Estimate signal strength at -100 dBm if none is provided,
    # which is worse than the 99th percentile of wifi dBms we
    # see in practice (-98).

    def signal_strength(w):
        signal = w['signal']
        if signal == 0:
            return -100
        return signal

    wifi_signals = {w['key']: signal_strength(w) for w in wifis}
    wifi_keys = set(wifi_signals)

    if len(wifi_keys) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough keys.
        return None

    # Only networks with a known position are of any use here.
    query = session.query(Wifi.key, Wifi.lat, Wifi.lon, Wifi.range).filter(
        Wifi.key.in_(wifi_keys)).filter(
        Wifi.lat.isnot(None)).filter(
        Wifi.lon.isnot(None))
    wifis = query.all()

    # Filter out BSSIDs that are numerically very similar, assuming they're
    # multiple interfaces on the same base station or such.
    dissimilar_keys = set(filter_bssids_by_similarity([w.key for w in wifis]))

    wifis = [Network(w.key, w.lat, w.lon, w.range)
             for w in wifis
             if w.key in dissimilar_keys]

    if len(wifis) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough matches.
        return None

    # Sort networks by signal strengths in query, strongest first.
    # A key function is used instead of a cmp-style comparator, as the
    # latter is not available on Python 3. The sort is stable, so networks
    # with equal signal strength keep their relative order.
    wifis.sort(key=lambda network: wifi_signals[network.key], reverse=True)

    clusters = cluster_elements(wifis,
                                lambda a, b: distance(a.lat, a.lon,
                                                      b.lat, b.lon),
                                MAX_WIFI_CLUSTER_KM)

    # The second loop selects a cluster and estimates the position of that
    # cluster. The selected cluster is the one with the most points, larger
    # than MIN_WIFIS_IN_CLUSTER; its position is estimated taking up-to
    # MAX_WIFIS_IN_CLUSTER worth of points from the cluster, which is
    # pre-sorted in signal-strength order due to the way we built the
    # clusters.
    #
    # The reasoning here is that if we have >1 cluster at all, we probably
    # have some bad data -- likely an AP or set of APs associated with a
    # single antenna that moved -- since a user shouldn't be able to hear
    # multiple groups 500m apart.
    #
    # So we're trying to select a cluster that's most-likely good data,
    # which we assume to be the one with the most points in it.
    #
    # The reason we take a subset of those points when estimating location
    # is that we're doing a (non-weighted) centroid calculation, which is
    # itself unbalanced by distant elements. Even if we did a weighted
    # centroid here, using radio intensity as a proxy for distance has an
    # error that increases significantly with distance, so we'd have to
    # underweight pretty heavily.

    clusters = [c for c in clusters if len(c) >= MIN_WIFIS_IN_CLUSTER]

    if not clusters:
        return None

    # max() returns the first of several equally large clusters, which is
    # the same one a stable descending sort by length would put first.
    cluster = max(clusters, key=len)
    sample = cluster[:MAX_WIFIS_IN_CLUSTER]
    length = len(sample)
    avg_lat = sum([n.lat for n in sample]) / length
    avg_lon = sum([n.lon for n in sample]) / length
    return {
        'lat': avg_lat,
        'lon': avg_lon,
        'accuracy': estimate_accuracy(avg_lat, avg_lon,
                                      sample, WIFI_MIN_ACCURACY),
    }
예제 #58
0
파일: station.py 프로젝트: ingle/ichnaea
    def station_values(self, station_key, shard_station, observations):
        """
        Work out the status and the column values for a single station.

        The new observations are evaluated on their own and, if a stored
        `shard_station` exists, together with its position and bounds.
        Returns a two-tuple `(status, values)` with status being one of:

        * `new_moving` - no stored station, observations too far apart
        * `moving` - stored station, and either the observations alone or
          the observations plus the stored position/bounds are too far
          apart
        * `new` - no stored station, observations agree
        * `changed` - stored station, observations agree with it

        "Too far apart" means the diagonal of the bounding box exceeds
        `self.max_dist_meters`.
        """
        created = self.utcnow
        values = self._base_station_values(station_key, observations)

        def nan_if_none(value):
            # NaN entries are ignored by numpy's nanmin / nanmax.
            return numpy.nan if value is None else value

        def mark_moving():
            # Blank out all position data of the stored station and
            # record one more block, ending today.
            previous_blocks = shard_station.block_count or 0
            values.update({
                'lat': None,
                'lon': None,
                'max_lat': None,
                'min_lat': None,
                'max_lon': None,
                'min_lon': None,
                'radius': None,
                'region': shard_station.region,
                'samples': None,
                'source': None,
                'block_first': shard_station.block_first or self.today,
                'block_last': self.today,
                'block_count': previous_blocks + 1,
            })
            return ('moving', values)

        # Centroid and bounding box of the new observations alone.
        obs_length = len(observations)
        obs_positions = numpy.array(
            [(obs.lat, obs.lon) for obs in observations],
            dtype=numpy.double)
        obs_new_lat, obs_new_lon = centroid(obs_positions)
        obs_max_lat, obs_max_lon = numpy.nanmax(obs_positions, axis=0)
        obs_min_lat, obs_min_lon = numpy.nanmin(obs_positions, axis=0)
        obs_box_dist = distance(
            obs_min_lat, obs_min_lon, obs_max_lat, obs_max_lon)

        if obs_box_dist > self.max_dist_meters:
            # The new observations disagree among themselves.
            if shard_station:
                return mark_moving()
            values.update({
                'created': created,
                'block_first': self.today,
                'block_last': self.today,
                'block_count': 1,
            })
            return ('new_moving', values)

        if shard_station is None:
            # Brand new station based on agreeing observations only.
            radius = circle_radius(
                obs_new_lat, obs_new_lon,
                obs_max_lat, obs_max_lon,
                obs_min_lat, obs_min_lon)
            values.update({
                'created': created,
                'lat': obs_new_lat,
                'lon': obs_new_lon,
                'max_lat': float(obs_max_lat),
                'min_lat': float(obs_min_lat),
                'max_lon': float(obs_max_lon),
                'min_lon': float(obs_min_lon),
                'radius': radius,
                'region': GEOCODER.region(obs_new_lat, obs_new_lon),
                'samples': obs_length,
                'source': None,
            })
            return ('new', values)

        # Stored station: extend the bounding box by its position and by
        # the two corners of its previous bounds, where those are known.
        stored_rows = [
            (nan_if_none(shard_station.lat),
             nan_if_none(shard_station.lon)),
            (nan_if_none(shard_station.max_lat),
             nan_if_none(shard_station.max_lon)),
            (nan_if_none(shard_station.min_lat),
             nan_if_none(shard_station.min_lon)),
        ]
        positions = numpy.append(obs_positions, stored_rows, axis=0)
        max_lat, max_lon = numpy.nanmax(positions, axis=0)
        min_lat, min_lon = numpy.nanmin(positions, axis=0)

        if distance(min_lat, min_lon,
                    max_lat, max_lon) > self.max_dist_meters:
            # The observations disagree with the stored station.
            return mark_moving()

        # The observations agree with the stored station. A station
        # without a position contributes nothing to the weighted mean,
        # otherwise its weight is capped at MAX_OLD_OBSERVATIONS.
        has_no_position = (shard_station.lat is None or
                           shard_station.lon is None)
        old_weight = 0 if has_no_position else min(
            (shard_station.samples or 0), self.MAX_OLD_OBSERVATIONS)
        total_weight = obs_length + old_weight

        new_lat = (obs_new_lat * obs_length +
                   (shard_station.lat or 0.0) * old_weight) / total_weight
        new_lon = (obs_new_lon * obs_length +
                   (shard_station.lon or 0.0) * old_weight) / total_weight
        samples = (shard_station.samples or 0) + obs_length
        radius = circle_radius(
            new_lat, new_lon, max_lat, max_lon, min_lat, min_lon)

        # Keep the stored region only while the new position still lies
        # inside of it, otherwise look the region up again.
        region = shard_station.region
        if not region or not GEOCODER.in_region(new_lat, new_lon, region):
            region = GEOCODER.region(new_lat, new_lon)

        values.update({
            'lat': new_lat,
            'lon': new_lon,
            'max_lat': float(max_lat),
            'min_lat': float(min_lat),
            'max_lon': float(max_lon),
            'min_lon': float(min_lon),
            'radius': radius,
            'region': region,
            'samples': samples,
            'source': None,
            # use the exact same keys as in the moving case
            'block_first': shard_station.block_first,
            'block_last': shard_station.block_last,
            'block_count': shard_station.block_count,
        })
        return ('changed', values)
예제 #59
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None,
                       result_type='position'):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: Enable additional api key specific logging?
    :param api_key_name: The metric friendly api key name.
    :param result_type: What kind of result to return, either a lat/lon
                        position or a country estimate.
    :returns: None if no data source produced a result. For the
              ``position`` result type a dict with rounded ``lat``,
              ``lon`` and ``accuracy`` values. For the ``country``
              result type a dict with ``country_name`` and
              ``country_code``, provided at least one candidate country
              is known; the code shown here has no explicit return for
              the case without candidate countries.
    :raises ValueError: If `result_type` is neither ``country`` nor
                        ``position``.
    """

    if result_type not in ('country', 'position'):
        raise ValueError('Invalid result_type, must be one of '
                         'position or country')

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # The best estimate so far and the name of the source it came from.
    result = None
    result_metric = None

    # 'wifi' and 'cell' hold the normalized query data, the '*_network'
    # lists hold the matching database entries. 'cell_lac' receives the
    # same cell keys as 'cell' and is only tested for being non-empty
    # in the zoom-in loop below.
    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            validated['cell_lac'].add(cell_key)

    found_cells = []

    # Query all cells and OCID cells
    for model in Cell, OCIDCell, CellArea:
        cell_filter = []
        for key in validated['cell']:
            # create a list of 'and' criteria for cell keys
            criterion = join_cellkey(model, key)
            cell_filter.append(and_(*criterion))

        if cell_filter:
            # only do a query if we have cell results, or this will match
            # all rows in the table
            load_fields = ('radio', 'mcc', 'mnc', 'lac', 'lat', 'lon', 'range')
            query = (session.query(model).options(
                load_only(*load_fields)).filter(or_(*cell_filter)).filter(
                    model.lat.isnot(None)).filter(model.lon.isnot(None)))

            try:
                found_cells.extend(query.all())
            except Exception:
                # Best effort: a failing query is reported and otherwise
                # ignored, so the remaining models are still queried.
                heka_client.raven(RAVEN_ERROR)

    if found_cells:
        # Group all found_cells by location area
        lacs = defaultdict(list)
        for cell in found_cells:
            cellarea_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
            lacs[cellarea_key].append(cell)

        def sort_lac(v):
            # use the lac with the most values,
            # or the one with the smallest range
            return (len(v), -min([e.range for e in v]))

        # If we get data from multiple location areas, use the one with the
        # most data points in it. That way a lac with a cell hit will
        # have two entries and win over a lac with only the lac entry.
        lac = sorted(lacs.values(), key=sort_lac, reverse=True)

        for cell in lac[0]:
            # The first entry is the key,
            # used only to distinguish cell from lac
            network = Network(key=None,
                              lat=cell.lat,
                              lon=cell.lon,
                              range=cell.range)
            # Exact type check: only CellArea rows count as cell-lac data.
            if type(cell) is CellArea:
                validated['cell_lac_network'].append(network)
            else:
                validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res,
     countries) = geoip_and_best_guess_country_codes(validated['cell'],
                                                     api_name, client_addr,
                                                     geoip_db, stats_client)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    # Each entry is: key in `validated` deciding whether the source is
    # tried at all, key in `validated` holding the search input, the
    # metric name and the search function.
    for (data_field, object_field, metric_name, search_fn) in [
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi)
    ]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field], stats_client,
                              api_name)
            except Exception:
                # A failing search is reported and then treated like a
                # search that found nothing.
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' % (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' % (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' % (api_name, metric_name))

                # Skip any hit that matches none of the possible countries.
                # NOTE(review): the code below only counts the mismatch
                # as an anomaly, the hit itself is still considered.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # The new position has to lie within the accuracy of the
                # current estimate (presumably distance() returns km and
                # accuracy is in meters, hence the factor -- TODO confirm).
                elif (distance(float(result['lat']), float(result['lon']), lat,
                               lon) * 1000 <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            # Neither usable wifi nor cell data was sent, so only GeoIP
            # could have answered this request.
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))

    if result_type == 'position':
        # Note that accuracy is rounded to DEGREE_DECIMAL_PLACES, too.
        rounded_result = {
            'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
            'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
            'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
        }
        stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                            rounded_result['accuracy'])
        return rounded_result
    elif result_type == 'country':
        # The first candidate country is reported.
        if countries:
            country = iso3166.countries.get(countries[0])
            return {
                'country_name': country.name,
                'country_code': country.alpha2
            }