Exemple #1
0
    def aggregate_station_obs(self):
        """
        Merge the aggregated observation data into the existing station.

        Combines ``self.obs_data`` with the position, bounding box,
        samples and weight stored on ``self.station``.

        :returns: A dict with the new position, bounding box, radius,
                  region, samples and weight values.
        :rtype: dict
        """
        obs_data = self.obs_data
        station = self.station

        def station_value(name):
            # Missing or unset station attributes become NaN, which the
            # nanmax / nanmin calls below skip over.
            found = getattr(station, name, None)
            if found is None:
                return numpy.nan
            return found

        obs_positions = obs_data['positions']
        station_positions = [
            (station_value('lat'), station_value('lon')),
            (station_value('max_lat'), station_value('max_lon')),
            (station_value('min_lat'), station_value('min_lon')),
        ]
        positions = numpy.append(obs_positions, station_positions, axis=0)

        max_lat, max_lon = numpy.nanmax(positions, axis=0)
        min_lat, min_lon = numpy.nanmin(positions, axis=0)

        # A station without a complete position gets no weight in the
        # average; otherwise its weight is capped at MAX_OLD_WEIGHT.
        old_weight = 0.0
        if station.lat is not None and station.lon is not None:
            old_weight = min((station.weight or 0.0), self.MAX_OLD_WEIGHT)

        new_weight = obs_data['weight']
        total_weight = new_weight + old_weight
        lat = ((obs_data['lat'] * new_weight +
                (station.lat or 0.0) * old_weight) / total_weight)
        lon = ((obs_data['lon'] * new_weight +
                (station.lon or 0.0) * old_weight) / total_weight)

        radius = circle_radius(lat, lon, max_lat, max_lon, min_lat, min_lon)

        # Keep the stored region only while the new position is still
        # inside it, otherwise look the region up again.
        region = station.region
        if not (region and GEOCODER.in_region(lat, lon, region)):
            region = GEOCODER.region(lat, lon)

        total_samples = (station.samples or 0) + obs_data['samples']
        summed_weight = (station.weight or 0.0) + obs_data['weight']
        samples, weight = self.bounded_samples_weight(
            total_samples, summed_weight)

        return {
            'lat': lat,
            'lon': lon,
            'max_lat': float(max_lat),
            'min_lat': float(min_lat),
            'max_lon': float(max_lon),
            'min_lon': float(min_lon),
            'radius': radius,
            'region': region,
            'samples': samples,
            'weight': weight,
        }
Exemple #2
0
    def region(self, ctr_lat, ctr_lon, mcc, cells):
        """
        Choose the region of an area from the regions of its cells.

        The region found on the largest number of cells wins. If several
        regions share the highest count, the first one in sorted order is
        taken as a fallback and the geocoder is asked for the region of
        the area center; its answer is used unless it is None.

        :param ctr_lat: Latitude passed to the geocoder for tie-breaking.
        :param ctr_lon: Longitude passed to the geocoder for tie-breaking.
        :param mcc: Mobile country code passed to the geocoder.
        :param cells: Iterable of objects with a ``region`` attribute.

        :returns: The chosen region, or None if there are no cells or
                  the winning region is None.
        """
        regions = [cell.region for cell in cells]
        if not regions:
            # Without cells there is nothing to vote on; max() below
            # would raise a ValueError on the empty sequence.
            return None

        grouped_regions = defaultdict(int)
        for reg in regions:
            grouped_regions[reg] += 1

        if len(grouped_regions) == 1:
            return regions[0]

        # Choose the area region based on the majority of cells
        # inside each region.
        max_count = max(grouped_regions.values())
        # Sort with None first, so a tie between cells without a region
        # and cells with a region code cannot raise a TypeError when
        # None and str are compared on Python 3.
        max_regions = sorted(
            (reg for reg, count in grouped_regions.items()
             if count == max_count),
            key=lambda reg: (reg is not None, reg or ''))

        # On a tie, deterministically fall back to the first sorted entry.
        region = max_regions[0]
        if len(max_regions) > 1:
            # Try to break the tie based on the center of the area,
            # but keep the fallback region if this fails.
            area_region = GEOCODER.region_for_cell(ctr_lat, ctr_lon, mcc)
            if area_region is not None:
                region = area_region

        return region
Exemple #3
0
    def aggregate_obs(self):
        """
        Aggregate ``self.observations`` into one weighted position.

        :returns: None if the distance between the corners of the
                  observations' bounding box exceeds
                  ``self.MAX_DIST_METERS``, otherwise a dict with the
                  raw positions and weights plus the aggregated position,
                  bounding box, radius, region, samples and weight.
        """
        lat_lons = [(obs.lat, obs.lon) for obs in self.observations]
        positions = numpy.array(lat_lons, dtype=numpy.double)

        upper = positions.max(axis=0)
        lower = positions.min(axis=0)
        max_lat, max_lon = upper
        min_lat, min_lon = lower

        # Observations spread too far apart are not aggregated at all.
        if distance(min_lat, min_lon, max_lat, max_lon) > \
                self.MAX_DIST_METERS:
            return None

        obs_weights = [obs.weight for obs in self.observations]
        weights = numpy.array(obs_weights, dtype=numpy.double)

        center = numpy.average(positions, axis=0, weights=weights)
        lat = float(center[0])
        lon = float(center[1])
        radius = circle_radius(lat, lon, max_lat, max_lon, min_lat, min_lon)
        region = GEOCODER.region(lat, lon)

        samples, weight = self.bounded_samples_weight(
            len(self.observations), float(weights.sum()))

        return {
            'positions': positions,
            'weights': weights,
            'lat': lat,
            'lon': lon,
            'max_lat': float(max_lat),
            'min_lat': float(min_lat),
            'max_lon': float(max_lon),
            'min_lon': float(min_lon),
            'radius': radius,
            'region': region,
            'samples': samples,
            'weight': weight,
        }
Exemple #4
0
    def radius(self, code, subs=None, city=None, default=REGION_RADIUS):
        """
        Return the best radius guess for the given region code.

        :param code: A two-letter region code.
        :type code: str

        :param subs: A list of ISO subdivision codes.
        :type subs: list

        :param city: A geoname_id from a city record or None.
        :type city: int

        :param default: Region radius used if the lookup for the code
                        returns None.

        :returns: A tuple of radius/region radius guesses in meters.
        :rtype: tuple
        """
        looked_up = GEOCODER.region_max_radius(code)
        # No region code or no successful radius lookup falls back
        # to the default.
        region_radius = default if looked_up is None else looked_up

        # The region radius is an upper bound for the subdivision and
        # city radius. E.g. Vatican City cannot be larger than the
        # Vatican as a region.
        best = region_radius
        if subs:
            sub_radius = SUB_RADII.get(code, SUB_RADIUS)
            best = min(sub_radius, best)
        if city:
            city_radius = CITY_RADII.get(city, CITY_RADIUS)
            best = min(city_radius, best)

        return (best, region_radius)
Exemple #5
0
    def validator(self, node, cstruct):
        """
        Run the parent validation, then require the position to be
        inside a region of the observation's MCC.

        :raises colander.Invalid: If the geocoder reports the lat/lon
                                  as outside the regions for the MCC.
        """
        super(ValidCellObservationSchema, self).validator(node, cstruct)

        lat, lon, mcc = cstruct['lat'], cstruct['lon'], cstruct['mcc']
        if GEOCODER.in_region_mcc(lat, lon, mcc):
            return

        raise colander.Invalid(node, (
            'Lat/lon must be inside one of the regions for the MCC'))
Exemple #6
0
    def validator(self, node, cstruct):
        """
        Run the parent validation, then require a lat/lon that is
        present and inside some region.

        :raises colander.Invalid: If lat or lon is None or colander.null,
                                  or the position is outside any region.
        """
        super(ValidReportSchema, self).validator(node, cstruct)

        for field in ('lat', 'lon'):
            value = cstruct[field]
            if value is None or value is colander.null:
                raise colander.Invalid(node, 'Report %s is required.' % field)

        if GEOCODER.any_region(cstruct['lat'], cstruct['lon']):
            return

        raise colander.Invalid(node, 'Lat/lon must be inside a region.')
Exemple #7
0
 def test_ambiguous_mcc(self):
     # Every region known for MCC 234 is expected in the result
     # for a single cell with that MCC.
     expected_regions = GEOCODER.regions_for_mcc(234, metadata=True)
     query = self.model_query(cells=[CellFactory.build(mcc=234)])
     found = self.source.search(query)
     self.check_model_result(found, expected_regions)

     source_tags = [
         'key:test', 'region:none', 'source:internal',
         'accuracy:low', 'status:hit',
     ]
     self.check_stats(counter=[
         (self.api_type + '.source', source_tags),
     ])
Exemple #8
0
 def test_no_match(self):
     # Each of these lookups is expected to return an empty list.
     lookups = (
         (None, {}),
         (None, {'metadata': True}),
         (1, {}),
         (1, {'metadata': True}),
         ('', {}),
         ('1', {'metadata': True}),
     )
     for mcc, options in lookups:
         assert GEOCODER.regions_for_mcc(mcc, **options) == []
Exemple #9
0
 def test_no_match(self):
     # Each of these lookups is expected to return an empty list.
     lookups = (
         (None, {}),
         (None, {'metadata': True}),
         (1, {}),
         (1, {'metadata': True}),
         ('', {}),
         ('1', {'metadata': True}),
     )
     for mcc, options in lookups:
         matches = GEOCODER.regions_for_mcc(mcc, **options)
         self.assertEqual(matches, [])
Exemple #10
0
    def test_multiple_mcc(self):
        # Two areas with different MCCs: the best result should be the
        # first region of MCC 235, scored 1.25 plus the score of the
        # MCC 234 area.
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        first_area = CellAreaFactory(mcc=234, num_cells=6)
        second_area = CellAreaFactory(mcc=235, num_cells=8)
        self.session.flush()

        query = self.model_query(cells=[first_area, second_area])
        results = self.source.search(query)
        self.assertTrue(len(results) > 2)

        best = results.best(query.expected_accuracy)
        self.assertEqual(best.region_code, expected_region.code)
        expected_score = 1.25 + first_area.score(now)
        self.assertAlmostEqual(best.score, expected_score, 4)
def upgrade():
    """
    Fill in the ``region`` column of ``cell_area`` rows that have none.

    First sets the region for every MCC listed in ``MCC_TO_REGION``,
    then geocodes each remaining row individually and updates the rows
    grouped by the resulting region code. Rows for which the geocoder
    returns no code are left unchanged.
    """
    bind = op.get_bind()
    from ichnaea.geocode import GEOCODER

    log.info('Update cell_area regions.')
    mcc_update = '''\
UPDATE cell_area
SET `region` = "{code}"
WHERE `radio` IN (0, 1, 2, 3) AND `mcc` = {mcc} AND `region` IS NULL
'''
    total = len(MCC_TO_REGION)
    for index, (mcc, code) in enumerate(MCC_TO_REGION.items()):
        op.execute(sa.text(mcc_update.format(code=code, mcc=mcc)))
        # Report progress on every tenth entry.
        if index > 0 and index % 10 == 0:
            log.info('Updated %s of %s regions.', index, total)
    log.info('Updated %s of %s regions.', total, total)

    count_query = 'SELECT COUNT(*) FROM cell_area WHERE region IS NULL'
    todo = bind.execute(count_query).fetchone()[0]
    log.info('Updating remaining %s areas.', todo)

    select_query = '''\
SELECT HEX(`areaid`), `mcc`, `lat`, `lon`
FROM cell_area
WHERE `region` IS NULL
'''
    rows = bind.execute(select_query).fetchall()

    # Maps region code -> list of hex encoded area ids.
    areas = {}
    for index, row in enumerate(rows):
        if index > 0 and index % 5000 == 0:
            log.info('Geocoded %s of %s areas.', index, todo)
        code = GEOCODER.region_for_cell(row.lat, row.lon, row.mcc)
        areas.setdefault(code, []).append(row[0])
    log.info('Geocoded %s of %s areas.', todo, todo)

    region_update = '''\
UPDATE cell_area
SET `region` = "{code}"
WHERE `areaid` in ({ids})
'''
    for code, areaids in areas.items():
        if code:
            ids = 'UNHEX("' + '"), UNHEX("'.join(areaids) + '")'
            op.execute(sa.text(region_update.format(code=code, ids=ids)))
            log.info('Updated %s region.', code)
Exemple #12
0
    def test_from_mcc(self):
        # A single area with MCC 235 should yield exactly the first
        # region of that MCC with a score of 1.0.
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        area = CellAreaFactory(mcc=235, num_cells=10)
        self.session.flush()

        results = self.source.search(self.model_query(cells=[area]))
        self.check_model_results(results, [expected_region])
        self.assertAlmostEqual(results[0].score, 1.0, 4)

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        self.check_stats(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #13
0
    def test_multiple_mcc(self, geoip_db, http_session,
                          session, source, stats):
        # Two areas with different MCCs: the best result should be the
        # first region of MCC 235, scored 1.25 plus the area score of
        # the MCC 234 area.
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        first_area = CellAreaFactory(mcc=234, num_cells=6)
        second_area = CellAreaFactory(mcc=235, num_cells=8)
        session.flush()

        query = self.model_query(
            geoip_db, http_session, session, stats,
            cells=[first_area, second_area])
        results = source.search(query)
        assert len(results) > 2

        best = results.best()
        assert best.region_code == expected_region.code
        assert best.score == 1.25 + area_score(first_area, now)
Exemple #14
0
    def test_from_mcc(self, geoip_db, http_session,
                      session, source, stats):
        # A single area with MCC 235 should yield exactly the first
        # region of that MCC with a score of 1.0.
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        area = CellAreaFactory(mcc=235, num_cells=10)
        session.flush()

        query = self.model_query(
            geoip_db, http_session, session, stats,
            cells=[area])
        results = source.search(query)
        self.check_model_results(results, [expected_region])
        assert results[0].score == 1.0

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        stats.check(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #15
0
    def search_cell(self, query):
        """
        Build region results from the cells and cell areas of the query.

        Each cell contributes a score of 1.0 split evenly between the
        regions of its mobile country code. For cells whose code maps to
        more than one region, matching areas are queried and their area
        score is added to the region they are stored under.

        :param query: The query providing ``cell`` and ``cell_area``.
        :returns: The result list, one result per candidate region.
        """
        results = self.result_list()
        now = util.utcnow()

        # Maps region code -> [region metadata, summed score].
        grouped_regions = {}
        ambiguous_cells = []
        for cell in list(query.cell) + list(query.cell_area):
            mcc_regions = GEOCODER.regions_for_mcc(
                cell.mobileCountryCode, metadata=True)
            # Divide score by number of possible regions for the mcc
            share = 1.0 / (len(mcc_regions) or 1.0)
            for mcc_region in mcc_regions:
                entry = grouped_regions.get(mcc_region.code)
                if entry is None:
                    grouped_regions[mcc_region.code] = [mcc_region, share]
                else:
                    # Sum up scores of multiple matches
                    entry[1] += share
            if len(mcc_regions) > 1:
                ambiguous_cells.append(cell)

        if ambiguous_cells:
            # Only do a database query if the mcc is ambiguous.
            # The area models are used for both area and cell entries,
            # as only the region is of interest here, which won't
            # differ between the individual cells inside an area.
            for area in query_areas(query, ambiguous_cells,
                                    self.area_model, self.raven_client):
                area_code = area.region
                if area_code and area_code in grouped_regions:
                    grouped_regions[area_code][1] += area_score(area, now)

        for region, score in grouped_regions.values():
            result = self.result_type(
                region_code=region.code,
                region_name=region.name,
                accuracy=region.radius,
                score=score)
            results.add(result)

        return results
Exemple #16
0
    def search_mcc(self, query):
        """
        Return one result for every region of every distinct MCC found
        on the cells and cell areas of the query.
        """
        results = ResultList()

        cells = list(query.cell) + list(query.cell_area)
        codes = set(cell.mcc for cell in cells)

        # Look up all regions first, then turn them into results.
        regions = [
            region
            for code in codes
            for region in GEOCODER.regions_for_mcc(code, metadata=True)
        ]

        for region in regions:
            result = self.result_type(
                region_code=region.code,
                region_name=region.name,
                accuracy=region.radius)
            results.add(result)
        return results
Exemple #17
0
    def validate(cls, entry, _raise_invalid=False, **kw):
        """
        Validate the entry via the parent class and fill in derived keys.

        If the validated data has no ``areaid`` yet, it is set to the
        (radio, mcc, mnc, lac) tuple, and a missing or empty ``region``
        is looked up from lat/lon/mcc when both lat and lon are present.

        :returns: The validated data, or None if the parent returned None.
        """
        validated = super(CellAreaMixin, cls).validate(
            entry, _raise_invalid=_raise_invalid, **kw)

        # Nothing to derive for invalid entries or an existing areaid.
        if validated is None or 'areaid' in validated:
            return validated

        validated['areaid'] = tuple(
            validated[key] for key in ('radio', 'mcc', 'mnc', 'lac'))

        has_region = 'region' in validated and bool(validated['region'])
        if (not has_region and
                validated['lat'] is not None and
                validated['lon'] is not None):
            validated['region'] = GEOCODER.region_for_cell(
                validated['lat'], validated['lon'], validated['mcc'])

        return validated
Exemple #18
0
    def search_mcc(self, query):
        """
        Return one result for every region of every distinct MCC found
        on the cells and cell areas of the query.
        """
        results = ResultList()

        # Collect each MCC only once.
        codes = set()
        for cell in list(query.cell) + list(query.cell_area):
            codes.add(cell.mcc)

        regions = []
        for code in codes:
            regions += GEOCODER.regions_for_mcc(code, metadata=True)

        for region in regions:
            result = self.result_type(
                region_code=region.code,
                region_name=region.name,
                accuracy=region.radius)
            results.add(result)
        return results
Exemple #19
0
    def search_blue(self, query):
        """
        Build region results from the Bluetooth networks of the query.

        The station scores of all networks found via ``query_macs`` are
        summed per stored region; regions the geocoder does not know
        are skipped.
        """
        results = self.result_list()

        now = util.utcnow()
        # Maps region code -> summed station score.
        scores = defaultdict(int)
        for blue in query_macs(
                query, query.blue, self.raven_client, BlueShard):
            scores[blue.region] += station_score(blue, now)

        for code, score in scores.items():
            region = GEOCODER.region_for_code(code)
            if not region:
                continue
            results.add(self.result_type(
                region_code=code,
                region_name=region.name,
                accuracy=region.radius,
                score=score))

        return results
Exemple #20
0
    def search_wifi(self, query):
        """
        Build region results from the WiFi networks of the query.

        The station scores of all networks found via ``query_macs`` are
        summed per stored region; regions the geocoder does not know
        are skipped.
        """
        results = self.result_list()

        now = util.utcnow()
        # Maps region code -> summed station score.
        scores = defaultdict(int)
        for wifi in query_macs(
                query, query.wifi, self.raven_client, WifiShard):
            scores[wifi.region] += station_score(wifi, now)

        for code, score in scores.items():
            region = GEOCODER.region_for_code(code)
            if not region:
                continue
            results.add(self.result_type(
                region_code=code,
                region_name=region.name,
                accuracy=region.radius,
                score=score))

        return results
Exemple #21
0
    def test_ambiguous_mcc(self):
        # All regions of MCC 234 are expected with a base score of 0.25;
        # only GB additionally gets the score of the stored area.
        now = util.utcnow()
        expected_regions = GEOCODER.regions_for_mcc(234, metadata=True)
        area = CellAreaFactory(mcc=234, num_cells=10)
        self.session.flush()

        results = self.source.search(self.model_query(cells=[area]))
        self.check_model_results(results, expected_regions)
        self.assertEqual(results.best().region_code, 'GB')

        for result in results:
            if result.region_code == 'GB':
                expected_score = 0.25 + area.score(now)
            else:
                expected_score = 0.25
            self.assertAlmostEqual(result.score, expected_score, 4)

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        self.check_stats(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #22
0
    def test_blue(self):
        # The third network is only built, not created via the factory
        # call; the expected score sums the first two networks only.
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        stored_a = BlueShardFactory(samples=10)
        stored_b = BlueShardFactory(samples=20)
        built_only = BlueShardFactory.build(region='DE', samples=100)
        self.session.flush()

        query = self.model_query(blues=[stored_a, stored_b, built_only])
        results = self.source.search(query)
        self.check_model_results(results, [expected_region])

        best = results.best()
        self.assertEqual(best.region_code, expected_region.code)
        expected_score = stored_a.score(now) + stored_b.score(now)
        self.assertAlmostEqual(best.score, expected_score, 4)

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        self.check_stats(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #23
0
    def test_wifi(self):
        # The third network is only built, not created via the factory
        # call; the expected score sums the first two networks only.
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        stored_a = WifiShardFactory(samples=10)
        stored_b = WifiShardFactory(samples=20)
        built_only = WifiShardFactory.build(region='DE', samples=100)
        self.session.flush()

        query = self.model_query(wifis=[stored_a, stored_b, built_only])
        results = self.source.search(query)
        self.check_model_results(results, [expected_region])

        best = results.best(query.expected_accuracy)
        self.assertEqual(best.region_code, expected_region.code)
        expected_score = stored_a.score(now) + stored_b.score(now)
        self.assertAlmostEqual(best.score, expected_score, 4)

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        self.check_stats(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #24
0
    def test_blue(self, geoip_db, http_session,
                  session, source, stats):
        # The third network is only built, not created via the factory
        # call; the expected score sums the first two networks only.
        now = util.utcnow()
        expected_region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        stored_a = BlueShardFactory(samples=10)
        stored_b = BlueShardFactory(samples=20)
        built_only = BlueShardFactory.build(region='DE', samples=100)
        session.flush()

        query = self.model_query(
            geoip_db, http_session, session, stats,
            blues=[stored_a, stored_b, built_only])
        results = source.search(query)
        self.check_model_results(results, [expected_region])

        best = results.best()
        assert best.region_code == expected_region.code
        assert (best.score ==
                station_score(stored_a, now) + station_score(stored_b, now))

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        stats.check(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #25
0
    def test_ambiguous_mcc(self, geoip_db, http_session,
                           session, source, stats):
        # All regions of MCC 234 are expected with a base score of 0.25;
        # only GB additionally gets the area score of the stored area.
        now = util.utcnow()
        expected_regions = GEOCODER.regions_for_mcc(234, metadata=True)
        area = CellAreaFactory(mcc=234, num_cells=10)
        session.flush()

        query = self.model_query(
            geoip_db, http_session, session, stats,
            cells=[area])
        results = source.search(query)
        self.check_model_results(results, expected_regions)
        assert results.best().region_code == 'GB'

        for result in results:
            if result.region_code == 'GB':
                expected_score = 0.25 + area_score(area, now)
            else:
                expected_score = 0.25
            assert result.score == expected_score

        source_tags = [
            'key:test', 'region:none', 'source:internal',
            'accuracy:low', 'status:hit',
        ]
        stats.check(counter=[
            (self.api_type + '.source', source_tags),
        ])
Exemple #26
0
 def test_all_valid_mcc(self):
     # Every valid MCC must map to at least one region, and only to
     # regions contained in GEOCODER._valid_regions.
     for mcc in ALL_VALID_MCCS:
         found = set(GEOCODER.regions_for_mcc(mcc))
         self.assertNotEqual(found, set())
         unexpected = found - GEOCODER._valid_regions
         self.assertEqual(unexpected, set())
Exemple #27
0
 def test_filtered(self):
     # AX / Aland Islands is not in the GENC list,
     # so only FI is expected for MCC 244.
     found = set(GEOCODER.regions_for_mcc(244))
     self.assertEqual(found, {'FI'})
Exemple #28
0
 def test_multiple(self):
     # MCC 311 is expected to map to both GU and US, with and
     # without metadata.
     expected = {'GU', 'US'}
     codes = GEOCODER.regions_for_mcc(311)
     self.assertEqual(set(codes), expected)
     with_metadata = GEOCODER.regions_for_mcc(311, metadata=True)
     self.assertEqual({entry.code for entry in with_metadata}, expected)
Exemple #29
0
 def test_single(self):
     # MCC 262 is expected to map to DE only, with and without metadata.
     expected = {'DE'}
     codes = GEOCODER.regions_for_mcc(262)
     self.assertEqual(set(codes), expected)
     with_metadata = GEOCODER.regions_for_mcc(262, metadata=True)
     self.assertEqual({entry.code for entry in with_metadata}, expected)
Exemple #30
0
 def test_max_radius_fail(self):
     # None of these values is expected to yield a radius.
     invalid_codes = (None, 42, 'A', 'us', 'USA', 'AA')
     for code in invalid_codes:
         found = GEOCODER.region_max_radius(code)
         self.assertTrue(found is None)
Exemple #31
0
 def test_max_radius(self):
     # Expected maximum radius per region code.
     expected_radii = (
         ('US', 2971000.0),
         ('LI', 14000.0),
         ('VA', 1000.0),
     )
     for code, expected in expected_radii:
         self.assertEqual(GEOCODER.region_max_radius(code), expected)