def aggregate_station_obs(self):
    """
    Merge the aggregated observation data into the existing station.

    Reads ``self.station`` and ``self.obs_data`` and returns a dict with
    the combined position, bounding box, radius, region, samples and
    weight.
    """
    station = self.station
    obs_data = self.obs_data
    obs_weight = obs_data['weight']

    def stored(name):
        # Missing station values become NaN, so the nan-aware min/max
        # calls below ignore them.
        found = getattr(station, name, None)
        if found is None:
            return numpy.nan
        return found

    station_corners = [
        (stored('lat'), stored('lon')),
        (stored('max_lat'), stored('max_lon')),
        (stored('min_lat'), stored('min_lon')),
    ]
    positions = numpy.append(
        obs_data['positions'], station_corners, axis=0)
    max_lat, max_lon = numpy.nanmax(positions, axis=0)
    min_lat, min_lon = numpy.nanmin(positions, axis=0)

    # A station without a full position contributes no old weight.
    if station.lat is not None and station.lon is not None:
        old_weight = min((station.weight or 0.0), self.MAX_OLD_WEIGHT)
    else:
        old_weight = 0.0
    total_weight = obs_weight + old_weight

    def blend(new_value, old_value):
        # Weighted mean of the observed and the stored coordinate.
        return ((new_value * obs_weight +
                 (old_value or 0.0) * old_weight) / total_weight)

    lat = blend(obs_data['lat'], station.lat)
    lon = blend(obs_data['lon'], station.lon)

    radius = circle_radius(
        lat, lon, max_lat, max_lon, min_lat, min_lon)

    region = station.region
    if region and not GEOCODER.in_region(lat, lon, region):
        # The stored region no longer matches the new position.
        region = None
    if not region:
        region = GEOCODER.region(lat, lon)

    samples, weight = self.bounded_samples_weight(
        (station.samples or 0) + obs_data['samples'],
        (station.weight or 0.0) + obs_weight)

    return {
        'lat': lat,
        'lon': lon,
        'max_lat': float(max_lat),
        'min_lat': float(min_lat),
        'max_lon': float(max_lon),
        'min_lon': float(min_lon),
        'radius': radius,
        'region': region,
        'samples': samples,
        'weight': weight,
    }
def region(self, ctr_lat, ctr_lon, mcc, cells):
    """
    Return the region code for an area based on its cells.

    The region shared by the majority of cells wins. If several regions
    tie for the majority, the first one in sorted order is picked and
    then replaced by the geocoded region of the area center, if that
    lookup returns one.

    :param ctr_lat: Latitude of the area center.
    :param ctr_lon: Longitude of the area center.
    :param mcc: The mobile country code of the area.
    :param cells: An iterable of objects with a ``region`` attribute.
    :returns: A region code, or None if there are no cells or none
        could be determined.
    """
    regions = [cell.region for cell in cells]
    if not regions:
        # Nothing to vote on; max() below would raise on empty input.
        return None

    grouped_regions = defaultdict(int)
    for reg in regions:
        grouped_regions[reg] += 1

    if len(grouped_regions) == 1:
        return regions[0]

    # Choose the area region based on the majority of cells
    # inside each region.
    max_count = max(grouped_regions.values())
    # Sort None before any region code, so a tie between an unknown
    # (None) and a known region does not raise TypeError on Python 3.
    max_regions = sorted(
        (code for code, count in grouped_regions.items()
         if count == max_count),
        key=lambda code: (code is not None, code))

    # If we get a tie here, fall back to the first sorted entry.
    region = max_regions[0]
    if len(max_regions) > 1:
        # Try to break the tie based on the center of the area,
        # but keep the fallback region if this fails.
        area_region = GEOCODER.region_for_cell(ctr_lat, ctr_lon, mcc)
        if area_region is not None:
            region = area_region
    return region
def aggregate_obs(self):
    """
    Aggregate ``self.observations`` into a single data dict.

    Returns None when the bounding box diagonal of all observation
    positions exceeds ``self.MAX_DIST_METERS``. Otherwise returns the
    raw positions and weights together with the weighted center,
    bounding box, radius, region, samples and weight.
    """
    observations = self.observations

    positions = numpy.array(
        [(obs.lat, obs.lon) for obs in observations],
        dtype=numpy.double)
    max_lat, max_lon = positions.max(axis=0)
    min_lat, min_lon = positions.min(axis=0)

    # Reject observation sets that are spread too far apart.
    if distance(min_lat, min_lon,
                max_lat, max_lon) > self.MAX_DIST_METERS:
        return None

    weights = numpy.array(
        [obs.weight for obs in observations], dtype=numpy.double)

    center = numpy.average(positions, axis=0, weights=weights)
    lat = float(center[0])
    lon = float(center[1])

    radius = circle_radius(
        lat, lon, max_lat, max_lon, min_lat, min_lon)
    region = GEOCODER.region(lat, lon)
    samples, weight = self.bounded_samples_weight(
        len(observations), float(weights.sum()))

    return {
        'positions': positions,
        'weights': weights,
        'lat': lat,
        'lon': lon,
        'max_lat': float(max_lat),
        'min_lat': float(min_lat),
        'max_lon': float(max_lon),
        'min_lon': float(min_lon),
        'radius': radius,
        'region': region,
        'samples': samples,
        'weight': weight,
    }
def radius(self, code, subs=None, city=None, default=REGION_RADIUS):
    """
    Return the best radius guess for the given region code.

    :param code: A two-letter region code.
    :type code: str

    :param subs: A list of ISO subdivision codes.
    :type subs: list

    :param city: A geoname_id from a city record or None.
    :type city: int

    :param default: The region radius to use if the lookup for the
        region code returns None.

    :returns: A tuple of radius/region radius guesses in meters.
    :rtype: tuple
    """
    looked_up = GEOCODER.region_max_radius(code)
    # No region code or no successful radius lookup falls back to
    # the default.
    region_radius = default if looked_up is None else looked_up

    # The region radius is the upper bound for the city / subdivision
    # radius, which matters for really small regions. E.g. a city in
    # the Vatican cannot be larger than the Vatican itself.
    best = region_radius
    if subs:
        sub_radius = SUB_RADII.get(code, SUB_RADIUS)
        best = min(sub_radius, best)
    if city:
        city_radius = CITY_RADII.get(city, CITY_RADIUS)
        best = min(city_radius, best)

    return (best, region_radius)
def validator(self, node, cstruct):
    """
    Run the parent schema validation, then check the position against
    the regions of the observation's MCC.

    :raises colander.Invalid: if ``GEOCODER.in_region_mcc`` returns a
        false value for the lat/lon/mcc of ``cstruct``.
    """
    super(ValidCellObservationSchema, self).validator(node, cstruct)

    lat, lon, mcc = cstruct['lat'], cstruct['lon'], cstruct['mcc']
    if GEOCODER.in_region_mcc(lat, lon, mcc):
        return

    raise colander.Invalid(
        node, 'Lat/lon must be inside one of the regions for the MCC')
def validator(self, node, cstruct):
    """
    Run the parent schema validation, then require a lat/lon pair
    which is inside some region.

    :raises colander.Invalid: if lat or lon is None or
        ``colander.null``, or if ``GEOCODER.any_region`` returns a
        false value for the position.
    """
    super(ValidReportSchema, self).validator(node, cstruct)

    for field in ('lat', 'lon'):
        value = cstruct[field]
        if value is None or value is colander.null:
            raise colander.Invalid(
                node, 'Report %s is required.' % field)

    inside = GEOCODER.any_region(cstruct['lat'], cstruct['lon'])
    if not inside:
        raise colander.Invalid(
            node, 'Lat/lon must be inside a region.')
def test_ambiguous_mcc(self):
    """A cell with MCC 234 matches all regions listed for that MCC."""
    expected = GEOCODER.regions_for_mcc(234, metadata=True)
    cell = CellFactory.build(mcc=234)

    results = self.source.search(self.model_query(cells=[cell]))
    self.check_model_result(results, expected)

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    self.check_stats(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_no_match(self):
    """MCC values without a match return an empty list."""
    lookup = GEOCODER.regions_for_mcc

    for unknown in (None, 1):
        assert lookup(unknown) == []
        assert lookup(unknown, metadata=True) == []

    assert lookup('') == []
    assert lookup('1', metadata=True) == []
def test_no_match(self):
    """MCC values without a match return an empty list."""
    lookup = GEOCODER.regions_for_mcc

    for unknown in (None, 1):
        self.assertEqual(lookup(unknown), [])
        self.assertEqual(lookup(unknown, metadata=True), [])

    self.assertEqual(lookup(''), [])
    self.assertEqual(lookup('1', metadata=True), [])
def test_multiple_mcc(self):
    """Two areas with MCC 234 and 235: the MCC 235 region is best."""
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    cell = CellAreaFactory(mcc=234, num_cells=6)
    cell2 = CellAreaFactory(mcc=235, num_cells=8)
    self.session.flush()

    query = self.model_query(cells=[cell, cell2])
    results = self.source.search(query)
    self.assertGreater(len(results), 2)

    best = results.best(query.expected_accuracy)
    self.assertEqual(best.region_code, expected.code)
    self.assertAlmostEqual(best.score, 1.25 + cell.score(now), 4)
def upgrade():
    """
    Fill in the ``region`` column of the ``cell_area`` table.

    First sets the region for every entry of ``MCC_TO_REGION``, then
    geocodes the remaining rows without a region via
    ``GEOCODER.region_for_cell`` and updates them grouped by region.
    """
    bind = op.get_bind()
    from ichnaea.geocode import GEOCODER

    log.info('Update cell_area regions.')

    # Step 1: regions which follow directly from the mcc.
    mcc_update = '''\
UPDATE cell_area
SET `region` = "{code}"
WHERE `radio` IN (0, 1, 2, 3) AND `mcc` = {mcc} AND `region` IS NULL
'''
    length = len(MCC_TO_REGION)
    for done, (mcc, code) in enumerate(MCC_TO_REGION.items()):
        op.execute(sa.text(mcc_update.format(code=code, mcc=mcc)))
        if done > 0 and done % 10 == 0:
            log.info('Updated %s of %s regions.', done, length)
    log.info('Updated %s of %s regions.', length, length)

    # Step 2: geocode whatever is still missing a region.
    count_query = 'SELECT COUNT(*) FROM cell_area WHERE region IS NULL'
    todo = bind.execute(count_query).fetchone()[0]
    log.info('Updating remaining %s areas.', todo)

    missing_query = '''\
SELECT HEX(`areaid`), `mcc`, `lat`, `lon`
FROM cell_area
WHERE `region` IS NULL
'''
    # Maps region code -> list of hex encoded area ids.
    areas = {}
    for seen, row in enumerate(bind.execute(missing_query).fetchall()):
        if seen > 0 and seen % 5000 == 0:
            log.info('Geocoded %s of %s areas.', seen, todo)
        code = GEOCODER.region_for_cell(row.lat, row.lon, row.mcc)
        areas.setdefault(code, []).append(row[0])
    log.info('Geocoded %s of %s areas.', todo, todo)

    # Step 3: one update statement per geocoded region.
    area_update = '''\
UPDATE cell_area
SET `region` = "{code}"
WHERE `areaid` in ({ids})
'''
    for code, areaids in areas.items():
        if not code:
            # Rows which could not be geocoded keep their NULL region.
            continue
        ids = 'UNHEX("' + '"), UNHEX("'.join(areaids) + '")'
        op.execute(sa.text(area_update.format(code=code, ids=ids)))
        log.info('Updated %s region.', code)
def test_from_mcc(self):
    """An area with MCC 235 yields the first region for that MCC."""
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    area = CellAreaFactory(mcc=235, num_cells=10)
    self.session.flush()

    results = self.source.search(self.model_query(cells=[area]))
    self.check_model_results(results, [expected])
    self.assertAlmostEqual(results[0].score, 1.0, 4)

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    self.check_stats(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_multiple_mcc(self, geoip_db, http_session,
                      session, source, stats):
    """Two areas with MCC 234 and 235: the MCC 235 region is best."""
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    area = CellAreaFactory(mcc=234, num_cells=6)
    area2 = CellAreaFactory(mcc=235, num_cells=8)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats,
        cells=[area, area2])
    results = source.search(query)
    assert len(results) > 2

    best = results.best()
    assert best.region_code == expected.code
    assert best.score == 1.25 + area_score(area, now)
def test_from_mcc(self, geoip_db, http_session,
                  session, source, stats):
    """An area with MCC 235 yields the first region for that MCC."""
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    area = CellAreaFactory(mcc=235, num_cells=10)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats,
        cells=[area])
    results = source.search(query)
    self.check_model_results(results, [expected])
    assert results[0].score == 1.0

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    stats.check(counter=[
        (self.api_type + '.source', source_tags),
    ])
def search_cell(self, query):
    """
    Return region results for the cells and cell areas of the query.

    Each cell spreads a score of 1.0 evenly over the regions of its
    mobile country code. Cells whose code maps to more than one region
    are also looked up via ``query_areas``, and the score of each
    found area is added to its region.
    """
    results = self.result_list()
    now = util.utcnow()

    ambiguous_cells = []
    # Maps region code -> [region metadata, summed up score].
    grouped_regions = {}

    for cell in list(query.cell) + list(query.cell_area):
        mcc_regions = GEOCODER.regions_for_mcc(
            cell.mobileCountryCode, metadata=True)
        # Each possible region gets an equal share of the score.
        share = 1.0 / (len(mcc_regions) or 1.0)

        for mcc_region in mcc_regions:
            entry = grouped_regions.get(mcc_region.code)
            if entry is None:
                grouped_regions[mcc_region.code] = [mcc_region, share]
            else:
                # Sum up scores of multiple matches.
                entry[1] += share

        if len(mcc_regions) > 1:
            ambiguous_cells.append(cell)

    if ambiguous_cells:
        # The database is only queried for ambiguous codes. The area
        # model is used for both area and cell entries, as only the
        # region is of interest here.
        areas = query_areas(
            query, ambiguous_cells, self.area_model, self.raven_client)
        for area in areas:
            area_code = area.region
            if area_code and area_code in grouped_regions:
                grouped_regions[area_code][1] += area_score(area, now)

    for region, score in grouped_regions.values():
        results.add(self.result_type(
            region_code=region.code,
            region_name=region.name,
            accuracy=region.radius,
            score=score))

    return results
def search_mcc(self, query):
    """
    Return one result per region of each distinct mcc found in the
    cells and cell areas of the query.
    """
    results = ResultList()

    # De-duplicate the codes, so each one is only looked up once.
    codes = set(
        cell.mcc for cell in list(query.cell) + list(query.cell_area))

    regions = []
    for code in codes:
        regions += GEOCODER.regions_for_mcc(code, metadata=True)

    for region in regions:
        results.add(self.result_type(
            region_code=region.code,
            region_name=region.name,
            accuracy=region.radius))

    return results
def validate(cls, entry, _raise_invalid=False, **kw):
    """
    Validate the entry via the parent class and fill in derived values.

    Adds an ``areaid`` tuple of (radio, mcc, mnc, lac) if it is missing
    and looks up a ``region`` via ``GEOCODER.region_for_cell`` if there
    is none yet and both lat and lon are set.

    :returns: The validated data, or None if the parent validation
        returned None.
    """
    validated = super(CellAreaMixin, cls).validate(
        entry, _raise_invalid=_raise_invalid, **kw)

    if validated is None or 'areaid' in validated:
        return validated

    validated['areaid'] = (
        validated['radio'],
        validated['mcc'],
        validated['mnc'],
        validated['lac'],
    )

    # NOTE(review): the region lookup is assumed to be part of the
    # missing-areaid branch, as it is not None-safe otherwise - confirm.
    has_region = 'region' in validated and validated['region']
    has_position = (validated['lat'] is not None and
                    validated['lon'] is not None)
    if not has_region and has_position:
        validated['region'] = GEOCODER.region_for_cell(
            validated['lat'], validated['lon'], validated['mcc'])

    return validated
def search_blue(self, query):
    """
    Return region results for the Bluetooth networks of the query.

    The station scores of all networks found via ``query_macs`` are
    summed up per region. Region codes unknown to the geocoder are
    skipped.
    """
    results = self.result_list()
    now = util.utcnow()

    scores = defaultdict(int)
    for blue in query_macs(
            query, query.blue, self.raven_client, BlueShard):
        scores[blue.region] += station_score(blue, now)

    for code, score in scores.items():
        region = GEOCODER.region_for_code(code)
        if not region:
            continue
        results.add(self.result_type(
            region_code=code,
            region_name=region.name,
            accuracy=region.radius,
            score=score))

    return results
def search_wifi(self, query):
    """
    Return region results for the WiFi networks of the query.

    The station scores of all networks found via ``query_macs`` are
    summed up per region. Region codes unknown to the geocoder are
    skipped.
    """
    results = self.result_list()
    now = util.utcnow()

    scores = defaultdict(int)
    for wifi in query_macs(
            query, query.wifi, self.raven_client, WifiShard):
        scores[wifi.region] += station_score(wifi, now)

    for code, score in scores.items():
        region = GEOCODER.region_for_code(code)
        if not region:
            continue
        results.add(self.result_type(
            region_code=code,
            region_name=region.name,
            accuracy=region.radius,
            score=score))

    return results
def test_ambiguous_mcc(self):
    """
    An area with MCC 234 matches all regions for that MCC, with GB
    scored highest thanks to the added area score.
    """
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(234, metadata=True)
    cell = CellAreaFactory(mcc=234, num_cells=10)
    self.session.flush()

    results = self.source.search(self.model_query(cells=[cell]))
    self.check_model_results(results, expected)
    self.assertEqual(results.best().region_code, 'GB')

    for result in results:
        if result.region_code == 'GB':
            wanted = 0.25 + cell.score(now)
        else:
            wanted = 0.25
        self.assertAlmostEqual(result.score, wanted, 4)

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    self.check_stats(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_blue(self):
    """
    Three Bluetooth networks give a single region result, scored as
    the sum of the blue1 and blue2 scores.
    """
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    blue1 = BlueShardFactory(samples=10)
    blue2 = BlueShardFactory(samples=20)
    blue3 = BlueShardFactory.build(region='DE', samples=100)
    self.session.flush()

    query = self.model_query(blues=[blue1, blue2, blue3])
    results = self.source.search(query)
    self.check_model_results(results, [expected])

    best = results.best()
    self.assertEqual(best.region_code, expected.code)
    self.assertAlmostEqual(
        best.score, blue1.score(now) + blue2.score(now), 4)

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    self.check_stats(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_wifi(self):
    """
    Three WiFi networks give a single region result, scored as the
    sum of the wifi1 and wifi2 scores.
    """
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    wifi1 = WifiShardFactory(samples=10)
    wifi2 = WifiShardFactory(samples=20)
    wifi3 = WifiShardFactory.build(region='DE', samples=100)
    self.session.flush()

    query = self.model_query(wifis=[wifi1, wifi2, wifi3])
    results = self.source.search(query)
    self.check_model_results(results, [expected])

    best = results.best(query.expected_accuracy)
    self.assertEqual(best.region_code, expected.code)
    self.assertAlmostEqual(
        best.score, wifi1.score(now) + wifi2.score(now), 4)

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    self.check_stats(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_blue(self, geoip_db, http_session, session, source, stats):
    """
    Three Bluetooth networks give a single region result, scored as
    the sum of the blue1 and blue2 station scores.
    """
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    blue1 = BlueShardFactory(samples=10)
    blue2 = BlueShardFactory(samples=20)
    blue3 = BlueShardFactory.build(region='DE', samples=100)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats,
        blues=[blue1, blue2, blue3])
    results = source.search(query)
    self.check_model_results(results, [expected])

    best = results.best()
    assert best.region_code == expected.code
    assert (best.score ==
            station_score(blue1, now) + station_score(blue2, now))

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    stats.check(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_ambiguous_mcc(self, geoip_db, http_session,
                       session, source, stats):
    """
    An area with MCC 234 matches all regions for that MCC, with GB
    scored highest thanks to the added area score.
    """
    now = util.utcnow()
    expected = GEOCODER.regions_for_mcc(234, metadata=True)
    area = CellAreaFactory(mcc=234, num_cells=10)
    session.flush()

    query = self.model_query(
        geoip_db, http_session, session, stats,
        cells=[area])
    results = source.search(query)
    self.check_model_results(results, expected)
    assert results.best().region_code == 'GB'

    for result in results:
        if result.region_code == 'GB':
            wanted = 0.25 + area_score(area, now)
        else:
            wanted = 0.25
        assert result.score == wanted

    source_tags = [
        'key:test',
        'region:none',
        'source:internal',
        'accuracy:low',
        'status:hit',
    ]
    stats.check(counter=[
        (self.api_type + '.source', source_tags),
    ])
def test_all_valid_mcc(self):
    """
    Every code in ALL_VALID_MCCS maps to at least one region and only
    to regions in ``GEOCODER._valid_regions``.
    """
    nothing = set()
    for mcc in ALL_VALID_MCCS:
        found = set(GEOCODER.regions_for_mcc(mcc))
        self.assertNotEqual(found, nothing)
        unexpected = found - GEOCODER._valid_regions
        self.assertEqual(unexpected, nothing)
def test_filtered(self):
    """MCC 244 only resolves to FI."""
    # AX / Aland Islands is not in the GENC list
    found = set(GEOCODER.regions_for_mcc(244))
    self.assertEqual(found, set(['FI']))
def test_multiple(self):
    """MCC 311 resolves to GU and US, with and without metadata."""
    expected = set(['GU', 'US'])

    codes = GEOCODER.regions_for_mcc(311)
    self.assertEqual(set(codes), expected)

    with_metadata = GEOCODER.regions_for_mcc(311, metadata=True)
    self.assertEqual(
        set([region.code for region in with_metadata]), expected)
def test_single(self):
    """MCC 262 resolves to DE only, with and without metadata."""
    expected = set(['DE'])

    codes = GEOCODER.regions_for_mcc(262)
    self.assertEqual(set(codes), expected)

    with_metadata = GEOCODER.regions_for_mcc(262, metadata=True)
    self.assertEqual(
        set([region.code for region in with_metadata]), expected)
def test_max_radius_fail(self):
    """Invalid region codes have no maximum radius."""
    invalid_codes = (None, 42, 'A', 'us', 'USA', 'AA')
    for invalid in invalid_codes:
        self.assertIsNone(GEOCODER.region_max_radius(invalid))
def test_max_radius(self):
    """Known region codes return their maximum radius."""
    expected_radii = (
        ('US', 2971000.0),
        ('LI', 14000.0),
        ('VA', 1000.0),
    )
    for code, radius in expected_radii:
        self.assertEqual(GEOCODER.region_max_radius(code), radius)