def test_scan_lacs_remove(self):
    # A cell area without any cells is deleted once its key is scanned.
    db = self.db_master_session
    redis = self.redis_client

    # Store an orphaned area entry and queue its key for scanning.
    area_fields = {'radio': 1, 'mcc': 1, 'mnc': 1, 'lac': 1}
    orphan = CellArea(**area_fields)
    db.add(orphan)
    db.flush()
    queued = [CellArea.to_hashkey(area_fields)]
    enqueue_lacs(db, redis, queued, UPDATE_KEY['cell_lac'])

    # The scan reports one processed area and no area rows are left.
    processed = scan_lacs.delay().get()
    self.assertEqual(processed, 1)
    remaining = db.query(CellArea).all()
    self.assertEqual(remaining, [])
def test_cell(self):
    # A query for a stored cell returns the cell's position and range,
    # and the cell / geoip counters are checked afterwards.
    db = self.db_slave_session
    london_geoip = self.geoip_data['London']
    area_fields = dict(radio=RADIO_TYPE['gsm'], mcc=GB_MCC, mnc=1, lac=1)

    cell = Cell(lat=GB_LAT, lon=GB_LON, range=6000, cid=1, **area_fields)
    db.add(cell)
    area = CellArea(lat=GB_LAT, lon=GB_LON, range=9000, **area_fields)
    db.add(area)
    db.flush()

    query = {'cell': [dict(cid=1, **area_fields)]}
    result = self._make_query(
        data=query, client_addr=london_geoip['ip'], api_key_log=True)

    expected = {'lat': GB_LAT, 'lon': GB_LON, 'accuracy': 6000}
    self.assertEqual(result, expected)

    expected_counters = [
        'm.cell_hit',
        ('m.geoip_hit', 0),
        'm.api_log.test.cell_hit',
        ('m.api_log.test.geoip_hit', 0),
    ]
    self.check_stats(counter=expected_counters)
def test_cell_agrees_with_lac(self):
    # A cell positioned inside its enclosing LAC is accepted, and the
    # reported accuracy is tightened to the cell minimum.
    db = self.db_slave_session
    area_fields = dict(mcc=BRAZIL_MCC, mnc=VIVO_MNC, lac=12345)
    gsm = RADIO_TYPE['gsm']
    cell_lat = SAO_PAULO_LAT + 0.002
    cell_lon = SAO_PAULO_LON + 0.002

    records = [
        Cell(lat=cell_lat, lon=cell_lon, range=1000,
             radio=gsm, cid=6789, **area_fields),
        CellArea(lat=SAO_PAULO_LAT, lon=SAO_PAULO_LON, range=10000,
                 radio=gsm, **area_fields),
    ]
    db.add_all(records)
    db.flush()

    query = {"cell": [dict(radio="gsm", cid=6789, **area_fields)]}
    result = self._make_query(data=query)

    expected = {
        'lat': cell_lat,
        'lon': cell_lon,
        'accuracy': CELL_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)

    expected_counters = [
        ('m.cell_lac_hit', 0),
        ('m.cell_hit', 1),
    ]
    self.check_stats(counter=expected_counters)
def test_cell_disagrees_with_lac(self):
    # A cell positioned outside the LAC it belongs to is not trusted;
    # the answer falls back to the LAC. This may indicate inconsistent
    # data, or simply a new cell that has not been integrated yet.
    db = self.db_slave_session
    area_fields = dict(mcc=BRAZIL_MCC, mnc=VIVO_MNC, lac=12345)
    gsm = RADIO_TYPE['gsm']

    stray_cell = Cell(lat=PORTO_ALEGRE_LAT, lon=PORTO_ALEGRE_LON,
                      range=1000, radio=gsm, cid=6789, **area_fields)
    area = CellArea(lat=SAO_PAULO_LAT, lon=SAO_PAULO_LON,
                    range=10000, radio=gsm, **area_fields)
    db.add_all([stray_cell, area])
    db.flush()

    query = {"cell": [dict(radio="gsm", cid=6789, **area_fields)]}
    result = self._make_query(data=query)

    expected = {
        'lat': SAO_PAULO_LAT,
        'lon': SAO_PAULO_LON,
        'accuracy': LAC_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)

    expected_counters = [
        ('m.cell_lac_hit', 1),
    ]
    self.check_stats(counter=expected_counters)
def test_cell_hit_ignores_lac(self):
    # When the queried cell itself is known, its position is returned
    # rather than the position of the surrounding area.
    db = self.db_slave_session
    base_lat = PARIS_LAT
    base_lon = PARIS_LON
    area_fields = dict(mcc=FRANCE_MCC, mnc=2, lac=3)

    records = [
        Cell(lat=base_lat, lon=base_lon, range=1000,
             radio=2, cid=4, **area_fields),
        Cell(lat=base_lat + 0.002, lon=base_lon + 0.004, range=1000,
             radio=2, cid=5, **area_fields),
        Cell(lat=base_lat + 0.006, lon=base_lon + 0.006, range=1000,
             radio=2, cid=6, **area_fields),
        CellArea(lat=base_lat + 0.0026666, lon=base_lon + 0.0033333,
                 radio=2, range=50000, **area_fields),
    ]
    db.add_all(records)
    db.flush()

    query = {"cell": [dict(radio="umts", cid=5, **area_fields)]}
    result = self._make_query(data=query)

    expected = {
        'lat': PARIS_LAT + 0.002,
        'lon': PARIS_LON + 0.004,
        'accuracy': CELL_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)
def test_cell_multiple_country_codes_from_mcc(self):
    db = self.db_slave_session
    area_fields = dict(radio=RADIO_TYPE['gsm'], mcc=GB_MCC, mnc=1, lac=1)

    cell = Cell(lat=GB_LAT, lon=GB_LON, range=6000, cid=1, **area_fields)
    db.add(cell)
    area = CellArea(lat=GB_LAT, lon=GB_LON, range=9000, **area_fields)
    db.add(area)
    db.flush()

    # No GeoIP is supplied here, so the mcc maps to 4 equally common
    # country codes and GB is not the first of them. Any of those
    # country codes has to be accepted as a match, so that otherwise
    # good cell data is not thrown away.
    query = {'cell': [dict(cid=1, **area_fields)]}
    result = self._make_query(data=query)

    expected = {'lat': GB_LAT, 'lon': GB_LON, 'accuracy': 6000}
    self.assertEqual(result, expected)

    expected_counters = [
        'm.cell_hit',
    ]
    self.check_stats(counter=expected_counters)
def test_cell_miss_lac_hit(self):
    # Querying a cid that is not stored, inside a stored area, yields
    # the area's position and range.
    db = self.db_slave_session
    base_lat = PARIS_LAT
    base_lon = PARIS_LON
    area_fields = dict(mcc=FRANCE_MCC, mnc=2, lac=3)
    umts = RADIO_TYPE['umts']

    records = [
        Cell(lat=base_lat, lon=base_lon,
             radio=umts, cid=4, **area_fields),
        Cell(lat=base_lat + 0.002, lon=base_lon + 0.004,
             radio=umts, cid=5, **area_fields),
        Cell(lat=base_lat + 0.006, lon=base_lon + 0.006,
             radio=umts, cid=6, **area_fields),
        CellArea(lat=base_lat + 0.0026666, lon=base_lon + 0.0033333,
                 radio=umts, range=500000, **area_fields),
    ]
    db.add_all(records)
    db.flush()

    query = {"cell": [dict(radio="umts", cid=7, **area_fields)]}
    result = self._make_query(data=query, api_key_log=True)

    expected = {
        'lat': PARIS_LAT + 0.0026666,
        'lon': PARIS_LON + 0.0033333,
        'accuracy': 500000,
    }
    self.assertEqual(result, expected)

    expected_counters = [
        'm.cell_lac_hit',
        'm.api_log.test.cell_lac_hit',
        ('m.api_log.test.cell_hit', 0),
        ('m.api_log.test.cell_miss', 0),
    ]
    self.check_stats(counter=expected_counters)
def test_cell_multiple_lac_hit(self):
    db = self.db_slave_session
    base_lat = PARIS_LAT
    base_lon = PARIS_LON
    gsm = RADIO_TYPE['gsm']
    first_area = dict(mcc=FRANCE_MCC, mnc=2, lac=3)
    second_area = dict(mcc=FRANCE_MCC, mnc=2, lac=4)

    expected_lac = CellArea(
        lat=base_lat + 0.2, lon=base_lon + 0.2,
        radio=gsm, range=20000, **first_area)
    records = [
        Cell(lat=base_lat + 0.02, lon=base_lon + 0.02,
             radio=gsm, cid=4, range=2000, **second_area),
        Cell(lat=base_lat + 0.04, lon=base_lon + 0.04,
             radio=gsm, cid=5, range=3000, **second_area),
        Cell(lat=base_lat + 0.2, lon=base_lon + 0.4,
             radio=gsm, cid=5, range=1000, **first_area),
        CellArea(lat=base_lat, lon=base_lon,
                 radio=gsm, range=30000, **second_area),
        expected_lac,
    ]
    db.add_all(records)
    db.flush()

    # We have two lacs, both with two cells, but only know about
    # one cell in one of them and two in the other.
    # The lac with two known cells wins and we use both their
    # positions to calculate the final result.
    queried_cells = [
        dict(radio="gsm", cid=4, **first_area),
        dict(radio="gsm", cid=9, **first_area),
        dict(radio="gsm", cid=4, **second_area),
        dict(radio="gsm", cid=5, **second_area),
    ]
    result = self._make_query(data={"cell": queried_cells})

    expected = {
        'lat': expected_lac.lat,
        'lon': expected_lac.lon,
        'accuracy': expected_lac.range,
    }
    self.assertEqual(result, expected)
def import_stations(session, pipe, filename, fields):
    """
    Load OCID cells from a gzipped CSV file into the OCIDCell table.

    Rows are upserted in batches, the number of newly inserted rows is
    added to the ``unique_ocid_cell`` stat counter through ``pipe``, and
    an ``update_area`` task is queued for every area key seen.
    """
    today = util.utcnow().date()

    def store_batch(statement, batch_rows, commit=True):
        # Execute one batch and account for the rows it newly inserted.
        total = len(batch_rows)
        affected = session.execute(statement, batch_rows).rowcount
        # apply trick to avoid querying for existing rows,
        # MySQL claims 1 row for an inserted row, 2 for an updated row
        inserted_rows = 2 * total - affected
        changed_rows = affected - total
        # Holds by construction of the two values above.
        assert inserted_rows + changed_rows == total
        counter = StatCounter(StatKey.unique_ocid_cell, today)
        counter.incr(pipe, inserted_rows)
        if not commit:  # pragma: no cover
            session.flush()
        else:
            session.commit()

    with GzipFile(filename, 'rb') as gzip_file:
        reader = csv.DictReader(gzip_file, fields)
        batch_size = 10000
        pending = []
        area_keys = set()
        update_clause = (
            'changeable = values(changeable), '
            'modified = values(modified), '
            'total_measures = values(total_measures), '
            'lat = values(lat), '
            'lon = values(lon), '
            'psc = values(psc), '
            '`range` = values(`range`)')
        upsert = OCIDCell.__table__.insert(on_duplicate=update_clause)

        for record in reader:
            # skip any header row
            if reader.line_num == 1 and \
               'radio' in record.values():  # pragma: no cover
                continue

            values = make_ocid_cell_import_dict(record)
            if values is None:
                continue
            pending.append(values)
            area_keys.add(CellArea.to_hashkey(values))

            if len(pending) == batch_size:  # pragma: no cover
                store_batch(upsert, pending, commit=False)
                pending = []

        if pending:
            store_batch(upsert, pending)

        for area_key in area_keys:
            update_area.delay(area_key, cell_type='ocid')
def test_cell_multiple_lac_lower_range_wins(self):
    db = self.db_slave_session
    base_lat = PARIS_LAT
    base_lon = PARIS_LON
    gsm = RADIO_TYPE['gsm']
    first_area = dict(mcc=FRANCE_MCC, mnc=2, lac=3)
    second_area = dict(mcc=FRANCE_MCC, mnc=2, lac=4)

    expected_lac = CellArea(
        lat=base_lat + 0.2, lon=base_lon + 0.2,
        radio=gsm, range=10000, **first_area)
    records = [
        Cell(lat=base_lat + 0.02, lon=base_lon + 0.02,
             radio=gsm, cid=4, range=2000, **second_area),
        Cell(lat=base_lat + 0.2, lon=base_lon + 0.4,
             radio=gsm, cid=4, range=4000, **first_area),
        CellArea(lat=base_lat, lon=base_lon,
                 radio=gsm, range=20000, **second_area),
        expected_lac,
    ]
    db.add_all(records)
    db.flush()

    # We have two lacs with each one known cell.
    # The lac with the smallest cell wins.
    queried_cells = [
        dict(radio="gsm", cid=4, **first_area),
        dict(radio="gsm", cid=4, **second_area),
    ]
    result = self._make_query(data={"cell": queried_cells})

    expected = {
        'lat': expected_lac.lat,
        'lon': expected_lac.lon,
        'accuracy': LAC_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)
def test_new(self):
    # Scanning the queued key of a single cell creates one area that
    # mirrors that cell's position and range.
    station = CellFactory()
    self.session.flush()

    queued_keys = [CellArea.to_hashkey(station)]
    self.area_queue.enqueue(queued_keys)
    processed = scan_areas.delay().get()
    self.assertEqual(processed, 1)

    created = self.session.query(CellArea).one()
    self.assertAlmostEqual(created.lat, station.lat)
    self.assertAlmostEqual(created.lon, station.lon)
    self.assertEqual(created.range, 0)
    self.assertEqual(created.num_cells, 1)
    self.assertEqual(created.avg_cell_range, station.range)
def __call__(self, cell_keys):
    """
    Delete the cells identified by ``cell_keys``.

    The area key of every given cell key is collected and, if there is
    at least one, enqueued on the ``update_cellarea`` data queue using
    ``self.pipe``. Returns the summed results of the delete queries.
    """
    removed = 0
    touched_areas = set()
    queue = self.task.app.data_queues['update_cellarea']

    for cell_key in cell_keys:
        matching = Cell.querykey(self.session, cell_key)
        removed += matching.delete()
        touched_areas.add(CellArea.to_hashkey(cell_key))

    if touched_areas:
        queue.enqueue(touched_areas, pipe=self.pipe)

    return removed
def remove(self, cell_keys):
    """
    Delete the cells identified by ``cell_keys``.

    If any area keys were collected, ``enqueue_areas`` is registered
    through ``session.on_post_commit`` with the ``cell_lac`` update key.
    Returns the summed results of the delete queries.
    """
    removed = 0
    touched_areas = set()

    for cell_key in cell_keys:
        matching = Cell.querykey(self.session, cell_key)
        removed += matching.delete()
        touched_areas.add(CellArea.to_hashkey(cell_key))

    if touched_areas:
        self.session.on_post_commit(
            enqueue_areas,
            self.redis_client,
            touched_areas,
            UPDATE_KEY['cell_lac'])

    return removed
def test_cell_multiple_radio_lac_hit_with_min_lac_accuracy(self):
    db = self.db_slave_session
    base_lat = PARIS_LAT
    base_lon = PARIS_LON
    gsm = RADIO_TYPE['gsm']
    lte = RADIO_TYPE['lte']
    gsm_area = dict(mcc=FRANCE_MCC, mnc=3, lac=4)
    lte_area = dict(mcc=FRANCE_MCC, mnc=2, lac=3)

    expected_lac = CellArea(
        lat=base_lat + 0.2, lon=base_lon + 0.2,
        radio=gsm, range=3000, **gsm_area)
    records = [
        Cell(lat=base_lat + 0.01, lon=base_lon + 0.02,
             radio=lte, cid=4, range=2000, **lte_area),
        Cell(lat=base_lat + 0.2, lon=base_lon + 0.4,
             radio=gsm, cid=5, range=500, **gsm_area),
        CellArea(lat=base_lat, lon=base_lon,
                 radio=lte, range=10000, **lte_area),
        expected_lac,
    ]
    db.add_all(records)
    db.flush()

    # GSM lac-only hit (cid 9 instead of 5) and a LTE cell hit
    queried_cells = [
        dict(radio="gsm", cid=9, **gsm_area),
        dict(radio="lte", cid=4, **lte_area),
    ]
    result = self._make_query(data={"cell": queried_cells})

    expected = {
        'lat': expected_lac.lat,
        'lon': expected_lac.lon,
        'accuracy': LAC_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)
def remove(self, cell_keys):
    """
    Delete the cells identified by ``cell_keys``.

    If any area keys were collected, ``enqueue_areas`` is registered
    through ``session.on_post_commit`` with the ``cell_area_update``
    data queue entry. Returns the summed results of the delete queries.
    """
    removed = 0
    touched_areas = set()

    for cell_key in cell_keys:
        matching = Cell.querykey(self.session, cell_key)
        removed += matching.delete()
        touched_areas.add(CellArea.to_hashkey(cell_key))

    if not touched_areas:
        return removed

    queue_key = self.task.app.data_queues['cell_area_update']
    self.session.on_post_commit(
        enqueue_areas, self.redis_client, touched_areas, queue_key)
    return removed
def remove_cell(self, cell_keys):
    """
    Delete the cells identified by ``cell_keys`` and commit the session.

    If any area keys were collected, ``enqueue_lacs`` is registered
    through ``session.on_post_commit`` with the ``cell_lac`` update key.
    Returns the summed results of the delete queries.
    """
    removed = 0
    redis = self.app.redis_client

    with self.db_session() as session:
        touched_lacs = set()
        for raw_key in cell_keys:
            cell_key = Cell.to_hashkey(raw_key)
            criteria = Cell.joinkey(cell_key)
            matching = session.query(Cell).filter(*criteria)
            removed += matching.delete()
            touched_lacs.add(CellArea.to_hashkey(cell_key))

        if touched_lacs:
            session.on_post_commit(
                enqueue_lacs,
                redis,
                touched_lacs,
                UPDATE_KEY['cell_lac'])

        session.commit()

    return removed
def test_geoip_mcc_mismatch(self):
    # The client address is taken from the Bhutan GeoIP entry while the
    # cell uses the USA mcc; the cell position is still returned.
    db = self.db_slave_session
    gsm = RADIO_TYPE['gsm']
    bhutan_geoip = self.geoip_data['Bhutan']
    cell_fields = {'mcc': USA_MCC, 'mnc': 1, 'lac': 1, 'cid': 1}
    area_fields = {'mcc': USA_MCC, 'mnc': 1, 'lac': 1}

    cell = Cell(radio=gsm, lat=FREMONT_LAT, lon=FREMONT_LON,
                range=1000, **cell_fields)
    db.add(cell)
    area = CellArea(radio=gsm, lat=FREMONT_LAT, lon=FREMONT_LON,
                    range=10000, **area_fields)
    db.add(area)
    db.flush()

    query = {'cell': [dict(radio='gsm', **cell_fields)]}
    result = self._make_query(data=query, client_addr=bhutan_geoip['ip'])

    expected = {
        'lat': FREMONT_LAT,
        'lon': FREMONT_LON,
        'accuracy': CELL_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)
def remove(self, cell_keys):
    """
    Delete the cells identified by ``cell_keys``.

    If any area keys were collected, ``enqueue_areas`` is registered
    through ``session.on_post_commit`` with the ``cell_area_update``
    data queue entry. Returns the summed results of the delete queries.
    """
    session = self.session
    removed = 0
    touched_areas = set()

    for cell_key in cell_keys:
        removed += Cell.querykey(session, cell_key).delete()
        touched_areas.add(CellArea.to_hashkey(cell_key))

    if touched_areas:
        queue_key = self.task.app.data_queues['cell_area_update']
        session.on_post_commit(
            enqueue_areas,
            self.redis_client,
            touched_areas,
            queue_key)

    return removed
def scan_lacs(self, batch=100):
    """
    Find cell LACs that have changed and update the bounding box.
    This includes adding new LAC entries and removing them.

    Up to ``batch`` entries are dequeued, reduced to unique area keys,
    and one ``update_lac`` task is queued per key. Returns the number
    of unique keys.
    """
    queued = dequeue_lacs(
        self.app.redis_client, UPDATE_KEY['cell_lac'], batch=batch)
    area_keys = set(CellArea.to_hashkey(entry) for entry in queued)

    for area_key in area_keys:
        update_lac.delay(
            area_key.radio,
            area_key.mcc,
            area_key.mnc,
            area_key.lac,
            cell_model_key='cell',
            cell_area_model_key='cell_area')

    return len(area_keys)
def import_stations(session, filename, fields):
    """
    Load OCID cells from a gzipped CSV file into the OCIDCell table.

    Rows are upserted and committed in batches, and an ``update_lac``
    task is queued for every area key seen in the file.
    """
    with GzipFile(filename, 'rb') as gzip_file:
        reader = csv.DictReader(gzip_file, fields)
        batch_size = 10000
        pending = []
        area_keys = set()
        update_clause = (
            'changeable = values(changeable), '
            'modified = values(modified), '
            'total_measures = values(total_measures), '
            'lat = values(lat), '
            'lon = values(lon), '
            'psc = values(psc), '
            '`range` = values(`range`)')
        upsert = OCIDCell.__table__.insert(on_duplicate=update_clause)

        for record in reader:
            # skip any header row
            if reader.line_num == 1 and \
               'radio' in record.values():  # pragma: no cover
                continue

            values = make_ocid_cell_import_dict(record)
            if values is None:
                continue
            pending.append(values)
            area_keys.add(CellArea.to_hashkey(values))

            if len(pending) == batch_size:  # pragma: no cover
                session.execute(upsert, pending)
                session.commit()
                pending = []

        if pending:
            session.execute(upsert, pending)
            session.commit()

        for area_key in area_keys:
            update_lac.delay(
                area_key.radio,
                area_key.mcc,
                area_key.mnc,
                area_key.lac,
                cell_model_key='ocid_cell',
                cell_area_model_key='ocid_cell_area')
def location_update_cell(self, min_new=10, max_new=100, batch=10):
    """
    Recalculate the position of cells that have new observations.

    Selects up to ``batch`` cells whose ``new_measures`` is at least
    ``min_new`` and below ``max_new``, feeds each cell's most recent
    observations into ``calculate_new_position``, registers the
    affected area keys for an update after commit, and hands cells
    reported as moving to ``blacklist_and_remove_moving_cells``.

    Returns ``0`` when no cell matches; otherwise a tuple of
    ``(number of selected cells, number of moving cells)``.
    """
    cells = []
    redis_client = self.app.redis_client
    with self.db_session() as session:
        emit_new_observation_metric(self.stats_client, session,
                                    self.shortname, Cell,
                                    min_new, max_new)
        query = (session.query(Cell).filter(
            Cell.new_measures >= min_new).filter(
            Cell.new_measures < max_new).limit(batch))
        cells = query.all()
        if not cells:
            # Nothing to do; note this path returns an int, not a tuple.
            return 0
        moving_cells = set()
        updated_lacs = set()
        for cell in cells:
            query = session.query(
                CellObservation.lat, CellObservation.lon,
                CellObservation.id).filter(*CellObservation.joinkey(cell))
            # only take the last X new_measures
            query = query.order_by(CellObservation.created.desc()).limit(
                cell.new_measures)
            observations = query.all()
            if observations:
                moving = calculate_new_position(cell, observations,
                                                CELL_MAX_DIST_KM)
                if moving:
                    moving_cells.add(cell)
                # NOTE(review): presumably recorded for every cell with
                # observations, not only moving ones -- confirm nesting.
                updated_lacs.add(CellArea.to_hashkey(cell))
        if updated_lacs:
            # Registered via on_post_commit rather than called directly.
            session.on_post_commit(enqueue_lacs, redis_client, updated_lacs,
                                   UPDATE_KEY['cell_lac'])
        if moving_cells:
            # some cells found to be moving too much
            blacklist_and_remove_moving_cells(session, moving_cells)
        session.commit()
    return (len(cells), len(moving_cells))
def test_lac_miss(self):
    # A query for lac=4 finds nothing when only lac=3 data is stored.
    db = self.db_slave_session
    area_fields = dict(mcc=FRANCE_MCC, mnc=2, lac=3)
    base_lat = PARIS_LAT
    base_lon = PARIS_LON
    gsm = RADIO_TYPE['gsm']

    records = [
        Cell(lat=base_lat, lon=base_lon,
             radio=gsm, cid=4, **area_fields),
        Cell(lat=base_lat + 0.002, lon=base_lon + 0.004,
             radio=gsm, cid=5, **area_fields),
        Cell(lat=1.006, lon=1.006,
             radio=gsm, cid=6, **area_fields),
        CellArea(lat=1.0026666, lon=1.0033333,
                 radio=gsm, range=50000, **area_fields),
    ]
    db.add_all(records)
    db.flush()

    queried_cell = dict(radio="gsm", mcc=FRANCE_MCC, mnc=2, lac=4, cid=5)
    result = self._make_query(data={"cell": [queried_cell]})
    self.assertTrue(result is None)
def test_wifi_agrees_with_cell_and_lac(self):
    # Wifi networks positioned inside their enclosing LAC and cell are
    # accepted, and the accuracy is tightened to the wifi minimum.
    db = self.db_slave_session
    area_fields = dict(mcc=BRAZIL_MCC, mnc=VIVO_MNC, lac=12345)
    wifi1 = dict(key="1234567890ab")
    wifi2 = dict(key="1234890ab567")
    wifi3 = dict(key="4321890ab567")
    wifi_lat = SAO_PAULO_LAT + 0.002
    wifi_lon = SAO_PAULO_LON + 0.002
    gsm = RADIO_TYPE['gsm']

    records = [
        Wifi(lat=wifi_lat, lon=wifi_lon, **wifi1),
        Wifi(lat=wifi_lat, lon=wifi_lon, **wifi2),
        Wifi(lat=wifi_lat, lon=wifi_lon, **wifi3),
        Cell(lat=SAO_PAULO_LAT, lon=SAO_PAULO_LON, range=1000,
             radio=gsm, cid=6789, **area_fields),
        CellArea(lat=SAO_PAULO_LAT, lon=SAO_PAULO_LON, range=10000,
                 radio=gsm, **area_fields),
    ]
    db.add_all(records)
    db.flush()

    query = {
        "cell": [dict(radio="gsm", cid=6789, **area_fields)],
        "wifi": [wifi1, wifi2, wifi3],
    }
    result = self._make_query(data=query)

    expected = {
        'lat': SAO_PAULO_LAT + 0.002,
        'lon': SAO_PAULO_LON + 0.002,
        'accuracy': WIFI_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)

    expected_counters = [
        ('m.wifi_hit', 1),
    ]
    self.check_stats(counter=expected_counters)
def test_wifi_disagrees_with_lac(self):
    # Wifi networks positioned outside the LAC named in the query are
    # not trusted; the answer falls back to the LAC.
    db = self.db_slave_session
    area_fields = dict(mcc=BRAZIL_MCC, mnc=VIVO_MNC, lac=12345)
    wifi1 = dict(key="1234567890ab")
    wifi2 = dict(key="1234890ab567")
    wifi3 = dict(key="4321890ab567")
    wifi_lat = PORTO_ALEGRE_LAT
    wifi_lon = PORTO_ALEGRE_LON

    records = [
        Wifi(lat=wifi_lat, lon=wifi_lon, **wifi1),
        Wifi(lat=wifi_lat, lon=wifi_lon, **wifi2),
        Wifi(lat=wifi_lat, lon=wifi_lon, **wifi3),
        CellArea(lat=SAO_PAULO_LAT, lon=SAO_PAULO_LON, range=10000,
                 radio=RADIO_TYPE['gsm'], **area_fields),
    ]
    db.add_all(records)
    db.flush()

    query = {
        "cell": [dict(radio="gsm", cid=6789, **area_fields)],
        "wifi": [wifi1, wifi2, wifi3],
    }
    result = self._make_query(data=query)

    expected = {
        'lat': SAO_PAULO_LAT,
        'lon': SAO_PAULO_LON,
        'accuracy': LAC_MIN_ACCURACY,
    }
    self.assertEqual(result, expected)

    expected_counters = [
        ('m.wifi_hit', 0),
        ('m.cell_lac_hit', 1),
    ]
    self.check_stats(counter=expected_counters)
def import_stations(session, filename, fields):
    """
    Load OCID cells from a gzipped CSV file into the OCIDCell table.

    Rows are upserted and committed in batches, and an ``update_area``
    task is queued for every area key seen in the file.
    """
    with GzipFile(filename, 'rb') as gzip_file:
        reader = csv.DictReader(gzip_file, fields)
        batch_size = 10000
        pending = []
        seen_areas = set()
        update_clause = (
            'changeable = values(changeable), '
            'modified = values(modified), '
            'total_measures = values(total_measures), '
            'lat = values(lat), '
            'lon = values(lon), '
            'psc = values(psc), '
            '`range` = values(`range`)')
        upsert = OCIDCell.__table__.insert(on_duplicate=update_clause)

        for record in reader:
            # skip any header row
            if reader.line_num == 1 and \
               'radio' in record.values():  # pragma: no cover
                continue

            values = make_ocid_cell_import_dict(record)
            if values is None:
                continue
            pending.append(values)
            seen_areas.add(CellArea.to_hashkey(values))

            if len(pending) == batch_size:  # pragma: no cover
                session.execute(upsert, pending)
                session.commit()
                pending = []

        if pending:
            session.execute(upsert, pending)
            session.commit()

        for seen_area in seen_areas:
            update_area.delay(seen_area, cell_type='ocid')
def add_area_update(self, station):
    """Add the CellArea hash key of ``station`` to ``updated_areas``."""
    pending_areas = self.updated_areas
    area_key = CellArea.to_hashkey(station)
    pending_areas.add(area_key)
def add_area_update(self, station_key):
    """Add the CellArea hash key of ``station_key`` to ``updated_areas``."""
    hashed = CellArea.to_hashkey(station_key)
    pending_areas = self.updated_areas
    pending_areas.add(hashed)