def test_empty_radio_skipped(self, session, redis_client, cellarea_queue):
    """
    Skip a row whose radio type is an empty string.

    The public CSV export writes an empty string when the database holds
    an unexpected radio type. The intended radio type cannot be recovered,
    so the row is dropped and the import continues.
    """
    # The third line of the file (second data row) has an empty radio field.
    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        ",203,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        "GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,\n"
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    def count_rows(column):
        """Return the number of rows counted on the given column."""
        return session.query(func.count(column)).scalar()

    # The rows on either side of the skipped one are both imported.
    imported_umts = session.query(CellShard.shard_model(Radio.umts)).one()
    assert imported_umts.lat == 38.8574351
    assert imported_umts.lon == 23.4123167
    assert count_rows(CellShard.shard_model(Radio.gsm).cellid) == 1

    # One cell area and one region stat per imported station.
    assert count_rows(CellArea.areaid) == 2
    assert count_rows(RegionStat.region) == 2
def test_outdated_station(self, session, redis_client, cellarea_queue):
    """An older station record does not update existing station records."""
    created_at = datetime(2019, 1, 1, tzinfo=UTC)
    modified_at = datetime(2019, 10, 7, tzinfo=UTC)

    # Store a station whose `modified` time is later than the `updated`
    # timestamp of the CSV row below (1570120316 is 2019-10-03 UTC).
    existing = CellShard.create(
        _raise_invalid=True,
        radio=Radio.wcdma,
        mcc=202,
        mnc=1,
        lac=2120,
        cid=12842,
        lat=38.85,
        lon=23.41,
        radius=1,
        samples=1,
        created=created_at,
        modified=modified_at,
    )
    session.add(existing)
    session.flush()

    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # The stored station keeps its original position and timestamps.
    stored = session.query(CellShard.shard_model(Radio.wcdma)).one()
    assert stored.lat == 38.85
    assert stored.lon == 23.41
    assert stored.created == created_at
    assert stored.modified == modified_at

    # Nothing changed, so no CellAreas or RegionStats are generated.
    area_count = session.query(func.count(CellArea.areaid)).scalar()
    assert area_count == 0
    stat_count = session.query(func.count(RegionStat.region)).scalar()
    assert stat_count == 0
def main(argv, _db=None):
    """
    Import a gzipped public cell export into a local dev environment.

    ``argv`` is the full command line: ``argv[0]`` becomes the program name
    shown by argparse and the remaining items are parsed as arguments.
    ``_db`` is accepted but not referenced in this function.

    Returns 1 (after printing a message) when the ``local_dev_env`` setting
    is falsy or the given file does not exist, and 0 once the import has run.
    Invalid command-line arguments make argparse exit the process.
    """
    description = (
        "Import from public cell data into a local dev environment. "
        "See https://location.services.mozilla.com/downloads"
    )
    parser = argparse.ArgumentParser(prog=argv[0], description=description)
    parser.add_argument("filename", help="Path to the csv.gz import file.")
    options = parser.parse_args(argv[1:])

    # Guard: refuse to run outside a local development environment.
    if not settings("local_dev_env"):
        print("This script can only be run in a local dev environment.")
        print("Set LOCAL_DEV_ENV=True in your environment.")
        return 1

    # Expand "~" and make the path absolute before checking it exists.
    expanded = os.path.expanduser(options.filename)
    filename = os.path.abspath(expanded)
    if not os.path.isfile(filename):
        print("File %s not found." % filename)
        return 1

    configure_logging()
    celery_app = get_eager_celery_app()
    init_worker(celery_app)
    cellarea_queue = celery_app.data_queues["update_cellarea"]
    redis_client = celery_app.redis_client

    # NOTE(review): the session is opened with commit=False; presumably
    # read_stations_from_csv handles its own commits - confirm.
    with db_worker_session(
        celery_app.db, commit=False
    ) as session, gzip_open(filename, "r") as file_handle:
        read_stations_from_csv(session, file_handle, redis_client, cellarea_queue)
    return 0
def test_unexpected_csv(self, session, redis_client, cellarea_queue):
    """A CSV that does not look like a cell export raises InvalidCSV."""
    # This header has region/name columns instead of the station columns.
    csv = StringIO(
        "region,name\n"
        "US,United States\n"
        "UK,United Kingdom\n"
    )
    with pytest.raises(InvalidCSV):
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)
def test_modified_station(self, session, redis_client, cellarea_queue):
    """A newer CSV row updates the matching existing station record."""
    first_of_2019 = datetime(2019, 1, 1, tzinfo=UTC)
    station_data = {
        "radio": Radio.umts,
        "mcc": 202,
        "mnc": 1,
        "lac": 2120,
        "cid": 12842,
        "lat": 38.85,
        "lon": 23.41,
        "min_lat": 38.7,
        "max_lat": 38.9,
        "min_lon": 23.4,
        "max_lon": 23.5,
        "radius": 1,
        "samples": 1,
        "created": first_of_2019,
        "modified": first_of_2019,
    }
    session.add(CellShard.create(_raise_invalid=True, **station_data))
    session.flush()

    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    updated = session.query(CellShard.shard_model(Radio.umts)).one()

    # Position, radius, sample count and timestamps come from the import.
    assert updated.lat == 38.8574351
    assert updated.lon == 23.4123167
    assert updated.radius == 0
    assert updated.samples == 6
    assert updated.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
    assert updated.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)

    # The bounding box keeps the values stored before the import.
    for bound in ("max_lat", "min_lat", "max_lon", "min_lon"):
        assert getattr(updated, bound) == station_data[bound]
    assert updated.region == "GR"

    # A modified station triggers the creation of a new CellArea.
    area_query = session.query(CellArea).order_by(CellArea.areaid)
    new_area = area_query.one()
    assert new_area.areaid == (Radio.wcdma, 202, 1, 2120)

    # The new CellArea triggers the creation of a RegionStat.
    region_stat = session.query(RegionStat).order_by("region").one()
    assert region_stat.region == "GR"
    assert region_stat.wcdma == 1
def test_new_stations(self, session, redis_client, cellarea_queue):
    """Importing new stations also creates cell areas and region stats."""
    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        "GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,\n"
        "LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,\n"
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # Verify every imported field of the UMTS station.
    station = session.query(CellShard.shard_model(Radio.umts)).one()
    assert (station.mcc, station.mnc) == (202, 1)
    assert (station.lac, station.cid) == (2120, 12842)
    assert station.lat == 38.8574351
    assert station.lon == 23.4123167
    # A brand-new station's bounding box collapses to its position.
    assert station.max_lat == station.lat
    assert station.min_lat == station.lat
    assert station.max_lon == station.lon
    assert station.min_lon == station.lon
    assert station.radius == 0
    assert station.samples == 6
    assert station.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
    assert station.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)
    assert station.region == "GR"

    # For the other radio types, only check that one station was stored.
    gsm_cellid = CellShard.shard_model(Radio.gsm).cellid
    assert session.query(func.count(gsm_cellid)).scalar() == 1
    lte_cellid = CellShard.shard_model(Radio.lte).cellid
    assert session.query(func.count(lte_cellid)).scalar() == 1

    # New stations trigger the creation of new CellAreas.
    first, second, third = session.query(CellArea).order_by(CellArea.areaid).all()
    assert first.areaid == (Radio.gsm, 208, 10, 30014)
    assert second.areaid == (Radio.wcdma, 202, 1, 2120)
    assert third.areaid == (Radio.lte, 202, 1, 2120)

    # New CellAreas trigger the creation of RegionStats, one per region.
    region_stats = session.query(RegionStat).order_by("region").all()
    assert len(region_stats) == 2
    expected = [("FR", 1, 0, 0, 0, 0), ("GR", 0, 1, 1, 0, 0)]
    actual = [
        (entry.region, entry.gsm, entry.wcdma, entry.lte, entry.blue, entry.wifi)
        for entry in region_stats
    ]
    assert actual == expected
def test_unexpected_radio_halts(self, session, redis_client, cellarea_queue):
    """
    Stop processing the CSV when a row has an unexpected radio type.

    The public CSV export only contains a few radio types, so any other
    value suggests file corruption or other shenanigans.
    """
    # The third line of the file uses 'WCDMA', which is not a valid radio
    # string in the export format.
    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        "WCDMA,203,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        "GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,\n"
    )
    with pytest.raises(InvalidCSV):
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)
def test_bad_data_skipped(self, session, redis_client, cellarea_queue):
    """A row that has invalid data (like a string for a number) is skipped."""
    # The GSM row carries the text "MCC" where a numeric mcc is expected.
    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        'GSM,"MCC",10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,\n'
        "LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,\n"
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    def count_rows(column):
        """Return the number of rows counted on the given column."""
        return session.query(func.count(column)).scalar()

    # The invalid GSM row is skipped.
    assert count_rows(CellShard.shard_model(Radio.gsm).cellid) == 0

    # The valid WCDMA and LTE rows are processed, and in the same region.
    wcdma_cellid = CellShard.shard_model(Radio.wcdma).cellid
    lte_cellid = CellShard.shard_model(Radio.lte).cellid
    assert count_rows(wcdma_cellid) == 1
    assert count_rows(lte_cellid) == 1
    assert count_rows(CellArea.areaid) == 2
    assert count_rows(RegionStat.region) == 1
def test_invalid_row_skipped(self, session, redis_client, cellarea_queue):
    """A row that fails validation is skipped."""
    # The GSM row has longitude 202.5, which is greater than the max of 180.
    csv = StringIO(
        "radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal\n"
        "UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,\n"
        "GSM,208,10,30014,20669,,202.5,46.5992450,0,78,1,1566307030,1570119413,\n"
        "LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,\n"
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    def count_rows(column):
        """Return the number of rows counted on the given column."""
        return session.query(func.count(column)).scalar()

    # The invalid GSM row is skipped.
    assert count_rows(CellShard.shard_model(Radio.gsm).cellid) == 0

    # The valid UMTS and LTE rows are processed, and in the same region.
    umts_cellid = CellShard.shard_model(Radio.umts).cellid
    lte_cellid = CellShard.shard_model(Radio.lte).cellid
    assert count_rows(umts_cellid) == 1
    assert count_rows(lte_cellid) == 1
    assert count_rows(CellArea.areaid) == 2
    assert count_rows(RegionStat.region) == 1