def test_outdated_station(self, session, redis_client, cellarea_queue):
    """An older station record does not update existing station records."""
    station_data = {
        "radio": Radio.wcdma,
        "mcc": 202,
        "mnc": 1,
        "lac": 2120,
        "cid": 12842,
        "lat": 38.85,
        "lon": 23.41,
        "radius": 1,
        "samples": 1,
        "created": datetime(2019, 1, 1, tzinfo=UTC),
        "modified": datetime(2019, 10, 7, tzinfo=UTC),
    }
    station = CellShard.create(_raise_invalid=True, **station_data)
    session.add(station)
    session.flush()

    csv = StringIO(
        """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
"""
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # The existing station is unmodified
    wcdma = session.query(CellShard.shard_model(Radio.wcdma)).one()
    assert wcdma.lat == 38.85
    assert wcdma.lon == 23.41
    assert wcdma.created == datetime(2019, 1, 1, tzinfo=UTC)
    assert wcdma.modified == datetime(2019, 10, 7, tzinfo=UTC)

    # No CellAreas or RegionStats are generated
    assert session.query(func.count(CellArea.areaid)).scalar() == 0
    assert session.query(func.count(RegionStat.region)).scalar() == 0
def test_empty_radio_skipped(self, session, redis_client, cellarea_queue):
    """
    An empty string for the radio type causes the row to be skipped.

    The public CSV export encodes an unexpected radio type from the
    database as an empty string. We can't determine what radio type was
    expected.
    """
    # In row 3, the radio is an empty string
    csv = StringIO(
        """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
,203,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,
"""
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # The empty radio row is skipped, but the following row is processed.
    umts = session.query(CellShard.shard_model(Radio.umts)).one()
    assert umts.lat == 38.8574351
    assert umts.lon == 23.4123167
    gsm_model = CellShard.shard_model(Radio.gsm)
    assert session.query(func.count(gsm_model.cellid)).scalar() == 1
    assert session.query(func.count(CellArea.areaid)).scalar() == 2
    assert session.query(func.count(RegionStat.region)).scalar() == 2
def test_update(self):
    now = util.utcnow()
    invalid_key = dict(lac=None, cid=None)
    observations = []

    def obs_factory(**kw):
        obs = CellObservationFactory.build(**kw)
        if obs is not None:
            observations.append(obs)

    cell1 = CellShardFactory(samples=3, weight=3.0)
    lat1, lon1 = (cell1.lat, cell1.lon)
    key1 = dict(radio=cell1.radio, lac=cell1.lac, cid=cell1.cid)
    obs_factory(lat=lat1, lon=lon1, created=now, **key1)
    obs_factory(lat=lat1 + 0.004, lon=lon1 + 0.006, created=now, **key1)
    obs_factory(lat=lat1 + 0.006, lon=lon1 + 0.009, created=now, **key1)
    # The lac, cid are invalid and should be skipped
    obs_factory(created=now, **invalid_key)
    obs_factory(created=now, **invalid_key)

    cell2 = CellShardFactory(lat=lat1 + 1.0, lon=lon1 + 1.0,
                             samples=3, weight=3.0)
    lat2, lon2 = (cell2.lat, cell2.lon)
    key2 = dict(radio=cell2.radio, lac=cell2.lac, cid=cell2.cid)
    obs_factory(lat=lat2 + 0.001, lon=lon2 + 0.002, created=now, **key2)
    obs_factory(lat=lat2 + 0.003, lon=lon2 + 0.006, created=now, **key2)

    cell3 = CellShardFactory(samples=100000, weight=100000.0)
    lat3, lon3 = (cell3.lat, cell3.lon)
    key3 = dict(radio=cell3.radio, lac=cell3.lac, cid=cell3.cid)
    for i in range(10):
        obs_factory(lat=lat3 + 0.5, lon=lon3 + 0.5, **key3)

    self.session.commit()
    self._queue_and_update_cell(observations)

    shard = CellShard.shard_model(cell1.cellid)
    found = (self.session.query(shard)
             .filter(shard.cellid == cell1.cellid)).one()
    self.assertAlmostEqual(found.lat, lat1 + 0.001667, 6)
    self.assertAlmostEqual(found.lon, lon1 + 0.0025, 6)

    shard = CellShard.shard_model(cell2.cellid)
    found = (self.session.query(shard)
             .filter(shard.cellid == cell2.cellid)).one()
    self.assertAlmostEqual(found.lat, lat2 + 0.0008, 6)
    self.assertAlmostEqual(found.lon, lon2 + 0.0016, 6)

    shard = CellShard.shard_model(cell3.cellid)
    found = (self.session.query(shard)
             .filter(shard.cellid == cell3.cellid)).one()
    expected_lat = ((lat3 * 10000) + (lat3 + 0.5) * 10) / 10010
    expected_lon = ((lon3 * 10000) + (lon3 + 0.5) * 10) / 10010
    self.assertAlmostEqual(found.lat, expected_lat, 7)
    self.assertAlmostEqual(found.lon, expected_lon, 7)
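# The expected positions in test_update above follow from a weight-based
# running average: the existing station contributes its prior weight at its
# current position and each new observation adds its own weight. Treating
# each observation as weight 1.0 is an assumption here, but it reproduces
# the asserted offsets; the cell3 expectation suggests the prior weight is
# capped (at 10000 in the formula above). A minimal sketch of the arithmetic:
def weighted_position(prior_lat, prior_lon, prior_weight, observations):
    """Weight-based running average of a station position.

    ``observations`` is a list of (lat, lon, weight) tuples. The exact
    per-observation weights used by the production updater are an
    assumption here; uniform weight 1.0 matches the test expectations.
    """
    lat_sum = prior_lat * prior_weight
    lon_sum = prior_lon * prior_weight
    total = prior_weight
    for lat, lon, weight in observations:
        lat_sum += lat * weight
        lon_sum += lon * weight
        total += weight
    return lat_sum / total, lon_sum / total


# cell1: prior weight 3.0 at (lat1, lon1), observations at offsets
# (0, 0), (0.004, 0.006) and (0.006, 0.009) with weight 1.0 each:
# offsets of 0.010 / 6 ~= 0.001667 and 0.015 / 6 = 0.0025, as asserted.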
def test_modified_station(self, session, redis_client, cellarea_queue):
    """A modified station updates existing records."""
    station_data = {
        "radio": Radio.umts,
        "mcc": 202,
        "mnc": 1,
        "lac": 2120,
        "cid": 12842,
        "lat": 38.85,
        "lon": 23.41,
        "min_lat": 38.7,
        "max_lat": 38.9,
        "min_lon": 23.4,
        "max_lon": 23.5,
        "radius": 1,
        "samples": 1,
        "created": datetime(2019, 1, 1, tzinfo=UTC),
        "modified": datetime(2019, 1, 1, tzinfo=UTC),
    }
    station = CellShard.create(_raise_invalid=True, **station_data)
    session.add(station)
    session.flush()

    csv = StringIO(
        """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
"""
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # Check the details of the UMTS station
    umts = session.query(CellShard.shard_model(Radio.umts)).one()
    # New position, other details from import
    assert umts.lat == 38.8574351
    assert umts.lon == 23.4123167
    assert umts.radius == 0
    assert umts.samples == 6
    assert umts.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
    assert umts.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)
    # Other details unchanged
    assert umts.max_lat == station_data["max_lat"]
    assert umts.min_lat == station_data["min_lat"]
    assert umts.max_lon == station_data["max_lon"]
    assert umts.min_lon == station_data["min_lon"]
    assert umts.region == "GR"

    # A modified station triggers the creation of a new CellArea
    cell_area = session.query(CellArea).order_by(CellArea.areaid).one()
    assert cell_area.areaid == (Radio.wcdma, 202, 1, 2120)

    # The new CellArea triggers the creation of a RegionStat
    stat = session.query(RegionStat).order_by("region").one()
    assert stat.region == "GR"
    assert stat.wcdma == 1
def test_new_stations(self, session, redis_client, cellarea_queue):
    """New stations are imported, creating cell areas and region stats."""
    csv = StringIO(
        """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,
LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,
"""
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # Check the details of the UMTS station
    umts = session.query(CellShard.shard_model(Radio.umts)).one()
    assert umts.mcc == 202
    assert umts.mnc == 1
    assert umts.lac == 2120
    assert umts.cid == 12842
    assert umts.lat == 38.8574351
    assert umts.lon == 23.4123167
    assert umts.max_lat == umts.lat
    assert umts.min_lat == umts.lat
    assert umts.max_lon == umts.lon
    assert umts.min_lon == umts.lon
    assert umts.radius == 0
    assert umts.samples == 6
    assert umts.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
    assert umts.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)
    assert umts.region == "GR"

    # Check the counts of the other station types
    gsm_model = CellShard.shard_model(Radio.gsm)
    assert session.query(func.count(gsm_model.cellid)).scalar() == 1
    lte_model = CellShard.shard_model(Radio.lte)
    assert session.query(func.count(lte_model.cellid)).scalar() == 1

    # New stations trigger the creation of new CellAreas
    cell_areas = session.query(CellArea).order_by(CellArea.areaid).all()
    area1, area2, area3 = cell_areas
    assert area1.areaid == (Radio.gsm, 208, 10, 30014)
    assert area2.areaid == (Radio.wcdma, 202, 1, 2120)
    assert area3.areaid == (Radio.lte, 202, 1, 2120)

    # New CellAreas trigger the creation of RegionStats
    stats = session.query(RegionStat).order_by("region").all()
    assert len(stats) == 2
    actual = [
        (stat.region, stat.gsm, stat.wcdma, stat.lte, stat.blue, stat.wifi)
        for stat in stats
    ]
    expected = [("FR", 1, 0, 0, 0, 0), ("GR", 0, 1, 1, 0, 0)]
    assert actual == expected
def test_weighted_update(self):
    cell = CellShardFactory(radio=Radio.gsm, samples=1, weight=2.0)
    cell_lat = cell.lat
    cell_lon = cell.lon
    cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                    lac=cell.lac, cid=cell.cid)

    obs_factory = CellObservationFactory
    obs = [
        obs_factory(lat=cell.lat, lon=cell.lon - 0.002,
                    accuracy=20.0, signal=-51, **cell_key),
        obs_factory(lat=cell.lat, signal=-111,
                    lon=cell.lon - 0.004, accuracy=40.0, **cell_key),
    ]

    self.session.commit()
    self._queue_and_update_cell(obs)

    shard = CellShard.shard_model(cell.cellid)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)

    cell = cells[0]
    self.assertAlmostEqual(cell.lat, cell_lat)
    self.assertAlmostEqual(cell.max_lat, cell_lat)
    self.assertAlmostEqual(cell.min_lat, cell_lat)
    self.assertAlmostEqual(cell.lon, cell_lon - 0.0016358, 7)
    self.assertAlmostEqual(cell.max_lon, cell_lon)
    self.assertAlmostEqual(cell.min_lon, cell_lon - 0.004)
    self.assertEqual(cell.radius, 164)
    self.assertEqual(cell.samples, 3)
    self.assertAlmostEqual(cell.weight, 9.47, 2)
def test_max_min_radius_update(self):
    cell = CellShardFactory(radius=150, samples=3, weight=3.0)
    cell_lat = cell.lat
    cell_lon = cell.lon
    cell.max_lat = cell.lat + 0.001
    cell.min_lat = cell.lat - 0.001
    cell.max_lon = cell.lon + 0.001
    cell.min_lon = cell.lon - 0.001
    k1 = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
              lac=cell.lac, cid=cell.cid)

    obs_factory = CellObservationFactory
    obs = [
        obs_factory(lat=cell.lat, lon=cell.lon - 0.002, **k1),
        obs_factory(lat=cell.lat + 0.004, lon=cell.lon - 0.006, **k1),
    ]

    self.session.commit()
    self._queue_and_update_cell(obs)

    shard = CellShard.shard_model(cell.cellid)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)

    cell = cells[0]
    self.assertAlmostEqual(cell.lat, cell_lat + 0.0008)
    self.assertAlmostEqual(cell.max_lat, cell_lat + 0.004)
    self.assertAlmostEqual(cell.min_lat, cell_lat - 0.001)
    self.assertAlmostEqual(cell.lon, cell_lon - 0.0016)
    self.assertAlmostEqual(cell.max_lon, cell_lon + 0.001)
    self.assertAlmostEqual(cell.min_lon, cell_lon - 0.006)
    self.assertEqual(cell.radius, 468)
    self.assertEqual(cell.samples, 5)
    self.assertAlmostEqual(cell.weight, 5.0, 2)
def test_blocklist(self):
    now = util.utcnow()
    today = now.date()
    observations = CellObservationFactory.build_batch(3)
    obs = observations[0]
    CellShardFactory(
        radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc,
        lac=obs.lac, cid=obs.cid,
        created=now,
        block_first=today - timedelta(days=10),
        block_last=today,
        block_count=1,
    )
    self.session.commit()
    self._queue_and_update(observations)

    blocks = []
    for obs in observations:
        shard = CellShard.shard_model(obs.cellid)
        cell = (self.session.query(shard)
                .filter(shard.cellid == obs.cellid)).one()
        if cell.blocked():
            blocks.append(cell)

    self.assertEqual(len(blocks), 1)
    self.check_statcounter(StatKey.cell, 2)
    self.check_statcounter(StatKey.unique_cell, 2)
def test_blocklist(self):
    now = util.utcnow()
    today = now.date()
    observations = CellObservationFactory.build_batch(3)
    obs = observations[0]
    CellShardFactory(
        radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc,
        lac=obs.lac, cid=obs.cid,
        created=now,
        block_first=today - timedelta(days=10),
        block_last=today,
        block_count=1,
    )
    self.session.commit()
    self._queue_and_update_cell(observations)

    blocks = []
    for obs in observations:
        shard = CellShard.shard_model(obs.cellid)
        cell = (self.session.query(shard)
                .filter(shard.cellid == obs.cellid)).one()
        if cell.blocked():
            blocks.append(cell)

    self.assertEqual(len(blocks), 1)
    self.check_statcounter(StatKey.cell, 2)
    self.check_statcounter(StatKey.unique_cell, 2)
def configure_data(redis_client):
    """
    Configure fixed set of data queues.
    """
    data_queues = {
        'update_cell': DataQueue('update_cell', redis_client,
                                 queue_key='update_cell'),  # BBB
        'update_cellarea': DataQueue('update_cellarea', redis_client,
                                     queue_key='update_cellarea'),
        'update_cellarea_ocid': DataQueue('update_cellarea_ocid', redis_client,
                                          queue_key='update_cellarea_ocid'),
        'update_score': DataQueue('update_score', redis_client,
                                  queue_key='update_score'),
    }
    for shard_id in DataMap.shards().keys():
        name = 'update_datamap_' + shard_id
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)
    for shard_id in CellShard.shards().keys():
        name = 'update_cell_' + shard_id
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)
    for shard_id in WifiShard.shards().keys():
        name = 'update_wifi_' + shard_id
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)
    return data_queues
def test_max_min_radius_update(self):
    cell = CellShardFactory(radius=150, samples=3)
    cell_lat = cell.lat
    cell_lon = cell.lon
    cell.max_lat = cell.lat + 0.001
    cell.min_lat = cell.lat - 0.001
    cell.max_lon = cell.lon + 0.001
    cell.min_lon = cell.lon - 0.001
    k1 = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
              lac=cell.lac, cid=cell.cid)

    obs_factory = CellObservationFactory
    obs = [
        obs_factory(lat=cell.lat, lon=cell.lon - 0.002, **k1),
        obs_factory(lat=cell.lat + 0.004, lon=cell.lon - 0.006, **k1),
    ]

    self.session.commit()
    self._queue_and_update(obs)

    shard = CellShard.shard_model(cell.cellid)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)

    cell = cells[0]
    self.assertAlmostEqual(cell.lat, cell_lat + 0.0008)
    self.assertAlmostEqual(cell.max_lat, cell_lat + 0.004)
    self.assertAlmostEqual(cell.min_lat, cell_lat - 0.001)
    self.assertAlmostEqual(cell.lon, cell_lon - 0.0016)
    self.assertAlmostEqual(cell.max_lon, cell_lon + 0.001)
    self.assertAlmostEqual(cell.min_lon, cell_lon - 0.006)
    self.assertEqual(cell.radius, 468)
    self.assertEqual(cell.samples, 5)
def query_cells(query, lookups, model, raven_client):
    # Given a location query and a list of lookup instances, query the
    # database and return a list of model objects.
    cellids = [lookup.cellid for lookup in lookups]
    if not cellids:  # pragma: no cover
        return []

    # load all fields used in score calculation and those we
    # need for the position
    load_fields = ('lat', 'lon', 'radius', 'created', 'modified', 'samples')

    today = util.utcnow().date()
    temp_blocked = today - TEMPORARY_BLOCKLIST_DURATION

    if model == CellOCID:
        # non sharded OCID table
        return query_cell_table(query.session, model, cellids,
                                temp_blocked, load_fields, raven_client)

    result = []
    shards = defaultdict(list)
    for lookup in lookups:
        shards[CellShard.shard_model(lookup.radio)].append(lookup.cellid)

    for shard, shard_cellids in shards.items():
        result.extend(
            query_cell_table(query.session, shard, shard_cellids,
                             temp_blocked, load_fields, raven_client))

    return result
def query_cells(query, lookups, model, raven_client):
    # Given a location query and a list of lookup instances, query the
    # database and return a list of model objects.
    cellids = [lookup.cellid for lookup in lookups]
    if not cellids:  # pragma: no cover
        return []

    # load all fields used in score calculation and those we
    # need for the position
    load_fields = ('lat', 'lon', 'radius', 'region', 'samples',
                   'created', 'modified', 'last_seen', 'block_last')

    today = util.utcnow().date()
    temp_blocked = today - TEMPORARY_BLOCKLIST_DURATION

    if model == CellOCID:
        # non sharded OCID table
        return query_cell_table(query.session, model, cellids,
                                temp_blocked, load_fields, raven_client)

    result = []
    shards = defaultdict(list)
    for lookup in lookups:
        shards[CellShard.shard_model(lookup.radio)].append(lookup.cellid)

    for shard, shard_cellids in shards.items():
        result.extend(
            query_cell_table(query.session, shard, shard_cellids,
                             temp_blocked, load_fields, raven_client))

    return result
def configure_data(redis_client):
    """
    Configure fixed set of data queues.
    """
    data_queues = {
        # *_incoming need to be the exact same as in webapp.config
        'update_incoming': DataQueue('update_incoming', redis_client,
                                     batch=100, compress=True),
        'transfer_incoming': DataQueue('transfer_incoming', redis_client,
                                       batch=100, compress=True),
    }
    for key in ('update_cellarea', ):
        data_queues[key] = DataQueue(key, redis_client, batch=100, json=False)
    for shard_id in BlueShard.shards().keys():
        key = 'update_blue_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in DataMap.shards().keys():
        key = 'update_datamap_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500, json=False)
    for shard_id in CellShard.shards().keys():
        key = 'update_cell_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in WifiShard.shards().keys():
        key = 'update_wifi_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    return data_queues
def test_lock_timeout(self, celery, db_rw_drop_table, redis, ro_session, session, stats): obs = CellObservationFactory.build() cell = CellShardFactory.build( radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc, lac=obs.lac, cid=obs.cid, samples=10, ) ro_session.add(cell) ro_session.flush() orig_add_area = CellUpdater.add_area_update orig_wait = CellUpdater._retry_wait num = [0] def mock_area(self, updated_areas, key, num=num, ro_session=ro_session): orig_add_area(self, updated_areas, key) num[0] += 1 if num[0] == 2: ro_session.rollback() try: CellUpdater._retry_wait = 0.0001 session.execute('set session innodb_lock_wait_timeout = 1') with mock.patch.object(CellUpdater, 'add_area_update', mock_area): self.queue_and_update(celery, [obs]) # the inner task logic was called exactly twice assert num[0] == 2 shard = CellShard.shard_model(obs.cellid) cells = session.query(shard).all() assert len(cells) == 1 assert cells[0].samples == 1 self.check_statcounter(redis, StatKey.cell, 1) self.check_statcounter(redis, StatKey.unique_cell, 1) stats.check( counter=[('data.observation.insert', 1, ['type:cell'])], timer=[('task', 1, ['task:data.update_cell'])], ) finally: CellUpdater._retry_wait = orig_wait for model in CellShard.shards().values(): session.execute(text( 'drop table %s;' % model.__tablename__))
def _update_all(self):
    schedule_export_reports.delay().get()

    for shard_id in CellShard.shards().keys():
        update_cell.delay(shard_id=shard_id).get()

    for shard_id in WifiShard.shards().keys():
        update_wifi.delay(shard_id=shard_id).get()
def _queue_and_update_cell(self, obs):
    sharded_obs = defaultdict(list)
    for ob in obs:
        sharded_obs[CellShard.shard_id(ob.cellid)].append(ob)

    for shard_id, values in sharded_obs.items():
        queue = self.celery_app.data_queues['update_cell_' + shard_id]
        queue.enqueue([value.to_json() for value in values])
        update_cell.delay(shard_id=shard_id).get()
def _queue_and_update(self, obs):
    sharded_obs = defaultdict(list)
    for ob in obs:
        sharded_obs[CellShard.shard_id(ob.cellid)].append(ob)

    for shard_id, values in sharded_obs.items():
        queue = self.celery_app.data_queues['update_cell_' + shard_id]
        queue.enqueue(values)
        update_cell.delay(shard_id=shard_id).get()
def test_import_local_cell(self):
    self.import_csv(cell_type='cell')
    cells = self.session.query(CellShard.shards()['wcdma']).all()
    self.assertEqual(len(cells), 9)

    areaids = set([cell.areaid for cell in cells])
    self.assertEqual(self.session.query(CellArea).count(), len(areaids))

    update_statcounter.delay(ago=0).get()
    self.check_stat(StatKey.unique_cell, 9)
def test_lock_timeout(self, celery, redis, session, session2, metricsmock, restore_db): obs = CellObservationFactory.build() cell = CellShardFactory.build( radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc, lac=obs.lac, cid=obs.cid, samples=10, ) session2.add(cell) session2.flush() orig_add_area = CellUpdater.add_area_update orig_wait = CellUpdater._retry_wait num = [0] def mock_area(self, updated_areas, key, num=num, session2=session2): orig_add_area(self, updated_areas, key) num[0] += 1 if num[0] == 2: session2.rollback() try: CellUpdater._retry_wait = 0.0001 session.execute("set session innodb_lock_wait_timeout = 1") with mock.patch.object(CellUpdater, "add_area_update", mock_area): self.queue_and_update(celery, [obs]) # the inner task logic was called exactly twice assert num[0] == 2 shard = CellShard.shard_model(obs.cellid) cells = session.query(shard).all() assert len(cells) == 1 assert cells[0].samples == 1 self.check_statcounter(redis, StatKey.cell, 1) self.check_statcounter(redis, StatKey.unique_cell, 1) # Assert generated metrics are correct assert (len( metricsmock.filter_records("incr", "data.observation.insert", value=1, tags=["type:cell"])) == 1) assert (len( metricsmock.filter_records("timing", "task", tags=["task:data.update_cell" ])) == 1) finally: CellUpdater._retry_wait = orig_wait session.execute(text("drop table %s;" % cell.__tablename__))
def _update_all(self):
    update_incoming.delay().get()

    for shard_id in BlueShard.shards().keys():
        update_blue.delay(shard_id=shard_id).get()

    for shard_id in CellShard.shards().keys():
        update_cell.delay(shard_id=shard_id).get()

    for shard_id in WifiShard.shards().keys():
        update_wifi.delay(shard_id=shard_id).get()
def test_import_local_cell(self, celery, redis, session):
    self.import_csv(
        celery, redis, session,
        CellShardFactory.build(radio=Radio.wcdma), cell_type='cell')
    cells = session.query(CellShard.shards()['wcdma']).all()
    assert len(cells) == 9

    areaids = set([cell.areaid for cell in cells])
    assert session.query(CellArea).count() == len(areaids)

    update_statcounter.delay().get()
    self.check_stat(session, StatKey.unique_cell, 9)
def test_bad_data_skipped(self, session, redis_client, cellarea_queue):
    """A row that has invalid data (like a string for a number) is skipped."""
    # In the GSM row, the mcc field should be a number, not a string
    csv = StringIO(
        """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,"MCC",10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,
LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,
"""
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # The invalid GSM row is skipped
    gsm_model = CellShard.shard_model(Radio.gsm)
    assert session.query(func.count(gsm_model.cellid)).scalar() == 0

    # The valid WCDMA and LTE rows are processed, and in the same region
    wcdma_model = CellShard.shard_model(Radio.wcdma)
    lte_model = CellShard.shard_model(Radio.lte)
    assert session.query(func.count(wcdma_model.cellid)).scalar() == 1
    assert session.query(func.count(lte_model.cellid)).scalar() == 1
    assert session.query(func.count(CellArea.areaid)).scalar() == 2
    assert session.query(func.count(RegionStat.region)).scalar() == 1
def test_shard_queues(self):  # BBB
    observations = CellObservationFactory.build_batch(3)
    data_queues = self.celery_app.data_queues
    single_queue = data_queues['update_cell']
    single_queue.enqueue(observations)
    update_cell.delay().get()

    self.assertEqual(single_queue.size(), 0)
    total = 0
    for shard_id in CellShard.shards().keys():
        total += data_queues['update_cell_' + shard_id].size()
    self.assertEqual(total, 3)
def test_lock_timeout(self): obs = CellObservationFactory.build() cell = CellShardFactory.build( radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc, lac=obs.lac, cid=obs.cid, samples=10, ) self.db_ro_session.add(cell) self.db_ro_session.flush() orig_add_area = CellUpdater.add_area_update orig_wait = CellUpdater._retry_wait num = [0] def mock_area(self, updated_areas, key, num=num, ro_session=self.db_ro_session): orig_add_area(self, updated_areas, key) num[0] += 1 if num[0] == 2: ro_session.rollback() try: CellUpdater._retry_wait = 0.001 self.session.execute('set session innodb_lock_wait_timeout = 1') with mock.patch.object(CellUpdater, 'add_area_update', mock_area): self._queue_and_update_cell([obs]) finally: CellUpdater._retry_wait = orig_wait # the inner task logic was called exactly twice self.assertEqual(num[0], 2) shard = CellShard.shard_model(obs.cellid) cells = self.session.query(shard).all() self.assertEqual(len(cells), 1) self.assertEqual(cells[0].samples, 1) self.check_statcounter(StatKey.cell, 1) self.check_statcounter(StatKey.unique_cell, 1) self.check_stats( counter=[('data.observation.insert', 1, ['type:cell'])], timer=[('task', 1, ['task:data.update_cell'])], )
def test_invalid_row_skipped(self, session, redis_client, cellarea_queue):
    """A row that fails validation is skipped."""
    # In the GSM row, the longitude 202.5 is greater than the max of 180
    csv = StringIO(
        """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,208,10,30014,20669,,202.5,46.5992450,0,78,1,1566307030,1570119413,
LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,
"""
    )
    read_stations_from_csv(session, csv, redis_client, cellarea_queue)

    # The invalid GSM row is skipped
    gsm_model = CellShard.shard_model(Radio.gsm)
    assert session.query(func.count(gsm_model.cellid)).scalar() == 0

    # The valid UMTS and LTE rows are processed, and in the same region
    umts_model = CellShard.shard_model(Radio.umts)
    lte_model = CellShard.shard_model(Radio.lte)
    assert session.query(func.count(umts_model.cellid)).scalar() == 1
    assert session.query(func.count(lte_model.cellid)).scalar() == 1
    assert session.query(func.count(CellArea.areaid)).scalar() == 2
    assert session.query(func.count(RegionStat.region)).scalar() == 1
def _update_all(self, session, datamap_only=False):
    ExportConfigFactory(name='internal', batch=0, schema='internal')
    session.flush()
    update_incoming.delay().get()

    if datamap_only:
        return

    for shard_id in BlueShard.shards().keys():
        update_blue.delay(shard_id=shard_id).get()

    for shard_id in CellShard.shards().keys():
        update_cell.delay(shard_id=shard_id).get()

    for shard_id in WifiShard.shards().keys():
        update_wifi.delay(shard_id=shard_id).get()
def test_lock_timeout(self, celery, redis, session, session2, stats, restore_db): obs = CellObservationFactory.build() cell = CellShardFactory.build( radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc, lac=obs.lac, cid=obs.cid, samples=10, ) session2.add(cell) session2.flush() orig_add_area = CellUpdater.add_area_update orig_wait = CellUpdater._retry_wait num = [0] def mock_area(self, updated_areas, key, num=num, session2=session2): orig_add_area(self, updated_areas, key) num[0] += 1 if num[0] == 2: session2.rollback() try: CellUpdater._retry_wait = 0.0001 session.execute('set session innodb_lock_wait_timeout = 1') with mock.patch.object(CellUpdater, 'add_area_update', mock_area): self.queue_and_update(celery, [obs]) # the inner task logic was called exactly twice assert num[0] == 2 shard = CellShard.shard_model(obs.cellid) cells = session.query(shard).all() assert len(cells) == 1 assert cells[0].samples == 1 self.check_statcounter(redis, StatKey.cell, 1) self.check_statcounter(redis, StatKey.unique_cell, 1) stats.check( counter=[('data.observation.insert', 1, ['type:cell'])], timer=[('task', 1, ['task:data.update_cell'])], ) finally: CellUpdater._retry_wait = orig_wait session.execute(text('drop table %s;' % cell.__tablename__))
def test_cell_duplicated(self, celery, session):
    self.add_reports(celery, cell_factor=1, wifi_factor=0)
    # duplicate the cell entry inside the report
    item = self._pop_item(celery)
    report = item["report"]
    cell = report["cellTowers"][0]
    radio = cell["radioType"]
    report["cellTowers"].append(cell.copy())
    report["cellTowers"].append(cell.copy())
    report["cellTowers"][1]["signalStrength"] += 2
    report["cellTowers"][2]["signalStrength"] -= 2
    self._push_item(celery, item)
    self._update_all(session)

    shard = CellShard.shard_model(radio)
    cells = session.query(shard).all()
    assert len(cells) == 1
    assert cells[0].samples == 1
def test_cell_duplicated(self):
    self.add_reports(cell_factor=1, wifi_factor=0)
    # duplicate the cell entry inside the report
    item = self._pop_item()
    report = item['report']
    cell = report['cellTowers'][0]
    radio = cell['radioType']
    report['cellTowers'].append(cell.copy())
    report['cellTowers'].append(cell.copy())
    report['cellTowers'][1]['signalStrength'] += 2
    report['cellTowers'][2]['signalStrength'] -= 2
    self._push_item(item)
    self._update_all()

    shard = CellShard.shard_model(radio)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)
    self.assertEqual(cells[0].samples, 1)
def test_cell_duplicated(self):
    self.add_reports(cell_factor=1, wifi_factor=0)
    # duplicate the cell entry inside the report
    queue = self.celery_app.export_queues['internal']
    items = queue.dequeue(queue.queue_key())
    report = items[0]['report']
    cell = report['cellTowers'][0]
    radio = cell['radioType']
    report['cellTowers'].append(cell.copy())
    report['cellTowers'].append(cell.copy())
    report['cellTowers'][1]['signalStrength'] += 2
    report['cellTowers'][2]['signalStrength'] -= 2
    queue.enqueue(items, queue.queue_key())
    self._update_all()

    shard = CellShard.shard_model(radio)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)
    self.assertEqual(cells[0].samples, 1)
def test_cell_duplicated(self):
    self.add_reports(cell_factor=1, wifi_factor=0)
    # duplicate the cell entry inside the report
    queue = self.celery_app.export_queues['queue_export_internal']
    items = queue.dequeue(queue.queue_key())
    report = items[0]['report']
    cell = report['cellTowers'][0]
    radio = cell['radioType']
    report['cellTowers'].append(cell.copy())
    report['cellTowers'].append(cell.copy())
    report['cellTowers'][1]['signalStrength'] += 2
    report['cellTowers'][2]['signalStrength'] -= 2
    queue.enqueue(items, queue.queue_key())
    self._update_all()

    shard = CellShard.shard_model(radio)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)
    self.assertEqual(cells[0].samples, 1)
def test_cell(self, celery, session):
    reports = self.add_reports(celery, cell_factor=1, wifi_factor=0)
    self._update_all(session)

    position = reports[0]["position"]
    cell_data = reports[0]["cellTowers"][0]
    shard = CellShard.shard_model(cell_data["radioType"])
    cells = session.query(shard).all()
    assert len(cells) == 1

    cell = cells[0]
    assert cell.lat == position["latitude"]
    assert cell.lon == position["longitude"]
    assert cell.radio.name == cell_data["radioType"]
    assert cell.mcc == cell_data["mobileCountryCode"]
    assert cell.mnc == cell_data["mobileNetworkCode"]
    assert cell.lac == cell_data["locationAreaCode"]
    assert cell.cid == cell_data["cellId"]
    assert cell.psc == cell_data["primaryScramblingCode"]
    assert cell.samples == 1
def test_cell(self):
    reports = self.add_reports(cell_factor=1, wifi_factor=0)
    self._update_all()

    position = reports[0]['position']
    cell_data = reports[0]['cellTowers'][0]
    radio = cell_data['radioType']
    shard = CellShard.shard_model(radio)
    cells = self.session.query(shard).all()
    self.assertEqual(len(cells), 1)

    cell = cells[0]
    self.assertEqual(cell.lat, position['latitude'])
    self.assertEqual(cell.lon, position['longitude'])
    self.assertEqual(cell.radio.name, cell_data['radioType'])
    self.assertEqual(cell.mcc, cell_data['mobileCountryCode'])
    self.assertEqual(cell.mnc, cell_data['mobileNetworkCode'])
    self.assertEqual(cell.lac, cell_data['locationAreaCode'])
    self.assertEqual(cell.cid, cell_data['cellId'])
    self.assertEqual(cell.psc, cell_data['primaryScramblingCode'])
    self.assertEqual(cell.samples, 1)
def configure_data(redis_client):
    """
    Configure fixed set of data queues.
    """
    data_queues = {
        # update_incoming needs to be the exact same as in webapp.config
        'update_incoming': DataQueue('update_incoming', redis_client,
                                     batch=100, compress=True),
    }
    for key in ('update_cellarea', 'update_cellarea_ocid'):
        data_queues[key] = DataQueue(key, redis_client, batch=100, json=False)
    for shard_id in BlueShard.shards().keys():
        key = 'update_blue_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in DataMap.shards().keys():
        key = 'update_datamap_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500, json=False)
    for shard_id in CellShard.shards().keys():
        key = 'update_cell_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in WifiShard.shards().keys():
        key = 'update_wifi_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    return data_queues
def celerybeat_schedule(app_config): """Return the celery beat schedule as a dictionary.""" sections = app_config.sections() schedule = { # Monitoring 'monitor-queue-size': { 'task': 'ichnaea.data.tasks.monitor_queue_size', 'schedule': timedelta(seconds=60), 'options': {'expires': 57}, }, 'monitor-api-users': { 'task': 'ichnaea.data.tasks.monitor_api_users', 'schedule': timedelta(seconds=600), 'options': {'expires': 570}, }, 'monitor-api-key-limits': { 'task': 'ichnaea.data.tasks.monitor_api_key_limits', 'schedule': timedelta(seconds=600), 'options': {'expires': 570}, }, # Statistics 'update-statcounter': { 'task': 'ichnaea.data.tasks.update_statcounter', 'args': (1, ), 'schedule': crontab(minute=3), 'options': {'expires': 2700}, }, 'update-statregion': { 'task': 'ichnaea.data.tasks.update_statregion', 'schedule': timedelta(seconds=3600 * 6), 'options': {'expires': 3600 * 5}, }, # Data Pipeline 'schedule-export-reports': { 'task': 'ichnaea.data.tasks.schedule_export_reports', 'schedule': timedelta(seconds=8), 'options': {'expires': 15}, }, 'update-cellarea': { 'task': 'ichnaea.data.tasks.update_cellarea', 'schedule': timedelta(seconds=8), 'args': (100, ), 'options': {'expires': 15}, }, 'update-cellarea-ocid': { 'task': 'ichnaea.data.tasks.update_cellarea_ocid', 'schedule': timedelta(seconds=9), 'args': (100, ), 'options': {'expires': 15}, }, 'update-score': { 'task': 'ichnaea.data.tasks.update_score', 'args': (250, ), 'schedule': timedelta(seconds=9), 'options': {'expires': 10}, }, } for shard_id in CellShard.shards().keys(): schedule.update({ 'update-cell-' + shard_id: { 'task': 'ichnaea.data.tasks.update_cell', 'schedule': timedelta(seconds=7), 'args': (500, shard_id), 'options': {'expires': 10}, } }) for shard_id in DataMap.shards().keys(): schedule.update({ 'update-datamap-' + shard_id: { 'task': 'ichnaea.data.tasks.update_datamap', 'args': (500, shard_id), 'schedule': timedelta(seconds=14), 'options': {'expires': 20}, }, }) for shard_id in WifiShard.shards().keys(): schedule.update({ 'update-wifi-' + shard_id: { 'task': 'ichnaea.data.tasks.update_wifi', 'schedule': timedelta(seconds=6), 'args': (500, shard_id), 'options': {'expires': 10}, } }) if 'assets' in sections and app_config.get('assets', 'bucket', None): # only configure tasks if target bucket is configured schedule.update({ 'cell-export-full': { 'task': 'ichnaea.data.tasks.cell_export_full', 'schedule': crontab(hour=0, minute=13), 'options': {'expires': 39600}, }, 'cell-export-diff': { 'task': 'ichnaea.data.tasks.cell_export_diff', 'schedule': crontab(minute=3), 'options': {'expires': 2700}, }, }) if 'import:ocid' in sections: schedule.update({ 'monitor-ocid-import': { 'task': 'ichnaea.data.tasks.monitor_ocid_import', 'schedule': timedelta(seconds=600), 'options': {'expires': 570}, }, 'cell-import-external': { 'task': 'ichnaea.data.tasks.cell_import_external', 'args': (True, ), 'schedule': crontab(minute=52), 'options': {'expires': 2700}, }, }) return schedule
def process_reports(self, reports, userid=None): malformed_reports = 0 positions = set() observations = {'cell': [], 'wifi': []} obs_count = { 'cell': {'upload': 0, 'drop': 0}, 'wifi': {'upload': 0, 'drop': 0}, } new_station_count = {'cell': 0, 'wifi': 0} for report in reports: cell, wifi, malformed_obs = self.process_report(report) if cell: observations['cell'].extend(cell) obs_count['cell']['upload'] += len(cell) if wifi: observations['wifi'].extend(wifi) obs_count['wifi']['upload'] += len(wifi) if (cell or wifi): positions.add((report['lat'], report['lon'])) else: malformed_reports += 1 for name in ('cell', 'wifi'): obs_count[name]['drop'] += malformed_obs[name] # group by unique station key for name in ('cell', 'wifi'): station_keys = set() for obs in observations[name]: if name == 'cell': station_keys.add(obs.cellid) elif name == 'wifi': station_keys.add(obs.mac) # determine scores for stations new_station_count[name] += self.new_stations(name, station_keys) if observations['cell']: sharded_obs = defaultdict(list) for ob in observations['cell']: shard_id = CellShard.shard_id(ob.cellid) sharded_obs[shard_id].append(ob) for shard_id, values in sharded_obs.items(): cell_queue = self.data_queues['update_cell_' + shard_id] cell_queue.enqueue(list(values), pipe=self.pipe) if observations['wifi']: sharded_obs = defaultdict(list) for ob in observations['wifi']: shard_id = WifiShard.shard_id(ob.mac) sharded_obs[shard_id].append(ob) for shard_id, values in sharded_obs.items(): wifi_queue = self.data_queues['update_wifi_' + shard_id] wifi_queue.enqueue(list(values), pipe=self.pipe) self.process_datamap(positions) self.process_score(userid, positions, new_station_count) self.emit_stats( len(reports), malformed_reports, obs_count, )
def test_blocklist_moving_cells(self): now = util.utcnow() today = now.date() obs = [] obs_factory = CellObservationFactory moving = set() cells = CellShardFactory.create_batch(4) cells.append(CellShardFactory.build()) # a cell with an entry but no prior position cell = cells[0] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 0 obs.extend([ obs_factory(lat=cell.lat + 0.01, lon=cell.lon + 0.01, **cell_key), obs_factory(lat=cell.lat + 0.02, lon=cell.lon + 0.05, **cell_key), obs_factory(lat=cell.lat + 0.03, lon=cell.lon + 0.09, **cell_key), ]) cell.lat = None cell.lon = None # a cell with a prior known position cell = cells[1] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 1 cell.lat += 0.1 obs.extend([ obs_factory(lat=cell.lat + 1.0, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) # a cell with a very different prior position cell = cells[2] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 1 obs.extend([ obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat - 0.1, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) # another cell with a prior known position (and negative lon) cell = cells[3] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 1 cell.lon *= -1.0 obs.extend([ obs_factory(lat=cell.lat + 1.0, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat + 2.0, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) # an already blocklisted cell cell = cells[4] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) CellShardFactory(block_first=today, block_last=today, block_count=1, **cell_key) obs.extend([ obs_factory(lat=cell.lat, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) self.session.commit() self._queue_and_update(obs) shards = set() for cellid in moving: shards.add(CellShard.shard_model(cellid)) blocks = [] for shard in shards: for row in self.session.query(shard).all(): if row.blocked(): blocks.append(row) self.assertEqual(set([b.cellid for b in blocks]), moving)
def test_blocklist_moving_cells(self): now = util.utcnow() today = now.date() obs = [] obs_factory = CellObservationFactory moving = set() cells = CellShardFactory.create_batch(4) cells.append(CellShardFactory.build()) # a cell with an entry but no prior position cell = cells[0] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = None cell.weight = None obs.extend([ obs_factory(lat=cell.lat + 0.01, lon=cell.lon + 0.01, **cell_key), obs_factory(lat=cell.lat + 0.02, lon=cell.lon + 0.05, **cell_key), obs_factory(lat=cell.lat + 0.03, lon=cell.lon + 0.09, **cell_key), ]) cell.lat = None cell.lon = None # a cell with a prior known position cell = cells[1] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 1 cell.weight = 1.0 cell.lat += 0.1 obs.extend([ obs_factory(lat=cell.lat + 1.0, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) # a cell with a very different prior position cell = cells[2] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 1 cell.weight = 1.0 obs.extend([ obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat - 0.1, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) # another cell with a prior known position (and negative lon) cell = cells[3] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) cell.samples = 1 cell.weight = 1.0 cell.lon *= -1.0 obs.extend([ obs_factory(lat=cell.lat + 1.0, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat + 2.0, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) # an already blocklisted cell cell = cells[4] cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=cell.cid) CellShardFactory(block_first=today, block_last=today, block_count=1, **cell_key) obs.extend([ obs_factory(lat=cell.lat, lon=cell.lon, **cell_key), obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key), ]) moving.add(cell.cellid) self.session.commit() self._queue_and_update_cell(obs) shards = set() for cellid in moving: shards.add(CellShard.shard_model(cellid)) blocks = [] for shard in shards: for row in self.session.query(shard).all(): if row.blocked(): blocks.append(row) self.assertEqual(set([b.cellid for b in blocks]), moving)
def tearDown(self):
    for model in CellShard.shards().values():
        self.session.execute(text('drop table %s;' % model.__tablename__))
    self.setup_tables(self.db_rw.engine)
    super(TestDatabaseErrors, self).tearDown()
def test_retriable_exceptions( self, celery, redis, session, db_shared_session, metricsmock, errclass, errno, errmsg, backoff_sleep_mock, ): """Test database exceptions where the task should wait and try again.""" obs = CellObservationFactory.build(radio=Radio.lte) shard = CellShard.shard_model(obs.cellid) cell = CellShardFactory.build( radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc, lac=obs.lac, cid=obs.cid, samples=10, created=datetime(2019, 12, 5, tzinfo=UTC), ) session.add(cell) session.commit() session.begin_nested() # Protect test cell from task rollback error = errclass(errno, errmsg) wrapped = InterfaceError.instance( statement="SELECT COUNT(*) FROM cell_area", params={}, orig=error, dbapi_base_err=MySQLError, ) with mock.patch.object(CellUpdater, "add_area_update", side_effect=[wrapped, None]): self._queue_and_update(celery, [obs], update_cell) assert CellUpdater.add_area_update.call_count == 2 backoff_sleep_mock.assert_called_once() cells = session.query(shard).all() assert len(cells) == 1 self.check_statcounter(redis, StatKey.cell, 1) # The existing cell record was updated cell = cells[0] assert cell.samples == 11 assert cell.created == datetime(2019, 12, 5, tzinfo=UTC) self.check_statcounter(redis, StatKey.unique_cell, 0) # Assert generated metrics are correct metricsmock.assert_incr_once("data.observation.insert", value=1, tags=["type:cell"]) metricsmock.assert_incr_once("data.station.confirm", value=1, tags=["type:cell"]) metricsmock.assert_timing_once("task", tags=["task:data.update_cell"]) metricsmock.assert_incr_once("data.station.dberror", tags=["type:cell", "errno:%s" % errno])
def write_stations_to_csv(session, path, start_time=None, end_time=None): where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL' if None not in (start_time, end_time): where = where + ' AND modified >= "%s" AND modified < "%s"' fmt = '%Y-%m-%d %H:%M:%S' where = where % (start_time.strftime(fmt), end_time.strftime(fmt)) header_row = [ 'radio', 'mcc', 'net', 'area', 'cell', 'unit', 'lon', 'lat', 'range', 'samples', 'changeable', 'created', 'updated', 'averageSignal', ] header_row = ','.join(header_row) + '\n' tables = [shard.__tablename__ for shard in CellShard.shards().values()] stmt = '''SELECT CONCAT_WS(",", CASE radio WHEN 0 THEN "GSM" WHEN 2 THEN "UMTS" WHEN 3 THEN "LTE" ELSE "" END, `mcc`, `mnc`, `lac`, `cid`, COALESCE(`psc`, ""), ROUND(`lon`, 7), ROUND(`lat`, 7), COALESCE(`radius`, "0"), COALESCE(`samples`, "0"), "1", COALESCE(UNIX_TIMESTAMP(`created`), ""), COALESCE(UNIX_TIMESTAMP(`modified`), ""), "" ) AS `cell_value` FROM %s WHERE %s ORDER BY `cellid` LIMIT :l OFFSET :o ''' with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper: with gzip_wrapper as gzip_file: gzip_file.write(header_row) for table in tables: table_stmt = text(stmt % (table, where)) offset = 0 limit = 25000 while True: rows = session.execute( table_stmt.bindparams(o=offset, l=limit)).fetchall() if rows: buf = '\r\n'.join([row.cell_value for row in rows]) if buf: buf += '\r\n' gzip_file.write(buf) offset += limit else: break
def write_stations_to_csv(session, path, start_time=None, end_time=None): where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL' if None not in (start_time, end_time): where = where + ' AND modified >= "%s" AND modified < "%s"' fmt = '%Y-%m-%d %H:%M:%S' where = where % (start_time.strftime(fmt), end_time.strftime(fmt)) header_row = [ 'radio', 'mcc', 'net', 'area', 'cell', 'unit', 'lon', 'lat', 'range', 'samples', 'changeable', 'created', 'updated', 'averageSignal', ] header_row = ','.join(header_row) + '\n' tables = [shard.__tablename__ for shard in CellShard.shards().values()] stmt = '''SELECT CONCAT_WS(",", CASE radio WHEN 0 THEN "GSM" WHEN 2 THEN "UMTS" WHEN 3 THEN "LTE" ELSE "" END, `mcc`, `mnc`, `lac`, `cid`, COALESCE(`psc`, ""), ROUND(`lon`, 7), ROUND(`lat`, 7), COALESCE(`radius`, "0"), COALESCE(`samples`, "0"), "1", COALESCE(UNIX_TIMESTAMP(`created`), ""), COALESCE(UNIX_TIMESTAMP(`modified`), ""), "" ) AS `cell_value` FROM %s WHERE %s ORDER BY `radio`, `mcc`, `mnc`, `lac`, `cid` LIMIT :l OFFSET :o ''' limit = 10000 offset = 0 with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper: with gzip_wrapper as gzip_file: gzip_file.write(header_row) for table in tables: table_stmt = text(stmt % (table, where)) while True: rows = session.execute( table_stmt.bindparams(o=offset, l=limit)).fetchall() if rows: buf = '\r\n'.join([row.cell_value for row in rows]) if buf: buf += '\r\n' gzip_file.write(buf) offset += limit else: break
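# For reference, the rows emitted by write_stations_to_csv use the same
# public cell export format that read_stations_from_csv consumes in the
# import tests above. A minimal sketch of one such row, with the field
# order taken from header_row and sample values mirroring the UMTS row
# used in those tests (the helper names below are illustrative only):
CSV_FIELDS = [
    'radio', 'mcc', 'net', 'area', 'cell', 'unit', 'lon', 'lat',
    'range', 'samples', 'changeable', 'created', 'updated', 'averageSignal',
]

sample_row = {
    'radio': 'UMTS', 'mcc': 202, 'net': 1, 'area': 2120, 'cell': 12842,
    'unit': '', 'lon': 23.4123167, 'lat': 38.8574351, 'range': 0,
    'samples': 6, 'changeable': 1, 'created': 1568220564,
    'updated': 1570120316, 'averageSignal': '',
}

line = ','.join(str(sample_row[field]) for field in CSV_FIELDS)
# -> 'UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,'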