def test_temp_blocked(self):
    """Check that a wifi blocked as of today stays without a position.

    Observations for a second, unblocked wifi are submitted in the same
    batch and are expected to yield a positioned row with two samples.
    """
    now = util.utcnow()
    today = now.date()
    bad_wifi = WifiObservationFactory.build()
    good_wifi = WifiObservationFactory.build()
    # Existing row for the bad wifi: no position, block_last set to today.
    WifiShardFactory(
        mac=bad_wifi.mac,
        lat=None,
        lon=None,
        created=now,
        block_first=today - timedelta(days=10),
        block_last=today,
        block_count=1,
    )
    observations = [good_wifi, bad_wifi, good_wifi]
    self.session.commit()
    self._queue_and_update(observations)

    # The good wifi got a position from its two observations.
    good_model = WifiShard.shard_model(good_wifi.mac)
    good_rows = (
        self.session.query(good_model)
        .filter(good_model.mac == good_wifi.mac)
        .all()
    )
    self.assertEqual(len(good_rows), 1)
    self.assertIsNotNone(good_rows[0].lat)
    self.assertIsNotNone(good_rows[0].lon)
    self.assertEqual(good_rows[0].samples, 2)

    # The bad wifi still has its old block date and no position.
    bad_model = WifiShard.shard_model(bad_wifi.mac)
    bad_rows = (
        self.session.query(bad_model)
        .filter(bad_model.mac == bad_wifi.mac)
        .all()
    )
    self.assertEqual(len(bad_rows), 1)
    self.assertLess(bad_rows[0].block_first, today)
    self.assertTrue(bad_rows[0].lat is None)
    self.assertTrue(bad_rows[0].lon is None)

    self.check_statcounter(StatKey.wifi, 2)
    self.check_statcounter(StatKey.unique_wifi, 1)
def test_temp_blocked(self):
    """Submit observations for a blocked and an unblocked wifi together.

    The wifi whose shard row has block_last set to today must keep a
    null position; the other wifi must end up with two samples.
    """
    def rows_for(mac):
        # All rows of the mac's shard table that match this mac.
        model = WifiShard.shard_model(mac)
        return self.session.query(model).filter(model.mac == mac).all()

    started = util.utcnow()
    bad_wifi = WifiObservationFactory.build()
    good_wifi = WifiObservationFactory.build()
    WifiShardFactory(
        mac=bad_wifi.mac, lat=None, lon=None, created=started,
        block_first=started.date() - timedelta(days=10),
        block_last=started.date(),
        block_count=1)
    batch = [good_wifi, bad_wifi, good_wifi]
    self.session.commit()
    self._queue_and_update(batch)

    found_good = rows_for(good_wifi.mac)
    self.assertEqual(len(found_good), 1)
    self.assertTrue(found_good[0].lat is not None)
    self.assertTrue(found_good[0].lon is not None)
    self.assertEqual(found_good[0].samples, 2)

    found_bad = rows_for(bad_wifi.mac)
    self.assertEqual(len(found_bad), 1)
    self.assertTrue(found_bad[0].block_first < started.date())
    self.assertTrue(found_bad[0].lat is None)
    self.assertTrue(found_bad[0].lon is None)

    self.check_statcounter(StatKey.wifi, 2)
    self.check_statcounter(StatKey.unique_wifi, 1)
def test_update(self):
    """Update two existing wifi rows from new observations.

    The first row starts without a position and receives three
    observations; the second row has a position and bounds (min_lon is
    None) and receives two identical observations.
    """
    now = util.utcnow()
    today = now.date()
    ten_days_ago = now - timedelta(10)
    make_obs = WifiObservationFactory

    # first wifi
    wifi1 = WifiShardFactory(lat=None, lon=None, samples=3)
    new_pos = WifiShardFactory.build()
    mac1 = wifi1.mac
    lat1 = new_pos.lat
    lon1 = new_pos.lon
    observations = [
        make_obs(lat=lat1, lon=lon1, key=mac1),
        make_obs(lat=lat1 + 0.002, lon=lon1 + 0.003, key=mac1),
        make_obs(lat=lat1 + 0.004, lon=lon1 + 0.006, key=mac1),
    ]

    # second wifi
    wifi2 = WifiShardFactory(
        lat=lat1 + 1.0,
        lon=lon1 + 1.0,
        max_lat=lat1 + 1.0,
        min_lat=lat1 + 0.999,
        max_lon=lon1 + 1.0,
        min_lon=None,
        radius=20,
        samples=2,
        created=ten_days_ago,
        modified=ten_days_ago,
    )
    mac2 = wifi2.mac
    lat2 = wifi2.lat
    lon2 = wifi2.lon
    observations += [
        make_obs(lat=lat2 + 0.002, lon=lon2 + 0.004, key=mac2),
        make_obs(lat=lat2 + 0.002, lon=lon2 + 0.004, key=mac2),
    ]

    self.session.commit()
    self._queue_and_update(observations)

    model1 = WifiShard.shard_model(mac1)
    row1 = self.session.query(model1).filter(model1.mac == mac1).one()
    self.assertAlmostEqual(row1.lat, lat1 + 0.002)
    self.assertAlmostEqual(row1.max_lat, lat1 + 0.004)
    self.assertAlmostEqual(row1.min_lat, lat1)
    self.assertAlmostEqual(row1.lon, lon1 + 0.003)
    self.assertAlmostEqual(row1.max_lon, lon1 + 0.006)
    self.assertAlmostEqual(row1.min_lon, lon1)
    self.assertEqual(row1.modified.date(), today)
    self.assertEqual(row1.radius, 304)
    self.assertEqual(row1.region, 'GB')
    self.assertEqual(row1.samples, 6)

    model2 = WifiShard.shard_model(mac2)
    row2 = self.session.query(model2).filter(model2.mac == mac2).one()
    self.assertAlmostEqual(row2.lat, lat2 + 0.001)
    self.assertAlmostEqual(row2.max_lat, lat2 + 0.002)
    self.assertAlmostEqual(row2.min_lat, lat2 - 0.001)
    self.assertAlmostEqual(row2.lon, lon2 + 0.002)
    self.assertAlmostEqual(row2.max_lon, lon2 + 0.004)
    self.assertAlmostEqual(row2.min_lon, lon2)
    self.assertEqual(row2.created.date(), today - timedelta(10))
    self.assertEqual(row2.modified.date(), today)
    self.assertEqual(row2.radius, 260)
    self.assertEqual(row2.region, 'GB')
    self.assertEqual(row2.samples, 4)
def test_update(self):
    """Feed observations for two stored wifis and verify the merged rows.

    Expected positions, bounds, radius, region, sample counts and
    timestamps are checked for both rows after the update task ran.
    """
    def fetch(mac):
        # Exactly one row is expected per mac; .one() raises otherwise.
        model = WifiShard.shard_model(mac)
        return self.session.query(model).filter(model.mac == mac).one()

    started = util.utcnow()
    factory = WifiObservationFactory
    queued = []

    # first wifi
    wifi1 = WifiShardFactory(lat=None, lon=None, samples=3)
    new_pos = WifiShardFactory.build()
    mac1, lat1, lon1 = wifi1.mac, new_pos.lat, new_pos.lon
    queued.append(factory(lat=lat1, lon=lon1, key=mac1))
    queued.append(factory(lat=lat1 + 0.002, lon=lon1 + 0.003, key=mac1))
    queued.append(factory(lat=lat1 + 0.004, lon=lon1 + 0.006, key=mac1))

    # second wifi
    wifi2 = WifiShardFactory(
        lat=lat1 + 1.0, lon=lon1 + 1.0,
        max_lat=lat1 + 1.0, min_lat=lat1 + 0.999,
        max_lon=lon1 + 1.0, min_lon=None,
        radius=20, samples=2,
        created=started - timedelta(10),
        modified=started - timedelta(10))
    mac2, lat2, lon2 = wifi2.mac, wifi2.lat, wifi2.lon
    queued.append(factory(lat=lat2 + 0.002, lon=lon2 + 0.004, key=mac2))
    queued.append(factory(lat=lat2 + 0.002, lon=lon2 + 0.004, key=mac2))

    self.session.commit()
    self._queue_and_update(queued)

    first = fetch(mac1)
    self.assertAlmostEqual(first.lat, lat1 + 0.002)
    self.assertAlmostEqual(first.max_lat, lat1 + 0.004)
    self.assertAlmostEqual(first.min_lat, lat1)
    self.assertAlmostEqual(first.lon, lon1 + 0.003)
    self.assertAlmostEqual(first.max_lon, lon1 + 0.006)
    self.assertAlmostEqual(first.min_lon, lon1)
    self.assertEqual(first.modified.date(), started.date())
    self.assertEqual(first.radius, 304)
    self.assertEqual(first.region, 'GB')
    self.assertEqual(first.samples, 6)

    second = fetch(mac2)
    self.assertAlmostEqual(second.lat, lat2 + 0.001)
    self.assertAlmostEqual(second.max_lat, lat2 + 0.002)
    self.assertAlmostEqual(second.min_lat, lat2 - 0.001)
    self.assertAlmostEqual(second.lon, lon2 + 0.002)
    self.assertAlmostEqual(second.max_lon, lon2 + 0.004)
    self.assertAlmostEqual(second.min_lon, lon2)
    self.assertEqual(second.created.date(),
                     started.date() - timedelta(10))
    self.assertEqual(second.modified.date(), started.date())
    self.assertEqual(second.radius, 260)
    self.assertEqual(second.region, 'GB')
    self.assertEqual(second.samples, 4)
def query_wifis(query, raven_client):
    """Load the usable wifi rows for all macs in ``query.wifi``.

    Rows are only returned if they have a lat and lon, their block_count
    is unset or below PERMANENT_BLOCKLIST_THRESHOLD, and their block_last
    is unset or older than today minus TEMPORARY_BLOCKLIST_DURATION.

    Any exception raised while grouping or querying is reported through
    ``raven_client.captureException()``; the rows collected up to that
    point are still returned.
    """
    macs = [lookup.mac for lookup in query.wifi]
    if not macs:  # pragma: no cover
        return []

    found = []
    block_cutoff = util.utcnow().date() - TEMPORARY_BLOCKLIST_DURATION

    try:
        # load all fields used in score calculation and those we
        # need for the position
        columns = ('lat', 'lon', 'radius',
                   'created', 'modified', 'samples')

        # Group the macs by the shard model responsible for them.
        macs_by_model = defaultdict(list)
        for mac in macs:
            macs_by_model[WifiShard.shard_model(mac)].append(mac)

        for model, model_macs in macs_by_model.items():
            not_permanently_blocked = or_(
                model.block_count.is_(None),
                model.block_count < PERMANENT_BLOCKLIST_THRESHOLD)
            not_temporarily_blocked = or_(
                model.block_last.is_(None),
                model.block_last < block_cutoff)
            shard_query = (
                query.session.query(model)
                .filter(model.mac.in_(model_macs))
                .filter(model.lat.isnot(None))
                .filter(model.lon.isnot(None))
                .filter(not_permanently_blocked)
                .filter(not_temporarily_blocked)
                .options(load_only(*columns))
            )
            found.extend(shard_query.all())
    except Exception:
        raven_client.captureException()

    return found
def test_weighted_update(self):
    """Update a wifi of weight 3.0 with two observations west of it.

    The observations differ in accuracy and signal; the resulting
    longitude, radius, sample count and weight are compared against
    fixed expected values.
    """
    wifi = WifiShardFactory(samples=2, weight=3.0)
    start_lat = wifi.lat
    start_lon = wifi.lon
    make_obs = WifiObservationFactory
    near = make_obs(lat=wifi.lat, lon=wifi.lon - 0.002,
                    accuracy=20.0, signal=-30, mac=wifi.mac)
    far = make_obs(lat=wifi.lat, lon=wifi.lon - 0.004,
                   accuracy=40.0, signal=-60, mac=wifi.mac)
    self.session.commit()
    self._queue_and_update_wifi([near, far])

    model = WifiShard.shard_model(wifi.mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    updated = rows[0]
    self.assertAlmostEqual(updated.lat, start_lat)
    self.assertAlmostEqual(updated.max_lat, start_lat)
    self.assertAlmostEqual(updated.min_lat, start_lat)
    self.assertAlmostEqual(updated.lon, start_lon - 0.0017709, 7)
    self.assertAlmostEqual(updated.max_lon, start_lon)
    self.assertAlmostEqual(updated.min_lon, start_lon - 0.004)
    self.assertEqual(updated.radius, 154)
    self.assertEqual(updated.samples, 4)
    self.assertAlmostEqual(updated.weight, 15.53, 2)
def test_temp_blocked_admitted_again(self):
    """A wifi whose block is older than the temporary duration is updated.

    The stored row was blocked one day longer ago than
    TEMPORARY_BLOCKLIST_DURATION; a new observation must give it a
    position again while keeping its original creation date.
    """
    block_time = (util.utcnow() - TEMPORARY_BLOCKLIST_DURATION -
                  timedelta(days=1))
    block_day = block_time.date()
    obs = WifiObservationFactory()
    WifiShardFactory(
        mac=obs.mac,
        lat=None,
        lon=None,
        samples=0,
        created=block_time,
        modified=block_time,
        block_first=block_day,
        block_last=block_day,
        block_count=1,
    )
    self.session.flush()

    # add a new entry for the previously blocked wifi
    self.data_queue.enqueue([obs])
    self.assertEqual(self.data_queue.size(), 1)
    update_wifi.delay().get()

    # the wifi was inserted again
    model = WifiShard.shard_model(obs.mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    row = rows[0]
    self.assertEqual(row.created.date(), block_day)
    self.assertAlmostEqual(row.lat, obs.lat)
    self.assertAlmostEqual(row.lon, obs.lon)
    self.assertEqual(row.country, 'GB')
    self.assertEqual(row.samples, 1)
    self.check_statcounter(StatKey.unique_wifi, 0)
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Returns a dict mapping queue name to a DataQueue bound to
    redis_client. It holds four fixed queues plus one queue per shard id
    of DataMap, CellShard and WifiShard; every queue uses its own name
    as queue_key.
    """
    fixed_names = (
        'update_cell',  # BBB
        'update_cellarea',
        'update_cellarea_ocid',
        'update_score',
    )
    data_queues = {}
    for name in fixed_names:
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)

    sharded = (
        ('update_datamap_', DataMap),
        ('update_cell_', CellShard),
        ('update_wifi_', WifiShard),
    )
    for prefix, shard_cls in sharded:
        for shard_id in shard_cls.shards().keys():
            name = prefix + shard_id
            data_queues[name] = DataQueue(
                name, redis_client, queue_key=name)
    return data_queues
def test_temp_blocked_admitted_again(self):
    """An expired temporary block no longer prevents position updates.

    The stored block dates lie one day beyond TEMPORARY_BLOCKLIST_DURATION
    in the past. After one new observation the row has a position and
    one sample, and its block dates and creation date are unchanged.
    """
    blocked_at = (util.utcnow() - TEMPORARY_BLOCKLIST_DURATION -
                  timedelta(days=1))
    blocked_on = blocked_at.date()
    obs = WifiObservationFactory()
    WifiShardFactory(
        mac=obs.mac,
        lat=None,
        lon=None,
        samples=0,
        created=blocked_at,
        modified=blocked_at,
        block_first=blocked_on,
        block_last=blocked_on,
        block_count=1,
    )
    self.session.commit()

    # add a new entry for the previously blocked wifi
    self._queue_and_update([obs])

    # the wifi was inserted again
    model = WifiShard.shard_model(obs.mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    row = rows[0]
    self.assertEqual(row.block_first, blocked_on)
    self.assertEqual(row.block_last, blocked_on)
    self.assertEqual(row.created.date(), blocked_on)
    self.assertAlmostEqual(row.lat, obs.lat)
    self.assertAlmostEqual(row.lon, obs.lon)
    self.assertEqual(row.region, 'GB')
    self.assertEqual(row.samples, 1)
    self.check_statcounter(StatKey.unique_wifi, 0)
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Builds one DataQueue per fixed queue name and one per shard id of
    DataMap, CellShard and WifiShard, keyed by queue name. Each queue is
    created with queue_key equal to its name.
    """
    def make_queue(name):
        # Name and redis key are always identical for these queues.
        return DataQueue(name, redis_client, queue_key=name)

    data_queues = {
        'update_cell': make_queue('update_cell'),  # BBB
        'update_cellarea': make_queue('update_cellarea'),
        'update_cellarea_ocid': make_queue('update_cellarea_ocid'),
        'update_score': make_queue('update_score'),
    }

    for shard_id in DataMap.shards().keys():
        queue_name = 'update_datamap_' + shard_id
        data_queues[queue_name] = make_queue(queue_name)

    for shard_id in CellShard.shards().keys():
        queue_name = 'update_cell_' + shard_id
        data_queues[queue_name] = make_queue(queue_name)

    for shard_id in WifiShard.shards().keys():
        queue_name = 'update_wifi_' + shard_id
        data_queues[queue_name] = make_queue(queue_name)

    return data_queues
def test_position_invalid(self, celery, session, metricsmock):
    """Upload one report with lat=-90.1 and one without a lat override.

    Only one row is expected in wifi shard "0", and the metrics must
    show two uploaded reports of which one was dropped.
    """
    report_overrides = (
        {"wifi_key": "000000123456", "lat": -90.1},
        {"wifi_key": "000000234567"},
    )
    for overrides in report_overrides:
        self.add_reports(
            celery, 1, cell_factor=0, wifi_factor=1, **overrides)
    self._update_all(session)

    shard_zero = WifiShard.shards()["0"]
    assert session.query(shard_zero).count() == 1

    expected_metrics = (
        ("data.report.upload", {"value": 2, "tags": ["key:test"]}),
        ("data.report.drop", {"value": 1, "tags": ["key:test"]}),
        ("data.observation.insert", {"value": 1, "tags": ["type:wifi"]}),
        ("data.observation.upload", {"tags": ["type:wifi", "key:test"]}),
    )
    for stat, kwargs in expected_metrics:
        metricsmock.assert_incr_once(stat, **kwargs)
def query_database(query, raven_client):
    """Fetch positioned, unblocked wifi rows for the macs in ``query.wifi``.

    A row qualifies if lat and lon are set, block_count is unset or below
    PERMANENT_BLOCKLIST_THRESHOLD, and block_last is unset or earlier
    than today minus TEMPORARY_BLOCKLIST_DURATION. Only the lat, lon and
    radius columns are requested via load_only.

    Exceptions raised during the lookup are passed to
    ``raven_client.captureException()`` and the rows gathered so far are
    returned.
    """
    macs = [lookup.mac for lookup in query.wifi]
    if not macs:  # pragma: no cover
        return []

    collected = []
    cutoff = util.utcnow().date() - TEMPORARY_BLOCKLIST_DURATION

    try:
        wanted_columns = ('lat', 'lon', 'radius')

        grouped = defaultdict(list)
        for mac in macs:
            grouped[WifiShard.shard_model(mac)].append(mac)

        for model, model_macs in grouped.items():
            block_count_ok = or_(
                model.block_count.is_(None),
                model.block_count < PERMANENT_BLOCKLIST_THRESHOLD)
            block_last_ok = or_(
                model.block_last.is_(None),
                model.block_last < cutoff)
            rows = (
                query.session.query(model)
                .filter(model.mac.in_(model_macs))
                .filter(model.lat.isnot(None))
                .filter(model.lon.isnot(None))
                .filter(block_count_ok)
                .filter(block_last_ok)
                .options(load_only(*wanted_columns))
            ).all()
            collected += rows
    except Exception:
        raven_client.captureException()

    return collected
def test_new(self):
    """A single observation for an unknown wifi creates one new row.

    The row's position and bounds equal the observation's position, the
    radius is 0 and all block columns are None.
    """
    started = util.utcnow()
    today = started.date()
    obs = WifiObservationFactory.build()
    self.data_queue.enqueue([obs])
    self.assertEqual(self.data_queue.size(), 1)
    update_wifi.delay().get()

    model = WifiShard.shard_model(obs.mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    row = rows[0]
    self.assertAlmostEqual(row.lat, obs.lat)
    self.assertAlmostEqual(row.max_lat, obs.lat)
    self.assertAlmostEqual(row.min_lat, obs.lat)
    self.assertAlmostEqual(row.lon, obs.lon)
    self.assertAlmostEqual(row.max_lon, obs.lon)
    self.assertAlmostEqual(row.min_lon, obs.lon)
    self.assertEqual(row.country, 'GB')
    self.assertEqual(row.radius, 0)
    self.assertEqual(row.samples, 1)
    self.assertEqual(row.created.date(), today)
    self.assertEqual(row.modified.date(), today)
    self.assertEqual(row.block_first, None)
    self.assertEqual(row.block_last, None)
    self.assertEqual(row.block_count, None)
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Returns a dict of queue name to DataQueue. The two ``*_incoming``
    queues and ``update_cellarea`` use batch=100; the per-shard queues
    for BlueShard, DataMap, CellShard and WifiShard use batch=500. The
    cellarea and datamap queues are created with json=False.
    """
    data_queues = {}

    # *_incoming need to be the exact same as in webapp.config
    for key in ('update_incoming', 'transfer_incoming'):
        data_queues[key] = DataQueue(
            key, redis_client, batch=100, compress=True)

    for key in ('update_cellarea', ):
        data_queues[key] = DataQueue(
            key, redis_client, batch=100, json=False)

    # (queue name prefix, shard class, extra DataQueue keyword arguments)
    sharded = (
        ('update_blue_', BlueShard, {}),
        ('update_datamap_', DataMap, {'json': False}),
        ('update_cell_', CellShard, {}),
        ('update_wifi_', WifiShard, {}),
    )
    for prefix, shard_cls, extra in sharded:
        for shard_id in shard_cls.shards().keys():
            key = prefix + shard_id
            data_queues[key] = DataQueue(
                key, redis_client, batch=500, **extra)
    return data_queues
def _update_all(self):
    """Run the report export task, then every cell and wifi shard update.

    Each task is dispatched with ``.delay()`` and awaited with ``.get()``
    before the next one is started.
    """
    schedule_export_reports.delay().get()
    shard_tasks = (
        (CellShard, update_cell),
        (WifiShard, update_wifi),
    )
    for shard_cls, task in shard_tasks:
        for shard_id in shard_cls.shards().keys():
            task.delay(shard_id=shard_id).get()
def _queue_and_update(self, obs):
    """Enqueue observations on their wifi shard queues and update each.

    Observations are grouped by ``WifiShard.shard_id`` of their mac. For
    each group the matching ``update_wifi_<shard_id>`` data queue gets
    the group and the update_wifi task for that shard is run and awaited.
    """
    by_shard = defaultdict(list)
    for observation in obs:
        by_shard[WifiShard.shard_id(observation.mac)].append(observation)

    for shard_id, batch in by_shard.items():
        queue_name = 'update_wifi_' + shard_id
        self.celery_app.data_queues[queue_name].enqueue(batch)
        update_wifi.delay(shard_id=shard_id).get()
def _shard_observations(self, observations):
    """Group observations by shard model and then by mac.

    None entries in ``observations`` are skipped. Returns a dict that
    maps each shard model to a defaultdict(list) mapping mac to the
    observations seen for it, in input order.
    """
    grouped = {}
    for obs in observations:
        if obs is None:
            continue
        model = WifiShard.shard_model(obs.mac)
        per_mac = grouped.get(model)
        if per_mac is None:
            per_mac = grouped[model] = defaultdict(list)
        per_mac[obs.mac].append(obs)
    return grouped
def _update_all(self):
    """Run the incoming update, then every blue, cell and wifi shard update.

    Each task is dispatched with ``.delay()`` and awaited with ``.get()``
    before the next one is started.
    """
    update_incoming.delay().get()
    shard_tasks = (
        (BlueShard, update_blue),
        (CellShard, update_cell),
        (WifiShard, update_wifi),
    )
    for shard_cls, task in shard_tasks:
        for shard_id in shard_cls.shards().keys():
            task.delay(shard_id=shard_id).get()
def test_wifi(self, celery, session):
    """A report with one wifi yields one shard row at the report position."""
    reports = self.add_reports(celery, cell_factor=0, wifi_factor=1)
    self._update_all(session)

    first_report = reports[0]
    position = first_report['position']
    access_point = first_report['wifiAccessPoints'][0]

    model = WifiShard.shard_model(access_point['macAddress'])
    rows = session.query(model).all()
    assert len(rows) == 1
    row = rows[0]
    assert row.lat == position['latitude']
    assert row.lon == position['longitude']
    assert row.mac == access_point['macAddress']
    assert row.samples == 1
def test_wifi(self, celery, session):
    """Check the stored wifi row against the first uploaded report."""
    reports = self.add_reports(celery, cell_factor=0, wifi_factor=1)
    self._update_all(session)

    expected_position = reports[0]["position"]
    expected_mac = reports[0]["wifiAccessPoints"][0]["macAddress"]

    shard_model = WifiShard.shard_model(expected_mac)
    stored = session.query(shard_model).all()
    assert len(stored) == 1
    assert stored[0].lat == expected_position["latitude"]
    assert stored[0].lon == expected_position["longitude"]
    assert stored[0].mac == expected_mac
    assert stored[0].samples == 1
def __call__(self):
    """Recompute per-region station counts and sync the RegionStat rows.

    Cell counts are summed from CellArea.num_cells per region and radio;
    wifi counts are row counts per region over all WifiShard tables.
    Existing RegionStat rows are updated, missing ones are added, and
    rows for regions without any current stats are deleted. When no
    stats were found at all, the method returns without touching
    RegionStat.
    """
    radio_fields = ('gsm', 'wcdma', 'lte', 'wifi')
    empty = {'gsm': 0, 'wcdma': 0, 'lte': 0, 'wifi': 0}
    stats = {}

    cell_rows = (
        self.session.query(CellArea.region, CellArea.radio,
                           func.sum(CellArea.num_cells))
        .filter(CellArea.region.isnot(None))
        .group_by(CellArea.region, CellArea.radio)
    ).all()
    for region, radio, num in cell_rows:
        stats.setdefault(region, empty.copy())[radio.name] = int(num)

    for model in WifiShard.shards().values():
        wifi_rows = (
            self.session.query(model.region, func.count())
            .filter(model.region.isnot(None))
            .group_by(model.region)
        ).all()
        for region, num in wifi_rows:
            # Counts from several shards add up per region.
            stats.setdefault(region, empty.copy())['wifi'] += int(num)

    if not stats:
        return

    existing = dict(
        self.session.query(RegionStat.region, RegionStat).all())
    for region, values in stats.items():
        if region not in existing:
            self.session.add(RegionStat(
                region=region,
                gsm=values['gsm'],
                wcdma=values['wcdma'],
                lte=values['lte'],
                wifi=values['wifi'],
            ))
            continue
        row = existing[region]
        for field in radio_fields:
            setattr(row, field, values[field])

    obsolete_regions = list(set(existing.keys()) - set(stats.keys()))
    if obsolete_regions:
        stale = self.session.query(RegionStat).filter(
            RegionStat.region.in_(obsolete_regions))
        stale.delete(synchronize_session=False)
def __call__(self):
    """Rebuild the RegionStat table contents from cell and wifi data.

    Collects gsm/wcdma/lte counts from CellArea and wifi counts from
    every WifiShard table, writes them to RegionStat (update or insert)
    and removes RegionStat rows whose region has no stats anymore.
    Nothing is written or deleted if no stats were collected.
    """
    def entry_for(collected, region):
        # Create a zeroed entry the first time a region shows up.
        if region not in collected:
            collected[region] = {'gsm': 0, 'wcdma': 0, 'lte': 0, 'wifi': 0}
        return collected[region]

    stats = {}

    cell_query = self.session.query(
        CellArea.region, CellArea.radio, func.sum(CellArea.num_cells))
    cell_query = cell_query.filter(CellArea.region.isnot(None))
    cell_query = cell_query.group_by(CellArea.region, CellArea.radio)
    for region, radio, num in cell_query.all():
        entry_for(stats, region)[radio.name] = int(num)

    for shard in WifiShard.shards().values():
        wifi_query = self.session.query(shard.region, func.count())
        wifi_query = wifi_query.filter(shard.region.isnot(None))
        wifi_query = wifi_query.group_by(shard.region)
        for region, num in wifi_query.all():
            entry_for(stats, region)['wifi'] += int(num)

    if not stats:
        return

    region_stats = dict(
        self.session.query(RegionStat.region, RegionStat).all())

    for region, values in stats.items():
        if region in region_stats:
            current = region_stats[region]
            current.gsm = values['gsm']
            current.wcdma = values['wcdma']
            current.lte = values['lte']
            current.wifi = values['wifi']
        else:
            new_row = RegionStat(
                region=region,
                gsm=values['gsm'],
                wcdma=values['wcdma'],
                lte=values['lte'],
                wifi=values['wifi'],
            )
            self.session.add(new_row)

    obsolete_regions = list(set(region_stats.keys()) - set(stats.keys()))
    if obsolete_regions:
        (self.session.query(RegionStat)
             .filter(RegionStat.region.in_(obsolete_regions))
             .delete(synchronize_session=False))
def test_position_invalid(self, celery, session, stats):
    """Upload one report with lat=-90.1 and one without a lat override.

    Exactly one row is expected in wifi shard '0', and the counters must
    show two uploaded reports, one dropped report and one inserted wifi
    observation.
    """
    common = {'cell_factor': 0, 'wifi_factor': 1}
    self.add_reports(celery, 1, wifi_key='000000123456', lat=-90.1,
                     **common)
    self.add_reports(celery, 1, wifi_key='000000234567', **common)
    self._update_all(session)

    shard_zero = WifiShard.shards()['0']
    assert session.query(shard_zero).count() == 1

    expected_counters = [
        ('data.report.upload', 1, 2, ['key:test']),
        ('data.report.drop', 1, 1, ['key:test']),
        ('data.observation.insert', 1, 1, ['type:wifi']),
        ('data.observation.upload', 1, 1, ['type:wifi', 'key:test']),
    ]
    stats.check(counter=expected_counters)
def test_wifi(self):
    """A report with one wifi yields one shard row at the report position."""
    reports = self.add_reports(cell_factor=0, wifi_factor=1)
    self._update_all()

    first_report = reports[0]
    position = first_report['position']
    access_point = first_report['wifiAccessPoints'][0]

    model = WifiShard.shard_model(access_point['macAddress'])
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    row = rows[0]
    self.assertEqual(row.lat, position['latitude'])
    self.assertEqual(row.lon, position['longitude'])
    self.assertEqual(row.mac, access_point['macAddress'])
    self.assertEqual(row.samples, 1)
def test_position_invalid(self):
    """Upload one report with lat=-90.1 and one without a lat override.

    Exactly one row is expected in wifi shard '0'; the stat counters
    must report two uploads and one dropped report.
    """
    report_overrides = (
        {'wifi_key': '000000123456', 'lat': -90.1},
        {'wifi_key': '000000234567'},
    )
    for overrides in report_overrides:
        self.add_reports(1, cell_factor=0, wifi_factor=1, **overrides)
    self._update_all()

    shard_zero = WifiShard.shards()['0']
    row_count = self.session.query(shard_zero).count()
    self.assertEqual(row_count, 1)

    expected_counters = [
        ('data.report.upload', 1, 2, ['key:test']),
        ('data.report.drop', 1, 1, ['key:test']),
        ('data.observation.insert', 1, 1, ['type:wifi']),
        ('data.observation.upload', 1, 1, ['type:wifi', 'key:test']),
    ]
    self.check_stats(counter=expected_counters)
def test_position_invalid(self):
    """Upload one report with lat=-90.1 and one without a lat override.

    Exactly one row is expected in wifi shard '0'; the drop counter is
    expected to carry the 'reason:malformed' tag.
    """
    shared = dict(cell_factor=0, wifi_factor=1)
    self.add_reports(1, wifi_key='000000123456', lat=-90.1, **shared)
    self.add_reports(1, wifi_key='000000234567', **shared)
    self._update_all()

    shard_zero = WifiShard.shards()['0']
    self.assertEqual(self.session.query(shard_zero).count(), 1)

    upload_tags = ['key:test']
    drop_tags = ['reason:malformed', 'key:test']
    self.check_stats(counter=[
        ('data.report.upload', 1, 2, upload_tags),
        ('data.report.drop', 1, 1, drop_tags),
        ('data.observation.insert', 1, 1, ['type:wifi']),
        ('data.observation.upload', 1, 1, ['type:wifi', 'key:test']),
    ])
def _update_all(self, session, datamap_only=False):
    """Create the 'internal' export config and run the update tasks.

    The incoming update task always runs. Unless ``datamap_only`` is
    true, the update task of every blue, cell and wifi shard runs
    afterwards. Every task is awaited via ``.delay().get()``.
    """
    ExportConfigFactory(name='internal', batch=0, schema='internal')
    session.flush()
    update_incoming.delay().get()

    if not datamap_only:
        shard_tasks = (
            (BlueShard, update_blue),
            (CellShard, update_cell),
            (WifiShard, update_wifi),
        )
        for shard_cls, task in shard_tasks:
            for shard_id in shard_cls.shards().keys():
                task.delay(shard_id=shard_id).get()
def _update_all(self, session, datamap_only=False):
    """Set up the "internal" export config, then run the update tasks.

    With ``datamap_only`` set, only the incoming update task is run.
    Otherwise the blue, cell and wifi update tasks are also run once per
    shard id, each awaited via ``.delay().get()``.
    """
    ExportConfigFactory(name="internal", batch=0, schema="internal")
    session.flush()
    update_incoming.delay().get()
    if datamap_only:
        return

    def run_for_all_shards(shard_cls, task):
        # One awaited task invocation per shard id of the given class.
        for shard_id in shard_cls.shards().keys():
            task.delay(shard_id=shard_id).get()

    run_for_all_shards(BlueShard, update_blue)
    run_for_all_shards(CellShard, update_cell)
    run_for_all_shards(WifiShard, update_wifi)
def test_wifi(self):
    """Export one wifi report and run update_wifi for 16 hex shard ids.

    Afterwards the wifi's shard table must hold one row matching the
    report's position and mac address with a single sample.
    """
    reports = self.add_reports(cell_factor=0, wifi_factor=1)
    schedule_export_reports.delay().get()
    # Shard ids are the single lowercase hex digits '0' through 'f'.
    for shard_id in '0123456789abcdef':
        update_wifi.delay(shard_id=shard_id).get()

    report = reports[0]
    position = report['position']
    wifi_data = report['wifiAccessPoints'][0]
    mac = wifi_data['macAddress']

    model = WifiShard.shard_model(mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    row = rows[0]
    self.assertEqual(row.lat, position['latitude'])
    self.assertEqual(row.lon, position['longitude'])
    self.assertEqual(row.mac, wifi_data['macAddress'])
    self.assertEqual(row.samples, 1)
def test_wifi_duplicated(self):
    """Repeated entries for one wifi in a report count as a single sample.

    The first access point of the queued report is copied twice; the
    entries at index 1 and 2 get their signal strength shifted by +2
    and -2 before the item is pushed back and processed.
    """
    self.add_reports(cell_factor=0, wifi_factor=1)

    # duplicate the wifi entry inside the report
    item = self._pop_item()
    access_points = item['report']['wifiAccessPoints']
    first_ap = access_points[0]
    mac = first_ap['macAddress']
    access_points.extend([first_ap.copy(), first_ap.copy()])
    access_points[1]['signalStrength'] += 2
    access_points[2]['signalStrength'] -= 2
    self._push_item(item)
    self._update_all()

    model = WifiShard.shard_model(mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    self.assertEqual(rows[0].samples, 1)
def test_wifi_duplicated(self, celery, session):
    """Repeated entries for one wifi in a report count as a single sample.

    The first access point of the queued report is copied twice; the
    entries at index 1 and 2 get their signal strength shifted by +2
    and -2 before the item is pushed back and processed.
    """
    self.add_reports(celery, cell_factor=0, wifi_factor=1)

    # duplicate the wifi entry inside the report
    item = self._pop_item(celery)
    access_points = item['report']['wifiAccessPoints']
    first_ap = access_points[0]
    mac = first_ap['macAddress']
    access_points.extend([first_ap.copy(), first_ap.copy()])
    access_points[1]['signalStrength'] += 2
    access_points[2]['signalStrength'] -= 2
    self._push_item(celery, item)
    self._update_all(session)

    model = WifiShard.shard_model(mac)
    rows = session.query(model).all()
    assert len(rows) == 1
    assert rows[0].samples == 1
def test_wifi_duplicated(self, celery, session):
    """One wifi listed three times in a report is stored with one sample.

    Two copies of the report's first access point are appended, and the
    entries at index 1 and 2 have their signal strength changed by +2
    and -2 respectively.
    """
    self.add_reports(celery, cell_factor=0, wifi_factor=1)

    # duplicate the wifi entry inside the report
    queued_item = self._pop_item(celery)
    entries = queued_item["report"]["wifiAccessPoints"]
    template = entries[0]
    mac = template["macAddress"]
    for _ in range(2):
        entries.append(template.copy())
    entries[1]["signalStrength"] += 2
    entries[2]["signalStrength"] -= 2
    self._push_item(celery, queued_item)
    self._update_all(session)

    shard_model = WifiShard.shard_model(mac)
    stored = session.query(shard_model).all()
    assert len(stored) == 1
    assert stored[0].samples == 1
def test_wifi_duplicated(self):
    """Repeated entries for one wifi in a report count as a single sample.

    The report is taken from the 'queue_export_internal' export queue,
    its first access point is copied twice (index 1 and 2 get signal
    strength +2 and -2) and the items are put back before updating.
    """
    self.add_reports(cell_factor=0, wifi_factor=1)

    # duplicate the wifi entry inside the report
    queue = self.celery_app.export_queues['queue_export_internal']
    items = queue.dequeue(queue.queue_key())
    access_points = items[0]['report']['wifiAccessPoints']
    first_ap = access_points[0]
    mac = first_ap['macAddress']
    access_points.extend([first_ap.copy(), first_ap.copy()])
    access_points[1]['signalStrength'] += 2
    access_points[2]['signalStrength'] -= 2
    queue.enqueue(items, queue.queue_key())
    self._update_all()

    model = WifiShard.shard_model(mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    self.assertEqual(rows[0].samples, 1)
def test_wifi_duplicated(self):
    """One wifi listed three times in a report is stored with one sample.

    The report is taken from the 'internal' export queue, two copies of
    its first access point are appended (index 1 and 2 get signal
    strength +2 and -2) and the items are re-queued before updating.
    """
    self.add_reports(cell_factor=0, wifi_factor=1)

    # duplicate the wifi entry inside the report
    export_queue = self.celery_app.export_queues['internal']
    queued_items = export_queue.dequeue(export_queue.queue_key())
    entries = queued_items[0]['report']['wifiAccessPoints']
    template = entries[0]
    mac = template['macAddress']
    for _ in range(2):
        entries.append(template.copy())
    entries[1]['signalStrength'] += 2
    entries[2]['signalStrength'] -= 2
    export_queue.enqueue(queued_items, export_queue.queue_key())
    self._update_all()

    shard_model = WifiShard.shard_model(mac)
    stored = self.session.query(shard_model).all()
    self.assertEqual(len(stored), 1)
    self.assertEqual(stored[0].samples, 1)
def new_stations(self, name, station_keys):
    """Count how many of ``station_keys`` are not yet stored.

    For ``name == 'wifi'`` the macs are looked up in their WifiShard
    tables. For ``name == 'cell'`` the keys are looked up in Cell first
    and the remaining ones in CellBlocklist. For any other name all keys
    count as unknown. Returns the number of keys not found.
    """
    def hashkey_columns_only(model):
        # Build the query hook restricting loading to the hashkey columns.
        def extra(query):
            return query.options(
                load_only(*tuple(model._hashkey_cls._fields)))
        return extra

    if not len(station_keys):
        return 0

    # assume all stations are unknown
    unknown_keys = set(station_keys)

    if name == 'wifi':
        # there is only one combined table structure
        macs_by_model = defaultdict(list)
        for mac in unknown_keys:
            macs_by_model[WifiShard.shard_model(mac)].append(mac)

        for model, macs in macs_by_model.items():
            known = self.session.query(model.mac).filter(
                model.mac.in_(macs))
            unknown_keys -= {row.mac for row in known.all()}

    elif name == 'cell':
        # first check the station table, which is more likely to contain
        # stations
        stations = Cell.iterkeys(
            self.session,
            list(unknown_keys),
            # only load the columns required for the hashkey
            extra=hashkey_columns_only(Cell))
        # subtract all stations which are found in the station table
        unknown_keys -= {station.hashkey() for station in stations}
        if not len(unknown_keys):  # pragma: no cover
            return 0

        # Only check the blocklist table for the still unknown keys.
        # There is no need to check for the already found keys again.
        blocked = CellBlocklist.iterkeys(
            self.session,
            list(unknown_keys),
            # only load the columns required for the hashkey
            extra=hashkey_columns_only(CellBlocklist))
        # subtract all stations which are found in the blocklist table
        unknown_keys -= {block.hashkey() for block in blocked}

    return len(unknown_keys)
def test_new(self):
    """A single observation for an unknown wifi creates one new row.

    Position and bounds equal the observation's position, the radius is
    0, created/modified are today and all block columns are None.
    """
    started = util.utcnow()
    today = started.date()
    obs = WifiObservationFactory.build()
    self._queue_and_update([obs])

    model = WifiShard.shard_model(obs.mac)
    rows = self.session.query(model).all()
    self.assertEqual(len(rows), 1)
    row = rows[0]
    self.assertAlmostEqual(row.lat, obs.lat)
    self.assertAlmostEqual(row.max_lat, obs.lat)
    self.assertAlmostEqual(row.min_lat, obs.lat)
    self.assertAlmostEqual(row.lon, obs.lon)
    self.assertAlmostEqual(row.max_lon, obs.lon)
    self.assertAlmostEqual(row.min_lon, obs.lon)
    self.assertEqual(row.radius, 0)
    self.assertEqual(row.region, 'GB')
    self.assertEqual(row.samples, 1)
    self.assertEqual(row.created.date(), today)
    self.assertEqual(row.modified.date(), today)
    self.assertEqual(row.block_first, None)
    self.assertEqual(row.block_last, None)
    self.assertEqual(row.block_count, None)
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Returns a dict of queue name to DataQueue: ``update_incoming``
    (batch=100, compressed), the two cellarea queues (batch=100,
    json=False) and one batch=500 queue per shard id of BlueShard,
    DataMap (json=False), CellShard and WifiShard.
    """
    data_queues = {
        # update_incoming needs to be the exact same as in webapp.config
        'update_incoming': DataQueue(
            'update_incoming', redis_client, batch=100, compress=True),
    }

    for key in ('update_cellarea', 'update_cellarea_ocid'):
        data_queues[key] = DataQueue(
            key, redis_client, batch=100, json=False)

    # (queue name prefix, shard class, extra DataQueue keyword arguments)
    sharded = (
        ('update_blue_', BlueShard, {}),
        ('update_datamap_', DataMap, {'json': False}),
        ('update_cell_', CellShard, {}),
        ('update_wifi_', WifiShard, {}),
    )
    for prefix, shard_cls, extra in sharded:
        for shard_id in shard_cls.shards().keys():
            key = prefix + shard_id
            data_queues[key] = DataQueue(
                key, redis_client, batch=500, **extra)
    return data_queues
def process_reports(self, reports, userid=None):
    """Process a batch of reports and queue their observations.

    Each report is passed to ``self.process_report``. The resulting cell
    and wifi observations are counted, checked for new stations via
    ``self.new_stations`` and enqueued on the per-shard
    ``update_cell_*`` / ``update_wifi_*`` data queues using
    ``self.pipe``. A report that yields neither cell nor wifi
    observations counts as malformed. Finally the datamap, score and
    stats hooks are called with the collected positions and counts.
    """
    # (station type, shard class, observation attribute holding the key)
    station_kinds = (
        ('cell', CellShard, 'cellid'),
        ('wifi', WifiShard, 'mac'),
    )

    malformed_reports = 0
    positions = set()
    observations = {'cell': [], 'wifi': []}
    obs_count = {
        'cell': {'upload': 0, 'drop': 0},
        'wifi': {'upload': 0, 'drop': 0},
    }
    new_station_count = {'cell': 0, 'wifi': 0}

    for report in reports:
        cell, wifi, malformed_obs = self.process_report(report)
        for name, found in (('cell', cell), ('wifi', wifi)):
            if found:
                observations[name].extend(found)
                obs_count[name]['upload'] += len(found)

        if cell or wifi:
            positions.add((report['lat'], report['lon']))
        else:
            malformed_reports += 1

        for name in ('cell', 'wifi'):
            obs_count[name]['drop'] += malformed_obs[name]

    # group by unique station key
    for name, _, key_attr in station_kinds:
        station_keys = set(
            getattr(ob, key_attr) for ob in observations[name])
        # determine scores for stations
        new_station_count[name] += self.new_stations(name, station_keys)

    # Enqueue the observations on the queue of the shard they belong to.
    for name, shard_cls, key_attr in station_kinds:
        if not observations[name]:
            continue
        sharded_obs = defaultdict(list)
        for ob in observations[name]:
            shard_id = shard_cls.shard_id(getattr(ob, key_attr))
            sharded_obs[shard_id].append(ob)
        for shard_id, values in sharded_obs.items():
            queue = self.data_queues['update_' + name + '_' + shard_id]
            queue.enqueue(list(values), pipe=self.pipe)

    self.process_datamap(positions)
    self.process_score(userid, positions, new_station_count)
    self.emit_stats(
        len(reports),
        malformed_reports,
        obs_count,
    )
def test_blocklist_moving_wifis(self):
    """
    Check which wifis are blocked after their observations are processed.

    Each scenario below queues observations for one wifi. The macs of
    the wifis expected to be blocked are gathered in `expected` and
    compared against the blocked rows found in the affected shards.
    """
    today = util.utcnow().date()
    queued = []
    expected = set()

    def observe(lat, lon, mac):
        queued.append(WifiObservationFactory(lat=lat, lon=lon, mac=mac))

    def forget_position(station):
        for column in ('lat', 'lon',
                       'max_lat', 'min_lat', 'max_lon', 'min_lon'):
            setattr(station, column, None)

    (unlocated, known, far_off, already_blocked,
     permanent, unblocked, unblocked_moving) = \
        WifiShardFactory.create_batch(7)
    unsaved = WifiShardFactory.build()

    # a wifi without an entry and disagreeing observations
    observe(unsaved.lat, unsaved.lon, unsaved.mac)
    observe(unsaved.lat + 2.0, unsaved.lon, unsaved.mac)
    expected.add(unsaved.mac)

    # a wifi with an entry but no prior position
    for lat_offset, lon_offset in (
            (0.001, 0.001), (0.002, 0.005), (0.003, 0.009)):
        observe(unlocated.lat + lat_offset,
                unlocated.lon + lon_offset,
                unlocated.mac)
    unlocated.lat = None
    unlocated.lon = None
    unlocated.samples = None
    unlocated.weight = None

    # a wifi with a prior known position
    known.samples = 1
    known.weight = 1.0
    known.lat += 1.0
    known.lon += 1.0
    observe(known.lat + 0.01, known.lon, known.mac)
    observe(known.lat + 0.07, known.lon, known.mac)
    expected.add(known.mac)

    # a wifi with a very different prior position
    far_off.samples = 1
    far_off.weight = 1.0
    observe(far_off.lat + 2.0, far_off.lon, far_off.mac)
    observe(far_off.lat + 2.002, far_off.lon, far_off.mac)
    expected.add(far_off.mac)

    # an already blocked wifi
    already_blocked.block_last = today
    already_blocked.block_count = 1
    observe(already_blocked.lat, already_blocked.lon,
            already_blocked.mac)
    observe(already_blocked.lat + 0.1, already_blocked.lon,
            already_blocked.mac)
    expected.add(already_blocked.mac)

    # a permanently blocked wifi
    old_lat, old_lon = permanent.lat, permanent.lon
    permanent.block_last = today - 2 * TEMPORARY_BLOCKLIST_DURATION
    permanent.block_count = PERMANENT_BLOCKLIST_THRESHOLD
    forget_position(permanent)
    observe(old_lat, old_lon, permanent.mac)
    expected.add(permanent.mac)

    # a no longer blocked wifi
    old_lat, old_lon = unblocked.lat, unblocked.lon
    unblocked.block_last = today - 2 * TEMPORARY_BLOCKLIST_DURATION
    unblocked.block_count = 2
    forget_position(unblocked)
    observe(old_lat, old_lon, unblocked.mac)

    # a no longer blocked wifi with disagreeing observations
    old_lat, old_lon = unblocked_moving.lat, unblocked_moving.lon
    unblocked_moving.block_last = (
        today - 2 * TEMPORARY_BLOCKLIST_DURATION)
    unblocked_moving.block_count = 2
    forget_position(unblocked_moving)
    observe(old_lat, old_lon, unblocked_moving.mac)
    observe(old_lat + 2.0, old_lon, unblocked_moving.mac)
    expected.add(unblocked_moving.mac)

    self.session.commit()
    self._queue_and_update_wifi(queued)

    # only look at the shards which contain one of the expected wifis
    shard_models = set(
        WifiShard.shard_model(mac) for mac in expected)
    blocked_macs = set()
    for model in shard_models:
        for row in self.session.query(model).all():
            if row.blocked():
                blocked_macs.add(row.mac)

    self.assertEqual(blocked_macs, expected)
def celerybeat_schedule(app_config):
    """
    Build the celery beat schedule for the given configuration.

    Returns a dictionary mapping entry names to their definition. The
    monitoring, statistics and data pipeline entries as well as one
    update entry per cell, datamap and wifi shard are always included.
    The cell export entries are only added if an `assets` bucket is
    configured, the OCID entries only if there is an `import:ocid`
    config section.
    """
    def entry(task, schedule, expires, args=None):
        # Assemble a single schedule entry, `args` is left out
        # for tasks which are called without arguments.
        definition = {
            'task': task,
            'schedule': schedule,
            'options': {'expires': expires},
        }
        if args is not None:
            definition['args'] = args
        return definition

    sections = app_config.sections()

    schedule = {
        # Monitoring
        'monitor-queue-size': entry(
            'ichnaea.data.tasks.monitor_queue_size',
            timedelta(seconds=60), 57),
        'monitor-api-users': entry(
            'ichnaea.data.tasks.monitor_api_users',
            timedelta(seconds=600), 570),
        'monitor-api-key-limits': entry(
            'ichnaea.data.tasks.monitor_api_key_limits',
            timedelta(seconds=600), 570),

        # Statistics
        'update-statcounter': entry(
            'ichnaea.data.tasks.update_statcounter',
            crontab(minute=3), 2700, args=(1, )),
        'update-statregion': entry(
            'ichnaea.data.tasks.update_statregion',
            timedelta(seconds=3600 * 6), 3600 * 5),

        # Data Pipeline
        'schedule-export-reports': entry(
            'ichnaea.data.tasks.schedule_export_reports',
            timedelta(seconds=8), 15),
        'update-cellarea': entry(
            'ichnaea.data.tasks.update_cellarea',
            timedelta(seconds=8), 15, args=(100, )),
        'update-cellarea-ocid': entry(
            'ichnaea.data.tasks.update_cellarea_ocid',
            timedelta(seconds=9), 15, args=(100, )),
        'update-score': entry(
            'ichnaea.data.tasks.update_score',
            timedelta(seconds=9), 10, args=(250, )),
    }

    for shard_id in CellShard.shards().keys():
        schedule['update-cell-' + shard_id] = entry(
            'ichnaea.data.tasks.update_cell',
            timedelta(seconds=7), 10, args=(500, shard_id))

    for shard_id in DataMap.shards().keys():
        schedule['update-datamap-' + shard_id] = entry(
            'ichnaea.data.tasks.update_datamap',
            timedelta(seconds=14), 20, args=(500, shard_id))

    for shard_id in WifiShard.shards().keys():
        schedule['update-wifi-' + shard_id] = entry(
            'ichnaea.data.tasks.update_wifi',
            timedelta(seconds=6), 10, args=(500, shard_id))

    # only configure tasks if target bucket is configured
    if 'assets' in sections and app_config.get('assets', 'bucket', None):
        schedule['cell-export-full'] = entry(
            'ichnaea.data.tasks.cell_export_full',
            crontab(hour=0, minute=13), 39600)
        schedule['cell-export-diff'] = entry(
            'ichnaea.data.tasks.cell_export_diff',
            crontab(minute=3), 2700)

    if 'import:ocid' in sections:
        schedule['monitor-ocid-import'] = entry(
            'ichnaea.data.tasks.monitor_ocid_import',
            timedelta(seconds=600), 570)
        schedule['cell-import-external'] = entry(
            'ichnaea.data.tasks.cell_import_external',
            crontab(minute=52), 2700, args=(True, ))

    return schedule
def celerybeat_schedule(app_config):
    """
    Build the celery beat schedule for the given configuration.

    Returns a dictionary mapping entry names to their definition. The
    monitoring, statistics and data pipeline entries as well as one
    update entry per cell, datamap and wifi shard are always included.
    The cell export entries are only added if an `assets` bucket is
    configured, the OCID entries only if there is an `import:ocid`
    config section.
    """
    def entry(task, schedule, expires, args=None):
        # Assemble a single schedule entry, `args` is left out
        # for tasks which are called without arguments.
        definition = {
            'task': task,
            'schedule': schedule,
            'options': {'expires': expires},
        }
        if args is not None:
            definition['args'] = args
        return definition

    sections = app_config.sections()

    schedule = {
        # Monitoring
        'monitor-queue-size': entry(
            'ichnaea.data.tasks.monitor_queue_size',
            timedelta(seconds=60), 57),
        'monitor-api-users': entry(
            'ichnaea.data.tasks.monitor_api_users',
            timedelta(seconds=600), 570),
        'monitor-api-key-limits': entry(
            'ichnaea.data.tasks.monitor_api_key_limits',
            timedelta(seconds=600), 570),

        # Statistics
        'update-statcounter': entry(
            'ichnaea.data.tasks.update_statcounter',
            crontab(minute=3), 2700, args=(1, )),
        'update-statregion': entry(
            'ichnaea.data.tasks.update_statregion',
            crontab(minute=5), 2700),

        # Data Pipeline
        'schedule-export-reports': entry(
            'ichnaea.data.tasks.schedule_export_reports',
            timedelta(seconds=8), 15),
        'update-cellarea': entry(
            'ichnaea.data.tasks.update_cellarea',
            timedelta(seconds=8), 15, args=(100, )),
        'update-cellarea-ocid': entry(
            'ichnaea.data.tasks.update_cellarea_ocid',
            timedelta(seconds=9), 15, args=(100, )),
        'update-score': entry(
            'ichnaea.data.tasks.update_score',
            timedelta(seconds=9), 10, args=(250, )),
    }

    for shard_id in CellShard.shards().keys():
        schedule['update-cell-' + shard_id] = entry(
            'ichnaea.data.tasks.update_cell',
            timedelta(seconds=7), 10, args=(500, shard_id))

    for shard_id in DataMap.shards().keys():
        schedule['update-datamap-' + shard_id] = entry(
            'ichnaea.data.tasks.update_datamap',
            timedelta(seconds=14), 20, args=(500, shard_id))

    for shard_id in WifiShard.shards().keys():
        schedule['update-wifi-' + shard_id] = entry(
            'ichnaea.data.tasks.update_wifi',
            timedelta(seconds=6), 10, args=(500, shard_id))

    # only configure tasks if target bucket is configured
    if 'assets' in sections and app_config.get('assets', 'bucket', None):
        schedule['cell-export-full'] = entry(
            'ichnaea.data.tasks.cell_export_full',
            crontab(hour=0, minute=13), 39600)
        schedule['cell-export-diff'] = entry(
            'ichnaea.data.tasks.cell_export_diff',
            crontab(minute=3), 2700)

    if 'import:ocid' in sections:
        schedule['monitor-ocid-import'] = entry(
            'ichnaea.data.tasks.monitor_ocid_import',
            timedelta(seconds=600), 570)
        schedule['cell-import-external'] = entry(
            'ichnaea.data.tasks.cell_import_external',
            crontab(minute=52), 2700, args=(True, ))

    return schedule
def process_reports(self, reports, userid=None):
    """
    Process a batch of reports and queue the resulting observations.

    Every report is handed to self.process_report. The returned cell
    and wifi observations are collected, counted and appended to the
    update queue of their shard. Afterwards the datamap, score and
    stats processing is called with the gathered positions and counts.

    :param reports: A sized iterable of reports. A report yielding any
                    observation is read for its 'lat' and 'lon' keys.
    :param userid: Passed on to self.process_score.
    """
    kinds = ('cell', 'wifi')
    key_attribute = {'cell': 'cellid', 'wifi': 'mac'}

    collected = {'cell': [], 'wifi': []}
    counts = {
        'cell': {'upload': 0, 'drop': 0},
        'wifi': {'upload': 0, 'drop': 0},
    }
    new_counts = {'cell': 0, 'wifi': 0}
    seen_positions = set()
    bad_reports = 0

    for report in reports:
        cell_obs, wifi_obs, dropped = self.process_report(report)
        for kind, found in (('cell', cell_obs), ('wifi', wifi_obs)):
            if found:
                collected[kind].extend(found)
                counts[kind]['upload'] += len(found)
        if not (cell_obs or wifi_obs):
            # the report didn't contain a single usable observation
            bad_reports += 1
        else:
            seen_positions.add((report['lat'], report['lon']))
        for kind in kinds:
            counts[kind]['drop'] += dropped[kind]

    # group by unique station key and determine scores for stations
    for kind in kinds:
        attribute = key_attribute[kind]
        unique_keys = set(
            getattr(ob, attribute) for ob in collected[kind])
        new_counts[kind] += self.new_stations(kind, unique_keys)

    def enqueue_by_shard(items, shard_of, queue_prefix):
        # Bucket the observations per shard id and append each
        # bucket to the data queue of that shard.
        buckets = defaultdict(list)
        for item in items:
            buckets[shard_of(item)].append(item)
        for shard_id, bucket in buckets.items():
            queue = self.data_queues[queue_prefix + shard_id]
            queue.enqueue(list(bucket), pipe=self.pipe)

    if collected['cell']:
        enqueue_by_shard(
            collected['cell'],
            lambda ob: CellShard.shard_id(ob.cellid),
            'update_cell_')

    if collected['wifi']:
        enqueue_by_shard(
            collected['wifi'],
            lambda ob: WifiShard.shard_id(ob.mac),
            'update_wifi_')

    self.process_datamap(seen_positions)
    self.process_score(userid, seen_positions, new_counts)
    self.emit_stats(len(reports), bad_reports, counts)
def test_blocklist_moving_wifis(self):
    """
    Check which wifis are blocked after their observations are processed.

    Each scenario below queues observations for one wifi. The macs of
    the wifis expected to be blocked are gathered in `expected` and
    compared against the blocked rows found in the affected shards.
    """
    today = util.utcnow().date()
    queued = []
    expected = set()

    def observe(lat, lon, key):
        queued.append(WifiObservationFactory(lat=lat, lon=lon, key=key))

    def forget_position(station):
        for column in ('lat', 'lon',
                       'max_lat', 'min_lat', 'max_lon', 'min_lon'):
            setattr(station, column, None)

    (unlocated, known, far_off, already_blocked,
     permanent, unblocked, unblocked_moving) = \
        WifiShardFactory.create_batch(7)
    unsaved = WifiShardFactory.build()

    # a wifi without an entry and disagreeing observations
    observe(unsaved.lat, unsaved.lon, unsaved.mac)
    observe(unsaved.lat + 2.0, unsaved.lon, unsaved.mac)
    expected.add(unsaved.mac)

    # a wifi with an entry but no prior position
    for lat_offset, lon_offset in (
            (0.001, 0.001), (0.002, 0.005), (0.003, 0.009)):
        observe(unlocated.lat + lat_offset,
                unlocated.lon + lon_offset,
                unlocated.mac)
    unlocated.lat = None
    unlocated.lon = None
    unlocated.samples = 0

    # a wifi with a prior known position
    known.samples = 1
    known.lat += 1.0
    known.lon += 1.0
    observe(known.lat + 0.01, known.lon, known.mac)
    observe(known.lat + 0.07, known.lon, known.mac)
    expected.add(known.mac)

    # a wifi with a very different prior position
    far_off.samples = 1
    observe(far_off.lat + 2.0, far_off.lon, far_off.mac)
    observe(far_off.lat + 2.002, far_off.lon, far_off.mac)
    expected.add(far_off.mac)

    # an already blocked wifi
    already_blocked.block_last = today
    already_blocked.block_count = 1
    observe(already_blocked.lat, already_blocked.lon,
            already_blocked.mac)
    observe(already_blocked.lat + 0.1, already_blocked.lon,
            already_blocked.mac)
    expected.add(already_blocked.mac)

    # a permanently blocked wifi
    old_lat, old_lon = permanent.lat, permanent.lon
    permanent.block_last = today - 2 * TEMPORARY_BLOCKLIST_DURATION
    permanent.block_count = PERMANENT_BLOCKLIST_THRESHOLD
    forget_position(permanent)
    observe(old_lat, old_lon, permanent.mac)
    expected.add(permanent.mac)

    # a no longer blocked wifi
    old_lat, old_lon = unblocked.lat, unblocked.lon
    unblocked.block_last = today - 2 * TEMPORARY_BLOCKLIST_DURATION
    unblocked.block_count = 2
    forget_position(unblocked)
    observe(old_lat, old_lon, unblocked.mac)

    # a no longer blocked wifi with disagreeing observations
    old_lat, old_lon = unblocked_moving.lat, unblocked_moving.lon
    unblocked_moving.block_last = (
        today - 2 * TEMPORARY_BLOCKLIST_DURATION)
    unblocked_moving.block_count = 2
    forget_position(unblocked_moving)
    observe(old_lat, old_lon, unblocked_moving.mac)
    observe(old_lat + 2.0, old_lon, unblocked_moving.mac)
    expected.add(unblocked_moving.mac)

    self.session.commit()
    self._queue_and_update(queued)

    # only look at the shards which contain one of the expected wifis
    shard_models = set(
        WifiShard.shard_model(mac) for mac in expected)
    blocked_macs = set()
    for model in shard_models:
        for row in self.session.query(model).all():
            if row.blocked():
                blocked_macs.add(row.mac)

    self.assertEqual(blocked_macs, expected)
def test_blocklist_moving_wifis(self):
    """
    Check which wifis are blocked after the wifi update task has run.

    Each scenario below queues observations for one wifi. The macs of
    the wifis expected to be blocked are gathered in `expected` and
    compared against the blocked rows found in the affected shards.
    Finally the emitted drop and blocklist counters are checked.
    """
    today = util.utcnow().date()
    queued = []
    expected = set()

    def observe(lat, lon, key):
        queued.append(WifiObservationFactory(lat=lat, lon=lon, key=key))

    def forget_position(station):
        for column in ('lat', 'lon',
                       'max_lat', 'min_lat', 'max_lon', 'min_lon'):
            setattr(station, column, None)

    (unlocated, known, far_off, southern, already_blocked,
     permanent, unblocked, unblocked_moving) = \
        WifiShardFactory.create_batch(8)
    unsaved = WifiShardFactory.build()

    # a wifi without an entry and disagreeing observations
    observe(unsaved.lat, unsaved.lon, unsaved.mac)
    observe(unsaved.lat + 2.0, unsaved.lon, unsaved.mac)
    expected.add(unsaved.mac)

    # a wifi with an entry but no prior position
    for lat_offset, lon_offset in (
            (0.001, 0.001), (0.002, 0.005), (0.003, 0.009)):
        observe(unlocated.lat + lat_offset,
                unlocated.lon + lon_offset,
                unlocated.mac)
    unlocated.lat = None
    unlocated.lon = None
    unlocated.samples = 0

    # a wifi with a prior known position
    known.samples = 1
    known.lat += 1.0
    known.lon += 1.0
    observe(known.lat + 0.01, known.lon, known.mac)
    observe(known.lat + 0.07, known.lon, known.mac)
    expected.add(known.mac)

    # a wifi with a very different prior position
    far_off.samples = 1
    observe(far_off.lat + 2.0, far_off.lon, far_off.mac)
    observe(far_off.lat + 2.002, far_off.lon, far_off.mac)
    expected.add(far_off.mac)

    # another wifi with a prior known position (and negative lat)
    southern.samples = 1
    southern.lat *= -1.0
    observe(southern.lat - 0.1, southern.lon, southern.mac)
    observe(southern.lat - 0.16, southern.lon, southern.mac)
    expected.add(southern.mac)

    # an already blocked wifi
    already_blocked.block_last = today
    already_blocked.block_count = 1
    observe(already_blocked.lat, already_blocked.lon,
            already_blocked.mac)
    observe(already_blocked.lat + 0.1, already_blocked.lon,
            already_blocked.mac)
    expected.add(already_blocked.mac)

    # a permanently blocked wifi
    old_lat, old_lon = permanent.lat, permanent.lon
    permanent.block_last = today - 2 * TEMPORARY_BLOCKLIST_DURATION
    permanent.block_count = PERMANENT_BLOCKLIST_THRESHOLD
    forget_position(permanent)
    observe(old_lat, old_lon, permanent.mac)
    expected.add(permanent.mac)

    # a no longer blocked wifi
    old_lat, old_lon = unblocked.lat, unblocked.lon
    unblocked.block_last = today - 2 * TEMPORARY_BLOCKLIST_DURATION
    unblocked.block_count = 2
    forget_position(unblocked)
    observe(old_lat, old_lon, unblocked.mac)

    # a no longer blocked wifi with disagreeing observations
    old_lat, old_lon = unblocked_moving.lat, unblocked_moving.lon
    unblocked_moving.block_last = (
        today - 2 * TEMPORARY_BLOCKLIST_DURATION)
    unblocked_moving.block_count = 2
    forget_position(unblocked_moving)
    observe(old_lat, old_lon, unblocked_moving.mac)
    observe(old_lat + 2.0, old_lon, unblocked_moving.mac)
    expected.add(unblocked_moving.mac)

    self.data_queue.enqueue(queued)
    self.session.commit()
    update_wifi.delay().get()

    # only look at the shards which contain one of the expected wifis
    shard_models = set(
        WifiShard.shard_model(mac) for mac in expected)
    blocked_macs = set()
    for model in shard_models:
        for row in self.session.query(model).all():
            if row.blocked():
                blocked_macs.add(row.mac)

    self.assertEqual(blocked_macs, expected)

    drop_tags = ['type:wifi', 'reason:blocklisted']
    blocklist_tags = ['type:wifi', 'action:add', 'reason:moving']
    self.check_stats(counter=[
        ('data.observation.drop', 1, 3, drop_tags),
        ('data.station.blocklist', 1, 5, blocklist_tags),
    ])