def test_skip_delete_new_blocks(self):
    now = util.utcnow()
    today_0000 = now.replace(hour=0, minute=0, second=0, tzinfo=pytz.UTC)
    yesterday_0000 = today_0000 - timedelta(days=1)
    yesterday_2359 = today_0000 - timedelta(seconds=1)
    old = now - timedelta(days=5)
    session = self.db_master_session

    for i in range(100, 150, 10):
        block = ObservationBlock()
        block.measure_type = ObservationType.cell
        block.start_id = i
        block.end_id = i + 10
        block.s3_key = 'fake_key'
        block.archive_sha = 'fake_sha'
        block.archive_date = None
        session.add(block)

    observations = []
    for i in range(100, 110):
        observations.append(CellObservation(id=i, created=old))
    for i in range(110, 120):
        observations.append(CellObservation(id=i, created=yesterday_0000))
    for i in range(120, 130):
        observations.append(CellObservation(id=i, created=yesterday_2359))
    for i in range(130, 140):
        observations.append(CellObservation(id=i, created=today_0000))
    for i in range(140, 150):
        observations.append(CellObservation(id=i, created=now))
    session.add_all(observations)
    session.commit()

    def _archived_blocks():
        blocks = session.query(ObservationBlock).all()
        return len([b for b in blocks if b.archive_date is not None])

    def _delete(days=7):
        with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
            delete_cellmeasure_records.delay(days_old=days).get()
        session.commit()

    _delete(days=7)
    self.assertEqual(session.query(CellObservation).count(), 50)
    self.assertEqual(_archived_blocks(), 0)

    _delete(days=2)
    self.assertEqual(session.query(CellObservation).count(), 40)
    self.assertEqual(_archived_blocks(), 1)

    _delete(days=1)
    self.assertEqual(session.query(CellObservation).count(), 20)
    self.assertEqual(_archived_blocks(), 3)

    _delete(days=0)
    self.assertEqual(session.query(CellObservation).count(), 0)
    self.assertEqual(_archived_blocks(), 5)
def test_mcc_latlon(self):
    sample = dict(radio=Radio.gsm, mnc=6, lac=1, cid=2,
                  lat=GB_LAT, lon=GB_LON)
    # MCC 262 (Germany) is inconsistent with the GB lat/lon,
    # so create() rejects that observation.
    assert CellObservation.create(mcc=GB_MCC, **sample) is not None
    assert CellObservation.create(mcc=262, **sample) is None
def add_line_of_cells_and_scan_lac(self):
    session = self.session
    big = 1.0
    small = big / 10
    keys = dict(radio=Radio.cdma, mcc=1, mnc=1, lac=1)
    observations = [
        CellObservation(lat=ctr + xd, lon=ctr + yd, cid=cell, **keys)
        for cell in range(10)
        for ctr in [cell * big]
        for (xd, yd) in [(small, small),
                         (small, -small),
                         (-small, small),
                         (-small, -small)]
    ]
    session.add_all(observations)

    cells = [
        Cell(lat=ctr, lon=ctr, cid=cell,
             new_measures=4, total_measures=1, **keys)
        for cell in range(10)
        for ctr in [cell * big]
    ]
    session.add_all(cells)
    session.commit()

    result = location_update_cell.delay(
        min_new=0, max_new=9999, batch=len(observations))
    self.assertEqual(result.get(), (len(cells), 0))

    scan_areas.delay()
def test_unthrottle_cell_observations(self):
    session = self.db_master_session
    block = ObservationBlock()
    block.measure_type = ObservationType.cell
    block.start_id = 120
    block.end_id = 140
    block.s3_key = 'fake_key'
    block.archive_sha = 'fake_sha'
    block.archive_date = None
    session.add(block)

    gsm = RADIO_TYPE['gsm']
    k = dict(radio=gsm, mcc=1, mnc=2, lac=4, lat=1.0, lon=1.0)
    for i in range(100, 150):
        session.add(
            CellObservation(id=i, cid=i, created=self.really_old, **k))
        session.add(Cell(total_measures=11000, cid=i, **k))
    session.commit()

    with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
        delete_cellmeasure_records.delay(batch=3).get()

    cell_unthrottle_measures.delay(10000, 1000).get()

    cells = session.query(Cell).all()
    self.assertEqual(len(cells), 50)
    for cell in cells:
        if 120 <= cell.cid < 140:
            self.assertEqual(cell.total_measures, 0)
        else:
            self.assertEqual(cell.total_measures, 1)

    self.check_stats(counter=['items.cell_unthrottled'])
def test_customjson(self):
    now = util.utcnow()
    report_id = uuid.uuid1()
    obs = CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        report_id=report_id, lat=GB_LAT, lon=GB_LON, created=now)

    json_data = kombu_dumps(obs)
    self.assertTrue('accuracy' not in json_data)

    result = kombu_loads(json_data)
    self.assertIsInstance(result, CellObservation)
    self.assertTrue(result.accuracy is None)
    self.assertEqual(type(result.report_id), uuid.UUID)
    self.assertEqual(result.report_id, report_id)
    self.assertEqual(type(result.radio), Radio)
    self.assertEqual(result.radio, Radio.gsm)
    self.assertEqual(result.mcc, GB_MCC)
    self.assertEqual(result.mnc, 5)
    self.assertEqual(result.lac, 12345)
    self.assertEqual(result.cid, 23456)
    self.assertEqual(result.lat, GB_LAT)
    self.assertEqual(result.lon, GB_LON)
    self.assertEqual(type(result.created), datetime.datetime)
    self.assertEqual(result.created, now)
def test_schedule_cell_observations(self):
    session = self.db_master_session
    blocks = schedule_cellmeasure_archival.delay(batch=1).get()
    self.assertEqual(len(blocks), 0)

    observations = []
    for i in range(20):
        observations.append(CellObservation(created=self.really_old))
    session.add_all(observations)
    session.flush()
    start_id = observations[0].id

    blocks = schedule_cellmeasure_archival.delay(batch=15).get()
    self.assertEqual(len(blocks), 1)
    block = blocks[0]
    self.assertEqual(block, (start_id, start_id + 15))

    blocks = schedule_cellmeasure_archival.delay(batch=6).get()
    self.assertEqual(len(blocks), 0)

    blocks = schedule_cellmeasure_archival.delay(batch=5).get()
    self.assertEqual(len(blocks), 1)
    block = blocks[0]
    self.assertEqual(block, (start_id + 15, start_id + 20))

    blocks = schedule_cellmeasure_archival.delay(batch=1).get()
    self.assertEqual(len(blocks), 0)
def test_fields(self):
    obs = CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        lat=GB_LAT, lon=GB_LON, pressure=1010.2, source='gnss',
        timestamp=1405602028568, asu=26, signal=-61, ta=10)

    assert obs.lat == GB_LAT
    assert obs.lon == GB_LON
    assert obs.pressure == 1010.2
    assert obs.source == ReportSource.gnss
    assert obs.timestamp == 1405602028568
    assert obs.radio == Radio.gsm
    assert obs.mcc == GB_MCC
    assert obs.mnc == 5
    assert obs.lac == 12345
    assert obs.cid == 23456
    assert obs.asu == 26
    assert obs.signal == -61
    assert obs.ta == 10
    assert obs.shard_id == 'gsm'
def test_location_update_cell(self):
    now = util.utcnow()
    before = now - timedelta(days=1)
    schema = ValidCellKeySchema()
    session = self.db_master_session

    k1 = dict(radio=1, mcc=1, mnc=2, lac=3, cid=4)
    k2 = dict(radio=1, mcc=1, mnc=2, lac=6, cid=8)
    k3 = dict(radio=1, mcc=1, mnc=2,
              lac=schema.fields['lac'].missing,
              cid=schema.fields['cid'].missing)
    data = [
        Cell(new_measures=3, total_measures=5, **k1),
        CellObservation(lat=1.0, lon=1.0, created=now, **k1),
        CellObservation(lat=1.002, lon=1.003, created=now, **k1),
        CellObservation(lat=1.004, lon=1.006, created=now, **k1),
        # The lac and cid are invalid and should be skipped.
        CellObservation(lat=1.5, lon=1.5, created=now, **k3),
        CellObservation(lat=1.502, lon=1.503, created=now, **k3),

        Cell(lat=2.0, lon=2.0, new_measures=2, total_measures=4, **k2),
        # The lat/lon is bogus and mismatches the line above on purpose
        # to make sure old observations are skipped.
        CellObservation(lat=-1.0, lon=-1.0, created=before, **k2),
        CellObservation(lat=-1.0, lon=-1.0, created=before, **k2),
        CellObservation(lat=2.002, lon=2.004, created=now, **k2),
        CellObservation(lat=2.002, lon=2.004, created=now, **k2),
    ]
    session.add_all(data)
    session.commit()

    result = location_update_cell.delay(min_new=1)
    self.assertEqual(result.get(), (2, 0))
    self.check_stats(
        total=2,
        timer=['task.data.location_update_cell'],
        gauge=['task.data.location_update_cell.new_measures_1_100'],
    )

    cells = session.query(Cell).all()
    self.assertEqual(len(cells), 2)
    self.assertEqual([c.new_measures for c in cells], [0, 0])
    for cell in cells:
        if cell.cid == 4:
            # no prior position, so the estimate is the plain average
            # of the three new observations
            self.assertEqual(cell.lat, 1.002)
            self.assertEqual(cell.lon, 1.003)
        elif cell.cid == 8:
            self.assertEqual(cell.lat, 2.001)
            self.assertEqual(cell.lon, 2.002)
def process_report(self, data):

    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # we want to only add those not already present.
        # We also only want to copy the top-level base report data
        # and not any nested values like cell or wifi.
        for (key, value) in src.items():
            if key != 'radio' and key not in dst \
               and not isinstance(value, (tuple, list, dict)):
                dst[key] = value

    report_data = Report.validate(data)
    if report_data is None:
        return ([], [])

    cell_observations = {}
    wifi_observations = {}

    if data.get('cell'):
        # flatten report / cell data into a single dict
        for cell in data['cell']:
            # only validate the additional fields
            cell = CellReport.validate(cell)
            if cell is None:
                continue
            add_missing_dict_entries(cell, report_data)
            cell_key = CellObservation.to_hashkey(cell)
            if cell_key in cell_observations:
                existing = cell_observations[cell_key]
                # prefer the observation with a lower timing advance,
                # a stronger signal or a higher asu
                if existing['ta'] > cell['ta'] or \
                   (existing['signal'] != 0 and
                    existing['signal'] < cell['signal']) or \
                   existing['asu'] < cell['asu']:
                    cell_observations[cell_key] = cell
            else:
                cell_observations[cell_key] = cell
    cell_observations = cell_observations.values()

    # flatten report / wifi data into a single dict
    if data.get('wifi'):
        for wifi in data['wifi']:
            # only validate the additional fields
            wifi = WifiReport.validate(wifi)
            if wifi is None:
                continue
            add_missing_dict_entries(wifi, report_data)
            wifi_key = WifiObservation.to_hashkey(wifi)
            if wifi_key in wifi_observations:
                existing = wifi_observations[wifi_key]
                if existing['signal'] != 0 and \
                   existing['signal'] < wifi['signal']:
                    wifi_observations[wifi_key] = wifi
            else:
                wifi_observations[wifi_key] = wifi
    wifi_observations = wifi_observations.values()

    return (cell_observations, wifi_observations)
def test_max_min_range_update(self):
    session = self.session
    k1 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=3, cid=4)
    data = [
        Cell(lat=1.001, lon=-1.001,
             max_lat=1.002, min_lat=1.0,
             max_lon=-1.0, min_lon=-1.002,
             new_measures=2, total_measures=4, **k1),
        CellObservation(lat=1.001, lon=-1.003, **k1),
        CellObservation(lat=1.005, lon=-1.007, **k1),
    ]
    session.add_all(data)
    session.commit()

    result = location_update_cell.delay(min_new=1)
    self.assertEqual(result.get(), (1, 0))

    cells = session.query(Cell).all()
    self.assertEqual(len(cells), 1)
    cell = cells[0]
    self.assertEqual(cell.lat, 1.002)
    self.assertEqual(cell.max_lat, 1.005)
    self.assertEqual(cell.min_lat, 1.0)
    self.assertEqual(cell.lon, -1.003)
    self.assertEqual(cell.max_lon, -1.0)
    self.assertEqual(cell.min_lon, -1.007)

    # independent calculation: the cell bounding box is
    # (1.000, -1.007) to (1.005, -1.000), with centroid
    # at (1.002, -1.003); worst distance from centroid
    # to any corner is 556m
    self.assertEqual(cell.range, 556)
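# The 556m expectation above can be checked independently. A minimal
# standalone sketch (not part of the test suite): a plain haversine
# distance, assuming the conventional 6371km mean earth radius, which
# reproduces the asserted range to within a metre.
import math

def haversine_m(lat1, lon1, lat2, lon2, radius_m=6371000.0):
    # great-circle distance between two lat/lon points, in meters
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))
    a = (math.sin((lat2 - lat1) / 2) ** 2 +
         math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    return 2 * radius_m * math.asin(math.sqrt(a))

# worst corner of the (1.000, -1.007) / (1.005, -1.000) bounding box,
# measured from the (1.002, -1.003) centroid
assert abs(haversine_m(1.002, -1.003, 1.005, -1.007) - 556) < 1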
def test_json(self):
    obs = CellObservationFactory.build(accuracy=None)
    result = CellObservation.from_json(simplejson.loads(
        simplejson.dumps(obs.to_json())))

    self.assertIsInstance(result, CellObservation)
    self.assertTrue(result.accuracy is None)
    self.assertEqual(type(result.radio), Radio)
    self.assertEqual(result.radio, obs.radio)
    self.assertEqual(result.mcc, obs.mcc)
    self.assertEqual(result.mnc, obs.mnc)
    self.assertEqual(result.lac, obs.lac)
    self.assertEqual(result.cid, obs.cid)
    self.assertEqual(result.lat, obs.lat)
    self.assertEqual(result.lon, obs.lon)
def test_json(self):
    obs = CellObservationFactory.build(accuracy=None, source="fixed")
    result = CellObservation.from_json(
        json.loads(json.dumps(obs.to_json())))

    assert type(result) is CellObservation
    assert result.accuracy is None
    assert type(result.radio) is Radio
    assert result.radio == obs.radio
    assert result.mcc == obs.mcc
    assert result.mnc == obs.mnc
    assert result.lac == obs.lac
    assert result.cid == obs.cid
    assert result.lat == obs.lat
    assert result.lon == obs.lon
    assert result.source is ReportSource.fixed
    assert type(result.source) is ReportSource
def test_fields(self):
    obs = CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        lat=GB_LAT, lon=GB_LON, asu=26, signal=-61, ta=10)
    self.assertEqual(obs.lat, GB_LAT)
    self.assertEqual(obs.lon, GB_LON)
    self.assertEqual(obs.radio, Radio.gsm)
    self.assertEqual(obs.mcc, GB_MCC)
    self.assertEqual(obs.mnc, 5)
    self.assertEqual(obs.lac, 12345)
    self.assertEqual(obs.cid, 23456)
    self.assertEqual(obs.asu, 26)
    self.assertEqual(obs.signal, -61)
    self.assertEqual(obs.ta, 10)
def test_internaljson(self):
    obs = CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        lat=GB_LAT, lon=GB_LON)

    result = internal_loads(internal_dumps(obs))
    self.assertIsInstance(result, CellObservation)
    self.assertTrue(result.accuracy is None)
    self.assertEqual(type(result.radio), Radio)
    self.assertEqual(result.radio, Radio.gsm)
    self.assertEqual(result.mcc, GB_MCC)
    self.assertEqual(result.mnc, 5)
    self.assertEqual(result.lac, 12345)
    self.assertEqual(result.cid, 23456)
    self.assertEqual(result.lat, GB_LAT)
    self.assertEqual(result.lon, GB_LON)
def test_backup_cell_to_s3(self):
    session = self.db_master_session
    batch_size = 10
    observations = []
    for i in range(batch_size):
        observations.append(CellObservation(created=self.really_old))
    session.add_all(observations)
    session.flush()
    start_id = observations[0].id

    blocks = schedule_cellmeasure_archival.delay(batch=batch_size).get()
    self.assertEqual(len(blocks), 1)
    block = blocks[0]
    self.assertEqual(block, (start_id, start_id + batch_size))

    with mock_s3():
        with patch.object(S3Backend,
                          'backup_archive', lambda x, y, z: True):
            write_cellmeasure_s3_backups.delay(cleanup_zip=False).get()

            msgs = self.heka_client.stream.msgs
            info_msgs = [m for m in msgs if m.type == 'oldstyle']
            self.assertEqual(1, len(info_msgs))
            info = info_msgs[0]
            fname = info.payload.split(":")[-1]

            myzip = ZipFile(fname)
            try:
                contents = set(myzip.namelist())
                expected_contents = set(['alembic_revision.txt',
                                         'cell_measure.csv'])
                self.assertEqual(expected_contents, contents)
            finally:
                myzip.close()

    blocks = session.query(ObservationBlock).all()
    self.assertEqual(len(blocks), 1)
    block = blocks[0]

    actual_sha = hashlib.sha1()
    with open(fname, 'rb') as backup_file:
        actual_sha.update(backup_file.read())
    self.assertEqual(block.archive_sha, actual_sha.digest())
    self.assertTrue(block.s3_key is not None)
    self.assertTrue('/cell_' in block.s3_key)
    self.assertTrue(block.archive_date is None)
def test_json(self):
    obs = CellObservationFactory.build(
        accuracy=None, source='fixed')
    result = CellObservation.from_json(simplejson.loads(
        simplejson.dumps(obs.to_json())))

    assert type(result) is CellObservation
    assert result.accuracy is None
    assert type(result.radio) is Radio
    assert result.radio == obs.radio
    assert result.mcc == obs.mcc
    assert result.mnc == obs.mnc
    assert result.lac == obs.lac
    assert result.cid == obs.cid
    assert result.lat == obs.lat
    assert result.lon == obs.lon
    assert result.source is ReportSource.fixed
    assert type(result.source) is ReportSource
def test_monitor_measures(self):
    session = self.db_master_session
    result = monitor_measures.delay().get()
    self.check_stats(
        gauge=[('table.cell_measure', 1), ('table.wifi_measure', 1)],
    )
    self.assertEqual(result, {'cell_measure': -1, 'wifi_measure': -1})

    # add some observations
    session.add_all([CellObservation() for i in range(3)])
    session.add_all([WifiObservation() for i in range(5)])
    session.flush()

    result = monitor_measures.delay().get()
    self.check_stats(
        gauge=[('table.cell_measure', 2), ('table.wifi_measure', 2)],
    )
    self.assertEqual(result, {'cell_measure': 3, 'wifi_measure': 5})
def test_scan_lacs_asymmetric(self):
    session = self.db_master_session
    big = 0.1
    small = big / 10
    keys = dict(radio=1, mcc=1, mnc=1, lac=1)
    observations = [
        CellObservation(lat=ctr + xd, lon=ctr + yd, cid=cell, **keys)
        for cell in range(6)
        for ctr in [(2 ** cell) * big]
        for (xd, yd) in [(small, small),
                         (small, -small),
                         (-small, small),
                         (-small, -small)]
    ]
    session.add_all(observations)

    cells = [
        Cell(lat=ctr, lon=ctr, cid=cell,
             new_measures=4, total_measures=1, **keys)
        for cell in range(6)
        for ctr in [(2 ** cell) * big]
    ]
    session.add_all(cells)
    session.commit()

    result = location_update_cell.delay(min_new=0, max_new=9999,
                                        batch=len(observations))
    self.assertEqual(result.get(), (len(cells), 0))
    scan_lacs.delay()

    lac = session.query(CellArea).filter(CellArea.lac == 1).first()

    # We produced a sequence of 0.02-degree-on-a-side cell bounding
    # boxes centered at [0.1, 0.2, 0.4, 0.8, 1.6, 3.2] degrees
    # ((2 ** cid) * big). The lower-left corner is thus at
    # (0.09, 0.09) and the upper-right corner at (3.21, 3.21).
    # We should therefore see a LAC centroid at (1.05, 1.05),
    # the mean of the cell centers, with a range of 339,540m.
    self.assertEqual(lac.lat, 1.05)
    self.assertEqual(lac.lon, 1.05)
    self.assertEqual(lac.range, 339540)
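# The same haversine sketch shown after test_max_min_range_update also
# reproduces the LAC range asserted above: the distance from the
# (1.05, 1.05) centroid to the farthest corner at (3.21, 3.21) comes
# out at roughly 339,540m with the assumed 6371km earth radius.
assert abs(haversine_m(1.05, 1.05, 3.21, 3.21) - 339540) < 5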
def test_scan_areas_race_with_location_update(self):
    session = self.session

    # First batch of cell observations for CID 1
    keys = dict(radio=Radio.cdma, mcc=1, mnc=1, lac=1, cid=1)
    cell = Cell(new_measures=4, total_measures=1, **keys)
    observations = [
        CellObservation(lat=1.0, lon=1.0, **keys),
        CellObservation(lat=1.0, lon=1.0, **keys),
        CellObservation(lat=1.0, lon=1.0, **keys),
        CellObservation(lat=1.0, lon=1.0, **keys),
    ]
    session.add(cell)
    session.add_all(observations)
    session.commit()

    # Periodic location_update_cell runs and updates CID 1
    # to have a location, inserts LAC 1 with new_measures=1
    # which will be picked up by the next scan_lac.
    result = location_update_cell.delay(min_new=1)
    self.assertEqual(result.get(), (1, 0))

    # Second batch of cell observations for CID 2
    keys['cid'] = 2
    cell = Cell(new_measures=4, total_measures=1, **keys)
    observations = [
        CellObservation(lat=1.0, lon=1.0, **keys),
        CellObservation(lat=1.0, lon=1.0, **keys),
        CellObservation(lat=1.0, lon=1.0, **keys),
        CellObservation(lat=1.0, lon=1.0, **keys),
    ]
    session.add(cell)
    session.add_all(observations)
    session.commit()

    # Periodic LAC scan runs, picking up LAC 1; this could
    # accidentally pick up CID 2, but it should not since it
    # has not had its location updated yet. If there's no
    # exception here, CID 2 is properly ignored.
    scan_areas.delay()
def location_update_cell(self, min_new=10, max_new=100, batch=10):
    cells = []
    redis_client = self.app.redis_client
    with self.db_session() as session:
        emit_new_observation_metric(self.stats_client, session,
                                    self.shortname, Cell,
                                    min_new, max_new)
        query = (session.query(Cell)
                        .filter(Cell.new_measures >= min_new)
                        .filter(Cell.new_measures < max_new)
                        .limit(batch))
        cells = query.all()
        if not cells:
            # keep the return type consistent with the non-empty case;
            # the tests compare the result against a (cells, moving) tuple
            return (0, 0)

        moving_cells = set()
        updated_lacs = set()
        for cell in cells:
            query = session.query(
                CellObservation.lat,
                CellObservation.lon,
                CellObservation.id).filter(
                *CellObservation.joinkey(cell))
            # only take the last X new_measures
            query = query.order_by(
                CellObservation.created.desc()).limit(
                cell.new_measures)
            observations = query.all()
            if observations:
                moving = calculate_new_position(
                    cell, observations, CELL_MAX_DIST_KM)
                if moving:
                    moving_cells.add(cell)
                updated_lacs.add(CellArea.to_hashkey(cell))

        if updated_lacs:
            session.on_post_commit(
                enqueue_lacs,
                redis_client,
                updated_lacs,
                UPDATE_KEY['cell_lac'])

        if moving_cells:
            # some cells found to be moving too much
            blacklist_and_remove_moving_cells(session, moving_cells)

        session.commit()

        return (len(cells), len(moving_cells))
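# calculate_new_position is imported rather than defined here. The test
# expectations pin down its observable behaviour: cid 4 in
# test_location_update_cell ends up at the plain average of its new
# observations, while cid 8 blends its prior estimate, weighted by the
# measures it already represents. A hedged sketch of that weighting
# under those assumptions (illustrative names only, not the project's
# actual implementation):
def sketch_new_position(cell, observations, max_dist_km, distance_km):
    new = len(observations)
    old = cell.total_measures - new
    sum_lat = sum(obs.lat for obs in observations)
    sum_lon = sum(obs.lon for obs in observations)
    if cell.lat is not None and old > 0:
        # the prior estimate counts as `old` measures in the average:
        # (2.0 * 2 + 2.002 * 2) / 4 == 2.001 for cid 8 in the test
        cell.lat = (cell.lat * old + sum_lat) / cell.total_measures
        cell.lon = (cell.lon * old + sum_lon) / cell.total_measures
    else:
        cell.lat = sum_lat / new
        cell.lon = sum_lon / new
    # report the cell as "moving" if any observation lies further from
    # the estimate than the allowed maximum (CELL_MAX_DIST_KM above);
    # distance_km is a caller-supplied great-circle helper
    return any(
        distance_km(cell.lat, cell.lon, obs.lat, obs.lon) > max_dist_km
        for obs in observations)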
def test_fields(self):
    report_id = uuid.uuid1()
    self.session.add(CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        report_id=report_id, lat=GB_LAT, lon=GB_LON,
        asu=26, signal=-61, ta=10))
    self.session.flush()

    result = self.session.query(CellObservation).first()
    self.assertEqual(result.report_id, report_id)
    self.assertEqual(result.lat, GB_LAT)
    self.assertEqual(result.lon, GB_LON)
    self.assertEqual(result.radio, Radio.gsm)
    self.assertEqual(result.mcc, GB_MCC)
    self.assertEqual(result.mnc, 5)
    self.assertEqual(result.lac, 12345)
    self.assertEqual(result.cid, 23456)
    self.assertEqual(result.asu, 26)
    self.assertEqual(result.signal, -61)
    self.assertEqual(result.ta, 10)
def test_cell_histogram(self):
    session = self.db_master_session
    today = util.utcnow().date()
    yesterday = today - timedelta(1)
    two_days = today - timedelta(2)
    long_ago = today - timedelta(3)
    observations = [
        CellObservation(lat=1.0, lon=2.0, created=today),
        CellObservation(lat=1.0, lon=2.0, created=today),
        CellObservation(lat=1.0, lon=2.0, created=yesterday),
        CellObservation(lat=1.0, lon=2.0, created=two_days),
        CellObservation(lat=1.0, lon=2.0, created=two_days),
        CellObservation(lat=1.0, lon=2.0, created=two_days),
        CellObservation(lat=1.0, lon=2.0, created=long_ago),
    ]
    session.add_all(observations)
    session.commit()

    cell_histogram.delay(ago=3).get()

    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 1)
    self.assertEqual(stats[0].key, StatKey.cell)
    self.assertEqual(stats[0].time, long_ago)
    self.assertEqual(stats[0].value, 1)

    # fill up newer dates
    cell_histogram.delay(ago=2).get()
    cell_histogram.delay(ago=1).get()
    cell_histogram.delay(ago=0).get()

    # test duplicate execution
    cell_histogram.delay(ago=1).get()

    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 4)
    self.assertEqual(stats[0].time, long_ago)
    self.assertEqual(stats[0].value, 1)
    self.assertEqual(stats[1].time, two_days)
    self.assertEqual(stats[1].value, 4)
    self.assertEqual(stats[2].time, yesterday)
    self.assertEqual(stats[2].value, 5)
    self.assertEqual(stats[3].time, today)
    self.assertEqual(stats[3].value, 7)
def test_delete_cell_observations(self):
    session = self.db_master_session
    block = ObservationBlock()
    block.measure_type = ObservationType.cell
    block.start_id = 120
    block.end_id = 140
    block.s3_key = 'fake_key'
    block.archive_sha = 'fake_sha'
    block.archive_date = None
    session.add(block)

    for i in range(100, 150):
        session.add(CellObservation(id=i, created=self.really_old))
    session.commit()

    with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
        delete_cellmeasure_records.delay(batch=3).get()

    # the archived block of 20 observations (ids 120-139) is deleted
    self.assertEqual(session.query(CellObservation).count(), 30)
    self.assertTrue(block.archive_date is not None)
def test_fields(self):
    report_id = uuid.uuid1()
    session = self.session
    session.add(CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        report_id=report_id, lat=GB_LAT, lon=GB_LON,
        asu=26, signal=-61, ta=10))
    session.flush()

    result = session.query(CellObservation).first()
    self.assertEqual(result.report_id, report_id)
    self.assertEqual(result.lat, GB_LAT)
    self.assertEqual(result.lon, GB_LON)
    self.assertEqual(result.radio, Radio.gsm)
    self.assertEqual(result.mcc, GB_MCC)
    self.assertEqual(result.mnc, 5)
    self.assertEqual(result.lac, 12345)
    self.assertEqual(result.cid, 23456)
    self.assertEqual(result.asu, 26)
    self.assertEqual(result.signal, -61)
    self.assertEqual(result.ta, 10)
def test_fields(self):
    obs = CellObservation.create(
        radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
        lat=GB_LAT, lon=GB_LON, asu=26, signal=-61, ta=10)
    self.assertEqual(obs.lat, GB_LAT)
    self.assertEqual(obs.lon, GB_LON)
    self.assertEqual(obs.radio, Radio.gsm)
    self.assertEqual(obs.mcc, GB_MCC)
    self.assertEqual(obs.mnc, 5)
    self.assertEqual(obs.lac, 12345)
    self.assertEqual(obs.cid, 23456)
    self.assertEqual(obs.asu, 26)
    self.assertEqual(obs.signal, -61)
    self.assertEqual(obs.ta, 10)
    self.assertEqual(obs.shard_id, 'gsm')
def process_reports(self, reports, userid=None):
    positions = []
    cell_observations = []
    wifi_observations = []
    for i, report in enumerate(reports):
        report['report_id'] = uuid.uuid1()
        cell, wifi = self.process_report(report)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if cell or wifi:
            positions.append({
                'lat': report['lat'],
                'lon': report['lon'],
            })

    if cell_observations:
        # group by and create task per cell key
        self.stats_client.incr('items.uploaded.cell_observations',
                               len(cell_observations))
        if self.api_key_log:
            self.stats_client.incr(
                'items.api_log.%s.uploaded.'
                'cell_observations' % self.api_key_name,
                len(cell_observations))

        cells = defaultdict(list)
        for obs in cell_observations:
            cells[CellObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cells = list(cells.values())
        batch_size = 5
        countdown = 0
        for i in range(0, len(cells), batch_size):
            values = []
            for observations in cells[i:i + batch_size]:
                values.extend([encode_radio_dict(o) for o in observations])
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            self.insert_cell_task.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    if wifi_observations:
        # group by WiFi key
        self.stats_client.incr('items.uploaded.wifi_observations',
                               len(wifi_observations))
        if self.api_key_log:
            self.stats_client.incr(
                'items.api_log.%s.uploaded.'
                'wifi_observations' % self.api_key_name,
                len(wifi_observations))

        wifis = defaultdict(list)
        for obs in wifi_observations:
            wifis[WifiObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifis = list(wifis.values())
        batch_size = 20
        countdown = 0
        for i in range(0, len(wifis), batch_size):
            values = []
            for observations in wifis[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            self.insert_wifi_task.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    if userid is not None:
        scorekey = Score.to_hashkey(
            userid=userid,
            key=ScoreKey.location,
            time=util.utcnow().date())
        Score.incr(self.session, scorekey, len(positions))
    if positions:
        self.process_mapstat(positions)
def test_blacklist_moving_cells(self):
    now = util.utcnow()
    long_ago = now - timedelta(days=40)
    session = self.session

    k1 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=3, cid=4)
    k2 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=6, cid=8)
    k3 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=9, cid=12)
    k4 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=12, cid=16)
    k5 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=15, cid=20)
    k6 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=18, cid=24)

    # keys k2, k3 and k4 are expected to be detected as moving
    data = [
        # a cell with an entry but no prior position
        Cell(new_measures=3, total_measures=0, **k1),
        CellObservation(lat=1.001, lon=1.001, **k1),
        CellObservation(lat=1.002, lon=1.005, **k1),
        CellObservation(lat=1.003, lon=1.009, **k1),
        # a cell with a prior known position
        Cell(lat=2.0, lon=2.0, new_measures=2, total_measures=1, **k2),
        CellObservation(lat=2.0, lon=2.0, **k2),
        CellObservation(lat=4.0, lon=2.0, **k2),
        # a cell with a very different prior position
        Cell(lat=1.0, lon=1.0, new_measures=2, total_measures=1, **k3),
        CellObservation(lat=3.0, lon=3.0, **k3),
        CellObservation(lat=-3.0, lon=3.0, **k3),
        # another cell with a prior known position (and negative lat)
        Cell(lat=-4.0, lon=4.0, new_measures=2, total_measures=1, **k4),
        CellObservation(lat=-4.0, lon=4.0, **k4),
        CellObservation(lat=-6.0, lon=4.0, **k4),
        # an already blacklisted cell
        CellBlacklist(time=now, count=1, **k5),
        CellObservation(lat=5.0, lon=5.0, **k5),
        CellObservation(lat=8.0, lon=5.0, **k5),
        # a cell with an old different record we ignore, position
        # estimate has been updated since
        Cell(lat=6.0, lon=6.0, new_measures=2, total_measures=1, **k6),
        CellObservation(lat=6.9, lon=6.9, time=long_ago, **k6),
        CellObservation(lat=6.0, lon=6.0, **k6),
        CellObservation(lat=6.001, lon=6.0, **k6),
    ]
    session.add_all(data)
    session.commit()

    result = location_update_cell.delay(min_new=1)
    self.assertEqual(result.get(), (5, 3))

    moving = [k2, k3, k4, k5]
    black = session.query(CellBlacklist).all()
    self.assertEqual(set([b.hashkey() for b in black]),
                     set([CellBlacklist.to_hashkey(k) for k in moving]))

    keys = [k1, k2, k3, k4, k5, k6]
    observations = session.query(CellObservation).all()
    self.assertEqual(len(observations), 14)
    self.assertEqual(set([obs.hashkey() for obs in observations]),
                     set([CellObservation.to_hashkey(k) for k in keys]))

    # test duplicate call
    result = location_update_cell.delay(min_new=1)
    self.assertEqual(result.get(), (0, 0))

    self.check_stats(
        total=6,
        timer=[
            # We made duplicate calls
            ('task.data.location_update_cell', 2),
            # One of those would've scheduled a remove_cell task
            ('task.data.remove_cell', 1),
        ],
        gauge=[
            ('task.data.location_update_cell.new_measures_1_100', 2),
        ])
def process_observations(observations, session,
                         userid=None, api_key_log=False,
                         api_key_name=None):
    stats_client = get_stats_client()
    positions = []
    cell_observations = []
    wifi_observations = []
    for i, obs in enumerate(observations):
        obs['report_id'] = uuid.uuid1()
        cell, wifi = process_observation(obs, session)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if cell or wifi:
            positions.append({
                'lat': obs['lat'],
                'lon': obs['lon'],
            })

    if cell_observations:
        # group by and create task per cell key
        stats_client.incr('items.uploaded.cell_observations',
                          len(cell_observations))
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.cell_observations' % api_key_name,
                len(cell_observations))

        cells = defaultdict(list)
        for obs in cell_observations:
            cells[CellObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cells = list(cells.values())
        batch_size = 5
        countdown = 0
        for i in range(0, len(cells), batch_size):
            values = []
            for observations in cells[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_cell.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    if wifi_observations:
        # group by WiFi key
        stats_client.incr('items.uploaded.wifi_observations',
                          len(wifi_observations))
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.wifi_observations' % api_key_name,
                len(wifi_observations))

        wifis = defaultdict(list)
        for obs in wifi_observations:
            wifis[WifiObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifis = list(wifis.values())
        batch_size = 20
        countdown = 0
        for i in range(0, len(wifis), batch_size):
            values = []
            for observations in wifis[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_wifi.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, positions)
def process_report(self, data):

    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # we want to only add those not already present.
        # We also only want to copy the top-level base report data
        # and not any nested values like cell or wifi.
        for (key, value) in src.items():
            if key != 'radio' and key not in dst \
               and not isinstance(value, (tuple, list, dict)):
                dst[key] = value

    def better_cell_obs(new, old):
        comparators = [
            ('ta', operator.lt),
            ('signal', operator.gt),
            ('asu', operator.gt),
        ]
        for field, better in comparators:
            if (None not in (old[field], new[field]) and
                    better(new[field], old[field])):
                return True
        return False

    def better_wifi_obs(new, old):
        if (None not in (old['signal'], new['signal']) and
                new['signal'] > old['signal']):
            return True
        return False

    report_data = Report.validate(data)
    if report_data is None:
        return ([], [])

    cell_observations = {}
    wifi_observations = {}

    if data.get('cell'):
        # flatten report / cell data into a single dict
        for cell in data['cell']:
            # only validate the additional fields
            cell = CellReport.validate(cell)
            if cell is None:
                continue
            add_missing_dict_entries(cell, report_data)
            cell_key = CellObservation.to_hashkey(cell)
            if cell_key in cell_observations:
                existing = cell_observations[cell_key]
                if better_cell_obs(cell, existing):
                    cell_observations[cell_key] = cell
            else:
                cell_observations[cell_key] = cell
    cell_observations = cell_observations.values()

    # flatten report / wifi data into a single dict
    if data.get('wifi'):
        for wifi in data['wifi']:
            # only validate the additional fields
            wifi = WifiReport.validate(wifi)
            if wifi is None:
                continue
            add_missing_dict_entries(wifi, report_data)
            wifi_key = WifiObservation.to_hashkey(wifi)
            if wifi_key in wifi_observations:
                existing = wifi_observations[wifi_key]
                if better_wifi_obs(wifi, existing):
                    wifi_observations[wifi_key] = wifi
            else:
                wifi_observations[wifi_key] = wifi
    wifi_observations = wifi_observations.values()

    return (cell_observations, wifi_observations)
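# To illustrate better_cell_obs above: the comparators run in order and
# the first strict improvement wins, so a lower timing advance beats a
# weaker signal, and None fields are skipped entirely. (The helper is a
# closure inside process_report; it is called at top level here purely
# for illustration.)
new = {'ta': 5, 'signal': -75, 'asu': None}
old = {'ta': 8, 'signal': -60, 'asu': None}
# ta: 5 < 8, so `new` wins on the first comparator even though its
# signal is weaker; the asu comparison is skipped because of the Nones
assert better_cell_obs(new, old) is True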
def process_reports(self, reports, userid=None):
    positions = set()
    cell_observations = []
    wifi_observations = []
    for i, report in enumerate(reports):
        report['report_id'] = uuid.uuid1()
        cell, wifi = self.process_report(report)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if (cell or wifi) and report.get('lat') and report.get('lon'):
            positions.add((report['lat'], report['lon']))

    if cell_observations:
        # group by and create task per cell key
        self.stats_client.incr('items.uploaded.cell_observations',
                               len(cell_observations))
        if self.api_key and self.api_key.log:
            self.stats_client.incr(
                'items.api_log.%s.uploaded.'
                'cell_observations' % self.api_key.name,
                len(cell_observations))

        cells = defaultdict(list)
        for obs in cell_observations:
            cells[CellObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 100 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cells = list(cells.values())
        batch_size = 100
        countdown = 0
        for i in range(0, len(cells), batch_size):
            values = []
            for observations in cells[i:i + batch_size]:
                values.extend([encode_radio_dict(o) for o in observations])
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            self.insert_cell_task.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    if wifi_observations:
        # group by WiFi key
        self.stats_client.incr('items.uploaded.wifi_observations',
                               len(wifi_observations))
        if self.api_key and self.api_key.log:
            self.stats_client.incr(
                'items.api_log.%s.uploaded.'
                'wifi_observations' % self.api_key.name,
                len(wifi_observations))

        wifis = defaultdict(list)
        for obs in wifi_observations:
            wifis[WifiObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 100 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifis = list(wifis.values())
        batch_size = 100
        countdown = 0
        for i in range(0, len(wifis), batch_size):
            values = []
            for observations in wifis[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            self.insert_wifi_task.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    self.process_mapstat(positions)
    self.process_score(userid, positions)
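# The batching pattern shared by both branches above, in isolation:
# chunk the grouped observations into fixed-size batches and stagger
# each task by one extra second. A generic sketch; `enqueue` stands in
# for insert_cell_task.apply_async / insert_wifi_task.apply_async.
def enqueue_in_batches(groups, batch_size, enqueue, expires=21600):
    countdown = 0
    for i in range(0, len(groups), batch_size):
        values = []
        for observations in groups[i:i + batch_size]:
            values.extend(observations)
        # expire unprocessed tasks after six hours to avoid queue
        # overload; the increasing countdown spreads the workload and
        # avoids parallel updates of the same underlying stations
        enqueue(args=[values], expires=expires, countdown=countdown)
        countdown += 1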