def test_insert_observations_invalid_lac(self):
    # Observations with out-of-range or missing LAC/CID must be
    # rejected before they reach the update queue, leaving the
    # existing Cell record untouched.
    time = util.utcnow() - timedelta(days=1)
    today = util.utcnow().date()
    cell = CellFactory(total_measures=5)
    self.session.add(Score(key=ScoreKey.new_cell,
                           userid=1, time=today, value=7))
    self.session.flush()
    obs = dict(
        radio=int(cell.radio), mcc=cell.mcc, mnc=cell.mnc,
        psc=cell.psc, created=time, time=time,
        lat=cell.lat, lon=cell.lon,
        accuracy=0, altitude=0, altitude_accuracy=0)
    entries = [
        # LAC/CID beyond the allowed maxima
        {'lac': constants.MAX_LAC_ALL + 1,
         'cid': constants.MAX_CID_ALL + 1, 'asu': 8},
        # LAC/CID missing entirely
        {'lac': None, 'cid': None, 'asu': 8},
    ]
    for entry in entries:
        entry.update(obs)
    result = insert_measures_cell.delay(entries, userid=1)
    self.assertEqual(result.get(), 0)
    # The incomplete observations never make it into the queue
    self.assertEqual(self.data_queue.size(), 0)
    update_cell.delay().get()
    # Nothing should change in the initially created Cell record
    self.session.refresh(cell)
    cells = self.session.query(Cell).all()
    self.assertEqual(len(cells), 1)
    self._compare_sets([c.total_measures for c in cells], [5])
def test_time(self): app = self.app # test two weeks ago and "now" time = util.utcnow() - timedelta(14) tstr = encode_datetime(time) app.post_json( '/v1/submit', {"items": [ {"lat": 1.0, "lon": 2.0, "wifi": [{"key": "00aaaaaaaaaa"}], "time": tstr}, {"lat": 2.0, "lon": 3.0, "wifi": [{"key": "00bbbbbbbbbb"}]}, ]}, status=204) session = self.db_master_session result = session.query(WifiMeasure).all() self.assertEqual(len(result), 2) wifis = dict([(w.key, (w.created, w.time)) for w in result]) today = util.utcnow().date() month_rounded_tday = time.replace(day=1, hour=0, minute=0, second=0) month_rounded_tday = month_rounded_tday.replace(tzinfo=pytz.UTC) month_rounded_today = today.replace(day=1) self.assertEqual(wifis['00aaaaaaaaaa'][0].date(), today) self.assertEqual(wifis['00aaaaaaaaaa'][1], month_rounded_tday) self.assertEqual(wifis['00bbbbbbbbbb'][0].date(), today) self.assertEqual(wifis['00bbbbbbbbbb'][1].date(), month_rounded_today)
def test_insert_observations(self):
    # Wifi observations are stored, per-wifi counters are updated
    # and the submitting user's score is incremented.
    session = self.session
    time = util.utcnow() - timedelta(days=1)
    today = util.utcnow().date()
    session.add(Wifi(key="ab1234567890",
                     new_measures=0, total_measures=0))
    session.add(Score(key=ScoreKey.new_wifi,
                      userid=1, time=today, value=7))
    session.flush()
    obs = dict(
        created=time, lat=1.0, lon=2.0,
        time=time, accuracy=0, altitude=0,
        altitude_accuracy=0, radio=-1,
        heading=52.9, speed=158.5,
    )
    entries = [
        {"key": "ab1234567890", "channel": 11, "signal": -80},
        {"key": "ab1234567890", "channel": 3, "signal": -90},
        {"key": "ab1234567890", "channel": 3, "signal": -80},
        {"key": "cd3456789012", "channel": 3, "signal": -90},
    ]
    for e in entries:
        e.update(obs)
    result = insert_measures_wifi.delay(entries, userid=1)
    self.assertEqual(result.get(), 4)
    observations = session.query(WifiObservation).all()
    self.assertEqual(len(observations), 4)
    self.assertEqual(set([o.key for o in observations]),
                     set(["ab1234567890", "cd3456789012"]))
    self.assertEqual(set([o.channel for o in observations]),
                     set([3, 11]))
    self.assertEqual(set([o.signal for o in observations]),
                     set([-80, -90]))
    # BUG FIX: these two assertions previously read
    # `set([o.heading or o in observations])` (`or` instead of
    # `for`), which only evaluated the leaked loop variable from the
    # previous comprehension and asserted nothing useful.
    self.assertEqual(set([o.heading for o in observations]),
                     set([52.9]))
    self.assertEqual(set([o.speed for o in observations]),
                     set([158.5]))
    wifis = session.query(Wifi).all()
    self.assertEqual(len(wifis), 2)
    self.assertEqual(set([w.key for w in wifis]),
                     set(["ab1234567890", "cd3456789012"]))
    self.assertEqual(set([w.new_measures for w in wifis]),
                     set([1, 3]))
    self.assertEqual(set([w.total_measures for w in wifis]),
                     set([1, 3]))
    scores = session.query(Score).all()
    self.assertEqual(len(scores), 1)
    self.assertEqual(scores[0].key, ScoreKey.new_wifi)
    self.assertEqual(scores[0].value, 8)
    # test duplicate execution
    result = insert_measures_wifi.delay(entries, userid=1)
    self.assertEqual(result.get(), 4)
    # TODO this task isn't idempotent yet
    observations = session.query(WifiObservation).all()
    self.assertEqual(len(observations), 8)
def __init__(self, *args, **kw):
    """Construct a Wifi, filling in defaults for any missing
    timestamp (now) or measure counter (zero)."""
    kw.setdefault('new_measures', 0)
    kw.setdefault('total_measures', 0)
    # timestamps are only computed when actually needed
    if 'created' not in kw:
        kw['created'] = util.utcnow()
    if 'modified' not in kw:
        kw['modified'] = util.utcnow()
    super(Wifi, self).__init__(*args, **kw)
def test_created_from_blocklist_time(self):
    # A cell observed again after its temporary blocklist expired is
    # re-inserted, keeping the blocklist entry's time as `created`.
    now = util.utcnow()
    last_week = now - TEMPORARY_BLOCKLIST_DURATION - timedelta(days=1)
    obs = CellObservationFactory.build()
    self.session.add(
        CellBlocklist(time=last_week, count=1,
                      radio=obs.radio, mcc=obs.mcc,
                      mnc=obs.mnc, lac=obs.lac, cid=obs.cid))
    self.session.flush()
    # add a new entry for the previously blocklisted cell
    self.data_queue.enqueue([obs])
    self.assertEqual(self.data_queue.size(), 1)
    update_cell.delay().get()
    # the cell was inserted again
    cells = self.session.query(Cell).all()
    self.assertEqual(len(cells), 1)
    # and the creation date was set to the date of the blocklist entry
    self.assertEqual(cells[0].created, last_week)
    self.check_statcounter(StatKey.cell, 1)
    self.check_statcounter(StatKey.unique_cell, 0)
def test_histogram(self):
    # histogram() reports one point per month using the maximum
    # value seen in that month, plotted on the first of the
    # following month.
    session = self.session
    today = util.utcnow().date()
    one_day = today - timedelta(days=1)
    two_days = today - timedelta(days=2)
    one_month = today - timedelta(days=35)
    two_months = today - timedelta(days=70)
    long_ago = today - timedelta(days=100)
    stats = [
        Stat(key=StatKey.cell, time=long_ago, value=40),
        Stat(key=StatKey.cell, time=two_months, value=50),
        Stat(key=StatKey.cell, time=one_month, value=60),
        Stat(key=StatKey.cell, time=two_days, value=70),
        Stat(key=StatKey.cell, time=one_day, value=80),
        Stat(key=StatKey.cell, time=today, value=90),
    ]
    session.add_all(stats)
    session.commit()
    result = histogram(session, StatKey.cell, days=90)
    self.assertTrue(
        [unixtime(one_day), 80] in result[0])
    # the two_months maximum is plotted on the first of the next month
    if two_months.month == 12:
        expected = date(two_months.year + 1, 1, 1)
    else:
        expected = date(two_months.year, two_months.month + 1, 1)
    self.assertTrue(
        [unixtime(expected), 50] in result[0])
def test_leaders(self):
    # leaders() returns all scorers sorted by value and truncates
    # over-long nicknames to 24 characters plus an ellipsis.
    session = self.session
    today = util.utcnow().date()
    test_data = []
    for i in range(20):
        test_data.append((u'nick-%s' % i, 30))
    # maximum-length (128 char) nickname with the highest score
    highest = u'nick-high-too-long_'
    highest += (128 - len(highest)) * u'x'
    test_data.append((highest, 40))
    lowest = u'nick-low'
    test_data.append((lowest, 20))
    for nick, value in test_data:
        user = User(nickname=nick)
        session.add(user)
        session.flush()
        score = Score(key=ScoreKey.location,
                      userid=user.id, time=today, value=value)
        session.add(score)
    session.commit()
    # check the result
    result = leaders(session)
    self.assertEqual(len(result), 22)
    self.assertEqual(result[0]['nickname'], highest[:24] + u'...')
    self.assertEqual(result[0]['num'], 40)
    self.assertTrue(lowest in [r['nickname'] for r in result])
def global_stats(session):
    """Return yesterday's headline statistics, formatted in millions
    with two decimal places.

    Falls back to the most recent stored value per key when
    yesterday's entry is missing (e.g. shortly after midnight,
    before the daily task has run), and to 0 when no value exists.
    """
    yesterday = util.utcnow().date() - timedelta(1)
    names = ('cell', 'wifi', 'unique_cell', 'unique_ocid_cell',
             'unique_wifi')
    stat_keys = [STAT_TYPE[name] for name in names]
    rows = session.query(Stat.key, Stat.value).filter(
        Stat.key.in_(stat_keys)).filter(
        Stat.time == yesterday)
    # keep only non-zero values from yesterday
    stats = dict((key, int(value))
                 for key, value in rows.all() if value)
    result = {}
    for name in names:
        stat_key = STAT_TYPE[name]
        if stat_key in stats:
            result[name] = stats[stat_key]
            continue
        # no stats entry available, take the latest stored value
        row = session.query(Stat.value).filter(
            Stat.key == stat_key).order_by(
            Stat.time.desc()).limit(1).first()
        result[name] = row[0] if row is not None else 0
    # show as millions
    return dict((name, "%.2f" % ((value // 10000) / 100.0))
                for name, value in result.items())
def __call__(self, diff=True, _filename=None):
    """Download a cell export file and import its stations.

    :param diff: if True fetch the hourly diff export, otherwise
        the full export.
    :param _filename: testing hook to override the remote filename.
    """
    url = self.settings.get('url')
    apikey = self.settings.get('apikey')
    if not url or not apikey:  # pragma: no cover
        return
    if _filename is None:
        if diff:
            # diff files are named after the previous full hour
            prev_hour = util.utcnow() - timedelta(hours=1)
            _filename = prev_hour.strftime(
                'cell_towers_diff-%Y%m%d%H.csv.gz')
        else:  # pragma: no cover
            _filename = 'cell_towers.csv.gz'
    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, _filename)
        with open(path, 'wb') as temp_file:
            # stream the download to disk in 1 MiB chunks
            with closing(requests.get(url,
                                      params={'apiKey': apikey,
                                              'filename': _filename},
                                      stream=True)) as req:
                for chunk in req.iter_content(chunk_size=2 ** 20):
                    temp_file.write(chunk)
                temp_file.flush()
            with self.task.redis_pipeline() as pipe:
                with self.task.db_session() as session:
                    self.import_stations(session, pipe, path)
def leaders_weekly(session, batch=20):
    """Return the top `batch` scorers of the last seven days for
    both the new_cell and new_wifi score categories.

    Each entry is ``{'nickname': ..., 'num': ...}``; unknown users
    show up as 'anonymous' and long nicknames are truncated.
    """
    result = {'new_cell': [], 'new_wifi': []}
    one_week = util.utcnow().date() - timedelta(7)
    score_rows = {}
    userids = set()
    for name in ('new_cell', 'new_wifi'):
        rows = session.query(
            Score.userid, func.sum(Score.value)).filter(
            Score.key == ScoreKey[name]).filter(
            Score.time >= one_week).order_by(
            func.sum(Score.value).desc()).group_by(
            Score.userid).limit(batch).all()
        score_rows[name] = rows
        userids.update(set([row[0] for row in rows]))
    if not userids:  # pragma: no cover
        return result
    # resolve user ids to nicknames in one query
    user_rows = session.query(User.id, User.nickname).filter(
        User.id.in_(userids)).all()
    users = dict(user_rows)
    for name, rows in score_rows.items():
        for userid, value in rows:
            nickname = users.get(userid, 'anonymous')
            if len(nickname) > 24:  # pragma: no cover
                nickname = nickname[:24] + u'...'
            result[name].append(
                {'nickname': nickname, 'num': int(value)})
    return result
def test_insert_measures_out_of_range(self):
    # asu/signal/ta values outside their valid ranges are normalized
    # to defaults rather than causing the measure to be rejected.
    session = self.db_master_session
    time = util.utcnow() - timedelta(days=1)
    measure = dict(
        id=0, created=encode_datetime(time),
        lat=PARIS_LAT, lon=PARIS_LON,
        time=encode_datetime(time), accuracy=0, altitude=0,
        altitude_accuracy=0, radio=RADIO_TYPE['gsm'],
        mcc=FRANCE_MCC, mnc=2, lac=3, cid=4)
    entries = [
        # in-range values pass through unchanged
        {"asu": 8, "signal": -70, "ta": 32},
        # below-range values get reset
        {"asu": -10, "signal": -300, "ta": -10},
        # above-range values get reset
        {"asu": 256, "signal": 16, "ta": 128},
    ]
    for e in entries:
        e.update(measure)
    result = insert_measures_cell.delay(entries)
    self.assertEqual(result.get(), 3)
    measures = session.query(CellMeasure).all()
    self.assertEqual(len(measures), 3)
    self.assertEqual(set([m.asu for m in measures]), set([-1, 8]))
    self.assertEqual(set([m.signal for m in measures]), set([0, -70]))
    self.assertEqual(set([m.ta for m in measures]), set([0, 32]))
def test_normalize_time(self):
    # normalized_time() rounds valid timestamps to the first of
    # their month and replaces invalid, unparseable or out-of-range
    # times with the current month.
    now = util.utcnow()
    first_args = dict(day=1, hour=0, minute=0, second=0,
                      microsecond=0, tzinfo=UTC)
    now_enc = now.replace(**first_args)
    two_weeks_ago = now - timedelta(14)
    short_format = now.date().isoformat()
    entries = [
        ('', now_enc),
        (now, now_enc),
        (two_weeks_ago, two_weeks_ago.replace(**first_args)),
        (short_format, now_enc),
        # too far in the past
        ("2011-01-01T11:12:13.456Z", now_enc),
        # in the future
        ("2070-01-01T11:12:13.456Z", now_enc),
        # unparseable formats
        ("10-10-10", now_enc),
        ("2011-10-13T.Z", now_enc),
    ]
    for entry in entries:
        in_, expected = entry
        if not isinstance(in_, str):
            in_ = encode_datetime(in_)
        self.assertEqual(
            decode_datetime(normalized_time(in_)), expected)
def query_database(query, raven_client):
    """Look up the queried wifi macs in the sharded wifi tables.

    Returns rows that have a known position and are neither
    permanently nor temporarily blocked; on any database error the
    exception is reported to raven and the partial (possibly empty)
    result is returned.
    """
    macs = [lookup.mac for lookup in query.wifi]
    if not macs:  # pragma: no cover
        return []
    result = []
    today = util.utcnow().date()
    # stations blocked after this date are still inside the
    # temporary blocklist window
    temp_blocked = today - TEMPORARY_BLOCKLIST_DURATION
    try:
        load_fields = ('lat', 'lon', 'radius')
        # group the macs by the shard table they live in
        shards = defaultdict(list)
        for mac in macs:
            shards[WifiShard.shard_model(mac)].append(mac)
        for shard, shard_macs in shards.items():
            rows = (
                query.session.query(shard)
                .filter(shard.mac.in_(shard_macs))
                .filter(shard.lat.isnot(None))
                .filter(shard.lon.isnot(None))
                .filter(or_(
                    shard.block_count.is_(None),
                    shard.block_count < PERMANENT_BLOCKLIST_THRESHOLD))
                .filter(or_(
                    shard.block_last.is_(None),
                    shard.block_last < temp_blocked))
                .options(load_only(*load_fields))
            ).all()
            result.extend(list(rows))
    except Exception:
        raven_client.captureException()
    return result
def import_stations(self, session, pipe, filename):
    """Bulk import cell stations from a gzipped CSV export file.

    Rows are upserted in batches via MySQL's
    INSERT ... ON DUPLICATE KEY UPDATE; the count of newly inserted
    rows feeds the stat counter and every touched cell area is
    queued for a later area update.
    """
    today = util.utcnow().date()
    on_duplicate = (
        '`modified` = values(`modified`)'
        ', `lat` = values(`lat`)'
        ', `lon` = values(`lon`)'
        ', `psc` = values(`psc`)'
        ', `max_lat` = values(`max_lat`)'
        ', `min_lat` = values(`min_lat`)'
        ', `max_lon` = values(`max_lon`)'
        ', `min_lon` = values(`min_lon`)'
        ', `radius` = values(`radius`)'
        ', `samples` = values(`samples`)'
    )
    table_insert = self.cell_model.__table__.insert(
        mysql_on_duplicate=on_duplicate)

    def commit_batch(rows):
        # upsert one batch and update the stat counter
        result = session.execute(table_insert, rows)
        count = result.rowcount
        # apply trick to avoid querying for existing rows,
        # MySQL claims 1 row for an inserted row, 2 for an updated row
        inserted_rows = 2 * len(rows) - count
        changed_rows = count - len(rows)
        assert inserted_rows + changed_rows == len(rows)
        StatCounter(self.stat_key, today).incr(pipe, inserted_rows)

    areaids = set()
    with util.gzip_open(filename, 'r') as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            csv_reader = csv.reader(gzip_file)
            parse_row = partial(self.make_import_dict,
                                self.cell_model.validate,
                                self.import_spec)
            rows = []
            for row in csv_reader:
                # skip any header row
                if (csv_reader.line_num == 1 and
                        row[0] == 'radio'):  # pragma: no cover
                    continue
                data = parse_row(row)
                if data is not None:
                    rows.append(data)
                    areaids.add((int(data['radio']), data['mcc'],
                                 data['mnc'], data['lac']))
                if len(rows) == self.batch_size:  # pragma: no cover
                    commit_batch(rows)
                    session.flush()
                    rows = []
            # flush the final partial batch
            if rows:
                commit_batch(rows)
    self.area_queue.enqueue(
        [encode_cellarea(*id_) for id_ in areaids], json=False)
def test_score(self):
    # a freshly created station with few samples has a low score
    now = util.utcnow()
    wifi = WifiShard.create(
        mac='111101123456', created=now, modified=now,
        radius=10, samples=2,
    )
    self.assertAlmostEqual(wifi.score(now), 0.1, 2)
def test_block_last(self):
    # a recent block_last date lowers the station score compared to
    # an otherwise identical never-blocked station
    now = util.utcnow()
    assert round(station_score(Dummy(
        now - timedelta(days=70), now - timedelta(days=60),
        10, 64, (now - timedelta(days=65)).date()), now), 2) == 1.73
def test_nickname_header_update(self):
    # An X-Nickname header on submit increments the existing
    # location and new_wifi scores of the matching user.
    # NOTE: Python 2 code — `nickname` is a UTF-8 byte string that
    # is decoded for database storage.
    app = self.app
    nickname = 'World Tr\xc3\xa4veler'
    utcday = util.utcnow().date()
    session = self.db_master_session
    user = User(nickname=nickname.decode('utf-8'))
    session.add(user)
    session.flush()
    session.add(Score(userid=user.id,
                      key=SCORE_TYPE['location'], value=7))
    session.add(Score(userid=user.id,
                      key=SCORE_TYPE['new_wifi'], value=3))
    session.commit()
    app.post_json(
        '/v1/submit', {"items": [
            {"lat": 1.0, "lon": 2.0,
             "wifi": [{"key": "00AAAAAAAAAA"}]},
        ]},
        headers={'X-Nickname': nickname},
        status=204)
    result = session.query(User).all()
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0].nickname, nickname.decode('utf-8'))
    result = session.query(Score).all()
    self.assertEqual(len(result), 2)
    self.assertEqual(set([r.name for r in result]),
                     set(['location', 'new_wifi']))
    for r in result:
        if r.name == 'location':
            # one additional location score for the submission
            self.assertEqual(r.value, 8)
            self.assertEqual(r.time, utcday)
        elif r.name == 'new_wifi':
            # one additional new_wifi score for the new network
            self.assertEqual(r.value, 4)
            self.assertEqual(r.time, utcday)
def monitor_api_key_limits(self):
    """Report today's per-API-key rate limit counters from redis as
    statsd gauges, resolving keys to their human readable names.

    Returns a dict mapping key name to counter value; errors are
    logged to raven and an empty/partial dict is returned.
    """
    result = {}
    try:
        today = util.utcnow().strftime('%Y%m%d')
        # redis keys look like 'apilimit:<apikey>:<yyyymmdd>'
        keys = self.redis_client.keys('apilimit:*:' + today)
        if keys:
            values = self.redis_client.mget(keys)
            keys = [k.split(':')[1] for k in keys]
        else:
            values = []
        names = {}
        if keys:
            with self.db_session(commit=False) as session:
                query = (ApiKey.querykeys(session, keys)
                         .options(load_only('valid_key', 'shortname')))
                for api_key in query.all():
                    # NOTE(review): `.name` is presumably backed by
                    # the loaded `shortname` column — confirm against
                    # the ApiKey model.
                    names[api_key.valid_key] = api_key.name
        result = {}
        for k, v in zip(keys, values):
            # fall back to the raw key if no name was found
            name = names.get(k, k)
            value = int(v)
            result[name] = value
            self.stats_client.gauge('apilimit.' + name, value)
    except Exception:  # pragma: no cover
        # Log but ignore the exception
        self.raven_client.captureException()
    return result
def test_score(self): now = util.utcnow() assert round(station_score(Dummy( now, now, 0, 1), now), 2) == 0.05 assert round(station_score(Dummy( now - timedelta(days=1), now, 10, 2), now), 2) == 0.1 assert round(station_score(Dummy( now - timedelta(days=5), now, 10, 2), now), 2) == 0.5 assert round(station_score(Dummy( now - timedelta(days=10), now, 10, 2), now), 2) == 1.0 assert round(station_score(Dummy( now - timedelta(days=10), now, 10, 64), now), 2) == 6.0 assert round(station_score(Dummy( now - timedelta(days=10), now, 10, 1024), now), 2) == 10.0 assert round(station_score(Dummy( now - timedelta(days=10), now, 0, 1024), now), 2) == 0.5 assert round(station_score(Dummy( now - timedelta(days=70), now - timedelta(days=40), 10, 1024), now), 2) == 7.07 assert round(station_score(Dummy( now - timedelta(days=190), now - timedelta(days=180), 10, 1024), now), 2) == 3.78 assert round(station_score(Dummy( now - timedelta(days=190), now - timedelta(days=180), 10, 64), now), 2) == 2.27
def test_new(self):
    # A single queued observation creates a fresh wifi shard row
    # with position, zero radius, one sample and no block state.
    utcnow = util.utcnow()
    obs = WifiObservationFactory.build()
    self.data_queue.enqueue([obs])
    self.assertEqual(self.data_queue.size(), 1)
    update_wifi.delay().get()
    shard = WifiShard.shard_model(obs.mac)
    wifis = self.session.query(shard).all()
    self.assertEqual(len(wifis), 1)
    wifi = wifis[0]
    self.assertAlmostEqual(wifi.lat, obs.lat)
    self.assertAlmostEqual(wifi.max_lat, obs.lat)
    self.assertAlmostEqual(wifi.min_lat, obs.lat)
    self.assertAlmostEqual(wifi.lon, obs.lon)
    self.assertAlmostEqual(wifi.max_lon, obs.lon)
    self.assertAlmostEqual(wifi.min_lon, obs.lon)
    self.assertEqual(wifi.country, 'GB')
    self.assertEqual(wifi.radius, 0)
    self.assertEqual(wifi.samples, 1)
    self.assertEqual(wifi.created.date(), utcnow.date())
    self.assertEqual(wifi.modified.date(), utcnow.date())
    self.assertEqual(wifi.block_first, None)
    self.assertEqual(wifi.block_last, None)
    self.assertEqual(wifi.block_count, None)
def test_temp_blocked_admitted_again(self):
    # A wifi whose temporary block expired is admitted again: the
    # new observation sets its position but the original creation
    # date is kept and no unique_wifi stat is counted.
    now = util.utcnow()
    last_week = now - TEMPORARY_BLOCKLIST_DURATION - timedelta(days=1)
    obs = WifiObservationFactory()
    WifiShardFactory(
        mac=obs.mac, lat=None, lon=None, samples=0,
        created=last_week, modified=last_week,
        block_first=last_week.date(), block_last=last_week.date(),
        block_count=1)
    self.session.flush()
    # add a new entry for the previously blocked wifi
    self.data_queue.enqueue([obs])
    self.assertEqual(self.data_queue.size(), 1)
    update_wifi.delay().get()
    # the wifi was inserted again
    shard = WifiShard.shard_model(obs.mac)
    wifis = self.session.query(shard).all()
    self.assertEqual(len(wifis), 1)
    wifi = wifis[0]
    self.assertEqual(wifi.created.date(), last_week.date())
    self.assertAlmostEqual(wifi.lat, obs.lat)
    self.assertAlmostEqual(wifi.lon, obs.lon)
    self.assertEqual(wifi.country, 'GB')
    self.assertEqual(wifi.samples, 1)
    self.check_statcounter(StatKey.unique_wifi, 0)
def import_latest_ocid_cells(self, diff=True, filename=None,
                             session=None):
    """Download the latest OpenCellID export and import it.

    :param diff: fetch the hourly diff file (default) instead of
        the full export.
    :param filename: testing hook to override the remote filename.
    :param session: testing hook to supply a database session.
    """
    url = self.app.settings['ichnaea']['ocid_url']
    apikey = self.app.settings['ichnaea']['ocid_apikey']
    if not url or not apikey:  # pragma: no cover
        return
    if filename is None:
        if diff:
            # diff files are named after the previous full hour
            prev_hour = util.utcnow() - timedelta(hours=1)
            filename = prev_hour.strftime(
                'cell_towers_diff-%Y%m%d%H.csv.gz')
        else:  # pragma: no cover
            filename = 'cell_towers.csv.gz'
    # stream the download into a self-destructing temp directory
    with closing(requests.get(url,
                              params={'apiKey': apikey,
                                      'filename': filename},
                              stream=True)) as r:
        with selfdestruct_tempdir() as d:
            path = os.path.join(d, filename)
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=2 ** 20):
                    f.write(chunk)
                f.flush()
            with self.redis_pipeline() as pipe:
                with self.db_session() as dbsession:
                    if session is None:  # pragma: no cover
                        session = dbsession
                    import_stations(session, pipe, path, CELL_FIELDS)
def export_modified_cells(self, hourly=True, bucket=None):
    """Export cell stations as a gzipped CSV and upload it to S3.

    :param hourly: if True export only cells modified during the
        previous full hour (diff file), otherwise all positioned
        cells (full file).
    :param bucket: testing hook to override the S3 bucket name.
    """
    if bucket is None:  # pragma: no cover
        bucket = self.app.settings['ichnaea']['s3_assets_bucket']
    if not bucket:  # pragma: no cover
        return
    now = util.utcnow()
    if hourly:
        end_time = now.replace(minute=0, second=0)
        file_time = end_time
        file_type = 'diff'
        start_time = end_time - timedelta(hours=1)
        cond = and_(Cell.__table__.c.modified >= start_time,
                    Cell.__table__.c.modified < end_time,
                    Cell.__table__.c.lat.isnot(None))
    else:
        file_time = now.replace(hour=0, minute=0, second=0)
        file_type = 'full'
        cond = Cell.__table__.c.lat.isnot(None)
    filename = 'MLS-%s-cell-export-' % file_type
    filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')
    with selfdestruct_tempdir() as d:
        path = os.path.join(d, filename)
        with self.db_session(commit=False) as session:
            write_stations_to_csv(session, Cell.__table__,
                                  CELL_COLUMNS, cond,
                                  path, make_cell_export_dict,
                                  CELL_FIELDS)
        write_stations_to_s3(path, bucket)
def test_fields(self):
    # All WifiShard columns round-trip through create() and a query.
    now = util.utcnow()
    today = now.date()
    self.session.add(WifiShard.create(
        mac='111101123456', created=now, modified=now,
        lat=GB_LAT, max_lat=GB_LAT, min_lat=GB_LAT,
        lon=GB_LON, max_lon=GB_LON, min_lon=GB_LON,
        radius=200, region='GB', samples=10,
        source=StationSource.gnss,
        block_first=today, block_last=today, block_count=1,
        _raise_invalid=True,
    ))
    self.session.flush()
    wifi = self.session.query(WifiShard0).first()
    self.assertEqual(wifi.mac, '111101123456')
    self.assertEqual(wifi.created, now)
    self.assertEqual(wifi.modified, now)
    self.assertEqual(wifi.lat, GB_LAT)
    self.assertEqual(wifi.max_lat, GB_LAT)
    self.assertEqual(wifi.min_lat, GB_LAT)
    self.assertEqual(wifi.lon, GB_LON)
    self.assertEqual(wifi.max_lon, GB_LON)
    self.assertEqual(wifi.min_lon, GB_LON)
    self.assertEqual(wifi.radius, 200)
    self.assertEqual(wifi.region, 'GB')
    self.assertEqual(wifi.samples, 10)
    self.assertEqual(wifi.source, StationSource.gnss)
    self.assertEqual(wifi.block_first, today)
    self.assertEqual(wifi.block_last, today)
    self.assertEqual(wifi.block_count, 1)
def test_stats(self):
    # The stats view formats yesterday's values in millions and
    # serves the cached result when no db session is available.
    day = util.utcnow().date() - timedelta(1)
    session = self.db_master_session
    stats = [
        Stat(key=STAT_TYPE['cell'], time=day, value=2000000),
        Stat(key=STAT_TYPE['wifi'], time=day, value=2000000),
        Stat(key=STAT_TYPE['unique_cell'], time=day, value=1000000),
        Stat(key=STAT_TYPE['unique_wifi'], time=day, value=2000000),
    ]
    session.add_all(stats)
    session.commit()
    request = DummyRequest()
    request.db_slave_session = self.db_master_session
    request.registry.redis_client = self.redis_client
    inst = self._make_view(request)
    result = inst.stats_view()
    self.assertEqual(result['page_title'], 'Statistics')
    self.assertEqual(
        result['metrics1'], [
            {'name': 'Unique Cells', 'value': '1.00'},
            {'name': 'Cell Observations', 'value': '2.00'},
        ])
    self.assertEqual(
        result['metrics2'], [
            {'name': 'Unique Wifi Networks', 'value': '2.00'},
            {'name': 'Wifi Observations', 'value': '2.00'},
        ])
    # call the view again, without a working db session, so
    # we can be sure to use the cached result
    inst = self._make_view(request)
    request.db_slave_session = None
    second_result = inst.stats_view()
    self.assertEqual(second_result, result)
def test_histogram_different_stat_name(self, ro_session):
    # histogram() works for any stat key, not just StatKey.cell
    today = util.utcnow().date()
    ro_session.add(Stat(key=StatKey.unique_cell, time=today, value=9))
    ro_session.commit()
    result = histogram(ro_session, StatKey.unique_cell)
    first_of_month = today.replace(day=1)
    assert result == [[[unixtime(first_of_month), 9]]]
def __call__(self, hourly=True, _bucket=None):
    """Write a cell station export CSV and upload it to S3.

    :param hourly: if True export only the previous full hour
        (diff file), otherwise everything (full file).
    :param _bucket: testing hook to override the S3 bucket.
    """
    if _bucket is None:  # pragma: no cover
        bucket = self.settings['bucket']
    else:
        bucket = _bucket
    if not bucket:  # pragma: no cover
        return
    now = util.utcnow()
    start_time = None
    end_time = None
    if hourly:
        end_time = now.replace(minute=0, second=0)
        file_time = end_time
        file_type = 'diff'
        start_time = end_time - timedelta(hours=1)
    else:
        file_time = now.replace(hour=0, minute=0, second=0)
        file_type = 'full'
    filename = 'MLS-%s-cell-export-' % file_type
    filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')
    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, filename)
        with self.task.db_session(commit=False) as session:
            write_stations_to_csv(
                session, path,
                start_time=start_time, end_time=end_time)
        self.write_stations_to_s3(path, bucket)
def test_score(self):
    # score() grows with age span and sample count and shrinks for
    # small radii and stations not seen recently.
    now = util.utcnow()
    self.assertAlmostEqual(DummyModel(
        now, now, 0, 1).score(now), 0.05, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=1), now, 10, 2).score(now), 0.1, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=5), now, 10, 2).score(now), 0.5, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=10), now, 10, 2).score(now), 1.0, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=10), now, 10, 64).score(now), 6.0, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=10), now, 10, 1024).score(now), 10.0, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=10), now, 0, 1024).score(now), 0.5, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=70), now - timedelta(days=40),
        10, 1024).score(now), 7.07, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=190), now - timedelta(days=180),
        10, 1024).score(now), 3.78, 2)
    self.assertAlmostEqual(DummyModel(
        now - timedelta(days=190), now - timedelta(days=180),
        10, 64).score(now), 2.27, 2)
def histogram(session, name, days=365):
    """Return monthly maxima of a stat over the last `days` days.

    Each month's maximum is plotted on the first day of the
    following month (capped at yesterday), as
    ``[[unix_millis, value], ...]`` wrapped in an outer list.
    """
    today = util.utcnow().date()
    start = today - timedelta(days=days)
    stat_key = STAT_TYPE[name]
    month_key = (func.year(Stat.time), func.month(Stat.time))
    rows = session.query(func.max(Stat.value), *month_key).filter(
        Stat.key == stat_key).filter(
        Stat.time >= start).filter(
        Stat.time < today).group_by(
        *month_key).order_by(
        *month_key
    )
    result = []
    for num, year, month in rows.all():
        # use first of August to plot the highest result for July
        if month == 12:  # pragma: no cover
            next_month = date(year + 1, 1, 1)
        else:
            next_month = date(year, month + 1, 1)
        # we restrict dates to be at most yesterday
        next_month = min(next_month, today - timedelta(days=1))
        result.append([timegm(next_month.timetuple()) * 1000, num])
    return [result]
def __init__(self, task, session, pipe, utcnow=None):
    """Set up the data task with a redis pipeline and bind the
    matching data queue; `utcnow` is an overridable clock for tests.
    """
    DataTask.__init__(self, task, session)
    self.pipe = pipe
    self.data_queue = self.task.app.data_queues[self.queue_name]
    self.utcnow = util.utcnow() if utcnow is None else utcnow
def test_multiple_cells(self):
    # With two matching cells the result position is a weighted
    # average and the result score is the sum of both cell scores.
    now = util.utcnow()
    cell = CellShardFactory(samples=100)
    cell2 = CellShardFactory(radio=cell.radio, mcc=cell.mcc,
                             mnc=cell.mnc, lac=cell.lac,
                             cid=cell.cid + 1,
                             lat=cell.lat + 1.0,
                             lon=cell.lon + 1.0,
                             samples=10)
    self.session.flush()
    query = self.model_query(cells=[cell, cell2])
    results = self.source.search(query)
    self.check_model_results(
        results, [cell],
        lat=cell.lat + 0.3333333,
        lon=cell.lon + 0.3333333,
        accuracy=CELL_MAX_ACCURACY)
    self.assertAlmostEqual(
        results.best().score, cell.score(now) + cell2.score(now), 4)
def test_fields(self, session):
    # All WifiShard columns round-trip through create() and a query.
    now = util.utcnow()
    today = now.date()
    session.add(
        WifiShard.create(
            mac='111101123456', created=now, modified=now,
            lat=GB_LAT, max_lat=GB_LAT, min_lat=GB_LAT,
            lon=GB_LON, max_lon=GB_LON, min_lon=GB_LON,
            radius=200, region='GB', samples=10,
            source=ReportSource.gnss, weight=1.5, last_seen=today,
            block_first=today, block_last=today, block_count=1,
            _raise_invalid=True,
        ))
    session.flush()
    wifi = session.query(WifiShard0).first()
    assert wifi.mac == '111101123456'
    assert wifi.created == now
    assert wifi.modified == now
    assert wifi.lat == GB_LAT
    assert wifi.max_lat == GB_LAT
    assert wifi.min_lat == GB_LAT
    assert wifi.lon == GB_LON
    assert wifi.max_lon == GB_LON
    assert wifi.min_lon == GB_LON
    assert wifi.radius == 200
    assert wifi.region == 'GB'
    assert wifi.samples == 10
    assert wifi.source == ReportSource.gnss
    assert wifi.weight == 1.5
    assert wifi.last_seen == today
    assert wifi.block_first == today
    assert wifi.block_last == today
    assert wifi.block_count == 1
def test_fields(self):
    # All WifiShard columns round-trip through create() and a query.
    now = util.utcnow()
    today = now.date()
    self.session.add(
        WifiShard.create(
            mac='111101123456', created=now, modified=now,
            lat=GB_LAT, max_lat=GB_LAT, min_lat=GB_LAT,
            lon=GB_LON, max_lon=GB_LON, min_lon=GB_LON,
            radius=200, region='GB', samples=10,
            source=StationSource.gnss, weight=1.5, last_seen=today,
            block_first=today, block_last=today, block_count=1,
            _raise_invalid=True,
        ))
    self.session.flush()
    wifi = self.session.query(WifiShard0).first()
    self.assertEqual(wifi.mac, '111101123456')
    self.assertEqual(wifi.created, now)
    self.assertEqual(wifi.modified, now)
    self.assertEqual(wifi.lat, GB_LAT)
    self.assertEqual(wifi.max_lat, GB_LAT)
    self.assertEqual(wifi.min_lat, GB_LAT)
    self.assertEqual(wifi.lon, GB_LON)
    self.assertEqual(wifi.max_lon, GB_LON)
    self.assertEqual(wifi.min_lon, GB_LON)
    self.assertEqual(wifi.radius, 200)
    self.assertEqual(wifi.region, 'GB')
    self.assertEqual(wifi.samples, 10)
    self.assertEqual(wifi.source, StationSource.gnss)
    self.assertEqual(wifi.weight, 1.5)
    self.assertEqual(wifi.last_seen, today)
    self.assertEqual(wifi.block_first, today)
    self.assertEqual(wifi.block_last, today)
    self.assertEqual(wifi.block_count, 1)
def export_file(filename, tablename, _db=None, _session=None):
    """Export datamap grid points modified in the last year into a
    gzipped file of randomized points.

    Reads the grid table in pages of `limit` rows, writes
    `random_points(...)` lines per grid cell, and removes the file
    again if nothing was exported. Returns the number of lines
    written. `_db`/`_session` are testing hooks.
    """
    today = util.utcnow().date()
    one_year_ago = today - timedelta(days=365)
    one_year_ago = one_year_ago.strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)
    offset = 0
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break
                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    # blur each grid cell into `num` randomized points
                    extend(random_points(lat, lon, row.num))
                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit
    if not result_rows:
        os.remove(filename)
    db.close()
    return result_rows
def test_ok_wifi(self):
    # Wifi submissions are accepted with normalized keys, default
    # channel/signal values and a month-rounded observation time.
    app = self.app
    now = util.utcnow()
    today = now.date()
    first_of_month = now.replace(day=1, hour=0, minute=0, second=0)
    wifi_data = [{
        "key": "0012AB12AB12",
        "signalToNoiseRatio": 5
    }, {
        "key": "00:34:cd:34:cd:34",
        "signalToNoiseRatio": 5
    }]
    res = app.post_json('/v1/submit', {
        "items": [{
            "lat": 12.3456781,
            "lon": 23.4567892,
            "accuracy": 17,
            "wifi": wifi_data
        }]
    }, status=204)
    self.assertEqual(res.body, '')
    session = self.db_master_session
    wifi_result = session.query(WifiObservation).all()
    self.assertEqual(len(wifi_result), 2)
    item = wifi_result[0]
    # both observations share the same report id
    report_id = item.report_id
    self.assertTrue(isinstance(report_id, uuid.UUID))
    self.assertEqual(item.created.date(), today)
    self.assertEqual(item.time, first_of_month)
    self.assertEqual(item.lat, 12.3456781)
    self.assertEqual(item.lon, 23.4567892)
    self.assertEqual(item.accuracy, 17)
    self.assertEqual(item.altitude, 0)
    self.assertEqual(item.altitude_accuracy, 0)
    # keys are lower-cased and have separators stripped
    self.assertTrue(item.key in ("0012ab12ab12", "0034cd34cd34"))
    self.assertEqual(item.channel, 0)
    self.assertEqual(item.signal, 0)
    self.assertEqual(item.snr, 5)
    item = wifi_result[1]
    self.assertEqual(item.report_id, report_id)
    self.assertEqual(item.created.date(), today)
    self.assertEqual(item.lat, 12.3456781)
    self.assertEqual(item.lon, 23.4567892)
def test_stats_wifi_json(self):
    # The JSON wifi stats view returns yesterday's value and serves
    # an identical (cached) payload on a second request.
    app = self.app
    today = util.utcnow().date()
    yesterday = today - timedelta(1)
    session = self.db_slave_session
    stat = Stat(key=StatKey.unique_wifi, time=yesterday, value=2)
    session.add(stat)
    session.commit()
    result = app.get('/stats_wifi.json', status=200)
    self.assertEqual(
        result.json, {'series': [
            {'data': [[timegm(yesterday.timetuple()) * 1000, 2]],
             'title': 'MLS WiFi'},
        ]}
    )
    second_result = app.get('/stats_wifi.json', status=200)
    self.assertEqual(second_result.json, result.json)
def test_ok_cell(self):
    # A cell submission is stored with a month-rounded time; the
    # per-cell radio value overrides the report-level radio.
    app = self.app
    now = util.utcnow()
    today = now.date()
    first_of_month = now.replace(day=1, hour=0, minute=0, second=0)
    cell_data = [{
        "radio": Radio.umts.name, "mcc": FRANCE_MCC,
        "mnc": 1, "lac": 2, "cid": 1234
    }]
    res = app.post_json('/v1/submit?key=test', {
        "items": [{
            "lat": PARIS_LAT,
            "lon": PARIS_LON,
            "time": now.strftime('%Y-%m-%d'),
            "accuracy": 10,
            "altitude": 123,
            "altitude_accuracy": 7,
            "radio": Radio.gsm.name,
            "cell": cell_data
        }]
    }, status=204)
    self.assertEqual(res.body, '')
    session = self.session
    cell_result = session.query(CellObservation).all()
    self.assertEqual(len(cell_result), 1)
    item = cell_result[0]
    self.assertTrue(isinstance(item.report_id, uuid.UUID))
    self.assertEqual(item.created.date(), today)
    self.assertEqual(item.time, first_of_month)
    self.assertEqual(item.lat, PARIS_LAT)
    self.assertEqual(item.lon, PARIS_LON)
    self.assertEqual(item.accuracy, 10)
    self.assertEqual(item.altitude, 123)
    self.assertEqual(item.altitude_accuracy, 7)
    self.assertEqual(item.radio, Radio.umts)
    self.assertEqual(item.mcc, FRANCE_MCC)
    self.assertEqual(item.mnc, 1)
    self.assertEqual(item.lac, 2)
    self.assertEqual(item.cid, 1234)
def test_log_stats(self, app, redis, stats):
    # A single keyed submission must emit the upload/request metrics
    # and record the API user under a per-day redis key.
    cell, query = self._one_cell_query()
    self._post(app, [query], api_key='test')
    stats.check(
        counter=[
            ('data.batch.upload', 1),
            ('data.batch.upload', ['key:test']),
            ('request', [self.metric_path, 'method:post',
                         'status:%s' % self.status]),
            (self.metric_type + '.request',
             [self.metric_path, 'key:test']),
        ],
        timer=[
            ('request', [self.metric_path, 'method:post']),
        ])
    today = util.utcnow().date()
    expected_key = 'apiuser:submit:test:%s' % today.strftime('%Y-%m-%d')
    found_keys = [k.decode('ascii') for k in redis.keys('apiuser:*')]
    assert found_keys == [expected_key]
def verified_delete(self, block_id, batch=10000):
    """Delete the observation rows covered by an archived block.

    Looks up the ObservationBlock with the given ``block_id``, removes
    the matching observation rows in ``batch``-sized id ranges and
    finally stamps the block with an archive date.
    """
    utcnow = util.utcnow()
    with self.db_session() as session:
        block = session.query(ObservationBlock).filter(
            ObservationBlock.id == block_id).first()
        observation_type = block.measure_type
        # Map the block's measure type onto the concrete model class.
        obs_cls = OBSERVATION_TYPE_META[observation_type]['class']

        # Delete in id chunks and flush after each one, so the work is
        # sent to the database incrementally instead of in one huge
        # statement.
        for start in range(block.start_id, block.end_id, batch):
            end = min(block.end_id, start + batch)
            q = session.query(obs_cls).filter(
                obs_cls.id >= start,
                obs_cls.id < end)
            q.delete()
            session.flush()
        # Record when this block's observations were removed.
        block.archive_date = utcnow
        session.commit()
def check(self):
    """Validate the request's API key, enforce rate limits, call the view.

    Raises InvalidAPIKey when the key is missing or unknown (only if
    ``error_on_invalidkey`` is set) and DailyLimitExceeded when the
    key's daily request quota is exhausted. If the backend database is
    unreachable the key check is skipped and the request is served.
    """
    api_key = None
    api_key_text = self.request.GET.get('key', None)
    skip_check = False
    if api_key_text is None:
        self.log_count('none', False)
        if self.error_on_invalidkey:
            raise self.prepare_exception(InvalidAPIKey())
    if api_key_text is not None:
        try:
            session = self.request.db_ro_session
            api_key = session.query(ApiKey).get(api_key_text)
        except Exception:
            # if we cannot connect to backend DB, skip api key check
            skip_check = True
            self.raven_client.captureException()
    if api_key is not None and api_key.should_allow(self.view_type):
        self.log_count(api_key.name, api_key.should_log(self.view_type))
        # Rate limit per key, per metric path and per UTC day.
        rate_key = 'apilimit:{key}:{path}:{time}'.format(
            key=api_key_text,
            path=self.metric_path,
            time=util.utcnow().strftime('%Y%m%d'))
        should_limit = rate_limit_exceeded(
            self.redis_client,
            rate_key,
            maxreq=api_key.maxreq)
        if should_limit:
            raise self.prepare_exception(DailyLimitExceeded())
    elif skip_check:
        # DB lookup failed above; fail open and serve the request.
        pass
    else:
        if api_key_text is not None:
            self.log_count('invalid', False)
            if self.error_on_invalidkey:
                raise self.prepare_exception(InvalidAPIKey())
    # If we failed to look up an ApiKey, create an empty one
    # rather than passing None through
    api_key = api_key or ApiKey(
        valid_key=None, allow_fallback=False, allow_locate=True)
    return self.view(api_key)
def search_cell(self, query):
    """Resolve candidate regions for a query from its cell networks.

    Every region a cell's mcc can belong to receives a score; an mcc
    mapping to multiple regions splits its score evenly between them.
    Only for those ambiguous cells is the database consulted, via the
    area model, to add extra evidence per region.
    """
    results = self.result_list()
    now = util.utcnow()
    ambiguous_cells = []
    regions = []
    for cell in list(query.cell) + list(query.cell_area):
        code = cell.mobileCountryCode
        mcc_regions = GEOCODER.regions_for_mcc(code, metadata=True)
        # Divide score by number of possible regions for the mcc
        score = 1.0 / (len(mcc_regions) or 1.0)
        for mcc_region in mcc_regions:
            regions.append((mcc_region, score))
        if len(mcc_regions) > 1:
            ambiguous_cells.append(cell)

    # Group by region code
    grouped_regions = {}
    for region, score in regions:
        code = region.code
        if code not in grouped_regions:
            grouped_regions[code] = [region, score]
        else:
            # Sum up scores of multiple matches
            grouped_regions[code][1] += score

    if ambiguous_cells:
        # Only do a database query if the mcc is ambiguous.
        # Use the area models for area and cell entries,
        # as we are only interested in the region here,
        # which won't differ between individual cells inside and area.
        areas = query_areas(query, ambiguous_cells,
                            self.area_model, self.raven_client)
        for area in areas:
            code = area.region
            if code and code in grouped_regions:
                grouped_regions[code][1] += area_score(area, now)

    for region, score in grouped_regions.values():
        results.add(self.result_type(
            region_code=region.code,
            region_name=region.name,
            accuracy=region.radius,
            score=score))

    return results
def test_score(self):
    # Table-driven check of station_score: each case pairs the Dummy
    # constructor arguments with the expected score rounded to two
    # decimal places.
    now = util.utcnow()
    cases = [
        ((now, now, 0, 1), 0.05),
        ((now - timedelta(days=1), now, 10, 2), 0.1),
        ((now - timedelta(days=5), now, 10, 2), 0.5),
        ((now - timedelta(days=10), now, 10, 2), 1.0),
        ((now - timedelta(days=10), now, 10, 64), 6.0),
        ((now - timedelta(days=10), now, 10, 1024), 10.0),
        ((now - timedelta(days=10), now, 0, 1024), 0.5),
        ((now - timedelta(days=70), now - timedelta(days=40),
          10, 1024), 7.07),
        ((now - timedelta(days=190), now - timedelta(days=180),
          10, 1024), 3.78),
        ((now - timedelta(days=190), now - timedelta(days=180),
          10, 64), 2.27),
    ]
    for args, expected in cases:
        assert round(station_score(Dummy(*args), now), 2) == expected
def log_unique_ip(self, valid_key):
    """Track the client IP in a per-day HyperLogLog key in redis."""
    addr = self.request.client_addr
    if isinstance(addr, bytes):  # pragma: no cover
        addr = addr.decode('ascii')
    try:
        ip = str(ip_address(addr))
    except ValueError:  # pragma: no cover
        ip = None
    if not ip:
        return
    day = util.utcnow().date().strftime('%Y-%m-%d')
    redis_key = 'apiuser:{api_type}:{api_key}:{date}'.format(
        api_type=self.view_type,
        api_key=valid_key,
        date=day,
    )
    with self.redis_client.pipeline() as pipe:
        pipe.pfadd(redis_key, ip)
        pipe.expire(redis_key, 691200)  # 8 days
        pipe.execute()
def monitor_ocid_import(self):
    """Gauge the age of the newest OCID cell row in milliseconds.

    Returns -1 when no row exists or the check fails.
    """
    result = -1
    try:
        now = util.utcnow()
        stats_client = self.stats_client
        with self.db_session() as session:
            newest = session.query(func.max(OCIDCell.created)).first()[0]
            if newest:
                # diff between now and the value, in milliseconds
                age = now - newest
                result = (age.days * 86400 + age.seconds) * 1000
                stats_client.gauge('table.ocid_cell_age', result)
    except Exception:  # pragma: no cover
        # Log but ignore the exception
        self.heka_client.raven('error')
    return result
def test_monitor_ocid_import(self):
    now = util.utcnow()
    expected = []
    results = []
    # Insert progressively newer rows; after each insert the reported
    # age should track the newest row.
    for hours in range(35, 5, -5):
        CellOCIDFactory(created=now - timedelta(hours=hours), cid=hours)
        self.session.flush()
        expected.append(hours * 3600000)
        results.append(monitor_ocid_import.delay().get())
    self.check_stats(gauge=[
        ('table', len(expected), ['table:cell_ocid_age']),
    ])
    # The values should be almost equal, ignoring differences
    # less than 10 seconds (or 9999 milliseconds / 4 places)
    for result, expect in zip(results, expected):
        self.assertAlmostEqual(result, expect, -4)
def test_multiple_mcc(self, geoip_db, http_session, session, source, stats):
    # Two cell areas from different mccs; the best result must match
    # the first region of mcc 235 with the combined score.
    now = util.utcnow()
    region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
    area = CellAreaFactory(mcc=234, num_cells=6)
    area2 = CellAreaFactory(mcc=235, num_cells=8)
    session.flush()
    query = self.model_query(
        geoip_db, http_session, session, stats, cells=[area, area2])
    results = source.search(query)
    assert len(results) > 2
    best = results.best()
    assert best.region_code == region.code
    assert best.score == 1.25 + area_score(area, now)
def test_wifi_over_cell(self):
    now = util.utcnow()
    three_months = now - timedelta(days=90)
    wifis = [WifiShardFactory(samples=1000, created=three_months,
                              modified=now, region='US')
             for _ in range(2)]
    cell = CellShardFactory(radio=Radio.gsm, samples=10)
    self.session.flush()
    query = self.model_query(cells=[cell], wifis=wifis)
    res = self._call(body=query, ip=self.test_ip)
    # wifi says US with a high score, cell and geoip say UK
    self.check_model_response(res, wifis[0], region='US')
def test_api_key_limit(self):
    london = self.geoip_data['London']
    api_key = uuid1().hex
    self.session.add(ApiKey(valid_key=api_key, maxreq=5, shortname='dis'))
    self.session.flush()
    # exhaust today's limit before making the request
    dstamp = util.utcnow().strftime("%Y%m%d")
    self.redis_client.incr("apilimit:%s:%s" % (api_key, dstamp), 10)
    res = self.app.post_json(
        '%s?key=%s' % (self.url, api_key), {},
        extra_environ={'HTTP_X_FORWARDED_FOR': london['ip']},
        status=403)
    errors = res.json['error']['errors']
    self.assertEqual(errors[0]['reason'], 'dailyLimitExceeded')
def test_global_stats_missing_today(self, session):
    # Only older stat rows exist; global_stats should still report the
    # newest available values and zeros for the missing keys.
    day = util.utcnow().date() - timedelta(1)
    yesterday = day - timedelta(days=1)
    session.add_all([
        Stat(key=StatKey.cell, time=yesterday, value=5000000),
        Stat(key=StatKey.cell, time=day, value=6000000),
        Stat(key=StatKey.wifi, time=day, value=3000000),
        Stat(key=StatKey.unique_cell, time=yesterday, value=4000000),
    ])
    session.commit()
    expected = {
        'blue': '0.00',
        'unique_blue': '0.00',
        'cell': '6.00',
        'unique_cell': '4.00',
        'wifi': '3.00',
        'unique_wifi': '0.00',
    }
    assert global_stats(session) == expected
def test_stats_cell_json(self):
    yesterday = util.utcnow().date() - timedelta(1)
    self.session.add_all([
        Stat(key=StatKey.unique_cell, time=yesterday, value=2),
        Stat(key=StatKey.unique_cell_ocid, time=yesterday, value=5),
    ])
    self.session.commit()
    ts = timegm(yesterday.timetuple()) * 1000
    expected = {'series': [
        {'data': [[ts, 2]], 'title': 'MLS Cells'},
        {'data': [[ts, 5]], 'title': 'OCID Cells'},
    ]}
    result = self.app.get('/stats_cell.json', status=200)
    self.assertEqual(result.json, expected)
    # a repeated request must return identical data
    second_result = self.app.get('/stats_cell.json', status=200)
    self.assertEqual(second_result.json, result.json)
def test_location_update_cell(self):
    # location_update_cell must recompute cell positions from their
    # measures, skip invalid lac/cid keys and ignore measures that are
    # older than the cell record.
    now = util.utcnow()
    before = now - timedelta(days=1)
    session = self.db_master_session
    k1 = dict(radio=1, mcc=1, mnc=2, lac=3, cid=4)
    k2 = dict(radio=1, mcc=1, mnc=2, lac=6, cid=8)
    k3 = dict(radio=1, mcc=1, mnc=2, lac=-1, cid=-1)
    data = [
        Cell(new_measures=3, total_measures=5, **k1),
        CellMeasure(lat=1.0, lon=1.0, **k1),
        CellMeasure(lat=1.002, lon=1.003, **k1),
        CellMeasure(lat=1.004, lon=1.006, **k1),
        # The lac, cid are invalid and should be skipped
        CellMeasure(lat=1.5, lon=1.5, **k3),
        CellMeasure(lat=1.502, lon=1.503, **k3),
        Cell(lat=2.0, lon=2.0, new_measures=2, total_measures=4, **k2),
        # the lat/lon is bogus and mismatches the line above on purpose
        # to make sure old measures are skipped
        CellMeasure(lat=-1.0, lon=-1.0, created=before, **k2),
        CellMeasure(lat=-1.0, lon=-1.0, created=before, **k2),
        CellMeasure(lat=2.002, lon=2.004, **k2),
        CellMeasure(lat=2.002, lon=2.004, **k2),
    ]
    session.add_all(data)
    session.commit()

    result = location_update_cell.delay(min_new=1)
    self.assertEqual(result.get(), (2, 0))
    self.check_stats(
        total=2,
        timer=['task.data.location_update_cell'],
        gauge=['task.data.location_update_cell.new_measures_1_100'],
    )

    cells = session.query(Cell).filter(Cell.cid != CELLID_LAC).all()
    self.assertEqual(len(cells), 2)
    # all pending new measures were consumed
    self.assertEqual([c.new_measures for c in cells], [0, 0])
    for cell in cells:
        if cell.cid == 4:
            # average of the three k1 measures
            self.assertEqual(cell.lat, 1.002)
            self.assertEqual(cell.lon, 1.003)
        elif cell.cid == 8:
            # blend of the existing position and the fresh k2 measures
            self.assertEqual(cell.lat, 2.001)
            self.assertEqual(cell.lon, 2.002)
def test_insert_observations_out_of_range(self):
    session = self.session
    time = util.utcnow() - timedelta(days=1)
    common = dict(
        created=time, lat=PARIS_LAT, lon=PARIS_LON, time=time,
        accuracy=0, altitude=0, altitude_accuracy=0,
        radio=int(Radio.gsm), mcc=FRANCE_MCC, mnc=2, lac=3, cid=4)
    entries = [
        dict(asu=8, signal=-70, ta=32, **common),
        dict(asu=-10, signal=-300, ta=-10, **common),
        dict(asu=256, signal=16, ta=128, **common),
    ]
    result = insert_measures_cell.delay(entries)
    self.assertEqual(result.get(), 3)
    observations = session.query(CellObservation).all()
    self.assertEqual(len(observations), 3)
    # out-of-range asu/signal/ta values end up as defaults (-1 / 0),
    # valid ones are kept unchanged
    self.assertEqual(set([o.asu for o in observations]), set([-1, 8]))
    self.assertEqual(set([o.signal for o in observations]), set([0, -70]))
    self.assertEqual(set([o.ta for o in observations]), set([0, 32]))
def test_cell_histogram(self):
    session = self.db_master_session
    today = util.utcnow().date()
    yesterday = today - timedelta(1)
    two_days = today - timedelta(2)
    long_ago = today - timedelta(3)
    # per-day measure counts: today=2, yesterday=1, two_days=3, long_ago=1
    created_days = [today, today, yesterday,
                    two_days, two_days, two_days, long_ago]
    session.add_all([CellMeasure(lat=1.0, lon=2.0, created=day)
                     for day in created_days])
    session.commit()
    cell_histogram.delay(ago=3).get()
    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 1)
    self.assertEqual(stats[0].key, STAT_TYPE['cell'])
    self.assertEqual(stats[0].time, long_ago)
    self.assertEqual(stats[0].value, 1)
    # fill up newer dates
    for ago in (2, 1, 0):
        cell_histogram.delay(ago=ago).get()
    # test duplicate execution
    cell_histogram.delay(ago=1).get()
    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 4)
    # cumulative per-day totals
    expected = [(long_ago, 1), (two_days, 4), (yesterday, 5), (today, 7)]
    for stat, (day, value) in zip(stats, expected):
        self.assertEqual(stat.time, day)
        self.assertEqual(stat.value, value)
def test_unique_cell_histogram(self):
    session = self.db_master_session
    today = util.utcnow().date()
    one_day = today - timedelta(1)
    two_days = today - timedelta(2)
    long_ago = today - timedelta(3)
    session.add_all([
        Cell(created=long_ago, radio=0, mcc=1, mnc=2, lac=3, cid=4),
        Cell(created=two_days, radio=2, mcc=1, mnc=2, lac=3, cid=4),
        Cell(created=two_days, radio=2, mcc=1, mnc=2, lac=3, cid=5),
        Cell(created=one_day, radio=0, mcc=2, mnc=2, lac=3, cid=5),
        Cell(created=today, radio=0, mcc=1, mnc=3, lac=3, cid=4),
        Cell(created=today, radio=0, mcc=1, mnc=2, lac=4, cid=CELLID_LAC),
    ])
    session.commit()
    result = unique_cell_histogram.delay(ago=3)
    self.assertEqual(result.get(), 1)
    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 1)
    self.assertEqual(stats[0].key, STAT_TYPE['unique_cell'])
    self.assertEqual(stats[0].time, long_ago)
    self.assertEqual(stats[0].value, 1)
    # fill up newer dates
    for ago in (2, 1, 0):
        unique_cell_histogram.delay(ago=ago).get()
    # test duplicate execution
    unique_cell_histogram.delay(ago=1).get()
    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 4)
    # cumulative unique-cell totals per day
    expected = [(long_ago, 1), (two_days, 3), (one_day, 4), (today, 5)]
    for stat, (day, value) in zip(stats, expected):
        self.assertEqual(stat.time, day)
        self.assertEqual(stat.value, value)
def test_leaders(self):
    # Create six users (nicknames '7' down to '2'), each with scores on
    # two days; the leaders view must rank them by summed score, split
    # the listing into two parts and serve a cached result afterwards.
    session = self.db_master_session
    today = util.utcnow().date()
    yesterday = today - timedelta(days=1)
    for i in range(7, 1, -1):
        user = User(nickname=unicode(i))
        session.add(user)
        session.flush()
        score1 = Score(userid=user.id, time=today, value=i)
        score1.name = 'location'
        session.add(score1)
        score2 = Score(userid=user.id, time=yesterday, value=i + 1)
        score2.name = 'location'
        session.add(score2)
    session.commit()

    request = DummyRequest()
    request.db_slave_session = self.db_master_session
    request.registry.redis_client = self.redis_client
    inst = self._make_view(request)
    result = inst.leaders_view()
    # each user's total is i + (i + 1), i.e. 15 for user '7'
    self.assertEqual(
        result['leaders1'],
        [{'anchor': u'7', 'nickname': u'7', 'num': 15, 'pos': 1},
         {'anchor': u'6', 'nickname': u'6', 'num': 13, 'pos': 2}])
    self.assertEqual(
        result['leaders2'],
        [{'anchor': u'5', 'nickname': u'5', 'num': 11, 'pos': 3}])

    # call the view again, without a working db session, so
    # we can be sure to use the cached result
    inst = self._make_view(request)
    request.db_slave_session = None
    second_result = inst.leaders_view()
    self.assertEqual(second_result, result)
def send(self, queue_items):
    """Upload a batch of queued reports to S3 as one gzipped JSON object.

    The object key is built from the configured URL's path template,
    the export queue's source and api_key segments, the current UTC
    date and a random uuid1 hex suffix. Emits a success or failure
    upload metric; upload errors are re-raised after counting.
    """
    # ignore metadata
    reports = [item["report"] for item in queue_items]

    _, bucketname, path = urlparse(self.config.url)[:3]
    # s3 key names start without a leading slash
    path = path.lstrip("/")
    if not path.endswith("/"):
        path += "/"

    year, month, day = util.utcnow().timetuple()[:3]
    # strip away queue prefix again
    parts = self.queue_key.split(":")
    source = parts[1]
    api_key = parts[2]
    obj_name = path.format(
        source=source, api_key=api_key, year=year, month=month, day=day)
    # random suffix keeps concurrent uploads from colliding
    obj_name += uuid.uuid1().hex + ".json.gz"

    try:
        data = util.encode_gzip(
            json.dumps({"items": reports}).encode(), compresslevel=7)

        s3 = boto3.resource("s3")
        bucket = s3.Bucket(bucketname)
        obj = bucket.Object(obj_name)
        obj.put(
            Body=data,
            ContentEncoding="gzip",
            ContentType="application/json")

        METRICS.incr(
            "data.export.upload",
            tags=self.stats_tags + ["status:success"])
    except Exception:
        METRICS.incr(
            "data.export.upload",
            tags=self.stats_tags + ["status:failure"])
        raise
def test_insert_measures_out_of_range(self):
    session = self.db_master_session
    time = util.utcnow() - timedelta(days=1)
    common = dict(
        created=encode_datetime(time), lat=PARIS_LAT, lon=PARIS_LON,
        time=encode_datetime(time), accuracy=0, altitude=0,
        altitude_accuracy=0, radio=RADIO_TYPE['gsm'],
        mcc=FRANCE_MCC, mnc=2, lac=3, cid=4)
    entries = [
        dict(asu=8, signal=-70, ta=32, **common),
        dict(asu=-10, signal=-300, ta=-10, **common),
        dict(asu=256, signal=16, ta=128, **common),
    ]
    result = insert_measures_cell.delay(entries)
    self.assertEqual(result.get(), 3)
    measures = session.query(CellMeasure).all()
    self.assertEqual(len(measures), 3)
    # out-of-range asu/signal/ta values end up as defaults (-1 / 0),
    # valid ones are kept unchanged
    self.assertEqual(set([m.asu for m in measures]), set([-1, 8]))
    self.assertEqual(set([m.signal for m in measures]), set([0, -70]))
    self.assertEqual(set([m.ta for m in measures]), set([0, 32]))
def query_cells(query, lookups, model, raven_client):
    # Given a location query and a list of lookup instances, query the
    # database and return a list of model objects.
    cellids = [lookup.cellid for lookup in lookups]
    if not cellids:
        return []

    # load all fields used in score calculation and those we
    # need for the position
    load_fields = (
        "cellid",
        "lat",
        "lon",
        "radius",
        "region",
        "samples",
        "created",
        "modified",
        "last_seen",
        "block_last",
        "block_count",
    )
    result = []
    today = util.utcnow().date()

    try:
        # Group the lookups by the shard table their radio type maps
        # to, so each shard is queried only once.
        shards = defaultdict(list)
        for lookup in lookups:
            shards[model.shard_model(lookup.radioType)].append(lookup.cellid)

        for shard, shard_cellids in shards.items():
            columns = shard.__table__.c
            fields = [getattr(columns, f) for f in load_fields]

            rows = (query.session.execute(
                select(fields).where(columns.lat.isnot(None)).where(
                    columns.lon.isnot(None)).where(
                        columns.cellid.in_(shard_cellids)))).fetchall()

            # Drop stations considered blocked as of today.
            result.extend(
                [row for row in rows if not station_blocked(row, today)])
    except Exception:
        # DB problems are reported but result in an empty/partial list.
        raven_client.captureException()
    return result
def test_wifi_histogram(self):
    session = self.db_master_session
    today = util.utcnow().date()
    yesterday = today - timedelta(1)
    two_days = today - timedelta(2)
    long_ago = today - timedelta(3)
    # per-day observation counts: today=2, yesterday=1, two_days=3,
    # long_ago=1
    created_days = [today, today, yesterday,
                    two_days, two_days, two_days, long_ago]
    session.add_all([WifiObservation(lat=1.0, lon=2.0, created=day)
                     for day in created_days])
    session.commit()
    wifi_histogram.delay(ago=3).get()
    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 1)
    self.assertEqual(stats[0].key, StatKey.wifi)
    self.assertEqual(stats[0].time, long_ago)
    self.assertEqual(stats[0].value, 1)
    # fill in newer dates
    for ago in (2, 1, 0):
        wifi_histogram.delay(ago=ago).get()
    # test duplicate execution
    wifi_histogram.delay(ago=1).get()
    stats = session.query(Stat).order_by(Stat.time).all()
    self.assertEqual(len(stats), 4)
    # cumulative per-day totals
    expected = [(long_ago, 1), (two_days, 4), (yesterday, 5), (today, 7)]
    for stat, (day, value) in zip(stats, expected):
        self.assertEqual(stat.time, day)
        self.assertEqual(stat.value, value)
def test_scan_areas_update(self):
    session = self.session
    self.add_line_of_cells_and_scan_lac()
    today = util.utcnow().date()
    lac = session.query(CellArea).filter(Cell.lac == 1).first()
    # We produced a sequence of 0.2-degree-on-a-side
    # cell bounding boxes centered at [0, 1, 2, ..., 9]
    # degrees. So the lower-left corner is at (-0.1, -0.1)
    # and the upper-right corner is at (9.1, 9.1)
    # we should therefore see a LAC centroid at (4.5, 4.5)
    # with a range of 723,001m
    self.assertEqual((lac.lat, lac.lon), (4.5, 4.5))
    self.assertEqual(lac.range, 723001)
    self.assertEqual(lac.created.date(), today)
    self.assertEqual(lac.modified.date(), today)
    self.assertEqual(lac.num_cells, 10)