Example #1
    def test_insert_observations_invalid_lac(self):
        time = util.utcnow() - timedelta(days=1)
        today = util.utcnow().date()

        cell = CellFactory(total_measures=5)
        self.session.add(Score(key=ScoreKey.new_cell,
                               userid=1, time=today, value=7))
        self.session.flush()

        obs = dict(
            radio=int(cell.radio), mcc=cell.mcc, mnc=cell.mnc, psc=cell.psc,
            created=time, time=time, lat=cell.lat, lon=cell.lon,
            accuracy=0, altitude=0, altitude_accuracy=0)
        entries = [
            {'lac': constants.MAX_LAC_ALL + 1,
             'cid': constants.MAX_CID_ALL + 1, 'asu': 8},
            {'lac': None, 'cid': None, 'asu': 8},
        ]
        for entry in entries:
            entry.update(obs)

        result = insert_measures_cell.delay(entries, userid=1)
        self.assertEqual(result.get(), 0)

        # The incomplete observations never make it into the queue
        self.assertEqual(self.data_queue.size(), 0)
        update_cell.delay().get()

        # Nothing should change in the initially created Cell record
        self.session.refresh(cell)
        cells = self.session.query(Cell).all()
        self.assertEqual(len(cells), 1)
        self._compare_sets([c.total_measures for c in cells], [5])
Example #2
    def test_time(self):
        app = self.app
        # test two weeks ago and "now"
        time = util.utcnow() - timedelta(14)
        tstr = encode_datetime(time)
        app.post_json(
            '/v1/submit', {"items": [
                {"lat": 1.0,
                 "lon": 2.0,
                 "wifi": [{"key": "00aaaaaaaaaa"}],
                 "time": tstr},
                {"lat": 2.0,
                 "lon": 3.0,
                 "wifi": [{"key": "00bbbbbbbbbb"}]},
            ]},
            status=204)
        session = self.db_master_session
        result = session.query(WifiMeasure).all()
        self.assertEqual(len(result), 2)

        wifis = dict([(w.key, (w.created, w.time)) for w in result])
        today = util.utcnow().date()

        month_rounded_tday = time.replace(day=1, hour=0, minute=0, second=0)
        month_rounded_tday = month_rounded_tday.replace(tzinfo=pytz.UTC)
        month_rounded_today = today.replace(day=1)

        self.assertEqual(wifis['00aaaaaaaaaa'][0].date(), today)
        self.assertEqual(wifis['00aaaaaaaaaa'][1], month_rounded_tday)

        self.assertEqual(wifis['00bbbbbbbbbb'][0].date(), today)
        self.assertEqual(wifis['00bbbbbbbbbb'][1].date(), month_rounded_today)
Example #3
    def test_insert_observations(self):
        session = self.session
        time = util.utcnow() - timedelta(days=1)
        today = util.utcnow().date()

        session.add(Wifi(key="ab1234567890",
                         new_measures=0, total_measures=0))
        session.add(Score(key=ScoreKey.new_wifi,
                          userid=1, time=today, value=7))
        session.flush()

        obs = dict(
            created=time, lat=1.0, lon=2.0,
            time=time, accuracy=0, altitude=0,
            altitude_accuracy=0, radio=-1,
            heading=52.9,
            speed=158.5,
        )
        entries = [
            {"key": "ab1234567890", "channel": 11, "signal": -80},
            {"key": "ab1234567890", "channel": 3, "signal": -90},
            {"key": "ab1234567890", "channel": 3, "signal": -80},
            {"key": "cd3456789012", "channel": 3, "signal": -90},
        ]
        for e in entries:
            e.update(obs)
        result = insert_measures_wifi.delay(entries, userid=1)
        self.assertEqual(result.get(), 4)

        observations = session.query(WifiObservation).all()
        self.assertEqual(len(observations), 4)
        self.assertEqual(set([o.key for o in observations]),
                         set(["ab1234567890", "cd3456789012"]))
        self.assertEqual(set([o.channel for o in observations]), set([3, 11]))
        self.assertEqual(set([o.signal for o in observations]),
                         set([-80, -90]))
        self.assertEqual(set([o.heading for o in observations]), set([52.9]))
        self.assertEqual(set([o.speed for o in observations]), set([158.5]))

        wifis = session.query(Wifi).all()
        self.assertEqual(len(wifis), 2)
        self.assertEqual(set([w.key for w in wifis]), set(["ab1234567890",
                                                           "cd3456789012"]))
        self.assertEqual(set([w.new_measures for w in wifis]), set([1, 3]))
        self.assertEqual(set([w.total_measures for w in wifis]), set([1, 3]))

        scores = session.query(Score).all()
        self.assertEqual(len(scores), 1)
        self.assertEqual(scores[0].key, ScoreKey.new_wifi)
        self.assertEqual(scores[0].value, 8)

        # test duplicate execution
        result = insert_measures_wifi.delay(entries, userid=1)
        self.assertEqual(result.get(), 4)
        # TODO this task isn't idempotent yet
        observations = session.query(WifiObservation).all()
        self.assertEqual(len(observations), 8)
Example #4
 def __init__(self, *args, **kw):
     if 'created' not in kw:
         kw['created'] = util.utcnow()
     if 'modified' not in kw:
         kw['modified'] = util.utcnow()
     if 'new_measures' not in kw:
         kw['new_measures'] = 0
     if 'total_measures' not in kw:
         kw['total_measures'] = 0
     super(Wifi, self).__init__(*args, **kw)
Example #5
    def test_created_from_blocklist_time(self):
        now = util.utcnow()
        last_week = now - TEMPORARY_BLOCKLIST_DURATION - timedelta(days=1)

        obs = CellObservationFactory.build()
        self.session.add(
            CellBlocklist(time=last_week, count=1,
                          radio=obs.radio, mcc=obs.mcc,
                          mnc=obs.mnc, lac=obs.lac, cid=obs.cid))
        self.session.flush()

        # add a new entry for the previously blocklisted cell
        self.data_queue.enqueue([obs])
        self.assertEqual(self.data_queue.size(), 1)
        update_cell.delay().get()

        # the cell was inserted again
        cells = self.session.query(Cell).all()
        self.assertEqual(len(cells), 1)

        # and the creation date was set to the date of the blocklist entry
        self.assertEqual(cells[0].created, last_week)

        self.check_statcounter(StatKey.cell, 1)
        self.check_statcounter(StatKey.unique_cell, 0)
Example #6
    def test_histogram(self):
        session = self.session
        today = util.utcnow().date()
        one_day = today - timedelta(days=1)
        two_days = today - timedelta(days=2)
        one_month = today - timedelta(days=35)
        two_months = today - timedelta(days=70)
        long_ago = today - timedelta(days=100)
        stats = [
            Stat(key=StatKey.cell, time=long_ago, value=40),
            Stat(key=StatKey.cell, time=two_months, value=50),
            Stat(key=StatKey.cell, time=one_month, value=60),
            Stat(key=StatKey.cell, time=two_days, value=70),
            Stat(key=StatKey.cell, time=one_day, value=80),
            Stat(key=StatKey.cell, time=today, value=90),
        ]
        session.add_all(stats)
        session.commit()
        result = histogram(session, StatKey.cell, days=90)
        self.assertTrue(
            [unixtime(one_day), 80] in result[0])

        if two_months.month == 12:
            expected = date(two_months.year + 1, 1, 1)
        else:
            expected = date(two_months.year, two_months.month + 1, 1)
        self.assertTrue(
            [unixtime(expected), 50] in result[0])
Example #7
 def test_leaders(self):
     session = self.session
     today = util.utcnow().date()
     test_data = []
     for i in range(20):
         test_data.append((u'nick-%s' % i, 30))
     highest = u'nick-high-too-long_'
     highest += (128 - len(highest)) * u'x'
     test_data.append((highest, 40))
     lowest = u'nick-low'
     test_data.append((lowest, 20))
     for nick, value in test_data:
         user = User(nickname=nick)
         session.add(user)
         session.flush()
         score = Score(key=ScoreKey.location,
                       userid=user.id, time=today, value=value)
         session.add(score)
     session.commit()
     # check the result
     result = leaders(session)
     self.assertEqual(len(result), 22)
     self.assertEqual(result[0]['nickname'], highest[:24] + u'...')
     self.assertEqual(result[0]['num'], 40)
     self.assertTrue(lowest in [r['nickname'] for r in result])
Example #8
def global_stats(session):
    today = util.utcnow().date()
    yesterday = today - timedelta(1)
    names = ('cell', 'wifi', 'unique_cell', 'unique_ocid_cell', 'unique_wifi')
    stat_keys = [STAT_TYPE[name] for name in names]
    rows = session.query(Stat.key, Stat.value).filter(
        Stat.key.in_(stat_keys)).filter(
        Stat.time == yesterday)

    stats = {}
    for row in rows.all():
        if row[1]:
            stats[row[0]] = int(row[1])

    result = {}
    for name in names:
        stat_key = STAT_TYPE[name]
        try:
            result[name] = stats[stat_key]
        except KeyError:
            # no stats entry available, maybe closely after midnight
            # and task hasn't run yet, take latest value
            row = session.query(Stat.value).filter(
                Stat.key == stat_key).order_by(
                Stat.time.desc()).limit(1).first()
            if row is not None:
                result[name] = row[0]
            else:
                result[name] = 0

    for k, v in result.items():
        # show as millions
        result[k] = "%.2f" % ((v // 10000) / 100.0)

    return result
Example #9
    def __call__(self, diff=True, _filename=None):
        url = self.settings.get('url')
        apikey = self.settings.get('apikey')
        if not url or not apikey:  # pragma: no cover
            return

        if _filename is None:
            if diff:
                prev_hour = util.utcnow() - timedelta(hours=1)
                _filename = prev_hour.strftime(
                    'cell_towers_diff-%Y%m%d%H.csv.gz')
            else:  # pragma: no cover
                _filename = 'cell_towers.csv.gz'

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, _filename)
            with open(path, 'wb') as temp_file:
                with closing(requests.get(url,
                                          params={'apiKey': apikey,
                                                  'filename': _filename},
                                          stream=True)) as req:

                    for chunk in req.iter_content(chunk_size=2 ** 20):
                        temp_file.write(chunk)
                        temp_file.flush()

                with self.task.redis_pipeline() as pipe:
                    with self.task.db_session() as session:
                        self.import_stations(session, pipe, path)
Example #10
def leaders_weekly(session, batch=20):
    result = {'new_cell': [], 'new_wifi': []}
    today = util.utcnow().date()
    one_week = today - timedelta(7)

    score_rows = {}
    userids = set()
    for name in ('new_cell', 'new_wifi'):
        score_rows[name] = session.query(
            Score.userid, func.sum(Score.value)).filter(
            Score.key == ScoreKey[name]).filter(
            Score.time >= one_week).order_by(
            func.sum(Score.value).desc()).group_by(
            Score.userid).limit(batch).all()
        userids.update(set([s[0] for s in score_rows[name]]))

    if not userids:  # pragma: no cover
        return result

    user_rows = session.query(User.id, User.nickname).filter(
        User.id.in_(userids)).all()
    users = dict(user_rows)

    for name, rows in score_rows.items():
        for userid, value in rows:
            nickname = users.get(userid, 'anonymous')
            if len(nickname) > 24:  # pragma: no cover
                nickname = nickname[:24] + u'...'
            result[name].append(
                {'nickname': nickname, 'num': int(value)})

    return result
Example #11
    def test_insert_measures_out_of_range(self):
        session = self.db_master_session
        time = util.utcnow() - timedelta(days=1)

        measure = dict(
            id=0, created=encode_datetime(time),
            lat=PARIS_LAT,
            lon=PARIS_LON,
            time=encode_datetime(time), accuracy=0, altitude=0,
            altitude_accuracy=0, radio=RADIO_TYPE['gsm'], mcc=FRANCE_MCC,
            mnc=2, lac=3, cid=4)
        entries = [
            {"asu": 8, "signal": -70, "ta": 32},
            {"asu": -10, "signal": -300, "ta": -10},
            {"asu": 256, "signal": 16, "ta": 128},
        ]
        for e in entries:
            e.update(measure)

        result = insert_measures_cell.delay(entries)
        self.assertEqual(result.get(), 3)

        measures = session.query(CellMeasure).all()
        self.assertEqual(len(measures), 3)
        self.assertEqual(set([m.asu for m in measures]), set([-1, 8]))
        self.assertEqual(set([m.signal for m in measures]), set([0, -70]))
        self.assertEqual(set([m.ta for m in measures]), set([0, 32]))
Example #12
    def test_normalize_time(self):
        now = util.utcnow()
        first_args = dict(day=1, hour=0, minute=0, second=0,
                          microsecond=0, tzinfo=UTC)
        now_enc = now.replace(**first_args)
        two_weeks_ago = now - timedelta(14)
        short_format = now.date().isoformat()

        entries = [
            ('', now_enc),
            (now, now_enc),
            (two_weeks_ago, two_weeks_ago.replace(**first_args)),
            (short_format, now_enc),
            ("2011-01-01T11:12:13.456Z", now_enc),
            ("2070-01-01T11:12:13.456Z", now_enc),
            ("10-10-10", now_enc),
            ("2011-10-13T.Z", now_enc),
        ]

        for entry in entries:
            in_, expected = entry
            if not isinstance(in_, str):
                in_ = encode_datetime(in_)
            self.assertEqual(
                decode_datetime(normalized_time(in_)), expected)
Example #13
def query_database(query, raven_client):
    macs = [lookup.mac for lookup in query.wifi]
    if not macs:  # pragma: no cover
        return []

    result = []
    today = util.utcnow().date()
    temp_blocked = today - TEMPORARY_BLOCKLIST_DURATION

    try:
        load_fields = ('lat', 'lon', 'radius')
        shards = defaultdict(list)
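        # group MACs by shard model so each shard table is queried once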
        for mac in macs:
            shards[WifiShard.shard_model(mac)].append(mac)

        for shard, shard_macs in shards.items():
            rows = (
                query.session.query(shard)
                             .filter(shard.mac.in_(shard_macs))
                             .filter(shard.lat.isnot(None))
                             .filter(shard.lon.isnot(None))
                             .filter(or_(
                                 shard.block_count.is_(None),
                                 shard.block_count <
                                     PERMANENT_BLOCKLIST_THRESHOLD))
                             .filter(or_(
                                 shard.block_last.is_(None),
                                 shard.block_last < temp_blocked))
                             .options(load_only(*load_fields))
            ).all()
            result.extend(list(rows))
    except Exception:
        raven_client.captureException()
    return result
Example #14
    def import_stations(self, session, pipe, filename):
        today = util.utcnow().date()

        on_duplicate = (
            '`modified` = values(`modified`)'
            ', `lat` = values(`lat`)'
            ', `lon` = values(`lon`)'
            ', `psc` = values(`psc`)'
            ', `max_lat` = values(`max_lat`)'
            ', `min_lat` = values(`min_lat`)'
            ', `max_lon` = values(`max_lon`)'
            ', `min_lon` = values(`min_lon`)'
            ', `radius` = values(`radius`)'
            ', `samples` = values(`samples`)'
        )

        table_insert = self.cell_model.__table__.insert(
            mysql_on_duplicate=on_duplicate)

        def commit_batch(rows):
            result = session.execute(table_insert, rows)
            count = result.rowcount
            # apply trick to avoid querying for existing rows,
            # MySQL claims 1 row for an inserted row, 2 for an updated row
            inserted_rows = 2 * len(rows) - count
            changed_rows = count - len(rows)
            assert inserted_rows + changed_rows == len(rows)
            StatCounter(self.stat_key, today).incr(pipe, inserted_rows)

        areaids = set()
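        # cell areas touched by this import, enqueued at the end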

        with util.gzip_open(filename, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                csv_reader = csv.reader(gzip_file)
                parse_row = partial(self.make_import_dict,
                                    self.cell_model.validate,
                                    self.import_spec)
                rows = []
                for row in csv_reader:
                    # skip any header row
                    if (csv_reader.line_num == 1 and
                            row[0] == 'radio'):  # pragma: no cover
                        continue

                    data = parse_row(row)
                    if data is not None:
                        rows.append(data)
                        areaids.add((int(data['radio']), data['mcc'],
                                    data['mnc'], data['lac']))

                    if len(rows) == self.batch_size:  # pragma: no cover
                        commit_batch(rows)
                        session.flush()
                        rows = []

                if rows:
                    commit_batch(rows)

        self.area_queue.enqueue(
            [encode_cellarea(*id_) for id_ in areaids], json=False)
Example #15
 def test_score(self):
     now = util.utcnow()
     wifi = WifiShard.create(
         mac='111101123456', created=now, modified=now,
         radius=10, samples=2,
     )
     self.assertAlmostEqual(wifi.score(now), 0.1, 2)
Example #16
 def test_block_last(self):
     now = util.utcnow()
     assert round(station_score(Dummy(
         now - timedelta(days=70),
         now - timedelta(days=60),
         10, 64,
         (now - timedelta(days=65)).date()), now), 2) == 1.73
Example #17
 def test_nickname_header_update(self):
     app = self.app
     nickname = 'World Tr\xc3\xa4veler'
     utcday = util.utcnow().date()
     session = self.db_master_session
     user = User(nickname=nickname.decode('utf-8'))
     session.add(user)
     session.flush()
     session.add(Score(userid=user.id, key=SCORE_TYPE['location'], value=7))
     session.add(Score(userid=user.id, key=SCORE_TYPE['new_wifi'], value=3))
     session.commit()
     app.post_json(
         '/v1/submit', {"items": [
             {"lat": 1.0,
              "lon": 2.0,
              "wifi": [{"key": "00AAAAAAAAAA"}]},
         ]},
         headers={'X-Nickname': nickname},
         status=204)
     result = session.query(User).all()
     self.assertEqual(len(result), 1)
     self.assertEqual(result[0].nickname, nickname.decode('utf-8'))
     result = session.query(Score).all()
     self.assertEqual(len(result), 2)
     self.assertEqual(set([r.name for r in result]),
                      set(['location', 'new_wifi']))
     for r in result:
         if r.name == 'location':
             self.assertEqual(r.value, 8)
             self.assertEqual(r.time, utcday)
         elif r.name == 'new_wifi':
             self.assertEqual(r.value, 4)
             self.assertEqual(r.time, utcday)
Example #18
def monitor_api_key_limits(self):
    result = {}
    try:
        today = util.utcnow().strftime('%Y%m%d')
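        # look up today's per-API-key rate limit counters in Redis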
        keys = self.redis_client.keys('apilimit:*:' + today)
        if keys:
            values = self.redis_client.mget(keys)
            keys = [k.split(':')[1] for k in keys]
        else:
            values = []

        names = {}
        if keys:
            with self.db_session(commit=False) as session:
                query = (ApiKey.querykeys(session, keys)
                               .options(load_only('valid_key', 'shortname')))
                for api_key in query.all():
                    names[api_key.valid_key] = api_key.name

        result = {}
        for k, v in zip(keys, values):
            name = names.get(k, k)
            value = int(v)
            result[name] = value
            self.stats_client.gauge('apilimit.' + name, value)
    except Exception:  # pragma: no cover
        # Log but ignore the exception
        self.raven_client.captureException()
    return result
Example #19
 def test_score(self):
     now = util.utcnow()
     assert round(station_score(Dummy(
         now, now, 0, 1), now), 2) == 0.05
     assert round(station_score(Dummy(
         now - timedelta(days=1), now, 10, 2), now), 2) == 0.1
     assert round(station_score(Dummy(
         now - timedelta(days=5), now, 10, 2), now), 2) == 0.5
     assert round(station_score(Dummy(
         now - timedelta(days=10), now, 10, 2), now), 2) == 1.0
     assert round(station_score(Dummy(
         now - timedelta(days=10), now, 10, 64), now), 2) == 6.0
     assert round(station_score(Dummy(
         now - timedelta(days=10), now, 10, 1024), now), 2) == 10.0
     assert round(station_score(Dummy(
         now - timedelta(days=10), now, 0, 1024), now), 2) == 0.5
     assert round(station_score(Dummy(
         now - timedelta(days=70), now - timedelta(days=40),
         10, 1024), now), 2) == 7.07
     assert round(station_score(Dummy(
         now - timedelta(days=190), now - timedelta(days=180),
         10, 1024), now), 2) == 3.78
     assert round(station_score(Dummy(
         now - timedelta(days=190), now - timedelta(days=180),
         10, 64), now), 2) == 2.27
Example #20
    def test_new(self):
        utcnow = util.utcnow()
        obs = WifiObservationFactory.build()
        self.data_queue.enqueue([obs])
        self.assertEqual(self.data_queue.size(), 1)
        update_wifi.delay().get()

        shard = WifiShard.shard_model(obs.mac)
        wifis = self.session.query(shard).all()
        self.assertEqual(len(wifis), 1)
        wifi = wifis[0]
        self.assertAlmostEqual(wifi.lat, obs.lat)
        self.assertAlmostEqual(wifi.max_lat, obs.lat)
        self.assertAlmostEqual(wifi.min_lat, obs.lat)
        self.assertAlmostEqual(wifi.lon, obs.lon)
        self.assertAlmostEqual(wifi.max_lon, obs.lon)
        self.assertAlmostEqual(wifi.min_lon, obs.lon)
        self.assertEqual(wifi.country, 'GB')
        self.assertEqual(wifi.radius, 0)
        self.assertEqual(wifi.samples, 1)
        self.assertEqual(wifi.created.date(), utcnow.date())
        self.assertEqual(wifi.modified.date(), utcnow.date())
        self.assertEqual(wifi.block_first, None)
        self.assertEqual(wifi.block_last, None)
        self.assertEqual(wifi.block_count, None)
Example #21
    def test_temp_blocked_admitted_again(self):
        now = util.utcnow()
        last_week = now - TEMPORARY_BLOCKLIST_DURATION - timedelta(days=1)

        obs = WifiObservationFactory()
        WifiShardFactory(
            mac=obs.mac,
            lat=None,
            lon=None,
            samples=0,
            created=last_week,
            modified=last_week,
            block_first=last_week.date(),
            block_last=last_week.date(),
            block_count=1)
        self.session.flush()

        # add a new entry for the previously blocked wifi
        self.data_queue.enqueue([obs])
        self.assertEqual(self.data_queue.size(), 1)
        update_wifi.delay().get()

        # the wifi was inserted again
        shard = WifiShard.shard_model(obs.mac)
        wifis = self.session.query(shard).all()
        self.assertEqual(len(wifis), 1)
        wifi = wifis[0]
        self.assertEqual(wifi.created.date(), last_week.date())
        self.assertAlmostEqual(wifi.lat, obs.lat)
        self.assertAlmostEqual(wifi.lon, obs.lon)
        self.assertEqual(wifi.country, 'GB')
        self.assertEqual(wifi.samples, 1)
        self.check_statcounter(StatKey.unique_wifi, 0)
Example #22
def import_latest_ocid_cells(self, diff=True, filename=None, session=None):
    url = self.app.settings['ichnaea']['ocid_url']
    apikey = self.app.settings['ichnaea']['ocid_apikey']
    if not url or not apikey:  # pragma: no cover
        return

    if filename is None:
        if diff:
            prev_hour = util.utcnow() - timedelta(hours=1)
            filename = prev_hour.strftime('cell_towers_diff-%Y%m%d%H.csv.gz')
        else:  # pragma: no cover
            filename = 'cell_towers.csv.gz'

    with closing(requests.get(url,
                              params={'apiKey': apikey,
                                      'filename': filename},
                              stream=True)) as r:
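        # stream the download in 1 MiB chunks into a self-destructing temp dir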
        with selfdestruct_tempdir() as d:
            path = os.path.join(d, filename)
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=2 ** 20):
                    f.write(chunk)
                    f.flush()

            with self.redis_pipeline() as pipe:
                with self.db_session() as dbsession:
                    if session is None:  # pragma: no cover
                        session = dbsession
                    import_stations(session,
                                    pipe,
                                    path,
                                    CELL_FIELDS)
Example #23
def export_modified_cells(self, hourly=True, bucket=None):
    if bucket is None:  # pragma: no cover
        bucket = self.app.settings['ichnaea']['s3_assets_bucket']
    if not bucket:  # pragma: no cover
        return

    now = util.utcnow()

    if hourly:
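        # export a diff covering the previous full hour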
        end_time = now.replace(minute=0, second=0)
        file_time = end_time
        file_type = 'diff'
        start_time = end_time - timedelta(hours=1)
        cond = and_(Cell.__table__.c.modified >= start_time,
                    Cell.__table__.c.modified < end_time,
                    Cell.__table__.c.lat.isnot(None))
    else:
        file_time = now.replace(hour=0, minute=0, second=0)
        file_type = 'full'
        cond = Cell.__table__.c.lat.isnot(None)

    filename = 'MLS-%s-cell-export-' % file_type
    filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')

    with selfdestruct_tempdir() as d:
        path = os.path.join(d, filename)
        with self.db_session(commit=False) as session:
            write_stations_to_csv(session, Cell.__table__, CELL_COLUMNS, cond,
                                  path, make_cell_export_dict, CELL_FIELDS)
        write_stations_to_s3(path, bucket)
Example #24
    def test_fields(self):
        now = util.utcnow()
        today = now.date()
        self.session.add(WifiShard.create(
            mac='111101123456', created=now, modified=now,
            lat=GB_LAT, max_lat=GB_LAT, min_lat=GB_LAT,
            lon=GB_LON, max_lon=GB_LON, min_lon=GB_LON,
            radius=200, region='GB', samples=10, source=StationSource.gnss,
            block_first=today, block_last=today, block_count=1,
            _raise_invalid=True,
        ))
        self.session.flush()

        wifi = self.session.query(WifiShard0).first()
        self.assertEqual(wifi.mac, '111101123456')
        self.assertEqual(wifi.created, now)
        self.assertEqual(wifi.modified, now)
        self.assertEqual(wifi.lat, GB_LAT)
        self.assertEqual(wifi.max_lat, GB_LAT)
        self.assertEqual(wifi.min_lat, GB_LAT)
        self.assertEqual(wifi.lon, GB_LON)
        self.assertEqual(wifi.max_lon, GB_LON)
        self.assertEqual(wifi.min_lon, GB_LON)
        self.assertEqual(wifi.radius, 200)
        self.assertEqual(wifi.region, 'GB')
        self.assertEqual(wifi.samples, 10)
        self.assertEqual(wifi.source, StationSource.gnss)
        self.assertEqual(wifi.block_first, today)
        self.assertEqual(wifi.block_last, today)
        self.assertEqual(wifi.block_count, 1)
Example #25
    def test_stats(self):
        day = util.utcnow().date() - timedelta(1)
        session = self.db_master_session
        stats = [
            Stat(key=STAT_TYPE['cell'], time=day, value=2000000),
            Stat(key=STAT_TYPE['wifi'], time=day, value=2000000),
            Stat(key=STAT_TYPE['unique_cell'], time=day, value=1000000),
            Stat(key=STAT_TYPE['unique_wifi'], time=day, value=2000000),
        ]
        session.add_all(stats)
        session.commit()
        request = DummyRequest()
        request.db_slave_session = self.db_master_session
        request.registry.redis_client = self.redis_client
        inst = self._make_view(request)
        result = inst.stats_view()
        self.assertEqual(result['page_title'], 'Statistics')
        self.assertEqual(
            result['metrics1'], [
                {'name': 'Unique Cells', 'value': '1.00'},
                {'name': 'Cell Observations', 'value': '2.00'},
            ])
        self.assertEqual(
            result['metrics2'], [
                {'name': 'Unique Wifi Networks', 'value': '2.00'},
                {'name': 'Wifi Observations', 'value': '2.00'},
            ])

        # call the view again, without a working db session, so
        # we can be sure to use the cached result
        inst = self._make_view(request)
        request.db_slave_session = None
        second_result = inst.stats_view()
        self.assertEqual(second_result, result)
Example #26
 def test_histogram_different_stat_name(self, ro_session):
     today = util.utcnow().date()
     ro_session.add(Stat(key=StatKey.unique_cell, time=today, value=9))
     ro_session.commit()
     result = histogram(ro_session, StatKey.unique_cell)
     first_of_month = today.replace(day=1)
     assert result == [[[unixtime(first_of_month), 9]]]
Example #27
    def __call__(self, hourly=True, _bucket=None):
        if _bucket is None:  # pragma: no cover
            bucket = self.settings['bucket']
        else:
            bucket = _bucket

        if not bucket:  # pragma: no cover
            return

        now = util.utcnow()
        start_time = None
        end_time = None

        if hourly:
            end_time = now.replace(minute=0, second=0)
            file_time = end_time
            file_type = 'diff'
            start_time = end_time - timedelta(hours=1)
        else:
            file_time = now.replace(hour=0, minute=0, second=0)
            file_type = 'full'

        filename = 'MLS-%s-cell-export-' % file_type
        filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, filename)
            with self.task.db_session(commit=False) as session:
                write_stations_to_csv(
                    session, path,
                    start_time=start_time, end_time=end_time)
            self.write_stations_to_s3(path, bucket)
Example #28
 def test_score(self):
     now = util.utcnow()
     self.assertAlmostEqual(DummyModel(
         now, now, 0, 1).score(now), 0.05, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=1), now, 10, 2).score(now), 0.1, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=5), now, 10, 2).score(now), 0.5, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=10), now, 10, 2).score(now), 1.0, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=10), now, 10, 64).score(now), 6.0, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=10), now, 10, 1024).score(now), 10.0, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=10), now, 0, 1024).score(now), 0.5, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=70), now - timedelta(days=40),
         10, 1024).score(now), 7.07, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=190), now - timedelta(days=180),
         10, 1024).score(now), 3.78, 2)
     self.assertAlmostEqual(DummyModel(
         now - timedelta(days=190), now - timedelta(days=180),
         10, 64).score(now), 2.27, 2)
Example #29
def histogram(session, name, days=365):
    today = util.utcnow().date()
    start = today - timedelta(days=days)
    stat_key = STAT_TYPE[name]
    month_key = (func.year(Stat.time), func.month(Stat.time))
    rows = session.query(func.max(Stat.value), *month_key).filter(
        Stat.key == stat_key).filter(
        Stat.time >= start).filter(
        Stat.time < today).group_by(
        *month_key).order_by(
        *month_key
    )
    result = []
    for num, year, month in rows.all():
        # use first of August to plot the highest result for July
        if month == 12:  # pragma: no cover
            next_month = date(year + 1, 1, 1)
        else:
            next_month = date(year, month + 1, 1)
        if next_month >= today:
            # we restrict dates to be at most yesterday
            next_month = today - timedelta(days=1)
        day = timegm(next_month.timetuple()) * 1000
        result.append([day, num])
    return [result]
Example #30
 def __init__(self, task, session, pipe, utcnow=None):
     DataTask.__init__(self, task, session)
     self.pipe = pipe
     self.data_queue = self.task.app.data_queues[self.queue_name]
     if utcnow is None:
         utcnow = util.utcnow()
     self.utcnow = utcnow
Example #31
    def test_multiple_cells(self):
        now = util.utcnow()
        cell = CellShardFactory(samples=100)
        cell2 = CellShardFactory(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                                 lac=cell.lac, cid=cell.cid + 1,
                                 lat=cell.lat + 1.0, lon=cell.lon + 1.0,
                                 samples=10)
        self.session.flush()

        query = self.model_query(cells=[cell, cell2])
        results = self.source.search(query)
        self.check_model_results(
            results, [cell],
            lat=cell.lat + 0.3333333, lon=cell.lon + 0.3333333,
            accuracy=CELL_MAX_ACCURACY)
        self.assertAlmostEqual(
            results.best().score, cell.score(now) + cell2.score(now), 4)
Example #32
    def test_fields(self, session):
        now = util.utcnow()
        today = now.date()
        session.add(
            WifiShard.create(
                mac='111101123456',
                created=now,
                modified=now,
                lat=GB_LAT,
                max_lat=GB_LAT,
                min_lat=GB_LAT,
                lon=GB_LON,
                max_lon=GB_LON,
                min_lon=GB_LON,
                radius=200,
                region='GB',
                samples=10,
                source=ReportSource.gnss,
                weight=1.5,
                last_seen=today,
                block_first=today,
                block_last=today,
                block_count=1,
                _raise_invalid=True,
            ))
        session.flush()

        wifi = session.query(WifiShard0).first()
        assert wifi.mac == '111101123456'
        assert wifi.created == now
        assert wifi.modified == now
        assert wifi.lat == GB_LAT
        assert wifi.max_lat == GB_LAT
        assert wifi.min_lat == GB_LAT
        assert wifi.lon == GB_LON
        assert wifi.max_lon == GB_LON
        assert wifi.min_lon == GB_LON
        assert wifi.radius == 200
        assert wifi.region == 'GB'
        assert wifi.samples == 10
        assert wifi.source == ReportSource.gnss
        assert wifi.weight == 1.5
        assert wifi.last_seen == today
        assert wifi.block_first == today
        assert wifi.block_last == today
        assert wifi.block_count == 1
Example #33
    def test_fields(self):
        now = util.utcnow()
        today = now.date()
        self.session.add(
            WifiShard.create(
                mac='111101123456',
                created=now,
                modified=now,
                lat=GB_LAT,
                max_lat=GB_LAT,
                min_lat=GB_LAT,
                lon=GB_LON,
                max_lon=GB_LON,
                min_lon=GB_LON,
                radius=200,
                region='GB',
                samples=10,
                source=StationSource.gnss,
                weight=1.5,
                last_seen=today,
                block_first=today,
                block_last=today,
                block_count=1,
                _raise_invalid=True,
            ))
        self.session.flush()

        wifi = self.session.query(WifiShard0).first()
        self.assertEqual(wifi.mac, '111101123456')
        self.assertEqual(wifi.created, now)
        self.assertEqual(wifi.modified, now)
        self.assertEqual(wifi.lat, GB_LAT)
        self.assertEqual(wifi.max_lat, GB_LAT)
        self.assertEqual(wifi.min_lat, GB_LAT)
        self.assertEqual(wifi.lon, GB_LON)
        self.assertEqual(wifi.max_lon, GB_LON)
        self.assertEqual(wifi.min_lon, GB_LON)
        self.assertEqual(wifi.radius, 200)
        self.assertEqual(wifi.region, 'GB')
        self.assertEqual(wifi.samples, 10)
        self.assertEqual(wifi.source, StationSource.gnss)
        self.assertEqual(wifi.weight, 1.5)
        self.assertEqual(wifi.last_seen, today)
        self.assertEqual(wifi.block_first, today)
        self.assertEqual(wifi.block_last, today)
        self.assertEqual(wifi.block_count, 1)
Example #34
def export_file(filename, tablename, _db=None, _session=None):
    today = util.utcnow().date()
    one_year_ago = today - timedelta(days=365)
    one_year_ago = one_year_ago.strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
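            # page through the table in limit/offset batches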
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
Example #35
    def test_ok_wifi(self):
        app = self.app
        now = util.utcnow()
        today = now.date()
        first_of_month = now.replace(day=1, hour=0, minute=0, second=0)

        wifi_data = [{
            "key": "0012AB12AB12",
            "signalToNoiseRatio": 5
        }, {
            "key": "00:34:cd:34:cd:34",
            "signalToNoiseRatio": 5
        }]

        res = app.post_json(
            '/v1/submit',
            {"items": [{
                "lat": 12.3456781,
                "lon": 23.4567892,
                "accuracy": 17,
                "wifi": wifi_data
            }]},
            status=204)
        self.assertEqual(res.body, '')
        session = self.db_master_session
        wifi_result = session.query(WifiObservation).all()
        self.assertEqual(len(wifi_result), 2)
        item = wifi_result[0]
        report_id = item.report_id
        self.assertTrue(isinstance(report_id, uuid.UUID))
        self.assertEqual(item.created.date(), today)
        self.assertEqual(item.time, first_of_month)
        self.assertEqual(item.lat, 12.3456781)
        self.assertEqual(item.lon, 23.4567892)
        self.assertEqual(item.accuracy, 17)
        self.assertEqual(item.altitude, 0)
        self.assertEqual(item.altitude_accuracy, 0)
        self.assertTrue(item.key in ("0012ab12ab12", "0034cd34cd34"))
        self.assertEqual(item.channel, 0)
        self.assertEqual(item.signal, 0)
        self.assertEqual(item.snr, 5)
        item = wifi_result[1]
        self.assertEqual(item.report_id, report_id)
        self.assertEqual(item.created.date(), today)
        self.assertEqual(item.lat, 12.3456781)
        self.assertEqual(item.lon, 23.4567892)
Example #36
 def test_stats_wifi_json(self):
     app = self.app
     today = util.utcnow().date()
     yesterday = today - timedelta(1)
     session = self.db_slave_session
     stat = Stat(key=StatKey.unique_wifi, time=yesterday, value=2)
     session.add(stat)
     session.commit()
     result = app.get('/stats_wifi.json', status=200)
     self.assertEqual(
         result.json, {'series': [
             {'data': [[timegm(yesterday.timetuple()) * 1000, 2]],
              'title': 'MLS WiFi'},
         ]}
     )
     second_result = app.get('/stats_wifi.json', status=200)
     self.assertEqual(second_result.json, result.json)
Example #37
    def test_ok_cell(self):
        app = self.app
        now = util.utcnow()
        today = now.date()
        first_of_month = now.replace(day=1, hour=0, minute=0, second=0)

        cell_data = [{
            "radio": Radio.umts.name,
            "mcc": FRANCE_MCC,
            "mnc": 1,
            "lac": 2,
            "cid": 1234
        }]
        res = app.post_json(
            '/v1/submit?key=test',
            {"items": [{
                "lat": PARIS_LAT,
                "lon": PARIS_LON,
                "time": now.strftime('%Y-%m-%d'),
                "accuracy": 10,
                "altitude": 123,
                "altitude_accuracy": 7,
                "radio": Radio.gsm.name,
                "cell": cell_data
            }]},
            status=204)
        self.assertEqual(res.body, '')

        session = self.session
        cell_result = session.query(CellObservation).all()
        self.assertEqual(len(cell_result), 1)
        item = cell_result[0]
        self.assertTrue(isinstance(item.report_id, uuid.UUID))
        self.assertEqual(item.created.date(), today)

        self.assertEqual(item.time, first_of_month)
        self.assertEqual(item.lat, PARIS_LAT)
        self.assertEqual(item.lon, PARIS_LON)
        self.assertEqual(item.accuracy, 10)
        self.assertEqual(item.altitude, 123)
        self.assertEqual(item.altitude_accuracy, 7)
        self.assertEqual(item.radio, Radio.umts)
        self.assertEqual(item.mcc, FRANCE_MCC)
        self.assertEqual(item.mnc, 1)
        self.assertEqual(item.lac, 2)
        self.assertEqual(item.cid, 1234)
Example #38
 def test_log_stats(self, app, redis, stats):
     cell, query = self._one_cell_query()
     self._post(app, [query], api_key='test')
     stats.check(counter=[
         ('data.batch.upload', 1),
         ('data.batch.upload', ['key:test']),
         ('request',
          [self.metric_path, 'method:post',
           'status:%s' % self.status]),
         (self.metric_type + '.request', [self.metric_path, 'key:test']),
     ],
                 timer=[
                     ('request', [self.metric_path, 'method:post']),
                 ])
     today = util.utcnow().date()
     assert ([k.decode('ascii') for k in redis.keys('apiuser:*')
              ] == ['apiuser:submit:test:%s' % today.strftime('%Y-%m-%d')])
Example #39
def verified_delete(self, block_id, batch=10000):
    utcnow = util.utcnow()
    with self.db_session() as session:
        block = session.query(ObservationBlock).filter(
            ObservationBlock.id == block_id).first()
        observation_type = block.measure_type
        obs_cls = OBSERVATION_TYPE_META[observation_type]['class']

        for start in range(block.start_id, block.end_id, batch):
            end = min(block.end_id, start + batch)
            q = session.query(obs_cls).filter(
                obs_cls.id >= start,
                obs_cls.id < end)
            q.delete()
            session.flush()
        block.archive_date = utcnow
        session.commit()
Example #40
    def check(self):
        api_key = None
        api_key_text = self.request.GET.get('key', None)
        skip_check = False

        if api_key_text is None:
            self.log_count('none', False)
            if self.error_on_invalidkey:
                raise self.prepare_exception(InvalidAPIKey())

        if api_key_text is not None:
            try:
                session = self.request.db_ro_session
                api_key = session.query(ApiKey).get(api_key_text)
            except Exception:
                # if we cannot connect to backend DB, skip api key check
                skip_check = True
                self.raven_client.captureException()

        if api_key is not None and api_key.should_allow(self.view_type):
            self.log_count(api_key.name, api_key.should_log(self.view_type))

            rate_key = 'apilimit:{key}:{path}:{time}'.format(
                key=api_key_text,
                path=self.metric_path,
                time=util.utcnow().strftime('%Y%m%d'))

            should_limit = rate_limit_exceeded(self.redis_client,
                                               rate_key,
                                               maxreq=api_key.maxreq)

            if should_limit:
                raise self.prepare_exception(DailyLimitExceeded())
        elif skip_check:
            pass
        else:
            if api_key_text is not None:
                self.log_count('invalid', False)
            if self.error_on_invalidkey:
                raise self.prepare_exception(InvalidAPIKey())

        # If we failed to look up an ApiKey, create an empty one
        # rather than passing None through
        api_key = api_key or ApiKey(
            valid_key=None, allow_fallback=False, allow_locate=True)
        return self.view(api_key)
Example #41
    def search_cell(self, query):
        results = self.result_list()
        now = util.utcnow()

        ambiguous_cells = []
        regions = []
        for cell in list(query.cell) + list(query.cell_area):
            code = cell.mobileCountryCode
            mcc_regions = GEOCODER.regions_for_mcc(code, metadata=True)
            # Divide score by number of possible regions for the mcc
            score = 1.0 / (len(mcc_regions) or 1.0)
            for mcc_region in mcc_regions:
                regions.append((mcc_region, score))
            if len(mcc_regions) > 1:
                ambiguous_cells.append(cell)

        # Group by region code
        grouped_regions = {}
        for region, score in regions:
            code = region.code
            if code not in grouped_regions:
                grouped_regions[code] = [region, score]
            else:
                # Sum up scores of multiple matches
                grouped_regions[code][1] += score

        if ambiguous_cells:
            # Only do a database query if the mcc is ambiguous.
            # Use the area models for area and cell entries,
            # as we are only interested in the region here,
            # which won't differ between individual cells inside and area.
            areas = query_areas(query, ambiguous_cells, self.area_model,
                                self.raven_client)
            for area in areas:
                code = area.region
                if code and code in grouped_regions:
                    grouped_regions[code][1] += area_score(area, now)

        for region, score in grouped_regions.values():
            results.add(
                self.result_type(region_code=region.code,
                                 region_name=region.name,
                                 accuracy=region.radius,
                                 score=score))

        return results
Example #42
 def test_score(self):
     now = util.utcnow()
     assert round(station_score(Dummy(now, now, 0, 1), now), 2) == 0.05
     assert (round(
         station_score(Dummy(now - timedelta(days=1), now, 10, 2), now),
         2) == 0.1)
     assert (round(
         station_score(Dummy(now - timedelta(days=5), now, 10, 2), now),
         2) == 0.5)
     assert (round(
         station_score(Dummy(now - timedelta(days=10), now, 10, 2), now),
         2) == 1.0)
     assert (round(
         station_score(Dummy(now - timedelta(days=10), now, 10, 64), now),
         2) == 6.0)
     assert (round(
         station_score(Dummy(now - timedelta(days=10), now, 10, 1024), now),
         2) == 10.0)
     assert (round(
         station_score(Dummy(now - timedelta(days=10), now, 0, 1024), now),
         2) == 0.5)
     assert (round(
         station_score(
             Dummy(now - timedelta(days=70), now - timedelta(days=40), 10,
                   1024),
             now,
         ),
         2,
     ) == 7.07)
     assert (round(
         station_score(
             Dummy(now - timedelta(days=190), now - timedelta(days=180), 10,
                   1024),
             now,
         ),
         2,
     ) == 3.78)
     assert (round(
         station_score(
             Dummy(now - timedelta(days=190), now - timedelta(days=180), 10,
                   64),
             now,
         ),
         2,
     ) == 2.27)
Example #43
 def log_unique_ip(self, valid_key):
     addr = self.request.client_addr
     if isinstance(addr, bytes):  # pragma: no cover
         addr = addr.decode('ascii')
     try:
         ip = str(ip_address(addr))
     except ValueError:  # pragma: no cover
         ip = None
     if ip:
         redis_key = 'apiuser:{api_type}:{api_key}:{date}'.format(
             api_type=self.view_type,
             api_key=valid_key,
             date=util.utcnow().date().strftime('%Y-%m-%d'),
         )
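         # track unique client IPs per key and day in a Redis HyperLogLog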
         with self.redis_client.pipeline() as pipe:
             pipe.pfadd(redis_key, ip)
             pipe.expire(redis_key, 691200)  # 8 days
             pipe.execute()
Example #44
def monitor_ocid_import(self):
    result = -1
    try:
        now = util.utcnow()
        stats_client = self.stats_client
        with self.db_session() as session:
            q = session.query(func.max(OCIDCell.created))
            max_created = q.first()[0]
        if max_created:
            # diff between now and the value, in milliseconds
            diff = now - max_created
            result = (diff.days * 86400 + diff.seconds) * 1000

        stats_client.gauge('table.ocid_cell_age', result)
    except Exception:  # pragma: no cover
        # Log but ignore the exception
        self.heka_client.raven('error')
    return result
Example #45
    def test_monitor_ocid_import(self):
        now = util.utcnow()
        expected = []
        results = []
        for i in range(35, 5, -5):
            created = now - timedelta(hours=i)
            expected.append(i * 3600000)
            CellOCIDFactory(created=created, cid=i)
            self.session.flush()
            results.append(monitor_ocid_import.delay().get())

        self.check_stats(gauge=[
            ('table', len(expected), ['table:cell_ocid_age']),
        ])
        for result, expect in zip(results, expected):
            # The values should be almost equal, ignoring differences
            # less than 10 seconds (or 9999 milliseconds / 4 places)
            self.assertAlmostEqual(result, expect, -4)
Example #46
    def test_multiple_mcc(self, geoip_db, http_session, session, source,
                          stats):
        now = util.utcnow()
        region = GEOCODER.regions_for_mcc(235, metadata=True)[0]
        area = CellAreaFactory(mcc=234, num_cells=6)
        area2 = CellAreaFactory(mcc=235, num_cells=8)
        session.flush()

        query = self.model_query(geoip_db,
                                 http_session,
                                 session,
                                 stats,
                                 cells=[area, area2])
        results = source.search(query)
        assert len(results) > 2
        best_result = results.best()
        assert best_result.region_code == region.code
        assert best_result.score == 1.25 + area_score(area, now)
Example #47
    def test_wifi_over_cell(self):
        now = util.utcnow()
        three_months = now - timedelta(days=90)
        wifi1 = WifiShardFactory(samples=1000,
                                 created=three_months,
                                 modified=now,
                                 region='US')
        wifi2 = WifiShardFactory(samples=1000,
                                 created=three_months,
                                 modified=now,
                                 region='US')
        cell = CellShardFactory(radio=Radio.gsm, samples=10)
        self.session.flush()

        query = self.model_query(cells=[cell], wifis=[wifi1, wifi2])
        res = self._call(body=query, ip=self.test_ip)
        # wifi says US with a high score, cell and geoip say UK
        self.check_model_response(res, wifi1, region='US')
Example #48
    def test_api_key_limit(self):
        london = self.geoip_data['London']
        api_key = uuid1().hex
        self.session.add(ApiKey(valid_key=api_key, maxreq=5, shortname='dis'))
        self.session.flush()

        # exhaust today's limit
        dstamp = util.utcnow().strftime("%Y%m%d")
        key = "apilimit:%s:%s" % (api_key, dstamp)
        self.redis_client.incr(key, 10)

        res = self.app.post_json(
            '%s?key=%s' % (self.url, api_key), {},
            extra_environ={'HTTP_X_FORWARDED_FOR': london['ip']},
            status=403)

        errors = res.json['error']['errors']
        self.assertEqual(errors[0]['reason'], 'dailyLimitExceeded')
Example #49
    def test_global_stats_missing_today(self, session):
        day = util.utcnow().date() - timedelta(1)
        yesterday = day - timedelta(days=1)
        stats = [
            Stat(key=StatKey.cell, time=yesterday, value=5000000),
            Stat(key=StatKey.cell, time=day, value=6000000),
            Stat(key=StatKey.wifi, time=day, value=3000000),
            Stat(key=StatKey.unique_cell, time=yesterday, value=4000000),
        ]
        session.add_all(stats)
        session.commit()

        result = global_stats(session)
        assert (result == {
            'blue': '0.00', 'unique_blue': '0.00',
            'cell': '6.00', 'unique_cell': '4.00',
            'wifi': '3.00', 'unique_wifi': '0.00',
        })
Example #50
 def test_stats_cell_json(self):
     yesterday = util.utcnow().date() - timedelta(1)
     self.session.add(
         Stat(key=StatKey.unique_cell, time=yesterday, value=2))
     self.session.add(
         Stat(key=StatKey.unique_cell_ocid, time=yesterday, value=5))
     self.session.commit()
     result = self.app.get('/stats_cell.json', status=200)
     self.assertEqual(
         result.json, {'series': [
             {'data': [[timegm(yesterday.timetuple()) * 1000, 2]],
              'title': 'MLS Cells'},
             {'data': [[timegm(yesterday.timetuple()) * 1000, 5]],
              'title': 'OCID Cells'},
         ]}
     )
     second_result = self.app.get('/stats_cell.json', status=200)
     self.assertEqual(second_result.json, result.json)
Example #51
    def test_location_update_cell(self):
        now = util.utcnow()
        before = now - timedelta(days=1)
        session = self.db_master_session
        k1 = dict(radio=1, mcc=1, mnc=2, lac=3, cid=4)
        k2 = dict(radio=1, mcc=1, mnc=2, lac=6, cid=8)
        k3 = dict(radio=1, mcc=1, mnc=2, lac=-1, cid=-1)
        data = [
            Cell(new_measures=3, total_measures=5, **k1),
            CellMeasure(lat=1.0, lon=1.0, **k1),
            CellMeasure(lat=1.002, lon=1.003, **k1),
            CellMeasure(lat=1.004, lon=1.006, **k1),
            # The lac, cid are invalid and should be skipped
            CellMeasure(lat=1.5, lon=1.5, **k3),
            CellMeasure(lat=1.502, lon=1.503, **k3),
            Cell(lat=2.0, lon=2.0, new_measures=2, total_measures=4, **k2),
            # the lat/lon is bogus and mismatches the line above on purpose
            # to make sure old measures are skipped
            CellMeasure(lat=-1.0, lon=-1.0, created=before, **k2),
            CellMeasure(lat=-1.0, lon=-1.0, created=before, **k2),
            CellMeasure(lat=2.002, lon=2.004, **k2),
            CellMeasure(lat=2.002, lon=2.004, **k2),
        ]
        session.add_all(data)
        session.commit()

        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (2, 0))
        self.check_stats(
            total=2,
            timer=['task.data.location_update_cell'],
            gauge=['task.data.location_update_cell.new_measures_1_100'],
        )

        cells = session.query(Cell).filter(Cell.cid != CELLID_LAC).all()
        self.assertEqual(len(cells), 2)
        self.assertEqual([c.new_measures for c in cells], [0, 0])
        for cell in cells:
            if cell.cid == 4:
                self.assertEqual(cell.lat, 1.002)
                self.assertEqual(cell.lon, 1.003)
            elif cell.cid == 8:
                self.assertEqual(cell.lat, 2.001)
                self.assertEqual(cell.lon, 2.002)
Example #52
    def test_insert_observations_out_of_range(self):
        session = self.session
        time = util.utcnow() - timedelta(days=1)

        obs = dict(created=time,
                   lat=PARIS_LAT,
                   lon=PARIS_LON,
                   time=time,
                   accuracy=0,
                   altitude=0,
                   altitude_accuracy=0,
                   radio=int(Radio.gsm),
                   mcc=FRANCE_MCC,
                   mnc=2,
                   lac=3,
                   cid=4)
        entries = [
            {
                "asu": 8,
                "signal": -70,
                "ta": 32
            },
            {
                "asu": -10,
                "signal": -300,
                "ta": -10
            },
            {
                "asu": 256,
                "signal": 16,
                "ta": 128
            },
        ]
        for e in entries:
            e.update(obs)

        result = insert_measures_cell.delay(entries)
        self.assertEqual(result.get(), 3)

        observations = session.query(CellObservation).all()
        self.assertEqual(len(observations), 3)
        self.assertEqual(set([o.asu for o in observations]), set([-1, 8]))
        self.assertEqual(set([o.signal for o in observations]), set([0, -70]))
        self.assertEqual(set([o.ta for o in observations]), set([0, 32]))
Example #53
    def test_cell_histogram(self):
        session = self.db_master_session
        today = util.utcnow().date()
        yesterday = (today - timedelta(1))
        two_days = (today - timedelta(2))
        long_ago = (today - timedelta(3))
        measures = [
            CellMeasure(lat=1.0, lon=2.0, created=today),
            CellMeasure(lat=1.0, lon=2.0, created=today),
            CellMeasure(lat=1.0, lon=2.0, created=yesterday),
            CellMeasure(lat=1.0, lon=2.0, created=two_days),
            CellMeasure(lat=1.0, lon=2.0, created=two_days),
            CellMeasure(lat=1.0, lon=2.0, created=two_days),
            CellMeasure(lat=1.0, lon=2.0, created=long_ago),
        ]
        session.add_all(measures)
        session.commit()

        cell_histogram.delay(ago=3).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 1)
        self.assertEqual(stats[0].key, STAT_TYPE['cell'])
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)

        # fill up newer dates
        cell_histogram.delay(ago=2).get()
        cell_histogram.delay(ago=1).get()
        cell_histogram.delay(ago=0).get()

        # test duplicate execution
        cell_histogram.delay(ago=1).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 4)
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)
        self.assertEqual(stats[1].time, two_days)
        self.assertEqual(stats[1].value, 4)
        self.assertEqual(stats[2].time, yesterday)
        self.assertEqual(stats[2].value, 5)
        self.assertEqual(stats[3].time, today)
        self.assertEqual(stats[3].value, 7)
Example #54
    def test_unique_cell_histogram(self):
        session = self.db_master_session
        today = util.utcnow().date()
        one_day = (today - timedelta(1))
        two_days = (today - timedelta(2))
        long_ago = (today - timedelta(3))
        cells = [
            Cell(created=long_ago, radio=0, mcc=1, mnc=2, lac=3, cid=4),
            Cell(created=two_days, radio=2, mcc=1, mnc=2, lac=3, cid=4),
            Cell(created=two_days, radio=2, mcc=1, mnc=2, lac=3, cid=5),
            Cell(created=one_day, radio=0, mcc=2, mnc=2, lac=3, cid=5),
            Cell(created=today, radio=0, mcc=1, mnc=3, lac=3, cid=4),
            Cell(created=today, radio=0, mcc=1, mnc=2, lac=4, cid=CELLID_LAC),
        ]
        session.add_all(cells)
        session.commit()

        result = unique_cell_histogram.delay(ago=3)
        self.assertEqual(result.get(), 1)

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 1)
        self.assertEqual(stats[0].key, STAT_TYPE['unique_cell'])
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)

        # fill up newer dates
        unique_cell_histogram.delay(ago=2).get()
        unique_cell_histogram.delay(ago=1).get()
        unique_cell_histogram.delay(ago=0).get()

        # test duplicate execution
        unique_cell_histogram.delay(ago=1).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 4)
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)
        self.assertEqual(stats[1].time, two_days)
        self.assertEqual(stats[1].value, 3)
        self.assertEqual(stats[2].time, one_day)
        self.assertEqual(stats[2].value, 4)
        self.assertEqual(stats[3].time, today)
        self.assertEqual(stats[3].value, 5)
Example #55
    def test_leaders(self):
        session = self.db_master_session
        today = util.utcnow().date()
        yesterday = today - timedelta(days=1)
        for i in range(7, 1, -1):
            user = User(nickname=unicode(i))
            session.add(user)
            session.flush()
            score1 = Score(userid=user.id, time=today, value=i)
            score1.name = 'location'
            session.add(score1)
            score2 = Score(userid=user.id, time=yesterday, value=i + 1)
            score2.name = 'location'
            session.add(score2)
        session.commit()
        request = DummyRequest()
        request.db_slave_session = self.db_master_session
        request.registry.redis_client = self.redis_client
        inst = self._make_view(request)
        result = inst.leaders_view()
        self.assertEqual(result['leaders1'], [{
            'anchor': u'7',
            'nickname': u'7',
            'num': 15,
            'pos': 1
        }, {
            'anchor': u'6',
            'nickname': u'6',
            'num': 13,
            'pos': 2
        }])
        self.assertEqual(result['leaders2'], [{
            'anchor': u'5',
            'nickname': u'5',
            'num': 11,
            'pos': 3
        }])

        # call the view again, without a working db session, so
        # we can be sure to use the cached result
        inst = self._make_view(request)
        request.db_slave_session = None
        second_result = inst.leaders_view()
        self.assertEqual(second_result, result)
Example #56
    def send(self, queue_items):
        # ignore metadata
        reports = [item["report"] for item in queue_items]

        _, bucketname, path = urlparse(self.config.url)[:3]
        # s3 key names start without a leading slash
        path = path.lstrip("/")
        if not path.endswith("/"):
            path += "/"

        year, month, day = util.utcnow().timetuple()[:3]

        # strip away queue prefix again
        parts = self.queue_key.split(":")
        source = parts[1]
        api_key = parts[2]

        obj_name = path.format(source=source,
                               api_key=api_key,
                               year=year,
                               month=month,
                               day=day)
        obj_name += uuid.uuid1().hex + ".json.gz"

        try:
            data = util.encode_gzip(
                json.dumps({"items": reports}).encode(),
                compresslevel=7)

            s3 = boto3.resource("s3")
            bucket = s3.Bucket(bucketname)

            obj = bucket.Object(obj_name)
            obj.put(Body=data,
                    ContentEncoding="gzip",
                    ContentType="application/json")

            METRICS.incr("data.export.upload",
                         tags=self.stats_tags + ["status:success"])
        except Exception:
            METRICS.incr("data.export.upload",
                         tags=self.stats_tags + ["status:failure"])
            raise
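
The object key built in send() relies on two conventions that are only implicit here: the configured URL path is assumed to contain {source}/{api_key}/{year}/{month}/{day} placeholders, and the queue key is assumed to be colon-separated with the source and API key in the second and third positions. A minimal sketch of the name construction under those assumptions (URL, queue key, and date values are hypothetical; imports shown are Python 3):

import uuid
from urllib.parse import urlparse

config_url = "s3://my-export-bucket/backups/{source}/{api_key}/{year}/{month}/{day}/"
queue_key = "queue_export:gnss:test_api_key"  # hypothetical queue key layout

_, bucketname, path = urlparse(config_url)[:3]
path = path.lstrip("/")          # S3 key names start without a leading slash
if not path.endswith("/"):
    path += "/"

_, source, api_key = queue_key.split(":")
obj_name = path.format(source=source, api_key=api_key,
                       year=2024, month=1, day=31)
obj_name += uuid.uuid1().hex + ".json.gz"
# e.g. backups/gnss/test_api_key/2024/1/31/<hex>.json.gz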
Example #57
    def test_insert_measures_out_of_range(self):
        session = self.db_master_session
        time = util.utcnow() - timedelta(days=1)

        measure = dict(created=encode_datetime(time),
                       lat=PARIS_LAT,
                       lon=PARIS_LON,
                       time=encode_datetime(time),
                       accuracy=0,
                       altitude=0,
                       altitude_accuracy=0,
                       radio=RADIO_TYPE['gsm'],
                       mcc=FRANCE_MCC,
                       mnc=2,
                       lac=3,
                       cid=4)
        entries = [
            {
                "asu": 8,
                "signal": -70,
                "ta": 32
            },
            {
                "asu": -10,
                "signal": -300,
                "ta": -10
            },
            {
                "asu": 256,
                "signal": 16,
                "ta": 128
            },
        ]
        for e in entries:
            e.update(measure)

        result = insert_measures_cell.delay(entries)
        self.assertEqual(result.get(), 3)

        measures = session.query(CellMeasure).all()
        self.assertEqual(len(measures), 3)
        self.assertEqual(set([m.asu for m in measures]), set([-1, 8]))
        self.assertEqual(set([m.signal for m in measures]), set([0, -70]))
        self.assertEqual(set([m.ta for m in measures]), set([0, 32]))
Example #58
def query_cells(query, lookups, model, raven_client):
    """Given a location query and a list of lookup instances, query the
    database and return a list of model objects."""
    cellids = [lookup.cellid for lookup in lookups]
    if not cellids:
        return []

    # load all fields used in score calculation and those we
    # need for the position
    load_fields = (
        "cellid",
        "lat",
        "lon",
        "radius",
        "region",
        "samples",
        "created",
        "modified",
        "last_seen",
        "block_last",
        "block_count",
    )
    result = []
    today = util.utcnow().date()

    try:
        shards = defaultdict(list)
        for lookup in lookups:
            shards[model.shard_model(lookup.radioType)].append(lookup.cellid)

        for shard, shard_cellids in shards.items():
            columns = shard.__table__.c
            fields = [getattr(columns, f) for f in load_fields]
            rows = (query.session.execute(
                select(fields).where(columns.lat.isnot(None)).where(
                    columns.lon.isnot(None)).where(
                        columns.cellid.in_(shard_cellids)))).fetchall()

            result.extend(
                [row for row in rows if not station_blocked(row, today)])
    except Exception:
        raven_client.captureException()

    return result
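
query_cells() only assumes that each lookup exposes cellid and radioType, that the model class provides shard_model(), and that station_blocked() filters out blocklisted stations; that helper is referenced above but not defined here. A hypothetical stand-in (not necessarily the project's actual blocklist rule) could treat a row as blocked while its most recent block date falls inside a fixed cool-off window:

from datetime import timedelta

TEMP_BLOCK_DAYS = 30  # hypothetical cool-off window, not taken from the project


def station_blocked(row, today):
    # Hypothetical stand-in: the station counts as blocked if it was
    # blocked at all within the last TEMP_BLOCK_DAYS days.
    if not row.block_last:
        return False
    return (today - row.block_last) < timedelta(days=TEMP_BLOCK_DAYS)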
Example #59
    def test_wifi_histogram(self):
        session = self.db_master_session
        today = util.utcnow().date()
        yesterday = (today - timedelta(1))
        two_days = (today - timedelta(2))
        long_ago = (today - timedelta(3))
        observations = [
            WifiObservation(lat=1.0, lon=2.0, created=today),
            WifiObservation(lat=1.0, lon=2.0, created=today),
            WifiObservation(lat=1.0, lon=2.0, created=yesterday),
            WifiObservation(lat=1.0, lon=2.0, created=two_days),
            WifiObservation(lat=1.0, lon=2.0, created=two_days),
            WifiObservation(lat=1.0, lon=2.0, created=two_days),
            WifiObservation(lat=1.0, lon=2.0, created=long_ago),
        ]
        session.add_all(observations)
        session.commit()

        wifi_histogram.delay(ago=3).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 1)
        self.assertEqual(stats[0].key, StatKey.wifi)
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)

        # fill in newer dates
        wifi_histogram.delay(ago=2).get()
        wifi_histogram.delay(ago=1).get()
        wifi_histogram.delay(ago=0).get()

        # test duplicate execution
        wifi_histogram.delay(ago=1).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 4)
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)
        self.assertEqual(stats[1].time, two_days)
        self.assertEqual(stats[1].value, 4)
        self.assertEqual(stats[2].time, yesterday)
        self.assertEqual(stats[2].value, 5)
        self.assertEqual(stats[3].time, today)
        self.assertEqual(stats[3].value, 7)
Example #60
    def test_scan_areas_update(self):
        session = self.session
        self.add_line_of_cells_and_scan_lac()
        today = util.utcnow().date()

        lac = session.query(CellArea).filter(CellArea.lac == 1).first()

        # We produced a sequence of 0.2-degree-on-a-side cell bounding
        # boxes centered at [0, 1, 2, ..., 9] degrees, so the combined
        # lower-left corner is at (-0.1, -0.1) and the upper-right corner
        # is at (9.1, 9.1). We should therefore see a LAC centroid at
        # (4.5, 4.5) with a range of 723,001m.
        self.assertEqual(lac.lat, 4.5)
        self.assertEqual(lac.lon, 4.5)
        self.assertEqual(lac.range, 723001)
        self.assertEqual(lac.created.date(), today)
        self.assertEqual(lac.modified.date(), today)
        self.assertEqual(lac.num_cells, 10)
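
The expected range is the distance from the LAC centroid (4.5, 4.5) to the farthest corner of the combined bounding box, which is (-0.1, -0.1) because longitude degrees span more ground near the equator. A rough spherical haversine check (the earth radius below is an assumption; the exact 723,001 m figure comes from the project's own distance routine) lands at about 723 km:

from math import asin, cos, radians, sin, sqrt

def haversine_m(lat1, lon1, lat2, lon2, radius_m=6371000.0):
    # Great-circle distance on a sphere; the mean earth radius is an assumption.
    phi1, phi2 = radians(lat1), radians(lat2)
    dphi = radians(lat2 - lat1)
    dlmb = radians(lon2 - lon1)
    a = sin(dphi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlmb / 2) ** 2
    return 2 * radius_m * asin(sqrt(a))

# Distance from the LAC centroid to the farthest bounding-box corner:
# approximately 723,000 m, matching the asserted 723,001 m up to rounding.
print(round(haversine_m(4.5, 4.5, -0.1, -0.1)))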