Example #1
0
    def test_skip_delete_new_blocks(self):
        now = util.utcnow()
        today_0000 = now.replace(hour=0, minute=0, second=0, tzinfo=pytz.UTC)
        yesterday_0000 = today_0000 - timedelta(days=1)
        yesterday_2359 = today_0000 - timedelta(seconds=1)
        old = now - timedelta(days=5)
        session = self.db_master_session

        for i in range(100, 150, 10):
            block = ObservationBlock()
            block.measure_type = ObservationType.cell
            block.start_id = i
            block.end_id = i + 10
            block.s3_key = 'fake_key'
            block.archive_sha = 'fake_sha'
            block.archive_date = None
            session.add(block)

        observations = []
        for i in range(100, 110):
            observations.append(CellObservation(id=i, created=old))
        for i in range(110, 120):
            observations.append(CellObservation(id=i, created=yesterday_0000))
        for i in range(120, 130):
            observations.append(CellObservation(id=i, created=yesterday_2359))
        for i in range(130, 140):
            observations.append(CellObservation(id=i, created=today_0000))
        for i in range(140, 150):
            observations.append(CellObservation(id=i, created=now))

        session.add_all(observations)
        session.commit()

        def _archived_blocks():
            blocks = session.query(ObservationBlock).all()
            return len([b for b in blocks if b.archive_date is not None])

        def _delete(days=7):
            with patch.object(S3Backend,
                              'check_archive',
                              lambda x, y, z: True):
                delete_cellmeasure_records.delay(days_old=days).get()
            session.commit()

        _delete(days=7)
        self.assertEquals(session.query(CellObservation).count(), 50)
        self.assertEqual(_archived_blocks(), 0)

        _delete(days=2)
        self.assertEquals(session.query(CellObservation).count(), 40)
        self.assertEqual(_archived_blocks(), 1)

        _delete(days=1)
        self.assertEquals(session.query(CellObservation).count(), 20)
        self.assertEqual(_archived_blocks(), 3)

        _delete(days=0)
        self.assertEquals(session.query(CellObservation).count(), 0)
        self.assertEqual(_archived_blocks(), 5)
Example #2
0
 def test_mcc_latlon(self):
     sample = dict(radio=Radio.gsm,
                   mnc=6,
                   lac=1,
                   cid=2,
                   lat=GB_LAT,
                   lon=GB_LON)
     assert CellObservation.create(mcc=GB_MCC, **sample) is not None
     assert CellObservation.create(mcc=262, **sample) is None
Example #3
0
    def add_line_of_cells_and_scan_lac(self):
        session = self.session
        big = 1.0
        small = big / 10
        keys = dict(radio=Radio.cdma, mcc=1, mnc=1, lac=1)
        observations = [
            CellObservation(lat=ctr + xd, lon=ctr + yd, cid=cell, **keys)
            for cell in range(10)
            for ctr in [cell * big]
            for (xd, yd) in [(small, small),
                             (small, -small),
                             (-small, small),
                             (-small, -small)]
        ]
        session.add_all(observations)

        cells = [
            Cell(lat=ctr, lon=ctr, cid=cell,
                 new_measures=4, total_measures=1, **keys)
            for cell in range(10)
            for ctr in [cell * big]
        ]

        session.add_all(cells)
        session.commit()
        result = location_update_cell.delay(min_new=0,
                                            max_new=9999,
                                            batch=len(observations))
        self.assertEqual(result.get(), (len(cells), 0))
        scan_areas.delay()
Example #4
0
    def test_unthrottle_cell_observations(self):
        session = self.db_master_session
        block = ObservationBlock()
        block.measure_type = ObservationType.cell
        block.start_id = 120
        block.end_id = 140
        block.s3_key = 'fake_key'
        block.archive_sha = 'fake_sha'
        block.archive_date = None
        session.add(block)

        gsm = RADIO_TYPE['gsm']
        k = dict(radio=gsm, mcc=1, mnc=2, lac=4, lat=1.0, lon=1.0)
        for i in range(100, 150):
            session.add(
                CellObservation(id=i, cid=i, created=self.really_old, **k))
            session.add(Cell(total_measures=11000, cid=i, **k))
        session.commit()

        with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
            delete_cellmeasure_records.delay(batch=3).get()

        cell_unthrottle_measures.delay(10000, 1000).get()

        cells = session.query(Cell).all()
        self.assertEquals(len(cells), 50)
        for cell in cells:
            if 120 <= cell.cid and cell.cid < 140:
                self.assertEquals(cell.total_measures, 0)
            else:
                self.assertEquals(cell.total_measures, 1)

        self.check_stats(counter=['items.cell_unthrottled'])
    def test_customjson(self):
        now = util.utcnow()
        report_id = uuid.uuid1()
        obs = CellObservation.create(
            radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
            report_id=report_id, lat=GB_LAT, lon=GB_LON, created=now)

        json_data = kombu_dumps(obs)
        self.assertTrue('accuracy' not in json_data)

        result = kombu_loads(json_data)
        self.assertTrue(type(result), CellObservation)
        self.assertTrue(result.accuracy is None)
        self.assertEqual(type(result.report_id), uuid.UUID)
        self.assertEqual(result.report_id, report_id)
        self.assertEqual(type(result.radio), Radio)
        self.assertEqual(result.radio, Radio.gsm)
        self.assertEqual(result.mcc, GB_MCC)
        self.assertEqual(result.mnc, 5)
        self.assertEqual(result.lac, 12345)
        self.assertEqual(result.cid, 23456)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
        self.assertEqual(type(result.created), datetime.datetime)
        self.assertEqual(result.created, now)
Example #6
0
    def test_schedule_cell_observations(self):
        session = self.db_master_session

        blocks = schedule_cellmeasure_archival.delay(batch=1).get()
        self.assertEquals(len(blocks), 0)

        observations = []
        for i in range(20):
            observations.append(CellObservation(created=self.really_old))
        session.add_all(observations)
        session.flush()
        start_id = observations[0].id

        blocks = schedule_cellmeasure_archival.delay(batch=15).get()
        self.assertEquals(len(blocks), 1)
        block = blocks[0]
        self.assertEquals(block, (start_id, start_id + 15))

        blocks = schedule_cellmeasure_archival.delay(batch=6).get()
        self.assertEquals(len(blocks), 0)

        blocks = schedule_cellmeasure_archival.delay(batch=5).get()
        self.assertEquals(len(blocks), 1)
        block = blocks[0]
        self.assertEquals(block, (start_id + 15, start_id + 20))

        blocks = schedule_cellmeasure_archival.delay(batch=1).get()
        self.assertEquals(len(blocks), 0)
Example #7
0
    def test_fields(self):
        obs = CellObservation.create(radio=Radio.gsm,
                                     mcc=GB_MCC,
                                     mnc=5,
                                     lac=12345,
                                     cid=23456,
                                     lat=GB_LAT,
                                     lon=GB_LON,
                                     pressure=1010.2,
                                     source='gnss',
                                     timestamp=1405602028568,
                                     asu=26,
                                     signal=-61,
                                     ta=10)

        assert obs.lat == GB_LAT
        assert obs.lon == GB_LON
        assert obs.pressure == 1010.2
        assert obs.source == ReportSource.gnss
        assert obs.timestamp == 1405602028568
        assert obs.radio == Radio.gsm
        assert obs.mcc == GB_MCC
        assert obs.mnc == 5
        assert obs.lac == 12345
        assert obs.cid == 23456
        assert obs.asu == 26
        assert obs.signal == -61
        assert obs.ta == 10
        assert obs.shard_id == 'gsm'
Example #8
0
    def test_location_update_cell(self):
        now = util.utcnow()
        before = now - timedelta(days=1)
        schema = ValidCellKeySchema()
        session = self.db_master_session

        k1 = dict(radio=1, mcc=1, mnc=2, lac=3, cid=4)
        k2 = dict(radio=1, mcc=1, mnc=2, lac=6, cid=8)
        k3 = dict(radio=1,
                  mcc=1,
                  mnc=2,
                  lac=schema.fields['lac'].missing,
                  cid=schema.fields['cid'].missing)
        data = [
            Cell(new_measures=3, total_measures=5, **k1),
            CellObservation(lat=1.0, lon=1.0, created=now, **k1),
            CellObservation(lat=1.002, lon=1.003, created=now, **k1),
            CellObservation(lat=1.004, lon=1.006, created=now, **k1),
            # The lac, cid are invalid and should be skipped
            CellObservation(lat=1.5, lon=1.5, created=now, **k3),
            CellObservation(lat=1.502, lon=1.503, created=now, **k3),
            Cell(lat=2.0, lon=2.0, new_measures=2, total_measures=4, **k2),
            # the lat/lon is bogus and mismatches the line above on purpose
            # to make sure old observations are skipped
            CellObservation(lat=-1.0, lon=-1.0, created=before, **k2),
            CellObservation(lat=-1.0, lon=-1.0, created=before, **k2),
            CellObservation(lat=2.002, lon=2.004, created=now, **k2),
            CellObservation(lat=2.002, lon=2.004, created=now, **k2),
        ]
        session.add_all(data)
        session.commit()

        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (2, 0))
        self.check_stats(
            total=2,
            timer=['task.data.location_update_cell'],
            gauge=['task.data.location_update_cell.new_measures_1_100'],
        )

        cells = session.query(Cell).all()
        self.assertEqual(len(cells), 2)
        self.assertEqual([c.new_measures for c in cells], [0, 0])
        for cell in cells:
            if cell.cid == 4:
                self.assertEqual(cell.lat, 1.002)
                self.assertEqual(cell.lon, 1.003)
            elif cell.cid == 8:
                self.assertEqual(cell.lat, 2.001)
                self.assertEqual(cell.lon, 2.002)
Example #9
0
    def process_report(self, data):
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    if existing['ta'] > cell['ta'] or \
                       (existing['signal'] != 0 and
                        existing['signal'] < cell['signal']) or \
                       existing['asu'] < cell['asu']:
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell
        cell_observations = cell_observations.values()

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    if existing['signal'] != 0 and \
                       existing['signal'] < wifi['signal']:
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi
            wifi_observations = wifi_observations.values()
        return (cell_observations, wifi_observations)
Example #10
0
    def process_report(self, data):
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    if existing['ta'] > cell['ta'] or \
                       (existing['signal'] != 0 and
                        existing['signal'] < cell['signal']) or \
                       existing['asu'] < cell['asu']:
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell
        cell_observations = cell_observations.values()

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    if existing['signal'] != 0 and \
                       existing['signal'] < wifi['signal']:
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi
            wifi_observations = wifi_observations.values()
        return (cell_observations, wifi_observations)
Example #11
0
    def test_max_min_range_update(self):
        session = self.session

        k1 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=3, cid=4)
        data = [
            Cell(lat=1.001,
                 lon=-1.001,
                 max_lat=1.002,
                 min_lat=1.0,
                 max_lon=-1.0,
                 min_lon=-1.002,
                 new_measures=2,
                 total_measures=4,
                 **k1),
            CellObservation(lat=1.001, lon=-1.003, **k1),
            CellObservation(lat=1.005, lon=-1.007, **k1),
        ]
        session.add_all(data)
        session.commit()

        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (1, 0))

        cells = session.query(Cell).all()
        self.assertEqual(len(cells), 1)
        cell = cells[0]
        self.assertEqual(cell.lat, 1.002)
        self.assertEqual(cell.max_lat, 1.005)
        self.assertEqual(cell.min_lat, 1.0)
        self.assertEqual(cell.lon, -1.003)
        self.assertEqual(cell.max_lon, -1.0)
        self.assertEqual(cell.min_lon, -1.007)

        # independent calculation: the cell bounding box is
        # (1.000, -1.007) to (1.005, -1.000), with centroid
        # at (1.002, -1.003); worst distance from centroid
        # to any corner is 556m
        self.assertEqual(cell.range, 556)
Example #12
0
 def test_json(self):
     obs = CellObservationFactory.build(accuracy=None)
     result = CellObservation.from_json(simplejson.loads(
         simplejson.dumps(obs.to_json())))
     self.assertTrue(type(result), CellObservation)
     self.assertTrue(result.accuracy is None)
     self.assertEqual(type(result.radio), Radio)
     self.assertEqual(result.radio, obs.radio)
     self.assertEqual(result.mcc, obs.mcc)
     self.assertEqual(result.mnc, obs.mnc)
     self.assertEqual(result.lac, obs.lac)
     self.assertEqual(result.cid, obs.cid)
     self.assertEqual(result.lat, obs.lat)
     self.assertEqual(result.lon, obs.lon)
Example #13
0
    def test_json(self):
        obs = CellObservationFactory.build(accuracy=None, source="fixed")
        result = CellObservation.from_json(json.loads(json.dumps(obs.to_json())))

        assert type(result) is CellObservation
        assert result.accuracy is None
        assert type(result.radio), Radio
        assert result.radio == obs.radio
        assert result.mcc == obs.mcc
        assert result.mnc == obs.mnc
        assert result.lac == obs.lac
        assert result.cid == obs.cid
        assert result.lat == obs.lat
        assert result.lon == obs.lon
        assert result.source is ReportSource.fixed
        assert type(result.source) is ReportSource
Example #14
0
    def test_fields(self):
        obs = CellObservation.create(
            radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
            lat=GB_LAT, lon=GB_LON,
            asu=26, signal=-61, ta=10)

        self.assertEqual(obs.lat, GB_LAT)
        self.assertEqual(obs.lon, GB_LON)
        self.assertEqual(obs.radio, Radio.gsm)
        self.assertEqual(obs.mcc, GB_MCC)
        self.assertEqual(obs.mnc, 5)
        self.assertEqual(obs.lac, 12345)
        self.assertEqual(obs.cid, 23456)
        self.assertEqual(obs.asu, 26)
        self.assertEqual(obs.signal, -61)
        self.assertEqual(obs.ta, 10)
Example #15
0
    def test_internaljson(self):
        obs = CellObservation.create(
            radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
            lat=GB_LAT, lon=GB_LON)

        result = internal_loads(internal_dumps(obs))
        self.assertTrue(type(result), CellObservation)
        self.assertTrue(result.accuracy is None)
        self.assertEqual(type(result.radio), Radio)
        self.assertEqual(result.radio, Radio.gsm)
        self.assertEqual(result.mcc, GB_MCC)
        self.assertEqual(result.mnc, 5)
        self.assertEqual(result.lac, 12345)
        self.assertEqual(result.cid, 23456)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
Example #16
0
    def test_backup_cell_to_s3(self):
        session = self.db_master_session
        batch_size = 10
        observations = []
        for i in range(batch_size):
            observations.append(CellObservation(created=self.really_old))
        session.add_all(observations)
        session.flush()
        start_id = observations[0].id

        blocks = schedule_cellmeasure_archival.delay(batch=batch_size).get()
        self.assertEquals(len(blocks), 1)
        block = blocks[0]
        self.assertEquals(block, (start_id, start_id + batch_size))

        with mock_s3():
            with patch.object(S3Backend,
                              'backup_archive', lambda x, y, z: True):
                write_cellmeasure_s3_backups.delay(cleanup_zip=False).get()

                msgs = self.heka_client.stream.msgs
                info_msgs = [m for m in msgs if m.type == 'oldstyle']
                self.assertEquals(1, len(info_msgs))
                info = info_msgs[0]
                fname = info.payload.split(":")[-1]

                myzip = ZipFile(fname)
                try:
                    contents = set(myzip.namelist())
                    expected_contents = set(['alembic_revision.txt',
                                             'cell_measure.csv'])
                    self.assertEquals(expected_contents, contents)
                finally:
                    myzip.close()

        blocks = session.query(ObservationBlock).all()

        self.assertEquals(len(blocks), 1)
        block = blocks[0]

        actual_sha = hashlib.sha1()
        actual_sha.update(open(fname, 'rb').read())
        self.assertEquals(block.archive_sha, actual_sha.digest())
        self.assertTrue(block.s3_key is not None)
        self.assertTrue('/cell_' in block.s3_key)
        self.assertTrue(block.archive_date is None)
Example #17
0
    def test_json(self):
        obs = CellObservationFactory.build(
            accuracy=None, source='fixed')
        result = CellObservation.from_json(simplejson.loads(
            simplejson.dumps(obs.to_json())))

        assert type(result) is CellObservation
        assert result.accuracy is None
        assert type(result.radio), Radio
        assert result.radio == obs.radio
        assert result.mcc == obs.mcc
        assert result.mnc == obs.mnc
        assert result.lac == obs.lac
        assert result.cid == obs.cid
        assert result.lat == obs.lat
        assert result.lon == obs.lon
        assert result.source is ReportSource.fixed
        assert type(result.source) is ReportSource
Example #18
0
    def test_monitor_measures(self):
        session = self.db_master_session

        result = monitor_measures.delay().get()
        self.check_stats(
            gauge=[('table.cell_measure', 1), ('table.wifi_measure', 1)],
        )
        self.assertEqual(result, {'cell_measure': -1, 'wifi_measure': -1})

        # add some observations
        session.add_all([CellObservation() for i in range(3)])
        session.add_all([WifiObservation() for i in range(5)])
        session.flush()

        result = monitor_measures.delay().get()
        self.check_stats(
            gauge=[('table.cell_measure', 2), ('table.wifi_measure', 2)],
        )
        self.assertEqual(result, {'cell_measure': 3, 'wifi_measure': 5})
Example #19
0
    def test_scan_lacs_asymmetric(self):
        session = self.db_master_session
        big = 0.1
        small = big / 10
        keys = dict(radio=1, mcc=1, mnc=1, lac=1)
        observations = [
            CellObservation(lat=ctr + xd, lon=ctr + yd, cid=cell, **keys)
            for cell in range(6) for ctr in [(2**cell) * big]
            for (xd, yd) in [(small, small), (small,
                                              -small), (-small,
                                                        small), (-small,
                                                                 -small)]
        ]
        session.add_all(observations)

        cells = [
            Cell(lat=ctr,
                 lon=ctr,
                 cid=cell,
                 new_measures=4,
                 total_measures=1,
                 **keys) for cell in range(6) for ctr in [(2**cell) * big]
        ]

        session.add_all(cells)
        session.commit()
        result = location_update_cell.delay(min_new=0,
                                            max_new=9999,
                                            batch=len(observations))
        self.assertEqual(result.get(), (len(cells), 0))
        scan_lacs.delay()
        lac = session.query(CellArea).filter(CellArea.lac == 1).first()

        # We produced a sequence of 0.02-degree-on-a-side
        # cell bounding boxes centered at
        # [0, 0.2, 0.4, 0.8, 1.6, 3.2] degrees.
        # So the lower-left corner is at (-0.01, -0.01)
        # and the upper-right corner is at (3.21, 3.21)
        # we should therefore see a LAC centroid at (1.05, 1.05)
        # with a range of 339.540m
        self.assertEqual(lac.lat, 1.05)
        self.assertEqual(lac.lon, 1.05)
        self.assertEqual(lac.range, 339540)
Example #20
0
    def test_scan_areas_race_with_location_update(self):
        session = self.session

        # First batch of cell observations for CID 1
        keys = dict(radio=Radio.cdma, mcc=1, mnc=1, lac=1, cid=1)
        cell = Cell(new_measures=4, total_measures=1, **keys)
        observations = [
            CellObservation(lat=1.0, lon=1.0, **keys),
            CellObservation(lat=1.0, lon=1.0, **keys),
            CellObservation(lat=1.0, lon=1.0, **keys),
            CellObservation(lat=1.0, lon=1.0, **keys),
        ]
        session.add(cell)
        session.add_all(observations)
        session.commit()

        # Periodic location_update_cell runs and updates CID 1
        # to have a location, inserts LAC 1 with new_measures=1
        # which will be picked up by the next scan_lac.
        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (1, 0))

        # Second batch of cell observations for CID 2
        keys['cid'] = 2
        cell = Cell(new_measures=4, total_measures=1, **keys)
        observations = [
            CellObservation(lat=1.0, lon=1.0, **keys),
            CellObservation(lat=1.0, lon=1.0, **keys),
            CellObservation(lat=1.0, lon=1.0, **keys),
            CellObservation(lat=1.0, lon=1.0, **keys),
        ]
        session.add(cell)
        session.add_all(observations)
        session.commit()

        # Periodic LAC scan runs, picking up LAC 1; this could
        # accidentally pick up CID 2, but it should not since it
        # has not had its location updated yet. If there's no
        # exception here, CID 2 is properly ignored.
        scan_areas.delay()
Example #21
0
def location_update_cell(self, min_new=10, max_new=100, batch=10):
    cells = []
    redis_client = self.app.redis_client
    with self.db_session() as session:
        emit_new_observation_metric(self.stats_client, session, self.shortname,
                                    Cell, min_new, max_new)
        query = (session.query(Cell).filter(
            Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).limit(batch))
        cells = query.all()
        if not cells:
            return 0
        moving_cells = set()
        updated_lacs = set()
        for cell in cells:
            query = session.query(
                CellObservation.lat, CellObservation.lon,
                CellObservation.id).filter(*CellObservation.joinkey(cell))
            # only take the last X new_measures
            query = query.order_by(CellObservation.created.desc()).limit(
                cell.new_measures)
            observations = query.all()

            if observations:
                moving = calculate_new_position(cell, observations,
                                                CELL_MAX_DIST_KM)
                if moving:
                    moving_cells.add(cell)

                updated_lacs.add(CellArea.to_hashkey(cell))

        if updated_lacs:
            session.on_post_commit(enqueue_lacs, redis_client, updated_lacs,
                                   UPDATE_KEY['cell_lac'])

        if moving_cells:
            # some cells found to be moving too much
            blacklist_and_remove_moving_cells(session, moving_cells)

        session.commit()

    return (len(cells), len(moving_cells))
    def test_fields(self):
        report_id = uuid.uuid1()
        self.session.add(CellObservation.create(
            radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
            report_id=report_id, lat=GB_LAT, lon=GB_LON,
            asu=26, signal=-61, ta=10))
        self.session.flush()

        result = self.session.query(CellObservation).first()
        self.assertEqual(result.report_id, report_id)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
        self.assertEqual(result.radio, Radio.gsm)
        self.assertEqual(result.mcc, GB_MCC)
        self.assertEqual(result.mnc, 5)
        self.assertEqual(result.lac, 12345)
        self.assertEqual(result.cid, 23456)
        self.assertEqual(result.asu, 26)
        self.assertEqual(result.signal, -61)
        self.assertEqual(result.ta, 10)
Example #23
0
    def test_cell_histogram(self):
        session = self.db_master_session
        today = util.utcnow().date()
        yesterday = (today - timedelta(1))
        two_days = (today - timedelta(2))
        long_ago = (today - timedelta(3))
        observations = [
            CellObservation(lat=1.0, lon=2.0, created=today),
            CellObservation(lat=1.0, lon=2.0, created=today),
            CellObservation(lat=1.0, lon=2.0, created=yesterday),
            CellObservation(lat=1.0, lon=2.0, created=two_days),
            CellObservation(lat=1.0, lon=2.0, created=two_days),
            CellObservation(lat=1.0, lon=2.0, created=two_days),
            CellObservation(lat=1.0, lon=2.0, created=long_ago),
        ]
        session.add_all(observations)
        session.commit()

        cell_histogram.delay(ago=3).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 1)
        self.assertEqual(stats[0].key, StatKey.cell)
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)

        # fill up newer dates
        cell_histogram.delay(ago=2).get()
        cell_histogram.delay(ago=1).get()
        cell_histogram.delay(ago=0).get()

        # test duplicate execution
        cell_histogram.delay(ago=1).get()

        stats = session.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 4)
        self.assertEqual(stats[0].time, long_ago)
        self.assertEqual(stats[0].value, 1)
        self.assertEqual(stats[1].time, two_days)
        self.assertEqual(stats[1].value, 4)
        self.assertEqual(stats[2].time, yesterday)
        self.assertEqual(stats[2].value, 5)
        self.assertEqual(stats[3].time, today)
        self.assertEqual(stats[3].value, 7)
Example #24
0
    def test_internaljson(self):
        obs = CellObservation.create(radio=Radio.gsm,
                                     mcc=GB_MCC,
                                     mnc=5,
                                     lac=12345,
                                     cid=23456,
                                     lat=GB_LAT,
                                     lon=GB_LON)

        result = internal_loads(internal_dumps(obs))
        self.assertTrue(type(result), CellObservation)
        self.assertTrue(result.accuracy is None)
        self.assertEqual(type(result.radio), Radio)
        self.assertEqual(result.radio, Radio.gsm)
        self.assertEqual(result.mcc, GB_MCC)
        self.assertEqual(result.mnc, 5)
        self.assertEqual(result.lac, 12345)
        self.assertEqual(result.cid, 23456)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
Example #25
0
    def test_delete_cell_observations(self):
        session = self.db_master_session
        block = ObservationBlock()
        block.measure_type = ObservationType.cell
        block.start_id = 120
        block.end_id = 140
        block.s3_key = 'fake_key'
        block.archive_sha = 'fake_sha'
        block.archive_date = None
        session.add(block)

        for i in range(100, 150):
            session.add(CellObservation(id=i, created=self.really_old))
        session.commit()

        with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
            delete_cellmeasure_records.delay(batch=3).get()

        self.assertEquals(session.query(CellObservation).count(), 30)
        self.assertTrue(block.archive_date is not None)
Example #26
0
    def test_fields(self):
        report_id = uuid.uuid1()
        session = self.session
        session.add(CellObservation.create(
            radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
            report_id=report_id, lat=GB_LAT, lon=GB_LON,
            asu=26, signal=-61, ta=10))
        session.flush()

        result = session.query(CellObservation).first()
        self.assertEqual(result.report_id, report_id)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
        self.assertEqual(result.radio, Radio.gsm)
        self.assertEqual(result.mcc, GB_MCC)
        self.assertEqual(result.mnc, 5)
        self.assertEqual(result.lac, 12345)
        self.assertEqual(result.cid, 23456)
        self.assertEqual(result.asu, 26)
        self.assertEqual(result.signal, -61)
        self.assertEqual(result.ta, 10)
Example #27
0
    def test_fields(self):
        obs = CellObservation.create(
            radio=Radio.gsm, mcc=GB_MCC, mnc=5, lac=12345, cid=23456,
            lat=GB_LAT, lon=GB_LON,
            pressure=1010.2, source='gnss', timestamp=1405602028568,
            asu=26, signal=-61, ta=10)

        assert obs.lat == GB_LAT
        assert obs.lon == GB_LON
        assert obs.pressure == 1010.2
        assert obs.source == ReportSource.gnss
        assert obs.timestamp == 1405602028568
        assert obs.radio == Radio.gsm
        assert obs.mcc == GB_MCC
        assert obs.mnc == 5
        assert obs.lac == 12345
        assert obs.cid == 23456
        assert obs.asu == 26
        assert obs.signal == -61
        assert obs.ta == 10
        assert obs.shard_id == 'gsm'
Example #28
0
    def test_fields(self):
        obs = CellObservation.create(radio=Radio.gsm,
                                     mcc=GB_MCC,
                                     mnc=5,
                                     lac=12345,
                                     cid=23456,
                                     lat=GB_LAT,
                                     lon=GB_LON,
                                     asu=26,
                                     signal=-61,
                                     ta=10)

        self.assertEqual(obs.lat, GB_LAT)
        self.assertEqual(obs.lon, GB_LON)
        self.assertEqual(obs.radio, Radio.gsm)
        self.assertEqual(obs.mcc, GB_MCC)
        self.assertEqual(obs.mnc, 5)
        self.assertEqual(obs.lac, 12345)
        self.assertEqual(obs.cid, 23456)
        self.assertEqual(obs.asu, 26)
        self.assertEqual(obs.signal, -61)
        self.assertEqual(obs.ta, 10)

        self.assertEqual(obs.shard_id, 'gsm')
Example #29
0
    def process_reports(self, reports, userid=None):
        positions = []
        cell_observations = []
        wifi_observations = []
        for i, report in enumerate(reports):
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if cell or wifi:
                positions.append({
                    'lat': report['lat'],
                    'lon': report['lon'],
                })

        if cell_observations:
            # group by and create task per cell key
            self.stats_client.incr('items.uploaded.cell_observations',
                                   len(cell_observations))
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key_name,
                    len(cell_observations))

            cells = defaultdict(list)
            for obs in cell_observations:
                cells[CellObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 5 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cells = list(cells.values())
            batch_size = 5
            countdown = 0
            for i in range(0, len(cells), batch_size):
                values = []
                for observations in cells[i:i + batch_size]:
                    values.extend([encode_radio_dict(o) for o in observations])
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_cell_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if wifi_observations:
            # group by WiFi key
            self.stats_client.incr('items.uploaded.wifi_observations',
                                   len(wifi_observations))
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key_name,
                    len(wifi_observations))

            wifis = defaultdict(list)
            for obs in wifi_observations:
                wifis[WifiObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 20 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifis = list(wifis.values())
            batch_size = 20
            countdown = 0
            for i in range(0, len(wifis), batch_size):
                values = []
                for observations in wifis[i:i + batch_size]:
                    values.extend(observations)
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_wifi_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if userid is not None:
            scorekey = Score.to_hashkey(
                userid=userid,
                key=ScoreKey.location,
                time=util.utcnow().date())
            Score.incr(self.session, scorekey, len(positions))
        if positions:
            self.process_mapstat(positions)
Example #30
0
    def test_blacklist_moving_cells(self):
        now = util.utcnow()
        long_ago = now - timedelta(days=40)
        session = self.session

        k1 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=3, cid=4)
        k2 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=6, cid=8)
        k3 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=9, cid=12)
        k4 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=12, cid=16)
        k5 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=15, cid=20)
        k6 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=18, cid=24)

        # keys k2, k3 and k4 are expected to be detected as moving
        data = [
            # a cell with an entry but no prior position
            Cell(new_measures=3, total_measures=0, **k1),
            CellObservation(lat=1.001, lon=1.001, **k1),
            CellObservation(lat=1.002, lon=1.005, **k1),
            CellObservation(lat=1.003, lon=1.009, **k1),
            # a cell with a prior known position
            Cell(lat=2.0, lon=2.0,
                 new_measures=2, total_measures=1, **k2),
            CellObservation(lat=2.0, lon=2.0, **k2),
            CellObservation(lat=4.0, lon=2.0, **k2),
            # a cell with a very different prior position
            Cell(lat=1.0, lon=1.0,
                 new_measures=2, total_measures=1, **k3),
            CellObservation(lat=3.0, lon=3.0, **k3),
            CellObservation(lat=-3.0, lon=3.0, **k3),
            # another cell with a prior known position (and negative lat)
            Cell(lat=-4.0, lon=4.0,
                 new_measures=2, total_measures=1, **k4),
            CellObservation(lat=-4.0, lon=4.0, **k4),
            CellObservation(lat=-6.0, lon=4.0, **k4),
            # an already blacklisted cell
            CellBlacklist(time=now, count=1, **k5),
            CellObservation(lat=5.0, lon=5.0, **k5),
            CellObservation(lat=8.0, lon=5.0, **k5),
            # a cell with an old different record we ignore, position
            # estimate has been updated since
            Cell(lat=6.0, lon=6.0,
                 new_measures=2, total_measures=1, **k6),
            CellObservation(lat=6.9, lon=6.9, time=long_ago, **k6),
            CellObservation(lat=6.0, lon=6.0, **k6),
            CellObservation(lat=6.001, lon=6, **k6),
        ]
        session.add_all(data)
        session.commit()

        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (5, 3))

        moving = [k2, k3, k4, k5]
        black = session.query(CellBlacklist).all()
        self.assertEqual(set([b.hashkey() for b in black]),
                         set([CellBlacklist.to_hashkey(k) for k in moving]))

        keys = [k1, k2, k3, k4, k5, k6]
        observations = session.query(CellObservation).all()
        self.assertEqual(len(observations), 14)
        self.assertEqual(set([obs.hashkey() for obs in observations]),
                         set([CellObservation.to_hashkey(k) for k in keys]))

        # test duplicate call
        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (0, 0))

        self.check_stats(
            total=6,
            timer=[
                # We made duplicate calls
                ('task.data.location_update_cell', 2),
                # One of those would've scheduled a remove_cell task
                ('task.data.remove_cell', 1)
            ],
            gauge=[
                ('task.data.location_update_cell.new_measures_1_100', 2),
            ])
Example #31
0
    def test_blacklist_moving_cells(self):
        now = util.utcnow()
        long_ago = now - timedelta(days=40)
        session = self.session

        k1 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=3, cid=4)
        k2 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=6, cid=8)
        k3 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=9, cid=12)
        k4 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=12, cid=16)
        k5 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=15, cid=20)
        k6 = dict(radio=Radio.cdma, mcc=1, mnc=2, lac=18, cid=24)

        # keys k2, k3 and k4 are expected to be detected as moving
        data = [
            # a cell with an entry but no prior position
            Cell(new_measures=3, total_measures=0, **k1),
            CellObservation(lat=1.001, lon=1.001, **k1),
            CellObservation(lat=1.002, lon=1.005, **k1),
            CellObservation(lat=1.003, lon=1.009, **k1),
            # a cell with a prior known position
            Cell(lat=2.0, lon=2.0, new_measures=2, total_measures=1, **k2),
            CellObservation(lat=2.0, lon=2.0, **k2),
            CellObservation(lat=4.0, lon=2.0, **k2),
            # a cell with a very different prior position
            Cell(lat=1.0, lon=1.0, new_measures=2, total_measures=1, **k3),
            CellObservation(lat=3.0, lon=3.0, **k3),
            CellObservation(lat=-3.0, lon=3.0, **k3),
            # another cell with a prior known position (and negative lat)
            Cell(lat=-4.0, lon=4.0, new_measures=2, total_measures=1, **k4),
            CellObservation(lat=-4.0, lon=4.0, **k4),
            CellObservation(lat=-6.0, lon=4.0, **k4),
            # an already blacklisted cell
            CellBlacklist(time=now, count=1, **k5),
            CellObservation(lat=5.0, lon=5.0, **k5),
            CellObservation(lat=8.0, lon=5.0, **k5),
            # a cell with an old different record we ignore, position
            # estimate has been updated since
            Cell(lat=6.0, lon=6.0, new_measures=2, total_measures=1, **k6),
            CellObservation(lat=6.9, lon=6.9, time=long_ago, **k6),
            CellObservation(lat=6.0, lon=6.0, **k6),
            CellObservation(lat=6.001, lon=6, **k6),
        ]
        session.add_all(data)
        session.commit()

        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (5, 3))

        moving = [k2, k3, k4, k5]
        black = session.query(CellBlacklist).all()
        self.assertEqual(set([b.hashkey() for b in black]),
                         set([CellBlacklist.to_hashkey(k) for k in moving]))

        keys = [k1, k2, k3, k4, k5, k6]
        observations = session.query(CellObservation).all()
        self.assertEqual(len(observations), 14)
        self.assertEqual(set([obs.hashkey() for obs in observations]),
                         set([CellObservation.to_hashkey(k) for k in keys]))

        # test duplicate call
        result = location_update_cell.delay(min_new=1)
        self.assertEqual(result.get(), (0, 0))

        self.check_stats(
            total=6,
            timer=[
                # We made duplicate calls
                ('task.data.location_update_cell', 2),
                # One of those would've scheduled a remove_cell task
                ('task.data.remove_cell', 1)
            ],
            gauge=[
                ('task.data.location_update_cell.new_measures_1_100', 2),
            ])
Example #32
0
 def test_mcc_latlon(self):
     sample = dict(radio=Radio.gsm, mnc=6, lac=1, cid=2,
                   lat=GB_LAT, lon=GB_LON)
     assert CellObservation.create(mcc=GB_MCC, **sample) is not None
     assert CellObservation.create(mcc=262, **sample) is None
Example #33
0
def process_observations(observations,
                         session,
                         userid=None,
                         api_key_log=False,
                         api_key_name=None):
    stats_client = get_stats_client()
    positions = []
    cell_observations = []
    wifi_observations = []
    for i, obs in enumerate(observations):
        obs['report_id'] = uuid.uuid1()
        cell, wifi = process_observation(obs, session)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if cell or wifi:
            positions.append({
                'lat': obs['lat'],
                'lon': obs['lon'],
            })

    if cell_observations:
        # group by and create task per cell key
        stats_client.incr('items.uploaded.cell_observations',
                          len(cell_observations))
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.cell_observations' % api_key_name,
                len(cell_observations))

        cells = defaultdict(list)
        for obs in cell_observations:
            cells[CellObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cells = list(cells.values())
        batch_size = 5
        countdown = 0
        for i in range(0, len(cells), batch_size):
            values = []
            for observations in cells[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_cell.apply_async(args=[values],
                                             kwargs={'userid': userid},
                                             expires=21600,
                                             countdown=countdown)
            countdown += 1

    if wifi_observations:
        # group by WiFi key
        stats_client.incr('items.uploaded.wifi_observations',
                          len(wifi_observations))
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.wifi_observations' % api_key_name,
                len(wifi_observations))

        wifis = defaultdict(list)
        for obs in wifi_observations:
            wifis[WifiObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifis = list(wifis.values())
        batch_size = 20
        countdown = 0
        for i in range(0, len(wifis), batch_size):
            values = []
            for observations in wifis[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_wifi.apply_async(args=[values],
                                             kwargs={'userid': userid},
                                             expires=21600,
                                             countdown=countdown)
            countdown += 1

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, positions)
Example #34
0
    def process_report(self, data):
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        def better_cell_obs(new, old):
            comparators = [
                ('ta', operator.lt),
                ('signal', operator.gt),
                ('asu', operator.gt),
            ]
            for field, better in comparators:
                if (None not in (old[field], new[field]) and
                        better(new[field], old[field])):
                    return True
            return False

        def better_wifi_obs(new, old):
            if (None not in (old['signal'], new['signal']) and
                    new['signal'] > old['signal']):
                return True
            return False

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    if better_cell_obs(cell, existing):
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell
        cell_observations = cell_observations.values()

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    if better_wifi_obs(wifi, existing):
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi
            wifi_observations = wifi_observations.values()
        return (cell_observations, wifi_observations)
Example #35
0
    def process_reports(self, reports, userid=None):
        positions = set()
        cell_observations = []
        wifi_observations = []
        for i, report in enumerate(reports):
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if (cell or wifi) and report.get('lat') and report.get('lon'):
                positions.add((report['lat'], report['lon']))

        if cell_observations:
            # group by and create task per cell key
            self.stats_client.incr('items.uploaded.cell_observations',
                                   len(cell_observations))
            if self.api_key and self.api_key.log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key.name,
                    len(cell_observations))

            cells = defaultdict(list)
            for obs in cell_observations:
                cells[CellObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 100 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cells = list(cells.values())
            batch_size = 100
            countdown = 0
            for i in range(0, len(cells), batch_size):
                values = []
                for observations in cells[i:i + batch_size]:
                    values.extend([encode_radio_dict(o) for o in observations])
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_cell_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if wifi_observations:
            # group by WiFi key
            self.stats_client.incr('items.uploaded.wifi_observations',
                                   len(wifi_observations))
            if self.api_key and self.api_key.log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key.name,
                    len(wifi_observations))

            wifis = defaultdict(list)
            for obs in wifi_observations:
                wifis[WifiObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 100 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifis = list(wifis.values())
            batch_size = 100
            countdown = 0
            for i in range(0, len(wifis), batch_size):
                values = []
                for observations in wifis[i:i + batch_size]:
                    values.extend(observations)
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_wifi_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        self.process_mapstat(positions)
        self.process_score(userid, positions)