Esempio n. 1
0
    def test_remove_wifi(self):
        # Store five wifis with two observations each, then call the
        # remove_wifi task with overlapping key lists and compare the
        # task results with the number of Wifi rows left behind.
        db = self.session
        wifi_keys = [{'key': "a%s1234567890" % i} for i in range(5)]
        rows = []
        for entry in wifi_keys:
            mac = entry['key']
            rows.extend([
                Wifi(key=mac),
                WifiObservation(lat=1.0, lon=1.0, key=mac),
                WifiObservation(lat=2.0, lon=2.0, key=mac),
            ])
        db.add_all(rows)
        db.flush()

        # only the first two keys
        first = remove_wifi.delay(wifi_keys[:2])
        self.assertEqual(first.get(), 2)
        self.assertEqual(len(db.query(Wifi).all()), 3)

        # all five keys; two of them are already gone
        second = remove_wifi.delay(wifi_keys)
        self.assertEqual(second.get(), 3)

        # the same call again is expected to report nothing
        third = remove_wifi.delay(wifi_keys)
        self.assertEqual(third.get(), 0)
        self.assertEqual(len(db.query(Wifi).all()), 0)
Esempio n. 2
0
    def test_remove_wifi(self):
        # Store five wifis with two observations each and collect their
        # hashkeys, then call the remove_wifi task with overlapping key
        # lists and compare the task results with the remaining Wifi rows.
        rows = []
        wifi_keys = []
        for index in range(5):
            mac = 'a%s1234567890' % index
            station = Wifi(key=mac)
            wifi_keys.append(station.hashkey())
            rows.append(station)
            rows.append(WifiObservation(lat=1.0, lon=1.0, key=mac))
            rows.append(WifiObservation(lat=2.0, lon=2.0, key=mac))
        self.session.add_all(rows)
        self.session.flush()

        # only the first two keys
        first = remove_wifi.delay(wifi_keys[:2])
        self.assertEqual(first.get(), 2)
        self.assertEqual(len(self.session.query(Wifi).all()), 3)

        # all five keys; two of them are already gone
        second = remove_wifi.delay(wifi_keys)
        self.assertEqual(second.get(), 3)

        # the same call again is expected to report nothing
        third = remove_wifi.delay(wifi_keys)
        self.assertEqual(third.get(), 0)
        self.assertEqual(len(self.session.query(Wifi).all()), 0)
Esempio n. 3
0
    def test_max_min_range_update(self):
        # Run location_update_wifi for one wifi without a stored position
        # (k1) and one with an existing position and bounding box (k2),
        # then check the resulting position, min/max bounds and range.
        db = self.session
        k1 = "ab1234567890"
        k2 = "cd1234567890"
        db.add_all([
            Wifi(key=k1, new_measures=2, total_measures=2),
            WifiObservation(lat=1.0, lon=1.0, key=k1),
            WifiObservation(lat=1.002, lon=1.004, key=k1),
            Wifi(
                key=k2,
                lat=2.0, lon=-2.0,
                max_lat=2.001, min_lat=1.999,
                max_lon=-1.999, min_lon=-2.001,
                new_measures=2, total_measures=4,
            ),
            WifiObservation(lat=2.002, lon=-2.004, key=k2),
            WifiObservation(lat=1.998, lon=-1.996, key=k2),
        ])
        db.commit()

        task = location_update_wifi.delay(min_new=1)
        self.assertEqual(task.get(), (2, 0))

        wifis = dict(db.query(Wifi.key, Wifi).all())
        self.assertEqual(set(wifis.keys()), {k1, k2})

        first = wifis[k1]
        self.assertEqual(first.lat, 1.001)
        self.assertEqual(first.max_lat, 1.002)
        self.assertEqual(first.min_lat, 1.0)
        self.assertEqual(first.lon, 1.002)
        self.assertEqual(first.max_lon, 1.004)
        self.assertEqual(first.min_lon, 1.0)

        second = wifis[k2]
        self.assertEqual(second.lat, 2.0)
        self.assertEqual(second.max_lat, 2.002)
        self.assertEqual(second.min_lat, 1.998)
        self.assertEqual(second.lon, -2.0)
        self.assertEqual(second.max_lon, -1.996)
        self.assertEqual(second.min_lon, -2.004)

        # independent calculation: the k1 bounding box is
        # (1.000, 1.000) to (1.002, 1.004), with centroid
        # at (1.001, 1.002); worst distance from centroid
        # to any corner is 249m
        self.assertEqual(first.range, 249)

        # independent calculation: the k2 bounding box is
        # (1.998, -2.004) to (2.002, -1.996), with centroid
        # at (2.000, -2.000); worst distance from centroid
        # to any corner is 497m
        self.assertEqual(second.range, 497)
Esempio n. 4
0
    def test_schedule_wifi_observations(self):
        # Check the (start_id, end_id) blocks returned by the archival
        # scheduling task: none before any observations are added, two
        # consecutive blocks for two batches worth of old observations,
        # and none again when the task is run a second time.
        session = self.db_master_session

        blocks = schedule_wifimeasure_archival.delay(batch=1).get()
        self.assertEqual(len(blocks), 0)

        batch_size = 10
        observations = [WifiObservation(created=self.really_old)
                        for _ in range(batch_size * 2)]
        session.add_all(observations)
        session.flush()
        start_id = observations[0].id

        blocks = schedule_wifimeasure_archival.delay(batch=batch_size).get()
        self.assertEqual(len(blocks), 2)
        self.assertEqual(blocks[0],
                         (start_id, start_id + batch_size))
        self.assertEqual(blocks[1],
                         (start_id + batch_size, start_id + 2 * batch_size))

        # a second run is expected to return no further blocks
        blocks = schedule_wifimeasure_archival.delay(batch=batch_size).get()
        self.assertEqual(len(blocks), 0)
Esempio n. 5
0
    def test_unthrottle_wifi_observations(self):
        # Create one observation block covering ids 120..139, fifty old
        # observations (ids 100..149) and one Wifi per observation with
        # total_measures=11000.  After running the delete and unthrottle
        # tasks, total_measures must be 0 for the wifis inside the block
        # range and 1 for all others.
        session = self.db_master_session
        block = ObservationBlock()
        block.measure_type = ObservationType.wifi
        block.start_id = 120
        block.end_id = 140
        block.s3_key = 'fake_key'
        block.archive_sha = 'fake_sha'
        block.archive_date = None
        session.add(block)

        k = dict(lat=1.0, lon=1.0)
        for i in range(100, 150):
            session.add(
                WifiObservation(id=i, key=str(i), created=self.really_old))
            session.add(Wifi(total_measures=11000, key=str(i), **k))
        session.commit()

        # S3Backend.check_archive is patched to always return True
        with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
            delete_wifimeasure_records.delay(batch=7).get()

        wifi_unthrottle_measures.delay(10000, 1000).get()

        wifis = session.query(Wifi).all()
        self.assertEqual(len(wifis), 50)
        for wifi in wifis:
            if 120 <= int(wifi.key) < 140:
                self.assertEqual(wifi.total_measures, 0)
            else:
                self.assertEqual(wifi.total_measures, 1)

        self.check_stats(counter=['items.wifi_unthrottled'])
Esempio n. 6
0
 def test_json(self):
     # Round-trip a factory-built observation through to_json(),
     # simplejson and from_json() and compare the restored fields.
     obs = WifiObservationFactory.build(accuracy=None)
     result = WifiObservation.from_json(simplejson.loads(
         simplejson.dumps(obs.to_json())))
     # assertTrue(type(result), WifiObservation) could never fail: the
     # second argument of assertTrue is only the failure message.
     self.assertIsInstance(result, WifiObservation)
     self.assertTrue(result.accuracy is None)
     self.assertEqual(result.mac, obs.mac)
     self.assertEqual(result.lat, obs.lat)
     self.assertEqual(result.lon, obs.lon)
Esempio n. 7
0
    def process_report(self, data):
        """Flatten one report into cell and wifi observation dicts.

        The report is validated with ``Report.validate``; each entry of
        ``data['cell']`` / ``data['wifi']`` is validated separately and
        entries that fail validation are dropped.  Missing top-level
        report fields are copied into every remaining entry.  Entries
        sharing the same hashkey are de-duplicated, keeping one entry
        per key.

        Returns a ``(cell_observations, wifi_observations)`` tuple of
        lists; both lists are empty if the report itself is invalid.
        """
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        # both map hashkey -> the observation dict kept for that key
        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    # replace the stored entry if the new one has a lower
                    # ta, a higher signal (unless the stored signal is 0)
                    # or a higher asu
                    if existing['ta'] > cell['ta'] or \
                       (existing['signal'] != 0 and
                        existing['signal'] < cell['signal']) or \
                       existing['asu'] < cell['asu']:
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    # replace the stored entry if the new one has a higher
                    # signal (unless the stored signal is 0)
                    if existing['signal'] != 0 and \
                       existing['signal'] < wifi['signal']:
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi

        # Always hand back plain lists: previously a report without wifi
        # data returned the raw (empty) dict for the wifi half, while the
        # cell half was always converted via .values().
        return (list(cell_observations.values()),
                list(wifi_observations.values()))
Esempio n. 8
0
    def process_report(self, data):
        """Flatten one report into cell and wifi observation dicts.

        The report is validated with ``Report.validate``; each entry of
        ``data['cell']`` / ``data['wifi']`` is validated separately and
        entries that fail validation are dropped.  Missing top-level
        report fields are copied into every remaining entry.  Entries
        sharing the same hashkey are de-duplicated, keeping one entry
        per key.

        Returns a ``(cell_observations, wifi_observations)`` tuple of
        lists; both lists are empty if the report itself is invalid.
        """
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        # both map hashkey -> the observation dict kept for that key
        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    # replace the stored entry if the new one has a lower
                    # ta, a higher signal (unless the stored signal is 0)
                    # or a higher asu
                    if existing['ta'] > cell['ta'] or \
                       (existing['signal'] != 0 and
                        existing['signal'] < cell['signal']) or \
                       existing['asu'] < cell['asu']:
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    # replace the stored entry if the new one has a higher
                    # signal (unless the stored signal is 0)
                    if existing['signal'] != 0 and \
                       existing['signal'] < wifi['signal']:
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi

        # Always hand back plain lists: previously a report without wifi
        # data returned the raw (empty) dict for the wifi half, while the
        # cell half was always converted via .values().
        return (list(cell_observations.values()),
                list(wifi_observations.values()))
Esempio n. 9
0
    def test_internaljson(self):
        # Round-trip an observation through internal_dumps/internal_loads
        # and compare the restored fields with the inputs.
        mac = '3680873e9b83'
        obs = WifiObservation.create(key=mac, lat=GB_LAT, lon=GB_LON)

        result = internal_loads(internal_dumps(obs))
        # assertTrue(type(result), WifiObservation) could never fail: the
        # second argument of assertTrue is only the failure message.
        self.assertIsInstance(result, WifiObservation)
        self.assertTrue(result.accuracy is None)
        self.assertEqual(result.mac, mac)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
Esempio n. 10
0
    def test_internaljson(self):
        # Round-trip an observation through internal_dumps/internal_loads
        # and compare the restored fields with the inputs.
        mac = '3680873e9b83'
        obs = WifiObservation.create(
            key=mac, lat=GB_LAT, lon=GB_LON)

        result = internal_loads(internal_dumps(obs))
        # assertTrue(type(result), WifiObservation) could never fail: the
        # second argument of assertTrue is only the failure message.
        self.assertIsInstance(result, WifiObservation)
        self.assertTrue(result.accuracy is None)
        self.assertEqual(result.mac, mac)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
Esempio n. 11
0
    def test_json(self):
        # Serialize a factory-built observation with to_json() and the
        # json module, rebuild it with from_json() and compare fields.
        original = WifiObservationFactory.build(
            accuracy=None, source=ReportSource.query)
        payload = json.dumps(original.to_json())
        restored = WifiObservation.from_json(json.loads(payload))

        assert type(restored) is WifiObservation
        assert restored.accuracy is None
        assert restored.mac == original.mac
        assert restored.lat == original.lat
        assert restored.lon == original.lon
        # the source must come back as the enum member, not a raw value
        assert restored.source == ReportSource.query
        assert type(restored.source) is ReportSource
Esempio n. 12
0
    def test_fields(self):
        # Values passed to WifiObservation.create() must be readable back
        # from the created object; the key is exposed as ``mac``.
        mac = '3680873e9b83'
        values = dict(lat=GB_LAT, lon=GB_LON, channel=5, signal=-45)
        created = WifiObservation.create(key=mac, **values)

        self.assertEqual(created.lat, GB_LAT)
        self.assertEqual(created.lon, GB_LON)
        self.assertEqual(created.mac, mac)
        self.assertEqual(created.channel, 5)
        self.assertEqual(created.signal, -45)
Esempio n. 13
0
    def test_json(self):
        # Serialize a factory-built observation with to_json() and
        # simplejson, rebuild it with from_json() and compare fields.
        original = WifiObservationFactory.build(
            accuracy=None, source=ReportSource.query)
        payload = simplejson.dumps(original.to_json())
        restored = WifiObservation.from_json(simplejson.loads(payload))

        assert type(restored) is WifiObservation
        assert restored.accuracy is None
        assert restored.mac == original.mac
        assert restored.lat == original.lat
        assert restored.lon == original.lon
        # the source must come back as the enum member, not a raw value
        assert restored.source == ReportSource.query
        assert type(restored.source) is ReportSource
    def test_fields(self):
        # Store one observation and read it back through the session to
        # check that every field passed to create() was kept.
        db = self.session
        key = '3680873e9b83'
        report_id = uuid.uuid1()
        observation = WifiObservation.create(
            key=key, report_id=report_id, lat=GB_LAT, lon=GB_LON,
            channel=5, signal=-45)
        db.add(observation)
        db.flush()

        stored = db.query(WifiObservation).first()
        self.assertEqual(stored.report_id, report_id)
        self.assertEqual(stored.lat, GB_LAT)
        self.assertEqual(stored.lon, GB_LON)
        self.assertEqual(stored.key, key)
        self.assertEqual(stored.channel, 5)
        self.assertEqual(stored.signal, -45)
Esempio n. 15
0
    def test_fields(self):
        # Values passed to WifiObservation.create() must be readable back
        # from the created object; the key is exposed as ``mac``.
        mac = '3680873e9b83'
        created = WifiObservation.create(
            key=mac, lat=GB_LAT, lon=GB_LON, channel=5, signal=-45)

        self.assertEqual(created.lat, GB_LAT)
        self.assertEqual(created.lon, GB_LON)
        self.assertEqual(created.mac, mac)
        self.assertEqual(created.channel, 5)
        self.assertEqual(created.signal, -45)

        # expected shard for this particular mac
        self.assertEqual(created.shard_id, '8')
Esempio n. 16
0
    def test_fields(self):
        # Store one observation and read it back through the session to
        # check that every field passed to create() was kept.
        key = "3680873e9b83"
        report_id = uuid.uuid1()
        db = self.session
        observation = WifiObservation.create(
            key=key,
            report_id=report_id,
            lat=GB_LAT,
            lon=GB_LON,
            channel=5,
            signal=-45,
        )
        db.add(observation)
        db.flush()

        stored = db.query(WifiObservation).first()
        self.assertEqual(stored.report_id, report_id)
        self.assertEqual(stored.lat, GB_LAT)
        self.assertEqual(stored.lon, GB_LON)
        self.assertEqual(stored.key, key)
        self.assertEqual(stored.channel, 5)
        self.assertEqual(stored.signal, -45)
Esempio n. 17
0
    def test_backup_wifi_to_s3(self):
        # Schedule one archival block for a batch of old observations,
        # run the S3 backup task with the upload patched out, and check
        # both the contents of the written zip file and the metadata
        # recorded on the ObservationBlock.
        session = self.db_master_session
        batch_size = 10
        observations = [WifiObservation(created=self.really_old)
                        for _ in range(batch_size)]
        session.add_all(observations)
        session.flush()
        start_id = observations[0].id

        blocks = schedule_wifimeasure_archival.delay(batch=batch_size).get()
        self.assertEqual(len(blocks), 1)
        block = blocks[0]
        self.assertEqual(block, (start_id, start_id + batch_size))

        with mock_s3():
            # S3Backend.backup_archive is patched to always return True
            with patch.object(S3Backend,
                              'backup_archive', lambda x, y, z: True):
                write_wifimeasure_s3_backups.delay(cleanup_zip=False).get()

                # the archive file name is taken from the text after the
                # last ':' in the single 'oldstyle' heka message payload
                msgs = self.heka_client.stream.msgs
                info_msgs = [m for m in msgs if m.type == 'oldstyle']
                self.assertEqual(1, len(info_msgs))
                info = info_msgs[0]
                fname = info.payload.split(":")[-1]

                myzip = ZipFile(fname)
                try:
                    contents = set(myzip.namelist())
                    expected_contents = set(['alembic_revision.txt',
                                             'wifi_measure.csv'])
                    self.assertEqual(expected_contents, contents)
                finally:
                    myzip.close()

        blocks = session.query(ObservationBlock).all()

        self.assertEqual(len(blocks), 1)
        block = blocks[0]

        # close the archive deterministically instead of leaking the
        # file handle returned by a bare open(...).read()
        actual_sha = hashlib.sha1()
        with open(fname, 'rb') as archive:
            actual_sha.update(archive.read())
        self.assertEqual(block.archive_sha, actual_sha.digest())
        self.assertTrue(block.s3_key is not None)
        self.assertTrue('/wifi_' in block.s3_key)
        self.assertTrue(block.archive_date is None)
Esempio n. 18
0
    def test_fields(self):
        # Values passed to WifiObservation.create() must be readable back
        # unchanged from the created object.
        mac = '3680873e9b83'
        timestamp = 1405602028568
        created = WifiObservation.create(
            mac=mac,
            lat=GB_LAT,
            lon=GB_LON,
            pressure=1010.2,
            source=ReportSource.query,
            timestamp=timestamp,
            channel=5,
            signal=-45,
        )

        assert created.lat == GB_LAT
        assert created.lon == GB_LON
        assert created.mac == mac
        assert created.pressure == 1010.2
        assert created.source == ReportSource.query
        assert created.timestamp == timestamp
        assert created.channel == 5
        assert created.signal == -45
        # expected shard for this particular mac
        assert created.shard_id == '8'
Esempio n. 19
0
    def test_monitor_measures(self):
        # Run the monitor task before and after adding observations and
        # check both the returned per-table values and the emitted gauges.
        db = self.db_master_session

        # before this test adds any observations
        initial = monitor_measures.delay().get()
        self.check_stats(
            gauge=[('table.cell_measure', 1), ('table.wifi_measure', 1)],
        )
        self.assertEqual(initial, {'cell_measure': -1, 'wifi_measure': -1})

        # add three cell and five wifi observations
        cell_rows = [CellObservation() for _ in range(3)]
        db.add_all(cell_rows)
        wifi_rows = [WifiObservation() for _ in range(5)]
        db.add_all(wifi_rows)
        db.flush()

        updated = monitor_measures.delay().get()
        self.check_stats(
            gauge=[('table.cell_measure', 2), ('table.wifi_measure', 2)],
        )
        self.assertEqual(updated, {'cell_measure': 3, 'wifi_measure': 5})
Esempio n. 20
0
    def test_delete_wifi_observations(self):
        # Create one observation block covering ids 120..139 and fifty
        # old observations (ids 100..149); after the delete task runs,
        # 30 observations must remain and the block must carry an
        # archive date.
        session = self.db_master_session
        block = ObservationBlock()
        block.measure_type = ObservationType.wifi
        block.start_id = 120
        block.end_id = 140
        block.s3_key = 'fake_key'
        block.archive_sha = 'fake_sha'
        block.archive_date = None
        session.add(block)

        for i in range(100, 150):
            session.add(WifiObservation(id=i, created=self.really_old))
        session.commit()

        # S3Backend.check_archive is patched to always return True
        with patch.object(S3Backend, 'check_archive', lambda x, y, z: True):
            delete_wifimeasure_records.delay(batch=7).get()

        self.assertEqual(session.query(WifiObservation).count(), 30)
        self.assertTrue(block.archive_date is not None)
Esempio n. 21
0
    def test_wifi_histogram(self):
        # Create observations on four consecutive days (1 / 3 / 1 / 2,
        # oldest first) and check the Stat rows written by the
        # wifi_histogram task; the expected values are running totals.
        db = self.db_master_session
        today = util.utcnow().date()
        yesterday = today - timedelta(1)
        two_days = today - timedelta(2)
        long_ago = today - timedelta(3)
        creation_days = [
            today, today,
            yesterday,
            two_days, two_days, two_days,
            long_ago,
        ]
        db.add_all([
            WifiObservation(lat=1.0, lon=2.0, created=day)
            for day in creation_days
        ])
        db.commit()

        wifi_histogram.delay(ago=3).get()

        stats = db.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 1)
        oldest = stats[0]
        self.assertEqual(oldest.key, StatKey.wifi)
        self.assertEqual(oldest.time, long_ago)
        self.assertEqual(oldest.value, 1)

        # fill in newer dates
        for days_ago in (2, 1, 0):
            wifi_histogram.delay(ago=days_ago).get()

        # test duplicate execution
        wifi_histogram.delay(ago=1).get()

        stats = db.query(Stat).order_by(Stat.time).all()
        self.assertEqual(len(stats), 4)
        expected = [
            (long_ago, 1),
            (two_days, 4),
            (yesterday, 5),
            (today, 7),
        ]
        for stat, (day, value) in zip(stats, expected):
            self.assertEqual(stat.time, day)
            self.assertEqual(stat.value, value)
Esempio n. 22
0
    def test_fields(self):
        # Values passed to WifiObservation.create() must be readable back
        # unchanged from the created object.
        mac = '3680873e9b83'
        timestamp = 1405602028568
        created = WifiObservation.create(
            mac=mac, lat=GB_LAT, lon=GB_LON,
            pressure=1010.2, source=ReportSource.query,
            timestamp=timestamp,
            channel=5, signal=-45)

        assert created.lat == GB_LAT
        assert created.lon == GB_LON
        assert created.mac == mac
        assert created.pressure == 1010.2
        assert created.source == ReportSource.query
        assert created.timestamp == timestamp
        assert created.channel == 5
        assert created.signal == -45
        # expected shard for this particular mac
        assert created.shard_id == '8'
    def test_customjson(self):
        # Round-trip an observation through kombu_dumps/kombu_loads and
        # check that the uuid and datetime fields keep their types.
        key = '3680873e9b83'
        now = util.utcnow()
        report_id = uuid.uuid1()
        obs = WifiObservation.create(
            key=key, report_id=report_id, lat=GB_LAT, lon=GB_LON,
            created=now)

        json_data = kombu_dumps(obs)
        # the unset accuracy must not show up in the serialized data
        self.assertTrue('accuracy' not in json_data)

        result = kombu_loads(json_data)
        # assertTrue(type(result), WifiObservation) could never fail: the
        # second argument of assertTrue is only the failure message.
        self.assertIsInstance(result, WifiObservation)
        self.assertTrue(result.accuracy is None)
        self.assertEqual(type(result.report_id), uuid.UUID)
        self.assertEqual(result.report_id, report_id)
        self.assertEqual(result.key, key)
        self.assertEqual(result.lat, GB_LAT)
        self.assertEqual(result.lon, GB_LON)
        self.assertEqual(type(result.created), datetime.datetime)
        self.assertEqual(result.created, now)
Esempio n. 24
0
    def test_location_update_wifi(self):
        # Run location_update_wifi for a wifi without a stored position
        # (k1) and one with a stored position plus a mix of old and new
        # observations (k2), then check positions, counters and stats.
        now = util.utcnow()
        before = now - timedelta(days=1)
        db = self.session
        k1 = "ab1234567890"
        k2 = "cd1234567890"

        rows = [Wifi(key=k1, new_measures=3, total_measures=3)]
        for lat, lon in [(1.0, 1.0), (1.002, 1.003), (1.004, 1.006)]:
            rows.append(
                WifiObservation(lat=lat, lon=lon, key=k1, created=now))

        rows.append(
            Wifi(key=k2, lat=2.0, lon=2.0, new_measures=2, total_measures=4))
        # the lat/lon of the old observations is bogus and mismatches the
        # stored wifi position on purpose, to make sure old observations
        # are skipped
        for _ in range(2):
            rows.append(
                WifiObservation(lat=-1.0, lon=-1.0, key=k2, created=before))
        for _ in range(2):
            rows.append(
                WifiObservation(lat=2.002, lon=2.004, key=k2, created=now))

        db.add_all(rows)
        db.commit()

        task = location_update_wifi.delay(min_new=1)
        self.assertEqual(task.get(), (2, 0))
        self.check_stats(
            total=2,
            timer=['task.data.location_update_wifi'],
            gauge=['task.data.location_update_wifi.new_measures_1_100'],
        )

        wifis = dict(db.query(Wifi.key, Wifi).all())
        self.assertEqual(set(wifis.keys()), {k1, k2})

        first = wifis[k1]
        self.assertEqual(first.lat, 1.002)
        self.assertEqual(first.lon, 1.003)
        self.assertEqual(first.new_measures, 0)

        second = wifis[k2]
        self.assertEqual(second.lat, 2.001)
        self.assertEqual(second.lon, 2.002)
        self.assertEqual(second.new_measures, 0)
Esempio n. 25
0
    def process_reports(self, reports, userid=None):
        """Turn uploaded reports into queued cell / wifi insert tasks.

        Every report gets a fresh ``report_id`` and is split via
        ``self.process_report``.  The resulting observations are counted
        in the stats client, grouped by station hashkey and handed to the
        insert tasks in batches.  If ``userid`` is given, the location
        score is incremented by the number of reports that produced at
        least one observation; those positions are also passed to
        ``self.process_mapstat``.
        """
        positions = []
        cell_observations = []
        wifi_observations = []
        for report in reports:
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if not (cell or wifi):
                continue
            positions.append({
                'lat': report['lat'],
                'lon': report['lon'],
            })

        if cell_observations:
            cell_total = len(cell_observations)
            self.stats_client.incr(
                'items.uploaded.cell_observations', cell_total)
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key_name,
                    cell_total)

            # group by cell key
            by_cell = defaultdict(list)
            for obs in cell_observations:
                by_cell[CellObservation.to_hashkey(obs)].append(obs)
            cell_groups = list(by_cell.values())

            # Create a task per group of 5 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cell_batch = 5
            starts = range(0, len(cell_groups), cell_batch)
            for delay, start in enumerate(starts):
                values = []
                for group in cell_groups[start:start + cell_batch]:
                    values.extend(encode_radio_dict(o) for o in group)
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_cell_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=delay)

        if wifi_observations:
            wifi_total = len(wifi_observations)
            self.stats_client.incr(
                'items.uploaded.wifi_observations', wifi_total)
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key_name,
                    wifi_total)

            # group by WiFi key
            by_wifi = defaultdict(list)
            for obs in wifi_observations:
                by_wifi[WifiObservation.to_hashkey(obs)].append(obs)
            wifi_groups = list(by_wifi.values())

            # Create a task per group of 20 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifi_batch = 20
            starts = range(0, len(wifi_groups), wifi_batch)
            for delay, start in enumerate(starts):
                values = []
                for group in wifi_groups[start:start + wifi_batch]:
                    values.extend(group)
                # same expiry and staggered countdown as for the cell tasks
                self.insert_wifi_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=delay)

        if userid is not None:
            today = util.utcnow().date()
            scorekey = Score.to_hashkey(
                userid=userid, key=ScoreKey.location, time=today)
            Score.incr(self.session, scorekey, len(positions))
        if positions:
            self.process_mapstat(positions)
Esempio n. 26
0
 def test_invalid(self):
     # create() is expected to return None for both of these inputs
     for mac in ('3680873e9b83', ''):
         assert WifiObservation.create(
             mac=mac, lat=0.0, lon=0.0) is None
Esempio n. 27
0
def process_observations(observations,
                         session,
                         userid=None,
                         api_key_log=False,
                         api_key_name=None):
    """Split uploaded observations into cell/wifi parts and queue inserts.

    Every entry of ``observations`` gets a fresh ``report_id`` and is passed
    to ``process_observation``.  The resulting cell and wifi observations
    are counted via the stats client, grouped by station hashkey and handed
    to the insert tasks in batches.  Finally the user score (only when
    ``userid`` is given) and the map statistics (only when at least one
    position was collected) are updated.

    :param observations: iterable of mutable observation dicts.
    :param session: database session passed through to the helpers.
    :param userid: optional user id forwarded to the insert tasks.
    :param api_key_log: whether to emit additional per-API-key counters.
    :param api_key_name: name used inside the per-API-key counter names.
    """
    stats_client = get_stats_client()

    def group_by_station(model, items):
        # Bucket the observations by the hashkey of the given model.
        buckets = defaultdict(list)
        for item in items:
            buckets[model.to_hashkey(item)].append(item)
        return list(buckets.values())

    def enqueue(task, groups, batch_size):
        # Insert observations, expire the task if it wasn't processed
        # after six hours to avoid queue overload, also delay each task
        # by one second more, to get a more even workload and avoid
        # parallel updates of the same underlying stations.
        starts = range(0, len(groups), batch_size)
        for delay, start in enumerate(starts):
            values = [item
                      for group in groups[start:start + batch_size]
                      for item in group]
            task.apply_async(args=[values],
                             kwargs={'userid': userid},
                             expires=21600,
                             countdown=delay)

    positions = []
    cell_observations = []
    wifi_observations = []
    for obs in observations:
        obs['report_id'] = uuid.uuid1()
        cell, wifi = process_observation(obs, session)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if not (cell or wifi):
            continue
        positions.append({
            'lat': obs['lat'],
            'lon': obs['lon'],
        })

    if cell_observations:
        cell_count = len(cell_observations)
        stats_client.incr('items.uploaded.cell_observations', cell_count)
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.cell_observations' % api_key_name,
                cell_count)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        enqueue(insert_measures_cell,
                group_by_station(CellObservation, cell_observations),
                5)

    if wifi_observations:
        wifi_count = len(wifi_observations)
        stats_client.incr('items.uploaded.wifi_observations', wifi_count)
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.wifi_observations' % api_key_name,
                wifi_count)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        enqueue(insert_measures_wifi,
                group_by_station(WifiObservation, wifi_observations),
                20)

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, positions)
Esempio n. 28
0
    def test_blacklist_moving_wifis(self):
        """Run location_update_wifi over a mix of wifis and check blacklisting.

        Keys k2, k3 and k4 are expected to be detected as moving; k5 is
        blacklisted up front.  A second run is expected to find no work.
        """
        session = self.session
        now = util.utcnow()
        long_ago = now - timedelta(days=40)
        k1, k2, k3, k4, k5, k6 = all_keys = (
            "ab1234567890",
            "bc1234567890",
            "cd1234567890",
            "de1234567890",
            "ef1234567890",
            "fa1234567890",
        )
        keys = set(all_keys)

        data = []
        # a wifi with an entry but no prior position
        data.extend([
            Wifi(key=k1, new_measures=3, total_measures=0),
            WifiObservation(lat=1.001, lon=1.001, key=k1),
            WifiObservation(lat=1.002, lon=1.005, key=k1),
            WifiObservation(lat=1.003, lon=1.009, key=k1),
        ])
        # a wifi with a prior known position
        data.extend([
            Wifi(lat=2.0, lon=2.0, key=k2, new_measures=2, total_measures=1),
            WifiObservation(lat=2.01, lon=2, key=k2),
            WifiObservation(lat=2.07, lon=2, key=k2),
        ])
        # a wifi with a very different prior position
        data.extend([
            Wifi(lat=1.0, lon=1.0, key=k3, new_measures=2, total_measures=1),
            WifiObservation(lat=3.0, lon=3.0, key=k3),
            WifiObservation(lat=-3.0, lon=3.0, key=k3),
        ])
        # another wifi with a prior known position (and negative lat)
        data.extend([
            Wifi(lat=-4.0, lon=4.0, key=k4, new_measures=2, total_measures=1),
            WifiObservation(lat=-4.1, lon=4, key=k4),
            WifiObservation(lat=-4.16, lon=4, key=k4),
        ])
        # an already blacklisted wifi
        data.extend([
            WifiBlacklist(key=k5, time=now, count=1),
            WifiObservation(lat=5.0, lon=5.0, key=k5),
            WifiObservation(lat=5.1, lon=5.0, key=k5),
        ])
        # a wifi with an old different record we ignore, position
        # estimate has been updated since
        data.extend([
            Wifi(lat=6.0, lon=6.0, key=k6, new_measures=2, total_measures=1),
            WifiObservation(lat=6.9, lon=6.9, key=k6, time=long_ago),
            WifiObservation(lat=6.0, lon=6.0, key=k6),
            WifiObservation(lat=6.001, lon=6.0, key=k6),
        ])
        session.add_all(data)
        session.commit()

        first_run = location_update_wifi.delay(min_new=1)
        self.assertEqual(first_run.get(), (5, 3))

        blacklisted = set(
            entry.key for entry in session.query(WifiBlacklist).all())
        self.assertEqual(blacklisted, set([k2, k3, k4, k5]))

        stored = session.query(WifiObservation).all()
        self.assertEqual(len(stored), 14)
        self.assertEqual(set(row.key for row in stored), keys)

        # test duplicate call
        second_run = location_update_wifi.delay(min_new=1)
        self.assertEqual(second_run.get(), (0, 0))

        expected_timers = [
            # We made duplicate calls
            ('task.data.location_update_wifi', 2),
            # One of those would've scheduled a remove_wifi task
            ('task.data.remove_wifi', 1)
        ]
        expected_gauges = [
            ('task.data.location_update_wifi.new_measures_1_100', 2),
        ]
        self.check_stats(
            total=6,
            timer=expected_timers,
            gauge=expected_gauges)
Esempio n. 29
0
 def test_invalid(self):
     """WifiObservation.create is expected to return None for this key."""
     created = WifiObservation.create(
         key='3680873e9b83', lat=0.0, lon=0.0)
     self.assertTrue(created is None, created)
Esempio n. 30
0
    def process_report(self, data):
        """Validate a report and flatten it into cell and wifi observations.

        The report is validated with ``Report.validate``.  Each entry under
        ``data['cell']`` and ``data['wifi']`` is validated with
        ``CellReport`` / ``WifiReport`` and then filled up with the
        report's top-level scalar values.  Entries that share a hashkey are
        reduced to a single one, preferring the entry the matching
        ``better_*_obs`` comparison considers better.

        :param data: dict describing one report, optionally with ``cell``
            and ``wifi`` lists.
        :returns: a ``(cell_observations, wifi_observations)`` tuple of two
            lists.  Both are empty when the report fails validation.
        """
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        def better_cell_obs(new, old):
            # A lower timing advance or a higher signal / asu wins; a field
            # is only compared when both observations carry a value for it.
            comparators = [
                ('ta', operator.lt),
                ('signal', operator.gt),
                ('asu', operator.gt),
            ]
            for field, better in comparators:
                if (None not in (old[field], new[field]) and
                        better(new[field], old[field])):
                    return True
            return False

        def better_wifi_obs(new, old):
            # Only a strictly higher signal counts, and only when both
            # observations carry a signal value.
            return (None not in (old['signal'], new['signal']) and
                    new['signal'] > old['signal'])

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        def pick_best(entries, schema, model, is_better):
            # Flatten report / station data into a single dict per hashkey.
            best = {}
            for entry in entries or ():
                # only validate the additional fields
                entry = schema.validate(entry)
                if entry is None:
                    continue
                add_missing_dict_entries(entry, report_data)
                hashkey = model.to_hashkey(entry)
                if hashkey not in best or is_better(entry, best[hashkey]):
                    best[hashkey] = entry
            # Always hand back a real list, so callers get the same type no
            # matter whether the report contained this kind of station.
            return list(best.values())

        cell_observations = pick_best(
            data.get('cell'), CellReport, CellObservation, better_cell_obs)
        wifi_observations = pick_best(
            data.get('wifi'), WifiReport, WifiObservation, better_wifi_obs)
        return (cell_observations, wifi_observations)
Esempio n. 31
0
 def test_invalid(self):
     # This key at lat/lon 0.0 must not produce an observation.
     params = dict(key='3680873e9b83', lat=0.0, lon=0.0)
     result = WifiObservation.create(**params)
     is_missing = result is None
     self.assertTrue(is_missing, result)
Esempio n. 32
0
 def test_invalid(self):
     """WifiObservation.create must return None for both MAC values."""
     origin = {'lat': 0.0, 'lon': 0.0}
     assert WifiObservation.create(mac='3680873e9b83', **origin) is None
     assert WifiObservation.create(mac='', **origin) is None
Esempio n. 33
0
    def process_reports(self, reports, userid=None):
        """Process uploaded reports and queue their observations for insert.

        Each report gets a fresh ``report_id`` and is split into cell and
        wifi observations by ``self.process_report``.  Observations are
        counted via ``self.stats_client``, grouped by station hashkey and
        handed to the insert tasks in batches.  The ``(lat, lon)`` pairs of
        reports that produced observations and have truthy coordinates are
        collected and passed on to the mapstat and score processing.

        :param reports: iterable of mutable report dicts.
        :param userid: optional user id forwarded to the insert tasks and
            to the score processing.
        """
        positions = set()
        cell_observations = []
        wifi_observations = []
        for report in reports:
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if not (cell or wifi):
                continue
            if report.get('lat') and report.get('lon'):
                positions.add((report['lat'], report['lon']))

        if cell_observations:
            cell_count = len(cell_observations)
            self.stats_client.incr('items.uploaded.cell_observations',
                                   cell_count)
            if self.api_key and self.api_key.log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key.name,
                    cell_count)

            # group by cell key
            cell_buckets = defaultdict(list)
            for cell_obs in cell_observations:
                hashkey = CellObservation.to_hashkey(cell_obs)
                cell_buckets[hashkey].append(cell_obs)
            cell_groups = list(cell_buckets.values())

            # Create a task per group of 100 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cell_batch = 100
            cell_starts = range(0, len(cell_groups), cell_batch)
            for delay, start in enumerate(cell_starts):
                payload = [encode_radio_dict(entry)
                           for group in cell_groups[start:start + cell_batch]
                           for entry in group]
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_cell_task.apply_async(
                    args=[payload],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=delay)

        if wifi_observations:
            wifi_count = len(wifi_observations)
            self.stats_client.incr('items.uploaded.wifi_observations',
                                   wifi_count)
            if self.api_key and self.api_key.log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key.name,
                    wifi_count)

            # group by WiFi key
            wifi_buckets = defaultdict(list)
            for wifi_obs in wifi_observations:
                hashkey = WifiObservation.to_hashkey(wifi_obs)
                wifi_buckets[hashkey].append(wifi_obs)
            wifi_groups = list(wifi_buckets.values())

            # Create a task per group of 100 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifi_batch = 100
            wifi_starts = range(0, len(wifi_groups), wifi_batch)
            for delay, start in enumerate(wifi_starts):
                payload = [entry
                           for group in wifi_groups[start:start + wifi_batch]
                           for entry in group]
                # same expiry and staggered countdown as for the cell tasks
                self.insert_wifi_task.apply_async(
                    args=[payload],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=delay)

        self.process_mapstat(positions)
        self.process_score(userid, positions)