Esempio n. 1
0
    def test_outdated_station(self, session, redis_client, cellarea_queue):
        """An older station record does not update existing station records.

        The CSV row's ``updated`` timestamp (1570120316 = 2019-10-03) is
        earlier than the stored station's ``modified`` date (2019-10-07),
        so the import should leave the database row untouched.
        """
        # Existing WCDMA station, modified after the CSV export was taken.
        station_data = {
            "radio": Radio.wcdma,
            "mcc": 202,
            "mnc": 1,
            "lac": 2120,
            "cid": 12842,
            "lat": 38.85,
            "lon": 23.41,
            "radius": 1,
            "samples": 1,
            "created": datetime(2019, 1, 1, tzinfo=UTC),
            "modified": datetime(2019, 10, 7, tzinfo=UTC),
        }
        station = CellShard.create(_raise_invalid=True, **station_data)
        session.add(station)
        session.flush()

        # Same cell identifiers as the stored station, but older timestamps
        # and a slightly different position.
        csv = StringIO("""\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
""")
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # The existing station is unmodified
        wcdma = session.query(CellShard.shard_model(Radio.wcdma)).one()
        assert wcdma.lat == 38.85
        assert wcdma.lon == 23.41
        assert wcdma.created == datetime(2019, 1, 1, tzinfo=UTC)
        assert wcdma.modified == datetime(2019, 10, 7, tzinfo=UTC)

        # No CellAreas or RegionStats are generated
        assert session.query(func.count(CellArea.areaid)).scalar() == 0
        assert session.query(func.count(RegionStat.region)).scalar() == 0
Esempio n. 2
0
    def test_empty_radio_skipped(self, session, redis_client, cellarea_queue):
        """
        An empty string for the radio type causes the row to be skipped.

        The public CSV export encodes an unexpected radio type from the database
        as an empty string. We can't determine what radio type was expected.
        """
        # In row 3, the radio is an empty string
        csv = StringIO(
            """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
,203,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,
"""
        )
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # The empty radio row is skipped, but the following row is processed.
        umts = session.query(CellShard.shard_model(Radio.umts)).one()
        assert umts.lat == 38.8574351
        assert umts.lon == 23.4123167
        gsm_model = CellShard.shard_model(Radio.gsm)
        assert session.query(func.count(gsm_model.cellid)).scalar() == 1
        # One CellArea and one RegionStat per valid imported row.
        assert session.query(func.count(CellArea.areaid)).scalar() == 2
        assert session.query(func.count(RegionStat.region)).scalar() == 2
Esempio n. 3
0
    def test_update(self):
        """Queued observations move stations by weighted position averaging."""
        now = util.utcnow()
        # lac/cid of None form an invalid station key.
        invalid_key = dict(lac=None, cid=None)
        observations = []

        def obs_factory(**kw):
            # Keep only the observations the factory could actually build.
            obs = CellObservationFactory.build(**kw)
            if obs is not None:
                observations.append(obs)

        cell1 = CellShardFactory(samples=3, weight=3.0)
        lat1, lon1 = (cell1.lat, cell1.lon)
        key1 = dict(radio=cell1.radio, lac=cell1.lac, cid=cell1.cid)
        obs_factory(lat=lat1, lon=lon1, created=now, **key1)
        obs_factory(lat=lat1 + 0.004, lon=lon1 + 0.006, created=now, **key1)
        obs_factory(lat=lat1 + 0.006, lon=lon1 + 0.009, created=now, **key1)
        # The lac, cid are invalid and should be skipped
        obs_factory(created=now, **invalid_key)
        obs_factory(created=now, **invalid_key)

        cell2 = CellShardFactory(lat=lat1 + 1.0,
                                 lon=lon1 + 1.0,
                                 samples=3,
                                 weight=3.0)
        lat2, lon2 = (cell2.lat, cell2.lon)
        key2 = dict(radio=cell2.radio, lac=cell2.lac, cid=cell2.cid)
        obs_factory(lat=lat2 + 0.001, lon=lon2 + 0.002, created=now, **key2)
        obs_factory(lat=lat2 + 0.003, lon=lon2 + 0.006, created=now, **key2)

        # A station with a very large existing weight; new observations
        # should barely move it.
        cell3 = CellShardFactory(samples=100000, weight=100000.0)
        lat3, lon3 = (cell3.lat, cell3.lon)
        key3 = dict(radio=cell3.radio, lac=cell3.lac, cid=cell3.cid)
        for i in range(10):
            obs_factory(lat=lat3 + 0.5, lon=lon3 + 0.5, **key3)

        self.session.commit()
        self._queue_and_update_cell(observations)

        # cell1: shift matches (0 + 0.004 + 0.006) / 6 lat and
        # (0 + 0.006 + 0.009) / 6 lon, i.e. prior weight 3 plus three
        # unit-weight observations — confirm against the updater's weighting.
        shard = CellShard.shard_model(cell1.cellid)
        found = (self.session.query(shard).filter(
            shard.cellid == cell1.cellid)).one()
        self.assertAlmostEqual(found.lat, lat1 + 0.001667, 6)
        self.assertAlmostEqual(found.lon, lon1 + 0.0025, 6)

        # cell2: shift matches (0.001 + 0.003) / 5 lat, (0.002 + 0.006) / 5 lon.
        shard = CellShard.shard_model(cell2.cellid)
        found = (self.session.query(shard).filter(
            shard.cellid == cell2.cellid)).one()
        self.assertAlmostEqual(found.lat, lat2 + 0.0008, 6)
        self.assertAlmostEqual(found.lon, lon2 + 0.0016, 6)

        # cell3: the heavy prior dominates; expected values follow the
        # averaging formula spelled out below.
        shard = CellShard.shard_model(cell3.cellid)
        found = (self.session.query(shard).filter(
            shard.cellid == cell3.cellid)).one()
        expected_lat = ((lat3 * 10000) + (lat3 + 0.5) * 10) / 10010
        expected_lon = ((lon3 * 10000) + (lon3 + 0.5) * 10) / 10010
        self.assertAlmostEqual(found.lat, expected_lat, 7)
        self.assertAlmostEqual(found.lon, expected_lon, 7)
Esempio n. 4
0
    def test_update(self):
        """Queued observations move stations by weighted position averaging."""
        now = util.utcnow()
        # lac/cid of None form an invalid station key.
        invalid_key = dict(lac=None, cid=None)
        observations = []

        def obs_factory(**kw):
            # Keep only the observations the factory could actually build.
            obs = CellObservationFactory.build(**kw)
            if obs is not None:
                observations.append(obs)

        cell1 = CellShardFactory(samples=3, weight=3.0)
        lat1, lon1 = (cell1.lat, cell1.lon)
        key1 = dict(radio=cell1.radio, lac=cell1.lac, cid=cell1.cid)
        obs_factory(lat=lat1, lon=lon1, created=now, **key1)
        obs_factory(lat=lat1 + 0.004, lon=lon1 + 0.006, created=now, **key1)
        obs_factory(lat=lat1 + 0.006, lon=lon1 + 0.009, created=now, **key1)
        # The lac, cid are invalid and should be skipped
        obs_factory(created=now, **invalid_key)
        obs_factory(created=now, **invalid_key)

        cell2 = CellShardFactory(
            lat=lat1 + 1.0, lon=lon1 + 1.0, samples=3, weight=3.0)
        lat2, lon2 = (cell2.lat, cell2.lon)
        key2 = dict(radio=cell2.radio, lac=cell2.lac, cid=cell2.cid)
        obs_factory(lat=lat2 + 0.001, lon=lon2 + 0.002, created=now, **key2)
        obs_factory(lat=lat2 + 0.003, lon=lon2 + 0.006, created=now, **key2)

        # A station with a very large existing weight; new observations
        # should barely move it.
        cell3 = CellShardFactory(samples=100000, weight=100000.0)
        lat3, lon3 = (cell3.lat, cell3.lon)
        key3 = dict(radio=cell3.radio, lac=cell3.lac, cid=cell3.cid)
        for i in range(10):
            obs_factory(lat=lat3 + 0.5, lon=lon3 + 0.5, **key3)

        self.session.commit()
        self._queue_and_update_cell(observations)

        # cell1: shift matches (0 + 0.004 + 0.006) / 6 lat and
        # (0 + 0.006 + 0.009) / 6 lon, i.e. prior weight 3 plus three
        # unit-weight observations — confirm against the updater's weighting.
        shard = CellShard.shard_model(cell1.cellid)
        found = (self.session.query(shard)
                             .filter(shard.cellid == cell1.cellid)).one()
        self.assertAlmostEqual(found.lat, lat1 + 0.001667, 6)
        self.assertAlmostEqual(found.lon, lon1 + 0.0025, 6)

        # cell2: shift matches (0.001 + 0.003) / 5 lat, (0.002 + 0.006) / 5 lon.
        shard = CellShard.shard_model(cell2.cellid)
        found = (self.session.query(shard)
                             .filter(shard.cellid == cell2.cellid)).one()
        self.assertAlmostEqual(found.lat, lat2 + 0.0008, 6)
        self.assertAlmostEqual(found.lon, lon2 + 0.0016, 6)

        # cell3: the heavy prior dominates; expected values follow the
        # averaging formula spelled out below.
        shard = CellShard.shard_model(cell3.cellid)
        found = (self.session.query(shard)
                             .filter(shard.cellid == cell3.cellid)).one()
        expected_lat = ((lat3 * 10000) + (lat3 + 0.5) * 10) / 10010
        expected_lon = ((lon3 * 10000) + (lon3 + 0.5) * 10) / 10010
        self.assertAlmostEqual(found.lat, expected_lat, 7)
        self.assertAlmostEqual(found.lon, expected_lon, 7)
Esempio n. 5
0
    def test_modified_station(self, session, redis_client, cellarea_queue):
        """A modified station updates existing records.

        The CSV row's ``updated`` timestamp (2019-10-03) is newer than the
        stored station's ``modified`` date (2019-01-01), so the position
        and import metadata are replaced while the bounding box is kept.
        """
        # Existing UMTS station, last modified before the CSV export.
        station_data = {
            "radio": Radio.umts,
            "mcc": 202,
            "mnc": 1,
            "lac": 2120,
            "cid": 12842,
            "lat": 38.85,
            "lon": 23.41,
            "min_lat": 38.7,
            "max_lat": 38.9,
            "min_lon": 23.4,
            "max_lon": 23.5,
            "radius": 1,
            "samples": 1,
            "created": datetime(2019, 1, 1, tzinfo=UTC),
            "modified": datetime(2019, 1, 1, tzinfo=UTC),
        }
        station = CellShard.create(_raise_invalid=True, **station_data)
        session.add(station)
        session.flush()

        # Same cell identifiers, newer timestamps, a new position.
        csv = StringIO(
            """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
"""
        )
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # Check the details of the UMTS station
        umts = session.query(CellShard.shard_model(Radio.umts)).one()
        # New position, other details from import
        assert umts.lat == 38.8574351
        assert umts.lon == 23.4123167
        assert umts.radius == 0
        assert umts.samples == 6
        assert umts.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
        assert umts.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)
        # Other details unchanged
        assert umts.max_lat == station_data["max_lat"]
        assert umts.min_lat == station_data["min_lat"]
        assert umts.max_lon == station_data["max_lon"]
        assert umts.min_lon == station_data["min_lon"]
        assert umts.region == "GR"

        # A Modified station triggers the creation of a new CellArea
        # NOTE: the area id is keyed on Radio.wcdma even though the
        # station's radio is UMTS.
        cell_area = session.query(CellArea).order_by(CellArea.areaid).one()
        assert cell_area.areaid == (Radio.wcdma, 202, 1, 2120)

        # The new CellAreas triggers the creation of a RegionStat
        stat = session.query(RegionStat).order_by("region").one()
        assert stat.region == "GR"
        assert stat.wcdma == 1
Esempio n. 6
0
    def test_new_stations(self, session, redis_client, cellarea_queue):
        """New stations are imported, creating cell areas and region stats."""
        # Three rows: UMTS and LTE cells sharing a position in Greece (GR),
        # and one GSM cell in France (FR).
        csv = StringIO(
            """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,208,10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,
LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,
"""
        )
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # Check the details of the UMTS station
        umts = session.query(CellShard.shard_model(Radio.umts)).one()
        assert umts.mcc == 202
        assert umts.mnc == 1
        assert umts.lac == 2120
        assert umts.cid == 12842
        assert umts.lat == 38.8574351
        assert umts.lon == 23.4123167
        # A new station's bounding box collapses to its position.
        assert umts.max_lat == umts.lat
        assert umts.min_lat == umts.lat
        assert umts.max_lon == umts.lon
        assert umts.min_lon == umts.lon
        assert umts.radius == 0
        assert umts.samples == 6
        # created/modified come from the CSV's epoch timestamps.
        assert umts.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
        assert umts.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)
        assert umts.region == "GR"

        # Check the counts of the other station types
        gsm_model = CellShard.shard_model(Radio.gsm)
        assert session.query(func.count(gsm_model.cellid)).scalar() == 1
        lte_model = CellShard.shard_model(Radio.lte)
        assert session.query(func.count(lte_model.cellid)).scalar() == 1

        # New stations trigger the creation of new CellAreas
        # NOTE: the UMTS station's area is keyed on Radio.wcdma.
        cell_areas = session.query(CellArea).order_by(CellArea.areaid).all()
        area1, area2, area3 = cell_areas
        assert area1.areaid == (Radio.gsm, 208, 10, 30014)
        assert area2.areaid == (Radio.wcdma, 202, 1, 2120)
        assert area3.areaid == (Radio.lte, 202, 1, 2120)

        # New CellAreas trigger the creation of RegionStats
        stats = session.query(RegionStat).order_by("region").all()
        assert len(stats) == 2
        actual = [
            (stat.region, stat.gsm, stat.wcdma, stat.lte, stat.blue, stat.wifi)
            for stat in stats
        ]
        expected = [("FR", 1, 0, 0, 0, 0), ("GR", 0, 1, 1, 0, 0)]
        assert actual == expected
Esempio n. 7
0
    def test_weighted_update(self):
        """Observation accuracy and signal strength weight the position blend."""
        cell = CellShardFactory(radio=Radio.gsm, samples=1, weight=2.0)
        cell_lat = cell.lat
        cell_lon = cell.lon
        cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                        lac=cell.lac, cid=cell.cid)

        obs_factory = CellObservationFactory
        obs = [
            # Closer, more accurate, strong signal — presumably the
            # higher-weight observation.
            obs_factory(lat=cell.lat, lon=cell.lon - 0.002,
                        accuracy=20.0, signal=-51, **cell_key),
            # Further away, less accurate, weak signal.
            obs_factory(lat=cell.lat, signal=-111, lon=cell.lon - 0.004,
                        accuracy=40.0, **cell_key),
        ]

        self.session.commit()
        self._queue_and_update_cell(obs)
        shard = CellShard.shard_model(cell.cellid)
        cells = self.session.query(shard).all()
        self.assertEqual(len(cells), 1)
        cell = cells[0]
        # Latitude is unchanged; longitude moves by a weighted fraction.
        self.assertAlmostEqual(cell.lat, cell_lat)
        self.assertAlmostEqual(cell.max_lat, cell_lat)
        self.assertAlmostEqual(cell.min_lat, cell_lat)
        self.assertAlmostEqual(cell.lon, cell_lon - 0.0016358, 7)
        # The bounding box extends to the furthest observation.
        self.assertAlmostEqual(cell.max_lon, cell_lon)
        self.assertAlmostEqual(cell.min_lon, cell_lon - 0.004)
        self.assertEqual(cell.radius, 164)
        # 1 prior sample + 2 observations.
        self.assertEqual(cell.samples, 3)
        self.assertAlmostEqual(cell.weight, 9.47, 2)
Esempio n. 8
0
    def test_max_min_radius_update(self):
        """New observations extend the bounding box and grow the radius."""
        # Existing station with a small, symmetric bounding box.
        cell = CellShardFactory(radius=150, samples=3, weight=3.0)
        cell_lat = cell.lat
        cell_lon = cell.lon
        cell.max_lat = cell.lat + 0.001
        cell.min_lat = cell.lat - 0.001
        cell.max_lon = cell.lon + 0.001
        cell.min_lon = cell.lon - 0.001
        k1 = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                  lac=cell.lac, cid=cell.cid)

        obs_factory = CellObservationFactory
        obs = [
            obs_factory(lat=cell.lat, lon=cell.lon - 0.002, **k1),
            obs_factory(lat=cell.lat + 0.004, lon=cell.lon - 0.006, **k1),
        ]

        self.session.commit()
        self._queue_and_update_cell(obs)

        shard = CellShard.shard_model(cell.cellid)
        cells = self.session.query(shard).all()
        self.assertEqual(len(cells), 1)
        cell = cells[0]
        # Position shift matches (0 + 0.004) / 5 lat, (-0.002 - 0.006) / 5 lon.
        self.assertAlmostEqual(cell.lat, cell_lat + 0.0008)
        # The box extends to the furthest point seen on each side.
        self.assertAlmostEqual(cell.max_lat, cell_lat + 0.004)
        self.assertAlmostEqual(cell.min_lat, cell_lat - 0.001)
        self.assertAlmostEqual(cell.lon, cell_lon - 0.0016)
        self.assertAlmostEqual(cell.max_lon, cell_lon + 0.001)
        self.assertAlmostEqual(cell.min_lon, cell_lon - 0.006)
        self.assertEqual(cell.radius, 468)
        # 3 prior samples + 2 new observations.
        self.assertEqual(cell.samples, 5)
        self.assertAlmostEqual(cell.weight, 5.0, 2)
Esempio n. 9
0
    def test_blocklist(self):
        """Observations for a currently blocked station are not processed.

        One of the three observed cells already has an active block entry;
        its observation is dropped, so only two observations are counted.
        """
        now = util.utcnow()
        today = now.date()
        observations = CellObservationFactory.build_batch(3)
        obs = observations[0]
        # Station matching the first observation, blocked through today.
        CellShardFactory(
            radio=obs.radio,
            mcc=obs.mcc,
            mnc=obs.mnc,
            lac=obs.lac,
            cid=obs.cid,
            created=now,
            block_first=today - timedelta(days=10),
            block_last=today,
            block_count=1,
        )
        self.session.commit()
        self._queue_and_update(observations)

        # Collect the stations that report themselves as blocked.
        blocks = []
        for obs in observations:
            shard = CellShard.shard_model(obs.cellid)
            cell = (self.session.query(shard).filter(
                shard.cellid == obs.cellid)).one()
            if cell.blocked():
                blocks.append(cell)

        self.assertEqual(len(blocks), 1)
        # Only the two unblocked observations are counted in the stats.
        self.check_statcounter(StatKey.cell, 2)
        self.check_statcounter(StatKey.unique_cell, 2)
Esempio n. 10
0
    def test_blocklist(self):
        """Observations for a currently blocked station are not processed.

        One of the three observed cells already has an active block entry;
        its observation is dropped, so only two observations are counted.
        """
        now = util.utcnow()
        today = now.date()
        observations = CellObservationFactory.build_batch(3)
        obs = observations[0]
        # Station matching the first observation, blocked through today.
        CellShardFactory(
            radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc,
            lac=obs.lac, cid=obs.cid,
            created=now,
            block_first=today - timedelta(days=10),
            block_last=today,
            block_count=1,
        )
        self.session.commit()
        self._queue_and_update_cell(observations)

        # Collect the stations that report themselves as blocked.
        blocks = []
        for obs in observations:
            shard = CellShard.shard_model(obs.cellid)
            cell = (self.session.query(shard)
                                .filter(shard.cellid == obs.cellid)).one()
            if cell.blocked():
                blocks.append(cell)

        self.assertEqual(len(blocks), 1)
        # Only the two unblocked observations are counted in the stats.
        self.check_statcounter(StatKey.cell, 2)
        self.check_statcounter(StatKey.unique_cell, 2)
Esempio n. 11
0
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Returns a mapping of queue name to DataQueue: four fixed queues plus
    one queue per datamap, cell and wifi shard. Every queue's Redis key
    equals its name.
    """
    def make_queue(name):
        return DataQueue(name, redis_client, queue_key=name)

    fixed_names = (
        'update_cell',  # BBB
        'update_cellarea',
        'update_cellarea_ocid',
        'update_score',
    )
    data_queues = {name: make_queue(name) for name in fixed_names}

    for prefix, shard_source in (('update_datamap_', DataMap),
                                 ('update_cell_', CellShard),
                                 ('update_wifi_', WifiShard)):
        for shard_id in shard_source.shards():
            name = prefix + shard_id
            data_queues[name] = make_queue(name)

    return data_queues
Esempio n. 12
0
    def test_max_min_radius_update(self):
        """New observations extend the bounding box and grow the radius."""
        # Existing station with a small, symmetric bounding box.
        cell = CellShardFactory(radius=150, samples=3)
        cell_lat = cell.lat
        cell_lon = cell.lon
        cell.max_lat = cell.lat + 0.001
        cell.min_lat = cell.lat - 0.001
        cell.max_lon = cell.lon + 0.001
        cell.min_lon = cell.lon - 0.001
        k1 = dict(radio=cell.radio,
                  mcc=cell.mcc,
                  mnc=cell.mnc,
                  lac=cell.lac,
                  cid=cell.cid)

        obs_factory = CellObservationFactory
        obs = [
            obs_factory(lat=cell.lat, lon=cell.lon - 0.002, **k1),
            obs_factory(lat=cell.lat + 0.004, lon=cell.lon - 0.006, **k1),
        ]

        self.session.commit()
        self._queue_and_update(obs)

        shard = CellShard.shard_model(cell.cellid)
        cells = self.session.query(shard).all()
        self.assertEqual(len(cells), 1)
        cell = cells[0]
        # Position shift matches (0 + 0.004) / 5 lat, (-0.002 - 0.006) / 5 lon.
        self.assertAlmostEqual(cell.lat, cell_lat + 0.0008)
        # The box extends to the furthest point seen on each side.
        self.assertAlmostEqual(cell.max_lat, cell_lat + 0.004)
        self.assertAlmostEqual(cell.min_lat, cell_lat - 0.001)
        self.assertAlmostEqual(cell.lon, cell_lon - 0.0016)
        self.assertAlmostEqual(cell.max_lon, cell_lon + 0.001)
        self.assertAlmostEqual(cell.min_lon, cell_lon - 0.006)
        self.assertEqual(cell.radius, 468)
        # 3 prior samples + 2 new observations.
        self.assertEqual(cell.samples, 5)
Esempio n. 13
0
def query_cells(query, lookups, model, raven_client):
    """Query the database for the given lookups and return model objects.

    ``query`` supplies the database session; ``lookups`` is a list of
    lookup instances carrying ``cellid`` and ``radio`` attributes. For the
    non-sharded CellOCID model one table is queried; otherwise the lookups
    are grouped by shard model and each shard table is queried once.
    """
    cellids = [lookup.cellid for lookup in lookups]
    if not cellids:  # pragma: no cover
        return []

    # Fields used in the score calculation plus those needed for the
    # position estimate.
    load_fields = ('lat', 'lon', 'radius',
                   'created', 'modified', 'samples')

    # Cutoff date derived from the temporary blocklist duration.
    temp_blocked = util.utcnow().date() - TEMPORARY_BLOCKLIST_DURATION

    if model == CellOCID:
        # non sharded OCID table
        return query_cell_table(query.session, model, cellids,
                                temp_blocked, load_fields, raven_client)

    by_shard = defaultdict(list)
    for lookup in lookups:
        by_shard[CellShard.shard_model(lookup.radio)].append(lookup.cellid)

    results = []
    for shard_model, shard_cellids in by_shard.items():
        rows = query_cell_table(query.session, shard_model, shard_cellids,
                                temp_blocked, load_fields, raven_client)
        results.extend(rows)

    return results
Esempio n. 14
0
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Builds one DataQueue per fixed queue name plus one per datamap, cell
    and wifi shard; the Redis queue key always matches the queue name.
    """
    names = ['update_cell',  # BBB
             'update_cellarea', 'update_cellarea_ocid', 'update_score']
    names.extend('update_datamap_' + shard_id
                 for shard_id in DataMap.shards())
    names.extend('update_cell_' + shard_id
                 for shard_id in CellShard.shards())
    names.extend('update_wifi_' + shard_id
                 for shard_id in WifiShard.shards())
    return {name: DataQueue(name, redis_client, queue_key=name)
            for name in names}
Esempio n. 15
0
def query_cells(query, lookups, model, raven_client):
    """Query the database for the given lookups and return model objects.

    ``query`` supplies the database session; ``lookups`` is a list of
    lookup instances carrying ``cellid`` and ``radio`` attributes. For the
    non-sharded CellOCID model one table is queried; otherwise the lookups
    are grouped by shard model and each shard table is queried once.
    """
    cellids = [lookup.cellid for lookup in lookups]
    if not cellids:  # pragma: no cover
        return []

    # Fields used in the score calculation plus those needed for the
    # position estimate.
    load_fields = ('lat', 'lon', 'radius', 'region', 'samples',
                   'created', 'modified', 'last_seen', 'block_last')

    # Cutoff date derived from the temporary blocklist duration.
    temp_blocked = util.utcnow().date() - TEMPORARY_BLOCKLIST_DURATION

    if model == CellOCID:
        # non sharded OCID table
        return query_cell_table(query.session, model, cellids,
                                temp_blocked, load_fields, raven_client)

    grouped = defaultdict(list)
    for lookup in lookups:
        grouped[CellShard.shard_model(lookup.radio)].append(lookup.cellid)

    results = []
    for shard_model, shard_cellids in grouped.items():
        rows = query_cell_table(query.session, shard_model, shard_cellids,
                                temp_blocked, load_fields, raven_client)
        results.extend(rows)

    return results
Esempio n. 16
0
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Returns a mapping of queue name to DataQueue: the incoming queues,
    the cellarea queue, and one queue per blue, datamap, cell and wifi
    shard.
    """
    def make_queue(key, **kw):
        return DataQueue(key, redis_client, **kw)

    data_queues = {
        # *_incoming need to be the exact same as in webapp.config
        'update_incoming': make_queue('update_incoming',
                                      batch=100, compress=True),
        'transfer_incoming': make_queue('transfer_incoming',
                                        batch=100, compress=True),
        'update_cellarea': make_queue('update_cellarea',
                                      batch=100, json=False),
    }
    for shard_id in BlueShard.shards():
        key = 'update_blue_' + shard_id
        data_queues[key] = make_queue(key, batch=500)
    for shard_id in DataMap.shards():
        key = 'update_datamap_' + shard_id
        data_queues[key] = make_queue(key, batch=500, json=False)
    for shard_id in CellShard.shards():
        key = 'update_cell_' + shard_id
        data_queues[key] = make_queue(key, batch=500)
    for shard_id in WifiShard.shards():
        key = 'update_wifi_' + shard_id
        data_queues[key] = make_queue(key, batch=500)
    return data_queues
Esempio n. 17
0
    def test_lock_timeout(self, celery, db_rw_drop_table,
                          redis, ro_session, session, stats):
        """The cell updater retries after a database lock timeout.

        A second session (``ro_session``) holds an uncommitted insert on
        the station row; the patched ``add_area_update`` rolls that
        session back on its second call, so the task's first attempt
        appears to time out and the retry succeeds — confirm against
        CellUpdater's retry logic.
        """
        obs = CellObservationFactory.build()
        cell = CellShardFactory.build(
            radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc,
            lac=obs.lac, cid=obs.cid,
            samples=10,
        )
        # Insert the station through the second session without committing.
        ro_session.add(cell)
        ro_session.flush()

        orig_add_area = CellUpdater.add_area_update
        orig_wait = CellUpdater._retry_wait
        # Mutable counter shared with the mock below.
        num = [0]

        def mock_area(self, updated_areas, key,
                      num=num, ro_session=ro_session):
            orig_add_area(self, updated_areas, key)
            num[0] += 1
            if num[0] == 2:
                # Release the competing session on the second attempt.
                ro_session.rollback()

        try:
            # Shorten waits so the lock timeout fires quickly in the test.
            CellUpdater._retry_wait = 0.0001
            session.execute('set session innodb_lock_wait_timeout = 1')
            with mock.patch.object(CellUpdater, 'add_area_update', mock_area):
                self.queue_and_update(celery, [obs])

            # the inner task logic was called exactly twice
            assert num[0] == 2

            shard = CellShard.shard_model(obs.cellid)
            cells = session.query(shard).all()
            assert len(cells) == 1
            assert cells[0].samples == 1

            self.check_statcounter(redis, StatKey.cell, 1)
            self.check_statcounter(redis, StatKey.unique_cell, 1)
            stats.check(
                counter=[('data.observation.insert', 1, ['type:cell'])],
                timer=[('task', 1, ['task:data.update_cell'])],
            )
        finally:
            # Restore the class attribute and drop tables created mid-test.
            CellUpdater._retry_wait = orig_wait
            for model in CellShard.shards().values():
                session.execute(text(
                    'drop table %s;' % model.__tablename__))
Esempio n. 18
0
    def _update_all(self):
        """Schedule export reports, then run every cell and wifi shard update."""
        schedule_export_reports.delay().get()

        for task, shard_source in ((update_cell, CellShard),
                                   (update_wifi, WifiShard)):
            for shard_id in shard_source.shards():
                task.delay(shard_id=shard_id).get()
Esempio n. 19
0
    def _update_all(self):
        """Run the export scheduler, then update each cell and wifi shard."""
        schedule_export_reports.delay().get()

        cell_shard_ids = list(CellShard.shards())
        for shard_id in cell_shard_ids:
            update_cell.delay(shard_id=shard_id).get()

        wifi_shard_ids = list(WifiShard.shards())
        for shard_id in wifi_shard_ids:
            update_wifi.delay(shard_id=shard_id).get()
Esempio n. 20
0
    def _queue_and_update_cell(self, obs):
        """Enqueue the observations per cell shard and run each update task."""
        per_shard = defaultdict(list)
        for observation in obs:
            shard_id = CellShard.shard_id(observation.cellid)
            per_shard[shard_id].append(observation)

        for shard_id, shard_obs in per_shard.items():
            data_queue = self.celery_app.data_queues['update_cell_' + shard_id]
            # The per-shard queues carry JSON-serialized observations.
            data_queue.enqueue([item.to_json() for item in shard_obs])
            update_cell.delay(shard_id=shard_id).get()
Esempio n. 21
0
    def _queue_and_update(self, obs):
        """Group observations by cell shard, enqueue them, run the task."""
        grouped = defaultdict(list)
        for observation in obs:
            grouped[CellShard.shard_id(observation.cellid)].append(observation)

        for shard_id, group in grouped.items():
            data_queue = self.celery_app.data_queues['update_cell_' + shard_id]
            data_queue.enqueue(group)
            update_cell.delay(shard_id=shard_id).get()
Esempio n. 22
0
    def test_import_local_cell(self):
        """Importing the local cell CSV creates stations, areas and stats."""
        self.import_csv(cell_type='cell')
        # All nine imported cells land in the WCDMA shard.
        cells = self.session.query(CellShard.shards()['wcdma']).all()
        self.assertEqual(len(cells), 9)

        # One CellArea row per distinct area id among the imported cells.
        areaids = set([cell.areaid for cell in cells])
        self.assertEqual(self.session.query(CellArea).count(), len(areaids))

        update_statcounter.delay(ago=0).get()
        self.check_stat(StatKey.unique_cell, 9)
Esempio n. 23
0
    def test_lock_timeout(self, celery, redis, session, session2, metricsmock,
                          restore_db):
        """The cell updater retries after a database lock timeout.

        A second session (``session2``) holds an uncommitted insert on the
        station row; the patched ``add_area_update`` rolls that session
        back on its second call, so the task's first attempt appears to
        time out and the retry succeeds — confirm against CellUpdater's
        retry logic.
        """
        obs = CellObservationFactory.build()
        cell = CellShardFactory.build(
            radio=obs.radio,
            mcc=obs.mcc,
            mnc=obs.mnc,
            lac=obs.lac,
            cid=obs.cid,
            samples=10,
        )
        # Insert the station through the second session without committing.
        session2.add(cell)
        session2.flush()

        orig_add_area = CellUpdater.add_area_update
        orig_wait = CellUpdater._retry_wait
        # Mutable counter shared with the mock below.
        num = [0]

        def mock_area(self, updated_areas, key, num=num, session2=session2):
            orig_add_area(self, updated_areas, key)
            num[0] += 1
            if num[0] == 2:
                # Release the competing session on the second attempt.
                session2.rollback()

        try:
            # Shorten waits so the lock timeout fires quickly in the test.
            CellUpdater._retry_wait = 0.0001
            session.execute("set session innodb_lock_wait_timeout = 1")
            with mock.patch.object(CellUpdater, "add_area_update", mock_area):
                self.queue_and_update(celery, [obs])

            # the inner task logic was called exactly twice
            assert num[0] == 2

            shard = CellShard.shard_model(obs.cellid)
            cells = session.query(shard).all()
            assert len(cells) == 1
            assert cells[0].samples == 1

            self.check_statcounter(redis, StatKey.cell, 1)
            self.check_statcounter(redis, StatKey.unique_cell, 1)

            # Assert generated metrics are correct
            assert (len(
                metricsmock.filter_records("incr",
                                           "data.observation.insert",
                                           value=1,
                                           tags=["type:cell"])) == 1)
            assert (len(
                metricsmock.filter_records("timing",
                                           "task",
                                           tags=["task:data.update_cell"
                                                 ])) == 1)
        finally:
            # Restore the class attribute and drop the table created mid-test.
            CellUpdater._retry_wait = orig_wait
            session.execute(text("drop table %s;" % cell.__tablename__))
Esempio n. 24
0
    def _update_all(self):
        """Process incoming reports, then update blue, cell and wifi shards."""
        update_incoming.delay().get()

        updaters = (
            (BlueShard, update_blue),
            (CellShard, update_cell),
            (WifiShard, update_wifi),
        )
        for shard_source, task in updaters:
            for shard_id in shard_source.shards():
                task.delay(shard_id=shard_id).get()
Esempio n. 25
0
    def test_import_local_cell(self):
        """Importing the local cell CSV creates stations, areas and stats."""
        self.import_csv(cell_type='cell')
        # All nine imported cells land in the WCDMA shard.
        cells = self.session.query(CellShard.shards()['wcdma']).all()
        self.assertEqual(len(cells), 9)

        # One CellArea row per distinct area id among the imported cells.
        areaids = set([cell.areaid for cell in cells])
        self.assertEqual(
            self.session.query(CellArea).count(), len(areaids))

        update_statcounter.delay(ago=0).get()
        self.check_stat(StatKey.unique_cell, 9)
Esempio n. 26
0
    def test_import_local_cell(self, celery, redis, session):
        """Importing the local cell CSV creates stations, areas and stats."""
        self.import_csv(
            celery, redis, session,
            CellShardFactory.build(radio=Radio.wcdma), cell_type='cell')
        # All nine imported cells land in the WCDMA shard.
        cells = session.query(CellShard.shards()['wcdma']).all()
        assert len(cells) == 9

        # One CellArea row per distinct area id among the imported cells.
        areaids = set([cell.areaid for cell in cells])
        assert session.query(CellArea).count() == len(areaids)

        update_statcounter.delay().get()
        self.check_stat(session, StatKey.unique_cell, 9)
Esempio n. 27
0
    def test_bad_data_skipped(self, session, redis_client, cellarea_queue):
        """A row that has invalid data (like a string for a number) is skipped."""
        # In GSM row, the mcc field should be a number, not a string
        csv = StringIO("""\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,"MCC",10,30014,20669,,2.5112670,46.5992450,0,78,1,1566307030,1570119413,
LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,
""")
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # The invalid GSM row is skipped
        gsm_model = CellShard.shard_model(Radio.gsm)
        assert session.query(func.count(gsm_model.cellid)).scalar() == 0

        # The valid WCDMA and LTE rows are processed, and in the same region
        wcdma_model = CellShard.shard_model(Radio.wcdma)
        lte_model = CellShard.shard_model(Radio.lte)
        assert session.query(func.count(wcdma_model.cellid)).scalar() == 1
        assert session.query(func.count(lte_model.cellid)).scalar() == 1
        # Two CellAreas (WCDMA and LTE), one shared RegionStat.
        assert session.query(func.count(CellArea.areaid)).scalar() == 2
        assert session.query(func.count(RegionStat.region)).scalar() == 1
Esempio n. 28
0
    def test_shard_queues(self):  # BBB
        """Observations on the legacy single queue are moved to shard queues."""
        observations = CellObservationFactory.build_batch(3)
        data_queues = self.celery_app.data_queues
        legacy_queue = data_queues['update_cell']
        legacy_queue.enqueue(observations)
        update_cell.delay().get()

        # The legacy queue is drained ...
        self.assertEqual(legacy_queue.size(), 0)

        # ... and all three observations landed on per-shard queues.
        sharded_total = sum(
            data_queues['update_cell_' + shard_id].size()
            for shard_id in CellShard.shards()
        )
        self.assertEqual(sharded_total, 3)
Esempio n. 29
0
    def test_shard_queues(self):  # BBB
        """The legacy update_cell queue is drained into per-shard queues."""
        observations = CellObservationFactory.build_batch(3)
        queues = self.celery_app.data_queues
        queues['update_cell'].enqueue(observations)
        update_cell.delay().get()

        self.assertEqual(queues['update_cell'].size(), 0)

        per_shard_sizes = [
            queues['update_cell_' + shard_id].size()
            for shard_id in CellShard.shards().keys()
        ]
        self.assertEqual(sum(per_shard_sizes), 3)
Esempio n. 30
0
    def test_lock_timeout(self):
        """A lock wait timeout during the cell update triggers a retry.

        A second (read-only) session holds an uncommitted row for the same
        station, so the updater's first attempt hits the lock timeout; the
        patched ``add_area_update`` rolls that session back on its second
        invocation, letting the retry succeed.
        """
        obs = CellObservationFactory.build()
        cell = CellShardFactory.build(
            radio=obs.radio,
            mcc=obs.mcc,
            mnc=obs.mnc,
            lac=obs.lac,
            cid=obs.cid,
            samples=10,
        )
        # Flush (without commit) on the read-only session so it keeps the
        # row locked while the updater task runs.
        self.db_ro_session.add(cell)
        self.db_ro_session.flush()

        orig_add_area = CellUpdater.add_area_update
        orig_wait = CellUpdater._retry_wait
        num = [0]  # mutable invocation counter shared with the patch

        def mock_area(self,
                      updated_areas,
                      key,
                      num=num,
                      ro_session=self.db_ro_session):
            # Delegate to the real implementation, then release the
            # blocking session's lock on the second call.
            orig_add_area(self, updated_areas, key)
            num[0] += 1
            if num[0] == 2:
                ro_session.rollback()

        try:
            # Shrink the retry delay and the lock wait so the test is fast.
            CellUpdater._retry_wait = 0.001
            self.session.execute('set session innodb_lock_wait_timeout = 1')
            with mock.patch.object(CellUpdater, 'add_area_update', mock_area):
                self._queue_and_update_cell([obs])
        finally:
            CellUpdater._retry_wait = orig_wait

        # the inner task logic was called exactly twice
        self.assertEqual(num[0], 2)

        shard = CellShard.shard_model(obs.cellid)
        cells = self.session.query(shard).all()
        self.assertEqual(len(cells), 1)
        self.assertEqual(cells[0].samples, 1)

        self.check_statcounter(StatKey.cell, 1)
        self.check_statcounter(StatKey.unique_cell, 1)
        self.check_stats(
            counter=[('data.observation.insert', 1, ['type:cell'])],
            timer=[('task', 1, ['task:data.update_cell'])],
        )
Esempio n. 31
0
    def test_invalid_row_skipped(self, session, redis_client, cellarea_queue):
        """A row that fails validation is skipped."""
        # In GSM row, the longitude 202.5 is greater than max of 180
        csv = StringIO(
            """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
GSM,208,10,30014,20669,,202.5,46.5992450,0,78,1,1566307030,1570119413,
LTE,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220588,1570120328,
"""
        )
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # The invalid GSM row is skipped
        gsm_model = CellShard.shard_model(Radio.gsm)
        assert session.query(func.count(gsm_model.cellid)).scalar() == 0

        # The valid UMTS and LTE rows are processed, and in the same region
        umts_model = CellShard.shard_model(Radio.umts)
        lte_model = CellShard.shard_model(Radio.lte)
        assert session.query(func.count(umts_model.cellid)).scalar() == 1
        assert session.query(func.count(lte_model.cellid)).scalar() == 1
        # Same coordinates for both valid rows: two areas, one region stat.
        assert session.query(func.count(CellArea.areaid)).scalar() == 2
        assert session.query(func.count(RegionStat.region)).scalar() == 1
Esempio n. 32
0
    def _update_all(self, session, datamap_only=False):
        """Process queued reports; unless datamap_only, run every shard task."""
        ExportConfigFactory(name='internal', batch=0, schema='internal')
        session.flush()
        update_incoming.delay().get()

        if datamap_only:
            return

        for blue_id in BlueShard.shards():
            update_blue.delay(shard_id=blue_id).get()
        for cell_id in CellShard.shards():
            update_cell.delay(shard_id=cell_id).get()
        for wifi_id in WifiShard.shards():
            update_wifi.delay(shard_id=wifi_id).get()
Esempio n. 33
0
    def _update_all(self, session, datamap_only=False):
        """Process queued reports and, unless datamap_only, all shard queues."""
        ExportConfigFactory(name="internal", batch=0, schema="internal")
        session.flush()
        update_incoming.delay().get()

        if datamap_only:
            return

        for shard_class, update_task in (
                (BlueShard, update_blue),
                (CellShard, update_cell),
                (WifiShard, update_wifi)):
            for shard_id in shard_class.shards():
                update_task.delay(shard_id=shard_id).get()
    def test_lock_timeout(self, celery, redis, session, session2, stats,
                          restore_db):
        """A lock wait timeout during the cell update triggers a retry.

        ``session2`` holds an uncommitted row for the same station so the
        first update attempt blocks; the patched ``add_area_update`` rolls
        ``session2`` back on its second invocation so the retry succeeds.
        """
        obs = CellObservationFactory.build()
        cell = CellShardFactory.build(
            radio=obs.radio,
            mcc=obs.mcc,
            mnc=obs.mnc,
            lac=obs.lac,
            cid=obs.cid,
            samples=10,
        )
        # Flush without committing so session2 keeps its row lock.
        session2.add(cell)
        session2.flush()

        orig_add_area = CellUpdater.add_area_update
        orig_wait = CellUpdater._retry_wait
        num = [0]  # mutable invocation counter shared with the patch

        def mock_area(self, updated_areas, key, num=num, session2=session2):
            # Run the real logic, then release the lock on the second call.
            orig_add_area(self, updated_areas, key)
            num[0] += 1
            if num[0] == 2:
                session2.rollback()

        try:
            # Shrink the retry delay and the lock wait so the test is fast.
            CellUpdater._retry_wait = 0.0001
            session.execute('set session innodb_lock_wait_timeout = 1')
            with mock.patch.object(CellUpdater, 'add_area_update', mock_area):
                self.queue_and_update(celery, [obs])

            # the inner task logic was called exactly twice
            assert num[0] == 2

            shard = CellShard.shard_model(obs.cellid)
            cells = session.query(shard).all()
            assert len(cells) == 1
            assert cells[0].samples == 1

            self.check_statcounter(redis, StatKey.cell, 1)
            self.check_statcounter(redis, StatKey.unique_cell, 1)
            stats.check(
                counter=[('data.observation.insert', 1, ['type:cell'])],
                timer=[('task', 1, ['task:data.update_cell'])],
            )
        finally:
            # Restore the retry delay and drop the shard table so the
            # restore_db fixture can rebuild a clean schema.
            CellUpdater._retry_wait = orig_wait
            session.execute(text('drop table %s;' % cell.__tablename__))
Esempio n. 35
0
    def test_cell_duplicated(self, celery, session):
        """Duplicate cell entries in one report collapse to a single sample."""
        self.add_reports(celery, cell_factor=1, wifi_factor=0)
        # Duplicate the queued report's cell entry twice, varying only the
        # signal strength so the copies are not byte-identical.
        item = self._pop_item(celery)
        report = item["report"]
        tower = report["cellTowers"][0]
        radio = tower["radioType"]
        report["cellTowers"].extend([tower.copy(), tower.copy()])
        report["cellTowers"][1]["signalStrength"] += 2
        report["cellTowers"][2]["signalStrength"] -= 2
        self._push_item(celery, item)
        self._update_all(session)

        shard_model = CellShard.shard_model(radio)
        rows = session.query(shard_model).all()
        assert len(rows) == 1
        assert rows[0].samples == 1
Esempio n. 36
0
    def test_cell_duplicated(self):
        """Duplicated cell entries in a report only count as one sample."""
        self.add_reports(cell_factor=1, wifi_factor=0)
        # Clone the single cell entry twice, tweaking the signal strength
        # so the clones differ slightly from the original.
        item = self._pop_item()
        report = item['report']
        tower = report['cellTowers'][0]
        radio = tower['radioType']
        for offset in (2, -2):
            clone = tower.copy()
            clone['signalStrength'] += offset
            report['cellTowers'].append(clone)
        self._push_item(item)
        self._update_all()

        shard_model = CellShard.shard_model(radio)
        rows = self.session.query(shard_model).all()
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0].samples, 1)
Esempio n. 37
0
    def test_cell_duplicated(self, celery, session):
        """A report with the same cell repeated yields one stored sample."""
        self.add_reports(celery, cell_factor=1, wifi_factor=0)
        # Append two near-copies of the cell entry with shifted signals.
        item = self._pop_item(celery)
        towers = item['report']['cellTowers']
        first = towers[0]
        radio = first['radioType']
        stronger = first.copy()
        stronger['signalStrength'] += 2
        weaker = first.copy()
        weaker['signalStrength'] -= 2
        towers.append(stronger)
        towers.append(weaker)
        self._push_item(celery, item)
        self._update_all(session)

        shard = CellShard.shard_model(radio)
        cells = session.query(shard).all()
        assert len(cells) == 1
        assert cells[0].samples == 1
Esempio n. 38
0
    def test_cell_duplicated(self):
        """Repeated cell entries within one report are deduplicated."""
        self.add_reports(cell_factor=1, wifi_factor=0)
        # Pull the queued report, duplicate its cell entry twice with
        # slightly different signal strengths, then requeue it.
        queue = self.celery_app.export_queues['internal']
        items = queue.dequeue(queue.queue_key())
        report = items[0]['report']
        cell = report['cellTowers'][0]
        radio = cell['radioType']
        for delta in (2, -2):
            duplicate = cell.copy()
            duplicate['signalStrength'] += delta
            report['cellTowers'].append(duplicate)
        queue.enqueue(items, queue.queue_key())
        self._update_all()

        model = CellShard.shard_model(radio)
        stored = self.session.query(model).all()
        self.assertEqual(len(stored), 1)
        self.assertEqual(stored[0].samples, 1)
Esempio n. 39
0
    def test_cell_duplicated(self):
        """Duplicate cells inside a single report produce one sample."""
        self.add_reports(cell_factor=1, wifi_factor=0)
        # Duplicate the cell entry inside the queued report, shifting the
        # copies' signal strengths, then requeue for processing.
        queue = self.celery_app.export_queues['queue_export_internal']
        items = queue.dequeue(queue.queue_key())
        report = items[0]['report']
        original = report['cellTowers'][0]
        radio = original['radioType']
        copies = [original.copy(), original.copy()]
        copies[0]['signalStrength'] += 2
        copies[1]['signalStrength'] -= 2
        report['cellTowers'] += copies
        queue.enqueue(items, queue.queue_key())
        self._update_all()

        shard = CellShard.shard_model(radio)
        rows = self.session.query(shard).all()
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0].samples, 1)
Esempio n. 40
0
    def test_lock_timeout(self):
        """A lock wait timeout during the cell update triggers a retry.

        The read-only session holds an uncommitted row for the station;
        the patched ``add_area_update`` rolls it back on its second
        invocation so the retried task can complete.
        """
        obs = CellObservationFactory.build()
        cell = CellShardFactory.build(
            radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc,
            lac=obs.lac, cid=obs.cid,
            samples=10,
        )
        # Flush (no commit) so the read-only session keeps its row lock.
        self.db_ro_session.add(cell)
        self.db_ro_session.flush()

        orig_add_area = CellUpdater.add_area_update
        orig_wait = CellUpdater._retry_wait
        num = [0]  # mutable invocation counter shared with the patch

        def mock_area(self, updated_areas, key,
                      num=num, ro_session=self.db_ro_session):
            # Run the real logic, then release the lock on the second call.
            orig_add_area(self, updated_areas, key)
            num[0] += 1
            if num[0] == 2:
                ro_session.rollback()

        try:
            # Shrink the retry delay and the lock wait so the test is fast.
            CellUpdater._retry_wait = 0.001
            self.session.execute('set session innodb_lock_wait_timeout = 1')
            with mock.patch.object(CellUpdater, 'add_area_update', mock_area):
                self._queue_and_update_cell([obs])
        finally:
            CellUpdater._retry_wait = orig_wait

        # the inner task logic was called exactly twice
        self.assertEqual(num[0], 2)

        shard = CellShard.shard_model(obs.cellid)
        cells = self.session.query(shard).all()
        self.assertEqual(len(cells), 1)
        self.assertEqual(cells[0].samples, 1)

        self.check_statcounter(StatKey.cell, 1)
        self.check_statcounter(StatKey.unique_cell, 1)
        self.check_stats(
            counter=[('data.observation.insert', 1, ['type:cell'])],
            timer=[('task', 1, ['task:data.update_cell'])],
        )
Esempio n. 41
0
    def test_cell(self, celery, session):
        """A single-cell report produces one station with matching fields."""
        reports = self.add_reports(celery, cell_factor=1, wifi_factor=0)
        self._update_all(session)

        position = reports[0]["position"]
        tower = reports[0]["cellTowers"][0]
        model = CellShard.shard_model(tower["radioType"])
        stations = session.query(model).all()
        assert len(stations) == 1
        station = stations[0]

        # Position and every identifier come straight from the report.
        assert station.lat == position["latitude"]
        assert station.lon == position["longitude"]
        assert station.radio.name == tower["radioType"]
        assert station.mcc == tower["mobileCountryCode"]
        assert station.mnc == tower["mobileNetworkCode"]
        assert station.lac == tower["locationAreaCode"]
        assert station.cid == tower["cellId"]
        assert station.psc == tower["primaryScramblingCode"]
        assert station.samples == 1
Esempio n. 42
0
    def test_cell(self, celery, session):
        """Processing one cell report stores a station mirroring it."""
        reports = self.add_reports(celery, cell_factor=1, wifi_factor=0)
        self._update_all(session)

        report = reports[0]
        position = report['position']
        cell_data = report['cellTowers'][0]
        shard = CellShard.shard_model(cell_data['radioType'])
        result = session.query(shard).all()
        assert len(result) == 1
        stored = result[0]

        assert stored.lat == position['latitude']
        assert stored.lon == position['longitude']
        assert stored.radio.name == cell_data['radioType']
        # Network identifiers map one-to-one from the report fields.
        assert stored.mcc == cell_data['mobileCountryCode']
        assert stored.mnc == cell_data['mobileNetworkCode']
        assert stored.lac == cell_data['locationAreaCode']
        assert stored.cid == cell_data['cellId']
        assert stored.psc == cell_data['primaryScramblingCode']
        assert stored.samples == 1
Esempio n. 43
0
    def test_cell(self):
        """One cell report results in a single matching station row."""
        reports = self.add_reports(cell_factor=1, wifi_factor=0)
        self._update_all()

        position = reports[0]['position']
        tower = reports[0]['cellTowers'][0]
        model = CellShard.shard_model(tower['radioType'])
        rows = self.session.query(model).all()
        self.assertEqual(len(rows), 1)
        row = rows[0]

        # The stored row mirrors the report's position and identifiers.
        self.assertEqual(row.lat, position['latitude'])
        self.assertEqual(row.lon, position['longitude'])
        self.assertEqual(row.radio.name, tower['radioType'])
        self.assertEqual(row.mcc, tower['mobileCountryCode'])
        self.assertEqual(row.mnc, tower['mobileNetworkCode'])
        self.assertEqual(row.lac, tower['locationAreaCode'])
        self.assertEqual(row.cid, tower['cellId'])
        self.assertEqual(row.psc, tower['primaryScramblingCode'])
        self.assertEqual(row.samples, 1)
Esempio n. 44
0
    def test_cell(self):
        """A lone cell report creates exactly one station record."""
        reports = self.add_reports(cell_factor=1, wifi_factor=0)
        self._update_all()

        position = reports[0]['position']
        cell_data = reports[0]['cellTowers'][0]
        shard = CellShard.shard_model(cell_data['radioType'])
        stations = self.session.query(shard).all()
        self.assertEqual(len(stations), 1)
        station = stations[0]

        self.assertEqual(station.lat, position['latitude'])
        self.assertEqual(station.lon, position['longitude'])
        self.assertEqual(station.radio.name, cell_data['radioType'])
        # Remaining identifiers map one-to-one from report keys.
        field_map = [
            ('mcc', 'mobileCountryCode'),
            ('mnc', 'mobileNetworkCode'),
            ('lac', 'locationAreaCode'),
            ('cid', 'cellId'),
            ('psc', 'primaryScramblingCode'),
        ]
        for attr, key in field_map:
            self.assertEqual(getattr(station, attr), cell_data[key])
        self.assertEqual(station.samples, 1)
Esempio n. 45
0
    def test_weighted_update(self):
        """Observation weights control how far the station position moves.

        The station starts at weight 2.0; two observations west of it
        (with weights presumably derived from their accuracy/signal
        values — confirm against the updater's weighting logic) pull the
        longitude west while the latitude stays put.
        """
        cell = CellShardFactory(radio=Radio.gsm, samples=1, weight=2.0)
        cell_lat = cell.lat
        cell_lon = cell.lon
        cell_key = dict(radio=cell.radio,
                        mcc=cell.mcc,
                        mnc=cell.mnc,
                        lac=cell.lac,
                        cid=cell.cid)

        obs_factory = CellObservationFactory
        obs = [
            # Strong signal, tight accuracy: closer to the station.
            obs_factory(lat=cell.lat,
                        lon=cell.lon - 0.002,
                        accuracy=20.0,
                        signal=-51,
                        **cell_key),
            # Weak signal, loose accuracy: further west.
            obs_factory(lat=cell.lat,
                        signal=-111,
                        lon=cell.lon - 0.004,
                        accuracy=40.0,
                        **cell_key),
        ]

        self.session.commit()
        self._queue_and_update_cell(obs)
        shard = CellShard.shard_model(cell.cellid)
        cells = self.session.query(shard).all()
        self.assertEqual(len(cells), 1)
        cell = cells[0]
        # Latitude unchanged; longitude is a weighted mean west of the
        # original; bounding box/radius cover the westernmost observation.
        self.assertAlmostEqual(cell.lat, cell_lat)
        self.assertAlmostEqual(cell.max_lat, cell_lat)
        self.assertAlmostEqual(cell.min_lat, cell_lat)
        self.assertAlmostEqual(cell.lon, cell_lon - 0.0016358, 7)
        self.assertAlmostEqual(cell.max_lon, cell_lon)
        self.assertAlmostEqual(cell.min_lon, cell_lon - 0.004)
        self.assertEqual(cell.radius, 164)
        self.assertEqual(cell.samples, 3)
        self.assertAlmostEqual(cell.weight, 9.47, 2)
Esempio n. 46
0
def configure_data(redis_client):
    """
    Configure fixed set of data queues.

    Returns a dict mapping queue name to DataQueue: the shared
    update_incoming queue, the two cell-area queues, and one queue per
    blue/datamap/cell/wifi shard.
    """
    queues = {
        # update_incoming needs to be the exact same as in webapp.config
        'update_incoming': DataQueue('update_incoming', redis_client,
                                     batch=100, compress=True),
    }
    for name in ('update_cellarea', 'update_cellarea_ocid'):
        queues[name] = DataQueue(name, redis_client, batch=100, json=False)
    for shard_id in BlueShard.shards():
        name = 'update_blue_' + shard_id
        queues[name] = DataQueue(name, redis_client, batch=500)
    for shard_id in DataMap.shards():
        name = 'update_datamap_' + shard_id
        queues[name] = DataQueue(name, redis_client, batch=500, json=False)
    for shard_id in CellShard.shards():
        name = 'update_cell_' + shard_id
        queues[name] = DataQueue(name, redis_client, batch=500)
    for shard_id in WifiShard.shards():
        name = 'update_wifi_' + shard_id
        queues[name] = DataQueue(name, redis_client, batch=500)
    return queues
Esempio n. 47
0
def celerybeat_schedule(app_config):
    """Return the celery beat schedule as a dictionary."""

    sections = app_config.sections()

    def entry(task, when, expires, args=None):
        # Build one beat entry for a task in ichnaea.data.tasks;
        # ``args`` is only included when given.
        spec = {
            'task': 'ichnaea.data.tasks.' + task,
            'schedule': when,
            'options': {'expires': expires},
        }
        if args is not None:
            spec['args'] = args
        return spec

    schedule = {
        # Monitoring
        'monitor-queue-size': entry(
            'monitor_queue_size', timedelta(seconds=60), 57),
        'monitor-api-users': entry(
            'monitor_api_users', timedelta(seconds=600), 570),
        'monitor-api-key-limits': entry(
            'monitor_api_key_limits', timedelta(seconds=600), 570),

        # Statistics
        'update-statcounter': entry(
            'update_statcounter', crontab(minute=3), 2700, args=(1, )),
        'update-statregion': entry(
            'update_statregion', timedelta(seconds=3600 * 6), 3600 * 5),

        # Data Pipeline
        'schedule-export-reports': entry(
            'schedule_export_reports', timedelta(seconds=8), 15),
        'update-cellarea': entry(
            'update_cellarea', timedelta(seconds=8), 15, args=(100, )),
        'update-cellarea-ocid': entry(
            'update_cellarea_ocid', timedelta(seconds=9), 15, args=(100, )),
        'update-score': entry(
            'update_score', timedelta(seconds=9), 10, args=(250, )),
    }

    # One periodic update task per station/datamap shard.
    for shard_id in CellShard.shards().keys():
        schedule['update-cell-' + shard_id] = entry(
            'update_cell', timedelta(seconds=7), 10, args=(500, shard_id))

    for shard_id in DataMap.shards().keys():
        schedule['update-datamap-' + shard_id] = entry(
            'update_datamap', timedelta(seconds=14), 20, args=(500, shard_id))

    for shard_id in WifiShard.shards().keys():
        schedule['update-wifi-' + shard_id] = entry(
            'update_wifi', timedelta(seconds=6), 10, args=(500, shard_id))

    if 'assets' in sections and app_config.get('assets', 'bucket', None):
        # only configure tasks if target bucket is configured
        schedule['cell-export-full'] = entry(
            'cell_export_full', crontab(hour=0, minute=13), 39600)
        schedule['cell-export-diff'] = entry(
            'cell_export_diff', crontab(minute=3), 2700)

    if 'import:ocid' in sections:
        schedule['monitor-ocid-import'] = entry(
            'monitor_ocid_import', timedelta(seconds=600), 570)
        schedule['cell-import-external'] = entry(
            'cell_import_external', crontab(minute=52), 2700, args=(True, ))

    return schedule
Esempio n. 48
0
    def process_reports(self, reports, userid=None):
        """Split reports into cell/wifi observations and queue them.

        Counts uploaded vs dropped observations per station type, tracks
        the unique positions seen, shards observations onto the per-shard
        update queues, then updates datamap, score and stats. A report
        contributing no observations at all counts as malformed.
        """
        malformed_reports = 0
        positions = set()  # unique (lat, lon) pairs from usable reports
        observations = {'cell': [], 'wifi': []}
        obs_count = {
            'cell': {'upload': 0, 'drop': 0},
            'wifi': {'upload': 0, 'drop': 0},
        }
        new_station_count = {'cell': 0, 'wifi': 0}

        for report in reports:
            cell, wifi, malformed_obs = self.process_report(report)
            if cell:
                observations['cell'].extend(cell)
                obs_count['cell']['upload'] += len(cell)
            if wifi:
                observations['wifi'].extend(wifi)
                obs_count['wifi']['upload'] += len(wifi)
            if (cell or wifi):
                positions.add((report['lat'], report['lon']))
            else:
                # Report yielded no usable observations at all.
                malformed_reports += 1
            for name in ('cell', 'wifi'):
                obs_count[name]['drop'] += malformed_obs[name]

        # group by unique station key
        for name in ('cell', 'wifi'):
            station_keys = set()
            for obs in observations[name]:
                if name == 'cell':
                    station_keys.add(obs.cellid)
                elif name == 'wifi':
                    station_keys.add(obs.mac)
            # determine scores for stations
            new_station_count[name] += self.new_stations(name, station_keys)

        if observations['cell']:
            # Shard cell observations by cellid and enqueue per shard.
            sharded_obs = defaultdict(list)
            for ob in observations['cell']:
                shard_id = CellShard.shard_id(ob.cellid)
                sharded_obs[shard_id].append(ob)
            for shard_id, values in sharded_obs.items():
                cell_queue = self.data_queues['update_cell_' + shard_id]
                cell_queue.enqueue(list(values), pipe=self.pipe)

        if observations['wifi']:
            # Shard wifi observations by MAC address and enqueue per shard.
            sharded_obs = defaultdict(list)
            for ob in observations['wifi']:
                shard_id = WifiShard.shard_id(ob.mac)
                sharded_obs[shard_id].append(ob)
            for shard_id, values in sharded_obs.items():
                wifi_queue = self.data_queues['update_wifi_' + shard_id]
                wifi_queue.enqueue(list(values), pipe=self.pipe)

        self.process_datamap(positions)
        self.process_score(userid, positions, new_station_count)
        self.emit_stats(
            len(reports),
            malformed_reports,
            obs_count,
        )
Esempio n. 49
0
    def test_blocklist_moving_cells(self):
        """Cells whose observations conflict with their prior position
        end up blocked.

        Five cells are prepared: one with no prior position (should not
        be blocked), three whose observations are far from a known prior
        position, and one already blocklisted; the latter four make up
        the expected ``moving`` set.
        """
        now = util.utcnow()
        today = now.date()
        obs = []
        obs_factory = CellObservationFactory
        moving = set()  # cellids expected to be blocked afterwards
        cells = CellShardFactory.create_batch(4)
        cells.append(CellShardFactory.build())
        # a cell with an entry but no prior position
        cell = cells[0]
        cell_key = dict(radio=cell.radio,
                        mcc=cell.mcc,
                        mnc=cell.mnc,
                        lac=cell.lac,
                        cid=cell.cid)
        cell.samples = 0
        obs.extend([
            obs_factory(lat=cell.lat + 0.01, lon=cell.lon + 0.01, **cell_key),
            obs_factory(lat=cell.lat + 0.02, lon=cell.lon + 0.05, **cell_key),
            obs_factory(lat=cell.lat + 0.03, lon=cell.lon + 0.09, **cell_key),
        ])
        cell.lat = None
        cell.lon = None
        # a cell with a prior known position
        cell = cells[1]
        cell_key = dict(radio=cell.radio,
                        mcc=cell.mcc,
                        mnc=cell.mnc,
                        lac=cell.lac,
                        cid=cell.cid)
        cell.samples = 1
        cell.lat += 0.1
        obs.extend([
            obs_factory(lat=cell.lat + 1.0, lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        # a cell with a very different prior position
        cell = cells[2]
        cell_key = dict(radio=cell.radio,
                        mcc=cell.mcc,
                        mnc=cell.mnc,
                        lac=cell.lac,
                        cid=cell.cid)
        cell.samples = 1
        obs.extend([
            obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat - 0.1, lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        # another cell with a prior known position (and negative lon)
        cell = cells[3]
        cell_key = dict(radio=cell.radio,
                        mcc=cell.mcc,
                        mnc=cell.mnc,
                        lac=cell.lac,
                        cid=cell.cid)
        cell.samples = 1
        cell.lon *= -1.0
        obs.extend([
            obs_factory(lat=cell.lat + 1.0, lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat + 2.0, lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        # an already blocklisted cell
        cell = cells[4]
        cell_key = dict(radio=cell.radio,
                        mcc=cell.mcc,
                        mnc=cell.mnc,
                        lac=cell.lac,
                        cid=cell.cid)
        CellShardFactory(block_first=today,
                         block_last=today,
                         block_count=1,
                         **cell_key)
        obs.extend([
            obs_factory(lat=cell.lat, lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat + 3.0, lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        self.session.commit()
        self._queue_and_update(obs)

        # Collect every blocked row from the affected shards and compare
        # against the expected moving set.
        shards = set()
        for cellid in moving:
            shards.add(CellShard.shard_model(cellid))
        blocks = []
        for shard in shards:
            for row in self.session.query(shard).all():
                if row.blocked():
                    blocks.append(row)
        self.assertEqual(set([b.cellid for b in blocks]), moving)
Esempio n. 50
0
    def test_blocklist_moving_cells(self):
        """Cells whose observations conflict with their prior position
        end up blocked.

        Five cells are prepared: one with no prior position (should not
        be blocked), three whose observations are far from a known prior
        position, and one already blocklisted; the latter four make up
        the expected ``moving`` set.
        """
        now = util.utcnow()
        today = now.date()
        obs = []
        obs_factory = CellObservationFactory
        moving = set()  # cellids expected to be blocked afterwards
        cells = CellShardFactory.create_batch(4)
        cells.append(CellShardFactory.build())
        # a cell with an entry but no prior position
        cell = cells[0]
        cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                        lac=cell.lac, cid=cell.cid)
        cell.samples = None
        cell.weight = None
        obs.extend([
            obs_factory(lat=cell.lat + 0.01,
                        lon=cell.lon + 0.01, **cell_key),
            obs_factory(lat=cell.lat + 0.02,
                        lon=cell.lon + 0.05, **cell_key),
            obs_factory(lat=cell.lat + 0.03,
                        lon=cell.lon + 0.09, **cell_key),
        ])
        cell.lat = None
        cell.lon = None
        # a cell with a prior known position
        cell = cells[1]
        cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                        lac=cell.lac, cid=cell.cid)
        cell.samples = 1
        cell.weight = 1.0
        cell.lat += 0.1
        obs.extend([
            obs_factory(lat=cell.lat + 1.0,
                        lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat + 3.0,
                        lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        # a cell with a very different prior position
        cell = cells[2]
        cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                        lac=cell.lac, cid=cell.cid)
        cell.samples = 1
        cell.weight = 1.0
        obs.extend([
            obs_factory(lat=cell.lat + 3.0,
                        lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat - 0.1,
                        lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        # another cell with a prior known position (and negative lon)
        cell = cells[3]
        cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                        lac=cell.lac, cid=cell.cid)
        cell.samples = 1
        cell.weight = 1.0
        cell.lon *= -1.0
        obs.extend([
            obs_factory(lat=cell.lat + 1.0,
                        lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat + 2.0,
                        lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        # an already blocklisted cell
        cell = cells[4]
        cell_key = dict(radio=cell.radio, mcc=cell.mcc, mnc=cell.mnc,
                        lac=cell.lac, cid=cell.cid)
        CellShardFactory(block_first=today, block_last=today, block_count=1,
                         **cell_key)
        obs.extend([
            obs_factory(lat=cell.lat,
                        lon=cell.lon, **cell_key),
            obs_factory(lat=cell.lat + 3.0,
                        lon=cell.lon, **cell_key),
        ])
        moving.add(cell.cellid)
        self.session.commit()
        self._queue_and_update_cell(obs)

        # Collect every blocked row from the affected shards and compare
        # against the expected moving set.
        shards = set()
        for cellid in moving:
            shards.add(CellShard.shard_model(cellid))
        blocks = []
        for shard in shards:
            for row in self.session.query(shard).all():
                if row.blocked():
                    blocks.append(row)
        self.assertEqual(set([b.cellid for b in blocks]), moving)
Esempio n. 51
0
    def tearDown(self):
        """Drop every cell shard table, then rebuild a clean schema."""
        # Gather the shard table names first, then drop each one.
        table_names = [shard.__tablename__
                       for shard in CellShard.shards().values()]
        for table_name in table_names:
            self.session.execute(text('drop table %s;' % table_name))

        # Recreate the tables so subsequent tests start from a known schema.
        self.setup_tables(self.db_rw.engine)
        super(TestDatabaseErrors, self).tearDown()
Esempio n. 52
0
    def test_retriable_exceptions(
        self,
        celery,
        redis,
        session,
        db_shared_session,
        metricsmock,
        errclass,
        errno,
        errmsg,
        backoff_sleep_mock,
    ):
        """Test database exceptions where the task should wait and try again.

        ``errclass``/``errno``/``errmsg`` describe a retriable DB-API
        error (presumably supplied by pytest parametrization — the
        decorator is outside this view). The error is raised on the first
        call to ``CellUpdater.add_area_update`` and the second call
        succeeds; the task is expected to back off once, retry, and still
        apply the observation to the existing cell record.
        """

        obs = CellObservationFactory.build(radio=Radio.lte)
        shard = CellShard.shard_model(obs.cellid)
        # An existing cell matching the observation's identifiers, with a
        # known sample count and creation date to assert against later.
        cell = CellShardFactory.build(
            radio=obs.radio,
            mcc=obs.mcc,
            mnc=obs.mnc,
            lac=obs.lac,
            cid=obs.cid,
            samples=10,
            created=datetime(2019, 12, 5, tzinfo=UTC),
        )
        session.add(cell)
        session.commit()
        session.begin_nested()  # Protect test cell from task rollback

        # Wrap the raw driver error the way SQLAlchemy surfaces it to
        # application code (InterfaceError around the DB-API error).
        error = errclass(errno, errmsg)
        wrapped = InterfaceError.instance(
            statement="SELECT COUNT(*) FROM cell_area",
            params={},
            orig=error,
            dbapi_base_err=MySQLError,
        )
        # First call raises the wrapped error, second call succeeds.
        with mock.patch.object(CellUpdater,
                               "add_area_update",
                               side_effect=[wrapped, None]):
            self._queue_and_update(celery, [obs], update_cell)
            assert CellUpdater.add_area_update.call_count == 2
            backoff_sleep_mock.assert_called_once()

        cells = session.query(shard).all()
        assert len(cells) == 1
        self.check_statcounter(redis, StatKey.cell, 1)

        # The existing cell record was updated
        cell = cells[0]
        assert cell.samples == 11
        assert cell.created == datetime(2019, 12, 5, tzinfo=UTC)
        self.check_statcounter(redis, StatKey.unique_cell, 0)

        # Assert generated metrics are correct
        metricsmock.assert_incr_once("data.observation.insert",
                                     value=1,
                                     tags=["type:cell"])
        metricsmock.assert_incr_once("data.station.confirm",
                                     value=1,
                                     tags=["type:cell"])
        metricsmock.assert_timing_once("task", tags=["task:data.update_cell"])
        metricsmock.assert_incr_once("data.station.dberror",
                                     tags=["type:cell",
                                           "errno:%s" % errno])
Esempio n. 53
0
def write_stations_to_csv(session, path, start_time=None, end_time=None):
    """Export cell stations as a gzip-compressed CSV file.

    :param session: Database session used to query the cell shard tables.
    :param path: Destination path for the gzipped CSV output.
    :param start_time: Inclusive lower bound on the ``modified`` column;
        only applied when ``end_time`` is also given.
    :param end_time: Exclusive upper bound on the ``modified`` column.

    Rows without a position and rows with radio type 1 are excluded;
    radio types 0/2/3 are rendered as GSM/UMTS/LTE, anything else as an
    empty string. Each shard table is streamed in LIMIT/OFFSET pages so
    the full table is never held in memory.
    """
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if None not in (start_time, end_time):
        # NOTE(review): datetimes are formatted naively here; assumes they
        # match the database server's timezone (presumably UTC) -- confirm.
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        'radio', 'mcc', 'net', 'area', 'cell', 'unit',
        'lon', 'lat', 'range', 'samples', 'changeable',
        'created', 'updated', 'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    # One query template applied to every shard table; the DB builds each
    # CSV line via CONCAT_WS so Python only joins the row strings.
    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `cellid`
LIMIT :l
OFFSET :o
'''

    # NOTE(review): the gzip_open result is entered as a context manager
    # twice (wrapper, then file); assumes util.gzip_open is designed for
    # this nested use -- confirm against its implementation.
    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                # Offset is reset for every table so each shard is paged
                # from its first row.
                offset = 0
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(o=offset, l=limit)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        offset += limit
                    else:
                        break
Esempio n. 54
0
def write_stations_to_csv(session, path, start_time=None, end_time=None):
    """Export cell stations as a gzip-compressed CSV file.

    :param session: Database session used to query the cell shard tables.
    :param path: Destination path for the gzipped CSV output.
    :param start_time: Inclusive lower bound on the ``modified`` column;
        only applied when ``end_time`` is also given.
    :param end_time: Exclusive upper bound on the ``modified`` column.

    Rows without a position and rows with radio type 1 are excluded;
    radio types 0/2/3 are rendered as GSM/UMTS/LTE, anything else as an
    empty string. Each shard table is streamed in LIMIT/OFFSET pages so
    the full table is never held in memory.
    """
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if None not in (start_time, end_time):
        # NOTE(review): datetimes are formatted naively here; assumes they
        # match the database server's timezone (presumably UTC) -- confirm.
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        'radio',
        'mcc',
        'net',
        'area',
        'cell',
        'unit',
        'lon',
        'lat',
        'range',
        'samples',
        'changeable',
        'created',
        'updated',
        'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    # One query template applied to every shard table; the DB builds each
    # CSV line via CONCAT_WS so Python only joins the row strings.
    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `radio`, `mcc`, `mnc`, `lac`, `cid`
LIMIT :l
OFFSET :o
'''

    # Page size is invariant across tables, so it is hoisted out of the loop.
    limit = 10000
    # NOTE(review): the gzip_open result is entered as a context manager
    # twice (wrapper, then file); assumes util.gzip_open is designed for
    # this nested use -- confirm against its implementation.
    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                # BUG FIX: the offset must restart at 0 for every table.
                # Previously it was initialized once before this loop, so
                # the offset accumulated from earlier tables carried over
                # and silently skipped that many rows at the start of each
                # subsequent shard's export.
                offset = 0
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(o=offset, l=limit)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        offset += limit
                    else:
                        break