def test_update(self, celery, session):
        """Run update_statregion on seeded data and check per-region counters.

        Seeds blue shards, cell areas and wifi shards for several regions,
        plus one cell area and one wifi shard whose region is reset to None,
        and two pre-existing RegionStat rows (US and TW). After the task has
        run, four RegionStat rows are expected, and the DE, CA, GB and US
        rows must carry the counters listed below.
        """
        regionless_area = CellAreaFactory(radio=Radio.gsm, num_cells=1)
        regionless_area.region = None

        for batch_size, region in ((2, 'CA'), (3, 'GB')):
            BlueShardFactory.create_batch(batch_size, region=region)

        area_specs = (
            (Radio.gsm, 'DE', 1),
            (Radio.gsm, 'DE', 2),
            (Radio.gsm, 'CA', 2),
            (Radio.wcdma, 'DE', 3),
            (Radio.lte, 'CA', 4),
        )
        for radio, region, num_cells in area_specs:
            CellAreaFactory(radio=radio, region=region, num_cells=num_cells)

        for batch_size, region in ((5, 'DE'), (6, 'US')):
            WifiShardFactory.create_batch(batch_size, region=region)

        regionless_wifi = WifiShardFactory()
        regionless_wifi.region = None

        # Pre-existing rows whose values differ from the seeded data.
        for stat_kwargs in (dict(region='US', blue=1, wifi=2),
                            dict(region='TW', wifi=1)):
            session.add(RegionStat(**stat_kwargs))
        session.flush()

        update_statregion.delay().get()

        stats = session.query(RegionStat).all()
        assert len(stats) == 4

        # Expected (gsm, wcdma, lte, blue, wifi) per region; rows for any
        # other region are not inspected.
        expected_by_region = {
            'DE': (3, 3, 0, 0, 5),
            'CA': (2, 0, 4, 2, 0),
            'GB': (0, 0, 0, 3, 0),
            'US': (0, 0, 0, 0, 6),
        }
        for stat in stats:
            values = (stat.gsm, stat.wcdma, stat.lte, stat.blue, stat.wifi)
            if stat.region in expected_by_region:
                assert values == expected_by_region[stat.region]
Exemple #2
0
    def test_update(self):
        """Run update_statregion on seeded data and check per-region counters.

        Seeds cell areas and wifi shards for several regions, plus one cell
        area and one wifi shard whose region is reset to None, and two
        pre-existing RegionStat rows (US and TW). After the task has run,
        three RegionStat rows are expected, and the DE, CA and US rows must
        carry the counters listed below.
        """
        regionless_area = CellAreaFactory(radio=Radio.gsm, num_cells=1)
        regionless_area.region = None

        area_specs = (
            (Radio.gsm, 'DE', 1),
            (Radio.gsm, 'DE', 2),
            (Radio.gsm, 'CA', 2),
            (Radio.wcdma, 'DE', 3),
            (Radio.lte, 'CA', 4),
        )
        for radio, region, num_cells in area_specs:
            CellAreaFactory(radio=radio, region=region, num_cells=num_cells)

        for batch_size, region in ((5, 'DE'), (6, 'US')):
            WifiShardFactory.create_batch(batch_size, region=region)

        regionless_wifi = WifiShardFactory()
        regionless_wifi.region = None

        # Pre-existing rows whose values differ from the seeded data.
        for stat_kwargs in (dict(region='US', wifi=2),
                            dict(region='TW', wifi=1)):
            self.session.add(RegionStat(**stat_kwargs))
        self.session.flush()

        update_statregion.delay().get()

        stats = self.session.query(RegionStat).all()
        self.assertEqual(len(stats), 3)

        # Expected (gsm, wcdma, lte, wifi) per region; rows for any other
        # region are not inspected.
        expected_by_region = {
            'DE': (3, 3, 0, 5),
            'CA': (2, 0, 4, 0),
            'US': (0, 0, 0, 6),
        }
        for stat in stats:
            values = (stat.gsm, stat.wcdma, stat.lte, stat.wifi)
            if stat.region in expected_by_region:
                self.assertEqual(values, expected_by_region[stat.region])
Exemple #3
0
    def test_update(self):
        """Verify the RegionStat rows produced by update_statregion.

        The fixture data covers cell areas (gsm, wcdma, lte) and wifi shards
        in DE, CA and US, one cell area and one wifi shard with their region
        cleared, and stale RegionStat rows for US and TW. The task result
        must contain exactly three rows, with the DE, CA and US counters
        matching the table at the end of the test.
        """
        orphan_area = CellAreaFactory(radio=Radio.gsm, num_cells=1)
        orphan_area.region = None

        cell_area_kwargs = [
            dict(radio=Radio.gsm, region='DE', num_cells=1),
            dict(radio=Radio.gsm, region='DE', num_cells=2),
            dict(radio=Radio.gsm, region='CA', num_cells=2),
            dict(radio=Radio.wcdma, region='DE', num_cells=3),
            dict(radio=Radio.lte, region='CA', num_cells=4),
        ]
        for kwargs in cell_area_kwargs:
            CellAreaFactory(**kwargs)

        WifiShardFactory.create_batch(5, region='DE')
        WifiShardFactory.create_batch(6, region='US')
        orphan_wifi = WifiShardFactory()
        orphan_wifi.region = None

        # Stale statistics that the task is expected to replace.
        db = self.session
        db.add(RegionStat(region='US', wifi=2))
        db.add(RegionStat(region='TW', wifi=1))
        db.flush()

        update_statregion.delay().get()

        stats = db.query(RegionStat).all()
        self.assertEqual(len(stats), 3)

        # region -> (gsm, wcdma, lte, wifi); other regions are not checked.
        wanted = {
            'DE': (3, 3, 0, 5),
            'CA': (2, 0, 4, 0),
            'US': (0, 0, 0, 6),
        }
        for stat in stats:
            values = (stat.gsm, stat.wcdma, stat.lte, stat.wifi)
            if stat.region in wanted:
                self.assertEqual(values, wanted[stat.region])
Exemple #4
0
    def test_update(self, celery, session):
        """Verify the RegionStat rows produced by update_statregion.

        The fixture data covers blue shards, cell areas (gsm, wcdma, lte)
        and wifi shards across CA, GB, DE and US, one cell area and one wifi
        shard with their region cleared, and stale RegionStat rows for US
        and TW. The task result must contain exactly four rows, with the
        DE, CA, GB and US counters matching the table at the end of the test.
        """
        orphan_area = CellAreaFactory(radio=Radio.gsm, num_cells=1)
        orphan_area.region = None

        BlueShardFactory.create_batch(2, region='CA')
        BlueShardFactory.create_batch(3, region='GB')

        cell_area_kwargs = [
            dict(radio=Radio.gsm, region='DE', num_cells=1),
            dict(radio=Radio.gsm, region='DE', num_cells=2),
            dict(radio=Radio.gsm, region='CA', num_cells=2),
            dict(radio=Radio.wcdma, region='DE', num_cells=3),
            dict(radio=Radio.lte, region='CA', num_cells=4),
        ]
        for kwargs in cell_area_kwargs:
            CellAreaFactory(**kwargs)

        WifiShardFactory.create_batch(5, region='DE')
        WifiShardFactory.create_batch(6, region='US')
        orphan_wifi = WifiShardFactory()
        orphan_wifi.region = None

        # Stale statistics that the task is expected to replace.
        session.add(RegionStat(region='US', blue=1, wifi=2))
        session.add(RegionStat(region='TW', wifi=1))
        session.flush()

        update_statregion.delay().get()

        stats = session.query(RegionStat).all()
        assert len(stats) == 4

        # region -> (gsm, wcdma, lte, blue, wifi); other regions are not
        # checked.
        wanted = {
            'DE': (3, 3, 0, 0, 5),
            'CA': (2, 0, 4, 2, 0),
            'GB': (0, 0, 0, 3, 0),
            'US': (0, 0, 0, 0, 6),
        }
        for stat in stats:
            values = (stat.gsm, stat.wcdma, stat.lte, stat.blue, stat.wifi)
            if stat.region in wanted:
                assert values == wanted[stat.region]
Exemple #5
0
 def test_empty(self):
     """Running update_statregion without creating any data here yields no RegionStat rows."""
     update_statregion.delay().get()
     self.assertEqual(self.session.query(RegionStat).all(), [])
Exemple #6
0
 def test_empty(self):
     """The task is run with nothing created by this test; the RegionStat query must come back empty."""
     task_result = update_statregion.delay()
     task_result.get()
     region_stats = self.session.query(RegionStat).all()
     self.assertEqual(region_stats, [])
 def test_empty(self, celery, session):
     """Running update_statregion without creating any data here yields no RegionStat rows."""
     update_statregion.delay().get()
     assert session.query(RegionStat).all() == []
Exemple #8
0
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    Each data row is converted into a ``CellShard``. Stations that are not
    yet in the database are added; existing stations are updated only when
    the CSV row carries a newer ``modified`` timestamp. The areas of new and
    updated stations are queued on ``cellarea_queue`` and ``update_cellarea``
    is scheduled for them in batches. When all rows are processed,
    ``update_statregion`` is scheduled and a summary is logged.

    Rows with an empty radio column are skipped with a warning. Rows that
    fail parsing or validation (including rows with too few columns) are
    skipped with a warning, unless no valid row has been seen yet, in which
    case the whole import is aborted.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    :raises InvalidCSV: if a row has a missing or unknown radio type, or if
        a row is invalid before any valid row has been processed
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        try:
            radio = radio_type[row[0]]
        except (KeyError, IndexError) as e:
            # IndexError: a blank line yields an empty row with no radio
            # column at all; report it like any other unusable radio value.
            raise InvalidCSV("Unknown radio type in row: %s" % row) from e

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError, IndexError) as e:
            # IndexError covers truncated rows with fewer columns than the
            # export format; they are treated like any other invalid row.
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV(
                    "first row %s is invalid: %s" % (row, e)) from e
            LOGGER.warning("row %s is invalid: %s", row, e)
            continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (session.query(shard_type).filter(
            shard_type.cellid == shard.cellid).options(
                load_only("modified")).one_or_none())

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record; with a single position the bounding
            # box collapses to that point.
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)

        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        # Flush pending areas in batches of 1000; the set is reset after
        # each flush, so this fires at most once per 1000 distinct areas.
        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:", total,
                "" if total == 1 else "s")
    # Use a distinct loop name so the radio_type lookup table above is not
    # shadowed.
    for radio_name, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_name,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info("  %d station area%s updated", areas_total,
                    "" if areas_total == 1 else "s")
Exemple #9
0
 def test_empty(self, celery, session):
     """The task is run with nothing created by this test; the RegionStat query must come back empty."""
     task_result = update_statregion.delay()
     task_result.get()
     region_stats = session.query(RegionStat).all()
     assert region_stats == []