Example 1
    def test_outdated_station(self, session, redis_client, cellarea_queue):
        """An older statuon record does not update existing station records."""
        station_data = {
            "radio": Radio.wcdma,
            "mcc": 202,
            "mnc": 1,
            "lac": 2120,
            "cid": 12842,
            "lat": 38.85,
            "lon": 23.41,
            "radius": 1,
            "samples": 1,
            "created": datetime(2019, 1, 1, tzinfo=UTC),
            "modified": datetime(2019, 10, 7, tzinfo=UTC),
        }
        station = CellShard.create(_raise_invalid=True, **station_data)
        session.add(station)
        session.flush()

        csv = StringIO("""\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
""")
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # The existing station is unmodified
        wcdma = session.query(CellShard.shard_model(Radio.wcdma)).one()
        assert wcdma.lat == 38.85
        assert wcdma.lon == 23.41
        assert wcdma.created == datetime(2019, 1, 1, tzinfo=UTC)
        assert wcdma.modified == datetime(2019, 10, 7, tzinfo=UTC)

        # No CellAreas or RegionStats are generated
        assert session.query(func.count(CellArea.areaid)).scalar() == 0
        assert session.query(func.count(RegionStat.region)).scalar() == 0
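
The "outdated" decision comes from comparing the CSV row's "updated" Unix timestamp against the existing record's modified datetime (the `existing.modified < data["modified"]` check in Example 3). A minimal standalone sketch of that comparison for the values used in this test; the variable names here are illustrative only:

from datetime import datetime, timezone

UTC = timezone.utc

# CSV "updated" column for the row above: 1570120316 -> 2019-10-03 16:31:56 UTC
csv_modified = datetime.fromtimestamp(1570120316, UTC)
existing_modified = datetime(2019, 10, 7, tzinfo=UTC)

# The importer only overwrites when the CSV row is newer; here the CSV row is
# older, so the existing station record is left untouched.
print(csv_modified > existing_modified)  # False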
Example 2
    def test_modified_station(self, session, redis_client, cellarea_queue):
        """A modified station updates existing records."""
        station_data = {
            "radio": Radio.umts,
            "mcc": 202,
            "mnc": 1,
            "lac": 2120,
            "cid": 12842,
            "lat": 38.85,
            "lon": 23.41,
            "min_lat": 38.7,
            "max_lat": 38.9,
            "min_lon": 23.4,
            "max_lon": 23.5,
            "radius": 1,
            "samples": 1,
            "created": datetime(2019, 1, 1, tzinfo=UTC),
            "modified": datetime(2019, 1, 1, tzinfo=UTC),
        }
        station = CellShard.create(_raise_invalid=True, **station_data)
        session.add(station)
        session.flush()

        csv = StringIO(
            """\
radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
UMTS,202,1,2120,12842,,23.4123167,38.8574351,0,6,1,1568220564,1570120316,
"""
        )
        read_stations_from_csv(session, csv, redis_client, cellarea_queue)

        # Check the details of the UMTS station
        umts = session.query(CellShard.shard_model(Radio.umts)).one()
        # New position, other details from import
        assert umts.lat == 38.8574351
        assert umts.lon == 23.4123167
        assert umts.radius == 0
        assert umts.samples == 6
        assert umts.created == datetime(2019, 9, 11, 16, 49, 24, tzinfo=UTC)
        assert umts.modified == datetime(2019, 10, 3, 16, 31, 56, tzinfo=UTC)
        # Other details unchanged
        assert umts.max_lat == station_data["max_lat"]
        assert umts.min_lat == station_data["min_lat"]
        assert umts.max_lon == station_data["max_lon"]
        assert umts.min_lon == station_data["min_lon"]
        assert umts.region == "GR"

        # A modified station triggers the creation of a new CellArea
        cell_area = session.query(CellArea).order_by(CellArea.areaid).one()
        assert cell_area.areaid == (Radio.wcdma, 202, 1, 2120)

        # The new CellArea triggers the creation of a RegionStat
        stat = session.query(RegionStat).order_by("region").one()
        assert stat.region == "GR"
        assert stat.wcdma == 1
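
The asserted `created` and `modified` values come straight from the CSV's Unix-timestamp columns, converted with the same `datetime.fromtimestamp(..., UTC)` call the importer in Example 3 uses. A quick standalone check of those conversions:

from datetime import datetime, timezone

UTC = timezone.utc

# "created" column: 1568220564 -> 2019-09-11 16:49:24 UTC
print(datetime.fromtimestamp(1568220564, UTC))
# "updated" column: 1570120316 -> 2019-10-03 16:31:56 UTC
print(datetime.fromtimestamp(1570120316, UTC))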
Example 3
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        try:
            radio = radio_type[row[0]]
        except KeyError:
            raise InvalidCSV("Unknown radio type in row: %s" % row)

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radii exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError) as e:
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV("first row %s is invalid: %s" % (row, e))
            else:
                LOGGER.warning("row %s is invalid: %s", row, e)
                continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (
            session.query(shard_type)
            .filter(shard_type.cellid == shard.cellid)
            .options(load_only("modified"))
            .one_or_none()
        )

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)

        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:", total,
                "" if total == 1 else "s")
    for radio_type, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_type,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info(
            "  %d station area%s updated",
            areas_total,
            "" if areas_total == 1 else "s",
        )