Ejemplo n.º 1
0
def cluster_cells(cells, lookups, min_age=0):
    """
    Group cells by area and build one structured array per area.

    :arg cells: iterable of cell records; each must have a matching
        entry in ``lookups`` (matched on the decoded cell id)
    :arg lookups: iterable of lookup entries providing ``cellid``,
        ``age``, ``signalStrength`` and ``radioType``
    :arg min_age: fallback age used when a lookup has no (or zero) age
    :returns: a list of numpy arrays with dtype ``NETWORK_DTYPE``, one
        array per distinct area id
    """
    current_time = util.utcnow()
    current_date = current_time.date()

    # Index every lookup's (age, signal strength) pair by decoded cell id.
    observed = {}
    for query in lookups:
        # A falsy age falls back to min_age; the value never drops
        # below 1000.
        age = max(abs(query.age or min_age), 1000)
        # A falsy signal strength falls back to the per-radio minimum.
        signal = query.signalStrength or MIN_CELL_SIGNAL[query.radioType]
        observed[decode_cellid(query.cellid)] = (age, signal)

    # Bucket the cells by their area id.
    by_area = defaultdict(list)
    for station in cells:
        by_area[area_id(station)].append(station)

    def _as_record(station):
        # One tuple per cell, in NETWORK_DTYPE field order.
        return (
            station.lat,
            station.lon,
            station.radius,
            observed[station.cellid][0],
            observed[station.cellid][1],
            station_score(station, current_time),
            encode_cellid(*station.cellid),
            bool(station.last_seen >= current_date),
        )

    return [
        numpy.array(
            [_as_record(station) for station in members],
            dtype=NETWORK_DTYPE)
        for members in by_area.values()
    ]
Ejemplo n.º 2
0
    def test_region_all_none(self, celery, session):
        """If all cell regions are None, the area region is None."""

        # Sardinia, in Mediterranean, not identified as part of Italy
        first = self.cell_factory(
            radio=Radio.wcdma, mcc=204, mnc=4, lac=35051, cid=1018429,
            lat=40.18, lon=9.59, radius=10, region=None)
        assert first.region is None

        # Second cell reuses the first cell's radio/mcc/mnc/lac, with the
        # next cid and a position shifted by 0.1 degrees.
        inherited = {
            name: getattr(first, name)
            for name in ("radio", "mcc", "mnc", "lac")
        }
        second = self.cell_factory(
            cid=first.cid + 1,
            lat=first.lat + 0.1,
            lon=first.lon + 0.1,
            radius=10,
            region=None,
            **inherited
        )
        assert second.region is None
        session.flush()

        # Queue the area for an update and run the task to completion.
        queue = self.area_queue(celery)
        queue.enqueue([area_id(first)])
        self.task.delay().get()

        updated_area = session.query(self.area_model).one()
        assert updated_area.region is None
Ejemplo n.º 3
0
def cluster_cells(cells, lookups, min_age=0):
    """
    Cluster cells by area, returning one structured array per area.

    :arg cells: iterable of cell records; every cell's ``cellid`` must
        also appear (decoded) among the ``lookups``
    :arg lookups: iterable of lookup entries with ``cellid``, ``age``,
        ``signalStrength`` and ``radioType`` attributes
    :arg min_age: age to use when a lookup's own age is missing or zero
    :returns: list of numpy arrays of dtype ``NETWORK_DTYPE``, one per
        distinct area id
    """
    timestamp = util.utcnow()
    this_day = timestamp.date()

    # Map decoded cell id -> (age, signal strength) taken from the lookups.
    lookup_info = {}
    for entry in lookups:
        # Age is floored at 1000; a falsy age uses min_age instead.
        entry_age = max(abs(entry.age or min_age), 1000)
        # A falsy signal strength uses the minimum for that radio type.
        entry_signal = (
            entry.signalStrength or MIN_CELL_SIGNAL[entry.radioType])
        lookup_info[decode_cellid(entry.cellid)] = (entry_age, entry_signal)

    # Collect the cells that belong to each area.
    grouped = defaultdict(list)
    for item in cells:
        grouped[area_id(item)].append(item)

    result = []
    for members in grouped.values():
        rows = []
        for item in members:
            rows.append((
                item.lat,
                item.lon,
                item.radius,
                lookup_info[item.cellid][0],
                lookup_info[item.cellid][1],
                station_score(item, timestamp),
                encode_cellid(*item.cellid),
                bool(item.last_seen >= this_day),
            ))
        result.append(numpy.array(rows, dtype=NETWORK_DTYPE))

    return result
Ejemplo n.º 4
0
    def test_region_outside_tie(self, celery, session):
        """Two cells that both have region "PR" produce an area with "PR"."""
        first = self.cell_factory(
            radio=Radio.gsm, mcc=310, mnc=1, lac=1, cid=1,
            lat=18.33, lon=-64.9, radius=10000, region="PR")

        # The second cell copies the first cell's radio/mcc/mnc/lac.
        inherited = {
            name: getattr(first, name)
            for name in ("radio", "mcc", "mnc", "lac")
        }
        self.cell_factory(
            cid=2, lat=18.34, lon=-64.9, radius=10000, region="PR",
            **inherited
        )
        session.flush()

        # Queue the area for an update and run the task to completion.
        queue = self.area_queue(celery)
        queue.enqueue([area_id(first)])
        self.task.delay().get()

        updated_area = session.query(self.area_model).one()
        assert updated_area.region == "PR"
Ejemplo n.º 5
0
    def test_region(self, celery, session):
        """An area with one "XW" cell and one "IL" cell gets region "IL"."""
        first = self.cell_factory(
            radio=Radio.gsm, mcc=425, mnc=1, lac=1, cid=1,
            lat=32.2, lon=35.0, radius=10000, region="XW")

        # The second cell copies the first cell's radio/mcc/mnc/lac.
        inherited = {
            name: getattr(first, name)
            for name in ("radio", "mcc", "mnc", "lac")
        }
        self.cell_factory(
            cid=2, lat=32.2, lon=34.9, radius=10000, region="IL",
            **inherited
        )
        session.flush()

        # Queue the area for an update and run the task to completion.
        queue = self.area_queue(celery)
        queue.enqueue([area_id(first)])
        self.task.delay().get()

        updated_area = session.query(self.area_model).one()
        assert updated_area.region == "IL"
Ejemplo n.º 6
0
    def test_region_outside_tie(self, celery, session):
        """Two cells that both have region 'PR' produce an area with 'PR'."""
        first = self.cell_factory(
            radio=Radio.gsm,
            mcc=310,
            mnc=1,
            lac=1,
            cid=1,
            lat=18.33,
            lon=-64.9,
            radius=10000,
            region='PR',
        )
        # The second cell copies the first cell's radio/mcc/mnc/lac.
        self.cell_factory(
            radio=first.radio,
            mcc=first.mcc,
            mnc=first.mnc,
            lac=first.lac,
            cid=2,
            lat=18.34,
            lon=-64.9,
            radius=10000,
            region='PR',
        )
        session.flush()

        # Queue the area for an update and run the task to completion.
        pending = [area_id(first)]
        self.area_queue(celery).enqueue(pending)
        self.task.delay().get()

        updated_area = session.query(self.area_model).one()
        assert updated_area.region == 'PR'
Ejemplo n.º 7
0
    def test_region(self, celery, session):
        """An area with one 'XW' cell and one 'IL' cell gets region 'IL'."""
        first = self.cell_factory(
            radio=Radio.gsm,
            mcc=425,
            mnc=1,
            lac=1,
            cid=1,
            lat=32.2,
            lon=35.0,
            radius=10000,
            region='XW',
        )
        # The second cell copies the first cell's radio/mcc/mnc/lac.
        self.cell_factory(
            radio=first.radio,
            mcc=first.mcc,
            mnc=first.mnc,
            lac=first.lac,
            cid=2,
            lat=32.2,
            lon=34.9,
            radius=10000,
            region='IL',
        )
        session.flush()

        # Queue the area for an update and run the task to completion.
        pending = [area_id(first)]
        self.area_queue(celery).enqueue(pending)
        self.task.delay().get()

        updated_area = session.query(self.area_model).one()
        assert updated_area.region == 'IL'
Ejemplo n.º 8
0
    def test_region_null_tied(self, celery, session):
        """If an equal number of cells have region=None, the area is None."""

        # Bornholm, an island in the Baltic sea, not identified as part of Denmark
        unknown_cell = self.cell_factory(
            radio=Radio.wcdma, mcc=204, mnc=175, lac=1515, cid=13241603,
            lat=55.115, lon=14.88, radius=10, region=None)
        assert unknown_cell.region is None

        # Reeuwijk, Netherlands
        # Copies the first cell's mcc/mnc/lac; the radio is given explicitly.
        inherited = {
            name: getattr(unknown_cell, name)
            for name in ("mcc", "mnc", "lac")
        }
        self.cell_factory(
            radio=Radio.wcdma,
            cid=unknown_cell.cid + 2,
            lat=52.056,
            lon=4.733,
            radius=10,
            region="NL",
            **inherited
        )
        session.flush()

        # Queue the area for an update and run the task to completion.
        queue = self.area_queue(celery)
        queue.enqueue([area_id(unknown_cell)])
        self.task.delay().get()

        updated_area = session.query(self.area_model).one()
        assert updated_area.region is None
Ejemplo n.º 9
0
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    Each data row is parsed and validated into a station record. A
    station that is not yet in the database is added; an existing
    station is updated only when the CSV row carries a newer
    ``modified`` timestamp. The areas of new or updated stations are
    queued for a cell area update, and a statregion update is scheduled
    once the file has been processed.

    Blank lines are skipped. A row that is too short, has unparsable
    values, or fails validation is logged and skipped, unless it is
    the first data row, in which case the import is aborted.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    :raises InvalidCSV: if a row has an unrecognized radio type, or if
        the first data row is invalid
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        if not row:
            # csv.reader yields an empty list for a blank line; indexing
            # it would raise IndexError, so skip it.
            continue

        try:
            radio = radio_type[row[0]]
        except KeyError as exc:
            raise InvalidCSV("Unknown radio type in row: %s" % row) from exc

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError, IndexError) as e:
            # IndexError covers truncated rows with too few columns, so
            # they are handled like any other invalid row.
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV(
                    "first row %s is invalid: %s" % (row, e)) from e
            LOGGER.warning("row %s is invalid: %s", row, e)
            continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (session.query(shard_type).filter(
            shard_type.cellid == shard.cellid).options(
                load_only("modified")).one_or_none())

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)

        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        # Flush a full batch of pending areas; the set is reset afterwards,
        # so this fires once per 1000 distinct areas collected.
        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:", total,
                "" if total == 1 else "s")
    # Use a distinct loop name so the radio_type lookup dict is not shadowed.
    for radio_name, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_name,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info("  %d station area%s updated", areas_total,
                    "" if areas_total == 1 else "s")