def test_update(self, celery, session):
    """Check that update_statregion rebuilds the per-region statistics.

    The resulting RegionStat rows are compared as an exact
    region -> (gsm, wcdma, lte, blue, wifi) mapping, so a missing,
    stale or unexpected region fails the test instead of being skipped.
    """
    # A cell area without a region; it is not part of any expected total.
    area = CellAreaFactory(radio=Radio.gsm, num_cells=1)
    area.region = None

    BlueShardFactory.create_batch(2, region='CA')
    BlueShardFactory.create_batch(3, region='GB')

    CellAreaFactory(radio=Radio.gsm, region='DE', num_cells=1)
    CellAreaFactory(radio=Radio.gsm, region='DE', num_cells=2)
    CellAreaFactory(radio=Radio.gsm, region='CA', num_cells=2)
    CellAreaFactory(radio=Radio.wcdma, region='DE', num_cells=3)
    CellAreaFactory(radio=Radio.lte, region='CA', num_cells=4)

    WifiShardFactory.create_batch(5, region='DE')
    WifiShardFactory.create_batch(6, region='US')
    # A wifi network without a region; also not part of any expected total.
    wifi = WifiShardFactory()
    wifi.region = None

    # Pre-existing rows: 'US' is expected to be overwritten with fresh
    # counts, 'TW' has no backing data and is expected to be gone.
    session.add(RegionStat(region='US', blue=1, wifi=2))
    session.add(RegionStat(region='TW', wifi=1))
    session.flush()

    update_statregion.delay().get()

    stats = session.query(RegionStat).all()
    # Checked separately so duplicate regions cannot hide in the mapping.
    assert len(stats) == 4

    actual = {
        stat.region: (stat.gsm, stat.wcdma, stat.lte, stat.blue, stat.wifi)
        for stat in stats
    }
    assert actual == {
        'DE': (3, 3, 0, 0, 5),
        'CA': (2, 0, 4, 2, 0),
        'GB': (0, 0, 0, 3, 0),
        'US': (0, 0, 0, 0, 6),
    }
def test_update(self):
    """Check that update_statregion rebuilds the per-region statistics.

    The resulting RegionStat rows are compared as an exact
    region -> (gsm, wcdma, lte, wifi) mapping, so a missing, stale or
    unexpected region fails the test instead of being skipped.
    """
    # A cell area without a region; it is not part of any expected total.
    area = CellAreaFactory(radio=Radio.gsm, num_cells=1)
    area.region = None

    CellAreaFactory(radio=Radio.gsm, region='DE', num_cells=1)
    CellAreaFactory(radio=Radio.gsm, region='DE', num_cells=2)
    CellAreaFactory(radio=Radio.gsm, region='CA', num_cells=2)
    CellAreaFactory(radio=Radio.wcdma, region='DE', num_cells=3)
    CellAreaFactory(radio=Radio.lte, region='CA', num_cells=4)

    WifiShardFactory.create_batch(5, region='DE')
    WifiShardFactory.create_batch(6, region='US')
    # A wifi network without a region; also not part of any expected total.
    wifi = WifiShardFactory()
    wifi.region = None

    # Pre-existing rows: 'US' is expected to be overwritten with fresh
    # counts, 'TW' has no backing data and is expected to be gone.
    self.session.add(RegionStat(region='US', wifi=2))
    self.session.add(RegionStat(region='TW', wifi=1))
    self.session.flush()

    update_statregion.delay().get()

    stats = self.session.query(RegionStat).all()
    # Checked separately so duplicate regions cannot hide in the mapping.
    self.assertEqual(len(stats), 3)

    actual = {
        stat.region: (stat.gsm, stat.wcdma, stat.lte, stat.wifi)
        for stat in stats
    }
    expected = {
        'DE': (3, 3, 0, 5),
        'CA': (2, 0, 4, 0),
        'US': (0, 0, 0, 6),
    }
    self.assertEqual(actual, expected)
def test_empty(self):
    """Run update_statregion with no test data and expect no RegionStat rows."""
    task_result = update_statregion.delay()
    task_result.get()

    region_stats = self.session.query(RegionStat).all()
    self.assertEqual(region_stats, [])
def test_empty(self, celery, session):
    """Run update_statregion with no test data and expect no RegionStat rows."""
    task_result = update_statregion.delay()
    task_result.get()

    region_stats = session.query(RegionStat).all()
    assert region_stats == []
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    Stations not yet in the database are added, existing stations are
    updated when the CSV row carries a newer modification time, and the
    IDs of the affected cell areas are queued for recalculation. Blank
    rows are ignored.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    :raises InvalidCSV: if a row has an unknown radio type, or if a row
        is invalid before any station has been processed successfully
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    def enqueue_areas(pending):
        """Queue the pending cell area IDs and schedule the area update."""
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(pending), pipe=pipe)
        update_cellarea.delay()

    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        if not row:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to import and indexing it would raise IndexError.
            continue

        try:
            radio = radio_type[row[0]]
        except KeyError:
            raise InvalidCSV("Unknown radio type in row: %s" % row)
        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError, IndexError) as e:
            # IndexError covers truncated rows with too few columns.
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV("first row %s is invalid: %s" % (row, e))
            LOGGER.warning("row %s is invalid: %s", row, e)
            continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (
            session.query(shard_type)
            .filter(shard_type.cellid == shard.cellid)
            .options(load_only("modified"))
            .one_or_none()
        )

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)

        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            enqueue_areas(areas)
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        enqueue_areas(areas)

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:", total, "" if total == 1 else "s")
    # Use a distinct loop name so the radio_type lookup dict is not shadowed.
    for radio_name, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_name,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info(
            "  %d station area%s updated", areas_total, "" if areas_total == 1 else "s"
        )