def _relations_sync(moved_sensor_ids: typing.List[int]):
    logger = get_celery_logger()
    trie: Trie[Zipcode] = Trie()
    for zipcode in Zipcode.query.all():
        trie.insert(zipcode.geohash, zipcode)

    new_relations = []

    # Delete the old relations before rebuilding them
    deleted_relations_count = SensorZipcodeRelation.query.filter(
        SensorZipcodeRelation.sensor_id.in_(moved_sensor_ids)
    ).delete(synchronize_session=False)
    logger.info("Deleting %s relations", deleted_relations_count)

    sensors = Sensor.query.filter(Sensor.id.in_(moved_sensor_ids)).all()
    for sensor in sensors:
        gh = sensor.geohash
        latitude = sensor.latitude
        longitude = sensor.longitude
        done = False
        zipcode_ids: typing.Set[int] = set()
        # TODO: Use Postgres' native geolocation extension.
        while gh and not done:
            zipcodes = [
                zipcode
                for zipcode in trie.get(gh)
                if zipcode.id not in zipcode_ids
            ]
            for zipcode_id, distance in sorted(
                [
                    (
                        z.id,
                        haversine_distance(
                            longitude, latitude, z.longitude, z.latitude
                        ),
                    )
                    for z in zipcodes
                ],
                key=lambda t: t[1],
            ):
                if distance >= 25:
                    done = True
                    break
                if len(zipcode_ids) >= 25:
                    done = True
                    break
                zipcode_ids.add(zipcode_id)
                data = {
                    "zipcode_id": zipcode_id,
                    "sensor_id": sensor.id,
                    "distance": distance,
                }
                new_relations.append(SensorZipcodeRelation(**data))
            # Widen the search by dropping the last geohash character.
            gh = gh[:-1]

    if new_relations:
        logger.info("Creating %s relations", len(new_relations))
        for objs in chunk_list(new_relations):
            db.session.bulk_save_objects(objs)
            db.session.commit()
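# Neither Trie nor haversine_distance is shown in this excerpt. Below is a
# minimal sketch of what they might look like, with signatures inferred from
# the call sites above: Trie.get(prefix) is assumed to return every value
# whose key starts with the given geohash prefix, and haversine_distance is
# assumed to take (lon1, lat1, lon2, lat2) in degrees and return kilometers.
# The real helpers in the codebase may differ.

import math
import typing

T = typing.TypeVar("T")


class Trie(typing.Generic[T]):
    """Prefix tree keyed by strings (geohashes here)."""

    def __init__(self) -> None:
        self._children: typing.Dict[str, "Trie[T]"] = {}
        self._values: typing.List[T] = []

    def insert(self, key: str, value: T) -> None:
        node = self
        for char in key:
            node = node._children.setdefault(char, Trie())
        node._values.append(value)

    def get(self, prefix: str) -> typing.List[T]:
        # Walk down to the node for this prefix, then collect every value
        # stored at or below it.
        node = self
        for char in prefix:
            child = node._children.get(char)
            if child is None:
                return []
            node = child
        results: typing.List[T] = []
        stack = [node]
        while stack:
            current = stack.pop()
            results.extend(current._values)
            stack.extend(current._children.values())
        return results


def haversine_distance(lon1: float, lat1: float, lon2: float, lat2: float) -> float:
    # Great-circle distance between two (longitude, latitude) points, in km.
    lon1, lat1, lon2, lat2 = map(math.radians, (lon1, lat1, lon2, lat2))
    a = (
        math.sin((lat2 - lat1) / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2
    )
    return 6371 * 2 * math.asin(math.sqrt(a))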
def _zipcodes_sync(
    geonames_data: TGeonamesData,
    cities_map: TCitiesMap,
    timezones_map: typing.Dict[str, str],
):
    logger = get_celery_logger()
    existing_zipcodes = {
        zipcode.zipcode: zipcode for zipcode in Zipcode.query.all()
    }
    updates = []
    new_zipcodes = []
    for zipcode, city_name, state_code, latitude, longitude in geonames_data:
        obj = existing_zipcodes.get(zipcode)
        timezone = timezones_map.get(zipcode)
        # Only write rows that are new or whose location data has changed.
        if (
            not obj
            or obj.latitude != latitude
            or obj.longitude != longitude
            or timezone != obj.timezone
            or obj.coordinates is None
        ):
            gh = geohash.encode(latitude, longitude)
            data = dict(
                zipcode=zipcode,
                city_id=cities_map[state_code][city_name].id,
                latitude=latitude,
                longitude=longitude,
                timezone=timezone,
                coordinates=f"POINT({longitude} {latitude})",
                **{f"geohash_bit_{i}": c for i, c in enumerate(gh, start=1)},
            )
            if obj:
                data["id"] = obj.id
                updates.append(data)
            else:
                new_zipcodes.append(Zipcode(**data))

    if new_zipcodes:
        logger.info("Creating %s zipcodes", len(new_zipcodes))
        for objects in chunk_list(new_zipcodes):
            db.session.bulk_save_objects(objects)
            db.session.commit()

    if updates:
        logger.info("Updating %s zipcodes", len(updates))
        for mappings in chunk_list(updates):
            db.session.bulk_update_mappings(Zipcode, mappings)
            db.session.commit()
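# chunk_list is used by every sync function in this excerpt to batch bulk
# writes. A minimal sketch, assuming it yields fixed-size slices; the
# default batch size here is a guess (the call in _metrics_sync passes
# batch_size=5000 explicitly):

import typing

T = typing.TypeVar("T")


def chunk_list(
    items: typing.Sequence[T], batch_size: int = 1000
) -> typing.Iterator[typing.Sequence[T]]:
    # Yield successive batch_size-sized slices so each bulk_save_objects /
    # bulk_update_mappings call and its commit stay bounded.
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]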
def _metrics_sync():
    logger = get_celery_logger()
    updates = []
    ts = timestamp()
    zipcodes_to_sensors = collections.defaultdict(list)
    # Pull readings from sensors updated in the last 30 minutes, grouped by zipcode.
    for zipcode_id, latest_reading, distance in (
        Sensor.query.join(SensorZipcodeRelation)
        .filter(Sensor.updated_at > ts - (30 * 60))
        .with_entities(
            SensorZipcodeRelation.zipcode_id,
            Sensor.latest_reading,
            SensorZipcodeRelation.distance,
        )
        .all()
    ):
        zipcodes_to_sensors[zipcode_id].append((latest_reading, distance))

    for zipcode_id, sensor_tuples in zipcodes_to_sensors.items():
        readings: typing.List[float] = []
        closest_reading = float("inf")
        farthest_reading = 0.0
        # Walk sensors from nearest to farthest: keep taking readings until
        # we have DESIRED_NUM_READINGS, and always take sensors closer than
        # DESIRED_READING_DISTANCE_KM even past that count.
        for reading, distance in sorted(sensor_tuples, key=lambda s: s[1]):
            if (
                len(readings) < DESIRED_NUM_READINGS
                or distance < DESIRED_READING_DISTANCE_KM
            ):
                readings.append(reading)
                closest_reading = min(distance, closest_reading)
                farthest_reading = max(distance, farthest_reading)
            else:
                break
        if readings:
            pm25 = round(sum(readings) / len(readings), ndigits=3)
            num_sensors = len(readings)
            min_sensor_distance = round(closest_reading, ndigits=3)
            max_sensor_distance = round(farthest_reading, ndigits=3)
            updates.append(
                {
                    "id": zipcode_id,
                    "pm25": pm25,
                    "pm25_updated_at": ts,
                    "num_sensors": num_sensors,
                    "min_sensor_distance": min_sensor_distance,
                    "max_sensor_distance": max_sensor_distance,
                }
            )

    logger.info("Updating %s zipcodes", len(updates))
    for mappings in chunk_list(updates, batch_size=5000):
        db.session.bulk_update_mappings(Zipcode, mappings)
        db.session.commit()
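# timestamp() is not shown; it is assumed here to return the current Unix
# time in seconds, so Sensor.updated_at > ts - (30 * 60) selects sensors
# that reported within the last 30 minutes. A minimal sketch:

import datetime


def timestamp() -> float:
    return datetime.datetime.now(datetime.timezone.utc).timestamp()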
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        "zipcodes",
        sa.Column(
            "coordinates",
            Geometry(
                geometry_type="POINT",
                from_text="ST_GeomFromEWKT",
                name="geometry",
            ),
            nullable=True,
        ),
    )
    # ### end Alembic commands ###

    # Now populate coordinates from existing data
    from airq.lib.util import chunk_list
    from airq.models.zipcodes import Zipcode

    bind = op.get_bind()
    session = orm.Session(bind=bind)

    updates = []
    for zipcode in session.query(Zipcode).all():
        data = dict(
            id=zipcode.id,
            coordinates=f"POINT({zipcode.longitude} {zipcode.latitude})",
        )
        updates.append(data)

    print(f"Setting coordinates for {len(updates)} zipcodes")
    num_processed = 0
    for mappings in chunk_list(updates):
        session.bulk_update_mappings(Zipcode, mappings)
        session.commit()
        num_processed += len(mappings)
        print(f"Processed {num_processed} zipcodes")
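# The matching downgrade is not shown in this excerpt. Assuming the standard
# Alembic inverse of the add_column above, it would look something like:

def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("zipcodes", "coordinates")
    # ### end Alembic commands ###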
def _metrics_sync():
    logger = get_celery_logger()
    updates = []
    ts = now()
    zipcodes_to_sensors = collections.defaultdict(list)
    for zipcode_id, latest_reading, humidity, pm_cf_1, sensor_id, distance in (
        Sensor.query.join(SensorZipcodeRelation)
        .filter(Sensor.updated_at > ts.timestamp() - (30 * 60))
        .with_entities(
            SensorZipcodeRelation.zipcode_id,
            Sensor.latest_reading,
            Sensor.humidity,
            Sensor.pm_cf_1,
            Sensor.id,
            SensorZipcodeRelation.distance,
        )
        .all()
    ):
        zipcodes_to_sensors[zipcode_id].append(
            (latest_reading, humidity, pm_cf_1, sensor_id, distance)
        )

    for zipcode_id, sensor_tuples in zipcodes_to_sensors.items():
        pm_25_readings: typing.List[float] = []
        pm_cf_1_readings: typing.List[float] = []
        humidities: typing.List[float] = []
        closest_reading = float("inf")
        farthest_reading = 0.0
        sensor_ids: typing.List[int] = []
        for pm_25, humidity, pm_cf_1, sensor_id, distance in sorted(
            sensor_tuples, key=lambda s: s[-1]
        ):
            if (
                len(pm_25_readings) < DESIRED_NUM_READINGS
                or distance < DESIRED_READING_DISTANCE_KM
            ):
                pm_25_readings.append(pm_25)
                humidities.append(humidity)
                pm_cf_1_readings.append(pm_cf_1)
                sensor_ids.append(sensor_id)
                closest_reading = min(distance, closest_reading)
                farthest_reading = max(distance, farthest_reading)
            else:
                break

        if pm_25_readings:
            num_sensors = len(pm_25_readings)
            pm25 = round(sum(pm_25_readings) / num_sensors, ndigits=3)
            humidity = round(sum(humidities) / num_sensors, ndigits=3)
            pm_cf_1 = round(sum(pm_cf_1_readings) / num_sensors, ndigits=3)
            min_sensor_distance = round(closest_reading, ndigits=3)
            max_sensor_distance = round(farthest_reading, ndigits=3)
            details = {
                "num_sensors": num_sensors,
                "min_sensor_distance": min_sensor_distance,
                "max_sensor_distance": max_sensor_distance,
                "sensor_ids": sensor_ids,
            }
            updates.append(
                {
                    "id": zipcode_id,
                    "pm25": pm25,
                    "humidity": humidity,
                    "pm_cf_1": pm_cf_1,
                    "pm25_updated_at": ts.timestamp(),
                    "metrics_data": details,
                }
            )

    logger.info("Updating %s zipcodes", len(updates))
    for mappings in chunk_list(updates, batch_size=5000):
        db.session.bulk_update_mappings(Zipcode, mappings)
        db.session.commit()