Example #1
def _relations_sync(moved_sensor_ids: typing.List[int]):
    logger = get_celery_logger()

    trie: Trie[Zipcode] = Trie()
    for zipcode in Zipcode.query.all():
        trie.insert(zipcode.geohash, zipcode)

    new_relations = []

    # Delete the old relations before rebuilding them
    deleted_relations_count = SensorZipcodeRelation.query.filter(
        SensorZipcodeRelation.sensor_id.in_(moved_sensor_ids)).delete(
            synchronize_session=False)
    logger.info("Deleting %s relations", deleted_relations_count)

    sensors = Sensor.query.filter(Sensor.id.in_(moved_sensor_ids)).all()
    for sensor in sensors:
        gh = sensor.geohash
        latitude = sensor.latitude
        longitude = sensor.longitude
        done = False
        zipcode_ids: typing.Set[int] = set()
        # TODO: Use Postgres' native geolocation extension.
        while gh and not done:
            zipcodes = [
                zipcode for zipcode in trie.get(gh)
                if zipcode.id not in zipcode_ids
            ]

            for zipcode_id, distance in sorted(
                [(
                    z.id,
                    haversine_distance(longitude, latitude, z.longitude,
                                       z.latitude),
                ) for z in zipcodes],
                    key=lambda t: t[1],
            ):
                if distance >= 25:
                    done = True
                    break
                if len(zipcode_ids) >= 25:
                    done = True
                    break
                zipcode_ids.add(zipcode_id)
                data = {
                    "zipcode_id": zipcode_id,
                    "sensor_id": sensor.id,
                    "distance": distance,
                }
                new_relations.append(SensorZipcodeRelation(**data))
            gh = gh[:-1]

    if new_relations:
        logger.info("Creating %s relations", len(new_relations))
        for objs in chunk_list(new_relations):
            db.session.bulk_save_objects(objs)
            db.session.commit()
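Example #1 leans on two helpers that are not shown. The sketches below are hypothetical reconstructions, not the actual airq implementations: Trie.get() is presumed to return every value stored under a key beginning with the given prefix (which is what makes the gh = gh[:-1] widening loop work), and haversine_distance() is presumed to take (lon1, lat1, lon2, lat2) and return kilometers, which the 25 km cutoff above suggests.

import math
import typing

V = typing.TypeVar("V")


class Trie(typing.Generic[V]):
    # Prefix tree mapping string keys (geohashes) to values (zipcodes).
    def __init__(self) -> None:
        self._children: typing.Dict[str, "Trie[V]"] = {}
        self._values: typing.List[V] = []

    def insert(self, key: str, value: V) -> None:
        node: "Trie[V]" = self
        for char in key:
            node = node._children.setdefault(char, Trie())
        node._values.append(value)

    def get(self, prefix: str) -> typing.List[V]:
        # Every value whose key starts with `prefix`.
        node: typing.Optional["Trie[V]"] = self
        for char in prefix:
            node = node._children.get(char)
            if node is None:
                return []
        stack = [node]
        results: typing.List[V] = []
        while stack:
            current = stack.pop()
            results.extend(current._values)
            stack.extend(current._children.values())
        return results


def haversine_distance(lon1: float, lat1: float,
                       lon2: float, lat2: float) -> float:
    # Great-circle distance in kilometers (mean Earth radius 6371 km).
    lon1, lat1, lon2, lat2 = map(math.radians, (lon1, lat1, lon2, lat2))
    a = (math.sin((lat2 - lat1) / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    return 2 * 6371 * math.asin(math.sqrt(a))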
Example #2
def _zipcodes_sync(
    geonames_data: TGeonamesData,
    cities_map: TCitiesMap,
    timezones_map: typing.Dict[str, str],
):
    logger = get_celery_logger()
    existing_zipcodes = {
        zipcode.zipcode: zipcode
        for zipcode in Zipcode.query.all()
    }
    updates = []
    new_zipcodes = []
    for zipcode, city_name, state_code, latitude, longitude in geonames_data:
        obj = existing_zipcodes.get(zipcode)
        timezone = timezones_map.get(zipcode)
        if (not obj or obj.latitude != latitude or obj.longitude != longitude
                or timezone != obj.timezone or obj.coordinates is None):
            gh = geohash.encode(latitude, longitude)
            data = dict(
                zipcode=zipcode,
                city_id=cities_map[state_code][city_name].id,
                latitude=latitude,
                longitude=longitude,
                timezone=timezone,
                coordinates=f"POINT({longitude} {latitude})",
                **{f"geohash_bit_{i}": c
                   for i, c in enumerate(gh, start=1)},
            )
            if obj:
                data["id"] = obj.id
                updates.append(data)
            else:
                new_zipcodes.append(Zipcode(**data))

    if new_zipcodes:
        logger.info("Creating %s zipcodes", len(new_zipcodes))
        for objects in chunk_list(new_zipcodes):
            db.session.bulk_save_objects(objects)
            db.session.commit()

    if updates:
        logger.info("Updating %s zipcodes", len(updates))
        for mappings in chunk_list(updates):
            db.session.bulk_update_mappings(Zipcode, mappings)
            db.session.commit()
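Example #2 fans the geohash out into one single-character column per position (geohash_bit_1 through geohash_bit_12), so the database can index and match on geohash prefixes column by column. A short illustration of what the dict comprehension produces, assuming the python-geohash package (whose encode(latitude, longitude) signature matches the call above and defaults to 12 characters):

import geohash  # python-geohash, assumed from the encode(lat, lon) call above

gh = geohash.encode(45.5231, -122.6765)  # a 12-character string
columns = {f"geohash_bit_{i}": c for i, c in enumerate(gh, start=1)}
# columns == {"geohash_bit_1": gh[0], "geohash_bit_2": gh[1], ..., "geohash_bit_12": gh[11]}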
Example #3
def _metrics_sync():
    logger = get_celery_logger()
    updates = []
    ts = timestamp()

    zipcodes_to_sensors = collections.defaultdict(list)
    for zipcode_id, latest_reading, distance in (
            Sensor.query.join(SensorZipcodeRelation).filter(
                Sensor.updated_at > ts - (30 * 60)).with_entities(
                    SensorZipcodeRelation.zipcode_id,
                    Sensor.latest_reading,
                    SensorZipcodeRelation.distance,
                ).all()):
        zipcodes_to_sensors[zipcode_id].append((latest_reading, distance))

    for zipcode_id, sensor_tuples in zipcodes_to_sensors.items():
        readings: typing.List[float] = []
        closest_distance = float("inf")
        farthest_distance = 0.0
        for reading, distance in sorted(sensor_tuples, key=lambda s: s[1]):
            if (len(readings) < DESIRED_NUM_READINGS
                    or distance < DESIRED_READING_DISTANCE_KM):
                readings.append(reading)
                closest_distance = min(distance, closest_distance)
                farthest_distance = max(distance, farthest_distance)
            else:
                break

        if readings:
            pm25 = round(sum(readings) / len(readings), ndigits=3)
            num_sensors = len(readings)
            min_sensor_distance = round(closest_distance, ndigits=3)
            max_sensor_distance = round(farthest_distance, ndigits=3)
            updates.append({
                "id": zipcode_id,
                "pm25": pm25,
                "pm25_updated_at": ts,
                "num_sensors": num_sensors,
                "min_sensor_distance": min_sensor_distance,
                "max_sensor_distance": max_sensor_distance,
            })

    logger.info("Updating %s zipcodes", len(updates))
    for mappings in chunk_list(updates, batch_size=5000):
        db.session.bulk_update_mappings(Zipcode, mappings)
        db.session.commit()
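Every example batches its writes through chunk_list, which is never defined in these excerpts. A plausible sketch, assuming a default batch size of 1000 (the real airq default may differ; Example #3 and #5 override it to 5000):

import typing

T = typing.TypeVar("T")


def chunk_list(items: typing.Sequence[T],
               batch_size: int = 1000) -> typing.Iterator[typing.Sequence[T]]:
    # Yield successive batch_size-sized slices so bulk saves and updates
    # can be committed in manageable transactions.
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]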
Example #4
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        "zipcodes",
        sa.Column(
            "coordinates",
            Geometry(geometry_type="POINT",
                     from_text="ST_GeomFromEWKT",
                     name="geometry"),
            nullable=True,
        ),
    )
    # ### end Alembic commands ###

    # Now populate coordinates from existing data
    from airq.lib.util import chunk_list
    from airq.models.zipcodes import Zipcode

    bind = op.get_bind()
    session = orm.Session(bind=bind)

    updates = []
    for zipcode in session.query(Zipcode).all():
        data = dict(
            id=zipcode.id,
            coordinates=f"POINT({zipcode.longitude} {zipcode.latitude})",
        )
        updates.append(data)

    print(f"Setting coordinates for {len(updates)} zipcodes")
    num_processed = 0
    for mappings in chunk_list(updates):
        session.bulk_update_mappings(Zipcode, mappings)
        session.commit()
        num_processed += len(mappings)
        print(f"Processed {num_processed} zipcodes")
Example #5
def _metrics_sync():
    logger = get_celery_logger()
    updates = []
    ts = now()

    zipcodes_to_sensors = collections.defaultdict(list)
    for zipcode_id, latest_reading, humidity, pm_cf_1, sensor_id, distance in (
            Sensor.query.join(SensorZipcodeRelation).filter(
                Sensor.updated_at > ts.timestamp() - (30 * 60)).with_entities(
                    SensorZipcodeRelation.zipcode_id,
                    Sensor.latest_reading,
                    Sensor.humidity,
                    Sensor.pm_cf_1,
                    Sensor.id,
                    SensorZipcodeRelation.distance,
                ).all()):
        zipcodes_to_sensors[zipcode_id].append(
            (latest_reading, humidity, pm_cf_1, sensor_id, distance))

    for zipcode_id, sensor_tuples in zipcodes_to_sensors.items():
        pm_25_readings: typing.List[float] = []
        pm_cf_1_readings: typing.List[float] = []
        humidities: typing.List[float] = []
        closest_distance = float("inf")
        farthest_distance = 0.0
        sensor_ids: typing.List[int] = []
        for pm_25, humidity, pm_cf_1, sensor_id, distance in sorted(
                sensor_tuples, key=lambda s: s[-1]):
            if (len(pm_25_readings) < DESIRED_NUM_READINGS
                    or distance < DESIRED_READING_DISTANCE_KM):
                pm_25_readings.append(pm_25)
                humidities.append(humidity)
                pm_cf_1_readings.append(pm_cf_1)
                sensor_ids.append(sensor_id)
                closest_distance = min(distance, closest_distance)
                farthest_distance = max(distance, farthest_distance)
            else:
                break

        if pm_25_readings:
            num_sensors = len(pm_25_readings)
            pm25 = round(sum(pm_25_readings) / num_sensors, ndigits=3)
            humidity = round(sum(humidities) / num_sensors, ndigits=3)
            pm_cf_1 = round(sum(pm_cf_1_readings) / num_sensors, ndigits=3)
            min_sensor_distance = round(closest_distance, ndigits=3)
            max_sensor_distance = round(farthest_distance, ndigits=3)
            details = {
                "num_sensors": num_sensors,
                "min_sensor_distance": min_sensor_distance,
                "max_sensor_distance": max_sensor_distance,
                "sensor_ids": sensor_ids,
            }
            updates.append({
                "id": zipcode_id,
                "pm25": pm25,
                "humidity": humidity,
                "pm_cf_1": pm_cf_1,
                "pm25_updated_at": ts.timestamp(),
                "metrics_data": details,
            })

    logger.info("Updating %s zipcodes", len(updates))
    for mappings in chunk_list(updates, batch_size=5000):
        db.session.bulk_update_mappings(Zipcode, mappings)
        db.session.commit()
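The selection rule shared by both _metrics_sync variants reads densely: sensors are consumed nearest-first, a sensor is kept if more readings are still needed or if it sits inside the desired radius, and the first sensor that fails both tests ends the scan. A worked toy run of that rule (the constants here are illustrative only, not airq's real values):

DESIRED_NUM_READINGS = 2           # illustrative only
DESIRED_READING_DISTANCE_KM = 5.0  # illustrative only

# (pm_25, humidity, pm_cf_1, sensor_id, distance_km)
sensor_tuples = [
    (12.0, 40.0, 14.0, 101, 0.8),
    (15.0, 42.0, 17.0, 102, 3.1),
    (30.0, 55.0, 33.0, 103, 9.7),
]

pm_25_readings = []
for pm_25, humidity, pm_cf_1, sensor_id, distance in sorted(
        sensor_tuples, key=lambda s: s[-1]):
    if (len(pm_25_readings) < DESIRED_NUM_READINGS
            or distance < DESIRED_READING_DISTANCE_KM):
        pm_25_readings.append(pm_25)
    else:
        # Sensor 103 is both beyond the radius and unneeded, so the
        # nearest-first scan stops here.
        break

print(round(sum(pm_25_readings) / len(pm_25_readings), ndigits=3))  # 13.5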