예제 #1
0
def get_frequency(start_time: str, end_time: str) -> str:
    """Derive an hourly frequency string (e.g. ``"24H"``) from a time span.

    The span between ``start_time`` and ``end_time`` is mapped as follows:

    * rounds to five or more days -> capped at ``"96H"``;
    * rounds to one day or less   -> the span rounded to whole hours,
      never less than ``"1H"`` (this also covers zero/negative spans);
    * otherwise                   -> the rounded day count expressed in hours.

    Args:
        start_time: Start of the span, in whatever string format
            ``str_to_date`` accepts.
        end_time: End of the span, in the same format.

    Returns:
        A string of the form ``"<n>H"``.
    """
    # Parse once and work from the full elapsed time. ``timedelta.seconds``
    # only holds the sub-day remainder, so using it here would report a
    # 24-hour span as 0 hours and a negative span as a large positive one.
    elapsed_seconds = (str_to_date(end_time) -
                       str_to_date(start_time)).total_seconds()
    diff_days = round(elapsed_seconds / 86400)

    if diff_days >= 5:
        return "96H"

    if diff_days <= 1:
        diff_hours = round(elapsed_seconds / 3600)
        return "1H" if diff_hours <= 0 else f"{diff_hours}H"

    # diff_days is already an int here (2, 3 or 4).
    return f"{diff_days * 24}H"
예제 #2
0
def get_airqo_data(freq: str,
                   start_time: str = None,
                   end_time: str = None) -> list:
    """Fetch AirQo events per device over a time range and format them.

    The range is split into consecutive windows with ``pd.date_range``;
    for every device and every window ``AirQoApi.get_events`` is called and
    the results are accumulated. A failing request is printed (message and
    traceback) and skipped so the remaining windows are still fetched.

    Args:
        freq: Frequency label forwarded to the API as ``frequency``. The
            value ``"daily"`` selects day-level formatting of the range
            boundaries; any other value selects hour-level formatting.
        start_time: Optional range start; defaults to seven days before
            the current UTC time.
        end_time: Optional range end; defaults to the current UTC time.

    Returns:
        Whatever ``format_measurements_to_insights`` produces for the
        collected events.
    """
    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo", all_devices=False)
    measurements = []

    if start_time:
        range_start = str_to_date(start_time)
    else:
        range_start = datetime.utcnow() - timedelta(days=7)

    if end_time:
        range_end = str_to_date(end_time)
    else:
        range_end = datetime.utcnow()

    # Boundaries are re-serialised at day or hour resolution.
    format_boundary = (date_to_str_days
                       if freq == "daily" else date_to_str_hours)
    start_time = format_boundary(range_start)
    end_time = format_boundary(range_end)

    dates = pd.date_range(start_time,
                          end_time,
                          freq=get_airqo_api_frequency(freq=freq))
    last_date_time = dates.values[-1]
    # NOTE(review): the step treats the frequency multiple as hours.
    window_length = timedelta(hours=dates.freq.n)

    for device in devices:
        for window_start in dates:
            query_start = date_to_str(window_start)
            window_end = window_start + window_length

            # A window that would run past the last generated date is
            # closed at the requested end of the range instead.
            if np.datetime64(window_end) > last_date_time:
                query_end = end_time
            else:
                query_end = date_to_str(window_end)

            try:
                measurements.extend(
                    airqo_api.get_events(
                        tenant="airqo",
                        start_time=query_start,
                        frequency=freq,
                        end_time=query_end,
                        device=device["name"],
                    ))
            except Exception as ex:
                print(ex)
                traceback.print_exc()

    return format_measurements_to_insights(data=measurements)
예제 #3
0
def map_site_ids_to_historical_measurements(data: list, deployment_logs: list) -> list:
    """Attach a ``site_id`` to each measurement using deployment history.

    Each measurement starts with the site of its device as currently
    reported by the AirQo API. If a deployment log for that device covers
    the measurement time (inclusive on both ends), the log's ``site_id``
    replaces it; when several logs match, the last one iterated wins.
    Measurements whose device cannot be resolved are dropped.

    Args:
        data: Measurement records; each is read for ``device_id`` and
            ``time``.
        deployment_logs: Log records; each is read for ``device_id``,
            ``start_time``, ``end_time`` and ``site_id``.

    Returns:
        ``data`` unchanged when either argument is empty/falsy, otherwise
        a new list of dict records with ``site_id`` set.
    """
    if not (deployment_logs and data):
        return data

    devices = AirQoApi().get_devices(tenant="airqo")

    result = []

    logs_df = pd.DataFrame(deployment_logs)
    # Parse both interval boundaries so they can be compared with
    # parsed measurement times.
    for boundary in ("start_time", "end_time"):
        logs_df[boundary] = logs_df[boundary].apply(str_to_date)

    measurements_df = pd.DataFrame(un_fill_nan(data))

    for _, row in measurements_df.iterrows():
        device = get_device(devices, device_id=row["device_id"])
        if not device:
            continue

        # Fall back to the device's present site unless a log overrides it.
        resolved_site = device.get("site").get("_id")
        measured_at = str_to_date(row["time"])
        logs_for_device = logs_df[logs_df["device_id"] == device.get("_id")]

        # Iterating an empty frame is a no-op, so no emptiness check needed.
        for _, log in logs_for_device.iterrows():
            if log["start_time"] <= measured_at <= log["end_time"]:
                resolved_site = log["site_id"]

        row["site_id"] = resolved_site
        result.append(row.to_dict())

    return result
예제 #4
0
def measurement_time_to_string(time: str, daily=False):
    """Re-format a measurement time string at day or hour resolution.

    Args:
        time: Time string, parsed with ``str_to_date``.
        daily: When truthy the result comes from ``date_to_str_days``;
            otherwise from ``date_to_str_hours``.

    Returns:
        The value returned by the selected formatter.
    """
    parsed = str_to_date(time)
    formatter = date_to_str_days if daily else date_to_str_hours
    return formatter(parsed)
예제 #5
0
def restructure_airqo_data_for_bigquery(data: list) -> list:
    """Reshape AirQo measurement records into BigQuery hourly rows.

    Every input record must provide ``time``, ``site_id``,
    ``device_number`` and ``device``. The remaining output fields are
    looked up through ``get_column_value`` (presumably tolerant of
    missing columns -- confirm against that helper).

    Args:
        data: List of measurement records (dict-like).

    Returns:
        A list of dicts restricted to, and ordered by,
        ``BigQueryApi().hourly_measurements_columns``.
    """
    # (output field, input column) pairs resolved via get_column_value.
    field_sources = (
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("pm2_5", "pm2_5"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("pm2_5_raw_value", "raw_pm2_5"),
        ("pm2_5_calibrated_value", "calibrated_pm2_5"),
        ("pm10", "pm10"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm10", "s2_pm10"),
        ("pm10_raw_value", "raw_pm10"),
        ("pm10_calibrated_value", "calibrated_pm10"),
        ("altitude", "altitude"),
        ("wind_speed", "wind_speed"),
        ("external_temperature", "temperature"),
        ("external_humidity", "humidity"),
    )

    data_df = pd.DataFrame(data)
    columns = list(data_df.columns)

    rows = []
    for _, record in data_df.iterrows():
        row = {
            "timestamp": str_to_date(record["time"]),
            "tenant": "airqo",
            "site_id": record["site_id"],
            "device_number": record["device_number"],
            "device": record["device"],
        }
        for target, source in field_sources:
            row[target] = get_column_value(
                column=source, columns=columns, series=record
            )
        rows.append(row)

    bigquery_columns = BigQueryApi().hourly_measurements_columns
    shaped = pd.DataFrame(columns=bigquery_columns, data=rows)
    return shaped.to_dict(orient="records")
예제 #6
0
def transform_kcca_hourly_data_for_bigquery(data: list) -> list:
    """Reshape KCCA hourly measurement records into BigQuery hourly rows.

    Records whose ``deviceCode`` cannot be resolved to a site through
    ``get_site_and_device_id`` are skipped. Coordinates are taken from
    the stringified ``location.coordinates`` value: element 0 becomes
    ``longitude`` and element 1 ``latitude``, both kept as the raw
    substrings (no numeric conversion is performed here).

    Args:
        data: List of flattened KCCA measurement records.

    Returns:
        A list of dicts restricted to, and ordered by,
        ``BigQueryApi().hourly_measurements_columns``.
    """
    # (output field, input column) pairs resolved via get_column_value.
    field_sources = (
        ("pm2_5", "characteristics.pm2_5ConcMass.value"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("pm2_5_raw_value", "characteristics.pm2_5ConcMass.raw"),
        ("pm2_5_calibrated_value",
         "characteristics.pm2_5ConcMass.calibratedValue"),
        ("pm10", "characteristics.pm10ConcMass.value"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm10", "s2_pm10"),
        ("pm10_raw_value", "characteristics.pm10ConcMass.raw"),
        ("pm10_calibrated_value",
         "characteristics.pm10ConcMass.calibratedValue"),
        ("no2", "characteristics.no2Conc.value"),
        ("no2_raw_value", "characteristics.no2Conc.raw"),
        ("no2_calibrated_value", "characteristics.no2Conc.calibratedValue"),
        ("pm1", "characteristics.pm1ConcMass.value"),
        ("pm1_raw_value", "characteristics.pm1ConcMass.raw"),
        ("pm1_calibrated_value",
         "characteristics.pm1ConcMass.calibratedValue"),
        ("altitude", "characteristics.altitude.value"),
        ("wind_speed", "characteristics.windSpeed.value"),
        ("external_temperature", "characteristics.temperature.value"),
        ("external_humidity", "characteristics.relHumid.value"),
    )
    # Deletes every "[" and "]" from the stringified coordinates.
    strip_brackets = str.maketrans("", "", "[]")

    data_df = pd.DataFrame(data)
    columns = list(data_df.columns)

    devices = AirQoApi().get_devices(tenant="kcca")

    rows = []
    for _, record in data_df.iterrows():
        device_name = record["deviceCode"]
        site_id, _ = get_site_and_device_id(devices, device_name=device_name)
        if not site_id:
            continue

        coordinates = (str(record["location.coordinates"])
                       .translate(strip_brackets)
                       .split(","))

        row = {
            "timestamp": str_to_date(record["time"]),
            "tenant": "kcca",
            "site_id": site_id,
            "device_number": 0,
            "device": device_name,
            "latitude": coordinates[1],
            "longitude": coordinates[0],
        }
        for target, source in field_sources:
            row[target] = get_column_value(column=source,
                                           columns=columns,
                                           series=record)
        rows.append(row)

    bigquery_columns = BigQueryApi().hourly_measurements_columns
    shaped = pd.DataFrame(columns=bigquery_columns, data=rows)
    return shaped.to_dict(orient="records")