Example #1
0
    def create_empty_insights():
        """Build placeholder hourly and daily insights for every AirQo site.

        ``start_date_time`` and ``end_date_time`` are not defined in this
        function; they must be supplied by an enclosing or module scope.
        For each timestamp in that range one record per site is generated,
        first at "1H" spacing (frequency ``HOURLY``) and then at "24H"
        spacing (frequency ``DAILY``). ``pm2_5`` and ``pm10`` are random
        values from ``random.uniform(50.0, 150.0)`` and every record is
        flagged ``"empty": True`` and ``"forecast": False``.

        A site whose record cannot be built (for example, no ``_id`` key)
        is reported with ``print`` and skipped.

        Returns:
            dict: ``{"data": ...}`` where the value is the record list after
            it has been passed through ``fill_nan``.
        """
        from airqo_etl_utils.airqo_api import AirQoApi

        from airqo_etl_utils.commons import fill_nan
        import random
        import pandas as pd
        from airqo_etl_utils.date import (
            date_to_str_days,
            date_to_str_hours,
        )

        all_sites = AirQoApi().get_sites(tenant="airqo")
        placeholders = []

        # (pandas step, timestamp formatter, frequency label); hourly records
        # are generated before daily ones.
        schedule = (
            ("1H", date_to_str_hours, "HOURLY"),
            ("24H", date_to_str_days, "DAILY"),
        )

        for step, to_text, label in schedule:
            timestamps = pd.date_range(
                start_date_time, end_date_time, freq=step
            )
            for timestamp in timestamps:
                time_text = to_text(timestamp)
                for site in all_sites:
                    try:
                        placeholders.append(
                            {
                                "time": time_text,
                                "pm2_5": random.uniform(50.0, 150.0),
                                "pm10": random.uniform(50.0, 150.0),
                                "empty": True,
                                "frequency": label,
                                "forecast": False,
                                "siteId": site["_id"],
                            }
                        )
                    except Exception as ex:
                        print(ex)

        return {"data": fill_nan(data=placeholders)}
Example #2
0
def get_weather_data_from_tahmo(start_time=None,
                                end_time=None,
                                tenant="airqo"):
    """Fetch TAHMO measurements for the stations referenced by a tenant's sites.

    The station codes are read from the ``nearest_tahmo_station`` entry of
    each site returned by ``AirQoApi.get_sites``. The requested period is
    split into windows with ``pd.date_range`` using the frequency returned
    by ``get_frequency``; ``TahmoApi.get_measurements`` is called once per
    window and the results are concatenated.

    Args:
        start_time: Start of the period, forwarded to ``get_frequency``,
            ``pd.date_range`` and ``remove_invalid_dates``.
        end_time: End of the period. It is also used, unformatted, as the
            end of the last window.
        tenant: Tenant whose sites are looked up.

    Returns:
        list: Measurement records (``to_dict(orient="records")``) after
        ``remove_invalid_dates`` has been applied, or an empty list when no
        measurements were returned.
    """
    station_codes = []
    for site in AirQoApi().get_sites(tenant=tenant):
        try:
            if "nearest_tahmo_station" not in dict(site):
                continue
            station_codes.append(site["nearest_tahmo_station"]["code"])
        except Exception as ex:
            # A malformed site must not abort the whole extraction.
            print(ex)

    measurements = []
    tahmo_api = TahmoApi()

    frequency = get_frequency(start_time=start_time, end_time=end_time)
    dates = pd.date_range(start_time, end_time, freq=frequency)
    last_date_time = dates.values[-1]

    for window_start in dates:
        start = date_to_str(window_start)
        # The window width is the frequency multiple, interpreted as hours.
        window_end = window_start + timedelta(hours=dates.freq.n)

        # A window reaching past the last generated date is clipped to the
        # caller's end_time.
        end = (
            end_time
            if np.datetime64(window_end) > last_date_time
            else date_to_str(window_end)
        )

        print(start + " : " + end)

        measurements.extend(
            tahmo_api.get_measurements(start, end, station_codes))

    if not measurements:
        empty_df = pd.DataFrame(
            [], columns=["value", "variable", "station", "time"])
        return empty_df.to_dict(orient="records")

    cleaned_df = remove_invalid_dates(
        dataframe=pd.DataFrame(data=measurements),
        start_time=start_time,
        end_time=end_time,
    )
    return cleaned_df.to_dict(orient="records")
def extract_sites_meta_data(tenant=None) -> list:
    """Return per-site metadata records for a tenant.

    The sites come from ``AirQoApi.get_sites``. A fixed set of columns is
    kept and the site-specific ones are renamed with a ``site_`` prefix
    (``_id`` becomes ``site_id``); ``tenant`` and the administrative columns
    (``country``, ``region``, ``parish``, ``sub_county``, ``county``,
    ``district``, ``city``) keep their names.

    Args:
        tenant: Tenant forwarded to ``AirQoApi.get_sites``.

    Returns:
        list: One dict per site, or an empty list when the API returns no
        sites.

    Raises:
        KeyError: If sites are returned but an expected column is missing.
    """
    selected_columns = [
        "_id",
        "latitude",
        "tenant",
        "longitude",
        "name",
        "bearing_to_kampala_center",
        "landform_90",
        "distance_to_kampala_center",
        "altitude",
        "landform_270",
        "aspect",
        "description",
        "distance_to_nearest_tertiary_road",
        "distance_to_nearest_primary_road",
        "distance_to_nearest_road",
        "distance_to_nearest_residential_road",
        "distance_to_nearest_secondary_road",
        "distance_to_nearest_unclassified_road",
        "country",
        "region",
        "parish",
        "sub_county",
        "county",
        "district",
        "city",
    ]
    renamed_columns = {
        "_id": "site_id",
        "latitude": "site_latitude",
        "longitude": "site_longitude",
        "description": "site_description",
        "altitude": "site_altitude",
        "name": "site_name",
        "distance_to_nearest_tertiary_road":
        "site_distance_to_nearest_tertiary_road",
        "distance_to_nearest_primary_road":
        "site_distance_to_nearest_primary_road",
        "distance_to_nearest_road": "site_distance_to_nearest_road",
        "distance_to_nearest_residential_road":
        "site_distance_to_nearest_residential_road",
        "distance_to_nearest_secondary_road":
        "site_distance_to_nearest_secondary_road",
        "distance_to_nearest_unclassified_road":
        "site_distance_to_nearest_unclassified_road",
        "bearing_to_kampala_center": "site_bearing_to_kampala_center",
        "landform_90": "site_landform_90",
        "distance_to_kampala_center": "site_distance_to_kampala_center",
        "landform_270": "site_landform_270",
        "aspect": "site_aspect",
    }

    sites_df = pd.DataFrame(AirQoApi().get_sites(tenant=tenant))
    if sites_df.empty:
        # With no sites the frame has no columns, so the selection below
        # would raise KeyError instead of yielding an empty result.
        return []

    # The index is not part of the "records" output, so no reset is needed.
    return (
        sites_df[selected_columns]
        .rename(columns=renamed_columns)
        .to_dict(orient="records")
    )
Example #4
0
def resample_weather_data(data: list, frequency: str):
    """Resample raw weather measurements per station and fan them out to devices.

    Only rows whose ``variable`` is ``"te"``, ``"rh"`` or ``"ws"`` are used.
    ``rh`` values are multiplied by 100, all values are coerced to numeric
    and every remaining NaN in the combined frame is replaced with 0. For
    each station the three variables are resampled with ``resample_data``,
    outer-merged on ``time`` into ``temperature`` / ``humidity`` /
    ``wind_speed`` columns, tagged with ``frequency`` and then duplicated
    once per device id returned by ``get_device_ids_from_station``.

    A station that raises during processing is reported (message and
    traceback are printed) and skipped; none of its records are kept.

    Args:
        data: Raw measurement records with ``value``, ``variable``,
            ``station`` and ``time`` fields.
        frequency: Forwarded to ``resample_data`` and written to the
            ``frequency`` column of every record.

    Returns:
        list: One dict per (device, resampled timestamp). An empty list is
        returned when ``data`` holds no rows.
    """
    weather_raw_data = pd.DataFrame(data)
    if weather_raw_data.empty:
        return weather_raw_data.to_dict(orient="records")

    sites = AirQoApi().get_sites(tenant="airqo")
    # Only sites that reference a TAHMO station can be mapped to devices.
    valid_sites = [
        site for site in sites if "nearest_tahmo_station" in dict(site)
    ]

    kept_columns = ["value", "variable", "station", "time"]
    temperature = weather_raw_data.loc[
        weather_raw_data["variable"] == "te", kept_columns]
    humidity = weather_raw_data.loc[
        weather_raw_data["variable"] == "rh", kept_columns]
    wind_speed = weather_raw_data.loc[
        weather_raw_data["variable"] == "ws", kept_columns]

    # Humidity is scaled by 100 (presumably fraction -> percent; confirm
    # against the TAHMO API).
    humidity["value"] = pd.to_numeric(humidity["value"], errors="coerce") * 100

    combined = pd.concat([temperature, humidity, wind_speed]).reset_index()
    combined["value"] = pd.to_numeric(combined["value"],
                                      errors="coerce",
                                      downcast="float")
    combined = combined.fillna(0)

    # (raw variable code, output column name), processed in this order.
    variable_columns = (
        ("te", "temperature"),
        ("rh", "humidity"),
        ("ws", "wind_speed"),
    )

    devices_weather_data = []

    for _, station_group in combined.groupby("station"):

        device_weather_data = []
        station = station_group.iloc[0]["station"]

        try:
            resampled_frames = []
            for code, column_name in variable_columns:
                resampled = resample_data(
                    station_group.loc[station_group["variable"] == code,
                                      ["value", "time"]],
                    frequency,
                )
                resampled.columns = [column_name, "time"]
                resampled_frames.append(resampled)

            station_df = reduce(
                lambda left, right: pd.merge(
                    left, right, on=["time"], how="outer"),
                resampled_frames,
            )
            station_df["frequency"] = frequency

            # Map the station to the devices of the sites that reference it.
            station_devices = get_device_ids_from_station(station, valid_sites)

            if len(station_devices) == 0:
                continue

            for device_id in station_devices:
                device_records = station_df.assign(device_id=device_id)
                device_weather_data.extend(
                    device_records.to_dict(orient="records"))

        except Exception as ex:
            print(ex)
            traceback.print_exc()
            continue
        else:
            # Records are committed only when the whole station succeeded.
            devices_weather_data.extend(device_weather_data)

    return devices_weather_data