Example #1
0
def extract_kcca_measurements(start_time: str, end_time: str,
                              freq: str) -> list:
    if freq.lower() == "hourly":
        interval = "6H"
    elif freq.lower() == "daily":
        interval = "48H"
    else:
        interval = "1H"

    dates = pd.date_range(start_time, end_time, freq=interval)
    measurements = []
    last_date_time = dates.values[len(dates.values) - 1]

    for date in dates:

        start = date_to_str(date)
        end_date_time = date + timedelta(hours=dates.freq.n)

        if np.datetime64(end_date_time) > last_date_time:
            end = end_time
        else:
            end = date_to_str(end_date_time)

        print(start_time + " : " + end_time)

        range_measurements = query_kcca_measurements(freq, start, end)
        measurements.extend(range_measurements)

    measurements_df = pd.json_normalize(measurements)
    return measurements_df.to_dict(orient="records")
Example #2
0
def insights_forecast():
    from airqo_etl_utils.app_insights_utils import (
        create_insights_data,
        get_forecast_data,
        transform_old_forecast,
    )

    from airqo_etl_utils.date import date_to_str, first_day_of_week, first_day_of_month

    now = datetime.now()
    start_date_time = date_to_str(
        first_day_of_week(first_day_of_month(date_time=now)))
    end_date_time = date_to_str(now)

    old_forecast = transform_old_forecast(start_date_time=start_date_time,
                                          end_date_time=end_date_time)
    pd.DataFrame(old_forecast).to_csv(path_or_buf="old_forecast_data.csv",
                                      index=False)

    forecast_data = get_forecast_data("airqo")
    pd.DataFrame(forecast_data).to_csv(path_or_buf="forecast_data.csv",
                                       index=False)

    insights_data = create_insights_data(data=forecast_data)
    pd.DataFrame(insights_data).to_csv(
        path_or_buf="insights_forecast_data.csv", index=False)
Example #3
0
    def get_hourly_data(
        self, start_date_time: str, end_date_time: str, columns: list, table: str
    ) -> pd.DataFrame:

        try:
            query = f"""
                SELECT {', '.join(map(str, columns))}
                FROM `{table}`
                WHERE timestamp >= '{start_date_time}' and timestamp <= '{end_date_time}'
            """
            dataframe = self.client.query(query=query).result().to_dataframe()
        except Exception as ex:
            print(ex)
            query = f"""
                SELECT {', '.join(map(str, columns))}
                FROM `{table}`
                WHERE time >= '{start_date_time}' and time <= '{end_date_time}'
            """

            dataframe = self.client.query(query=query).result().to_dataframe()

        dataframe["timestamp"] = dataframe["timestamp"].apply(lambda x: date_to_str(x))
        if "time" in list(dataframe.columns):
            dataframe["time"] = dataframe["time"].apply(lambda x: date_to_str(x))

        return dataframe
Example #4
0
def extract_airqo_devices_deployment_history() -> list:
    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo")
    devices_history = []
    for device in devices:

        try:
            maintenance_logs = airqo_api.get_maintenance_logs(
                tenant="airqo", device=device["name"], activity_type="deployment"
            )

            if not maintenance_logs or len(maintenance_logs) <= 1:
                continue

            log_df = pd.DataFrame(maintenance_logs)
            log_df = log_df.dropna(subset=["date"])

            log_df["site_id"] = (
                log_df["site_id"].fillna(method="bfill").fillna(method="ffill")
            )
            log_df = log_df.dropna(subset=["site_id"])

            log_df["start_time"] = pd.to_datetime(log_df["date"])
            log_df = log_df.sort_values(by="start_time")
            log_df["end_time"] = log_df["start_time"].shift(-1)
            log_df["end_time"] = log_df["end_time"].fillna(datetime.utcnow())

            log_df["start_time"] = log_df["start_time"].apply(lambda x: date_to_str(x))
            log_df["end_time"] = log_df["end_time"].apply(lambda x: date_to_str(x))

            if len(set(log_df["site_id"].tolist())) == 1:
                continue

            for _, raw in log_df.iterrows():
                device_history = {
                    "device": raw["device"],
                    "device_id": device["_id"],
                    "start_time": raw["start_time"],
                    "end_time": raw["end_time"],
                    "site_id": raw["site_id"],
                }

                devices_history.append(device_history)

        except Exception as ex:
            print(ex)
            traceback.print_exc()

    return devices_history
Example #5
0
    def extract():

        from airqo_etl_utils.kcca_utils import extract_kcca_measurements
        from airqo_etl_utils.commons import fill_nan
        from airqo_etl_utils.date import date_to_str
        from datetime import datetime, timedelta

        start_time = date_to_str(datetime.utcnow() - timedelta(hours=1))
        end_time = date_to_str(datetime.utcnow())

        kcca_data = extract_kcca_measurements(start_time=start_time,
                                              end_time=end_time,
                                              freq="raw")

        return dict({"data": fill_nan(data=kcca_data)})
Example #6
0
def get_airqo_data(freq: str,
                   start_time: str = None,
                   end_time: str = None) -> list:
    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo", all_devices=False)
    measurements = []

    start = (str_to_date(start_time) if start_time else datetime.utcnow() -
             timedelta(days=7))
    end = str_to_date(end_time) if end_time else datetime.utcnow()

    start_time = (date_to_str_days(start)
                  if freq == "daily" else date_to_str_hours(start))
    end_time = date_to_str_days(end) if freq == "daily" else date_to_str_hours(
        end)

    frequency = get_airqo_api_frequency(freq=freq)
    dates = pd.date_range(start_time, end_time, freq=frequency)
    last_date_time = dates.values[len(dates.values) - 1]

    for device in devices:

        for date in dates:

            start = date_to_str(date)
            end_date_time = date + timedelta(hours=dates.freq.n)

            if np.datetime64(end_date_time) > last_date_time:
                end = end_time
            else:
                end = date_to_str(end_date_time)

            try:
                events = airqo_api.get_events(
                    tenant="airqo",
                    start_time=start,
                    frequency=freq,
                    end_time=end,
                    device=device["name"],
                )
                measurements.extend(events)

            except Exception as ex:
                print(ex)
                traceback.print_exc()

    insights = format_measurements_to_insights(data=measurements)
    return insights
Example #7
0
def get_weather_data_from_tahmo(start_time=None,
                                end_time=None,
                                tenant="airqo"):
    airqo_api = AirQoApi()
    airqo_sites = airqo_api.get_sites(tenant=tenant)
    station_codes = []
    for site in airqo_sites:
        try:
            if "nearest_tahmo_station" in dict(site).keys():
                station_codes.append(site["nearest_tahmo_station"]["code"])
        except Exception as ex:
            print(ex)

    measurements = []
    tahmo_api = TahmoApi()

    frequency = get_frequency(start_time=start_time, end_time=end_time)
    dates = pd.date_range(start_time, end_time, freq=frequency)
    last_date_time = dates.values[len(dates.values) - 1]

    for date in dates:

        start = date_to_str(date)
        end_date_time = date + timedelta(hours=dates.freq.n)

        if np.datetime64(end_date_time) > last_date_time:
            end = end_time
        else:
            end = date_to_str(end_date_time)

        print(start + " : " + end)

        range_measurements = tahmo_api.get_measurements(
            start, end, station_codes)
        measurements.extend(range_measurements)

    if len(measurements) != 0:
        measurements_df = pd.DataFrame(data=measurements)
    else:
        measurements_df = pd.DataFrame(
            [], columns=["value", "variable", "station", "time"])
        return measurements_df.to_dict(orient="records")

    clean_measurements_df = remove_invalid_dates(dataframe=measurements_df,
                                                 start_time=start_time,
                                                 end_time=end_time)
    return clean_measurements_df.to_dict(orient="records")
Example #8
0
def resample_data(data: pd.DataFrame, frequency: str) -> pd.DataFrame:
    data = data.dropna(subset=["time"])
    data["time"] = pd.to_datetime(data["time"])
    data = data.sort_index(axis=0)
    if "latitude" in data.columns and "longitude" in data.columns:
        original_df = data[["time", "latitude", "longitude"]]
    else:
        original_df = data[["time"]]

    resample_value = "24H" if frequency.lower() == "daily" else "1H"
    averages = pd.DataFrame(data.resample(resample_value, on="time").mean())

    averages["time"] = averages.index
    averages["time"] = averages["time"].apply(lambda x: date_to_str(x))
    averages = averages.reset_index(drop=True)

    if resample_value == "1H":
        original_df["time"] = original_df["time"].apply(
            lambda x: date_to_str_hours(x))
    elif resample_value == "24H":
        original_df["time"] = original_df["time"].apply(
            lambda x: date_to_str_days(x))
    else:
        original_df["time"] = original_df["time"].apply(
            lambda x: date_to_str(x))

    if "latitude" in original_df.columns and "longitude" in original_df.columns:

        def reset_latitude_or_longitude(time: str, field: str):
            date_row = pd.DataFrame(
                original_df.loc[original_df["time"] == time])
            if date_row.empty:
                return time
            return (date_row.iloc[0]["latitude"]
                    if field == "latitude" else date_row.iloc[0]["longitude"])

        averages["latitude"] = averages.apply(
            lambda row: reset_latitude_or_longitude(row["time"], "latitude"),
            axis=1)
        averages["longitude"] = averages.apply(
            lambda row: reset_latitude_or_longitude(row["time"], "longitude"),
            axis=1)

    return averages
Example #9
0
    def extract_insights_forecast_data():
        from airqo_etl_utils.app_insights_utils import (
            create_insights_data,
            transform_old_forecast,
        )
        from airqo_etl_utils.date import (
            date_to_str,
            first_day_of_week,
            first_day_of_month,
        )
        from airqo_etl_utils.commons import fill_nan

        now = datetime.now()
        start_date_time = date_to_str(
            first_day_of_week(first_day_of_month(date_time=now)))
        end_date_time = date_to_str(now)

        forecast_data = transform_old_forecast(start_date_time=start_date_time,
                                               end_date_time=end_date_time)
        insights_data = create_insights_data(data=forecast_data)

        return dict({"data": fill_nan(data=insights_data)})
Example #10
0
def query_insights_data(freq: str,
                        start_date_time: str,
                        end_date_time: str,
                        forecast=False,
                        all_data=False) -> list:
    airqo_api = AirQoApi()
    insights = []

    frequency = get_frequency(start_time=start_date_time,
                              end_time=end_date_time)
    dates = pd.date_range(start_date_time, end_date_time, freq=frequency)
    last_date_time = dates.values[len(dates.values) - 1]

    for date in dates:

        start = date_to_str(date)
        query_end_date_time = date + timedelta(hours=dates.freq.n)

        if np.datetime64(query_end_date_time) > last_date_time:
            end = end_date_time
        else:
            end = date_to_str(query_end_date_time)

        try:
            api_results = airqo_api.get_app_insights(
                start_time=start,
                frequency=freq,
                end_time=end,
                forecast=forecast,
                all_data=all_data,
            )
            insights.extend(api_results)

        except Exception as ex:
            print(ex)
            traceback.print_exc()

    return insights
Example #11
0
def remove_invalid_dates(dataframe: pd.DataFrame, start_time: str,
                         end_time: str) -> pd.DataFrame:
    start = pd.to_datetime(start_time)
    end = pd.to_datetime(end_time)

    dataframe["time"] = pd.to_datetime(dataframe["time"])
    data_frame = dataframe.set_index(["time"])

    time_data_frame = data_frame.loc[(data_frame.index >= start)
                                     & (data_frame.index <= end)]

    time_data_frame["time"] = time_data_frame.index
    time_data_frame["time"] = time_data_frame["time"].apply(
        lambda x: date_to_str(x))
    time_data_frame = time_data_frame.reset_index(drop=True)

    return time_data_frame
Example #12
0
def extract_airqo_data_from_thingspeak(
    start_time: str, end_time: str, all_devices: bool
) -> list:
    thingspeak_base_url = configuration.THINGSPEAK_CHANNEL_URL

    airqo_api = AirQoApi()
    airqo_devices = airqo_api.get_devices(tenant="airqo", all_devices=all_devices)
    read_keys = airqo_api.get_read_keys(devices=airqo_devices)

    channels_data = []

    frequency = get_frequency(start_time=start_time, end_time=end_time)

    def get_field_8_value(x: str, position: int):

        try:
            values = x.split(",")
            return values[position]
        except Exception as exc:
            print(exc)
            return None

    dates = pd.date_range(start_time, end_time, freq=frequency)
    last_date_time = dates.values[len(dates.values) - 1]
    for device in airqo_devices:
        try:

            channel_id = str(device["device_number"])

            for date in dates:

                start = date_to_str(date)
                end_date_time = date + timedelta(hours=dates.freq.n)

                if np.datetime64(end_date_time) > last_date_time:
                    end = end_time
                else:
                    end = date_to_str(end_date_time)

                read_key = read_keys[str(channel_id)]

                channel_url = f"{thingspeak_base_url}{channel_id}/feeds.json?start={start}&end={end}&api_key={read_key}"
                print(f"{channel_url}")

                data = json.loads(
                    requests.get(channel_url, timeout=100.0).content.decode("utf-8")
                )
                if (data != -1) and ("feeds" in data):
                    dataframe = pd.DataFrame(data["feeds"])

                    if dataframe.empty:
                        print(
                            f"{channel_id} does not have data between {start} and {end}"
                        )
                        continue

                    channel_df = pd.DataFrame(
                        data=[],
                        columns=[
                            "time",
                            "s1_pm2_5",
                            "s2_pm2_5",
                            "s1_pm10",
                            "device_id",
                            "site_id",
                            "s2_pm10",
                            "latitude",
                            "longitude",
                            "altitude",
                            "wind_speed",
                            "satellites",
                            "hdop",
                            "internalTemperature",
                            "internalHumidity",
                            "battery",
                            "temperature",
                            "humidity",
                            "pressure",
                            "externalAltitude",
                        ],
                    )

                    channel_df["s1_pm2_5"] = dataframe["field1"].apply(
                        lambda x: get_valid_value(x, "pm2_5")
                    )
                    channel_df["s1_pm10"] = dataframe["field2"].apply(
                        lambda x: get_valid_value(x, "pm10")
                    )
                    channel_df["s2_pm2_5"] = dataframe["field3"].apply(
                        lambda x: get_valid_value(x, "pm2_5")
                    )
                    channel_df["s2_pm10"] = dataframe["field4"].apply(
                        lambda x: get_valid_value(x, "pm10")
                    )
                    channel_df["latitude"] = dataframe["field5"].apply(
                        lambda x: get_valid_value(x, "latitude")
                    )
                    channel_df["longitude"] = dataframe["field6"].apply(
                        lambda x: get_valid_value(x, "longitude")
                    )
                    channel_df["battery"] = dataframe["field7"].apply(
                        lambda x: get_valid_value(x, "battery")
                    )

                    if "field8" in dataframe.columns:
                        try:
                            channel_df["latitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 0), "latitude"
                                )
                            )
                            channel_df["longitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 1), "longitude"
                                )
                            )
                            channel_df["altitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 2), "altitude"
                                )
                            )
                            channel_df["wind_speed"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 3), "wind_speed"
                                )
                            )
                            channel_df["satellites"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 4), "satellites"
                                )
                            )
                            channel_df["hdop"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 5), "hdop"
                                )
                            )
                            channel_df["internalTemperature"] = dataframe[
                                "field8"
                            ].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 6), "externalTemperature"
                                )
                            )
                            channel_df["internalHumidity"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 7), "externalHumidity"
                                )
                            )
                            channel_df["temperature"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 8), "externalTemperature"
                                )
                            )
                            channel_df["humidity"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 9), "externalHumidity"
                                )
                            )
                            channel_df["pressure"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 10), "pressure"
                                )
                            )
                            channel_df["externalAltitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 11), "altitude"
                                )
                            )

                        except Exception as ex:
                            traceback.print_exc()
                            print(ex)

                    channel_df["time"] = dataframe["created_at"]
                    channel_df["device_id"] = device["_id"]
                    channel_df["site_id"] = device["site"]["_id"]
                    channel_df["device_number"] = device["device_number"]
                    channel_df["device"] = device["name"]
                    channel_df["frequency"] = "raw"

                    channels_data.extend(channel_df.to_dict(orient="records"))

        except Exception as ex:
            print(ex)
            traceback.print_exc()

    channel_data_df = pd.DataFrame(channels_data)
    clean_channel_data_df = remove_invalid_dates(
        dataframe=channel_data_df, start_time=start_time, end_time=end_time
    )
    return clean_channel_data_df.to_dict(orient="records")
Example #13
0
def predict_time_to_string(time: str):
    date_time = predict_str_to_date(time)
    return date_to_str(date_time)