def extract_airqo_hourly_data_from_api(start_time: str, end_time: str) -> list:
    """Fetch hourly events for every airqo device between start_time and end_time.

    :param start_time: window start (string format accepted by the events API).
    :param end_time: window end.
    :return: list of measurement records with nested fields flattened and renamed.
    """
    airqo_api = AirQoApi()
    devices = list(airqo_api.get_devices(tenant="airqo"))
    hourly_events = []
    if not devices:
        print("devices empty")
        return []
    for device in devices:
        try:
            # A device without a name cannot be queried for events.
            if "name" not in device:
                print(f"name missing in device keys : {device}")
                continue
            device_name = device["name"]
            events = airqo_api.get_events(
                tenant="airqo",
                start_time=start_time,
                frequency="hourly",
                end_time=end_time,
                device=device_name,
            )
            if not events:
                print(
                    f"No measurements for {device_name} : startTime {start_time} : endTime : {end_time}"
                )
                continue
            hourly_events.extend(events)
        except Exception as ex:
            # Best-effort: a failure on one device must not abort the whole pull.
            traceback.print_exc()
            print(ex)

    # Flatten nested event dicts (e.g. "pm10.value") into flat columns.
    device_measurements = pd.json_normalize(hourly_events)
    column_mappings = {
        "internalTemperature.value": "internalTemperature",
        "internalHumidity.value": "internalHumidity",
        "externalTemperature.value": "temperature",
        "externalHumidity.value": "humidity",
        "externalPressure.value": "pressure",
        "speed.value": "windSpeed",
        "altitude.value": "altitude",
        "battery.value": "battery",
        "satellites.value": "satellites",
        "hdop.value": "hdop",
        "pm10.value": "pm10",
        "s2_pm10.value": "s2_pm10",
        "s2_pm2_5.value": "s2_pm2_5",
        "average_pm2_5.calibratedValue": "calibrated_pm2_5",
    }
    device_measurements.rename(columns=column_mappings, inplace=True)
    return device_measurements.to_dict(orient="records")
def extract_airqo_devices_deployment_history() -> list:
    """Build per-device site deployment intervals from maintenance logs.

    Each record describes the site a device was deployed at and the
    [start_time, end_time) window derived from consecutive deployment logs.
    Devices that never moved between sites are skipped.

    :return: list of dicts with device, device_id, start_time, end_time, site_id.
    """
    api = AirQoApi()
    history = []
    for device in api.get_devices(tenant="airqo"):
        try:
            logs = api.get_maintenance_logs(
                tenant="airqo", device=device["name"], activity_type="deployment"
            )
            # A single deployment log yields no site transition worth recording.
            if not logs or len(logs) <= 1:
                continue

            frame = pd.DataFrame(logs).dropna(subset=["date"])
            # Patch missing site ids from neighbouring logs before dropping the rest.
            frame["site_id"] = (
                frame["site_id"].fillna(method="bfill").fillna(method="ffill")
            )
            frame = frame.dropna(subset=["site_id"])

            frame["start_time"] = pd.to_datetime(frame["date"])
            frame = frame.sort_values(by="start_time")
            # Each deployment ends when the next begins; the latest is open-ended.
            frame["end_time"] = frame["start_time"].shift(-1).fillna(datetime.utcnow())
            frame["start_time"] = frame["start_time"].apply(lambda ts: date_to_str(ts))
            frame["end_time"] = frame["end_time"].apply(lambda ts: date_to_str(ts))

            # Skip devices that stayed at one site the whole time.
            if frame["site_id"].nunique() == 1:
                continue

            for _, entry in frame.iterrows():
                history.append(
                    {
                        "device": entry["device"],
                        "device_id": device["_id"],
                        "start_time": entry["start_time"],
                        "end_time": entry["end_time"],
                        "site_id": entry["site_id"],
                    }
                )
        except Exception as ex:
            print(ex)
            traceback.print_exc()

    return history
def get_airqo_data(freq: str, start_time: str = None, end_time: str = None) -> list:
    """Fetch airqo events at the given frequency and convert them to insights.

    :param freq: "daily" or an hourly-style frequency understood by the API.
    :param start_time: optional window start; defaults to 7 days ago (UTC).
    :param end_time: optional window end; defaults to now (UTC).
    :return: insights produced by format_measurements_to_insights.
    """
    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo", all_devices=False)
    measurements = []

    # Default to the last 7 days when no explicit window is given.
    start = str_to_date(start_time) if start_time else datetime.utcnow() - timedelta(days=7)
    end = str_to_date(end_time) if end_time else datetime.utcnow()

    start_time = date_to_str_days(start) if freq == "daily" else date_to_str_hours(start)
    end_time = date_to_str_days(end) if freq == "daily" else date_to_str_hours(end)

    frequency = get_airqo_api_frequency(freq=freq)
    dates = pd.date_range(start_time, end_time, freq=frequency)
    last_date_time = dates.values[-1]  # negative index instead of len()-1

    for device in devices:
        for date in dates:
            start = date_to_str(date)
            end_date_time = date + timedelta(hours=dates.freq.n)

            # Clamp the final sub-window to the requested end time.
            if np.datetime64(end_date_time) > last_date_time:
                end = end_time
            else:
                end = date_to_str(end_date_time)

            try:
                events = airqo_api.get_events(
                    tenant="airqo",
                    start_time=start,
                    frequency=freq,
                    end_time=end,
                    device=device["name"],
                )
                # Guard: the API may return None/[] for an empty window —
                # skip instead of raising inside extend().
                if events:
                    measurements.extend(events)
            except Exception as ex:
                print(ex)
                traceback.print_exc()

    return format_measurements_to_insights(data=measurements)
def map_site_ids_to_historical_measurements(data: list, deployment_logs: list) -> list:
    """Re-assign each measurement's site_id based on where its device was deployed
    at the measurement's timestamp.

    Falls back to the device's current site when no deployment log covers the
    measurement time.

    :param data: list of measurement dicts (must contain device_id and time).
    :param deployment_logs: deployment intervals from
        extract_airqo_devices_deployment_history.
    :return: measurements with corrected site_id; input unchanged if either list is empty.
    """
    if not deployment_logs or not data:
        return data

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo")
    mapped_data = []

    devices_logs_df = pd.DataFrame(deployment_logs)
    devices_logs_df["start_time"] = devices_logs_df["start_time"].apply(
        lambda x: str_to_date(x)
    )
    devices_logs_df["end_time"] = devices_logs_df["end_time"].apply(
        lambda x: str_to_date(x)
    )

    data = un_fill_nan(data)
    data_df = pd.DataFrame(data)

    for _, data_row in data_df.iterrows():
        device = get_device(devices, device_id=data_row["device_id"])
        if not device:
            continue

        # Robustness fix: a device may not be attached to a site — the original
        # device.get("site").get("_id") raised AttributeError on None.
        site_id = (device.get("site") or {}).get("_id")
        time = str_to_date(data_row["time"])
        device_logs = devices_logs_df[devices_logs_df["device_id"] == device.get("_id")]

        if not device_logs.empty:
            for _, log in device_logs.iterrows():
                # Last matching interval wins (logs already non-overlapping by construction).
                if log["start_time"] <= time <= log["end_time"]:
                    site_id = log["site_id"]

        data_row["site_id"] = site_id
        mapped_data.append(data_row.to_dict())

    return mapped_data
def get_forecast_data(tenant: str) -> list:
    """Fetch next-hour forecasts for every device of a tenant and shape them as insights.

    :param tenant: tenant whose devices are queried.
    :return: list of forecast insight records (rows with NaN pm2_5 removed).
    """
    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant=tenant, all_devices=False)
    forecast_measurements = pd.DataFrame(data=[], columns=insights_columns)
    # Forecast timestamp: one hour from now, as a unix epoch.
    time = int((datetime.utcnow() + timedelta(hours=1)).timestamp())

    for device in devices:
        device_dict = dict(device)
        device_number = device_dict.get("device_number", None)
        site = device_dict.get("site", None)
        if not site:
            print(f"device {device_number} isn't attached to a site.")
            continue
        site_id = site["_id"]

        if device_number:
            forecast = airqo_api.get_forecast(channel_id=device_number, timestamp=time)
            if forecast:
                forecast_df = pd.DataFrame(forecast)

                forecast_cleaned_df = pd.DataFrame(columns=insights_columns)
                forecast_cleaned_df["time"] = forecast_df["prediction_time"]
                forecast_cleaned_df["pm2_5"] = forecast_df["prediction_value"]
                # pm10 forecast mirrors pm2_5 (single prediction_value per row).
                forecast_cleaned_df["pm10"] = forecast_df["prediction_value"]
                forecast_cleaned_df["siteId"] = site_id
                forecast_cleaned_df["frequency"] = "hourly"
                forecast_cleaned_df["forecast"] = True
                forecast_cleaned_df["empty"] = False

                # Fix: DataFrame.append was deprecated in pandas 1.4 and removed
                # in 2.0 — pd.concat is the drop-in equivalent.
                forecast_measurements = pd.concat(
                    [forecast_measurements, forecast_cleaned_df], ignore_index=True
                )

    forecast_measurements["time"] = forecast_measurements["time"].apply(
        lambda x: predict_time_to_string(x)
    )
    forecast_measurements = forecast_measurements[
        forecast_measurements["pm2_5"].notna()
    ]

    return forecast_measurements.to_dict(orient="records")
def extract_airqo_data_from_thingspeak(
    start_time: str, end_time: str, all_devices: bool
) -> list:
    """Download raw channel feeds from ThingSpeak for airqo devices.

    Splits [start_time, end_time] into sub-windows (size from get_frequency),
    queries each device's ThingSpeak channel per sub-window, validates and maps
    the numbered field columns into a flat measurement schema, and returns the
    cleaned records.

    :param start_time: window start (string accepted by pd.date_range and the API).
    :param end_time: window end.
    :param all_devices: forwarded to AirQoApi.get_devices to widen the device set.
    :return: list of raw measurement dicts with invalid dates removed.
    """
    thingspeak_base_url = configuration.THINGSPEAK_CHANNEL_URL
    airqo_api = AirQoApi()
    airqo_devices = airqo_api.get_devices(tenant="airqo", all_devices=all_devices)
    read_keys = airqo_api.get_read_keys(devices=airqo_devices)
    channels_data = []

    frequency = get_frequency(start_time=start_time, end_time=end_time)

    def get_field_8_value(x: str, position: int):
        # field8 packs several extra readings in one comma-separated string;
        # returns None when the value is missing or has too few parts.
        try:
            values = x.split(",")
            return values[position]
        except Exception as exc:
            print(exc)
            return None

    dates = pd.date_range(start_time, end_time, freq=frequency)
    last_date_time = dates.values[len(dates.values) - 1]
    for device in airqo_devices:
        try:
            channel_id = str(device["device_number"])

            for date in dates:
                start = date_to_str(date)
                end_date_time = date + timedelta(hours=dates.freq.n)

                # Clamp the final sub-window to the requested end time.
                if np.datetime64(end_date_time) > last_date_time:
                    end = end_time
                else:
                    end = date_to_str(end_date_time)

                read_key = read_keys[str(channel_id)]

                channel_url = f"{thingspeak_base_url}{channel_id}/feeds.json?start={start}&end={end}&api_key={read_key}"
                print(f"{channel_url}")

                data = json.loads(
                    requests.get(channel_url, timeout=100.0).content.decode("utf-8")
                )
                # NOTE(review): looks like ThingSpeak returns -1 for invalid
                # requests and otherwise a dict with a "feeds" list — confirm.
                if (data != -1) and ("feeds" in data):
                    dataframe = pd.DataFrame(data["feeds"])

                    if dataframe.empty:
                        print(
                            f"{channel_id} does not have data between {start} and {end}"
                        )
                        continue

                    # Target flat schema; unmapped columns stay NaN.
                    channel_df = pd.DataFrame(
                        data=[],
                        columns=[
                            "time",
                            "s1_pm2_5",
                            "s2_pm2_5",
                            "s1_pm10",
                            "device_id",
                            "site_id",
                            "s2_pm10",
                            "latitude",
                            "longitude",
                            "altitude",
                            "wind_speed",
                            "satellites",
                            "hdop",
                            "internalTemperature",
                            "internalHumidity",
                            "battery",
                            "temperature",
                            "humidity",
                            "pressure",
                            "externalAltitude",
                        ],
                    )

                    # field1..field7 carry the primary sensor readings;
                    # get_valid_value range-checks each value by data name.
                    channel_df["s1_pm2_5"] = dataframe["field1"].apply(
                        lambda x: get_valid_value(x, "pm2_5")
                    )
                    channel_df["s1_pm10"] = dataframe["field2"].apply(
                        lambda x: get_valid_value(x, "pm10")
                    )
                    channel_df["s2_pm2_5"] = dataframe["field3"].apply(
                        lambda x: get_valid_value(x, "pm2_5")
                    )
                    channel_df["s2_pm10"] = dataframe["field4"].apply(
                        lambda x: get_valid_value(x, "pm10")
                    )
                    channel_df["latitude"] = dataframe["field5"].apply(
                        lambda x: get_valid_value(x, "latitude")
                    )
                    channel_df["longitude"] = dataframe["field6"].apply(
                        lambda x: get_valid_value(x, "longitude")
                    )
                    channel_df["battery"] = dataframe["field7"].apply(
                        lambda x: get_valid_value(x, "battery")
                    )

                    # field8 (when present) bundles GPS and environment data by
                    # position; these overwrite the field5/field6 lat/lon above.
                    if "field8" in dataframe.columns:
                        try:
                            channel_df["latitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 0), "latitude"
                                )
                            )
                            channel_df["longitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 1), "longitude"
                                )
                            )
                            channel_df["altitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 2), "altitude"
                                )
                            )
                            channel_df["wind_speed"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 3), "wind_speed"
                                )
                            )
                            channel_df["satellites"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 4), "satellites"
                                )
                            )
                            channel_df["hdop"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 5), "hdop"
                                )
                            )
                            # NOTE(review): internal readings are validated with
                            # the "external..." range names — presumably the same
                            # valid range applies to both; confirm.
                            channel_df["internalTemperature"] = dataframe[
                                "field8"
                            ].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 6), "externalTemperature"
                                )
                            )
                            channel_df["internalHumidity"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 7), "externalHumidity"
                                )
                            )
                            channel_df["temperature"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 8), "externalTemperature"
                                )
                            )
                            channel_df["humidity"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 9), "externalHumidity"
                                )
                            )
                            channel_df["pressure"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 10), "pressure"
                                )
                            )
                            channel_df["externalAltitude"] = dataframe["field8"].apply(
                                lambda x: get_valid_value(
                                    get_field_8_value(x, 11), "altitude"
                                )
                            )
                        except Exception as ex:
                            # Best-effort: malformed field8 leaves those columns NaN.
                            traceback.print_exc()
                            print(ex)

                    channel_df["time"] = dataframe["created_at"]
                    channel_df["device_id"] = device["_id"]
                    # NOTE(review): assumes every device here has a site — a
                    # missing one raises and is swallowed by the outer except,
                    # skipping the whole device; confirm that is intended.
                    channel_df["site_id"] = device["site"]["_id"]
                    channel_df["device_number"] = device["device_number"]
                    channel_df["device"] = device["name"]
                    channel_df["frequency"] = "raw"

                    channels_data.extend(channel_df.to_dict(orient="records"))

        except Exception as ex:
            print(ex)
            traceback.print_exc()

    channel_data_df = pd.DataFrame(channels_data)
    clean_channel_data_df = remove_invalid_dates(
        dataframe=channel_data_df, start_time=start_time, end_time=end_time
    )
    return clean_channel_data_df.to_dict(orient="records")
def transform_kcca_measurements_for_api(unclean_data) -> list:
    """Transform raw KCCA measurements into the AirQo events API schema.

    Groups readings by device code, resolves each device's site/device ids,
    and reshapes every row into the nested {"value": ...} measurement format.

    :param unclean_data: iterable of raw KCCA measurement dicts.
    :return: list of measurement dicts ready for the events API.
    """
    data = pd.DataFrame(unclean_data)
    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="kcca")
    device_gps = data.groupby("deviceCode")
    cleaned_measurements = []
    for _, group in device_gps:
        device_name = group.iloc[0]["deviceCode"]
        site_id, device_id = get_site_and_device_id(devices, device_name=device_name)
        # Skip only when BOTH ids are unknown; a partial match is still kept.
        if not site_id and not device_id:
            continue

        transformed_data = []
        columns = group.columns
        # Original bound the unused index; "_" makes that explicit.
        for _, row in group.iterrows():
            # "location.coordinates" holds "[longitude, latitude]" as a string.
            location = str(row["location.coordinates"])
            location = location.replace("[", "").replace("]", "")
            location_coordinates = location.split(",")

            frequency = str(row.get("outputFrequency", "raw"))
            if frequency.lower() == "hour":
                frequency = "hourly"
            elif frequency.lower() == "day":
                frequency = "daily"
            else:
                frequency = "raw"

            # dict literals instead of the redundant dict({...}) wrappers.
            row_data = {
                "frequency": frequency,
                "time": frequency_time(dateStr=row.get("time"), frequency=frequency),
                "tenant": "kcca",
                "site_id": site_id,
                "device_id": device_id,
                "device": row["deviceCode"],
                "location": {
                    "longitude": {"value": to_double(location_coordinates[0])},
                    "latitude": {"value": to_double(location_coordinates[1])},
                },
                "pm2_5": {
                    "value": get_valid_column_value(
                        column_name="characteristics.pm2_5ConcMass.raw",
                        series=row,
                        columns_names=columns,
                        data_name="pm2_5",
                    ),
                    "calibratedValue": get_valid_column_value(
                        column_name="characteristics.pm2_5ConcMass.value",
                        series=row,
                        columns_names=columns,
                        data_name="pm2_5",
                    ),
                },
                "pm1": {
                    "value": get_valid_column_value(
                        column_name="characteristics.pm1ConcMass.raw",
                        series=row,
                        columns_names=columns,
                        data_name=None,
                    ),
                    "calibratedValue": get_valid_column_value(
                        column_name="characteristics.pm1ConcMass.value",
                        series=row,
                        columns_names=columns,
                        data_name=None,
                    ),
                },
                "pm10": {
                    "value": get_valid_column_value(
                        column_name="characteristics.pm10ConcMass.raw",
                        series=row,
                        columns_names=columns,
                        data_name="pm10",
                    ),
                    "calibratedValue": get_valid_column_value(
                        column_name="characteristics.pm10ConcMass.value",
                        series=row,
                        columns_names=columns,
                        data_name="pm10",
                    ),
                },
                "externalTemperature": {
                    "value": get_valid_column_value(
                        column_name="characteristics.temperature.value",
                        series=row,
                        columns_names=columns,
                        data_name="externalTemperature",
                    ),
                },
                "externalHumidity": {
                    "value": get_valid_column_value(
                        column_name="characteristics.relHumid.value",
                        series=row,
                        columns_names=columns,
                        data_name="externalHumidity",
                    ),
                },
                "no2": {
                    "value": get_valid_column_value(
                        column_name="characteristics.no2Conc.raw",
                        series=row,
                        columns_names=columns,
                        data_name=None,
                    ),
                    "calibratedValue": get_valid_column_value(
                        column_name="characteristics.no2Conc.value",
                        series=row,
                        columns_names=columns,
                        data_name=None,
                    ),
                },
                "speed": {
                    "value": get_valid_column_value(
                        column_name="characteristics.windSpeed.value",
                        series=row,
                        columns_names=columns,
                        data_name=None,
                    ),
                },
            }

            transformed_data.append(row_data)

        if transformed_data:
            cleaned_measurements.extend(transformed_data)

    return cleaned_measurements
def transform_kcca_hourly_data_for_bigquery(data: list) -> list:
    """Reshape KCCA hourly measurements into the BigQuery hourly table schema.

    Rows whose device cannot be resolved to a site are dropped. The output
    column set is taken from BigQueryApi().hourly_measurements_columns, so
    missing keys become NaN columns.

    :param data: list of raw KCCA measurement dicts.
    :return: list of row dicts matching the BigQuery hourly measurements table.
    """
    restructured_data = []

    data_df = pd.DataFrame(data)
    columns = list(data_df.columns)

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="kcca")

    for _, data_row in data_df.iterrows():
        device_name = data_row["deviceCode"]
        site_id, _ = get_site_and_device_id(devices, device_name=device_name)
        if not site_id:
            continue

        # "location.coordinates" holds "[longitude, latitude]" as a string.
        location = str(data_row["location.coordinates"])
        location = location.replace("[", "").replace("]", "")
        location_coordinates = location.split(",")

        # NOTE(review): latitude/longitude are kept as raw strings here,
        # unlike the API transform which converts via to_double — confirm
        # BigQuery ingestion coerces them.
        device_data = dict({
            "timestamp": str_to_date(data_row["time"]),
            "tenant": "kcca",
            "site_id": site_id,
            "device_number": 0,  # KCCA devices have no channel number
            "device": device_name,
            "latitude": location_coordinates[1],
            "longitude": location_coordinates[0],
            "pm2_5": get_column_value(
                column="characteristics.pm2_5ConcMass.value",
                columns=columns,
                series=data_row,
            ),
            "s1_pm2_5": get_column_value(column="s1_pm2_5", columns=columns, series=data_row),
            "s2_pm2_5": get_column_value(column="s2_pm2_5", columns=columns, series=data_row),
            "pm2_5_raw_value": get_column_value(
                column="characteristics.pm2_5ConcMass.raw",
                columns=columns,
                series=data_row,
            ),
            "pm2_5_calibrated_value": get_column_value(
                column="characteristics.pm2_5ConcMass.calibratedValue",
                columns=columns,
                series=data_row,
            ),
            "pm10": get_column_value(
                column="characteristics.pm10ConcMass.value",
                columns=columns,
                series=data_row,
            ),
            "s1_pm10": get_column_value(column="s1_pm10", columns=columns, series=data_row),
            "s2_pm10": get_column_value(column="s2_pm10", columns=columns, series=data_row),
            "pm10_raw_value": get_column_value(
                column="characteristics.pm10ConcMass.raw",
                columns=columns,
                series=data_row,
            ),
            "pm10_calibrated_value": get_column_value(
                column="characteristics.pm10ConcMass.calibratedValue",
                columns=columns,
                series=data_row,
            ),
            "no2": get_column_value(
                column="characteristics.no2Conc.value",
                columns=columns,
                series=data_row,
            ),
            "no2_raw_value": get_column_value(
                column="characteristics.no2Conc.raw",
                columns=columns,
                series=data_row,
            ),
            "no2_calibrated_value": get_column_value(
                column="characteristics.no2Conc.calibratedValue",
                columns=columns,
                series=data_row,
            ),
            "pm1": get_column_value(
                column="characteristics.pm1ConcMass.value",
                columns=columns,
                series=data_row,
            ),
            "pm1_raw_value": get_column_value(
                column="characteristics.pm1ConcMass.raw",
                columns=columns,
                series=data_row,
            ),
            "pm1_calibrated_value": get_column_value(
                column="characteristics.pm1ConcMass.calibratedValue",
                columns=columns,
                series=data_row,
            ),
            "altitude": get_column_value(
                column="characteristics.altitude.value",
                columns=columns,
                series=data_row,
            ),
            "wind_speed": get_column_value(
                column="characteristics.windSpeed.value",
                columns=columns,
                series=data_row,
            ),
            "external_temperature": get_column_value(
                column="characteristics.temperature.value",
                columns=columns,
                series=data_row,
            ),
            "external_humidity": get_column_value(
                column="characteristics.relHumid.value",
                columns=columns,
                series=data_row,
            ),
        })

        restructured_data.append(device_data)

    return pd.DataFrame(columns=BigQueryApi().hourly_measurements_columns, data=restructured_data).to_dict(orient="records")
def transform_kcca_data_for_message_broker(data: list, frequency: str) -> list:
    """Reshape KCCA measurements into the flat schema published to the message broker.

    Rows whose device resolves to neither a site nor a device id are dropped.

    :param data: list of raw KCCA measurement dicts.
    :param frequency: measurement frequency, forwarded to frequency_time.
    :return: list of flat measurement dicts.
    """
    restructured_data = []

    data_df = pd.DataFrame(data)
    columns = list(data_df.columns)

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="kcca")

    for _, data_row in data_df.iterrows():
        device_name = data_row["deviceCode"]
        site_id, device_id = get_site_and_device_id(devices, device_name=device_name)
        # Skip only when BOTH ids are unknown; a partial match is still published.
        if not site_id and not device_id:
            continue

        # "location.coordinates" holds "[longitude, latitude]" as a string.
        location = str(data_row["location.coordinates"])
        location = location.replace("[", "").replace("]", "")
        location_coordinates = location.split(",")

        # dict literal instead of the redundant dict({...}) wrapper.
        device_data = {
            "time": frequency_time(dateStr=data_row["time"], frequency=frequency),
            "tenant": "kcca",
            "site_id": site_id,
            "device_id": device_id,
            "device_number": 0,  # KCCA devices have no channel number
            "device": device_name,
            "latitude": location_coordinates[1],
            "longitude": location_coordinates[0],
            "pm2_5": get_column_value(
                column="characteristics.pm2_5ConcMass.value",
                columns=columns,
                series=data_row,
            ),
            "pm10": get_column_value(
                column="characteristics.pm10ConcMass.value",
                columns=columns,
                series=data_row,
            ),
            "s1_pm2_5": get_column_value(
                column="characteristics.pm2_5ConcMass.raw",
                columns=columns,
                series=data_row,
            ),
            "s1_pm10": get_column_value(
                column="characteristics.pm10ConcMass.raw",
                columns=columns,
                series=data_row,
            ),
            # KCCA devices carry a single sensor; second-sensor slots stay empty.
            "s2_pm2_5": None,
            "s2_pm10": None,
            "pm2_5_calibrated_value": get_column_value(
                column="characteristics.pm2_5ConcMass.calibratedValue",
                columns=columns,
                series=data_row,
            ),
            "pm10_calibrated_value": get_column_value(
                column="characteristics.pm10ConcMass.calibratedValue",
                columns=columns,
                series=data_row,
            ),
            "altitude": get_column_value(
                column="characteristics.altitude.value",
                columns=columns,
                series=data_row,
            ),
            "wind_speed": get_column_value(
                column="characteristics.windSpeed.value",
                columns=columns,
                series=data_row,
            ),
            "external_temperature": get_column_value(
                column="characteristics.temperature.value",
                columns=columns,
                series=data_row,
            ),
            "external_humidity": get_column_value(
                column="characteristics.relHumid.value",
                columns=columns,
                series=data_row,
            ),
        }

        restructured_data.append(device_data)

    return restructured_data