def create_empty_insights():
    """Build placeholder hourly and daily insights for every AirQo site.

    One record is produced per (timestamp, site) pair, first for the hourly
    range and then for the daily range.  Each record is flagged with
    ``"empty": True`` and ``"forecast": False`` and carries random ``pm2_5``
    and ``pm10`` values drawn uniformly from 50.0 to 150.0.

    The time window is read from ``start_date_time`` and ``end_date_time``,
    which are not parameters of this function; they must be available in the
    surrounding scope when it is called.

    Returns:
        dict: ``{"data": ...}`` where the value is whatever
        ``fill_nan(data=insights)`` returns for the generated records.
    """
    import random

    import pandas as pd

    from airqo_etl_utils.airqo_api import AirQoApi
    from airqo_etl_utils.commons import fill_nan
    from airqo_etl_utils.date import date_to_str_days, date_to_str_hours

    sites = AirQoApi().get_sites(tenant="airqo")

    # (pandas step, timestamp formatter, label stored in the record);
    # hourly records are generated before daily ones.
    schedule = (
        ("1H", date_to_str_hours, "HOURLY"),
        ("24H", date_to_str_days, "DAILY"),
    )

    insights = []
    for step, format_time, frequency_label in schedule:
        for moment in pd.date_range(start_date_time, end_date_time, freq=step):
            time_label = format_time(moment)
            for site in sites:
                # Best effort: a site that cannot be turned into a record
                # (e.g. it has no "_id") is reported and skipped.
                try:
                    insights.append(
                        {
                            "time": time_label,
                            "pm2_5": random.uniform(50.0, 150.0),
                            "pm10": random.uniform(50.0, 150.0),
                            "empty": True,
                            "frequency": frequency_label,
                            "forecast": False,
                            "siteId": site["_id"],
                        }
                    )
                except Exception as ex:
                    print(ex)

    return {"data": fill_nan(data=insights)}
def get_weather_data_from_tahmo(start_time=None, end_time=None, tenant="airqo"):
    """Fetch TAHMO measurements for the stations nearest to a tenant's sites.

    The period between ``start_time`` and ``end_time`` is split into windows
    using the frequency returned by ``get_frequency``; measurements are
    requested from ``TahmoApi`` one window at a time.

    Args:
        start_time: Start of the period, passed to ``pd.date_range``.
        end_time: End of the period; also used verbatim as the end of the
            last window (it is concatenated into a printed string, so a
            string is expected).
        tenant: Tenant whose sites are looked up.

    Returns:
        list: Measurement records (``DataFrame.to_dict(orient="records")``).
        When nothing was fetched the list is empty; otherwise the records
        are first passed through ``remove_invalid_dates``.
    """
    sites = AirQoApi().get_sites(tenant=tenant)

    station_codes = []
    for site in sites:
        # Best effort: a malformed site entry is reported and skipped.
        try:
            if "nearest_tahmo_station" not in dict(site):
                continue
            station_codes.append(site["nearest_tahmo_station"]["code"])
        except Exception as ex:
            print(ex)

    collected = []
    tahmo_api = TahmoApi()
    frequency = get_frequency(start_time=start_time, end_time=end_time)
    dates = pd.date_range(start_time, end_time, freq=frequency)
    final_window_start = dates.values[-1]

    for window_start in dates:
        start = date_to_str(window_start)
        # The window length is taken as `freq.n` hours.
        window_end = window_start + timedelta(hours=dates.freq.n)
        # A window that would run past the last generated date is clamped
        # to the caller's end_time.
        end = (
            end_time
            if np.datetime64(window_end) > final_window_start
            else date_to_str(window_end)
        )
        print(start + " : " + end)

        collected.extend(tahmo_api.get_measurements(start, end, station_codes))

    if not collected:
        empty_df = pd.DataFrame([], columns=["value", "variable", "station", "time"])
        return empty_df.to_dict(orient="records")

    fetched_df = pd.DataFrame(data=collected)
    cleaned_df = remove_invalid_dates(
        dataframe=fetched_df, start_time=start_time, end_time=end_time
    )
    return cleaned_df.to_dict(orient="records")
def extract_sites_meta_data(tenant=None) -> list:
    """Return site metadata records with site-specific fields prefixed ``site_``.

    Sites are fetched from ``AirQoApi`` for ``tenant``, reduced to a fixed
    set of columns, and the id, coordinate, naming, road-distance and
    terrain columns are renamed with a ``site_`` prefix.  The tenant and
    administrative-area columns keep their original names.

    Args:
        tenant: Tenant passed through to ``AirQoApi.get_sites``.

    Returns:
        list: One dict per site (``DataFrame.to_dict(orient="records")``).

    Raises:
        KeyError: Raised by pandas if any of the selected columns is missing
            from the fetched sites.
    """
    selected_columns = [
        "_id",
        "latitude",
        "tenant",
        "longitude",
        "name",
        "bearing_to_kampala_center",
        "landform_90",
        "distance_to_kampala_center",
        "altitude",
        "landform_270",
        "aspect",
        "description",
        "distance_to_nearest_tertiary_road",
        "distance_to_nearest_primary_road",
        "distance_to_nearest_road",
        "distance_to_nearest_residential_road",
        "distance_to_nearest_secondary_road",
        "distance_to_nearest_unclassified_road",
        "country",
        "region",
        "parish",
        "sub_county",
        "county",
        "district",
        "city",
    ]
    renamed_columns = {
        "_id": "site_id",
        "latitude": "site_latitude",
        "longitude": "site_longitude",
        "description": "site_description",
        "altitude": "site_altitude",
        "name": "site_name",
        "distance_to_nearest_tertiary_road": "site_distance_to_nearest_tertiary_road",
        "distance_to_nearest_primary_road": "site_distance_to_nearest_primary_road",
        "distance_to_nearest_road": "site_distance_to_nearest_road",
        "distance_to_nearest_residential_road": "site_distance_to_nearest_residential_road",
        "distance_to_nearest_secondary_road": "site_distance_to_nearest_secondary_road",
        "distance_to_nearest_unclassified_road": "site_distance_to_nearest_unclassified_road",
        "bearing_to_kampala_center": "site_bearing_to_kampala_center",
        "landform_90": "site_landform_90",
        "distance_to_kampala_center": "site_distance_to_kampala_center",
        "landform_270": "site_landform_270",
        "aspect": "site_aspect",
    }

    fetched_sites = AirQoApi().get_sites(tenant=tenant)
    meta_df = (
        pd.DataFrame(fetched_sites)[selected_columns]
        .rename(columns=renamed_columns)
        .reset_index(drop=True)
    )
    return meta_df.to_dict(orient="records")
def resample_weather_data(data: list, frequency: str):
    """Resample raw weather measurements per station and copy them to devices.

    Temperature (``"te"``), humidity (``"rh"``) and wind speed (``"ws"``)
    rows are extracted from ``data``, resampled per station with
    ``resample_data`` and merged on ``"time"``.  The merged frame is then
    duplicated for every device id that ``get_device_ids_from_station``
    returns for that station.

    Args:
        data: Raw measurement records; each is expected to provide the
            ``value``, ``variable``, ``station`` and ``time`` fields.
        frequency: Resampling frequency handed to ``resample_data`` and
            stored in every output record under ``"frequency"``.

    Returns:
        list: Dict records with ``temperature``, ``humidity``,
        ``wind_speed``, ``time``, ``frequency`` and ``device_id`` keys.
        Records carry no site id.  An empty list is returned when ``data``
        produces an empty DataFrame.
    """
    weather_raw_data = pd.DataFrame(data)
    if weather_raw_data.empty:
        return weather_raw_data.to_dict(orient="records")

    airqo_api = AirQoApi()
    sites = airqo_api.get_sites(tenant="airqo")
    # Only sites that declare a nearest TAHMO station can be matched to one.
    valid_sites = [
        site for site in sites if "nearest_tahmo_station" in dict(site)
    ]

    columns = ["value", "variable", "station", "time"]
    variable = weather_raw_data["variable"]
    temperature = weather_raw_data.loc[variable == "te", columns]
    # Explicit copy: the humidity values are rewritten below, and assigning
    # into a bare slice of `weather_raw_data` is a chained assignment.
    humidity = weather_raw_data.loc[variable == "rh", columns].copy()
    wind_speed = weather_raw_data.loc[variable == "ws", columns]

    # Humidity is scaled by 100 (presumably fraction -> percent; confirm
    # against the TAHMO API).  Unparseable values become NaN here.
    humidity["value"] = pd.to_numeric(humidity["value"], errors="coerce") * 100

    measurements = pd.concat([temperature, humidity, wind_speed])
    measurements.reset_index(inplace=True)
    measurements["value"] = pd.to_numeric(
        measurements["value"], errors="coerce", downcast="float"
    )
    # Every missing cell, including values that failed to parse, becomes 0.
    measurements = measurements.fillna(0)

    # (variable code in the raw data, column name in the output records)
    variables = (("te", "temperature"), ("rh", "humidity"), ("ws", "wind_speed"))

    devices_weather_data = []
    for _, station_group in measurements.groupby("station"):
        device_weather_data = []
        station = station_group.iloc[0]["station"]

        # Best effort: a station that fails to resample or map is reported
        # and skipped so the remaining stations are still processed.
        try:
            resampled_frames = []
            for code, column in variables:
                resampled = resample_data(
                    station_group.loc[
                        station_group["variable"] == code, ["value", "time"]
                    ],
                    frequency,
                )
                resampled.columns = [column, "time"]
                resampled_frames.append(resampled)

            # Outer merge keeps timestamps that only some variables have.
            station_df = reduce(
                lambda left, right: pd.merge(
                    left, right, on=["time"], how="outer"
                ),
                resampled_frames,
            )
            station_df["frequency"] = frequency

            station_devices = get_device_ids_from_station(station, valid_sites)
            if len(station_devices) == 0:
                continue

            # Each device attached to the station gets its own copy of the
            # station's resampled data.
            for device_id in station_devices:
                device_station_df = station_df.copy(deep=True)
                device_station_df["device_id"] = device_id
                device_weather_data.extend(
                    device_station_df.to_dict(orient="records")
                )

        except Exception as ex:
            print(ex)
            traceback.print_exc()
            continue

        devices_weather_data.extend(device_weather_data)

    return devices_weather_data