def get_frequency(start_time: str, end_time: str) -> str:
    """Choose an hourly frequency string ("<N>H") for the given time window.

    The window length is rounded to whole days:

    * 5 days or more      -> ``"96H"``
    * 1 day or less       -> the window length rounded to whole hours,
                             never less than ``"1H"``
    * anything in between -> the rounded day count expressed in hours

    Args:
        start_time: Start of the window, in a format ``str_to_date`` accepts.
        end_time: End of the window, in a format ``str_to_date`` accepts.

    Returns:
        A frequency string of the form ``"<N>H"``.
    """
    # NOTE(review): assumes str_to_date returns datetime objects, so that the
    # difference is a timedelta.
    elapsed_seconds = (
        str_to_date(end_time) - str_to_date(start_time)
    ).total_seconds()
    diff_days = round(elapsed_seconds / 86400)

    if diff_days >= 5:
        return "96H"

    if diff_days <= 1:
        # Use the full duration here. ``timedelta.seconds`` only holds the
        # sub-day remainder, so a window of exactly 24 hours would report
        # 0 hours and a window of 1 day 2 hours would report 2 hours.
        diff_hours = round(elapsed_seconds / 3600)
        return "1H" if diff_hours <= 0 else f"{diff_hours}H"

    return f"{diff_days * 24}H"
def get_airqo_data(freq: str, start_time: str = None, end_time: str = None) -> list:
    """Collect AirQo events for every device over a time window, as insights.

    The window is split into chunks with ``pd.date_range`` and one
    ``get_events`` request is issued per device per chunk. A failing request
    is printed together with its traceback and then skipped, so one bad
    chunk does not abort the run.

    Args:
        freq: Forwarded to ``get_airqo_api_frequency`` and to ``get_events``.
            The value ``"daily"`` selects day-resolution window bounds; any
            other value selects hour-resolution bounds.
        start_time: Window start; defaults to seven days before the current
            UTC time when falsy.
        end_time: Window end; defaults to the current UTC time when falsy.

    Returns:
        The result of ``format_measurements_to_insights`` applied to all
        collected events.
    """
    api_client = AirQoApi()
    device_list = api_client.get_devices(tenant="airqo", all_devices=False)
    collected = []

    if start_time:
        window_start = str_to_date(start_time)
    else:
        window_start = datetime.utcnow() - timedelta(days=7)

    if end_time:
        window_end = str_to_date(end_time)
    else:
        window_end = datetime.utcnow()

    # Both bounds are rendered with the same resolution.
    to_text = date_to_str_days if freq == "daily" else date_to_str_hours
    start_time = to_text(window_start)
    end_time = to_text(window_end)

    api_frequency = get_airqo_api_frequency(freq=freq)
    chunk_starts = pd.date_range(start_time, end_time, freq=api_frequency)
    final_chunk_start = chunk_starts.values[-1]

    for device in device_list:
        for chunk_start in chunk_starts:
            query_start = date_to_str(chunk_start)
            chunk_end = chunk_start + timedelta(hours=chunk_starts.freq.n)

            # A chunk that would run past the last generated start is
            # clamped to the requested end of the window.
            if np.datetime64(chunk_end) > final_chunk_start:
                query_end = end_time
            else:
                query_end = date_to_str(chunk_end)

            try:
                collected.extend(
                    api_client.get_events(
                        tenant="airqo",
                        start_time=query_start,
                        frequency=freq,
                        end_time=query_end,
                        device=device["name"],
                    )
                )
            except Exception as ex:
                print(ex)
                traceback.print_exc()

    return format_measurements_to_insights(data=collected)
def map_site_ids_to_historical_measurements(data: list, deployment_logs: list) -> list:
    """Attach a ``site_id`` to each measurement using device deployment logs.

    For every measurement whose device can be resolved, the site defaults to
    the one currently recorded on the device. If a deployment log for that
    device covers the measurement time (bounds inclusive), the log's
    ``site_id`` is used instead; when several logs match, the last one wins.
    Measurements whose device cannot be resolved are dropped.

    Args:
        data: Measurement records; each is read for ``device_id`` and
            ``time``.
        deployment_logs: Log records; each is read for ``device_id``,
            ``site_id``, ``start_time`` and ``end_time``.

    Returns:
        ``data`` unchanged when either argument is empty, otherwise a new
        list of measurement dicts that each carry a ``site_id`` key.
    """
    if not deployment_logs or not data:
        return data

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo")

    devices_logs_df = pd.DataFrame(deployment_logs)
    devices_logs_df["start_time"] = devices_logs_df["start_time"].apply(str_to_date)
    devices_logs_df["end_time"] = devices_logs_df["end_time"].apply(str_to_date)

    data_df = pd.DataFrame(un_fill_nan(data))

    mapped_data = []
    for _, data_row in data_df.iterrows():
        device = get_device(devices, device_id=data_row["device_id"])
        if not device:
            continue

        # A device may carry no site at all (key absent or None). Fall back
        # to None instead of raising, so the deployment logs below can still
        # supply the site for this measurement.
        site_id = (device.get("site") or {}).get("_id")
        measured_at = str_to_date(data_row["time"])

        device_logs = devices_logs_df[
            devices_logs_df["device_id"] == device.get("_id")
        ]
        for _, log in device_logs.iterrows():
            if log["start_time"] <= measured_at <= log["end_time"]:
                site_id = log["site_id"]

        data_row["site_id"] = site_id
        mapped_data.append(data_row.to_dict())

    return mapped_data
def measurement_time_to_string(time: str, daily=False):
    """Parse a time string and render it again at day or hour resolution.

    Args:
        time: Value passed to ``str_to_date``.
        daily: When truthy the result comes from ``date_to_str_days``,
            otherwise from ``date_to_str_hours``.

    Returns:
        Whatever the selected formatter returns for the parsed time.
    """
    parsed = str_to_date(time)
    formatter = date_to_str_days if daily else date_to_str_hours
    return formatter(parsed)
def restructure_airqo_data_for_bigquery(data: list) -> list:
    """Reshape AirQo measurement records into the hourly BigQuery layout.

    Each input record contributes one output record. ``time``, ``site_id``,
    ``device_number`` and ``device`` are read directly from the record; all
    other fields are looked up through ``get_column_value``.

    Args:
        data: Measurement records that can be loaded into a DataFrame.

    Returns:
        A list of dicts, one per record, with the columns listed in
        ``BigQueryApi().hourly_measurements_columns``.
    """
    # (output field, source column) pairs resolved via get_column_value.
    field_sources = (
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("pm2_5", "pm2_5"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("pm2_5_raw_value", "raw_pm2_5"),
        ("pm2_5_calibrated_value", "calibrated_pm2_5"),
        ("pm10", "pm10"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm10", "s2_pm10"),
        ("pm10_raw_value", "raw_pm10"),
        ("pm10_calibrated_value", "calibrated_pm10"),
        ("altitude", "altitude"),
        ("wind_speed", "wind_speed"),
        ("external_temperature", "temperature"),
        ("external_humidity", "humidity"),
    )

    frame = pd.DataFrame(data)
    available = list(frame.columns)

    records = []
    for _, row in frame.iterrows():
        record = {
            "timestamp": str_to_date(row["time"]),
            "tenant": "airqo",
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "device": row["device"],
        }
        for target, source in field_sources:
            record[target] = get_column_value(
                column=source, columns=available, series=row
            )
        records.append(record)

    return pd.DataFrame(
        columns=BigQueryApi().hourly_measurements_columns, data=records
    ).to_dict(orient="records")
def transform_kcca_hourly_data_for_bigquery(data: list) -> list:
    """Reshape KCCA measurement records into the hourly BigQuery layout.

    Records whose ``deviceCode`` does not resolve to a site through
    ``get_site_and_device_id`` are skipped. Coordinates are taken from the
    ``location.coordinates`` column; all measurement fields are looked up
    through ``get_column_value``.

    Args:
        data: KCCA measurement records that can be loaded into a DataFrame.

    Returns:
        A list of dicts, one per retained record, with the columns listed in
        ``BigQueryApi().hourly_measurements_columns``.
    """
    # (output field, source column) pairs resolved via get_column_value.
    field_sources = (
        ("pm2_5", "characteristics.pm2_5ConcMass.value"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("pm2_5_raw_value", "characteristics.pm2_5ConcMass.raw"),
        ("pm2_5_calibrated_value", "characteristics.pm2_5ConcMass.calibratedValue"),
        ("pm10", "characteristics.pm10ConcMass.value"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm10", "s2_pm10"),
        ("pm10_raw_value", "characteristics.pm10ConcMass.raw"),
        ("pm10_calibrated_value", "characteristics.pm10ConcMass.calibratedValue"),
        ("no2", "characteristics.no2Conc.value"),
        ("no2_raw_value", "characteristics.no2Conc.raw"),
        ("no2_calibrated_value", "characteristics.no2Conc.calibratedValue"),
        ("pm1", "characteristics.pm1ConcMass.value"),
        ("pm1_raw_value", "characteristics.pm1ConcMass.raw"),
        ("pm1_calibrated_value", "characteristics.pm1ConcMass.calibratedValue"),
        ("altitude", "characteristics.altitude.value"),
        ("wind_speed", "characteristics.windSpeed.value"),
        ("external_temperature", "characteristics.temperature.value"),
        ("external_humidity", "characteristics.relHumid.value"),
    )

    frame = pd.DataFrame(data)
    available = list(frame.columns)

    api_client = AirQoApi()
    kcca_devices = api_client.get_devices(tenant="kcca")

    records = []
    for _, row in frame.iterrows():
        device_name = row["deviceCode"]
        site_id, _ = get_site_and_device_id(kcca_devices, device_name=device_name)
        if not site_id:
            continue

        # Strip the list brackets from the textual form and split on commas.
        # NOTE(review): the parts stay as unparsed strings (possibly with
        # leading whitespace); longitude is read from index 0 and latitude
        # from index 1 - confirm downstream consumers expect that.
        coordinates = (
            str(row["location.coordinates"])
            .replace("[", "")
            .replace("]", "")
            .split(",")
        )

        record = {
            "timestamp": str_to_date(row["time"]),
            "tenant": "kcca",
            "site_id": site_id,
            "device_number": 0,
            "device": device_name,
            "latitude": coordinates[1],
            "longitude": coordinates[0],
        }
        for target, source in field_sources:
            record[target] = get_column_value(
                column=source, columns=available, series=row
            )
        records.append(record)

    return pd.DataFrame(
        columns=BigQueryApi().hourly_measurements_columns, data=records
    ).to_dict(orient="records")