def format_airqo_data_to_insights(data: list):
    """Turn AirQo measurement records into insight entries.

    The records are loaded into a DataFrame and walked row by row. ``time``,
    ``site_id`` and ``frequency`` are read directly from each row, while
    ``pm2_5`` and ``pm10`` are looked up through ``get_column_value``. Every
    entry is flagged ``empty=False`` and ``forecast=False``.

    Args:
        data: Measurement records accepted by ``pd.DataFrame``.

    Returns:
        Whatever ``create_insights_data`` returns for the built entries.
    """
    frame = pd.DataFrame(data)
    available = list(frame.columns)

    insights = [
        {
            "time": row["time"],
            "siteId": row["site_id"],
            "frequency": row["frequency"],
            "pm2_5": get_column_value(column="pm2_5", columns=available, series=row),
            "pm10": get_column_value(column="pm10", columns=available, series=row),
            "empty": False,
            "forecast": False,
        }
        for _, row in frame.iterrows()
    ]
    return create_insights_data(data=insights)
def restructure_airqo_data_for_bigquery(data: list) -> list:
    """Reshape AirQo measurement records into BigQuery hourly-measurement rows.

    ``time`` (converted with ``str_to_date``), ``site_id``, ``device_number``
    and ``device`` are read directly from each row; the remaining fields are
    looked up through ``get_column_value``. The rows are finally loaded into a
    DataFrame whose columns are ``BigQueryApi().hourly_measurements_columns``
    and exported as a list of dicts.

    Args:
        data: Measurement records accepted by ``pd.DataFrame``.

    Returns:
        One dict per input row, keyed by the hourly-measurements column list.
    """
    # (output field, source column) pairs resolved through get_column_value.
    looked_up_fields = (
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("pm2_5", "pm2_5"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("pm2_5_raw_value", "raw_pm2_5"),
        ("pm2_5_calibrated_value", "calibrated_pm2_5"),
        ("pm10", "pm10"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm10", "s2_pm10"),
        ("pm10_raw_value", "raw_pm10"),
        ("pm10_calibrated_value", "calibrated_pm10"),
        ("altitude", "altitude"),
        ("wind_speed", "wind_speed"),
        ("external_temperature", "temperature"),
        ("external_humidity", "humidity"),
    )

    frame = pd.DataFrame(data)
    available = list(frame.columns)

    rows = []
    for _, row in frame.iterrows():
        record = {
            "timestamp": str_to_date(row["time"]),
            "tenant": "airqo",
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "device": row["device"],
        }
        for target, source in looked_up_fields:
            record[target] = get_column_value(
                column=source, columns=available, series=row
            )
        rows.append(record)

    schema_columns = BigQueryApi().hourly_measurements_columns
    return pd.DataFrame(columns=schema_columns, data=rows).to_dict(orient="records")
def restructure_airqo_data_for_message_broker(data: list) -> list:
    """Reshape AirQo measurement records into message-broker payloads.

    ``time``, ``site_id``, ``device_number``, ``frequency`` and ``device`` are
    read directly from each row and the tenant is fixed to ``"airqo"``. The
    remaining fields are looked up through ``get_column_value``.

    Args:
        data: Measurement records accepted by ``pd.DataFrame``.

    Returns:
        One payload dict per input row, in input order.
    """
    # (payload field, source column) pairs resolved through get_column_value.
    looked_up_fields = (
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("pm2_5", "pm2_5"),
        ("pm10", "pm10"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("s2_pm10", "s2_pm10"),
        ("pm2_5_calibrated_value", "calibrated_pm2_5"),
        ("pm10_calibrated_value", "calibrated_pm10"),
        ("altitude", "altitude"),
        ("wind_speed", "wind_speed"),
        ("external_temperature", "temperature"),
        ("external_humidity", "humidity"),
    )

    frame = pd.DataFrame(data)
    available = list(frame.columns)

    payloads = []
    for _, row in frame.iterrows():
        identity = {
            "time": row["time"],
            "tenant": "airqo",
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "frequency": row["frequency"],
            "device": row["device"],
        }
        readings = {
            target: get_column_value(column=source, columns=available, series=row)
            for target, source in looked_up_fields
        }
        payloads.append({**identity, **readings})

    return payloads
def restructure_airqo_data_for_api(data: list) -> list:
    """Reshape AirQo measurement records into nested API payloads.

    Identity fields (``device``, ``device_id``, ``site_id``,
    ``device_number``, ``frequency``, ``time``) and the ``average_*`` values
    are read directly from each row. Every other reading is looked up through
    ``get_column_value`` and wrapped as ``{"value": ...}``.

    Args:
        data: Measurement records accepted by ``pd.DataFrame``.

    Returns:
        One payload dict per input row, in input order.
    """
    frame = pd.DataFrame(data)
    available = list(frame.columns)

    def reading(row, column):
        """Wrap the looked-up value of *column* for *row* as ``{"value": ...}``."""
        return {
            "value": get_column_value(column=column, series=row, columns=available)
        }

    payloads = []
    for _, row in frame.iterrows():
        payload = {
            "device": row["device"],
            "device_id": row["device_id"],
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "tenant": "airqo",
            "location": {
                "latitude": reading(row, "latitude"),
                "longitude": reading(row, "longitude"),
            },
            "frequency": row["frequency"],
            "time": row["time"],
            "average_pm2_5": {
                "value": row["raw_pm2_5"],
                "calibratedValue": row["pm2_5"],
            },
            "average_pm10": {
                "value": row["raw_pm10"],
                "calibratedValue": row["pm10"],
            },
            # NOTE: "pm2_5"/"pm10" carry the s1 sensor columns here. A disabled
            # variant built them as {"value", "rawValue", "calibratedValue"}
            # from the pm2_5/raw_pm2_5/calibrated_pm2_5 columns (and the pm10
            # equivalents).
            "pm2_5": reading(row, "s1_pm2_5"),
            "pm10": reading(row, "s1_pm10"),
            "s1_pm2_5": reading(row, "s1_pm2_5"),
            "s1_pm10": reading(row, "s1_pm10"),
            "s2_pm2_5": reading(row, "s2_pm2_5"),
            "s2_pm10": reading(row, "s2_pm10"),
            "battery": reading(row, "voltage"),
            "altitude": reading(row, "altitude"),
            "speed": reading(row, "wind_speed"),
            "satellites": reading(row, "no_sats"),
            "hdop": reading(row, "hdope"),
            "externalTemperature": reading(row, "temperature"),
            "externalHumidity": reading(row, "humidity"),
        }
        payloads.append(payload)

    return payloads
def transform_kcca_hourly_data_for_bigquery(data: list) -> list:
    """Reshape KCCA hourly measurements into BigQuery hourly-measurement rows.

    The KCCA device list is fetched once through ``AirQoApi``. Each row's
    ``deviceCode`` is resolved with ``get_site_and_device_id``, and rows
    without a site id are skipped. Measurement fields are looked up through
    ``get_column_value``. The rows are finally loaded into a DataFrame whose
    columns are ``BigQueryApi().hourly_measurements_columns`` and exported as
    a list of dicts.

    Coordinates are parsed from the textual form of ``location.coordinates``.
    Each part is stripped of surrounding whitespace, so latitude no longer
    carries the leading space left by the ``", "`` separator. If fewer than
    two parts are present, both coordinates are set to ``None`` instead of
    raising ``IndexError`` and aborting the whole batch.

    Args:
        data: Measurement records accepted by ``pd.DataFrame``.

    Returns:
        One dict per retained row, keyed by the hourly-measurements column list.
    """
    # (output field, source column) pairs resolved through get_column_value.
    looked_up_fields = (
        ("pm2_5", "characteristics.pm2_5ConcMass.value"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("pm2_5_raw_value", "characteristics.pm2_5ConcMass.raw"),
        ("pm2_5_calibrated_value", "characteristics.pm2_5ConcMass.calibratedValue"),
        ("pm10", "characteristics.pm10ConcMass.value"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm10", "s2_pm10"),
        ("pm10_raw_value", "characteristics.pm10ConcMass.raw"),
        ("pm10_calibrated_value", "characteristics.pm10ConcMass.calibratedValue"),
        ("no2", "characteristics.no2Conc.value"),
        ("no2_raw_value", "characteristics.no2Conc.raw"),
        ("no2_calibrated_value", "characteristics.no2Conc.calibratedValue"),
        ("pm1", "characteristics.pm1ConcMass.value"),
        ("pm1_raw_value", "characteristics.pm1ConcMass.raw"),
        ("pm1_calibrated_value", "characteristics.pm1ConcMass.calibratedValue"),
        ("altitude", "characteristics.altitude.value"),
        ("wind_speed", "characteristics.windSpeed.value"),
        ("external_temperature", "characteristics.temperature.value"),
        ("external_humidity", "characteristics.relHumid.value"),
    )

    restructured_data = []
    data_df = pd.DataFrame(data)
    columns = list(data_df.columns)

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="kcca")

    for _, data_row in data_df.iterrows():
        device_name = data_row["deviceCode"]
        site_id, _device_id = get_site_and_device_id(devices, device_name=device_name)
        if not site_id:
            continue

        # Index 0 is used as longitude and index 1 as latitude, exactly as in
        # the original mapping.
        raw_location = str(data_row["location.coordinates"])
        coordinates = [
            part.strip()
            for part in raw_location.replace("[", "").replace("]", "").split(",")
        ]
        if len(coordinates) >= 2:
            longitude, latitude = coordinates[0], coordinates[1]
        else:
            longitude = latitude = None

        device_data = {
            "timestamp": str_to_date(data_row["time"]),
            "tenant": "kcca",
            "site_id": site_id,
            "device_number": 0,
            "device": device_name,
            "latitude": latitude,
            "longitude": longitude,
        }
        for target, source in looked_up_fields:
            device_data[target] = get_column_value(
                column=source, columns=columns, series=data_row
            )
        restructured_data.append(device_data)

    return pd.DataFrame(
        columns=BigQueryApi().hourly_measurements_columns, data=restructured_data
    ).to_dict(orient="records")
def transform_kcca_data_for_message_broker(data: list, frequency: str) -> list:
    """Reshape KCCA measurements into message-broker payloads.

    The KCCA device list is fetched once through ``AirQoApi``. Each row's
    ``deviceCode`` is resolved with ``get_site_and_device_id``, and a row is
    skipped only when both the site id and the device id are missing.
    Measurement fields are looked up through ``get_column_value``, and the
    ``s2_*`` fields are always ``None``.

    Coordinates are parsed from the textual form of ``location.coordinates``.
    Each part is stripped of surrounding whitespace, so latitude no longer
    carries the leading space left by the ``", "`` separator. If fewer than
    two parts are present, both coordinates are set to ``None`` instead of
    raising ``IndexError`` and aborting the whole batch.

    Args:
        data: Measurement records accepted by ``pd.DataFrame``.
        frequency: Passed to ``frequency_time`` together with each row's
            ``time`` to produce the payload ``time``.

    Returns:
        One payload dict per retained row, in input order.
    """
    restructured_data = []
    data_df = pd.DataFrame(data)
    columns = list(data_df.columns)

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="kcca")

    for _, data_row in data_df.iterrows():

        def lookup(column, series=data_row):
            """Resolve *column* for the current row via ``get_column_value``."""
            return get_column_value(column=column, columns=columns, series=series)

        device_name = data_row["deviceCode"]
        site_id, device_id = get_site_and_device_id(devices, device_name=device_name)
        if not site_id and not device_id:
            continue

        # Index 0 is used as longitude and index 1 as latitude, exactly as in
        # the original mapping.
        raw_location = str(data_row["location.coordinates"])
        coordinates = [
            part.strip()
            for part in raw_location.replace("[", "").replace("]", "").split(",")
        ]
        if len(coordinates) >= 2:
            longitude, latitude = coordinates[0], coordinates[1]
        else:
            longitude = latitude = None

        device_data = {
            "time": frequency_time(dateStr=data_row["time"], frequency=frequency),
            "tenant": "kcca",
            "site_id": site_id,
            "device_id": device_id,
            "device_number": 0,
            "device": device_name,
            "latitude": latitude,
            "longitude": longitude,
            "pm2_5": lookup("characteristics.pm2_5ConcMass.value"),
            "pm10": lookup("characteristics.pm10ConcMass.value"),
            "s1_pm2_5": lookup("characteristics.pm2_5ConcMass.raw"),
            "s1_pm10": lookup("characteristics.pm10ConcMass.raw"),
            "s2_pm2_5": None,
            "s2_pm10": None,
            "pm2_5_calibrated_value": lookup(
                "characteristics.pm2_5ConcMass.calibratedValue"
            ),
            "pm10_calibrated_value": lookup(
                "characteristics.pm10ConcMass.calibratedValue"
            ),
            "altitude": lookup("characteristics.altitude.value"),
            "wind_speed": lookup("characteristics.windSpeed.value"),
            "external_temperature": lookup("characteristics.temperature.value"),
            "external_humidity": lookup("characteristics.relHumid.value"),
        }
        restructured_data.append(device_data)

    return restructured_data