Exemple #1
0
def format_airqo_data_to_insights(data: list):
    """Convert AirQo measurement rows into insights records.

    Every row of ``data`` is reduced to its time, site id, frequency and the
    ``pm2_5``/``pm10`` readings, marked as neither empty nor forecast, and
    the collected records are handed to ``create_insights_data``.

    Args:
        data: Row records loadable by ``pd.DataFrame``. Rows are indexed
            directly by ``time``, ``site_id`` and ``frequency``, so those
            columns must exist whenever there is at least one row.

    Returns:
        Whatever ``create_insights_data`` returns for the built records.
    """
    frame = pd.DataFrame(data)
    available_columns = list(frame.columns)

    insights = []
    for _, row in frame.iterrows():
        insight = {
            "time": row["time"],
            "siteId": row["site_id"],
            "frequency": row["frequency"],
        }
        # Pollutant readings are looked up through get_column_value
        # (defined elsewhere) instead of direct indexing.
        for pollutant in ("pm2_5", "pm10"):
            insight[pollutant] = get_column_value(
                column=pollutant, columns=available_columns, series=row
            )
        insight["empty"] = False
        insight["forecast"] = False
        insights.append(insight)

    return create_insights_data(data=insights)
def restructure_airqo_data_for_bigquery(data: list) -> list:
    """Reshape AirQo measurement rows into BigQuery hourly records.

    Each row becomes a dict tagged with tenant ``"airqo"``; the dicts are
    then loaded into a DataFrame constructed with
    ``BigQueryApi().hourly_measurements_columns`` as its columns and
    exported with ``to_dict(orient="records")``.

    Args:
        data: Row records loadable by ``pd.DataFrame``. ``time``,
            ``site_id``, ``device_number`` and ``device`` are indexed
            directly; all other fields go through ``get_column_value``.

    Returns:
        A list of record dicts, one per input row.
    """
    # Output field -> source column, resolved via get_column_value.
    looked_up_fields = {
        "latitude": "latitude",
        "longitude": "longitude",
        "pm2_5": "pm2_5",
        "s1_pm2_5": "s1_pm2_5",
        "s2_pm2_5": "s2_pm2_5",
        "pm2_5_raw_value": "raw_pm2_5",
        "pm2_5_calibrated_value": "calibrated_pm2_5",
        "pm10": "pm10",
        "s1_pm10": "s1_pm10",
        "s2_pm10": "s2_pm10",
        "pm10_raw_value": "raw_pm10",
        "pm10_calibrated_value": "calibrated_pm10",
        "altitude": "altitude",
        "wind_speed": "wind_speed",
        "external_temperature": "temperature",
        "external_humidity": "humidity",
    }

    frame = pd.DataFrame(data)
    available_columns = list(frame.columns)

    records = []
    for _, row in frame.iterrows():
        record = {
            "timestamp": str_to_date(row["time"]),
            "tenant": "airqo",
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "device": row["device"],
        }
        for field, source in looked_up_fields.items():
            record[field] = get_column_value(
                column=source, columns=available_columns, series=row
            )
        records.append(record)

    # The column list is fetched only after all rows have been converted.
    bigquery_frame = pd.DataFrame(
        columns=BigQueryApi().hourly_measurements_columns, data=records
    )
    return bigquery_frame.to_dict(orient="records")
def restructure_airqo_data_for_message_broker(data: list) -> list:
    """Reshape AirQo measurement rows into flat message-broker payloads.

    Args:
        data: Row records loadable by ``pd.DataFrame``. ``time``,
            ``site_id``, ``device_number``, ``frequency`` and ``device``
            are indexed directly; the remaining fields are resolved with
            ``get_column_value``.

    Returns:
        One dict per input row, tagged with tenant ``"airqo"``. An empty
        input yields an empty list.
    """
    # (output field, source column) pairs resolved via get_column_value.
    looked_up_fields = (
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("pm2_5", "pm2_5"),
        ("pm10", "pm10"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("s2_pm10", "s2_pm10"),
        ("pm2_5_calibrated_value", "calibrated_pm2_5"),
        ("pm10_calibrated_value", "calibrated_pm10"),
        ("altitude", "altitude"),
        ("wind_speed", "wind_speed"),
        ("external_temperature", "temperature"),
        ("external_humidity", "humidity"),
    )

    frame = pd.DataFrame(data)
    available_columns = list(frame.columns)

    messages = []
    for _, row in frame.iterrows():
        identity = {
            "time": row["time"],
            "tenant": "airqo",
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "frequency": row["frequency"],
            "device": row["device"],
        }
        readings = {
            field: get_column_value(
                column=source, columns=available_columns, series=row
            )
            for field, source in looked_up_fields
        }
        messages.append({**identity, **readings})

    return messages
def restructure_airqo_data_for_api(data: list) -> list:
    """Reshape AirQo measurement rows into nested API payloads.

    Most readings are emitted as ``{"value": ...}`` objects resolved with
    ``get_column_value``. The ``average_pm2_5``/``average_pm10`` objects
    instead index the row directly: ``value`` comes from the ``raw_*``
    column and ``calibratedValue`` from the ``pm2_5``/``pm10`` column.

    Args:
        data: Row records loadable by ``pd.DataFrame``. ``device``,
            ``device_id``, ``site_id``, ``device_number``, ``frequency``,
            ``time``, ``raw_pm2_5``, ``pm2_5``, ``raw_pm10`` and ``pm10``
            are indexed directly and so must be present.

    Returns:
        One dict per input row, tagged with tenant ``"airqo"``. An empty
        input yields an empty list.
    """
    # (output field, source column) pairs emitted as {"value": ...}.
    # Note that "pm2_5" and "pm10" are populated from the s1_* columns.
    value_fields = (
        ("pm2_5", "s1_pm2_5"),
        ("pm10", "s1_pm10"),
        ("s1_pm2_5", "s1_pm2_5"),
        ("s1_pm10", "s1_pm10"),
        ("s2_pm2_5", "s2_pm2_5"),
        ("s2_pm10", "s2_pm10"),
        ("battery", "voltage"),
        ("altitude", "altitude"),
        ("speed", "wind_speed"),
        ("satellites", "no_sats"),
        ("hdop", "hdope"),
        ("externalTemperature", "temperature"),
        ("externalHumidity", "humidity"),
    )

    frame = pd.DataFrame(data)
    available_columns = list(frame.columns)

    def as_value(column, row):
        """Wrap the looked-up reading of ``column`` in a ``{"value": ...}`` dict."""
        reading = get_column_value(
            column=column, series=row, columns=available_columns
        )
        return {"value": reading}

    payloads = []
    for _, row in frame.iterrows():
        payload = {
            "device": row["device"],
            "device_id": row["device_id"],
            "site_id": row["site_id"],
            "device_number": row["device_number"],
            "tenant": "airqo",
            "location": {
                "latitude": as_value("latitude", row),
                "longitude": as_value("longitude", row),
            },
            "frequency": row["frequency"],
            "time": row["time"],
            "average_pm2_5": {
                "value": row["raw_pm2_5"],
                "calibratedValue": row["pm2_5"],
            },
            "average_pm10": {
                "value": row["raw_pm10"],
                "calibratedValue": row["pm10"],
            },
        }
        for field, source in value_fields:
            payload[field] = as_value(source, row)
        payloads.append(payload)

    return payloads
def transform_kcca_hourly_data_for_bigquery(data: list) -> list:
    """Reshape KCCA hourly measurements into BigQuery hourly records.

    The KCCA device list is fetched once through ``AirQoApi``; each row's
    ``deviceCode`` is resolved to a site with ``get_site_and_device_id``
    and rows without a site id are skipped. The surviving records are
    loaded into a DataFrame constructed with
    ``BigQueryApi().hourly_measurements_columns`` as its columns and
    exported with ``to_dict(orient="records")``.

    Args:
        data: Row records loadable by ``pd.DataFrame``. ``deviceCode``,
            ``location.coordinates`` and ``time`` are indexed directly.

    Returns:
        A list of record dicts, one per retained row.
    """
    # Output field -> source column, resolved via get_column_value.
    looked_up_fields = {
        "pm2_5": "characteristics.pm2_5ConcMass.value",
        "s1_pm2_5": "s1_pm2_5",
        "s2_pm2_5": "s2_pm2_5",
        "pm2_5_raw_value": "characteristics.pm2_5ConcMass.raw",
        "pm2_5_calibrated_value": "characteristics.pm2_5ConcMass.calibratedValue",
        "pm10": "characteristics.pm10ConcMass.value",
        "s1_pm10": "s1_pm10",
        "s2_pm10": "s2_pm10",
        "pm10_raw_value": "characteristics.pm10ConcMass.raw",
        "pm10_calibrated_value": "characteristics.pm10ConcMass.calibratedValue",
        "no2": "characteristics.no2Conc.value",
        "no2_raw_value": "characteristics.no2Conc.raw",
        "no2_calibrated_value": "characteristics.no2Conc.calibratedValue",
        "pm1": "characteristics.pm1ConcMass.value",
        "pm1_raw_value": "characteristics.pm1ConcMass.raw",
        "pm1_calibrated_value": "characteristics.pm1ConcMass.calibratedValue",
        "altitude": "characteristics.altitude.value",
        "wind_speed": "characteristics.windSpeed.value",
        "external_temperature": "characteristics.temperature.value",
        "external_humidity": "characteristics.relHumid.value",
    }

    frame = pd.DataFrame(data)
    available_columns = list(frame.columns)

    kcca_devices = AirQoApi().get_devices(tenant="kcca")

    records = []
    for _, row in frame.iterrows():
        device_code = row["deviceCode"]
        site_id, _unused_device_id = get_site_and_device_id(
            kcca_devices, device_name=device_code
        )
        if not site_id:
            continue

        # The coordinates are stringified, stripped of brackets and split on
        # commas; index 0 is used as longitude and index 1 as latitude.
        # NOTE(review): the pieces stay strings (no numeric conversion, no
        # whitespace trimming) - confirm that is what BigQuery expects.
        coordinates = (
            str(row["location.coordinates"])
            .replace("[", "")
            .replace("]", "")
            .split(",")
        )

        record = {
            "timestamp": str_to_date(row["time"]),
            "tenant": "kcca",
            "site_id": site_id,
            "device_number": 0,
            "device": device_code,
            "latitude": coordinates[1],
            "longitude": coordinates[0],
        }
        for field, source in looked_up_fields.items():
            record[field] = get_column_value(
                column=source, columns=available_columns, series=row
            )
        records.append(record)

    # The column list is fetched only after all rows have been converted.
    bigquery_frame = pd.DataFrame(
        columns=BigQueryApi().hourly_measurements_columns, data=records
    )
    return bigquery_frame.to_dict(orient="records")
def transform_kcca_data_for_message_broker(data: list, frequency: str) -> list:
    """Reshape KCCA measurements into flat message-broker payloads.

    The KCCA device list is fetched once through ``AirQoApi``; each row's
    ``deviceCode`` is resolved with ``get_site_and_device_id`` and the row
    is skipped only when neither a site id nor a device id comes back.

    Args:
        data: Row records loadable by ``pd.DataFrame``. ``deviceCode``,
            ``location.coordinates`` and ``time`` are indexed directly.
        frequency: Passed to ``frequency_time`` together with each row's
            ``time`` to produce the payload's ``time`` value.

    Returns:
        One dict per retained row, tagged with tenant ``"kcca"``.
    """
    # Output field -> source column resolved via get_column_value; a source
    # of None means the field is emitted as a literal None.
    looked_up_fields = {
        "pm2_5": "characteristics.pm2_5ConcMass.value",
        "pm10": "characteristics.pm10ConcMass.value",
        "s1_pm2_5": "characteristics.pm2_5ConcMass.raw",
        "s1_pm10": "characteristics.pm10ConcMass.raw",
        "s2_pm2_5": None,
        "s2_pm10": None,
        "pm2_5_calibrated_value": "characteristics.pm2_5ConcMass.calibratedValue",
        "pm10_calibrated_value": "characteristics.pm10ConcMass.calibratedValue",
        "altitude": "characteristics.altitude.value",
        "wind_speed": "characteristics.windSpeed.value",
        "external_temperature": "characteristics.temperature.value",
        "external_humidity": "characteristics.relHumid.value",
    }

    frame = pd.DataFrame(data)
    available_columns = list(frame.columns)

    kcca_devices = AirQoApi().get_devices(tenant="kcca")

    messages = []
    for _, row in frame.iterrows():
        device_code = row["deviceCode"]
        site_id, device_id = get_site_and_device_id(
            kcca_devices, device_name=device_code
        )
        # NOTE(review): a row is kept when only one of the two ids is
        # missing - confirm that partial matches should be published.
        if not (site_id or device_id):
            continue

        # The coordinates are stringified, stripped of brackets and split on
        # commas; index 0 is used as longitude and index 1 as latitude.
        # NOTE(review): the pieces stay strings (no numeric conversion, no
        # whitespace trimming) - confirm consumers expect that.
        coordinates = (
            str(row["location.coordinates"])
            .replace("[", "")
            .replace("]", "")
            .split(",")
        )

        message = {
            "time": frequency_time(dateStr=row["time"], frequency=frequency),
            "tenant": "kcca",
            "site_id": site_id,
            "device_id": device_id,
            "device_number": 0,
            "device": device_code,
            "latitude": coordinates[1],
            "longitude": coordinates[0],
        }
        for field, source in looked_up_fields.items():
            if source is None:
                message[field] = None
            else:
                message[field] = get_column_value(
                    column=source, columns=available_columns, series=row
                )
        messages.append(message)

    return messages