Example #1
def data_or_zeroes(df: pd.DataFrame, start, end, resolution) -> pd.DataFrame:
    """Making really sure we have the structure to let the plots not fail"""
    if df is None or df.empty:
        return pd.DataFrame(
            index=pd.date_range(
                start=start,
                end=end,
                freq=resolution,
                tz=time_utils.get_timezone(),
                closed="left",
            ),
            columns=["event_value"],
        ).fillna(0)
    else:
        return df.fillna(0)
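
A minimal usage sketch of the empty-DataFrame branch, with an illustrative date range and "Europe/Amsterdam" standing in for time_utils.get_timezone(); note that pandas 1.4+ spells the closed="left" argument as inclusive="left":

import pandas as pd

index = pd.date_range(
    start="2021-06-01",
    end="2021-06-02",
    freq="15min",
    tz="Europe/Amsterdam",  # stand-in for time_utils.get_timezone()
    inclusive="left",
)
zeroes = pd.DataFrame(index=index, columns=["event_value"]).fillna(0)
print(len(zeroes))  # 96 rows: one zero-valued event per 15 minutes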
Example #2
def query_time_series_data(
    old_sensor_names: Tuple[str, ...],
    make_query: QueryCallType,
    query_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    belief_horizon_window: Tuple[Optional[timedelta], Optional[timedelta]] = (
        None,
        None,
    ),
    belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (
        None,
        None,
    ),
    belief_time: Optional[datetime] = None,
    user_source_ids: Optional[Union[int, List[int]]] = None,
    source_types: Optional[List[str]] = None,
    exclude_source_types: Optional[List[str]] = None,
    resolution: Optional[Union[str, timedelta]] = None,
) -> Dict[str, tb.BeliefsDataFrame]:
    """
    Run a query for time series data on the database for a tuple of assets.
    Here, we need to know that postgres only stores naive datetimes and we keep them as UTC.
    Therefore, we localize the result.
    Then, we resample the result, to fit the given resolution. *
    Returns a dictionary of asset names (as keys) and BeliefsDataFrames (as values),
    with each BeliefsDataFrame having an "event_value" column.

    * Note that we convert string resolutions to datetime.timedelta objects.
    """

    # On demo, we query older data as if it's the current year's data (we convert back below)
    if current_app.config.get("FLEXMEASURES_MODE", "") == "demo":
        query_window = convert_query_window_for_demo(query_window)

    query = make_query(
        old_sensor_names=old_sensor_names,
        query_window=query_window,
        belief_horizon_window=belief_horizon_window,
        belief_time_window=belief_time_window,
        belief_time=belief_time,
        user_source_ids=user_source_ids,
        source_types=source_types,
        exclude_source_types=exclude_source_types,
    )

    df_all_assets = pd.DataFrame(
        query.all(), columns=[col["name"] for col in query.column_descriptions]
    )
    bdf_dict: Dict[str, tb.BeliefsDataFrame] = {}
    for old_sensor_model_name in old_sensor_names:

        # Select data for the given asset
        df = df_all_assets[df_all_assets["name"] == old_sensor_model_name].loc[
            :, df_all_assets.columns != "name"
        ]

        # todo: Keep the preferred data source (first look at source_type, then user_source_id if needed)
        # if user_source_ids:
        #     values_orig["source"] = values_orig["source"].astype("category")
        #     values_orig["source"].cat.set_categories(user_source_ids, inplace=True)
        #     values_orig = (
        #         values_orig.sort_values(by=["source"], ascending=True)
        #         .drop_duplicates(subset=["source"], keep="first")
        #         .sort_values(by=["datetime"])
        #     )

        # Keep the most recent observation
        # todo: this block also resolves multi-sourced data by selecting the "first" (unsorted) source; we should have a consistent policy for this case
        df = (
            df.sort_values(by=["horizon"], ascending=True)
            .drop_duplicates(subset=["datetime"], keep="first")
            .sort_values(by=["datetime"])
        )

        # Index according to time and rename columns
        # todo: this operation can be simplified after moving our time series data structures to timely-beliefs
        df.rename(
            index=str,
            columns={
                "value": "event_value",
                "datetime": "event_start",
                "DataSource": "source",
                "horizon": "belief_horizon",
            },
            inplace=True,
        )
        df.set_index("event_start", drop=True, inplace=True)

        # Convert to the FLEXMEASURES timezone
        if not df.empty:
            df.index = df.index.tz_convert(time_utils.get_timezone())

        # On demo, we query older data as if it's the current year's data (we converted above)
        if current_app.config.get("FLEXMEASURES_MODE", "") == "demo":
            df.index = df.index.map(lambda t: t.replace(year=datetime.now().year))

        sensor = find_sensor_by_name(name=old_sensor_model_name)
        bdf = tb.BeliefsDataFrame(df.reset_index(), sensor=sensor)

        # re-sample data to the resolution we need to serve
        if resolution is None:
            resolution = sensor.event_resolution
        elif isinstance(resolution, str):
            try:
                # todo: allow pandas freqstr as resolution when timely-beliefs supports DateOffsets,
                #       https://github.com/SeitaBV/timely-beliefs/issues/13
                resolution = pd.to_timedelta(resolution).to_pytimedelta()
            except ValueError:
                resolution = isodate.parse_duration(resolution)
        bdf = bdf.resample_events(
            event_resolution=resolution, keep_only_most_recent_belief=True
        )

        # Slice query window after resampling
        if query_window[0] is not None:
            bdf = bdf[bdf.index.get_level_values("event_start") >= query_window[0]]
        if query_window[1] is not None:
            bdf = bdf[bdf.index.get_level_values("event_start") < query_window[1]]

        bdf_dict[old_sensor_model_name] = bdf

    return bdf_dict
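
The resolution handling above accepts both pandas frequency strings and ISO 8601 durations. A standalone sketch of that fallback (the helper name parse_resolution is ours, for illustration):

from datetime import timedelta
from typing import Union

import isodate
import pandas as pd

def parse_resolution(resolution: str) -> Union[timedelta, isodate.Duration]:
    try:
        # pandas handles frequency-style strings such as "15min"
        return pd.to_timedelta(resolution).to_pytimedelta()
    except ValueError:
        # fall back to ISO 8601 durations that pandas cannot represent
        return isodate.parse_duration(resolution)

print(parse_resolution("15min"))  # 0:15:00
print(parse_resolution("P1M"))    # an isodate.Duration of one calendar month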
Example #3
def save_forecasts_in_db(
    api_key: str, locations: List[Tuple[float, float]], data_source: DataSource
):
    """Process the response from DarkSky into Weather timed values.
    Collects all forecasts for all locations and all sensors at all locations, then bulk-saves them.
    """
    click.echo("[FLEXMEASURES] Getting weather forecasts:")
    click.echo("[FLEXMEASURES]  Latitude, Longitude")
    click.echo("[FLEXMEASURES] -----------------------")
    db_forecasts = []
    weather_sensors: dict = {}  # keep track of the sensors to save lookups

    for location in locations:
        click.echo("[FLEXMEASURES] %s, %s" % location)

        forecasts = call_darksky(api_key, location)
        time_of_api_call = as_server_time(
            datetime.fromtimestamp(forecasts["currently"]["time"], get_timezone())
        ).replace(second=0, microsecond=0)
        click.echo("[FLEXMEASURES] Called Dark Sky API successfully at %s." %
                   time_of_api_call)

        # map sensor name in our db to sensor name/label in dark sky response
        sensor_name_mapping = dict(temperature="temperature",
                                   wind_speed="windSpeed",
                                   radiation="cloudCover")

        for fc in forecasts["hourly"]["data"]:
            fc_datetime = as_server_time(
                datetime.fromtimestamp(fc["time"], get_timezone())
            ).replace(second=0, microsecond=0)
            fc_horizon = fc_datetime - time_of_api_call
            click.echo(
                "[FLEXMEASURES] Processing forecast for %s (horizon: %s) ..." %
                (fc_datetime, fc_horizon))
            for flexmeasures_sensor_type in sensor_name_mapping.keys():
                needed_response_label = sensor_name_mapping[flexmeasures_sensor_type]
                if needed_response_label in fc:
                    weather_sensor = weather_sensors.get(flexmeasures_sensor_type, None)
                    if weather_sensor is None:
                        weather_sensor = find_closest_weather_sensor(
                            flexmeasures_sensor_type, lat=location[0], lng=location[1]
                        )
                        if weather_sensor is not None:
                            weather_sensors[flexmeasures_sensor_type] = weather_sensor
                        else:
                            raise Exception(
                                "No weather sensor set up for this sensor type (%s)"
                                % flexmeasures_sensor_type
                            )

                    fc_value = fc[needed_response_label]
                    # radiation is not available from Dark Sky, so we compute it ourselves
                    if flexmeasures_sensor_type == "radiation":
                        fc_value = compute_irradiance(
                            location[0],
                            location[1],
                            fc_datetime,
                            fc[needed_response_label],
                        )

                    db_forecasts.append(
                        Weather(
                            datetime=fc_datetime,
                            horizon=fc_horizon,
                            value=fc_value,
                            sensor_id=weather_sensor.id,
                            data_source_id=data_source.id,
                        )
                    )
                else:
                    # we will not fail here, but issue a warning
                    msg = "No label '%s' in response data for time %s" % (
                        needed_response_label,
                        fc_datetime,
                    )
                    click.echo("[FLEXMEASURES] %s" % msg)
                    current_app.logger.warning(msg)
    if len(db_forecasts) == 0:
        # This is probably a serious problem
        raise Exception(
            "Nothing to put in the database was produced. That does not seem right..."
        )
    db.session.bulk_save_objects(db_forecasts)
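
A simplified sketch of the timestamp handling used twice above (epoch seconds to a timezone-aware datetime, truncated to whole minutes); pytz and "Europe/Amsterdam" are assumptions standing in for get_timezone(), and the as_server_time wrapper is omitted:

from datetime import datetime
import pytz

tz = pytz.timezone("Europe/Amsterdam")  # stand-in for get_timezone()
epoch_seconds = 1621000000  # an arbitrary example value
t = datetime.fromtimestamp(epoch_seconds, tz).replace(second=0, microsecond=0)
print(t)  # 2021-05-14 15:46:00+02:00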