def data_or_zeroes(df: pd.DataFrame, start, end, resolution) -> pd.DataFrame:
    """Making really sure we have the structure to let the plots not fail.

    :param df: the data to check; may be None or empty
    :param start: start of the plotted period (used only if df has no data)
    :param end: end of the plotted period (used only if df has no data)
    :param resolution: pandas frequency for the zero-filled index
    :returns: df with NaN values replaced by 0, or — if there is no data at
              all — a zero-filled "event_value" frame covering [start, end)
              at the given resolution, localized to the FlexMeasures timezone.
    """
    if df is None or df.empty:
        return pd.DataFrame(
            index=pd.date_range(
                start=start,
                end=end,
                freq=resolution,
                tz=time_utils.get_timezone(),
                # "closed" was deprecated in pandas 1.4 and removed in 2.0;
                # inclusive="left" keeps the original half-open interval.
                inclusive="left",
            ),
            columns=["event_value"],
        ).fillna(0)
    else:
        return df.fillna(0)
def query_time_series_data(
    old_sensor_names: Tuple[str],
    make_query: QueryCallType,
    query_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    belief_horizon_window: Tuple[Optional[timedelta], Optional[timedelta]] = (
        None,
        None,
    ),
    belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    belief_time: Optional[datetime] = None,
    user_source_ids: Optional[Union[int, List[int]]] = None,
    source_types: Optional[List[str]] = None,
    exclude_source_types: Optional[List[str]] = None,
    resolution: Optional[Union[str, timedelta]] = None,
) -> Dict[str, tb.BeliefsDataFrame]:
    """
    Run a query for time series data on the database for a tuple of assets.

    Here, we need to know that postgres only stores naive datetimes and we keep them as UTC.
    Therefore, we localize the result. Then, we resample the result, to fit the given resolution.
    * Returns a dictionary of asset names (as keys) and BeliefsDataFrames (as values),
      with each BeliefsDataFrame having an "event_value" column.
    * Note that we convert string resolutions to datetime.timedelta objects.
    * If no resolution is given, each sensor is resampled to its own event_resolution.
    """
    # On demo, we query older data as if it's the current year's data (we convert back below)
    if current_app.config.get("FLEXMEASURES_MODE", "") == "demo":
        query_window = convert_query_window_for_demo(query_window)

    query = make_query(
        old_sensor_names=old_sensor_names,
        query_window=query_window,
        belief_horizon_window=belief_horizon_window,
        belief_time_window=belief_time_window,
        belief_time=belief_time,
        user_source_ids=user_source_ids,
        source_types=source_types,
        exclude_source_types=exclude_source_types,
    )

    df_all_assets = pd.DataFrame(
        query.all(), columns=[col["name"] for col in query.column_descriptions]
    )

    # Convert a string resolution to a timedelta once, up front (not per sensor)
    if isinstance(resolution, str):
        try:
            # todo: allow pandas freqstr as resolution when timely-beliefs supports DateOffsets,
            #       https://github.com/SeitaBV/timely-beliefs/issues/13
            resolution = pd.to_timedelta(resolution).to_pytimedelta()
        except ValueError:
            resolution = isodate.parse_duration(resolution)

    bdf_dict: Dict[str, tb.BeliefsDataFrame] = {}
    for old_sensor_model_name in old_sensor_names:

        # Select data for the given asset
        df = df_all_assets[df_all_assets["name"] == old_sensor_model_name].loc[
            :, df_all_assets.columns != "name"
        ]

        # todo: Keep the preferred data source (first look at source_type, then user_source_id if needed)
        # if user_source_ids:
        #    values_orig["source"] = values_orig["source"].astype("category")
        #    values_orig["source"].cat.set_categories(user_source_ids, inplace=True)
        #    values_orig = (
        #        values_orig.sort_values(by=["source"], ascending=True)
        #        .drop_duplicates(subset=["source"], keep="first")
        #        .sort_values(by=["datetime"])
        #    )

        # Keep the most recent observation
        # todo: this block also resolves multi-sourced data by selecting the "first" (unsorted) source;
        #       we should have a consistent policy for this case
        df = (
            df.sort_values(by=["horizon"], ascending=True)
            .drop_duplicates(subset=["datetime"], keep="first")
            .sort_values(by=["datetime"])
        )

        # Index according to time and rename columns
        # todo: this operation can be simplified after moving our time series data structures to timely-beliefs
        df.rename(
            index=str,
            columns={
                "value": "event_value",
                "datetime": "event_start",
                "DataSource": "source",
                "horizon": "belief_horizon",
            },
            inplace=True,
        )
        df.set_index("event_start", drop=True, inplace=True)

        # Convert to the FLEXMEASURES timezone
        if not df.empty:
            df.index = df.index.tz_convert(time_utils.get_timezone())

            # On demo, we query older data as if it's the current year's data (we converted above)
            if current_app.config.get("FLEXMEASURES_MODE", "") == "demo":
                df.index = df.index.map(lambda t: t.replace(year=datetime.now().year))

        sensor = find_sensor_by_name(name=old_sensor_model_name)
        bdf = tb.BeliefsDataFrame(df.reset_index(), sensor=sensor)

        # Re-sample data to the resolution we need to serve.
        # NB: fall back to the sensor's own event_resolution per sensor, without
        # overwriting the shared `resolution` parameter (previously the first
        # sensor's resolution leaked to all subsequent sensors in the loop).
        sensor_resolution = (
            resolution if resolution is not None else sensor.event_resolution
        )
        bdf = bdf.resample_events(
            event_resolution=sensor_resolution, keep_only_most_recent_belief=True
        )

        # Slice query window after resampling
        if query_window[0] is not None:
            bdf = bdf[bdf.index.get_level_values("event_start") >= query_window[0]]
        if query_window[1] is not None:
            bdf = bdf[bdf.index.get_level_values("event_start") < query_window[1]]

        bdf_dict[old_sensor_model_name] = bdf

    return bdf_dict
def save_forecasts_in_db(
    api_key: str, locations: List[Tuple[float, float]], data_source: DataSource
):
    """Process the response from DarkSky into Weather timed values.
    Collects all forecasts for all locations and all sensors at all locations, then bulk-saves them.
    """
    click.echo("[FLEXMEASURES] Getting weather forecasts:")
    click.echo("[FLEXMEASURES] Latitude, Longitude")
    click.echo("[FLEXMEASURES] -----------------------")

    forecasts_to_save = []
    sensor_cache: dict = {}  # keep track of the sensors to save lookups

    for location in locations:
        click.echo("[FLEXMEASURES] %s, %s" % location)

        forecasts = call_darksky(api_key, location)
        api_call_time = as_server_time(
            datetime.fromtimestamp(forecasts["currently"]["time"], get_timezone())
        ).replace(second=0, microsecond=0)
        click.echo(
            "[FLEXMEASURES] Called Dark Sky API successfully at %s." % api_call_time
        )

        # map sensor name in our db to sensor name/label in dark sky response
        sensor_name_mapping = dict(
            temperature="temperature", wind_speed="windSpeed", radiation="cloudCover"
        )

        for fc in forecasts["hourly"]["data"]:
            fc_time = as_server_time(
                datetime.fromtimestamp(fc["time"], get_timezone())
            ).replace(second=0, microsecond=0)
            fc_horizon = fc_time - api_call_time
            click.echo(
                "[FLEXMEASURES] Processing forecast for %s (horizon: %s) ..."
                % (fc_time, fc_horizon)
            )

            for sensor_type, response_label in sensor_name_mapping.items():
                if response_label not in fc:
                    # we will not fail here, but issue a warning
                    msg = "No label '%s' in response data for time %s" % (
                        response_label,
                        fc_time,
                    )
                    click.echo("[FLEXMEASURES] %s" % msg)
                    current_app.logger.warning(msg)
                    continue

                # resolve (and cache) the weather sensor for this type/location
                weather_sensor = sensor_cache.get(sensor_type, None)
                if weather_sensor is None:
                    weather_sensor = find_closest_weather_sensor(
                        sensor_type, lat=location[0], lng=location[1]
                    )
                    if weather_sensor is None:
                        raise Exception(
                            "No weather sensor set up for this sensor type (%s)"
                            % sensor_type
                        )
                    sensor_cache[sensor_type] = weather_sensor

                fc_value = fc[response_label]
                # the radiation is not available in dark sky -> we compute it ourselves
                if sensor_type == "radiation":
                    fc_value = compute_irradiance(
                        location[0],
                        location[1],
                        fc_time,
                        fc[response_label],
                    )

                forecasts_to_save.append(
                    Weather(
                        datetime=fc_time,
                        horizon=fc_horizon,
                        value=fc_value,
                        sensor_id=weather_sensor.id,
                        data_source_id=data_source.id,
                    )
                )

    if len(forecasts_to_save) == 0:
        # This is probably a serious problem
        raise Exception(
            "Nothing to put in the database was produced. That does not seem right..."
        )
    db.session.bulk_save_objects(forecasts_to_save)