def aggregate_values(bdf_dict: Dict[str, tb.BeliefsDataFrame]) -> tb.BeliefsDataFrame:
    # todo: test this function rigorously, e.g. with empty bdfs in bdf_dict
    # todo: consider 1 bdf with beliefs from source A, plus 1 bdf with beliefs from source B -> 1 bdf with sources A+B
    # todo: consider 1 bdf with beliefs from sources A and B, plus 1 bdf with beliefs from source C -> 1 bdf with sources A+B and A+C
    # todo: consider 1 bdf with beliefs from sources A and B, plus 1 bdf with beliefs from sources C and D -> 1 bdf with sources A+B, A+C, B+C and B+D
    # Relevant issue: https://github.com/SeitaBV/timely-beliefs/issues/33
    unique_source_ids: List[int] = []
    for bdf in bdf_dict.values():
        unique_source_ids.extend(bdf.lineage.sources)
        if not bdf.lineage.unique_beliefs_per_event_per_source:
            current_app.logger.warning(
                "Not implemented: only aggregation of deterministic uni-source beliefs (1 per event) is properly supported"
            )
        if bdf.lineage.number_of_sources > 1:
            current_app.logger.warning(
                "Not implemented: aggregating multi-source beliefs about the same sensor."
            )
    if len(set(unique_source_ids)) > 1:
        current_app.logger.warning(
            f"Not implemented: aggregating multi-source beliefs. Sources {unique_source_ids[1:]} will be treated as if they were source {unique_source_ids[0]}"
        )

    data_as_bdf = tb.BeliefsDataFrame()
    for k, v in bdf_dict.items():
        if data_as_bdf.empty:
            data_as_bdf = v.copy()
        elif not v.empty:
            data_as_bdf["event_value"] = data_as_bdf["event_value"].add(
                simplify_index(v.copy())["event_value"],
                fill_value=0,
                level="event_start",
            )  # we only look at the event_start index level and sum up duplicates at that level
    return data_as_bdf
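
# A hedged usage sketch for aggregate_values (the sensor names are illustrative,
# not part of this module; assumes each frame holds deterministic, single-source
# beliefs with a shared event_start index, as the warnings above require):
def example_aggregate_usage() -> tb.BeliefsDataFrame:
    bdf_dict = {
        "solar": Sensor.query.filter_by(name="solar").one().search_beliefs(),
        "wind": Sensor.query.filter_by(name="wind").one().search_beliefs(),
    }
    return aggregate_values(bdf_dict)  # event values summed per event_start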
def test_resample_sensor_data(
    app, db, setup_beliefs, event_starts_after: str, event_ends_before: str
):
    """Check resampling market data from hourly to 30 minute resolution and back."""
    from flexmeasures.cli.data_edit import resample_sensor_data

    sensor = Sensor.query.filter(Sensor.name == "epex_da").one_or_none()
    event_starts_after = pd.Timestamp(event_starts_after)
    event_ends_before = pd.Timestamp(event_ends_before)
    beliefs_before = sensor.search_beliefs(
        most_recent_beliefs_only=False,
        event_starts_after=event_starts_after,
        event_ends_before=event_ends_before,
    )

    # Check whether fixtures have flushed
    assert sensor.id is not None

    # Check whether we have all desired beliefs
    query = TimedBelief.query.filter(TimedBelief.sensor_id == sensor.id)
    if not pd.isnull(event_starts_after):
        query = query.filter(TimedBelief.event_start >= event_starts_after)
    if not pd.isnull(event_ends_before):
        query = query.filter(
            TimedBelief.event_start + sensor.event_resolution <= event_ends_before
        )
    all_beliefs_for_given_sensor = query.all()
    pd.testing.assert_frame_equal(
        tb.BeliefsDataFrame(all_beliefs_for_given_sensor), beliefs_before
    )

    cli_input = {
        "sensor-id": sensor.id,
        "event-resolution": sensor.event_resolution.seconds / 60 / 2,
    }
    runner = app.test_cli_runner()
    result = runner.invoke(
        resample_sensor_data, to_flags(cli_input) + ["--skip-integrity-check"]
    )

    # Check result for success
    assert "Successfully resampled" in result.output

    # Check that we now have twice as much data for this sensor
    sensor = Sensor.query.filter(Sensor.name == "epex_da").one_or_none()
    beliefs_after = sensor.search_beliefs(
        most_recent_beliefs_only=False,
        event_starts_after=event_starts_after,
        event_ends_before=event_ends_before,
    )
    assert len(beliefs_after) == 2 * len(beliefs_before)

    # Checksum
    assert beliefs_after["event_value"].sum() == 2 * beliefs_before["event_value"].sum()

    # Resample back to the original resolution (on behalf of the next test case)
    cli_input["event-resolution"] = sensor.event_resolution.seconds / 60
    result = runner.invoke(
        resample_sensor_data, to_flags(cli_input) + ["--skip-integrity-check"]
    )
    assert "Successfully resampled" in result.output
def test_query_beliefs(setup_beliefs):
    """Check various ways of querying for beliefs."""
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    source = DataSource.query.filter_by(name="ENTSO-E").one_or_none()
    bdfs = [
        TimedBelief.search(sensor, source=source, most_recent_beliefs_only=False),
        TimedBelief.search(sensor.id, source=source, most_recent_beliefs_only=False),
        TimedBelief.search(sensor.name, source=source, most_recent_beliefs_only=False),
        sensor.search_beliefs(source=source, most_recent_beliefs_only=False),
        tb.BeliefsDataFrame(sensor.beliefs)[
            tb.BeliefsDataFrame(sensor.beliefs).index.get_level_values("source")
            == source
        ],
    ]
    for bdf in bdfs:
        assert sensor.event_resolution == timedelta(hours=1)
        assert bdf.event_resolution == timedelta(hours=1)
        assert len(bdf) == setup_beliefs
def csv_as_belief(csv_in, tz_hour_difference, n_events=None):
    sensor_descriptions = (("Temperature", "°C"),)

    # Create source and sensors
    source_a = tb.BeliefSource(name="KNMI")
    sensors = (
        tb.Sensor(name=descr[0], unit=descr[1], event_resolution=timedelta(minutes=15))
        for descr in sensor_descriptions
    )

    # Create BeliefsDataFrame
    for sensor in sensors:
        blfs = read_beliefs_from_csv(
            sensor,
            csv_in,
            source=source_a,
            tz_hour_difference=tz_hour_difference,
            n_events=n_events,
        )
        df = tb.BeliefsDataFrame(sensor=sensor, beliefs=blfs).sort_index()
    return df
def make_df(
    n_events=100,
    n_horizons=169,
    tz_hour_difference=-9,
    event_resolution=timedelta(hours=1),
):
    """Return a BeliefsDataFrame in which n events and n horizons are stored.

    @param n_events: int, number of events in the DataFrame
    @param n_horizons: int, number of horizons in the DataFrame
    @param tz_hour_difference: float, timezone difference in hours
    @param event_resolution: timedelta object, event resolution
    """
    sensor_descriptions = (("Temperature", "°C"),)
    source = tb.BeliefSource(name="Random forest")
    sensors = (
        tb.Sensor(name=descr[0], unit=descr[1], event_resolution=event_resolution)
        for descr in sensor_descriptions
    )
    blfs = []
    for sensor in sensors:
        for cp in (0.05, 0.5, 0.95):
            blfs += read_beliefs_from_csv(
                sensor,
                source=source,
                cp=cp,
                event_resolution=event_resolution,
                tz_hour_difference=tz_hour_difference,
            )
        bdf = tb.BeliefsDataFrame(sensor=sensor, beliefs=blfs).sort_index()
    return bdf
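
# A minimal sketch of calling make_df (parameter values are illustrative;
# assumes the CSV consumed by read_beliefs_from_csv is present locally):
def example_make_df_usage():
    bdf = make_df(n_events=10, n_horizons=24, tz_hour_difference=-9)
    print(bdf.lineage.number_of_events)  # expect 10
    return bdf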
def test_load_beliefs(csv_file):
    """Test loading a BeliefsDataFrame from csv.

    The saved file does not contain the sensor information, and the sources are saved by their name.
    Therefore, we test the following functionality:
    - The user should specify the sensor upon loading.
    - The user should be warned that the loaded sources are not of type BeliefSource.
    - The user should have the possibility to look up the saved source names by passing a list of sources.
    """
    # Load beliefs with tb.read_csv
    df = pd.read_csv("test.csv")
    with pytest.warns(UserWarning, match="type other than BeliefSource"):
        df = tb.BeliefsDataFrame(df, sensor=tb.Sensor("Sensor Y"))
    assert df.sensor.name == "Sensor Y"

    # No lookup should issue a warning
    with pytest.warns(UserWarning, match="type other than BeliefSource"):
        df = tb.read_csv("test.csv", sensor=tb.Sensor("Sensor Y"))
    assert all(
        c != tb.BeliefSource for c in df.index.get_level_values("source").map(type)
    )

    # This lookup should fail
    with pytest.raises(ValueError, match="not in list"):
        tb.read_csv(
            "test.csv",
            sensor=tb.Sensor("Sensor Y"),
            look_up_sources=[tb.BeliefSource(name="Source X")],
        )

    # This lookup should succeed
    source_a, source_b = tb.BeliefSource("Source A"), tb.BeliefSource("Source B")
    df = tb.read_csv(
        "test.csv", sensor=tb.Sensor("Sensor Y"), look_up_sources=[source_a, source_b]
    )
    assert df.sensor.name == "Sensor Y"
    assert source_a in df.index.get_level_values("source")
    assert source_b in df.index.get_level_values("source")
    assert isinstance(df.index.get_level_values("event_start")[0], datetime)
    assert isinstance(df.index.get_level_values("belief_time")[0], datetime)
def csv_as_belief(csv_in, tz_hour_difference, n_events=None):
    # Uncomment the desired forecasting data, one at a time
    sensor_descriptions = (
        # ("Solar irradiation", "kW/m²"),
        # ("Solar power", "kW"),
        # ("Wind speed", "m/s"),
        # ("Wind power", "MW"),
        ("Temperature", "°C"),
    )

    # Create source and sensors
    source_a = tb.BeliefSource(name="KNMI")
    sensors = (
        tb.Sensor(name=descr[0], unit=descr[1], event_resolution=timedelta(minutes=15))
        for descr in sensor_descriptions
    )

    # Create BeliefsDataFrame
    for sensor in sensors:
        blfs = read_beliefs_from_csv(
            sensor,
            csv_in,
            source=source_a,
            tz_hour_difference=tz_hour_difference,
            n_events=n_events,
        )
        df = tb.BeliefsDataFrame(sensor=sensor, beliefs=blfs).sort_index()
    return df
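
# A hedged usage sketch for csv_as_belief; the file name is illustrative:
def example_csv_as_belief_usage():
    df = csv_as_belief("temperature.csv", tz_hour_difference=-9, n_events=100)
    print(df.sensor)  # the Temperature sensor, at 15-minute resolution
    return df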
def verify_sensor_data_in_db(
    post_message,
    values,
    db,
    entity_type: str,
    fm_scheme: str,
    swapped_sign: bool = False,
):
    """Util method to verify that sensor data ended up in the database."""
    start = parse_datetime(post_message["start"])
    end = start + parse_duration(post_message["duration"])
    sensor: Sensor = SensorField(entity_type, fm_scheme).deserialize(
        post_message[entity_type]
    )
    resolution = sensor.event_resolution
    query = (
        db.session.query(
            TimedBelief.event_start,
            TimedBelief.event_value,
            TimedBelief.belief_horizon,
        )
        .filter(
            (TimedBelief.event_start > start - resolution)
            & (TimedBelief.event_start < end)
        )
        # .filter(TimedBelief.belief_horizon == (TimedBelief.event_start + resolution) - prior)  # only for sensors with 0-hour ex_post knowledge horizon function
        .join(Sensor)
        .filter(Sensor.name == sensor.name)
    )
    if "horizon" in post_message:
        horizon = parse_duration(post_message["horizon"])
        query = query.filter(TimedBelief.belief_horizon == horizon)
    # todo: after basing sensor data on TimedBelief, we should be able to get a BeliefsDataFrame from the query directly
    df = pd.DataFrame(
        query.all(), columns=[col["name"] for col in query.column_descriptions]
    )
    bdf = tb.BeliefsDataFrame(df, sensor=sensor, source="Some source")
    if "prior" in post_message:
        prior = parse_datetime(post_message["prior"])
        bdf = bdf.fixed_viewpoint(prior)
    if swapped_sign:
        bdf["event_value"] = -bdf["event_value"]
    assert bdf["event_value"].tolist() == values
def main(
    df,
    current_time,
    start_time,
    last_start_time=None,
    model=LinearRegression(),
    source=None,
):
    """Accept a BeliefsDataFrame df and return forecasts from start_time to
    last_start_time as timely-beliefs rows.

    @param df: BeliefsDataFrame
    @param current_time: datetime object, generate a forecast from this point
    @param start_time: datetime object
    @param last_start_time: datetime object
    @param model: model to use to generate new data
    """
    if last_start_time is None:
        last_start_time = start_time
    # check if times are in chronological order
    elif start_time > last_start_time:
        raise ValueError("last_start_time must be set after start_time")
    first_date = df.iloc[0].name[0]
    last_date = df.iloc[-1].name[0]

    # check if current time is in the data frame
    if current_time < first_date or current_time > last_date:
        raise ValueError(
            "Your current_time is not in the dataframe\nstart: {}\nend: {}".format(
                first_date, last_date
            )
        )

    # check if current time is compatible with the event resolution
    resolution_minutes = df.sensor.event_resolution.seconds / 60
    if current_time.minute % resolution_minutes != 0:
        raise ValueError(
            "Your current_time is not compatible with the event resolution of {} minutes".format(
                resolution_minutes
            )
        )

    # get a BeliefsSeries for each of the times
    current = get_beliefsSeries_from_event_start(df, current_time, current_time, "event_value")
    start = get_beliefsSeries_from_event_start(df, start_time, current_time, "event_value")
    last_start = get_beliefsSeries_from_event_start(df, last_start_time, current_time, "event_value")

    # create list of BeliefsSeries
    beliefSeries_list = [start.copy()]
    blfs_list = []
    temp_time = start_time
    i = 0
    # loop over the given time slot
    while temp_time <= last_start_time:
        if temp_time > last_date or temp_time < first_date:
            i += 1
            blfs_list += [
                tb.TimedBelief(
                    source=tb.BeliefSource(name="test" + str(i)),
                    sensor=df.sensor,
                    value=generator(df, current, model),
                    belief_time=current_time,
                    event_start=temp_time,
                    cumulative_probability=0.5,
                )
            ]
        else:
            beliefSeries_list += [
                get_beliefsSeries_from_event_start(
                    df, temp_time, current_time, "event_value"
                ).copy()
            ]
        temp_time += df.sensor.event_resolution
    df_1 = tb.BeliefsDataFrame(sensor=df.sensor, beliefs=blfs_list)

    # loop over all time steps
    for beliefSeries in beliefSeries_list:
        if not beliefSeries.empty:
            beliefSeries[0] = generator(df, current, model)
    temp = beliefSeries_list[0].to_frame(name="event_value")
    # loop over the out-of-frame values
    for i in range(len(beliefSeries_list) - 2):
        temp = temp.append(beliefSeries_list[i + 2].to_frame(name="event_value"))
    df_1 = temp.append(df_1)
    return df_1
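
# A hedged sketch of driving the generator above (times are illustrative and
# assume df spans them at the sensor's event resolution):
def example_main_usage(df):
    current_time = df.iloc[-1].name[0]  # belief time: last event in the frame
    return main(
        df,
        current_time=current_time,
        start_time=current_time,
        last_start_time=current_time + 3 * df.sensor.event_resolution,
    )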
def post_price_data_response(  # noqa C901
    unit,
    generic_asset_name_groups,
    horizon,
    prior,
    value_groups,
    start,
    duration,
    resolution,
) -> ResponseTuple:

    # additional validation, todo: to be moved into Marshmallow
    if horizon is None and prior is None:
        extra_info = "Missing horizon or prior."
        return invalid_horizon(extra_info)

    current_app.logger.info("POSTING PRICE DATA")

    data_source = get_or_create_source(current_user)
    price_df_per_market = []
    forecasting_jobs = []
    for market_group, event_values in zip(generic_asset_name_groups, value_groups):
        for market in market_group:

            # Parse the entity address
            try:
                ea = parse_entity_address(market, entity_type="market")
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            sensor_id = ea["sensor_id"]

            # Look for the Sensor object
            sensor = Sensor.query.filter(Sensor.id == sensor_id).one_or_none()
            if sensor is None:
                return unrecognized_market(sensor_id)
            elif unit != sensor.unit:
                return invalid_unit("%s prices" % sensor.name, [sensor.unit])

            # Convert to timely-beliefs terminology
            event_starts, belief_horizons = determine_belief_timing(
                event_values, start, resolution, horizon, prior, sensor
            )

            # Create new Price objects
            beliefs = [
                TimedBelief(
                    event_start=event_start,
                    event_value=event_value,
                    belief_horizon=belief_horizon,
                    sensor=sensor,
                    source=data_source,
                )
                for event_start, event_value, belief_horizon in zip(
                    event_starts, event_values, belief_horizons
                )
            ]
            price_df_per_market.append(tb.BeliefsDataFrame(beliefs))

            # Make forecasts, but not in play mode. Price forecasts (horizon>0) can still lead to other price forecasts,
            # by the way, due to things like day-ahead markets.
            if current_app.config.get("FLEXMEASURES_MODE", "") != "play":
                # Forecast 24 and 48 hours ahead for at most the last 24 hours of posted price data
                forecasting_jobs = create_forecasting_jobs(
                    sensor.id,
                    max(start, start + duration - timedelta(hours=24)),
                    start + duration,
                    resolution=duration / len(event_values),
                    horizons=[timedelta(hours=24), timedelta(hours=48)],
                    enqueue=False,  # will enqueue later, after saving data
                )

    return save_and_enqueue(price_df_per_market, forecasting_jobs)
def post_power_data(
    unit,
    generic_asset_name_groups,
    value_groups,
    horizon,
    prior,
    start,
    duration,
    resolution,
    create_forecasting_jobs_too,
):

    # additional validation, todo: to be moved into Marshmallow
    if horizon is None and prior is None:
        extra_info = "Missing horizon or prior."
        return invalid_horizon(extra_info)

    current_app.logger.info("POSTING POWER DATA")

    data_source = get_or_create_source(current_user)
    user_sensors = get_sensors()
    if not user_sensors:
        current_app.logger.info("User doesn't seem to have any assets")
    user_sensor_ids = [sensor.id for sensor in user_sensors]

    power_df_per_connection = []
    forecasting_jobs = []
    for connection_group, event_values in zip(generic_asset_name_groups, value_groups):
        for connection in connection_group:

            # TODO: get asset through util function after refactoring
            # Parse the entity address
            try:
                ea = parse_entity_address(connection, entity_type="connection")
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            sensor_id = ea["sensor_id"]

            # Look for the Sensor object
            if sensor_id in user_sensor_ids:
                sensor = Sensor.query.filter(Sensor.id == sensor_id).one_or_none()
            else:
                current_app.logger.warning("Cannot identify connection %s" % connection)
                return unrecognized_connection_group()

            # Validate the sign of the values (following USEF specs with positive consumption and negative production)
            if sensor.get_attribute("is_strictly_non_positive") and any(
                v < 0 for v in event_values
            ):
                extra_info = (
                    "Connection %s is registered as a pure consumer and can only receive non-negative values."
                    % sensor.entity_address
                )
                return power_value_too_small(extra_info)
            elif sensor.get_attribute("is_strictly_non_negative") and any(
                v > 0 for v in event_values
            ):
                extra_info = (
                    "Connection %s is registered as a pure producer and can only receive non-positive values."
                    % sensor.entity_address
                )
                return power_value_too_big(extra_info)

            # Convert to timely-beliefs terminology
            event_starts, belief_horizons = determine_belief_timing(
                event_values, start, resolution, horizon, prior, sensor
            )

            # Create new Power objects
            beliefs = [
                TimedBelief(
                    event_start=event_start,
                    event_value=event_value
                    * -1,  # Reverse sign for FlexMeasures specs with positive production and negative consumption
                    belief_horizon=belief_horizon,
                    sensor=sensor,
                    source=data_source,
                )
                for event_start, event_value, belief_horizon in zip(
                    event_starts, event_values, belief_horizons
                )
            ]
            power_df_per_connection.append(tb.BeliefsDataFrame(beliefs))

            if create_forecasting_jobs_too:
                forecasting_jobs.extend(
                    create_forecasting_jobs(
                        sensor_id,
                        start,
                        start + duration,
                        resolution=duration / len(event_values),
                        enqueue=False,  # will enqueue later, after saving data
                    )
                )

    return save_and_enqueue(power_df_per_connection, forecasting_jobs)
def post_weather_data_response(  # noqa: C901
    unit,
    generic_asset_name_groups,
    horizon,
    prior,
    value_groups,
    start,
    duration,
    resolution,
) -> ResponseTuple:

    # additional validation, todo: to be moved into Marshmallow
    if horizon is None and prior is None:
        extra_info = "Missing horizon or prior."
        return invalid_horizon(extra_info)

    current_app.logger.info("POSTING WEATHER DATA")

    data_source = get_or_create_source(current_user)
    weather_df_per_sensor = []
    forecasting_jobs = []
    for sensor_group, event_values in zip(generic_asset_name_groups, value_groups):
        for sensor in sensor_group:

            # Parse the entity address
            try:
                ea = parse_entity_address(sensor, entity_type="weather_sensor")
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            weather_sensor_type_name = ea["weather_sensor_type_name"]
            latitude = ea["latitude"]
            longitude = ea["longitude"]

            # Check whether the unit is valid for this sensor type (e.g. no m/s allowed for temperature data)
            accepted_units = valid_sensor_units(weather_sensor_type_name)
            if unit not in accepted_units:
                return invalid_unit(weather_sensor_type_name, accepted_units)

            sensor: Sensor = get_sensor_by_generic_asset_type_and_location(
                weather_sensor_type_name, latitude, longitude
            )

            # Convert to timely-beliefs terminology
            event_starts, belief_horizons = determine_belief_timing(
                event_values, start, resolution, horizon, prior, sensor
            )

            # Create new Weather objects
            beliefs = [
                TimedBelief(
                    event_start=event_start,
                    event_value=event_value,
                    belief_horizon=belief_horizon,
                    sensor=sensor,
                    source=data_source,
                )
                for event_start, event_value, belief_horizon in zip(
                    event_starts, event_values, belief_horizons
                )
            ]
            weather_df_per_sensor.append(tb.BeliefsDataFrame(beliefs))

            # make forecasts, but only if the sent-in values are not forecasts themselves (and also not in play)
            if current_app.config.get(
                "FLEXMEASURES_MODE", ""
            ) != "play" and horizon <= timedelta(
                hours=0
            ):  # Todo: replace 0 hours with whatever the moment of switching from ex-ante to ex-post is for this generic asset
                forecasting_jobs.extend(
                    create_forecasting_jobs(
                        sensor.id,
                        start,
                        start + duration,
                        resolution=duration / len(event_values),
                        horizons=[horizon],
                        enqueue=False,  # will enqueue later, after saving data
                    )
                )

    return save_and_enqueue(weather_df_per_sensor, forecasting_jobs)
def post_weather_data_response(  # noqa: C901
    unit,
    generic_asset_name_groups,
    horizon,
    rolling,
    value_groups,
    start,
    duration,
    resolution,
):
    current_app.logger.info("POSTING WEATHER DATA")

    data_source = get_or_create_source(current_user)
    weather_df_per_sensor = []
    forecasting_jobs = []
    for sensor_group, value_group in zip(generic_asset_name_groups, value_groups):
        for sensor in sensor_group:

            # Parse the entity address
            try:
                ea = parse_entity_address(
                    sensor, entity_type="weather_sensor", fm_scheme="fm0"
                )
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            weather_sensor_type_name = ea["weather_sensor_type_name"]
            latitude = ea["latitude"]
            longitude = ea["longitude"]

            # Check whether the unit is valid for this sensor type (e.g. no m/s allowed for temperature data)
            accepted_units = valid_sensor_units(weather_sensor_type_name)
            if unit not in accepted_units:
                return invalid_unit(weather_sensor_type_name, accepted_units)

            sensor = get_sensor_by_generic_asset_type_and_location(
                weather_sensor_type_name, latitude, longitude
            )
            if is_response_tuple(sensor):
                # Error message telling the user about the nearest weather sensor they can post to
                return sensor

            # Create new Weather objects
            beliefs = []
            for j, value in enumerate(value_group):
                dt = start + j * duration / len(value_group)
                if rolling:
                    h = horizon
                else:  # Deduct the difference in end times of the individual timeslot and the timeseries duration
                    h = horizon - (
                        (start + duration) - (dt + duration / len(value_group))
                    )
                w = TimedBelief(
                    event_start=dt,
                    event_value=value,
                    belief_horizon=h,
                    sensor=sensor,
                    source=data_source,
                )
                beliefs.append(w)
            weather_df_per_sensor.append(tb.BeliefsDataFrame(beliefs))

            # make forecasts, but only if the sent-in values are not forecasts themselves (and also not in play)
            if current_app.config.get(
                "FLEXMEASURES_MODE", ""
            ) != "play" and horizon <= timedelta(
                hours=0
            ):  # Todo: replace 0 hours with whatever the moment of switching from ex-ante to ex-post is for this sensor
                forecasting_jobs.extend(
                    create_forecasting_jobs(
                        sensor.id,
                        start,
                        start + duration,
                        resolution=duration / len(value_group),
                        enqueue=False,  # will enqueue later, after saving data
                    )
                )

    return save_and_enqueue(weather_df_per_sensor, forecasting_jobs)
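
# A worked sketch of the non-rolling horizon arithmetic used above: each slot's
# belief horizon is shortened so that it is measured relative to the end of that
# slot rather than the end of the whole series. Values are illustrative.
def example_horizon_arithmetic():
    start = datetime(2021, 1, 1, 0, 0)
    duration = timedelta(hours=2)
    horizon = timedelta(hours=2)  # horizon relative to the series end
    n = 4  # four 30-minute slots
    for j in range(n):
        dt = start + j * duration / n
        h = horizon - ((start + duration) - (dt + duration / n))
        print(dt, h)  # first slot gets a 30-minute horizon, last slot 2 hours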
def make_schedule(
    sensor_id: int,
    start: datetime,
    end: datetime,
    belief_time: datetime,
    resolution: timedelta,
    soc_at_start: Optional[float] = None,
    soc_targets: Optional[pd.Series] = None,
    soc_min: Optional[float] = None,
    soc_max: Optional[float] = None,
    roundtrip_efficiency: Optional[float] = None,
    price_sensor: Optional[Sensor] = None,
) -> bool:
    """Preferably, a starting soc is given.
    Otherwise, we try to retrieve the current state of charge from the asset (if that is the valid one at the start).
    Otherwise, we set the starting soc to 0 (some assets don't use the concept of a state of charge,
    and without soc targets and limits the starting soc doesn't matter).
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find sensor
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    if rq_job:
        click.echo(
            "Running Scheduling Job %s: %s, from %s to %s"
            % (rq_job.id, sensor, start, end)
        )

    if soc_at_start is None:
        if (
            start == sensor.get_attribute("soc_datetime")
            and sensor.get_attribute("soc_in_mwh") is not None
        ):
            soc_at_start = sensor.get_attribute("soc_in_mwh")
        else:
            soc_at_start = 0

    if soc_targets is None:
        soc_targets = pd.Series(
            np.nan, index=pd.date_range(start, end, freq=resolution, closed="right")
        )

    if sensor.generic_asset.generic_asset_type.name == "battery":
        consumption_schedule = schedule_battery(
            sensor,
            start,
            end,
            resolution,
            soc_at_start,
            soc_targets,
            soc_min,
            soc_max,
            roundtrip_efficiency,
            price_sensor=price_sensor,
        )
    elif sensor.generic_asset.generic_asset_type.name in (
        "one-way_evse",
        "two-way_evse",
    ):
        consumption_schedule = schedule_charging_station(
            sensor,
            start,
            end,
            resolution,
            soc_at_start,
            soc_targets,
            soc_min,
            soc_max,
            roundtrip_efficiency,
            price_sensor=price_sensor,
        )
    else:
        raise ValueError(
            "Scheduling is not (yet) supported for asset type %s."
            % sensor.generic_asset.generic_asset_type
        )

    data_source = get_data_source(
        data_source_name="Seita",
        data_source_type="scheduling script",
    )
    if rq_job:
        click.echo("Job %s made schedule." % rq_job.id)

    ts_value_schedule = [
        TimedBelief(
            event_start=dt,
            belief_time=belief_time,
            event_value=-value,
            sensor=sensor,
            source=data_source,
        )
        for dt, value in consumption_schedule.items()
    ]  # For consumption schedules, positive values denote consumption. For the db, consumption is negative
    bdf = tb.BeliefsDataFrame(ts_value_schedule)
    save_to_db(bdf)
    db.session.commit()

    return True
def make_rolling_viewpoint_forecasts(
    sensor_id: int,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """Build forecasting model specs, make rolling-viewpoint forecasts, and save the forecasts made.

    Each individual forecast is a belief about a time interval.
    Rolling-viewpoint forecasts share the same belief horizon (the duration between belief time and knowledge time).
    Model specs are also retrained in a rolling fashion, but with their own frequency set in custom_model_params.
    See the timely-beliefs lib for relevant terminology.

    Parameters
    ----------
    :param sensor_id: int
        To identify which sensor to forecast
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e. end time of the last interval to be forecast
    :param custom_model_params: dict
        pass in params which will be passed to the model specs configurator,
        e.g. outcome_var_transformation; only advisable to be used for testing
    :returns: int
        the number of forecasts made
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find out which model to run, fall back to the latest recommended
    model_search_term = rq_job.meta.get("model_search_term", "linear-OLS")

    # find sensor
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s"
        % (rq_job.id, sensor, horizon, model_search_term, start, end)
    )

    if hasattr(sensor, "market_type"):
        ex_post_horizon = None  # Todo: until we sorted out the ex_post_horizon, use all available price data
    else:
        ex_post_horizon = timedelta(hours=0)

    # Make model specs
    model_configurator = lookup_model_specs_configurator(model_search_term)
    model_specs, model_identifier, fallback_model_search_term = model_configurator(
        sensor=sensor,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    rq_job.meta["model_identifier"] = model_identifier
    rq_job.meta["fallback_model_search_term"] = fallback_model_search_term
    rq_job.save()

    # before we run the model, check if the horizon is okay and enough data is available
    if horizon not in supported_horizons():
        raise InvalidHorizonException(
            "Invalid horizon on job %s: %s" % (rq_job.id, horizon)
        )

    query_window = get_query_window(
        model_specs.start_of_training,
        end,
        [lag * model_specs.frequency for lag in model_specs.lags],
    )
    check_data_availability(
        sensor,
        TimedBelief,
        start,
        end,
        query_window,
        horizon,
    )

    data_source = get_data_source(
        data_source_name="Seita (%s)"
        % rq_job.meta.get("model_identifier", "unknown model"),
        data_source_type="forecasting script",
    )

    forecasts, model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    click.echo("Job %s made %d forecasts." % (rq_job.id, len(forecasts)))

    ts_value_forecasts = [
        TimedBelief(
            event_start=dt,
            belief_horizon=horizon,
            event_value=value,
            sensor=sensor,
            source=data_source,
        )
        for dt, value in forecasts.items()
    ]
    bdf = tb.BeliefsDataFrame(ts_value_forecasts)
    save_to_db(bdf)
    db.session.commit()

    return len(forecasts)
def query_time_series_data(
    old_sensor_names: Tuple[str],
    make_query: QueryCallType,
    query_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    belief_horizon_window: Tuple[Optional[timedelta], Optional[timedelta]] = (
        None,
        None,
    ),
    belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (None, None),
    belief_time: Optional[datetime] = None,
    user_source_ids: Optional[Union[int, List[int]]] = None,
    source_types: Optional[List[str]] = None,
    exclude_source_types: Optional[List[str]] = None,
    resolution: Union[str, timedelta] = None,
) -> Dict[str, tb.BeliefsDataFrame]:
    """
    Run a query for time series data on the database for a tuple of assets.
    Here, we need to know that postgres only stores naive datetimes and we keep them as UTC.
    Therefore, we localize the result. Then, we resample the result to fit the given resolution.
    * Returns a dictionary of asset names (as keys) and BeliefsDataFrames (as values),
      with each BeliefsDataFrame having an "event_value" column.
    * Note that we convert string resolutions to datetime.timedelta objects.
    """
    # On demo, we query older data as if it's the current year's data (we convert back below)
    if current_app.config.get("FLEXMEASURES_MODE", "") == "demo":
        query_window = convert_query_window_for_demo(query_window)

    query = make_query(
        old_sensor_names=old_sensor_names,
        query_window=query_window,
        belief_horizon_window=belief_horizon_window,
        belief_time_window=belief_time_window,
        belief_time=belief_time,
        user_source_ids=user_source_ids,
        source_types=source_types,
        exclude_source_types=exclude_source_types,
    )

    df_all_assets = pd.DataFrame(
        query.all(), columns=[col["name"] for col in query.column_descriptions]
    )
    bdf_dict: Dict[str, tb.BeliefsDataFrame] = {}
    for old_sensor_model_name in old_sensor_names:

        # Select data for the given asset
        df = df_all_assets[df_all_assets["name"] == old_sensor_model_name].loc[
            :, df_all_assets.columns != "name"
        ]

        # todo: Keep the preferred data source (first look at source_type, then user_source_id if needed)
        # if user_source_ids:
        #     values_orig["source"] = values_orig["source"].astype("category")
        #     values_orig["source"].cat.set_categories(user_source_ids, inplace=True)
        #     values_orig = (
        #         values_orig.sort_values(by=["source"], ascending=True)
        #         .drop_duplicates(subset=["source"], keep="first")
        #         .sort_values(by=["datetime"])
        #     )

        # Keep the most recent observation
        # todo: this block also resolves multi-sourced data by selecting the "first" (unsorted) source;
        #       we should have a consistent policy for this case
        df = (
            df.sort_values(by=["horizon"], ascending=True)
            .drop_duplicates(subset=["datetime"], keep="first")
            .sort_values(by=["datetime"])
        )

        # Index according to time and rename columns
        # todo: this operation can be simplified after moving our time series data structures to timely-beliefs
        df.rename(
            index=str,
            columns={
                "value": "event_value",
                "datetime": "event_start",
                "DataSource": "source",
                "horizon": "belief_horizon",
            },
            inplace=True,
        )
        df.set_index("event_start", drop=True, inplace=True)

        # Convert to the FLEXMEASURES timezone
        if not df.empty:
            df.index = df.index.tz_convert(time_utils.get_timezone())

        # On demo, we query older data as if it's the current year's data (we converted above)
        if current_app.config.get("FLEXMEASURES_MODE", "") == "demo":
            df.index = df.index.map(lambda t: t.replace(year=datetime.now().year))

        sensor = find_sensor_by_name(name=old_sensor_model_name)
        bdf = tb.BeliefsDataFrame(df.reset_index(), sensor=sensor)

        # re-sample data to the resolution we need to serve
        if resolution is None:
            resolution = sensor.event_resolution
        elif isinstance(resolution, str):
            try:
                # todo: allow pandas freqstr as resolution when timely-beliefs supports DateOffsets,
                # https://github.com/SeitaBV/timely-beliefs/issues/13
                resolution = pd.to_timedelta(resolution).to_pytimedelta()
            except ValueError:
                resolution = isodate.parse_duration(resolution)
        bdf = bdf.resample_events(
            event_resolution=resolution, keep_only_most_recent_belief=True
        )

        # Slice query window after resampling
        if query_window[0] is not None:
            bdf = bdf[bdf.index.get_level_values("event_start") >= query_window[0]]
        if query_window[1] is not None:
            bdf = bdf[bdf.index.get_level_values("event_start") < query_window[1]]

        bdf_dict[old_sensor_model_name] = bdf

    return bdf_dict
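
# A hedged sketch of the resolution-parsing fallback above: strings pandas
# understands (e.g. "15min") parse via pd.to_timedelta, while strings it
# rejects fall through to isodate's ISO 8601 duration parser. Which strings
# reach the fallback depends on the pandas version.
def example_parse_resolution(resolution: str):
    try:
        return pd.to_timedelta(resolution).to_pytimedelta()
    except ValueError:
        return isodate.parse_duration(resolution)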
def main(
    df,
    current_time,
    start_time,
    last_start_time=None,
    model=LinearRegression(),
    value="event_value",
    addtocsv=False,
):
    """Main function of the generator: works with the timely_beliefs framework
    and adds the results as timely_beliefs rows and/or to the input csv file.

    @param df: BeliefsDataFrame containing forecast data
    @param current_time: datetime string
    @param start_time: datetime string
    @param last_start_time: datetime string
    @param model: model to use to generate new data
    @param value: name of the value column
    @param addtocsv: boolean
    """
    if last_start_time is None:
        last_start_time = start_time
    first_date = df.iloc[0].name[0]
    last_date = df.iloc[-1].name[0]

    # check if current time is in the data frame
    if current_time < first_date or current_time > last_date:
        raise SystemExit("Error: your current_time is not in the dataframe")

    # get a BeliefsSeries for each of the times
    current = get_beliefsSeries_from_event_start(df, current_time, current_time, value)
    start = get_beliefsSeries_from_event_start(df, start_time, current_time, value)
    last_start = get_beliefsSeries_from_event_start(df, last_start_time, current_time, value)

    # create list of BeliefsSeries
    beliefSeries_list = [start.copy()]
    blfs_list = []
    temp_time = start_time
    i = 0
    # loop over the given time slot
    while temp_time <= last_start_time:
        if temp_time > last_date:
            i += 1
            blfs_list += [
                tb.TimedBelief(
                    source=tb.BeliefSource(name="test" + str(i)),
                    sensor=df.sensor,
                    value=generator(df, current, model),
                    belief_time=current_time,
                    event_start=temp_time,
                    cumulative_probability=0.5,
                )
            ]
        else:
            beliefSeries_list += [
                get_beliefsSeries_from_event_start(df, temp_time, current_time, value).copy()
            ]
        temp_time += df.sensor.event_resolution
    df_1 = tb.BeliefsDataFrame(sensor=df.sensor, beliefs=blfs_list)

    # loop over all time steps
    for beliefSeries in beliefSeries_list:
        if not beliefSeries.empty:
            beliefSeries[0] = generator(df, current, model)
    if addtocsv:
        # note: the original snippet referenced `csv_in` and `datacomp` here,
        # neither of which is defined in this scope
        with open(csv_in, "w") as csvfile:
            writer = csv.writer(csvfile, delimiter=",")
            writer.writerows(datacomp)
    temp = beliefSeries_list[0].to_frame(name=value)
    for i in range(len(beliefSeries_list) - 2):
        temp = temp.append(beliefSeries_list[i + 2].to_frame(name=value))
    df_1 = temp.append(df_1)
    return df_1
def get_weather_data(
    assets: List[Asset],
    metrics: dict,
    sensor_type: WeatherSensorType,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, str, Sensor, dict]:
    """Get the most recent weather data and forecast weather data for the requested forecast horizon.

    Return weather observations, weather forecasts (either might be an empty DataFrame),
    the name of the sensor type, the weather sensor and a dict with the following metrics:
    - expected value
    - mean absolute error
    - mean absolute percentage error
    - weighted absolute percentage error
    """
    # Todo: for now we only collect weather data for a single asset
    asset = assets[0]

    weather_data = tb.BeliefsDataFrame(columns=["event_value"])
    weather_forecast_data = tb.BeliefsDataFrame(columns=["event_value"])
    sensor_type_name = ""
    closest_sensor = None
    if sensor_type:
        # Find the 50 closest weather sensors
        sensor_type_name = sensor_type.name
        closest_sensors = Sensor.find_closest(
            generic_asset_type_name=asset.generic_asset.generic_asset_type.name,
            sensor_name=sensor_type_name,
            n=50,
            object=asset,
        )
        if closest_sensors:
            closest_sensor = closest_sensors[0]

            # Collect the weather data for the requested time window
            sensor_names = [sensor.name for sensor in closest_sensors]

            # Get weather data
            weather_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
                sensor_names,
                event_starts_after=query_window[0],
                event_ends_before=query_window[1],
                resolution=resolution,
                horizons_at_least=None,
                horizons_at_most=timedelta(hours=0),
                sum_multiple=False,
            )
            weather_df_dict: Dict[str, pd.DataFrame] = {}
            for sensor_name in weather_bdf_dict:
                weather_df_dict[sensor_name] = simplify_index(
                    weather_bdf_dict[sensor_name],
                    index_levels_to_columns=["belief_horizon", "source"],
                )

            # Get weather forecasts
            weather_forecast_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
                sensor_names,
                event_starts_after=query_window[0],
                event_ends_before=query_window[1],
                resolution=resolution,
                horizons_at_least=forecast_horizon,
                horizons_at_most=None,
                source_types=["user", "forecasting script", "script"],
                sum_multiple=False,
            )
            weather_forecast_df_dict: Dict[str, pd.DataFrame] = {}
            for sensor_name in weather_forecast_bdf_dict:
                weather_forecast_df_dict[sensor_name] = simplify_index(
                    weather_forecast_bdf_dict[sensor_name],
                    index_levels_to_columns=["belief_horizon", "source"],
                )

            # Take the closest weather sensor which contains some data for the selected time window
            for sensor, sensor_name in zip(closest_sensors, sensor_names):
                if (
                    not weather_df_dict[sensor_name]["event_value"].isnull().values.all()
                    or not weather_forecast_df_dict[sensor_name]["event_value"]
                    .isnull()
                    .values.all()
                ):
                    closest_sensor = sensor
                    break
            weather_data = weather_df_dict[sensor_name]
            weather_forecast_data = weather_forecast_df_dict[sensor_name]

            # Calculate the weather metrics
            if not weather_data.empty:
                metrics["realised_weather"] = weather_data["event_value"].mean()
            else:
                metrics["realised_weather"] = np.NaN
            if (
                not weather_forecast_data.empty
                and weather_forecast_data.size == weather_data.size
            ):
                metrics["expected_weather"] = weather_forecast_data["event_value"].mean()
                metrics["mae_weather"] = calculations.mean_absolute_error(
                    weather_data["event_value"], weather_forecast_data["event_value"]
                )
                metrics["mape_weather"] = calculations.mean_absolute_percentage_error(
                    weather_data["event_value"], weather_forecast_data["event_value"]
                )
                metrics["wape_weather"] = calculations.weighted_absolute_percentage_error(
                    weather_data["event_value"], weather_forecast_data["event_value"]
                )
            else:
                metrics["expected_weather"] = np.NaN
                metrics["mae_weather"] = np.NaN
                metrics["mape_weather"] = np.NaN
                metrics["wape_weather"] = np.NaN
    return (
        weather_data,
        weather_forecast_data,
        sensor_type_name,
        closest_sensor,
        metrics,
    )
    assert beliefs.index.tzinfo == pytz.utc

    # Construct the BeliefsDataFrame by looping over the belief horizons
    blfs = load_time_series(
        beliefs[sensor.name.replace(" ", "_").lower()],
        sensor=sensor,
        source=source,
        belief_horizon=timedelta(hours=0),
        cumulative_probability=0.5,
    )  # load the observations (keep cp=0.5)
    return blfs


# Create source and sensors
source_a = tb.BeliefSource(name="KNMI")
sensors = (
    tb.Sensor(name=descr[0], unit=descr[1], event_resolution=timedelta(minutes=15))
    for descr in sensor_descriptions
)

# Create BeliefsDataFrame
for sensor in sensors:
    blfs = read_beliefs_from_csv(
        sensor, source=source_a, tz_hour_difference=tz_hour_difference, n_events=n_events
    )
    df = tb.BeliefsDataFrame(sensor=sensor, beliefs=blfs).sort_index()

df.keys()
def post_price_data_response(
    unit,
    generic_asset_name_groups,
    horizon,
    rolling,
    value_groups,
    start,
    duration,
    resolution,
):
    current_app.logger.info("POSTING PRICE DATA")

    data_source = get_or_create_source(current_user)
    price_df_per_market = []
    forecasting_jobs = []
    for market_group, value_group in zip(generic_asset_name_groups, value_groups):
        for market in market_group:

            # Parse the entity address
            try:
                ea = parse_entity_address(market, entity_type="market", fm_scheme="fm0")
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            market_name = ea["market_name"]

            # Look for the Sensor object
            sensor = get_sensor_by_unique_name(market_name, ["day_ahead", "tou_tariff"])
            if is_response_tuple(sensor):
                # Error message telling the user what to do
                return sensor
            if unit != sensor.unit:
                return invalid_unit("%s prices" % sensor.name, [sensor.unit])

            # Create new Price objects
            beliefs = []
            for j, value in enumerate(value_group):
                dt = start + j * duration / len(value_group)
                if rolling:
                    h = horizon
                else:  # Deduct the difference in end times of the individual timeslot and the timeseries duration
                    h = horizon - (
                        (start + duration) - (dt + duration / len(value_group))
                    )
                p = TimedBelief(
                    event_start=dt,
                    event_value=value,
                    belief_horizon=h,
                    sensor=sensor,
                    source=data_source,
                )
                beliefs.append(p)
            price_df_per_market.append(tb.BeliefsDataFrame(beliefs))

            # Make forecasts, but not in play mode. Price forecasts (horizon>0) can still lead to other price forecasts,
            # by the way, due to things like day-ahead markets.
            if current_app.config.get("FLEXMEASURES_MODE", "") != "play":
                # Forecast 24 and 48 hours ahead for at most the last 24 hours of posted price data
                forecasting_jobs = create_forecasting_jobs(
                    sensor.id,
                    max(start, start + duration - timedelta(hours=24)),
                    start + duration,
                    resolution=duration / len(value_group),
                    horizons=[timedelta(hours=24), timedelta(hours=48)],
                    enqueue=False,  # will enqueue later, after saving data
                )

    return save_and_enqueue(price_df_per_market, forecasting_jobs)
def create_connection_and_value_groups(  # noqa: C901
    unit, generic_asset_name_groups, value_groups, horizon, rolling, start, duration
):
    """
    Code for POSTing Power values to the API.
    Only lets users post to assets they own.
    The sign of values is validated according to asset specs, but in USEF terms.
    Then, we store the reverse sign for FlexMeasures specs (with positive production
    and negative consumption). If power values are not forecasts, forecasting jobs are created.
    """
    current_app.logger.info("POSTING POWER DATA")

    data_source = get_or_create_source(current_user)
    user_sensors = get_sensors()
    if not user_sensors:
        current_app.logger.info("User doesn't seem to have any assets")
    user_sensor_ids = [sensor.id for sensor in user_sensors]

    power_df_per_connection = []
    forecasting_jobs = []
    for connection_group, value_group in zip(generic_asset_name_groups, value_groups):
        for connection in connection_group:

            # TODO: get asset through util function after refactoring
            # Parse the entity address
            try:
                connection = parse_entity_address(
                    connection, entity_type="connection", fm_scheme="fm0"
                )
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            sensor_id = connection["asset_id"]

            # Look for the Sensor object
            if sensor_id in user_sensor_ids:
                sensor = Sensor.query.filter(Sensor.id == sensor_id).one_or_none()
            else:
                current_app.logger.warning("Cannot identify connection %s" % connection)
                return unrecognized_connection_group()

            # Validate the sign of the values (following USEF specs with positive consumption and negative production)
            if sensor.get_attribute("is_strictly_non_positive") and any(
                v < 0 for v in value_group
            ):
                extra_info = (
                    "Connection %s is registered as a pure consumer and can only receive non-negative values."
                    % sensor.entity_address
                )
                return power_value_too_small(extra_info)
            elif sensor.get_attribute("is_strictly_non_negative") and any(
                v > 0 for v in value_group
            ):
                extra_info = (
                    "Connection %s is registered as a pure producer and can only receive non-positive values."
                    % sensor.entity_address
                )
                return power_value_too_big(extra_info)

            # Create a new BeliefsDataFrame
            beliefs = []
            for j, value in enumerate(value_group):
                dt = start + j * duration / len(value_group)
                if rolling:
                    h = horizon
                else:  # Deduct the difference in end times of the individual timeslot and the timeseries duration
                    h = horizon - (
                        (start + duration) - (dt + duration / len(value_group))
                    )
                p = TimedBelief(
                    event_start=dt,
                    event_value=value
                    * -1,  # Reverse sign for FlexMeasures specs with positive production and negative consumption
                    belief_horizon=h,
                    sensor=sensor,
                    source=data_source,
                )
                assert p not in db.session
                beliefs.append(p)
            power_df_per_connection.append(tb.BeliefsDataFrame(beliefs))

            # make forecasts, but only if the sent-in values are not forecasts themselves
            if horizon <= timedelta(
                hours=0
            ):  # Todo: replace 0 hours with whatever the moment of switching from ex-ante to ex-post is for this sensor
                forecasting_jobs.extend(
                    create_forecasting_jobs(
                        sensor_id,
                        start,
                        start + duration,
                        resolution=duration / len(value_group),
                        enqueue=False,  # will enqueue later, after saving data
                    )
                )

    return save_and_enqueue(power_df_per_connection, forecasting_jobs)