def test_load_beliefs(csv_file):
    """Test loading a BeliefsDataFrame from csv.

    The saved file does not contain the sensor information, and the sources
    are saved by their name. The checks below therefore cover:

    - the sensor passed on loading ends up on the loaded frame;
    - loading without a source lookup warns that the sources are of a type
      other than BeliefSource;
    - passing ``look_up_sources`` resolves the saved source names to the given
      BeliefSource objects, and raises a ValueError when only a non-matching
      source is offered.
    """
    csv_path = "test.csv"
    warning_pattern = "type other than BeliefSource"

    # Read with plain pandas first, then wrap the result ourselves
    raw_frame = pd.read_csv(csv_path)
    with pytest.warns(UserWarning, match=warning_pattern):
        wrapped = tb.BeliefsDataFrame(raw_frame, sensor=tb.Sensor("Sensor Y"))
    assert wrapped.sensor.name == "Sensor Y"

    # Reading without a lookup must warn and leave non-BeliefSource sources
    with pytest.warns(UserWarning, match=warning_pattern):
        unresolved = tb.read_csv(csv_path, sensor=tb.Sensor("Sensor Y"))
    source_types = unresolved.index.get_level_values("source").map(type)
    assert all(source_type != tb.BeliefSource for source_type in source_types)

    # A lookup list without the saved sources must be rejected
    unknown_source = tb.BeliefSource(name="Source X")
    with pytest.raises(ValueError, match="not in list"):
        tb.read_csv(
            csv_path,
            sensor=tb.Sensor("Sensor Y"),
            look_up_sources=[unknown_source],
        )

    # A lookup list holding both saved sources must resolve them
    source_a = tb.BeliefSource("Source A")
    source_b = tb.BeliefSource("Source B")
    resolved = tb.read_csv(
        csv_path,
        sensor=tb.Sensor("Sensor Y"),
        look_up_sources=[source_a, source_b],
    )
    assert resolved.sensor.name == "Sensor Y"
    loaded_sources = resolved.index.get_level_values("source")
    assert source_a in loaded_sources
    assert source_b in loaded_sources

    # Both time levels must come back as datetime objects
    first_event_start = resolved.index.get_level_values("event_start")[0]
    first_belief_time = resolved.index.get_level_values("belief_time")[0]
    assert isinstance(first_event_start, datetime)
    assert isinstance(first_belief_time, datetime)
def csv_as_belief(csv_in, tz_hour_difference, n_events=None):
    """
    Reads beliefs for the configured sensor and returns them as a sorted BeliefsDataFrame

    @param csv_in: passed on to read_beliefs_from_csv as its second positional argument
    @param tz_hour_difference: passed on to read_beliefs_from_csv unchanged
    @param n_events: passed on to read_beliefs_from_csv unchanged
    """
    sensor_descriptions = (("Temperature", "°C"),)

    # All beliefs are attributed to this single source
    knmi = tb.BeliefSource(name="KNMI")

    # One frame is built per described sensor; the frame built last is returned
    for sensor_name, sensor_unit in sensor_descriptions:
        sensor = tb.Sensor(
            name=sensor_name,
            unit=sensor_unit,
            event_resolution=timedelta(minutes=15),
        )
        beliefs = read_beliefs_from_csv(
            sensor,
            csv_in,
            source=knmi,
            tz_hour_difference=tz_hour_difference,
            n_events=n_events,
        )
        unsorted_frame = tb.BeliefsDataFrame(sensor=sensor, beliefs=beliefs)
        df = unsorted_frame.sort_index()
    return df
def make_df(n_events = 100, n_horizons = 169, tz_hour_difference=-9, event_resolution=timedelta(hours=1)):
    """
    Returns a sorted BeliefsDataFrame holding the beliefs read at three cumulative probabilities
    (0.05, 0.5 and 0.95) for the "Temperature" sensor, all attributed to one source

    @param n_events: int, number of events in DataFrame (not referenced in this function body)
    @param n_horizons: int, number of horizons in DataFrame (not referenced in this function body)
    @param tz_hour_difference: float, time difference, passed on to read_beliefs_from_csv
    @param event_resolution: timedelta object, event resolution of the sensor and of the read
    """
    sensor_descriptions = (("Temperature", "°C"),)
    forecaster = tb.BeliefSource(name="Random forest")
    cumulative_probabilities = (0.05, 0.5, 0.95)

    collected = []
    for sensor_name, sensor_unit in sensor_descriptions:
        sensor = tb.Sensor(name=sensor_name, unit=sensor_unit, event_resolution=event_resolution)
        # One read per cumulative probability, in ascending order
        for cp in cumulative_probabilities:
            collected.extend(
                read_beliefs_from_csv(
                    sensor,
                    source=forecaster,
                    cp=cp,
                    event_resolution=event_resolution,
                    tz_hour_difference=tz_hour_difference,
                )
            )
    unsorted_frame = tb.BeliefsDataFrame(sensor=sensor, beliefs=collected)
    bdf = unsorted_frame.sort_index()
    return bdf
def csv_as_belief(csv_in, tz_hour_difference, n_events=None):
    """
    Turns the beliefs read for the selected sensor into a BeliefsDataFrame sorted by index

    @param csv_in: handed to read_beliefs_from_csv as its second positional argument
    @param tz_hour_difference: handed to read_beliefs_from_csv unchanged
    @param n_events: handed to read_beliefs_from_csv unchanged
    """
    # Uncomment desired forecasting data one at a time
    sensor_descriptions = (
        # ("Solar irradiation", "kW/m²"),
        # ("Solar power", "kW"),
        # ("Wind speed", "m/s"),
        # ("Wind power", "MW"),
        ("Temperature", "°C"),
    )

    # Single source shared by every belief, and one sensor per description
    belief_source = tb.BeliefSource(name="KNMI")
    quarter_hour = timedelta(minutes=15)
    sensors = (
        tb.Sensor(name=label, unit=unit, event_resolution=quarter_hour)
        for label, unit in sensor_descriptions
    )

    # Build a frame per sensor; the one built last is what gets returned
    for sensor in sensors:
        loaded = read_beliefs_from_csv(
            sensor,
            csv_in,
            source=belief_source,
            tz_hour_difference=tz_hour_difference,
            n_events=n_events,
        )
        frame = tb.BeliefsDataFrame(sensor=sensor, beliefs=loaded)
        df = frame.sort_index()
    return df
def main(df, current_time, start_time, last_start_time=None, model=None, source=None):
    """
    Accepts a BeliefsDataFrame df and returns forecasts from start_time to last_start_time
    as timely beliefs rows

    @param df: BeliefsDataFrame; the first entry of the first and last row labels is used as the
               earliest and latest known date (assumes df is sorted on that level - TODO confirm)
    @param current_time: datetime object, generate a forecast from this point
    @param start_time: datetime object, first event start to forecast
    @param last_start_time: datetime object, last event start to forecast (inclusive),
                            defaults to start_time
    @param model: model handed to generator(); a new LinearRegression() is created per call
                  when omitted, so no fitted state is shared between calls
    @param source: accepted but not used by this function
    @raise ValueError: when start_time lies after last_start_time, when current_time lies outside
                       the dataframe, when the event resolution is not positive, or when
                       current_time does not line up with the event resolution
    """
    if last_start_time is None:
        last_start_time = start_time
    # check if times are in chronological order
    elif start_time > last_start_time:
        raise ValueError("last_start_time must be set after start_time")

    first_date = df.iloc[0].name[0]
    last_date = df.iloc[-1].name[0]

    # check if current time is in data frame
    if current_time < first_date or current_time > last_date:
        raise ValueError(
            'Your current_time is not in the dataframe \nstart:{}\nend :{}'.format(first_date, last_date))

    # check if current time is compatible with the event resolution
    # total_seconds() is used because timedelta.seconds drops whole days, which made
    # day-sized resolutions evaluate to 0 minutes and the modulo below divide by zero
    resolution_minutes = df.sensor.event_resolution.total_seconds() / 60
    if resolution_minutes <= 0:
        # a non-positive step would also keep the loop below from ever advancing
        raise ValueError('The event resolution of the sensor must be positive')
    if current_time.minute % resolution_minutes != 0:
        raise ValueError(
            'Your current_time is not compatible with the event resolution of {} minutes'
            .format(resolution_minutes))

    # created only after validation, and per call rather than once at definition time
    if model is None:
        model = LinearRegression()

    # get beliefseries for the current time and for the first requested event start
    current = get_beliefsSeries_from_event_start(df, current_time, current_time, 'event_value')
    start = get_beliefsSeries_from_event_start(df, start_time, current_time, 'event_value')

    # create list of beliefSeries
    beliefSeries_list = [start.copy()]
    blfs_list = []
    temp_time = start_time
    i = 0

    # loop over given time slot
    while temp_time <= last_start_time:
        if temp_time > last_date or temp_time < first_date:
            # event start lies outside the dataframe: create a new belief for it
            i += 1
            blfs_list.append(
                tb.TimedBelief(
                    source=tb.BeliefSource(name='test' + str(i)),
                    sensor=df.sensor,
                    value=generator(df, current, model),
                    belief_time=current_time,
                    event_start=temp_time,
                    cumulative_probability=0.5,
                )
            )
        else:
            # event start lies inside the dataframe: reuse the series found there
            beliefSeries_list.append(
                get_beliefsSeries_from_event_start(df, temp_time, current_time, 'event_value').copy()
            )
        temp_time += df.sensor.event_resolution

    df_1 = tb.BeliefsDataFrame(sensor=df.sensor, beliefs=blfs_list)

    # loops over all time steps
    for beliefSeries in beliefSeries_list:
        if not beliefSeries.empty:
            beliefSeries[0] = generator(df, current, model)

    temp = beliefSeries_list[0].to_frame(name='event_value')
    # Index 1 is skipped: when start_time lies inside the dataframe, the loop above looked up
    # start_time again, so index 1 repeats index 0.
    # NOTE(review): when start_time lies before first_date, index 1 holds a different event
    # start and is skipped as well - confirm this is intended.
    # NOTE(review): DataFrame.append was removed in pandas 2.0; it is kept here because it is
    # not visible from this function whether pd.concat keeps the sensor metadata - confirm.
    for beliefSeries in beliefSeries_list[2:]:
        temp = temp.append(beliefSeries.to_frame(name='event_value'))
    # in-frame rows first, followed by the newly created out-of-frame beliefs
    df_1 = temp.append(df_1)
    return df_1
beliefs = beliefs.resample(event_resolution).mean() assert beliefs.index.tzinfo == pytz.utc # Construct the BeliefsDataFrame by looping over the belief horizons blfs = load_time_series( beliefs[sensor.name.replace(' ', '_').lower()], sensor=sensor, source=source, belief_horizon=timedelta(hours=0), cumulative_probability=0.5) # load the observations (keep cp=0.5) return blfs # Create source and sensors source_a = tb.BeliefSource(name="KNMI") sensors = (tb.Sensor(name=descr[0], unit=descr[1], event_resolution=timedelta(minutes=15)) for descr in sensor_descriptions) # Create BeliefsDataFrame for sensor in sensors: blfs = read_beliefs_from_csv(sensor, source=source_a, tz_hour_difference=tz_hour_difference, n_events=n_events) df = tb.BeliefsDataFrame(sensor=sensor, beliefs=blfs).sort_index() #print(df) df.keys() #df['2015-05-16 09:14:01+00:00']
def main(df, current_time, start_time, last_start_time=None, model=None, value='event_value', addtocsv=False):
    """
    This is the main function of the generator, it works with the timely_beliefs framework
    and adds results to timely_beliefs rows and/or to a csv file

    @param df: BeliefsDataFrame; the first entry of the first and last row labels is used as the
               earliest and latest known date (assumes df is sorted on that level - TODO confirm)
    @param current_time: datetime, generate a forecast from this point
    @param start_time: datetime, first event start to forecast
    @param last_start_time: datetime, last event start to forecast (inclusive),
                            defaults to start_time
    @param model: model handed to generator(); a new LinearRegression() is created per call
                  when omitted, so no fitted state is shared between calls
    @param value: name given to the value column, also passed to the series lookup
    @param addtocsv: boolean, also write to csv (see the review note in the body)
    @raise SystemExit: when current_time lies outside the dataframe
    """
    if last_start_time is None:
        last_start_time = start_time

    first_date = df.iloc[0].name[0]
    last_date = df.iloc[-1].name[0]

    # check if current time is in data frame
    if current_time < first_date or current_time > last_date:
        raise SystemExit('Error: your current_time is not in the dataframe')

    # created only after validation, and per call rather than once at definition time
    if model is None:
        model = LinearRegression()

    # get beliefseries for the current time and for the first requested event start
    current = get_beliefsSeries_from_event_start(df, current_time, current_time, value)
    start = get_beliefsSeries_from_event_start(df, start_time, current_time, value)

    # create list of beliefSeries
    beliefSeries_list = [start.copy()]
    blfs_list = []
    temp_time = start_time
    i = 0

    # loop over given time slot
    while temp_time <= last_start_time:
        if temp_time > last_date:
            # event start lies past the end of the dataframe: create a new belief for it
            i += 1
            blfs_list.append(
                tb.TimedBelief(
                    source=tb.BeliefSource(name='test' + str(i)),
                    sensor=df.sensor,
                    value=generator(df, current, model),
                    belief_time=current_time,
                    event_start=temp_time,
                    cumulative_probability=0.5,
                )
            )
        else:
            # same four-argument lookup as used for current and start above
            beliefSeries_list.append(
                get_beliefsSeries_from_event_start(df, temp_time, current_time, value).copy()
            )
        temp_time += df.sensor.event_resolution

    df_1 = tb.BeliefsDataFrame(sensor=df.sensor, beliefs=blfs_list)

    # loops over all time steps
    for beliefSeries in beliefSeries_list:
        if not beliefSeries.empty:
            beliefSeries[0] = generator(df, current, model)

    if addtocsv:
        # NOTE(review): csv_in and datacomp are not defined inside this function, so this
        # branch only works if both exist at module level - confirm where they come from.
        # newline='' is what the csv module asks for on files handed to csv.writer
        with open(csv_in, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerows(datacomp)

    temp = beliefSeries_list[0].to_frame(name=value)
    # Index 1 is skipped: when start_time lies inside the dataframe, the loop above looked up
    # start_time again, so index 1 repeats index 0.
    # NOTE(review): DataFrame.append was removed in pandas 2.0; it is kept here because it is
    # not visible from this function whether pd.concat keeps the sensor metadata - confirm.
    for beliefSeries in beliefSeries_list[2:]:
        temp = temp.append(beliefSeries.to_frame(name=value))
    # in-frame rows first, followed by the newly created out-of-frame beliefs
    df_1 = temp.append(df_1)
    return df_1
import os
from datetime import timedelta

# NOTE(review): the order of the two timely_beliefs imports is kept exactly as found;
# confirm whether it matters for package initialisation before re-sorting them.
from timely_beliefs.examples.beliefs_data_frames import sixteen_probabilistic_beliefs
import timely_beliefs as tb

# Ready-made example frame: whatever sixteen_probabilistic_beliefs() returns,
# built once when this module is imported.
example_df = sixteen_probabilistic_beliefs()

# Beliefs read from the temperature.csv file sitting in the same directory as this
# module, loaded at import time and assigned to a sensor named "Thermometer A"
# (unit °C, hourly event resolution) and a source named "Source X".
temperature_df = tb.read_csv(
    os.path.dirname(os.path.abspath(__file__)) + "/temperature.csv",
    sensor=tb.Sensor("Thermometer A", unit="°C", event_resolution=timedelta(hours=1)),
    source=tb.BeliefSource("Source X"),
)
source=source, belief_horizon=(isodate.parse_duration("PT%s" % h)) + event_resolution, cumulative_probability=cp) # load the forecasts except isodate.isoerror.ISO8601Error: # In case of old headers that don't yet follow the ISO 8601 standard blfs += load_time_series( beliefs[h].head(n_events), sensor=sensor, source=source, belief_horizon=(isodate.parse_duration("%s" % h)) + event_resolution, cumulative_probability=cp) # load the forecasts return blfs source_a = tb.BeliefSource(name="Linear regression") source_b = tb.BeliefSource(name="XGBoost") source_c = tb.BeliefSource(name="Random forest") sensors = (tb.Sensor(name=descr[0], unit=descr[1], event_resolution=event_resolution) for descr in sensor_descriptions) for sensor in sensors: blfs = read_beliefs_from_csv(sensor, source=source_a, cp=0.5, event_resolution=event_resolution, tz_hour_difference=tz_hour_difference) blfs += read_beliefs_from_csv(sensor,