def use_heuristic(HEURISTIC_TYPE='Perfekt-Pred-Heuristic', epochs=1, threshold_dem=50,
                  deactivate_SMS=False, deactivate_LION=False):

    # Naming the agent:
    now = datetime.now()
    NAME = str(round(threshold_dem))+'_TARGET_VALUE_'+HEURISTIC_TYPE+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 1000,
        SMS_Anschaffungs_Preis = 115000/3,
        SMS_max_Nutzungsjahre = 20,
        Leistungspreis = 102,
        # Setup logging tags:
        logging_list = ['cost_saving','exact_costs','sum_exact_costs','sum_cost_saving'],
        # Deactivation options for the batteries:
        deactivate_SMS = deactivate_SMS,
        deactivate_LION = deactivate_LION)

    # Load the environment:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = ['norm_total_power','normal','seq_max'],
        # Battery stats:
        max_SMS_SoC = 12/3,
        max_LION_SoC = 54,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # Heuristics can only use continuous values:
        ACTION_TYPE = 'contin',
        OBS_TYPE = 'contin',
        # Define heuristic usage:
        AGENT_TYPE = 'heuristic')

    # Use the complete dataset (no validation split):
    env.use_all_data()

    # Setup the agent:
    agent = heurisitc(
        env = env,
        HEURISTIC_TYPE = HEURISTIC_TYPE,
        threshold_dem = threshold_dem)

    return agent.calculate(epochs=epochs)
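# A minimal usage sketch (an illustrative invocation, not from the source):
# run the perfect-prediction heuristic once with a 50 kW target value.
if __name__ == '__main__':
    use_heuristic(HEURISTIC_TYPE='Perfekt-Pred-Heuristic', epochs=1, threshold_dem=50)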
def run_agent(name='', gamma=.9, lr=0.1, tau=0.15, update_num=500, epsilon_decay='linear',
              input_list=['norm_total_power','normal','seq_max'], hidden_size=256,
              pre_trained_model=None, target_update_num=None):
    '''
    Trains and tests a DQN based on the passed parameters.
    '''
    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_DQN_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs and steps:
    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # DQN inputs can be continuous, but outputs must be discrete:
        ACTION_TYPE = 'discrete',
        OBS_TYPE = 'contin',
        # Set the number of discrete values:
        discrete_space = 22,
        # Size of the validation data:
        val_split = 0.1)

    # Setup the agent:
    agent = DQN(
        env = env,
        memory_len = update_num,
        # Training parameters:
        gamma = gamma,
        epsilon = 0.99,
        epsilon_min = 0.1,
        epsilon_decay = epsilon_decay,
        lr = lr,
        tau = tau,
        activation = 'relu',
        loss = 'mean_squared_error',
        hidden_size = hidden_size,
        pre_trained_model = pre_trained_model,
        target_update_num = target_update_num)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with the dataset that includes the validation data:
    env.use_all_data()
    testing(agent)
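# Hedged usage sketch: a hypothetical sweep over two of the DQN hyperparameters
# exposed above (the value grids are illustrative, not from the source).
if __name__ == '__main__':
    for lr in [0.1, 0.01]:
        for tau in [0.15, 0.2]:
            run_agent(name='lr_{}_tau_{}'.format(lr, tau), lr=lr, tau=tau)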
# Number of warm-up steps:
num_warmup_steps = 100
# Train every x steps:
update_num = 100
# Number of epochs and steps:
epochs = 100

# Setup reward_maker:
r_maker = reward_maker(
    LOGGER=logger,
    # Settings:
    COST_TYPE='exact_costs',
    R_TYPE='savings_focus',
    R_HORIZON='single_step',
    # Parameters to calculate costs:
    cost_per_kwh=0.2255,
    LION_Anschaffungs_Preis=34100,
    LION_max_Ladezyklen=1000,
    SMS_Anschaffungs_Preis=115000 / 3,
    SMS_max_Nutzungsjahre=20,
    Leistungspreis=102)

# Setup common_env:
env = common_env(
    reward_maker=r_maker,
    df=df,
    power_dem_df=power_dem_df,
    # Dataset inputs for the states:
    input_list=['norm_total_power', 'normal', 'seq_max'],
    # Battery stats:
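# Back-of-envelope illustration of what the cost parameters above imply
# (an assumption about how `reward_maker` combines them, not its actual code):
# - energy:        cost_per_kwh euro per consumed kWh
# - demand charge: Leistungspreis euro per kW of annual peak load
# - battery wear:  purchase price spread over cycles (LION) or years (SMS)
lion_cost_per_cycle = 34100 / 1000      # ~34.1 euro per full LION charge cycle
sms_cost_per_year = (115000 / 3) / 20   # ~1916.7 euro per year of SMS usage
saving_per_shaved_kw = 102              # 1 kW less peak saves 102 euro per year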
def run_agent(name='', learning_rate=0.00025, gamma=0.99, n_steps=2500, ent_coef=0.01,
              vf_coef=0.5, cliprange=0.2, input_list=['norm_total_power','normal','seq_max'],
              norm=None):

    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_PPO2_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = cms.dataset_and_logger(NAME)

    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        # Agents from stable-baselines can't use multi-step rewards from our code,
        # so R_HORIZON can only be 'single_step':
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102,
        norm_range = norm)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # PPO can use continuous values:
        ACTION_TYPE = 'contin',
        OBS_TYPE = 'contin',
        # Tells the environment to produce standard GYM outputs, so agents from
        # stable-baselines (or any other RL library that uses gym) can be used:
        AGENT_TYPE = 'standart_gym',
        val_split = 0.1)

    # Create the vectorised environment:
    dummy_env = DummyVecEnv([lambda: env])

    # Callback:
    checkpoint_callback = CheckpointCallback(save_freq=100000,
                                             save_path=cms.__dict__['D_PATH']+'agent-models/',
                                             name_prefix=NAME)

    # Setup the model:
    model = PPO2(MlpPolicy, dummy_env, verbose=1,
                 tensorboard_log=cms.__dict__['D_PATH']+'agent-logs/',
                 learning_rate=learning_rate, gamma=gamma, n_steps=n_steps,
                 ent_coef=ent_coef, vf_coef=vf_coef, cliprange=cliprange) #, nminibatches=1
    #model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log=DATENSATZ_PATH+'LOGS/agent_logging',
    #             callback=checkpoint_callback, n_steps=2500)

    # Train:
    model.learn(total_timesteps=epochs*len(env.__dict__['df']), tb_log_name=NAME)
    model.save(cms.__dict__['D_PATH']+"agent-models/"+NAME)
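# Hypothetical invocation (illustrative only): train a PPO2 baseline with the
# default hyperparameters defined in the signature above.
if __name__ == '__main__':
    run_agent(name='ppo2_baseline')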
# Number of warm-up steps:
num_warmup_steps = 100
# Train every x steps:
update_num = 50
# Number of epochs and steps:
epochs = 100

# Setup reward_maker:
r_maker = reward_maker(
    LOGGER=logger,
    # Settings:
    COST_TYPE='exact_costs',
    R_TYPE='savings_focus',
    # R_HORIZON is now an int for the number of periods of the reward horizon:
    R_HORIZON=12,
    # Additionally, the multi-step strategy must be set:
    M_STRATEGY='sum_to_terminal',
    # Parameters to calculate costs:
    cost_per_kwh=0.2255,
    LION_Anschaffungs_Preis=34100,
    LION_max_Ladezyklen=1000,
    SMS_Anschaffungs_Preis=115000 / 3,
    SMS_max_Nutzungsjahre=20,
    Leistungspreis=102)

# Setup common_env:
env = common_env(
    reward_maker=r_maker,
    df=df,
    power_dem_df=power_dem_df,
    # Dataset inputs for the states:
    input_list=['norm_total_power', 'normal', 'seq_max'],
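# Sketch of what a 'sum_to_terminal' multi-step reward could mean for
# R_HORIZON=12 (an illustrative interpretation, not the reward_maker internals):
def sum_to_terminal_sketch(step_rewards, horizon=12):
    '''Assigns each step the plain sum of rewards up to the end of its horizon.'''
    return [sum(step_rewards[t:t+horizon]) for t in range(len(step_rewards))]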
def run_agent(name='', gamma=.9, lr=0.1, update_num=100, load_table=None,
              epsilon_decay='linear', input_list=['norm_total_power','normal','seq_max']):

    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_Q-Table_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs and steps:
    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # A Q-Table can only take discrete inputs and produce discrete outputs:
        ACTION_TYPE = 'discrete',
        OBS_TYPE = 'discrete',
        # Set the number of discrete values:
        discrete_space = 22,
        # Size of the validation data:
        val_split = 0.1)

    # Setup the agent:
    agent = Q_Learner(
        env = env,
        memory_len = update_num,
        # Training parameters:
        gamma = gamma,
        epsilon = 0.99,
        epsilon_min = 0.1,
        epsilon_decay = epsilon_decay,
        lr = lr,
        load_table = load_table)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with the dataset that includes the validation data:
    env.use_all_data()
    testing(agent)
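# Hedged usage sketch: continue training from a previously saved table
# ('my_q_table' is a hypothetical name, not from the source):
if __name__ == '__main__':
    run_agent(name='q_table_retrain', load_table='my_q_table')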
def use_heuristic(HEURISTIC_TYPE='Perfekt-Pred', test_name='', epochs=1, threshold_dem=50,
                  deactivate_SMS=False, deactivate_LION=False, num_past_periods=12,
                  num_outputs=12, TYPE_LIST=['NORMAL'], seq_transform=['MAX'],
                  max_SMS_SoC=25, max_LION_SoC=54, no_pred=False):

    # Naming the agent:
    now = datetime.now()
    if test_name != 'Configurations':
        NAME = 'heuristic_'+test_name+'_'+HEURISTIC_TYPE+'_'+str(round(threshold_dem))+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")
    else:
        if deactivate_SMS == False:
            SMS_string = 'SMS'
        else:
            SMS_string = 'None'
        if deactivate_LION == False:
            LION_string = 'LION'
        else:
            LION_string = 'None'
        NAME = str(round(threshold_dem))+'-'+LION_string+'-'+SMS_string
        NAME = 'heuristic_'+test_name+'_'+HEURISTIC_TYPE+'_'+NAME+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, input_list = load_specific_sets(num_past_periods, num_outputs, TYPE_LIST, seq_transform, no_pred)
    logger, period_min = load_logger(NAME, only_per_episode=False)

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameters to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102,
        # Setup logging tags:
        logging_list = ['cost_saving','exact_costs','sum_exact_costs','sum_cost_saving'],
        # Deactivation options for the batteries:
        deactivate_SMS = deactivate_SMS,
        deactivate_LION = deactivate_LION)

    # Load the environment:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = max_SMS_SoC, #/1.2
        max_LION_SoC = max_LION_SoC, #/1.2
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # Heuristics can only use continuous values:
        ACTION_TYPE = 'contin',
        OBS_TYPE = 'contin',
        # Define heuristic usage:
        AGENT_TYPE = 'heuristic',
        val_split = 0)

    # Use the complete dataset (no validation split):
    #env.use_all_data()

    # Setup the agent:
    agent = heurisitc(
        env = env,
        HEURISTIC_TYPE = HEURISTIC_TYPE,
        threshold_dem = threshold_dem)

    return agent.calculate(epochs=epochs, LSTM_column=input_list[-1])
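# Hypothetical call comparing battery configurations, mirroring the
# 'Configurations' naming branch above (the parameter grid is illustrative):
if __name__ == '__main__':
    for LION_off, SMS_off in [(False, False), (False, True), (True, False)]:
        use_heuristic(test_name='Configurations', threshold_dem=50,
                      deactivate_LION=LION_off, deactivate_SMS=SMS_off)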
def use_heuristic(HEURISTIC_TYPE='Perfekt-Pred-Heuristic', epochs=1, threshold_dem=50,
                  deactivate_SMS=True, deactivate_LION=True):

    # Naming the agent and setting up the directory path:
    now = datetime.now()
    NAME = str(round(threshold_dem)) + '_NO_BATTERY_' + HEURISTIC_TYPE + now.strftime("_%d-%m-%Y_%H-%M-%S")
    D_PATH = '_small_d/'

    # Load the dataset:
    main_dataset = mainDataset(D_PATH=D_PATH, period_string_min='15min', full_dataset=True)

    # Normalized dataframe:
    df = main_dataset.make_input_df(drop_main_terminal=False, use_time_diff=True, day_diff='holiday-weekend')

    # Sum of the power demand dataframe (not normalized):
    power_dem_df = main_dataset.load_total_power()[24:-12]

    # Load the LSTM input dataset:
    lstm_dataset = lstmInputDataset(main_dataset, df, num_past_periods=12)

    # Making predictions:
    normal_predictions = wahrsager(lstm_dataset, power_dem_df, TYPE='NORMAL').pred()[:-12]
    seq_predictions = wahrsager(lstm_dataset, power_dem_df, TYPE='SEQ', num_outputs=12).pred()

    # Adding the predictions to the dataset:
    df = df[24:-12]
    df['normal'] = normal_predictions
    df['seq_max'] = max_seq(seq_predictions)

    logger = Logger(NAME, D_PATH)

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER=logger,
        COST_TYPE='exact_costs',
        R_TYPE='savings_focus',
        R_HORIZON='single_step',
        cost_per_kwh=0.2255,
        LION_Anschaffungs_Preis=34100,
        LION_max_Ladezyklen=1000,
        SMS_Anschaffungs_Preis=115000 / 3,
        SMS_max_Nutzungsjahre=20,
        Leistungspreis=102,
        logging_list=['cost_saving', 'exact_costs', 'sum_exact_costs', 'sum_cost_saving'],
        deactivate_SMS=deactivate_SMS,
        deactivate_LION=deactivate_LION)

    # Load the environment:
    env = common_env(
        reward_maker=r_maker,
        df=df,
        power_dem_df=power_dem_df,
        input_list=['norm_total_power', 'normal', 'seq_max'],
        max_SMS_SoC=12 / 3,
        max_LION_SoC=54,
        PERIODEN_DAUER=15,
        ACTION_TYPE='contin',
        OBS_TYPE='contin',
        AGENT_TYPE='heuristic')

    # Setup the agent:
    agent = heurisitc(env=env, HEURISTIC_TYPE=HEURISTIC_TYPE, threshold_dem=threshold_dem)

    return agent.calculate(epochs=epochs)
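# Illustrative invocation: both batteries are deactivated by default here, so
# this computes the no-battery baseline costs for a 50 kW target value:
if __name__ == '__main__':
    use_heuristic(threshold_dem=50)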