        max_SMS_SoC=12 / 3,
        max_LION_SoC=54,
        # Period length in minutes:
        PERIODEN_DAUER=period_min,
        # Q-Table can only take discrete inputs and make discrete outputs:
        ACTION_TYPE='discrete',
        OBS_TYPE='discrete',
        # Set number of discrete values:
        discrete_space=22,
        # Size of validation data:
        val_split=0.1)

    # Setup agent:
    agent = Q_Learner(
        env=env,
        memory_len=update_num,
        # Training parameter:
        gamma=0.85,
        epsilon=0.8,
        epsilon_min=0.1,
        epsilon_decay=0.999996,
        lr=0.5,
        tau=0.125)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with dataset that includes val-data:
    env.use_all_data()
    testing(agent)
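# NOTE: the run scripts in this section rely on helpers defined elsewhere in the
# project (datetime, dataset_and_logger, training, testing, reward_maker,
# common_env, Q_Learner, DQN). A minimal sketch of the imports they assume is
# shown below; the module paths are assumptions and may need to be adjusted to
# the actual project layout:
from datetime import datetime
from common_func import training, testing, dataset_and_logger
from reward_maker import reward_maker
from common_env import common_env
from agent_q_table import Q_Learner
from agent_deep_q import DQN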
def run_agent(name='', gamma=.9, lr=0.1, tau=0.15, update_num=500, epsilon_decay='linear',
              input_list=['norm_total_power','normal','seq_max'], hidden_size=256,
              pre_trained_model=None, target_update_num=None):
    '''
    Trains and tests a DQN based on the passed parameters.
    '''
    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_DQN_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs and steps:
    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameter to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # DQN inputs can be continuous, outputs must be discrete:
        ACTION_TYPE = 'discrete',
        OBS_TYPE = 'contin',
        # Set number of discrete values:
        discrete_space = 22,
        # Size of validation data:
        val_split = 0.1)

    # Setup agent:
    agent = DQN(
        env = env,
        memory_len = update_num,
        # Training parameter:
        gamma = gamma,
        epsilon = 0.99,
        epsilon_min = 0.1,
        epsilon_decay = epsilon_decay,
        lr = lr,
        tau = tau,
        activation = 'relu',
        loss = 'mean_squared_error',
        hidden_size = hidden_size,
        pre_trained_model = pre_trained_model,
        target_update_num = target_update_num)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with dataset that includes val-data:
    env.use_all_data()
    testing(agent)
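# A minimal usage sketch for the DQN variant of run_agent() above. The call and
# its parameter values are illustrative only (not taken from the original
# experiments); any keyword argument in the signature can be varied here instead
# of relying on the defaults.
if __name__ == '__main__':
    run_agent(
        name='example_run',
        gamma=0.9,
        lr=0.001,
        tau=0.15,
        update_num=500,
        epsilon_decay='linear',
        hidden_size=256)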
def run_agent(name='', gamma=.9, lr=0.1, update_num=100, load_table=None, epsilon_decay='linear',
              input_list=['norm_total_power','normal','seq_max']):
    '''
    Trains and tests a Q-Table agent based on the passed parameters.
    '''
    # Naming the agent:
    now = datetime.now()
    NAME = 'agent_Q-Table_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings:
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs and steps:
    epochs = 100

    # Setup reward_maker:
    r_maker = reward_maker(
        LOGGER = logger,
        # Settings:
        COST_TYPE = 'exact_costs',
        R_TYPE = 'savings_focus',
        R_HORIZON = 'single_step',
        # Parameter to calculate costs:
        cost_per_kwh = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen = 6000,
        SMS_Anschaffungs_Preis = 55000, #115000/3
        SMS_max_Nutzungsjahre = 25,
        Leistungspreis = 102)

    # Setup common_env:
    env = common_env(
        reward_maker = r_maker,
        df = df,
        power_dem_df = power_dem_df,
        # Dataset inputs for the states:
        input_list = input_list,
        # Battery stats:
        max_SMS_SoC = 25,
        max_LION_SoC = 54,
        LION_max_entladung = 50,
        SMS_max_entladung = 100,
        SMS_entladerate = 0.72,
        LION_entladerate = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # Q-Table can only take discrete inputs and make discrete outputs:
        ACTION_TYPE = 'discrete',
        OBS_TYPE = 'discrete',
        # Set number of discrete values:
        discrete_space = 22,
        # Size of validation data:
        val_split = 0.1)

    # Setup agent:
    agent = Q_Learner(
        env = env,
        memory_len = update_num,
        # Training parameter:
        gamma = gamma,
        epsilon = 0.99,
        epsilon_min = 0.1,
        epsilon_decay = epsilon_decay,
        lr = lr,
        load_table = load_table)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with dataset that includes val-data:
    env.use_all_data()
    testing(agent)
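# A minimal usage sketch for the Q-Table variant of run_agent() above. The values
# are illustrative only; load_table could be set to a previously saved Q-table to
# continue training, here it is left at its default of None.
if __name__ == '__main__':
    run_agent(
        name='example_run',
        gamma=0.9,
        lr=0.1,
        update_num=100,
        epsilon_decay='linear')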