import sys

import numpy as np

# House, Agent, EnergyEnvironment, plots, and utils are provided by this
# project's own modules and are assumed to be imported at module level.


def main_function(location, num_of_panels, num_of_turbines, num_of_batteries):
    # Get the number of episodes from the command line (default: 2000)
    if len(sys.argv) > 1:
        episodes_num = int(sys.argv[1])
    else:
        episodes_num = 2000

    # House-dependent parameters
    # location = 'California'
    # num_of_panels = 30     # Number of 250-watt solar panels
    # num_of_turbines = 2    # Number of 400 kW wind turbines
    # num_of_batteries = 2
    house = House(location, num_of_panels, num_of_turbines, num_of_batteries)

    # Main training parameters
    num_of_months = 12
    num_of_days = 30      # number of days per episode
    num_time_states = 4   # time-of-day slots per day
    epsilon = 0.5         # exploration rate
    alpha = 0.8           # learning rate

    # Initiate Agent
    agent = Agent()
    Q = agent.initialize_Q()
    avg_Q_old = np.mean(Q)

    # For printing and plots
    print_iteration = 50   # print progress every `print_iteration` episodes
    print_flag = False     # True on episodes where progress is printed and averages are sampled

    # Per-episode and sampled quantities for plotting
    rList = []              # total reward per episode (learning curve)
    solarList = []          # sampled average daily solar energy
    windList = []           # sampled average daily wind energy
    ffList = []             # sampled average daily fossil-fuel (grid) energy
    battstorageList = []    # sampled average end-of-day battery storage
    battusedList = []       # sampled average daily battery usage
    energyList = []

    # Per-day accumulators, reset at every sampling point
    solarSubList = []
    windSubList = []
    ffSubList = []
    battstorageSubList = []
    battusedSubList = []

    # Per-time-slot energy use recorded during the final episode
    final_itr = []
    final_list = []
    final_solar = []
    solar_dict = {0: [], 1: [], 2: [], 3: []}
    final_wind = []
    wind_dict = {0: [], 1: [], 2: [], 3: []}
    final_ff = []
    ff_dict = {0: [], 1: [], 2: [], 3: []}
    final_battery = []
    battery_dict = {0: [], 1: [], 2: [], 3: []}

    ## for realtime plotting
    # fig, ax = plt.subplots()
    # ax.set_ylabel("Energy (kWh)")
    # ax.set_title("Evolution of Energy Use")

    for itr in range(episodes_num):
        if itr % print_iteration == 0:
            print_flag = True

        # The house stays constant for every episode
        env = EnergyEnvironment(house)
        cur_state = env.state

        total_reward = 0
        solar_avg = 0
        wind_avg = 0
        ff_avg = 0
        batt_storage_avg = 0
        batt_used_avg = 0

        # for month in range(num_of_months):
        #     env.state[env.month_index] = month
        for day in range(num_of_days):
            total_solar_energy = 0
            total_wind_energy = 0
            total_grid_energy = 0
            total_battery_used = 0

            for i in range(num_time_states):
                action, cur_state_index, action_index = agent.get_action(
                    cur_state, Q, epsilon)
                reward, next_state = env.step(action, cur_state)
                Q = agent.get_Q(action, cur_state, Q, epsilon,
                                cur_state_index, action_index, reward, alpha)
                cur_state = next_state
                total_reward += reward

                # calculate daily totals per source
                total_solar_energy += env.solar_energy
                total_wind_energy += env.wind_energy
                total_grid_energy += env.grid_energy
                total_battery_used += env.battery_used

                # on the last episode, record per-time-slot energy use
                if itr == (episodes_num - 1):
                    solar_dict[i].append(env.solar_energy)
                    wind_dict[i].append(env.wind_energy)
                    ff_dict[i].append(env.grid_energy)
                    battery_dict[i].append(env.battery_used)

            # store how much is stored in the battery at the end of each day
            total_battery_stored = env.battery_energy

            # save total daily energy produced from different sources
            solarSubList.append(total_solar_energy)
            windSubList.append(total_wind_energy)
            ffSubList.append(total_grid_energy)
            battstorageSubList.append(total_battery_stored)
            battusedSubList.append(total_battery_used)

        solar_avg = np.mean(solarSubList)
        wind_avg = np.mean(windSubList)
        ff_avg = np.mean(ffSubList)
        batt_storage_avg = np.mean(battstorageSubList)
        batt_used_avg = np.mean(battusedSubList)

        if print_flag:
            avg_Q_new = np.mean(Q)
            avg_Q_change = abs(avg_Q_new - avg_Q_old)
            utils.print_info(itr, env, solar_avg, wind_avg, ff_avg,
                             batt_storage_avg, batt_used_avg, avg_Q_change)
            avg_Q_old = avg_Q_new

            solarList.append(solar_avg)
            windList.append(wind_avg)
            ffList.append(ff_avg)
            battstorageList.append(batt_storage_avg)
            battusedList.append(batt_used_avg)

            # plt.ion()
            # plots.real_time_plot([[solar_avg], [wind_avg], [ff_avg],
            #                       [batt_storage_avg], [batt_used_avg]],
            #                      colors=['b', 'g', 'r', 'purple', 'gray'],
            #                      legends=["Solar Energy", "Wind Energy",
            #                               "Fossil Fuel Energy", "Battery Storage",
            #                               "Battery Usage"], ax=ax)

            solarSubList = []
            windSubList = []
            ffSubList = []
            battstorageSubList = []
            battusedSubList = []
            print_flag = False

        # total reward per episode appended for learning curve visualization
        rList.append(total_reward)

        # decrease the exploration factor and learning rate by a little bit every episode
        epsilon = max(0, epsilon - 0.0005)
        alpha = max(0, alpha - 0.0005)

    # plt.close()
    print("Score over time: " + str(sum(rList) / episodes_num))
    print("Q-values:", Q)
    plots.plot_learning_curve(rList)

    for i in range(num_time_states):
        final_solar.append(np.mean(solar_dict[i]))
        final_wind.append(np.mean(wind_dict[i]))
        final_ff.append(np.mean(ff_dict[i]))
        final_battery.append(np.mean(battery_dict[i]))

    energyList.append(solarList)
    energyList.append(windList)
    energyList.append(ffList)
    # energyList.append(battstorageList)
    energyList.append(battusedList)

    final_itr.append(final_solar)
    final_itr.append(final_wind)
    final_itr.append(final_ff)
    final_itr.append(final_battery)

    # plots.multiBarPlot_final(list(range(4)), final_itr,
    #                          colors=['b', 'g', 'r', 'purple', 'gray'],
    #                          ylabel="Energy (kWh)",
    #                          title="Final Iteration of Energy Use",
    #                          legends=["Solar Energy", "Wind Energy",
    #                                   "Fossil Fuel Energy", "Battery Storage",
    #                                   "Battery Usage"])
    #
    # plots.multiBarPlot(list(range(len(solarList))), energyList,
    #                    colors=['b', 'g', 'r', 'purple', 'gray'],
    #                    ylabel="Energy (kWh)",
    #                    title="Evolution of Energy Use",
    #                    legends=["Solar Energy", "Wind Energy",
    #                             "Fossil Fuel Energy", "Battery Storage",
    #                             "Battery Usage"])

    return (list(range(len(solarList))), energyList,
            list(range(len(final_solar))), final_itr,
            list(range(len(rList))), rList)
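# --- Usage sketch (illustrative, not part of the original script) ---
# A minimal example of how main_function might be invoked, assuming the
# project modules above are importable. The argument values mirror the
# commented-out defaults (California, 30 panels, 2 turbines, 2 batteries);
# the unpacked variable names are hypothetical placeholders.
if __name__ == '__main__':
    (energy_x, energyList, final_x, final_itr,
     reward_x, rList) = main_function(location='California',
                                      num_of_panels=30,
                                      num_of_turbines=2,
                                      num_of_batteries=2)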
terminal = True
eval_rewards = []
evaluate_frame_number = 0

for frame in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)
        life_lost = True
        episode_reward_sum = 0
        terminal = False

    # Breakout requires a "fire" action (action #1) to start the
    # game each time a life is lost.
    # Otherwise, the agent would sit around doing nothing.
    action = 1 if life_lost else agent.get_action(
        0, game_wrapper.state, evaluation=True)

    # Step action
    _, reward, terminal, life_lost = game_wrapper.step(action,
                                                       render_mode='human')
    evaluate_frame_number += 1
    episode_reward_sum += reward

    # On game-over
    if terminal:
        print(f'Game over, reward: {episode_reward_sum}, '
              f'frame: {frame}/{EVAL_LENGTH}')
        eval_rewards.append(episode_reward_sum)

# Report the mean score over completed evaluation games (fall back to the
# running sum if no game finished within EVAL_LENGTH frames)
print('Average reward:',
      np.mean(eval_rewards) if eval_rewards else episode_reward_sum)
        # TRAINING EPOCH
        # We evaluate and save our model after each epoch, whose length is
        # controlled by a frame count in our config.
        while epoch_frame < FRAMES_BETWEEN_EVAL:
            start_time = time.time()
            game_wrapper.reset()
            life_lost = True
            episode_reward_sum = 0

            # TRAINING EPISODE
            # One episode is one game, after which we update our metrics.
            # If the episode takes longer than anticipated, we can
            # short-circuit (to avoid less valuable training).
            for _ in range(MAX_EPISODE_LENGTH):
                # Get action
                action = agent.get_action(frame_number, game_wrapper.state)

                # Take step
                processed_frame, reward, terminal, life_lost = game_wrapper.step(
                    action)
                frame_number += 1
                epoch_frame += 1
                episode_reward_sum += reward

                # Add experience to replay memory
                agent.add_experience(action=action,
                                     frame=processed_frame[:, :, 0],
                                     reward=reward,
                                     clip_reward=CLIP_REWARD,
                                     terminal=life_lost)
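                # --- Sketch: periodic learning update (illustrative assumption) ---
                # The excerpt ends after experiences are stored. A DQN training
                # loop typically follows with a periodic gradient update sampled
                # from replay memory; the names below (UPDATE_FREQ,
                # MIN_REPLAY_BUFFER_SIZE, BATCH_SIZE, DISCOUNT_FACTOR,
                # agent.learn, loss_list) are assumptions, not confirmed by
                # this excerpt.
                #
                # if frame_number % UPDATE_FREQ == 0 and \
                #         agent.replay_buffer.count > MIN_REPLAY_BUFFER_SIZE:
                #     loss = agent.learn(BATCH_SIZE, gamma=DISCOUNT_FACTOR,
                #                        frame_number=frame_number)
                #     loss_list.append(loss)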