import torch

# TwoAreaPowerSystemEnv, DdpgController, ClassicalPiController, StepSignal and
# ddpg_train are project-local modules; import paths depend on the repo layout.


def main():
    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)

    # spin up ddpg agent
    agent_ddpg = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up pi agent
    agent_pi = ClassicalPiController()

    # bundle agents
    agent_list = [agent_ddpg, agent_pi]

    ####################################################
    # UNCOMMENT TO CONTINUE TRAINING FROM A CHECKPOINT
    ####################################################
    # Load the actor and critic networks
    # agent_ddpg.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    # agent_ddpg.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # spin up the power demand signal
    signal = StepSignal()

    # train the agent
    scores = ddpg_train(env, agent_list, signal)
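# ---------------------------------------------------------------------------
# `ddpg_train` is defined elsewhere in the repo and not shown in this section.
# The sketch below is a minimal, hypothetical reconstruction of such a loop,
# assuming the standard DDPG agent interface (`act`, `step`) and an
# illustrative `n_episodes` parameter; the `signal.reset` arguments mirror the
# ones used in the test scripts.
import numpy as np
import torch
from collections import deque


def ddpg_train_sketch(env, agent_list, signal, n_episodes=500):
    agent_ddpg = agent_list[0]          # the learning agent
    scores = []                         # per-episode returns
    scores_window = deque(maxlen=100)   # rolling window for progress reporting

    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        signal.reset(15, 'on', 0.01, 'off', 0.0)  # step disturbance in area 1
        score = 0
        while True:
            action = agent_ddpg.act(state)
            demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))
            next_state, reward, done, _ = env.step(action, demand)
            agent_ddpg.step(state, action, reward, next_state, done)  # learn
            state = next_state
            score += reward
            if done:
                break

        scores.append(score)
        scores_window.append(score)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end='')

        # checkpoint the networks so the test scripts can reload them
        torch.save(agent_ddpg.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent_ddpg.critic_local.state_dict(), 'checkpoint_critic.pth')

    return scores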
import torch


def main():
    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)
    state = env.reset()

    # spin up agent
    agent = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up the power demand signal
    signal = StepSignal()
    signal.reset(15, 'on', 0.01, 'off', 0.0)

    # Load the trained actor and critic networks
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # initialise lists (seeded with a zero sample) to store simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0
    while True:
        # act greedily (no exploration noise) on the current state
        action = agent.act(state, add_noise=False)

        # current power demand for areas 1 and 2
        demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))

        # step the environment forward by one step
        state, reward, done, _ = env.step(action, demand)
        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        control_s_1.append(action[0])
        control_s_2.append(action[1])
        demand_list.append(demand[0])
        time_list.append(env.t)

        if done:
            break

    print('Score: {}'.format(score))

    # Save the agent-performance plot (PNG) and the raw data (pickle)
    png_plot_file_path = './test_plots/ddpg_test_plot/pngplot/zz_plot_final.png'
    pik_file_path = './test_plots/ddpg_test_plot/pickledata/zz_plot_final.pkl'
    capture_agent_progress(time_list, out_s_1, out_s_2, control_s_1,
                           control_s_2, out_tieline, demand_list,
                           png_plot_file_path, pik_file_path)
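# ---------------------------------------------------------------------------
# `capture_agent_progress` is a project-local helper that is not shown in this
# section. A minimal sketch of what it plausibly does, assuming it renders the
# collected traces to a PNG and pickles the raw series for later analysis;
# the subplot layout and dictionary keys here are illustrative assumptions.
import pickle
import matplotlib.pyplot as plt


def capture_agent_progress_sketch(time_list, out_s_1, out_s_2, control_s_1,
                                  control_s_2, out_tieline, demand_list,
                                  png_path, pik_path):
    fig, axes = plt.subplots(4, 1, sharex=True)
    axes[0].plot(time_list, out_s_1, label='area 1')
    axes[0].plot(time_list, out_s_2, label='area 2')
    axes[0].legend()
    axes[1].plot(time_list, out_tieline)   # tie-line power deviation
    axes[2].plot(time_list, control_s_1)   # control signals for both areas
    axes[2].plot(time_list, control_s_2)
    axes[3].plot(time_list, demand_list)   # demand disturbance
    axes[3].set_xlabel('time [s]')
    fig.savefig(png_path)
    plt.close(fig)

    # persist the raw series so plots can be regenerated without re-running
    with open(pik_path, 'wb') as f:
        pickle.dump({'time': time_list, 'out_s_1': out_s_1, 'out_s_2': out_s_2,
                     'control_s_1': control_s_1, 'control_s_2': control_s_2,
                     'tieline': out_tieline, 'demand': demand_list}, f)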
import torch
import matplotlib.pyplot as plt


def main():
    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)
    state = env.reset()

    # spin up agent
    agent = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up the power demand signal
    signal = StepSignal()
    signal.reset(1, 'on', 0.01, 'off', 0.0)

    # Load the trained actor and critic networks
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # initialise lists (seeded with a zero sample) to store simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0
    while True:
        # act greedily (no exploration noise) on the current state
        action = agent.act(state, add_noise=False)

        # current power demand for areas 1 and 2
        demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))

        # step the environment forward by one step
        state, reward, done, _ = env.step(action, demand)
        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        time_list.append(env.t)
        control_s_1.append(action[0])
        control_s_2.append(action[1])
        demand_list.append(demand[0])

        if done:
            break

    print('Score: {}'.format(score))

    # plot area outputs, both control signals and the demand signal
    plt.subplot(411)
    plt.plot(time_list, out_s_1)
    plt.plot(time_list, out_s_2)
    plt.subplot(412)
    plt.plot(time_list, control_s_1)
    plt.subplot(413)
    plt.plot(time_list, control_s_2)
    plt.subplot(414)
    plt.plot(time_list, demand_list)
    plt.show()
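# ---------------------------------------------------------------------------
# `StepSignal` is also project-local. Its reset call, used above as
# reset(dur, 'on', mag, 'off', 0.0), suggests a per-area step disturbance.
# The sketch below is a guess at those semantics: each area gets a step of the
# given magnitude, enabled by its 'on'/'off' flag, applied from the given time
# onward. The exact timing convention is an assumption, not confirmed here.


class StepSignalSketch:
    def reset(self, t_step, flag_1, mag_1, flag_2, mag_2):
        self.t_step = t_step
        self.mag_1 = mag_1 if flag_1 == 'on' else 0.0
        self.mag_2 = mag_2 if flag_2 == 'on' else 0.0

    def del_p_L_1_func(self, t):
        # demand deviation in area 1: zero until the step time
        return self.mag_1 if t >= self.t_step else 0.0

    def del_p_L_2_func(self, t):
        # demand deviation in area 2
        return self.mag_2 if t >= self.t_step else 0.0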
def agent_test(mag, dur):
    ####################################
    # Create the environment
    ####################################

    # spin up environment
    env = TwoAreaPowerSystemEnv()

    # reset the environment
    state = env.reset()

    ####################################
    # View the action space and the state space
    # print("Action space: {}".format(env.action_space))
    # print("State space: {}".format(env.observation_space))

    ####################################
    # Create the controller
    ####################################

    # implement controller
    agent = ClassicalPiController()

    # reset the controller
    action = agent.reset()

    ####################################
    # Create the signal
    ####################################

    # implement the signal
    signal = StepSignal()
    signal.reset(dur, 'on', mag, 'off', 0.0)

    ####################################

    # initialise lists (seeded with a zero sample) to store simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0
    while True:
        # Obtain the current demand
        demand = (
            signal.del_p_L_1_func(env.t),   # power demand for area 1
            signal.del_p_L_2_func(env.t))   # power demand for area 2

        # Step the environment forward by one step
        state, reward, done, _ = env.step(action, demand)
        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        demand_list.append(demand[0])
        time_list.append(env.t)

        # Given the current state observation take an action
        action = agent.act(state, (env.t, env.t + env.t_delta))
        control_s_1.append(action[0])
        control_s_2.append(action[1])

        # rescale the PI action before the next environment step
        # (the 0.02 factor is presumably a unit conversion expected by env.step)
        action /= 0.02

        if done:
            break

    print('Score: {}'.format(score))

    # encode the sign of the disturbance in the output file names
    if mag < 0:
        fp = 'neg'
    else:
        fp = 'pos'

    png_plot_file_path = './test_plots/pi_test_plot/pngplot/{}_{}_plot_final.png'.format(fp, dur)
    pik_file_path = './test_plots/pi_test_plot/pickledata/{}_{}_plot_final.pkl'.format(fp, dur)

    capture_agent_progress(time_list, out_s_1, out_s_2, control_s_1,
                           control_s_2, out_tieline, demand_list,
                           png_plot_file_path, pik_file_path)
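# ---------------------------------------------------------------------------
# `agent_test` is parametrised by step magnitude and duration, and the output
# file names encode the sign of the disturbance, so it is presumably driven by
# a sweep like the following; the grid values here are illustrative, not taken
# from the repo.


def run_pi_test_sweep():
    for mag in (-0.01, 0.01):       # negative and positive load steps
        for dur in (1, 5, 15):      # step timings to test
            agent_test(mag, dur)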
import matplotlib.pyplot as plt


def main():
    ####################################
    # Create the environment
    ####################################

    # spin up environment
    env = TwoAreaPowerSystemEnv()

    # reset the environment
    state = env.reset()

    ####################################
    # View the action space and the state space
    print("Action space: {}".format(env.action_space))
    print("State space: {}".format(env.observation_space))

    ####################################
    # Create the controller
    ####################################

    # implement controller
    agent = ClassicalPiController()

    # reset the controller
    action = agent.reset()

    ####################################
    # Create the signal
    ####################################

    # implement the signal
    signal = StepSignal()
    signal.reset(1, 'on', 0.01, 'off', 0.0)

    ####################################

    # initialise lists (seeded with a zero sample) to store simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0
    while True:
        # Obtain the current demand
        demand = (
            signal.del_p_L_1_func(env.t),   # power demand for area 1
            signal.del_p_L_2_func(env.t))   # power demand for area 2

        # Step the environment forward by one step
        state, reward, done, _ = env.step(action, demand)
        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        demand_list.append(demand[0])
        time_list.append(env.t)

        # Given the current state observation take an action
        action = agent.act(state, (env.t, env.t + env.t_delta))
        control_s_1.append(action[0])
        control_s_2.append(action[1])

        if done:
            break

    print('Score: {}'.format(score))

    # plot area outputs, tie-line flow, both control signals and the demand
    plt.subplot(511)
    plt.plot(time_list, out_s_1)
    plt.plot(time_list, out_s_2)
    plt.subplot(512)
    plt.plot(time_list, out_tieline)
    plt.subplot(513)
    plt.plot(time_list, control_s_1)
    plt.subplot(514)
    plt.plot(time_list, control_s_2)
    plt.subplot(515)
    plt.plot(time_list, demand_list)
    plt.show()
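# ---------------------------------------------------------------------------
# `ClassicalPiController` is defined elsewhere. Given the interface used
# above, act(state, (t, t + t_delta)), a minimal sketch of a per-area PI law
# on the area control error (ACE) looks like the following; the gains and the
# state indices used to form the ACE are illustrative assumptions, not taken
# from this repo.
import numpy as np


class ClassicalPiControllerSketch:
    def __init__(self, k_p=0.5, k_i=0.3):
        self.k_p, self.k_i = k_p, k_i

    def reset(self):
        self.integral = np.zeros(2)
        return np.zeros(2)  # initial (zero) control action

    def act(self, state, t_span):
        dt = t_span[1] - t_span[0]
        # illustrative ACE: per-area output deviation plus the tie-line term
        ace = np.array([state[2] + state[3],
                        state[6] - state[3]])
        self.integral += ace * dt
        return -(self.k_p * ace + self.k_i * self.integral)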