# Build the online ("main") actor/critic networks and their target twins.
# NOTE(review): the networks are sized with observation_dim, while the input
# assembled below is the flattened state with the previous action appended --
# confirm observation_dim already accounts for the appended action.
actor_main = DDPGActor(observation_dim, action_dim, actor_lr, device)
actor_target = DDPGActor(observation_dim, action_dim, actor_lr, device)
critic_main = DDPGCritic(observation_dim, action_dim, critic_lr, device)
critic_target = DDPGCritic(observation_dim, action_dim, critic_lr, device)

# Presumably syncs each target network with its main network -- confirm in
# target_initialize.
target_initialize(actor_main, actor_target)
target_initialize(critic_main, critic_target)

# Global counters and the checkpoint flag used by the training loop.
iter_i = epi_i = 0
save_flag = False

while iter_i < max_iteration:
    # Per-episode bookkeeping.
    ep_reward = 0.0
    step_i = 0
    prev_action = np.zeros([action_dim])

    # A fresh exploration-noise process is created and reset every episode.
    noise = Noise.OrnsteinUhlenbeckActionNoise(
        mu=np.zeros([action_dim]),
        sigma=sigma,
    )
    noise.reset()

    # env.reset() yields a 4-field result; only the last field (the
    # observation, per the original note) is used here.
    timestep = env.reset()
    _, _, _, raw_observation = timestep
    s = utils.state_1d_flat(raw_observation)

    # Network input: flattened state followed by the previous action.
    s_a = torch.FloatTensor(np.append(s, prev_action)).to(device)
# Build the online ("main") actor/critic networks and their target twins.
# All four take state_action_dim as their input size, matching the
# state + previous-action vector assembled at the start of each episode.
actor_main = DDPGActor(state_action_dim, action_dim, actor_lr, device)
actor_target = DDPGActor(state_action_dim, action_dim, actor_lr, device)
critic_main = DDPGCritic(state_action_dim, action_dim, critic_lr, device)
critic_target = DDPGCritic(state_action_dim, action_dim, critic_lr, device)

# Presumably syncs each target network with its main network -- confirm in
# target_initialize.
target_initialize(actor_main, actor_target)
target_initialize(critic_main, critic_target)

# Validate the noise configuration once, before training starts. An explicit
# raise is used instead of `assert` because asserts are removed under
# `python -O`, which would let an unknown noise_type silently fall through to
# the Gaussian branch below. The check is loop-invariant, so it does not need
# to be repeated every episode.
if noise_type not in ("ou", "gaussian"):
    raise ValueError(
        "noise_type must be 'ou' or 'gaussian', got {!r}".format(noise_type))

# start training agent
for epi_i in range(1, max_episode + 1):
    # Exploration scale is re-sampled uniformly for every episode.
    sigma = np.random.uniform(sigma_min, sigma_max)
    if noise_type == "ou":
        noise = Noise.OrnsteinUhlenbeckActionNoise(
            mu=np.zeros([action_dim]),
            sigma=sigma * np.ones([action_dim]))
    else:
        noise = Noise.GaussianNoise(action_dim=action_dim, sigma=sigma)
    noise.reset()

    timestep = env.reset()
    ep_reward = 0.0
    prev_action = np.zeros([action_dim])

    # env.reset() yields a 4-field result; only the last field (the
    # observation, per the original note) is used here.
    _, _, _, s = timestep
    s = utils.state_1d_flat(s)

    # Network input: flattened state followed by the previous action.
    s_a = np.append(s, prev_action)
    s_a = torch.FloatTensor(s_a).to(device)
# Online ("main") critic and its target twin, sized for a multi-action
# "control" (control_dim outputs, state_control_dim inputs).
critic_main = DDPGCritic(state_control_dim, control_dim, critic_lr, device)
critic_target = DDPGCritic(state_control_dim, control_dim, critic_lr, device)

# Presumably syncs each target network with its main network -- confirm in
# target_initialize. actor_main / actor_target are created before this point.
target_initialize(actor_main, actor_target)
target_initialize(critic_main, critic_target)

# Validate the noise configuration once, before training starts. An explicit
# raise is used instead of `assert` because asserts are removed under
# `python -O`, which would let an unknown noise_type silently fall through to
# the Gaussian branch below. The check is loop-invariant, so it does not need
# to be repeated every episode.
if noise_type not in ("ou", "gaussian"):
    raise ValueError(
        "noise_type must be 'ou' or 'gaussian', got {!r}".format(noise_type))

# start training agent
for epi_i in range(1, max_episode + 1):
    # Exploration scale is re-sampled uniformly for every episode.
    sigma = np.random.uniform(sigma_min, sigma_max)
    if noise_type == "ou":
        # This noise process covers a single action (mu has action_dim
        # entries); for a whole control the sampling has to be repeated.
        noise = Noise.OrnsteinUhlenbeckActionNoise(
            mu=np.zeros([action_dim]),
            sigma=sigma,
            actions_per_control=actions_per_control)
    else:
        noise = Noise.GaussianNoise(action_dim=control_dim, sigma=sigma)
    noise.reset()

    timestep = env.reset()
    ep_reward = 0.0
    # One row per action within a control.
    prev_action = np.zeros([actions_per_control, action_dim])

    # env.reset() yields a 4-field result; only the last field (the
    # observation, per the original note) is used here.
    _, _, _, s = timestep
    s = utils.state_1d_flat(s)

    # Network input: flattened state followed by the flattened previous
    # control (all of its actions laid end to end).
    s_a = np.append(s, prev_action.reshape([-1]))