def main():
    """
    UnboundLocalError: local variable 'RENDER' referenced before assignment

    If a global variable is assigned inside a function without being declared
    with the "global" keyword, the name is treated as a local variable
    throughout the function. For example, if "RENDER" were not declared global
    here, reading "RENDER" below would raise UnboundLocalError, because the
    read happens before the local assignment "RENDER = True" near the end of
    the loop.
    """
    global RENDER

    env = gym.make(ENV_NAME)
    env = env.unwrapped
    env.seed(1)

    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]
    # print(f"s_dim: {s_dim}, a_dim: {a_dim}, a_bound: {a_bound}")
    # s_dim: 3, a_dim: 1, a_bound: 2.0

    ddpg = DDPG(s_dim, a_dim, a_bound)

    # var: scale of the Gaussian noise added to the action for exploration
    var = 3
    for i in range(MAX_EPISODES):
        s = env.reset()
        # s : np.ndarray, s.shape = (3,)
        ep_reward = 0
        for j in range(MAX_EP_STEPS):
            if RENDER:
                env.render()

            a = ddpg.choose_action(s)
            a = np.clip(np.random.normal(a, var), -a_bound, a_bound)
            s_, r, done, info = env.step(a)
            # s  : np.ndarray
            # a  : noisy action, clipped to [-a_bound, a_bound]
            # r  : float
            # s_ : np.ndarray

            ddpg.store_transition(s, a, r / 10, s_)

            if ddpg.m_pointer > ddpg.capacity:
                var *= 0.9995  # decay exploration noise once the buffer is full
                ddpg.learn()

            s = s_
            ep_reward += r
            if done or (j + 1) == MAX_EP_STEPS:
                print(f"Episode: {i:03d}")
                print(f"\tReward: {ep_reward:.3f}, Explore: {var:.2f}")
                if ep_reward > -150:
                    RENDER = True
                break
    env.close()
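# The UnboundLocalError described in the docstring above can be reproduced in
# isolation. The sketch below is illustrative only (the VERBOSE flag and the
# two functions are hypothetical names, not part of the training script): an
# assignment anywhere in a function makes the name local to the whole function
# body, so a read before that assignment fails unless "global" is declared.
VERBOSE = False

def print_progress_buggy():
    if VERBOSE:          # raises UnboundLocalError: read before local assignment
        print("progress")
    VERBOSE = True       # this assignment makes VERBOSE local to the function

def print_progress_fixed():
    global VERBOSE       # refer to the module-level VERBOSE instead
    if VERBOSE:
        print("progress")
    VERBOSE = True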
np.random.seed(42)
env.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0
for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    state = env.reset()
    cumulative_reward = 0

    for timestep in range(200):
        action = agent.get_action(state, noise, timestep)
        state_, reward, done, _ = env.step(action * env.action_space.high[0])
        # env.render()

        agent.store_transition(state, action, state_, reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps)
        all_timesteps += 1

    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

agent.save_model()
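# OUActionNoise is constructed above from the action dimension but its
# definition is not shown in this excerpt. Below is a minimal sketch of an
# Ornstein-Uhlenbeck noise process matching that constructor signature; the
# theta/sigma/dt defaults and the sample()/reset() method names are
# assumptions, not necessarily the actual API used by agent.get_action().
import numpy as np

class OUActionNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.x = np.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = (self.theta * (self.mu - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.x.shape))
        self.x = self.x + dx
        return self.x

    def reset(self):
        self.x = np.copy(self.mu)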
agent = DDPG(env, writer)

all_timesteps = 0
for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    env.reset()
    pixel = env.render(mode='rgb_array')
    # stack the last three preprocessed frames as the state
    state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)
    cumulative_reward = 0

    for timestep in range(200):
        action = agent.get_action(np.array(state)[np.newaxis], noise, timestep)
        _, reward, done, _ = env.step(action * env.action_space.high[0])

        pixel = env.render(mode='rgb_array')
        state_ = state.copy()
        state_.append(get_screen(pixel))

        agent.store_transition(np.array(state), action, np.array(state_),
                               reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps, batch_size=16)
        all_timesteps += 1

    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

    if e % 500 == 0:
        agent.save_model('models/' + str(e) + '_')

agent.save_model()
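# get_screen() is used above to turn each rendered RGB frame into one channel
# of the stacked pixel state, but it is not defined in this excerpt. Below is
# a minimal sketch assuming grayscale conversion, downscaling to 84x84 with
# OpenCV, and normalization to [0, 1]; the output size, the normalization, and
# the cv2 dependency are assumptions, not the repo's exact helper.
import cv2
import numpy as np

def get_screen(pixel):
    """Preprocess a rendered RGB frame into a (84, 84) float32 image so that
    three consecutive frames can be stacked into a (3, 84, 84) state."""
    gray = cv2.cvtColor(pixel, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
    return resized.astype(np.float32) / 255.0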