Example #1
def main():
    """
    UnboundLocalError: local variable 'RENDER' referenced before assignment

    If the global variable changed in a function without
    declare with a "global" prefix, then the variable here
    will be treat as a local variable

    For example,
    if "RENDER" is not been declared with global prefix,
    access "RENDER" variable will raise UnboundLocalError
    before assign value to "RENDER"
    """

    global RENDER
    env = gym.make(ENV_NAME)
    env = env.unwrapped
    env.seed(1)

    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]
    # print(f"s_dim: {s_dim}, a_dim: {a_dim}, a_bound: {a_bound}")
    # s_dim: 3, a_dim: 1, a_bound: 2.0

    ddpg = DDPG(s_dim, a_dim, a_bound)

    # var: standard deviation of the Gaussian exploration noise added to actions
    var = 3
    for i in range(MAX_EPISODES):
        s = env.reset()
        # s : np.ndarray, shape (3,)
        ep_reward = 0
        for j in range(MAX_EP_STEPS):
            if RENDER:
                env.render()

            a = ddpg.choose_action(s)
            a = np.clip(np.random.normal(a, var), -a_bound, a_bound)
            s_, r, done, info = env.step(a)

            # s  : np.ndarray, shape (3,)
            # a  : np.float (scalar)
            # r  : float
            # s_ : np.ndarray, shape (3,)
            ddpg.store_transition(s, a, r/10, s_)

            if ddpg.m_pointer > ddpg.capacity:
                var *= 0.9995
                ddpg.learn()

            s = s_
            ep_reward += r

            if done or (j+1) == MAX_EP_STEPS:
                print(f"Episode: {i:03d}")
                print(f"\tReward: {ep_reward:.3f}, Explore: {var:.2f}")
                if ep_reward > -150:
                    RENDER = True
                break
    env.close()
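
The docstring in main() refers to Python's scoping rule: assigning to a name anywhere inside a function makes that name local for the whole function body, so reading it before the assignment fails. A minimal standalone sketch of that behavior (names here are illustrative, not part of the DDPG code):

RENDER = False

def without_global():
    if RENDER:       # UnboundLocalError: RENDER is assigned below,
        pass         # so Python treats it as local throughout this function
    RENDER = True

def with_global():
    global RENDER    # refer to the module-level RENDER instead
    if RENDER:
        pass
    RENDER = True

with_global()        # runs fine; module-level RENDER is now True
# without_global()   # would raise UnboundLocalError if called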
Example #2
np.random.seed(42)
env.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0

for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    state = env.reset()
    cumulative_reward = 0
    for timestep in range(200):
        action = agent.get_action(state, noise, timestep)
        state_, reward, done, _ = env.step(action * env.action_space.high[0])
        # env.render()
        agent.store_transition(state, action, state_, reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps)
        all_timesteps += 1
    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

agent.save_model()
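
Example #2 constructs a fresh OUActionNoise at the start of every epoch, but the class itself is not shown. A minimal sketch of a typical Ornstein-Uhlenbeck noise process, assuming the constructor signature used above (action dimension only); the mu/theta/sigma/dt defaults are assumptions, not taken from the source:

import numpy as np

class OUActionNoise:
    # Ornstein-Uhlenbeck process: temporally correlated exploration noise.
    # Sketch only; parameter defaults are assumptions.
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.x = np.ones(action_dim) * mu  # current noise state

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = self.theta * (self.mu - self.x) * self.dt \
             + self.sigma * np.sqrt(self.dt) * np.random.randn(self.action_dim)
        self.x = self.x + dx
        return self.x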
Example #3
agent = DDPG(env, writer)

all_timesteps = 0

for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    env.reset()
    pixel = env.render(mode='rgb_array')
    state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)
    cumulative_reward = 0
    for timestep in range(200):
        action = agent.get_action(np.array(state)[np.newaxis], noise, timestep)
        _, reward, done, _ = env.step(action * env.action_space.high[0])
        pixel = env.render(mode='rgb_array')
        state_ = state.copy()
        state_.append(get_screen(pixel))
        agent.store_transition(np.array(state), action, np.array(state_),
                               reward, done)

        state = state_
        cumulative_reward += reward

        agent.update(all_timesteps, batch_size=16)
        all_timesteps += 1
    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)
    if e % 500 == 0:
        agent.save_model('models/' + str(e) + '_')
agent.save_model()
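
Example #3 feeds the agent a deque of three preprocessed frames taken from env.render(mode='rgb_array'), but get_screen is not shown. A minimal sketch of a plausible preprocessing step; the grayscale conversion and the 84x84 output size are assumptions, not taken from the source:

import numpy as np
import cv2  # opencv-python

def get_screen(pixel, size=(84, 84)):
    # Convert an RGB frame (H, W, 3) to a normalized grayscale array.
    gray = cv2.cvtColor(pixel, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, size, interpolation=cv2.INTER_AREA)
    return resized.astype(np.float32) / 255.0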