Beispiel #1
0
def get_random_state(game_name, random_steps=20):
    """Return a state reached by playing random actions in a fresh game.

    Args:
        game_name: Gym environment id; used both for ``gym.make`` and as the
            name handed to ``CustomGym``.
        random_steps: Number of uniformly random actions to take after the
            initial reset.

    Returns:
        The state produced by the last ``env.step`` (or by ``env.reset()``
        when ``random_steps`` is 0 or the final step ended the episode).
    """
    gym_env = gym.make(game_name)
    # Wrap with the requested game's name; a hard-coded id here would
    # mis-configure the wrapper for every game other than that one.
    env = CustomGym(gym_env, game_name)

    state = env.reset()
    for _ in range(random_steps):
        action_idx = random.randrange(env.action_size)
        # The third element of the step result is the terminal flag.
        state, _, terminal, _ = env.step(action_idx)
        if terminal:
            # Stepping a finished episode is not valid; start a new one so
            # the remaining random actions (and the returned state) are
            # taken from a live game.
            state = env.reset()

    return state
Beispiel #2
0
def play(agent,
         game_name,
         render=True,
         num_episodes=10,
         fps=5.0,
         monitor=True):
    """Run ``agent`` for several episodes, sampling actions from its policy.

    Args:
        agent: Object exposing ``get_policy_and_value(state)`` (returning a
            ``(policy, value)`` pair) and an ``action_size`` attribute.
        game_name: Gym environment id.
        render: When true, call ``env.render()`` after every step.
        num_episodes: Number of episodes to play.
        fps: Upper bound on steps per second; each step is padded with a
            sleep so that it lasts at least ``1 / fps`` seconds.
        monitor: When true, wrap the gym environment in ``wrappers.Monitor``
            and close that wrapper once playing is finished.

    Returns:
        A tuple ``(episode_rewards, episode_vals)``: the summed reward of
        each episode, and the value estimate of every step across all
        episodes in one flat list.
    """
    gym_env = gym.make(game_name)
    if monitor:
        print(gym_env)
        gym_env = wrappers.Monitor(gym_env, 'videos/-v0')
    print(gym_env)
    print(game_name)
    # NOTE(review): the (possibly monitored) gym_env above is not passed to
    # CustomGym, so the monitor may never see the steps taken below --
    # confirm against CustomGym's constructor.
    env = CustomGym(game_name)

    desired_frame_length = 1.0 / fps

    episode_rewards = []
    episode_vals = []
    try:
        for ep in range(num_episodes):
            print("Starting episode", ep)
            episode_reward = 0
            state = env.reset()
            terminal = False
            frame_start = time()
            while not terminal:
                policy, value = agent.get_policy_and_value(state)
                action_idx = np.random.choice(agent.action_size, p=policy)
                state, reward, terminal, _ = env.step(action_idx)
                if render:
                    env.render()
                episode_vals.append(value)
                episode_reward += reward
                # Sleep so the frame rate is correct
                frame_length = time() - frame_start
                if frame_length < desired_frame_length:
                    sleep(desired_frame_length - frame_length)
                # Re-read the clock after the sleep: starting the next frame
                # from the pre-sleep timestamp would count the sleep as work
                # and make every other frame skip its padding.
                frame_start = time()
            episode_rewards.append(episode_reward)
    finally:
        if monitor:
            # wrappers.Monitor is closed through the wrapper itself; the old
            # ``env.monitor`` attribute is not available on wrapped envs.
            gym_env.close()
    return episode_rewards, episode_vals