예제 #1
0
import _util as u

# Show colors in the Command window.
cr.init(autoreset=True)

env = gym.make('CartPole-v0')
# Alternative environment: env = gym.make('FrozenLake-v0')
env.reset()
random_episodes = 0
reward_sum = 0

# Drive the environment from the keyboard: one key press per step, for five
# episodes, or until the user presses 'q'.
try:
    while random_episodes < 5:
        env.render()
        key = u.inkey()
        if key == b'q':
            print("Game aborted!")
            break

        # NOTE(review): u.arrow_keys is assumed to be a mapping from key codes
        # to action indices (0-Left, 1-Down, 2-Right, 3-Up) -- confirm against
        # _util. Keys that are not in the mapping are ignored instead of
        # crashing the session with a KeyError.
        if key not in u.arrow_keys:
            continue
        action = u.arrow_keys[key]

        # The mapping lists four actions, but the active environment may
        # support fewer; skip actions it does not accept rather than letting
        # env.step() fail.
        if not env.action_space.contains(action):
            continue
        # Alternative: action = env.action_space.sample() for a random policy.

        print(action)
        observation, reward, done, info = env.step(action)
        print(observation, reward, done, action)
        reward_sum += reward
        if done:
            random_episodes += 1
            print("Reward for this episode was:", random_episodes, reward_sum)
            # Start the next episode from a fresh state and a zeroed score.
            reward_sum = 0
            env.reset()
finally:
    # Release the environment (and its render window) on every exit path.
    env.close()
예제 #2
0
    if i % 100 == 0:
        print('num_episodes = {:4d}, cost = {:7.5f} '.format(
            i, cost_sum / count))

    if len(rList) > 10 and np.mean(rList[-10:]) > 500:
        break

# Summarize training. The preceding loop can stop early, so average over the
# episodes that were actually recorded rather than the planned episode count.
# NOTE(review): rList is assumed to hold one total reward per episode (it is
# compared against a threshold of 500 above), so the figure is an average
# reward, not a percentage -- confirm where rList is filled.
episodes_run = len(rList)
if episodes_run:
    print("Average reward per episode: " + str(sum(rList) / episodes_run))
else:
    print("No episodes were recorded.")

try:
    observation = env.reset()
    reward_sum = 0

    # Replay a single episode, always taking the action with the highest
    # predicted value, and report the accumulated reward.
    while True:
        env.render()
        # Reshape the observation into a one-row batch for the network input.
        observation = np.reshape(observation, [1, m.input_size])
        Q_pred = sess.run(m.Y_, feed_dict={m.X: observation})
        action = np.argmax(Q_pred)

        observation, reward, done, _ = env.step(action)
        reward_sum += reward
        if done:
            print("Total score: {}".format(reward_sum))
            break

    # Plot the recorded rList values, one bar per entry.
    plt.bar(range(len(rList)), rList, color="blue")
    plt.show()

    # Read one key before shutting down; the value itself is not needed.
    # NOTE(review): presumably inkey() blocks until a key is pressed --
    # confirm against _util.
    u.inkey()
finally:
    # Close the environment even if the replay or plotting fails.
    env.close()