# Tabular Q-learning on the 4x4 FrozenLake grid-world (deterministic variant).
# NOTE(review): the source was collapsed onto one physical line and is
# truncated mid-loop; reformatted here with tokens unchanged.
import numpy as np
import matplotlib.pyplot as plt
from rl_gym.envs.registration import register
import random as pr

''' https://www.youtube.com/watch?v=ZCumo_6qTsU&index=9&list=PLlMkM4tgfjnKsCWav-Z2F-MMFRx-2gMGG '''

# Register a non-slippery (deterministic) 4x4 FrozenLake so the agent's
# actions always have their intended effect.
register(
    id='FrozenLake-v3',
    entry_point='rl_gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False}
)

# NOTE(review): 'FrozenLake-v0' is made although 'FrozenLake-v3' was just
# registered above — presumably this should be 'FrozenLake-v3'; confirm.
# NOTE(review): the module name `rl_gym` is never imported (only `register`
# from it is), so `rl_gym.make` would raise NameError as written.
env = rl_gym.make('FrozenLake-v0')

# Q-table: one row per state, one column per action, initialised to zero.
Q = np.zeros([env.observation_space.n, env.action_space.n])

learning_rate = .85   # step size for the Q-value update
dis = .99             # discount factor applied to future rewards
num_episodes = 2000   # number of training episodes to run

rList = []            # cumulative reward per episode (for later inspection/plotting)

for i in range(num_episodes):
    state = env.reset()   # start each episode from the initial state
    rAll = 0              # running reward accumulator for this episode
    done = False
    while not done:
        # NOTE(review): loop body is missing — the source line is truncated here.
# Experience-replay training helper for a DQN agent on CartPole.
# NOTE(review): the source was collapsed onto one physical line and is
# truncated mid-function; reformatted here with tokens unchanged.
import numpy as np
import tensorflow as tf
import random
from collections import deque
from dqn import dqn
import rl_gym

env = rl_gym.make('CartPole-v0')

''' https://www.youtube.com/watch?v=Fbf9YUyDFww&list=PLlMkM4tgfjnKsCWav-Z2F-MMFRx-2gMGG&index=15 '''

input_size = env.observation_space.shape[0]   # length of the observation vector
output_size = env.action_space.n              # number of discrete actions

dis = 0.9               # discount factor for the Bellman target
REPLAY_MEMORY = 50000   # replay-buffer capacity (not used in the visible code)

def simple_replay_train(DQN, train_batch):
    # Build (state, target-Q) training pairs from a batch of transitions
    # (state, action, reward, next_state, done).
    # Assumes DQN.predict(state) returns a (1, DQN.output_size) array —
    # TODO confirm against the dqn module.
    x_stack = np.empty(0).reshape(0, DQN.input_size)    # stacked input states
    y_stack = np.empty(0).reshape(0, DQN.output_size)   # stacked target Q-rows
    for state, action, reward, next_state, done in train_batch:
        Q = DQN.predict(state)
        if done:
            # Terminal transition: the target is just the immediate reward.
            Q[0, action] = reward
        else:
            # Bellman target: reward plus discounted best next-state value.
            Q[0, action] = reward + dis * np.max(DQN.predict(next_state))
        # NOTE(review): function body is truncated here in the source — the
        # stacking of x/y and the subsequent training call are missing.