# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
TOTAL_GAMETIME = 10000
N_EPISODES = 10000
REPLACE_TARGET = 10

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
game = GameEnv.RacingEnv()
game.fps = 60

GameTime = 0
GameHistory = []
renderFlag = True

# ---------------------------------------------------------------------------
# Agent
# ---------------------------------------------------------------------------
# Epsilon starts at 0.02 here and the weights are loaded from disk right
# after construction, so this script works from a previously saved model.
ddqn_agent = DDQNAgent(
    alpha=0.0005,
    gamma=0.99,
    n_actions=5,
    epsilon=0.02,
    epsilon_end=0.01,
    epsilon_dec=0.999,
    replace_target=REPLACE_TARGET,
    batch_size=64,
    input_dims=19,
    fname='ddqn_model.h5',
)
ddqn_agent.load_model()
# NOTE(review): presumably syncs the target network with the freshly loaded
# weights -- confirm against DDQNAgent.update_network_parameters.
ddqn_agent.update_network_parameters()

# Per-episode bookkeeping lists (two independent lists).
ddqn_scores, eps_history = [], []


def run():
    """Entry point of this script (body lies beyond the visible excerpt).

    NOTE(review): only the header is visible here; document the behaviour
    once the full body is available.
    """
    #scores = deque(maxlen=100)
import gym
from gym import wrappers
import numpy as np

from ddqn_keras import DDQNAgent
from utils import plotLearning

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')

    # Action count and observation size are hard-coded for this environment.
    ddqn_agent = DDQNAgent(
        alpha=0.0005,
        gamma=0.99,
        n_actions=4,
        epsilon=1.0,
        batch_size=64,
        input_dims=8,
    )
    n_games = 500

    # Uncomment to start from previously saved weights:
    #ddqn_agent.load_model()

    # Per-episode bookkeeping lists (two independent lists).
    ddqn_scores, eps_history = [], []

    # Uncomment to record a video of every episode:
    #env = wrappers.Monitor(env, "tmp/lunar-lander-ddqn-2",
    #                       video_callable=lambda episode_id: True, force=True)

    for i in range(n_games):
        done, score = False, 0
        observation = env.reset()

        # Play one episode until the environment reports termination.
        while not done:
            action = ddqn_agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            # The transition is stored with the done flag converted to int.
            ddqn_agent.remember(
                observation, action, reward, observation_, int(done)
            )
from ddqn_keras import DDQNAgent
from collections import deque
import random
import math

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
TOTAL_GAMETIME = 1000  # Max game time for one episode
N_EPISODES = 10000
REPLACE_TARGET = 50

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
game = GameEnv.RacingEnv()
game.fps = 60

GameTime = 0
GameHistory = []
renderFlag = False

# ---------------------------------------------------------------------------
# Agent
# ---------------------------------------------------------------------------
# Epsilon starts at 1.00 and is configured to decay by 0.9995 down to 0.10.
ddqn_agent = DDQNAgent(
    alpha=0.0005,
    gamma=0.99,
    n_actions=5,
    epsilon=1.00,
    epsilon_end=0.10,
    epsilon_dec=0.9995,
    replace_target=REPLACE_TARGET,
    batch_size=512,
    input_dims=19,
)

# To continue from an existing model, uncomment the line below.
# Careful: an existing model might be overwritten.
#ddqn_agent.load_model()

# Per-episode bookkeeping lists (two independent lists).
ddqn_scores, eps_history = [], []


def run():
    """Iterate over ``N_EPISODES`` episodes, resetting ``game`` for each one.

    Only the start of the episode loop is visible in this excerpt; the rest
    of the body continues beyond it.
    """
    for e in range(N_EPISODES):
        game.reset()  # reset env
        done = False
import gym from gym import wrappers import numpy as np import random from ddqn_keras import DDQNAgent from utils import plotLearning import ipdb SHOW_EVERY = 10 if __name__ == '__main__': env = gym.make('MountainCar-v0') # ipdb.set_trace() ddqn_agent = DDQNAgent(alpha=0.0005, gamma=0.99, n_actions=env.action_space.n, epsilon=.01, batch_size=64, input_dims=env.observation_space.shape[0]) n_games = 500 #ddqn_agent.load_model() ddqn_scores = [] eps_history = [] #env = wrappers.Monitor(env, "tmp/lunar-lander-ddqn-2", # video_callable=lambda episode_id: True, force=True) for i in range(n_games): done = False score = 0 observation = env.reset() while not done: if not i or not i % SHOW_EVERY:
# NOTE(review): ``close`` takes ``self``, so it is presumably the last method
# of a class whose header lies above this excerpt -- re-indent it under that
# class when restoring the file.
def close(self):
    """Shut pygame down by calling ``pygame.quit()``."""
    pygame.quit()


# Clock to limit speed
clock = pygame.time.Clock()

# Exit the program?
exit_program = False

env = Arcanoid()

# The network input is twice the environment's observation size.
ddqn_agent = DDQNAgent(
    alpha=0.005,
    gamma=0.99,
    n_actions=env.action_space.n,
    epsilon=1.0,
    batch_size=64,
    input_dims=env.observation_space.shape[0] * 2,
    replace_target=1000,
)

# Bookkeeping containers (three independent lists).
ddqn_scores, eps_history, history = [], [], []

n_games = 500_000
start = time.time()

for i in range(n_games):
    done = False

    # Scale the first observation by the playfield dimensions, then prefix it
    # with three -1 values.
    # NOTE(review): the -1 prefix presumably stands in for a missing previous
    # observation -- confirm against the rest of the loop.
    observation = env.reset() / [GAME_WIDTH, GAME_WIDTH, GAME_HEIGHT]
    observation = np.append([-1, -1, -1], observation)

    while not done:
        action = ddqn_agent.choose_action(observation)