def start_threads(self):
    """Spawn one A3C training worker thread per environment and wait for them.

    Builds NUM_THREADS Super Mario Bros environments (JoypadSpace with
    SIMPLE_MOVEMENT, then DQN-style Atari preprocessing), starts one daemon
    thread per environment running ``train_thread``, staggers the start-ups,
    and joins all workers. A KeyboardInterrupt aborts the wait.

    NOTE(review): relies on module-level names (NUM_THREADS, env_name,
    train_thread, agent, stats, AnnealingVariable) defined elsewhere in the
    file — confirm they are in scope at call time.
    """
    # Max number of episodes each worker may run.
    max_eps = 1e6

    # Create 1 local environment for each thread.
    envs = []
    for _ in range(NUM_THREADS):
        _env = gym_super_mario_bros.make(env_name)
        _env = JoypadSpace(_env, SIMPLE_MOVEMENT)
        env = atari_wrapper.wrap_dqn(_env)
        envs.append(env)

    # Create the threads and assign them their environment and exploration rate.
    threads = []
    for i in range(NUM_THREADS):
        thread = threading.Thread(
            target=train_thread,
            daemon=True,
            args=(self, max_eps, envs[i], agent.discount_rate,
                  self.optimizer, stats,
                  AnnealingVariable(.7, 1e-20, 10000), i))
        threads.append(thread)

    # Start the threads, staggered slightly so workers don't contend at once.
    for t in threads:
        print("STARTING")
        t.start()
        time.sleep(0.5)

    try:
        # Plain loop, not a side-effect list comprehension: wait for workers.
        for t in threads:
            t.join()
    except KeyboardInterrupt:
        print("Exiting threads!")
# NOTE(review): whitespace-mangled paste — this single physical line fuses several
# units whose original line breaks/indentation were lost: (1) the TAIL of a DQN
# training loop (`action = self.epsilon_greedy_policy(...)` ... `return rewards_arr`);
# its enclosing `def` and loop headers are not visible here, so the indentation
# cannot be reconstructed safely from this chunk alone; (2) the save_weights /
# save_model / restore_weights helper methods; (3) a top-level script that builds
# Stats(), wraps 'PongNoFrameskip-v4', trains a DQNAgent with prioritized replay
# (capacity 50000, max_steps=50e6), and closes the env.
# TODO(review): restore_weights() calls load_weights("DqnPongModel.h5") while
# save_weights() writes "PongDQNWeights.h5" — a full-model file vs a weights-only
# file; confirm the intended filename pairing before relying on restore.
action = self.epsilon_greedy_policy(state, exploration_rate) next_state, reward, done, _ = env.step(action) episode_reward += reward state = next_state rewards_arr[episode] = episode_reward stats(self, episode_reward) print(episode_reward) stats.save_stats() return rewards_arr def save_weights(self): print("PRINT") self.model.save_weights("PongDQNWeights.h5") def save_model(self): self.model.save("DqnPongModel.h5") def restore_weights(self): print("Restoring model weights Pong") self.model.load_weights("DqnPongModel.h5") stats = Stats() _env = gym.make('PongNoFrameskip-v4') env = atari_wrapper.wrap_dqn(_env) agent = DQNAgent() agent.init_priority_replay(50000) agent.train_model(max_steps=50e6, stats=stats) env.close()
# NOTE(review): whitespace-mangled paste — this single physical line fuses: (1) the
# TAIL of an A3C evaluation loop (`env.render()` ... `return rewards_arr`); its
# enclosing `def`/loop headers are not visible in this chunk, so indentation cannot
# be reconstructed safely; (2) a restore_weights method loading 'A3CPong.h5' into
# self.global_network; (3) a top-level evaluation script that wraps env_name,
# reads NUM_ACTIONS/OBS_SPACE from the env spaces, resets the env, and runs
# agent.play(test_env, stats).
# TODO(review): `state = np.expand_dims(state, axis=0)` is computed but agent.play
# is called without it — confirm play() resets/shapes state itself. Also
# restore_weights is defined but never called in the visible script; verify the
# weights are loaded elsewhere before play.
env.render() # time.sleep(0.05) action = self.pick_action(state, exploration_rate) next_state, reward, done, _ = env.step(action) episode_reward += reward state = next_state if callable(stats): stats(self, episode_reward) rewards_arr[episode] = episode_reward print(episode_reward) stats.save_stats() return rewards_arr def restore_weights(self): self.global_network.load_weights('A3CPong.h5') test_env = gym.make(env_name) test_env = atari_wrapper.wrap_dqn(test_env) NUM_ACTIONS = test_env.action_space.n OBS_SPACE = test_env.observation_space.shape[0] state = test_env.reset() state = np.expand_dims(state, axis=0) stats = Stats() agent = A3CAgent() agent.play(test_env, stats) test_env.close()