                   type=float, default=0.9, help='Reward decay')
args = parse.parse_args()

# env setup
env = gym.make('MountainCar-v0')
env = env.unwrapped

# algorithm setup
method = args.method
if method == 'QL':
    print("Use Q-Learning...")
    print('--------------------------------')
    RL = QLearningTable(actions=list(range(env.action_space.n)),
                        learning_rate=args.learning_rate,
                        reward_decay=args.reward_decay)
elif method == 'SARSA':
    print("Use SARSA...")
    print('--------------------------------')
    RL = SarsaTable(actions=list(range(env.action_space.n)),
                    learning_rate=args.learning_rate,
                    reward_decay=args.reward_decay)
elif method == 'DQN':
    print("Use DQN...")
    print('--------------------------------')
    # to confirm the shape
    env_shape = 0 if isinstance(env.action_space.sample(), int) \
        else env.action_space.sample().shape
    RL = DQN(action_n=env.action_space.n,
             state_n=env.observation_space.shape[0],
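# ---------------------------------------------------------------------------
# The script above only shows how the tabular agents are constructed. Below is
# a minimal sketch (an assumption, not the repo's actual classes) of the
# QLearningTable / SarsaTable interface it relies on: epsilon-greedy
# choose_action() plus the standard updates
#   Q-learning: Q(s,a) += lr * (r + gamma * max_a' Q(s',a') - Q(s,a))
#   SARSA:      Q(s,a) += lr * (r + gamma * Q(s',a')        - Q(s,a))
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd


class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, state):
        self._ensure_state(state)
        if np.random.uniform() < self.epsilon:
            row = self.q_table.loc[state, :]
            # break ties randomly among equally good actions
            return np.random.choice(row[row == row.max()].index)
        return np.random.choice(self.actions)

    def learn(self, s, a, r, s_):
        self._ensure_state(s_)
        q_predict = self.q_table.loc[s, a]
        q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def _ensure_state(self, state):
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)


class SarsaTable(QLearningTable):
    # SARSA is on-policy: the target uses the action actually taken in s_
    def learn(self, s, a, r, s_, a_):
        self._ensure_state(s_)
        q_predict = self.q_table.loc[s, a]
        q_target = r + self.gamma * self.q_table.loc[s_, a_]
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)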
def __init__(self, file_name, base_port=5006):
    atexit.register(self.close)
    self.port = base_port
    self._buffer_size = 10240
    self._loaded = False
    self._open_socket = False
    logger.info("unity env try created, socket with port:{}".format(str(self.port)))

    try:
        # Establish communication socket
        self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self._socket.bind(("localhost", self.port))
        self._open_socket = True
    except socket.error:
        self._open_socket = True
        self.close()
        raise socket.error(
            "Couldn't launch new environment. "
            "You may need to manually close a previously opened environment "
            "or use a different worker number.")

    cwd = os.getcwd()
    file_name = (file_name.strip().replace('.app', '').replace('.exe', '')
                 .replace('.x86_64', '').replace('.x86', ''))
    true_filename = os.path.basename(os.path.normpath(file_name))
    launch_string = None

    if platform == "linux" or platform == "linux2":
        candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
        if len(candidates) == 0:
            candidates = glob.glob(os.path.join(cwd, file_name) + '.x86')
        if len(candidates) == 0:
            candidates = glob.glob(file_name + '.x86_64')
        if len(candidates) == 0:
            candidates = glob.glob(file_name + '.x86')
        if len(candidates) > 0:
            launch_string = candidates[0]
    elif platform == 'darwin':
        candidates = glob.glob(
            os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', true_filename))
        if len(candidates) == 0:
            candidates = glob.glob(
                os.path.join(file_name + '.app', 'Contents', 'MacOS', true_filename))
        if len(candidates) == 0:
            candidates = glob.glob(
                os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', '*'))
        if len(candidates) == 0:
            candidates = glob.glob(
                os.path.join(file_name + '.app', 'Contents', 'MacOS', '*'))
        if len(candidates) > 0:
            launch_string = candidates[0]
    elif platform == 'win32':
        candidates = glob.glob(os.path.join(cwd, file_name + '.exe'))
        if len(candidates) == 0:
            candidates = glob.glob(file_name + '.exe')
        if len(candidates) > 0:
            launch_string = candidates[0]

    if launch_string is None:
        self.close()
        raise UnityEnvironmentException(
            "Couldn't launch the {0} environment. "
            "Provided filename does not match any environments.".format(true_filename))
    else:
        # Launch Unity environment
        proc1 = subprocess.Popen([launch_string, '--port', str(self.port)])

    self._socket.settimeout(60)
    try:
        try:
            self._socket.listen(1)
            self._conn, _ = self._socket.accept()
            self._conn.settimeout(30)
            p = self._conn.recv(self._buffer_size).decode('utf-8')
            p = json.loads(p)
            # print p
        except socket.timeout as e:
            raise UnityTimeOutException(
                "The Unity environment took too long to respond. Make sure {} does not need user interaction to "
                "launch and that the Academy and the external Brain(s) are attached to objects in the Scene."
                .format(str(file_name)))

        self._data = {}
        self._global_done = None
        self._log_path = p["logPath"]
        self._alpha = p["alpha"]
        self._epsilon = p["epsilon"]
        self._gamma = p["gamma"]
        self._states = p["states"]
        # self._num_states = len(self._states)
        # for i in range(self._num_states):
        #     print "i:{0}, state:{1}".format(i, self._states[i])
        self._actions = p["actions"]
        self._brain = QLearningTable(self._actions, self._states,
                                     self._alpha, self._gamma, self._epsilon)
        self._loaded = True
        self._recv_bytes()
        logger.info("started successfully!")
    except UnityEnvironmentException:
        proc1.kill()
        self.close()
        raise
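# ---------------------------------------------------------------------------
# For reference, the handshake above expects the Unity process to send one
# JSON message over the socket before training starts. Based on the keys read
# in __init__, the payload would look roughly like the made-up example below;
# the actual values depend on the Unity scene.
# ---------------------------------------------------------------------------
import json

example_handshake = {
    "logPath": "unity_env.log",    # hypothetical log file path
    "alpha": 0.1,                  # learning rate for the QLearningTable brain
    "epsilon": 0.9,                # e-greedy exploration parameter
    "gamma": 0.9,                  # reward decay
    "states": ["s0", "s1", "s2"],  # hypothetical discrete state labels
    "actions": [0, 1, 2],          # hypothetical discrete action ids
}
# The environment side decodes it with:
#   p = json.loads(self._conn.recv(self._buffer_size).decode('utf-8'))
print(json.dumps(example_handshake))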
parser.add_argument(
    '--method',
    default='DQN',
    help='input method used to solve problem: '
         'Q-learning / Sarsa / SarsaLambda / DQN / Policy Gradient')
parser.add_argument('--episode',
                    default='5000',
                    help='input how many episodes to execute')
parser.add_argument('--test',
                    default='False',
                    help='is testing mode or not')
args = parser.parse_args()

if args.method == 'Q-learning':
    RL = QLearningTable(range(0, env.action_space.n))
elif args.method == 'Sarsa':
    RL = SarsaTable(range(0, env.action_space.n))
elif args.method == 'SarsaLambda':
    RL = SarsaLambdaTable(range(0, env.action_space.n))
elif args.method == 'DQN':
    if args.test == 'True':
        RL = DeepQNetwork(env.action_space.n,
                          2,
                          lr=0.1,
                          batch_size=128,
                          reward_decay=0.9,
                          e_greedy=0.9,
                          replace_target_iter=300,
                          memory_size=3000,
                          e_greedy_increment=0.0001,
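# ---------------------------------------------------------------------------
# Note that --episode and --test are declared with string defaults, so the
# branch above compares strings (args.test == 'True'). A common alternative
# (a sketch, not part of the original script) is to convert at parse time:
# ---------------------------------------------------------------------------
import argparse


def str2bool(value):
    """Interpret typical truthy command-line strings ('True', '1', 'yes') as bool."""
    return str(value).lower() in ('true', '1', 'yes')


parser = argparse.ArgumentParser()
parser.add_argument('--episode', type=int, default=5000,
                    help='how many episodes to execute')
parser.add_argument('--test', type=str2bool, default=False,
                    help='testing mode or not')
args = parser.parse_args()
# args.episode is now an int and args.test a bool, so the training loop can
# use them directly, e.g. `for episode in range(args.episode): ...`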
        action = RL.choose_action(str(observation))
        observation_, reward, done = game.ML_move(action)
        # track the largest reward seen (note: the variable shadows the built-in max)
        if reward > max:
            max = reward
        # print(str(observation) + " R" + str(reward))
        RL.learn(str(observation), action, reward, str(observation_))

        # swap observation
        observation = observation_

        # break while loop when end of this episode
        if done:
            print(max)
            break


if __name__ == "__main__":
    game = Game(288, 512)
    game.setup()
    reward = 0
    done = False
    RL = QLearningTable(actions=list(range(game.n_actions)))
    # arcade.run()
    # start_new_thread(main, ())
    thread1 = myThread()
    # thread2 = myThread2()
    thread1.start()
    # thread2.start()
    arcade.run()
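# ---------------------------------------------------------------------------
# myThread is not shown in this excerpt. The __main__ block above implies it
# is a threading.Thread subclass that drives the Q-learning loop in the
# background while arcade.run() keeps the game window on the main thread.
# A plausible minimal reconstruction (an assumption, not the original class):
# ---------------------------------------------------------------------------
import threading


def main():
    # stand-in for the episode loop shown above
    # (choose_action -> game.ML_move -> RL.learn)
    pass


class myThread(threading.Thread):
    def run(self):
        main()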
    # (episode loop inside update(), scheduled below via maze.after)
    for episode in range(100):
        # initial observation
        s_curr = maze._reset_maze()

        while True:
            maze._render()

            # Get next action from the Q-table
            action = rl.select_next_action(str(s_curr))

            # take the action and observe the next state and reward
            s_next, reward, isDone = maze._update_maze(action)

            # learn from the feedback
            rl.learn(str(s_curr), action, reward, str(s_next))

            s_curr = s_next

            if isDone:
                break

    print("Game over")
    maze.destroy()


if __name__ == "__main__":
    maze = Maze()
    rl = QLearningTable(actions=list(range(len(maze.action_space))))
    maze.after(100, update)
    maze.mainloop()
        step = 0
        RL.learning_rate = lr[episode]
        while True:
            env.render()
            # env.update()
            action = RL.choose_action(str(observation))
            observation_, reward, done = env.step(action)
            sum_reward += reward
            step += 1
            RL.learn(str(observation), action, reward, str(observation_), done)
            observation = observation_
            if done:
                REWARD.append(sum_reward)
                STEP.append(step)
                break


REWARD = []
STEP = []
# learning-rate schedule: anneal as training progresses
lr = [0.1] * 200 + [0.03] * 400 + [0.005] * 400 + [0.0005] * 800  # +[0.0001]*400 +[0.00001]*400

env = Maze(n_goals=2, n_agents=2, origin_random=True)  # random origins need more training
RL = QLearningTable(actions=list(range(env.n_actions)))

# pass the callable and its argument separately so Tk calls update(len(lr))
# after 100 ms instead of invoking it immediately
env.after(100, update, len(lr))
env.mainloop()

joblib.dump(RL, 'table2.pkl')

plt.plot(range(1, len(REWARD) + 1), REWARD)
plt.show()
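# ---------------------------------------------------------------------------
# The trained table is persisted with joblib above. A later test run could
# reload it and act greedily; this is a sketch under the same assumed
# interface (env.reset() returning the initial observation, epsilon = 1.0
# meaning "always pick the greedy action"), not code from the original repo.
# ---------------------------------------------------------------------------
import joblib


def evaluate(env, n_episodes=10):
    RL = joblib.load('table2.pkl')   # the file written after training above
    RL.epsilon = 1.0                 # assumed convention: fully greedy, no exploration
    for _ in range(n_episodes):
        observation = env.reset()
        done = False
        while not done:
            env.render()
            action = RL.choose_action(str(observation))
            observation, reward, done = env.step(action)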