import time

import numpy as np
import pandas as pd

# Environment, Qmatrix, state_index, brute_force_scoring and
# Q_textfile_path_save are assumed to be defined elsewhere in this repo.


def play(difficulty):
    """
    Play the Snake game, feel free to change the difficulty

    Default difficulty: Medium
    """
    d = 150
    if difficulty == "easy":
        d = 200
        print("\nDifficulty set to <easy>\n")
    elif difficulty == "medium":
        d = 150
        print("\nDifficulty set to <medium>\n")
    elif difficulty == "hard":
        d = 100
        print("\nDifficulty set to <hard>\n")
    elif difficulty == "insane":
        d = 80
        print("\nDifficulty set to <insane>\n")
    else:
        print("\nYou did not enter a valid difficulty!\nDifficulty set to <medium>\n")

    env = Environment(wrap=False, grid_size=10, rate=d, tail=True, obstacles=False)
    env.play()
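# A more compact alternative to the if/elif chain in play() above, sketched
# with a dict lookup. DIFFICULTY_RATES and play_with_lookup are hypothetical
# names, not part of the original code; behaviour matches play() for the four
# known difficulties, with unknown names falling back to medium.
DIFFICULTY_RATES = {"easy": 200, "medium": 150, "hard": 100, "insane": 80}


def play_with_lookup(difficulty):
    d = DIFFICULTY_RATES.get(difficulty)
    if d is None:
        print("\nYou did not enter a valid difficulty!\nDifficulty set to <medium>\n")
        d = 150
    else:
        print("\nDifficulty set to <{}>\n".format(difficulty))
    env = Environment(wrap=False, grid_size=10, rate=d, tail=True, obstacles=False)
    env.play()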
def play(difficulty=150):
    # NOTE: this variant originally took no arguments but referenced an
    # undefined `difficulty`; it is taken as a parameter here (used as the
    # Environment rate), defaulting to the medium value.
    env = Environment(wrap=False, grid_size=10, rate=difficulty, tail=True,
                      obstacles=False)
    env.play()
def watch(log_file_number):
    # NOTE: the original referenced an undefined `logFileNumber`;
    # it is taken as a parameter here.
    csv_file_path = "./Data/Logs/log_file{}.csv".format(log_file_number)

    RENDER_TO_SCREEN = True

    # rate should be 0 when not rendering, else it will lengthen
    # training time unnecessarily
    env = Environment(wrap=False, grid_size=10, rate=80, tail=True,
                      obstacles=False)

    if RENDER_TO_SCREEN:
        env.prerender()
        env.noArrow()

    avg_time = 0
    avg_score = 0
    episode = 0

    df = pd.read_csv(csv_file_path)

    GAME_OVER = False
    GAME_ON = True
    t = 3

    time_stamps = df["TIME_STAMP"].values
    snake_Xs = df["SNAKE_X"].values
    snake_Ys = df["SNAKE_Y"].values
    food_Xs = df["FOOD_X"].values
    food_Ys = df["FOOD_Y"].values
    my_actions = df["INPUT_BUTTON"].values

    my_index = 0

    while GAME_ON:
        # needs to be changed to ignore the SCALE
        state, info = env.irl_reset(snake_Xs[my_index], snake_Ys[my_index],
                                    food_Xs[my_index], food_Ys[my_index])
        last_action = my_actions[my_index]
        my_index = my_index + 1  # BUG: TODO: FIX MY_INDEX

        env.countdown = RENDER_TO_SCREEN

        while not GAME_OVER:
            try:
                if RENDER_TO_SCREEN:
                    env.render()

                action = last_action

                if my_index < len(my_actions):
                    # Ensure that the last action pressed during this
                    # "tick" is the one replayed
                    last_action_pressed = False
                    i = 1
                    while not last_action_pressed:
                        if (my_index + i < len(my_actions)
                                and time_stamps[my_index]
                                == time_stamps[my_index + i]):
                            my_index = my_index + 1
                            i = i + 1
                        else:
                            last_action_pressed = True

                    if env.time == time_stamps[my_index]:
                        action = my_actions[my_index]
                        last_action = action
                        if action != -1:
                            my_index = my_index + 1

                if env.countdown:
                    # Render the 3-2-1 countdown before the replay starts
                    text = env.font.render(str(t), True, (255, 255, 255))
                    env.display.blit(text, (60, 30))
                    env.pg.display.update()
                    time.sleep(0.2)
                    t = t - 1
                    if t == 0:
                        t = 3
                        env.countdown = False
                else:
                    new_state, reward, GAME_OVER, info = env.step(
                        action, action_space=4)
                    # Respawn the food where the log recorded it
                    if reward > 0:
                        env.food.irl_make(food_Xs[my_index], food_Ys[my_index],
                                          env.SCALE)

                if GAME_OVER:
                    avg_time += info["time"]
                    avg_score += info["score"]
                    my_index = my_index + 1
            except KeyboardInterrupt as e:
                raise e

        while GAME_OVER:
            if my_index >= len(my_actions) - 1:
                # All logged actions have been replayed
                env.end()
            else:
                GAME_OVER = False
                episode = episode + 1
                avg_time = 0
                avg_score = 0

    env.end()
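# The inner loop above that fast-forwards my_index advances both my_index and
# i on every pass, so the comparison strides further ahead each time and can
# stop early when more than two log entries share a TIME_STAMP (likely the
# "BUG: TODO: FIX MY_INDEX" note). A corrected sketch of the intended
# behaviour; last_action_index is a hypothetical helper name:
def last_action_index(time_stamps, index):
    """Advance `index` to the last log entry sharing its TIME_STAMP."""
    while (index + 1 < len(time_stamps)
           and time_stamps[index] == time_stamps[index + 1]):
        index += 1
    return index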
def play():
    env = Environment(wrap=True, grid_size=10, rate=100, tail=False)
    env.play()
def train(s):
    RENDER_TO_SCREEN = False

    # rate should be 0 when not rendering, else it will lengthen
    # training time unnecessarily
    env = Environment(wrap=False, grid_size=8, rate=0, max_time=50)

    Q = Qmatrix(2, env)  # 0 - zeros, 1 - random, 2 - textfile

    alpha = 0.15   # Learning rate, i.e. which fraction of the Q values should be updated
    gamma = 0.99   # Discount factor, i.e. to which extent the algorithm considers possible future rewards
    epsilon = 0.1  # Probability to choose random action instead of best action

    epsilon_function = True
    epsilon_start = 0.5
    epsilon_end = 0.05
    epsilon_percentage = 0.5  # in decimal

    # Test for an Epsilon linear function:
    #   y = mx + c
    #   y = (0.9 / 20% of total episodes) * x + 1
    #   if epsilon <= 0.1, make epsilon = 0.1

    avg_time = 0
    avg_score = 0

    print_episode = 100
    total_episodes = 2000

    start_time = time.time()

    for episode in range(total_episodes):
        # Reset the environment
        # state, info = env.reset()
        done = False
        first_action = True
        action_count = 0

        # Epsilon linear function
        if epsilon_function:
            epsilon = (-(epsilon_start - epsilon_end) /
                       (epsilon_percentage * total_episodes)) * episode + epsilon_start
            if epsilon < epsilon_end:
                epsilon = epsilon_end

        while not done:
            # Wrapping the loop body in try/except might not be the best way
            # to guarantee Q is saved to a .txt file, but it works for now
            try:
                r = s.recv(1024)
                if r:  # recv returns b"" once the peer closes the connection
                    x = r.decode("utf-8")
                    x_cleaned = x[3:-1]  # Need to find a better implementation
                    a = x_cleaned.split(", ")

                    if a[0] != "close":
                        if a[0] == "done":
                            done = True
                            # Needs to change to incorporate dead states
                            state = np.zeros(4)
                            reward = int(a[1])
                        else:
                            state = np.zeros(4)
                            for i in range(4):
                                state[i] = float(a[i])
                            reward = int(a[4])

                        # Epsilon-greedy action selection
                        if np.random.rand() <= epsilon:
                            action = np.random.randint(0, 4)
                        else:
                            action = np.argmax(Q[state_index(state)])

                        s.send(str.encode(str(action) + "\n"))

                        if first_action:
                            action_count = action_count + 1
                            if action_count >= 2:
                                first_action = False

                        # TRAINING PART
                        if not first_action:
                            if reward == 10:
                                avg_score = avg_score + 1
                            Q[state_index(prev_state), prev_action] += alpha * (
                                reward + gamma * np.max(Q[state_index(state)])
                                - Q[state_index(prev_state), prev_action])

                        # save the previous state
                        prev_state = state
                        prev_action = action
                else:
                    s.close()
                    print("Socket has been closed")
                    connected = False
            except KeyboardInterrupt as e:
                # Write the Q matrix to file before exiting
                # (np.float is deprecated; use the builtin float)
                np.savetxt(Q_textfile_path_save, Q.astype(float),
                           fmt='%f', delimiter=" ")
                print("Saved Q matrix to text file")
                s.close()
                print("Socket has been closed")
                raise e

        if (episode % print_episode == 0 and episode != 0) or episode == total_episodes - 1:
            current_time = time.time() - start_time
            print("Episode:", episode,
                  "\ttime:", avg_time / print_episode,
                  "\tscore:", avg_score / print_episode,
                  "\tepsilon:", epsilon,
                  "\ttime {0:.0f}:{1:.0f}".format(current_time / 60,
                                                  current_time % 60))
            np.savetxt(Q_textfile_path_save, Q.astype(float),
                       fmt='%f', delimiter=" ")
            avg_time = 0
            avg_score = 0

    print("Simulation finished.\nSaved Q matrix to text file at:",
          Q_textfile_path_save)
def run(s):
    RENDER_TO_SCREEN = False

    env = Environment(wrap=False, grid_size=8, rate=80, max_time=100,
                      tail=False)

    Q = Qmatrix(2, env)  # 0 - zeros, 1 - random, 2 - textfile

    # Minimise the overfitting during testing
    epsilon = 0.005

    # Testing for a certain amount of episodes
    for episode in range(1000):
        done = False
        score = 0
        prev_state = np.zeros(4)
        prev_state[2] = -1

        while not done:
            try:
                r = s.recv(1024)
                if r:  # recv returns b"" once the peer closes the connection
                    x = r.decode("utf-8")
                    x_cleaned = x[3:-1]  # Need to find a better implementation
                    a = x_cleaned.split(", ")

                    if a[0] != "close":
                        if a[0] == "done":
                            done = True
                            print("Episode done")
                        else:
                            state = np.zeros(4)
                            for i in range(4):
                                state[i] = int(float(a[i]))

                            # Epsilon-greedy action selection
                            if np.random.rand() <= epsilon:
                                action = np.random.randint(0, 4)
                            else:
                                action = np.argmax(Q[state_index(state)])

                            s.send(str.encode(str(action) + "\n"))

                            score = brute_force_scoring(state, prev_state, score)
                            prev_state = state
                else:
                    s.close()
                    print("Socket has been closed")
                    connected = False
            # To force close the connection
            except KeyboardInterrupt as e:
                s.close()
                print("Socket has been closed")
                raise e

        if episode % 1 == 0:
            print("Episode:", episode, " Score:", score)
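# A sketch of a cleaner parser for the socket messages that train() and run()
# currently strip with x[3:-1] ("Need to find a better implementation").
# The wire format is inferred from the code above: a 3-character prefix, then
# "close", "done, <reward>" or "<s0>, <s1>, <s2>, <s3>, <reward>", then one
# trailing character. parse_message is a hypothetical name.
def parse_message(raw):
    """Return (kind, state, reward) where kind is 'close', 'done' or 'state',
    state is a length-4 np.ndarray or None, and reward is an int or None."""
    fields = raw.decode("utf-8")[3:-1].split(", ")
    if fields[0] == "close":
        return "close", None, None
    if fields[0] == "done":
        return "done", None, int(fields[1])
    state = np.array([float(f) for f in fields[:4]])
    return "state", state, int(fields[4])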
def watch(file_number):
    """
    Watch the games in a specific log file

    Default file: Last log file
    """
    number_path = "./Data/log_file_number.txt"
    log_file_number_txt = np.loadtxt(number_path, dtype='int')

    if file_number >= log_file_number_txt or file_number < 0:
        print("\nYou did not enter a valid log file number!"
              "\nReplaying last log file (file number: {})\n".format(
                  log_file_number_txt - 1))
        file_number = log_file_number_txt - 1
    else:
        print("\nReplaying Log file {}\n".format(file_number))

    csv_file_path = "./Data/Logs/log_file{}.csv".format(file_number)

    RENDER_TO_SCREEN = True

    # Create environment
    env = Environment(wrap=False, grid_size=10, rate=90, tail=True,
                      obstacles=False)

    if RENDER_TO_SCREEN:
        env.prerender()
        # No arrow on top of the snake's head
        env.noArrow()

    avg_time = 0
    avg_score = 0
    episode = 0

    df = pd.read_csv(csv_file_path)

    GAME_OVER = False
    GAME_ON = True
    t = 3

    time_stamps = df["TIME_STAMP"].values
    snake_Xs = df["SNAKE_X"].values
    snake_Ys = df["SNAKE_Y"].values
    food_Xs = df["FOOD_X"].values
    food_Ys = df["FOOD_Y"].values
    my_actions = df["INPUT_BUTTON"].values

    my_index = 0

    while GAME_ON:
        # needs to be changed to ignore the SCALE
        state, info = env.irl_reset(snake_Xs[my_index], snake_Ys[my_index],
                                    food_Xs[my_index], food_Ys[my_index])
        last_action = my_actions[my_index]
        my_index = my_index + 1  # BUG: TODO: FIX MY_INDEX

        env.countdown = RENDER_TO_SCREEN

        while not GAME_OVER:
            try:
                if RENDER_TO_SCREEN:
                    env.render()

                action = last_action

                if my_index < len(my_actions):
                    # Ensure that the last action pressed during this
                    # "tick" is the one replayed
                    last_action_pressed = False
                    i = 1
                    while not last_action_pressed:
                        if (my_index + i < len(my_actions)
                                and time_stamps[my_index]
                                == time_stamps[my_index + i]):
                            my_index = my_index + 1
                            i = i + 1
                        else:
                            last_action_pressed = True

                    if env.time == time_stamps[my_index]:
                        action = my_actions[my_index]
                        last_action = action
                        if action != -1:
                            my_index = my_index + 1

                if env.countdown:
                    # Render the 3-2-1 countdown before the replay starts
                    text = env.font.render(str(t), True, (255, 255, 255))
                    env.display.blit(text, (60, 30))
                    env.pg.display.update()
                    time.sleep(0.4)
                    t = t - 1
                    if t == 0:
                        t = 3
                        env.countdown = False
                else:
                    new_state, reward, GAME_OVER, info = env.step(
                        action, action_space=4)
                    # Respawn the food where the log recorded it
                    if reward > 0:
                        env.food.irl_make(food_Xs[my_index], food_Ys[my_index],
                                          env.SCALE)

                if GAME_OVER:
                    avg_time += info["time"]
                    avg_score += info["score"]
                    my_index = my_index + 1
            except KeyboardInterrupt as e:
                raise e

        while GAME_OVER:
            if my_index >= len(my_actions) - 1:
                # All logged actions have been replayed
                env.end()
            else:
                GAME_OVER = False
                episode = episode + 1
                avg_time = 0
                avg_score = 0

    env.end()
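# The replay logs read by watch() have one row per recorded tick, with columns
# TIME_STAMP, SNAKE_X, SNAKE_Y, FOOD_X, FOOD_Y and INPUT_BUTTON (where -1
# appears to mean "no key pressed", judging by the `action != -1` checks
# above). A small sketch for inspecting a log without launching the game;
# summarise_log is a hypothetical name.
def summarise_log(file_number):
    df = pd.read_csv("./Data/Logs/log_file{}.csv".format(file_number))
    key_presses = int((df["INPUT_BUTTON"].values != -1).sum())
    print("rows:", len(df),
          "\tdistinct ticks:", df["TIME_STAMP"].nunique(),
          "\tkey presses:", key_presses)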