class Trainer: def __init__(self, agent): self.agent = agent self.name = "Trainer" def start_training_session(self, num_of_episodes): for episode_num in range(num_of_episodes): self.headline_print("start game of episode number {}".format(episode_num + 1)) self.game_id = rest_api.create_game(self.name).content.decode("utf-8") print("Trainer created game with id: {}".format(self.game_id)) self.tcp = TCP(self.game_id, self.name, self.on_recieved) self.start_game_with_agent(self.game_id) self.headline_print("end of episode number {}".format(episode_num + 1)) def start_game_with_agent(self, game_id): env = QuoridorEnv(game_id, "Agent") cur_state = env.reset() done = False steps_num = 0 while not done: steps_num += 1 action = self.agent.act(cur_state, env) new_state, reward, done, _ = env.step(action) self.agent.remember(cur_state, action, reward, new_state, done) self.agent.replay(env) # internally iterates default (prediction) model self.agent.target_train() # iterates target model cur_state = new_state print("\n\nGAME FINISHED IN {} STEPS!\n\n".format(steps_num)) def on_recieved(self, json_message): if json_message["type"] == "NewTurnEvent": if json_message["nextPlayerToPlay"] == self.name: myLoc = json_message["currentPosition"] actions = [] for move in json_message["avialiableMoves"]: if int(move["x"]) > int(myLoc["x"]): actions.append(3) # Move Right elif int(move["x"]) < int(myLoc["x"]): actions.append(2) # Move Left elif int(move["y"]) > int(myLoc["y"]): actions.append(1) # Move Down elif int(move["y"]) < int(myLoc["y"]): actions.append(0) # Move Up actions_len = len(actions) random_i = np.random.randint(0, actions_len) act_json = utils.convert_action_to_server(actions[random_i]) self.tcp.write(act_json) elif json_message["type"] == "GameOverEvent": pass elif json_message["type"] == "RoomStateResponse": if len(json_message["players"]) == 2: rest_api.start_game(self.game_id) def headline_print(self, text): print("====================================================") print(text) print("====================================================")
class QuoridorEnv(gym.Env): """ players: { index: number, start_location: number, name: string, targets: [number] } """ def __init__(self, game_id, player_name, is_not_tcp): self.game_id = game_id self.player_name = player_name self.is_my_turn = False self.winner_status = GameWinnerStatus.NoWinner self.last_turn_illegal = False self.action_options = [] self.winning_points_dim = np.zeros(shape=(9, 9), dtype=int) # join_game(self.game_id, self.player_name) if not is_not_tcp: self.tcp = TCP(game_id, player_name, self.on_recieved) self.wait_for_my_turn() self.action_space = spaces.Discrete(Global.num_of_actions) self.observation_space = spaces.Tuple(( spaces.MultiBinary([9, 9]), spaces.MultiBinary([9, 9]), spaces.MultiBinary([9, 9]), spaces.MultiBinary([9, 9]), spaces.MultiBinary([9, 9]) )) self.seed() # Start the first game #self.reset() def step(self, action): assert self.action_space.contains(action) action = int(action) self.update_board(action) self.wait_for_my_turn() reward, done = self.calculate_reward() self.is_my_turn = False # self.print_board() return self.board, reward, done, {} def wait_for_my_turn(self): while not self.is_my_turn: pass def reset(self): # self.board = self.init_board() return self.board def calculate_reward(self): reward = -1 done = False if self.last_turn_illegal: reward = -10 self.last_turn_illegal = False if self.winner_status != GameWinnerStatus.NoWinner: done = True if self.winner_status == GameWinnerStatus.EnvWinner: reward = 200 elif self.winner_status == GameWinnerStatus.EnvLoser: reward = -200 return reward, done def update_board(self, action): operation = utils.convert_action_to_server(action) self.send_to_server(operation) # WAITING def print_board(self): os.system('cls') print('------- TEAM 600 --------') arrays = np.dsplit(self.board, 4) for y in range(9): for x in range(9): print('|', end='') if arrays[0][y][x] != 0: print(1, end='') elif arrays[1][y][x] != 0: print(2, end='') else: print(' ', end='') if arrays[3][y][x] != 0: print('|', end='') elif y != 0 & arrays[3][y - 1][x] != 0: print('|', end='') else: print(' ', end='') print('') for x_wall in range(9): if arrays[2][y][x_wall] != 0: print(' ==', end='') elif x_wall != 0 & arrays[2][y][x_wall - 1] != 0: print(' ==', end='') else: print(' __', end='') print('') def get_and_convert_board(self): board = json.loads(get_board(self.game_id).content) return convert_board(board, self.winning_points_dim) def send_to_server(self, operation): self.tcp.write(operation) def on_recieved(self, json_message): if json_message["type"] == "IllegalMove": # self.is_my_turn = True self.last_turn_illegal = True elif json_message["type"] == "NewTurnEvent": if json_message["nextPlayerToPlay"] == self.player_name: self.board = self.get_and_convert_board() self.is_my_turn = True self.update_action_options(json_message) elif json_message["type"] == "GameOverEvent": self.is_my_turn = True if json_message["winnerName"] == self.player_name: self.winner_status = GameWinnerStatus.EnvWinner else: self.winner_status = GameWinnerStatus.EnvLoser elif json_message["type"] == "StartGameMessage": self.update_winning_locations(json_message["players"]) def action_shape(self): return action_shape() def observation_shape(self): return observation_shape() def update_action_options(self, moves_json): self.action_options = [] myLoc = moves_json["currentPosition"] for move in moves_json["avialiableMoves"]: if int(move["x"]) > int(myLoc["x"]): self.action_options.append(3) # Move Right elif int(move["x"]) < int(myLoc["x"]): self.action_options.append(2) # Move Left elif int(move["y"]) > int(myLoc["y"]): self.action_options.append(1) # Move Down elif int(move["y"]) < int(myLoc["y"]): self.action_options.append(0) # Move Up for wall in moves_json["availableWalls"]: wall_action = 4 + wall["position"]["x"] + (8 * wall["position"]["y"]) if wall["wallDirection"] == "Down": wall_action += 64 self.action_options.append(wall_action) def get_action_options(self): return self.action_options def update_winning_locations(self, players): for player in players: if player["name"] == self.player_name: for loc in player["endLine"]: x = int(loc["x"]) y = int(loc["y"]) self.winning_points_dim[y, x] = 1 print(self.winning_points_dim)