def load():
    """Read the game input and the stored Q-table, then ask a greedy player for a move.

    Returns:
        Whatever ``GreedyPlayer.get_input`` returns for the position that
        ``FileHandler.readInput`` produced.
    """
    n = 5  # size argument handed to FileHandler.readInput
    piece_type, previous_board, board = FileHandler.readInput(n)
    # The Q-table is read first, then wrapped in a player that picks the move.
    return GreedyPlayer(FileHandler.readQTable()).get_input(
        piece_type, previous_board, board)
def test_evaluation(self):
    """Print the Q-table lookup and the chosen move for one fixed 5x5 position.

    Loads the pickled Q-table from ``../QLearner/QTable.pkl``, encodes a
    hand-written previous/current board pair, and prints
    ``_check_q_table`` and ``get_input`` results for piece type 2.
    The method only prints; it makes no assertions.
    """
    q_table = FileHandler.read_q_table(path="../QLearner/QTable.pkl")
    my_player = QLearningPlayer(q_table=q_table, debug=False)

    previous_board = [[0, 0, 0, 0, 0],
                      [0, 0, 0, 2, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0]]
    previous_board_int = GameBoard.encode_board(previous_board)

    # Same position with one extra piece of type 1 at row 1, column 2.
    board = [[0, 0, 0, 0, 0],
             [0, 0, 1, 2, 0],
             [0, 0, 1, 0, 0],
             [0, 0, 0, 0, 0],
             [0, 0, 0, 0, 0]]
    board_int = GameBoard.encode_board(board)

    valid_moves = GameBoard.get_valid_moves(previous_board_int, board_int, 2)
    print(my_player._check_q_table(board_int, valid_moves))
    print(my_player.get_input(2, previous_board, board))
def test_against_self(self):
    """Let two QLearningPlayer instances that share one Q-table play each other.

    Runs ``go.battle`` twice for ``num`` games with ``learn=True``, swapping
    the argument order of the two players on the second run.
    """
    num = 100
    verbose = False
    q_table = FileHandler.read_q_table(path="../QLearner/QTable.pkl")
    my_player = QLearningPlayer(q_table=q_table, debug=False)
    my_player2 = QLearningPlayer(q_table=q_table, debug=False)

    # The opponent is a second QLearningPlayer, not a greedy player, so the
    # progress message names the actual match-up.
    print('Training MyPlayer against itself for {} times......'.format(num))

    go.battle(my_player, my_player2, num, learn=True, show_result=True,
              verbose=verbose)
    go.battle(my_player2, my_player, num, learn=True, show_result=True,
              verbose=verbose)
return False def _store_encoded_state(self, encoded_board, piece_type, value, action): # pass if encoded_board in self.history: self.history[encoded_board][piece_type] = (value, action) else: self.history[encoded_board] = {piece_type: (value, action)} def _get_valid_moves(self, previous_board_int, board_int, piece_type): moves = GameBoard.get_valid_moves(previous_board_int, board_int, piece_type) return moves def load(): """ read input file and self storage """ N = 5 piece_type, previous_board, board = FileHandler.readInput(N) q_table = FileHandler.readQTable() player = GreedyPlayer(q_table) action = player.get_input(piece_type, previous_board, board) return action if __name__ == "__main__": action = load() FileHandler.writeOutput(action)
move = (i, j) return curr_max, move def _check_encoded_state(self, encoded_board, piece_type): # return False if encoded_board in self.history: if piece_type in self.history[encoded_board]: return self.history[encoded_board][piece_type] return False def _store_encoded_state(self, encoded_board, piece_type, value, action): # pass if encoded_board in self.history: self.history[encoded_board][piece_type] = (value, action) else: self.history[encoded_board] = {piece_type: (value, action)} def _get_valid_moves(self, previous_board_int, board_int, piece_type): moves = GameBoard.get_valid_moves(previous_board_int, board_int, piece_type) return moves if __name__ == "__main__": piece_type, previous_board, board = FileHandler.read_input(5) q_table = FileHandler.read_q_table() player = QLearningPlayer(q_table=q_table) action = player.get_input(piece_type, previous_board, board) # FileHandler.writeQTable(player.history) FileHandler.write_output(action)
self.learn_from_history(self.aggressive_win_table, WIN_REWARD) # self.learn_from_history(self.aggressive_lose_table, LOSS_REWARD) def learn_from_history(self, history, reward, alpha=0.7, gamma=1.0): history.reverse() max_q_value = -1.0 for hist in history: state, move = hist if state == 0: max_q_value = -1.0 self.Q(state) q = self.q_values[state] if max_q_value < 0: q[move[0]][move[1]] = reward else: q[move[0]][move[1]] = q[move[0]][move[1]] * ( 1 - alpha) + alpha * (gamma * max_q_value) max_q_value = np.max(q) print(max_q_value) if __name__ == "__main__": learner = Learner() learner.learn_from_champion() # learner.learn_from_aggressive() # print(learner.q_values) FileHandler.write_q_table(learner.q_values) print(FileHandler.read_q_table()) print(len(learner.q_values.keys()))
def test_write_qtable(self):
    """Write a one-entry dummy table through ``FileHandler.write_q_table``."""
    # NOTE(review): no path is passed, so this goes to whatever location
    # write_q_table uses by default — confirm it cannot clobber a real table.
    FileHandler.write_q_table({6661: 456456})
def test_read_qtable2(self):
    """Print the table that ``FileHandler.read_q_table`` loads from ``QTable_from_run.pkl``."""
    print(FileHandler.read_q_table(path="QTable_from_run.pkl"))
def test_read_qtable(self):
    """Print the table that ``FileHandler.read_q_table`` loads with its default arguments."""
    print(FileHandler.read_q_table())