コード例 #1
0
def load():
    """Read the current game state and stored Q-table, then pick a move.

    Returns the action chosen by a GreedyPlayer for the board read from
    the input file.
    """
    board_size = 5
    piece_type, previous_board, board = FileHandler.readInput(board_size)
    greedy = GreedyPlayer(FileHandler.readQTable())
    return greedy.get_input(piece_type, previous_board, board)
 def test_evaluation(self):
     """Smoke-test QLearningPlayer evaluation on a fixed 5x5 position.

     Loads a pre-trained Q-table, encodes a hand-written board pair,
     and prints both the raw Q-table lookup and the chosen move.
     (Fixed: removed the unused `verbose` local.)
     """
     q_table = FileHandler.read_q_table(path="../QLearner/QTable.pkl")
     my_player = QLearningPlayer(q_table=q_table, debug=False)
     previous_board = [[0, 0, 0, 0, 0], [0, 0, 0, 2, 0], [0, 0, 1, 0, 0],
                       [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
     previous_board_int = GameBoard.encode_board(previous_board)
     board = [[0, 0, 0, 0, 0], [0, 0, 1, 2, 0], [0, 0, 1, 0, 0],
              [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]
     board_int = GameBoard.encode_board(board)
     # Player 2 (white) to move; derive legal moves from the board pair.
     valid_moves = GameBoard.get_valid_moves(previous_board_int, board_int,
                                             2)
     print(my_player._check_q_table(board_int, valid_moves))
     print(my_player.get_input(2, previous_board, board))
 def test_against_self(self):
     """Self-play training: pit two QLearningPlayers against each other.

     Runs `num` games twice — swapping which player moves first — with
     learning enabled so both instances update from the results.
     """
     num = 100
     verbose = False
     q_table = FileHandler.read_q_table(path="../QLearner/QTable.pkl")
     my_player = QLearningPlayer(q_table=q_table, debug=False)
     my_player2 = QLearningPlayer(q_table=q_table, debug=False)
     # Fixed: the message previously claimed the opponent was a Greedy
     # player, but this is self-play between two QLearningPlayers.
     print('Training MyPlayer against itself for {} times......'.
           format(num))
     go.battle(my_player,
               my_player2,
               num,
               learn=True,
               show_result=True,
               verbose=verbose)
     # Swap sides so each player also learns from moving second.
     go.battle(my_player2,
               my_player,
               num,
               learn=True,
               show_result=True,
               verbose=verbose)
コード例 #4
0
        return False

    def _store_encoded_state(self, encoded_board, piece_type, value, action):
        # pass
        if encoded_board in self.history:
            self.history[encoded_board][piece_type] = (value, action)
        else:
            self.history[encoded_board] = {piece_type: (value, action)}

    def _get_valid_moves(self, previous_board_int, board_int, piece_type):
        """Delegate legal-move generation to GameBoard."""
        return GameBoard.get_valid_moves(previous_board_int, board_int, piece_type)


def load():
    """Load the input game state and stored Q-table; return the greedy move."""
    size = 5
    state = FileHandler.readInput(size)
    piece_type, previous_board, board = state
    player = GreedyPlayer(FileHandler.readQTable())
    action = player.get_input(piece_type, previous_board, board)
    return action


if __name__ == "__main__":
    # Script entry point: compute the move and persist it for the host.
    FileHandler.writeOutput(load())
コード例 #5
0
                        move = (i, j)
        return curr_max, move

    def _check_encoded_state(self, encoded_board, piece_type):
        # return False
        if encoded_board in self.history:
            if piece_type in self.history[encoded_board]:
                return self.history[encoded_board][piece_type]
        return False

    def _store_encoded_state(self, encoded_board, piece_type, value, action):
        # pass
        if encoded_board in self.history:
            self.history[encoded_board][piece_type] = (value, action)
        else:
            self.history[encoded_board] = {piece_type: (value, action)}

    def _get_valid_moves(self, previous_board_int, board_int, piece_type):
        """Delegate legal-move generation to GameBoard."""
        return GameBoard.get_valid_moves(previous_board_int, board_int,
                                         piece_type)


if __name__ == "__main__":
    # Entry point: read the state, consult the Q-learning player, write the move.
    piece_type, prev_board, curr_board = FileHandler.read_input(5)
    player = QLearningPlayer(q_table=FileHandler.read_q_table())
    FileHandler.write_output(player.get_input(piece_type, prev_board, curr_board))
コード例 #6
0
        self.learn_from_history(self.aggressive_win_table, WIN_REWARD)
        # self.learn_from_history(self.aggressive_lose_table, LOSS_REWARD)

    def learn_from_history(self, history, reward, alpha=0.7, gamma=1.0):
        """Propagate `reward` backwards through one game's move history.

        Standard backward Q-update: the move closest to the game's end (or
        to a boundary marker) receives the raw reward; each earlier move
        blends its old value with the discounted best value of the
        successor state.

        NOTE(review): `history` is reversed IN PLACE, so the caller's list
        is mutated — preserved from the original implementation; confirm
        callers do not reuse the list afterwards.
        Fixed: removed a leftover debug `print(max_q_value)` from the loop.

        Args:
            history: list of (state, move) pairs in play order; a state
                equal to 0 resets the propagated value.
            reward: terminal reward seeding the backward pass.
            alpha: learning rate in [0, 1].
            gamma: discount factor applied to the successor's max Q-value.
        """
        history.reverse()
        max_q_value = -1.0
        for state, move in history:
            if state == 0:
                # Boundary marker: restart propagation from the raw reward.
                max_q_value = -1.0
            self.Q(state)  # ensure a Q-grid exists for this state
            q = self.q_values[state]
            if max_q_value < 0:
                # First move after a reset gets the reward directly.
                q[move[0]][move[1]] = reward
            else:
                q[move[0]][move[1]] = q[move[0]][move[1]] * (
                    1 - alpha) + alpha * (gamma * max_q_value)
            max_q_value = np.max(q)


if __name__ == "__main__":
    # Train from the champion games, then persist and sanity-check the table.
    trainer = Learner()
    trainer.learn_from_champion()
    FileHandler.write_q_table(trainer.q_values)
    print(FileHandler.read_q_table())
    print(len(trainer.q_values.keys()))
コード例 #7
0
 def test_write_qtable(self):
     """Persist a tiny dummy mapping to exercise write_q_table."""
     sample = {6661: 456456}
     FileHandler.write_q_table(sample)
コード例 #8
0
 def test_read_qtable2(self):
     """Load the Q-table produced by a training run and dump it to stdout."""
     table = FileHandler.read_q_table(path="QTable_from_run.pkl")
     print(table)
コード例 #9
0
 def test_read_qtable(self):
     """Load the default Q-table and dump it to stdout."""
     table = FileHandler.read_q_table()
     print(table)