Code example #1
def test_step_with_action_taken_by_opponent():
    mock_opponent = Mock()
    env = TicTacToeEnv(mock_opponent)
    env.reset()

    attrs = {
        "select_action.side_effect": [1, 3],
        "get_player_number.return_value": 2
    }
    mock_opponent.configure_mock(**attrs)

    env.step(0)  # agent takes square 0; the mocked opponent answers with square 1
    with pytest.raises(ValueError):
        env.step(1)  # square 1 is already occupied, so step must reject it
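For context, here is a minimal sketch of the check this test exercises: the environment raises ValueError when the chosen square is already occupied. The helper name validate_move and the board encoding are assumptions for illustration, not the project's actual API.

import numpy as np

def validate_move(board, action):
    # Reject a move onto an occupied cell: a hypothetical stand-in
    # for the validation inside TicTacToeEnv.step.
    row, col = divmod(action, 3)
    if board[row, col] != 0:
        raise ValueError("cell ({}, {}) is already taken".format(row, col))

board = np.zeros((3, 3), dtype=int)
board[0, 1] = 2  # the mocked opponent's first move landed on square 1
try:
    validate_move(board, 1)
except ValueError as exc:
    print(exc)  # cell (0, 1) is already taken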
Code example #2
def _play_out(agent_actions, opponent_actions, expected_reward):
    mock_opponent = Mock()
    env = TicTacToeEnv(mock_opponent)
    _, is_done = env.reset()

    attrs = {
        "select_action.side_effect": opponent_actions,
        "get_player_number.return_value": 2
    }
    mock_opponent.configure_mock(**attrs)

    expected_state = np.zeros((3, 3), dtype=int)
    i = 0

    # call env.step until the end
    while not is_done:
        state, reward, is_done = env.step(agent_actions[i])

        expected_state[agent_actions[i] // 3, agent_actions[i] % 3] = PLAYER1
        if i < len(opponent_actions):
            expected_state[opponent_actions[i] // 3,
                           opponent_actions[i] % 3] = PLAYER2
        if not is_done:
            assert np.array_equal(state, expected_state)
            assert reward == 0
            mock_opponent.select_action.assert_called_once()
            mock_opponent.reset_mock()
        i += 1

    assert np.array_equal(state, expected_state)
    assert i == len(agent_actions)
    assert reward == expected_reward
    if i > len(opponent_actions):
        mock_opponent.select_action.assert_not_called()
    else:
        mock_opponent.select_action.assert_called_once()
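A hedged sketch of how _play_out might be driven from a parametrized test. The action sequences below were checked by hand against the helper's loop, but the reward convention (+1 for an agent win, 0 for a draw) is an assumption about this environment.

import pytest

@pytest.mark.parametrize("agent_actions, opponent_actions, expected_reward", [
    ([0, 1, 2], [3, 4], 1),               # agent completes the top row; win reward assumed to be 1
    ([0, 1, 5, 6, 8], [3, 4, 2, 7], 0),   # board fills with no winner; draw reward assumed to be 0
])
def test_play_out(agent_actions, opponent_actions, expected_reward):
    _play_out(agent_actions, opponent_actions, expected_reward)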
Code example #3
def main():
    use_sarsa = '--sarsa' in sys.argv

    num_episodes = 3000
    env = TicTacToeEnv(TicTacToeDecentAlgorithmPlayer(2, 1))
    agent = TicTacToeSarsaAgent() if use_sarsa else TicTacToeQLearningAgent()
    epsilon = Epsilon(1.0, 0.95)
    rewards = []
    moves = []

    def before_episode_callback(env, agent, episode_number):
        agent.set_epsilon(epsilon.value)

    def after_episode_callback(env, agent, episode_number, reward):
        epsilon.decay()
        rewards.append(reward)
        moves.append(env.get_moves())

    if use_sarsa:
        experiment = SarsaExperiment(env, agent, before_episode_callback,
                                     after_episode_callback)
        print("using SARSA")
    else:
        experiment = QLearningExperiment(env, agent, before_episode_callback,
                                         after_episode_callback)
        print("using Q-Learning")

    experiment.experiment(num_episodes)

    num_games_to_analyze = 100
    analyzer = TicTacToeMoveAnalyzer(moves, rewards, num_games_to_analyze)
    num_wins = analyzer.num_wins()
    num_corner_openings = analyzer.num_corner_openings()
    rl_method = "SARSA" if use_sarsa else "Q-Learning"
    text = "{0} wins and {1} corner openings in the last {2} games.".format(
        num_wins, num_corner_openings, num_games_to_analyze)

    TicTacToePlotter.plot_episode_reward(rewards, text, rl_method)
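The Epsilon(1.0, 0.95) helper is not shown in this excerpt. A minimal sketch consistent with how main uses it (.value is read before each episode, .decay() is called after) could look like the following; the purely multiplicative schedule with no lower bound is an assumption.

class Epsilon:
    # Hypothetical exploration-rate holder matching the calls in main.
    def __init__(self, start, decay_rate):
        self.value = start            # read by before_episode_callback
        self.decay_rate = decay_rate

    def decay(self):
        # called once per episode from after_episode_callback
        self.value *= self.decay_rate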
Code example #4
            action = np.r_[user_type, move_target]
            self._reset_step()
            return action
        else:
            pi = self.model.get_pi(state)
            choice = self.np_random.choice(
                9, 1, p=pi.flatten(), replace=False)
            move_target = self.action_space[choice[0]]
            action = np.r_[PLAYER, move_target]
            self._reset_step()
            return action


if __name__ == "__main__":
    # create the environment and set its seed
    env = TicTacToeEnv()
    env.seed(2018)
    # create the agent and set its seed
    my_agent = ZeroAgent()
    my_agent.seed(2018)
    # for statistics: win / draw / loss counts
    result = {1: 0, 0: 0, -1: 0}
    # play game
    for e in range(episode_count):
        state = env.reset()
        print('-' * 15, '\nepisode: %d' % (e + 1))
        # decide at random whether the agent or the opponent takes the first turn
        my_agent.first_turn = my_agent.np_random.choice(2, replace=False)
        done = False
        while not done:
            # choose an action (self-play mode)
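The actions above are packed with np.r_, which concatenates scalars and arrays into a single 1-D array. A standalone illustration; treating move_target as a (row, col) pair is an assumption based on how action_space is indexed here.

import numpy as np

player = 1
move_target = np.array([2, 0])       # hypothetical (row, col) of the chosen cell
action = np.r_[player, move_target]  # player id followed by the coordinates
print(action)                        # [1 2 0]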
Code example #5
from tictactoe_env import TicTacToeEnv as MEnv  # main env
import copy
import random
import tkinter as tk

env = MEnv(19, 650, 650)  # 19x19 board; the 650s are presumably the window size
arraybackup = copy.deepcopy(env.action_space)  # deep copy: a plain assignment would alias the mutated lists
for epoch in range(0, 2):
    env.render()
    #print("reset")
    root = env.reset()
    arrayCount = 0
    for ev in range(0, 361):
        action = random.choice(random.choice(env.action_space))
        if action is None:
            if arrayCount < 361:
                while action is None:
                    action = random.choice(random.choice(env.action_space))
            else:
                env.action_space = copy.deepcopy(arraybackup)  # restore a fresh copy of the full action space
                continue
        a, b = action
        arrayCount += 1
        env.action_space[a][b] = None
        observation, reward, done, info = env.step(action)
        if done:
            # the episode is over; break so the outer loop can reset
            break
        root.update()
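random.choice(random.choice(env.action_space)) can keep drawing None entries late in the game. Assuming action_space is a 2-D list of coordinate pairs whose used cells are set to None, as the code above suggests, a flatter sketch that samples only from the remaining legal cells:

import random

def sample_legal_action(action_space):
    # Hypothetical helper: collect every cell not yet blanked to None,
    # then draw uniformly from that pool.
    legal = [cell for row in action_space for cell in row if cell is not None]
    return random.choice(legal) if legal else None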
Code example #6
from tictactoe_env import TicTacToeEnv as MEnv  # main env
import bot
import pickle

env = MEnv(19, 650, 650)
iteration = 0
reward = 0
epochs = 10
layers = None
done = False
Gametype = "EvE"  #, "PvP", "EvP"#
try:
    with open('weights.pickle', 'rb') as weights:
        layers = pickle.load(weights)
        print(len(layers))
except (OSError, pickle.PickleError):
    # no saved weights yet, or an unreadable file; start fresh
    pass

env.render()
root = env.reset()

if Gametype == "EvE":
    for epoch in range(0, epochs):
        env.render()
        root = env.reset()
        #print("after reset")
Code example #7
def test_reset():
    mock_opponent = Mock()
    env = TicTacToeEnv(mock_opponent)
    state, is_done = env.reset()
    assert np.array_equal(state, np.zeros((3, 3), dtype=int))
    assert not is_done
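This test pins down the reset contract: a zeroed 3x3 board and a False done flag. A sketch of a method that would satisfy it; the board attribute name appears in code example #8, everything else is assumed.

import numpy as np

def reset(self):
    # Clear the board and report that the episode is not over,
    # matching the (state, is_done) pair asserted in test_reset.
    self.board = np.zeros((3, 3), dtype=int)
    return self.board, False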
Code example #8
def test_did_win():
    mock_opponent = Mock()
    env = TicTacToeEnv(mock_opponent)

    for player_a, player_b in [(PLAYER1, PLAYER2), (PLAYER2, PLAYER1)]:
        # test rows
        for i in range(3):
            env.reset()
            env.board[i, :] = player_a
            assert env._did_win(player_a)
            assert not env._did_win(player_b)

        # test columns
        for i in range(3):
            env.reset()
            env.board[:, i] = player_a
            assert env._did_win(player_a)
            assert not env._did_win(player_b)

        # test diagonals
        env.reset()
        env.board[0, 0] = player_a
        env.board[1, 1] = player_a
        env.board[2, 2] = player_a
        assert env._did_win(player_a)
        assert not env._did_win(player_b)

        env.reset()
        env.board[0, 2] = player_a
        env.board[1, 1] = player_a
        env.board[2, 0] = player_a
        assert env._did_win(player_a)
        assert not env._did_win(player_b)

    # test no winners
    env.reset()
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[0, 0] = PLAYER1
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[0, 1] = PLAYER1
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[0, 2] = PLAYER2
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[1, 0] = PLAYER2
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[1, 1] = PLAYER2
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[1, 2] = PLAYER1
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[2, 0] = PLAYER1
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[2, 1] = PLAYER2
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)

    env.board[2, 2] = PLAYER2
    assert not env._did_win(PLAYER1)
    assert not env._did_win(PLAYER2)
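Taken together, these assertions fully describe the predicate: a player wins with any complete row, any complete column, or either diagonal. One compact NumPy sketch that satisfies them, though not necessarily the project's implementation:

import numpy as np

def _did_win(self, player):
    b = self.board
    return (np.any(np.all(b == player, axis=0)) or    # any full column
            np.any(np.all(b == player, axis=1)) or    # any full row
            np.all(np.diag(b) == player) or           # main diagonal
            np.all(np.diag(np.fliplr(b)) == player))  # anti-diagonal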
Code example #9
def test_get_moves():
    mock_opponent = Mock()
    env = TicTacToeEnv(mock_opponent)
    env.reset()
    assert not env.get_moves()

    attrs = {
        "select_action.side_effect": [3, 4, 2, 7],
        "get_player_number.return_value": 2
    }
    mock_opponent.configure_mock(**attrs)

    env.step(0)
    assert env.get_moves() == [0, 3]  # agent played square 0, the mocked opponent replied with 3

    env.step(1)
    env.step(5)
    env.step(6)
    env.step(8)
    assert env.get_moves() == [0, 3, 1, 4, 5, 2, 6, 7, 8]

    env.reset()
    assert not env.get_moves()
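The expected lists show that get_moves returns every square in the order it was played, with agent and opponent moves interleaved. A small standalone check of that ordering; the names mirror the test, while the actual bookkeeping is assumed to live inside env.step.

agent_moves = [0, 1, 5, 6, 8]
opponent_moves = [3, 4, 2, 7]

interleaved = []
for i, move in enumerate(agent_moves):
    interleaved.append(move)                   # agent's square
    if i < len(opponent_moves):
        interleaved.append(opponent_moves[i])  # opponent's reply
print(interleaved)  # [0, 3, 1, 4, 5, 2, 6, 7, 8], matching the final assertion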