Example #1
    def __init__(self,
                 test_agent,
                 num_episodes,
                 log_dir,
                 out_file="test_outcomes.csv",
                 verbose=False):
        self.test_agent = test_agent
        self.num_episodes = num_episodes
        self.log_dir = log_dir
        self.out_file = out_file
        self.verbose = verbose

        # Baseline opponents, one instance each for playing first and second.
        self.random_agent_first = RandomAgent()
        self.random_agent_second = RandomAgent()
        self.minmax_agent_first = MinMaxAgent()
        self.minmax_agent_second = MinMaxAgent()

        # Outcome histories per baseline matchup, plus self-play.
        self.res_random_first = []
        self.res_random_second = []
        self.res_minmax_first = []
        self.res_minmax_second = []
        self.res_self = []

        # Score tracking across evaluations; best_score starts from the
        # INVALID_SCORE class constant (defined elsewhere) so a real
        # evaluation can replace it.
        self.current_score = None
        self.best_score = self.INVALID_SCORE * 100
        self.current_idx = 0
        self.best_score_idx = None
        self.best_train_episode = None

        self.plot = None
        self.x_values = []
        self.scores = []
        self.all_board_states = []
        self.n_self_episodes = num_episodes

        # Read back the training hyperparameters saved with this run,
        # preserving the order in which they were written.
        with open(os.path.join(self.log_dir, 'params.json'), 'r') as f:
            self.train_params = json.load(f, object_pairs_hook=OrderedDict)
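
For context, Example #5 below imports this class as AgentTestFramework from utils.test_utils, and its arguments match this constructor. A minimal sketch of constructing it, assuming the run directory already contains the params.json written at training time (the directory name and episode count are illustrative):

from gym_tictactoe.agents.random_agent import RandomAgent
from utils.test_utils import AgentTestFramework

# Hypothetical run directory; it must already hold the params.json
# that the constructor reads back.
framework = AgentTestFramework(test_agent=RandomAgent(),
                               num_episodes=1000,
                               log_dir="logs/20200101-000000_DQN_40000",
                               verbose=True)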
Example #2
        return self.get_2d_flat_obs(obs)

    @staticmethod
    def get_2d_flat_obs(obs):
        # Reshape the flat board observation into a 2D image-style array.
        new_obs = np.zeros(ObsRawTo2DFlat.SHAPE, dtype=np.uint8)

        for i, state in enumerate(obs):
            # Map the flat index to (row, column) board coordinates.
            y, x = divmod(i, TicTacToeEnv.BOARD_LENGTH)
            # Scale the small integer cell state into the uint8 range.
            new_obs[0, y, x] = state * 127

        return new_obs


if __name__ == "__main__":
    env = TicTacToeEnv(RandomAgent(), player_one_char='-')
    env = ObsRawTo2D(env)
    print("Checking environment...")
    check_env(env, warn=True)

    print("Observation space:", env.observation_space)
    print("Shape:", env.observation_space.shape)
    print("Observation space high:", env.observation_space.high)
    print("Observation space low:", env.observation_space.low)

    print("Action space:", env.action_space)

    obs = env.reset()
    done = False

    step = 1
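
The excerpt cuts off just after `step = 1`. A minimal sketch of how such a manual smoke test usually continues, assuming the classic gym step API returning a 4-tuple; none of this loop appears in the original file:

    while not done:
        # Sample an arbitrary action just to exercise the environment.
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        print("Step {}: action={}, reward={}, done={}".format(
            step, action, reward, done))
        step += 1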
Example #3
from stable_baselines import DQN

from gym_tictactoe.envs.tictactoe_env import TicTacToeEnv, OBS_FORMAT_2D
from gym_tictactoe.agents.min_max_agent import MinMaxAgent
from gym_tictactoe.agents.random_agent import RandomAgent

from utils.hyperparams import P_CHAR, REWARDS, N_REPEATS, NET_ARCH, OBS_FORMAT, FILTER_SIZES, PADS, GAMMA
from utils.hyperparams import NET_ARCH_2D
from utils.utils import filter_tf_warnings

from train import train

filter_tf_warnings()

# Algorithm and environment-side opponent for this sweep.
ALG = DQN

ENV_AGENT = RandomAgent()

SELF_PLAY = True

TRAIN_EPISODES = 40000

# Evaluate ten times over the course of each training run.
EVAL_FREQ = [int(TRAIN_EPISODES / 10)]

# Override the imported default: '-' picks sides randomly each episode.
P_CHAR = '-'

# Exploration rates tried for the environment-side agent.
ENV_EXP = [0.0, 0.2, 0.5]

N_ENVS = 1

total_trainings = len(OBS_FORMAT) * len(REWARDS) * len(GAMMA) * \
    len(ENV_EXP) * len(NET_ARCH) * len(FILTER_SIZES) * len(PADS) * \
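
The file is cut off mid-expression, but the intent is a hyperparameter grid search. A sketch of how such a grid is typically expanded into individual train calls, using the call signature visible in Example #4; only the two axes whose plumbing is shown there are expanded, and the full sweep also iterates REWARDS, ENV_EXP, NET_ARCH, FILTER_SIZES, PADS and N_REPEATS:

from itertools import product

for run_idx, (obs_format, gamma) in enumerate(product(OBS_FORMAT, GAMMA), 1):
    print("Starting training run", run_idx)
    train(ALG, obs_format, ENV_AGENT, SELF_PLAY,
          train_episodes=TRAIN_EPISODES,
          eval_freq=EVAL_FREQ[0],
          player_one_char=P_CHAR,
          gamma=gamma,
          n_envs=N_ENVS)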
Example #4
def main():
    parser = argparse.ArgumentParser(
        description=
        'Train a model, plot training and testing results, and save the model.'
    )
    parser.add_argument('-a',
                        '--alg',
                        type=str,
                        default='DQN',
                        help='Algorithm name. PPO2 or DQN (default: DQN)')
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=10000,
                        help='Training Episodes (default: 10000)')
    parser.add_argument('-f',
                        '--freq',
                        type=int,
                        default=2000,
                        help='Evaluation Frequency (default: 2000)')
    parser.add_argument("-tb",
                        "--tensorboard",
                        default=None,
                        help="Tensorboard logdir. (default: None)")
    parser.add_argument(
        '-p',
        '--player_one',
        type=str,
        default='-',
        help=
        'X for the agent, O for the environment, or - for randomly choosing in each train episode (default: -)'
    )
    parser.add_argument('-g',
                        '--gamma',
                        type=float,
                        default=1.0,
                        help='Gamma (default: 1.0)')
    parser.add_argument("-r",
                        "--random_agent",
                        action="store_true",
                        help='Train vs Random agent (default: Train vs Self)')
    parser.add_argument("-m",
                        "--min_max",
                        action="store_true",
                        help='Train vs MinMax agent (default: Train vs Self)')
    parser.add_argument(
        "-o",
        "--one_hot",
        action="store_true",
        help=
        'Use one hot encoded observations (Mlp) (default: Use 2D observations (Cnn))'
    )
    parser.add_argument(
        '-n',
        '--n_envs',
        type=int,
        default=8,
        help='Number of parallel environments when using PPO2 (default: 8)')
    args = parser.parse_args()

    alg = get_alg(args.alg)

    if not alg:
        print("Algorithm not found.")
        exit(1)

    env_agent = RandomAgent()

    self_play = True

    if args.random_agent:
        self_play = False

    if args.min_max:
        env_agent = MinMaxAgent()
        self_play = False

    obs_format = OBS_FORMAT_2D

    if args.one_hot:
        obs_format = OBS_FORMAT_ONE_HOT

    filter_tf_warnings()

    train(alg,
          obs_format,
          env_agent,
          self_play,
          train_episodes=args.episodes,
          eval_freq=args.freq,
          player_one_char=args.player_one,
          gamma=args.gamma,
          n_envs=args.n_envs,
          tensorboard_log=args.tensorboard)
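
Typical invocations of this entry point, assuming the file is the repo's training script (the filename below is illustrative):

# DQN, 10000 episodes, self-play, sides chosen randomly each episode:
#   python train_model.py
# PPO2 with one-hot (Mlp) observations and 8 parallel environments:
#   python train_model.py -a PPO2 -o -n 8
# DQN vs the MinMax agent, evaluating every 4000 episodes:
#   python train_model.py -m -e 20000 -f 4000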
Example #5
import os
import argparse
from datetime import datetime
from collections import OrderedDict

from gym_tictactoe.envs.tictactoe_env import TicTacToeEnv
from gym_tictactoe.agents.random_agent import RandomAgent
from gym_tictactoe.agents.min_max_agent import MinMaxAgent

from utils.test_utils import AgentTestFramework

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--episodes', type=int, default=100000)
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    agents = [RandomAgent(), MinMaxAgent(side=TicTacToeEnv.CROSS)]

    for agent in agents:

        now = datetime.now()

        log_dir = "logs_minmax_random/{}_{}_{}".format(
            now.strftime('%Y%m%d-%H%M%S'), agent.name, args.episodes)

        os.makedirs(log_dir, exist_ok=True)

        print("\nlog_dir:", log_dir)

        params = OrderedDict()
        params['agent'] = agent.name
        params['num_episodes'] = args.episodes
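
The excerpt stops while params is still being populated. Since Example #1's constructor reads params.json back out of log_dir and takes (test_agent, num_episodes, log_dir, ...), the loop plausibly finishes along these lines; the json import and the framework's entry-point method name are assumptions:

        with open(os.path.join(log_dir, 'params.json'), 'w') as f:
            json.dump(params, f, indent=4)  # assumes json is imported

        framework = AgentTestFramework(agent, args.episodes, log_dir,
                                       verbose=args.verbose)
        framework.test()  # hypothetical method name; may differ in the repo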