Example #1
0
    def test_rand_vs_rand(self):
        """Pit two identically configured random players against each other.

        Both sides use ``--size=5 --model=rand``, so the win rate reported
        by ``utils.multi_proc_play`` must land within 0.1 of an even 0.5.
        """
        player_args = utils.hyperparameters(['--size=5', '--model=rand'])
        opponent_args = utils.hyperparameters(['--size=5', '--model=rand'])

        outcome = utils.multi_proc_play(
            player_args, opponent_args, self.num_games, workers=8)
        win_rate, _, _ = outcome

        self.assertAlmostEqual(win_rate, 0.5, delta=0.1)
Example #2
0
    def test_mctac_vs_ac(self):
        """Compare the baseline ``ac`` model with and without ``--mcts``.

        The first configuration runs with ``--mcts=81`` and the second with
        ``--mcts=0``, both at size 9. The reported win rate (presumably
        that of the first configuration) must be at least 0.6.
        """
        searching_args = utils.hyperparameters(
            ['--size=9', '--model=ac', '--mcts=81', '--baseline'])
        plain_args = utils.hyperparameters(
            ['--size=9', '--model=ac', '--mcts=0', '--baseline'])

        outcome = utils.multi_proc_play(
            searching_args, plain_args, self.num_games, workers=8)
        win_rate, _, _ = outcome

        self.assertGreaterEqual(win_rate, 0.6)
Example #3
0
    def test_mctval_vs_val(self):
        """Compare the default baseline model with and without ``--mcts``.

        The first configuration uses ``--mcts=128 --temp=1`` and the second
        ``--mcts=0 --temp=0.05``, both at size 7. The reported win rate
        (presumably that of the first configuration) must be at least 0.6.
        """
        searching_args = utils.hyperparameters(
            ['--size=7', '--mcts=128', '--temp=1', '--baseline'])
        plain_args = utils.hyperparameters(
            ['--size=7', '--mcts=0', '--temp=0.05', '--baseline'])

        outcome = utils.multi_proc_play(
            searching_args, plain_args, self.num_games, workers=8)
        win_rate, _, _ = outcome

        self.assertGreaterEqual(win_rate, 0.6)
Example #4
0
    def test_mct_vs_rand(self):
        """Play a baseline greedy model with ``--mcts=10`` against random.

        Both sides play at size 5. The reported win rate (presumably that
        of the greedy configuration, which is passed first) must be at
        least 0.6.
        """
        greedy_args = utils.hyperparameters([
            '--size=5', '--model=greedy', '--mcts=10', '--temp=1', '--baseline'
        ])
        random_args = utils.hyperparameters(['--size=5', '--model=rand'])

        outcome = utils.multi_proc_play(
            greedy_args, random_args, self.num_games, workers=8)
        win_rate, _, _ = outcome

        self.assertGreaterEqual(win_rate, 0.6)
Example #5
0
    def test_checkpoint(self):
        """Play the 2020-01-20 checkpoint against the baseline.

        The first configuration points ``--customdir`` at
        ``bin/checkpoints/2020-01-20/``; the second uses ``--baseline``.
        Both run at size 5 with ``--depth=0 --temp=0.1``. The reported win
        rate (presumably the checkpoint's) must be at least 0.6.
        """
        checkpoint_args = utils.hyperparameters([
            '--size=5', '--depth=0', '--temp=0.1',
            '--customdir=bin/checkpoints/2020-01-20/'
        ])
        baseline_args = utils.hyperparameters(
            ['--size=5', '--depth=0', '--temp=0.1', '--baseline'])

        outcome = utils.multi_proc_play(
            checkpoint_args, baseline_args, self.num_games, workers=8)
        win_rate, _, _ = outcome

        self.assertGreaterEqual(win_rate, 0.6)
Example #6
0
    def test_smartgreed_vs_greed(self):
        """Play the ``smartgreedy`` model against the ``greedy`` model.

        Both configurations are otherwise identical (size 5, ``--depth=0``,
        ``--temp=0.1``, ``--baseline``). The reported win rate (presumably
        that of ``smartgreedy``, which is passed first) must be at least 0.6.
        """
        smart_args = utils.hyperparameters([
            '--size=5', '--model=smartgreedy', '--depth=0', '--temp=0.1',
            '--baseline'
        ])
        greedy_args = utils.hyperparameters([
            '--size=5', '--model=greedy', '--depth=0', '--temp=0.1',
            '--baseline'
        ])

        outcome = utils.multi_proc_play(
            smart_args, greedy_args, self.num_games, workers=8)
        win_rate, _, _ = outcome

        self.assertGreaterEqual(win_rate, 0.6)
Example #7
0
    def test_greedy_obvious_move(self):
        """Check that the greedy policy puts all its weight on the last action.

        After ``set_obvious_move_state`` prepares ``self.env``, the policy
        output must be exactly 1 in its final entry and (approximately)
        0 everywhere else.
        """
        greedy_args = utils.hyperparameters(['--size=5', '--model=greedy'])

        self.set_obvious_move_state()

        # Obvious move is to pass and win the game
        greedy_policy, _ = utils.baselines.create_policy(greedy_args)
        move_probs = greedy_policy(self.env)

        pass_prob = move_probs[-1]
        board_probs = move_probs[:-1]
        self.assertEqual(pass_prob, 1)
        self.assertTrue(np.allclose(board_probs, 0))
Example #8
0
import gym

import go_ai.policies.baselines
from go_ai import game, utils

# Script: load a checkpointed policy and let a human play against it.

# Point the hyperparameters at the baseline directory. The option must carry
# its leading dashes ('--customdir=...'); without them the token is not
# recognised as the customdir option.
args = utils.hyperparameters(['--customdir=bin/baselines/'])

# Environment
go_env = gym.make('gym_go:go-v0', size=args.size)

# Policies
policy, model = go_ai.policies.baselines.create_policy(args, 'Checkpoint')
print(f"Loaded model {policy}")

# The human side is driven through the Human policy, configured with the
# render setting from the parsed arguments.
human_pi = go_ai.policies.baselines.Human(args.render)

# Play
go_env.reset()
game.pit(go_env, policy, human_pi)
Example #9
0
        go_env.reset()
        for a in traj.actions:
            greedy_actions = []
            for policy in policies:
                pi = policy(go_env)
                greedy_actions.append(np.argmax(pi))
            all_greedy_actions.append(greedy_actions)

            go_env.step(a)

    queue.put(np.array(all_greedy_actions, dtype=int))


if __name__ == '__main__':
    # Parse one hyperparameter set per configured argument string.
    all_args = [utils.hyperparameters(arg_str.split()) for arg_str in arg_strs]

    # Generate 16 self-play games with the first configuration; only the
    # third element of the result (the replay) is needed here.
    _, _, replay = utils.multi_proc_play(all_args[0], all_args[0], 16)

    greedy_actions = evaluate_greedy_actions(all_args, replay)

    # The last column is the reference; report, for every other column, the
    # percentage of rows on which it picked the same action.
    best_actions = greedy_actions[:, -1]
    stats_str = ''.join(
        f'{100 * np.mean(greedy_actions[:, col] == best_actions):.1f}% '
        for col in range(len(all_args) - 1))
    print(stats_str)
Example #10
0
import os

import gym

from go_ai.policies import actorcritic, baselines, value
import go_ai.search.plot
from go_ai import measurements, utils

# Script setup: configure logging, parse arguments, build the environment and
# policy, and work out where plots and stats live.
utils.config_log()
args = utils.hyperparameters()

# Environment
go_env = gym.make('gym_go:go-v0', size=args.size)

# Baseline runs always use the fixed baselines directory; otherwise the
# user-supplied custom directory is used.
if args.baseline:
    outdir = 'bin/baselines/'
else:
    outdir = args.customdir

# Policies
policy, model = baselines.create_policy(args, 'Model')

# Directories and files
plotsdir = os.path.join(outdir, f'{args.model}{args.size}_plots/')
# makedirs with exist_ok avoids the check-then-create race and also creates
# any missing parent directory instead of failing.
os.makedirs(plotsdir, exist_ok=True)

stats_path = os.path.join(outdir, f'{args.model}{args.size}_stats.txt')

# Plot stats
if os.path.exists(stats_path):