def test_rand_vs_rand(self):
    """Two identical random policies should split games roughly 50/50."""
    black_args = utils.hyperparameters(['--size=5', '--model=rand'])
    white_args = utils.hyperparameters(['--size=5', '--model=rand'])
    win_rate, _, _ = utils.multi_proc_play(
        black_args, white_args, self.num_games, workers=8)
    # delta=0.1 tolerates sampling noise over a finite number of games
    self.assertAlmostEqual(win_rate, 0.5, delta=0.1)
def test_mctac_vs_ac(self):
    """Actor-critic with 81 MCTS searches should beat plain actor-critic."""
    searched = utils.hyperparameters(
        ['--size=9', '--model=ac', '--mcts=81', '--baseline'])
    unsearched = utils.hyperparameters(
        ['--size=9', '--model=ac', '--mcts=0', '--baseline'])
    win_rate, _, _ = utils.multi_proc_play(
        searched, unsearched, self.num_games, workers=8)
    self.assertGreaterEqual(win_rate, 0.6)
def test_mctval_vs_val(self):
    """Value policy with 128 MCTS searches should beat the search-free one."""
    with_search = utils.hyperparameters(
        ['--size=7', '--mcts=128', '--temp=1', '--baseline'])
    without_search = utils.hyperparameters(
        ['--size=7', '--mcts=0', '--temp=0.05', '--baseline'])
    win_rate, _, _ = utils.multi_proc_play(
        with_search, without_search, self.num_games, workers=8)
    self.assertGreaterEqual(win_rate, 0.6)
def test_mct_vs_rand(self):
    """Greedy policy guided by a small MCTS should beat a random policy."""
    mct_greedy = utils.hyperparameters([
        '--size=5',
        '--model=greedy',
        '--mcts=10',
        '--temp=1',
        '--baseline',
    ])
    random_play = utils.hyperparameters(['--size=5', '--model=rand'])
    win_rate, _, _ = utils.multi_proc_play(
        mct_greedy, random_play, self.num_games, workers=8)
    self.assertGreaterEqual(win_rate, 0.6)
def test_checkpoint(self):
    """A saved checkpoint model should beat the baseline model."""
    checkpoint = utils.hyperparameters([
        '--size=5',
        '--depth=0',
        '--temp=0.1',
        '--customdir=bin/checkpoints/2020-01-20/',
    ])
    baseline = utils.hyperparameters(
        ['--size=5', '--depth=0', '--temp=0.1', '--baseline'])
    win_rate, _, _ = utils.multi_proc_play(
        checkpoint, baseline, self.num_games, workers=8)
    self.assertGreaterEqual(win_rate, 0.6)
def test_smartgreed_vs_greed(self):
    """The smart-greedy model should beat the plain greedy model."""
    smart = utils.hyperparameters([
        '--size=5',
        '--model=smartgreedy',
        '--depth=0',
        '--temp=0.1',
        '--baseline',
    ])
    plain = utils.hyperparameters([
        '--size=5',
        '--model=greedy',
        '--depth=0',
        '--temp=0.1',
        '--baseline',
    ])
    win_rate, _, _ = utils.multi_proc_play(
        smart, plain, self.num_games, workers=8)
    self.assertGreaterEqual(win_rate, 0.6)
def test_greedy_obvious_move(self):
    """Greedy policy must put all probability mass on a forced winning move."""
    args = utils.hyperparameters(['--size=5', '--model=greedy'])
    # Obvious move is to pass and win the game
    self.set_obvious_move_state()
    policy, _ = utils.baselines.create_policy(args)
    move_dist = policy(self.env)
    # Last action index is the pass move; it must get probability 1,
    # every board move must get probability 0.
    self.assertEqual(move_dist[-1], 1)
    self.assertTrue(np.allclose(move_dist[:-1], 0))
import gym
import go_ai.policies.baselines
from go_ai import game, utils

# BUG FIX: the flag was passed as 'customdir=bin/baselines/' without the
# leading '--'; every other hyperparameters() call in this project uses
# '--'-prefixed flags (e.g. '--customdir=bin/checkpoints/...'), and argparse
# would not recognize the bare form as the customdir option.
args = utils.hyperparameters(['--customdir=bin/baselines/'])

# Environment
go_env = gym.make('gym_go:go-v0', size=args.size)

# Policies: load the checkpointed model and pit it against a human player.
policy, model = go_ai.policies.baselines.create_policy(args, 'Checkpoint')
print(f"Loaded model {policy}")
human_pi = go_ai.policies.baselines.Human(args.render)

# Play an interactive game
go_env.reset()
game.pit(go_env, policy, human_pi)
go_env.reset() for a in traj.actions: greedy_actions = [] for policy in policies: pi = policy(go_env) greedy_actions.append(np.argmax(pi)) all_greedy_actions.append(greedy_actions) go_env.step(a) queue.put(np.array(all_greedy_actions, dtype=int)) if __name__ == '__main__': all_args = [] for arg_str in arg_strs: args = utils.hyperparameters(arg_str.split()) all_args.append(args) _, _, replay = utils.multi_proc_play(all_args[0], all_args[0], 16) greedy_actions = evaluate_greedy_actions(all_args, replay) best_actions = greedy_actions[:, -1] stats_str = '' for i in range(len(all_args) - 1): agreement = np.mean(greedy_actions[:, i] == best_actions) stats_str += (f'{100 * agreement:.1f}% ') print(stats_str)
import os import gym from go_ai.policies import actorcritic, baselines, value import go_ai.search.plot from go_ai import measurements, utils utils.config_log() args = utils.hyperparameters() # Environment go_env = gym.make('gym_go:go-v0', size=args.size) if args.baseline: outdir = 'bin/baselines/' else: outdir = args.customdir # Policies policy, model = baselines.create_policy(args, 'Model') # Directories and files plotsdir = os.path.join(outdir, f'{args.model}{args.size}_plots/') if not os.path.exists(plotsdir): os.mkdir(plotsdir) stats_path = os.path.join(outdir, f'{args.model}{args.size}_stats.txt') # Plot stats if os.path.exists(stats_path):