def test_output_size(self):
    policy19 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=19)
    output = policy19.forward(policy19.preprocessor.state_to_tensor(GameState(19)))
    self.assertEqual(output.shape, (1, 19 * 19))

    policy13 = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"], board=13)
    output = policy13.forward(policy13.preprocessor.state_to_tensor(GameState(13)))
    self.assertEqual(output.shape, (1, 13 * 13))
def test_save_load(self):
    policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"])

    model_file = 'TESTPOLICY.json'
    weights_file = 'TESTWEIGHTS.h5'
    model_file2 = 'TESTPOLICY2.json'
    weights_file2 = 'TESTWEIGHTS2.h5'

    # test saving model/weights separately
    policy.save_model(model_file)
    policy.model.save_weights(weights_file, overwrite=True)
    # test saving them together
    policy.save_model(model_file2, weights_file2)

    copypolicy = CNNPolicy.load_model(model_file)
    copypolicy.model.load_weights(weights_file)
    copypolicy2 = CNNPolicy.load_model(model_file2)

    for w1, w2 in zip(copypolicy.model.get_weights(), copypolicy2.model.get_weights()):
        self.assertTrue(np.all(w1 == w2))

    os.remove(model_file)
    os.remove(weights_file)
    os.remove(model_file2)
    os.remove(weights_file2)
def test_batch_eval_state(self):
    policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"])
    results = policy.batch_eval_state([GameState(), GameState()])
    self.assertEqual(len(results), 2)       # one result per GameState
    self.assertEqual(len(results[0]), 361)  # each one has 361 (move, prob) pairs
def test_probabilistic_player(self):
    gs = GameState()
    policy = CNNPolicy(["board", "ones", "turns_since"])
    player = ProbabilisticPolicyPlayer(policy)
    # play 20 moves; the player should return a move every time
    for _ in range(20):
        move = player.get_move(gs)
        self.assertIsNotNone(move)
        gs.do_move(move)
def test_greedy_player(self):
    gs = GameState()
    policy = CNNPolicy(["board", "ones", "turns_since"])
    player = GreedyPolicyPlayer(policy)
    for _ in range(20):
        move = player.get_move(gs)
        self.assertNotEqual(move, go.PASS)
        gs.do_move(move)
def test_sensible_greedy(self):
    gs = GameState()
    policy = CNNPolicy(["board", "ones", "turns_since"])
    player = GreedyPolicyPlayer(policy)
    # fill the board with black stones, leaving a single empty point
    empty = (10, 10)
    for x in range(19):
        for y in range(19):
            if (x, y) != empty:
                gs.do_move((x, y), go.BLACK)
    gs.current_player = go.BLACK
    # the only remaining move is not sensible, so the player should return no move
    self.assertIsNone(player.get_move(gs))
def test_sensible_probabilistic(self):
    gs = GameState()
    policy = CNNPolicy(["board", "ones", "turns_since"])
    player = ProbabilisticPolicyPlayer(policy)
    # fill the board with black stones, leaving a single empty point
    empty = (10, 10)
    for x in range(19):
        for y in range(19):
            if (x, y) != empty:
                gs.do_move((x, y), go.BLACK)
    gs.set_current_player(go.BLACK)
    # with no sensible move available, the player should pass
    self.assertEqual(player.get_move(gs), go.PASS)
def test_save_load(self):
    policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"])

    model_file = 'TESTPOLICY.json'
    weights_file = 'TESTWEIGHTS.h5'

    policy.save_model(model_file)
    policy.model.save_weights(weights_file)

    copypolicy = CNNPolicy.load_model(model_file)
    copypolicy.model.load_weights(weights_file)

    os.remove(model_file)
    os.remove(weights_file)
def test_default_policy(self):
    # smoke test: constructing a policy and evaluating a fresh state should not raise
    policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"])
    policy.eval_state(GameState())
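# Illustrative sketch, not part of the original tests: assuming eval_state
# returns (move, probability) pairs the way batch_eval_state does above,
# the single most probable move for a fresh board can be picked with max().
# (Reuses the CNNPolicy and GameState names from the tests above.)
policy = CNNPolicy(["board", "liberties", "sensibleness", "capture_size"])
move_probs = policy.eval_state(GameState())
best_move, best_prob = max(move_probs, key=lambda pair: pair[1])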
from AlphaGo.training.supervised_policy_trainer import run_training
from AlphaGo.models.policy import CNNPolicy
from cProfile import Profile

architecture = {'filters_per_layer': 128, 'layers': 12}
features = ['board', 'ones', 'turns_since']
policy = CNNPolicy(features, **architecture)
policy.save_model('model.json')

profile = Profile()

# --epochs 5 --minibatch 32 --learning-rate 0.01
arguments = ('model.json', 'debug_feature_planes.hdf5', 'training_results/', 5, 32, 0.01)


def run_supervised_policy_training():
    run_training(*arguments)


profile.runcall(run_supervised_policy_training)
profile.dump_stats('supervised_policy_training_bench_results.prof')
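# Illustrative follow-up, not part of the benchmark itself: the dumped .prof
# file can be inspected with the standard-library pstats module, e.g. sorted
# by cumulative time to see where training spends its effort.
import pstats

stats = pstats.Stats('supervised_policy_training_bench_results.prof')
stats.sort_stats('cumulative').print_stats(20)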