def test_game_decrease(game):
    # Create a "mock" state that ends after 20 moves with the learner (black) losing.
    lose_state = [MockState(go.WHITE, 20, size=19)]
    policy1 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    policy2 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    learner = MockPlayer(policy1, game)
    opponent = MockPlayer(policy2, game)
    optimizer = SGD(lr=0.001)
    policy1.model.compile(loss=log_loss, optimizer=optimizer)

    # Get initial (pre-training) probabilities of all moves made by black.
    init_move_probs = get_sgf_move_probs(game, policy1, go.BLACK)
    init_probs = [prob for (mv, prob) in init_move_probs]

    # Run RL training for one game.
    run_n_games(optimizer, 0.001, learner, opponent, 1, mock_states=lose_state)

    # Get the post-training probabilities of the same moves.
    new_move_probs = get_sgf_move_probs(game, policy1, go.BLACK)
    new_probs = [prob for (mv, prob) in new_move_probs]

    # Assert that, on average, the probabilities of black's moves decreased after
    # the loss (black plays 10 of the 20 moves).
    assert sum(new_probs[i] - init_probs[i] for i in range(10)) < 0

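# A hedged companion sketch to test_game_decrease, not part of the original
# suite: if the mock game instead ends with the learner (black) winning, the
# same bookkeeping should show black's move probabilities increasing on average.
# It assumes the same MockState/MockPlayer/get_sgf_move_probs helpers and
# fixture files used above.
def test_game_increase(game):
    # Create a "mock" state that ends after 20 moves with the learner winning.
    win_state = [MockState(go.BLACK, 20, size=19)]
    policy1 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    policy2 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    learner = MockPlayer(policy1, game)
    opponent = MockPlayer(policy2, game)
    optimizer = SGD(lr=0.001)
    policy1.model.compile(loss=log_loss, optimizer=optimizer)

    init_probs = [prob for (mv, prob) in get_sgf_move_probs(game, policy1, go.BLACK)]

    # Run RL training for one game that the learner wins.
    run_n_games(optimizer, 0.001, learner, opponent, 1, mock_states=win_state)

    new_probs = [prob for (mv, prob) in get_sgf_move_probs(game, policy1, go.BLACK)]

    # After a win, the probabilities of black's moves should increase on average.
    assert sum(new_probs[i] - init_probs[i] for i in range(10)) > 0
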
def run_and_get_new_weights(init_weights, winners, game):
    # Create "mock" states that each end after 2 moves with a predetermined winner.
    states = [MockState(winner, 2, size=19) for winner in winners]
    policy1 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    policy2 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    policy1.model.set_weights(init_weights)
    optimizer = SGD(lr=0.001)
    policy1.model.compile(loss=log_loss, optimizer=optimizer)
    learner = MockPlayer(policy1, game)
    opponent = MockPlayer(policy2, game)

    # Run RL training, one game per mock state.
    run_n_games(optimizer, 0.001, learner, opponent, len(states), mock_states=states)

    return policy1.model.get_weights()

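# A hedged usage sketch for run_and_get_new_weights, not part of the original
# suite: starting from identical weights, training on games the learner wins
# and on games it loses should produce different parameter updates, since the
# policy-gradient update depends on the game outcome (the learner plays black
# in MockState, per the convention above).
def test_win_vs_loss_updates_differ(game):
    policy = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel_policy.json'))
    init_weights = policy.model.get_weights()

    # Two games won by the learner vs. two games lost by the learner.
    win_weights = run_and_get_new_weights(init_weights, [go.BLACK, go.BLACK], game)
    lose_weights = run_and_get_new_weights(init_weights, [go.WHITE, go.WHITE], game)

    # The resulting parameters should differ in at least one layer.
    assert any(not np.array_equal(w, l) for (w, l) in zip(win_weights, lose_weights))
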
def test_game_run_N(game):
    policy1 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel.json'))
    policy2 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel.json'))
    learner = MockPlayer(policy1, game)
    opponent = MockPlayer(policy2, game)
    optimizer = SGD()
    init_weights = policy1.model.get_weights()
    policy1.model.compile(loss=log_loss, optimizer=optimizer)

    # Run RL training for two games.
    run_n_games(optimizer, 0.001, learner, opponent, 2)

    # Get the trained weights for comparison.
    trained_weights = policy1.model.get_weights()

    # Assert that at least some parameters changed.
    any_change = any(not np.array_equal(i, t)
                     for (i, t) in zip(init_weights, trained_weights))
    assert any_change

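# A hedged sketch, not part of the original suite: it assumes run_n_games only
# updates the learner's policy, so the opponent's weights should be identical
# before and after training. If run_n_games also trains the opponent, this
# check does not apply.
def test_opponent_weights_unchanged(game):
    policy1 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel.json'))
    policy2 = CNNPolicy.load_model(
        os.path.join('tests', 'test_data', 'minimodel.json'))
    learner = MockPlayer(policy1, game)
    opponent = MockPlayer(policy2, game)
    optimizer = SGD()
    opponent_weights_before = policy2.model.get_weights()
    policy1.model.compile(loss=log_loss, optimizer=optimizer)

    run_n_games(optimizer, 0.001, learner, opponent, 2)

    opponent_weights_after = policy2.model.get_weights()
    assert all(np.array_equal(b, a)
               for (b, a) in zip(opponent_weights_before, opponent_weights_after))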