import logging

import numpy as np
from unittest.mock import MagicMock

# Project-local classes used below (Game, Node, Edge, MCTS, Agent, ActionEncoder,
# DirectionResolver, StateEncoder, Residual_CNN, config) are assumed to be
# imported from the package's own modules.


def test_predict(self):
    game_root = Game()
    root = Node(game_root)
    model = MagicMock()
    # Stub the network: a scalar value head and an 896-wide policy head.
    prediction = [
        np.array([[0.25]]),
        np.reshape(np.arange(0.001, 0.897, step=0.001), newshape=(1, 896))
    ]
    model.predict.return_value = prediction
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={'ALPHA': 0.8, 'CPUCT': 1, 'EPSILON': 0.2},
                model=model,
                state_encoder=StateEncoder(),
                action_encoder=action_encoder)

    value, probs, allowed_actions = mcts.predict_state_value(game_root)

    self.assertEqual(value, 0.25)
    self.assertCountEqual(
        allowed_actions,
        action_encoder.convert_moves_to_action_ids(
            game_root.get_possible_moves_from_current_player_perspective()))
    for idx, prob in enumerate(probs):
        if idx in allowed_actions:
            self.assertTrue(prob > 0.01)
        else:
            self.assertTrue(prob < np.exp(-40))
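
# The loop above pins down masking behaviour rather than exact values: illegal
# actions must receive vanishingly small probability mass. A minimal sketch of
# how predict_state_value could produce this (an assumption for illustration,
# not necessarily the project's actual implementation):
def masked_softmax_sketch(logits, allowed_actions):
    # Push illegal logits far below the legal ones, then softmax: the illegal
    # entries end up with roughly exp(-100) of the mass, well under exp(-40).
    masked = np.full_like(logits, -100.0)
    masked[allowed_actions] = logits[allowed_actions]
    exps = np.exp(masked - np.max(masked))
    return exps / np.sum(exps)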
def test_convert_to_one_hot_3(self):
    encoder = ActionEncoder(DirectionResolver())
    actual_action = encoder.convert_action_to_one_hot([1, 5])
    expected_action = np.zeros((1, 32 * 4))
    expected_action[0, 0] = 1
    self.assertTrue(np.array_equal(actual_action.toarray(), expected_action))
def test_evaluate_leaf(self):
    game_root = Game()
    root = Node(game_root)
    model = MagicMock()
    prediction = [
        np.array([[0.25]]),
        np.reshape(np.arange(0.001, 0.897, step=0.001), newshape=(1, 896))
    ]
    model.predict.return_value = prediction
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={'ALPHA': 0.8, 'CPUCT': 1, 'EPSILON': 0.2},
                model=model,
                state_encoder=StateEncoder(),
                action_encoder=action_encoder)
    _, probs, _ = mcts.predict_state_value(game_root)

    value = mcts.evaluate_leaf(root)

    self.assertEqual(value, 0.25)
    self.assertEqual(len(root.edges), 7)
    self.assertEqual(root.edges[0].action, 8)
    self.assertEqual(root.edges[0].stats['P'], probs[8])
    self.assertEqual(root.edges[1].action, 104)
    self.assertEqual(root.edges[1].stats['P'], probs[104])
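
# evaluate_leaf is tested through its observable effects: after the call the
# leaf owns one Edge per legal action, each seeded with the network prior, and
# the network value is returned. A minimal sketch of that expansion, using
# assumed attribute names (leaf.game, mcts.action_encoder) for illustration:
def evaluate_leaf_sketch(mcts, leaf):
    value, probs, allowed_actions = mcts.predict_state_value(leaf.game)
    for move in leaf.game.get_possible_moves_from_current_player_perspective():
        action = mcts.action_encoder.convert_move_to_action_id(move)
        child = Node(leaf.game.move(move))
        leaf.edges.append(Edge(leaf, child, probs[action], action))
    return value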
def test_move_to_leaf(self):
    game = Game()
    root = Node(game)
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={'ALPHA': 0.8, 'CPUCT': 1, 'EPSILON': 0.2},
                model=None,
                state_encoder=None,
                action_encoder=action_encoder)
    puct = MagicMock()
    mcts.puct = puct

    child1 = Node(game.move(game.get_possible_moves()[0]))
    child2 = Node(game.move(game.get_possible_moves()[1]))
    child3 = Node(game.move(game.get_possible_moves()[2]))
    edge1 = Edge(root, child1, 0.33,
                 action_encoder.convert_move_to_action_id(game.get_possible_moves()[0]))
    edge2 = Edge(root, child2, 0.34,
                 action_encoder.convert_move_to_action_id(game.get_possible_moves()[1]))
    edge3 = Edge(root, child3, 0.33,
                 action_encoder.convert_move_to_action_id(game.get_possible_moves()[2]))
    root.edges.append(edge1)
    root.edges.append(edge2)
    root.edges.append(edge3)
    puct.puct.return_value = edge2

    leaf, value, done, breadcrumbs = mcts.move_to_leaf()

    self.assertEqual(leaf, child2)
    self.assertEqual(value, 0)
    self.assertEqual(done, 0)
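
# The PUCT selector is mocked above to force the walk through edge2. The
# traversal it stands in for is a standard descent to an unexpanded node; a
# minimal sketch, assuming the attribute names used in these tests (edges,
# out_node) and that mcts.puct.puct takes the candidate edges:
def move_to_leaf_sketch(mcts):
    node = mcts.root
    breadcrumbs = []
    while node.edges:
        edge = mcts.puct.puct(node.edges)  # assumed signature
        breadcrumbs.append(edge)
        node = edge.out_node
    value, done = 0, False  # terminal evaluation omitted in this sketch
    return node, value, done, breadcrumbs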
def read_agent(version):
    nn = Residual_CNN(config['REG_CONST'], config['LEARNING_RATE'], (2, 4, 8),
                      config['ACTION_SIZE'], config['HIDDEN_CNN_LAYERS'],
                      config['MOMENTUM'])
    m_tmp = nn.read(version)
    nn.model.set_weights(m_tmp.get_weights())
    player = Agent(nn, ActionEncoder(DirectionResolver()), StateEncoder(),
                   name='player' + str(version), config=config)
    return player
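
# Typical usage, assuming weight files for the given version were previously
# written by Residual_CNN (the version number here is illustrative):
# player_3 = read_agent(3)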
def test_backfill(self):
    game_root = Game()
    root = Node(game_root)
    action_encoder = ActionEncoder(DirectionResolver())

    position1 = game_root.move(game_root.get_possible_moves()[0])
    child1 = Node(position1)
    edge1 = Edge(root, child1, 0.3,
                 action_encoder.convert_move_to_action_id(
                     game_root.get_possible_moves()[0]))

    position2 = position1.move(position1.get_possible_moves()[0])
    child2 = Node(position2)
    edge2 = Edge(child1, child2, 0.2,
                 action_encoder.convert_move_to_action_id(
                     game_root.get_possible_moves()[0]))
    edge2.stats['N'] = 4
    edge2.stats['W'] = 1

    mcts = MCTS(root,
                config={'ALPHA': 0.8, 'CPUCT': 1, 'EPSILON': 0.2},
                model=None,
                state_encoder=None,
                action_encoder=action_encoder)

    mcts.backfill(child2, -1, [edge2, edge1])

    self.assertEqual(edge2.stats['N'], 5)
    self.assertEqual(edge2.stats['W'], 2)
    self.assertEqual(edge2.stats['Q'], 2 / 5)
    self.assertEqual(edge1.stats['N'], 1)
    self.assertEqual(edge1.stats['W'], -1)
    self.assertEqual(edge1.stats['Q'], -1)
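
# The expected stats encode a sign flip along the path: the leaf value of -1 is
# credited as +1 to edge2 (the opponent's move) and as -1 to edge1. A minimal
# sketch of that update rule, assuming each edge records the player to move at
# its parent node (playerTurn is an assumed attribute name):
def backfill_sketch(leaf_player, value, breadcrumbs):
    for edge in breadcrumbs:
        direction = 1 if edge.playerTurn == leaf_player else -1
        edge.stats['N'] += 1
        edge.stats['W'] += value * direction
        edge.stats['Q'] = edge.stats['W'] / edge.stats['N']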
def test_act_tau_0(self):
    config = {
        'ALPHA': 0.8,
        'CPUCT': 1,
        'EPSILON': 0.2,
        'ACTION_SIZE': 32 * 4 * 7,
        'MCTS_SIMULATIONS': 3
    }
    action_encoder = ActionEncoder(DirectionResolver())
    agent = Agent(model=None,
                  action_encoder=action_encoder,
                  state_encoder=StateEncoder(),
                  name='player1',
                  config=config)
    game_root = Game()
    root_node = Node(game_root)

    child1 = Node(game_root.move(game_root.get_possible_moves()[0]))
    edge1 = Edge(root_node, child1, 0.33, 8)
    edge1.stats['N'] = 10
    edge1.stats['Q'] = 0.2
    root_node.edges.append(edge1)

    child2 = Node(game_root.move(game_root.get_possible_moves()[1]))
    edge2 = Edge(root_node, child2, 0.5, 104)
    edge2.stats['N'] = 20
    edge2.stats['Q'] = 0.5
    root_node.edges.append(edge2)

    child3 = Node(game_root.move(game_root.get_possible_moves()[2]))
    edge3 = Edge(root_node, child3, 0.17, 9)
    edge3.stats['N'] = 15
    edge3.stats['Q'] = 0.3
    root_node.edges.append(edge3)

    agent.prepare_mcts_for_next_action = MagicMock()
    mcts = MagicMock()
    mcts.root = root_node
    mcts.evaluate_leaf.return_value = 0.7
    agent.mcts = mcts
    mcts.move_to_leaf.return_value = (root_node, 0.5, False, [])

    action, pi, value = agent.act(game_root, tau=0)

    self.assertEqual(action, [9, 14])
    self.assertEqual(value, 0.5)
    self.assertEqual(pi[8], 10 / (10 + 20 + 15))
    self.assertEqual(pi[9], 15 / (10 + 20 + 15))
    self.assertEqual(pi[8 + 3 * 32], 20 / (10 + 20 + 15))
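
# With tau == 0 the agent is greedy: pi is the normalised visit-count vector
# and the played move comes from the most-visited edge (here edge2, action id
# 104, which the agent decodes to the board move [9, 14]). A minimal sketch of
# that selection, assuming the stats layout used above:
def act_tau_zero_sketch(root_node, action_size):
    pi = np.zeros(action_size)
    for edge in root_node.edges:
        pi[edge.action] = edge.stats['N']
    pi = pi / np.sum(pi)
    best = max(root_node.edges, key=lambda e: e.stats['N'])
    return best.action, pi  # the real act also decodes the id to a move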
def test_integration(self):
    HIDDEN_CNN_LAYERS = [{'filters': 75, 'kernel_size': (4, 4)}
                         for _ in range(6)]
    model = Residual_CNN(0.0001, 0.1, (2, 4, 8), 32 * 4, HIDDEN_CNN_LAYERS,
                         momentum=0.9)
    game_root = Game()
    root = Node(game_root)
    mcts = MCTS(root,
                config={'ALPHA': 0.8, 'CPUCT': 1, 'EPSILON': 0.2},
                model=model,
                state_encoder=StateEncoder(),
                action_encoder=ActionEncoder(DirectionResolver()))
    mcts.predict_state_value(game_root)
    mcts.evaluate_leaf(root)
current_NN = Residual_CNN(config['REG_CONST'], config['LEARNING_RATE'], (2, 4, 8),
                          config['ACTION_SIZE'], config['HIDDEN_CNN_LAYERS'],
                          config['MOMENTUM'])
best_NN = Residual_CNN(config['REG_CONST'], config['LEARNING_RATE'], (2, 4, 8),
                       config['ACTION_SIZE'], config['HIDDEN_CNN_LAYERS'],
                       config['MOMENTUM'])

if "INITIAL_MODEL_VERSION" in config:
    best_player_version = config["INITIAL_MODEL_VERSION"]
    logging.info('LOADING MODEL VERSION ' + str(config["INITIAL_MODEL_VERSION"]) + '...')
    m_tmp = best_NN.read(best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

current_player = Agent(current_NN, ActionEncoder(DirectionResolver()), StateEncoder(),
                       name='current_player', config=config)
best_player = Agent(best_NN, ActionEncoder(DirectionResolver()), StateEncoder(),
                    name='best_player', config=config)

iteration = 0
while True:
    iteration += 1
    logging.info('ITERATION NUMBER ' + str(iteration))
def test_convert_action_id_to_true_position_and_direction_4(self):
    encoder = ActionEncoder(DirectionResolver())
    move = encoder.convert_action_id_to_true_position_and_direction(105, 2)
    self.assertEqual(move, (23, 2))
def test_convert_action_id_to_position_and_direction_2(self):
    encoder = ActionEncoder(DirectionResolver())
    move = encoder.convert_action_id_to_position_and_direction(105)
    self.assertEqual(move, (10, 4))
def test_convert_position_direction_distance_to_move_1(self):
    encoder = ActionEncoder(DirectionResolver())
    move = encoder.convert_direction_and_distance_to_move(10, 4, 1)
    self.assertEqual(move, [10, 15])
def test_convert_moves_to_action_ids(self):
    encoder = ActionEncoder(DirectionResolver())
    values = encoder.convert_moves_to_action_ids([[10, 17], [10, 15]])
    self.assertCountEqual(values, [9, 105])
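
# The expected ids above are consistent with a flat layout of 32 squares by
# 4 move directions: action_id = (direction - 1) * 32 + (position - 1), so 105
# decodes to position 10, direction 4. This mapping is inferred from the tests,
# not quoted from the implementation; the *_true_* variant above additionally
# mirrors the move for player 2 (position 10 becomes 33 - 10 = 23, with the
# opposite direction).
def encode_action_sketch(position, direction):
    return (direction - 1) * 32 + (position - 1)

def decode_action_sketch(action_id):
    return action_id % 32 + 1, action_id // 32 + 1  # (position, direction)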