def evaluate_new_neural_network(p_v_network_old,
                                p_v_network_new,
                                number_of_battles=4,
                                plane_size=config.PLANE_SIZE):
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_new)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                     father_edge=None,
                                     p_v_network=p_v_network_old)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                         p_v_network=p_v_network_new,
                                         max_simulation=80)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                         p_v_network=p_v_network_old,
                                         max_simulation=80)

    new_pure_win = 0
    print("------新黑旧白------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
    print("------新白旧黑------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record, action_list, turn = play.PlayLogic().play(
            player2, player1)
        new_pure_win -= winner

    return new_pure_win
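
As a side note (not part of the original code), the score returned above can be mapped to a win rate using the convention these examples share: winner is +1 when the first-seated (black) player wins, -1 when white wins and 0 on a draw. The helper name decide_promotion and the 0.55 threshold below are illustrative only.

def decide_promotion(new_pure_win, number_of_battles, threshold=0.55):
    # new_pure_win ranges from -2 * number_of_battles to +2 * number_of_battles,
    # because each network plays number_of_battles games as black and as white
    total_games = 2 * number_of_battles
    win_rate = (new_pure_win + total_games) / (2.0 * total_games)  # map into [0, 1]
    return win_rate >= threshold


# e.g. a score of +2 over 2 * 4 = 8 games corresponds to a win rate of 0.625
assert decide_promotion(2, 4)
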
Example #2
    def evaluate_new_neural_network(
            self,
            p_v_network_old,
            p_v_network_new,
            number_of_battles=config.NUMBER_of_BATTLES_WHEN_EVALUATING,
            plane_size=config.PLANE_SIZE):
        # return True  # for testing only
        root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_new)
        root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_old)
        player1 = p_v_mcts_player.MCTSPlayer(
            root=root1,
            p_v_network=p_v_network_new,
            max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)
        player2 = p_v_mcts_player.MCTSPlayer(
            root=root2,
            p_v_network=p_v_network_old,
            max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)

        new_pure_win = 0
        logging.info("新白旧黑")
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic(
                plane_size=config.PLANE_SIZE).play(player2, player1)
            new_pure_win -= winner
        if new_pure_win >= 0:
            new_pure_win = 0
            logging.info("新黑旧白")
            for i in range(number_of_battles):
                player1.refresh()
                player2.refresh()
                winner, plane_record, action_list, turn = play.PlayLogic(
                    plane_size=config.PLANE_SIZE).play(player1, player2)
                new_pure_win += winner
            if new_pure_win >= 0:
                return True
            else:
                # return True  # for testing only
                return False
        else:
            # return True  # for testing only
            return False
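
The variant above reports progress via logging.info instead of print, so nothing appears unless the root logger is configured somewhere in the application. A minimal sketch of such a setup (level and format chosen purely for illustration):

import logging

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s")
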
Example #3
def evaluate_new_neural_network(player1, player2, number_of_battles=1):
    new_pure_win = 0
    print("------新黑旧白------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record_1, action_list, turn = play.PlayLogic().play(
            player1, player2)
        new_pure_win += winner
    print("------新白旧黑------")
    for i in range(number_of_battles):
        player1.refresh()
        player2.refresh()
        winner, plane_record_2, action_list, turn = play.PlayLogic().play(
            player2, player1)
        new_pure_win -= winner

    # note: only the board records of the last game in each colour assignment are
    # returned; new_pure_win is computed above but not used by this variant
    return plane_record_1, plane_record_2
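
This version discards the aggregated score and returns the board records instead. If those records are to be kept for later inspection, one possibility is to dump them with numpy; the sketch assumes they are numpy arrays (as the training code that feeds plane records into the network suggests), that player1 and player2 were built as in the earlier examples, and the file names are made up here.

import numpy as np

plane_record_1, plane_record_2 = evaluate_new_neural_network(player1, player2)
np.save("record_new_as_black.npy", np.asarray(plane_record_1))
np.save("record_new_as_white.npy", np.asarray(plane_record_2))
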
Example #4
    def evaluate_new_neural_network(self,
                                    p_v_network_old,
                                    p_v_network_new,
                                    number_of_battles=11,
                                    plane_size=15):

        root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_new)
        root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_old)
        player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                             p_v_network=p_v_network_new,
                                             max_simulation=50)
        player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                             p_v_network=p_v_network_old,
                                             max_simulation=50)

        new_pure_win = 0
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic().play(
                player1, player2)
            new_pure_win += winner
        if new_pure_win > 2:
            new_pure_win = 0
            for i in range(number_of_battles):
                player1.refresh()
                player2.refresh()
                winner, plane_record, action_list, turn = play.PlayLogic(
                ).play(player2, player1)
                # following the winner convention of the other examples (+1 when
                # the first-seated, black, player wins), subtract here because
                # the old network plays black in this loop
                new_pure_win -= winner
            if new_pure_win > 2:
                return True
            else:
                return False
        else:
            return False
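
The > 2 gate means the new network has to finish at least three net wins ahead in each colour assignment. With number_of_battles=11 and no draws that is at least 7 wins out of 11 games per colour; the small check below (helper name invented for the example) spells out the arithmetic.

def required_wins(number_of_battles, margin=2):
    # wins - losses > margin and wins + losses == number_of_battles (no draws)
    # imply wins > (number_of_battles + margin) / 2
    return (number_of_battles + margin) // 2 + 1


assert required_wins(11) == 7
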
Example #5
    def evaluate_new_network_with_random_player(self,
                                                p_v_network_new,
                                                number_of_battles=25,
                                                plane_size=config.PLANE_SIZE,
                                                u=1,
                                                max_simulation=1):
        root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_new)
        player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                             p_v_network=p_v_network_new,
                                             max_simulation=max_simulation)
        player2 = random_player.RandomPlayer(
            gl.GameLogic(plane_size=plane_size))
        new_pure_win = 0
        print("------神黑随白------")
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic().play(
                player1, player2)
            new_pure_win += winner
        print("------神白随黑------")
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic().play(
                player2, player1)
            new_pure_win -= winner
        win_rate = (new_pure_win +
                    number_of_battles * 2.0) / (2 * 2 * number_of_battles)
        with open(
                'network/win_rate_max_simulation' + str(max_simulation) +
                '.txt', 'a+') as f:
            f.write(str(u) + "," + str(win_rate) + "\n")

        return new_pure_win
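
Each call above appends one "u,win_rate" line to network/win_rate_max_simulation<k>.txt. A minimal sketch for reading that file back and plotting the learning curve, assuming matplotlib is available (the default max_simulation=1 gives the file name used below):

import matplotlib.pyplot as plt

updates, win_rates = [], []
with open("network/win_rate_max_simulation1.txt") as f:
    for line in f:
        u_str, rate_str = line.strip().split(",")
        updates.append(int(u_str))
        win_rates.append(float(rate_str))

plt.plot(updates, win_rates)
plt.xlabel("network update")
plt.ylabel("win rate against the random player")
plt.show()
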
Example #6
    def train_and_update(self,
                         plane_size=15,
                         number_of_epoch=1,
                         number_of_update_network=200,
                         number_of_games=200,
                         numbuer_of_samples_in_each_game=9,
                         min_batch=100,
                         max_simulation=3):
        '''
        :param number_of_epoch: passes over the generated self-play data per network update
        :param number_of_update_network: number of network update iterations
        :param number_of_games: self-play games generated per update
        :param numbuer_of_samples_in_each_game: positions sampled from each game
        :param min_batch: must be a divisor of number_of_games * numbuer_of_samples_in_each_game
        :return:
        '''
        p_v_network_new = p_v_network.P_V_Network()
        p_v_network_old = p_v_network.P_V_Network()

        path = "./network"
        if not os.path.exists(path):
            os.makedirs(path)

        for u in range(number_of_update_network):
            print("the %dth update" % (u))
            p_v_network_new.save(u)

            self_play_game = play.PlayLogic(plane_size=plane_size)
            data_generator = generate_self_play_data.GenerateSelfPlayData(
                self_play_game)

            root1 = p_v_mcts_player.MCTSNode(
                gl.GameLogic(plane_size=plane_size),
                father_edge=None,
                p_v_network=p_v_network_new)
            root2 = p_v_mcts_player.MCTSNode(
                gl.GameLogic(plane_size=plane_size),
                father_edge=None,
                p_v_network=p_v_network_new)
            player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                                 p_v_network=p_v_network_new,
                                                 max_simulation=max_simulation)
            player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                                 p_v_network=p_v_network_new,
                                                 max_simulation=max_simulation)

            plane_records, game_result_, y_ = data_generator.generate_self_play_data(
                player1,
                player2,
                number_of_games=number_of_games,
                numbuer_of_samples_in_each_game=numbuer_of_samples_in_each_game
            )

            for e in range(number_of_epoch):
                for i in range(
                        int(number_of_games * numbuer_of_samples_in_each_game /
                            min_batch)):
                    # mini-batch of 100: with only 1000 position samples, the loop runs just 10 times
                    batch = [
                        plane_records[i * min_batch:(i + 1) * min_batch],
                        game_result_[i * min_batch:(i + 1) * min_batch],
                        y_[i * min_batch:(i + 1) * min_batch]
                    ]
                    if e % 10 == 0:
                        # Periodic loss logging is disabled here. To enable it,
                        # evaluate p_v_network_new.loss on this batch with
                        # is_training=False and print "step %d, loss %g" % (i, loss).
                        pass
                    p_v_network_new.sess.run(
                        [p_v_network_new.train_step],
                        feed_dict={
                            p_v_network_new.x_plane: batch[0],
                            p_v_network_new.game_result: batch[1],
                            p_v_network_new.y_: batch[2],
                            p_v_network_new.is_training: True
                        })

            if self.evaluate_new_neural_network(p_v_network_old,
                                                p_v_network_new,
                                                plane_size=plane_size,
                                                number_of_battles=5):
                print("old_network changed")
                p_v_network_old.restore(u)
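
As the docstring notes, min_batch has to divide number_of_games * numbuer_of_samples_in_each_game exactly; otherwise the integer division above silently drops the trailing samples. A small sketch of the same slicing in isolation, with an explicit check (names are illustrative):

def iter_mini_batches(samples, min_batch):
    # samples: any sliceable container of training positions
    assert len(samples) % min_batch == 0, "min_batch must divide the sample count"
    for i in range(len(samples) // min_batch):
        yield samples[i * min_batch:(i + 1) * min_batch]


# with the defaults above: 200 games * 9 samples per game = 1800 positions -> 18 batches of 100
assert sum(1 for _ in iter_mini_batches(range(1800), 100)) == 18
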
Example #7
        # build one augmented copy: permute the axes of the board record, flip
        # the action map left-right, then append the sample to the batch arrays
        arr_data_augment_board = arr_data_augment_board.swapaxes(0, 1)
        arr_data_augment_board = arr_data_augment_board.swapaxes(1, 2)
        arr_data_augment_act = np.fliplr(arr_data_augment_act)
        board = np.concatenate((board, np.array([arr_data_augment_board])))
        action_probability_distribution = np.concatenate(
            (action_probability_distribution,
             np.array([arr_data_augment_act.reshape(size ** 2)])))
        result = np.concatenate((result, np.array([[z]])))

        return result, board, action_probability_distribution
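
The fragment above adds a single transposed and mirrored copy of each position. On a square board the full symmetry group gives eight equivalent views (four rotations, each optionally mirrored); the self-contained sketch below illustrates that idea with numpy, under the simplifying assumption that the board is a single (size, size) array and the action distribution a flat vector of length size ** 2 (the real records here carry extra feature planes).

import numpy as np


def augment_position(board, action_probs, size):
    '''Return the eight symmetric copies of one (board, action_probs) sample.'''
    boards, probs = [], []
    act_2d = action_probs.reshape(size, size)
    for k in range(4):                      # four rotations
        rot_board = np.rot90(board, k)
        rot_act = np.rot90(act_2d, k)
        for flip in (False, True):          # each one optionally mirrored
            b = np.fliplr(rot_board) if flip else rot_board
            a = np.fliplr(rot_act) if flip else rot_act
            boards.append(b)
            probs.append(a.reshape(size ** 2))
    return np.array(boards), np.array(probs)


# one 15x15 position expands to eight symmetric copies
b, p = augment_position(np.zeros((15, 15)), np.zeros(15 * 15), 15)
assert b.shape == (8, 15, 15) and p.shape == (8, 225)
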



if __name__ == "__main__":
    import p_v_network
    import play

    self_play_game = play.PlayLogic(plane_size=15)
    data_generator = GenerateSelfPlayData(self_play_game)


    # use a distinct name so the p_v_network module is not shadowed
    network = p_v_network.P_V_Network()
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15),
                                     father_edge=None, p_v_network=network)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15),
                                     father_edge=None, p_v_network=network)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=network,
                                         max_simulation=5)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2, p_v_network=network,
                                         max_simulation=5)

    arr, result, y_ = data_generator.generate_self_play_data(
        player1, player2, number_of_games=2,
        numbuer_of_samples_in_each_game=8)
    print(arr.shape, result.shape, y_.shape)