def evaluate_new_neural_network(p_v_network_old,
                                p_v_network_new,
                                number_of_battles=4,
                                plane_size=config.PLANE_SIZE):
    """Pit the new network against the old one and return the new side's
    net win count (positive means the new network came out ahead).

    Each network plays ``number_of_battles`` games as black and the same
    number as white; ``play()`` reports +1 for a black win and -1 for a
    white win, so the sign is flipped when the colours are swapped.
    """
    def _make_player(network):
        # Fresh MCTS tree rooted at an empty board for the given network.
        node = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                        father_edge=None,
                                        p_v_network=network)
        return p_v_mcts_player.MCTSPlayer(root=node,
                                          p_v_network=network,
                                          max_simulation=80)

    new_player = _make_player(p_v_network_new)
    old_player = _make_player(p_v_network_old)

    net_new_wins = 0
    print("------新黑旧白------")
    for _ in range(number_of_battles):
        new_player.refresh()
        old_player.refresh()
        winner, _, _, _ = play.PlayLogic().play(new_player, old_player)
        net_new_wins += winner
    print("------新白旧黑------")
    for _ in range(number_of_battles):
        new_player.refresh()
        old_player.refresh()
        winner, _, _, _ = play.PlayLogic().play(old_player, new_player)
        net_new_wins -= winner

    return net_new_wins
Beispiel #2
0
    def evaluate_new_neural_network(
            self,
            p_v_network_old,
            p_v_network_new,
            number_of_battles=config.NUMBER_of_BATTLES_WHEN_EVALUATING,
            plane_size=config.PLANE_SIZE):
        """Decide whether the freshly trained network should replace the
        old one.

        Two elimination rounds are played: the new network first takes
        white; only if it does not lose on balance does it also play a
        round as black.  Returns True when the new network is not behind
        in either round, False otherwise.
        """
        node_new = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                            father_edge=None,
                                            p_v_network=p_v_network_new)
        node_old = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                            father_edge=None,
                                            p_v_network=p_v_network_old)
        new_player = p_v_mcts_player.MCTSPlayer(
            root=node_new,
            p_v_network=p_v_network_new,
            max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)
        old_player = p_v_mcts_player.MCTSPlayer(
            root=node_old,
            p_v_network=p_v_network_old,
            max_simulation=config.MAX_SIMULATION_WHEN_EVALUATING)

        def _round(black, white, sign, banner):
            # Play one round of games; fold the winner flag (+1 black /
            # -1 white / 0 draw) in with `sign` so that a positive total
            # always means "new network ahead".
            logging.info(banner)
            total = 0
            for _ in range(number_of_battles):
                new_player.refresh()
                old_player.refresh()
                winner, _, _, _ = play.PlayLogic(
                    plane_size=config.PLANE_SIZE).play(black, white)
                total += sign * winner
            return total

        if _round(old_player, new_player, -1, "新白旧黑") < 0:
            return False
        if _round(new_player, old_player, +1, "新黑旧白") < 0:
            return False
        return True
Beispiel #3
0
 def __init__(self, deck_cards, attributes):
     """Set up a fresh session: identity, timestamps, two players and
     the underlying game state built from the given deck/attributes."""
     # generate_id() is expected to fill self.id in — TODO confirm.
     self.id = None
     self.generate_id()
     self.begin = time()
     self.last_interaction = time()
     self.active = True
     self.players = [Player() for _ in range(2)]
     self.game_state = game_logic.GameLogic(deck_cards=deck_cards,
                                            attributes=attributes)
Beispiel #4
0
    def evaluate_new_neural_network(self,
                                    p_v_network_old,
                                    p_v_network_new,
                                    number_of_battles=11,
                                    plane_size=15):
        """Evaluate the new network against the old one.

        The new network first plays ``number_of_battles`` games as black;
        if it finishes more than 2 net wins ahead it plays another round
        as white.  Returns True only when it leads by more than 2 in both
        rounds (i.e. the new network should replace the old one).
        """
        root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_new)
        root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                         father_edge=None,
                                         p_v_network=p_v_network_old)
        player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                             p_v_network=p_v_network_new,
                                             max_simulation=50)
        player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                             p_v_network=p_v_network_old,
                                             max_simulation=50)

        new_pure_win = 0
        # Round 1: new network (player1) is black, so a black win (+1)
        # counts in its favour directly.
        for i in range(number_of_battles):
            player1.refresh()
            player2.refresh()
            winner, plane_record, action_list, turn = play.PlayLogic().play(
                player1, player2)
            new_pure_win += winner
        if new_pure_win > 2:
            new_pure_win = 0
            # Round 2: colours swapped — the new network now wins as white
            # (winner == -1), so the winner flag must be SUBTRACTED.  The
            # previous `+= winner` credited the OLD network's black wins
            # to the new network.
            for i in range(number_of_battles):
                player1.refresh()
                player2.refresh()
                winner, plane_record, action_list, turn = play.PlayLogic(
                ).play(player2, player1)
                new_pure_win -= winner
            if new_pure_win > 2:
                return True
            else:
                return False
        else:
            return False
Beispiel #5
0
    def evaluate_new_network_with_random_player(self,
                                                p_v_network_new,
                                                number_of_battles=25,
                                                plane_size=config.PLANE_SIZE,
                                                u=1,
                                                max_simulation=1):
        """Benchmark the network-guided MCTS player against a purely
        random player and append the resulting win rate to a
        per-simulation-count log file.

        Returns the raw net win count of the network player.
        """
        seed_node = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size),
                                             father_edge=None,
                                             p_v_network=p_v_network_new)
        mcts_player = p_v_mcts_player.MCTSPlayer(root=seed_node,
                                                 p_v_network=p_v_network_new,
                                                 max_simulation=max_simulation)
        rand_player = random_player.RandomPlayer(
            gl.GameLogic(plane_size=plane_size))

        net_wins = 0
        # The network plays black first (+winner), then white (-winner),
        # so a positive total always favours the network player.
        for banner, black, white, sign in (
                ("------神黑随白------", mcts_player, rand_player, 1),
                ("------神白随黑------", rand_player, mcts_player, -1)):
            print(banner)
            for _ in range(number_of_battles):
                mcts_player.refresh()
                rand_player.refresh()
                winner, _, _, _ = play.PlayLogic().play(black, white)
                net_wins += sign * winner
        # Map net wins from [-2n, 2n] onto a [0, 1] win rate.
        win_rate = (net_wins +
                    number_of_battles * 2.0) / (2 * 2 * number_of_battles)
        with open(
                f'network/win_rate_max_simulation{max_simulation}.txt',
                'a+') as f:
            f.write(f"{u},{win_rate}\n")

        return net_wins
Beispiel #6
0
    def play(self, player1, player2):
        """Run one complete game, *player1* moving first (black).

        Returns a 4-tuple ``(winner, plane, probability_list, turns)``:
        winner is 1 (black), -1 (white) or 0 (draw); plane is the final
        board; probability_list holds one action-probability distribution
        per move (policy training targets); turns is the move count.
        """
        self.game_logic = gl.GameLogic(plane_size=self.plane_size)
        # One probability distribution per move, in play order.
        action_probability_distribution_list = []
        # Opening move: player1 (black) always moves first.
        x, y, action_probability_distribution = player1.get_action_and_probability(
        )
        action_probability_distribution_list.append(
            action_probability_distribution)
        self.game_logic.play(x, y)
        # Keep the opponent's search tree in sync with the move just made.
        player2.get_opponents_action(x, y)
        result = self.game_logic.game_result_fast_version(x, y)
        # result == 2 means the game is still in progress; 1 / -1 / 0 are
        # terminal outcomes handled after the loop.
        while result == 2:
            if self.game_logic.current_player == 1:
                # Black (player1) to move; mirror the move into player2.
                x, y, action_probability_distribution = player1.get_action_and_probability(
                )
                action_probability_distribution_list.append(
                    action_probability_distribution)
                self.game_logic.play(x, y)
                player2.get_opponents_action(x, y)
            else:
                # White (player2) to move; mirror the move into player1.
                x, y, action_probability_distribution = player2.get_action_and_probability(
                )
                action_probability_distribution_list.append(
                    action_probability_distribution)
                self.game_logic.play(x, y)
                player1.get_opponents_action(x, y)
            result = self.game_logic.game_result_fast_version(x, y)

        if result == 1:
            # Black wins ("黑胜").
            self.play_record.append(self.game_logic.plane.copy())
            logging.info("黑胜")
            print("黑胜")
            return 1, self.game_logic.plane, action_probability_distribution_list, self.game_logic.current_turn - 1
        elif result == -1:
            # White wins ("白胜").
            self.play_record.append(self.game_logic.plane.copy())
            logging.info("白胜")
            print("白胜")
            return -1, self.game_logic.plane, action_probability_distribution_list, self.game_logic.current_turn - 1
        elif result == 0:
            # Draw ("和棋").
            self.play_record.append(self.game_logic.plane.copy())
            logging.info("和棋")
            print("和棋")
            return 0, self.game_logic.plane, action_probability_distribution_list, self.game_logic.current_turn - 1
        else:
            # Unexpected result code — log it, pause so the message is
            # visible, then abort ("program error, exiting in 3 s").
            logging.warning("程序出错了,3秒后退出...")
            print("程序出错了,3秒后退出...")
            time.sleep(3)
            exit()
 def __init__(self):
     """Initialise the app: window state, game logic and the colour
     palette used for drawing."""
     self._running = True
     self._size = (450, 900)
     self._game = game_logic.GameLogic()

     # RGB colour constants used throughout rendering.
     palette = {
         'BLACK': (0, 0, 0),
         'GREY': (110, 110, 110),
         'WHITE': (255, 255, 255),
         'RED': (255, 0, 0),
         'GREEN': (0, 255, 0),
         'BLUE': (0, 0, 255),
         'YELLOW': (255, 255, 0),
         'PURPLE': (255, 0, 255),
         'SKY': (0, 255, 255),
         'ORANGE': (255, 127, 0),
         'PINK': (255, 0, 127),
         'LIME': (0, 255, 127),
         'BROWN': (165, 42, 42),
     }
     for name, rgb in palette.items():
         setattr(self, name, rgb)
Beispiel #8
0
import tkinter as tk
import time
import p_v_network
import p_v_mcts_player
import game_logic as gl

# Shared board state for the Tk viewer defined below.
game_logic = gl.GameLogic(plane_size=15)

# Load two checkpoints of the policy-value network (iterations 0 and 5)
# so they can be pitted against each other.
p_v_network_1 = p_v_network.P_V_Network()
p_v_network_1.restore(0)
p_v_network_2 = p_v_network.P_V_Network()
p_v_network_2.restore(5)
# Each player gets its own MCTS tree rooted at an empty 15x15 board.
root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15),
                                 father_edge=None,
                                 p_v_network=p_v_network_1)
root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15),
                                 father_edge=None,
                                 p_v_network=p_v_network_2)
player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                     p_v_network=p_v_network_1,
                                     max_simulation=160)
player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                     p_v_network=p_v_network_2,
                                     max_simulation=160)


def click_callback(event):
    x = event.x
    y = event.y
    print(x, y)
    if x > game_logic.plane_size * 30 + 15 or x < 15 or y > game_logic.plane_size * 30 + 15 or y < 15:
Beispiel #9
0
        arr_data_augment_board = arr_data_augment_board.swapaxes(0, 1)
        arr_data_augment_board = arr_data_augment_board.swapaxes(1, 2)
        arr_data_augment_act = np.fliplr(arr_data_augment_act)
        board = np.concatenate((board, np.array([arr_data_augment_board])))
        action_probability_distribution = np.concatenate((action_probability_distribution, np.array([arr_data_augment_act.reshape(size ** 2)])))
        result = np.concatenate((result, np.array([[z]])))

        return result, board, action_probability_distribution



if __name__ == "__main__":
    # Smoke test: generate a tiny batch of self-play data with very
    # shallow searches (5 simulations) and print the array shapes.
    import p_v_network
    import play

    self_play_game = play.PlayLogic(plane_size=15)
    data_generator = GenerateSelfPlayData(self_play_game)

    # NOTE(review): this rebinding shadows the `p_v_network` module just
    # imported with an instance of the same name.
    p_v_network = p_v_network.P_V_Network()
    root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15), father_edge=None, p_v_network=p_v_network)
    root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=15), father_edge=None, p_v_network=p_v_network)
    player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=p_v_network, max_simulation=5)
    player2 = p_v_mcts_player.MCTSPlayer(root=root2, p_v_network=p_v_network, max_simulation=5)

    arr, result, y_ = data_generator.generate_self_play_data(player1, player2, number_of_games=2, numbuer_of_samples_in_each_game=8)
    print(arr.shape, result.shape, y_.shape)



 def refresh(self):
     """Discard the current search tree and re-root it on an empty board
     of the same size, keeping the same network."""
     size = self.root.state.plane_size
     fresh_state = game_logic.GameLogic(plane_size=size)
     self.root = MCTSNode(fresh_state,
                          father_edge=None,
                          p_v_network=self.p_v_network)
Beispiel #11
0
 def __init__(self, plane_size=config.PLANE_SIZE):
     """Create a play controller with its own board and game record."""
     self.plane_size = plane_size
     self.play_record = []
     self.game_logic = gl.GameLogic(plane_size=plane_size)
import tkinter as tk
import time

import game_logic as gl

# Single shared 8x8 board driven by mouse clicks on the canvas.
game_logic = gl.GameLogic(plane_size=8)


def click_callback(event):
    """Translate a canvas click into a board move and draw the stone."""
    print("clicked at", event.x, event.y)
    x = event.x
    y = event.y
    # Ignore clicks outside the playable grid (30 px cells, 15 px margin).
    if x > game_logic.plane_size * 30 + 15 or x < 15 or y > game_logic.plane_size * 30 + 15 or y < 15:
        return

    # Snap pixel coordinates to the nearest cell index.
    result_x = (x - 15) // 30
    result_y = (y - 15) // 30
    print(result_x, result_y)
    if game_logic.play(result_x, result_y):
        # NOTE(review): the colours look inverted relative to
        # current_player (1 draws white, -1 draws black) — presumably
        # play() has already switched to the NEXT player by this point,
        # so the stone belongs to the previous one.  Confirm against
        # GameLogic.play.
        if game_logic.current_player == 1:
            canvas.create_oval(30 + result_x * 30 - 11,
                               30 + result_y * 30 - 11,
                               30 + result_x * 30 + 10,
                               30 + result_y * 30 + 10,
                               fill='white')
        elif game_logic.current_player == -1:
            canvas.create_oval(30 + result_x * 30 - 11,
                               30 + result_y * 30 - 11,
                               30 + result_x * 30 + 10,
                               30 + result_y * 30 + 10,
                               fill='black')
Beispiel #13
0
            return 0, self.game_logic.plane, action_probability_distribution_list, self.game_logic.current_turn - 1
        else:
            logging.warning("程序出错了,3秒后退出...")
            print("程序出错了,3秒后退出...")
            time.sleep(3)
            exit()


if __name__ == "__main__":
    import p_v_mcts_player_v2
    import p_v_network
    import game_logic as gl

    pl = PlayLogic()
    p_v_network = p_v_network.P_V_Network()
    state1 = gl.GameLogic(plane_size=15)
    state2 = gl.GameLogic(plane_size=15)
    temp_player = p_v_mcts_player_v2.MCTSPlayer(root=None,
                                                p_v_network=p_v_network,
                                                max_simulation=5)
    action_probability_distribution, value = temp_player.get_current_action_probability_distribution_and_value_by_neural_network(
        p_v_network=p_v_network, state=state1)
    root1 = p_v_mcts_player_v2.MCTSNode(state1, None,
                                        action_probability_distribution, value)

    root2 = p_v_mcts_player_v2.MCTSNode(state2, None,
                                        action_probability_distribution, value)
    player1 = p_v_mcts_player_v2.MCTSPlayer(root=root1,
                                            p_v_network=p_v_network,
                                            max_simulation=5)
    player2 = p_v_mcts_player_v2.MCTSPlayer(root=root2,
Beispiel #14
0
def play_level(screen, player):
    """Run the main level loop: build machines, piles and customers, then
    process events / update sprites / redraw each frame until the level
    ends.

    Returns a ``c.GameState`` value (QUIT or GAME_OVER).
    """
    background = pygame.Surface((c.SCREEN_WIDTH, c.SCREEN_HEIGHT))
    clock = pygame.time.Clock()

    # Make washers and dryers
    washer_group, dryer_group = level_utils.make_washers_and_dryers((0, 0), 2,
                                                                    2)

    # Images and sprites for player and laundry piles
    pile_images = image_utils.load_laundry_images('images/laundry/in_pile')
    pile_in = pile.Pile(15, 7, pile_images, c.LaundryState.UNWASHED)
    pile_out = pile.Pile(c.SCREEN_WIDTH - 105, 7, pile_images,
                         c.LaundryState.DRIED)

    # Labels for laundry piles
    # TODO: make more dynamic/adjustable labels based on position of piles
    pile_in_label, pile_in_rect = level_utils.make_label(WHITE, 'inbox')
    pile_in_rect.bottomleft = (10, c.SCREEN_HEIGHT)
    pile_out_label, pile_out_rect = level_utils.make_label(WHITE, 'outbox')
    pile_out_rect.bottomright = (c.SCREEN_WIDTH - PADDING, c.SCREEN_HEIGHT)

    daily_clock = level_utils.DailyClock()

    # Generating orders
    orders = level_utils.generate_orders(order_num_min=8,
                                         order_num_max=8,
                                         load_num_min=1,
                                         load_num_max=1)
    customers = level_utils.generate_customers(orders)
    inactive_customers = pygame.sprite.Group(
        customers)  # all customers start off inactive

    # Storing all sprites to master group
    all_sprites = pygame.sprite.Group(washer_group, dryer_group, pile_in,
                                      pile_out, player)
    logic = game_logic.GameLogic(orders, pile_in, pile_out, player)

    running = True
    while running:
        # NOTE(review): `id` shadows the builtin; it holds this frame's
        # user-event offset (0 = no user event seen).
        id = 0
        time_delta = clock.tick(FPS) / 1000.0
        mouse_up = False
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                return c.GameState.QUIT
            if event.type == pygame.MOUSEBUTTONUP and event.button == 1:
                print("click!!!")
                print("the current time is: " + str(pygame.time.get_ticks()))
                mouse_up = True
            if event.type > pygame.USEREVENT:
                id = event.type - pygame.USEREVENT
                print(id)
            if event.type == c.FAIL_STATE:
                return c.GameState.GAME_OVER
            if event.type == c.GAME_LOGIC_EVENT:
                # NOTE(review): only the numeric event type is forwarded,
                # not the event object — confirm handle_event expects that.
                logic.handle_event(event.type)
            if event.type == c.NOON_EVENT:
                # Activate everyone at noon.  pygame Groups iterate over a
                # snapshot list, so removing during iteration is safe.
                for customer in inactive_customers:
                    all_sprites.add(customer)
                    inactive_customers.remove(customer)

        if not customers:
            # NOTE(review): `customers` is never mutated inside this loop;
            # verify that something else empties it, otherwise this
            # end-of-level branch can never fire.
            print("final score: " + str(logic.score))
            return c.GameState.GAME_OVER  # TODO: update/change

        # Updating objects
        all_sprites.update(time_delta, pygame.mouse.get_pos(), mouse_up, logic,
                           id)
        pile_in.update_y_pos()
        pile_out.update_y_pos()
        clock_text = daily_clock.get_updated_text(time_delta)
        clock_label, clock_rect = level_utils.make_label(WHITE, clock_text)
        clock_rect.topright = (c.SCREEN_WIDTH - PADDING, PADDING)

        # Drawing background, sprites, and labels
        screen.blit(background, (0, 0))
        screen.blit(pile_in_label, pile_in_rect)
        screen.blit(pile_out_label, pile_out_rect)
        screen.blit(clock_label, clock_rect)
        all_sprites.draw(screen)

        # Updating display with the latest
        pygame.display.update()
import tkinter as tk
import time
import numpy as np

import game_logic as gl
import config

# Board model sized from the project config; drives the replay viewer.
game_logic = gl.GameLogic(plane_size=config.PLANE_SIZE)

# Recorded final board of game 66, to be rendered on the canvas.
plane_record = np.load("plane_record/plane_record66.npy")

root = tk.Tk()
root.title("Gomoku")
root.resizable(0, 0)
root.wm_attributes("-topmost", 1)  # keep the viewer window on top
canvas = tk.Canvas(root,
                   width=game_logic.plane_size * 30 + 30,
                   height=game_logic.plane_size * 30 + 30,
                   bd=0,
                   bg='khaki',
                   highlightthickness=0)

# Grid lines: 30 px cells with a 30 px outer margin.
for i in range(1, game_logic.plane_size + 1):
    canvas.create_line(i * 30, 30, i * 30, game_logic.plane_size * 30, width=2)
for i in range(1, game_logic.plane_size + 1):
    canvas.create_line(30, i * 30, game_logic.plane_size * 30, i * 30, width=2)
# The coordinate is 123 (not 124) because create_line draws 2 px wide,
# so 124 would misalign the star points with the grid.
if game_logic.plane_size == 15:
    canvas.create_oval(116, 116, 123, 123, fill='black')
    canvas.create_oval(116, 356, 123, 363, fill='black')
    canvas.create_oval(356, 116, 363, 123, fill='black')
import numpy as np

import p_v_network_v2 as p_v_network
import p_v_mcts_player
import game_logic as gl
import play
import config
import random_player

# Board used by the evaluation script below.
game_logic = gl.GameLogic(plane_size=config.PLANE_SIZE)

# p_v_network_1 = p_v_network.P_V_Network()
# p_v_network_1.restore(0)
# root1 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=plane_size), father_edge=None, p_v_network=p_v_network_new)
# player1 = p_v_mcts_player.MCTSPlayer(root=root1, p_v_network=p_v_network_new, max_simulation=2)
# Baseline opponent: plays random legal moves.
player1 = random_player.RandomPlayer(
    gl.GameLogic(plane_size=config.PLANE_SIZE))

# Candidate: network-guided MCTS restored from checkpoint 0, with a very
# shallow search (2 simulations per move).
p_v_network_2 = p_v_network.P_V_Network()
p_v_network_2.restore(0)
root2 = p_v_mcts_player.MCTSNode(gl.GameLogic(plane_size=config.PLANE_SIZE),
                                 father_edge=None,
                                 p_v_network=p_v_network_2)
player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                     p_v_network=p_v_network_2,
                                     max_simulation=2)


def evaluate_new_neural_network(player1, player2, number_of_battles=1):
    new_pure_win = 0
    print("------新黑旧白------")
Beispiel #17
0
                   surface=window,
                   grid_dim=constants.GRID_DIM)
# Two players with their colours, starting checker positions and names.
player1 = util.Player(window,
                      constants.PLAYER_1_COLOR,
                      board,
                      constants.PLAYER_1_CHECKER_POSITIONS,
                      name="Player Red")
player2 = util.Player(window,
                      constants.PLAYER_2_COLOR,
                      board,
                      constants.PLAYER_2_CHECKER_POSITIONS,
                      name="Player Blue")
# Selector presumably highlights the selected piece location and its
# legal destinations — confirm against util.Selector.
select = util.Selector(surface=window,
                       des_color=constants.DESTINATION_COLOR,
                       loc_color=constants.LOCATION_COLOR)
logic = game_logic.GameLogic(player1=player1, player2=player2, board=board)

# Main Game Loop
while run:
    # If not paused, draw all board and selector
    if not pause:
        draw(board, select)

    pygame.display.flip()

    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            run = False
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_SPACE:
                if pause:
Beispiel #18
0
    def train_and_update(self,
                         plane_size=15,
                         number_of_epoch=1,
                         number_of_update_network=200,
                         number_of_games=200,
                         numbuer_of_samples_in_each_game=9,
                         min_batch=100,
                         max_simulation=3):
        '''Main self-play training loop: generate games, fit the network,
        then evaluate it against the previous baseline.

        :param plane_size: board side length used for self-play games
        :param number_of_epoch: training passes over each generated data set
        :param number_of_update_network: outer iterations (one checkpoint each)
        :param number_of_games: self-play games generated per iteration
        :param numbuer_of_samples_in_each_game: positions sampled per game
            (parameter name keeps the original misspelling so existing
            keyword callers keep working)
        :param min_batch: mini-batch size; must evenly divide
            number_of_games * numbuer_of_samples_in_each_game
        :param max_simulation: MCTS simulations per move during self-play
        :return: None
        '''
        p_v_network_new = p_v_network.P_V_Network()
        p_v_network_old = p_v_network.P_V_Network()

        # Checkpoints are written under ./network.
        path = "./network"
        if not os.path.exists(path):
            os.makedirs(path)

        for u in range(number_of_update_network):
            print("the %dth update" % (u))
            # Checkpoint u holds the weights this round STARTED from
            # (saved before training below).
            p_v_network_new.save(u)

            self_play_game = play.PlayLogic(plane_size=plane_size)
            data_generator = generate_self_play_data.GenerateSelfPlayData(
                self_play_game)

            # Both self-play sides use the same (new) network, each with
            # its own fresh search tree.
            root1 = p_v_mcts_player.MCTSNode(
                gl.GameLogic(plane_size=plane_size),
                father_edge=None,
                p_v_network=p_v_network_new)
            root2 = p_v_mcts_player.MCTSNode(
                gl.GameLogic(plane_size=plane_size),
                father_edge=None,
                p_v_network=p_v_network_new)
            player1 = p_v_mcts_player.MCTSPlayer(root=root1,
                                                 p_v_network=p_v_network_new,
                                                 max_simulation=max_simulation)
            player2 = p_v_mcts_player.MCTSPlayer(root=root2,
                                                 p_v_network=p_v_network_new,
                                                 max_simulation=max_simulation)

            plane_records, game_result_, y_ = data_generator.generate_self_play_data(
                player1,
                player2,
                number_of_games=number_of_games,
                numbuer_of_samples_in_each_game=numbuer_of_samples_in_each_game
            )

            for e in range(number_of_epoch):
                for i in range(
                        int(number_of_games * numbuer_of_samples_in_each_game /
                            min_batch)):
                    # With min_batch=100 and 1000 sampled positions the
                    # inner loop runs 10 times.
                    batch = [
                        plane_records[i * min_batch:(i + 1) * min_batch],
                        game_result_[i * min_batch:(i + 1) * min_batch],
                        y_[i * min_batch:(i + 1) * min_batch]
                    ]
                    if e % 10 == 0:
                        # Disabled loss reporting, kept for reference:
                        # loss = p_v_network_new.loss.eval(feed_dict={p_v_network_new.x_plane: batch[0], p_v_network_new.game_result: batch[1], p_v_network_new.y_: batch[2], p_v_network_new.is_training: False})
                        # p_v_network_new.sess.run([p_v_network_new.loss.eval], feed_dict={p_v_network_new.x_plane: batch[0], p_v_network_new.game_result: batch[1], p_v_network_new.y_: batch[2], p_v_network_new.is_training: False})
                        # print("step %d, loss %g" % (i, loss))
                        pass
                    p_v_network_new.sess.run(
                        [p_v_network_new.train_step],
                        feed_dict={
                            p_v_network_new.x_plane: batch[0],
                            p_v_network_new.game_result: batch[1],
                            p_v_network_new.y_: batch[2],
                            p_v_network_new.is_training: True
                        })

            if self.evaluate_new_neural_network(p_v_network_old,
                                                p_v_network_new,
                                                plane_size=plane_size,
                                                number_of_battles=5):
                # New network won the evaluation: promote it by loading
                # checkpoint u into the "old" baseline.  NOTE(review):
                # checkpoint u predates this round's training — confirm
                # the baseline is meant to lag one round behind.
                print("old_network changed")
                p_v_network_old.restore(u)