Example #1
def evaluate(AIs, play_num, return_draw=False):
    wins = 0.
    draw_num = 0
    for i in range(play_num):
        state = State()
        AIs[0].init_prev()
        AIs[1].init_prev()
        AIs[i % 2].color = 0
        AIs[1 - i % 2].color = 1
        while True:
            s, pi = AIs[i % 2].act_and_get_pi(state)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[i % 2].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[1 - i % 2].prev_action = s

            if state.terminate:
                break

            s, pi = AIs[1 - i % 2].act_and_get_pi(state)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[1 - i % 2].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[i % 2].prev_action = s

            if state.terminate:
                break

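        # Score the game from AIs[0]'s point of view: 1 point for a win, 0.5 for a draw.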
        if i % 2 == 0 and state.reward == 1:
            wins += 1.
        elif i % 2 == 1 and state.reward == -1:
            wins += 1.
        elif state.reward == 0:
            wins += 0.5
            draw_num += 1
        sys.stderr.write('\r\033[K {} wins / {} games'.format(wins, i + 1))
        sys.stderr.flush()
    print("")
    AIs[0].color = 0
    AIs[1].color = 1

    if return_draw:
        return wins, draw_num
    else:
        return wins
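
A minimal usage sketch for evaluate (the CNNAI constructor arguments and checkpoint path mirror Example #3; the surrounding module layout is an assumption):

AIs = [CNNAI(0, search_nodes=800, tau=0.25), CNNAI(1, search_nodes=800, tau=0.25)]
AIs[0].load("./parameter/epoch110.ckpt")
AIs[1].load("./parameter/epoch110.ckpt")
wins, draws = evaluate(AIs, play_num=100, return_draw=True)
print("AIs[0] scored {} points over 100 games ({} draws)".format(wins, draws))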
Example #2
def normal_play(agents):
    state = State()
    while True:
        state.display_cui()
        start = time.time()
        s = agents[0].act(state, showNQ=True)
        end = time.time()
        print(end - start)
        if isinstance(s, int):
            a = actionid2str(state, s)
        else:
            a = s
        while not state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            s = agents[0].act(state, showNQ=True)
            if isinstance(s, int):
                a = actionid2str(state, s)
            else:
                a = s
        agents[1].prev_action = s

        if state.terminate:
            break
        #time.sleep(0.1)

        state.display_cui()
        s = agents[1].act(state, showNQ=True)
        if isinstance(s, int):
            a = actionid2str(state, s)
        else:
            a = s
        while not state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            s = agents[1].act(state, showNQ=True)
            if isinstance(s, int):
                a = actionid2str(state, s)
            else:
                a = s
        agents[0].prev_action = s

        #time.sleep(0.1)
        if state.terminate:
            break

    state.display_cui()
    print("The game finished. reward=({}, {})".format(state.reward, -state.reward))
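
A sketch of running a console game between two trained agents with normal_play (checkpoint path and constructor settings are copied from Example #3 and purely illustrative):

agents = [CNNAI(0, search_nodes=800, tau=0.25), CNNAI(1, search_nodes=800, tau=0.25)]
agents[0].load("./parameter/epoch110.ckpt")
agents[1].load("./parameter/epoch110.ckpt")
normal_play(agents)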
Example #3
class Quoridor(Widget):
    turn = NumericProperty(0)
    move_str = StringProperty("player1 move")
    player1wall = NumericProperty(10)
    player2wall = NumericProperty(10)
    Bx = NumericProperty(0)
    By = NumericProperty(0)
    Wx = NumericProperty(0)
    Wy = NumericProperty(0)
    button = ObjectProperty(None)
    human_button1 = ObjectProperty(None)
    ai_button1 = ObjectProperty(None)
    human_button2 = ObjectProperty(None)
    ai_button2 = ObjectProperty(None)
    search_nodes = 800

    def dont_down(self, button):
        if button.state != "down":
            button.state = "down"

    def __init__(self, **kwargs):
        super(Quoridor, self).__init__(**kwargs)
        self.state = State()
        self.agents = [GUIHuman(0), CNNAI(1, search_nodes=self.search_nodes, tau=0.5)]
        self.playing_game = False
        self.human_button1.bind(on_press=lambda touch: self.dont_down(self.human_button1))
        self.ai_button1.bind(on_press=lambda touch: self.dont_down(self.ai_button1))
        self.human_button2.bind(on_press=lambda touch: self.dont_down(self.human_button2))
        self.ai_button2.bind(on_press=lambda touch: self.dont_down(self.ai_button2))
        self.button.bind(on_release=lambda touch: self.start_game())

        self.row_wall_colors = [Color(0.7, 0.7, 0, 0) for i in range(64)]
        self.column_wall_colors = [Color(0.7, 0.7, 0, 0) for i in range(64)]

        with self.canvas.before:
            Color(96/255, 32/128, 0, 1)
            Rectangle(pos=(10, 10), size=(BOARD_LEN - 20, BOARD_LEN - 20))
            Color(64/255, 0, 0, 1)
            for i in range(10):
                Rectangle(pos=(int(10 + i / 9 * (BOARD_LEN - 30)), 10), size=(10, BOARD_LEN - 20))
            for i in range(10):
                Rectangle(pos=(10, int(10 + i / 9 * (BOARD_LEN - 30))), size=(BOARD_LEN - 20, 10))

        for i, color in enumerate(self.row_wall_colors):
            self.canvas.add(color)
            x = i % 8
            y = i // 8
            self.canvas.add(Rectangle(pos=(int(20 + x / 9 * (BOARD_LEN - 30)), int(10 + (y + 1) / 9 * (BOARD_LEN - 30))), size=((BOARD_LEN - 30) // 9 * 2 - 10, 10)))
        for i, color in enumerate(self.column_wall_colors):
            self.canvas.add(color)
            x = i % 8
            y = i // 8
            self.canvas.add(Rectangle(pos=(int(10 + (x + 1) / 9 * (BOARD_LEN - 30)), int(20 + y / 9 * (BOARD_LEN - 30))), size=(10, (BOARD_LEN - 30) // 9 * 2 - 10)))

    def oneturn(self, color):
        global touched
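        # Ask the agent whose turn it is for an action; -1 presumably means a
        # GUI human has not produced a move yet, so wait for the next update tick.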
        s = self.agents[color].act(self.state)
        if isinstance(self.agents[color], CNNAI):
            g = self.agents[color].get_tree_for_graphviz()
            g.render(os.path.join("game_trees", "game_tree{}".format(self.state.turn)))
        if s == -1:
            return

        if isinstance(s, int):
            a = actionid2str(self.state, s)
        else:
            a = s
        if not self.state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            return
        self.agents[1 - color].prev_action = s
        self.state.display_cui()
        print(self.state.get_player_dist_from_goal())
        touched = False

    def start_game(self):
        global touched
        if self.human_button1.state == "down":
            agent1 = GUIHuman(0)
        elif self.ai_button1.state == "down":
            #agent1 = CNNAI(0, search_nodes=self.search_nodes, tau=0.25, v_is_dist=True, p_is_almost_flat=True)
            agent1 = CNNAI(0, search_nodes=self.search_nodes, tau=0.25)
            agent1.load("./parameter/epoch110.ckpt")
        if self.human_button2.state == "down":
            agent2 = GUIHuman(1)
        elif self.ai_button2.state == "down":
            #agent2 = CNNAI(1, search_nodes=self.search_nodes, tau=0.25, v_is_dist=True, p_is_almost_flat=True)
            agent2 = CNNAI(1, search_nodes=self.search_nodes, tau=0.25)
            agent2.load("./parameter/epoch110.ckpt")
        self.agents = [agent1, agent2]
        self.state = State()
        self.playing_game = True
        touched = False

    def update(self, dt):
        if self.playing_game and not self.state.terminate:
            self.oneturn(self.state.turn % 2)
        if self.state.terminate:
            self.playing_game = False

        self.turn = self.state.turn
        if self.turn % 2 == 0:
            self.move_str = "player1 move"
        else:
            self.move_str = "player2 move"
        self.player1wall = self.state.black_walls
        self.player2wall = self.state.white_walls

        self.Bx = int(15 + (self.state.Bx + 0.5) / 9 * (BOARD_LEN - 30))
        self.By = int(15 + (8.5 - self.state.By) / 9 * (BOARD_LEN - 30))
        self.Wx = int(15 + (self.state.Wx + 0.5) / 9 * (BOARD_LEN - 30))
        self.Wy = int(15 + (8.5 - self.state.Wy) / 9 * (BOARD_LEN - 30))

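        # Wall overlays: clear every candidate slot, half-highlight the slot under
        # the mouse cursor (skipping the ambiguous crossing points), then show
        # placed walls fully opaque.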
        for x in range(8):
            for y in range(8):
                self.row_wall_colors[(7 - y) * 8 + x].a = 0
                self.column_wall_colors[(7 - y) * 8 + x].a = 0

        for x in range(8):
            for y in range(8):
                mouse_x, mouse_y = Window.mouse_pos
                if int(20 + (x + 0.5) / 9 * (BOARD_LEN - 30)) <= mouse_x < int(20 + (x + 1.5) / 9 * (BOARD_LEN - 30)) and int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) <= mouse_y <= int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) + 10:
                    self.row_wall_colors[y * 8 + x].a = 0.5
                if int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) - 10 <= mouse_x <= int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) and int(10 + (y + 0.5) / 9 * (BOARD_LEN - 30)) <= mouse_y < int(10 + (y + 1.5) / 9 * (BOARD_LEN - 30)):
                    self.column_wall_colors[y * 8 + x].a = 0.5
                if int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) - 10 <= mouse_x <= int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) and int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) <= mouse_y <= int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) + 10:
                    self.row_wall_colors[y * 8 + x].a = 0
                    self.column_wall_colors[y * 8 + x].a = 0

        for x in range(8):
            for y in range(8):
                if self.state.row_wall[x, y]:
                    self.row_wall_colors[(7 - y) * 8 + x].a = 1
                if self.state.column_wall[x, y]:
                    self.column_wall_colors[(7 - y) * 8 + x].a = 1

        #print(Window.mouse_pos)

    def on_touch_down(self, touch):
        global touched, action
        touched = True
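        # Map the touch position to an action string: column letter plus row digit,
        # with an 'h'/'v' suffix for horizontal/vertical wall placements; touches on
        # the crossing point of two wall slots are ambiguous and skipped.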
        for x in range(9):
            for y in range(9):
                if int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) - 10 <= touch.x <= int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) and int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) <= touch.y <= int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) + 10:
                    continue
                if int(20 + (x + 0.5) / 9 * (BOARD_LEN - 30)) <= touch.x < int(20 + (x + 1.5) / 9 * (BOARD_LEN - 30)) and int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) <= touch.y <= int(10 + (y + 1) / 9 * (BOARD_LEN - 30)) + 10:
                    action = chr(ord("a") + x) + str(8 - y) + "h"
                if int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) - 10 <= touch.x <= int(20 + (x + 1) / 9 * (BOARD_LEN - 30)) and int(10 + (y + 0.5) / 9 * (BOARD_LEN - 30)) <= touch.y < int(10 + (y + 1.5) / 9 * (BOARD_LEN - 30)):
                    action = chr(ord("a") + x) + str(8 - y) + "v"
                if int(20 + x / 9 * (BOARD_LEN - 30)) <= touch.x < int(10 + (x + 1) / 9 * (BOARD_LEN - 30)) and int(20 + y / 9 * (BOARD_LEN - 30)) <= touch.y < int(10 + (y + 1) / 9 * (BOARD_LEN - 30)):
                    action = chr(ord("a") + x) + str(9 - y)

        #print(touch.x, touch.y)
        super(Quoridor, self).on_touch_down(touch)
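
The widget above expects its update(dt) method to be driven by the Kivy clock. A minimal app wrapper, assuming a matching quoridor.kv file that wires the button, human_button*, and ai_button* ids, might look like this:

from kivy.app import App
from kivy.clock import Clock

class QuoridorApp(App):
    def build(self):
        game = Quoridor()
        # Drive Quoridor.update() roughly 30 times per second.
        Clock.schedule_interval(game.update, 1.0 / 30.0)
        return game

if __name__ == "__main__":
    QuoridorApp().run()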
Example #4
def generate_data(AIs, play_num, noise=0.1, display=False, equal_draw=True):
    data = []
    for i in range(play_num):
        state = State()
        AIs[0].init_prev()
        AIs[1].init_prev()
        featuress = [[], [], [], []]
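        # Feature planes are collected for the four flag combinations of feature_CNN,
        # presumably the board symmetries used for the data augmentation further below.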
        for j, b1, b2 in [(0, False, False), (1, True, False), (2, False, True), (3, True, True)]:
            featuress[j].append(state.feature_CNN(b1, b2))

        pis = []
        states = [state_copy(state)]
        while True:
            AIs[0].tau = np.random.rand() * (1. - TAU_MIN) + TAU_MIN
            AIs[1].tau = np.random.rand() * (1. - TAU_MIN) + TAU_MIN
            if state.turn >= 20:
                AIs[0].tau = TAU_MIN
                AIs[1].tau = TAU_MIN
            s, pi = AIs[0].act_and_get_pi(state, noise=noise)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[0].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[1].prev_action = s
            pis.append(pi)
            if display:
                state.display_cui()
            end = False
            for state2 in states:
                if equal_draw and state == state2:
                    end = True
                    break
            if end:
                break
            states.append(state_copy(state))
            if state.terminate:
                break
            for j, b1, b2 in [(0, False, False), (1, True, False), (2, False, True), (3, True, True)]:
                featuress[j].append(state.feature_CNN(b1, b2))
            s, pi = AIs[1].act_and_get_pi(state, noise=noise)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[1].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[0].prev_action = s
            pis.append(pi)
            if display:
                state.display_cui()
            end = False
            for state2 in states:
                if equal_draw and state == state2:
                    end = True
                    break
            if end:
                break
            states.append(state_copy(state))
            if state.terminate:
                break
            for j, b1, b2 in [(0, False, False), (1, True, False), (2, False, True), (3, True, True)]:
                featuress[j].append(state.feature_CNN(b1, b2))
        del states
        if state.reward == 0:
            continue
        for feature1, feature2, feature3, feature4, pi in zip(featuress[0], featuress[1], featuress[2], featuress[3], pis):
            data.append((feature1, pi, state.reward))
            a = np.flip(pi[:64].reshape((8, 8)), 0).flatten()
            b = np.flip(pi[64:128].reshape((8, 8)), 0).flatten()
            mvarray1 = pi[128:].reshape((3, 3))
            mvarray2 = np.zeros((3, 3))
            for y in [-1, 0, 1]:
                for x in [-1, 0, 1]:
                    mvarray2[x, y] = mvarray1[-x, y]
            c = mvarray2.flatten()
            data.append((feature2, np.concatenate([a, b, c]), state.reward))
            a = np.flip(pi[:64].reshape((8, 8)), 1).flatten()
            b = np.flip(pi[64:128].reshape((8, 8)), 1).flatten()
            mvarray1 = pi[128:].reshape((3, 3))
            mvarray2 = np.zeros((3, 3))
            for y in [-1, 0, 1]:
                for x in [-1, 0, 1]:
                    mvarray2[x, y] = mvarray1[x, -y]
            c = mvarray2.flatten()
            data.append((feature3, np.concatenate([a, b, c]), -state.reward))
            a = np.flip(np.flip(pi[:64].reshape((8, 8)), 1), 0).flatten()
            b = np.flip(np.flip(pi[64:128].reshape((8, 8)), 1), 0).flatten()
            mvarray1 = pi[128:].reshape((3, 3))
            mvarray2 = np.zeros((3, 3))
            for y in [-1, 0, 1]:
                for x in [-1, 0, 1]:
                    mvarray2[x, y] = mvarray1[-x, -y]
            c = mvarray2.flatten()
            data.append((feature4, np.concatenate([a, b, c]), -state.reward))

    return data
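
A sketch of how the generated records might be consumed (each element of data is a (feature planes, policy target pi, value target) triple; the unpacking below is illustrative):

AIs = [CNNAI(0, search_nodes=800, tau=0.5), CNNAI(1, search_nodes=800, tau=0.5)]
data = generate_data(AIs, play_num=10, noise=0.1)
features, pis, values = zip(*data)  # split the triples into training arrays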