Example #1
def evaluate(AIs, play_num, return_draw=False):
    wins = 0.
    draw_num = 0
    for i in range(play_num):
        state = State()
        AIs[0].init_prev()
        AIs[1].init_prev()
        AIs[i % 2].color = 0
        AIs[1 - i % 2].color = 1
        while True:
            s, pi = AIs[i % 2].act_and_get_pi(state)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[i % 2].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[1 - i % 2].prev_action = s

            if state.terminate:
                break

            s, pi = AIs[1 - i % 2].act_and_get_pi(state)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[1 - i % 2].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[i % 2].prev_action = s

            if state.terminate:
                break

        if i % 2 == 0 and state.reward == 1:
            wins += 1.
        elif i % 2 == 1 and state.reward == -1:
            wins += 1.
        elif state.reward == 0:
            wins += 0.5
            draw_num += 1
        # progress: (i + 1 - wins) is AIs[1]'s running score out of the i + 1 games so far
        sys.stderr.write('\r\033[K {}win/{}'.format(i + 1 - wins, i + 1))
        sys.stderr.flush()
    print("")
    AIs[0].color = 0
    AIs[1].color = 1

    if return_draw:
        return wins, draw_num
    else:
        return wins
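
A typical use of evaluate() is to score a candidate agent against a reference one: AIs[0] collects 1 point per win and 0.5 per draw, with colors alternating every game. A minimal usage sketch, assuming two CNNAI-style agents as on the rest of this page (the constructor arguments are hypothetical):

ai_candidate = CNNAI(search_nodes=200)  # hypothetical constructor arguments
ai_reference = CNNAI(search_nodes=200)
score, draws = evaluate([ai_candidate, ai_reference], play_num=100, return_draw=True)
print("candidate scored {}/100 with {} draws".format(score, draws))
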
Example #2
def normal_play(agents):
    state = State()
    while True:
        state.display_cui()
        start = time.time()
        s = agents[0].act(state, showNQ=True)
        end = time.time()
        print(end - start)  # thinking time of the first agent in seconds
        if isinstance(s, int):
            a = actionid2str(state, s)
        else:
            a = s
        while not state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            s = agents[0].act(state, showNQ=True)
            if isinstance(s, int):
                a = actionid2str(state, s)
            else:
                a = s
        agents[1].prev_action = s

        if state.terminate:
            break
        #time.sleep(0.1)

        state.display_cui()
        s = agents[1].act(state, showNQ=True)
        if isinstance(s, int):
            a = actionid2str(state, s)
        else:
            a = s
        while not state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            s = agents[1].act(state, showNQ=True)
            if isinstance(s, int):
                a = actionid2str(state, s)
            else:
                a = s
        agents[0].prev_action = s

        #time.sleep(0.1)
        if state.terminate:
            break

    state.display_cui()
    print("The game finished. reward=({}, {})".format(state.reward, -state.reward))
Example #3
def get_graphviz_tree(tree, g, count=0, threshold=5):
    # Render an MCTS tree into the Graphviz graph g: nodes show the total visit
    # count and the mean value W/N, edges show the action string and the child's
    # visit count. Children with fewer than threshold visits are pruned.
    if len(tree.children.items()) == 0:
        g.node(str(count), label="0")
    else:
        parent_count = count
        g.node(str(parent_count),
               label=str(int(np.sum(tree.N))) + os.linesep +
               "{:.3f}".format(np.sum(tree.W) / np.sum(tree.N)))
        count += 1
        for key, value in tree.children.items():
            if int(tree.N[key]) >= threshold:
                g.edge(str(parent_count),
                       str(count),
                       label=actionid2str(tree.s, key) + os.linesep +
                       str(int(tree.N[key])))
                get_graphviz_tree(value, g, count, threshold)
            count += int(np.sum(value.N)) + 1  # reserve an id range for the subtree's nodes
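
A usage sketch for get_graphviz_tree() with the standard graphviz package, assuming ai.prev_tree holds the root of the last MCTS search as in the MCTS code below:

from graphviz import Digraph

g = Digraph(format="png")
get_graphviz_tree(ai.prev_tree, g, threshold=10)
g.render("mcts_tree")  # writes the dot source "mcts_tree" and "mcts_tree.png"
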
Example #4
    def oneturn(self, color):
        global touched
        s = self.agents[color].act(self.state)
        if isinstance(self.agents[color], CNNAI):
            # dump the CNNAI agent's search tree for this turn as a Graphviz rendering
            g = self.agents[color].get_tree_for_graphviz()
            g.render(os.path.join("game_trees", "game_tree{}".format(self.state.turn)))
        if s == -1:
            return

        if isinstance(s, int):
            a = actionid2str(self.state, s)
        else:
            a = s
        if not self.state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            return
        self.agents[1 - color].prev_action = s
        self.state.display_cui()
        print(self.state.get_player_dist_from_goal())
        touched = False
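
oneturn() is GUI glue that plays one half-move per call. A hypothetical driver loop, assuming the surrounding class keeps the game state in self.state and the two agents in self.agents as above:

# hypothetical driver: alternate half-moves until the game terminates
while not game.state.terminate:
    game.oneturn(game.state.turn % 2)
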
Example #5
    def MCTS(self,
             state,
             max_node,
             C_puct,
             tau,
             showNQ=False,
             noise=0.,
             random_flip=False):
        p = self.p(state)  # prior policy from the network
        illegal = (p == 0.)
        old_p = p
        # mix uniform noise into the prior for exploration, keeping illegal moves at zero
        p = (1. - noise) * p + noise * np.random.rand(len(p))
        p[illegal] = 0.
        p = p / sum(p)
        #root_tree = Tree(state, p)
        # start from the tree kept by the previous search so statistics are reused
        root_tree = self.prev_tree
        root_tree.s = state
        if self.prev_action is not None:
            if self.prev_action in root_tree.children.keys():
                root_tree = root_tree.children[self.prev_action]
            else:
                root_tree = Tree(state, p)
        root_tree.P = p

        node_num = np.sum(root_tree.N)
        while node_num < max_node:
            # select
            nodess = []
            actionss = []
            for j in range(min(self.n_parallel, max_node)):
                _, _, nodes, actions = self.select(root_tree, C_puct)
                if nodes is None:
                    break
                nodess.append(nodes)
                actionss.append(actions)

                # virtual loss: make this path look worse so parallel selections diverge
                for node, action in zip(nodes, actions):
                    node.N[action] += self.virtual_loss_n
                    node.W[action] -= self.virtual_loss_n
                    node.Q[action] = node.W[action] / node.N[action]
            for nodes, actions in zip(nodess, actionss):
                # undo the virtual loss
                for node, action in zip(nodes, actions):
                    node.N[action] -= self.virtual_loss_n
                    node.W[action] += self.virtual_loss_n
                    if node.N[action] == 0:
                        node.Q[action] = 0.
                    else:
                        node.Q[action] = node.W[action] / node.N[action]

            states = []
            for nodes, actions in zip(nodess, actionss):
                s = state_copy(nodes[-1].s)
                #print([self.actionid2str(node.s, action) for node, action in zip(nodes, actions)])
                s.accept_action_str(actionid2str(s, actions[-1]))
                states.append(s)
            node_num += len(states)

            p = self.p_array(states, random_flip=random_flip)
            v = self.v_array(states, random_flip=random_flip)

            count = 0
            for s, nodes, actions in zip(states, nodess, actionss):
                if not s.terminate:
                    t = nodes[-1]
                    a = actions[-1]
                    if a not in t.children.keys():
                        t.children[a] = Tree(s, p[count])
                count += 1

            # backup
            count = 0
            for nodes, actions in zip(nodess, actionss):
                for node, action in zip(nodes, actions):
                    node.N[action] += 1
                    node.W[action] += v[count]
                    node.Q[action] = node.W[action] / node.N[action]
                count += 1
        if showNQ:
            print("p=")
            self.display_parameter(np.asarray(old_p * 1000, dtype="int32"))
            print("N=")
            self.display_parameter(np.asarray(root_tree.N, dtype="int32"))
            print("Q=")
            self.display_parameter(
                np.asarray(root_tree.Q * 1000, dtype="int32"))
            print("v={}".format(self.v(root_tree.s)))

        if tau == 0:
            action = np.argmax(root_tree.N)
        else:
            N2 = np.power(np.asarray(root_tree.N, dtype="float64"), 1. / tau)
            pi = N2 / np.sum(N2)
            action = np.random.choice(len(pi), p=pi)
        # the only actions that lead to a leaf (no child subtree) are winning moves
        if action in root_tree.children.keys():
            self.prev_tree = root_tree.children[action]
        action2 = np.argmax(root_tree.N)
        pi_ret = np.zeros((137, ))  # one-hot over the 137 actions; computed but not returned
        pi_ret[action2] = 1.
        return action, root_tree.N / np.sum(root_tree.N)
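
The select step used above is not shown, but the fields it touches (N, W, Q and the prior P) match the standard PUCT rule from AlphaZero: at each node, take the action maximizing Q plus an exploration bonus that grows with the prior and shrinks with the visit count. A minimal single-node sketch under that assumption:

import numpy as np

def puct_choice(node, C_puct):
    # U(a) = C_puct * P(a) * sqrt(sum_b N(b)) / (1 + N(a))
    U = C_puct * node.P * np.sqrt(np.sum(node.N)) / (1. + node.N)
    # a full select would also mask illegal actions (P == 0)
    return int(np.argmax(node.Q + U))
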
Example #6
def generate_data(AIs, play_num, noise=0.1, display=False, equal_draw=True):
    data = []
    for i in range(play_num):
        state = State()
        AIs[0].init_prev()
        AIs[1].init_prev()
        featuress = [[], [], [], []]
        # k (not i) so the outer game-loop index is not shadowed
        for k, b1, b2 in [(0, False, False), (1, True, False), (2, False, True), (3, True, True)]:
            featuress[k].append(state.feature_CNN(b1, b2))

        pis = []
        states = [state_copy(state)]
        while True:
            # per-move random temperature in [TAU_MIN, 1); nearly greedy after turn 20
            AIs[0].tau = np.random.rand() * (1. - TAU_MIN) + TAU_MIN
            AIs[1].tau = np.random.rand() * (1. - TAU_MIN) + TAU_MIN
            if state.turn >= 20:
                AIs[0].tau = TAU_MIN
                AIs[1].tau = TAU_MIN
            s, pi = AIs[0].act_and_get_pi(state, noise=noise)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[0].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[1].prev_action = s
            pis.append(pi)
            if display:
                state.display_cui()
            end = False
            # equal_draw: treat a repeated position as the end of the game (a draw)
            for state2 in states:
                if equal_draw and state == state2:
                    end = True
                    break
            if end:
                break
            states.append(state_copy(state))
            if state.terminate:
                break
                for k, b1, b2 in [(0, False, False), (1, True, False), (2, False, True), (3, True, True)]:
                    featuress[k].append(state.feature_CNN(b1, b2))
            s, pi = AIs[1].act_and_get_pi(state, noise=noise)
            a = actionid2str(state, s)
            while not state.accept_action_str(a):
                print("this action is impossible")
                s, pi = AIs[1].act_and_get_pi(state)
                a = actionid2str(state, s)
            AIs[0].prev_action = s
            pis.append(pi)
            if display:
                state.display_cui()
            end = False
            for state2 in states:
                if equal_draw and state == state2:
                    end = True
                    break
            if end:
                break
            states.append(state_copy(state))
            if state.terminate:
                break
                for k, b1, b2 in [(0, False, False), (1, True, False), (2, False, True), (3, True, True)]:
                    featuress[k].append(state.feature_CNN(b1, b2))
        del states
        if state.reward == 0:
            continue  # skip drawn games; only decisive results are used as training targets
        # emit four symmetric copies of each position, remapping pi to match each flip
        for feature1, feature2, feature3, feature4, pi in zip(featuress[0], featuress[1], featuress[2], featuress[3], pis):
            data.append((feature1, pi, state.reward))
            a = np.flip(pi[:64].reshape((8, 8)), 0).flatten()
            b = np.flip(pi[64:128].reshape((8, 8)), 0).flatten()
            mvarray1 = pi[128:].reshape((3, 3))
            mvarray2 = np.zeros((3, 3))
            for y in [-1, 0, 1]:
                for x in [-1, 0, 1]:
                    mvarray2[x, y] = mvarray1[-x, y]
            c = mvarray2.flatten()
            data.append((feature2, np.concatenate([a, b, c]), state.reward))
            a = np.flip(pi[:64].reshape((8, 8)), 1).flatten()
            b = np.flip(pi[64:128].reshape((8, 8)), 1).flatten()
            mvarray1 = pi[128:].reshape((3, 3))
            mvarray2 = np.zeros((3, 3))
            for y in [-1, 0, 1]:
                for x in [-1, 0, 1]:
                    mvarray2[x, y] = mvarray1[x, -y]
            c = mvarray2.flatten()
            data.append((feature3, np.concatenate([a, b, c]), -state.reward))
            a = np.flip(np.flip(pi[:64].reshape((8, 8)), 1), 0).flatten()
            b = np.flip(np.flip(pi[64:128].reshape((8, 8)), 1), 0).flatten()
            mvarray1 = pi[128:].reshape((3, 3))
            mvarray2 = np.zeros((3, 3))
            for y in [-1, 0, 1]:
                for x in [-1, 0, 1]:
                    mvarray2[x, y] = mvarray1[-x, -y]
            c = mvarray2.flatten()
            data.append((feature4, np.concatenate([a, b, c]), -state.reward))

    return data
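
Each position is stored four times with the 137-entry policy vector pi remapped to match the flip (two 8x8 wall planes plus a 3x3 move grid; the reward is negated for the variants whose reflection presumably swaps the players' sides). The 3x3 remap relies on Python's wrap-around indexing: the move grid is indexed by displacement mod 3 (0 -> row 0, +1 -> row 1, -1 -> row 2), so negating a displacement swaps rows 1 and 2 while row 0 stays put. A tiny check of that trick:

import numpy as np

m = np.arange(9).reshape(3, 3)
flipped = np.zeros((3, 3))
for y in [-1, 0, 1]:
    for x in [-1, 0, 1]:
        flipped[x, y] = m[-x, y]
# negating the x displacement swaps rows 1 (dx=+1) and 2 (dx=-1)
assert (flipped == m[[0, 2, 1], :]).all()
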
Example #7
    def MCTS(self,
             state,
             max_node,
             C_puct,
             tau,
             showNQ=False,
             noise=0.,
             random_flip=False):
        # When neither player has walls left and there is no branching, the search could be
        # skipped. On hold, because behavior would still change through prev_tree and the like.
        #search_node_num = max_node
        #if state.black_walls == 0 and state.white_walls == 0:
        #    x, y = state.color_p(state.turn % 2)
        #    if int(np.sum(state.movable_array(x, y, shortest_only=True))) == 1:
        #        search_node_num = 1
        p = self.p(state)
        illegal = (p == 0.)
        old_p = p
        p = (1. - noise) * p + noise * np.random.rand(len(p))
        p[illegal] = 0.
        p = p / sum(p)
        #root_tree = Tree(state, p)
        root_tree = self.prev_tree
        root_tree.s = state
        if self.prev_action is not None:
            if self.prev_action in root_tree.children.keys():
                root_tree = root_tree.children[self.prev_action]
            else:
                root_tree = Tree(state, p)
        root_tree.P = p

        node_num = np.sum(root_tree.N)
        while node_num < max_node:
            # select
            nodess = []
            actionss = []
            for j in range(min(self.n_parallel, max_node)):
                _, _, nodes, actions = self.select(root_tree, C_puct)
                if nodes is None:
                    break
                nodess.append(nodes)
                actionss.append(actions)

                # virtual loss: make this path look worse so parallel selections diverge
                for node, action in zip(nodes, actions):
                    node.N[action] += self.virtual_loss_n
                    if self.color == node.s.turn % 2:  # Q has opposite sign for the first and second player
                        node.W[action] -= self.virtual_loss_n
                    else:
                        node.W[action] += self.virtual_loss_n
                    node.Q[action] = node.W[action] / node.N[action]
            for nodes, actions in zip(nodess, actionss):
                # undo the virtual loss
                for node, action in zip(nodes, actions):
                    node.N[action] -= self.virtual_loss_n
                    if self.color == node.s.turn % 2:
                        node.W[action] += self.virtual_loss_n
                    else:
                        node.W[action] -= self.virtual_loss_n
                    if node.N[action] == 0:
                        node.Q[action] = 0.
                    else:
                        node.Q[action] = node.W[action] / node.N[action]

            states = []
            for nodes, actions in zip(nodess, actionss):
                s = state_copy(nodes[-1].s)
                #print([self.actionid2str(node.s, action) for node, action in zip(nodes, actions)])
                s.accept_action_str(actionid2str(s, actions[-1]))
                states.append(s)
            node_num += len(states)

            #p = self.p_array(states, random_flip=random_flip)
            v = self.v_array(states, random_flip=random_flip)

            count = 0
            for s, nodes, actions in zip(states, nodess, actionss):
                if not s.terminate:
                    t = nodes[-1]
                    a = actions[-1]
                    if a not in t.children.keys():
                        t.children[a] = Tree(s, None)
                count += 1

            # backup
            count = 0
            for nodes, actions in zip(nodess, actionss):
                for node, action in zip(nodes, actions):
                    node.N[action] += 1
                    node.W[action] += v[count]
                    node.Q[action] = node.W[action] / node.N[action]
                count += 1
        if showNQ:
            print("p=")
            self.display_parameter(np.asarray(old_p * 1000, dtype="int32"))
            print("N=")
            self.display_parameter(np.asarray(root_tree.N, dtype="int32"))
            print("Q=")
            self.display_parameter(
                np.asarray(root_tree.Q * 1000, dtype="int32"))
            print("v={}".format(self.v(root_tree.s)))

        if tau == 0:
            # greedy: keep only the most-visited actions (ties broken uniformly below)
            N2 = root_tree.N * (root_tree.N == np.max(root_tree.N))
        else:
            N2 = np.power(np.asarray(root_tree.N, dtype="float64"), 1. / tau)
        pi = N2 / np.sum(N2)
        action = np.random.choice(len(pi), p=pi)
        # the only actions that lead to a leaf (no child subtree) are winning moves
        if action in root_tree.children.keys():
            self.prev_tree = root_tree.children[action]
        action2 = np.argmax(root_tree.N)
        pi_ret = np.zeros((137, ))  # one-hot over the 137 actions; computed but not returned
        pi_ret[action2] = 1.
        self.tree_for_visualize = root_tree
        return action, root_tree.N / np.sum(root_tree.N)
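
Compared with Example #5, which special-cases tau == 0 with a plain argmax, this version folds the greedy case into the same sampling path by masking N down to its maxima before normalizing: pi is proportional to N^(1/tau) for tau > 0 and uniform over the most-visited actions at tau == 0. The temperature transform in isolation, as a small sketch:

import numpy as np

def visit_distribution(N, tau):
    # tau == 0: greedy over visit counts (ties broken uniformly)
    # tau == 1: play in proportion to visit counts
    N = np.asarray(N, dtype="float64")
    if tau == 0:
        N2 = N * (N == N.max())
    else:
        N2 = np.power(N, 1. / tau)
    return N2 / np.sum(N2)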