Example #1
    # Assumes module-level imports: numpy as np and "from operator import attrgetter",
    # plus the project's Tree and Node classes.
    def testing(self):
        training = True

        while training:
            # Build a fresh search tree backed by the current network for each self-play game.
            tree = Tree(self.curr_network)

            # Root node for the black player's search tree, seeded with the initial state.
            tree.black_root = Node(np.array([
                0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                5, 6, 7, 0, 0, 0, 0, 0, 0]))
            tree.black_root.build_actions()
            tree.set_probabilities(tree.black_root)

            # Root node for the white player's search tree (same initial state, one cell set to 1).
            tree.white_root = Node(np.array([
                0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                5, 6, 7, 0, 0, 0, 1, 0, 0]))
            tree.white_root.build_actions()
            tree.set_probabilities(tree.white_root)
            self.env.set_env(tree.black_root.state)
            done = False
            while not done:
                # Turn on root noise for exploration, then run 800 MCTS simulations
                # from the current position before picking a move.
                tree.noise = True
                for _ in range(800):
                    tree.env.set_env(tree.black_root.state)
                    tree.test_simulations()

                # Each player plays the most-visited edge at its root; the visit
                # counts are also converted into Monte Carlo probability vectors.
                black_action_edge = max(tree.black_root.edges, key=attrgetter('visits'))
                white_action_edge = max(tree.white_root.edges, key=attrgetter('visits'))
                black_mc_prob = self.build_mc_prob(tree.black_root)
                print([edge.visits for edge in tree.black_root.edges])
                white_mc_prob = self.build_mc_prob(tree.white_root)

                # Step the environment with both chosen actions, read the new scores,
                # and re-root both trees on the resulting states.
                env_action = self.env.step(black_action_edge.action, white_action_edge.action)
                black_score, white_score = self.env.get_player_scores()
                black_state, white_state = self.build_states(tree.black_root.state, black_action_edge,
                                                             white_action_edge, env_action,
                                                             black_score, white_score)
                black_root, white_root = self.find_roots(black_state, white_state,
                                                         black_action_edge, white_action_edge)
                tree.black_root = black_root
                tree.white_root = white_root

                # Stop once the environment reports the game is over and print the result.
                done = self.env.check_status()
                if done:
                    print(self.env.eval_game())
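
Both examples pick moves by visit count, and Example #1 additionally calls
self.build_mc_prob(root) to turn the root's visit counts into a probability vector.
That helper is not shown in these examples; the following is only a minimal sketch
of what such a method could look like, assuming each edge exposes a .visits count:

    def build_mc_prob(self, root):
        # Normalize the root's edge visit counts into a probability
        # distribution (the Monte Carlo policy target).
        visits = np.array([edge.visits for edge in root.edges], dtype=np.float64)
        total = visits.sum()
        if total == 0:
            # No simulations reached this root; fall back to a uniform distribution.
            return np.full(len(root.edges), 1.0 / len(root.edges))
        return visits / total
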
Example #2
    # Assumes module-level imports: torch and "from operator import attrgetter",
    # plus the project's Tree, Node, and Environment classes.
    def eval_net(self):
        # Game counters: wins for the updated (learning) network, wins for the
        # current network, and draws.
        updated_net = 0
        old_net = 0
        draw = 0

        # Play 100 evaluation games: the learning network controls one tree,
        # the current network the other.
        for _ in range(100):
            tree = Tree(self.learning_network)
            tree.init_tree()
            old_tree = Tree(self.curr_network)
            old_tree.init_tree()
            eval_env = Environment()
            done = False

            while not done:
                # Run 400 simulations per move on each tree from its current root.
                for _ in range(400):
                    tree.env.set_env(tree.black_root.state)
                    old_tree.env.set_env(old_tree.black_root.state)
                    tree.double_agent_simulation()
                    old_tree.double_agent_simulation()
                # The learning network moves from its black root, the current
                # network from its white root; both pick the most-visited edge.
                action_edge = max(tree.black_root.edges, key=attrgetter('visits'))
                action = action_edge.action

                old_action_edge = max(old_tree.white_root.edges, key=attrgetter('visits'))
                old_action = old_action_edge.action

                # Apply both moves to the shared evaluation environment and build the follow-up states.
                env_action = eval_env.step(action, old_action)
                new, old = eval_env.get_player_scores()
                black_state, white_state = self.build_states(tree.black_root.state, action_edge,
                                                             old_action_edge, env_action, new, old)

                # Re-root the learning network's tree on the new states, with
                # priors taken from the learning network.
                learning_black_root = Node(black_state)
                learning_black_root.build_actions()
                self.set_probabilities(learning_black_root, self.learning_network)
                learning_white_root = Node(white_state)
                learning_white_root.build_actions()
                self.set_probabilities(learning_white_root, self.learning_network)
                tree.black_root = learning_black_root
                tree.white_root = learning_white_root

                # Re-root the current network's tree on the same states, with
                # priors taken from the current network.
                curr_black_root = Node(black_state)
                curr_black_root.build_actions()
                self.set_probabilities(curr_black_root, self.curr_network)
                curr_white_root = Node(white_state)
                curr_white_root.build_actions()
                self.set_probabilities(curr_white_root, self.curr_network)
                old_tree.black_root = curr_black_root
                old_tree.white_root = curr_white_root

                done = eval_env.check_status()
            # Tally the result: 1 means the updated net won, 0 is a draw, anything else the old net won.
            winner = eval_env.eval_game()
            if winner == 1:
                updated_net += 1
                print("Net won this game " + str(updated_net))
            elif winner == 0:
                draw += 1
                print("Draw " + str(draw))
            else:
                old_net += 1
                print("Old Net won. " + str(old_net))

        print("New Network won " + str(updated_net) + " matches against the old one.")
        print("Draws: "+str(draw))

        # Promote the learning network only if it won at least 54% of the decisive
        # games; otherwise roll it back to the last saved checkpoint. The extra
        # guard avoids a division by zero when every game is a draw.
        if (updated_net + old_net) > 0 and updated_net / (updated_net + old_net) >= 0.54:
            torch.save(self.learning_network.state_dict(), self.dir)
            self.curr_network.load_state_dict(torch.load(self.dir))
            self.curr_network.eval()
            print("---Network updated---")
        else:
            self.learning_network.load_state_dict(torch.load(self.dir))
            self.learning_network.eval()
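
Both root-rebuilding blocks in Example #2 call self.set_probabilities(root, network)
to attach prior probabilities from a network to a freshly built root. That method is
not shown in these examples; the following is only a minimal sketch of one possible
version, assuming the network maps a state tensor to a (policy, value) pair and that
each edge exposes .action and a .prior attribute:

    def set_probabilities(self, root, network):
        # Evaluate the network on the root state and copy the matching policy
        # entry onto each edge as its prior probability.
        with torch.no_grad():
            state = torch.tensor(root.state, dtype=torch.float32).unsqueeze(0)
            policy, _value = network(state)
            policy = policy.squeeze(0)
        for edge in root.edges:
            edge.prior = float(policy[edge.action])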