from operator import attrgetter

import numpy as np
import torch

def testing(self):
    training = True
    while training:
        # Build a fresh search tree around the current network, starting
        # both players from a fixed test position.
        tree = Tree(self.curr_network)
        tree.black_root = Node(np.array([
            0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6,
            7, 0, 0, 0, 0, 0, 0]))
        tree.black_root.build_actions()
        tree.set_probabilities(tree.black_root)
        tree.white_root = Node(np.array([
            0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6,
            7, 0, 0, 0, 1, 0, 0]))
        tree.white_root.build_actions()
        tree.set_probabilities(tree.white_root)
        self.env.set_env(tree.black_root.state)
        done = False
        while not done:
            tree.noise = True
            # Run 800 simulations from the current roots before picking a move.
            for counter in range(800):
                tree.env.set_env(tree.black_root.state)
                tree.test_simulations()
            # Each player plays its most-visited root edge.
            black_action_edge = max(tree.black_root.edges, key=attrgetter('visits'))
            white_action_edge = max(tree.white_root.edges, key=attrgetter('visits'))
            black_mc_prob = self.build_mc_prob(tree.black_root)
            print([edge.visits for edge in tree.black_root.edges])
            white_mc_prob = self.build_mc_prob(tree.white_root)
            env_action = self.env.step(black_action_edge.action, white_action_edge.action)
            black_score, white_score = self.env.get_player_scores()
            black_state, white_state = self.build_states(
                tree.black_root.state, black_action_edge, white_action_edge,
                env_action, black_score, white_score)
            # Advance both roots to the resulting position.
            black_root, white_root = self.find_roots(
                black_state, white_state, black_action_edge, white_action_edge)
            tree.black_root = black_root
            tree.white_root = white_root
            done = self.env.check_status()
            if done:
                print(self.env.eval_game())
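
# A minimal sketch of converting root visit counts into move
# probabilities, assuming the AlphaZero-style target
# pi(a) = N(a)^(1/tau) / sum_b N(b)^(1/tau). The name
# `visit_count_policy` and the `temperature` parameter are illustrative
# only; build_mc_prob above may be implemented differently.
def visit_count_policy(edges, temperature=1.0):
    counts = np.array([edge.visits for edge in edges], dtype=np.float64)
    # As temperature -> 0 this sharpens toward the most-visited move.
    scaled = counts ** (1.0 / temperature)
    return scaled / scaled.sum()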
def eval_net(self):
    updated_net = 0
    old_net = 0
    draw = 0
    for i in range(100):
        # One evaluation game: the learning network searches with `tree`,
        # the current network with `old_tree`.
        tree = Tree(self.learning_network)
        tree.init_tree()
        old_tree = Tree(self.curr_network)
        old_tree.init_tree()
        eval_env = Environment()
        done = False
        while not done:
            # 400 simulations per move for each network.
            for x in range(400):
                tree.env.set_env(tree.black_root.state)
                old_tree.env.set_env(old_tree.black_root.state)
                tree.double_agent_simulation()
                old_tree.double_agent_simulation()
            # Learning network plays black, current network plays white;
            # both choose their most-visited root edge.
            action_edge = max(tree.black_root.edges, key=attrgetter('visits'))
            action = action_edge.action
            old_action_edge = max(old_tree.white_root.edges, key=attrgetter('visits'))
            old_action = old_action_edge.action
            env_action = eval_env.step(action, old_action)
            new, old = eval_env.get_player_scores()
            black_state, white_state = self.build_states(
                tree.black_root.state, action_edge, old_action_edge,
                env_action, new, old)
            # Rebuild the learning network's roots at the new position.
            learning_black_root = Node(black_state)
            learning_black_root.build_actions()
            self.set_probabilities(learning_black_root, self.learning_network)
            learning_white_root = Node(white_state)
            learning_white_root.build_actions()
            self.set_probabilities(learning_white_root, self.learning_network)
            tree.black_root = learning_black_root
            tree.white_root = learning_white_root
            # Rebuild the current network's roots at the same position.
            curr_black_root = Node(black_state)
            curr_black_root.build_actions()
            self.set_probabilities(curr_black_root, self.curr_network)
            curr_white_root = Node(white_state)
            curr_white_root.build_actions()
            self.set_probabilities(curr_white_root, self.curr_network)
            old_tree.black_root = curr_black_root
            old_tree.white_root = curr_white_root
            done = eval_env.check_status()
        winner = eval_env.eval_game()
        if winner == 1:
            updated_net += 1
            print("Net won this game " + str(updated_net))
        elif winner == 0:
            draw += 1
            print("Draw " + str(draw))
        else:
            old_net += 1
            print("Old Net won. " + str(old_net))
    print("New Network won " + str(updated_net) + " matches against the old one.")
    print("Draws: " + str(draw))
    # Promote the learning network only if it wins at least 54% of the
    # decisive games; the decisive-game check also avoids dividing by
    # zero when every game is drawn.
    if updated_net + old_net > 0 and updated_net / (updated_net + old_net) >= 0.54:
        torch.save(self.learning_network.state_dict(), self.dir)
        self.curr_network.load_state_dict(torch.load(self.dir))
        self.curr_network.eval()
        print("---Network updated---")
    else:
        # Otherwise roll the learning network back to the last saved weights.
        self.learning_network.load_state_dict(torch.load(self.dir))
        self.learning_network.eval()
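
# A minimal sketch of the promotion gate above, factored into a helper;
# `should_promote` is a hypothetical name, not part of the codebase.
# Draws are excluded from the win rate, matching
# updated_net / (updated_net + old_net), and the decisive-game guard
# avoids a ZeroDivisionError when every evaluation game is drawn.
def should_promote(wins, losses, threshold=0.54):
    decisive = wins + losses
    return decisive > 0 and wins / decisive >= threshold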