def play_deterministic_game(self, starting_intgamestate, thislogits, thisxnode, otherlogits, otherxnode, thisSess, otherSess):
    """Play one game to completion from a given opening, pitting two networks
    against each other.

    The two (session, logits, input-node) triples belong to two different
    models; which model scores a given position depends on a randomly drawn
    ``aux_player_color``. The side to move first from the opening
    (``first_player``) always picks its move greedily (argmax over legal
    points); the other side samples via ``softmax_selection``.

    :param starting_intgamestate: opening move sequence (integer points).
    :param thislogits/thisxnode/thisSess: graph nodes + session of model A.
    :param otherlogits/otherxnode/otherSess: graph nodes + session of model B.
    :returns: (full move sequence, scalar reward). Reward is positive iff
        BLACK wins, with magnitude 0.25 + 1/len(moves) shrinking for longer
        games (the conditional expression spans the whole right-hand side).
    """
    self.input_tensor.fill(0)
    black_groups = unionfind()
    white_groups = unionfind()
    turn = HexColor.BLACK
    intgamestate = []
    # Replay the opening to rebuild both union-find connectivity structures
    # and determine whose turn it is afterwards.
    for imove in starting_intgamestate:
        black_groups, white_groups = GameCheck.updateUF(intgamestate, black_groups, white_groups, imove, turn, self.boardsize)
        # HexColor.EMPTY - turn flips BLACK <-> WHITE (the assert below
        # implies BLACK==1, WHITE==2, EMPTY==3 — TODO confirm in HexColor).
        turn = HexColor.EMPTY - turn
        intgamestate.append(imove)
    game_status = GameCheck.winner(black_groups, white_groups)
    # All points not used by the opening are legal moves.
    empty_points = []
    for i in range(self.boardsize * self.boardsize):
        if i not in intgamestate:
            empty_points.append(i)
    # Randomly assign which color the "other" model plays this game.
    aux_player_color=np.random.randint(HexColor.BLACK, HexColor.EMPTY)
    assert aux_player_color == 1 or aux_player_color == 2
    first_player=turn
    while game_status == HexColor.EMPTY:
        # Rebuild the feature tensor from scratch each ply.
        self.input_tensor.fill(0)
        self.input_tensor_builder.set_position_tensors_in_batch(self.input_tensor, 0, intgamestate)
        # "other" model scores positions where aux_player_color is to move;
        # "this" model scores the rest.
        if aux_player_color != turn:
            logits_score = thisSess.run(thislogits, feed_dict={thisxnode: self.input_tensor})
        else:
            logits_score = otherSess.run(otherlogits, feed_dict={otherxnode: self.input_tensor})
        if turn == first_player:
            # Deterministic branch: argmax over legal points only.
            logits_score = np.squeeze(logits_score)
            best_action=-1
            largest_score=0
            for action in empty_points:
                if best_action == -1:
                    # First legal point seeds the running max.
                    largest_score = logits_score[action]
                    best_action = action
                elif logits_score[action] > largest_score:
                    largest_score=logits_score[action]
                    best_action = action
            selected_int_move = best_action
        else:
            # Stochastic branch: sample proportional to softmax of logits.
            selected_int_move = softmax_selection(logits_score, empty_points)
        black_groups, white_groups = GameCheck.updateUF(intgamestate, black_groups, white_groups, selected_int_move, turn, self.boardsize)
        game_status = GameCheck.winner(black_groups, white_groups)
        intgamestate.append(selected_int_move)
        empty_points.remove(selected_int_move)
        turn = HexColor.EMPTY - turn
    # Whole expression is one conditional: (0.25 + 1/n) if BLACK won,
    # else (-1/n - 0.25); shorter wins get larger-magnitude reward.
    reward = 0.25 + 1.0/len(intgamestate) if game_status == HexColor.BLACK else -1.0/len(intgamestate) - 0.25
    #print('played one game')
    return intgamestate, reward
def playonegame(self, sess, logits, boardsize, x_input_node, starting_intgamestate):
    """Roll a game out to completion by softmax-sampling from one network.

    :param sess: TF session owning ``logits``.
    :param logits: graph node producing per-point move scores.
    :param boardsize: board side length.
    :param x_input_node: placeholder fed with ``self.input_tensor``.
    :param starting_intgamestate: opening move sequence (integer points).
    :returns: +1.0 if the eventual winner is the color that made the LAST
        move of the opening sequence, else -1.0.
    """
    self.input_tensor.fill(0)
    bgroup, wgroup = unionfind(), unionfind()
    to_play = HexColor.BLACK
    moves = []
    # Replay the opening to rebuild connectivity and the side to move.
    for m in starting_intgamestate:
        bgroup, wgroup = GameCheck.updateUF(moves, bgroup, wgroup, m, to_play, boardsize)
        to_play = HexColor.EMPTY - to_play  # flip BLACK <-> WHITE
        moves.append(m)
    # Odd-length opening => BLACK played its last move.
    last_mover = HexColor.BLACK if len(moves) % 2 == 1 else HexColor.WHITE
    status = GameCheck.winner(bgroup, wgroup)
    vacant = [p for p in range(boardsize * boardsize) if p not in moves]
    while status == HexColor.EMPTY:
        self.input_tensor_builder.set_position_tensors_in_batch(self.input_tensor, 0, moves)
        scores = sess.run(logits, feed_dict={x_input_node: self.input_tensor})
        move = softmax_selection(scores, vacant)
        bgroup, wgroup = GameCheck.updateUF(moves, bgroup, wgroup, move, to_play, boardsize)
        status = GameCheck.winner(bgroup, wgroup)
        moves.append(move)
        vacant.remove(move)
        to_play = HexColor.EMPTY - to_play
    return 1.0 if status == last_mover else -1.0
def playbatchgame(self, sess, logits, boardsize, batchsize, x_input_node, topk, is_adversarial_pg=False):
    """Generate a batch of (position, reward) training pairs by self-play.

    Each sample is produced by playing a short random opening (1..19 plies,
    high-temperature softmax), then either:
      * standard PG: rolling the game out once via ``playonegame``; or
      * adversarial PG (``is_adversarial_pg=True``): probing the network's
        ``topk`` preferred replies and keeping the WORST (minimum) negated
        rollout reward, i.e. scoring the position by its best refutation.

    :returns: (list of opening move sequences, list of scalar rewards),
        both of length ``batchsize``.
    """
    intmoveseqlist = []
    gameresultlist = []
    batch_cnt = 0
    while batch_cnt < batchsize:
        self.input_tensor.fill(0)
        black_groups = unionfind()
        white_groups = unionfind()
        turn = HexColor.BLACK
        intgamestate = []
        game_status = HexColor.EMPTY
        # Random opening length; high temperature below diversifies openings.
        k = np.random.randint(1, 20)
        cnt = 0
        # Fresh game, so every point on the board is empty.
        empty_points = list(range(boardsize * boardsize))
        while game_status == HexColor.EMPTY and cnt < k:
            self.input_tensor_builder.set_position_tensors_in_batch(
                self.input_tensor, 0, intgamestate)
            logits_score = sess.run(
                logits, feed_dict={x_input_node: self.input_tensor})
            selected_int_move = softmax_selection(logits_score, empty_points, temperature=5.0)
            black_groups, white_groups = GameCheck.updateUF(
                intgamestate, black_groups, white_groups, selected_int_move, turn, boardsize)
            game_status = GameCheck.winner(black_groups, white_groups)
            intgamestate.append(selected_int_move)
            empty_points.remove(selected_int_move)
            turn = HexColor.EMPTY - turn
            cnt += 1
        if game_status != HexColor.EMPTY:
            # Opening accidentally finished the game; discard and retry.
            print('wasted!')
            continue
        intmoveseqlist.append(intgamestate)
        if is_adversarial_pg:
            self.input_tensor_builder.set_position_tensors_in_batch(
                self.input_tensor, 0, intgamestate)
            logits_score = sess.run(
                logits, feed_dict={x_input_node: self.input_tensor})
            logits_score = np.squeeze(logits_score)
            top_points = np.argpartition(-logits_score, kth=topk)[:topk]
            # BUG FIX: the original removed items from top_points while
            # iterating over it, which skips elements and can let an
            # occupied point through; filter with a comprehension instead.
            top_points = [p for p in top_points.tolist() if p in empty_points]
            if len(top_points) == 0:
                # Fall back to random legal probes. Sample WITHOUT
                # replacement (original default duplicated probes) and clamp
                # to the number of legal points so choice() cannot raise.
                n_probe = min(topk, len(empty_points))
                top_points = np.random.choice(empty_points, n_probe, replace=False).tolist()
            min_reward = 2.0  # sentinel above the max possible reward (+1)
            for p in top_points:
                # Temporarily play the probe move, roll out, then undo it.
                intgamestate.append(p)
                reward = self.playonegame(
                    sess, logits, boardsize, x_input_node,
                    starting_intgamestate=intgamestate)
                # Negate: reward is from the probe-mover's perspective.
                reward = -reward
                min_reward = min(reward, min_reward)
                # pop() the move we just appended (remove() would scan and
                # could match a duplicate earlier in the list).
                intgamestate.pop()
            gameresultlist.append(min_reward)
        else:
            reward = self.playonegame(sess, logits, boardsize, x_input_node,
                                      starting_intgamestate=intgamestate)
            gameresultlist.append(reward)
        batch_cnt += 1
    return intmoveseqlist, gameresultlist