Code Example #1
File: policy_gradient3.py Project: vipmath/hexse
    def play_deterministic_game(self, starting_intgamestate, thislogits, thisxnode, otherlogits, otherxnode, thisSess, otherSess):
        self.input_tensor.fill(0)
        black_groups = unionfind()
        white_groups = unionfind()
        turn = HexColor.BLACK
        intgamestate = []
        for imove in starting_intgamestate:
            black_groups, white_groups = GameCheck.updateUF(intgamestate, black_groups, white_groups,
                                                            imove, turn, self.boardsize)
            turn = HexColor.EMPTY - turn
            intgamestate.append(imove)

        game_status = GameCheck.winner(black_groups, white_groups)
        empty_points = []
        for i in range(self.boardsize * self.boardsize):
            if i not in intgamestate:
                empty_points.append(i)
        # Randomly assign one color to the auxiliary player; the assert below
        # checks it is BLACK (1) or WHITE (2).
        aux_player_color = np.random.randint(HexColor.BLACK, HexColor.EMPTY)
        assert aux_player_color == 1 or aux_player_color == 2
        first_player = turn
        while game_status == HexColor.EMPTY:
            self.input_tensor.fill(0)
            self.input_tensor_builder.set_position_tensors_in_batch(self.input_tensor, 0, intgamestate)
            if aux_player_color != turn:
                logits_score = thisSess.run(thislogits, feed_dict={thisxnode: self.input_tensor})
            else:
                logits_score = otherSess.run(otherlogits, feed_dict={otherxnode: self.input_tensor})
            if turn == first_player:
                # The first player to move from the given position plays
                # greedily: argmax of the logits over the legal points.
                logits_score = np.squeeze(logits_score)
                best_action = -1
                largest_score = 0
                for action in empty_points:
                    if best_action == -1:
                        largest_score = logits_score[action]
                        best_action = action
                    elif logits_score[action] > largest_score:
                        largest_score = logits_score[action]
                        best_action = action
                selected_int_move = best_action
            else:
                # The other side samples stochastically from the softmax.
                selected_int_move = softmax_selection(logits_score, empty_points)
            black_groups, white_groups = GameCheck.updateUF(intgamestate, black_groups, white_groups,
                                                            selected_int_move, turn, self.boardsize)
            game_status = GameCheck.winner(black_groups, white_groups)
            intgamestate.append(selected_int_move)
            empty_points.remove(selected_int_move)
            turn = HexColor.EMPTY - turn

        # Reward from Black's perspective; shorter games earn a larger
        # magnitude: |reward| = 0.25 + 1/length.
        reward = (0.25 + 1.0 / len(intgamestate)) if game_status == HexColor.BLACK else (-1.0 / len(intgamestate) - 0.25)
        return intgamestate, reward
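
Every example on this page calls softmax_selection without showing its definition. Below is a minimal sketch of such a helper, assuming it takes the raw network logits, a list of legal point indices, and an optional temperature; the actual hexse/pnnhex implementations may differ (example #3, for instance, passes the list of already-played moves, so that variant presumably filters occupied points internally).

import numpy as np

def softmax_selection(logits, candidate_points, temperature=1.0):
    # Sample one point from candidate_points, weighted by softmax(logits / T).
    logits = np.squeeze(np.asarray(logits, dtype=np.float64))
    scores = logits[candidate_points] / temperature
    scores -= scores.max()              # stabilize the exponentials
    probs = np.exp(scores)
    probs /= probs.sum()
    return int(np.random.choice(candidate_points, p=probs))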
Code Example #2
def run_single_match(black_agent,
                     white_agent,
                     boardsize,
                     opening='',
                     verbose=False):
    game = []
    black_agent.sendCommand("clear_board")
    white_agent.sendCommand("clear_board")
    black_groups = unionfind()
    white_groups = unionfind()
    turn = HexColor.BLACK
    if opening:
        int_move = MoveConvert.raw_move_to_int_move(opening, boardsize)
        # Register the opening while turn is still BLACK (the mover), and only
        # then hand the turn to White, mirroring the order of the main loop.
        black_groups, white_groups = GameCheck.updateUF(
            game, black_groups, white_groups, int_move, turn, boardsize)
        game.append(int_move)
        white_agent.play_black(opening)
        black_agent.play_black(opening)
        turn = HexColor.WHITE

    game_status = HexColor.EMPTY
    while game_status == HexColor.EMPTY:
        if turn == HexColor.BLACK:
            move = black_agent.genmove_black()
            if move == "resign":
                print("black resign")
                print(state_to_str(game, boardsize))
                return HexColor.WHITE
            white_agent.play_black(move)
        else:
            move = white_agent.genmove_white()
            if move == "resign":
                print("white resign")
                print(state_to_str(game, boardsize))
                return HexColor.BLACK
            black_agent.play_white(move)
        int_move = MoveConvert.raw_move_to_int_move(move, boardsize)
        black_groups, white_groups = GameCheck.updateUF(
            game, black_groups, white_groups, int_move, turn, boardsize)
        game_status = GameCheck.winner(black_groups, white_groups)
        game.append(int_move)
        if verbose:
            print(state_to_str(game, boardsize))
        turn = HexColor.EMPTY - turn
        sys.stdout.flush()
    print("gamestatus", game_status)
    print(state_to_str(game, boardsize))
    return game_status
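
The turn-swapping idiom turn = HexColor.EMPTY - turn appears in every example. The sketch below uses constants inferred from the randint/assert pair in example #1 (BLACK = 1, WHITE = 2, EMPTY = 3) to show why the subtraction flips the player; the projects' real HexColor class may define them differently.

class HexColor:
    BLACK, WHITE, EMPTY = 1, 2, 3

turn = HexColor.BLACK
turn = HexColor.EMPTY - turn    # 3 - 1 = 2, i.e. HexColor.WHITE
assert turn == HexColor.WHITE
turn = HexColor.EMPTY - turn    # 3 - 2 = 1, back to HexColor.BLACK
assert turn == HexColor.BLACK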
Code Example #3
File: policygradient.py Project: cgao3/pnnhex
    def play_one_batch_games(self, sess, otherSess, thisLogit, otherLogit, data_node, batch_game_size, batch_reward):

        this_win_count = 0
        other_win_count = 0

        # Randomly choose which color "this" player takes for the whole batch.
        this_player = random.randint(1, 2)
        games = []
        for ind in range(batch_game_size):
            self.board_tensor.fill(0)
            currentplayer = HexColor.BLACK
            gamestatus = HexColor.EMPTY
            black_group = unionfind()
            white_group = unionfind()
            count = 0
            moves = []
            while gamestatus == HexColor.EMPTY:
                # Rebuild the input tensor from all moves played so far, so the
                # network always scores the current position.
                RLTensorUtil.makeTensorInBatch(self.board_tensor, 0, moves)
                if currentplayer == this_player:
                    logit = sess.run(thisLogit, feed_dict={data_node: self.board_tensor})
                else:
                    logit = otherSess.run(otherLogit, feed_dict={data_node: self.board_tensor})
                action = softmax_selection(logit, moves)
                black_group, white_group = GameCheckUtil.updateUF(moves, black_group, white_group, action, currentplayer)
                currentplayer = HexColor.EMPTY - currentplayer
                gamestatus = GameCheckUtil.winner(black_group, white_group)
                moves.append(action)
                count += 1
            if gamestatus == this_player:
                this_win_count += 1
            else:
                other_win_count += 1
            # Shorter wins earn larger rewards; losses are mirrored.
            R = 1.0 / count if gamestatus == this_player else -1.0 / count
            games.append([-1] + moves)  # first hypothesized action is -1
            batch_reward[ind] = R

        print("this player win: ", this_win_count, "other player win: ", other_win_count)
        return (games, this_win_count, other_win_count)
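
The (games, batch_reward) pair built above is a standard REINFORCE batch: one move sequence plus one scalar return per game. As a hedged illustration of how such a batch could drive a policy update, the toy reinforce_update below uses a state-independent tabular softmax policy (purely illustrative; the projects train a TensorFlow network instead).

import numpy as np

def reinforce_update(theta, games, batch_reward, lr=0.01):
    # theta: (num_points,) logits of a toy state-independent softmax policy.
    probs = np.exp(theta - theta.max())
    probs /= probs.sum()
    grad = np.zeros_like(theta)
    for moves, R in zip(games, batch_reward):
        for a in moves[1:]:                 # skip the leading -1 placeholder
            onehot = np.zeros_like(theta)
            onehot[a] = 1.0
            grad += R * (onehot - probs)    # R * d log pi(a) / d theta
    return theta + lr * grad / max(len(games), 1)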
Code Example #4
    def playonegame(self, sess, logits, boardsize, x_input_node,
                    starting_intgamestate):
        self.input_tensor.fill(0)
        black_groups = unionfind()
        white_groups = unionfind()
        turn = HexColor.BLACK
        intgamestate = []
        for imove in starting_intgamestate:
            black_groups, white_groups = GameCheck.updateUF(
                intgamestate, black_groups, white_groups, imove, turn,
                boardsize)
            turn = HexColor.EMPTY - turn
            intgamestate.append(imove)

        # Who made the last move of the given prefix; the playout reward below
        # is scored from that player's perspective.
        whoplayedlastmove = HexColor.BLACK if len(
            intgamestate) % 2 == 1 else HexColor.WHITE
        game_status = GameCheck.winner(black_groups, white_groups)
        empty_points = []
        for i in range(boardsize * boardsize):
            if i not in intgamestate:
                empty_points.append(i)
        while game_status == HexColor.EMPTY:
            self.input_tensor_builder.set_position_tensors_in_batch(
                self.input_tensor, 0, intgamestate)
            logits_score = sess.run(
                logits, feed_dict={x_input_node: self.input_tensor})
            selected_int_move = softmax_selection(logits_score, empty_points)
            black_groups, white_groups = GameCheck.updateUF(
                intgamestate, black_groups, white_groups, selected_int_move,
                turn, boardsize)
            game_status = GameCheck.winner(black_groups, white_groups)
            intgamestate.append(selected_int_move)
            empty_points.remove(selected_int_move)
            turn = HexColor.EMPTY - turn

        reward = 1.0 if game_status == whoplayedlastmove else -1.0
        return reward
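
All of these examples lean on unionfind together with GameCheck.winner / GameCheckUtil.winner to detect the end of the game. The sketch below shows the usual mechanism, assuming the common Hex convention of two virtual edge nodes per color (Black connects top to bottom, White connects left to right); the projects' real classes may differ in detail.

class HexColor:
    BLACK, WHITE, EMPTY = 1, 2, 3

class unionfind:
    def __init__(self):
        self.parent = {}

    def find(self, x):
        self.parent.setdefault(x, x)
        root = x
        while self.parent[root] != root:
            root = self.parent[root]
        self.parent[x] = root           # one-step path compression
        return root

    def union(self, a, b):
        self.parent[self.find(a)] = self.find(b)

def winner(black_groups, white_groups, north, south, east, west):
    # The game ends once one color's two virtual edges are connected.
    if black_groups.find(north) == black_groups.find(south):
        return HexColor.BLACK
    if white_groups.find(east) == white_groups.find(west):
        return HexColor.WHITE
    return HexColor.EMPTY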
Code Example #5
File: tournament.py Project: cgao3/pnnhex
def run_single_match(black_agent, white_agent, verbose=False):
    game = []
    black_agent.sendCommand("clear_board")
    white_agent.sendCommand("clear_board")
    black_groups = unionfind()
    white_groups = unionfind()
    turn = HexColor.BLACK
    gamestatus = HexColor.EMPTY
    while gamestatus == HexColor.EMPTY:
        if turn == HexColor.BLACK:
            move = black_agent.genmove_black()
            if move == "resign":
                # Black resigns: White wins. Return the same HexColor code as
                # the normal exit below.
                print("black resign")
                print(state_to_str(game))
                return HexColor.WHITE
            white_agent.play_black(move)
        else:
            move = white_agent.genmove_white()
            if move == "resign":
                print("white resign")
                print(state_to_str(game))
                return HexColor.BLACK
            black_agent.play_white(move)
        imove = MoveConvertUtil.rawMoveToIntMove(move)
        black_groups, white_groups = GameCheckUtil.updateUF(game, black_groups, white_groups, imove, turn)
        gamestatus = GameCheckUtil.winner(black_groups, white_groups)
        game.append(imove)
        if verbose:
            print(state_to_str(game))
        turn = HexColor.EMPTY - turn
        sys.stdout.flush()
    print("gamestatus", gamestatus)
    print(state_to_str(game))
    return gamestatus
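
Both tournament drivers convert GTP-style coordinates into integer board indices via MoveConvertUtil.rawMoveToIntMove / MoveConvert.raw_move_to_int_move. A hypothetical sketch of that mapping, assuming row-major indexing with "a1" at index 0; the real converters may use a different orientation or origin.

def raw_move_to_int_move(raw_move, boardsize):
    col = ord(raw_move[0].lower()) - ord('a')   # 'a' -> column 0
    row = int(raw_move[1:]) - 1                 # '1' -> row 0
    return row * boardsize + col

assert raw_move_to_int_move("a1", 13) == 0
assert raw_move_to_int_move("b1", 13) == 1
assert raw_move_to_int_move("a2", 13) == 13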
Code Example #6
    def playbatchgame(self,
                      sess,
                      logits,
                      boardsize,
                      batchsize,
                      x_input_node,
                      topk,
                      is_adversarial_pg=False):
        intmoveseqlist = []
        gameresultlist = []
        batch_cnt = 0
        while batch_cnt < batchsize:
            self.input_tensor.fill(0)
            black_groups = unionfind()
            white_groups = unionfind()
            turn = HexColor.BLACK
            intgamestate = []
            game_status = HexColor.EMPTY
            # Play a random-length opening prefix of 1-19 softmax moves.
            k = np.random.randint(1, 20)
            cnt = 0
            empty_points = list(range(boardsize * boardsize))
            while game_status == HexColor.EMPTY and cnt < k:
                self.input_tensor_builder.set_position_tensors_in_batch(
                    self.input_tensor, 0, intgamestate)
                logits_score = sess.run(
                    logits, feed_dict={x_input_node: self.input_tensor})

                selected_int_move = softmax_selection(logits_score,
                                                      empty_points,
                                                      temperature=5.0)
                black_groups, white_groups = GameCheck.updateUF(
                    intgamestate, black_groups, white_groups,
                    selected_int_move, turn, boardsize)
                game_status = GameCheck.winner(black_groups, white_groups)
                intgamestate.append(selected_int_move)
                empty_points.remove(selected_int_move)
                turn = HexColor.EMPTY - turn
                cnt += 1
            if game_status != HexColor.EMPTY:
                # The random prefix already finished the game; discard it and
                # draw a new one.
                print('wasted!')
                continue
            intmoveseqlist.append(intgamestate)
            if is_adversarial_pg:
                self.input_tensor_builder.set_position_tensors_in_batch(
                    self.input_tensor, 0, intgamestate)
                logits_score = sess.run(
                    logits, feed_dict={x_input_node: self.input_tensor})
                logits_score = np.squeeze(logits_score)
                top_points = np.argpartition(-logits_score, kth=topk)[:topk]
                top_points = top_points.tolist()
                # Filter into a fresh list: removing from top_points while
                # iterating over it would skip elements.
                top_points = [i for i in top_points if i in empty_points]
                if len(top_points) == 0:
                    top_points = np.random.choice(
                        empty_points, min(topk, len(empty_points)),
                        replace=False).tolist()
                # Worst-case (adversarial) reward: evaluate each strong reply
                # and keep the minimum playout result.
                min_reward = 2.0
                for i in top_points:
                    intgamestate.append(i)
                    reward = self.playonegame(
                        sess,
                        logits,
                        boardsize,
                        x_input_node,
                        starting_intgamestate=intgamestate)
                    # playonegame scores from the perspective of the player who
                    # made move i (the opponent here), so negate it.
                    reward = -reward
                    min_reward = min(reward, min_reward)
                    intgamestate.remove(i)
                gameresultlist.append(min_reward)
            else:
                reward = self.playonegame(sess,
                                          logits,
                                          boardsize,
                                          x_input_node,
                                          starting_intgamestate=intgamestate)
                gameresultlist.append(reward)
            batch_cnt += 1
        return intmoveseqlist, gameresultlist
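
The adversarial branch above pairs np.argpartition top-k selection with a legality filter. A standalone illustration of the pattern, with made-up scores:

import numpy as np

scores = np.array([0.1, 2.0, -1.0, 0.7, 1.5])
topk = 2
# argpartition places the indices of the topk largest scores, in no
# particular order, in the first topk slots.
top_points = np.argpartition(-scores, kth=topk)[:topk].tolist()
legal = [0, 1, 3, 4]
# Filter into a fresh list rather than removing while iterating.
top_points = [i for i in top_points if i in legal]
print(top_points)   # e.g. [1, 4]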