Example #1
 def play_later():
     result = self.move_to(stone, to_)
     if opp_q is not None:
         opp_valid = rule.valid_action(self.board(), -player)
         self.show_qtext(opp_q, opp_valid, hide=False)
     if result == rule.ACCQUIRE:
         # opponent's turn to move
         self.switch_player_and_play()
     elif result == rule.WIN:
         logger.info('GAME OVER, WINNER IS %s', stone.player.name)
         self.game_over(stone.player)
Example #2
 def probabilities(self, board, player):
     x = self.feature_1st(board, player)
     valid = rule.valid_action(board, player)
     x = np.array([x])
     p = self.model.predict(x)[0]
     p = p.reshape(5, 5, 4)
     vp = p * valid  # probabilities over all legal moves
     if vp.max() == 0:
         vp = valid / valid.sum()
     else:
         vp = vp / vp.sum()
     return vp
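A hedged follow-up sketch: the masked distribution returned above can be consumed directly, assuming valid is the 5x5x4 0/1 mask produced by rule.valid_action (one plane per move direction), which is what the reshape to (5, 5, 4) implies. The sample_move helper below is illustrative and not part of the original code.

import numpy as np

def sample_move(vp):
    # vp: the 5x5x4 masked, renormalized distribution returned by probabilities()
    flat = vp.ravel()
    idx = np.random.choice(flat.size, p=flat)
    row, col, action = np.unravel_index(idx, vp.shape)
    return (row, col), action  # source square and move-direction index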
Example #3
 def feature_1st(board, player):
     """
     Features of the position from the current player's (first-person) perspective
     :param board:   the board
     :param player:  the current player
     :return: features of the current position (5x5xN)
     """
     space = (board == 0).astype(np.int8).reshape((5, 5, 1))
     self = (board == player).astype(np.int8).reshape((5, 5, 1))
     opponent = (board == -player).astype(np.int8).reshape((5, 5, 1))
     v_locations = rule.valid_location(board, player).reshape((5, 5, 1))
     v_actions = rule.valid_action(board, player)
     bias = np.ones((5, 5, 1))
     return np.concatenate(
         (space, self, opponent, v_locations, v_actions, bias), axis=2)
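For orientation, a small shape check, assuming rule.init_board() returns a 5x5 integer array with 1/-1/0 entries (as the simulate example below suggests) and rule.valid_action yields a 5x5x4 mask; under those assumptions the stacked tensor has 1+1+1+1+4+1 = 9 channels. This is a sketch, not a test from the original project.

board = rule.init_board()         # 5x5 int array holding 1 / -1 / 0
x = feature_1st(board, player=1)
print(x.shape)                    # expected (5, 5, 9) under the assumptions above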
Example #4
 def feature(board, player):
     """
     Features of the board position
     :param board:   the board
     :param player:  the current player
     :return: features of the current position (5x5x10)
     """
     space = (board == 0).astype(np.int8).reshape((5, 5, 1))
     black = (board == 1).astype(np.int8).reshape((5, 5, 1))
     white = (board == -1).astype(np.int8).reshape((5, 5, 1))
     who = np.ones((5, 5, 1)) if player == 1 else np.zeros((5, 5, 1))
     v_locations = rule.valid_location(board, player).reshape((5, 5, 1))
     v_actions = rule.valid_action(board, player)
     bias = np.ones((5, 5, 1))
     return np.concatenate(
         (space, black, white, who, v_locations, v_actions, bias), axis=2)
Example #5
def simulate(nw0, nw1, init='fixed'):
    board = rule.init_board() if init == 'fixed' else rule.random_init_board()
    player = 1
    records = Record()
    while True:
        nw = nw0 if player == 1 else nw1
        try:
            bd = board.copy()
            from_, action, vp, p = nw.policy(board, player)
            # print('>', from_, action)
            assert board[from_] == player
            to_ = tuple(np.add(from_, rule.actions_move[action]))
            command, eat = rule.move(board, from_, to_)
            reward = len(eat)
            records.add(bd, from_, action, reward, vp, win=command == rule.WIN)
        except NoActionException:
            return Record(), 0
        except Exception as e:
            logging.info('board is:')
            logging.info(board)
            logging.info('player is: %s', player)
            valid = rule.valid_action(board, player)
            logging.info('predict is:')
            logging.info(nw.p)
            logging.info('sum is: %s', nw.p.sum())
            logging.info('valid action is:')
            logging.info(nw.valid)
            logging.info('p * valid is:')
            logging.info(nw.vp)
            logging.info('from:%s, action:%s', from_, action)
            logging.info('prob is: %s', valid[from_][action])
            records.save('records/train/1st_')
            raise e
        # if eat:
        #     print(player, from_, to_, eat, N)
        if command == rule.WIN:
            logging.info('%s WIN, step use: %s', str(player), records.length())
            return records, player
        if records.length() > 10000:
            logging.info('too many moves: %s', records.length())
            return Record(), 0
        player = -player
        board = rule.flip_board(board)
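A sketch of driving one self-play game with the function above, assuming nw0 and nw1 are policy wrappers exposing policy(board, player) as used inside simulate; the save prefix reuses the path from the error handler.

records, winner = simulate(nw0, nw1, init='random')
if winner != 0:                        # 0 means no result (NoActionException or move limit)
    records.save('records/train/1st_')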
Example #6
    def _play(self, player, from_, to_, p, opp_q=None):
        logger.info('from:%s, to_:%s', from_, to_)
        logger.debug('p:\n%s', p)
        board = self.board()
        valid_action = rule.valid_action(board, player)
        logger.debug('valid_action:\n%s', valid_action)
        self.show_qtext(p, valid_action)
        self.show_select(from_, to_)
        stone = self.stone(from_)

        def play_later():
            result = self.move_to(stone, to_)
            if opp_q is not None:
                opp_valid = rule.valid_action(self.board(), -player)
                self.show_qtext(opp_q, opp_valid, hide=False)
            if result == rule.ACCQUIRE:
                # opponent's turn to move
                self.switch_player_and_play()
            elif result == rule.WIN:
                logger.info('GAME OVER, WINNER IS %s', stone.player.name)
                self.game_over(stone.player)

        self.play_timer = self.window.after(int(self.period * 1000),
                                            play_later)
Example #7
 def policy_1st(self, board, player):
     x = self.feature_1st(board, player)
     valid = rule.valid_action(board, player)
     self.set_dropout(0)
     return self._policy(x, board, valid)
Example #8
 def policy(self, board, player):
     x = self.feature_1st(board, player).flatten()
     valid = rule.valid_action(board, player)
     return self._policy(x, board, player, valid)