Example #1
0
    def _train_one_epoch(self, lr, discount, exploration_rate):
        """Play one training game and update ``self.w`` after every own move.

        Approximate Q-learning with a linear function approximator: after
        each move the weights are nudged along
        ``lr * (reward + discount * max_a' Q(s', a') - Q(s, a)) * feats``.

        Args:
            lr: learning rate for the weight update.
            discount: discount factor applied to the estimated future reward.
            exploration_rate: epsilon for epsilon-greedy action selection.

        Returns:
            The mean of the TD differences accumulated during this epoch
            (a rough convergence indicator).
        """
        # Opponent: depth-1 minimax most of the time, a uniformly random
        # move otherwise — the randomness diversifies the visited states.
        prob_oppo_random = 0.4
        agent_oppo = AlphaBetaAgent(opponent_color(self.color), depth=1)
        agent_oppo_random = RandomAgent(opponent_color(self.color))

        board = Board()
        # First move is fixed on the center of the board (same convention
        # as the game driver).
        first_move = (10, 10)
        board.put_stone(first_move, check_legal=False)

        # If the fixed first move was not ours, let the opponent reply once
        # so that it is the learner's turn when the loop starts.
        if board.next != self.color:
            board.put_stone(agent_oppo_random.get_action(board), check_legal=False)

        diffs = []
        while board.winner is None:
            legal_actions = board.get_legal_actions()

            # Get next action with exploration (epsilon-greedy)
            if random.uniform(0, 1) < exploration_rate:
                action_next = random.choice(legal_actions)
            else:
                action_next = max(legal_actions, key=lambda action: self._calc_q(board, action))

            # Keep current features and Q estimate before the board mutates
            feats = self.rl_env.extract_features(board, action_next, self.color)
            q = self.w.dot(feats)

            # Apply next action
            board.put_stone(action_next, check_legal=False)

            # Let opponent play (unless our move already ended the game)
            if board.winner is None:
                if random.uniform(0, 1) < prob_oppo_random:
                    board.put_stone(agent_oppo_random.get_action(board), check_legal=False)
                else:
                    board.put_stone(agent_oppo.get_action(board), check_legal=False)

            # Calc TD difference: reward observed after the opponent's
            # reply, plus the discounted best next Q, minus the prior
            # estimate for the move we just played.
            reward_now = self.rl_env.get_reward(board, self.color)
            reward_future = 0
            if board.winner is None:
                next_legal_actions = board.get_legal_actions()
                next_qs = [self._calc_q(board, action) for action in next_legal_actions]
                reward_future = max(next_qs)
            difference = reward_now + discount * reward_future - q
            diffs.append(difference)

            # Apply weight update (gradient step on the squared TD error)
            self.w += (lr * difference * feats)

        return mean(diffs)
Example #2
0
def get_liberties(board: Board, color):
    """Collect the liberty points of *color*'s groups and its opponent's.

    Returns a tuple ``(liberties_self, liberties_oppo)`` of sets of points,
    each being the union of the liberties of every group on that side.
    """
    own = set()
    other = set()
    for grp in board.groups[color]:
        own |= grp.liberties
    for grp in board.groups[opponent_color(color)]:
        other |= grp.liberties
    return own, other
Example #3
0
    def _start_with_ui(self):
        """Start the game with GUI, alternating moves until there is a winner.

        Also times the game and, when ``self.dir_save`` is set, saves a
        snapshot of the final board as a JPEG file.
        """
        self.ui.initialize()
        self.time_elapsed = time.time()  # holds start timestamp until the game ends

        # First move is fixed on the center of board
        first_move = (10, 10)
        self.board.put_stone(first_move, check_legal=False)
        # put_stone already flipped board.next, so the stone just placed
        # belongs to the opponent of board.next.
        self.ui.draw(first_move, opponent_color(self.board.next))

        # Take turns to play move
        while self.board.winner is None:
            if self.board.next == 'BLACK':
                point = self.perform_one_move(self.agent_black)
            else:
                point = self.perform_one_move(self.agent_white)

            # Check if action is legal; an illegal point is ignored and the
            # same player is asked again.
            if point not in self.board.legal_actions:
                continue

            # Apply action
            prev_legal_actions = self.board.legal_actions.copy()
            self.board.put_stone(point, check_legal=False)
            # Remove previous legal actions on board
            for action in prev_legal_actions:
                self.ui.remove(action)
            # Draw new point
            self.ui.draw(point, opponent_color(self.board.next))
            # Update new legal actions and any removed groups
            if self.board.winner:
                # NOTE(review): the inner loop below reuses the name `point`,
                # shadowing the move above; harmless here because the while
                # loop exits right after, but worth renaming.
                for group in self.board.removed_groups:
                    for point in group.points:
                        self.ui.remove(point)
                if self.board.end_by_no_legal_actions:
                    print('Game ends early (no legal action is available for %s)' % self.board.next)
            else:
                # Highlight the new set of legal actions
                for action in self.board.legal_actions:
                    self.ui.draw(action, 'BLUE', 8)

        self.time_elapsed = time.time() - self.time_elapsed
        if self.dir_save:
            path_file = join(self.dir_save, 'go_' + str(time.time()) + '.jpg')
            self.ui.save_image(path_file)
            print('Board image saved in file ' + path_file)
Example #4
0
def get_num_groups_with_k_liberties(board: Board, color, k):
    """Count the groups having exactly *k* liberties on each side.

    Returns a tuple ``(count_self, count_oppo)`` for *color* and its
    opponent respectively.
    """
    count_self = sum(1 for g in board.groups[color] if g.num_liberty == k)
    count_oppo = sum(
        1 for g in board.groups[opponent_color(color)] if g.num_liberty == k)
    return count_self, count_oppo
Example #5
0
 def get_action(self, board):
     """Return a legal action adjacent to the most opponent groups.

     Ties are broken uniformly at random. Returns None when the board
     offers no legal action.

     Bug fixed: the original computed ``max(num_groups)`` before the
     emptiness guard, so an empty action list raised ValueError instead
     of returning None.
     """
     actions = board.get_legal_actions()
     if not actions:
         return None
     num_groups = [
         len(
             board.libertydict.get_groups(opponent_color(self.color),
                                          action)) for action in actions
     ]
     max_num_groups = max(num_groups)
     idx_candidates = [
         idx for idx, num in enumerate(num_groups) if num == max_num_groups
     ]
     return actions[random.choice(idx_candidates)]
Example #6
0
def get_group_scores(board: Board, color):
    """Return the three highest ``eval_group`` scores for each side.

    Groups with exactly one liberty are skipped (single-liberty groups are
    handled elsewhere as endangered). Each returned list is padded with
    zeros so it always has length 3, highest score first.
    """
    scores_own = sorted(
        (eval_group(g, board)
         for g in board.groups[color] if g.num_liberty != 1),
        reverse=True)
    scores_opp = sorted(
        (eval_group(g, board)
         for g in board.groups[opponent_color(color)] if g.num_liberty != 1),
        reverse=True)
    return (scores_own + [0, 0, 0])[:3], (scores_opp + [0, 0, 0])[:3]
Example #7
0
    def extract_features(cls, board: Board, action, color, isself=True, generatesuccessor=True):

        """Return ``(features, isself)`` for playing *action* as *color*.

        The feature vector has length ``2 * cls.get_num_feats()`` in the
        terminal/forced branches and 12 in the general branch — presumably
        ``get_num_feats() == 6`` so these agree; TODO confirm.

        When the position is forced (only one legal action), the method
        recurses from the opponent's perspective with ``not isself``, and
        with ``generatesuccessor=False`` so the stone is placed directly.
        NOTE(review): in that recursive path ``board.put_stone(action)``
        mutates the board object passed in rather than a copy — confirm the
        callers only ever pass successor states here.
        """
        if generatesuccessor:
            board = board.generate_successor_state(action)
        else:
            board.put_stone(action)
        oppo = opponent_color(color)

        # Terminal positions: one-hot "win" / "lose" indicator vectors.
        if board.winner == color:
            return np.array([0] * (cls.get_num_feats()) + [1] + [0] * (cls.get_num_feats() - 1)) , isself
        elif board.winner == oppo:
            return np.array([1] + [0] * (cls.get_num_feats() * 2 - 1)), isself

        # Sanity check: features are expected to be extracted after color
        # has moved, i.e. when it is NOT color's turn.
        if color == board.next: # Now opponent's move
            print('f**k! Extract features when color==next!')

        num_endangered_self, num_endangered_oppo = get_num_endangered_groups(board, color)

        if num_endangered_self>0:
            return np.array([1] + [0] * (cls.get_num_feats() * 2 - 1)) , isself # Doomed to lose

        elif len(board.legal_actions) == 1: #One choice only
            return cls.extract_features(board, board.legal_actions[0], oppo, not isself, False)

        elif num_endangered_oppo>1:
            return np.array([0] * (cls.get_num_feats()) + [1] + [0] * (cls.get_num_feats() - 1)) , isself # Doomed to win 

        # Features for groups with exactly two liberties
        num_groups_2lbt_self, num_groups_2lbt_oppo = get_num_groups_with_k_liberties(board, color, 2)

        # Features for number of groups (scaled by 1/3, presumably for
        # normalization — confirm against the weight magnitudes)
        num_groups_self = len(board.groups[color])/3.
        num_groups_oppo = len(board.groups[oppo])/3.

        # Features for liberty variance (top-3 group scores per side)
        self_group_score, oppo_group_score = get_group_scores(board, color)

        # Leading 0 in each half keeps the slots used by the one-hot
        # win/lose indicators above.
        feats = [0,num_groups_2lbt_self, num_groups_self] + self_group_score + [0, num_groups_2lbt_oppo ,num_groups_oppo] + oppo_group_score # Add bias
        if len(feats) !=12:
            print('!!!!!!!!!!!!!!!!!!!',len(feats),'@@@@@@@@@@@@@@@@@@@@@')
        return np.array(feats), isself 
Example #8
0
def eval_group(group: Group, board: Board):
    """Evaluate the liveliness of group; higher score, more endangered."""
    # Plenty of liberties means safe; a single liberty is critical.
    if group.num_liberty > 3:
        return 0
    if group.num_liberty == 1:
        return 5

    # From here on the group has either 2 or 3 liberties. The axis-wise
    # variance of the liberty points measures how clustered they are.
    xs = [p[0] for p in group.liberties]
    ys = [p[1] for p in group.liberties]
    var_sum = np.var(xs) + np.var(ys)
    if var_sum < 0.1:
        print('var_sum < 0.1')

    # Count liberties shared with other friendly groups; a liberty touched
    # by three friendly groups and no enemy group makes the group safe.
    shared = 0
    get_groups = board.libertydict.get_groups
    for lbt in group.liberties:
        n_self = len(get_groups(group.color, lbt))
        n_oppo = len(get_groups(opponent_color(group.color), lbt))
        if n_self == 3 and n_oppo == 0:  # Group is safe
            return 0
        if n_self == 2 or n_self == 3:
            shared += 1

    # Endangerment score: inversely related to liberty count and spread,
    # with a divisor chosen per sharing pattern.
    root = np.sqrt(group.num_liberty)
    if shared == 1 and var_sum <= 0.5:
        return 1 / root / var_sum / 4.
    if shared == 2 and var_sum > 0.3:
        return 1 / root / var_sum / 8.
    score = 1 / root / var_sum / 6.
    if root < 1.1:
        print('f**k!', group.num_liberty, board.winner)
    if var_sum < 0.2:
        print('shit!')
    return score
Example #9
0
def evaluate(board: Board, color):
    """Heuristic static evaluation of *board*; *color* has the next action.

    Returns a large positive score when *color* is winning (larger for a
    faster win), a large negative one when losing, and otherwise a small
    noisy score based on group and liberty pressure.
    """
    # Score for win or lose
    score_win = 1000 - board.counter_move  # Prefer faster game
    if board.winner:
        return score_win if board.winner == color else -score_win

    oppo = opponent_color(color)
    # Score for endangered groups: an endangered opponent group can be
    # captured on our next move; two of our own endangered groups cannot
    # both be saved.
    num_endangered_self, num_endangered_oppo = get_num_endangered_groups(
        board, color)
    if num_endangered_oppo > 0:
        return score_win - 10  # Win in the next move
    elif num_endangered_self > 1:
        return -(score_win - 10)  # Lose in the next move

    # Score for dangerous liberties
    liberties_self, liberties_oppo = get_liberties(board, color)
    for liberty in liberties_oppo:
        if is_dangerous_liberty(board, liberty, oppo):
            return score_win / 2  # Good probability to win in the next next move
    for liberty in liberties_self:
        if is_dangerous_liberty(board, liberty, color):
            # Our two groups share a dangerous liberty; we can only be saved
            # if some counter-attack point exists on their combined liberties.
            self_groups = board.libertydict.get_groups(color, liberty)
            liberties = self_groups[0].liberties | self_groups[1].liberties
            able_to_save = False
            for lbt in liberties:
                if len(board.libertydict.get_groups(oppo, lbt)) > 0:
                    able_to_save = True
                    break
            if not able_to_save:
                return -score_win / 2  # Good probability to lose in the next next move

    # Score for groups: reward pressuring opponent groups down to 2 liberties
    num_groups_2lbt_self, num_groups_2lbt_oppo = get_num_groups_with_k_liberties(
        board, color, 2)
    score_groups = num_groups_2lbt_oppo - num_groups_2lbt_self

    # Score for liberties: penalize liberties shared among several groups of
    # the same color (they concentrate weakness).
    num_shared_liberties_self = 0
    num_shared_liberties_oppo = 0
    for liberty in liberties_self:
        num_shared_liberties_self += len(
            board.libertydict.get_groups(color, liberty)) - 1
    for liberty in liberties_oppo:
        num_shared_liberties_oppo += len(
            board.libertydict.get_groups(oppo, liberty)) - 1
    score_liberties = num_shared_liberties_oppo - num_shared_liberties_self

    # NOTE: a per-group eval_group scoring term was tried here and removed
    # because it didn't help.

    # Multiplicative Gaussian noise (presumably numpy.random.normal —
    # confirm the import) breaks ties between equally scored moves.
    return score_groups * normal(1, 0.1) + score_liberties * normal(1, 0.1)
Example #10
0
    def extract_features(cls, board: Board, action, color):
        """Return a numpy array of features for playing *action* as *color*.

        Features are computed on the successor state produced by *action*,
        so the caller's *board* is left untouched. The vector has
        ``cls.get_num_feats()`` entries: win flag, endangered-group flags,
        guaranteed lose/win flags, 2-liberty group balance, shared-liberty
        balance, group-count difference, mean liberty variance per side,
        and a constant bias term.
        """
        board = board.generate_successor_state(action)
        oppo = opponent_color(color)

        # Features for win: a winning move short-circuits everything else.
        feat_win = 1 if board.winner == color else 0
        if feat_win == 1:
            return np.array([feat_win] + [0] * (cls.get_num_feats() - 1))

        # Features for endangered groups (single-liberty groups)
        num_endangered_self, num_endangered_oppo = get_num_endangered_groups(board, color)
        feat_exist_endangered_self = 1 if num_endangered_self > 0 else 0
        feat_more_than_one_endangered_oppo = 1 if num_endangered_oppo > 1 else 0

        # Features for dangerous liberties: a dangerous liberty of ours means
        # likely loss; an unsavable dangerous opponent liberty means likely win.
        feat_exist_guarantee_losing = 0
        feat_exist_guarantee_winning = 0
        liberties_self, liberties_oppo = get_liberties(board, color)
        for liberty in liberties_self:
            if is_dangerous_liberty(board, liberty, color):
                feat_exist_guarantee_losing = 1
                break
        for liberty in liberties_oppo:
            if is_dangerous_liberty(board, liberty, oppo):
                oppo_groups = board.libertydict.get_groups(oppo, liberty)
                liberties = oppo_groups[0].liberties | oppo_groups[1].liberties
                able_to_save = False
                for lbt in liberties:
                    if len(board.libertydict.get_groups(color, lbt)) > 0:
                        able_to_save = True
                        break
                if not able_to_save:
                    feat_exist_guarantee_winning = 1
                    break

        # Features for groups with exactly two liberties
        num_groups_2lbt_self, num_groups_2lbt_oppo = get_num_groups_with_k_liberties(board, color, 2)
        feat_groups_2lbt = num_groups_2lbt_oppo - num_groups_2lbt_self

        # Features for shared liberties (liberties touching several groups of
        # the same color concentrate weakness)
        num_shared_liberties_self = 0
        num_shared_liberties_oppo = 0
        for liberty in liberties_self:
            num_shared_liberties_self += len(board.libertydict.get_groups(color, liberty)) - 1
        for liberty in liberties_oppo:
            num_shared_liberties_oppo += len(board.libertydict.get_groups(oppo, liberty)) - 1
        feat_shared_liberties = num_shared_liberties_oppo - num_shared_liberties_self

        # Features for number of groups
        feat_num_groups_diff = len(board.groups[color]) - len(board.groups[oppo])

        # Features for liberty variance
        var_self, var_oppo = [], []
        for group in board.groups[color]:
            var_self.append(calc_group_liberty_var(group))
        for group in board.groups[oppo]:
            var_oppo.append(calc_group_liberty_var(group))
        # Bug fixed: np.mean([]) returns nan (with a RuntimeWarning) when a
        # side has no groups, which would poison every later dot product
        # against the weight vector; use 0.0 for an empty side instead.
        feat_var_self_mean = np.mean(var_self) if var_self else 0.0
        feat_var_oppo_mean = np.mean(var_oppo) if var_oppo else 0.0

        feats = [feat_win, feat_exist_endangered_self, feat_more_than_one_endangered_oppo,
                 feat_exist_guarantee_losing, feat_exist_guarantee_winning, feat_groups_2lbt,
                 feat_shared_liberties, feat_num_groups_diff, feat_var_self_mean,
                 feat_var_oppo_mean, 1]  # Add bias
        return np.array(feats)