def _train_one_epoch(self, lr, discount, exploration_rate):
    """Play one training game and return the mean TD difference of this epoch."""
    # Opponent: depth-1 minimax mixed with random moves
    prob_oppo_random = 0.4
    agent_oppo = AlphaBetaAgent(opponent_color(self.color), depth=1)
    agent_oppo_random = RandomAgent(opponent_color(self.color))

    board = Board()
    first_move = (10, 10)
    board.put_stone(first_move, check_legal=False)
    if board.next != self.color:
        board.put_stone(agent_oppo_random.get_action(board), check_legal=False)

    diffs = []
    while board.winner is None:
        legal_actions = board.get_legal_actions()

        # Epsilon-greedy: explore with probability exploration_rate, otherwise pick the best Q
        if random.uniform(0, 1) < exploration_rate:
            action_next = random.choice(legal_actions)
        else:
            action_next = max(legal_actions, key=lambda action: self._calc_q(board, action))

        # Keep the current features and Q estimate
        feats = self.rl_env.extract_features(board, action_next, self.color)
        q = self.w.dot(feats)

        # Apply the chosen action
        board.put_stone(action_next, check_legal=False)

        # Let the opponent play
        if board.winner is None:
            if random.uniform(0, 1) < prob_oppo_random:
                board.put_stone(agent_oppo_random.get_action(board), check_legal=False)
            else:
                board.put_stone(agent_oppo.get_action(board), check_legal=False)

        # Temporal-difference error
        reward_now = self.rl_env.get_reward(board, self.color)
        reward_future = 0
        if board.winner is None:
            next_legal_actions = board.get_legal_actions()
            next_qs = [self._calc_q(board, action) for action in next_legal_actions]
            reward_future = max(next_qs)
        difference = reward_now + discount * reward_future - q
        diffs.append(difference)

        # Weight update along the feature direction
        self.w += lr * difference * feats

    return mean(diffs)
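# The loop above is approximate (linear) Q-learning: Q(s, a) = w . phi(s, a), and the weights
# move along the TD error. Below is a minimal, self-contained sketch of just that update rule,
# with toy feature vectors and made-up reward values standing in for this repo's Board and rl_env.
import numpy as np

lr, discount = 0.01, 0.9
w = np.zeros(4)                              # weight vector, one entry per feature

feats = np.array([0.2, 1.0, 0.0, 1.0])       # phi(s, a) for the chosen action (toy values)
q = w.dot(feats)                             # current estimate Q(s, a) = w . phi(s, a)

reward_now = 0.0                             # reward observed after the move (toy value)
q_next_max = 0.5                             # max over next legal actions of Q(s', a') (toy value)
difference = reward_now + discount * q_next_max - q   # TD error

w += lr * difference * feats                 # same update as self.w += lr * difference * feats
print(w)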
def get_liberties(board: Board, color):
    """Return the union of liberties of `color`'s groups and of the opponent's groups."""
    liberties_self = set()
    liberties_oppo = set()
    for group in board.groups[color]:
        liberties_self = liberties_self | group.liberties
    for group in board.groups[opponent_color(color)]:
        liberties_oppo = liberties_oppo | group.liberties
    return liberties_self, liberties_oppo
def _start_with_ui(self):
    """Start the game with GUI."""
    self.ui.initialize()
    self.time_elapsed = time.time()

    # The first move is fixed on the center of the board
    first_move = (10, 10)
    self.board.put_stone(first_move, check_legal=False)
    self.ui.draw(first_move, opponent_color(self.board.next))

    # Take turns to play moves
    while self.board.winner is None:
        if self.board.next == 'BLACK':
            point = self.perform_one_move(self.agent_black)
        else:
            point = self.perform_one_move(self.agent_white)

        # Check if the action is legal
        if point not in self.board.legal_actions:
            continue

        # Apply the action
        prev_legal_actions = self.board.legal_actions.copy()
        self.board.put_stone(point, check_legal=False)

        # Remove the previous legal-action markers from the UI
        for action in prev_legal_actions:
            self.ui.remove(action)

        # Draw the new stone
        self.ui.draw(point, opponent_color(self.board.next))

        # Draw the new legal actions, or clean up removed groups when the game has ended
        if self.board.winner:
            for group in self.board.removed_groups:
                for p in group.points:
                    self.ui.remove(p)
            if self.board.end_by_no_legal_actions:
                print('Game ends early (no legal action is available for %s)' % self.board.next)
        else:
            for action in self.board.legal_actions:
                self.ui.draw(action, 'BLUE', 8)

    self.time_elapsed = time.time() - self.time_elapsed
    if self.dir_save:
        path_file = join(self.dir_save, 'go_' + str(time.time()) + '.jpg')
        self.ui.save_image(path_file)
        print('Board image saved in file ' + path_file)
def get_num_groups_with_k_liberties(board: Board, color, k):
    """Count the groups of each side that have exactly k liberties."""
    num_groups_self = 0
    num_groups_oppo = 0
    for group in board.groups[color]:
        if group.num_liberty == k:
            num_groups_self += 1
    for group in board.groups[opponent_color(color)]:
        if group.num_liberty == k:
            num_groups_oppo += 1
    return num_groups_self, num_groups_oppo
def get_action(self, board):
    """Prefer the legal action that touches the most opponent groups; break ties randomly."""
    actions = board.get_legal_actions()
    if not actions:
        # Return early: max() below would raise on an empty list
        return None
    num_groups = [len(board.libertydict.get_groups(opponent_color(self.color), action))
                  for action in actions]
    max_num_groups = max(num_groups)
    idx_candidates = [idx for idx, num in enumerate(num_groups) if num == max_num_groups]
    return actions[random.choice(idx_candidates)]
def get_group_scores(board: Board, color):
    """Return the three highest group scores for each side, padded with zeros."""
    selfscore = []
    opponentscore = []
    for group in board.groups[color]:
        if group.num_liberty != 1:
            selfscore.append(eval_group(group, board))
    for group in board.groups[opponent_color(color)]:
        if group.num_liberty != 1:
            opponentscore.append(eval_group(group, board))
    selfscore.sort(reverse=True)
    selfscore.extend([0, 0, 0])
    opponentscore.sort(reverse=True)
    opponentscore.extend([0, 0, 0])
    return selfscore[:3], opponentscore[:3]
def extract_features(cls, board: Board, action, color, isself=True, generatesuccessor=True):
    """Return a numpy array of features, together with the isself flag."""
    if generatesuccessor:
        board = board.generate_successor_state(action)
    else:
        board.put_stone(action)
    oppo = opponent_color(color)

    # Terminal positions: one-hot win/lose indicators
    if board.winner == color:
        return np.array([0] * cls.get_num_feats() + [1] + [0] * (cls.get_num_feats() - 1)), isself
    elif board.winner == oppo:
        return np.array([1] + [0] * (cls.get_num_feats() * 2 - 1)), isself

    if color == board.next:  # After the move it should be the opponent's turn
        print('Warning: extracting features when color == board.next!')

    num_endangered_self, num_endangered_oppo = get_num_endangered_groups(board, color)
    if num_endangered_self > 0:
        # Doomed to lose
        return np.array([1] + [0] * (cls.get_num_feats() * 2 - 1)), isself
    elif len(board.legal_actions) == 1:
        # Only one choice left: evaluate it from the opponent's perspective
        return cls.extract_features(board, board.legal_actions[0], oppo, not isself, False)
    elif num_endangered_oppo > 1:
        # Doomed to win
        return np.array([0] * cls.get_num_feats() + [1] + [0] * (cls.get_num_feats() - 1)), isself

    # Features for groups with two liberties
    num_groups_2lbt_self, num_groups_2lbt_oppo = get_num_groups_with_k_liberties(board, color, 2)

    # Features for the number of groups (scaled)
    num_groups_self = len(board.groups[color]) / 3.
    num_groups_oppo = len(board.groups[oppo]) / 3.

    # Features for group liveliness scores
    self_group_score, oppo_group_score = get_group_scores(board, color)

    # Slots 0 and 6 are reserved for the lose/win indicators set in the returns above
    feats = [0, num_groups_2lbt_self, num_groups_self] + self_group_score \
        + [0, num_groups_2lbt_oppo, num_groups_oppo] + oppo_group_score
    if len(feats) != 12:
        print('Warning: unexpected feature length %d (expected 12)' % len(feats))
    return np.array(feats), isself
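# Illustrative layout of the 12-dimensional feature vector built above. The indices are
# inferred from the code; the names are descriptive labels, not identifiers from the repo.
FEATURE_LAYOUT = [
    'lose_indicator',                 # 0: set to 1 in the lost / "doomed to lose" returns
    'self_groups_with_2_liberties',   # 1
    'self_num_groups_scaled',         # 2: len(groups[color]) / 3
    'self_group_score_1st',           # 3-5: top-3 eval_group() scores, zero-padded
    'self_group_score_2nd',
    'self_group_score_3rd',
    'win_indicator',                  # 6: set to 1 in the won / "doomed to win" returns
    'oppo_groups_with_2_liberties',   # 7
    'oppo_num_groups_scaled',         # 8: len(groups[oppo]) / 3
    'oppo_group_score_1st',           # 9-11: opponent's top-3 eval_group() scores, zero-padded
    'oppo_group_score_2nd',
    'oppo_group_score_3rd',
]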
def eval_group(group: Group, board: Board):
    """Evaluate the liveliness of a group; the higher the score, the more endangered it is."""
    if group.num_liberty > 3:
        return 0
    elif group.num_liberty == 1:
        return 5

    # From here on the group has either 2 or 3 liberties.
    var_x = np.var([x[0] for x in group.liberties])
    var_y = np.var([x[1] for x in group.liberties])
    var_sum = var_x + var_y
    if var_sum < 0.1:
        print('Warning: var_sum < 0.1')

    num_shared_liberty = 0
    for liberty in group.liberties:
        num_shared_self_groups = len(board.libertydict.get_groups(group.color, liberty))
        num_shared_oppo_groups = len(board.libertydict.get_groups(opponent_color(group.color), liberty))
        if num_shared_self_groups == 3 and num_shared_oppo_groups == 0:
            # The group is safe
            return 0
        elif num_shared_self_groups == 2 or num_shared_self_groups == 3:
            num_shared_liberty += 1

    if num_shared_liberty == 1 and var_sum <= 0.5:
        score = 1 / np.sqrt(group.num_liberty) / var_sum / 4.
    elif num_shared_liberty == 2 and var_sum > 0.3:
        score = 1 / np.sqrt(group.num_liberty) / var_sum / 8.
    else:
        score = 1 / np.sqrt(group.num_liberty) / var_sum / 6.

    if np.sqrt(group.num_liberty) < 1.1:
        print('Warning: unexpectedly small num_liberty:', group.num_liberty, board.winner)
    if var_sum < 0.2:
        print('Warning: var_sum < 0.2')
    return score
def evaluate(board: Board, color):
    """Evaluate the board for `color`, assuming `color` has the next action."""
    # Score for win or lose
    score_win = 1000 - board.counter_move  # Prefer a faster win
    if board.winner:
        return score_win if board.winner == color else -score_win
    oppo = opponent_color(color)

    # Score for endangered groups
    num_endangered_self, num_endangered_oppo = get_num_endangered_groups(board, color)
    if num_endangered_oppo > 0:
        return score_win - 10  # Win in the next move
    elif num_endangered_self > 1:
        return -(score_win - 10)  # Lose in the next move

    # Score for dangerous liberties
    liberties_self, liberties_oppo = get_liberties(board, color)
    for liberty in liberties_oppo:
        if is_dangerous_liberty(board, liberty, oppo):
            return score_win / 2  # Good chance to win within the next two moves
    for liberty in liberties_self:
        if is_dangerous_liberty(board, liberty, color):
            self_groups = board.libertydict.get_groups(color, liberty)
            liberties = self_groups[0].liberties | self_groups[1].liberties
            able_to_save = False
            for lbt in liberties:
                if len(board.libertydict.get_groups(oppo, lbt)) > 0:
                    able_to_save = True
                    break
            if not able_to_save:
                return -score_win / 2  # Good chance to lose within the next two moves

    # Score for groups with two liberties
    num_groups_2lbt_self, num_groups_2lbt_oppo = get_num_groups_with_k_liberties(board, color, 2)
    score_groups = num_groups_2lbt_oppo - num_groups_2lbt_self

    # Score for shared liberties
    num_shared_liberties_self = 0
    num_shared_liberties_oppo = 0
    for liberty in liberties_self:
        num_shared_liberties_self += len(board.libertydict.get_groups(color, liberty)) - 1
    for liberty in liberties_oppo:
        num_shared_liberties_oppo += len(board.libertydict.get_groups(oppo, liberty)) - 1
    score_liberties = num_shared_liberties_oppo - num_shared_liberties_self

    # Score for group liveliness (tried, but it didn't help):
    # score_groups_self = []
    # score_groups_oppo = []
    # for group in board.groups[color]:
    #     if group.num_liberty > 1:
    #         score_groups_self.append(eval_group(group, board))
    # for group in board.groups[opponent_color(color)]:
    #     if group.num_liberty > 1:
    #         score_groups_oppo.append(eval_group(group, board))
    # score_groups_self.sort(reverse=True)
    # score_groups_self += [0, 0]
    # score_groups_oppo.sort(reverse=True)
    # score_groups_oppo += [0, 0]
    # finals = score_groups_oppo[0] - score_groups_self[0] + score_groups_oppo[1] - score_groups_self[1]

    # A small random perturbation breaks ties between otherwise equal positions
    return score_groups * normal(1, 0.1) + score_liberties * normal(1, 0.1)
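# evaluate() is intended as the static evaluation at the depth cutoff of a minimax search
# (the repo's AlphaBetaAgent). The sketch below shows one way such a search could use it;
# it is an illustration, not the repo's implementation. It treats evaluate() as approximately
# zero-sum and reuses the Board API seen above (next, winner, get_legal_actions,
# generate_successor_state).
def negamax(board, depth, alpha=float('-inf'), beta=float('inf')):
    """Sketch: score the position for the side to move (board.next) with alpha-beta pruning."""
    if board.winner is not None or depth == 0:
        # evaluate() scores the position for the color that has the next action
        return evaluate(board, board.next)
    best = float('-inf')
    for action in board.get_legal_actions():
        child = board.generate_successor_state(action)
        score = -negamax(child, depth - 1, -beta, -alpha)  # the opponent's best is our worst
        best = max(best, score)
        alpha = max(alpha, score)
        if alpha >= beta:  # cutoff: the opponent will avoid this branch anyway
            break
    return best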
def extract_features(cls, board: Board, action, color):
    """Return a numpy array of features."""
    board = board.generate_successor_state(action)
    oppo = opponent_color(color)

    # Feature for winning
    feat_win = 1 if board.winner == color else 0
    if feat_win == 1:
        return np.array([feat_win] + [0] * (cls.get_num_feats() - 1))

    # Features for endangered groups
    num_endangered_self, num_endangered_oppo = get_num_endangered_groups(board, color)
    feat_exist_endangered_self = 1 if num_endangered_self > 0 else 0
    feat_more_than_one_endangered_oppo = 1 if num_endangered_oppo > 1 else 0

    # Features for dangerous liberties
    feat_exist_guarantee_losing = 0
    feat_exist_guarantee_winning = 0
    liberties_self, liberties_oppo = get_liberties(board, color)
    for liberty in liberties_self:
        if is_dangerous_liberty(board, liberty, color):
            feat_exist_guarantee_losing = 1
            break
    for liberty in liberties_oppo:
        if is_dangerous_liberty(board, liberty, oppo):
            oppo_groups = board.libertydict.get_groups(oppo, liberty)
            liberties = oppo_groups[0].liberties | oppo_groups[1].liberties
            able_to_save = False
            for lbt in liberties:
                if len(board.libertydict.get_groups(color, lbt)) > 0:
                    able_to_save = True
                    break
            if not able_to_save:
                feat_exist_guarantee_winning = 1
                break

    # Features for groups with two liberties
    num_groups_2lbt_self, num_groups_2lbt_oppo = get_num_groups_with_k_liberties(board, color, 2)
    feat_groups_2lbt = num_groups_2lbt_oppo - num_groups_2lbt_self

    # Features for shared liberties
    num_shared_liberties_self = 0
    num_shared_liberties_oppo = 0
    for liberty in liberties_self:
        num_shared_liberties_self += len(board.libertydict.get_groups(color, liberty)) - 1
    for liberty in liberties_oppo:
        num_shared_liberties_oppo += len(board.libertydict.get_groups(oppo, liberty)) - 1
    feat_shared_liberties = num_shared_liberties_oppo - num_shared_liberties_self

    # Feature for the difference in the number of groups
    feat_num_groups_diff = len(board.groups[color]) - len(board.groups[oppo])

    # Features for mean liberty variance (0 when a side has no groups, to avoid a NaN feature)
    var_self = [calc_group_liberty_var(group) for group in board.groups[color]]
    var_oppo = [calc_group_liberty_var(group) for group in board.groups[oppo]]
    feat_var_self_mean = np.mean(var_self) if var_self else 0
    feat_var_oppo_mean = np.mean(var_oppo) if var_oppo else 0

    feats = [feat_win, feat_exist_endangered_self, feat_more_than_one_endangered_oppo,
             feat_exist_guarantee_losing, feat_exist_guarantee_winning,
             feat_groups_2lbt, feat_shared_liberties, feat_num_groups_diff,
             feat_var_self_mean, feat_var_oppo_mean,
             1]  # Bias term
    return np.array(feats)
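# Once the weights are trained, this feature vector turns into a move score via a dot product:
# pick the legal action whose successor features score highest under w. A minimal sketch,
# assuming w is a numpy array and extract_features is passed in as a callable (in the repo it
# is a method of the RL environment); the function name here is illustrative.
def greedy_action(board, color, w, extract_features):
    """Sketch: choose the legal action with the highest linear value w . phi(board, action, color)."""
    legal_actions = board.get_legal_actions()
    if not legal_actions:
        return None
    # Score each candidate move by its feature vector under the learned weights
    return max(legal_actions, key=lambda a: w.dot(extract_features(board, a, color)))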