import numpy as np


def _best_actions(self, board):
    alpha, beta = -999999, 999999
    states, indexes = next_states(board, self.player, index=True)
    values = []
    potential_actions = []
    for state, index in zip(states, indexes):
        value = collapse_state(self.grader, state, self.reach, self.player,
                               self.alpha_beta, alpha, beta)
        values.append(value)
        if self.alpha_beta > 0:  # Alpha
            if value > alpha:
                alpha = value
                potential_actions = [index]
            elif value == alpha:
                potential_actions.append(index)
        elif self.alpha_beta < 0:  # Beta
            if value < beta:
                beta = value
                potential_actions = [index]
            elif value == beta:
                potential_actions.append(index)
    if len(potential_actions) == 1:
        return potential_actions[0]
    r = np.random.randint(0, len(potential_actions))
    return potential_actions[r]
def _best_actions(self, board):
    best_value = -9999999 * self.mini_max
    states, indexes = next_states(board, self.player, index=True)
    values = []
    best_actions = []
    for state, index in zip(states, indexes):
        value = collapse_state(self.grader, state, self.reach, self.player,
                               (self.mini_max * -1))
        values.append(value)
        if self.mini_max > 0:  # Maximize
            if value > best_value:
                best_value = value
                best_actions = [index]
            elif value == best_value:
                best_actions.append(index)
        elif self.mini_max < 0:  # Minimize
            if value < best_value:
                best_value = value
                best_actions = [index]
            elif value == best_value:
                best_actions.append(index)
    print('values', values)
    # if len(best_actions) == 1:
    #     return best_actions[0]
    r = np.random.randint(0, len(best_actions))
    return best_actions[r]
def collapse_state(grader, board, reach, player, ab,
                   alpha=-999999, beta=999999, level=0):
    # Leaf: depth exhausted, board full, or a terminal position for `player`.
    if (reach == 0 or np.count_nonzero(board) == board.size
            or grader.check_board(board, player)):
        return grader.grade_board(board, player=player) - level * 100
    states = next_states(board, (-1 * player))
    values = []
    for state in states:
        value = collapse_state(grader, state, (reach - 1), player,
                               (ab * -1), alpha, beta, (level + 1))
        if ab == 2:  # Alpha Pruning
            if value > beta:
                return value
        elif ab == -2:  # Beta Pruning
            if value < alpha:
                return value
        values.append(value)
    if ab > 0:
        return max(values)
    elif ab < 0:
        return min(values)
    return
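# `collapse_state` and the selectors above lean on a `next_states(board, player,
# index=False)` helper and a grader exposing `check_board` / `grade_board`;
# neither is defined in this section. As a minimal sketch of the *assumed*
# interface only (boards as NumPy arrays with 0 for empty cells and +1/-1 for
# the players), successor generation might look like this hypothetical helper:
def _next_states_sketch(board, player, index=False):
    # Enumerate successor boards by placing `player` on each empty cell.
    states, indexes = [], []
    for i in np.flatnonzero(board == 0):
        state = board.copy()
        state.flat[i] = player      # the flat index doubles as the action id
        states.append(state)
        indexes.append(i)
    if index:
        return states, indexes
    return states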
def _best_action(self, board):
    states, indexes = next_states(board, self.player, index=True)
    max_value = None
    max_indexes = []
    for state, index in zip(states, indexes):
        value = self._value_state(state)
        if max_value is None:
            max_value = value
            max_indexes = [index]
        elif value > max_value:
            max_value = value
            max_indexes = [index]
        elif value == max_value:
            max_indexes.append(index)
    size = len(max_indexes)
    r = np.random.randint(0, size)
    return max_indexes[r]
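# `_best_action` relies on `self._value_state`, which is not shown in this
# section. Purely as an illustrative assumption (the real implementation may be
# a learned value function), it could delegate to the same grader interface
# used by `collapse_state`:
def _value_state(self, state):
    # Hypothetical sketch: score the board from this agent's point of view.
    return self.grader.grade_board(state, player=self.player)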