Example #1
0
 def _best_actions(self, board):
     """Choose one of the best moves for ``self.player`` on *board*.

     Every successor state is scored with ``collapse_state``; the indexes
     that tie for the best score are collected and one is returned at
     random.  ``self.alpha_beta > 0`` maximizes (tracks alpha),
     ``self.alpha_beta < 0`` minimizes (tracks beta).
     """
     alpha, beta = -999999, 999999
     states, indexes = next_states(board, self.player, index=True)
     values = []
     potential_actions = []
     for candidate, move in zip(states, indexes):
         score = collapse_state(self.grader, candidate, self.reach,
                                self.player, self.alpha_beta, alpha, beta)
         values.append(score)
         if self.alpha_beta > 0:  # maximizing: raise alpha on improvement
             if score > alpha:
                 alpha, potential_actions = score, [move]
             elif score == alpha:
                 potential_actions.append(move)
         elif self.alpha_beta < 0:  # minimizing: lower beta on improvement
             if score < beta:
                 beta, potential_actions = score, [move]
             elif score == beta:
                 potential_actions.append(move)
     if len(potential_actions) == 1:
         return potential_actions[0]
     # Break ties uniformly at random among equally-scored moves.
     return potential_actions[np.random.randint(0, len(potential_actions))]
Example #2
0
 def _best_actions(self, board):
     """Choose one of the best moves for ``self.player`` on *board*.

     Scores every successor state with ``collapse_state`` (the recursion
     receives the flipped minimax sign) and collects the move indexes
     that tie for the best score.  ``self.mini_max > 0`` maximizes,
     ``self.mini_max < 0`` minimizes.  One of the tied indexes is
     returned uniformly at random.
     """
     # Seed with the worst possible score for the current objective:
     # -9999999 when maximizing, +9999999 when minimizing.
     best_value = -9999999 * self.mini_max
     states, indexes = next_states(board, self.player, index=True)
     values = []
     best_actions = []
     for state, index in zip(states, indexes):
         value = collapse_state(self.grader, state, self.reach, self.player,
                                (self.mini_max * -1))
         values.append(value)
         if self.mini_max > 0:  # Maximize
             if value > best_value:
                 best_value = value
                 best_actions = [index]
             elif value == best_value:
                 best_actions.append(index)
         elif self.mini_max < 0:  # Minimize
             if value < best_value:
                 best_value = value
                 best_actions = [index]
             elif value == best_value:
                 best_actions.append(index)
     # Debug trace of all candidate scores.  Fixed: the original used a
     # Python-2-only print statement, a syntax error under Python 3.
     print('values', values)
     r = np.random.randint(0, len(best_actions))
     return best_actions[r]
Example #3
0
def collapse_state(grader,
                   board,
                   reach,
                   player,
                   ab,
                   alpha=-999999,
                   beta=999999,
                   level=0):
    """Recursively score *board* from *player*'s perspective.

    Recursion stops when the search depth *reach* is exhausted, the
    board has no empty cell, or ``grader.check_board`` reports a
    finished position; the leaf score is ``grader.grade_board`` minus a
    depth penalty of ``level * 100``.  The sign of *ab* selects max vs.
    min at this ply (it flips each level), and ``abs(ab) == 2`` enables
    cut-offs against the *alpha*/*beta* bounds supplied by the caller.
    """
    # Terminal test: depth limit, full board, or a decided game.
    if (reach == 0 or np.count_nonzero(board) == board.size
            or grader.check_board(board, player)):
        return grader.grade_board(board, player=player) - level * 100

    values = []
    for child in next_states(board, (-1 * player)):
        score = collapse_state(grader, child, (reach - 1), player, (ab * -1),
                               alpha, beta, (level + 1))
        if ab == 2 and score > beta:  # Alpha-pruning cut-off
            return score
        if ab == -2 and score < alpha:  # Beta-pruning cut-off
            return score
        values.append(score)
    if ab > 0:
        return max(values)
    if ab < 0:
        return min(values)
    return
Example #4
0
 def _best_action(self, board):
     """Return the move index whose successor state scores highest.

     Each successor is valued with ``self._value_state``; indexes tying
     for the running maximum are collected and one is returned uniformly
     at random.
     """
     states, indexes = next_states(board, self.player, index=True)
     best_score = None
     best_moves = []
     for state, move in zip(states, indexes):
         score = self._value_state(state)
         if best_score is None or score > best_score:
             # First candidate, or a strict improvement: restart the tie set.
             best_score = score
             best_moves = [move]
         elif score == best_score:
             best_moves.append(move)
     return best_moves[np.random.randint(0, len(best_moves))]