Example no. 1
0
 def minimax1(self, state, ava_actions, depth):
     # Terminal scoring: status 1 ('O' wins) -> 10 - depth, status 2 ('X' wins)
     # -> -10 + depth, status 0 -> draw; depth rewards wins reached sooner.
     score = check_game_status(state[0])
     if score == 0:
         return 0
     elif score == 1:
         return 10 - depth
     elif score == 2:
         return -10 + depth
     # 'O' is the maximizing player, 'X' the minimizing player.
     if state[1] == 'O':
         best = -1000
         c = 0
         for action in ava_actions:
             # Temporarily drop the chosen action for the recursive call,
             # then restore it at its original position.
             ava_actions.remove(action)
             best = max(
                 best,
                 self.minimax1(after_action_state(state, action),
                               ava_actions, depth + 1))
             ava_actions.insert(c, action)
             c += 1
         return best
     else:
         best = 1000
         c = 0
         for action in ava_actions:
             ava_actions.remove(action)
             best = min(
                 best,
                 self.minimax1(after_action_state(state, action),
                               ava_actions, depth + 1))
             ava_actions.insert(c, action)
             c += 1
         return best
    def minimax(self, state, turn, ava_actions, depth):
        # turn == 0 minimizes (a win for 'X' scores -10); any other turn value
        # maximizes (a win for 'O' scores +10); draws score 0.
        score = check_game_status(state[0])
        if score == 1:
            return 10
        if score == 2:
            return -10
        if score == 0:
            return 0

        if turn == 0:
            best = 1000
            for i in ava_actions:
                # Temporarily remove the action, recurse, then restore it in place.
                c = ava_actions.index(i)
                ava_actions.remove(i)
                best = min(best, self.minimax(after_action_state(state, i),
                                              1, ava_actions, depth + 1))
                ava_actions.insert(c, i)
            return best
        else:
            best = -1000
            for i in ava_actions:
                c = ava_actions.index(i)
                ava_actions.remove(i)
                best = max(best, self.minimax(after_action_state(state, i),
                                              0, ava_actions, depth + 1))
                ava_actions.insert(c, i)
            return best
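Neither scorer above shows its caller. As an illustration only, an act() driver along the following lines could pick a move using the first variant, minimax1; the method name act, the starting depth of 0, and the tie-breaking order are assumptions and not part of the example, while after_action_state and the (board, mark) state tuple are the same helpers used throughout these examples.

    def act(self, state, ava_actions):
        # Sketch only: choose the action whose minimax1 score is best for the
        # player to move ('O' maximizes, 'X' minimizes, matching minimax1).
        maximizing = state[1] == 'O'
        best_action, best_score = None, None
        for action in ava_actions:
            remaining = [a for a in ava_actions if a != action]
            score = self.minimax1(after_action_state(state, action),
                                  remaining, 0)
            if (best_score is None
                    or (maximizing and score > best_score)
                    or (not maximizing and score < best_score)):
                best_action, best_score = action, score
        return best_action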
Example no. 3
0
    def minimax(self, state, depth, isMax, ava_actions):
        board, extra = state
        gameState = check_game_status(board)

        if gameState == 0:
            return 0
        elif gameState == 1:
            return 1
        elif gameState == 2:
            return -1

        if isMax:
            bestScore = -100000
            for i in range(9):
                if i in ava_actions:
                    statet = after_action_state(state, i)
                    ava_actions.remove(i) 
                    score = self.minimax(statet, depth + 1, False, ava_actions)
                    ava_actions.append(i)
                    bestScore = max(score, bestScore)

            return bestScore
        else:
            bestScore = 100000
            for i in range(9):
                if i in ava_actions:
                    statet = after_action_state(state, i)
                    ava_actions.remove(i)
                    score = self.minimax(statet, depth + 1, True, ava_actions)
                    ava_actions.append(i)
                    bestScore = min(score, bestScore)

            return bestScore
Example no. 4
0
 def act(self, state, ava_actions):
     # Play an immediately winning move if one exists; otherwise move at random.
     for action in ava_actions:
         nstate = after_action_state(state, action)
         gstatus = check_game_status(nstate[0])
         if gstatus > 0:
             if tomark(gstatus) == self.mark:
                 return action
     return random.choice(ava_actions)
 def act(self, state, my_env):
     available_actions = my_env.available_actions()
     for action in available_actions:
         nstate = after_action_state(my_env.state, action)
         gstatus = check_game_status(nstate[0])
         if gstatus > 0:
             if tomark(gstatus) == self.mark:
                 return action
     return random.choice(available_actions)
    def act(self, state, my_env: TicTacToeEnv):
        available_actions = my_env.available_actions()
        # --- Step 1: play winning move, if possible ---
        for action in available_actions:
            nstate = after_action_state(state, action)
            gstatus = check_game_status(nstate[0])
            if gstatus > 0:
                if tomark(gstatus) == self.mark:
                    return action

        # --- Step 2: block opponent from winning ---
        # imagine the opponent was playing
        rev_state = (state[0], next_mark(state[1]))
        for action in available_actions:
            nstate = after_action_state(rev_state, action)
            gstatus = check_game_status(nstate[0])
            if gstatus > 0:
                # if they can make a winning move, play that
                if tomark(gstatus) == self.opponent_mark:
                    return action

        return random.choice(available_actions)
 def compute_reward(self, state):
     """Given a terminal state, return reward"""
     gstatus = check_game_status(state[0])
     if gstatus == -1:
         raise RuntimeError(
             "Error! Compute reward called when game was not finished.")
     # tie
     elif gstatus == 0:
         return 0.5
     # won
     elif gstatus == tocode(self.mark):
         return 1
     # lost
     else:
         return 0
 def act(self, state, ava_actions):
     # Exhaustive search over the remaining moves, returning a (value, action)
     # pair; the value is the check_game_status() code of the reachable
     # terminal board ('O' minimizes it, 'X' maximizes it).
     board, mark = state
     nboard = list(board[:])
     if check_game_status(nboard) < 0:
         best_min = 100
         best_max = -100
         action_min = ava_actions[0]
         action_max = ava_actions[0]
         if mark == 'O':
             for action in ava_actions:
                 nboard[action] = 1
                 mark = next_mark(mark)
                 value, q = self.act(
                     (tuple(nboard), mark),
                     [p for p in ava_actions if p != action])
                 if value < best_min:
                     best_min = value
                     action_min = action
                 nboard[action] = 0  # backtrack
                 mark = next_mark(mark)
             return best_min, action_min
         else:
             for action in ava_actions:
                 nboard[action] = 2
                 mark = next_mark(mark)
                 value, m = self.act(
                     (tuple(nboard), mark),
                     [p for p in ava_actions if p != action])
                 if value > best_max:
                     best_max = value
                     action_max = action
                 nboard[action] = 0  # backtrack
                 mark = next_mark(mark)
             return best_max, action_max
     else:
         # Terminal board: return its status code and a dummy action.
         return check_game_status(nboard), 12
Example no. 9
0
 def ask_value(self, state):
     """Returns value of given state.
     If the state does not exist, set it to a default value.
     Args:
         state (tuple): State.
     Returns:
         float: Value of a state.
     """
     if state not in st_values:
         gstatus = check_game_status(state[0])
         val = 0
         # win
         if gstatus > 0:
             val = O_REWARD if self.mark == 'O' else X_REWARD
         set_state_value(state, val)
     return st_values[state]
Example no. 10
0
def minimax(board, is_max):
    curr = check_game_status(board)
    # Terminal scoring: 'O' win (status 1) -> 10, 'X' win (status 2) -> -10, draw -> 0.
    if curr == 1:
        return 10
    if curr == 2:
        return -10
    if curr == 0:
        return 0

    if is_max:
        # Maximizing player 'O': take the best score over all empty cells.
        mark = 'O'
        maxval = -100
        for i in range(9):
            if board[i] == 0:
                old_state = [board, mark]
                new_board, new_mark = after_action_state(old_state, i)
                currval = minimax(new_board, False)
                if maxval < currval:
                    maxval = currval
        return maxval
    else:
        # Minimizing player 'X': take the worst score over all empty cells.
        mark = 'X'
        minval = 100
        for i in range(9):
            if board[i] == 0:
                old_state = [board, mark]
                new_board, new_mark = after_action_state(old_state, i)
                # The opponent ('O') moves next, so recurse as the maximizer.
                currval = minimax(new_board, True)
                if minval > currval:
                    minval = currval
        return minval
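The free-standing minimax above only returns a score for a given board. A minimal root-level chooser, sketched here purely as an illustration, could enumerate the empty cells and keep the highest-scoring move for 'O'; the helper name best_move_for_o is an assumption and not part of the example, while the 0/1/2 cell encoding and after_action_state follow the code above.

def best_move_for_o(board):
    # Sketch only: pick 'O''s best cell with the minimax(board, is_max) above.
    best_cell, best_score = None, -100
    for i in range(9):
        if board[i] == 0:                      # empty cell
            new_board, _ = after_action_state([board, 'O'], i)
            score = minimax(new_board, False)  # 'X' replies next (minimizer)
            if score > best_score:
                best_cell, best_score = i, score
    return best_cell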
Example no. 11
0
    def ask_value(self, state):
        """Returns value of given state.

        If the state does not exist, set it to the default value.

        Args:
            state (tuple): State.

        Returns:
            float: Value of a state.
        """
        if state not in st_values:
            logging.debug("ask_value - new state {}".format(state))
            gstatus = check_game_status(state[0])
            val = DEFAULT_VALUE
            # win
            if gstatus > 0:
                val = O_REWARD if self.mark == 'O' else X_REWARD
            set_state_value(state, val)
        return st_values[state]
Example no. 12
0
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])

    if (game_status == 0 or game_status == tocode(next_mark(state[1]))):
        win_count = win_count + step

        if (game_status == 0):  # a draw counts as a win for both players
            loss_count = loss_count + step

        return win_count, loss_count
    elif (game_status == tocode(state[1])):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        for action in aval_actions:
            temp = aval_actions.copy()
            loss_count, win_count = find_loc_prob(state, temp, action, loss_count, win_count, step/5)

    return win_count, loss_count
Example no. 13
0
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])
    print("Action = {}".format(action))

    if (game_status == 0 or game_status == tocode(next_mark(state[1]))):
        win_count = win_count + step
        return win_count, loss_count
    elif (game_status == tocode(state[1])):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        for action in aval_actions:
            print("Calling recurssively for step {}".format(step))
            print(
                "Win count and Loss count till this step = {} and {} for mark {}"
                .format(win_count, loss_count, state[1]))
            # Pass a copy so the caller's action list is not mutated mid-iteration.
            loss_count, win_count = find_loc_prob(state, aval_actions.copy(),
                                                  action, loss_count, win_count,
                                                  step - 1)

    return win_count, loss_count