Example #1
    def minimax1(self, state, ava_actions, depth):
        # Terminal evaluation: 0 = draw, 1 = 'O' wins, 2 = 'X' wins;
        # any other value means the game is still in progress.
        score = check_game_status(state[0])
        if score == 0:
            return 0
        elif score == 1:
            return 10 - depth
        elif score == 2:
            return -10 + depth

        if state[1] == 'O':
            # 'O' to move: maximize over the remaining actions.
            best = -1000
            for c, action in enumerate(list(ava_actions)):
                ava_actions.remove(action)
                best = max(
                    best,
                    self.minimax1(after_action_state(state, action),
                                  ava_actions, depth + 1))
                ava_actions.insert(c, action)
            return best
        else:
            # 'X' to move: minimize over the remaining actions.
            best = 1000
            for c, action in enumerate(list(ava_actions)):
                ava_actions.remove(action)
                best = min(
                    best,
                    self.minimax1(after_action_state(state, action),
                                  ava_actions, depth + 1))
                ava_actions.insert(c, action)
            return best
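Every example on this page calls after_action_state from gym-tictactoe. A minimal sketch of the behaviour these snippets assume (board cells coded 0 = empty, 1 = 'O', 2 = 'X'; the real library helper may differ in details):

# Sketch (assumption): behaviour the examples expect from after_action_state.
def after_action_state(state, action):
    board, mark = state                       # mark = player about to move
    nboard = list(board)
    nboard[action] = 1 if mark == 'O' else 2  # place the current mark
    return tuple(nboard), ('X' if mark == 'O' else 'O')  # opponent moves next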
Example #2
    def minimax(self, state, turn, ava_actions, depth):
        # Terminal evaluation: 1 = 'O' wins, 2 = 'X' wins, 0 = draw;
        # any other value means the game is still in progress.
        score = check_game_status(state[0])
        if score == 1:
            return 10
        if score == 2:
            return -10
        if score == 0:
            return 0

        if turn == 0:
            # Minimizing player's turn.
            best = 1000
            for i in list(ava_actions):
                c = ava_actions.index(i)
                ava_actions.remove(i)
                best = min(best,
                           self.minimax(after_action_state(state, i), 1,
                                        ava_actions, depth + 1))
                ava_actions.insert(c, i)
            return best
        else:
            # Maximizing player's turn.
            best = -1000
            for i in list(ava_actions):
                c = ava_actions.index(i)
                ava_actions.remove(i)
                best = max(best,
                           self.minimax(after_action_state(state, i), 0,
                                        ava_actions, depth + 1))
                ava_actions.insert(c, i)
            return best
Example #3
    def minimax(self, state, depth, isMax, ava_actions):
        board, mark = state
        gameState = check_game_status(board)

        # Terminal evaluation: 0 = draw, 1 = 'O' wins, 2 = 'X' wins;
        # any other value means the game is still in progress.
        if gameState == 0:
            return 0
        elif gameState == 1:
            return 1
        elif gameState == 2:
            return -1

        if isMax:
            bestScore = -100000
            for i in range(9):
                if i in ava_actions:
                    statet = after_action_state(state, i)
                    ava_actions.remove(i)
                    score = self.minimax(statet, depth + 1, False, ava_actions)
                    ava_actions.append(i)
                    bestScore = max(score, bestScore)

            return bestScore
        else:
            bestScore = 100000
            for i in range(9):
                if i in ava_actions:
                    statet = after_action_state(state, i)
                    ava_actions.remove(i)
                    score = self.minimax(statet, depth + 1, True, ava_actions)
                    ava_actions.append(i)
                    bestScore = min(score, bestScore)

            return bestScore
Example #4
    def greedy_action(self, state, ava_actions):
        """Return best action by current state value.
        Evaluate each action, select best one. Tie-breaking is random.
        Args:
            state (tuple): Board status + mark
            ava_actions (list): Available actions
        Returns:
            int: Selected action
        """
        assert len(ava_actions) > 0

        ava_values = []
        for action in ava_actions:
            nstate = after_action_state(state, action)
            nval = self.ask_value(nstate)
            ava_values.append(nval)
            vcnt = st_visits[nstate]
            # print("  nstate {} val {:0.2f} visits {}".
            #       format(nstate, nval, vcnt))

        # select most right action for 'O' or 'X'
        if self.mark == 'O':
            indices = best_val_indices(ava_values, max)
        else:
            indices = best_val_indices(ava_values, min)

        # tie breaking by random choice
        aidx = random.choice(indices)
        # print("greedy_action mark {} ava_values {} indices {} aidx {}".
        #       format(self.mark, ava_values, indices, aidx))

        action = ava_actions[aidx]

        return action
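Both greedy_action variants lean on a best_val_indices helper for the random tie-break described in the docstring. A minimal sketch of the behaviour they assume (indices of every value tied for the best):

# Sketch (assumption): indices of all entries equal to the best value,
# where fn is max (for 'O') or min (for 'X').
def best_val_indices(values, fn):
    best = fn(values)
    return [i for i, v in enumerate(values) if v == best]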
Example #5
    def greedy_action(self, state, ava_actions):
        """Return best action by current state value.

        Evaluate each action, select best one. Tie-breaking is random.

        Args:
            state (tuple): Board status + mark
            ava_actions (list): Available actions

        Returns:
            int: Selected action
        """
        assert len(ava_actions) > 0
        coun_nstate = 0
        ava_values = []
        # temp for insert to db variable
        temp_db_nstate = []
        temp_db_nvalue = []
        temp_db_choose = 0

        for action in ava_actions:
            nstate = after_action_state(state, action)
            nval = self.ask_value(nstate)
            # show next state and reward
            print("Choice:" + str(coun_nstate) + ". %s || Reward is %s" %
                  (nstate, nval))
            temp_db_nstate.append(nstate)
            temp_db_nvalue.append(nval)
            # print("temp_db_nstate: %s" %(temp_db_nstate))
            # print("temp_db_nvalue: %s" %(temp_db_nvalue))

            coun_nstate += 1
            ava_values.append(nval)
            vcnt = st_visits[nstate]
            logging.debug("  nstate {} val {:0.2f} visits {}".format(
                nstate, nval, vcnt))

        # select most right action for 'O' or 'X'
        if self.mark == 'O':
            indices = best_val_indices(ava_values, max)
            print("---> Machine Choose Maximum Reward in choice(s) %s" %
                  (indices))

        else:
            indices = best_val_indices(ava_values, min)
            print("---> Machine Choose Minimum Reward in choice(s) %s" %
                  (indices))

        # tie breaking by random choice
        aidx = random.choice(indices)
        logging.debug(
            "greedy_action mark {} ava_values {} indices {} aidx {}".format(
                self.mark, ava_values, indices, aidx))
        print("------> Machine Choose choice %s." % (aidx))
        action = ava_actions[aidx]
        print("---------> Machine pick at %s." % str(action + 1))

        self.set_db(temp_db_nstate, temp_db_nvalue, aidx, action)
        return action
Example #6
    def act(self, state, ava_actions):
        # Play an immediately winning move if one exists; otherwise move randomly.
        for action in ava_actions:
            nstate = after_action_state(state, action)
            gstatus = check_game_status(nstate[0])
            if gstatus > 0:
                if tomark(gstatus) == self.mark:
                    return action
        return random.choice(ava_actions)
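This heuristic, and the act methods below, rely on the status-code convention used throughout the page: check_game_status returns -1 while the game is running, 0 for a draw, 1 when 'O' has won and 2 when 'X' has won, while tomark, tocode and next_mark convert between marks and codes. A hedged sketch of those helpers (assumed, not taken from the library source):

# Sketch (assumption): mark/code helpers consistent with the status codes
# used by the examples (1 <-> 'O', 2 <-> 'X').
CODE_MARK_MAP = {0: ' ', 1: 'O', 2: 'X'}

def tomark(code):
    return CODE_MARK_MAP[code]

def tocode(mark):
    return 1 if mark == 'O' else 2

def next_mark(mark):
    return 'X' if mark == 'O' else 'O'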
Example #7
def minimax(board, is_max):
    #print("Minimax called")
    curr = check_game_status(board)
    #print(curr)
    if curr == 1:
        #print("max")
        return 10
    if curr == 2:
        #print("min")
        return -10
    if curr == 0:
        #print("draw")
        return 0

    if is_max:
        #print("Maximizer")
        mark = 'O'
        maxval = -100
        for i in range(0, 9):
            if board[i] == 0:
                old_state = [board, mark]
                new_board, new_mark = after_action_state(old_state, i)
                #print(new_board)
                currval = minimax(new_board, False)
                #print(currval)
                if maxval < currval:
                    maxval = currval
        return maxval
    else:
        #print("minimizer")
        mark = 'X'
        minval = 100
        for i in range(0, 9):
            if board[i] == 0:
                #print(i)
                old_state = [board, mark]
                new_state = after_action_state(old_state, i)
                #print(new_state[0])
                new_board = new_state[0]
                new_mark = new_state[1]
                currval = minimax(new_board, True)  # hand the turn back to the maximizer
                #print(currval,mark)
                if minval > currval:
                    minval = currval
        return minval
Example #8
    def act(self, state, my_env):
        available_actions = my_env.available_actions()
        for action in available_actions:
            nstate = after_action_state(my_env.state, action)
            gstatus = check_game_status(nstate[0])
            if gstatus > 0:
                if tomark(gstatus) == self.mark:
                    return action
        return random.choice(available_actions)
Example #9
    def act(self, state, my_env: TicTacToeEnv):
        available_actions = my_env.available_actions()
        # --- Step 1: play winning move, if possible ---
        for action in available_actions:
            nstate = after_action_state(state, action)
            gstatus = check_game_status(nstate[0])
            if gstatus > 0:
                if tomark(gstatus) == self.mark:
                    return action

        # --- Step 2: block opponent from winning ---
        # imagine the opponent was playing
        rev_state = (state[0], next_mark(state[1]))
        for action in available_actions:
            nstate = after_action_state(rev_state, action)
            gstatus = check_game_status(nstate[0])
            if gstatus > 0:
                # if they can make a winning move, play that
                if tomark(gstatus) == self.opponent_mark:
                    return action

        return random.choice(available_actions)
Example #10
    def act(self, state, ava_actions):
        # Evaluate every available move with minimax and play the best one
        # for the maximizing player.
        opt = -100
        action = -1
        for i in ava_actions:
            new_board, mark = after_action_state(state, i)
            val = minimax(new_board, False)
            if val > opt:
                opt = val
                action = i
        return action
Example #11
    def act(self, state, ava_actions):
        # Try every available action and keep the one with the best minimax value.
        best = -1000
        bestact = -1
        for c, action in enumerate(list(ava_actions)):
            ava_actions.remove(action)
            moveVal = self.minimax1(after_action_state(state, action),
                                    ava_actions, 0)
            ava_actions.insert(c, action)
            if moveVal > best:
                best = moveVal
                bestact = action
        return bestact
Example #12
    def act(self, state, ava_actions):
        bvalue = -1000
        pos = -1
        for i in ava_actions:
            c = ava_actions.index(i)
            ava_actions.remove(i)
            move = self.minimax(after_action_state(state, i), 0, ava_actions, 0)
            ava_actions.insert(c, i)

            if move > bvalue:
                bvalue = move
                pos = i

        return pos
Example #13
    def act(self, state, ava_actions):
        #raise NotImplementedError()
        
        bestScore = -100000
        bestMove = 1

        for i in range(9):
            if i in ava_actions:
                statet = after_action_state(state, i)
                ava_actions.remove(i)
                score = self.minimax(statet, 0, False, ava_actions)
                ava_actions.append(i)

                if score > bestScore:
                    bestScore = score
                    bestMove = i

        return bestMove
Example #14
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])

    if game_status == 0 or game_status == tocode(next_mark(state[1])):
        win_count = win_count + step

        # A draw is counted as a win for both players.
        if game_status == 0:
            loss_count = loss_count + step

        return win_count, loss_count
    elif game_status == tocode(state[1]):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        # Recurse over the remaining actions; the win/loss counts swap because
        # the player to move alternates at each level.
        for action in aval_actions:
            temp = aval_actions.copy()
            loss_count, win_count = find_loc_prob(state, temp, action,
                                                  loss_count, win_count,
                                                  step / 5)

    return win_count, loss_count
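A hypothetical caller (not part of the original example) would run find_loc_prob once per available action and pick the move whose recursive win count most exceeds its loss count, e.g.:

# Hypothetical driver (assumption): score every available action with
# find_loc_prob and keep the one with the best win-minus-loss margin.
def pick_action(state, aval_actions):
    best_action, best_margin = None, float('-inf')
    for action in aval_actions:
        wins, losses = find_loc_prob(state, aval_actions.copy(), action, 0, 0, 1)
        if wins - losses > best_margin:
            best_margin, best_action = wins - losses, action
    return best_action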
Example #15
    def expand(self, node: Node, my_env: TicTacToeEnv):
        """
        MCTS: Expansion stage.
          - If additional moves are possible from given node
            child nodes will be created, one selected, and env advanced.
          - If not, same node and env will be returned.
        """
        # If this is a terminal state, don't try to expand
        if my_env.done:
            return node, my_env

        # Add a child node for each possible action
        for action in my_env.available_actions():
            nstate = after_action_state(node.state, action)
            Node(nstate, action, parent=node)

        # If node has children after expansion, select one
        if node.children:
            node = random.choice(node.children)
            my_env.step(node.action)

        return node, my_env
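expand only works if Node registers itself with its parent's children list when constructed. A minimal sketch of such a node (assumed; the original Node class is not shown here):

# Sketch (assumption): a tree node with exactly the attributes expand() touches.
class Node:
    def __init__(self, state, action=None, parent=None):
        self.state = state        # (board, mark) reached via `action`
        self.action = action      # move that produced this state
        self.parent = parent
        self.children = []
        if parent is not None:
            parent.children.append(self)  # register with the parent at creation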
Example #16
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])
    print("Action = {}".format(action))

    if game_status == 0 or game_status == tocode(next_mark(state[1])):
        win_count = win_count + step
        return win_count, loss_count
    elif game_status == tocode(state[1]):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        for action in aval_actions:
            print("Calling recursively for step {}".format(step))
            print(
                "Win count and loss count up to this step = {} and {} for mark {}"
                .format(win_count, loss_count, state[1]))
            # Pass a copy so the recursive removal does not mutate the list
            # being iterated; the win/loss counts swap because the player to
            # move alternates at each level.
            loss_count, win_count = find_loc_prob(state, aval_actions.copy(),
                                                  action, loss_count,
                                                  win_count, step - 1)

    return win_count, loss_count