def minimax1(self, state, ava_actions, depth):
    score = check_game_status(state[0])
    if score == 0:
        return 0
    elif score == 1:
        return 10 - depth
    elif score == 2:
        return -10 + depth
    if state[1] == 'O':
        best = -1000
        c = 0
        for action in ava_actions:
            # temporarily take `action` off the list for the recursive
            # call, then restore it at its original position
            ava_actions.remove(action)
            best = max(best, self.minimax1(after_action_state(state, action),
                                           ava_actions, depth + 1))
            ava_actions.insert(c, action)
            c += 1
        return best
    else:
        best = 1000
        c = 0
        for action in ava_actions:
            ava_actions.remove(action)
            best = min(best, self.minimax1(after_action_state(state, action),
                                           ava_actions, depth + 1))
            ava_actions.insert(c, action)
            c += 1
        return best

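# All of these snippets call into gym-tictactoe-style helpers that are not
# shown, and several also assume `random` and `logging` are imported. What
# follows is a minimal sketch of those helpers, assuming the usual
# gym-tictactoe conventions (board is a length-9 sequence with 0 for an
# empty cell, 1 for 'O', 2 for 'X'; check_game_status returns -1 while the
# game is in progress, 0 for a draw, and the winner's code otherwise).
# Treat it as a reference for reading the snippets, not as the exact
# upstream code.

import logging
import random

def tocode(mark):
    """Map a mark to its board code."""
    return 1 if mark == 'O' else 2

def tomark(code):
    """Inverse of tocode."""
    return 'O' if code == 1 else 'X'

def next_mark(mark):
    """The other player's mark."""
    return 'X' if mark == 'O' else 'O'

def check_game_status(board):
    """Return -1 if in progress, 0 for a draw, 1 if 'O' won, 2 if 'X' won."""
    lines = [(0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
             (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
             (0, 4, 8), (2, 4, 6)]              # diagonals
    for a, b, c in lines:
        if board[a] != 0 and board[a] == board[b] == board[c]:
            return board[a]
    return -1 if 0 in board else 0

def after_action_state(state, action):
    """Place the current mark at `action` and hand the turn over."""
    board, mark = state
    nboard = list(board)
    nboard[action] = tocode(mark)
    return tuple(nboard), next_mark(mark)
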
def minimax(self, state, turn, ava_actions, depth):
    score = check_game_status(state[0])
    if score == 1:
        return 10
    if score == 2:
        return -10
    if score == 0:
        return 0
    if turn == 0:
        best = 1000
        for i in ava_actions:
            c = ava_actions.index(i)
            ava_actions.remove(i)
            best = min(best, self.minimax(after_action_state(state, i), 1,
                                          ava_actions, depth + 1))
            ava_actions.insert(c, i)
        return best
    else:
        best = -1000
        for i in ava_actions:
            c = ava_actions.index(i)
            ava_actions.remove(i)
            best = max(best, self.minimax(after_action_state(state, i), 0,
                                          ava_actions, depth + 1))
            ava_actions.insert(c, i)
        return best

def minimax(self, state, depth, isMax, ava_actions):
    board, extra = state
    gameState = check_game_status(board)
    if gameState == 0:
        return 0
    elif gameState == 1:
        return 1
    elif gameState == 2:
        return -1
    if isMax:
        bestScore = -100000
        for i in range(0, 9, 1):
            if i in ava_actions:
                statet = after_action_state(state, i)
                ava_actions.remove(i)
                score = self.minimax(statet, depth + 1, False, ava_actions)
                ava_actions.append(i)
                bestScore = max(score, bestScore)
        return bestScore
    else:
        bestScore = 100000
        for i in range(0, 9, 1):
            if i in ava_actions:
                statet = after_action_state(state, i)
                ava_actions.remove(i)
                score = self.minimax(statet, depth + 1, True, ava_actions)
                ava_actions.append(i)
                bestScore = min(score, bestScore)
        return bestScore

def greedy_action(self, state, ava_actions):
    """Return best action by current state value.

    Evaluate each action, select the best one. Tie-breaking is random.

    Args:
        state (tuple): Board status + mark
        ava_actions (list): Available actions

    Returns:
        int: Selected action
    """
    assert len(ava_actions) > 0

    ava_values = []
    for action in ava_actions:
        nstate = after_action_state(state, action)
        nval = self.ask_value(nstate)
        ava_values.append(nval)
        vcnt = st_visits[nstate]
        # print("  nstate {} val {:0.2f} visits {}".
        #       format(nstate, nval, vcnt))

    # select the best action for 'O' or 'X'
    if self.mark == 'O':
        indices = best_val_indices(ava_values, max)
    else:
        indices = best_val_indices(ava_values, min)

    # tie breaking by random choice
    aidx = random.choice(indices)
    # print("greedy_action mark {} ava_values {} indices {} aidx {}".
    #       format(self.mark, ava_values, indices, aidx))

    action = ava_actions[aidx]
    return action

def greedy_action(self, state, ava_actions):
    """Return best action by current state value.

    Evaluate each action, select the best one. Tie-breaking is random.

    Args:
        state (tuple): Board status + mark
        ava_actions (list): Available actions

    Returns:
        int: Selected action
    """
    assert len(ava_actions) > 0

    coun_nstate = 0
    ava_values = []
    # temp variables for inserting into the db
    temp_db_nstate = []
    temp_db_nvalue = []
    temp_db_choose = 0
    for action in ava_actions:
        nstate = after_action_state(state, action)
        nval = self.ask_value(nstate)
        # show next state and reward
        print("Choice:" + str(coun_nstate) + ". %s || Reward is %s" % (nstate, nval))
        temp_db_nstate.append(nstate)
        temp_db_nvalue.append(nval)
        # print("temp_db_nstate: %s" % (temp_db_nstate))
        # print("temp_db_nvalue: %s" % (temp_db_nvalue))
        coun_nstate += 1
        ava_values.append(nval)
        vcnt = st_visits[nstate]
        logging.debug("  nstate {} val {:0.2f} visits {}".format(
            nstate, nval, vcnt))

    # select the best action for 'O' or 'X'
    if self.mark == 'O':
        indices = best_val_indices(ava_values, max)
        print("---> Machine Choose Maximum Reward in choice(s) %s" % (indices))
    else:
        indices = best_val_indices(ava_values, min)
        print("---> Machine Choose Minimum Reward in choice(s) %s" % (indices))

    # tie breaking by random choice
    aidx = random.choice(indices)
    logging.debug(
        "greedy_action mark {} ava_values {} indices {} aidx {}".format(
            self.mark, ava_values, indices, aidx))
    print("------> Machine Choose choice %s." % (aidx))

    action = ava_actions[aidx]
    print("---------> Machine pick at %s." % str(action + 1))
    self.set_db(temp_db_nstate, temp_db_nvalue, aidx, action)
    return action

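# Both greedy_action variants above depend on best_val_indices and a
# module-level st_visits table, neither of which is shown. Below is a
# minimal sketch consistent with how they are used (best_val_indices
# returns every index whose value equals the max/min, which is what makes
# the random tie-breaking work); it is not necessarily the upstream code.

from collections import defaultdict

st_visits = defaultdict(lambda: 0)  # visit count per state

def best_val_indices(values, fn):
    """Indices of all entries equal to fn(values), e.g. fn=max or fn=min."""
    best = fn(values)
    return [i for i, v in enumerate(values) if v == best]
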
def act(self, state, ava_actions):
    # play an immediately winning move if one exists
    for action in ava_actions:
        nstate = after_action_state(state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            if tomark(gstatus) == self.mark:
                return action
    # otherwise fall back to a random move
    return random.choice(ava_actions)

def minimax(board, is_max):
    curr = check_game_status(board)
    if curr == 1:    # 'O' wins
        return 10
    if curr == 2:    # 'X' wins
        return -10
    if curr == 0:    # draw
        return 0
    if is_max:
        # maximizer: 'O' to move
        mark = 'O'
        maxval = -100
        for i in range(0, 9):
            if board[i] == 0:
                old_state = [board, mark]
                new_board, new_mark = after_action_state(old_state, i)
                currval = minimax(new_board, False)
                if maxval < currval:
                    maxval = currval
        return maxval
    else:
        # minimizer: 'X' to move
        mark = 'X'
        minval = 100
        for i in range(0, 9):
            if board[i] == 0:
                old_state = [board, mark]
                new_state = after_action_state(old_state, i)
                new_board = new_state[0]
                new_mark = new_state[1]
                currval = minimax(new_board, True)  # hand back to the maximizer
                if minval > currval:
                    minval = currval
        return minval

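# A quick hand-check of the board-based minimax above, assuming the helper
# sketch earlier in this file. The position is hypothetical: 'O' (1)
# completes the top row by playing cell 2, and 'X' (2) completes the
# middle row by playing cell 5.

board = [1, 1, 0,
         2, 2, 0,
         0, 0, 0]
assert minimax(board, True) == 10    # maximizer 'O' finds the win
assert minimax(board, False) == -10  # minimizer 'X' finds its own win
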
def act(self, state, my_env):
    available_actions = my_env.available_actions()
    for action in available_actions:
        nstate = after_action_state(my_env.state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            if tomark(gstatus) == self.mark:
                return action
    return random.choice(available_actions)

def act(self, state, my_env: TicTacToeEnv):
    available_actions = my_env.available_actions()

    # --- Step 1: play winning move, if possible ---
    for action in available_actions:
        nstate = after_action_state(state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            if tomark(gstatus) == self.mark:
                return action

    # --- Step 2: block opponent from winning ---
    # imagine the opponent was playing
    rev_state = (state[0], next_mark(state[1]))
    for action in available_actions:
        nstate = after_action_state(rev_state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            # if they can make a winning move, play that
            if tomark(gstatus) == self.opponent_mark:
                return action

    return random.choice(available_actions)

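# An illustration of the blocking step above, on a hypothetical position
# and assuming the helper sketch earlier in this file. 'O' is to move with
# no immediate win, while 'X' (2) threatens to complete the middle row at
# cell 5; flipping the mark exposes that threat, so the agent takes cell 5
# itself.

board = (1, 0, 0,
         2, 2, 0,
         1, 0, 0)
rev_state = (board, next_mark('O'))           # pretend it is X's turn
nboard, _ = after_action_state(rev_state, 5)  # X would play cell 5...
assert check_game_status(nboard) == 2         # ...and win, so block there
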
def act(self, state, ava_actions):
    opt = -100
    action = -1
    for i in ava_actions:
        new_board, mark = after_action_state(state, i)
        val = minimax(new_board, False)
        if val > opt:
            opt = val
            action = i
    return action

def act(self, state, ava_actions):
    best = -1000
    bestact = -1
    c = 0
    for action in ava_actions:
        ava_actions.remove(action)
        moveVal = self.minimax1(after_action_state(state, action),
                                ava_actions, 0)
        ava_actions.insert(c, action)
        c += 1
        if moveVal > best:
            best = moveVal
            bestact = action
    return bestact

def act(self, state, ava_actions):
    bvalue = -1000
    pos = -1
    for i in ava_actions:
        c = ava_actions.index(i)
        ava_actions.remove(i)
        move = self.minimax(after_action_state(state, i), 0, ava_actions, 0)
        ava_actions.insert(c, i)
        if move > bvalue:
            bvalue = move
            pos = i
    return pos

def act(self, state, ava_actions):
    bestScore = -100000
    bestMove = 1
    for i in range(0, 9, 1):
        if i in ava_actions:
            statet = after_action_state(state, i)
            ava_actions.remove(i)
            score = self.minimax(statet, 0, False, ava_actions)
            ava_actions.append(i)
            if score > bestScore:
                bestScore = score
                bestMove = i
    return bestMove

def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])
    # After the move, state[1] holds the opponent's mark, so
    # next_mark(state[1]) is the player who just moved.
    if game_status == 0 or game_status == tocode(next_mark(state[1])):
        win_count = win_count + step
        if game_status == 0:
            # a draw is counted as a victory for both players
            loss_count = loss_count + step
        return win_count, loss_count
    elif game_status == tocode(state[1]):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        for action in aval_actions:
            temp = aval_actions.copy()
            # Swap win/loss on recursion: a win for the opponent one ply
            # deeper is a loss from this player's perspective. Deeper
            # plies get a smaller weight (step/5).
            loss_count, win_count = find_loc_prob(state, temp, action,
                                                  loss_count, win_count,
                                                  step / 5)
        return win_count, loss_count

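# A hypothetical driver for find_loc_prob, assuming the helper sketch
# earlier in this file: score each available action by its accumulated
# win/loss weights and pick the most favorable one. The name
# pick_by_loc_prob and the win-minus-loss scoring are illustrative
# choices, not part of the original code.

def pick_by_loc_prob(state, ava_actions, step=5.0):
    best_action, best_score = None, float('-inf')
    for action in ava_actions:
        # pass a copy, since find_loc_prob mutates the list it receives
        wins, losses = find_loc_prob(state, ava_actions.copy(), action,
                                     0, 0, step)
        if wins - losses > best_score:
            best_score, best_action = wins - losses, action
    return best_action
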
def expand(self, node: Node, my_env: TicTacToeEnv):
    """MCTS: Expansion stage.

    - If additional moves are possible from the given node, child nodes
      will be created, one selected, and the env advanced.
    - If not, the same node and env will be returned.
    """
    # If this is a terminal state, don't try to expand
    if my_env.done:
        return node, my_env

    # Add a child node for each possible action
    for action in my_env.available_actions():
        nstate = after_action_state(node.state, action)
        Node(nstate, action, parent=node)

    # If node has children after expansion, select one
    if node.children:
        node = random.choice(node.children)
        my_env.step(node.action)

    return node, my_env

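# The expansion step above assumes a tree Node type that registers itself
# with its parent on construction. The class below is a minimal sketch
# consistent with how expand() uses it (node.state, node.action,
# node.children), not the project's actual Node implementation.

class Node:
    def __init__(self, state, action=None, parent=None):
        self.state = state      # (board, mark) reached via `action`
        self.action = action    # move that led to this node
        self.parent = parent
        self.children = []
        if parent is not None:
            parent.children.append(self)
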
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])
    print("Action = {}".format(action))
    if game_status == 0 or game_status == tocode(next_mark(state[1])):
        win_count = win_count + step
        return win_count, loss_count
    elif game_status == tocode(state[1]):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        for action in aval_actions:
            print("Calling recursively for step {}".format(step))
            print("Win count and Loss count till this step = {} and {} for mark {}"
                  .format(win_count, loss_count, state[1]))
            # Recurse on a copy so the list being iterated isn't mutated
            # by the recursive call's remove().
            loss_count, win_count = find_loc_prob(state, aval_actions.copy(),
                                                  action, loss_count,
                                                  win_count, step - 1)
        return win_count, loss_count