def act(self, state, ava_actions):
    """Choose a move: an immediately winning one if available, else random.

    Each candidate in ``ava_actions`` is simulated with
    ``after_action_state``; the first candidate whose resulting board is
    reported as won by this agent's mark (``self.mark``) is returned.
    If no candidate wins outright, a uniformly random element of
    ``ava_actions`` is returned.

    Args:
        state: Current game state, passed through to ``after_action_state``.
        ava_actions: Sequence of actions that may be played now.

    Returns:
        One element of ``ava_actions``.
    """
    for candidate in ava_actions:
        # Element 0 of the simulated state is what check_game_status inspects
        # (presumably the board -- confirm against after_action_state).
        outcome = check_game_status(after_action_state(state, candidate)[0])
        if outcome <= 0:
            # Non-positive status: nobody has won after this move.
            continue
        if tomark(outcome) == self.mark:
            return candidate

    # No immediate win found; fall back to a random legal move.
    return random.choice(ava_actions)
def act(self, state, my_env):
    """Choose a move: an immediately winning one if available, else random.

    The legal moves are obtained from ``my_env.available_actions()`` and
    each one is simulated from ``my_env.state``. The first move whose
    resulting board is reported as won by ``self.mark`` is returned;
    otherwise a uniformly random legal move is returned.

    Args:
        state: Unused here; the state is read from ``my_env.state`` instead.
        my_env: Environment exposing ``available_actions()`` and ``state``.

    Returns:
        One of the actions returned by ``my_env.available_actions()``.
    """
    candidates = my_env.available_actions()

    for candidate in candidates:
        simulated = after_action_state(my_env.state, candidate)
        outcome = check_game_status(simulated[0])
        # A positive status means some player has won; check it is us.
        if outcome > 0 and tomark(outcome) == self.mark:
            return candidate

    # No immediate win found; fall back to a random legal move.
    return random.choice(candidates)
def act(self, state, my_env: TicTacToeEnv):
    """Choose a move: win if possible, else block the opponent, else random.

    The legal moves come from ``my_env.available_actions()``. They are
    scanned twice, in order:

    1. Simulate each move from ``state``; return the first one after which
       the game is reported as won by ``self.mark``.
    2. Simulate each move as if the other player were to move (same board,
       mark flipped via ``next_mark``); return the first one after which the
       game is reported as won by ``self.opponent_mark``, so that playing it
       ourselves takes that square away from the opponent.

    If neither scan finds a move, a uniformly random legal move is returned.

    Args:
        state: Current game state; indexed as ``state[0]`` and ``state[1]``
            (presumably board and mark-to-move -- confirm against the env).
        my_env: Environment used only to list the available actions.

    Returns:
        One of the actions returned by ``my_env.available_actions()``.
    """
    candidates = my_env.available_actions()

    # --- Step 1: play winning move, if possible ---
    for candidate in candidates:
        outcome = check_game_status(after_action_state(state, candidate)[0])
        if outcome > 0 and tomark(outcome) == self.mark:
            return candidate

    # --- Step 2: block opponent from winning ---
    # Imagine the opponent was playing: same board, other player's turn.
    opponent_turn_state = (state[0], next_mark(state[1]))
    for candidate in candidates:
        simulated = after_action_state(opponent_turn_state, candidate)
        outcome = check_game_status(simulated[0])
        # If they can make a winning move here, play that square ourselves.
        if outcome > 0 and tomark(outcome) == self.opponent_mark:
            return candidate

    # Nothing to win and nothing to block; pick any legal move.
    return random.choice(candidates)