def get_action(self, state):
    """Pick a column using simple one-ply tactics.

    Priority: take an immediately winning move, otherwise block an
    immediately winning move of the opponent, otherwise fall back to
    the base Agent's (random) choice.

    :param state: (list) 6x7 list representing the current state of the game
    :return int: Index of the column to put the piece (always a valid move)
    """
    candidate_moves = get_valid_moves(state)
    tiles = [self.tile, self.opponent_tile]

    # 1) If any of our moves wins the game outright, play it.
    for move in candidate_moves:
        next_state = make_move(state, move, tile=self.tile)
        if get_winner(next_state, tiles) == self.tile:
            return move

    # 2) If the opponent could win by playing a column, occupy it first.
    for move in candidate_moves:
        next_state = make_move(state, move, tile=self.opponent_tile)
        if get_winner(next_state, tiles) == self.opponent_tile:
            return move

    # 3) No tactical move available: defer to the base agent's policy.
    return Agent.get_action(self, state)
def simulate(games=10, log_every=100):
    """Self-play training loop: a LearningAgent (tile 1) vs a base Agent (tile -1).

    Plays `games` full games, feeds each finished game to the learning
    agent, and prints a running win count every `log_every` games.
    Returns the trained learning agent.
    """
    results = []
    won = 0
    agents = [LearningAgent(1), Agent(-1)]
    # Shared round-robin iterator; its position persists across games,
    # so the player to move carries over from one game to the next.
    players = cycle(agents)
    # Randomly skip 0 or 1 turns so either agent may start the first game.
    for _ in range(random.randrange(2)):
        current_player = next(players)
    for iteration in range(1, games + 1):
        state = get_initial_state()
        current_game = []  # transitions made by the learning agent only
        # Play until the board has no legal moves left
        # (NOTE(review): assumes get_winner-terminal states also end the
        # loop via no-valid-moves or are handled downstream — confirm).
        while get_valid_moves(state):
            current_player = next(players)
            initial_state = state
            action = current_player.get_action(state)
            state = make_move(state, action, current_player.tile)
            # One (s, a, s') transition; reward 'r' is attached later.
            turn = {
                'st': initial_state,
                'a': action,
                'st1': state
            }
            # Only record the learning agent's (tile 1) transitions.
            if current_player.tile == 1:
                current_game.append(turn)
        # clean game: attach the terminal reward to the last recorded
        # transition and mark it as terminal (no successor state).
        reward = get_winner(state)
        current_game[-1]['r'] = reward
        current_game[-1]['st1'] = None
        # log: positive reward means the learning agent won this game.
        if reward > 0:
            won += 1
        if iteration % log_every == 0:
            print('won %s games out of %s' %(won, log_every))
            won = 0
        # learn from the freshly finished game
        agents[0].learn(current_game)
        results.append(current_game)
    return agents[0]
def get_action(self, state):
    """Epsilon-greedy action selection from the Q-table.

    With probability 0.1 explores via the base Agent's (random) policy;
    otherwise plays the highest-Q valid move for the current state.

    :param state: current game state (list of lists)
    :return int: index of the chosen valid move
    """
    valid_moves = get_valid_moves(state)
    # Exploration: 10% of the time take the base agent's random action.
    if random.random() < 0.1:
        return Agent.get_action(self, state)
    parsed_state = parse_state(state)
    # argsort gives move indices ordered by ascending Q-value; reverse it
    # to scan moves from best to worst.
    # BUG FIX: the old code did `predicted_moves.index(i)`, which yields
    # the *rank position* of move i, not the i-th best move, and it also
    # hard-coded a board size of 9.
    ranked_moves = np.array(self.Q[parsed_state]).argsort()[::-1]
    for move in ranked_moves:
        # Take the best-ranked move that is actually legal.
        if move in valid_moves:
            return int(move)
    # Defensive fallback (unreachable while valid_moves is non-empty and
    # indexes into the Q row): defer to the base policy.
    return Agent.get_action(self, state)
def get_action(self, state):
    """Greedy one-step lookahead over the value table.

    Simulates each valid move, looks up the value of the resulting state,
    and plays the best one. If no simulated state has a positive value,
    explores with probability 0.1 via the base Agent's policy.

    :param state: current game state (list of lists)
    :return int: index of the chosen valid move
    """
    valid_moves = get_valid_moves(state)
    self.add_state(state)
    max_move = None
    # BUG FIX: was `max_value = None`; on Python 3 `value > None` raises
    # TypeError (ordering comparisons with None are illegal). -inf keeps
    # the original "first value always wins" semantics.
    max_value = float('-inf')
    for move in valid_moves:
        simulated_state = make_move(state, move, self.tile)
        parsed_simulated_state = parse_state(simulated_state)
        # Ensure the successor state has an entry in the value table.
        self.add_state(simulated_state, parsed_simulated_state)
        state_value = self.Q[parsed_simulated_state]
        if state_value > max_value:
            max_value = state_value
            max_move = move
    if max_value > 0:
        # A clearly good move exists: exploit it.
        return max_move
    # No positive-value move: explore 10% of the time, otherwise still
    # play the least-bad known move.
    if random.random() < 0.1:
        return Agent.get_action(self, state)
    return max_move
def get_action(self, state):
    """Choose a move uniformly at random among the legal ones.

    :param state: current game state (list of lists)
    :return int: index of a randomly chosen valid move
    """
    legal_moves = get_valid_moves(state)
    return random.choice(legal_moves)
def get_action(self, state):
    """Pick a uniformly random legal column.

    :param state: 6x7 list representing the current state of the game
    :return int: Index of the column to put the piece (always checks for valid moves)
    """
    legal_columns = get_valid_moves(state)
    return random.choice(legal_columns)