def get_action(self, state):
    """
    :param state: (list) 6x7 list representing the current state of the game
    :return int: index of the column to put the piece (always checks for valid moves)
    """
    # get all possible moves
    possible_actions = get_valid_moves(state)
    tiles = [self.tile, self.opponent_tile]

    # check if it has a winning move
    for action in possible_actions:
        simulated_state = make_move(state, action, tile=self.tile)
        # if the simulated next state is a winning game
        if get_winner(simulated_state, tiles) == self.tile:
            # take the action
            return action

    # check if the opponent has a winning move
    for action in possible_actions:
        simulated_state = make_move(state, action, tile=self.opponent_tile)
        # if the simulated state is a losing game
        if get_winner(simulated_state, tiles) == self.opponent_tile:
            # block that move
            return action

    # otherwise take a random action
    return Agent.get_action(self, state)
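# A minimal sketch of the win-then-block heuristic above, not part of the original
# module: it assumes the same get_initial_state/make_move helpers and a hypothetical
# `HeuristicAgent` subclass of Agent that defines get_action as shown.
state = get_initial_state()
for _ in range(3):
    state = make_move(state, 0, tile=1)    # three of our pieces stacked in column 0
    state = make_move(state, 6, tile=-1)   # opponent's replies elsewhere
agent = HeuristicAgent(1)                  # hypothetical name for the agent class
assert agent.get_action(state) == 0        # the completing, winning move is picked first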
def add_state(self, state, parsed_state=None):
    # register a new state in the Q table, initialising its value from the board outcome:
    # 1 if player 1 has won, -1 if player -1 has won, 0 otherwise
    if not parsed_state:
        parsed_state = parse_state(state)
    if parsed_state not in self.Q:
        winner = get_winner(state, [1, -1])
        if winner == 1:
            self.Q[parsed_state] = 1
        elif winner == 0:
            self.Q[parsed_state] = 0
        elif winner == -1:
            self.Q[parsed_state] = -1
        self.added_states += 1
def simulate(games=10, log_every=100):
    results = []
    won = 0
    agents = [LearningAgent(1), Agent(-1)]
    players = cycle(agents)

    # randomize which agent plays first
    for _ in range(random.randrange(2)):
        current_player = next(players)

    for iteration in range(1, games + 1):
        state = get_initial_state()
        current_game = []
        while get_valid_moves(state):
            current_player = next(players)
            initial_state = state
            action = current_player.get_action(state)
            state = make_move(state, action, current_player.tile)
            turn = {
                'st': initial_state,
                'a': action,
                'st1': state
            }
            # only record the learning agent's turns
            if current_player.tile == 1:
                current_game.append(turn)

        # clean game
        reward = get_winner(state, [1, -1])
        current_game[-1]['r'] = reward
        current_game[-1]['st1'] = None

        # log
        if reward > 0:
            won += 1
        if iteration % log_every == 0:
            print('won %s games out of %s' % (won, log_every))
            won = 0

        # learn
        agents[0].learn(current_game)
        results.append(current_game)

    return agents[0]
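# Hedged usage sketch for the trainer above; it assumes the module-level imports
# (random, itertools.cycle) and the LearningAgent/Agent classes it references.
trained_agent = simulate(games=1000, log_every=100)
# trained_agent.Q now maps parsed board states to learned values (see add_state above).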
def get_reward(self, a, is_final_period: bool):
    """
    Method calculates the reward for an action.
    Note: since variable 'a' refers to the state resulting from the action,
    we do not need to know the preceding state.
    :param a: index of the state resulting from the action
    :param is_final_period: True if bidding has ended
    :return: reward for the current period
    """
    bids = self.S[a].current_bids
    price_paid1 = bids[self.player_id]
    price_paid2 = bids[1 - self.player_id]
    # the auction is won if bidding has ended, the current player's bid is not nan,
    # and it is at least as high as the opponent's bid (a nan opponent bid never wins)
    won_auction = is_final_period & (~np.isnan(price_paid1)) & ((price_paid1 >= price_paid2) | np.isnan(price_paid2))
    is_tie = env.get_winner((price_paid1, price_paid2))
    r = self.calc_final_reward(won_auction, price_paid1, self.agent_valuation, is_tie)
    return r
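# The NaN handling in won_auction is easy to misread; a small stand-alone
# illustration with made-up bid values (not part of the original code):
import numpy as np

price_paid1, price_paid2 = 3.0, np.nan     # opponent never bid
won = True & (~np.isnan(price_paid1)) & ((price_paid1 >= price_paid2) | np.isnan(price_paid2))
print(won)                                 # True: an un-matched bid wins once bidding ends

price_paid1, price_paid2 = 3.0, 4.0        # opponent bid higher
won = True & (~np.isnan(price_paid1)) & ((price_paid1 >= price_paid2) | np.isnan(price_paid2))
print(won)                                 # False: the >= comparison fails and no NaN fallback applies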
def parse_game(current_game, last_state, gamma, tiles):
    # clean results for the game: attach a discounted final reward to every stored turn
    clean_game = []
    turns = len(current_game)
    reward = get_winner(last_state, tiles)
    for i, step in enumerate(current_game):
        clean_step = {
            'st': step[0],
            'action': step[1],
            'reward': gamma ** (turns - i - 1) * reward
        }
        # try:
        #     clean_step['st_1'] = current_game[i + 1][0]
        # except IndexError:
        #     clean_step['st_1'] = None
        clean_game.append(clean_step)
    return clean_game
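# For intuition, with gamma = 0.9 and a win (reward = 1) after three stored turns,
# the discounting formula above yields rewards that decay toward the earlier moves.
# Placeholder values only; parse_game itself takes real (state, action) tuples.
gamma, reward, turns = 0.9, 1, 3
print([round(gamma ** (turns - i - 1) * reward, 2) for i in range(turns)])
# [0.81, 0.9, 1.0] -> the final, winning move keeps the undiscounted reward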
def simulate(agents=None, iterations=10, tiles=[1, -1], log=True, backup=False, print_every=10):
    # avoid a mutable default argument: start from a fresh [None, None] on every call
    if agents is None:
        agents = [None, None]

    # if the agents are not passed, create dumb agents
    for i in range(len(agents)):
        if not agents[i]:
            agents[i] = Agent(tiles[i])
        else:
            agents[i].set_tile(tiles[i])

    # create an iterator for alternate players
    players = cycle(agents)

    # initialize lists and counters to return and log
    results = []
    won = 0
    games_started = 0
    total_reward = 0

    # run n simulations
    for iteration in range(1, iterations + 1):
        # print('iteration ' + str(iteration) + ' of ' + str(iterations))

        # get an empty board
        state = get_initial_state()

        # initialize the list for this game
        current_game = []

        # randomize the first agent to play
        for _ in range(randrange(1, 3)):
            current_player = next(players)

        # flag used below to count only the games agent 1 started
        played_first = current_player.tile == 1

        # play until the game is over
        while not game_over(state, tiles):
            # keep the initial state for this turn
            initial_state = state

            # change current player
            current_player = next(players)

            # ask the agent to give an action
            action = current_player.get_action(state)

            # perform the action and update the state of the game
            state = make_move(state, action, current_player.tile)

            # add the current turn (state before and after the move) to the list
            current_game.append(initial_state)
            current_game.append(state)

        # add the last game to the results list
        results.append(current_game)

        for agent in agents:
            if agent.learns:
                agent.learn(current_game)

        if log:
            reward = get_winner(state, tiles)
            total_reward += reward
            if reward > 0 and played_first:
                won += 1
            games_started += 1 if played_first else 0
            if iteration % print_every == 0:
                print('won ' + str(won) + ' out of ' + str(games_started) + ' games')
                print('reward: ' + str(total_reward))
                total_reward = 0
                won = 0

        if backup and iteration % print_every == 0:
            save_q(agents[0])

    return agents[0], results
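# Hedged usage sketch: train a LearningAgent against a default random Agent,
# backing up its Q table (via save_q) every 500 games. The keyword values are arbitrary.
learner = LearningAgent(1)                 # tile is reassigned by simulate via set_tile
trained, results = simulate(agents=[learner, None], iterations=10000,
                            backup=True, print_every=500)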