Example #1
    def get_action(self, state):
        """
        :param state : (list) 6x7 list representing the current state of the game
        :return int: Index of the column to put the piece (always checks for valid moves)
        """

        # get all possible moves
        possible_actions = get_valid_moves(state)
        tiles = [self.tile, self.opponent_tile]

        # check if it has a winning move
        for action in possible_actions:
            simulated_state = make_move(state, action, tile=self.tile)
            # if the simulated next state is a winning game
            if get_winner(simulated_state, tiles) == self.tile:
                # take the action
                return action

        # check if the opponent has a winning move
        for action in possible_actions:
            simulated_state = make_move(state, action, tile=self.opponent_tile)
            # if the simulated state is a losing game
            if get_winner(simulated_state, tiles) == self.opponent_tile:
                # block that move
                return action

        # otherwise take random action
        return Agent.get_action(self, state)
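The snippet leans on a few board helpers that are not shown (get_valid_moves, make_move, get_winner). Below is a minimal sketch of what they could look like for the 6x7 board, matching the call sites above; the bodies are assumptions, not the original code:

def get_valid_moves(state):
    # a column is playable while its top cell is still empty (0)
    return [col for col in range(7) if state[0][col] == 0]

def make_move(state, action, tile):
    # drop `tile` into column `action`, returning a new board and leaving `state` untouched
    new_state = [row[:] for row in state]
    for row in range(5, -1, -1):  # scan from the bottom row upwards
        if new_state[row][action] == 0:
            new_state[row][action] = tile
            break
    return new_state

def get_winner(state, tiles):
    # return the tile that has four in a row (horizontally, vertically
    # or diagonally), or 0 if there is no winner yet
    for tile in tiles:
        for r in range(6):
            for c in range(7):
                for dr, dc in ((0, 1), (1, 0), (1, 1), (1, -1)):
                    cells = [(r + i * dr, c + i * dc) for i in range(4)]
                    if all(0 <= rr < 6 and 0 <= cc < 7 and state[rr][cc] == tile
                           for rr, cc in cells):
                        return tile
    return 0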
Example #2
    def add_state(self, state, parsed_state=None):
        if parsed_state is None:
            parsed_state = parse_state(state)

        if parsed_state not in self.Q:
            winner = get_winner(state, [1, -1])
            if winner == 1:
                self.Q[parsed_state] = 1
            elif winner == 0:
                self.Q[parsed_state] = 0
            elif winner == -1:
                self.Q[parsed_state] = -1
            self.added_states += 1
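parse_state is not shown here; since its result is used as a key into self.Q, it presumably turns the nested board list into something hashable. A minimal sketch under that assumption:

def parse_state(state):
    # flatten the 6x7 nested list into a hashable tuple usable as a dict key
    return tuple(tile for row in state for tile in row)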
Example #3
import random
from itertools import cycle


def simulate(games=10, log_every=100):
    results = []
    won = 0
    agents = [LearningAgent(1), Agent(-1)]

    players = cycle(agents)

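    # advance the cycle zero or one step so either agent may move first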
    for _ in range(random.randrange(2)):
        current_player = next(players)

    for iteration in range(1, games + 1):
        state = get_initial_state()

        current_game = []

        # play until someone wins or the board is full
        while get_valid_moves(state) and not get_winner(state, [1, -1]):
            current_player = next(players)
            initial_state = state
            action = current_player.get_action(state)
            state = make_move(state, action, current_player.tile)

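            # record the transition: 'st' = state before the move,
            # 'a' = action taken, 'st1' = resulting state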
            turn = {
                'st': initial_state,
                'a': action,
                'st1': state
            }

            if current_player.tile == 1:
                current_game.append(turn)

        # game over: attach the final reward to the learner's last recorded move
        reward = get_winner(state, [1, -1])
        current_game[-1]['r'] = reward
        current_game[-1]['st1'] = None

        # log
        if reward > 0:
            won += 1

        if iteration % log_every == 0:
            print('won %s games out of %s' % (won, log_every))
            won = 0

        # let the learning agent update from this game's transitions
        agents[0].learn(current_game)

        results.append(current_game)

    return agents[0]
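A minimal usage sketch (LearningAgent, Agent and the board helpers are assumed from the surrounding project):

# train the learning agent over 1,000 self-play games, logging every 100
trained_agent = simulate(games=1000, log_every=100)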
Example #4
    def get_reward(self, a, is_final_period: bool):
        """
        Calculate the reward for the current period.
        Note: since 'a' refers to the state resulting from the action, the preceding state is not needed.
        :param a: index of the resulting state in self.S
        :param is_final_period: whether bidding has ended
        :return: reward for the current period
        """

        bids = self.S[a].current_bids
        price_paid1 = bids[self.player_id]
        price_paid2 = bids[1 - self.player_id]

        # won if bidding has ended, the player's own bid is not nan, and it is
        # at least the opponent's bid (or the opponent never bid)
        won_auction = is_final_period & (~np.isnan(price_paid1)) & ((price_paid1 >= price_paid2) | np.isnan(price_paid2))
        is_tie = env.get_winner((price_paid1, price_paid2))
        r = self.calc_final_reward(won_auction, price_paid1, self.agent_valuation, is_tie)

        return r
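The NaN handling in won_auction is easy to misread, so here is a tiny worked check (numpy only; the bid values are made up):

import numpy as np

# player 0 bid 5.0 in the final period, player 1 never bid (nan)
price_paid1, price_paid2 = 5.0, np.nan
won = True & (~np.isnan(price_paid1)) & ((price_paid1 >= price_paid2) | np.isnan(price_paid2))
print(won)  # True: a lone non-nan bid wins once bidding has ended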
Example #5
def parse_game(current_game, last_state, gamma, tiles):
    # clean results for game
    clean_game = []

    turns = len(current_game)
    reward = get_winner(last_state, tiles)

    for i, step in enumerate(current_game):

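        # credit assignment: the final reward decays exponentially with distance from the last move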
        clean_step = {
            'st': step[0],
            'action': step[1],
            'reward': gamma ** (turns - i - 1) * reward
        }

        # optionally record the next state as well (currently disabled):
        # try:
        #     clean_step['st_1'] = current_game[i + 1][0]
        # except IndexError:
        #     clean_step['st_1'] = None

        clean_game.append(clean_step)

    return clean_game
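Spelled out, the discounting gives the final move the raw reward and earlier moves exponentially less credit:

# a game of 3 turns won by tile 1 (reward 1) with gamma = 0.9
gamma, turns, reward = 0.9, 3, 1
print([gamma ** (turns - i - 1) * reward for i in range(turns)])
# [0.81, 0.9, 1.0]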
Example #6
from itertools import cycle
from random import randrange


def simulate(agents=None, iterations=10, tiles=[1, -1], log=True, backup=False, print_every=10):

    # if the agents are not passed, create dumb agents
    # (the default is None rather than a mutable list, which would be shared across calls)
    if agents is None:
        agents = [None, None]
    for i in range(len(agents)):
        if not agents[i]:
            agents[i] = Agent(tiles[i])
        else:
            agents[i].set_tile(tiles[i])

    # create an iterator for alternate players
    players = cycle(agents)

    # initialize list to return
    results = []
    won = 0
    games_started = 0
    total_reward = 0

    # run n simulations
    for iteration in range(1, iterations + 1):
        # print('iteration ' + str(iteration) + ' of ' + str(iterations))

        # get an empty board
        state = get_initial_state()

        # initialize the list for this game
        current_game = []

        # randomize the first agent to play; note the while loop below
        # advances the cycle again before each move, so the first mover is
        # the player *after* current_player
        for _ in range(randrange(1, 3)):
            current_player = next(players)

        # agent 1 moves first exactly when the cycle stopped on the other agent
        played_first = current_player.tile != 1

        # play until the game is over
        while not game_over(state, tiles):

            # remember the state at the start of this turn
            initial_state = state
            
            # change current player
            current_player = next(players)

            # ask the agent to give an action
            action = current_player.get_action(state)

            # perform the action and update the state of the game
            state = make_move(state, action, current_player.tile)
            
            # record the state the move was made from
            current_game.append(initial_state)

        current_game.append(state)

        # add the last game to the results list
        results.append(current_game)

        for agent in agents:
            if agent.learns:
                agent.learn(current_game)

        if log:
            reward = get_winner(state, tiles)
            total_reward += reward
            if reward > 0 and played_first:
                won += 1
            games_started += 1 if played_first else 0

            if iteration % print_every == 0:
                print('won ' + str(won) + ' out of ' + str(games_started) + ' games')
                print('reward: ' + str(total_reward))

                total_reward = 0
                won = 0

            if backup and iteration % print_every == 0:
                save_q(agents[0])

    return agents[0], results
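A minimal usage sketch, assuming the LearningAgent class from Example #3:

# train a learning agent against a random opponent; with backup=True the
# Q-table is saved via save_q every print_every iterations
learner, games = simulate(agents=[LearningAgent(1), None], iterations=1000, backup=True)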