import numpy as np
import ray

# NOTE: the local imports below are assumed from context (this module
# references Agent, GameState, PUCTNode and several config constants);
# adjust the module names to match the actual project layout.
from agent import Agent
from gameState import GameState
from puctNode import PUCTNode
from config import (MAX_NODES, TEMP, TEMP_THRSHLD,
                    SELF_PLAY_SEARCH_ITERS, SELF_PLAY_MARKOV_EXP,
                    PLAYS_PER_EVAL, EVAL_PLAY_SEARCH_ITERS,
                    EVAL_PLAY_MARKOV_EXP)


class SelfPlay(object):
    '''Abstract class for construction of remote self-play actors.
    '''
    def __init__(self):
        '''Instantiates a self-play actor.

        Returns
        -------
        None.

        '''
        self.agent = Agent(path='./model_data/alpha_0.pt')

    def run(self,
            replay_buffer,
            update_signal,
            self_play_id,
            search_iters=SELF_PLAY_SEARCH_ITERS,
            markov_exp=SELF_PLAY_MARKOV_EXP,
            temp=TEMP,
            temp_thrshld=TEMP_THRSHLD):
        '''Starts an indefinite self-play loop.

        The games for self-play are generated via an ongoing Markov chain
        as described in randomDag.py. The self-play processes are
        synchronized with one another and with the train and evaluation
        processes via 'replay_buffer' and 'update_signal', respectively.
        'replay_buffer' stores the self-play data and triggers the start of
        training, while 'update_signal' triggers model parameter updates.

        Parameters
        ----------
        replay_buffer : ReplayBuffer
            remote actor for managing self-play data between self-play
            processes and the Train process. Also carries the signal to
            start training.
        update_signal : UpdateSignal
            remote actor for synchronization between self-play processes
            and evaluation processes. Triggers model parameter updates.
        self_play_id : int (nonnegative)
            unique identifier for the self-play process.
        search_iters : int (positive), optional
            the number of search iterations to perform during MCTS. The
            default is SELF_PLAY_SEARCH_ITERS.
        markov_exp : float, optional
            the exponent determining the number of steps taken in the
            Markov chain when generating games for self-play. The default
            is SELF_PLAY_MARKOV_EXP.
        temp : float (nonnegative), optional
            partially controls exploration. If 0, the policy is
            deterministic and the move with the highest visit count from
            MCTS is chosen. The default is TEMP.
        temp_thrshld : int (nonnegative), optional
            the number of moves after which the policy becomes
            deterministic, i.e., temp is set to 0. (See temp, above.) The
            default is TEMP_THRSHLD.

        Returns
        -------
        None.

        '''
        # put agent in evaluation mode
        self.agent.model.eval()
        # the action space
        actions = np.arange(MAX_NODES)
        # game state generator via an ongoing Markov chain
        state_generator = GameState.state_generator(markov_exp)
        # start indefinite self-play loop
        while True:
            # check for updates
            if ray.get(update_signal.get_update.remote(self_play_id)):
                # get current update_id
                update_id = ray.get(update_signal.get_update_id.remote())
                # load current alpha parameters
                self.agent.load_parameters(
                    path=f'./model_data/alpha_{update_id}.pt')
                # reset the update signal
                update_signal.clear_update.remote(self_play_id)
            # get a game and play
            initial_state = next(state_generator)
            root = PUCTNode(initial_state)
            states = []
            policies = []
            move_count = 0
            while not root.state.is_terminal_state():
                t = temp if move_count < temp_thrshld else 0
                policy = self.agent.MCTS(root, search_iters, t)
                move = np.random.choice(actions, p=policy)
                states.append(root.state.encoded_state)
                policies.append(policy)
                # make the chosen child the new root, detaching it from
                # its parent so the next MCTS starts from there
                root = root.edges[move]
                root.to_root()
                move_count += 1
            # update state values as seen from the current player's
            # perspective: the player who made the last move wins, so the
            # final recorded state gets value +1 and signs alternate
            # backwards from there
            if move_count % 2 == 0:
                values = [(-1)**(i + 1) for i in range(move_count)]
            else:
                values = [(-1)**i for i in range(move_count)]
            # construct training data from self-play
            train_data = [
                (state, policy, value)
                for state, policy, value in zip(states, policies, values)
            ]
            # add training data to replay buffer
            replay_buffer.add.remote(train_data)

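# --- Usage sketch (not part of the original module) -----------------------
# How a SelfPlay actor is presumably launched under Ray, per the docstring
# above. The ReplayBuffer/UpdateSignal constructors and the worker count are
# assumptions; the actual orchestration lives in the main script,
# asyn_training.py.
#
#   ray.init()
#   replay_buffer = ReplayBuffer.remote()
#   update_signal = UpdateSignal.remote()
#   workers = [ray.remote(SelfPlay).remote() for _ in range(4)]
#   for i, worker in enumerate(workers):
#       worker.run.remote(replay_buffer, update_signal, self_play_id=i)
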
class Evaluation(object):
    '''Abstract class for construction of remote evaluation actors.
    '''
    def __init__(self, update_id):
        '''Instantiates an Evaluation actor.

        Parameters
        ----------
        update_id : int (nonnegative)
            the current update_id. When an Evaluation actor is
            instantiated, the actor's alpha agent pulls from the most
            current alpha parameters.

        Returns
        -------
        None.

        '''
        self.alpha_agent = Agent(path=f'./model_data/alpha_{update_id}.pt')
        self.apprentice_agent = Agent(path='./model_data/apprentice.pt')

    def update_alpha_parameters(self, update_id):
        '''Updates the alpha parameters by promoting the current apprentice
        parameters. Used if an update is triggered after an evaluation.

        Parameters
        ----------
        update_id : int (positive)
            the index of the update to the alpha parameters. (Each time an
            update is triggered, the update_id is incremented by 1 and the
            new alpha parameters are saved indexed by the current
            update_id.)

        Returns
        -------
        None.

        '''
        self.apprentice_agent.save_parameters(
            path=f'./model_data/alpha_{update_id}.pt')

    def run(self,
            num_plays=PLAYS_PER_EVAL,
            search_iters=EVAL_PLAY_SEARCH_ITERS,
            markov_exp=EVAL_PLAY_MARKOV_EXP):
        '''Starts an evaluation.

        The evaluation process is synchronized with the self-play processes
        via instances of UpdateSignal and AsyncSignal in the main script,
        asyn_training.py. The UpdateSignal triggers an update in each of
        the self-play processes if the ratio of apprentice wins to total
        evaluation games surpasses the declared win ratio, while the
        AsyncSignal triggers the evaluation processes.

        Parameters
        ----------
        num_plays : int (positive), optional
            the number of evaluation games to play. The default is
            PLAYS_PER_EVAL.
        search_iters : int (positive), optional
            the number of search iterations to perform during MCTS. The
            default is EVAL_PLAY_SEARCH_ITERS.
        markov_exp : float, optional
            the exponent determining the number of steps taken in the
            Markov chain when generating games for evaluation. The default
            is EVAL_PLAY_MARKOV_EXP.

        Returns
        -------
        apprentice_wins : int (nonnegative)
            the number of apprentice wins.

        '''
        # put models in evaluation mode
        self.alpha_agent.model.eval()
        self.apprentice_agent.model.eval()
        # setup gameplay
        alpha = 0
        apprentice = 1
        actions = np.arange(MAX_NODES)
        state_generator = GameState.state_generator(markov_exp)
        apprentice_wins = 0
        # start evaluation game play
        for _ in range(num_plays):
            # uniformly randomly choose which agent plays first
            next_move = np.random.choice([alpha, apprentice])
            # play a randomly generated game of upset-downset
            game_state = next(state_generator)
            while not game_state.is_terminal_state():
                root = PUCTNode(game_state)
                policy = self.alpha_agent.MCTS(root, search_iters, 0) \
                    if next_move == alpha \
                    else self.apprentice_agent.MCTS(root, search_iters, 0)
                move = np.random.choice(actions, p=policy)
                game_state = root.edges[move].state
                next_move = 1 - next_move
            # decide winner: the player to move at a terminal state has no
            # moves and loses, so the other player wins
            winner = 1 - next_move
            if winner == apprentice:
                apprentice_wins += 1

        return apprentice_wins
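
# --- Usage sketch (not part of the original module) -----------------------
# One evaluation round as presumably orchestrated by asyn_training.py, per
# the docstrings above: play the evaluation games, and if the apprentice's
# win fraction clears the declared win ratio, increment update_id, promote
# the apprentice parameters, and signal the self-play actors. WIN_RATIO and
# the UpdateSignal method name set_update are assumptions.
#
#   evaluator = ray.remote(Evaluation).remote(update_id)
#   apprentice_wins = ray.get(evaluator.run.remote())
#   if apprentice_wins / PLAYS_PER_EVAL > WIN_RATIO:
#       update_id += 1
#       ray.get(evaluator.update_alpha_parameters.remote(update_id))
#       update_signal.set_update.remote(update_id)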