Python maskAndMoves Examples

Programming Language: Python

Namespace/Package Name: agent

Method/Function: maskAndMoves

Examples at hotexamples.com: 3

Python maskAndMoves - 3 examples found. These are the top-rated real-world Python examples of agent.maskAndMoves extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def play_episode(root, max_depth, apprentice, move_type="all", verbose=False):
    """Roll out one episode from ``root`` by sampling moves from the apprentice.

    The rollout is played on a deep copy of ``root``. On every iteration the
    apprentice policy is multiplied by the mask returned by
    ``agent.maskAndMoves``, normalised, and one action is sampled from the
    resulting distribution.

    Args:
        root: Starting state. It is deep-copied before any move is played.
        max_depth: Maximum number of loop iterations. Turns skipped because
            the active player is dead count towards this limit.
        apprentice: Object whose ``getPolicy(state)`` returns a
            ``(policy, value)`` pair; only ``policy`` is used here.
            NOTE(review): ``policy`` is assumed to be array-like and
            broadcastable against the mask -- confirm against the apprentice.
        move_type: ``"all"`` to record every visited state, otherwise only
            states whose ``gamePhase`` equals this value are recorded.
        verbose: Accepted for backward compatibility; currently has no effect.

    Returns:
        A list of deep-copied states, each captured right before the sampled
        move was played on it.

    Raises:
        ValueError: If the masked policy has no probability mass and the mask
            itself does not admit any action either.
        Exception: Whatever ``apprentice.getPolicy`` raises is re-raised after
            the state has been reported.
    """
    episode = []
    state = copy.deepcopy(root)
    edge_index = boardToData(root).edge_index

    for _ in range(max_depth):
        # Check if episode is over
        if state.gameOver:
            break

        # Check if the current player is alive; a dead player just passes.
        if not state.activePlayer.is_alive:
            state.endTurn()
            continue

        # Get possible moves, and apprentice policy
        mask, actions = agent.maskAndMoves(state, state.gamePhase, edge_index)

        try:
            policy, _value = apprentice.getPolicy(state)
        except Exception:
            # Dump diagnostics, then re-raise the original exception untouched.
            state.report()
            print(state.activePlayer.is_alive)
            print(state.activePlayer.num_countries)
            raise

        if isinstance(mask, torch.Tensor):
            mask = mask.detach().numpy()

        probs = (policy * mask).flatten()
        total = probs.sum()
        if total > 0:
            probs = probs / total
        else:
            # The policy puts no mass on any masked-in action (or the sum is
            # NaN). Dividing would yield NaNs and an opaque error inside
            # np.random.choice, so sample uniformly from the mask instead.
            legal = np.asarray(mask, dtype=np.float64).flatten()
            legal_total = legal.sum()
            if legal.size != len(actions) or not legal_total > 0:
                raise ValueError(
                    "play_episode: masked policy has no probability mass and "
                    "the mask admits no action to fall back on"
                )
            probs = legal / legal_total

        ind = np.random.choice(len(actions), p=probs)
        move = agent.buildMove(state, actions[ind])

        # Record the state the move was chosen from, if its phase is wanted.
        if move_type == "all" or move_type == state.gamePhase:
            episode.append(copy.deepcopy(state))

        # Play the move to continue
        state.playMove(move)

    return episode

Example #2

Show file

def play_episode(root, max_depth, apprentice):
    """Roll out one episode from ``root`` by sampling moves from the apprentice.

    The rollout is played on a deep copy of ``root``. On every iteration the
    policy returned by ``apprentice.play`` is multiplied by the mask from
    ``maskAndMoves``, normalised, and one action is sampled from it.

    Args:
        root: Starting state. It is deep-copied before any move is played.
        max_depth: Maximum number of loop iterations. Turns skipped because
            the active player is dead count towards this limit.
        apprentice: Object whose ``play(state)`` returns a ``(policy, value)``
            pair; only ``policy`` is used here. The policy must support
            ``.detach().numpy()`` after being multiplied by the mask.

    Returns:
        A list of deep-copied states, one per sampled move, each captured
        right before that move was played.

    Raises:
        ValueError: If the masked policy has no probability mass and the mask
            itself does not admit any action either.
        Exception: Whatever ``apprentice.play`` raises is re-raised after the
            state has been reported.
    """
    episode = []
    state = copy.deepcopy(root)
    edge_index = boardToData(root).edge_index

    for _ in range(max_depth):
        # Check if episode is over
        if state.gameOver:
            break

        # Check if the current player is alive; a dead player just passes.
        if not state.activePlayer.is_alive:
            state.endTurn()
            continue

        # Get possible moves, and apprentice policy
        mask, actions = maskAndMoves(state, state.gamePhase, edge_index)
        try:
            policy, _value = apprentice.play(state)
        except Exception:
            # Dump diagnostics, then re-raise the original exception untouched.
            state.report()
            print(state.activePlayer.is_alive)
            print(state.activePlayer.num_countries)
            raise

        policy = policy * mask
        # reshape(-1) rather than squeeze(): squeeze() would turn a
        # one-element policy into a 0-d array, which np.random.choice rejects.
        probs = policy.detach().numpy().reshape(-1)
        total = probs.sum()
        if total > 0:
            probs = probs / total
        else:
            # The policy puts no mass on any masked-in action (or the sum is
            # NaN). Dividing would yield NaNs and an opaque error inside
            # np.random.choice, so sample uniformly from the mask instead.
            legal = mask.detach().numpy() if hasattr(mask, "detach") else mask
            legal = np.asarray(legal, dtype=np.float64).reshape(-1)
            legal_total = legal.sum()
            if legal.size != len(actions) or not legal_total > 0:
                raise ValueError(
                    "play_episode: masked policy has no probability mass and "
                    "the mask admits no action to fall back on"
                )
            probs = legal / legal_total

        ind = np.random.choice(len(actions), p=probs)
        move = buildMove(state, actions[ind])

        episode.append(copy.deepcopy(state))

        # Play the move to continue
        state.playMove(move)

    return episode

Example #3

Show file


#%%%
# Play at least one step, then keep playing until the game is over or the
# board has reached the "attack" phase.
while True:
    board.play()
    if board.gameOver or board.gamePhase == "attack":
        break

board.report()
print(board.countriesPandas())
print("\n")

# Get policy for board, evaluated on its canonical form for the active player
canon, _ = board.toCanonical(board.activePlayer.code)
batch = torch_geometric.data.Batch.from_data_list([boardToData(canon)])
mask, moves = agent.maskAndMoves(canon, canon.gamePhase, batch.edge_index)
policy, value = apprentice.getPolicy(canon)
pop = policy.squeeze()

# Re-weight the policy as p ** (1 / T) and renormalise. Entries are clipped
# from below at 1e-6 so the logarithm stays finite.
T = 1
exp = np.exp(np.log(np.maximum(pop, 0.000001)) / T)
soft = exp / exp.sum()

# Print one line per masked-in move: raw policy value, then re-weighted value.
co = board.countries()
for m, a, p, s in zip(mask.squeeze(), moves, pop, soft):
    if not m.item():
        continue
    if len(a) <= 2:
        # Moves with at most two fields reference a single country.
        print(f"{a[0]}: {co[a[1]]['id']} - {p:.3f}- {s:.3f}")
        continue
    print(
        f"{a[0]}: {co[a[1]]['id']} -> {co[a[2]]['id']} - {p:.3f} - {s:.3f}")