# Example 1
# 0
def play_episode(root, max_depth, apprentice, move_type="all", verbose=False):
    """Roll out up to ``max_depth`` moves from ``root`` using the apprentice policy.

    Parameters
    ----------
    root : board state to start from (deep-copied, never mutated).
    max_depth : int, maximum number of turns to simulate.
    apprentice : object exposing ``getPolicy(state) -> (policy, value)``.
    move_type : "all" to record every state, or a game-phase name to
        record only states belonging to that phase.
    verbose : when True, print a one-line trace per turn.

    Returns
    -------
    list of deep-copied states visited (filtered by ``move_type``).
    """
    episode = []
    state = copy.deepcopy(root)
    edge_index = boardToData(root).edge_index
    # ******************* PLAY EPISODE ***************************
    for i in range(max_depth):
        # Episode ends as soon as the game is over
        if state.gameOver: break

        # Dead players simply pass their turn
        if not state.activePlayer.is_alive:
            state.endTurn()
            continue

        # Legal-move mask and candidate actions for the current phase
        mask, actions = agent.maskAndMoves(state, state.gamePhase, edge_index)

        try:
            policy, value = apprentice.getPolicy(state)
        except Exception as e:
            # Dump diagnostics before re-raising so failures are debuggable
            state.report()
            print(state.activePlayer.is_alive)
            print(state.activePlayer.num_countries)
            raise e

        if isinstance(mask, torch.Tensor):
            mask = mask.detach().numpy()

        probs = (policy * mask).flatten()
        total = probs.sum()
        if total > 0:
            probs = probs / total
        else:
            # BUGFIX: the policy may put all mass on illegal moves, which
            # previously produced a zero-sum (NaN) distribution and made
            # np.random.choice raise. Fall back to uniform over legal moves.
            probs = np.asarray(mask, dtype="float64").flatten()
            probs = probs / probs.sum()

        # Random selection? e-greedy?
        ind = np.random.choice(range(len(actions)), p=probs)
        move = agent.buildMove(state, actions[ind])

        saved = (move_type == "all" or move_type == state.gamePhase)
        if verbose:
            # BUGFIX: this branch was dead code (`pass`); the verbose flag
            # now actually reports progress as intended.
            print(f"\t\tPlay episode: turn {i}, move = {move}, saved = {saved}")

        if saved:
            episode.append(copy.deepcopy(state))

        # Play the move to continue
        state.playMove(move)

    return episode
# Example 2
# 0
def play_episode(root, max_depth, apprentice):
    """Simulate up to ``max_depth`` turns from ``root`` with the apprentice.

    Every visited state is recorded as a deep copy; the rollout stops
    early once the game is over. Returns the list of recorded states.
    """
    trajectory = []
    state = copy.deepcopy(root)
    edge_index = boardToData(root).edge_index

    # ******************* PLAY EPISODE ***************************
    for _ in range(max_depth):
        # Finished game: nothing more to simulate
        if state.gameOver:
            break

        # A dead player just passes the turn
        if not state.activePlayer.is_alive:
            # print("\npassing, dead player")
            state.endTurn()
            continue

        # Legal moves plus the apprentice's policy over them
        mask, actions = maskAndMoves(state, state.gamePhase, edge_index)
        try:
            policy, value = apprentice.play(state)
        except Exception as e:
            # Emit diagnostics before propagating the failure
            state.report()
            print(state.activePlayer.is_alive)
            print(state.activePlayer.num_countries)
            raise e

        masked = policy * mask
        weights = masked.squeeze().detach().numpy()
        weights = weights / weights.sum()

        # Random selection? e-greedy?
        chosen = np.random.choice(range(len(actions)), p=weights)
        move = buildMove(state, actions[chosen])

        trajectory.append(copy.deepcopy(state))

        # Advance the game with the sampled move
        state.playMove(move)

    return trajectory
# Example 3
# 0

#%%%
# Advance the game until it either ends or reaches the "attack" phase.
board.play() 
while not board.gameOver and board.gamePhase != "attack":
  board.play()
  
  
# Show the resulting board state for manual inspection.
board.report()
print(board.countriesPandas())
print("\n")

# Get policy for board
# Canonicalize the board from the active player's perspective, wrap it as a
# single-graph batch, and query the apprentice network for a move policy.
canon, _ = board.toCanonical(board.activePlayer.code)
batch = torch_geometric.data.Batch.from_data_list([boardToData(canon)])
mask, moves = agent.maskAndMoves(canon, canon.gamePhase, batch.edge_index)
policy, value = apprentice.getPolicy(canon)
pop = policy.squeeze()

# Temperature-sharpened re-normalization of the raw policy: exp(log(p)/T)
# is p**(1/T); the 1e-6 floor guards against log(0). With T = 1 this is
# just the policy renormalized.
T = 1
exp = np.exp(np.log(np.maximum(pop, 0.000001))/T)
soft = exp/exp.sum()

# Print each legal move with its raw (p) and temperature-softened (s)
# probability. NOTE(review): actions with len(a) > 2 appear to carry a
# source and a target country index (a[1] -> a[2]) — confirm against
# the action encoding used by maskAndMoves.
co = board.countries()
for m, a, p, s in zip(mask.squeeze(), moves, pop, soft):
    if m.item():
        if len(a) > 2:
            print(
                f"{a[0]}: {co[a[1]]['id']} -> {co[a[2]]['id']} - {p:.3f} - {s:.3f}")
        else:
            print(f"{a[0]}: {co[a[1]]['id']} - {p:.3f}- {s:.3f}")