Esempio n. 1
0
def simple_save_state(root_path,
                      state,
                      policy,
                      value,
                      verbose=False,
                      num_task=0):
    """Save one canonical board observation (board, policy, value) as JSON.

    The file is written to ``<root_path>/<phase>/raw/board_<num>_<num_task>.json``,
    where ``phase`` is the game phase of the canonicalized board and ``num`` is
    chosen so the file does not clobber an existing one.

    Args:
        root_path: dataset root directory; per-phase ``raw`` subdirs must exist.
        state: game state; canonicalized from the active player's perspective.
        policy: array-like policy target; flattened with ``.ravel().tolist()``.
        value: array-like value target; flattened with ``.ravel().tolist()``.
        verbose: when True, print the saved path and flush stdout.
        num_task: task identifier appended to the file name to keep names
            distinct across concurrent tasks.

    Returns:
        True on success.

    Raises:
        Exception: any failure is printed and re-raised to the caller.
    """
    try:
        board, _ = state.toCanonical(state.activePlayer.code)
        phase = board.gamePhase
        full_path = os.path.join(root_path, phase, 'raw')
        num = len(os.listdir(full_path)) + 1
        # BUGFIX: probe the name that is actually written (with num_task).
        # The original probed "board_{num}.json" — a name never saved — so
        # its collision check could never fire.
        name = f"board_{num}_{num_task}.json"
        while os.path.exists(os.path.join(full_path, name)):
            num += 1
            name = f"board_{num}_{num_task}.json"
        saveBoardObs(full_path, name, board, board.gamePhase,
                     policy.ravel().tolist(),
                     value.ravel().tolist())
        if verbose:
            print(
                f"\t\tSimple save: Saved board {state.board_id} {os.path.join(full_path, name)}"
            )
            sys.stdout.flush()
        return True
    except Exception as e:
        # Best-effort diagnostic, then propagate with the original traceback
        # (bare `raise` instead of `raise e`, which would rewrite it).
        print(e)
        raise
Esempio n. 2
0
def save_states(path, states, policies, values):
    """Persist each (state, policy, value) triple as a raw board observation.

    For every triple, the state is canonicalized from the active player's
    perspective and written to ``<path>/<phase>/raw/board_<n>.json``, where
    ``n`` is one past the current file count of that directory.
    """
    for game_state, target_policy, target_value in zip(states, policies, values):
        # Canonical view from the player currently on the move.
        canonical_board, _ = game_state.toCanonical(game_state.activePlayer.code)
        destination = os.path.join(path, canonical_board.gamePhase, 'raw')
        # Next free index, assuming files are only ever appended here.
        file_index = len(os.listdir(destination)) + 1
        saveBoardObs(destination,
                     'board_{}.json'.format(file_index),
                     canonical_board,
                     canonical_board.gamePhase,
                     target_policy.ravel().tolist(),
                     target_value.ravel().tolist())
Esempio n. 3
0
def simple_save_state(path, name, state, policy, value, verbose=False):
    """Save one canonical board observation under ``<path>/<name>``.

    The state is canonicalized from the active player's perspective; policy
    and value targets are flattened to plain lists before saving.

    Returns:
        True on success.
    """
    canonical_board, _ = state.toCanonical(state.activePlayer.code)
    flat_policy = policy.ravel().tolist()
    flat_value = value.ravel().tolist()
    saveBoardObs(path, name, canonical_board, canonical_board.gamePhase,
                 flat_policy, flat_value)
    if verbose:
        saved_to = os.path.join(path, name)
        print(f"\t\tSimple save: Saved board {state.board_id} {saved_to}")
        sys.stdout.flush()
    return True
Esempio n. 4
0
def create_self_play_data(path,
                          root,
                          num_samples,
                          start_sample,
                          apprentice,
                          expert,
                          max_depth=100,
                          saved_states_per_episode=1,
                          verbose=False):
    """ Function to create episodes from self play.
        Visited states are saved and then re visited with the expert to label the data

        Args:
            path: dataset root; samples are written to <path>/<phase>/raw.
            root: initial game state; each episode starts from a deep copy.
            num_samples: total number of labelled samples to produce.
            start_sample: unused; kept for interface compatibility.
            apprentice: plays the episodes (provides policy/value per state).
            expert: labels the selected states via getActionProb.
            max_depth: maximum number of moves per episode.
            saved_states_per_episode: states to label per episode (per phase).
            verbose: unused; kept for interface compatibility.
    """
    samples = 0

    # Per-phase counter of the next file index, seeded with the largest
    # "board_<n>.json" index already on disk so new files don't overwrite
    # old ones. The [0] fallback handles an empty directory.
    samples_type = {
        'initialPick': 0,
        'initialFortify': 0,
        'startTurn': 0,
        'attack': 0,
        'fortify': 0
    }
    for k in samples_type:
        path_aux = os.path.join(path, k, 'raw')
        indices = [
            n[(n.find("_") + 1):n.find(".")]
            for n in os.listdir(path_aux) if 'board' in n
        ]
        samples_type[k] = max(list(map(int, filter(isint, indices))) + [0])

    # Rotate through the phases so the dataset stays balanced across them.
    move_to_save = itertools.cycle(list(samples_type.keys()))
    edge_index = boardToData(root).edge_index
    while samples < num_samples:

        # ******************* PLAY EPISODE ***************************
        episode = []
        state = copy.deepcopy(root)
        for i in range(max_depth):
            print_message_over(f"Playing episode: {i}/{max_depth}")

            # Check if episode is over
            if state.gameOver: break

            # Dead players just pass their turn.
            if not state.activePlayer.is_alive:
                state.endTurn()
                continue

            # Get possible moves, and apprentice policy
            mask, actions = maskAndMoves(state, state.gamePhase, edge_index)
            try:
                policy, value = apprentice.play(state)
            except Exception as e:
                # Dump diagnostics before propagating the apprentice failure.
                state.report()
                print(state.activePlayer.is_alive)
                print(state.activePlayer.num_countries)
                raise e
            # Mask out illegal moves, then renormalize into a distribution.
            policy = policy * mask
            probs = policy.squeeze().detach().numpy()
            probs = probs / probs.sum()

            ind = np.random.choice(range(len(actions)), p=probs)
            move = buildMove(state, actions[ind])

            # Snapshot the state *before* the move so it can be relabelled.
            episode.append(copy.deepcopy(state))

            # Play the move to continue
            state.playMove(move)

        # ******************* SAVE STATES ***************************
        # Pick the phase to sample from; if the episode has no state in that
        # phase, advance through the cycle until one is found. A full cycle
        # with no match means the episode produced nothing usable.
        to_save = next(move_to_save)
        options = [s for s in episode if s.gamePhase == to_save]
        init_to_save = to_save
        while not options:
            to_save = next(move_to_save)
            if to_save == init_to_save:
                raise Exception(
                    "Episode is empty? No dataset could be created for any game phase"
                )
            options = [s for s in episode if s.gamePhase == to_save]
        states_to_save = np.random.choice(
            options, min(saved_states_per_episode, len(options)))

        # Get expert move for the chosen states
        for i, state in enumerate(states_to_save):
            print_message_over(
                f"Saving states: Saved {i}/{len(states_to_save)}... Total: {samples}/{num_samples}"
            )
            policy_exp, value_exp, _ = expert.getActionProb(state,
                                                            temp=1,
                                                            num_sims=None,
                                                            use_val=False)
            # Save the board, value and target
            board, _ = state.toCanonical(state.activePlayer.code)
            phase = board.gamePhase
            # Expert may return tensors; convert to numpy for serialization.
            if isinstance(policy_exp, torch.Tensor):
                policy_exp = policy_exp.detach().numpy()
            if isinstance(value_exp, torch.Tensor):
                value_exp = value_exp.detach().numpy()

            # Use os.path.join like everywhere else in this function
            # (original concatenated with '/' here).
            saveBoardObs(os.path.join(path, phase, 'raw'),
                         'board_{}.json'.format(samples_type[phase]), board,
                         board.gamePhase, policy_exp.tolist(),
                         value_exp.tolist())
            samples += 1
            samples_type[phase] += 1
            print_message_over(
                f"Saving states: Saved {i+1}/{len(states_to_save)}... Total: {samples}/{num_samples}"
            )

    print_message_over("Done!")
    print()
Esempio n. 5
0
def simple_save_state(path, name, state, policy, value):
    """Save a single canonical board observation under ``<path>/<name>``.

    Canonicalizes the state from the active player's perspective and writes
    the board together with the flattened policy and value targets.

    Returns:
        True on success.
    """
    canonical_board, _ = state.toCanonical(state.activePlayer.code)
    saveBoardObs(path, name, canonical_board, canonical_board.gamePhase,
                 policy.ravel().tolist(),
                 value.ravel().tolist())
    return True