def simple_save_state(root_path, state, policy, value, verbose=False, num_task=0):
    """Save one state's canonical board plus its policy/value targets as JSON.

    The file is written to ``<root_path>/<gamePhase>/raw`` under a unique
    name ``board_<num>_<num_task>.json``.

    Args:
        root_path: Base directory containing per-phase ``raw`` subfolders.
        state: Game state; canonicalized from the active player's viewpoint.
        policy: Array-like policy target; flattened before saving.
        value: Array-like value target; flattened before saving.
        verbose: If True, print the saved path.
        num_task: Task identifier appended to the filename so concurrent
            tasks produce distinct names.

    Returns:
        True on success.

    Raises:
        Re-raises any exception after printing it.
    """
    try:
        board, _ = state.toCanonical(state.activePlayer.code)
        phase = board.gamePhase
        full_path = os.path.join(root_path, phase, 'raw')
        # Start from the current file count, then advance past collisions.
        # BUGFIX: the collision check must probe the *final* filename
        # (with num_task suffix); previously it probed "board_<num>.json"
        # and then saved under a different, unchecked name.
        num = len(os.listdir(full_path)) + 1
        name = f"board_{num}_{num_task}.json"
        while os.path.exists(os.path.join(full_path, name)):
            num += 1
            name = f"board_{num}_{num_task}.json"
        saveBoardObs(full_path, name, board, board.gamePhase,
                     policy.ravel().tolist(), value.ravel().tolist())
        if verbose:
            print(
                f"\t\tSimple save: Saved board {state.board_id} {os.path.join(full_path, name)}"
            )
            sys.stdout.flush()
        return True
    except Exception as e:
        print(e)
        # Bare raise preserves the original traceback (raise e would not).
        raise
def save_states(path, states, policies, values):
    """Persist each (state, policy, value) triple as a numbered board JSON.

    Each state is canonicalized from its active player's viewpoint and
    written to ``<path>/<gamePhase>/raw/board_<n>.json``, where ``n`` is
    one past the current file count of that directory.
    """
    for st, pol_target, val_target in zip(states, policies, values):
        canonical, _ = st.toCanonical(st.activePlayer.code)
        target_dir = os.path.join(path, canonical.gamePhase, 'raw')
        file_number = len(os.listdir(target_dir)) + 1
        saveBoardObs(target_dir,
                     'board_{}.json'.format(file_number),
                     canonical,
                     canonical.gamePhase,
                     pol_target.ravel().tolist(),
                     val_target.ravel().tolist())
def simple_save_state(path, name, state, policy, value, verbose=False):
    """Write one canonical board observation to ``path``/``name``.

    The state is canonicalized from the active player's viewpoint; policy
    and value targets are flattened to plain lists. Returns True.
    """
    canonical, _ = state.toCanonical(state.activePlayer.code)
    saveBoardObs(path, name, canonical, canonical.gamePhase,
                 policy.ravel().tolist(), value.ravel().tolist())
    if not verbose:
        return True
    print(
        f"\t\tSimple save: Saved board {state.board_id} {os.path.join(path, name)}"
    )
    sys.stdout.flush()
    return True
def create_self_play_data(path, root, num_samples, start_sample, apprentice,
                          expert, max_depth=100, saved_states_per_episode=1,
                          verbose=False):
    """Create labeled self-play data.

    Episodes are played with the ``apprentice`` policy starting from
    ``root``; a few visited states per episode are then re-labeled with the
    ``expert`` and written as JSON board observations under
    ``<path>/<gamePhase>/raw``.

    Args:
        path: Dataset root containing one ``<phase>/raw`` folder per phase.
        root: Initial game state; deep-copied for every episode.
        num_samples: Total number of labeled samples to produce.
        start_sample: Unused here — kept for interface compatibility.
        apprentice: Agent with ``play(state) -> (policy, value)``.
        expert: Agent with ``getActionProb(state, ...)`` used for labeling.
        max_depth: Maximum moves per episode.
        saved_states_per_episode: Upper bound on states saved per episode.
        verbose: Unused here — kept for interface compatibility.
    """
    samples = 0
    # Resume file numbering per phase from the highest index already on disk.
    samples_type = {
        'initialPick': 0,
        'initialFortify': 0,
        'startTurn': 0,
        'attack': 0,
        'fortify': 0
    }
    for k in samples_type:  # values are overwritten; no need to iterate items()
        path_aux = os.path.join(path, k, 'raw')
        # Extract the numeric part of existing "board_<n>.json" names;
        # "+ [0]" guards max() against an empty directory.
        val = max(
            list(
                map(
                    int,
                    filter(isint, [
                        n[(n.find("_") + 1):n.find(".")]
                        for n in os.listdir(path_aux) if 'board' in n
                    ]))) + [0])
        samples_type[k] = val
    # Round-robin over phases so the dataset stays balanced across them.
    move_to_save = itertools.cycle(list(samples_type.keys()))
    edge_index = boardToData(root).edge_index

    while samples < num_samples:
        # ******************* PLAY EPISODE ***************************
        episode = []
        state = copy.deepcopy(root)
        for i in range(max_depth):
            print_message_over(f"Playing episode: {i}/{max_depth}")

            # Check if episode is over
            if state.gameOver:
                break

            # Check is current player is alive or not
            if not state.activePlayer.is_alive:
                state.endTurn()
                continue

            # Get possible moves, and apprentice policy
            mask, actions = maskAndMoves(state, state.gamePhase, edge_index)
            try:
                policy, value = apprentice.play(state)
            except Exception:
                # Dump diagnostics, then re-raise with the original traceback.
                state.report()
                print(state.activePlayer.is_alive)
                print(state.activePlayer.num_countries)
                raise
            policy = policy * mask
            probs = policy.squeeze().detach().numpy()
            probs = probs / probs.sum()

            ind = np.random.choice(range(len(actions)), p=probs)
            move = buildMove(state, actions[ind])
            episode.append(copy.deepcopy(state))

            # Play the move to continue
            state.playMove(move)

        # ******************* SAVE STATES ***************************
        # Choose which kind of move we are going to save; if the episode has
        # no state in that phase, advance the cycle until one is found. A full
        # cycle with no match means the episode produced nothing usable.
        to_save = next(move_to_save)
        options = [s for s in episode if s.gamePhase == to_save]
        init_to_save = to_save
        while not options:
            to_save = next(move_to_save)
            if to_save == init_to_save:
                raise Exception(
                    "Episode is empty? No dataset could be created for any game phase"
                )
            options = [s for s in episode if s.gamePhase == to_save]
        states_to_save = np.random.choice(
            options, min(saved_states_per_episode, len(options)))

        # Get expert move for the chosen states and save board/policy/value.
        for i, state in enumerate(states_to_save):
            print_message_over(
                f"Saving states: Saved {i}/{len(states_to_save)}... Total: {samples}/{num_samples}"
            )
            policy_exp, value_exp, _ = expert.getActionProb(state,
                                                            temp=1,
                                                            num_sims=None,
                                                            use_val=False)
            board, _ = state.toCanonical(state.activePlayer.code)
            phase = board.gamePhase
            if isinstance(policy_exp, torch.Tensor):
                policy_exp = policy_exp.detach().numpy()
            if isinstance(value_exp, torch.Tensor):
                value_exp = value_exp.detach().numpy()
            saveBoardObs(os.path.join(path, phase, 'raw'),
                         'board_{}.json'.format(samples_type[phase]), board,
                         board.gamePhase, policy_exp.tolist(),
                         value_exp.tolist())
            samples += 1
            samples_type[phase] += 1
            print_message_over(
                f"Saving states: Saved {i+1}/{len(states_to_save)}... Total: {samples}/{num_samples}"
            )
    print_message_over("Done!")
    print()
def simple_save_state(path, name, state, policy, value):
    """Save the canonical form of *state* with its policy/value labels.

    NOTE(review): this is the third definition of ``simple_save_state`` in
    this file; at import time only the last one is bound to the name.
    """
    canonical_board, _ = state.toCanonical(state.activePlayer.code)
    flat_policy = policy.ravel().tolist()
    flat_value = value.ravel().tolist()
    saveBoardObs(path, name, canonical_board, canonical_board.gamePhase,
                 flat_policy, flat_value)
    return True