def create_self_play_script(input_file, move_type, verbose):
    # ---------------- Start -------------------------
    inputs = misc.read_json(input_file)
    misc.print_and_flush("create_self_play: Start")
    start = time.process_time()

    saved_states_per_episode = inputs["saved_states_per_episode"]
    max_episode_depth = inputs["max_episode_depth"]
    apprentice_params = inputs["apprentice_params"]
    expert_params = inputs["expert_params"]
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    model_args = misc.read_json(inputs["model_parameters"])
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    move_types = ["initialPick", "initialFortify", "startTurn", "attack", "fortify"]

    # ---------------------------------------------------------------
    # Create board
    world = World(path_board)

    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]

    # Set board
    prefs = board_params
    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        if verbose:
            misc.print_and_flush("create_self_play: Creating model")
        net = GCN_risk(num_nodes, num_edges,
                       model_args['board_input_dim'], model_args['global_input_dim'],
                       model_args['hidden_global_dim'], model_args['num_global_layers'],
                       model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                       model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                       model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                       model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                       model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                       model_args['hidden_value_dim'], model_args['num_value_layers'],
                       model_args['dropout'])

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net.to(device)

        model_name = apprentice_params["model_name"]
        if model_name:  # If it is not the empty string
            try:
                if verbose:
                    misc.print_and_flush(f"create_self_play: Chosen model is {model_name}")
                state_dict = load_dict(os.path.join(path_model, model_name), device='cpu', encoding='latin1')
                net.load_state_dict(state_dict['model'])
                if verbose:
                    misc.print_and_flush("create_self_play: Model has been loaded")
            except Exception as e:
                print(e)

        if verbose:
            misc.print_and_flush("create_self_play: Defining net apprentice")
        # Define initial apprentice
        apprentice = agent.NetApprentice(net)
    else:
        if verbose:
            misc.print_and_flush("create_self_play: Defining MCTS apprentice")
        apprentice = agent.MctsApprentice(num_MCTS_sims=apprentice_params["num_MCTS_sims"],
                                          temp=apprentice_params["temp"],
                                          max_depth=apprentice_params["max_depth"],
                                          sims_per_eval=apprentice_params["sims_per_eval"])

    if verbose:
        misc.print_and_flush("create_self_play: Defining expert")
    # Build expert
    expert = build_expert_mcts(apprentice,
                               max_depth=expert_params["max_depth"],
                               sims_per_eval=expert_params["sims_per_eval"],
                               num_MCTS_sims=expert_params["num_MCTS_sims"],
                               wa=expert_params["wa"], wb=expert_params["wb"],
                               cb=expert_params["cb"], use_val=expert_params["use_val"])

    if verbose:
        misc.print_and_flush("create_self_play: Creating data folders")
    # Create folders to store data
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok=True)
    os.makedirs(path_model, exist_ok=True)

    #### START
    start_inner = time.process_time()
    state = copy.deepcopy(board_orig)

    # Play episode, select states to save
    if verbose:
        misc.print_and_flush("create_self_play: Self-play")
    states_to_save = create_self_play_data(move_type, path_data, state, apprentice,
                                           max_depth=max_episode_depth,
                                           saved_states_per_episode=saved_states_per_episode,
                                           verbose=verbose)
    if verbose:
        misc.print_and_flush(f"create_self_play: Play episode: Time taken: {round(time.process_time() - start_inner, 2)}")

    # Tag the states and save them
    start_inner = time.process_time()
    if verbose:
        misc.print_and_flush(f"create_self_play: Tag the states ({len(states_to_save)} states to tag)")
    for st in states_to_save:
        st_tagged, policy_exp, value_exp = tag_with_expert_move(st, expert, temp=expert_params["temp"], verbose=verbose)
        _ = simple_save_state(path_data, st_tagged, policy_exp, value_exp, verbose=verbose)
    if verbose:
        misc.print_and_flush(f"create_self_play: Tag and save: Time taken -> {round(time.process_time() - start_inner, 2)}")

    misc.print_and_flush(f"create_self_play: Total time taken -> {round(time.process_time() - start, 2)}")
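# ---------------------------------------------------------------
# Illustrative sketch only: the shape of the JSON that create_self_play_script reads
# from `input_file`, inferred from the keys accessed above. Every path and numeric
# value here is a placeholder, not a value used by the project.
EXAMPLE_SELF_PLAY_INPUT = {
    "saved_states_per_episode": 1,            # states sampled from each self-play episode
    "max_episode_depth": 100,                 # cap on episode length
    "apprentice_params": {
        "type": "net",                        # "net" -> NetApprentice, otherwise MctsApprentice
        "model_name": "",                     # empty string -> no checkpoint is loaded
        # used only when type is not "net":
        "num_MCTS_sims": 500, "temp": 1, "max_depth": 100, "sims_per_eval": 1
    },
    "expert_params": {
        "num_MCTS_sims": 500, "temp": 1, "max_depth": 100, "sims_per_eval": 1,
        "wa": 10, "wb": 10, "cb": 1, "use_val": 0
    },
    "path_data": "data_example",              # placeholder: where tagged states are written
    "path_model": "models_example",           # placeholder: where checkpoints live
    "model_parameters": "model_parameters.json",   # placeholder: path to GCN_risk hyperparameters
    "board_params": {"path_board": "maps/example_map.json"}   # placeholder map file
}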
def train_model_main(input_file, iteration, checkpoint, verbose):
    # ---------------- Start -------------------------
    misc.print_and_flush(f"train_model {iteration}: Start")
    start = time.process_time()

    inputs = misc.read_json(input_file)
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    batch_size = inputs["batch_size"]
    model_args = misc.read_json(inputs["model_parameters"])
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]
    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]

    # ---------------- Load model -------------------------
    move_types = ['initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify']

    # Create board
    world = World(path_board)

    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]

    # Set board
    # TODO: Send to inputs
    prefs = board_params
    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if verbose:
        misc.print_and_flush("Creating model")
    net = GCN_risk(num_nodes, num_edges,
                   model_args['board_input_dim'], model_args['global_input_dim'],
                   model_args['hidden_global_dim'], model_args['num_global_layers'],
                   model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                   model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                   model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                   model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                   model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                   model_args['hidden_value_dim'], model_args['num_value_layers'],
                   model_args['dropout'])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)

    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    criterion = TPT_Loss

    # state_dict = model.load_dict(os.path.join(path_model, checkpoint), device = 'cpu', encoding = 'latin1')
    # net.load_state_dict(state_dict['model'])
    # optimizer.load_state_dict(state_dict['optimizer'])
    # scheduler.load_state_dict(state_dict['scheduler'])

    load_path = os.path.join(path_model, checkpoint) if checkpoint else None
    # This is used only at the beginning. The loaded model is then trained and saved each time,
    # so we avoid reloading the last saved model.

    # Train network on dataset
    if verbose:
        misc.print_and_flush("Training network")
    shuffle(move_types)
    for j, move_type in enumerate(move_types):
        if verbose:
            misc.print_and_flush(f"\tTraining {j}: {move_type}")
        save_path = f"{path_model}/model_{iteration}_{j}_{move_type}.tar"
        root_path = f"{path_data}/{move_type}"

        if len(os.listdir(os.path.join(root_path, 'raw'))) < batch_size:
            continue

        risk_dataset = RiskDataset(root=root_path)
        # TODO: add validation data
        loader = G_DataLoader(risk_dataset, batch_size=batch_size, shuffle=True)

        if verbose:
            misc.print_and_flush(f"\tTrain on {root_path}, model = {save_path}")
        train_model(net, optimizer, scheduler, criterion, device,
                    epochs=epochs, train_loader=loader, val_loader=None,
                    eval_every=eval_every, load_path=load_path, save_path=save_path)
        load_path = None  # The model is already in memory

    misc.print_and_flush(f"train_model: Total time taken -> {round(time.process_time() - start, 2)}")
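# ---------------------------------------------------------------
# Illustrative sketch only: the keys read by train_model_main, again inferred from the
# accesses above. File names, the iteration number and the checkpoint name are
# placeholders; an empty checkpoint string means no weights are loaded.
EXAMPLE_TRAIN_INPUT = {
    "path_data": "data_example",
    "path_model": "models_example",
    "batch_size": 16,
    "epochs": 10,
    "eval_every": 3,
    "model_parameters": "model_parameters.json",
    "board_params": {"path_board": "maps/example_map.json"}
}
# Example invocation (placeholder file name):
# misc.write_json(EXAMPLE_TRAIN_INPUT, "train_input.json")
# train_model_main("train_input.json", iteration=0, checkpoint="", verbose=1)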
# Create json file with inputs for the self-play tasks
input_dict = {
    "saved_states_per_episode": saved_states_per_episode,
    "apprentice_params": apprentice_params,
    "expert_params": expert_params,
    "path_data": path_data,
    "path_model": path_model,
    "model_parameters": inputs["model_parameters"],
    "board_params": board_params,
    "max_episode_depth": inputs["max_episode_depth"]
}
self_play_input_json = os.path.join(params_path, self_play_tag) + ".json"
misc.write_json(input_dict, self_play_input_json)

misc.print_and_flush(f"Running {num_iter} iterations, each of {num_cpu} tasks")
for j in range(num_iter):  # Each iteration launches num_cpu tasks
    misc.print_and_flush(f"\n\t*** Inner iter {j+1} of {num_iter}")
    move_type = next(types_cycle)
    create_self_play_script(self_play_input_json, move_type, verbose)

print(f"Time taken self-play: {round(time.process_time() - start, 2)}")

##### 2. Train network on dataset
start = time.process_time()
print("Training network")

# Create the input file
input_dict = {
    "path_data": path_data,