def load_NetPlayer(board, args):
    """Build a NetPlayer agent for the given board from a configuration dict.

    Reads network hyper-parameters from the JSON file referenced by
    ``args["model_parameters_json"]``, constructs a ``GCN_risk`` sized for the
    board's map graph, loads the weights at ``args["model_path"]``, wraps the
    net in a ``NetApprentice`` and finally in a ``NetPlayer``.
    """
    graph = board.world.map_graph
    model_args = misc.read_json(args["model_parameters_json"])

    # Hyper-parameter keys, listed in the positional order GCN_risk expects.
    hyper_keys = (
        'board_input_dim', 'global_input_dim',
        'hidden_global_dim', 'num_global_layers',
        'hidden_conv_dim', 'num_conv_layers',
        'hidden_pick_dim', 'num_pick_layers', 'out_pick_dim',
        'hidden_place_dim', 'num_place_layers', 'out_place_dim',
        'hidden_attack_dim', 'num_attack_layers', 'out_attack_dim',
        'hidden_fortify_dim', 'num_fortify_layers', 'out_fortify_dim',
        'hidden_value_dim', 'num_value_layers',
        'dropout',
    )
    net = GCN_risk(graph.number_of_nodes(), graph.number_of_edges(),
                   *(model_args[k] for k in hyper_keys))

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    net.eval()

    # Weights are deserialized on CPU; load_state_dict then copies them into
    # the (possibly GPU-resident) parameters.
    state_dict = load_dict(args["model_path"], device='cpu', encoding='latin1')
    net.load_state_dict(state_dict['model'])

    apprentice = agent.NetApprentice(net)
    # Forward only the optional player settings that were actually provided.
    kwargs = {k: args[k] for k in ("move_selection", "name", "temp") if k in args}
    netPlayer = agent.NetPlayer(apprentice, **kwargs)
    return netPlayer
# --- Configuration for the hex-map experiment ---------------------------------
path_model = "../data_hex/models"      # where trained model checkpoints live
out_path = "../data_hex"               # output directory for generated data
EI_inputs_path = "../support/exp_iter_inputs/exp_iter_inputs_hex.json"
# Alternative configurations (kept for quick switching between experiments):
# path_model = "C:/Users/lucas/OneDrive/Documentos/stage_risk/data_01_09_test_map/models"
# # path_model = "C:/Users/lucas/OneDrive/Documentos/stage_risk/data_test_git/models"
# EI_inputs_path = "../support/exp_iter_inputs/exp_iter_inputs_test_2.json"
# path_model = "C:/Users/lucas/OneDrive/Documentos/stage_risk/data_07_09_classic/models"
# EI_inputs_path = "../support/exp_iter_inputs/exp_iter_inputs_classic.json"
load_model = True                      # whether to load an existing checkpoint below
model_name = "model_0_0_initialFortify.tar"  # checkpoint file to load from path_model

# Create the net using the same parameters
inputs = misc.read_json(EI_inputs_path)
model_args = misc.read_json(inputs["model_parameters"])
board_params = inputs["board_params"]
path_board = board_params["path_board"]

# ---------------- Model -------------------------
print("Creating board")
world = World(path_board)

# Set players
# NOTE(review): pR3 (Green) is created but not added to `players` — presumably
# a leftover from a 3-player setup; confirm before removing.
pR1, pR2, pR3 = RandomAgent('Red'), RandomAgent('Blue'), RandomAgent('Green')
players = [pR1, pR2]

# Set board
prefs = board_params
        # NOTE(review): this chunk starts mid-call — the enclosing
        # parser.add_argument(...) opens above this view.
        default="")
    parser.add_argument("--iteration", help="Global iteration of Expert Iteration", type=int, default=0)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # ---------------- Start -------------------------
    print("\t\ttrain_model: Start")
    start_train = time.process_time()

    # Parse CLI arguments, then read the run configuration from the inputs JSON.
    args = parseInputs()
    inputs = misc.read_json(args.inputs)
    verbose = bool(args.verbose)
    iteration = args.iteration
    checkpoint = args.checkpoint

    # Unpack training configuration.
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    batch_size = inputs["batch_size"]
    model_args = misc.read_json(inputs["model_parameters"])
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]
    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]
    # NOTE(review): this `return` belongs to a function whose `def` line is
    # above this view; it reports per-player board statistics keyed by a
    # name prefix `n` for player object `c`.
    return {
        f"{n}_armies": board.getPlayerArmies(c),
        f"{n}_income": board.getPlayerIncome(c),
        f"{n}_countries": board.getPlayerCountries(c),
        f"{n}_continents": board.getPlayerContinents(c)
    }


if __name__ == '__main__':
    args = parseInputs()
    # Manual
    # args.inputs = "../support/battles/diamond_baselines.json"
    inputs = misc.read_json(args.inputs)
    board_params = inputs["board_params"]
    battles = inputs["battles"]
    save_path = inputs["save_path"]

    # Battle here. Create agent first, then set number of matches and play the games
    for b_name, b_args in battles.items():
        print(f"Playing battle {b_name}")
        # Copy the per-battle args so the shared board_params dict is not
        # mutated across iterations.
        battle_args = dict(b_args)
        battle_args["board_params"] = dict(board_params)
        res = battle(battle_args, args.verbose)
        # Write csv with the results
        csv, path = pd.DataFrame(data=res), f"{save_path}/{b_name}.csv"
        csv.to_csv(path)
        print(f"Wrote results to {path}")
def train_model_main(input_file, iteration, checkpoint, verbose):
    """Train the GCN_risk network on the self-play datasets of one iteration.

    Reads the run configuration from *input_file* (JSON), builds a board to
    size the network, then trains on the dataset of each move type in a
    shuffled order, saving a checkpoint per move type.

    :param input_file: path to the inputs JSON (paths, batch size, epochs, ...)
    :param iteration: global Expert Iteration index, used in checkpoint names
    :param checkpoint: checkpoint filename to warm-start from ("" to start fresh)
    :param verbose: whether to emit progress messages
    """
    # ---------------- Start -------------------------
    misc.print_and_flush(f"train_model {iteration}: Start")
    start = time.process_time()
    inputs = misc.read_json(input_file)
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    batch_size = inputs["batch_size"]
    model_args = misc.read_json(inputs["model_parameters"])
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]
    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]

    # ---------------- Load model -------------------------
    move_types = ['initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify']

    # Create Board — only needed to derive the graph sizes for the network.
    world = World(path_board)

    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]

    # Set board
    # TODO: Send to inputs
    prefs = board_params
    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if verbose: misc.print_and_flush("Creating model")
    net = GCN_risk(num_nodes, num_edges,
                   model_args['board_input_dim'], model_args['global_input_dim'],
                   model_args['hidden_global_dim'], model_args['num_global_layers'],
                   model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                   model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                   model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                   model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                   model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                   model_args['hidden_value_dim'], model_args['num_value_layers'],
                   model_args['dropout'])
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)

    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    # Halve the learning rate every 20 scheduler steps.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    criterion = TPT_Loss
    #state_dict = model.load_dict(os.path.join(path_model, checkpoint), device = 'cpu', encoding = 'latin1')
    #net.load_state_dict(state_dict['model'])
    #optimizer.load_state_dict(state_dict['optimizer'])
    #scheduler.load_state_dict(state_dict['scheduler'])

    # Checkpoint is handed to train_model for the FIRST move type only.
    load_path = os.path.join(path_model, checkpoint) if checkpoint else None
    # This is used only at the beginning. Then the model that is loaded is trained and saved at each time.
    # We avoid reloading the last saved model

    # Train network on dataset
    if verbose: misc.print_and_flush("Training network")
    # Shuffle so no move type is systematically trained first across runs.
    shuffle(move_types)
    for j, move_type in enumerate(move_types):
        if verbose: misc.print_and_flush(f"\tTraining {j}: {move_type}")
        save_path = f"{path_model}/model_{iteration}_{j}_{move_type}.tar"
        root_path = f'{path_data}/{move_type}'

        # Skip move types that do not yet have enough samples for one batch.
        if len(os.listdir(os.path.join(root_path, 'raw')))<batch_size: continue

        risk_dataset = RiskDataset(root = root_path)
        # TODO: add validation data
        loader = G_DataLoader(risk_dataset, batch_size=batch_size, shuffle = True)
        if verbose: misc.print_and_flush(f"\tTrain on {root_path}, model = {save_path}")
        train_model(net, optimizer, scheduler, criterion, device,
                    epochs = epochs, train_loader = loader, val_loader = None,
                    eval_every = eval_every, load_path = load_path, save_path = save_path)
        load_path = None # The model is already in memory

    misc.print_and_flush(f"train_model: Total time taken -> {round(time.process_time() - start,2)}")
def create_self_play_script(input_file, move_type, verbose):
    """Run one self-play episode, tag the selected states with an expert, save them.

    Builds the board, creates an apprentice (either a trained ``GCN_risk`` net
    or an MCTS apprentice, depending on ``apprentice_params["type"]``), builds
    an MCTS expert around it, plays an episode, then labels the sampled states
    with the expert's policy/value and writes them to disk.

    :param input_file: path to the inputs JSON with all run parameters
    :param move_type: move phase to collect data for (see ``move_types`` below)
    :param verbose: whether to emit progress messages
    """
    # ---------------- Start -------------------------
    inputs = misc.read_json(input_file)
    misc.print_and_flush("create_self_play: Start")
    start = time.process_time()
    saved_states_per_episode = inputs["saved_states_per_episode"]
    max_episode_depth = inputs["max_episode_depth"]
    apprentice_params = inputs["apprentice_params"]
    expert_params = inputs["expert_params"]
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    model_args = misc.read_json(inputs["model_parameters"])
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]
    move_types = ["initialPick", "initialFortify", "startTurn", "attack", "fortify"]

    # ---------------------------------------------------------------
    # Create board
    world = World(path_board)

    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]

    # Set board
    prefs = board_params
    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        # Network-based apprentice: build the net and (optionally) load weights.
        if verbose: misc.print_and_flush("create_self_play: Creating model")
        net = GCN_risk(num_nodes, num_edges,
                       model_args['board_input_dim'], model_args['global_input_dim'],
                       model_args['hidden_global_dim'], model_args['num_global_layers'],
                       model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                       model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                       model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                       model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                       model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                       model_args['hidden_value_dim'], model_args['num_value_layers'],
                       model_args['dropout'])
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net.to(device)

        model_name = apprentice_params["model_name"]
        if model_name: # If it is not the empty string
            # Best-effort load: a failed load falls back to the untrained net.
            try:
                if verbose: misc.print_and_flush(f"create_self_play : Chosen model is {model_name}")
                state_dict = load_dict(os.path.join(path_model, model_name), device = 'cpu', encoding = 'latin1')
                net.load_state_dict(state_dict['model'])
                if verbose: misc.print_and_flush("create_self_play: Model has been loaded")
            except Exception as e:
                print(e)
        if verbose: misc.print_and_flush("create_self_play: Defining net apprentice")
        # Define initial apprentice
        apprentice = agent.NetApprentice(net)
    else:
        if verbose: misc.print_and_flush("create_self_play: Defining MCTS apprentice")
        apprentice = agent.MctsApprentice(num_MCTS_sims = apprentice_params["num_MCTS_sims"],
                                          temp = apprentice_params["temp"],
                                          max_depth = apprentice_params["max_depth"],
                                          sims_per_eval = apprentice_params["sims_per_eval"])

    if verbose: misc.print_and_flush("create_self_play: Defining expert")
    # build expert
    expert = build_expert_mcts(apprentice,
                               max_depth=expert_params["max_depth"],
                               sims_per_eval=expert_params["sims_per_eval"],
                               num_MCTS_sims=expert_params["num_MCTS_sims"],
                               wa = expert_params["wa"], wb = expert_params["wb"],
                               cb = expert_params["cb"], use_val = expert_params["use_val"])

    if verbose: misc.print_and_flush("create_self_play: Creating data folders")
    # Create folders to store data
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok = True)
    os.makedirs(path_model, exist_ok = True)

    #### START
    start_inner = time.process_time()
    # Work on a copy so the original board remains pristine.
    state = copy.deepcopy(board_orig)

    # Play episode, select states to save
    if verbose: misc.print_and_flush("create_self_play: Self-play")
    states_to_save = create_self_play_data(move_type, path_data, state, apprentice,
                                           max_depth = max_episode_depth,
                                           saved_states_per_episode=saved_states_per_episode,
                                           verbose = verbose)
    if verbose: misc.print_and_flush(f"create_self_play: Play episode: Time taken: {round(time.process_time() - start_inner,2)}")

    # Tag the states and save them
    start_inner = time.process_time()
    if verbose: misc.print_and_flush(f"create_self_play: Tag the states ({len(states_to_save)} states to tag)")
    for st in states_to_save:
        st_tagged, policy_exp, value_exp = tag_with_expert_move(st, expert, temp=expert_params["temp"], verbose=verbose)
        _ = simple_save_state(path_data, st_tagged, policy_exp, value_exp, verbose=verbose)
    if verbose: misc.print_and_flush(f"create_self_play: Tag and save: Time taken -> {round(time.process_time() - start_inner,2)}")

    misc.print_and_flush(f"create_self_play: Total time taken -> {round(time.process_time() - start,2)}")