예제 #1
0
def load_NetPlayer(board, args):
    """Build a NetPlayer agent from a saved GCN_risk checkpoint.

    Args:
        board: Board whose world map graph fixes the network input sizes
            (number of nodes/edges).
        args: dict with keys "model_parameters_json" (path to the model
            hyper-parameter JSON) and "model_path" (checkpoint file), plus
            optionally "move_selection", "name" and "temp", which are
            forwarded to the NetPlayer constructor.

    Returns:
        agent.NetPlayer wrapping a NetApprentice over the loaded network.
    """
    num_nodes = board.world.map_graph.number_of_nodes()
    num_edges = board.world.map_graph.number_of_edges()
    model_args = misc.read_json(args["model_parameters_json"])
    net = GCN_risk(
        num_nodes, num_edges, model_args['board_input_dim'],
        model_args['global_input_dim'], model_args['hidden_global_dim'],
        model_args['num_global_layers'], model_args['hidden_conv_dim'],
        model_args['num_conv_layers'], model_args['hidden_pick_dim'],
        model_args['num_pick_layers'], model_args['out_pick_dim'],
        model_args['hidden_place_dim'], model_args['num_place_layers'],
        model_args['out_place_dim'], model_args['hidden_attack_dim'],
        model_args['num_attack_layers'], model_args['out_attack_dim'],
        model_args['hidden_fortify_dim'], model_args['num_fortify_layers'],
        model_args['out_fortify_dim'], model_args['hidden_value_dim'],
        model_args['num_value_layers'], model_args['dropout'])
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    net.eval()  # inference only; no gradients needed for play

    # Checkpoint is deserialized on CPU; the module itself already lives on `device`.
    state_dict = load_dict(args["model_path"], device='cpu', encoding='latin1')
    net.load_state_dict(state_dict['model'])

    apprentice = agent.NetApprentice(net)

    # Forward only the optional NetPlayer constructor arguments that were supplied.
    kwargs = {k: args[k] for k in ("move_selection", "name", "temp") if k in args}
    return agent.NetPlayer(apprentice, **kwargs)
예제 #2
0
# --- Script configuration: paths for the hex-map experiment -----------------
# path_model     : directory holding trained model checkpoints
# out_path       : root output directory for this experiment
# EI_inputs_path : Expert Iteration inputs JSON describing the run
path_model = "../data_hex/models"
out_path = "../data_hex"
EI_inputs_path = "../support/exp_iter_inputs/exp_iter_inputs_hex.json"

# Alternative configurations for other maps (uncomment one pair to use):
# path_model = "C:/Users/lucas/OneDrive/Documentos/stage_risk/data_01_09_test_map/models"
# # path_model = "C:/Users/lucas/OneDrive/Documentos/stage_risk/data_test_git/models"
# EI_inputs_path = "../support/exp_iter_inputs/exp_iter_inputs_test_2.json"

# path_model = "C:/Users/lucas/OneDrive/Documentos/stage_risk/data_07_09_classic/models"
# EI_inputs_path = "../support/exp_iter_inputs/exp_iter_inputs_classic.json"

# Whether to load `model_name` from `path_model` (presumably consumed further
# down this script — TODO confirm against the full file).
load_model = True
model_name = "model_0_0_initialFortify.tar"

# Create the net using the same parameters
inputs = misc.read_json(EI_inputs_path)
model_args = misc.read_json(inputs["model_parameters"])
board_params = inputs["board_params"]
path_board = board_params["path_board"]

# ---------------- Model -------------------------

print("Creating board")

world = World(path_board)

# Set players (pR3 is created but only pR1/pR2 are used for this board)
pR1, pR2, pR3 = RandomAgent('Red'), RandomAgent('Blue'), RandomAgent('Green')
players = [pR1, pR2]
# Set board
prefs = board_params
예제 #3
0
        default="")
    parser.add_argument("--iteration",
                        help="Global iteration of Expert Iteration",
                        type=int,
                        default=0)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # ---------------- Start -------------------------
    # Entry point for the training script: parse CLI args, load the
    # experiment inputs JSON, and pull out paths and hyper-parameters.
    print("\t\ttrain_model: Start")
    # NOTE: process_time() measures CPU time, not wall-clock time.
    start_train = time.process_time()

    # CLI: inputs JSON path, verbosity flag, checkpoint name, iteration index.
    args = parseInputs()
    inputs = misc.read_json(args.inputs)
    verbose = bool(args.verbose)
    iteration = args.iteration
    checkpoint = args.checkpoint

    # Paths and training configuration from the inputs JSON.
    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    batch_size = inputs["batch_size"]
    model_args = misc.read_json(inputs["model_parameters"])

    # Board description (the board sizes the network's inputs).
    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]
예제 #4
0
    return {
        f"{n}_armies": board.getPlayerArmies(c),
        f"{n}_income": board.getPlayerIncome(c),
        f"{n}_countries": board.getPlayerCountries(c),
        f"{n}_continents": board.getPlayerContinents(c)
    }


if __name__ == '__main__':
    # Entry point: play every battle described in the inputs JSON and
    # write one CSV of results per battle under `save_path`.
    args = parseInputs()

    # Uncomment to hard-code an inputs file while debugging:
    # args.inputs = "../support/battles/diamond_baselines.json"

    inputs = misc.read_json(args.inputs)
    board_params = inputs["board_params"]
    battles = inputs["battles"]
    save_path = inputs["save_path"]

    # For each battle: build its config, play the matches, dump results.
    for battle_name, battle_spec in battles.items():
        print(f"Playing battle {battle_name}")
        # Copy the spec so the shared board parameters can be attached
        # without mutating the loaded inputs.
        config = dict(battle_spec)
        config["board_params"] = dict(board_params)
        results = battle(config, args.verbose)
        # Persist the results as CSV.
        out_file = f"{save_path}/{battle_name}.csv"
        frame = pd.DataFrame(data=results)
        frame.to_csv(out_file)
        print(f"Wrote results to {out_file}")
def train_model_main(input_file, iteration, checkpoint, verbose):
    """Train the GCN_risk network once over each move-type dataset.

    Reads the experiment inputs JSON, rebuilds a board only to size the
    network (node/edge counts), creates the model, optimizer, scheduler and
    loss, then trains sequentially on the dataset of each move type found
    under ``path_data``, saving one checkpoint per move type.

    Args:
        input_file: path to the experiment inputs JSON.
        iteration: global Expert Iteration index; used in checkpoint names.
        checkpoint: checkpoint file name under ``path_model`` to resume
            from, or falsy to start from scratch.
        verbose: when truthy, print progress messages.
    """
    # ---------------- Start -------------------------
    misc.print_and_flush(f"train_model {iteration}: Start")
    start = time.process_time()

    inputs = misc.read_json(input_file)

    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    batch_size = inputs["batch_size"]
    model_args = misc.read_json(inputs["model_parameters"])

    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]

    # ---------------- Load model -------------------------

    move_types = ['initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify']

    # Create the board: the network's input dimensions depend on the number
    # of nodes and edges of the map graph.
    world = World(path_board)

    # Two throwaway random players are enough to instantiate the board.
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]
    # Set board
    # TODO: Send to inputs
    prefs = board_params

    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if verbose: misc.print_and_flush("Creating model")
    net = GCN_risk(num_nodes, num_edges,
                   model_args['board_input_dim'], model_args['global_input_dim'],
                   model_args['hidden_global_dim'], model_args['num_global_layers'],
                   model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                   model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                   model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                   model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                   model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                   model_args['hidden_value_dim'], model_args['num_value_layers'],
                   model_args['dropout'])

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
    criterion = TPT_Loss

    # Resume from `checkpoint` only for the first move type trained. This is
    # used only at the beginning; afterwards the trained model is already in
    # memory, so we avoid reloading the last saved file.
    load_path = os.path.join(path_model, checkpoint) if checkpoint else None

    # Train network on each move-type dataset, in random order.
    if verbose: misc.print_and_flush("Training network")
    shuffle(move_types)
    for j, move_type in enumerate(move_types):
        if verbose: misc.print_and_flush(f"\tTraining {j}:  {move_type}")
        save_path = f"{path_model}/model_{iteration}_{j}_{move_type}.tar"
        root_path = f'{path_data}/{move_type}'

        # Skip move types that don't yet have at least one full batch of raw samples.
        if len(os.listdir(os.path.join(root_path, 'raw'))) < batch_size: continue

        risk_dataset = RiskDataset(root=root_path)
        # TODO: add validation data
        loader = G_DataLoader(risk_dataset, batch_size=batch_size, shuffle=True)
        if verbose: misc.print_and_flush(f"\tTrain on {root_path}, model = {save_path}")
        train_model(net, optimizer, scheduler, criterion, device,
                    epochs=epochs, train_loader=loader, val_loader=None, eval_every=eval_every,
                    load_path=load_path, save_path=save_path)

        load_path = None  # The model is already in memory

    misc.print_and_flush(f"train_model: Total time taken -> {round(time.process_time() - start,2)}")
def create_self_play_script(input_file, move_type, verbose):
    """Generate self-play training data for one move type.

    Builds the apprentice (a neural net or a plain MCTS, depending on the
    inputs JSON), wraps it with an MCTS expert, plays self-play from the
    initial board to collect states, then tags each collected state with
    the expert's move/value and saves it under ``path_data``.

    Args:
        input_file: path to the experiment inputs JSON.
        move_type: phase to collect states for (one of "initialPick",
            "initialFortify", "startTurn", "attack", "fortify").
        verbose: when truthy, print progress messages.
    """
    # ---------------- Start -------------------------

    inputs = misc.read_json(input_file)

    misc.print_and_flush("create_self_play: Start")
    start = time.process_time()

    # Self-play / expert configuration.
    saved_states_per_episode = inputs["saved_states_per_episode"]
    max_episode_depth = inputs["max_episode_depth"]
    apprentice_params = inputs["apprentice_params"]
    expert_params = inputs["expert_params"]


    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    model_args =  misc.read_json(inputs["model_parameters"])

    board_params = inputs["board_params"]
    path_board = board_params["path_board"]

    move_types = ["initialPick", "initialFortify", "startTurn", "attack", "fortify"]
    # ---------------------------------------------------------------

    # Create the board: the network's input sizes depend on the map graph.
    world = World(path_board)


    # Set players (two throwaway random agents to instantiate the board)
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]
    # Set board

    prefs = board_params

    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        if verbose: misc.print_and_flush("create_self_play: Creating model")
        net = GCN_risk(num_nodes, num_edges, 
                         model_args['board_input_dim'], model_args['global_input_dim'],
                         model_args['hidden_global_dim'], model_args['num_global_layers'],
                         model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                         model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                         model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                         model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                         model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                         model_args['hidden_value_dim'], model_args['num_value_layers'],
                         model_args['dropout'])

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net.to(device)

        model_name = apprentice_params["model_name"]
        if model_name: # If it is not the empty string
            # NOTE(review): best-effort load — a failure here is printed and
            # the untrained net is used; consider surfacing the error.
            try:

                if verbose: misc.print_and_flush(f"create_self_play : Chosen model is {model_name}")
                state_dict = load_dict(os.path.join(path_model, model_name), device = 'cpu', encoding = 'latin1')
                net.load_state_dict(state_dict['model'])
                if verbose: misc.print_and_flush("create_self_play: Model has been loaded")
            except Exception as e:
                print(e)


        if verbose: misc.print_and_flush("create_self_play: Defining net apprentice")
        # Define initial apprentice
        apprentice = agent.NetApprentice(net)
    else:
        # Non-"net" apprentice: plain MCTS configured from the inputs JSON.
        if verbose: misc.print_and_flush("create_self_play: Defining MCTS apprentice")
        apprentice = agent.MctsApprentice(num_MCTS_sims = apprentice_params["num_MCTS_sims"],
                                          temp = apprentice_params["temp"], 
                                          max_depth = apprentice_params["max_depth"],
                                          sims_per_eval = apprentice_params["sims_per_eval"])


    if verbose: misc.print_and_flush("create_self_play: Defining expert")
    # build expert: an MCTS that queries the apprentice during search
    expert = build_expert_mcts(apprentice, max_depth=expert_params["max_depth"],
                    sims_per_eval=expert_params["sims_per_eval"], num_MCTS_sims=expert_params["num_MCTS_sims"],
                    wa = expert_params["wa"], wb = expert_params["wb"],
                    cb = expert_params["cb"], use_val = expert_params["use_val"])


    if verbose: misc.print_and_flush("create_self_play: Creating data folders")
    # Create folders to store data (one "raw" folder per move type)
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok = True)
    os.makedirs(path_model, exist_ok = True)


    #### START
    start_inner = time.process_time()

    # Work on a copy so the original board can be reused untouched.
    state = copy.deepcopy(board_orig)

    # Play episode, select states to save
    if verbose: misc.print_and_flush("create_self_play: Self-play")

    states_to_save = create_self_play_data(move_type, path_data, state, apprentice, max_depth = max_episode_depth, saved_states_per_episode=saved_states_per_episode, verbose = verbose)

    if verbose: misc.print_and_flush(f"create_self_play: Play episode: Time taken: {round(time.process_time() - start_inner,2)}")


    # Tag the states with the expert's policy/value and save them to disk.
    start_inner = time.process_time()
    if verbose: misc.print_and_flush(f"create_self_play: Tag the states ({len(states_to_save)} states to tag)")
    for st in states_to_save:
        st_tagged, policy_exp, value_exp = tag_with_expert_move(st, expert, temp=expert_params["temp"], verbose=verbose)
        _ = simple_save_state(path_data, st_tagged, policy_exp, value_exp, verbose=verbose)
    if verbose: misc.print_and_flush(f"create_self_play: Tag and save: Time taken -> {round(time.process_time() - start_inner,2)}")


    misc.print_and_flush(f"create_self_play: Total time taken -> {round(time.process_time() - start,2)}")