Example #1
# Standard-library / third-party imports used in this example; project-local
# modules (misc, agent, GCN_risk, load_dict, World, Board, RandomAgent) are
# assumed to come from the surrounding codebase.
import os
import re

import torch


def load_NetPlayer(board, args):
    num_nodes = board.world.map_graph.number_of_nodes()
    num_edges = board.world.map_graph.number_of_edges()
    model_args = misc.read_json(args["model_parameters_json"])
    net = GCN_risk(
        num_nodes, num_edges, model_args['board_input_dim'],
        model_args['global_input_dim'], model_args['hidden_global_dim'],
        model_args['num_global_layers'], model_args['hidden_conv_dim'],
        model_args['num_conv_layers'], model_args['hidden_pick_dim'],
        model_args['num_pick_layers'], model_args['out_pick_dim'],
        model_args['hidden_place_dim'], model_args['num_place_layers'],
        model_args['out_place_dim'], model_args['hidden_attack_dim'],
        model_args['num_attack_layers'], model_args['out_attack_dim'],
        model_args['hidden_fortify_dim'], model_args['num_fortify_layers'],
        model_args['out_fortify_dim'], model_args['hidden_value_dim'],
        model_args['num_value_layers'], model_args['dropout'])
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)
    net.eval()

    state_dict = load_dict(args["model_path"], device='cpu', encoding='latin1')

    net.load_state_dict(state_dict['model'])

    apprentice = agent.NetApprentice(net)

    kwargs = {}
    for a in ["move_selection", "name", "temp"]:
        if a in args: kwargs[a] = args[a]
    netPlayer = agent.NetPlayer(apprentice, **kwargs)

    return netPlayer
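
# Usage sketch (hypothetical file names): args only needs the two paths read
# above plus any of the optional NetPlayer kwargs ("move_selection", "name",
# "temp"); board is an existing Board instance.
#
#   args = {"model_parameters_json": "model_parameters.json",
#           "model_path": "models/model_final.tar",
#           "temp": 1.0}
#   net_player = load_NetPlayer(board, args)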

    # --- Separate fragment (unreachable after the return above): evaluate each
    # saved model against a RandomAgent. models_sorted, match_number, path_model,
    # prefs, path_board, num_matchs and max_turns come from the enclosing code.
    op_armies = []
    model_cont = []
    for i, model_name in enumerate(models_sorted):
        a = re.search(f"[a-z]+_[0-9]+_{match_number}", model_name)
        if a is None: continue  # skip files that do not match the naming pattern
        print(f"Chosen model is {model_name}")
        state_dict = load_dict(os.path.join(path_model, model_name),
                               device='cpu',
                               encoding='latin1')
        net.load_state_dict(state_dict['model'])
        net.eval()
        for k in range(num_matchs):
            if (k + 1) % 10 == 0: print(f'Match {k+1}')
            world = World(path_board)
            apprentice = agent.NetApprentice(net)
            netPlayer = agent.NetPlayer(apprentice,
                                        move_selection="random_proportional",
                                        temp=0.5)
            # Play against random
            pRandom = RandomAgent('Random')
            battle_board = Board(world, [netPlayer, pRandom])
            battle_board.setPreferences(prefs)
            for j in range(max_turns):
                battle_board.play()
                if battle_board.gameOver: break

            # Win iff the net player survived and the random player did not
            w = 1 if (battle_board.players[netPlayer.code].is_alive
                      and not battle_board.players[pRandom.code].is_alive) else 0
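            # (The fragment is cut off here; presumably w would then be
            # accumulated per model, e.g. appended to model_cont.)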


# Project-local modules (misc, agent, GCN_risk, load_dict, World, Board,
# build_expert_mcts, create_self_play_data, tag_with_expert_move,
# simple_save_state) are assumed to be importable from the surrounding codebase.
import copy
import os
import time

import torch


def create_self_play_script(input_file, move_type, verbose):
    # ---------------- Start -------------------------
    
    inputs = misc.read_json(input_file)    
    
    misc.print_and_flush("create_self_play: Start")
    start = time.process_time()
    
    saved_states_per_episode = inputs["saved_states_per_episode"]
    max_episode_depth = inputs["max_episode_depth"]
    apprentice_params = inputs["apprentice_params"]
    expert_params = inputs["expert_params"]
    

    path_data = inputs["path_data"]
    path_model = inputs["path_model"]
    model_args = misc.read_json(inputs["model_parameters"])

    board_params = inputs["board_params"]
    path_board = board_params["path_board"]
    
    move_types = ["initialPick", "initialFortify", "startTurn", "attack", "fortify"]
    # ---------------------------------------------------------------    

    # Create board
    world = World(path_board)


    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]
    # Set board
    
    prefs = board_params
            
    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if apprentice_params["type"] == "net":
        if verbose: misc.print_and_flush("create_self_play: Creating model")
        net = GCN_risk(num_nodes, num_edges, 
                         model_args['board_input_dim'], model_args['global_input_dim'],
                         model_args['hidden_global_dim'], model_args['num_global_layers'],
                         model_args['hidden_conv_dim'], model_args['num_conv_layers'],
                         model_args['hidden_pick_dim'], model_args['num_pick_layers'], model_args['out_pick_dim'],
                         model_args['hidden_place_dim'], model_args['num_place_layers'], model_args['out_place_dim'],
                         model_args['hidden_attack_dim'], model_args['num_attack_layers'], model_args['out_attack_dim'],
                         model_args['hidden_fortify_dim'], model_args['num_fortify_layers'], model_args['out_fortify_dim'],
                         model_args['hidden_value_dim'], model_args['num_value_layers'],
                         model_args['dropout'])

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')        
        net.to(device)
        
        model_name = apprentice_params["model_name"]
        if model_name:  # if it is not the empty string
            try:
                if verbose: misc.print_and_flush(f"create_self_play: Chosen model is {model_name}")
                state_dict = load_dict(os.path.join(path_model, model_name), device='cpu', encoding='latin1')
                net.load_state_dict(state_dict['model'])
                if verbose: misc.print_and_flush("create_self_play: Model has been loaded")
            except Exception as e:
                print(e)

        if verbose: misc.print_and_flush("create_self_play: Defining net apprentice")
        # Define initial apprentice        
        apprentice = agent.NetApprentice(net)
    else:
        if verbose: misc.print_and_flush("create_self_play: Defining MCTS apprentice")
        apprentice = agent.MctsApprentice(num_MCTS_sims=apprentice_params["num_MCTS_sims"],
                                          temp=apprentice_params["temp"],
                                          max_depth=apprentice_params["max_depth"],
                                          sims_per_eval=apprentice_params["sims_per_eval"])


    if verbose: misc.print_and_flush("create_self_play: Defining expert")
    # build expert
    expert = build_expert_mcts(apprentice,
                               max_depth=expert_params["max_depth"],
                               sims_per_eval=expert_params["sims_per_eval"],
                               num_MCTS_sims=expert_params["num_MCTS_sims"],
                               wa=expert_params["wa"], wb=expert_params["wb"],
                               cb=expert_params["cb"], use_val=expert_params["use_val"])
    
                         
    if verbose: misc.print_and_flush("create_self_play: Creating data folders")
    # Create folders to store data
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok=True)
    os.makedirs(path_model, exist_ok=True)
                                    

    #### START
    start_inner = time.process_time()
    
    state = copy.deepcopy(board_orig)    

    # Play episode, select states to save
    if verbose: misc.print_and_flush("create_self_play: Self-play")
    
    states_to_save = create_self_play_data(move_type, path_data, state, apprentice,
                                           max_depth=max_episode_depth,
                                           saved_states_per_episode=saved_states_per_episode,
                                           verbose=verbose)

    if verbose: misc.print_and_flush(f"create_self_play: Play episode: Time taken: {round(time.process_time() - start_inner,2)}")
    
    
    # Tag the states and save them
    start_inner = time.process_time()
    if verbose: misc.print_and_flush(f"create_self_play: Tag the states ({len(states_to_save)} states to tag)")  
    for st in states_to_save:
        st_tagged, policy_exp, value_exp = tag_with_expert_move(st, expert, temp=expert_params["temp"], verbose=verbose)
        _ = simple_save_state(path_data, st_tagged, policy_exp, value_exp, verbose=verbose)
    if verbose: misc.print_and_flush(f"create_self_play: Tag and save: Time taken -> {round(time.process_time() - start_inner,2)}")
    
        
    misc.print_and_flush(f"create_self_play: Total time taken -> {round(time.process_time() - start,2)}")
Example #4
    # Fragment from a training script: optimizer, net, TPT_Loss and the
    # *_mcts_sims / path variables used below are defined earlier in the
    # source file.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20,
                                                gamma=0.5)
    criterion = TPT_Loss
    move_types = [
        'initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify'
    ]
    types_cycle = itertools.cycle(move_types)

    print("Defining apprentice")
    # Define initial apprentice
    apprentice = agent.MctsApprentice(
        num_MCTS_sims=initial_apprentice_mcts_sims,
        temp=1,
        max_depth=max_depth)
    apprentice = agent.NetApprentice(
        net)  # Test the net apprentice, it is way faster # CAMBIAR

    print("Defining expert")
    # build expert
    expert = build_expert_mcts(
        None)  # Start with only MCTS with no inner apprentice

    expert = build_expert_mcts(
        agent.NetApprentice(net))  # Test the network # CAMBIAR

    expert.num_MCTS_sims = expert_mcts_sims

    print("Creating data folders")
    # Create folders to store data
    for folder in move_types:
        os.makedirs(os.path.join(path_data, folder, 'raw'), exist_ok=True)
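
Example #4 stops after the folder setup. The sketch below shows one plausible way the objects defined above (types_cycle, net, optimizer, criterion, scheduler) could be wired into a training loop; num_epochs and train_on are hypothetical names, not part of the snippet.

    # Hypothetical continuation: round-robin over the five move types.
    for epoch in range(num_epochs):      # num_epochs: assumed hyperparameter
        move_type = next(types_cycle)    # cycles initialPick -> ... -> fortify
        # train_on(net, move_type, optimizer, criterion)  # placeholder step
        scheduler.step()                 # StepLR halves the LR every 20 steps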