Example #1
 def __init__(self, model, env, memory, max_steps, max_episodes,
              epsilon_start, epsilon_final, epsilon_decay, start_learning,
              batch_size, save_update_freq, exploration_method, output_dir,
              players, player_num, config_dir, map_file, unit_file,
              env_output_dir, pnames, debug):
     self.model = model
     self.env = env
     self.memory = memory
     self.max_steps = max_steps
     self.max_episodes = max_episodes
     self.epsilon_start = epsilon_start
     self.epsilon_final = epsilon_final
     self.epsilon_decay = epsilon_decay
     self.start_learning = start_learning
     self.batch_size = batch_size
     self.save_update_freq = save_update_freq
     self.output_dir = output_dir
     self.action_table = build_action_table(env.num_groups, env.num_nodes)
     self.players = players
     self.player_num = player_num
     self.config_dir = config_dir
     self.map_file = map_file
     self.unit_file = unit_file
     self.env_output_dir = env_output_dir
     self.pnames = pnames
     self.debug = debug
     self.exploration_method = exploration_method
     self.nodes_array = list(range(1, self.env.num_nodes + 1))
     self._create_opponent_pool()
     self.opp_num = 0
     self.episode_cnt = 0
     self.opp_save_freq = 10  # Number of games between opponent snapshots
     self.opp_choose_freq = 0.2  # Probability [0, 1] of sampling from the opponent pool
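build_action_table itself is not shown in any of these snippets. A minimal sketch, assuming it only enumerates every (group, node) pair so a flat action index can be decoded into a move order (the 1-based node ids are inferred from the nodes_array built above):

def build_action_table(num_groups, num_nodes):
    # Hypothetical sketch: one flat action per (group, node) pair, so a single
    # Q-value index decodes back into "move this group to that node".
    # Node ids are 1-based, matching nodes_array in the constructor above.
    return [[group, node]
            for group in range(num_groups)
            for node in range(1, num_nodes + 1)]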
Example #2
    def __init__(self, observation_space, action_space, action_bins,
                 target_update_freq, learning_rate, gamma, hidden_dim,
                 td_target, device, exploration_method):
        super().__init__()

        self.observation_space = observation_space
        self.action_space = action_space
        self.action_bins = action_bins
        self.gamma = gamma
        self.exploration_method = exploration_method

        self.policy_network = BranchingQNetwork(observation_space,
                                                action_space, action_bins,
                                                hidden_dim, exploration_method)
        self.target_network = BranchingQNetwork(observation_space,
                                                action_space, action_bins,
                                                hidden_dim, exploration_method)
        self.target_network.load_state_dict(self.policy_network.state_dict())

        self.optim = optim.Adam(self.policy_network.parameters(),
                                lr=learning_rate)

        self.policy_network.to(device)
        self.target_network.to(device)
        self.device = device

        self.target_update_freq = target_update_freq
        self.update_counter = 0

        self.td_target = td_target

        # TODO: Do not hardcode the number of groups (12) and nodes (11)
        self.action_table = build_action_table(12, 11)
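The constructor only initializes target_update_freq and update_counter; the periodic target-network sync happens elsewhere in the agent. A hypothetical helper consistent with those fields could look like this:

    def _maybe_sync_target(self):
        # Hypothetical helper: count learning updates and copy the policy
        # weights into the target network every target_update_freq updates.
        self.update_counter += 1
        if self.update_counter % self.target_update_freq == 0:
            self.target_network.load_state_dict(
                self.policy_network.state_dict())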
Example #3
    def __init__(self,
                 observation_space,
                 action_space,
                 action_bins,
                 target_update_freq,
                 learning_rate,
                 gamma,
                 hidden_dim,
                 td_target,
                 device,
                 exploration_method,
                 architecture="Double",
                 isPer=False,
                 nSteps=3):
        super().__init__()

        self.observation_space = observation_space
        self.isPer = isPer
        self.action_space = action_space
        self.action_bins = action_bins
        self.gamma = gamma
        self.exploration_method = exploration_method
        self.architecture = architecture
        self.player_helper = PlayerHelper(7, 1, "../config/DemoMap.json")
        self.policy_network = BranchingQNetwork(observation_space,
                                                action_space, action_bins,
                                                hidden_dim, exploration_method,
                                                architecture)
        if architecture != "Base":
            self.target_network = BranchingQNetwork(observation_space,
                                                    action_space, action_bins,
                                                    hidden_dim,
                                                    exploration_method,
                                                    architecture)
            self.target_network.load_state_dict(
                self.policy_network.state_dict())
            self.target_network.to(device)

        self.optim = optim.Adam(self.policy_network.parameters(),
                                lr=learning_rate)

        self.action_choices = build_action_table()

        self.policy_network.to(device)

        self.device = device

        self.target_update_freq = target_update_freq
        self.update_counter = 0

        self.td_target = td_target

        self.nSteps = nSteps

        self.nBuffer = []
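nSteps and nBuffer suggest n-step returns, but the accumulation logic sits outside this snippet. One common approach, sketched here under the assumption that nBuffer holds raw transitions until nSteps of them are available:

    def _append_n_step(self, state, action, reward, next_state, done):
        # Hypothetical n-step accumulation: buffer raw transitions and, once
        # nSteps have been collected (or the episode ends), collapse them into
        # one transition whose reward is the discounted n-step return.
        self.nBuffer.append((state, action, reward, next_state, done))
        if len(self.nBuffer) < self.nSteps and not done:
            return None
        n_return = 0.0
        for _, _, r, _, _ in reversed(self.nBuffer):
            n_return = r + self.gamma * n_return
        first_state, first_action, _, _, _ = self.nBuffer.pop(0)
        return (first_state, first_action, n_return, next_state, done)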
Example #4
 def __init__(self, model,
              env,
              memory,
              max_steps,
              max_episodes,
              epsilon_start,
              epsilon_final,
              epsilon_decay,
              start_learning,
              batch_size,
              save_update_freq,
              exploration_method,
              output_dir,
              players,
              player_num,
              config_dir,
              map_file,
              unit_file,
              env_output_dir,
              pnames,
              debug,
              renderer,
              isNSteps):
     self.model = model
     self.env = env
     self.memory = memory
     self.max_steps = max_steps
     self.max_episodes = max_episodes
     self.epsilon_start = epsilon_start
     self.epsilon_final = epsilon_final
     self.epsilon_decay = epsilon_decay
     self.start_learning = start_learning
     self.batch_size = batch_size
     self.save_update_freq = save_update_freq
     self.output_dir = output_dir
     self.action_table = build_action_table(env.num_groups, env.num_nodes)
     self.player_helper = PlayerHelper(7, 1, "../config/DemoMap.json")
     self.players = players
     self.player_num = player_num
     self.config_dir = config_dir
     self.map_file = map_file
     self.unit_file = unit_file
     self.env_output_dir = env_output_dir
     self.pnames = pnames
     self.player_name = "random_actions"
     self.debug = debug
     self.exploration_method = exploration_method
     self.nodes_array = list(range(1, self.env.num_nodes + 1))
     self.renderer = renderer
     self.isNSteps = isNSteps > 0
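The epsilon_start, epsilon_final, and epsilon_decay values stored above imply an annealing schedule that this constructor does not show. A typical exponential decay, written as a hypothetical helper (assumes import math at module level):

 def _epsilon_by_step(self, step):
     # Hypothetical schedule: anneal exploration from epsilon_start toward
     # epsilon_final with a time constant of epsilon_decay environment steps.
     return self.epsilon_final + \
         (self.epsilon_start - self.epsilon_final) * \
         math.exp(-step / self.epsilon_decay)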
Example #5
    )
    bdqn_player_num = 1

    """Load Pre-Saved Model"""
    #bdqn_player.load_state_dict(torch.load(
    #    './runs/Michael_Local/model_state_dict_last'))
    #bdqn_player.load_state_dict(torch.load(
    #    './agents/bd3qn/runs/Newton-train-2-7-21-res/model_state_dict_best'))
    bdqn_player.load_state_dict(torch.load(
        './runs/Michael_Local_2/model_state_dict_last'))
    bdqn_player.to(device)
    players[0] = rand_player
    players[1] = bdqn_player
    names[0] = rand_player.__class__.__name__
    names[1] = bdqn_player.__class__.__name__
    action_table = utils.build_action_table(env.num_groups, env.num_nodes)

    to_render = True
    r = Renderer(map_name, frame_collection=True)
    gif_frames = []
    
    # Play
    total_wins = 0
    game_played = 0
    winrate = 0
    for episode in range(1):
        state = env.reset(
            players=players,
            config_dir=config.env_config,
            map_file=config.map_file,
            unit_file=config.unit_file,