def __init__(self, model, env, memory, max_steps, max_episodes,
             epsilon_start, epsilon_final, epsilon_decay, start_learning,
             batch_size, save_update_freq, exploration_method, output_dir,
             players, player_num, config_dir, map_file, unit_file,
             env_output_dir, pnames, debug):
    self.model = model
    self.env = env
    self.memory = memory
    self.max_steps = max_steps
    self.max_episodes = max_episodes
    self.epsilon_start = epsilon_start
    self.epsilon_final = epsilon_final
    self.epsilon_decay = epsilon_decay
    self.start_learning = start_learning
    self.batch_size = batch_size
    self.save_update_freq = save_update_freq
    self.output_dir = output_dir
    self.action_table = build_action_table(env.num_groups, env.num_nodes)
    self.players = players
    self.player_num = player_num
    self.config_dir = config_dir
    self.map_file = map_file
    self.unit_file = unit_file
    self.env_output_dir = env_output_dir
    self.pnames = pnames
    self.debug = debug
    self.exploration_method = exploration_method
    # Node IDs are 1-indexed in the environment.
    self.nodes_array = list(range(1, self.env.num_nodes + 1))
    self._create_opponent_pool()
    self.opp_num = 0
    self.episode_cnt = 0
    self.opp_save_freq = 10     # Number of games between saving an opponent to the pool
    self.opp_choose_freq = 0.2  # Probability [0, 1] of sampling the next opponent from the pool
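# A minimal sketch (not in the original source) of how the opponent-pool
# knobs above might drive self-play: every `opp_save_freq` games, freeze a
# copy of the current model into the pool; with probability
# `opp_choose_freq`, face a frozen past self in the next game. The method
# name, `self.opponent_pool` (assumed to be created by
# `_create_opponent_pool`), and the deep-copy snapshotting are assumptions,
# not confirmed by this repository.
def _maybe_refresh_opponent(self):
    import copy
    import random
    if self.episode_cnt % self.opp_save_freq == 0:
        # Snapshot the current policy so later games can face older selves.
        self.opponent_pool.append(copy.deepcopy(self.model))
    if self.opponent_pool and random.random() < self.opp_choose_freq:
        # Sample a frozen opponent from the pool for the next game.
        self.opp_num = random.randrange(len(self.opponent_pool))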
def __init__(self, observation_space, action_space, action_bins,
             target_update_freq, learning_rate, gamma, hidden_dim,
             td_target, device, exploration_method):
    super().__init__()
    self.observation_space = observation_space
    self.action_space = action_space
    self.action_bins = action_bins
    self.gamma = gamma
    self.exploration_method = exploration_method
    self.policy_network = BranchingQNetwork(observation_space, action_space,
                                            action_bins, hidden_dim,
                                            exploration_method)
    self.target_network = BranchingQNetwork(observation_space, action_space,
                                            action_bins, hidden_dim,
                                            exploration_method)
    # Start the target network as an exact copy of the policy network.
    self.target_network.load_state_dict(self.policy_network.state_dict())
    self.optim = optim.Adam(self.policy_network.parameters(), lr=learning_rate)
    self.policy_network.to(device)
    self.target_network.to(device)
    self.device = device
    self.target_update_freq = target_update_freq
    self.update_counter = 0
    self.td_target = td_target
    # TODO: Do not hardcode the number of groups (12) and nodes (11).
    self.action_table = build_action_table(12, 11)
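# To illustrate the TODO above: a sketch (not this repository's actual
# implementation) of what `build_action_table` plausibly enumerates, so the
# hardcoded (12, 11) could instead come from `env.num_groups` and
# `env.num_nodes`, as the trainer constructors in this repo already do.
import numpy as np

def build_action_table(num_groups, num_nodes):
    # Each discrete action pairs a unit group with a destination node;
    # nodes are 1-indexed elsewhere in this codebase.
    return np.array([[group, node]
                     for group in range(num_groups)
                     for node in range(1, num_nodes + 1)])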
def __init__(self, observation_space, action_space, action_bins,
             target_update_freq, learning_rate, gamma, hidden_dim,
             td_target, device, exploration_method, architecture="Double",
             isPer=False, nSteps=3):
    super().__init__()
    self.observation_space = observation_space
    self.isPer = isPer
    self.action_space = action_space
    self.action_bins = action_bins
    self.gamma = gamma
    self.exploration_method = exploration_method
    self.architecture = architecture
    self.player_helper = PlayerHelper(7, 1, "../config/DemoMap.json")
    self.policy_network = BranchingQNetwork(observation_space, action_space,
                                            action_bins, hidden_dim,
                                            exploration_method, architecture)
    if architecture != "Base":
        # Only non-Base architectures keep a separate target network,
        # initialized as an exact copy of the policy network.
        self.target_network = BranchingQNetwork(observation_space, action_space,
                                                action_bins, hidden_dim,
                                                exploration_method, architecture)
        self.target_network.load_state_dict(self.policy_network.state_dict())
        self.target_network.to(device)
    self.optim = optim.Adam(self.policy_network.parameters(), lr=learning_rate)
    self.action_choices = build_action_table()
    self.policy_network.to(device)
    self.device = device
    self.target_update_freq = target_update_freq
    self.update_counter = 0
    self.td_target = td_target
    self.nSteps = nSteps
    self.nBuffer = []  # Holds the most recent nSteps transitions for n-step returns
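# A sketch (not in the original source) of how `nBuffer` and `nSteps` above
# are typically used: buffer transitions until nSteps are queued, then emit
# the oldest one with its reward replaced by the discounted n-step return.
# The method name and the (state, action, reward, next_state, done)
# transition layout are assumptions about this class; episode-boundary
# flushing is omitted for brevity.
def _pop_n_step_transition(self, transition):
    self.nBuffer.append(transition)
    if len(self.nBuffer) < self.nSteps:
        return None
    state, action = self.nBuffer[0][0], self.nBuffer[0][1]
    # r_0 + gamma * r_1 + ... + gamma^(n-1) * r_{n-1}
    n_step_return = sum(self.gamma ** i * t[2]
                        for i, t in enumerate(self.nBuffer))
    next_state, done = self.nBuffer[-1][3], self.nBuffer[-1][4]
    self.nBuffer.pop(0)
    return (state, action, n_step_return, next_state, done)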
def __init__(self, model, env, memory, max_steps, max_episodes,
             epsilon_start, epsilon_final, epsilon_decay, start_learning,
             batch_size, save_update_freq, exploration_method, output_dir,
             players, player_num, config_dir, map_file, unit_file,
             env_output_dir, pnames, debug, renderer, isNSteps):
    self.model = model
    self.env = env
    self.memory = memory
    self.max_steps = max_steps
    self.max_episodes = max_episodes
    self.epsilon_start = epsilon_start
    self.epsilon_final = epsilon_final
    self.epsilon_decay = epsilon_decay
    self.start_learning = start_learning
    self.batch_size = batch_size
    self.save_update_freq = save_update_freq
    self.output_dir = output_dir
    self.action_table = build_action_table(env.num_groups, env.num_nodes)
    self.player_helper = PlayerHelper(7, 1, "../config/DemoMap.json")
    self.players = players
    self.player_num = player_num
    self.config_dir = config_dir
    self.map_file = map_file
    self.unit_file = unit_file
    self.env_output_dir = env_output_dir
    self.pnames = pnames
    self.player_name = "random_actions"
    self.debug = debug
    self.exploration_method = exploration_method
    self.renderer = renderer
    # Any positive value enables n-step returns.
    self.isNSteps = isNSteps > 0
    # Node IDs are 1-indexed in the environment.
    self.nodes_array = list(range(1, self.env.num_nodes + 1))
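# The epsilon_start/epsilon_final/epsilon_decay parameters above imply the
# standard exponential exploration schedule used by most DQN trainers:
#   eps(t) = eps_final + (eps_start - eps_final) * exp(-t / eps_decay)
# A sketch of that schedule; the method name is an assumption, and the
# trainer may compute this inline instead.
def _epsilon_by_step(self, step):
    import math
    return self.epsilon_final + (self.epsilon_start - self.epsilon_final) \
        * math.exp(-step / self.epsilon_decay)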
)
bdqn_player_num = 1

"""Load Pre-Saved Model"""
# bdqn_player.load_state_dict(torch.load(
#     './runs/Michael_Local/model_state_dict_last'))
# bdqn_player.load_state_dict(torch.load(
#     './agents/bd3qn/runs/Newton-train-2-7-21-res/model_state_dict_best'))
bdqn_player.load_state_dict(torch.load(
    './runs/Michael_Local_2/model_state_dict_last'))
bdqn_player.to(device)

players[0] = rand_player
players[1] = bdqn_player
names[0] = rand_player.__class__.__name__
names[1] = bdqn_player.__class__.__name__

action_table = utils.build_action_table(env.num_groups, env.num_nodes)

to_render = True
r = Renderer(map_name, frame_collection=True)
gif_frames = []

# Play
total_wins = 0
game_played = 0
winrate = 0
for episode in range(1):
    state = env.reset(
        players=players,
        config_dir=config.env_config,
        map_file=config.map_file,
        unit_file=config.unit_file,