def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.policy_output_size = self.calculate_policy_output_size()
    self.policy_new = self.create_NN(input_dim=self.state_size,
                                     output_dim=self.policy_output_size)
    model_path = self.config.model_path if self.config.model_path else "Models"
    self.policy_new_path = os.path.join(
        model_path, "{}_policy_new.pt".format(self.agent_name))
    if self.config.load_model:
        self.locally_load_policy()
    self.policy_old = self.create_NN(input_dim=self.state_size,
                                     output_dim=self.policy_output_size)
    self.policy_old.load_state_dict(copy.deepcopy(self.policy_new.state_dict()))
    self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(),
                                           lr=self.hyperparameters["learning_rate"],
                                           eps=1e-4)
    self.episode_number = 0
    self.many_episode_states = []
    self.many_episode_actions = []
    self.many_episode_rewards = []
    self.experience_generator = Parallel_Experience_Generator(
        self.environment, self.policy_new, self.config.seed,
        self.hyperparameters, self.action_size)
    self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)
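# A minimal sketch of the `locally_load_policy` hook used above, assuming the
# checkpoint at `self.policy_new_path` was written with
# `torch.save(network.state_dict(), path)`. This illustrates the load step;
# it is not necessarily the repo's actual implementation.
def locally_load_policy(self):
    import torch
    if os.path.isfile(self.policy_new_path):
        state_dict = torch.load(self.policy_new_path, map_location="cpu")
        self.policy_new.load_state_dict(state_dict)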
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.config.no_render_mode = False  # this agent must run in render mode
    self.q_network_local = q_network_2_EYE(n_action=self.get_action_size())
    self.q_network_target = q_network_2_EYE(n_action=self.get_action_size())
    self.q_network_optimizer = optim.SGD(self.q_network_local.parameters(),
                                         lr=self.hyperparameters["learning_rate"],
                                         weight_decay=5e-4)
    self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                self.hyperparameters["batch_size"], config.seed)
    self.exploration_strategy = Epsilon_Greedy_Exploration(config)
    if config.backbone_pretrain:
        self.load_pretrain()
    self.copy_model_over(from_model=self.q_network_local,
                         to_model=self.q_network_target)
    self.q_network_local.to(self.q_network_local.device)
    self.q_network_target.to(self.q_network_target.device)
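# A sketch of the `copy_model_over` helper relied on above, assuming it
# hard-copies parameters so the target network starts identical to the local
# one (the usual DQN pattern); the exact placement and name are assumptions.
@staticmethod
def copy_model_over(from_model, to_model):
    for to_param, from_param in zip(to_model.parameters(), from_model.parameters()):
        to_param.data.copy_(from_param.data.clone())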
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                self.hyperparameters["batch_size"], config.seed)
    self.q_network_local = self.create_NN(input_dim=self.state_size,
                                          output_dim=self.action_size)
    self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(),
                                          lr=self.hyperparameters["learning_rate"])
    self.exploration_strategy = Epsilon_Greedy_Exploration(config)
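# A self-contained sketch of the Replay_Buffer interface assumed by these
# agents: a fixed-capacity store of transitions with uniform random sampling.
# Method names mirror common usage; the internals (and the extra device
# argument some agents pass) are assumptions, not the repo's exact code.
import random
from collections import deque, namedtuple

class Replay_Buffer:
    def __init__(self, buffer_size, batch_size, seed):
        self.memory = deque(maxlen=buffer_size)  # oldest transitions drop out first
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience", ["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add_experience(self, state, action, reward, next_state, done):
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)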
def __init__(self, config, global_action_id_to_primitive_actions,
             action_length_reward_bonus, end_of_episode_symbol="/"):
    super().__init__(config)
    self.end_of_episode_symbol = end_of_episode_symbol
    self.global_action_id_to_primitive_actions = global_action_id_to_primitive_actions
    self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                self.hyperparameters["batch_size"], config.seed)
    self.exploration_strategy = Epsilon_Greedy_Exploration(config)
    self.oracle = self.create_oracle()
    self.oracle_optimizer = optim.Adam(self.oracle.parameters(),
                                       lr=self.hyperparameters["learning_rate"])
    # The Q-network input is the state plus one extra feature, hence state_size + 1.
    self.q_network_local = self.create_NN(input_dim=self.state_size + 1,
                                          output_dim=self.action_size)
    self.q_network_local.print_model_summary()
    self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(),
                                          lr=self.hyperparameters["learning_rate"])
    self.q_network_target = self.create_NN(input_dim=self.state_size + 1,
                                           output_dim=self.action_size)
    Base_Agent.copy_model_over(from_model=self.q_network_local,
                               to_model=self.q_network_target)
    self.action_length_reward_bonus = action_length_reward_bonus
    self.abandon_ship = config.hyperparameters["abandon_ship"]
def __init__(self, config): Base_Agent.__init__(self, config) self.memory = Replay_Buffer(self.hyperparameters["buffer_size"], self.hyperparameters["batch_size"], config.seed) self.q_network_local = Policy(self.state_size, self.action_size).to("cuda") self.q_network_optimizer = optim.Adam( self.q_network_local.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4) self.exploration_strategy = Epsilon_Greedy_Exploration(config)
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.policy_output_size = self.calculate_policy_output_size()
    self.policy_new = self.create_NN(input_dim=self.state_size,
                                     output_dim=self.policy_output_size)
    self.policy_old = self.create_NN(input_dim=self.state_size,
                                     output_dim=self.policy_output_size)
    self.policy_old.load_state_dict(copy.deepcopy(self.policy_new.state_dict()))
    self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(),
                                           lr=self.hyperparameters["learning_rate"])
    self.episode_number = 0
    self.many_episode_states = []
    self.many_episode_actions = []
    self.many_episode_rewards = []
    self.experience_generator = Parallel_Experience_Generator(
        self.environment, self.policy_new, self.config.seed,
        self.hyperparameters, self.action_size)
    self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)
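# A sketch of how policy_old is typically re-synchronised with policy_new after
# each optimisation round in this old/new-policy (PPO-style) setup; the method
# name and placement are illustrative assumptions.
def equalise_policies(self):
    for old_param, new_param in zip(self.policy_old.parameters(),
                                    self.policy_new.parameters()):
        old_param.data.copy_(new_param.data)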
def __init__(self, config, agent_name_=agent_name): Base_Agent.__init__(self, config, agent_name=agent_name_) self.memory = Replay_Buffer(self.hyperparameters["buffer_size"], self.hyperparameters["batch_size"], config.seed, self.device) self.q_network_local = self.create_NN( input_dim=self.state_size, output_dim=self.action_size) # TODO: Change NN self.q_network_optimizer = optim.Adam( self.q_network_local.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4) self.exploration_strategy = Epsilon_Greedy_Exploration(config) self.wandb_watch(self.q_network_local, log_freq=self.config.wandb_model_log_freq)
def __init__(self, config):
    Base_Agent.__init__(self, config)
    model_path = self.config.model_path if self.config.model_path else "Models"
    self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                self.hyperparameters["batch_size"], config.seed)
    self.q_network_local = self.create_NN(input_dim=self.state_size,
                                          output_dim=self.action_size)
    self.q_network_local_path = os.path.join(
        model_path, "{}_q_network_local.pt".format(self.agent_name))
    if self.config.load_model:
        self.locally_load_policy()
    self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(),
                                          lr=self.hyperparameters["learning_rate"],
                                          eps=1e-4)
    self.exploration_strategy = Epsilon_Greedy_Exploration(config)
def __init__(self, config, agent_name_=agent_name): Base_Agent.__init__(self, config, agent_name=agent_name_) self.memory = Replay_Buffer(self.hyperparameters["buffer_size"], self.hyperparameters["batch_size"], config.seed, self.device) # If model is not provided, create one. TODO Add this mechanism to all agents. if not "model" in self.hyperparameters or self.hyperparameters[ "model"] is None: self.q_network_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size) else: self.q_network_local = self.hyperparameters["model"] self.wandb_watch(self.q_network_local, log_freq=self.config.wandb_model_log_freq) self.q_network_optimizer = optim.Adam( self.q_network_local.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4) self.exploration_strategy = Epsilon_Greedy_Exploration(config)
def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.agent_dic = self.create_agent_dic()
    self.exploration_strategy = Epsilon_Greedy_Exploration(config)