Example #1
 def __init__(self, config):
     Base_Agent.__init__(self, config)
     self.policy_output_size = self.calculate_policy_output_size()
     self.policy_new = self.create_NN(input_dim=self.state_size,
                                      output_dim=self.policy_output_size)
     model_path = self.config.model_path if self.config.model_path else 'Models'
     self.policy_new_path = os.path.join(
         model_path, "{}_policy_new.pt".format(self.agent_name))
     if self.config.load_model: self.locally_load_policy()
     self.policy_old = self.create_NN(input_dim=self.state_size,
                                      output_dim=self.policy_output_size)
     self.policy_old.load_state_dict(
         copy.deepcopy(self.policy_new.state_dict()))
     self.policy_new_optimizer = optim.Adam(
         self.policy_new.parameters(),
         lr=self.hyperparameters["learning_rate"],
         eps=1e-4)
     self.episode_number = 0
     self.many_episode_states = []
     self.many_episode_actions = []
     self.many_episode_rewards = []
     self.experience_generator = Parallel_Experience_Generator(
         self.environment, self.policy_new, self.config.seed,
         self.hyperparameters, self.action_size)
     self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)
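
The policy_new / policy_old pair created above is the usual setup for a clipped-surrogate (PPO-style) update: the old policy is frozen to a copy of the new one before each learning phase. Below is a minimal sketch of the update such a pair is typically used for; the tensor names (log_probs_new, log_probs_old, advantages) and clip_epsilon are illustrative assumptions, not identifiers from this codebase.

import torch

def clipped_surrogate_loss(log_probs_new, log_probs_old, advantages,
                           clip_epsilon=0.2):
    # Probability ratio between the current policy and the frozen old policy.
    ratio = torch.exp(log_probs_new - log_probs_old)
    clipped_ratio = torch.clamp(ratio, 1.0 - clip_epsilon, 1.0 + clip_epsilon)
    # PPO maximises the clipped objective, so we return its negation as a loss.
    return -torch.min(ratio * advantages, clipped_ratio * advantages).mean()
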
Example #2
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        config.no_render_mode = False  # must be in render mode

        self.q_network_local = q_network_2_EYE(n_action=self.get_action_size())
        self.q_network_target = q_network_2_EYE(
            n_action=self.get_action_size())
        self.q_network_optimizer = optim.SGD(
            self.q_network_local.parameters(),
            lr=self.hyperparameters["learning_rate"],
            weight_decay=5e-4)

        self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                    self.hyperparameters["batch_size"],
                                    config.seed)
        self.exploration_strategy = Epsilon_Greedy_Exploration(config)

        if config.backbone_pretrain:
            self.load_pretrain()

        self.copy_model_over(from_model=self.q_network_local,
                             to_model=self.q_network_target)

        self.q_network_local.to(self.q_network_local.device)
        self.q_network_target.to(self.q_network_target.device)
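
copy_model_over itself is not shown in these snippets; judging by the equivalent line in Example #1 (load_state_dict of a deep-copied state_dict), it presumably performs a hard synchronisation of the target network along these lines. A sketch only, not the repository's implementation:

import copy

def copy_model_over(from_model, to_model):
    # Hard-copy every parameter of from_model into to_model so the target
    # network starts out identical to the local network.
    to_model.load_state_dict(copy.deepcopy(from_model.state_dict()))
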
Example #3
 def __init__(self, config):
     Base_Agent.__init__(self, config)
     self.memory = Replay_Buffer(self.hyperparameters["buffer_size"], self.hyperparameters["batch_size"], config.seed)
     self.q_network_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size)
     self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(),
                                           lr=self.hyperparameters["learning_rate"])
     self.exploration_strategy = Epsilon_Greedy_Exploration(config)
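
All of these constructors read the same handful of fields from the config object: a seed, a hyperparameters dict with at least learning_rate, buffer_size and batch_size, and in some variants model_path / load_model. A bare illustrative stand-in, with field names inferred from the snippets rather than taken from the project's own Config class:

class Config:
    # Illustrative only: the minimal attributes the __init__ methods above read.
    def __init__(self):
        self.seed = 1
        self.model_path = None
        self.load_model = False
        self.hyperparameters = {
            "learning_rate": 5e-4,
            "buffer_size": int(1e5),
            "batch_size": 64,
        }
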
Example #4
    def __init__(self,
                 config,
                 global_action_id_to_primitive_actions,
                 action_length_reward_bonus,
                 end_of_episode_symbol="/"):
        super().__init__(config)
        self.end_of_episode_symbol = end_of_episode_symbol
        self.global_action_id_to_primitive_actions = global_action_id_to_primitive_actions
        self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                    self.hyperparameters["batch_size"],
                                    config.seed)
        self.exploration_strategy = Epsilon_Greedy_Exploration(config)

        self.oracle = self.create_oracle()
        self.oracle_optimizer = optim.Adam(
            self.oracle.parameters(), lr=self.hyperparameters["learning_rate"])

        self.q_network_local = self.create_NN(input_dim=self.state_size + 1,
                                              output_dim=self.action_size)
        self.q_network_local.print_model_summary()
        self.q_network_optimizer = optim.Adam(
            self.q_network_local.parameters(),
            lr=self.hyperparameters["learning_rate"])
        self.q_network_target = self.create_NN(input_dim=self.state_size + 1,
                                               output_dim=self.action_size)
        Base_Agent.copy_model_over(from_model=self.q_network_local,
                                   to_model=self.q_network_target)

        self.action_length_reward_bonus = action_length_reward_bonus
        self.abandon_ship = config.hyperparameters["abandon_ship"]
Example #5
 def __init__(self, config):
     Base_Agent.__init__(self, config)
     self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                 self.hyperparameters["batch_size"],
                                 config.seed)
     self.q_network_local = Policy(self.state_size,
                                   self.action_size).to("cuda")
     self.q_network_optimizer = optim.Adam(
         self.q_network_local.parameters(),
         lr=self.hyperparameters["learning_rate"],
         eps=1e-4)
     self.exploration_strategy = Epsilon_Greedy_Exploration(config)
Example #6
 def __init__(self, config):
     Base_Agent.__init__(self, config)
     self.policy_output_size = self.calculate_policy_output_size()
     self.policy_new = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size)
     self.policy_old = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size)
     self.policy_old.load_state_dict(copy.deepcopy(self.policy_new.state_dict()))
     self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(), lr=self.hyperparameters["learning_rate"])
     self.episode_number = 0
     self.many_episode_states = []
     self.many_episode_actions = []
     self.many_episode_rewards = []
     self.experience_generator = Parallel_Experience_Generator(self.environment, self.policy_new, self.config.seed,
                                                               self.hyperparameters, self.action_size)
     self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)
Example #7
    def __init__(self, config, agent_name_=agent_name):
        Base_Agent.__init__(self, config, agent_name=agent_name_)
        self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                    self.hyperparameters["batch_size"],
                                    config.seed, self.device)
        self.q_network_local = self.create_NN(
            input_dim=self.state_size,
            output_dim=self.action_size)  # TODO: Change NN
        self.q_network_optimizer = optim.Adam(
            self.q_network_local.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)
        self.exploration_strategy = Epsilon_Greedy_Exploration(config)

        self.wandb_watch(self.q_network_local,
                         log_freq=self.config.wandb_model_log_freq)
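
wandb_watch here presumably wraps Weights & Biases' wandb.watch, hooking gradient and parameter logging onto the local Q-network. The wrapper itself is not shown, so the following is only a sketch of what it might do, assuming wandb.init has already been called elsewhere:

import wandb

def wandb_watch(model, log_freq):
    # Register the network with W&B so gradients and parameters are logged
    # every log_freq optimisation steps (sketch; not the project's helper).
    wandb.watch(model, log="all", log_freq=log_freq)
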
Example #8
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        model_path = self.config.model_path if self.config.model_path else 'Models'
        self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                    self.hyperparameters["batch_size"],
                                    config.seed)
        self.q_network_local = self.create_NN(input_dim=self.state_size,
                                              output_dim=self.action_size)
        self.q_network_local_path = os.path.join(
            model_path, "{}_q_network_local.pt".format(self.agent_name))

        if self.config.load_model: self.locally_load_policy()
        self.q_network_optimizer = optim.Adam(
            self.q_network_local.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)
        self.exploration_strategy = Epsilon_Greedy_Exploration(config)
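
locally_load_policy is likewise not shown; given that the agent builds a *.pt path per network in __init__, it presumably restores a saved state_dict roughly as follows. A sketch under that assumption, not the repository's code:

import torch

def locally_load_policy(self):
    # Restore previously saved weights for the local Q-network from the path
    # built in __init__ (illustrative method sketch).
    state_dict = torch.load(self.q_network_local_path, map_location="cpu")
    self.q_network_local.load_state_dict(state_dict)
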
Example #9
    def __init__(self, config, agent_name_=agent_name):
        Base_Agent.__init__(self, config, agent_name=agent_name_)

        self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                    self.hyperparameters["batch_size"],
                                    config.seed, self.device)

        # If a model is not provided, create one. TODO: Add this mechanism to all agents.
        if ("model" not in self.hyperparameters
                or self.hyperparameters["model"] is None):
            self.q_network_local = self.create_NN(input_dim=self.state_size,
                                                  output_dim=self.action_size)
        else:
            self.q_network_local = self.hyperparameters["model"]

        self.wandb_watch(self.q_network_local,
                         log_freq=self.config.wandb_model_log_freq)

        self.q_network_optimizer = optim.Adam(
            self.q_network_local.parameters(),
            lr=self.hyperparameters["learning_rate"],
            eps=1e-4)
        self.exploration_strategy = Epsilon_Greedy_Exploration(config)
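
Because this constructor only falls back to create_NN when no model is supplied, a caller can inject a ready-made torch.nn.Module through the hyperparameters dict. An illustrative usage, reusing the assumed Config stand-in sketched after Example #3 and hypothetical layer sizes:

import torch.nn as nn

custom_net = nn.Sequential(
    nn.Linear(4, 64), nn.ReLU(),  # 4 = example state size
    nn.Linear(64, 2),             # 2 = example action count
)
config = Config()
config.hyperparameters["model"] = custom_net  # picked up by the branch above
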
Example #10
 def __init__(self, config):
     Base_Agent.__init__(self, config)
     self.agent_dic = self.create_agent_dic()
     self.exploration_strategy = Epsilon_Greedy_Exploration(config)
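
Every example above finishes by constructing an Epsilon_Greedy_Exploration strategy. Its exact interface is not shown here, but the rule it is named after is standard epsilon-greedy action selection, sketched below with assumed argument names:

import random
import torch

def epsilon_greedy_action(q_values, epsilon):
    # With probability epsilon pick a random action (explore); otherwise act
    # greedily with respect to the Q-values (exploit).
    if random.random() < epsilon:
        return random.randrange(q_values.shape[-1])
    return int(torch.argmax(q_values))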