Example #1
    def __init__(self, config):
        super().__init__(config)
        self.training_mode = True
        self.num_skills = config.hyperparameters["num_skills"]
        self.unsupervised_episodes = config.hyperparameters["num_unsupervised_episodes"]
        self.supervised_episodes = config.num_episodes_to_run - self.unsupervised_episodes

        assert self.hyperparameters["DISCRIMINATOR"]["final_layer_activation"] is None, "Final layer activation for discriminator should be None"
        self.discriminator = self.create_NN(self.state_size, self.num_skills, key_to_use="DISCRIMINATOR")
        self.discriminator_optimizer = optim.Adam(self.discriminator.parameters(),
                                                  lr=self.hyperparameters["DISCRIMINATOR"]["learning_rate"])
        self.agent_config = copy.deepcopy(config)
        self.agent_config.environment = DIAYN_Skill_Wrapper(copy.deepcopy(self.environment), self.num_skills, self)
        self.agent_config.hyperparameters = self.agent_config.hyperparameters["AGENT"]
        self.agent_config.hyperparameters["do_evaluation_iterations"] = False

        if 'Discrete' in str(config.environment.action_space):
            self.agent = SAC_Discrete(self.agent_config)
        else:
            self.agent = SAC(self.agent_config)  # We use SAC because it maximises the policy's entropy over actions, which is also part of the DIAYN objective

        self.timesteps_to_give_up_control_for = self.hyperparameters["MANAGER"]["timesteps_to_give_up_control_for"]
        self.manager_agent_config = copy.deepcopy(config)
        self.manager_agent_config.environment = DIAYN_Manager_Agent_Wrapper(copy.deepcopy(self.environment), self.agent,
                                                                            self.timesteps_to_give_up_control_for, self.num_skills)
        self.manager_agent_config.hyperparameters = self.manager_agent_config.hyperparameters["MANAGER"]
        self.manager_agent = DDQN(self.manager_agent_config)
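
The discriminator built above is what converts states into an intrinsic training signal. As a minimal sketch (diayn_intrinsic_reward is a hypothetical helper, not the wrapper's actual code), the DIAYN pseudo-reward for skill z in state s' is log q(z | s') - log p(z) with p(z) uniform over skills; the discriminator must output raw logits for the log-softmax below, which is why the assert above requires a final-layer activation of None:

import torch
import torch.nn.functional as F

def diayn_intrinsic_reward(discriminator, next_state, skill, num_skills):
    """Compute log q(z | s') - log p(z) for a single transition.
    next_state: 1-D float tensor; skill: int index of the active skill."""
    with torch.no_grad():
        logits = discriminator(next_state.unsqueeze(0))            # shape (1, num_skills)
        log_q_z_given_s = F.log_softmax(logits, dim=-1)[0, skill]  # log q(z | s')
    log_p_z = torch.log(torch.tensor(1.0 / num_skills))            # uniform skill prior
    return (log_q_z_given_s - log_p_z).item()
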
Example #2
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.controller_config = copy.deepcopy(config)
        self.controller_config.hyperparameters = self.controller_config.hyperparameters[
            "CONTROLLER"]
        self.controller = DDQN(self.controller_config)
        # The controller conditions on state and goal together, hence input_dim = state_size * 2
        self.controller.q_network_local = self.create_NN(
            input_dim=self.state_size * 2,
            output_dim=self.action_size,
            key_to_use="CONTROLLER")
        self.controller.q_network_target = self.create_NN(
            input_dim=self.state_size * 2,
            output_dim=self.action_size,
            key_to_use="CONTROLLER")

        self.meta_controller_config = copy.deepcopy(config)
        self.meta_controller_config.hyperparameters = self.meta_controller_config.hyperparameters[
            "META_CONTROLLER"]

        # Maintain an ensemble of five meta-controllers instead of a single one
        self.list_meta_controller = [
            DDQN(self.meta_controller_config) for _ in range(5)
        ]
        # Keep handles to every ensemble member's local and target Q-networks
        self.lq_network_local = []
        self.lq_network_target = []
        for m in self.list_meta_controller:
            m.q_network_local = self.create_NN(
                input_dim=self.state_size,
                output_dim=config.environment.observation_space.n,
                key_to_use="META_CONTROLLER")
            self.lq_network_local.append(m.q_network_local)
            m.q_network_target = self.create_NN(
                input_dim=self.state_size,
                output_dim=config.environment.observation_space.n,
                key_to_use="META_CONTROLLER")
            self.lq_network_target.append(m.q_network_target)

        self.rolling_intrinsic_rewards = []
        self.goals_seen = []
        self.controller_learnt_enough = False
        self.controller_actions = []
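
The snippet above builds five meta-controllers but does not show how their outputs are combined. Purely as an illustrative sketch (ensemble_goal and its greedy-averaging rule are assumptions, not taken from the source), one common way to use such an ensemble is to average the members' Q-values and act greedily on the mean:

import torch

def ensemble_goal(list_meta_controller, state):
    """Average the ensemble's Q-values over goals and return the greedy goal index.
    state: 1-D float tensor matching the meta-controllers' input size."""
    with torch.no_grad():
        q_values = torch.stack([m.q_network_local(state.unsqueeze(0))
                                for m in list_meta_controller])    # (5, 1, n_goals)
        mean_q = q_values.mean(dim=0)                              # (1, n_goals)
    return mean_q.argmax(dim=-1).item()
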
Example #3
    def create_skill_training_agent(self):
        """Creates a pre-training environment in which the agent can learn skills, then instantiates
        an agent to learn in that environment"""
        self.skill_agent_config.environment = Skill_Wrapper(copy.deepcopy(self.environment),
                                                            self.environment.observation_space.n,
                                                            self.num_skills,
                                                            self.skill_agent_config.hyperparameters["regularisation_weight"],
                                                            self.skill_agent_config.hyperparameters["visitations_decay"])
        return DDQN(self.skill_agent_config)
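
The two hyperparameters passed to Skill_Wrapper suggest a visitation-based diversity bonus. The sketch below (VisitationBonus is illustrative only; the wrapper's real shaping formula is not shown in the source) tracks decayed per-(skill, state) visit counts and rewards states that are visited mostly under the current skill:

import numpy as np

class VisitationBonus:
    """Decayed per-(skill, state) visitation counts used to shape rewards."""
    def __init__(self, num_states, num_skills, regularisation_weight, visitations_decay):
        self.counts = np.zeros((num_skills, num_states))
        self.weight = regularisation_weight
        self.decay = visitations_decay

    def shaped_reward(self, extrinsic_reward, skill, state):
        self.counts *= self.decay                    # older visits fade away
        self.counts[skill, state] += 1.0
        probs = self.counts[:, state] / self.counts[:, state].sum()
        # Bonus is largest when this state is visited almost exclusively under the current skill
        bonus = np.log(probs[skill] + 1e-8) - np.log(1.0 / self.counts.shape[0])
        return extrinsic_reward + self.weight * bonus
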
Example #4
    def __init__(self, config):
        Base_Agent.__init__(self, config)
        self.controller_config = copy.deepcopy(config)
        self.controller_config.hyperparameters = self.controller_config.hyperparameters[
            "CONTROLLER"]
        self.controller = DDQN(self.controller_config)
        # The controller conditions on state and goal together, hence input_dim = state_size * 2
        self.controller.q_network_local = self.create_NN(
            input_dim=self.state_size * 2,
            output_dim=self.action_size,
            key_to_use="CONTROLLER")
        self.meta_controller_config = copy.deepcopy(config)
        self.meta_controller_config.hyperparameters = self.meta_controller_config.hyperparameters[
            "META_CONTROLLER"]
        self.meta_controller = DDQN(self.meta_controller_config)
        # The meta-controller outputs one goal per discrete state of the environment
        self.meta_controller.q_network_local = self.create_NN(
            input_dim=self.state_size,
            output_dim=config.environment.observation_space.n,
            key_to_use="META_CONTROLLER")
        self.rolling_intrinsic_rewards = []
        self.goals_seen = []
        self.controller_learnt_enough = False
        self.controller_actions = []
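
The controller's input_dim = self.state_size * 2 reflects the h-DQN recipe of conditioning the low-level policy on both the current state and the goal chosen by the meta-controller. A minimal sketch of that pairing (controller_input is a hypothetical helper, assuming state and goal share the same vector encoding):

import numpy as np

def controller_input(state, goal):
    """Concatenate state and goal vectors -- the doubled input the controller network expects."""
    return np.concatenate([state, goal])
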
Example #5
    def create_manager_agent(self, skill_agent):
        """Instantiates a manager agent that selects skills while the pre-trained worker stays frozen"""
        self.manager_config.environment = Manager_Frozen_Worker_Wrapper(
            copy.deepcopy(self.environment), self.num_skills,
            self.timesteps_before_changing_skill, skill_agent)
        return DDQN(self.manager_config)
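
Taken together with Example #3, a plausible end-to-end flow first pre-trains the skill agent and then trains the manager on top of the frozen skills. The driver below is a sketch only: run_n_episodes is assumed to be the agent framework's training entry point and is not shown in the source.

skill_agent = self.create_skill_training_agent()        # Example #3: learn skills with shaped rewards
skill_agent.run_n_episodes()                            # assumed training loop on the DDQN agent
manager_agent = self.create_manager_agent(skill_agent)  # Example #5: worker stays frozen
manager_agent.run_n_episodes()                          # manager learns to sequence the skills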