Ejemplo n.º 1
0
 def setup_models(self):
     """Build the non-trainable DQN and team model and load their saved state.

     Both models are created with ``trainable=False`` and then loaded from
     ``self.directory`` with ``load_memory=False``.

     Returns:
         A ``(dqn, teammates_model)`` tuple.
     """
     q_network = self.setup_dqn(trainable=False)
     team = TeamModel(num_teammates=self.num_teammates, trainable=False)
     # Load the DQN first, then the team model, from the same directory.
     for model in (q_network, team):
         model.load(self.directory, load_memory=False)
     return q_network, team
Ejemplo n.º 2
0
class LearntPLASTICModel(PLASTICPrior):
    """PLASTIC prior whose policies come from a loaded, non-trainable TeamModel."""

    def __init__(self, directory, teammates, num_teammates):
        """Load the team model from ``directory`` and initialise the prior.

        Args:
            directory: Location passed to ``TeamModel.load``.
            teammates: Forwarded as the first argument of the parent
                constructor.
            num_teammates: Used to build the ``TeamModel`` and forwarded to
                the parent constructor.
        """
        frozen_team = TeamModel(num_teammates=num_teammates, trainable=False)
        self.teammates_model = frozen_team
        # Replay memory is not restored (load_memory=False).
        frozen_team.load(directory, load_memory=False)
        super().__init__(teammates, num_teammates)

    #################
    # PLASTIC Prior #
    #################

    def policies(self, state):
        """Return the team model's policies for ``state``."""
        team = self.teammates_model
        return team.policies(state)
Ejemplo n.º 3
0
class LearningPLASTICModel(PLASTICPrior):
    """PLASTIC prior that owns a trainable TeamModel and fits it online."""

    def __init__(self, num_teammates):
        """Create a trainable team model and initialise the prior as ``"new"``.

        Args:
            num_teammates: Passed to ``TeamModel`` and to the parent
                constructor.
        """
        trainable_team = TeamModel(num_teammates, trainable=True)
        self.teammates_model = trainable_team
        super().__init__("new", num_teammates)

    ############################
    # Learning Prior Interface #
    ############################

    def reinforce(self, state, joint_actions, reward, next_state, terminal):
        """Feed one transition to the team model.

        Returns:
            A dict with the single key ``"team model"`` holding whatever
            ``TeamModel.replay_fit`` returned for the transition tuple.
        """
        transition = (state, joint_actions, reward, next_state, terminal)
        return {"team model": self.teammates_model.replay_fit(transition)}

    def save(self, directory):
        """Create ``<directory>/<self.name>`` and save the team model there."""
        target = f"{directory}/{self.name}"
        mkdir(target)
        self.teammates_model.save(target)

    def load(self, directory):
        """Load the team model from ``<directory>/<self.name>``."""
        source = f"{directory}/{self.name}"
        self.teammates_model.load(source)

    #################
    # PLASTIC Prior #
    #################

    def policies(self, state):
        """Return the team model's policies for ``state``."""
        team = self.teammates_model
        return team.policies(state)
Ejemplo n.º 4
0
class PlasticPolicy:
    """Holds a trainable DQN and team model and trains both on each transition.

    NOTE(review): ``save`` and ``load`` read ``self.name``, which this class
    never assigns -- confirm it is provided elsewhere before calling them.
    """

    def __init__(self, num_teammates):
        """Store the team size, reset the step counter and build the models.

        Args:
            num_teammates: Number of teammates, passed to ``TeamModel`` by
                ``setup_models``.
        """
        self._num_teammates = num_teammates
        self.total_timesteps = 0
        models = self.setup_models()
        # The team model ends up reachable as both ``teammates_model`` (set
        # here) and ``team_model`` (set inside ``setup_models``).
        self.dqn, self.teammates_model = models

    @staticmethod
    def setup_dqn(trainable):
        """Construct a ``DQN`` from the module-level configuration.

        NOTE(review): the layer blueprints come from ``cfg`` while every other
        setting comes from ``config`` -- confirm both names are in scope.
        """
        layer_blueprints = ReplayMemoryModel.parse_layer_blueprints(cfg["layers"])
        return DQN(
            trainable,
            config.LEARNING_RATE,
            config.DQN_DISCOUNT_FACTOR,
            layer_blueprints,
            config.REPLAY_MIN_BATCH,
            config.REPLAY_MEMORY_SIZE,
        )

    def _prior_dir(self, directory):
        """Return the sub-directory of ``directory`` named after this prior."""
        return f"{directory}/{self.name}"

    def reinforce(self, state, joint_actions, reward, next_state, terminal):
        """Count one timestep and fit both models on the transition.

        Returns:
            A dict with keys ``"dqn"`` and ``"team model"`` holding the
            results of the respective ``replay_fit`` calls.
        """
        self.total_timesteps += 1
        transition = (state, joint_actions, reward, next_state, terminal)
        # Dict literals evaluate in order: the DQN is fitted before the team model.
        return {
            "dqn": self.dqn.replay_fit(transition),
            "team model": self.team_model.replay_fit(transition),
        }

    def save(self, directory):
        """Create ``<directory>/<self.name>`` and save both models into it."""
        target = self._prior_dir(directory)
        mkdir(target)
        for model in (self.dqn, self.team_model):
            model.save(target)

    def load(self, directory):
        """Load both models from ``<directory>/<self.name>``."""
        source = self._prior_dir(directory)
        for model in (self.dqn, self.team_model):
            model.load(source)

    ##################
    # PLASTIC Policy #
    ##################

    def setup_models(self):
        """Create the trainable DQN and team model and store them on ``self``.

        Returns:
            A ``(dqn, team_model)`` tuple of the freshly created models.
        """
        self.dqn = self.setup_dqn(trainable=True)
        self.team_model = TeamModel(self._num_teammates, trainable=True)
        created = (self.dqn, self.team_model)
        return created

    #################
    # PLASTIC Prior #
    #################

    def policies(self, state):
        """Return the team model's policies for ``state``."""
        team = self.team_model
        return team.policies(state)
Ejemplo n.º 5
0
class LearningPLASTICPolicy(PLASTICPolicy):
    """PLASTIC policy that trains its own DQN and team model from scratch."""

    def __init__(self, num_teammates):
        """Record the team size, reset the step counter, then init as ``"new"``.

        Args:
            num_teammates: Number of teammates; stored on the instance and
                forwarded to the parent constructor.
        """
        # These attributes are assigned before the parent constructor runs.
        self.total_timesteps = 0
        self._num_teammates = num_teammates
        super().__init__("new", num_teammates)

    ############################
    # Learning Prior Interface #
    ############################

    def reinforce(self, state, joint_actions, reward, next_state, terminal):
        """Count one timestep and fit both models on the transition.

        Returns:
            A dict with keys ``"dqn"`` and ``"team model"`` holding the
            results of the respective ``replay_fit`` calls.
        """
        self.total_timesteps += 1
        transition = (state, joint_actions, reward, next_state, terminal)
        # Dict literals evaluate in order: the DQN is fitted before the team model.
        return {
            "dqn": self.dqn.replay_fit(transition),
            "team model": self.team_model.replay_fit(transition),
        }

    def save(self, directory):
        """Create ``<directory>/<self.name>`` and save both models into it."""
        target = f"{directory}/{self.name}"
        mkdir(target)
        for model in (self.dqn, self.team_model):
            model.save(target)

    def load(self, directory):
        """Load both models from ``<directory>/<self.name>``."""
        source = f"{directory}/{self.name}"
        for model in (self.dqn, self.team_model):
            model.load(source)

    ##################
    # PLASTIC Policy #
    ##################

    def setup_models(self):
        """Create the trainable DQN and team model and store them on ``self``.

        Returns:
            A ``(dqn, team_model)`` tuple of the freshly created models.
        """
        self.dqn = self.setup_dqn(trainable=True)
        self.team_model = TeamModel(self._num_teammates, trainable=True)
        created = (self.dqn, self.team_model)
        return created

    #################
    # PLASTIC Prior #
    #################

    def policies(self, state):
        """Return the team model's policies for ``state``."""
        team = self.team_model
        return team.policies(state)
Ejemplo n.º 6
0
 def setup_models(self):
     """Create the trainable DQN and team model and store them on ``self``.

     Returns:
         A ``(dqn, team_model)`` tuple of the freshly created models.
     """
     self.dqn = self.setup_dqn(trainable=True)
     self.team_model = TeamModel(self._num_teammates, trainable=True)
     created = (self.dqn, self.team_model)
     return created
Ejemplo n.º 7
0
 def __init__(self, num_teammates):
     """Create a trainable team model, then initialise the parent as ``"new"``.

     Args:
         num_teammates: Passed to ``TeamModel`` and to the parent
             constructor.
     """
     trainable_team = TeamModel(num_teammates, trainable=True)
     self.teammates_model = trainable_team
     super().__init__("new", num_teammates)
Ejemplo n.º 8
0
 def __init__(self, directory, teammates, num_teammates):
     """Load a non-trainable team model from ``directory``, then init the parent.

     Args:
         directory: Location passed to ``TeamModel.load``.
         teammates: Forwarded as the first argument of the parent
             constructor.
         num_teammates: Used to build the ``TeamModel`` and forwarded to the
             parent constructor.
     """
     frozen_team = TeamModel(num_teammates=num_teammates, trainable=False)
     self.teammates_model = frozen_team
     # Replay memory is not restored (load_memory=False).
     frozen_team.load(directory, load_memory=False)
     super().__init__(teammates, num_teammates)