Example #1
    def __init__(self, model_info, alg_config, **kwargs):
        """
        Initialize DQN algorithm.

        It contains four steps:
        1. override the default config, with user's configuration;
        2. create the default actor with Algorithm.__init__;
        3. create once more actor, named by target_actor;
        4. create the replay buffer for training.
        :param model_info:
        :param alg_config:
        """
        model_info = model_info["actor"]
        super(DQNInfoFlowAlg, self).__init__(alg_name="info_flow_dqn",
                                             model_info=model_info,
                                             alg_config=alg_config)

        # target network: a second actor whose weights are periodically
        # synchronized with the online actor (every target_update_freq steps)
        self.target_actor = model_builder(model_info)
        self.buff = ReplayBuffer(alg_config.get("buffer_size", BUFFER_SIZE))
        self.batch_size = alg_config.get("batch_size", BATCH_SIZE)
        self.target_update_freq = alg_config.get("target_update_freq",
                                                 TARGET_UPDATE_FREQ)
        self.gamma = alg_config.get("gamma", GAMMA)

        self.item_dim = alg_config.get("item_dim")
        self.user_dim = alg_config.get("user_dim")
        # this algorithm trains synchronously
        self.async_flag = False
        self._times = list()
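A minimal construction sketch follows; every configuration value here is a hypothetical placeholder, and the exact keys that model_builder expects inside model_info["actor"] depend on the surrounding framework:

# Hypothetical values for illustration only; the keys model_builder
# actually requires depend on the surrounding framework.
model_info = {
    "actor": {
        "state_dim": 84,
        "action_dim": 4,
    }
}
alg_config = {
    "buffer_size": 10000,
    "batch_size": 32,
    "target_update_freq": 500,
    "gamma": 0.99,
    "item_dim": 64,
    "user_dim": 32,
}

alg = DQNInfoFlowAlg(model_info=model_info, alg_config=alg_config)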
Example #2
    def __init__(self, alg_name, model_info, alg_config=None, **kwargs):
        """
        use the model info create a algorithm
        :param alg_name:
        :param model_info: model_info["actor"]
        :param alg_config:
        """
        self.actor = model_builder(model_info)
        self.state_dim = model_info.get("state_dim")
        self.action_dim = model_info.get("action_dim")
        self.train_count = 0
        self.alg_name = alg_name
        self.alg_config = alg_config

        self.async_flag = True
        # set default weights map, for compatibility with single-agent use
        self._weights_map = self.update_weights_map()

        # trainable state
        self._train_ready = True
        self.sync_weights = False

        # training schedule properties
        self._prepare_times_per_train = alg_config.get(
            "prepare_times_per_train",
            alg_config["instance_num"] * alg_config["agent_num"],
        )
        self.dist_model_policy = DefaultAlgDistPolicy(
            alg_config["instance_num"],
            prepare_times=self._prepare_times_per_train)

        self.learning_starts = alg_config.get("learning_starts", 0)

        self._train_per_checkpoint = alg_config.get("train_per_checkpoint", 1)
        logging.debug("train/checkpoint: {}".format(self.train_per_checkpoint))
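For context, here is a minimal sketch of how a concrete algorithm might subclass this initializer; the base class name Algorithm is inferred from the docstrings in Examples 1 and 3, and MyAlg/"my_alg" are placeholders. Note that alg_config must supply instance_num and agent_num, since this __init__ indexes them without defaults:

class MyAlg(Algorithm):
    def __init__(self, model_info, alg_config, **kwargs):
        # subclasses pass only the actor part of model_info upward,
        # mirroring Examples 1 and 3
        super(MyAlg, self).__init__(alg_name="my_alg",
                                    model_info=model_info["actor"],
                                    alg_config=alg_config)

# instance_num and agent_num are read by the base __init__ without defaults
alg = MyAlg({"actor": {"state_dim": 4, "action_dim": 2}},
            {"instance_num": 1, "agent_num": 1})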
Example #3
    def __init__(self, model_info, alg_config, **kwargs):
        """Initialize DQN algorithm. it's contains four steps:
        1. override the default config, with user's configuration;
        2. create the default actor with Algorithm.__init__;
        3. create once more actor, named by target_actor;
        4. create the replay buffer for training.
        :param model_info:
        :param alg_config:
        """
        # override module-level defaults (e.g. BUFFER_SIZE) with user config
        import_config(globals(), alg_config)
        model_info = model_info["actor"]
        super(DQN, self).__init__(alg_name="dqn",
                                  model_info=model_info,
                                  alg_config=alg_config)

        # target network used to compute stable Q-value targets
        self.target_actor = model_builder(model_info)
        self.buff = ReplayBuffer(BUFFER_SIZE)
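Example 3 differs from Example 1 in that it pushes user overrides into module globals up front, so constants such as BUFFER_SIZE already reflect the user's configuration when they are read. Below is a minimal sketch of what such a helper could look like; the framework's real import_config implementation may differ:

def import_config(namespace, config):
    """A sketch of a config-override helper; the framework's actual
    implementation may differ."""
    if not config:
        return
    for key, value in config.items():
        # only override names that already exist as module defaults,
        # e.g. BUFFER_SIZE, BATCH_SIZE, GAMMA
        name = key.upper()
        if name in namespace:
            namespace[name] = value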