Example #1
    def __init__(self, model_info):
        model_config = model_info.get('model_config', None)
        import_config(globals(), model_config)

        self.state_dim = model_info['state_dim']
        self.action_dim = model_info['action_dim']
        super().__init__(model_info)
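Every example opens with `import_config(globals(), model_config)`. The helper itself is not shown in these snippets; a minimal sketch of the assumed behavior (hypothetical implementation, not the framework's actual code) is that it copies config entries into the caller's module namespace, so later lines can read user-overridden module-level constants such as LR or BUFFER_SIZE:

    def import_config(namespace, config):
        """Hypothetical sketch: inject config entries into the given namespace."""
        if not config:
            return
        for key, value in config.items():
            # overwrite module-level defaults (e.g. LR, BUFFER_SIZE) with user values
            namespace[key] = value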
Example #2
    def __init__(self, model_info):
        """Init Dqn model for information flow."""
        model_config = model_info.get("model_config", None)
        import_config(globals(), model_config)

        self.state_dim = model_info["state_dim"]
        self.action_dim = model_info["action_dim"]

        self.tau = 0.01
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.vocab_size = model_info["vocab_size"]
        self.emb_dim = model_info["emb_dim"]
        self.user_dim = model_info["user_dim"]
        self.item_dim = model_info["item_dim"]

        self.input_type = model_info["input_type"]
        # logging.info("set input type: {}".format(self.input_type))

        self.embeddings = model_info["embeddings"]
        self.last_act = model_info["last_activate"]

        embedding_weights = np.loadtxt(self.embeddings, delimiter=",", dtype=float)
        self.embedding_initializer = tf.constant_initializer(embedding_weights)

        self.n_history_click = 5
        self.n_history_no_click = 5

        super().__init__(model_info)
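Example #2 stores the usual epsilon-greedy exploration settings (epsilon, epsilon_min, epsilon_decay). How this particular model consumes them is not shown; the decay pattern they imply looks roughly like the following sketch (hypothetical method, not taken from the source):

    def choose_action(self, q_values):
        """Sketch: epsilon-greedy action selection with multiplicative decay."""
        if np.random.rand() < self.epsilon:
            action = np.random.randint(self.action_dim)  # explore
        else:
            action = int(np.argmax(q_values))            # exploit
        # decay the exploration rate, but never below epsilon_min
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        return action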
Example #3
    def __init__(self, model_info):
        model_config = model_info.get("model_config", dict())
        import_config(globals(), model_config)
        self.dtype = DTYPE_MAP.get(model_config.get("dtype", "float32"))

        self.state_dim = model_info["state_dim"]
        self.action_dim = model_info["action_dim"]
        self.filter_arch = get_atari_filter(self.state_dim)

        # lr schedule with linear_cosine_decay
        self.lr_schedule = model_info.get("lr_schedule", None)
        self.opt_type = model_info.get("opt_type", "adam")
        self.lr = None

        self.ph_state = None
        self.ph_adv = None
        self.out_actions = None
        self.pi_logic_outs, self.baseline = None, None

        # placeholder for behavior policy logic outputs
        self.ph_bp_logic_outs = None
        self.ph_actions = None
        self.ph_dones = None
        self.ph_rewards = None
        self.loss, self.optimizer, self.train_op = None, None, None

        self.grad_norm_clip = 40.0
        self.sample_batch_steps = 50

        self.saver = None
        self.explore_paras = None
        self.actor_var = None  # store weights for agent

        super().__init__(model_info)
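Example #3 is a TensorFlow 1.x graph-mode model: the attributes set to None here are placeholders and ops that get created later when the graph is built. The comment mentions a learning-rate schedule based on linear_cosine_decay; a sketch of how that can be wired up with the standard TF1 API is below, assuming the module's own `tf` import (the `(initial_lr, decay_steps)` layout of lr_schedule and the constant fallback are illustrative, not taken from the source):

    def build_optimizer(self, global_step):
        """Sketch: linear-cosine-decayed learning rate feeding an Adam optimizer."""
        if self.lr_schedule:
            initial_lr, decay_steps = self.lr_schedule  # assumed schedule layout
            self.lr = tf.train.linear_cosine_decay(initial_lr, global_step, decay_steps)
        else:
            self.lr = 3e-4  # assumed constant fallback
        return tf.train.AdamOptimizer(learning_rate=self.lr)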
Example #4
    def __init__(self, model_info):
        model_config = model_info.get('model_config', None)
        import_config(globals(), model_config)

        self.state_dim = model_info['state_dim']
        self.action_dim = model_info['action_dim']
        self.learning_rate = LR
        self.dueling = model_config.get('dueling', False)
        super().__init__(model_info)
Example #5
    def __init__(self, model_info):
        model_config = model_info.get('model_config', None)
        import_config(globals(), model_config)

        self.state_dim = model_info['state_dim']
        self.action_dim = model_info['action_dim']
        self.action_type = model_config.get('action_type')
        self.num_sgd_iter = model_config.get('NUM_SGD_ITER', NUM_SGD_ITER)
        super().__init__(model_info)
Example #6
    def __init__(self, model_info):
        model_config = model_info.get('model_config', None)
        import_config(globals(), model_config)

        self.state_dim = model_info['state_dim']
        self.action_dim = model_info['action_dim']
        self.reward_min = model_config.get('reward_min', -300)
        self.reward_max = model_config.get('reward_max', 300)
        self.reward_support_size = math.ceil(value_compression(self.reward_max - self.reward_min)) + 1
        self.value_min = model_config.get('value_min', 0)
        self.value_max = model_config.get('value_max', 60000)
        self.value_support_size = math.ceil(value_compression(self.value_max - self.value_min)) + 1
        self.obs_type = model_config.get('obs_type', 'float32')

        super(MuzeroModel, self).__init__(model_info)
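Example #6 sizes its categorical reward/value supports from a compressed value range. value_compression is not defined in the snippet; assuming it is the usual MuZero-style invertible squashing transform h(x) = sign(x) * (sqrt(|x| + 1) - 1) + eps * x, a sketch would be:

    import math

    def value_compression(value, eps=0.001):
        """Sketch of the assumed MuZero-style value squashing transform."""
        return math.copysign(1.0, value) * (math.sqrt(abs(value) + 1.0) - 1.0) + eps * value

Under that assumption, a raw range of value_max - value_min = 60000 compresses to about 304, so the model only needs on the order of 305 support atoms instead of one per raw value.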
Example #7
    def __init__(self, model_info, alg_config, **kwargs):
        import_config(globals(), alg_config)
        super().__init__(alg_name="impala",
                         model_info=model_info["actor"],
                         alg_config=alg_config)

        self.dummy_action, self.dummy_value = (
            np.zeros((1, self.action_dim)),
            np.zeros((1, 1)),
        )

        self.async_flag = False  # fixme: refactor async_flag
        self.episode_len = alg_config.get("episode_len", 128)

        self.dist_model_policy = FIFODistPolicy(
            alg_config["instance_num"],
            prepare_times=self._prepare_times_per_train)

        self._init_train_list()
Example #8
    def __init__(self, model_info, alg_config, **kwargs):
        """
        Create the algorithm instance; its model is built within `__init__`.

        :param model_info:
        :param alg_config:
        :param kwargs:
        """
        import_config(globals(), alg_config)
        super().__init__(
            alg_name=kwargs.get("name") or "muzero",
            model_info=model_info["actor"],
            alg_config=alg_config,
        )
        # self.buff = ReplayBuffer(BUFFER_SIZE)
        self.buff = PrioritizedReplayBuffer(BUFFER_SIZE, alpha=1)
        self.discount = GAMMA
        self.unroll_step = UNROLL_STEP
        self.td_step = TD_STEP
        self.async_flag = False
Example #9
    def __init__(self, model_info, alg_config, **kwargs):
        """
        Initialize DQN algorithm.

        It contains four steps:
        1. override the default config with the user's configuration;
        2. create the default actor with Algorithm.__init__;
        3. create one more actor, named target_actor;
        4. create the replay buffer for training.
        :param model_info:
        :param alg_config:
        """
        import_config(globals(), alg_config)
        model_info = model_info["actor"]
        super(DQN, self).__init__(alg_name="dqn",
                                  model_info=model_info,
                                  alg_config=alg_config)

        self.target_actor = model_builder(model_info)
        self.buff = ReplayBuffer(BUFFER_SIZE)
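The target_actor built here is the second network from step 3 of the docstring. Its synchronization logic is not part of this snippet; a common pattern (hypothetical method, assuming the model wrapper exposes get_weights/set_weights) is a periodic hard copy from the online actor:

    def update_target(self):
        """Sketch: hard-copy the online actor's weights into the target actor."""
        weights = self.actor.get_weights()  # assumed accessor on the model wrapper
        self.target_actor.set_weights(weights)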
Example #10
    def __init__(self, model_info, alg_config, **kwargs):
        """
        Create Algorithm instance.

        Will create its model within the `__init__`.
        :param model_info:
        :param alg_config:
        :param kwargs:
        """
        import_config(globals(), alg_config)
        super().__init__(alg_name=kwargs.get('name') or 'ppo',
                         model_info=model_info['actor'],
                         alg_config=alg_config)

        self._init_train_list()
        self.async_flag = False  # fixme: refactor async_flag

        if model_info.get('finetune_weight'):
            self.actor.load_model(model_info['finetune_weight'], by_name=True)
            logging.info('load finetune weight: {}'.format(
                model_info['finetune_weight']))
Example #11
    def __init__(self, model_info, alg_config, **kwargs):
        import_config(globals(), alg_config)
        super().__init__(alg_name="impala",
                         model_info=model_info["actor"],
                         alg_config=alg_config)
        self.states = list()
        self.behavior_logits = list()
        self.actions = list()
        self.dones = list()
        self.rewards = list()
        self.async_flag = False

        # update to divide model policy
        self.dist_model_policy = EqualDistPolicy(
            alg_config["instance_num"],
            prepare_times=self._prepare_times_per_train)

        self.use_train_thread = False
        if self.use_train_thread:
            self.send_train = UniComm("LocalMsg")
            train_thread = threading.Thread(target=self._train_thread)
            train_thread.daemon = True
            train_thread.start()
Example #12
    def __init__(self, model_info):
        model_config = model_info.get('model_config')
        import_config(globals(), model_config)

        # fixme: could read action_dim&obs_dim from env.info
        self.state_dim = model_info['state_dim']
        self.action_dim = model_info['action_dim']

        self.action_type = model_config.get('action_type')
        self._lr = model_config.get('LR', LR)
        self._batch_size = model_config.get('BATCH_SIZE', BATCH_SIZE)
        self.critic_loss_coef = model_config.get('CRITIC_LOSS_COEF',
                                                 CRITIC_LOSS_COEF)
        self.ent_coef = model_config.get('ENTROPY_LOSS', ENTROPY_LOSS)
        self.clip_ratio = model_config.get('LOSS_CLIPPING', LOSS_CLIPPING)
        self._max_grad_norm = model_config.get('MAX_GRAD_NORM', MAX_GRAD_NORM)
        self.num_sgd_iter = model_config.get('NUM_SGD_ITER', NUM_SGD_ITER)
        self.verbose = model_config.get('SUMMARY', SUMMARY)
        self.vf_clip = model_config.get('VF_CLIP', VF_CLIP)

        self.dist = make_dist(self.action_type, self.action_dim)

        super().__init__(model_info)
Example #13
    def __init__(self, model_info):
        model_config = model_info.get('model_config', None)
        import_config(globals(), model_config)

        super().__init__(model_info)
Example #14
    def __init__(self, env, alg, agent_config, **kwargs):
        import_config(globals(), agent_config)
        super().__init__(env, alg, agent_config, **kwargs)
        self.num_simulations = NUM_SIMULATIONS