def __init__(self, model_info):
    model_config = model_info.get('model_config', None)
    import_config(globals(), model_config)

    self.state_dim = model_info['state_dim']
    self.action_dim = model_info['action_dim']
    super().__init__(model_info)
def __init__(self, model_info): """Init Dqn model for information flow.""" model_config = model_info.get("model_config", None) import_config(globals(), model_config) self.state_dim = model_info["state_dim"] self.action_dim = model_info["action_dim"] self.tau = 0.01 self.epsilon = 1.0 # exploration rate self.epsilon_min = 0.01 self.epsilon_decay = 0.995 self.learning_rate = 0.001 self.vocab_size = model_info["vocab_size"] self.emb_dim = model_info["emb_dim"] self.user_dim = model_info["user_dim"] self.item_dim = model_info["item_dim"] self.input_type = model_info["input_type"] # logging.info("set input type: {}".format(self.input_type)) self.embeddings = model_info["embeddings"] self.last_act = model_info["last_activate"] embedding_weights = np.loadtxt(self.embeddings, delimiter=",", dtype=float) self.embedding_initializer = tf.constant_initializer(embedding_weights) self.n_history_click = 5 self.n_history_no_click = 5 super().__init__(model_info)
def __init__(self, model_info): model_config = model_info.get("model_config", dict()) import_config(globals(), model_config) self.dtype = DTYPE_MAP.get(model_config.get("dtype", "float32")) self.state_dim = model_info["state_dim"] self.action_dim = model_info["action_dim"] self.filter_arch = get_atari_filter(self.state_dim) # lr schedule with linear_cosine_decay self.lr_schedule = model_info.get("lr_schedule", None) self.opt_type = model_info.get("opt_type", "adam") self.lr = None self.ph_state = None self.ph_adv = None self.out_actions = None self.pi_logic_outs, self.baseline = None, None # placeholder for behavior policy logic outputs self.ph_bp_logic_outs = None self.ph_actions = None self.ph_dones = None self.ph_rewards = None self.loss, self.optimizer, self.train_op = None, None, None self.grad_norm_clip = 40.0 self.sample_batch_steps = 50 self.saver = None self.explore_paras = None self.actor_var = None # store weights for agent super().__init__(model_info)
def __init__(self, model_info):
    model_config = model_info.get('model_config', None)
    import_config(globals(), model_config)

    self.state_dim = model_info['state_dim']
    self.action_dim = model_info['action_dim']
    self.learning_rate = LR
    self.dueling = model_config.get('dueling', False)
    super().__init__(model_info)
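When `dueling` is enabled, the Q head is conventionally split into a state-value stream and an advantage stream, recombined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A TensorFlow 1.x sketch of that recombination (layer shapes are assumptions, not this model's actual architecture):

import tensorflow as tf


def dueling_q_head(features, action_dim):
    value = tf.layers.dense(features, 1)                # V(s), scalar per state
    advantage = tf.layers.dense(features, action_dim)   # A(s, a), one per action
    # subtracting the mean advantage keeps V and A identifiable
    return value + advantage - tf.reduce_mean(advantage, axis=1, keepdims=True)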
def __init__(self, model_info):
    model_config = model_info.get('model_config', None)
    import_config(globals(), model_config)

    self.state_dim = model_info['state_dim']
    self.action_dim = model_info['action_dim']
    self.action_type = model_config.get('action_type')
    self.num_sgd_iter = model_config.get('NUM_SGD_ITER', NUM_SGD_ITER)
    super().__init__(model_info)
def __init__(self, model_info):
    model_config = model_info.get('model_config', None)
    import_config(globals(), model_config)

    self.state_dim = model_info['state_dim']
    self.action_dim = model_info['action_dim']

    # size of the categorical support for compressed rewards/values
    self.reward_min = model_config.get('reward_min', -300)
    self.reward_max = model_config.get('reward_max', 300)
    self.reward_support_size = math.ceil(
        value_compression(self.reward_max - self.reward_min)) + 1
    self.value_min = model_config.get('value_min', 0)
    self.value_max = model_config.get('value_max', 60000)
    self.value_support_size = math.ceil(
        value_compression(self.value_max - self.value_min)) + 1

    self.obs_type = model_config.get('obs_type', 'float32')
    super(MuzeroModel, self).__init__(model_info)
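The support sizes above count the discrete bins left after squashing the reward/value range through value_compression. A sketch, assuming the invertible transform from the MuZero paper, h(x) = sign(x)(sqrt(|x| + 1) - 1) + eps * x with eps = 0.001:

import math


def value_compression(x, eps=0.001):
    # h(x) = sign(x) * (sqrt(|x| + 1) - 1) + eps * x, as in the MuZero paper
    return math.copysign(1.0, x) * (math.sqrt(abs(x) + 1.0) - 1.0) + eps * x

# With the defaults above, the reward range is 600, so the support has
# ceil(h(600)) + 1 = ceil(24.12) + 1 = 26 discrete bins.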
def __init__(self, model_info, alg_config, **kwargs):
    import_config(globals(), alg_config)
    super().__init__(alg_name="impala",
                     model_info=model_info["actor"],
                     alg_config=alg_config)

    self.dummy_action, self.dummy_value = (
        np.zeros((1, self.action_dim)),
        np.zeros((1, 1)),
    )

    self.async_flag = False  # fixme: refactor async_flag
    self.episode_len = alg_config.get("episode_len", 128)

    self.dist_model_policy = FIFODistPolicy(
        alg_config["instance_num"],
        prepare_times=self._prepare_times_per_train)

    self._init_train_list()
def __init__(self, model_info, alg_config, **kwargs): """ Algorithm instance, will create their model within the `__init__`. :param model_info: :param alg_config: :param kwargs: """ import_config(globals(), alg_config) super().__init__( alg_name=kwargs.get("name") or "muzero", model_info=model_info["actor"], alg_config=alg_config, ) # self.buff = ReplayBuffer(BUFFER_SIZE) self.buff = PrioritizedReplayBuffer(BUFFER_SIZE, alpha=1) self.discount = GAMMA self.unroll_step = UNROLL_STEP self.td_step = TD_STEP self.async_flag = False
def __init__(self, model_info, alg_config, **kwargs): """ Initialize DQN algorithm. It contains four steps: 1. override the default config, with user's configuration; 2. create the default actor with Algorithm.__init__; 3. create once more actor, named by target_actor; 4. create the replay buffer for training. :param model_info: :param alg_config: """ import_config(globals(), alg_config) model_info = model_info["actor"] super(DQN, self).__init__(alg_name="dqn", model_info=model_info, alg_config=alg_config) self.target_actor = model_builder(model_info) self.buff = ReplayBuffer(BUFFER_SIZE)
def __init__(self, model_info, alg_config, **kwargs): """ Create Algorithm instance. Will create their model within the `__init__`. :param model_info: :param alg_config: :param kwargs: """ import_config(globals(), alg_config) super().__init__(alg_name=kwargs.get('name') or 'ppo', model_info=model_info['actor'], alg_config=alg_config) self._init_train_list() self.async_flag = False # fixme: refactor async_flag if model_info.get('finetune_weight'): self.actor.load_model(model_info['finetune_weight'], by_name=True) logging.info('load finetune weight: {}'.format( model_info['finetune_weight']))
def __init__(self, model_info, alg_config, **kwargs):
    import_config(globals(), alg_config)
    super().__init__(alg_name="impala",
                     model_info=model_info["actor"],
                     alg_config=alg_config)

    self.states = list()
    self.behavior_logits = list()
    self.actions = list()
    self.dones = list()
    self.rewards = list()
    self.async_flag = False

    # update to divide model policy
    self.dist_model_policy = EqualDistPolicy(
        alg_config["instance_num"],
        prepare_times=self._prepare_times_per_train)

    self.use_train_thread = False
    if self.use_train_thread:
        self.send_train = UniComm("LocalMsg")
        train_thread = threading.Thread(target=self._train_thread)
        train_thread.daemon = True  # setDaemon() is deprecated since Python 3.10
        train_thread.start()
def __init__(self, model_info):
    model_config = model_info.get('model_config')
    import_config(globals(), model_config)

    # fixme: could read action_dim & obs_dim from env.info
    self.state_dim = model_info['state_dim']
    self.action_dim = model_info['action_dim']
    self.action_type = model_config.get('action_type')

    self._lr = model_config.get('LR', LR)
    self._batch_size = model_config.get('BATCH_SIZE', BATCH_SIZE)
    self.critic_loss_coef = model_config.get('CRITIC_LOSS_COEF', CRITIC_LOSS_COEF)
    self.ent_coef = model_config.get('ENTROPY_LOSS', ENTROPY_LOSS)
    self.clip_ratio = model_config.get('LOSS_CLIPPING', LOSS_CLIPPING)
    self._max_grad_norm = model_config.get('MAX_GRAD_NORM', MAX_GRAD_NORM)
    self.num_sgd_iter = model_config.get('NUM_SGD_ITER', NUM_SGD_ITER)
    self.verbose = model_config.get('SUMMARY', SUMMARY)
    self.vf_clip = model_config.get('VF_CLIP', VF_CLIP)

    self.dist = make_dist(self.action_type, self.action_dim)
    super().__init__(model_info)
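clip_ratio is the epsilon of PPO's clipped surrogate objective, L = -E[min(r A, clip(r, 1 - eps, 1 + eps) A)] with r the new/old policy probability ratio; the full loss then adds critic_loss_coef times the (vf_clip-clipped) value loss and subtracts ent_coef times the entropy bonus. A numpy sketch of the policy term only:

import numpy as np


def ppo_policy_loss(ratio, advantage, clip_ratio=0.2):
    # L_clip = -mean(min(r * A, clip(r, 1 - eps, 1 + eps) * A))
    unclipped = ratio * advantage
    clipped = np.clip(ratio, 1.0 - clip_ratio, 1.0 + clip_ratio) * advantage
    return -np.mean(np.minimum(unclipped, clipped))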
def __init__(self, model_info):
    model_config = model_info.get('model_config', None)
    import_config(globals(), model_config)
    super().__init__(model_info)
def __init__(self, env, alg, agent_config, **kwargs):
    import_config(globals(), agent_config)
    super().__init__(env, alg, agent_config, **kwargs)
    self.num_simulations = NUM_SIMULATIONS