def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, shared_actor, args, local_q_func=False):
    # Final target actor, shared across agents
    self.shared_actor = shared_actor
    self.name = name
    self.n = len(obs_shape_n)
    self.agent_index = agent_index
    self.args = args

    # One observation placeholder per agent; the history_length axis is
    # appended so stacked past frames can be fed through the LSTM model.
    obs_ph_n = []
    for i in range(self.n):
        obs_shape = list(obs_shape_n[i])
        obs_shape.append(args.history_length)
        obs_ph_n.append(U.BatchInput(obs_shape, name="observation" + str(i)).get())

    # Create all the functions necessary to train the model
    self.q_train, self.q_update, self.q_debug = q_train(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        q_index=agent_index,
        q_func=model,
        lstm_model=lstm_model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        args=self.args,
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units,
        reuse=False
    )

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args, obs_shape_n[0], act_space_n[0].n)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
    self.p = None
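# A minimal sketch of how this critic trainer is typically driven, following
# the upstream openai/maddpg update loop. The exact call signature of
# self.q_train and the replay-buffer sampling helpers in this fork are
# assumptions, so treat this as illustrative only:
#
#   index = self.replay_buffer.make_index(self.args.batch_size)
#   obs_n, act_n = [], []
#   for agent in agents:                      # gather the joint observation/action
#       obs, act, rew, obs_next, done = agent.replay_buffer.sample_index(index)
#       obs_n.append(obs)
#       act_n.append(act)
#   target_q = rew + self.args.gamma * (1.0 - done) * target_q_next  # Bellman backup
#   q_loss = self.q_train(*(obs_n + act_n + [target_q]))
#   self.q_update()                           # Polyak update of the target critic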
def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, actor_env, args, local_q_func=False):
    self.args = args
    self.name = name
    self.n = len(obs_shape_n)
    self.agent_index = agent_index

    obs_ph_n = []
    for i in range(self.n):
        obs_shape = list(obs_shape_n[i])
        obs_shape.append(args.history_length)
        obs_ph_n.append(U.BatchInput(obs_shape, name="observation" + str(i)).get())

    # Policy training ops; reuse=True with p_scope=actor_env binds this
    # trainer to the actor variables owned by actor_env instead of
    # creating a fresh copy.
    optimizer = tf.train.AdamOptimizer(learning_rate=self.args.lr)
    self.p_train, self.p_update = p_train(
        scope=self.name,
        p_scope=actor_env,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        p_index=self.agent_index,
        p_func=model,
        q_func=model,
        lstm_model=lstm_model,
        optimizer=optimizer,
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=self.args.num_units,
        reuse=True,
        use_lstm=False
    )

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args, obs_shape_n[0], act_space_n[0].n)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
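# The reuse=True / p_scope=actor_env pair above means this trainer does not
# create its own actor variables; it rebuilds the policy graph on top of the
# variables already owned by actor_env. A self-contained TF1 illustration of
# that mechanism (scope and variable names here are illustrative only):
#
#   import tensorflow as tf
#
#   with tf.variable_scope("actor_env"):
#       w = tf.get_variable("w", shape=[4, 2])        # created once
#   with tf.variable_scope("actor_env", reuse=True):
#       w_again = tf.get_variable("w", shape=[4, 2])  # fetched, not re-created
#   assert w is w_again                               # same underlying variable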
def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, args, local_q_func=False):
    self.critic_scope = None
    self.name = name
    self.n = len(obs_shape_n)
    self.agent_index = agent_index
    self.args = args

    obs_ph_n = []
    for i in range(self.n):
        obs_shape = list(obs_shape_n[i])
        obs_shape.append(args.history_length)
        obs_ph_n.append(U.BatchInput(obs_shape, name="observation" + str(i)).get())

    # Create all the functions necessary to train the model; the
    # graph-building inputs are stored so the training op can be
    # constructed later, once the critic scope is known.
    self.obs_ph_n = obs_ph_n
    self.act_space_n = act_space_n
    self.model = model
    self.lstm_model = lstm_model
    self.local_q_func = local_q_func

    # Build only the action-selection ops now.
    self.act, self.p_update, self.p_debug = p_act(
        scope=self.name,
        make_obs_ph_n=self.obs_ph_n,
        act_space_n=self.act_space_n,
        p_index=self.agent_index,
        p_func=self.model,
        lstm_model=self.lstm_model,
        args=self.args,
        reuse=False,
        num_units=self.args.num_units
    )
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.args.lr)
    self.p_train = None  # constructed later, once the critic scope is known (see sketch below)

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args, obs_shape_n[0], act_space_n[0].n)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
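# Since self.p_train is left as None, the training op is presumably built
# lazily once the critic scope is available. A hypothetical deferred builder
# (build_p_train is not shown in this excerpt; its shape mirrors the p_train
# call used by the shared-actor constructor above):
#
#   def build_p_train(self, critic_scope):
#       self.critic_scope = critic_scope
#       self.p_train, _ = p_train(
#           scope=self.name, p_scope=critic_scope,
#           make_obs_ph_n=self.obs_ph_n, act_space_n=self.act_space_n,
#           p_index=self.agent_index, p_func=self.model, q_func=self.model,
#           lstm_model=self.lstm_model, optimizer=self.optimizer,
#           grad_norm_clipping=0.5, local_q_func=self.local_q_func,
#           num_units=self.args.num_units, reuse=True)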
def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, args, local_q_func=False):
    self.args = args
    self.name = name
    self.n = len(obs_shape_n)
    self.agent_index = agent_index

    obs_ph_n = []
    for i in range(self.n):
        obs_shape = list(obs_shape_n[i])
        obs_shape.append(args.history_length)
        obs_ph_n.append(U.BatchInput(obs_shape, name="observation" + str(i)).get())

    self.local_q_func = local_q_func

    # Action-selection ops only; this variant never builds a training op.
    self.act, self.p_debug = p_act(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        p_index=self.agent_index,
        p_func=model,
        lstm_model=lstm_model,
        num_units=self.args.num_units,
        use_lstm=False,
        reuse=False
    )

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args, obs_shape_n[0], act_space_n[0].n)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
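# A minimal construction sketch for this inference-only variant. The class
# name InferenceActor, mlp_model, and lstm_model are placeholders (the
# enclosing class is not shown in this excerpt), so everything beyond the
# argument names is an assumption:
#
#   from argparse import Namespace
#   from gym import spaces
#
#   args = Namespace(lr=1e-2, num_units=64, history_length=4,
#                    batch_size=1024, max_episode_len=25)
#   obs_shape_n = [(16,)] * 3               # three agents, 16-dim observations
#   act_space_n = [spaces.Discrete(5)] * 3  # discrete actions (.n is used above)
#   agent = InferenceActor("agent_0", mlp_model, lstm_model,
#                          obs_shape_n, act_space_n, agent_index=0, args=args)
#   action = agent.act(obs[None])           # act on a single batched observation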