def __init__(self, name, before_com_model, channel, after_com_model, critic_mlp_model,
             obs_shape_n, act_space_n, args, local_q_func=False):
    self.name = name
    self.n = len(obs_shape_n)
    self.args = args
    self.alpha = 0.5

    # One observation placeholder per agent, fed with batched observations
    obs_ph_n = []
    for i in range(self.n):
        obs_ph_n.append(
            U.BatchInput(obs_shape_n[i], name="observation_" + str(i)).get())

    # Create all the functions necessary to train the model
    self.q_train, self.q_update, self.q_debug = q_train(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        q_func=critic_mlp_model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units,
    )
    self.act, self.p_train, self.p_update, self.p_debug = self.p_train_function(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        before_com_func=before_com_model,
        channel=channel,
        after_com_func=after_com_model,
        q_func=critic_mlp_model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units,
        beta=args.beta,
        ibmac_com=args.ibmac_com,
    )

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(1e6)
    # self.max_replay_buffer_len = 50 * args.max_episode_len
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
    self.message_1_for_record = []
def __init__(self, name, model, obs_shape_n, act_space_n, agent_index, args, local_q_func=False):
    self.name = name
    self.n = len(obs_shape_n)
    self.agent_index = agent_index
    self.args = args

    # One observation placeholder per agent, fed with batched observations
    obs_ph_n = []
    for i in range(self.n):
        obs_ph_n.append(
            U.BatchInput(obs_shape_n[i], name="observation" + str(i)).get())

    # Create all the functions necessary to train the model
    self.q_train, self.q_update, self.q_debug = q_train(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        q_index=agent_index,
        q_func=model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units)
    self.act, self.p_train, self.p_update, self.p_debug = p_train(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        p_index=agent_index,
        p_func=model,
        q_func=model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units)

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(1e6)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
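# Both constructors above pull their hyperparameters from a single `args`
# namespace rather than individual keyword arguments: args.lr, args.num_units,
# args.batch_size, args.max_episode_len, and, for the communication-based
# trainer, args.beta and args.ibmac_com. The sketch below is a minimal,
# illustrative namespace satisfying that interface; the field names are taken
# from the code above, but the values are assumptions, not settings from this
# repo, and any object exposing these attributes would do.
from types import SimpleNamespace

args = SimpleNamespace(
    lr=1e-2,               # Adam learning rate passed to q_train / p_train
    num_units=64,          # hidden-layer width of the MLP models
    batch_size=1024,       # with max_episode_len, sets the replay warm-up threshold
    max_episode_len=25,    # episode horizon
    beta=0.01,             # information-bottleneck weight (communication trainer only)
    ibmac_com=True,        # whether agents actually exchange messages
)

# Updates only begin once the replay buffer holds this many transitions,
# mirroring the max_replay_buffer_len computation in both constructors.
max_replay_buffer_len = args.batch_size * args.max_episode_len
print(max_replay_buffer_len)  # 25600 with the illustrative values above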