def __init__(self, name, agents_number, agent_index, actors, act_space_n,
             args, common_obs_shape, sep_obs_shape, model, lstm_model,
             cnn_model, cnn_scope=None, lstm_scope=None, reuse=False,
             local_q_func=False, session=None):
    """Build the Q-function training functions and replay buffer.

    One placeholder is created for the common observation and one per
    agent (``agents_number`` in total) for the separate observations.
    Both placeholder shapes are prefixed with ``args.history_length``;
    the first entry of ``sep_obs_shape`` is dropped before prefixing.

    The results of ``q_train`` are stored as ``self.q_train``,
    ``self.q_update`` and ``self.q_debug``.

    NOTE(review): ``reuse`` is accepted but never forwarded; ``q_train``
    is always called with ``reuse=False``. Confirm this is intended.
    """
    self.name = name
    self.args = args
    self.actors = actors
    self.n = agents_number
    self.agent_index = agent_index
    self.history_length = args.history_length

    # Common observation placeholder is created first, then the per-agent
    # ones, so graph construction order matches the previous layout.
    common_obs_shape = [args.history_length] + list(common_obs_shape)
    common_obs_ph = U.BatchInput(
        common_obs_shape, name="common_observation").get()

    sep_obs_shape = [args.history_length] + list(sep_obs_shape[1:])
    sep_obs_ph_n = []
    for idx in range(self.n):
        placeholder = U.BatchInput(
            sep_obs_shape, name="common_observation" + str(idx))
        sep_obs_ph_n.append(placeholder.get())

    # Create all the functions necessary to train the model
    critic_kwargs = dict(
        scope=self.name,
        make_common_obs_ph=common_obs_ph,
        make_sep_obs_ph_n=sep_obs_ph_n,
        act_space_n=act_space_n,
        cnn_model=cnn_model,
        cnn_scope=cnn_scope,
        q_index=agent_index,
        q_func=model,
        lstm_model=lstm_model,
        lstm_scope=lstm_scope,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        args=self.args,
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units,
        reuse=False,
        use_lstm=self.args.use_lstm,
        session=session,
    )
    self.q_train, self.q_update, self.q_debug = q_train(**critic_kwargs)

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args.buffer_size, args.history_length)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
def __init__(self, name, agents_number, act_space_n, agent_index, args,
             common_obs_shape, sep_obs_shape, model, lstm_model, cnn_model,
             lstm_scope=None, cnn_scope=None, reuse=False, session=None,
             local_q_func=False):
    """Build the acting function and replay buffer for one agent.

    Creates one placeholder for the common observation and one per agent
    (``agents_number`` in total) for the separate observations, each with
    ``args.history_length`` prepended to its shape; the first entry of
    ``sep_obs_shape`` is dropped before prepending. The placeholders are
    handed to ``p_act``, whose results are stored as ``self.act`` and
    ``self.p_debug``.
    """
    self.name = name
    self.args = args
    self.n = agents_number
    self.agent_index = agent_index
    self.local_q_func = local_q_func

    history = args.history_length
    sep_obs_shape = [history] + list(sep_obs_shape[1:])
    common_obs_shape = [history] + list(common_obs_shape)

    # The common placeholder is created before the per-agent ones.
    common_obs_ph = U.BatchInput(
        common_obs_shape, name="common_observation").get()
    sep_obs_ph_n = []
    for idx in range(self.n):
        placeholder = U.BatchInput(
            sep_obs_shape, name="common_observation" + str(idx))
        sep_obs_ph_n.append(placeholder.get())

    actor_kwargs = dict(
        make_common_obs_ph=common_obs_ph,
        make_sep_obs_ph_n=sep_obs_ph_n,
        act_space_n=act_space_n,
        p_index=self.agent_index,
        p_func=model,
        lstm_model=lstm_model,
        cnn_model=cnn_model,
        lstm_scope=lstm_scope,
        cnn_scope=cnn_scope,
        use_lstm=self.args.use_lstm,
        use_cnn=self.args.use_cnn,
        reuse=reuse,
        session=session,
        scope=self.name,
        num_units=self.args.num_units,
    )
    self.act, self.p_debug = p_act(**actor_kwargs)

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args.buffer_size, args.history_length)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, args, local_q_func=False):
    """Build the acting function and replay buffer for one agent.

    One observation placeholder is created per entry of ``obs_shape_n``
    and passed to ``p_act``, which is always called with
    ``use_lstm=False`` and ``reuse=False``. Its results are stored as
    ``self.act`` and ``self.p_debug``.
    """
    self.name = name
    self.args = args
    self.agent_index = agent_index
    self.local_q_func = local_q_func
    self.n = len(obs_shape_n)

    obs_ph_n = [
        U.BatchInput(obs_shape_n[idx], name="observation" + str(idx)).get()
        for idx in range(self.n)
    ]

    self.act, self.p_debug = p_act(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        p_index=self.agent_index,
        p_func=model,
        lstm_model=lstm_model,
        num_units=self.args.num_units,
        use_lstm=False,
        reuse=False,
    )

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args.buffer_size)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
def __init__(self, env_name, name, model, obs_shape_n, act_space_n,
             agent_index, args, local_q_func=False):
    """Build Q and policy training functions plus a replay buffer.

    One observation placeholder is created per entry of ``obs_shape_n``.
    Both ``q_train`` and ``p_train`` are scoped under
    ``env_name + name``; ``p_train`` additionally receives the policy
    scope ``"common_" + name``. ``model`` is used as both the policy and
    the Q function, and each training call gets its own Adam optimizer
    with learning rate ``args.lr``.
    """
    self.env_name = env_name
    self.name = name
    self.args = args
    self.agent_index = agent_index
    self.n = len(obs_shape_n)

    obs_ph_n = [
        U.BatchInput(obs_shape_n[idx], name="observation" + str(idx)).get()
        for idx in range(self.n)
    ]

    trainer_scope = self.env_name + self.name
    # Keyword arguments that are identical for the critic and the actor.
    shared_kwargs = dict(
        scope=trainer_scope,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units,
    )

    # Create all the functions necessary to train the model
    critic_fns = q_train(
        q_index=agent_index,
        q_func=model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        **shared_kwargs
    )
    self.q_train, self.q_update, self.q_debug = critic_fns

    actor_fns = p_train(
        p_index=agent_index,
        p_scope="common_" + self.name,
        p_func=model,
        q_func=model,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        **shared_kwargs
    )
    self.act, self.p_train, self.p_update, self.p_debug = actor_fns

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args.buffer_size)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, actors, args, local_q_func=False, session=None,
             lstm_scope=None):
    """Build the Q-function training functions and replay buffer.

    One observation placeholder is created per entry of ``obs_shape_n``,
    with ``args.history_length`` prepended to each shape. ``q_train`` is
    always called with ``reuse=False``; its results are stored as
    ``self.q_train``, ``self.q_update`` and ``self.q_debug``.
    """
    self.name = name
    self.args = args
    self.actors = actors
    self.agent_index = agent_index
    self.n = len(obs_shape_n)
    self.history_length = args.history_length

    obs_ph_n = [
        U.BatchInput(
            [args.history_length] + list(obs_shape_n[idx]),
            name="observation" + str(idx),
        ).get()
        for idx in range(self.n)
    ]

    # Create all the functions necessary to train the model
    critic_kwargs = dict(
        scope=self.name,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        q_index=agent_index,
        q_func=model,
        lstm_model=lstm_model,
        lstm_scope=lstm_scope,
        optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
        args=self.args,
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=args.num_units,
        reuse=False,
        use_lstm=self.args.use_lstm,
        session=session,
    )
    self.q_train, self.q_update, self.q_debug = q_train(**critic_kwargs)

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args.buffer_size, args.history_length)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None
def __init__(self, name, model, lstm_model, obs_shape_n, act_space_n,
             agent_index, actor_env, args, local_q_func=False, session=None):
    """Build the policy training functions and replay buffer.

    One observation placeholder is created per entry of ``obs_shape_n``,
    with ``args.history_length`` prepended to each shape. ``p_train`` is
    called with ``actor_env`` as the policy scope and ``reuse=True``, and
    ``model`` is used as both the policy and the Q function. Its results
    are stored as ``self.p_train`` and ``self.p_update``.
    """
    self.name = name
    self.args = args
    self.agent_index = agent_index
    self.n = len(obs_shape_n)

    obs_ph_n = [
        U.BatchInput(
            [args.history_length] + list(obs_shape_n[idx]),
            name="observation" + str(idx),
        ).get()
        for idx in range(self.n)
    ]

    policy_kwargs = dict(
        scope=self.name,
        p_scope=actor_env,
        make_obs_ph_n=obs_ph_n,
        act_space_n=act_space_n,
        p_index=self.agent_index,
        p_func=model,
        q_func=model,
        lstm_model=lstm_model,
        optimizer=tf.train.AdamOptimizer(learning_rate=self.args.lr),
        grad_norm_clipping=0.5,
        local_q_func=local_q_func,
        num_units=self.args.num_units,
        reuse=True,
        use_lstm=self.args.use_lstm,
        session=session,
        args=args,
    )
    self.p_train, self.p_update = p_train(**policy_kwargs)

    # Create experience buffer
    self.replay_buffer = ReplayBuffer(args.buffer_size, args.history_length)
    self.max_replay_buffer_len = args.batch_size * args.max_episode_len
    self.replay_sample_index = None