# These snippets assume the TensorFlow 0.x/1.x API (tf.concat(dim, values),
# tf.variable_scope, tf.nn.rnn_cell) and a project-local `net` module
# providing net.mlp and net.embed.
import tensorflow as tf

# Recurrent "describer" cell: for every agent, advance a GRU over that agent's
# input, then predict a task output and a distribution over lexicon entries.
def __call__(self, inputs, state, scope=None):
    assert len(inputs) == self.n_agents
    assert len(state) == 3
    assert all(len(s) == self.n_agents for s in state)
    base_cell = tf.nn.rnn_cell.GRUCell(self.n_hidden)
    states, _, _ = state
    with tf.variable_scope(scope or "desc_cell"):
        next_states = []
        next_outs = []
        next_l_msgs = []
        for i_agent in range(self.n_agents):
            with tf.variable_scope(self.agent_scopes[i_agent],
                    reuse=(i_agent > 0 and self.reuse_agent_scope)):
                _, next_state = base_cell(inputs[i_agent], states[i_agent])
                with tf.variable_scope("out"):
                    next_out, _ = net.mlp(next_state, (self.n_out[i_agent],))
                with tf.variable_scope("gen"):
                    next_l_msg, _ = net.mlp(next_state, (self.n_lex,))
                next_states.append(next_state)
                next_outs.append(next_out)
                next_l_msgs.append(next_l_msg)
    next_state = (tuple(next_states), tuple(next_outs), tuple(next_l_msgs))
    return next_state, next_state
# Recurrent communication cell: each agent conditions on its own input plus
# the other agents' messages from the previous step, updates a GRU state, and
# emits a new message (passed through the channel) and an action output.
def __call__(self, inputs, state, scope=None):
    assert len(inputs) == self.n_agents
    assert len(state) == 3
    assert all(len(s) == self.n_agents for s in state)
    states, comms, _ = state
    with tf.variable_scope(scope or "comm_cell"):
        features = []
        if self.communicate:
            base_cell = tf.nn.rnn_cell.GRUCell(self.n_hidden)
            for i_agent in range(self.n_agents):
                other_comms = tuple(
                        comms[_ia] for _ia in range(self.n_agents)
                        if _ia != i_agent)
                features.append(
                        tf.concat(1, (inputs[i_agent],) + other_comms))
        else:
            # for fair comparison, let non-communicating agents use the
            # extra channel capacity for themselves
            #base_cell = tf.nn.rnn_cell.GRUCell(
            #        self.n_hidden + self.n_comm * (self.n_agents - 1))
            base_cell = tf.nn.rnn_cell.GRUCell(self.n_hidden)
            for i_agent in range(self.n_agents):
                features.append(inputs[i_agent])

        next_states = []
        next_comms = []
        next_outs = []
        for i_agent in range(self.n_agents):
            with tf.variable_scope(self.agent_scopes[i_agent],
                    reuse=(i_agent > 0 and self.reuse_agent_scope)):
                hidden, _ = net.mlp(
                        features[i_agent],
                        (self.n_hidden,) * self.feature_depth,
                        final_nonlinearity=True)
                _, next_state = base_cell(hidden, states[i_agent])
                with tf.variable_scope("comm"):
                    next_comm, _ = net.mlp(next_state, (self.n_comm,))
                with tf.variable_scope("out"):
                    next_out, _ = net.mlp(next_state, (self.n_out[i_agent],))
                next_states.append(next_state)
                next_comms.append(next_comm)
                next_outs.append(next_out)

    transmitted = [self.channel.transmit(c) for c in next_comms]
    next_state = (tuple(next_states), tuple(transmitted), tuple(next_outs))
    return next_state, next_state
def embed_all(inputs, size):
    out = []
    with tf.variable_scope("embed_all") as scope:
        for inp in inputs:
            #t_emb, _ = net.embed(inp, count, size)
            #t_pool = tf.reduce_mean(t_emb, axis=-2)
            #out.append(t_pool)
            #scope.reuse_variables()
            t_emb, _ = net.mlp(inp, (size,))
            out.append(t_emb)
            scope.reuse_variables()
    return out
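# Illustrative usage sketch (an assumption, not part of the original source):
# embed_all projects each tensor in `inputs` into a shared `size`-dimensional
# space with weight-tied MLP layers. The placeholder names and feature size
# below are hypothetical.
#
#   t_obs_a = tf.placeholder(tf.float32, shape=[None, n_obs_features])
#   t_obs_b = tf.placeholder(tf.float32, shape=[None, n_obs_features])
#   t_emb_a, t_emb_b = embed_all([t_obs_a, t_obs_b], N_EMBED)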
def predictor(scope):
    with tf.variable_scope(scope):
        t_arg = tf.placeholder(tf.int32, shape=[None])
        t_embed, v_embed = net.embed(
                t_arg, self.world.cookbook.n_kinds, N_EMBED)
        t_feats = tf.placeholder(
                tf.float32, shape=[None, world.n_features + 1])
        t_comb = tf.concat(1, (t_embed, t_feats))
        t_scores, v_weights = net.mlp(t_comb, [N_HIDDEN, self.n_actions])
        #t_scores, v_weights = net.mlp(t_comb, [self.n_actions])
        return t_arg, t_feats, t_scores, v_embed + v_weights
def predictor(scope):
    with tf.variable_scope(scope):
        t_plan = tf.placeholder(tf.int32, shape=[None, 2])
        t_embed_plan, v_emb = net.embed(
                t_plan, self.world.cookbook.n_kinds, N_EMBED, multi=True)
        t_features = tf.placeholder(
                tf.float32, shape=[None, world.n_features])
        t_comb = tf.concat(1, (t_embed_plan, t_features))
        t_scores, v_weights = net.mlp(t_comb, [N_HIDDEN, self.n_actions])
        return t_features, t_plan, t_scores, v_weights + v_emb
def build_critic(index, t_input, t_reward, extra_params=[]):
    with tf.variable_scope("critic_%s" % index):
        if self.config.model.baseline in ("task", "common"):
            # constant (input-independent) baseline: a single learned scalar
            t_value = tf.get_variable(
                    "b", shape=(),
                    initializer=tf.constant_initializer(0.0))
            v_value = [t_value]
        elif self.config.model.baseline == "state":
            # state-dependent baseline: a scalar value head on the input
            # features
            t_value, v_value = net.mlp(t_input, (1,))
            t_value = tf.squeeze(t_value)
        else:
            raise NotImplementedError(
                    "Baseline %s is not implemented"
                    % self.config.model.baseline)
    return CriticModule(t_value, v_value + extra_params)
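# Hypothetical call site (a sketch, not taken from the original source): the
# "task"/"common" baselines learn one scalar independent of the input, while
# the "state" baseline fits a value head on the same features the actor sees.
# The attribute access below assumes CriticModule exposes its value tensor as
# a field named t_value, and t_reward_ph is a hypothetical reward placeholder.
#
#   critic = build_critic(i_agent, t_agent_features, t_reward_ph)
#   t_advantage = t_reward_ph - critic.t_value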
def build_actor(index, t_input, t_action_mask, extra_params=[]):
    with tf.variable_scope("actor_%s" % index):
        t_action_score, v_action = net.mlp(
                t_input, (N_HIDDEN, self.n_actions))

        # TODO this is pretty gross
        # grab the final layer's bias variable and build an op that lowers
        # the last action's bias by 3
        v_bias = v_action[-1]
        assert "b1" in v_bias.name
        t_decrement_op = v_bias[-1].assign(v_bias[-1] - 3)

        t_action_logprobs = tf.nn.log_softmax(t_action_score)
        # log-probability of the action selected by the (one-hot) action mask
        t_chosen_prob = tf.reduce_sum(
                t_action_mask * t_action_logprobs, reduction_indices=(1,))
    return ActorModule(
            t_action_logprobs, t_chosen_prob, v_action + extra_params,
            t_decrement_op)
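# Sketch of how these modules are typically wired into a policy-gradient
# objective (an assumption about the surrounding trainer, not code from the
# original source): the log-probability of the chosen action is weighted by
# the critic's advantage, REINFORCE-style. t_reward_ph and the module field
# names are hypothetical.
#
#   t_advantage = tf.stop_gradient(t_reward_ph - critic.t_value)
#   t_actor_loss = -tf.reduce_mean(actor.t_chosen_prob * t_advantage)
#   t_critic_loss = tf.reduce_mean(tf.square(t_reward_ph - critic.t_value))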
def build_scorer(t_code):
    # Tile the transmitted code across the candidate axis, score every
    # (candidate, code) pair, and normalize the scores into a belief. The
    # cross-entropy target index is 1 for every episode.
    t_code_rs = tf.reshape(
            t_code,
            (config.trainer.n_batch_episodes, 1, config.channel.n_msg))
    t_code_tile = tf.tile(
            t_code_rs, (1, config.trainer.n_distractors + 1, 1))
    t_features = tf.concat(2, (t_xa, t_xb_tile, t_code_tile))
    t_score, v_net = net.mlp(t_features, (config.model.n_hidden, 1))
    t_score_rs = tf.reshape(
            t_score,
            (config.trainer.n_batch_episodes,
             config.trainer.n_distractors + 1, 1))
    t_score_sq = tf.squeeze(t_score_rs)
    t_belief = tf.nn.softmax(t_score_sq)
    t_errs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            t_score_sq,
            tf.ones((config.trainer.n_batch_episodes,), tf.int32))
    t_loss = tf.reduce_mean(t_errs)
    return t_loss, t_belief, v_net
# Build both halves of the translator: a "model" head that regresses from the
# speaker's true observation to the message vector t_z, and a "desc" head that
# predicts the message distribution t_l_msg. Each head is re-applied, with
# reused weights, to the true observation plus distractors to produce a belief
# over candidates.
def build(self, task, reconst_ph, channel, model, config):
    with tf.variable_scope("belief_translator") as scope:
        t_xa_true_rs = tf.reshape(
                reconst_ph.t_xa_true,
                (config.trainer.n_batch_episodes, 1, task.n_features))
        # candidate set: true observation followed by distractors
        t_xa = tf.concat(1, (t_xa_true_rs, reconst_ph.t_xa_noise))
        #t_xa_drop = tf.dropout(t_xa, 0.9)
        t_xa_drop = t_xa
        #t_xa_true_drop = tf.nn.dropout(reconst_ph.t_xa_true, 0.9)
        t_xa_true_drop = reconst_ph.t_xa_true

        with tf.variable_scope("model") as model_scope:
            t_mean, self.v_model = net.mlp(
                    t_xa_true_drop,
                    (config.translator.n_hidden, config.channel.n_msg))
            t_model_logprob = -tf.reduce_sum(
                    tf.square(t_mean - reconst_ph.t_z), axis=1)
            self.t_model_loss = -tf.reduce_mean(t_model_logprob)

            model_scope.reuse_variables()

            # score every candidate by how well it predicts the observed t_z
            t_all_mean, _ = net.mlp(
                    t_xa_drop,
                    (config.translator.n_hidden, config.channel.n_msg))
            t_model_raw_belief = -tf.reduce_sum(
                    tf.square(
                        t_all_mean - tf.expand_dims(reconst_ph.t_z, 1)),
                    axis=2)

            if config.translator.normalization == "global":
                assert False, "you probably don't want this"
                t_model_rs_belief = tf.nn.softmax(tf.reshape(
                        t_model_raw_belief, (1, -1)))
                self.t_model_belief = tf.reshape(
                        t_model_rs_belief,
                        (config.trainer.n_batch_episodes,
                         config.trainer.n_distractors + 1))
                self.t_model_weights = tf.ones(
                        (config.trainer.n_batch_episodes,))
            elif config.translator.normalization == "local":
                self.t_model_belief = tf.nn.softmax(t_model_raw_belief)
                #self.t_model_weights = tf.nn.softmax(t_model_logprob)
                self.t_model_logweights = t_model_logprob
            else:
                assert False

        with tf.variable_scope("desc") as desc_scope:
            t_dist, self.v_desc = net.mlp(
                    t_xa_true_drop,
                    (config.translator.n_hidden, len(task.lexicon)))
                    #(len(task.lexicon),))
            self.t_dist = t_dist
            t_desc_logprob = -tf.nn.softmax_cross_entropy_with_logits(
                    t_dist, reconst_ph.t_l_msg)
            self.t_desc_loss = -tf.reduce_mean(t_desc_logprob)

            desc_scope.reuse_variables()

            # score every candidate by how well it predicts the observed
            # t_l_msg
            t_msg_tile = tf.tile(
                    tf.reshape(
                        reconst_ph.t_l_msg,
                        (config.trainer.n_batch_episodes, 1,
                         len(task.lexicon))),
                    (1, config.trainer.n_distractors + 1, 1))
            t_all_dist, _ = net.mlp(
                    t_xa_drop,
                    (config.translator.n_hidden, len(task.lexicon)))
                    #(len(task.lexicon),))
            t_all_scores = -tf.nn.softmax_cross_entropy_with_logits(
                    t_all_dist, t_msg_tile)

            if config.translator.normalization == "global":
                t_desc_belief_raw = tf.nn.softmax(tf.reshape(
                        t_all_scores, (1, -1)))
                t_desc_belief = tf.reshape(
                        t_desc_belief_raw,
                        (config.trainer.n_batch_episodes,
                         config.trainer.n_distractors + 1))
                t_desc_belief_norm = (
                        t_desc_belief / tf.reduce_max(t_desc_belief))
                self.t_desc_belief = t_desc_belief_norm
                self.t_desc_weights = tf.ones(
                        (config.trainer.n_batch_episodes,))
            elif config.translator.normalization == "local":
                self.t_desc_belief = tf.nn.softmax(t_all_scores)
                #self.t_desc_weights = tf.nn.softmax(t_desc_logprob)
                self.t_desc_logweights = t_desc_logprob
            else:
                assert False

        # separate Adam updates for the model head and the desc head
        optimizer = tf.train.AdamOptimizer(config.translator.step_size)
        #varz = self.v_model + self.v_desc
        #self.t_loss = self.t_desc_loss + self.t_model_loss
        #self.t_train_op = optimizer.minimize(
        #        self.t_loss, var_list=varz)
        self.t_train_model_op = optimizer.minimize(
                self.t_model_loss, var_list=self.v_model)
        self.t_train_desc_op = optimizer.minimize(
                self.t_desc_loss, var_list=self.v_desc)