def embed_all(inputs, count, size):
    # Embed each input with a shared table and mean-pool over the sequence axis.
    out = []
    with tf.variable_scope("embed_all") as scope:
        for inp in inputs:
            t_emb, _ = net.embed(inp, count, size)
            t_pool = tf.reduce_mean(t_emb, axis=-2)
            out.append(t_pool)
            # reuse the same embedding weights for every subsequent input
            scope.reuse_variables()
    return out
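# Hedged usage sketch, not part of the original code: embed_all shares a single
# embedding table (via scope.reuse_variables) across several integer-id inputs
# and mean-pools each one over its sequence dimension. Placeholder names and
# sizes below are illustrative only.
n_vocab, n_embed = 100, 64
t_words_a = tf.placeholder(tf.int32, shape=(None, None))   # (batch, seq_a)
t_words_b = tf.placeholder(tf.int32, shape=(None, None))   # (batch, seq_b)
t_pool_a, t_pool_b = embed_all([t_words_a, t_words_b], n_vocab, n_embed)
# t_pool_a and t_pool_b both have shape (batch, n_embed) and share the same weights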
def build(self, task, reconst_ph, channel, model, config):
    with tf.variable_scope("belief_translator") as scope:
        t_xb_rs = tf.reshape(
            reconst_ph.t_xb,
            (config.trainer.n_batch_episodes, 1, task.n_features))
        t_xa_true_rs = tf.reshape(
            reconst_ph.t_xa_true,
            (config.trainer.n_batch_episodes, 1, task.n_features))
        t_xb_tile = tf.tile(
            t_xb_rs, (1, config.trainer.n_distractors + 1, 1))
        t_xa = tf.concat(1, (t_xa_true_rs, reconst_ph.t_xa_noise))

        def build_scorer(t_code):
            t_code_rs = tf.reshape(
                t_code,
                (config.trainer.n_batch_episodes, 1, config.channel.n_msg))
            t_code_tile = tf.tile(
                t_code_rs, (1, config.trainer.n_distractors + 1, 1))
            t_features = tf.concat(2, (t_xa, t_xb_tile, t_code_tile))
            t_score, v_net = net.mlp(t_features, (config.model.n_hidden, 1))
            t_score_rs = tf.reshape(
                t_score,
                (config.trainer.n_batch_episodes,
                 config.trainer.n_distractors + 1, 1))
            t_score_sq = tf.squeeze(t_score_rs)
            t_belief = tf.nn.softmax(t_score_sq)
            t_errs = tf.nn.sparse_softmax_cross_entropy_with_logits(
                t_score_sq,
                tf.ones((config.trainer.n_batch_episodes,), tf.int32))
            t_loss = tf.reduce_mean(t_errs)
            return t_loss, t_belief, v_net

        with tf.variable_scope("model"):
            self.t_model_loss, self.t_model_belief, v_model_net = \
                build_scorer(reconst_ph.t_z)

        with tf.variable_scope("desc"):
            t_desc_embed, v_desc_embed = net.embed(
                reconst_ph.t_desc, task.n_vocab, config.channel.n_msg)
            t_desc_pool = tf.reduce_mean(t_desc_embed, axis=1)
            self.t_desc_loss, self.t_desc_belief, v_desc_net = \
                build_scorer(t_desc_pool)

        optimizer = tf.train.AdamOptimizer(config.model.step_size)
        varz = v_model_net + v_desc_embed + v_desc_net
        self.t_loss = self.t_desc_loss + self.t_model_loss
        self.t_train_op = optimizer.minimize(self.t_loss, var_list=varz)
def predictor(scope):
    with tf.variable_scope(scope):
        t_arg = tf.placeholder(tf.int32, shape=[None])
        t_embed, v_embed = net.embed(
            t_arg, self.world.cookbook.n_kinds, N_EMBED)
        t_feats = tf.placeholder(
            tf.float32, shape=[None, world.n_features + 1])
        t_comb = tf.concat(1, (t_embed, t_feats))
        t_scores, v_weights = net.mlp(t_comb, [N_HIDDEN, self.n_actions])
        #t_scores, v_weights = net.mlp(t_comb, [self.n_actions])
        return t_arg, t_feats, t_scores, v_embed + v_weights
def predictor(scope):
    with tf.variable_scope(scope):
        t_plan = tf.placeholder(tf.int32, shape=[None, 2])
        t_embed_plan, v_emb = net.embed(
            t_plan, self.world.cookbook.n_kinds, N_EMBED, multi=True)
        t_features = tf.placeholder(
            tf.float32, shape=[None, world.n_features])
        t_comb = tf.concat(1, (t_embed_plan, t_features))
        t_scores, v_weights = net.mlp(t_comb, [N_HIDDEN, self.n_actions])
        return t_features, t_plan, t_scores, v_weights + v_emb
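# Hedged usage sketch, not part of the original code: how the placeholders and
# score tensor returned by the two-symbol predictor above might be used.
# plan_ids, state_feats, and session are illustrative names; N_EMBED and
# N_HIDDEN are assumed module-level constants.
t_features, t_plan, t_scores, params = predictor("scorer")
scores = session.run(t_scores, feed_dict={
    t_plan: plan_ids,         # int array, shape (batch, 2): pairs of kind ids
    t_features: state_feats,  # float array, shape (batch, world.n_features)
})
# scores has shape (batch, n_actions): one score per action for each example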
    drop_last=True, pin_memory=True)

dl_ev = torch.utils.data.DataLoader(
    dataset.Data.factory(
        type=type,
        root=args.root_folder,
        labels=list(range(args.nb_classes, 2 * args.nb_classes)),
        is_extracted=args.is_extracted,
        transform=dataset.utils.make_transform(is_train=False)),
    batch_size=args.sz_batch,
    shuffle=False,
    num_workers=args.nb_workers,
    pin_memory=True)

model = net.bn_inception(pretrained=True)
net.embed(model, sz_embedding=args.sz_embedding)
model = model.cuda()

criterion = proxynca.ProxyNCA(
    args.sz_embedding, args.nb_classes, args.sz_batch).cuda()

opt = torch.optim.Adam(
    [
        {  # embedding parameters
            'params': model.embedding_layer.parameters(),
            'lr': args.lr_embedding
        },
        {  # proxy nca parameters
            'params': criterion.parameters(),
            'lr': args.lr_proxynca
        },
def setup_model(args=args):
    model = net.bn_inception(pretrained=True)
    net.embed(model, sz_embedding=args.sz_embedding)
    model = model.cuda()
    return model
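# Hedged sketch, not part of the original code: collecting evaluation embeddings
# with the model returned by setup_model. It assumes (as the snippets above
# suggest) that after net.embed(...) the forward pass outputs an
# args.sz_embedding-dimensional embedding; dl_ev is the evaluation DataLoader
# built elsewhere in this file.
model = setup_model(args)
model.eval()
embeddings, labels = [], []
with torch.no_grad():
    for x, y in dl_ev:
        embeddings.append(model(x.cuda()).cpu())
        labels.append(y)
embeddings = torch.cat(embeddings)   # (n_eval, args.sz_embedding)
labels = torch.cat(labels)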
def prepare(self, world, trainer):
    assert self.world is None  # Should be initialized as None
    self.world = world
    self.trainer = trainer
    self.n_tasks = len(trainer.task_index)
    self.n_modules = len(trainer.subtask_index)
    self.max_task_steps = max(
        len(t.steps) for t in trainer.task_index.contents.keys())
    if self.config.model.featurize_plan:
        self.n_features = \
            world.n_features + self.n_modules * self.max_task_steps
    else:
        self.n_features = world.n_features
    self.n_actions = world.n_actions + 1  # + 1 for the stop action

    self.t_n_steps = tf.Variable(1., name="n_steps")
    self.t_inc_steps = self.t_n_steps.assign(self.t_n_steps + 1)
    self.optimizer = tf.train.RMSPropOptimizer(0.001)

    def build_actor(index, t_input, t_action_mask, extra_params=[]):
        with tf.variable_scope("actor_%s" % index):
            t_action_score, v_action = net.mlp(
                t_input, (N_HIDDEN, self.n_actions))

            # TODO this is pretty gross
            v_bias = v_action[-1]
            assert "b1" in v_bias.name
            t_decrement_op = v_bias[-1].assign(v_bias[-1] - 3)

            t_action_logprobs = tf.nn.log_softmax(t_action_score)
            t_chosen_prob = tf.reduce_sum(
                t_action_mask * t_action_logprobs, reduction_indices=(1,))
        return ActorModule(
            t_action_logprobs, t_chosen_prob, v_action + extra_params,
            t_decrement_op)

    def build_critic(index, t_input, t_reward, extra_params=[]):
        with tf.variable_scope("critic_%s" % index):
            if self.config.model.baseline in ("task", "common"):
                t_value = tf.get_variable(
                    "b", shape=(),
                    initializer=tf.constant_initializer(0.0))
                v_value = [t_value]
            elif self.config.model.baseline == "state":
                t_value, v_value = net.mlp(t_input, (1,))
                t_value = tf.squeeze(t_value)
            else:
                raise NotImplementedError(
                    "Baseline %s is not implemented"
                    % self.config.model.baseline)
        return CriticModule(t_value, v_value + extra_params)

    def build_actor_trainer(actor, critic, t_reward):
        t_advantage = t_reward - critic.t_value
        # NOTE second term for entropy regularizer
        actor_loss = \
            -tf.reduce_sum(actor.t_chosen_prob * t_advantage) + \
            0.001 * tf.reduce_sum(tf.exp(actor.t_probs) * actor.t_probs)
        actor_grad = tf.gradients(actor_loss, actor.params)
        actor_trainer = Trainer(
            actor_loss, actor_grad,
            self.optimizer.minimize(actor_loss, var_list=actor.params))
        return actor_trainer

    def build_critic_trainer(t_reward, critic):
        t_advantage = t_reward - critic.t_value
        critic_loss = tf.reduce_sum(tf.square(t_advantage))
        critic_grad = tf.gradients(critic_loss, critic.params)
        critic_trainer = Trainer(
            critic_loss, critic_grad,
            self.optimizer.minimize(critic_loss, var_list=critic.params))
        return critic_trainer

    # placeholders
    t_arg = tf.placeholder(tf.int32, shape=(None,))
    t_step = tf.placeholder(tf.float32, shape=(None, 1))
    t_feats = tf.placeholder(tf.float32, shape=(None, self.n_features))
    t_action_mask = tf.placeholder(tf.float32, shape=(None, self.n_actions))
    t_reward = tf.placeholder(tf.float32, shape=(None,))

    if self.config.model.use_args:
        t_embed, v_embed = net.embed(
            t_arg, len(trainer.cookbook.index), N_EMBED)
        xp = v_embed
        t_input = tf.concat(1, (t_embed, t_feats))
    else:
        t_input = t_feats
        xp = []

    actors = {}
    actor_trainers = {}
    critics = {}
    critic_trainers = {}

    # Create actor, critic for each module
    if self.config.model.featurize_plan:
        actor = build_actor(0, t_input, t_action_mask, extra_params=xp)
        for i_module in range(self.n_modules):
            actors[i_module] = actor
    else:
        for i_module in range(self.n_modules):
            actor = build_actor(i_module, t_input, t_action_mask,
                                extra_params=xp)
            actors[i_module] = actor

    if self.config.model.baseline == "common":
        common_critic = build_critic(0, t_input, t_reward, extra_params=xp)
    for i_task in range(self.n_tasks):
        if self.config.model.baseline == "common":
            critic = common_critic
        else:
            critic = build_critic(i_task, t_input, t_reward, extra_params=xp)
        for i_module in range(self.n_modules):
            critics[i_task, i_module] = critic  # NOTE one critic per task

    for i_module in range(self.n_modules):
        for i_task in range(self.n_tasks):
            critic = critics[i_task, i_module]
            critic_trainer = build_critic_trainer(t_reward, critic)
            critic_trainers[i_task, i_module] = critic_trainer

            actor = actors[i_module]
            actor_trainer = build_actor_trainer(actor, critic, t_reward)
            actor_trainers[i_task, i_module] = actor_trainer

    self.t_gradient_placeholders = {}
    self.t_update_gradient_op = None

    params = []
    for module in actors.values() + critics.values():
        params += module.params

    self.saver = tf.train.Saver()
    self.session = tf.Session()
    self.session.run(tf.initialize_all_variables())
    self.session.run([actor.t_decrement_op for actor in actors.values()])

    self.actors = actors
    self.critics = critics
    self.actor_trainers = actor_trainers
    self.critic_trainers = critic_trainers
    self.inputs = InputBundle(
        t_arg, t_step, t_feats, t_action_mask, t_reward)
dl_ev = torch.utils.data.DataLoader(
    dataset.Birds(
        root=args.cub_root,
        labels=list(range(args.nb_classes, 2 * args.nb_classes)),
        is_extracted=args.cub_is_extracted,
        transform=dataset.utils.make_transform(is_train=False)
    ),
    batch_size=args.sz_batch,
    shuffle=False,
    num_workers=args.nb_workers,
    pin_memory=True
)

model = net.bn_inception(pretrained=True)
net.embed(model, sz_embedding=args.sz_embedding)
model = model.cuda()

criterion = proxynca.ProxyNCA(
    args.sz_embedding, args.nb_classes, args.sz_batch).cuda()

opt = torch.optim.Adam(
    [
        {  # embedding parameters
            'params': model.embedding_layer.parameters(),
            'lr': args.lr_embedding
        },
        {  # proxy nca parameters
            'params': criterion.parameters(),
            'lr': args.lr_proxynca
        },