Example #1
def embed_all(inputs, count, size):
    """Embed each tensor in `inputs` with a shared table and mean-pool it."""
    out = []
    with tf.variable_scope("embed_all") as scope:
        for inp in inputs:
            t_emb, _ = net.embed(inp, count, size)
            t_pool = tf.reduce_mean(t_emb, axis=-2)  # pool over the symbol axis
            out.append(t_pool)
            scope.reuse_variables()  # share the embedding table with later inputs
    return out
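A minimal usage sketch (not from the original source), assuming net.embed takes an int32 tensor of symbol IDs and returns an (embedded tensor, variables) pair; the vocabulary size and embedding width below are illustrative:

# Hypothetical call site for embed_all (TF 1.x API, as in the examples).
t_a = tf.placeholder(tf.int32, shape=(None, 5))  # batch of 5-symbol sequences
t_b = tf.placeholder(tf.int32, shape=(None, 7))  # batch of 7-symbol sequences
pooled_a, pooled_b = embed_all([t_a, t_b], count=100, size=64)
# Both inputs share one embedding table; each result has shape (batch, 64).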
Example #2
    def build(self, task, reconst_ph, channel, model, config):
        with tf.variable_scope("belief_translator") as scope:
            # Reshape the listener observation and the true speaker
            # observation to (batch, 1, features) so they can be tiled and
            # concatenated along the candidate axis.
            t_xb_rs = tf.reshape(
                    reconst_ph.t_xb,
                    (config.trainer.n_batch_episodes, 1, task.n_features))
            t_xa_true_rs = tf.reshape(
                    reconst_ph.t_xa_true,
                    (config.trainer.n_batch_episodes, 1, task.n_features))

            # Repeat the listener observation once per candidate.
            t_xb_tile = tf.tile(
                    t_xb_rs, (1, config.trainer.n_distractors + 1, 1))

            # Candidate set: the true observation plus the distractors
            # (legacy tf.concat(axis, values) argument order).
            t_xa = tf.concat(1, (t_xa_true_rs, reconst_ph.t_xa_noise))

            def build_scorer(t_code):
                # Tile the message code across the candidates, score each
                # (candidate, listener obs, code) triple with an MLP, and
                # softmax the scores into a belief over candidates.
                t_code_rs = tf.reshape(
                        t_code,
                        (config.trainer.n_batch_episodes, 1,
                            config.channel.n_msg))
                t_code_tile = tf.tile(
                        t_code_rs, (1, config.trainer.n_distractors + 1, 1))
                t_features = tf.concat(2, (t_xa, t_xb_tile, t_code_tile))
                t_score, v_net = net.mlp(t_features, (config.model.n_hidden, 1))
                t_score_rs = tf.reshape(
                        t_score,
                        (config.trainer.n_batch_episodes,
                            config.trainer.n_distractors + 1, 1))
                t_score_sq = tf.squeeze(t_score_rs)
                t_belief = tf.nn.softmax(t_score_sq)
                # Cross-entropy against a constant target index of 1
                # (legacy positional logits-then-labels signature).
                t_errs = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        t_score_sq, tf.ones(
                            (config.trainer.n_batch_episodes,), tf.int32))
                t_loss = tf.reduce_mean(t_errs)
                return t_loss, t_belief, v_net

            # Score the model's latent message ...
            with tf.variable_scope("model"):
                self.t_model_loss, self.t_model_belief, v_model_net = build_scorer(
                        reconst_ph.t_z)

            # ... and the mean-pooled embedding of the description.
            with tf.variable_scope("desc"):
                t_desc_embed, v_desc_embed = net.embed(
                        reconst_ph.t_desc, task.n_vocab, config.channel.n_msg)
                t_desc_pool = tf.reduce_mean(t_desc_embed, axis=1)
                self.t_desc_loss, self.t_desc_belief, v_desc_net = \
                        build_scorer(t_desc_pool)

            optimizer = tf.train.AdamOptimizer(config.model.step_size)

            varz = v_model_net + v_desc_embed + v_desc_net

            # Train both scorers jointly on the summed loss.
            self.t_loss = self.t_desc_loss + self.t_model_loss
            self.t_train_op = optimizer.minimize(
                    self.t_loss, var_list=varz)
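Note that this example uses pre-1.0 TensorFlow conventions: tf.concat takes the axis first, and the sparse cross-entropy accepts positional arguments. Under TF 1.x and later both calls change, for instance:

# TF <= 0.12, as written above:
t_xa = tf.concat(1, (t_xa_true_rs, reconst_ph.t_xa_noise))
# TF >= 1.0 equivalents (t_labels stands for the tf.ones(...) labels above):
t_xa = tf.concat((t_xa_true_rs, reconst_ph.t_xa_noise), axis=1)
t_errs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=t_labels, logits=t_score_sq)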
Example #3
def predictor(scope):
    """Score actions from an (argument ID, state features) pair."""
    with tf.variable_scope(scope):
        t_arg = tf.placeholder(tf.int32, shape=[None])  # argument (kind) IDs
        t_embed, v_embed = net.embed(t_arg,
                                     self.world.cookbook.n_kinds,
                                     N_EMBED)
        t_feats = tf.placeholder(tf.float32,
                                 shape=[None, world.n_features + 1])
        t_comb = tf.concat(1, (t_embed, t_feats))  # legacy tf.concat(axis, values)
        t_scores, v_weights = net.mlp(t_comb,
                                      [N_HIDDEN, self.n_actions])
        #t_scores, v_weights = net.mlp(t_comb, [self.n_actions])
    return t_arg, t_feats, t_scores, v_embed + v_weights
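A hedged sketch of driving this factory (names are illustrative; it assumes a live session, numpy imported as np, and the surrounding agent object in scope):

t_arg, t_feats, t_scores, params = predictor("actor")
scores = session.run(t_scores, {
    t_arg: [3, 7],  # two argument (kind) IDs
    t_feats: np.zeros((2, world.n_features + 1), dtype=np.float32),
})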
Example #4
def predictor(scope):
    """Score actions from a two-symbol plan plus state features."""
    with tf.variable_scope(scope):
        t_plan = tf.placeholder(tf.int32, shape=[None, 2])  # two kind IDs per example
        t_embed_plan, v_emb = net.embed(t_plan,
                                        self.world.cookbook.n_kinds,
                                        N_EMBED,
                                        multi=True)  # multi=True: embed all plan symbols
        t_features = tf.placeholder(tf.float32,
                                    shape=[None, world.n_features])
        t_comb = tf.concat(1, (t_embed_plan, t_features))
        t_scores, v_weights = net.mlp(t_comb,
                                      [N_HIDDEN, self.n_actions])
    return t_features, t_plan, t_scores, v_weights + v_emb
Example #5
                                    drop_last=True,
                                    pin_memory=True)  # tail of a preceding DataLoader call, truncated by the snippet

# Evaluation loader over the held-out second half of the classes.
dl_ev = torch.utils.data.DataLoader(dataset.Data.factory(
    type=type,
    root=args.root_folder,
    labels=list(range(args.nb_classes, 2 * args.nb_classes)),
    is_extracted=args.is_extracted,
    transform=dataset.utils.make_transform(is_train=False)),
                                    batch_size=args.sz_batch,
                                    shuffle=False,
                                    num_workers=args.nb_workers,
                                    pin_memory=True)

# Attach an embedding head of size sz_embedding to a pretrained BN-Inception.
model = net.bn_inception(pretrained=True)
net.embed(model, sz_embedding=args.sz_embedding)
model = model.cuda()

# ProxyNCA loss with one learnable proxy per class.
criterion = proxynca.ProxyNCA(args.sz_embedding, args.nb_classes,
                              args.sz_batch).cuda()

# Separate learning rates for the embedding head and the proxies.
opt = torch.optim.Adam(
    [
        {  # embedding parameters
            'params': model.embedding_layer.parameters(),
            'lr': args.lr_embedding
        },
        {  # proxy nca parameters
            'params': criterion.parameters(),
            'lr': args.lr_proxynca
        },
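The optimizer call above is cut off by the snippet; a hypothetical completion, assuming no further parameter groups or keyword arguments follow (the original may differ):

opt = torch.optim.Adam(
    [
        {'params': model.embedding_layer.parameters(), 'lr': args.lr_embedding},
        {'params': criterion.parameters(), 'lr': args.lr_proxynca},
    ]
)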
Example #6
def setup_model(args=args):
    """Build a pretrained BN-Inception with an embedding head attached."""
    model = net.bn_inception(pretrained=True)
    net.embed(model, sz_embedding=args.sz_embedding)
    model = model.cuda()
    return model
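A usage sketch (assumed, not from the source); with the embedding head attached by net.embed, the model is expected to map an image batch to sz_embedding-dimensional vectors:

model = setup_model()
model.eval()
with torch.no_grad():
    # images: (batch, 3, H, W) tensor; output assumed (batch, args.sz_embedding)
    emb = model(images.cuda())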
Example #7
    def prepare(self, world, trainer):
        assert self.world is None  # Should be initialized as None
        self.world = world
        self.trainer = trainer

        self.n_tasks = len(trainer.task_index)
        self.n_modules = len(trainer.subtask_index)
        self.max_task_steps = max(len(t.steps) for t in trainer.task_index.contents.keys())
        if self.config.model.featurize_plan:
            self.n_features = world.n_features + self.n_modules * self.max_task_steps
        else:
            self.n_features = world.n_features

        self.n_actions = world.n_actions + 1  # + 1 for the stop action
        self.t_n_steps = tf.Variable(1., name="n_steps")
        self.t_inc_steps = self.t_n_steps.assign(self.t_n_steps + 1)
        self.optimizer = tf.train.RMSPropOptimizer(0.001)

        def build_actor(index, t_input, t_action_mask, extra_params=[]):
            with tf.variable_scope("actor_%s" % index):
                t_action_score, v_action = net.mlp(t_input, (N_HIDDEN, self.n_actions))

                # TODO this is pretty gross
                # Lower the initial bias of the last action (the appended
                # stop action) so it is rarely chosen early in training.
                v_bias = v_action[-1]
                assert "b1" in v_bias.name
                t_decrement_op = v_bias[-1].assign(v_bias[-1] - 3)

                t_action_logprobs = tf.nn.log_softmax(t_action_score)
                t_chosen_prob = tf.reduce_sum(
                    t_action_mask * t_action_logprobs,
                    reduction_indices=(1,))

            return ActorModule(
                    t_action_logprobs, t_chosen_prob,
                    v_action + extra_params, t_decrement_op)

        def build_critic(index, t_input, t_reward, extra_params=[]):
            with tf.variable_scope("critic_%s" % index):
                if self.config.model.baseline in ("task", "common"):
                    t_value = tf.get_variable(
                        "b", shape=(), initializer=tf.constant_initializer(0.0))
                    v_value = [t_value]
                elif self.config.model.baseline == "state":
                    t_value, v_value = net.mlp(t_input, (1,))
                    t_value = tf.squeeze(t_value)
                else:
                    raise NotImplementedError(
                        "Baseline %s is not implemented" % self.config.model.baseline)

            return CriticModule(t_value, v_value + extra_params)

        def build_actor_trainer(actor, critic, t_reward):
            t_advantage = t_reward - critic.t_value
            # NOTE: the second term is an entropy regularizer. t_probs holds
            # log-probabilities, so sum(exp(lp) * lp) is the negative entropy;
            # minimizing it pushes the policy toward higher entropy.
            actor_loss = \
                -tf.reduce_sum(actor.t_chosen_prob * t_advantage) + \
                0.001 * tf.reduce_sum(tf.exp(actor.t_probs) * actor.t_probs)
            actor_grad = tf.gradients(actor_loss, actor.params)
            actor_trainer = Trainer(
                actor_loss, actor_grad,
                self.optimizer.minimize(actor_loss, var_list=actor.params))

            return actor_trainer

        def build_critic_trainer(t_reward, critic):
            t_advantage = t_reward - critic.t_value
            critic_loss = tf.reduce_sum(tf.square(t_advantage))
            critic_grad = tf.gradients(critic_loss, critic.params)
            critic_trainer = Trainer(
                critic_loss, critic_grad,
                self.optimizer.minimize(critic_loss, var_list=critic.params))

            return critic_trainer

        # placeholders
        t_arg = tf.placeholder(tf.int32, shape=(None,))
        t_step = tf.placeholder(tf.float32, shape=(None, 1))
        t_feats = tf.placeholder(tf.float32, shape=(None, self.n_features))
        t_action_mask = tf.placeholder(tf.float32, shape=(None, self.n_actions))
        t_reward = tf.placeholder(tf.float32, shape=(None,))

        if self.config.model.use_args:
            t_embed, v_embed = net.embed(
                t_arg, len(trainer.cookbook.index),
                N_EMBED)
            xp = v_embed
            t_input = tf.concat(1, (t_embed, t_feats))
        else:
            t_input = t_feats
            xp = []

        actors = {}
        actor_trainers = {}
        critics = {}
        critic_trainers = {}

        # Create actor, critic for each module
        if self.config.model.featurize_plan:
            actor = build_actor(0, t_input, t_action_mask, extra_params=xp)
            for i_module in range(self.n_modules):
                actors[i_module] = actor
        else:
            for i_module in range(self.n_modules):
                actor = build_actor(i_module, t_input, t_action_mask, extra_params=xp)
                actors[i_module] = actor

        if self.config.model.baseline == "common":
            common_critic = build_critic(0, t_input, t_reward, extra_params=xp)
        for i_task in range(self.n_tasks):
            if self.config.model.baseline == "common":
                critic = common_critic
            else:
                critic = build_critic(i_task, t_input, t_reward, extra_params=xp)
            for i_module in range(self.n_modules):
                critics[i_task, i_module] = critic  # NOTE one critic per task

        for i_module in range(self.n_modules):
            for i_task in range(self.n_tasks):
                critic = critics[i_task, i_module]
                critic_trainer = build_critic_trainer(t_reward, critic)
                critic_trainers[i_task, i_module] = critic_trainer

                actor = actors[i_module]
                actor_trainer = build_actor_trainer(actor, critic, t_reward)
                actor_trainers[i_task, i_module] = actor_trainer

        self.t_gradient_placeholders = {}
        self.t_update_gradient_op = None

        params = []
        for module in list(actors.values()) + list(critics.values()):  # list(...) so this also works on Python 3
            params += module.params
        self.saver = tf.train.Saver()

        self.session = tf.Session()
        self.session.run(tf.initialize_all_variables())  # pre-TF1 API; tf.global_variables_initializer() in TF >= 1.0
        self.session.run([actor.t_decrement_op for actor in actors.values()])

        self.actors = actors
        self.critics = critics
        self.actor_trainers = actor_trainers
        self.critic_trainers = critic_trainers
        self.inputs = InputBundle(t_arg, t_step, t_feats, t_action_mask, t_reward)
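A hedged sketch of one update step against this graph; it assumes the Trainer tuple's third field is named train_op and that the rollout arrays (feats_batch, action_mask, returns, arg_ids) have already been assembled:

feed = {
    self.inputs.t_feats: feats_batch,        # (batch, n_features)
    self.inputs.t_action_mask: action_mask,  # one-hot over chosen actions
    self.inputs.t_reward: returns,           # discounted returns
}
if self.config.model.use_args:
    feed[self.inputs.t_arg] = arg_ids
self.session.run(
    [self.critic_trainers[i_task, i_module].train_op,
     self.actor_trainers[i_task, i_module].train_op],
    feed)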
Example #8
# Evaluation loader over the held-out second half of the CUB classes.
dl_ev = torch.utils.data.DataLoader(
    dataset.Birds(
        root=args.cub_root,
        labels=list(range(args.nb_classes, 2 * args.nb_classes)),
        is_extracted=args.cub_is_extracted,
        transform=dataset.utils.make_transform(is_train=False)
    ),
    batch_size=args.sz_batch,
    shuffle=False,
    num_workers=args.nb_workers,
    pin_memory=True
)

# Pretrained BN-Inception with an embedding head of size sz_embedding.
model = net.bn_inception(pretrained=True)
net.embed(model, sz_embedding=args.sz_embedding)
model = model.cuda()

criterion = proxynca.ProxyNCA(args.sz_embedding, args.nb_classes,
                              args.sz_batch).cuda()

# Separate learning rates for the embedding head and the ProxyNCA proxies.
opt = torch.optim.Adam(
    [
        {  # embedding parameters
            'params': model.embedding_layer.parameters(),
            'lr': args.lr_embedding
        },
        {  # proxy nca parameters
            'params': criterion.parameters(),
            'lr': args.lr_proxynca
        },
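The snippet is cut off inside the optimizer call; once it is closed as in Example #5, a hypothetical training step would look like the following, assuming a training loader dl_tr built analogously to dl_ev (first nb_classes labels, shuffle=True) and that ProxyNCA's forward takes (embeddings, labels):

model.train()
for x, y in dl_tr:  # dl_tr is assumed; it is not shown in the snippet
    opt.zero_grad()
    emb = model(x.cuda())
    loss = criterion(emb, y.cuda())  # assumed signature: (embeddings, labels)
    loss.backward()
    opt.step()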