Example #1
 def embedding_lookup(t):
     if not reuse: log.warning(scope.name)
     _ = fc(t, int(embedding_dim/4), is_train,
            info=not reuse, name='fc1')
     _ = fc(_, embedding_dim, is_train,
            info=not reuse, name='fc2')
     return _
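These snippets come from TensorFlow 1.x code bases that define their own thin layer wrappers (fc, conv2d) and a log helper, and they pull names such as is_train, embedding_dim, reuse, and in some cases self from the surrounding file, so none of them run in isolation. As rough orientation only, a minimal stand-in for the fc wrapper, assuming it wraps slim.fully_connected with optional batch normalization and shape logging (the real implementations are not shown in these snippets and differ between repositories; conv2d is analogous), could look like:

    import tensorflow as tf
    import tensorflow.contrib.slim as slim

    def fc(x, output_shape, is_train=True, info=False,
           batch_norm=True, activation_fn=tf.nn.relu, name="fc"):
        # Hypothetical stand-in for the repositories' fully-connected wrapper;
        # the calls in these examples are compatible with this signature, but
        # the original implementation is not part of the snippets.
        out = slim.fully_connected(x, output_shape,
                                   activation_fn=activation_fn, scope=name)
        if batch_norm:
            out = slim.batch_norm(out, is_training=is_train, scope=name + '/bn')
        if info:
            print('{}: {}'.format(name, out.get_shape().as_list()))
        return out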
Example #2
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        with tf.variable_scope(scope):
            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')

            # Append the question into image features
            conv_q = tf.concat([tf.reshape(conv_3, [tf.shape(conv_3)[0], -1]), q], axis=1)
            
            fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
            fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')

        return logits
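A minimal usage sketch for this baseline classifier, assuming build is a method of a model class that also provides is_training and a_dim, and that img and q are a batch of images and a flat question encoding (the shapes and the model name below are illustrative, not taken from the original code):

    import tensorflow as tf

    # Hypothetical shapes: 64x64 RGB images, an 11-dim question encoding,
    # and one-hot answer labels over model.a_dim classes ("model" is an
    # instance of the classifier class these methods belong to).
    img = tf.placeholder(tf.float32, [None, 64, 64, 3], name='img')
    q = tf.placeholder(tf.float32, [None, 11], name='q')
    labels = tf.placeholder(tf.float32, [None, model.a_dim], name='labels')

    logits = model.build(img, q)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)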
Example #3
 def Token_Decoder(f, token_dim, scope='Token_Decoder', reuse=False):
     with tf.variable_scope(scope, reuse=reuse) as scope:
         if not reuse: log.warning(scope.name)
         _ = fc(f,
                token_dim,
                is_train,
                info=not reuse,
                batch_norm=False,
                activation_fn=None,
                name='fc1')
         return _
Example #4
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        with tf.variable_scope(scope):

            ###################### MODIFY HERE ######################
            ## Compute film(q) for gamma, beta

            conv_1 = conv2d(img,
                            24,
                            self.is_training,
                            activation_fn=tf.nn.relu,
                            name='conv_1')
            ## Affine transform of conv_1
            conv_2 = conv2d(conv_1,
                            24,
                            self.is_training,
                            activation_fn=tf.nn.relu,
                            name='conv_2')
            ## Affine transform of conv_2
            conv_3 = conv2d(conv_2,
                            24,
                            self.is_training,
                            activation_fn=tf.nn.relu,
                            name='conv_3')
            ## Affine transform of conv_3

            #########################################################

            features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
            fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
            fc_2 = slim.dropout(fc_2,
                                keep_prob=0.5,
                                is_training=self.is_training,
                                scope='fc_3/')
            logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')

        return logits
Example #5
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        with tf.variable_scope(scope):

            ###################### MODIFY HERE ######################

            def film(q, scope='film'):
                with tf.variable_scope(scope):
                    cond = fc(q, 3 * 2 * 24, activation_fn=None, name='cond')
                    cond = tf.reshape(cond, [-1, 3, 2, 24])
                    return cond

            def modulate(conv, gamma, beta):
                gamma = tf.reshape(gamma, [-1, 1, 1, 24])
                beta = tf.reshape(beta, [-1, 1, 1, 24])
                return (1 + gamma) * conv + beta

            q_embed = fc(q, 256, name='fc_q')
            cond = film(q_embed)

            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_1 = modulate(conv_1, cond[:, 0, 0, :], cond[:, 0, 1, :])
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_2 = modulate(conv_2, cond[:, 1, 0, :], cond[:, 1, 1, :])
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')
            conv_3 = modulate(conv_3, cond[:, 2, 0, :], cond[:, 2, 1, :])

            #########################################################

            features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
            fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
            fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')

        return logits
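After the reshape in film, the cond tensor is laid out as (batch, layer, gamma-or-beta, channel): cond[:, k, 0, :] and cond[:, k, 1, :] are the per-channel gamma and beta for conv layer k, and modulate scales and shifts each feature map as (1 + gamma) * conv + beta, broadcasting over the spatial dimensions. A tiny NumPy-only sketch of that broadcasting, with illustrative shapes:

    import numpy as np

    batch, d, channels = 2, 8, 24
    conv = np.random.randn(batch, d, d, channels).astype(np.float32)
    gamma = np.random.randn(batch, channels).astype(np.float32)
    beta = np.random.randn(batch, channels).astype(np.float32)

    # Reshape to (batch, 1, 1, channels) so the scale and shift broadcast
    # over the spatial dimensions, exactly as modulate() does with tf.reshape.
    out = (1 + gamma[:, None, None, :]) * conv + beta[:, None, None, :]
    assert out.shape == conv.shape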
Example #6
 def State_Encoder(s, per, batch_size, scope='State_Encoder', reuse=False):
     with tf.variable_scope(scope, reuse=reuse) as scope:
         if not reuse: log.warning(scope.name)
         _ = conv2d(s, 16, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv1')
         _ = conv2d(_, 32, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv2')
         _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv3')
         if self.pixel_input:
             _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                        info=not reuse, batch_norm=True, name='conv4')
             _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                        info=not reuse, batch_norm=True, name='conv5')
         state_feature = tf.reshape(_, [batch_size, -1])
         if self.state_encoder_fc:
             state_feature = fc(state_feature, 512, is_train,
                                info=not reuse, name='fc1')
             state_feature = fc(state_feature, 512, is_train,
                                info=not reuse, name='fc2')
         state_feature = tf.concat([state_feature, per], axis=-1)
         if not reuse: log.info(
             'concat feature {}'.format(state_feature))
         return state_feature
Example #7
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        def _positional_encoding(features):
            # Append two features of positional encoding to the given feature maps
            d = features.get_shape().as_list()[1]
            indices = tf.range(d)
            x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
            y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
            pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
            pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
            return tf.concat([features, pos], axis=3)

        def f_phi(g, scope='f_phi'):
            with tf.variable_scope(scope):
                fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
                fc_1 = slim.dropout(fc_1, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
                logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
                return logits

        with tf.variable_scope(scope):
            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3') # (b,d,d,c)

            # Adding positional information (x,y) into features: size (b,d,d,c+2)
            conv_pos = _positional_encoding(conv_3) 

            ###################### MODIFY HERE ###################### 
            
            conv_q = tf.concat([tf.reshape(conv_pos, [tf.shape(conv_pos)[0], -1]), q], axis=1) 
            
            fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1') 
            all_g = fc_1

            #########################################################

            logits = f_phi(all_g)

        return logits
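In this skeleton the relational step has been collapsed into a single fully-connected layer over the flattened feature map (all_g = fc_1). A full Relation Network instead applies g_theta (see Example #10) to every pair of spatial "objects" and sums the results before f_phi. A vectorized sketch of that aggregation, assuming conv_pos has static spatial size and channel depth and q has a static width (all names below are illustrative):

    # Shapes: conv_pos is (b, d, d, c), q is (b, q_dim); d, c, q_dim are static.
    d = conv_pos.get_shape().as_list()[1]
    c = conv_pos.get_shape().as_list()[3]
    q_dim = q.get_shape().as_list()[1]
    n = d * d  # number of objects

    objects = tf.reshape(conv_pos, [-1, n, c])                       # (b, n, c)
    o_i = tf.tile(tf.expand_dims(objects, 2), [1, 1, n, 1])          # (b, n, n, c)
    o_j = tf.tile(tf.expand_dims(objects, 1), [1, n, 1, 1])          # (b, n, n, c)
    q_pair = tf.tile(tf.reshape(q, [-1, 1, 1, q_dim]), [1, n, n, 1])

    # One big batch of object pairs: g_theta creates its variables on the
    # first (and only) call, so reuse=False here.
    g = g_theta(tf.reshape(o_i, [-1, c]),
                tf.reshape(o_j, [-1, c]),
                tf.reshape(q_pair, [-1, q_dim]), reuse=False)        # (b*n*n, 256)
    all_g = tf.reduce_sum(tf.reshape(g, [-1, n * n, 256]), axis=1)   # (b, 256)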
Example #8
 def f_phi(g, scope='f_phi'):
     with tf.variable_scope(scope):
         fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
         fc_1 = slim.dropout(fc_1, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
         logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
         return logits
Example #9
 def film(q, scope='film'):
     with tf.variable_scope(scope):
         cond = fc(q, 3 * 2 * 24, activation_fn=None, name='cond')
         cond = tf.reshape(cond, [-1, 3, 2, 24])
         return cond
Example #10
 def g_theta(o_i, o_j, q, scope='g_theta', reuse=True):
     with tf.variable_scope(scope, reuse=reuse):
         g_1 = fc(tf.concat([o_i, o_j, q], axis=1), 256, name='g_1')
         g_2 = fc(g_1, 256, name='g_2')
         return g_2
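Because reuse=True is the default here, the first call has to be made with reuse=False so that the g_1 and g_2 variables are actually created; later calls then share them. A minimal per-pair usage sketch under that assumption, where objects is an illustrative Python list of (batch, feature) tensors:

    all_g = []
    for i, o_i in enumerate(objects):
        for j, o_j in enumerate(objects):
            # Create the variables on the very first pair, reuse them afterwards.
            g = g_theta(o_i, o_j, q, reuse=(i != 0 or j != 0))
            all_g.append(g)
    all_g = tf.add_n(all_g)  # sum of g_theta over all object pairs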