Example 1
    def __call__(self, input):
        """
    Args:
      input: batch_size x width x height x num_feat_maps
    Returns:
      output: one unit with domain tag (0/1)
    """
        with tf.variable_scope(self.name):
            #g = tf.get_default_graph()
            #with g.gradient_override_map({"Identity": "ReverseGrad"}):
            idinput = tf.identity(input)
            fc_output = ops.fully_connected(idinput,
                                            reuse=self.reuse,
                                            name='fc1',
                                            units=100)
            # Second fully connected layer takes the first layer's output
            fc_output = ops.fully_connected(fc_output,
                                            reuse=self.reuse,
                                            name='fc2',
                                            units=100)

            output = ops.logits(fc_output,
                                reuse=self.reuse,
                                name='logits',
                                units=10)

        # set reuse=True for next call
        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope=self.name)

        return output
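The commented-out gradient_override_map call above hints at a gradient-reversal layer, as used in domain-adversarial training. A minimal sketch of how the "ReverseGrad" override referenced in that comment could be registered in TF 1.x (this registration is an assumption; it does not appear in the source):

import tensorflow as tf

@tf.RegisterGradient("ReverseGrad")
def _reverse_grad(op, grad):
    # Identity on the forward pass, negated gradient on the backward pass.
    return -grad

def gradient_reversal(x):
    g = tf.get_default_graph()
    with g.gradient_override_map({"Identity": "ReverseGrad"}):
        return tf.identity(x)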
Example 2
def fpn_classifier(ipt, pool_size, num_classes, is_training=True,
                   fc_layers_size=1024):

    ipt = tf.map_fn(lambda x: ops.conv2d(x, fc_layers_size, pool_size, 0, 1, norm='batch', activation=tf.nn.relu,
                                         is_training=is_training, name='mrcnn_class_conv1',
                                         use_bias=True, kernel_initializer='glorot_uniform_tanh'),
                    elems=ipt, dtype=tf.float32)

    ipt = tf.map_fn(lambda x: ops.conv2d(x, fc_layers_size, 1, 0, 1, norm='batch', activation=tf.nn.relu,
                                         is_training=is_training, name='mrcnn_class_conv2',
                                         use_bias=True, kernel_initializer='glorot_uniform_tanh'),
                    elems=ipt, dtype=tf.float32)

    shared = tf.squeeze(tf.squeeze(ipt, 3), 2)

    mrcnn_class_logits = tf.map_fn(lambda x: ops.fully_connected(x, num_classes, name='mrcnn_class_logits',
                                                                 weights_initializer='glorot_uniform_tanh'),
                                   elems=shared, dtype=tf.float32)

    mrcnn_probs = tf.map_fn(lambda x: tf.nn.softmax(x, name='mrcnn_class'), elems=mrcnn_class_logits, dtype=tf.float32)

    ipt = tf.map_fn(lambda x: ops.fully_connected(x, 4 * num_classes, name='mrcnn_bbox_fc',
                                                  weights_initializer='glorot_uniform_tanh'),
                    elems=shared, dtype=tf.float32)

    ipt_shape = tf.shape(ipt)

    mrcnn_bbox = tf.reshape(ipt, [-1, ipt_shape[1], num_classes, 4])

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
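A hedged usage sketch for the head above, assuming ROI-pooled features shaped [batch, num_rois, pool_size, pool_size, channels]; the input name and the values 7 and 81 (COCO-style class count) are assumptions:

class_logits, class_probs, bbox_deltas = fpn_classifier(
    roi_features, pool_size=7, num_classes=81, is_training=True)
# class_logits, class_probs: [batch, num_rois, 81]
# bbox_deltas: [batch, num_rois, 81, 4]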
Example 3
def get_prob(input, params, num_class=1000, is_train=True):
    # Get pool5
    layers = get_vgg16_pool5(input, params)
    layers.fc6 = ops.fully_connected(input=layers.pool5,
                                     num_neuron=4096,
                                     name='fc6',
                                     params=params)
    if is_train:
        layers.fc6 = tf.nn.dropout(layers.fc6, keep_prob=0.5)
    layers.fc6_relu = ops.activate(input=layers.fc6,
                                   act_type='relu',
                                   name='fc6_relu')
    layers.fc7 = ops.fully_connected(input=layers.fc6_relu,
                                     num_neuron=4096,
                                     name='fc7',
                                     params=params)
    if is_train:
        layers.fc7 = tf.nn.dropout(layers.fc7, keep_prob=0.5)
    layers.fc7_relu = ops.activate(input=layers.fc7,
                                   act_type='relu',
                                   name='fc7_relu')
    layers.fc8 = ops.fully_connected(input=layers.fc7_relu,
                                     num_neuron=num_class,
                                     name='fc8',
                                     params=params)
    layers.prob = tf.nn.softmax(layers.fc8)
    return layers
Example 4
    def build_generator(self, reuse=False):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        options = self.options
        self.g_question = tf.placeholder('int32', [None, None],
                                         name="question")
        self.g_image_features = tf.placeholder('float32', [
            None, options['img_dim'], options['img_dim'],
            options['img_channels']
        ],
                                               name="image_features")
        # image_features = self.g_image_features
        image_features = tf.nn.l2_normalize(self.g_image_features, dim=3)

        encoded_question = self.encode_question(self.g_question,
                                                options['text_model'],
                                                train=False)
        context, self.g_prob1, self.g_prob2 = self.attend_image(
            image_features, encoded_question, dropout_keep_prob=1.0)

        with tf.variable_scope("post_attention_fc"):
            # context = tf.nn.tanh(context)
            fc_1 = tf.nn.relu(ops.fully_connected(context, 1024, name="fc_1"))
            logits = ops.fully_connected(fc_1,
                                         options['ans_vocab_size'],
                                         name="logits")
            self.g_predictions = tf.argmax(logits, 1)
Example 5
    def _simple_generator(z, zy, igen):
        with tf.variable_scope('%s/%d' % (params.gen_scope, igen)):
            h0 = z
            h1 = ops.fully_connected(h0, 128, 'h1')
            h1 = ops.lrelu(h1)
            h2 = ops.fully_connected(h1, 128, 'h2')
            h2 = ops.lrelu(h2)
            h3 = ops.fully_connected(h2, 1, 'h3')
            return h3, {'h0': h0, 'h1': h1, 'h2': h2, 'h3': h3}
Example 6
    def _simple_discriminator(x, y, reuse_vars=False):
        with tf.variable_scope(params.dis_scope, reuse=reuse_vars):
            h0 = x
            h1 = ops.fully_connected(h0, 128, 'h1')
            h1 = ops.lrelu(h1)
            h2 = ops.fully_connected(h1, 128, 'h2')
            h2 = ops.lrelu(h2)
            h3 = ops.fully_connected(h2, params.num_generators + 1, 'h3')
            return h3, {'h0': h0, 'h1': h1, 'h2': h2, 'h3': h3}
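A hedged sketch of how the two helpers above might be wired into a GAN step; the noise shape, the placeholder x_real, and calling the generator with zy=None are assumptions for illustration:

z = tf.random_normal([batch_size, z_dim])                            # latent noise (assumed shape)
fake, _ = _simple_generator(z, zy=None, igen=0)                      # sample one generator
d_fake, _ = _simple_discriminator(fake, y=None)                      # creates discriminator variables
d_real, _ = _simple_discriminator(x_real, y=None, reuse_vars=True)   # reuse them for real data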
Example 7
def encoder(x, scope="spade_encoder"):
  """Encoder that outputs global N(mu, sig) parameters.

  Args:
    x: [B, H, W, 4] an RGBD image (usually the initial image) which is used to
      sample noise from a distribution to feed into the refinement
      network. Range [0, 1].
    scope: (str) variable scope

  Returns:
    (mu, logvar) are [B, 256] tensors of parameters defining a normal
      distribution to sample from.
  """

  x = 2 * x - 1
  num_channel = 16

  with tf.compat.v1.variable_scope(scope, reuse=tf.compat.v1.AUTO_REUSE):
    x = ops.sn_conv(x, num_channel, kernel_size=3, stride=2,
                    use_bias=True, use_spectral_norm=True, scope="conv_0")
    x = ops.instance_norm(x, scope="inst_norm_0")
    x = ops.leaky_relu(x, 0.2)

    x = ops.sn_conv(x, 2 * num_channel, kernel_size=3, stride=2,
                    use_bias=True, use_spectral_norm=True, scope="conv_1")
    x = ops.instance_norm(x, scope="inst_norm_1")
    x = ops.leaky_relu(x, 0.2)

    x = ops.sn_conv(x, 4 * num_channel, kernel_size=3, stride=2,
                    use_bias=True, use_spectral_norm=True, scope="conv_2")
    x = ops.instance_norm(x, scope="inst_norm_2")
    x = ops.leaky_relu(x, 0.2)

    x = ops.sn_conv(x, 8 * num_channel, kernel_size=3, stride=2,
                    use_bias=True, use_spectral_norm=True, scope="conv_3")
    x = ops.instance_norm(x, scope="inst_norm_3")
    x = ops.leaky_relu(x, 0.2)

    x = ops.sn_conv(x, 8 * num_channel, kernel_size=3, stride=2,
                    use_bias=True, use_spectral_norm=True, scope="conv_4")
    x = ops.instance_norm(x, scope="inst_norm_4")
    x = ops.leaky_relu(x, 0.2)

    x = ops.sn_conv(x, 8 * num_channel, kernel_size=3, stride=2,
                    use_bias=True, use_spectral_norm=True, scope="conv_5")
    x = ops.instance_norm(x, scope="inst_norm_5")
    x = ops.leaky_relu(x, 0.2)

    mu = ops.fully_connected(x, config.DIM_OF_STYLE_EMBEDDING,
                             scope="linear_mu")
    logvar = ops.fully_connected(x, config.DIM_OF_STYLE_EMBEDDING,
                                 scope="linear_logvar")
  return mu, logvar
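The (mu, logvar) pair returned above parameterizes a diagonal Gaussian over the style embedding. A minimal sketch of the usual reparameterization step that turns it into a sample (the input name is illustrative and the sampling code is not part of the source):

mu, logvar = encoder(initial_rgbd)                 # each [B, DIM_OF_STYLE_EMBEDDING]
eps = tf.random.normal(tf.shape(mu))               # unit Gaussian noise
z = mu + tf.exp(0.5 * logvar) * eps                # sample from N(mu, exp(logvar))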
Example 8
    def _encode(self, training_data):
        with tf.variable_scope("encoder"):
            f1 = tf.nn.relu(conv2d("1st_conv", training_data, 1, 50,
                                   self.pref))
            f2 = tf.nn.relu(conv2d("2nd_conv", f1, 50, 100, self.pref))
            f3 = tf.nn.relu(conv2d("3rd_conv", f2, 100, 200, self.pref))

            flat_f3 = tf.reshape(f3, [self.pref.batch_size, 13 * 13 * 200])

            z_means = fully_connected("enc_means", flat_f3, 13 * 13 * 200,
                                      self.pref.n_z)
            z_stddev = fully_connected("enc_stddev", flat_f3, 13 * 13 * 200,
                                       self.pref.n_z)

        return z_means, z_stddev
Example 9
    def create_model(self):
        self.x = tf.placeholder(dtype=tf.float32,
                                shape=[None, INPUT_SIZE * INPUT_SIZE])
        self.y_target = tf.placeholder(dtype=tf.float32, shape=[None, 10])

        labels = self.y_target
        signal = self.x
        signal = tf.reshape(signal, [-1, INPUT_SIZE, INPUT_SIZE])

        signal = lstm(signal, INPUT_SIZE, INPUT_SIZE, INPUT_SIZE)

        signal = fully_connected(signal, 10)

        self.global_step = tf.get_variable('global_step', initializer=0)
        update_global_step = tf.assign(self.global_step, self.global_step + 1)

        self.loss = loss_function(signal, labels)
        self.accuracy = accuracy(signal, labels)

        with tf.control_dependencies([update_global_step]):
            self.train_step = tf.train.AdamOptimizer().minimize(self.loss)

        loss_sum = tf.summary.scalar('loss', self.loss)
        acc_sum = tf.summary.scalar('accuracy', self.accuracy)
        self.all_summaries = tf.summary.merge([loss_sum, acc_sum])
Example 10
    def create_model(self):
        layers = [26, 52, 52]

        self.x = tf.placeholder(dtype=tf.float32,
                                shape=[None, INPUT_SIZE * INPUT_SIZE])
        self.y_target = tf.placeholder(dtype=tf.float32, shape=[None, 10])

        labels = self.y_target
        signal = self.x

        signal = tf.reshape(signal, [-1, INPUT_SIZE, INPUT_SIZE])
        signal = augment(signal, layers[0])

        for i in range(1, len(layers)):
            hidden_n = layers[i]
            input_n = layers[i - 1]
            name = "lstm_{}".format(i)
            signal = bidirect_lstm(signal, hidden_n, input_n, name=name)

        signal = get_last_row(signal, layers[-1])
        signal = fully_connected(signal, 10)

        self.global_step = tf.get_variable('global_step', initializer=0)
        update_global_step = tf.assign(self.global_step, self.global_step + 1)

        self.loss = loss_function(signal, labels)
        self.accuracy = accuracy(signal, labels)

        with tf.control_dependencies([update_global_step]):
            self.train_step = tf.train.AdamOptimizer().minimize(self.loss)

        loss_sum = tf.summary.scalar('loss', self.loss)
        acc_sum = tf.summary.scalar('accuracy', self.accuracy)
        self.all_summaries = tf.summary.merge([loss_sum, acc_sum])
Example 11
    def _decode(self, latent_variables):
        with tf.variable_scope("decoder"):
            z_expanded = fully_connected("dec_expansion", latent_variables,
                                         self.pref.n_z, 13 * 13 * 200)
            z_shaped = tf.reshape(z_expanded,
                                  [self.pref.batch_size, 13, 13, 200])

            dec_f3 = tf.nn.relu(
                conv_transpose("1st_deconv", z_shaped, [
                    self.pref.rfs, self.pref.rfs, self.pref.n_z,
                    z_shaped.get_shape()[-1]
                ], [self.pref.batch_size, 25, 25, 100], self.pref))
            dec_f2 = tf.nn.relu(
                conv_transpose(
                    "2nd_deconv", dec_f3,
                    [self.pref.rfs, self.pref.rfs, 50,
                     dec_f3.get_shape()[-1]],
                    [self.pref.batch_size, 50, 50, 50], self.pref))
            dec_f1 = tf.nn.sigmoid(
                conv_transpose(
                    "3rd_deconv", dec_f2,
                    [self.pref.rfs, self.pref.rfs, 1,
                     dec_f2.get_shape()[-1]],
                    [self.pref.batch_size, 100, 100, 1], self.pref))

        return dec_f1
Example 12
def densenet(image, options, reuse=False, name='densenet'):
    
    divide = 2
    
    h_conv1 = conv2d(image, options.nk, ks=options.ks, name=name+'_conv1')
    h_db1 = denseblock(h_conv1, options, name=name+'_db1')    
    h_maxpool1 = maxpool2d(h_db1, name=name+'_pool1')
    h_db2 = denseblock(h_maxpool1, options, name=name+'_db2')
    
    pooled_size = int(options.image_size / divide)
    
    h_flat = tf.reshape(h_db2, [-1, pooled_size * pooled_size * options.nk])
    h_fc1 = fully_connected(h_flat, options.nk * options.nk, name=name+'_fc1')
    h_fc2 = fully_connected(h_fc1, options.n_pred, name=name+'_fc2')
    
    return h_fc2
Example 13
    def build_model(self):
        options = self.options

        self.question = tf.placeholder('int32', [None, None], name="question")
        self.image_features = tf.placeholder('float32', [
            None, options['img_dim'], options['img_dim'],
            options['img_channels']
        ],
                                             name="image_features")
        self.answers = tf.placeholder('int32', [None, options['num_answers']],
                                      name="answer")

        # image_features = self.image_features
        image_features = tf.nn.l2_normalize(self.image_features, dim=3)

        encoded_question = self.encode_question(self.question,
                                                options['text_model'],
                                                train=True)
        context, prob1, prob2 = self.attend_image(image_features,
                                                  encoded_question,
                                                  options['dropout_keep_prob'])

        with tf.variable_scope("post_attention_fc"):
            # context = tf.nn.dropout(context, 0.8)
            # context = tf.nn.tanh(context)
            fc_1 = tf.nn.relu(ops.fully_connected(context, 1024, name="fc_1"))
            fc_1 = tf.nn.dropout(fc_1, options['dropout_keep_prob'])
            logits = ops.fully_connected(fc_1,
                                         options['ans_vocab_size'],
                                         name="logits")

            loss = 0
            for i in range(options['num_answers']):
                loss += tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.answers[:, i], logits=logits)
            loss /= options['num_answers']

            self.loss = tf.reduce_mean(loss)
            self.predictions = tf.argmax(logits, 1)
Example 14
def ds128(x,
          reuse=False,
          is_training=True,
          name='ds128',
          norm=None,
          activation=ops.leaky_relu):
    return ops.fully_connected(x,
                               128,
                               use_bias=False,
                               is_training=is_training,
                               activation=activation,
                               reuse=reuse,
                               name=name,
                               norm=norm)
Example 15
def d2(x,
       reuse=False,
       is_training=True,
       norm=None,
       name='d2',
       activation=tf.nn.sigmoid):
    # Fully connected layer with 2 output units
    return ops.fully_connected(x,
                               2,
                               use_bias=False,
                               reuse=reuse,
                               activation=activation,
                               is_training=is_training,
                               name=name,
                               norm=norm)
Example 16
def d7x7x128(x,
             reuse=False,
             is_training=True,
             norm='batch',
             name='d7x7x128',
             activation=tf.nn.relu):
    # Fully connected layer with 7*7*128 = 6272 output units
    return ops.fully_connected(x,
                               7 * 7 * 128,
                               use_bias=False,
                               reuse=reuse,
                               activation=activation,
                               is_training=is_training,
                               name=name,
                               norm=norm)
Example 17
def d1(x,
       reuse=False,
       is_training=True,
       name='d1',
       norm=None,
       activation=None):
    return tf.squeeze(
        ops.fully_connected(x,
                            1,
                            use_bias=False,
                            is_training=is_training,
                            activation=activation,
                            reuse=reuse,
                            name=name,
                            norm=norm), -1)
Example 18
def d10(x,
        reuse=False,
        is_training=True,
        norm=None,
        name='d10',
        activation=None):
    # Fully connected layer with 10 output units
    return ops.fully_connected(x,
                               10,
                               use_bias=False,
                               reuse=reuse,
                               activation=activation,
                               is_training=is_training,
                               name=name,
                               norm=norm)
Example 19
def d1024(x,
          reuse=False,
          is_training=True,
          norm='batch',
          name='d1024',
          activation=tf.nn.relu):
    # Fully connected layer with 1024 output units
    return ops.fully_connected(x,
                               1024,
                               use_bias=False,
                               reuse=reuse,
                               activation=activation,
                               is_training=is_training,
                               name=name,
                               norm=norm)
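Given their output sizes (1024, 7*7*128, 10, 1), the wrappers in Examples 14 through 19 look like the dense stages of a small MNIST-scale GAN. A hedged sketch of one way they could be chained; the reshape target and the layer order are assumptions:

# Generator head: noise -> 1024 -> 7*7*128, reshaped for transposed convolutions.
h = d1024(z, is_training=is_training)
h = d7x7x128(h, is_training=is_training)
h = tf.reshape(h, [-1, 7, 7, 128])

# Flattened features -> real/fake logit and 10-way class logits.
real_fake_logit = d1(features)
class_logits = d10(features)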
Example 20
    def _discriminator(x, y, reuse_vars=False):
        with tf.variable_scope(params.dis_scope, reuse=reuse_vars):
            h0 = ops.concat(x, y)

            h1_pure = ops.convolution(h0,
                                      params.dis_filters_size,
                                      params.dis_filters,
                                      name='h1')
            h1 = h1_pure
            if params.use_batch_norm:
                h1 = ops.batch_norm(h1, name='bn1')
            h1 = ops.lrelu(h1)
            h1 = ops.concat(h1, y)

            h2 = ops.convolution(h1,
                                 params.dis_filters_size,
                                 params.dis_filters * 2,
                                 name='h2')
            if params.use_batch_norm:
                h2 = ops.batch_norm(h2, name='bn2')
            h2 = ops.lrelu(h2)
            h2 = ops.concat(h2, y)

            h3 = ops.convolution(h2,
                                 params.dis_filters_size,
                                 params.dis_filters * 4,
                                 name='h3')
            if params.use_batch_norm:
                h3 = ops.batch_norm(h3, name='bn3')
            h3 = ops.lrelu(h3)
            h3 = ops.concat(h3, y)

            h4 = tf.reshape(h3, [params.batch_size, -1])
            h4 = ops.fully_connected(h4, 1, 'h4')
            return h4, {
                'h0': h0,
                'h1': h1,
                'h1_pure': h1_pure,
                'h2': h2,
                'h3': h3,
                'h4': h4
            }
Example 21
def DenseNet(inputs, nums_out, growth_rate, train_phase, depth):
    inputs = preprocess(inputs)
    n = (depth - 4) // 3
    inputs = conv("conv1", inputs, nums_out=16, k_size=3)
    inputs = DenseBlock("DenseBlock1", inputs, n, growth_rate, train_phase)
    inputs = Transition("Transition_Layer1",
                        inputs,
                        nums_out=growth_rate,
                        train_phase=train_phase)
    inputs = DenseBlock("DenseBlock2", inputs, n, growth_rate, train_phase)
    inputs = Transition("Transition_Layer2",
                        inputs,
                        nums_out=growth_rate,
                        train_phase=train_phase)
    inputs = DenseBlock("DenseBlock3", inputs, n, growth_rate, train_phase)
    inputs = batchnorm(inputs, train_phase, "BN")
    inputs = relu(inputs)
    inputs = global_avg_pooling(inputs)
    inputs = fully_connected("FC", inputs, nums_out)
    return inputs
Example 22
    def __call__(self, inputs, train_phase):
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            # inputs = tf.random_crop(inputs, [-1, 70, 70, 3])
            inputs = conv("conv1_1", inputs, 64, 3, 2)
            inputs = leaky_relu(inputs, 0.2)
            # inputs = conv("conv1_2", inputs, 64, 3, is_SN=True)
            # inputs = leaky_relu(inputs, 0.2)
            inputs = conv("conv2_1", inputs, 128, 3, 2)
            inputs = batchnorm(inputs, train_phase, "BN1")
            inputs = leaky_relu(inputs, 0.2)
            # inputs = conv("conv2_2", inputs, 128, 3, is_SN=True)
            # inputs = leaky_relu(inputs, 0.2)
            inputs = conv("conv3_1", inputs, 256, 3, 2)
            inputs = batchnorm(inputs, train_phase, "BN2")
            inputs = leaky_relu(inputs, 0.2)
            # inputs = conv("conv3_2", inputs, 256, 3, is_SN=True)
            # inputs = leaky_relu(inputs, 0.2)
            inputs = conv("conv4_1", inputs, 512, 3, 2)
            inputs = batchnorm(inputs, train_phase, "BN3")
            inputs = leaky_relu(inputs, 0.2)
            # inputs = fully_connected("fc", inputs, 512, is_SN=True)
            output = fully_connected("output", inputs, 1)
        return output
Example 23
def refinement_network(rgbd, mask, z, scope="spade_generator"):
    """Refines rgbd, mask based on noise z.

    H, W should be divisible by 2 ** num_up_layers.

    Args:
      rgbd: [B, H, W, 4] the rendered view to be refined
      mask: [B, H, W, 1] binary mask of unknown regions. 1 where known and 0
        where unknown
      z: [B, D] a noise vector to be used as noise for the generator
      scope: (str) variable scope

    Returns:
      [B, H, W, 4] refined rgbd image.
    """
    img = 2 * rgbd - 1
    img = tf.concat([img, mask], axis=-1)

    num_channel = 32

    num_up_layers = 5
    out_channels = 4  # For RGBD

    batch_size, im_height, im_width, unused_c = rgbd.get_shape().as_list()

    init_h = im_height // (2**num_up_layers)
    init_w = im_width // (2**num_up_layers)

    with tf.compat.v1.variable_scope(scope, reuse=tf.compat.v1.AUTO_REUSE):
        x = ops.fully_connected(z, 16 * num_channel * init_h * init_w,
                                "fc_expand_z")
        x = tf.reshape(x, [batch_size, init_h, init_w, 16 * num_channel])
        x = spade.spade_resblock(
            x,
            img,
            16 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="head")
        x = ops.double_size(x)
        x = spade.spade_resblock(
            x,
            img,
            16 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="middle_0")
        x = spade.spade_resblock(
            x,
            img,
            16 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="middle_1")
        x = ops.double_size(x)
        x = spade.spade_resblock(
            x,
            img,
            8 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="up_0")
        x = ops.double_size(x)
        x = spade.spade_resblock(
            x,
            img,
            4 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="up_1")
        x = ops.double_size(x)
        x = spade.spade_resblock(
            x,
            img,
            2 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="up_2")
        x = ops.double_size(x)
        x = spade.spade_resblock(
            x,
            img,
            1 * num_channel,
            use_spectral_norm=config.USE_SPECTRAL_NORMALIZATION,
            scope="up_3")
        x = ops.leaky_relu(x, 0.2)
        # Pre-trained checkpoint uses default conv scoping.
        x = ops.sn_conv(x, out_channels, kernel_size=3)
        x = tf.tanh(x)
        return 0.5 * (x + 1)
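A hedged sketch of how this generator might be driven by the encoder from Example 7, reusing the reparameterized sample z from the sketch after that example; the tensor names are illustrative:

# rendered_rgbd: [B, H, W, 4] in [0, 1]; visibility_mask: [B, H, W, 1], 1 where known.
refined_rgbd = refinement_network(rendered_rgbd, visibility_mask, z)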
Example 24
    def build_model(self, train = True):
        dropout_rate = 1.0
        if train:
            dropout_rate = 0.5

        options = self.options
        fc7_features = tf.placeholder('float32',
            [ None, self.options['fc7_feature_length'] ], 
            name = 'fc7')
        source_sentence = tf.placeholder('float32', 
                [ None, options['text_length'], options['length_of_word_vector']], 
                name = 'sentence')
        answer = tf.placeholder('float32', 
            [ None, self.options['ans_vocab_size']], name = "answer")

        

        image_embedding = ops.fully_connected(fc7_features, 2 * options['residual_channels'], 
            name = "image_embedding")
        image_embedding = tf.nn.dropout( tf.nn.tanh(image_embedding), dropout_rate)
        print "image_embedding", image_embedding
        
        # image_features_flat = tf.nn.dropout(image_features_flat, 0.5)
        if options['text_model'] == "bytenet":
            text_tensors = text_model_v2.encoder_bytenet(source_sentence, options)
        else:
            text_tensors = text_model_v2.encoder_lstm(source_sentence, options, train)

        encoded_sentence = text_tensors['last_seq_element']

        encoded_embedding = ops.fully_connected(encoded_sentence, 2 * options['residual_channels'], 
            name = "encoded_embedding")
        encoded_embedding = tf.nn.dropout( tf.nn.tanh(encoded_embedding), dropout_rate )
        print "encoded_embedding", encoded_embedding

        combined_features = encoded_embedding * image_embedding
        combined_features = tf.nn.dropout( combined_features, dropout_rate)
        print "combined", combined_features
        logits = ops.fully_connected(combined_features, options['ans_vocab_size'], name = "logits")
        print "logits", logits
        ce = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=answer, name = 'ce')
        answer_probab = tf.nn.softmax(logits, name='answer_probab')

        predictions = tf.argmax(answer_probab,1)
        correct_predictions = tf.equal(tf.argmax(answer_probab,1), tf.argmax(answer,1))
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
        loss = tf.reduce_mean(ce, name = 'loss')

        input_tensors = {
            'fc7' : fc7_features,
            'source_sentence' : source_sentence,
            'answer' : answer
        }

        vqa_model = {
            'input_tensors' : input_tensors,
            'loss' : loss,
            'accuracy' : accuracy,
            'predictions' : predictions,
        }
        return vqa_model
Example 25
    def _generator(z, zy):
        with tf.variable_scope(params.gen_scope):
            imh, imw = params.dataset.image_size, params.dataset.image_size

            hidden_layers_num = 3
            imdiv = 2**hidden_layers_num

            h0 = tf.concat([z, zy], axis=1)

            h1 = ops.fully_connected(h0, (imh // imdiv) * (imw // imdiv) *
                                     params.gen_filters * 4, 'h1')
            if params.use_batch_norm:
                h1 = ops.batch_norm(h1, name='bn1')
            h1 = tf.reshape(
                h1, [-1, imh // imdiv, imw // imdiv, params.gen_filters * 4])
            h1 = ops.lrelu(h1)
            h1 = ops.dropout(h1,
                             training=training,
                             keep=params.gen_keep_dropout,
                             name='dropout1')
            h1 = ops.concat(h1, zy)

            h2 = ops.deconvolution(h1,
                                   params.gen_filters_size,
                                   params.gen_filters * 2,
                                   name='h2')
            if params.use_batch_norm:
                h2 = ops.batch_norm(h2, name='bn2')
            h2 = ops.lrelu(h2)
            h2 = ops.dropout(h2,
                             training=training,
                             keep=params.gen_keep_dropout,
                             name='dropout2')
            h2 = ops.concat(h2, zy)

            h3_pure = ops.deconvolution(h2,
                                        params.gen_filters_size,
                                        params.gen_filters,
                                        name='h3')
            h3 = h3_pure
            if params.use_batch_norm:
                h3 = ops.batch_norm(h3, name='bn3')
            h3 = ops.lrelu(h3)
            h3 = ops.dropout(h3,
                             training=training,
                             keep=params.gen_keep_dropout,
                             name='dropout3')
            h3 = ops.concat(h3, zy)

            h4 = ops.deconvolution(h3,
                                   params.gen_filters_size,
                                   params.dataset.channels_size,
                                   name='h4')
            return tf.nn.tanh(h4), {
                'h0': h0,
                'h1': h1,
                'h2': h2,
                'h3': h3,
                'h3_pure': h3_pure,
                'h4': h4
            }
Example 26
def quat_inception(net, vp_mask):
    net.quat_net = {}
    with tf.variable_scope('Viewpoint_Net', reuse=tf.AUTO_REUSE):
        vp_mask = tf.expand_dims(vp_mask, -1)
        # Output (bs, 64, 64, ch)
        conv1 = conv2d('conv1',
                       vp_mask,
                       3,
                       256,
                       stride=1,
                       norm=net.norm,
                       mode=net.mode,
                       act=None)
        net.quat_net['conv1'] = conv1
        conv2 = conv2d('conv2',
                       conv1,
                       1,
                       128,
                       stride=1,
                       norm=net.norm,
                       mode=net.mode)
        net.quat_net['conv2'] = conv2
        conv3 = conv2d('conv3',
                       conv2,
                       1,
                       128,
                       stride=1,
                       norm=net.norm,
                       mode=net.mode)
        net.quat_net['conv3'] = conv3
        # Output (bs, 32, 32, ch)
        pool1 = tf.layers.max_pooling2d(conv3,
                                        2,
                                        2,
                                        padding='same',
                                        name='pool1')
        net.quat_net['pool1'] = pool1
        conv4 = conv2d('conv4',
                       pool1,
                       3,
                       512,
                       stride=1,
                       norm=net.norm,
                       mode=net.mode)
        net.quat_net['conv4'] = conv4
        conv5 = conv2d('conv5',
                       conv4,
                       1,
                       256,
                       stride=1,
                       norm=net.norm,
                       mode=net.mode)
        conv5 = dropout(conv5, net.keep_prob)
        net.quat_net['conv5'] = conv5
        # Output (bs, 16, 16, ch)
        pool2 = tf.layers.max_pooling2d(conv5,
                                        2,
                                        2,
                                        padding='same',
                                        name='pool2')
        pool2 = dropout(pool2, net.keep_prob)
        net.quat_net['pool2'] = pool2
        fc1 = fully_connected('fc1', pool2, 1024)
        net.quat_net['fc1'] = fc1
        fc2 = fully_connected('fc2', fc1, 4 * net.num_classes)
        # fc2 = tf.tanh(fc2)
        net.quat_net['fc2'] = fc2
        out = fc2
        net.quat_net['out'] = out
    return out
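The fc2 head above emits 4 * net.num_classes values, which is commonly read as one quaternion per class. A hedged sketch of the normalization such a head is typically followed by (this step is an assumption, not taken from the source):

quat = tf.reshape(out, [-1, net.num_classes, 4])
quat = tf.nn.l2_normalize(quat, axis=-1)   # unit quaternion per class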
Example 27
def quat_res(net, vp_mask):
    net.quat_net = {}
    with tf.variable_scope('Quat_Net', reuse=tf.AUTO_REUSE):
        vp_mask = tf.expand_dims(vp_mask, -1)
        # Output (bs, 32, 32, 64)
        conv1 = conv2d('conv1',
                       vp_mask,
                       7,
                       64,
                       stride=2,
                       norm=net.norm,
                       mode=net.mode,
                       act=None)
        net.quat_net['conv1'] = conv1
        # Output (bs, 16, 16, 64)
        pool1 = tf.layers.max_pooling2d(conv1,
                                        3,
                                        2,
                                        padding='same',
                                        name='pool1')
        net.quat_net['pool1'] = pool1

        # Output (bs, 16, 16, 64)
        conv2_1a = conv2d('conv2_1a',
                          pool1,
                          3,
                          64,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv2_1a'] = conv2_1a
        conv2_2a = conv2d('conv2_2a',
                          conv2_1a,
                          3,
                          64,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv2_2a'] = conv2_2a
        res_2a = tf.add_n([conv2_2a, pool1], name='res_2a')

        conv2_1b = conv2d('conv2_1b',
                          res_2a,
                          3,
                          64,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv2_1b'] = conv2_1b
        conv2_2b = conv2d('conv2_2b',
                          conv2_1b,
                          3,
                          64,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv2_2b'] = conv2_2b
        res_2b = tf.add_n([conv2_2b, res_2a], name='res_2b')

        # Output (bs, 8, 8, 128)
        conv3_1a = conv2d('conv3_1a',
                          res_2b,
                          3,
                          128,
                          stride=2,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv3_1a'] = conv3_1a
        conv3_2a = conv2d('conv3_2a',
                          conv3_1a,
                          3,
                          128,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv3_2a'] = conv3_2a
        res_2b_skip = conv2d('res_2b_skip',
                             res_2b,
                             1,
                             128,
                             stride=2,
                             norm=net.norm,
                             mode=net.mode)
        res_3a = tf.add_n([conv3_2a, res_2b_skip], name='res_3a')

        conv3_1b = conv2d('conv3_1b',
                          res_3a,
                          3,
                          128,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv3_1b'] = conv3_1b
        conv3_2b = conv2d('conv3_2b',
                          conv3_1b,
                          3,
                          128,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv3_2b'] = conv3_2b
        res_3b = tf.add_n([conv3_2b, res_3a], name='res_3b')

        # Output (bs, 4, 4, 256)
        conv4_1a = conv2d('conv4_1a',
                          res_3b,
                          3,
                          256,
                          stride=2,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv4_1a'] = conv4_1a
        conv4_2a = conv2d('conv4_2a',
                          conv4_1a,
                          3,
                          256,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv4_2a'] = conv4_2a
        res_3b_skip = conv2d('res_3b_skip',
                             res_3b,
                             1,
                             256,
                             stride=2,
                             norm=net.norm,
                             mode=net.mode)
        res_4a = tf.add_n([conv4_2a, res_3b_skip], name='res_4a')

        conv4_1b = conv2d('conv4_1b',
                          res_4a,
                          3,
                          256,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv4_1b'] = conv4_1b
        conv4_2b = conv2d('conv4_2b',
                          conv4_1b,
                          3,
                          256,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv4_2b'] = conv4_2b
        res_4b = tf.add_n([conv4_2b, res_4a], name='res_4b')

        # Output (bs, 2, 2, 512)
        conv5_1a = conv2d('con5_1a',
                          res_4b,
                          3,
                          512,
                          stride=2,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['con5_1a'] = conv5_1a
        conv5_2a = conv2d('con5_2a',
                          conv5_1a,
                          3,
                          512,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['con5_2a'] = conv5_2a
        res_4b_skip = conv2d('res_4b_skip',
                             res_4b,
                             1,
                             512,
                             stride=2,
                             norm=net.norm,
                             mode=net.mode)
        res_5a = tf.add_n([conv5_2a, res_4b_skip], name='res_5a')

        conv5_1b = conv2d('conv5_1b',
                          res_5a,
                          3,
                          512,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv5_1b'] = conv5_1b
        conv5_2b = conv2d('conv5_2b',
                          conv5_1b,
                          3,
                          512,
                          stride=1,
                          norm=net.norm,
                          mode=net.mode)
        net.quat_net['conv5_2b'] = conv5_2b
        res_5b = tf.add_n([conv5_2b, res_5a], name='res_5b')
        res_5b = dropout(res_5b, net.keep_prob)

        # Output (bs, 4*num_classes)
        fc1 = fully_connected('fc1', res_5b, 512)
        net.quat_net['fc1'] = fc1
        fc2 = fully_connected('fc2', fc1, 4 * net.num_classes)
        net.quat_net['fc2'] = fc2
        # out = tf.tanh(fc2)
        out = fc2
        net.quat_net['out'] = out

    return out