def generator(tensor):
    """Build the 'generator' sub-graph and return its output tensor.

    All layers are created under the ``'generator'`` variable scope. The
    scope is opened with ``reuse=True`` as soon as any global variable whose
    name starts with ``'generator'`` already exists, so a second call shares
    the variables created by the first one.

    Layer stack: fully connected (1024) -> batch norm + ReLU -> fully
    connected (7*7*128) -> batch norm + ReLU -> reshape to [-1, 7, 7, 128]
    -> 4x4 stride-2 transposed conv (64) -> batch norm + ReLU -> 4x4 stride-2
    transposed conv (1 channel, sigmoid).

    Args:
        tensor: input tensor for the first fully connected layer
            (presumably a batch of latent vectors -- TODO confirm with caller).

    Returns:
        Output tensor of the final transposed convolution.
    """
    # any() stops at the first match instead of building a list just to
    # check that it is non-empty.
    reuse = any(v.name.startswith('generator') for v in tf.global_variables())
    # Debug output uses print() calls with a single argument so the text is
    # identical on Python 2 and Python 3.
    print(tensor.get_shape())
    with variable_scope.variable_scope('generator', reuse=reuse):
        tensor = slim.fully_connected(tensor, 1024)
        print(tensor)
        tensor = slim.batch_norm(tensor, activation_fn=tf.nn.relu)
        tensor = slim.fully_connected(tensor, 7*7*128)
        tensor = slim.batch_norm(tensor, activation_fn=tf.nn.relu)
        tensor = tf.reshape(tensor, [-1, 7, 7, 128])
        # No activation here: the ReLU is applied by the batch norm below.
        tensor = slim.conv2d_transpose(tensor, 64, kernel_size=[4, 4], stride=2, activation_fn=None)
        print('gen {}'.format(tensor.get_shape()))
        tensor = slim.batch_norm(tensor, activation_fn=tf.nn.relu)
        tensor = slim.conv2d_transpose(tensor, 1, kernel_size=[4, 4], stride=2, activation_fn=tf.nn.sigmoid)
    return tensor
Exemple #2
0
def generator(z):
    """Map `z` to a single-channel tensor with a tanh output layer.

    Two fully connected layers (1024 and 128*7*7 units, batch norm + ReLU)
    feed a reshape to ``[-1, 7, 7, 128]``. Two 5x5, stride-2, SAME-padded
    transposed convolutions follow: 128 channels with batch norm + ReLU,
    then 1 channel with tanh and no normalizer.
    """
    dense_defaults = dict(normalizer_fn=slim.batch_norm,
                          activation_fn=tf.nn.relu)
    deconv_defaults = dict(normalizer_fn=slim.batch_norm,
                           kernel_size=5,
                           stride=2,
                           padding='SAME',
                           activation_fn=tf.nn.relu)

    with slim.arg_scope([slim.fully_connected], **dense_defaults):
        hidden = slim.fully_connected(z, 1024)
        hidden = slim.fully_connected(hidden, 128*7*7)
    feature_map = tf.reshape(hidden, [-1, 7, 7, 128])

    with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults):
        upsampled = slim.conv2d_transpose(feature_map, 128)
        # Output layer overrides the scope defaults: tanh, no batch norm.
        image = slim.conv2d_transpose(upsampled, 1,
                                      activation_fn=tf.nn.tanh,
                                      normalizer_fn=None)
    return image
    def generator(self, inputs, reuse=False):
        """Build the transposed-convolution generator under scope 'generator'.

        Args:
            inputs: input tensor; the original comment gives its shape as
                (batch, 1, 1, 128) -- not enforced here.
            reuse: forwarded to ``tf.variable_scope`` to share variables.

        Returns:
            Output of 'conv_transpose4' (1 channel, tanh). With the input
            shape above the original comments give (batch_size, 32, 32, 1).
        """
        with tf.variable_scope('generator', reuse=reuse):
            deconv_defaults = dict(
                padding='SAME',
                activation_fn=None,
                stride=2,
                weights_initializer=tf.contrib.layers.xavier_initializer())
            # Batch norm only updates its statistics when self.mode is 'train'.
            bn_defaults = dict(
                decay=0.95,
                center=True,
                scale=True,
                activation_fn=tf.nn.relu,
                is_training=(self.mode == 'train'))

            with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults), \
                    slim.arg_scope([slim.batch_norm], **bn_defaults):
                # First layer uses VALID padding with a 4x4 kernel.
                net = slim.conv2d_transpose(inputs, 512, [4, 4],
                                            padding='VALID',
                                            scope='conv_transpose1')
                net = slim.batch_norm(net, scope='bn1')

                # Two identical 3x3 stride-2 stages, each followed by BN + ReLU.
                middle_stages = ((256, 'conv_transpose2', 'bn2'),
                                 (128, 'conv_transpose3', 'bn3'))
                for depth, deconv_scope, bn_scope in middle_stages:
                    net = slim.conv2d_transpose(net, depth, [3, 3],
                                                scope=deconv_scope)
                    net = slim.batch_norm(net, scope=bn_scope)

                return slim.conv2d_transpose(net, 1, [3, 3],
                                             activation_fn=tf.nn.tanh,
                                             scope='conv_transpose4')
Exemple #4
0
def generative_network(z):
  """Decoder that turns latent variables into flattened likelihood logits.

  `z` is reshaped to ``[M, 1, 1, d]`` (``M`` and ``d`` are defined outside
  this function -- presumably batch size and latent dimension, TODO confirm)
  and passed through four transposed convolutions. Every layer uses batch
  norm with ``scale=True``; all but the last use ELU.

  logits = neural_network(z)
  """
  deconv_defaults = {
      'activation_fn': tf.nn.elu,
      'normalizer_fn': slim.batch_norm,
      'normalizer_params': {'scale': True},
  }
  with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults):
    hidden = tf.reshape(z, [M, 1, 1, d])
    hidden = slim.conv2d_transpose(hidden, 128, 3, padding='VALID')
    hidden = slim.conv2d_transpose(hidden, 64, 5, padding='VALID')
    hidden = slim.conv2d_transpose(hidden, 32, 5, stride=2)
    # Final layer: no activation, so the output stays in logit space.
    logits = slim.conv2d_transpose(hidden, 1, 5, stride=2, activation_fn=None)
    return slim.flatten(logits)
def prediction_layer(cfg, input, name, num_outputs):
    """Upsample `input` with one 3x3, stride-2 transposed convolution.

    Args:
        cfg: configuration object; only ``cfg.weight_decay`` is read, as the
            L2 regularisation strength.
        input: tensor fed to the transposed convolution.
        name: variable scope under which the layer ('block4') is created.
        num_outputs: number of output channels.

    Returns:
        The layer output (no activation, no normalizer).
    """
    conv_defaults = dict(
        padding='SAME',
        activation_fn=None,
        normalizer_fn=None,
        weights_regularizer=slim.l2_regularizer(cfg.weight_decay))
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], **conv_defaults), \
            tf.variable_scope(name):
        return slim.conv2d_transpose(input, num_outputs,
                                     kernel_size=[3, 3], stride=2,
                                     scope='block4')
    def inference(self):
        """Build a conv -> strided conv -> transposed conv model on ``self.x``.

        ``self.x`` is reshaped to ``[-1] + self.input_shape[:3]``. The final
        transposed convolution has ``self.input_shape[2]`` output channels
        and no activation.

        Returns:
            Output tensor of the 'deconv_1' layer.
        """
        raw = self.x
        dims = self.input_shape
        images = tf.reshape(raw, shape=[-1, dims[0], dims[1], dims[2]])

        conv_defaults = dict(
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(0.05))
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], **conv_defaults):
            # 3x3 SAME convolution to 32 feature maps.
            features = slim.conv2d(images, 32, [3, 3], padding='SAME', scope='conv1')
            # 16x16 kernel with stride 16 (VALID): 1024 feature maps.
            bottleneck = slim.conv2d(features, 1024, [16, 16], padding='VALID',
                                     scope='conv2', stride=16)
            # Mirror of conv2: 16x16 kernel, stride 16, VALID padding.
            return slim.conv2d_transpose(bottleneck, self.input_shape[2], [16, 16],
                                         stride=16, padding='VALID',
                                         activation_fn=None, scope='deconv_1')
Exemple #7
0
def fcn_model_vgg(inputs,
                  num_classes=21,
                  is_training=True,
                  dropout_keep_prob=0.8,
                  scope='vgg_16',
                  reuse=None):
    """Build a VGG-16-style encoder followed by a transposed-conv decoder.

    Args:
        inputs: input tensor for the first convolution.
        num_classes: number of output channels of the final 'conv6' layer.
        is_training: forwarded to batch norm and dropout; when falsy the
            dropout keep probability is forced to 1.0.
        dropout_keep_prob: keep probability used by the two dropout layers.
        scope: variable scope wrapping the whole network.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        Output of 'conv6' (no activation).
    """
    keep_prob = dropout_keep_prob if is_training else 1.0

    # (number of stacked 3x3 convs, depth, conv scope, pool scope)
    encoder_stages = (
        (2, 64, 'conv1', 'pool1'),
        (2, 128, 'conv2', 'pool2'),
        (3, 256, 'conv3', 'pool3'),
        (3, 512, 'conv4', 'pool4'),
        (3, 512, 'conv5', 'pool5'),
    )

    def _upsample(features, depth, stride, layer_scope):
        # All decoder layers share the 3x3 kernel; only depth/stride differ.
        return slim.conv2d_transpose(features,
                                     depth,
                                     kernel_size=(3, 3),
                                     stride=stride,
                                     scope=layer_scope)

    with tf.variable_scope(scope, reuse=reuse):
        net = inputs
        for repeats, depth, conv_scope, pool_scope in encoder_stages:
            net = slim.repeat(net, repeats, slim.conv2d, depth, [3, 3],
                              scope=conv_scope)
            net = slim.max_pool2d(net, [2, 2], scope=pool_scope)

        net = _upsample(net, 256, (2, 2), "deconv1")
        net = slim.batch_norm(net, is_training=is_training)
        net = slim.dropout(net, keep_prob, is_training=is_training,
                           scope='dropout1')

        net = _upsample(net, 128, (2, 2), "deconv2")
        net = slim.batch_norm(net, is_training=is_training)

        # This stage upsamples by 4 rather than 2.
        net = _upsample(net, 64, (4, 4), "deconv3")
        net = slim.batch_norm(net, is_training=is_training)
        net = slim.dropout(net, keep_prob, is_training=is_training,
                           scope='dropout2')

        net = _upsample(net, 32, (2, 2), "deconv4")
        return slim.conv2d(net,
                           num_classes, [2, 2],
                           activation_fn=None,
                           scope="conv6")
Exemple #8
0
def build_pred(x_in, H, phase):
    '''
    Build the encoder/decoder prediction model with skip connections.

    Five encoder blocks ('block_1'..'block_5', two 3x3 convs each, with a
    max-pool before every block but the first) are followed by four decoder
    blocks ('block_6'..'block_9'). Each decoder block upsamples with a
    transposed convolution, concatenates the matching encoder output on
    axis 3 and applies two 3x3 convs. A final 1x1 conv in scope 'pred'
    produces a single-channel map.

    :param x_in: input tensor for the first convolution
    :param H: hyper-parameter dict; 'num_class' is read here and the dict is
        passed to ``argument_scope``
    :param phase: one of 'train', 'validate', 'test'; variables are reused
        only for 'validate'
    :return: tuple (squeezed 'pred' conv output, squeezed sigmoid of it)
    '''
    num_class = H['num_class']  # read as in the original; unused below

    conv_kernel_1 = [1, 1]
    conv_kernel_3 = [3, 3]
    pool_kernel = [2, 2]
    pool_stride = 2

    early_feature = {}
    reuse = {'train': False, 'validate': True, 'test': False}[phase]

    # (scope name, conv depth)
    encoder_blocks = (
        ('block_1', 64),
        ('block_2', 128),
        ('block_3', 256),
        ('block_4', 512),
        ('block_5', 1024),
    )
    # (scope name, conv depth, encoder block supplying the skip connection)
    decoder_blocks = (
        ('block_6', 512, 'block_4'),
        ('block_7', 256, 'block_3'),
        ('block_8', 128, 'block_2'),
        ('block_9', 64, 'block_1'),
    )

    with slim.arg_scope(argument_scope(H, phase)):
        features = x_in
        for position, (block_name, depth) in enumerate(encoder_blocks):
            if position > 0:
                # Pooling is created outside the block's variable scope.
                features = slim.max_pool2d(features)
            with tf.variable_scope(block_name, reuse=reuse):
                features = slim.conv2d(features, depth, conv_kernel_3, scope='conv1')
                features = slim.conv2d(features, depth, conv_kernel_3, scope='conv2')
                early_feature[block_name] = features

        for block_name, depth, skip_name in decoder_blocks:
            with tf.variable_scope(block_name, reuse=reuse):
                upsampled = slim.conv2d_transpose(
                    features, depth, pool_kernel, pool_stride, scope='conv_trans')
                merged = tf.concat([early_feature[skip_name], upsampled], axis=3)
                features = slim.conv2d(merged, depth, conv_kernel_3, scope='conv1')
                features = slim.conv2d(features, depth, conv_kernel_3, scope='conv2')
                early_feature[block_name] = features

        with tf.variable_scope('pred', reuse=reuse):
            logits = slim.conv2d(features, 1, conv_kernel_1, scope='conv1',
                                 activation_fn=None, normalizer_fn=None)
            early_feature['pred'] = logits
            pred = tf.sigmoid(logits)

        return tf.squeeze(logits), tf.squeeze(pred)
    def generate(self, inputs, is_training=False, name=''):
        """
        Defines the graph of the generator network.

        The input is projected with a fully connected layer and reshaped to
        [-1, start_size, start_size, channel_depths[0]]. One transposed
        convolution is then applied per entry of ``channel_depths[:-1]``,
        and a final stride-1 5x5 convolution with tanh produces
        ``channel_depths[-1]`` output channels.

        Side effects: adds a 'generated_images' image summary, sets
        ``self.reuse = True`` so later calls share the weights, and stores
        the trainable variables of scope 'generator' in ``self.variables``.

        :param inputs: tensor with shape [None, z_dim]
        :param is_training: boolean flag for batch normalization
        :param name: name of graph (apply same weights for different inputs)
        :return: generated image
        """
        # dict(mapping, key=value) works on Python 2 and 3. The previous
        # `items() + [...]` form raises TypeError on Python 3, where items()
        # is a view. As before, 'is_training' overrides any existing entry.
        norm_params = dict(self.normalizer_params, is_training=is_training)
        outputs = inputs
        with tf.name_scope(name=name),\
             tf.variable_scope("generator", reuse=self.reuse),\
             slim.arg_scope([slim.conv2d_transpose],
                            kernel_size=[5, 5],
                            stride=2,
                            activation_fn=self.activation_fn,
                            normalizer_fn=self.normalizer_fn,
                            normalizer_params=norm_params,
                            padding='SAME'):

            with tf.variable_scope("projection"):
                outputs = slim.fully_connected(
                    inputs=outputs,
                    num_outputs=self.start_size * self.start_size *
                    self.channel_depths[0],
                    activation_fn=self.activation_fn,
                    normalizer_fn=self.normalizer_fn,
                    normalizer_params=norm_params)
                outputs = tf.reshape(outputs, [
                    -1, self.start_size, self.start_size,
                    self.channel_depths[0]
                ],
                                     name="projection_reshape")
                logging.debug("Projection: {}".format(outputs))

            # The last entry of channel_depths is the output depth and is
            # handled by the "output_deconv" layer below.
            for deconv_layer_i, deconv_layer in enumerate(
                    self.channel_depths[:-1]):

                with tf.variable_scope("deconv_{}".format(deconv_layer_i)):
                    outputs = slim.conv2d_transpose(inputs=outputs,
                                                    num_outputs=deconv_layer,
                                                    padding='SAME')
                    logging.debug("Deconv layer {}: {}".format(
                        deconv_layer_i, outputs))

            with tf.variable_scope("output_deconv"):
                # Plain convolution (not transposed): stride 1, tanh output,
                # no normalizer.
                outputs = slim.conv2d(inputs=outputs,
                                      num_outputs=self.channel_depths[-1],
                                      activation_fn=tf.nn.tanh,
                                      normalizer_fn=None,
                                      normalizer_params=None,
                                      stride=1,
                                      kernel_size=[5, 5],
                                      padding='SAME')
                logging.debug("Generator output: {}".format(outputs))

            # Map the tanh range [-1, 1] to [0, 1] for the image summary.
            tf.summary.image('generated_images',
                             tf.div(tf.add(outputs, 1.0), 2.0),
                             max_outputs=5)
            self.reuse = True
            self.variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
            return outputs
Exemple #10
0
def deconv2d(input_, output_dim, ks=4, s=2, stddev=0.02, name="deconv2d"):
    """Transposed convolution without bias or activation, under scope `name`.

    Args:
        input_: input tensor.
        output_dim: number of output channels.
        ks: kernel size.
        s: stride.
        stddev: standard deviation of the truncated-normal weight initializer.
        name: variable scope for the layer.

    Returns:
        The SAME-padded transposed convolution output.
    """
    with tf.variable_scope(name):
        kernel_init = tf.truncated_normal_initializer(stddev=stddev)
        upsampled = slim.conv2d_transpose(
            input_, output_dim, ks, s,
            padding='SAME',
            activation_fn=None,
            weights_initializer=kernel_init,
            biases_initializer=None)
    return upsampled
def deconv(inputs, rate, k, scope=""):
    """Two-channel transposed convolution under scope ``"deconv" + scope``.

    Args:
        inputs: input tensor.
        rate: stride of the transposed convolution.
        k: kernel size.
        scope: suffix appended to "deconv" to form the variable scope name.

    Returns:
        The layer output (always 2 output channels).
    """
    with tf.variable_scope("deconv" + scope):
        return slim.conv2d_transpose(inputs, 2, k, stride=rate)
Exemple #12
0
def STbaseline(inputs, outputs, loss_weight, labels):
    """
    Two-stream network: flow estimation plus action classification.

    Spatial stream based on VGG16
    Temporal stream based on Flownet simple

    The temporal stream takes `inputs` and `outputs` concatenated on axis 3
    and predicts a 2-channel map at six resolutions (pr6 .. pr1); each one is
    scored by `loss_interp`. The spatial stream runs on `inputs` only, is
    fused with temporal features (Tconv5_2, Tconv6_2) and ends in a
    101-way classifier trained with sparse softmax cross-entropy.

    Args:
        inputs: image tensor (presumably the first frame, NHWC, BGR, 0-255
            -- TODO confirm with caller).
        outputs: image tensor of the same layout (presumably the second
            frame -- TODO confirm).
        loss_weight: indexable with at least 6 entries; weights for the six
            flow losses (index 0 is also applied to the action loss).
        labels: class labels passed to the sparse softmax cross-entropy.

    Returns:
        (losses, flows_all, predictions) where `losses` is
        [loss1..loss6, actionLoss], `flows_all` is the six predictions
        multiplied by their flow scales, and `predictions` is
        [prev1, actionPredictions].

    NOTE(review): `tf.concat(3, [...])` and the positional
    `sparse_softmax_cross_entropy_with_logits(fc8, labels)` call look like
    the pre-1.0 TensorFlow signatures -- confirm the targeted TF version.
    """

    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    # ---- Temporal stream (flow estimation) ----
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], 
                        activation_fn=tf.nn.elu):       # original use leaky ReLU, now we use elu
        # Contracting part: six stride-2 convolutions in total.
        Tconv1   = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [7, 7], stride=2, scope='Tconv1')
        Tconv2   = slim.conv2d(Tconv1, 128, [5, 5], stride=2, scope='Tconv2')
        Tconv3_1 = slim.conv2d(Tconv2, 256, [5, 5], stride=2, scope='Tconv3_1')
        Tconv3_2 = slim.conv2d(Tconv3_1, 256, [3, 3], scope='Tconv3_2')
        Tconv4_1 = slim.conv2d(Tconv3_2, 512, [3, 3], stride=2, scope='Tconv4_1')
        Tconv4_2 = slim.conv2d(Tconv4_1, 512, [3, 3], scope='Tconv4_2')
        Tconv5_1 = slim.conv2d(Tconv4_2, 512, [3, 3], stride=2, scope='Tconv5_1')
        Tconv5_2 = slim.conv2d(Tconv5_1, 512, [3, 3], scope='Tconv5_2')
        Tconv6_1 = slim.conv2d(Tconv5_2, 1024, [3, 3], stride=2, scope='Tconv6_1')
        Tconv6_2 = slim.conv2d(Tconv6_1, 1024, [3, 3], scope='Tconv6_2')

        # Hyper-params for computing unsupervised loss
        # (all passed straight through to loss_interp, defined elsewhere)
        epsilon = 0.0001 
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2       # for deconvolution

        # Expanding part. Every level follows the same recipe:
        #   1. predict a 2-channel map `prN` with a 3x3 conv (no activation);
        #   2. resize both normalized images to the prediction's height/width;
        #   3. compute the level loss with loss_interp;
        #   4. upsample features and prediction by `scale` and concatenate
        #      them with the contracting-part features of the next level.
        pr6 = slim.conv2d(Tconv6_2, 2, [3, 3], activation_fn=None, scope='pr6')
        # Static shape is required here: `.value` is None for unknown dims.
        h6 = pr6.get_shape()[1].value
        w6 = pr6.get_shape()[2].value
        pr6_input = tf.image.resize_bilinear(inputs_norm, [h6, w6])
        pr6_output = tf.image.resize_bilinear(outputs_norm, [h6, w6])
        flow_scale_6 = 0.3125    # (*20/64)
        loss6, _ = loss_interp(pr6, pr6_input, pr6_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_6, FlowDeltaWeights)
        upconv5 = slim.conv2d_transpose(Tconv6_2, 512, [2*scale, 2*scale], stride=scale, scope='upconv5')
        pr6to5 = slim.conv2d_transpose(pr6, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr6to5')
        concat5 = tf.concat(3, [Tconv5_2, upconv5, pr6to5])

        pr5 = slim.conv2d(concat5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625    # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_5, FlowDeltaWeights)
        upconv4 = slim.conv2d_transpose(concat5, 256, [2*scale, 2*scale], stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat(3, [Tconv4_2, upconv4, pr5to4])

        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25    # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_4, FlowDeltaWeights)
        upconv3 = slim.conv2d_transpose(concat4, 128, [2*scale, 2*scale], stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat(3, [Tconv3_2, upconv3, pr4to3])

        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5    # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_3, FlowDeltaWeights)
        upconv2 = slim.conv2d_transpose(concat3, 64, [2*scale, 2*scale], stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat(3, [Tconv2, upconv2, pr3to2])

        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0    # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_2, FlowDeltaWeights)
        upconv1 = slim.conv2d_transpose(concat2, 32, [2*scale, 2*scale], stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat(3, [Tconv1, upconv1, pr2to1])

        # Finest level: the second value returned by loss_interp is kept
        # (prev1) and returned to the caller; coarser levels discard it.
        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0    # (*20/2) 
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_1, FlowDeltaWeights)
        
    # ---- Spatial stream (VGG16-style classifier on `inputs` only) ----
    with slim.arg_scope([slim.conv2d, slim.fully_connected], 
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):

        # conv1_1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [3, 3], scope='conv1_1')
        conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')

        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')

        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')

        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')

        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')

        # Incorporate temporal feature
        # pool5 follows five 2x poolings and Tconv5_2 five stride-2 convs, so
        # they are concatenated at the same downsampling factor; after one
        # more pooling the result is joined with Tconv6_2.
        # NOTE(review): exact spatial agreement depends on the padding
        # defaults and input size -- confirm.
        concatST = tf.concat(3, [pool5, Tconv5_2])
        poolST = slim.max_pool2d(concatST, [2, 2])
        # print poolST.get_shape()
        concat2ST = tf.concat(3, [poolST, Tconv6_2])
        # print concat2ST.get_shape()
        # 1x1 convolution reducing the fused features to 512 channels.
        concatDR = slim.conv2d(concat2ST, 512, [1, 1])
        # print concatDR.get_shape()

        flatten5 = slim.flatten(concatDR, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        # NOTE(review): no is_training argument is passed to slim.dropout
        # here, so whether dropout is disabled at inference depends on
        # slim's default / an enclosing arg_scope -- confirm.
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        # 101 output classes, raw logits (no activation).
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(fc8, labels)
        actionLoss = tf.reduce_mean(cross_entropy)

        # Adding intermediate losses
        # NOTE(review): actionLoss is weighted by loss_weight[0], the same
        # weight as loss1 -- confirm this is intended rather than a
        # dedicated entry.
        all_loss = loss_weight[0]*loss1["total"] + loss_weight[1]*loss2["total"] + loss_weight[2]*loss3["total"] + \
                    loss_weight[3]*loss4["total"] + loss_weight[4]*loss5["total"] + loss_weight[5]*loss6["total"] + \
                    loss_weight[0]*actionLoss
        slim.losses.add_loss(all_loss)

        losses = [loss1, loss2, loss3, loss4, loss5, loss6, actionLoss]
        # pr1 = tf.mul(tf.constant(20.0), pr1)
        # Each prediction is multiplied by its level's flow scale.
        flows_all = [pr1*flow_scale_1, pr2*flow_scale_2, pr3*flow_scale_3, pr4*flow_scale_4, pr5*flow_scale_5, pr6*flow_scale_6]
        
        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
Exemple #13
0
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
  """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses

  Args:
    pyramid: mapping with keys 'P2'..'P5' holding the pyramid feature maps.
    ih, iw: passed to anchor_decoder / roi_decoder (presumably the input
      image height and width -- TODO confirm).
    num_classes: number of classes for the refine and mask heads.
    base_anchors: number of anchors per location for the RPN heads.
    is_training: controls dropout and roi sampling; when False the mask
      head crops are rebuilt after the refine head.
    gt_boxes: ground-truth boxes forwarded to sample_rpn_outputs_with_gt.

  Returns:
    Dict with keys 'rpn', 'roi', 'assigned_rois', 'assigned_layer_inds',
    'ordered_rois', 'pyramid_feature', 'refined', 'final_boxes' and 'mask'.
  """
  outputs = {}
  #arg_scope = _extra_conv_arg_scope(activation_fn=None)
  arg_scope = _extra_conv_arg_scope_with_bn(activation_fn=None)
  my_sigmoid = None
  with slim.arg_scope(arg_scope):
    with tf.variable_scope('pyramid'):
        # for p in pyramid:
        outputs['rpn'] = {}
        for i in range(5, 1, -1):
          p = 'P%d'%i
          stride = 2 ** i
          
          ## rpn head
          shape = tf.shape(pyramid[p])
          height, width = shape[1], shape[2]
          rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1, activation_fn=tf.nn.relu, scope='%s/rpn'%p)
          box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=my_sigmoid)
          cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

          anchor_scales = [2 **(i-2), 2 ** (i-1), 2 **(i)]
          print("anchor_scales = " , anchor_scales)
          all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
          outputs['rpn'][p]={'box':box, 'cls':cls, 'anchor':all_anchors}

        ## gather all rois
        # print (outputs['rpn'])
        rpn_boxes = [tf.reshape(outputs['rpn']['P%d'%p]['box'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_clses = [tf.reshape(outputs['rpn']['P%d'%p]['cls'], [-1, 1]) for p in range(5, 1, -1)]  
        rpn_anchors = [tf.reshape(outputs['rpn']['P%d'%p]['anchor'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
        rpn_clses = tf.concat(values=rpn_clses, axis=0)
        rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

        outputs['rpn']['box'] = rpn_boxes
        outputs['rpn']['cls'] = rpn_clses
        outputs['rpn']['anchor'] = rpn_anchors
        # outputs['rpn'] = {'box': rpn_boxes, 'cls': rpn_clses, 'anchor': rpn_anchors}
        
        rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
        rois, roi_clses, scores, = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
        # rois, scores, batch_inds = sample_rpn_outputs(rois, rpn_probs[:, 1])
        rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

        # if is_training:
        #     # rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes)
        #     rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.2)
        
        outputs['roi'] = {'box': rois, 'score': scores}

        ## cropping regions
        [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

        outputs['assigned_rois'] = assigned_rois
        outputs['assigned_layer_inds'] = assigned_layer_inds

        cropped_rois = []
        ordered_rois = []
        pyramid_feature = []
        for i in range(5, 1, -1):
            print(i)
            p = 'P%d'%i
            # assign_boxes was called with layers [2, 3, 4, 5], hence i-2.
            splitted_rois = assigned_rois[i-2]
            batch_inds = assigned_batch_inds[i-2]
            cropped, boxes_in_crop = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                               pooled_height=14, pooled_width=14)
            # cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
            #                    pooled_height=14, pooled_width=14)
            cropped_rois.append(cropped)
            ordered_rois.append(splitted_rois)
            pyramid_feature.append(tf.transpose(pyramid[p],[0,3,1,2]))
            # if i is 5:
            #     outputs['tmp_0'] = tf.transpose(pyramid[p],[0,3,1,2])
            #     outputs['tmp_1'] = splitted_rois
            #     outputs['tmp_2'] = tf.transpose(cropped,[0,3,1,2])
            #     outputs['tmp_3'] = boxes_in_crop
            #     outputs['tmp_4'] = [ih, iw]
            
        # From here on cropped_rois / ordered_rois are tensors, not lists.
        cropped_rois = tf.concat(values=cropped_rois, axis=0)
        ordered_rois = tf.concat(values=ordered_rois, axis=0)


        outputs['ordered_rois'] = ordered_rois
        outputs['pyramid_feature'] = pyramid_feature

        outputs['roi']['cropped_rois'] = cropped_rois
        tf.add_to_collection('__CROPPED__', cropped_rois)

        ## refine head
        # to 7 x 7
        cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
        refine = slim.flatten(cropped_regions)
        refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        refine = slim.fully_connected(refine,  1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        cls2 = slim.fully_connected(refine, num_classes, activation_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.05))
        box = slim.fully_connected(refine, num_classes*4, activation_fn=my_sigmoid, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.05))

        outputs['refined'] = {'box': box, 'cls': cls2}
        
        ## decode refine net outputs
        cls2_prob = tf.nn.softmax(cls2)
        final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, ordered_rois, ih, iw)

        #outputs['tmp_0'] = ordered_rois
        #outputs['tmp_1'] = assigned_rois
        #outputs['tmp_2'] = box
        #outputs['tmp_3'] = final_boxes
        #outputs['tmp_4'] = cls2_prob

        #outputs['final_boxes'] = {'box': final_boxes, 'cls': classes}
        outputs['final_boxes'] = {'box': final_boxes, 'cls': classes, 'prob': cls2_prob}
        ## for testing, maskrcnn takes refined boxes as inputs
        if not is_training:
          rois = final_boxes
          # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
          #       assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
          # Start from fresh lists: both names were rebound to concatenated
          # tensors above, and a Tensor has no .append(), so the loop below
          # used to raise AttributeError whenever is_training was False.
          # NOTE(review): with the re-assignment above commented out, the
          # crops are still taken from the proposal rois, not final_boxes.
          cropped_rois = []
          ordered_rois = []
          for i in range(5, 1, -1):
            p = 'P%d'%i
            splitted_rois = assigned_rois[i-2]
            batch_inds = assigned_batch_inds[i-2]
            cropped, _ = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                               pooled_height=14, pooled_width=14)
            cropped_rois.append(cropped)
            ordered_rois.append(splitted_rois)
          cropped_rois = tf.concat(values=cropped_rois, axis=0)
          ordered_rois = tf.concat(values=ordered_rois, axis=0)
          
        ## mask head
        m = cropped_rois
        for _ in range(4):
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # to 28 x 28
        m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID', activation_fn=tf.nn.relu)
        tf.add_to_collection('__TRANSPOSED__', m)
        m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID', activation_fn=None)
          
        # add a mask, given the predicted boxes and classes
        outputs['mask'] = {'mask':m, 'cls': classes, 'score': scores}
          
  return outputs
    def interface(self, input_x):
        """Assemble the residual attention network graph.

        A strided stem feeds a trunk of residual blocks interleaved with two
        attention stages. The output of each attention stage is also decoded
        into a single-channel map, and the trunk ends with a mean over axes
        1 and 2 followed by a 1x1 convolution with ``self.num_class`` outputs.

        Args:
            input_x: Tensor fed to the stem convolution.
                NOTE(review): presumably NHWC, because the final pooling
                averages over axes 1 and 2 -- confirm against the caller.

        Returns:
            A 3-tuple ``(map_stage1, map_stage2, logits)``: the two
            single-channel decoded attention maps and the classifier output
            (spatial axes kept by ``keepdims=True``).
        """
        build_residual = self.residual_block.residual_block

        def layer_defaults():
            # A fresh slim arg_scope carrying the current training flag.
            return slim.arg_scope(
                arg_scope_.arg_scope(is_training=self.is_training))

        def decode_to_map(features, wide, narrow, scopes):
            # Two rounds of (1x1 conv -> 3x3 stride-2 transposed conv); the
            # last layer emits one channel with no normalizer or activation.
            first, second, third, last = scopes
            with layer_defaults():
                decoded = slim.conv2d(features, wide, [1, 1],
                                      stride=1, scope=first)
                decoded = slim.conv2d_transpose(decoded, narrow, [3, 3],
                                                stride=2, scope=second)
                decoded = slim.conv2d(decoded, narrow, [1, 1],
                                      stride=1, scope=third)
                decoded = slim.conv2d_transpose(decoded, 1, [3, 3],
                                                stride=2,
                                                normalizer_fn=None,
                                                activation_fn=None,
                                                scope=last)
            return decoded

        with tf.variable_scope('residual_attention_network'):
            # ResNet-style stem: 7x7 stride-2 convolution followed by a
            # 3x3 stride-2 max pool.
            with layer_defaults():
                stem = slim.conv2d(input_x, 64, [7, 7], stride=2,
                                   padding='SAME', scope='conv')
                stem = slim.max_pool2d(stem, [3, 3], stride=2,
                                       padding='SAME', scope='maxpool')

            trunk = build_residual(stem, 64, scope_name='residual_block1')
            # Author's note: reduced to 1/8 of the input (80*60).
            trunk = build_residual(trunk, 128, stride=2,
                                   scope_name='residual_block2')

            # First attention stage.
            stage1 = self.attention_block_stage0.attention_block_stage0(
                trunk, 128, 1)

            # Decode the first attention output.
            # Author's note: upsampled to 1/2 of the input.
            map_stage1 = decode_to_map(
                stage1, 128, 64,
                ('deconv1-1', 'deconv1-2', 'deconv1-3', 'deconv1-4'))

            # One more downsampling step.
            # Author's note: reduced to 1/16 of the input (40*30).
            trunk = build_residual(stage1, 256, stride=2,
                                   scope_name='residual_block3')

            # Second attention stage.
            stage2 = self.attention_block_stage1.attention_block_stage1(
                trunk, 256, 2)

            # Decode the second attention output.
            # Author's note: upsampled to 1/4 of the input.
            map_stage2 = decode_to_map(
                stage2, 256, 128,
                ('deconv2-1', 'deconv2-2', 'deconv2-3', 'deconv2-4'))

            # NOTE: a further 512-channel attention stage with its own
            # 'deconv3-*' decoder was disabled by the original author and is
            # not built here.

            # Author's size notes for the remaining trunk: 30*23, 20*15.
            trunk = build_residual(stage2, 512, stride=2,
                                   scope_name='residual_block4')
            trunk = build_residual(trunk, 512,
                                   scope_name='residual_block5')
            # Author's note: 10*8.
            trunk = build_residual(trunk, 1024, stride=2,
                                   scope_name='residual_block6')

            pooled = tf.reduce_mean(trunk, [1, 2],
                                    name='global_avg_pool',
                                    keepdims=True)
            logits = slim.conv2d(pooled, self.num_class, [1, 1],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 scope='logits')

            return map_stage1, map_stage2, logits
    def interface_resnet50(self, inputs, reuse=None, is_training=False):
        """Build alpha and reflectance heads on top of a ResNet-v2-50 backbone.

        The backbone is created under ``resnet_arg_scope(use_batch_norm=True)``.
        Two structurally identical heads are then built under the
        ``cloud_net/alpha_prediction`` and ``cloud_net/reflectance_prediction``
        variable scopes. Each head starts from the ``resnet_v2_50/block4``
        end point, applies three stride-2 transposed convolutions, each summed
        with an earlier backbone end point, and finishes with a 3x3
        convolution without activation.

        Args:
            inputs: Image tensor passed to ``resnet_v2_50``.
            reuse: Forwarded to the backbone and to the ``cloud_net``
                variable scope.
            is_training: Forwarded to the backbone and to
                ``self.fcn_arg_scope``.

        Returns:
            ``(alpha_logits, reflectance_logits)`` with
            ``self.alpha_channel`` and ``self.reflectance_channel`` output
            channels respectively.
        """
        with slim.arg_scope(resnet_arg_scope(use_batch_norm=True)):
            _, resnet_endpoints = resnet_v2_50(
                inputs,
                reuse=reuse,
                is_training=is_training,
            )

        def prediction_head(out_channels):
            # Shared layout of both heads. Must be called inside the head's
            # own variable scope so the two heads get separate variables.
            # NOTE(review): the additions require each skip tensor to match
            # the upsampled tensor in shape -- confirm against the
            # resnet_v2_50 variant used by this project.
            nets = resnet_endpoints['resnet_v2_50/block4']
            nets = slim.conv2d_transpose(
                nets, 512, kernel_size=[3, 3],
                stride=2) + resnet_endpoints[
                    'resnet_v2_50/block2/unit_2/bottleneck_v2']

            nets = slim.conv2d_transpose(
                nets, 256, kernel_size=[3, 3],
                stride=2) + resnet_endpoints[
                    'resnet_v2_50/block1/unit_2/bottleneck_v2']

            nets = slim.conv2d_transpose(
                nets, 64, kernel_size=[3, 3],
                stride=2) + resnet_endpoints['resnet_v2_50/conv1']

            return slim.conv2d(nets,
                               out_channels, [3, 3],
                               scope='pred',
                               activation_fn=None)

        with slim.arg_scope(
                self.fcn_arg_scope(is_training=is_training,
                                   normalizer_fn=None)):
            with tf.variable_scope('cloud_net',
                                   'cloud_net', [inputs],
                                   reuse=reuse):

                with tf.variable_scope('alpha_prediction'):
                    alpha_logits = prediction_head(self.alpha_channel)

                with tf.variable_scope('reflectance_prediction'):
                    reflectance_logits = prediction_head(
                        self.reflectance_channel)
        return alpha_logits, reflectance_logits
    def interface_unet(self, inputs, reuse=None, is_training=True):
        """Build a U-Net style network with alpha and reflectance decoders.

        A shared contracting path (five stages of two 3x3 convolutions, with
        2x2 max pooling between stages) is followed by two structurally
        identical expanding paths, one per output. Each expanding step
        upsamples with a stride-2 transposed convolution, merges the matching
        encoder stage through ``self.crop_and_concat`` and applies two 3x3
        convolutions. The result of each path is resized to
        ``self.img_size`` x ``self.img_size``.

        NOTE(review): the original author's size annotations (508 -> 254 ->
        ... -> 25 on the way down, 50 -> ... -> 340 on the way up) suggest
        unpadded convolutions on a 512x512 input -- confirm against
        ``fcn_arg_scope``.

        Args:
            inputs: Image tensor fed to the first encoder stage.
            reuse: Forwarded to the ``cloud_net`` variable scope.
            is_training: Forwarded to ``self.fcn_arg_scope``.

        Returns:
            ``(alpha_logits, reflectance_logits)``.
        """
        skips = {}
        contracting_plan = (('net1', 64), ('net2', 128),
                            ('net3', 256), ('net4', 512))
        expanding_plan = (('net4', 512), ('net3', 256),
                          ('net2', 128), ('net1', 64))

        def expand(out_channels):
            # One full decoder; call inside the head's own variable scope.
            tensor = skips['net5']
            for skip_key, depth in expanding_plan:
                tensor = slim.conv2d_transpose(tensor, depth, [3, 3],
                                               stride=2)
                tensor = self.crop_and_concat(skips[skip_key], tensor)
                tensor = slim.repeat(tensor, 2, slim.conv2d, depth, [3, 3])
            raw = slim.conv2d(tensor,
                              out_channels, [3, 3],
                              padding='SAME',
                              activation_fn=None)
            return tf.image.resize_images(
                raw, [self.img_size, self.img_size])

        with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
            with tf.variable_scope('cloud_net',
                                   'cloud_net', [inputs],
                                   reuse=reuse):
                with tf.variable_scope('feature_exatraction'):
                    tensor = inputs
                    for skip_key, depth in contracting_plan:
                        tensor = slim.repeat(tensor, 2, slim.conv2d, depth,
                                             [3, 3])
                        skips[skip_key] = tensor
                        tensor = slim.max_pool2d(tensor, [2, 2])
                    # Bottleneck stage: no pooling afterwards.
                    skips['net5'] = slim.repeat(tensor, 2, slim.conv2d, 1024,
                                                [3, 3])

                with tf.variable_scope('alpha_prediction'):
                    alpha_logits = expand(self.alpha_channel)

                with tf.variable_scope('reflectance_prediction'):
                    reflectance_logits = expand(self.reflectance_channel)

            return alpha_logits, reflectance_logits
    def interface_vgg16(self, inputs, reuse=None, is_training=True):
        """Build alpha and reflectance decoders on top of a VGG-16 backbone.

        Five VGG end points (conv1_2 ... conv5_3) serve as skip connections.
        Two extra stages are appended under ``cloud_net/feature_exatraction``
        using stride-2 convolutions in place of pooling. Each decoder then
        applies six stride-2 transposed convolutions, adding the matching
        skip tensor after every step, and ends with a 3x3 convolution
        without activation.

        Args:
            inputs: Image tensor passed to ``vgg_16``.
            reuse: Forwarded to the backbone and the ``cloud_net`` scope.
            is_training: Forwarded to the backbone and ``self.fcn_arg_scope``.

        Returns:
            ``(alpha_logits, reflectance_logits)``.
        """
        with slim.arg_scope(vgg_arg_scope()):
            _, vgg_end_points = vgg_16(inputs,
                                       is_training=is_training,
                                       reuse=reuse,
                                       spatial_squeeze=False,
                                       num_classes=None)

        skips = {
            'net1': vgg_end_points['vgg_16/conv1/conv1_2'],
            'net2': vgg_end_points['vgg_16/conv2/conv2_2'],
            'net3': vgg_end_points['vgg_16/conv3/conv3_3'],
            'net4': vgg_end_points['vgg_16/conv4/conv4_3'],
            'net5': vgg_end_points['vgg_16/conv5/conv5_3'],
        }

        # (output depth, layer scope, skip tensor added after upsampling)
        decoder_plan = (
            (512, 'conv_trans1', 'net6'),
            (512, 'conv_trans2', 'net5'),
            (512, 'conv_trans3', 'net4'),
            (256, 'conv_trans4', 'net3'),
            (128, 'conv_trans5', 'net2'),
            (64, 'conv_trans6', 'net1'),
        )

        def decode(out_channels):
            # One full decoder; call inside the head's own variable scope.
            tensor = skips['net7']
            for depth, layer_scope, skip_key in decoder_plan:
                upsampled = slim.conv2d_transpose(
                    tensor, depth, [3, 3], stride=2, scope=layer_scope)
                tensor = upsampled + skips[skip_key]
            return slim.conv2d(tensor,
                               out_channels, [3, 3],
                               scope='pred',
                               activation_fn=None)

        with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
            with tf.variable_scope('cloud_net',
                                   'cloud_net', [inputs],
                                   reuse=reuse):
                with tf.variable_scope('feature_exatraction'):
                    # Strided convolutions downsample after conv5_3 and conv6.
                    tensor = slim.conv2d(skips['net5'],
                                         512, [3, 3],
                                         stride=2,
                                         scope='pool5')
                    tensor = slim.repeat(tensor,
                                         2,
                                         slim.conv2d,
                                         512, [3, 3],
                                         scope='conv6')
                    skips['net6'] = tensor
                    tensor = slim.conv2d(tensor,
                                         512, [3, 3],
                                         stride=2,
                                         scope='pool6')
                    skips['net7'] = slim.conv2d(tensor, 512, [3, 3],
                                                scope='conv7')

                with tf.variable_scope('alpha_prediction'):
                    alpha_logits = decode(self.alpha_channel)

                with tf.variable_scope('reflectance_prediction'):
                    reflectance_logits = decode(self.reflectance_channel)
        return alpha_logits, reflectance_logits
    def interface_cloudMattingNet(self, inputs, reuse=None, is_training=True):
        """Build the cloud matting encoder with alpha and reflectance decoders.

        The encoder has six stages of two 3x3 convolutions, each followed by
        a stride-2 3x3 convolution (scoped ``poolN``), and a final single
        convolution ``conv7``. Each decoder applies six stride-2 transposed
        convolutions, adding the matching encoder stage after every step,
        and ends with a 3x3 convolution without activation.

        Args:
            inputs: Image tensor fed to the first encoder stage.
            reuse: Forwarded to the ``cloud_net`` variable scope.
            is_training: Forwarded to ``self.fcn_arg_scope``.

        Returns:
            ``(alpha_logits, reflectance_logits)`` with
            ``self.alpha_channel`` and ``self.reflectance_channel`` output
            channels respectively.
        """
        skips = {}

        # (depth, conv-pair scope, skip key, strided-conv scope)
        encoder_plan = (
            (64, 'conv1', 'net1', 'pool1'),
            (128, 'conv2', 'net2', 'pool2'),
            (128, 'conv3', 'net3', 'pool3'),
            (256, 'conv4', 'net4', 'pool4'),
            (512, 'conv5', 'net5', 'pool5'),
            (512, 'conv6', 'net6', 'pool6'),
        )
        # (output depth, layer scope, skip tensor added after upsampling)
        decoder_plan = (
            (512, 'conv_trans1', 'net6'),
            (512, 'conv_trans2', 'net5'),
            (256, 'conv_trans3', 'net4'),
            (128, 'conv_trans4', 'net3'),
            (128, 'conv_trans5', 'net2'),
            (64, 'conv_trans6', 'net1'),
        )

        def decode(out_channels):
            # One full decoder; call inside the head's own variable scope.
            tensor = skips['net7']
            for depth, layer_scope, skip_key in decoder_plan:
                upsampled = slim.conv2d_transpose(
                    tensor, depth, [3, 3], stride=2, scope=layer_scope)
                tensor = upsampled + skips[skip_key]
            return slim.conv2d(tensor,
                               out_channels, [3, 3],
                               scope='pred',
                               activation_fn=None)

        with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
            with tf.variable_scope('cloud_net',
                                   'cloud_net', [inputs],
                                   reuse=reuse):
                with tf.variable_scope('feature_exatraction'):
                    tensor = inputs
                    for depth, pair_scope, skip_key, down_scope in encoder_plan:
                        tensor = slim.repeat(tensor,
                                             2,
                                             slim.conv2d,
                                             depth, [3, 3],
                                             scope=pair_scope)
                        skips[skip_key] = tensor
                        # Strided convolution takes the place of pooling.
                        tensor = slim.conv2d(tensor,
                                             depth, [3, 3],
                                             stride=2,
                                             scope=down_scope)
                    skips['net7'] = slim.conv2d(tensor, 512, [3, 3],
                                                scope='conv7')

                with tf.variable_scope('alpha_prediction'):
                    alpha_logits = decode(self.alpha_channel)

                with tf.variable_scope('reflectance_prediction'):
                    reflectance_logits = decode(self.reflectance_channel)
        return alpha_logits, reflectance_logits
Exemple #19
0
def STsingle(inputs, outputs, loss_weight, labels, is_training=True):
    """Build a two-branch network: action classifier plus flow pyramid.

    Both images are mean-subtracted and divided by 255, concatenated on
    axis 3 and passed through a VGG-16 shaped stack of shared convolutions.
    The spatial branch flattens ``pool5`` and classifies it into 101 classes.
    The temporal branch predicts a 2-channel flow at five resolutions
    (``pr5`` ... ``pr1``) and scores each with ``loss_interp`` against
    locally response-normalized copies of the two images.

    The weighted sum of all losses is registered through
    ``slim.losses.add_loss``.

    NOTE(review): ``tf.concat(3, [...])`` and the positional
    ``sparse_softmax_cross_entropy_with_logits(fc8, labels)`` use the
    pre-1.0 TensorFlow argument order; they are left as-is to match the
    TensorFlow version this code targets.

    Args:
        inputs: First image tensor.
        outputs: Second image tensor, same layout as ``inputs``.
        loss_weight: Indexable with at least five entries; entry ``i`` weights
            the loss of pyramid level ``i + 1``. Entry 0 also weights the
            action loss.
        labels: Class ids passed to the sparse softmax cross-entropy.
        is_training: When False, the two dropout layers in the spatial branch
            are disabled. Defaults to True, which is what the layers
            previously always used.

    Returns:
        A 3-tuple ``(losses, flows_all, predictions)``:
        ``losses`` is ``[loss1, ..., loss5, actionLoss]``; ``flows_all`` holds
        the five flow predictions multiplied by their flow scales, finest
        first; ``predictions`` is ``[prev1, actionPredictions]`` where
        ``prev1`` is the second value returned by ``loss_interp`` at level 1.
    """
    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                        activation_fn=tf.nn.elu):

        # ---- Shared conv layers ----
        conv1_1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')

        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')

        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')

        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')

        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')

        # ---- Spatial branch ----
        flatten5 = slim.flatten(pool5, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        # Dropout follows is_training so it can be switched off at inference.
        dropout6 = slim.dropout(fc6, 0.9, is_training=is_training, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, is_training=is_training, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(fc8, labels)
        actionLoss = tf.reduce_mean(cross_entropy)

        # ---- Temporal branch ----
        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.3
        alpha_s = 0.3
        lambda_smooth = 0.8
        # NOTE(review): 18 values are given for a 3x3x2x2 (36-element)
        # constant; this relies on tf.constant filling the remainder with the
        # last value (0) -- confirm for the TensorFlow version in use.
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2       # for deconvolution

        # Expanding part: at each level predict a flow, score it against the
        # images resized to that level, then upsample both the features and
        # the flow and concatenate them with the next finer pooled features.
        pr5 = slim.conv2d(pool5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625    # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_5, FlowDeltaWeights)
        upconv4 = slim.conv2d_transpose(pool5, 256, [2*scale, 2*scale], stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat(3, [pool4, upconv4, pr5to4])

        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25    # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_4, FlowDeltaWeights)
        upconv3 = slim.conv2d_transpose(concat4, 128, [2*scale, 2*scale], stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat(3, [pool3, upconv3, pr4to3])

        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5    # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_3, FlowDeltaWeights)
        upconv2 = slim.conv2d_transpose(concat3, 64, [2*scale, 2*scale], stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat(3, [pool2, upconv2, pr3to2])

        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0    # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_2, FlowDeltaWeights)
        upconv1 = slim.conv2d_transpose(concat2, 32, [2*scale, 2*scale], stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat(3, [pool1, upconv1, pr2to1])

        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0    # (*20/2)
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_1, FlowDeltaWeights)

        # Adding intermediate losses
        # NOTE(review): actionLoss reuses loss_weight[0], the same weight as
        # loss1 -- confirm this is intended rather than a dedicated entry.
        all_loss = loss_weight[0]*loss1["total"] + loss_weight[1]*loss2["total"] + loss_weight[2]*loss3["total"] + \
                    loss_weight[3]*loss4["total"] + loss_weight[4]*loss5["total"] + loss_weight[0]*actionLoss
        slim.losses.add_loss(all_loss)

        losses = [loss1, loss2, loss3, loss4, loss5, actionLoss]
        flows_all = [pr1*flow_scale_1, pr2*flow_scale_2, pr3*flow_scale_3, pr4*flow_scale_4, pr5*flow_scale_5]

        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
Exemple #20
0
def deconv2d(input_, output_dim, ks=4, s=2, stddev=0.02, name="deconv2d"):
    """Apply a linear transposed convolution inside variable scope `name`.

    The layer is built with `slim.conv2d_transpose` using 'SAME' padding,
    no activation function and no bias term.

    Args:
        input_: tensor handed to `slim.conv2d_transpose` as its input.
        output_dim: number of output channels.
        ks: kernel size argument forwarded to the layer.
        s: stride argument forwarded to the layer.
        stddev: standard deviation of the truncated-normal weight initializer.
        name: variable scope the layer is created under.

    Returns:
        The output tensor of the transposed convolution.
    """
    with tf.variable_scope(name):
        weight_init = tf.truncated_normal_initializer(stddev=stddev)
        upsampled = slim.conv2d_transpose(
            input_,
            output_dim,
            ks,
            s,
            padding='SAME',
            activation_fn=None,
            weights_initializer=weight_init,
            biases_initializer=None)
    return upsampled
Exemple #21
0
def ppm(input, end_points, name=None):
    """Build four average-pooling branches on `input` and record their outputs.

    Every intermediate tensor is stored in `end_points` under the same string
    that is used as its slim scope. Nothing is returned; the function works
    purely by mutating `end_points`.

    Branch 0 pools with a [10, 37] window at stride 1, applies a 1x1
    convolution and then a [10, 37] transposed convolution with stride 2.
    Branches 1-3 pool with a window equal to their stride ([5, 18], [3, 12],
    [2, 7]) and apply a 1x1 convolution only.

    Args:
        input: tensor fed to every pooling branch.
            NOTE(review): the parameter name shadows the builtin `input`; it
            is kept because renaming it would break keyword callers.
        end_points: mutable mapping that receives the branch tensors.
        name: optional string prefix for every scope / key. `None` (the
            default) is treated as an empty prefix; previously the default
            raised `TypeError` on the first string concatenation.

    Returns:
        None.
    """
    prefix = '' if name is None else name
    with tf.variable_scope('Pyramid_Pooling'):
        # Branch 0: its keys are chained onto each other
        # ('<prefix>branch_0' -> '...conv_0' -> '...up'), unlike branches 1-3.
        # The exact strings are preserved so existing variable names and
        # end_points keys stay valid.
        end_point = prefix + 'branch_0'
        net = slim.avg_pool2d(input, [10, 37],
                              stride=1,
                              padding='VALID',
                              scope=end_point)
        end_points[end_point] = net
        end_point = end_point + 'conv_0'
        net = slim.conv2d(net,
                          1, [1, 1],
                          stride=1,
                          padding='SAME',
                          scope=end_point)
        end_points[end_point] = net
        end_point = end_point + 'up'
        net = slim.conv2d_transpose(net,
                                    1, [10, 37],
                                    stride=2,
                                    padding='VALID',
                                    scope=end_point)
        end_points[end_point] = net

        # Branches 1-3 share one shape: non-overlapping average pooling
        # (stride == window) followed by a single-channel 1x1 convolution.
        for index, window in ((1, [5, 18]), (2, [3, 12]), (3, [2, 7])):
            end_point = prefix + 'branch_%d' % index
            net = slim.avg_pool2d(input, list(window),
                                  stride=list(window),
                                  padding='VALID',
                                  scope=end_point)
            end_points[end_point] = net
            end_point = prefix + 'conv_%d' % index
            net = slim.conv2d(net,
                              1, [1, 1],
                              stride=1,
                              padding='SAME',
                              scope=end_point)
            end_points[end_point] = net
Exemple #22
0
def pose_exp_net(tgt_image,
                 src_image_stack,
                 do_exp=False,
                 is_training=True,
                 isReuse=None):
    """Build the shared pose / explainability-mask network.

    `tgt_image` and `src_image_stack` are concatenated along axis 3 and passed
    through five stride-2 convolutions shared by both heads. The pose head
    adds two more stride-2 convolutions and a 1x1 prediction layer whose
    output is averaged over axes 1 and 2, reshaped to [-1, 6 * num_source]
    and scaled by 0.01. When `do_exp` is true, a stack of transposed
    convolutions produces four mask tensors with `num_source * 2` channels.

    Args:
        tgt_image: target image tensor.
        src_image_stack: source images tensor, concatenated with the target
            on axis 3. NOTE(review): `num_source` is hard-coded to 2 here
            rather than derived from this tensor - confirm that is intended.
        do_exp: whether to build the mask decoder.
        is_training: accepted but not used by this function.
        isReuse: forwarded as `reuse` to the 'pose_exp_net' variable scope.

    Returns:
        A tuple `(pose_final, [mask1, mask2, mask3, mask4], end_points)`;
        the four masks are all `None` when `do_exp` is false.
    """
    inputs = tf.concat([tgt_image, src_image_stack], axis=3)
    num_source = 2
    with tf.variable_scope('pose_exp_net', reuse=isReuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            normalizer_fn=None,
                            weights_regularizer=slim.l2_regularizer(0.05),
                            activation_fn=tf.nn.relu,
                            outputs_collections=end_points_collection):
            # cnv1 to cnv5 are shared between pose and explainability
            # prediction: (scope, output depth, square kernel size).
            shared_spec = (
                ('cnv1', 16, 7),
                ('cnv2', 32, 5),
                ('cnv3', 64, 3),
                ('cnv4', 128, 3),
                ('cnv5', 256, 3),
            )
            shared = inputs
            for scope_name, depth, ksize in shared_spec:
                shared = slim.conv2d(shared, depth, [ksize, ksize],
                                     stride=2, scope=scope_name)

            # Pose specific layers
            with tf.variable_scope('pose'):
                pose_feat = shared
                for scope_name in ('cnv6', 'cnv7'):
                    pose_feat = slim.conv2d(pose_feat, 256, [3, 3],
                                            stride=2, scope=scope_name)
                pose_pred = slim.conv2d(pose_feat,
                                        6 * num_source, [1, 1],
                                        scope='pred',
                                        stride=1,
                                        normalizer_fn=None,
                                        activation_fn=None)
                pose_avg = tf.reduce_mean(pose_pred, [1, 2])
                # Empirically we found that scaling by a small constant
                # facilitates training.
                pose_final = 0.01 * tf.reshape(pose_avg, [-1, 6 * num_source])

            # Exp mask specific layers; levels without an entry stay None.
            masks = {}
            if do_exp:
                with tf.variable_scope('exp'):
                    decoded = slim.conv2d_transpose(shared,
                                                    256, [3, 3],
                                                    stride=2,
                                                    scope='upcnv5')
                    # (level, output depth, square kernel size); each level
                    # upsamples first and then predicts its mask, in the same
                    # creation order as a hand-unrolled version.
                    decoder_spec = ((4, 128, 3), (3, 64, 3), (2, 32, 5),
                                    (1, 16, 7))
                    for level, depth, ksize in decoder_spec:
                        decoded = slim.conv2d_transpose(
                            decoded,
                            depth, [ksize, ksize],
                            stride=2,
                            scope='upcnv%d' % level)
                        masks[level] = slim.conv2d(
                            decoded,
                            num_source * 2, [ksize, ksize],
                            stride=1,
                            scope='mask%d' % level,
                            normalizer_fn=None,
                            activation_fn=None)

            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            mask_list = [masks.get(level) for level in (1, 2, 3, 4)]
            return pose_final, mask_list, end_points
Exemple #23
0
 def deconv(self, x, num_out_layers, kernel_size, scale):
     """Transposed convolution with a manual pad-then-crop around it.

     `x` is padded by one element on each side of axes 1 and 2, upsampled
     with `slim.conv2d_transpose` (stride `scale`, 'SAME' padding), and the
     result is cropped by dropping the first three and the last one entries
     along axes 1 and 2.

     Args:
         x: input tensor; axes 1 and 2 are the ones padded and cropped.
         num_out_layers: number of output channels.
         kernel_size: kernel size forwarded to the layer.
         scale: stride forwarded to the layer.

     Returns:
         The cropped output tensor.
     """
     border = [[0, 0], [1, 1], [1, 1], [0, 0]]
     padded = tf.pad(x, border)
     upsampled = slim.conv2d_transpose(padded, num_out_layers, kernel_size,
                                       scale, 'SAME')
     # NOTE(review): the fixed 3/-1 crop presumably matches scale == 2 only -
     # confirm before calling with another stride.
     cropped = upsampled[:, 3:-1, 3:-1, :]
     return cropped
Exemple #24
0
def FLowNetSimple(data):
    """Build a FlowNetSimple-style encoder/decoder and return its flow outputs.

    The encoder is a chain of ten `slim.conv2d` layers ('conv1' ... 'conv6_1').
    A two-channel flow is predicted from the deepest features ('pred6'), then
    five refinement levels each upsample the current features and the current
    flow with transposed convolutions, concatenate them with an encoder skip
    tensor on axis 3, and predict a new two-channel flow.

    Code adapted from:
    https://github.com/linjian93/tf-flownet/blob/master/train_flownet_simple.py

    Args:
        data: input tensor fed to 'conv1'.

    Returns:
        The tuple `(predict1, predict3, predict2, predict4, predict5,
        predict6)`. Note the order is not monotonic (level 3 comes before
        level 2); it is kept as-is because callers unpack it positionally.
    """
    # (scope, output depth, square kernel size, stride)
    encoder_spec = (
        ('conv1', 64, 7, 2),
        ('conv2', 128, 5, 2),
        ('conv3', 256, 5, 2),
        ('conv3_1', 256, 3, 1),
        ('conv4', 512, 3, 2),
        ('conv4_1', 512, 3, 1),
        ('conv5', 512, 3, 2),
        ('conv5_1', 512, 3, 1),
        ('conv6', 1024, 3, 2),
        ('conv6_1', 1024, 3, 1),
    )
    skips = {}
    net = data
    for scope_name, depth, ksize, stride in encoder_spec:
        net = slim.conv2d(net, depth, [ksize, ksize], stride,
                          scope=scope_name)
        skips[scope_name] = net

    flow = slim.conv2d(net,
                       2, [3, 3],
                       1,
                       activation_fn=None,
                       scope='pred6')
    flows = {6: flow}

    # (level, encoder skip scope, upsampled feature depth). The original
    # author annotated levels 5..1 as 12*16, 24*32, 48*64, 96*128 and
    # 192*256 flow respectively.
    decoder_spec = (
        (5, 'conv5_1', 512),
        (4, 'conv4_1', 256),
        (3, 'conv3_1', 128),
        (2, 'conv2', 64),
        (1, 'conv1', 64),
    )
    for level, skip_scope, depth in decoder_spec:
        # 'deconv5' is the one transposed convolution that is created without
        # an explicit padding argument, so it keeps relying on whatever
        # default is in effect; every other level passes 'SAME' positionally.
        feature_padding = () if level == 5 else ('SAME',)
        up_features = slim.conv2d_transpose(net, depth, [4, 4], 2,
                                            *feature_padding,
                                            scope='deconv%d' % level)
        up_flow = slim.conv2d_transpose(flow,
                                        2, [4, 4],
                                        2,
                                        'SAME',
                                        scope='deconvflow%d' % (level + 1))
        net = tf.concat((skips[skip_scope], up_features, up_flow),
                        axis=3,
                        name='concat%d' % level)
        flow = slim.conv2d(net,
                           2, [3, 3],
                           1,
                           'SAME',
                           activation_fn=None,
                           scope='predict%d' % level)
        flows[level] = flow

    return (flows[1], flows[3], flows[2], flows[4], flows[5], flows[6])
def net_structure(img1, img2, boundary1, boundary2):
    """Build a two-stream correlation flow network with a refinement decoder.

    Each image is concatenated with its boundary tensor on axis 3. Both
    streams go through the same three stride-2 convolutions (the second
    stream reuses the first stream's variables), the deepest features of the
    two streams are correlated, and the result is processed by further
    convolutions and a four-level refinement decoder.

    The function relies on names that are not defined inside it:
    `weight_decay`, `img_height`, `img_width`, `LeakyReLU`, `pad`, `antipad`
    and `correlation`.

    Args:
        img1: first image tensor.
        img2: second image tensor.
        boundary1: tensor concatenated with `img1` on axis 3.
        boundary2: tensor concatenated with `img2` on axis 3.

    Returns:
        A dict with the intermediate flow predictions 'predict_flow6' ...
        'predict_flow2' and 'flow', which is `predict_flow2 * 20.0` resized
        bilinearly to `[img_height, img_width]`.
    """
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        # He (aka MSRA) weight initialization
                        weights_initializer=slim.variance_scaling_initializer(),
                        activation_fn=LeakyReLU,
                        # Padding is done by hand (pad / antipad) to match
                        # the original Caffe code
                        padding='VALID'):
        l2_penalty = slim.l2_regularizer(weight_decay)
        stream_a = tf.concat([img1, boundary1], axis=3)
        stream_b = tf.concat([img2, boundary2], axis=3)
        with slim.arg_scope([slim.conv2d], weights_regularizer=l2_penalty):
            with slim.arg_scope([slim.conv2d], stride=2):
                # (scope, output depth, kernel size, manual padding amount)
                tower_spec = (
                    ('conv1', 64, 7, 3),
                    ('conv2', 128, 5, 2),
                    ('conv3', 256, 5, 2),
                )
                # Stream A creates the variables; stream B passes reuse=True
                # so that it shares them.
                towers = []
                for stream, sharing in ((stream_a, {}),
                                        (stream_b, {'reuse': True})):
                    activations = []
                    tower_net = stream
                    for scope_name, depth, ksize, margin in tower_spec:
                        tower_net = slim.conv2d(pad(tower_net, margin),
                                                depth, ksize,
                                                scope=scope_name, **sharing)
                        activations.append(tower_net)
                    towers.append(activations)
                conv_a_2, conv_a_3 = towers[0][1], towers[0][2]
                conv_b_3 = towers[1][2]

                # Compute cross correlation with leaky relu activation
                cc = correlation.correlation(conv_a_3, conv_b_3,
                                             1, 20, 1, 2, 20)
                cc_relu = LeakyReLU(cc)

            # Combine cross correlation results with convolution of
            # feature map A, concatenated along the channels axis
            redirected = slim.conv2d(conv_a_3, 32, 1, scope='conv_redir')
            merged = tf.concat([redirected, cc_relu], axis=3)

            conv3_1 = slim.conv2d(pad(merged), 256, 3, scope='conv3_1')
            with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
                conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
                conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
                conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
                conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
            conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
            conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')

            # START: Refinement Network
            flows = {}
            with slim.arg_scope([slim.conv2d_transpose],
                                biases_initializer=None):
                # (level, upsampled feature depth, skip tensor from encoder)
                refinement_spec = (
                    (6, 512, conv5_1),
                    (5, 256, conv4_1),
                    (4, 128, conv3_1),
                    (3, 64, conv_a_2),
                )
                features = conv6_1
                for level, depth, skip in refinement_spec:
                    below = level - 1
                    flows[level] = slim.conv2d(
                        pad(features), 2, 3,
                        scope='predict_flow%d' % level,
                        activation_fn=None)
                    up_features = antipad(slim.conv2d_transpose(
                        features, depth, 4,
                        stride=2,
                        scope='deconv%d' % below))
                    up_flow = antipad(slim.conv2d_transpose(
                        flows[level], 2, 4,
                        stride=2,
                        scope='upsample_flow%dto%d' % (level, below),
                        activation_fn=None))
                    features = tf.concat([skip, up_features, up_flow], axis=3)

                flows[2] = slim.conv2d(pad(features), 2, 3,
                                       scope='predict_flow2',
                                       activation_fn=None)
            # END: Refinement Network

            # Final flow: the finest prediction, scaled by 20 and resized.
            # (An experimental variant that fused all resized predictions
            # through an extra convolution was previously kept here as
            # commented-out code.)
            # TODO: Look at Accum (train) or Resample (deploy) to see if we
            # need to do something different
            scaled_flow = flows[2] * 20.0
            flow = tf.image.resize_bilinear(scaled_flow,
                                            tf.stack([img_height, img_width]),
                                            align_corners=True)

            return {
                'predict_flow6': flows[6],
                'predict_flow5': flows[5],
                'predict_flow4': flows[4],
                'predict_flow3': flows[3],
                'predict_flow2': flows[2],
                'flow': flow,
            }
Exemple #26
0
    def create_generator(self,
                         z,
                         y,
                         scope_name,
                         is_training=True,
                         reuse=False):
        """Build a conditional generator under variable scope `scope_name`.

        `z` is concatenated with `y` on axis 1 and passed through two fully
        connected layers (with `y` concatenated again in between). The result
        is reshaped to a quarter-resolution feature map and upsampled by two
        stride-2 transposed convolutions; `conv_cond_concat` attaches the
        reshaped condition `yb` before each of them. Batch normalization is
        applied to every layer except the last.

        Reads `self.image_size` (index 1 is used as height, index 0 as
        width), `self.batch_size`, `self.y_dim`, `self.gfc_dim`,
        `self.gf_dim` and `self.c_dim`.

        Args:
            z: input tensor, concatenated with `y` on axis 1.
            y: condition tensor; also reshaped to
                `[batch_size, 1, 1, y_dim]`.
            scope_name: name of the enclosing variable scope.
            is_training: forwarded to batch normalization.
            reuse: when true, variables of the scope are reused.

        Returns:
            The output tensor of the final sigmoid-activated transposed
            convolution, which has `self.c_dim` output channels.
        """
        with tf.variable_scope(scope_name) as scope:
            if reuse:
                scope.reuse_variables()

            batch_norm_params = {
                # decay is also known as momentum, they are the same
                # (slim's own default would be 0.999)
                'decay': 0.9,
                'updates_collections': None,
                # slim's own default would be 0.001
                'epsilon': 1e-5,
                'scale': True,
                'is_training': is_training,
                'scope': 'batch_norm',
            }

            # first argument is where to apply these
            with arg_scope([
                    layers.conv2d, layers.conv2d_transpose,
                    layers.fully_connected
            ],
                           normalizer_fn=layers.batch_norm,
                           normalizer_params=batch_norm_params,
                           weights_initializer=layers.xavier_initializer(
                               uniform=False),
                           biases_initializer=tf.constant_initializer(0.0)):
                # taken from https://github.com/carpedm20/DCGAN-tensorflow/blob/master/model.py
                s_h, s_w = self.image_size[1], self.image_size[0]
                # Only the quarter-resolution sizes are needed: the two
                # stride-2 transposed convolutions below do the upsampling.
                s_h4 = int(s_h / 4)
                s_w4 = int(s_w / 4)

                yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
                z = tf.concat([z, y], 1)

                h0 = slim.fully_connected(
                    z,
                    num_outputs=self.gfc_dim,
                    scope='g_h0_lin',
                    activation_fn=slim.nn.relu,
                )

                h0 = tf.concat([h0, y], 1)

                h1 = slim.fully_connected(
                    h0,
                    num_outputs=self.gf_dim * 2 * s_h4 * s_w4,
                    scope='g_h1_lin',
                    activation_fn=slim.nn.relu,
                )

                h1 = tf.reshape(h1,
                                [self.batch_size, s_h4, s_w4, self.gf_dim * 2])

                h1 = conv_cond_concat(h1, yb)

                h2 = slim.conv2d_transpose(
                    h1,
                    num_outputs=self.gf_dim * 2,
                    scope='g_h2',
                    kernel_size=[5, 5],
                    stride=2,
                    activation_fn=slim.nn.relu,
                )

                h2 = conv_cond_concat(h2, yb)

                # Output layer: no batch normalization, sigmoid activation.
                h3 = slim.conv2d_transpose(h2,
                                           num_outputs=self.c_dim,
                                           scope='g_h3',
                                           kernel_size=[5, 5],
                                           stride=2,
                                           normalizer_fn=None,
                                           activation_fn=slim.nn.sigmoid)

                return h3
 def generator(self, inputs, content_extractor_layers, reuse=False):
     """Build the generator, merging in content-extractor feature maps.

     Three stages each run a stride-2 transposed convolution, batch norm, a
     stride-1 3x3 convolution and batch norm again, then concatenate the
     result with one entry of `content_extractor_layers` ('conv4_1',
     'conv3_1', 'conv2_1' in that order). A final tanh-activated transposed
     convolution with 3 output channels produces the result.

     Args:
         inputs: input tensor; (batch, 1, 1, 128) according to the original
             author's note.
         content_extractor_layers: mapping that provides the 'conv4_1',
             'conv3_1' and 'conv2_1' tensors.
         reuse: forwarded to the 'generator' variable scope.

     Returns:
         The output tensor of the last transposed convolution;
         (batch_size, 32, 32, 3) according to the original author's note.
     """
     # (stage, depth, transposed-conv kernel, extra transposed-conv kwargs,
     #  scope of the first batch norm, content-extractor key to concatenate)
     stage_spec = (
         (1, 512, [4, 4], {'padding': 'VALID'}, 'bn1_1', 'conv4_1'),
         (2, 256, [3, 3], {}, 'bn2', 'conv3_1'),
         (3, 128, [3, 3], {}, 'bn3', 'conv2_1'),
     )
     with tf.variable_scope('generator', reuse=reuse):
         with slim.arg_scope(
                 [slim.conv2d_transpose],
                 padding='SAME',
                 activation_fn=None,
                 stride=2,
                 weights_initializer=tf.contrib.layers.xavier_initializer()):
             with slim.arg_scope([slim.batch_norm],
                                 decay=0.95,
                                 center=True,
                                 scale=True,
                                 activation_fn=tf.nn.relu,
                                 is_training=(self.mode == 'train')):
                 with slim.arg_scope(
                         [slim.conv2d],
                         padding='SAME',
                         activation_fn=None,
                         stride=1,
                         weights_initializer=tf.contrib.layers.
                         xavier_initializer()):
                     net = inputs
                     for stage, depth, up_kernel, up_extra, first_bn, skip_key in stage_spec:
                         # Upsample, normalize, refine, normalize ...
                         net = slim.conv2d_transpose(
                             net,
                             depth,
                             up_kernel,
                             scope='conv_transpose%d_1' % stage,
                             **up_extra)
                         net = slim.batch_norm(net, scope=first_bn)
                         net = slim.conv2d(
                             net,
                             depth, [3, 3],
                             scope='conv_transpose%d_2' % stage)
                         net = slim.batch_norm(net, scope='bn%d_2' % stage)
                         # ... then attach the matching content features on
                         # axis 3 (axis-first tf.concat argument order).
                         net = tf.concat(
                             3, (net, content_extractor_layers[skip_key]))
                     output = slim.conv2d_transpose(
                         net,
                         3, [3, 3],
                         activation_fn=tf.nn.tanh,
                         scope='conv_transpose4')
                     return output
Exemple #28
0
def deconv(x, *args, pad=1, **kwargs):
    """Pad `x` explicitly, then apply a 'VALID' transposed convolution.

    Args:
        x: input tensor.
        *args: positional arguments forwarded to `slim.conv2d_transpose`
            after the input.
        pad: amount handed to the `padding` helper (keyword-only).
        **kwargs: keyword arguments forwarded to `slim.conv2d_transpose`.

    Returns:
        The output tensor of the transposed convolution.
    """
    valid_padded_ops = [slim.conv2d, slim.conv2d_transpose]
    with slim.arg_scope(valid_padded_ops, padding='VALID'):
        # The helper runs inside the arg scope, so any slim layer it builds
        # also defaults to 'VALID'.
        padded = padding(x, pad)
        upsampled = slim.conv2d_transpose(padded, *args, **kwargs)
    return upsampled
Exemple #29
0
def VGG16(photo_source, photo_target, geo_source, geo_target, loss_weight):
    """Build a VGG16-style encoder with a five-scale flow decoder and losses.

    The two photos are concatenated on axis 3 and pushed through five
    conv/pool stages.  Starting from ``pool5`` a 2-channel prediction
    (``pr5`` .. ``pr1``) is produced per stage, scored by ``loss_interp``
    against resized, locally-response-normalised copies of ``geo_source`` and
    ``geo_target``, and then upsampled together with the stage features into
    the next finer stage.  The weighted sum of the five total losses is
    registered through ``slim.losses.add_loss``.

    Args:
        photo_source, photo_target: tensors concatenated along axis 3 to form
            the network input (presumably NHWC image batches -- confirm with
            the caller).
        geo_source, geo_target: tensors that are normalised and bilinearly
            resized to every prediction resolution for ``loss_interp``.
        loss_weight: indexable with at least five entries; entry ``k`` weights
            the ``"total"`` loss of prediction ``pr<k+1>``.

    Returns:
        ``(losses, flows_all, prev1)``: ``losses`` is ``[loss1, ..., loss5]``
        as returned by ``loss_interp``; ``flows_all`` holds every prediction
        multiplied by its flow scale, ``pr1`` first; ``prev1`` is the second
        value ``loss_interp`` returned for ``pr1``.
    """
    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    lrn_source = tf.nn.local_response_normalization(geo_source,
                                                    depth_radius=4,
                                                    beta=0.7)
    lrn_target = tf.nn.local_response_normalization(geo_target,
                                                    depth_radius=4,
                                                    beta=0.7)

    # original use leaky ReLU, now we use elu
    conv_defaults = dict(
        weights_initializer=initializers.xavier_initializer(),
        weights_regularizer=None,
        biases_initializer=init_ops.zeros_initializer,
        biases_regularizer=None,
        activation_fn=tf.nn.elu,
    )

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], **conv_defaults):

        # Contracting part: per stage a run of 3x3 convolutions followed by
        # a 2x2 max-pool.  Entries are ((conv scope, depth), ...), pool scope.
        encoder_layout = (
            ((('conv1_1', 64), ('conv1_2', 64)), 'pool1'),
            ((('conv2_1', 128), ('conv2_2', 128)), 'pool2'),
            ((('conv3_1', 256), ('conv3_2', 256), ('conv3_3', 256)), 'pool3'),
            ((('conv4_1', 512), ('conv4_2', 512), ('conv4_3', 512)), 'pool4'),
            ((('conv5_1', 512), ('conv5_2', 512), ('conv5_3', 512)), 'pool5'),
        )
        net = tf.concat(3, [photo_source, photo_target])
        pooled = []
        for conv_specs, pool_scope in encoder_layout:
            for conv_scope, depth in conv_specs:
                net = slim.conv2d(net, depth, [3, 3], scope=conv_scope)
            net = slim.max_pool2d(net, [2, 2], scope=pool_scope)
            pooled.append(net)
        pool1, pool2, pool3, pool4, pool5 = pooled

        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
        scale = 2  # for deconvolution

        def two_channel_filter(kernel):
            # 3x3 kernel -> [3, 3, 1, 1] filter tiled over two input channels.
            single = tf.reshape(tf.constant(kernel, tf.float32), [3, 3, 1, 1])
            return tf.tile(single, [1, 1, 2, 1])

        deltaWeights = {}
        # Calculating flow derivatives
        flow_width_filter = two_channel_filter(
            [[0, 0, 0], [0, 1, -1], [0, 0, 0]])
        flow_height_filter = two_channel_filter(
            [[0, 0, 0], [0, 1, 0], [0, -1, 0]])
        deltaWeights["flow_width_filter"] = flow_width_filter
        deltaWeights["flow_height_filter"] = flow_height_filter

        # Switch kept from the original code: image-gradient filters are
        # only built when this flag is flipped to True.
        needImageGradients = False
        deltaWeights["needImageGradients"] = needImageGradients
        if needImageGradients:
            # Calculating image derivatives
            sobel_x_filter = tf.reshape(
                tf.constant([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], tf.float32),
                [3, 3, 1, 1])
            sobel_y_filter = tf.transpose(sobel_x_filter, [1, 0, 2, 3])
            deltaWeights["sobel_x_filter"] = sobel_x_filter
            deltaWeights["sobel_y_filter"] = sobel_y_filter

        # Expanding part, coarse to fine.  Each row:
        # (prediction scope, flow scale, skip tensor for the next stage,
        #  upconv depth, upconv scope, prediction-upsampling scope).
        # The last row has nothing after it, hence the None placeholders.
        decoder_plan = (
            ('pr5', 0.625, pool4, 256, 'upconv4', 'up_pr5to4'),  # (*20/32)
            ('pr4', 1.25, pool3, 128, 'upconv3', 'up_pr4to3'),  # (*20/16)
            ('pr3', 2.5, pool2, 64, 'upconv2', 'up_pr3to2'),  # (*20/8)
            ('pr2', 5.0, pool1, 32, 'upconv1', 'up_pr2to1'),  # (*20/4)
            ('pr1', 10.0, None, None, None, None),  # (*20/2)
        )

        features = pool5
        coarse_to_fine = []  # (prediction, flow scale, loss dict) per stage
        prev1 = None
        for (pr_scope, flow_scale, skip, up_depth, up_scope,
             pr_up_scope) in decoder_plan:
            pr = slim.conv2d(features, 2, [3, 3], activation_fn=None,
                             scope=pr_scope)
            # Static spatial size of the prediction; the normalised images
            # are resized to it before the loss is evaluated.
            height = pr.get_shape()[1].value
            width = pr.get_shape()[2].value
            resized_source = tf.image.resize_bilinear(lrn_source,
                                                      [height, width])
            resized_target = tf.image.resize_bilinear(lrn_target,
                                                      [height, width])
            stage_loss, prev1 = loss_interp(pr, resized_source,
                                            resized_target, epsilon, alpha_c,
                                            alpha_s, lambda_smooth,
                                            flow_scale, deltaWeights)
            coarse_to_fine.append((pr, flow_scale, stage_loss))

            if skip is None:
                # Finest stage reached; prev1 now belongs to pr1.
                break

            upconv = slim.conv2d_transpose(features,
                                           up_depth, [2 * scale, 2 * scale],
                                           stride=scale,
                                           scope=up_scope)
            pr_up = slim.conv2d_transpose(pr,
                                          2, [2 * scale, 2 * scale],
                                          stride=scale,
                                          activation_fn=None,
                                          scope=pr_up_scope)
            features = tf.concat(3, [skip, upconv, pr_up])

        fine_to_coarse = coarse_to_fine[::-1]
        losses = [entry[2] for entry in fine_to_coarse]

        # Adding intermediate losses
        all_loss = loss_weight[0] * losses[0]["total"]
        for idx in range(1, len(losses)):
            all_loss = all_loss + loss_weight[idx] * losses[idx]["total"]
        slim.losses.add_loss(all_loss)

        flows_all = [pr * flow_scale for pr, flow_scale, _ in fine_to_coarse]

        return losses, flows_all, prev1
Exemple #30
0
def dcgan_generator(z, flags, scope=None, reuse=None):
    """DCGAN-style generator network.

    A fully connected layer is reshaped to ``[-1, 4, 4, 4 * flags.dim_g]``
    and followed by three stride-2 transposed convolutions; the last one
    emits 3 channels through ``tf.tanh``.

    Args:
        z: input to the first fully connected layer.
        flags: configuration object; this function reads ``nonlinearity_g``,
            ``downsample_conv_filt_size``, ``extra_conv_filt_size``,
            ``norm_g``, ``dim_g``, ``dim_d``, ``extra_top_conv`` and
            ``extra_depth_g``.
        scope: name handed to ``tf.variable_scope``.
        reuse: ``reuse`` argument handed to ``tf.variable_scope``.

    Returns:
        The output tensor of the final ``conv3`` transposed convolution.
    """
    nonlinearity = nonlinearity_fn(flags.nonlinearity_g, False)
    upsample_kernel = flags.downsample_conv_filt_size
    extra_kernel = flags.extra_conv_filt_size
    normalizer = contrib_slim.batch_norm if flags.norm_g else None

    # Settings shared by every hidden layer (all layers except the output).
    hidden = dict(normalizer_fn=normalizer, activation_fn=nonlinearity)

    with tf.variable_scope(scope, reuse=reuse):
        net = contrib_slim.fully_connected(
            z, 4 * 4 * (4 * flags.dim_g), scope='fc', **hidden)
        net = tf.reshape(net, [-1, 4, 4, 4 * flags.dim_g])

        if flags.extra_top_conv:
            # NOTE(review): the width here comes from flags.dim_d although
            # this is the generator -- confirm this is intentional.
            net = contrib_slim.conv2d(
                net, 4 * flags.dim_d, extra_kernel, scope='extratopconv',
                **hidden)

        # Two hidden upsampling stages, each optionally followed by
        # flags.extra_depth_g stride-1 convolutions of the same width.
        hidden_stages = (
            ('conv1', 'extraconv1.{}', 2 * flags.dim_g),
            ('conv2', 'extraconv2.{}', flags.dim_g),
        )
        for up_scope, extra_scope, width in hidden_stages:
            net = contrib_slim.conv2d_transpose(
                net, width, upsample_kernel, scope=up_scope, stride=2,
                **hidden)
            for i in range(flags.extra_depth_g):
                net = contrib_slim.conv2d(
                    net, width, extra_kernel, scope=extra_scope.format(i),
                    **hidden)

        # Output layer: no normalizer argument is passed, tanh activation.
        net = contrib_slim.conv2d_transpose(
            net, 3, upsample_kernel, scope='conv3', stride=2,
            activation_fn=tf.tanh)

        return net
Exemple #31
0
    def branch_1(self):
        """Build branch 1 and return its RGB output plus both bottleneck maps.

        Reads ``self.input_img``, ``self.kernel_size``,
        ``self.kernel_size_dense``, ``self.growth_rate``,
        ``self.layers_per_block``, ``self.bottle_output`` and ``self.reuse``,
        as well as the module-level ``Scale``.  All variables are created
        under the ``branch_1_0`` variable scope.

        Returns:
            ``(rgb, fc11, fc12)``: ``rgb`` is the concatenation on axis 3 of
            the three colour planes obtained via ``tf.depth_to_space``;
            ``fc11`` is the ``bottle_1_0`` output (before the transposed
            convolution) and ``fc12`` the ``bottle_1_1`` output.
        """
        ################################################################################################################
        ####    Branch_1_0: Input: RawImage Output: fc1: bottle layer output just before  deconv                    ####
        ####                                        helper1: concat of feature map with size H*W for final deconv   ####
        ################################################################################################################
        with tf.variable_scope('branch_1_0', reuse=self.reuse):
            pack_img = model_tools.pack_raw(self.input_img)

            conv_1_0_low = slim.conv2d(inputs=pack_img,
                                       num_outputs=128,
                                       kernel_size=self.kernel_size,
                                       scope='conv_1_0_low',
                                       reuse=self.reuse,
                                       activation_fn=model_tools.lrelu)

            dense_1_0, next_in_0 = model_tools.block(conv_1_0_low,
                                                     self.growth_rate,
                                                     self.layers_per_block,
                                                     self.kernel_size_dense,
                                                     self.reuse, 'dense_1_0')
            # Stride-2 convolution used in place of pooling.
            pool_in_0 = slim.conv2d(inputs=next_in_0,
                                    num_outputs=128,
                                    kernel_size=self.kernel_size,
                                    stride=2,
                                    scope='pool_in_0',
                                    reuse=self.reuse,
                                    activation_fn=model_tools.lrelu)
            dense_1_1, next_in_1 = model_tools.block(pool_in_0,
                                                     self.growth_rate,
                                                     self.layers_per_block,
                                                     self.kernel_size_dense,
                                                     self.reuse, 'dense_1_1')
            dense_1_2, next_in_2 = model_tools.block(next_in_1,
                                                     self.growth_rate,
                                                     self.layers_per_block,
                                                     self.kernel_size_dense,
                                                     self.reuse, 'dense_1_2')
            bottle_1_0 = slim.conv2d(inputs=tf.concat(
                [dense_1_2, dense_1_1, pool_in_0], axis=3),
                                     num_outputs=128,
                                     kernel_size=1,
                                     scope='bottle_1_0',
                                     reuse=self.reuse,
                                     activation_fn=model_tools.lrelu)
            fc11 = bottle_1_0
            helper1 = tf.concat([conv_1_0_low, dense_1_0], axis=3)
            ################################################################################################################
            ####    Branch_1_1: Input: fc11 (bottle_1_0) and helper1                                                    ####
            ####                Output: fc12 (bottle_1_1) and the rgb planes built from conv_1_1_1                      ####
            ####    (layers stay in the 'branch_1_0' variable scope; the separate scope below is disabled)              ####
            ################################################################################################################
            # with tf.variable_scope('branch_1_1', reuse=self.reuse):
            deconv_1_1_0 = slim.conv2d_transpose(
                inputs=fc11,
                num_outputs=128,
                kernel_size=[4, 4],
                stride=2,
                reuse=self.reuse,
                scope='deconv_1_1_0',
                activation_fn=model_tools.lrelu)
            conv_1_1_0 = slim.conv2d(inputs=deconv_1_1_0,
                                     num_outputs=128,
                                     kernel_size=self.kernel_size,
                                     scope='conv_1_1_0',
                                     reuse=self.reuse,
                                     activation_fn=model_tools.lrelu)
            dense_1_3, next_in_0 = model_tools.block(conv_1_1_0,
                                                     self.growth_rate,
                                                     self.layers_per_block,
                                                     self.kernel_size_dense,
                                                     self.reuse, 'dense_1_3')
            bottle_1_1 = slim.conv2d(inputs=tf.concat(
                [helper1, conv_1_1_0, dense_1_3], axis=3),
                                     num_outputs=self.bottle_output,
                                     kernel_size=1,
                                     scope='bottle_1_1',
                                     reuse=self.reuse,
                                     activation_fn=model_tools.lrelu)
            fc12 = bottle_1_1
            #conv_1_1_1 = bottle_1_1
            conv_1_1_1 = slim.conv2d(inputs=bottle_1_1,
                                     num_outputs=12 * Scale**2,
                                     kernel_size=self.kernel_size,
                                     scope='conv_1_1_1',
                                     reuse=self.reuse,
                                     activation_fn=model_tools.lrelu)

            # conv_1_1_1 carries 12 * Scale**2 channels: three consecutive
            # groups of 4 * Scale**2, one per colour plane.
            per_colour = 4 * Scale**2

            def _to_plane(channels):
                # Rearranges one colour group into a single-channel plane
                # enlarged by 2 * Scale per spatial axis.
                if Scale == 4:
                    return tf.depth_to_space(channels, Scale * 2)
                # Two-step rearrangement: block size 2 first, then Scale.
                # The original used (Scale, Scale) for green and blue, which
                # equals (2, Scale) only when Scale == 2 and otherwise leaves
                # a channel count that block size Scale cannot divide; all
                # three planes now use the same (2, Scale) sequence as red.
                return tf.depth_to_space(tf.depth_to_space(channels, 2),
                                         Scale)

            conv_r = _to_plane(conv_1_1_1[..., :per_colour])
            conv_g = _to_plane(conv_1_1_1[..., per_colour:2 * per_colour])
            conv_b = _to_plane(conv_1_1_1[..., 2 * per_colour:])
            rgb = tf.concat([conv_r, conv_g, conv_b], axis=3)
            return rgb, fc11, fc12
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
  """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses

  Args:
    pyramid: mapping with keys 'P2' .. 'P5' holding the feature maps.
    ih, iw: forwarded to ``anchor_decoder`` and ``roi_decoder``
      (presumably the input image height and width -- confirm with caller).
    num_classes: width of the class head; the box head has ``num_classes*4``
      outputs and the mask head ``num_classes`` channels.
    base_anchors: number of anchors per location; the RPN box/cls heads have
      ``base_anchors * 4`` / ``base_anchors * 2`` channels.
    is_training: forwarded to the ROI sampler and to dropout; when False the
      ROIs are cropped a second time before the mask head.
    gt_boxes: forwarded to ``sample_rpn_outputs_with_gt``.

  Returns:
    dict with the keys 'rpn', 'roi', 'refined' and 'mask'.
  """
  outputs = {}
  arg_scope = _extra_conv_arg_scope(activation_fn=None)
  my_sigmoid = None

  def crop_assigned_rois(level_rois, level_batch_inds):
    """ROIAlign levels P5 down to P2 and stack the 14x14 crops on axis 0."""
    # A fresh list per call: the previous code appended to a name that had
    # already been rebound to the concatenated Tensor, which has no
    # ``append`` and crashed the ``is_training=False`` path.
    crops = []
    for level in range(5, 1, -1):
      p = 'P%d'%level
      crops.append(ROIAlign(pyramid[p], level_rois[level-2], level_batch_inds[level-2],
                            stride=2**level,
                            pooled_height=14, pooled_width=14))
    return tf.concat(values=crops, axis=0)

  with slim.arg_scope(arg_scope):
    with tf.variable_scope('pyramid'):
        # for p in pyramid:
        outputs['rpn'] = {}
        for i in range(5, 1, -1):
          p = 'P%d'%i
          stride = 2 ** i
          
          ## rpn head
          shape = tf.shape(pyramid[p])
          height, width = shape[1], shape[2]
          rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1, activation_fn=tf.nn.relu, scope='%s/rpn'%p)
          box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=my_sigmoid)
          cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

          anchor_scales = [2 **(i-2), 2 ** (i-1), 2 **(i)]
          all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
          outputs['rpn'][p]={'box':box, 'cls':cls, 'anchor':all_anchors}

        ## gather all rois
        # print (outputs['rpn'])
        rpn_boxes = [tf.reshape(outputs['rpn']['P%d'%p]['box'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_clses = [tf.reshape(outputs['rpn']['P%d'%p]['cls'], [-1, 1]) for p in range(5, 1, -1)]  
        rpn_anchors = [tf.reshape(outputs['rpn']['P%d'%p]['anchor'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
        rpn_clses = tf.concat(values=rpn_clses, axis=0)
        rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

        # The flattened per-level results live next to the 'P*' entries.
        outputs['rpn']['box'] = rpn_boxes
        outputs['rpn']['cls'] = rpn_clses
        outputs['rpn']['anchor'] = rpn_anchors
        # outputs['rpn'] = {'box': rpn_boxes, 'cls': rpn_clses, 'anchor': rpn_anchors}
        
        rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
        rois, roi_clses, scores, = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
        # rois, scores, batch_inds = sample_rpn_outputs(rois, rpn_probs[:, 1])
        rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

        # if is_training:
        #     # rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes)
        #     rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.2)
        
        outputs['roi'] = {'box': rois, 'score': scores}

        ## cropping regions
        [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
        cropped_rois = crop_assigned_rois(assigned_rois, assigned_batch_inds)

        outputs['roi']['cropped_rois'] = cropped_rois
        tf.add_to_collection('__CROPPED__', cropped_rois)

        ## refine head
        # to 7 x 7
        cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
        refine = slim.flatten(cropped_regions)
        refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        refine = slim.fully_connected(refine,  1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        cls2 = slim.fully_connected(refine, num_classes, activation_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
        box = slim.fully_connected(refine, num_classes*4, activation_fn=my_sigmoid, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001))

        outputs['refined'] = {'box': box, 'cls': cls2}
        
        ## decode refine net outputs
        cls2_prob = tf.nn.softmax(cls2)
        final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, rois, ih, iw)
         
        ## for testing, maskrcnn takes refined boxes as inputs
        if not is_training:
          rois = final_boxes
          # NOTE(review): the re-assignment below is disabled, so the crops
          # are still taken from the pre-refinement assignment and `rois` is
          # not consumed afterwards -- confirm whether it should be re-enabled.
          # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
          #       assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
          cropped_rois = crop_assigned_rois(assigned_rois, assigned_batch_inds)
          
        ## mask head
        m = cropped_rois
        for _ in range(4):
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # to 28 x 28
        m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID', activation_fn=tf.nn.relu)
        tf.add_to_collection('__TRANSPOSED__', m)
        m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID', activation_fn=None)
          
        # add a mask, given the predicted boxes and classes
        outputs['mask'] = {'mask':m, 'cls': classes, 'score': scores}
          
  return outputs
Exemple #33
0
    def generator(self, inputs, reuse=False, scope='g_net'):
        """Build the multi-level encoder/decoder and return every level's output.

        The same network is applied ``self.n_levels`` times.  At level ``i``
        the input and the previous prediction are resized by
        ``self.scale ** (self.n_levels - i - 1)``, concatenated on axis 3 and
        passed through the encoder, an optional ConvLSTM bottleneck (when
        ``self.args.model == 'lstm'``) and the decoder.  Variables are
        created on the first level and reused on all later ones.

        Args:
            inputs: 4-D tensor whose static shape must be known, since the
                height and width are read via ``get_shape().as_list()``.
            reuse: ``reuse`` argument for the enclosing variable scope.
            scope: name of the enclosing variable scope.

        Returns:
            List with one predicted tensor per level, in iteration order.
        """
        _, h, w, _ = inputs.get_shape().as_list()

        if self.args.model == 'lstm':
            with tf.variable_scope('LSTM'):
                # Floor division keeps the cell shape integral on Python 3
                # as well, matching the `hi // 4` resize of the state below.
                cell = BasicConvLSTMCell([h // 4, w // 4], [3, 3], 128)
                rnn_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)

        x_unwrap = []
        with tf.variable_scope(scope, reuse=reuse):
            with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                                activation_fn=tf.nn.relu, padding='SAME', normalizer_fn=None,
                                weights_initializer=tf.contrib.layers.xavier_initializer(uniform=True),
                                biases_initializer=tf.constant_initializer(0.0)):

                inp_pred = inputs
                # `range` instead of the Python-2-only `xrange`.
                for i in range(self.n_levels):
                    scale = self.scale ** (self.n_levels - i - 1)
                    hi = int(round(h * scale))
                    wi = int(round(w * scale))
                    inp_blur = tf.image.resize_images(inputs, [hi, wi], method=0)
                    # The previous level's prediction is an input only; no
                    # gradient flows back through it.
                    inp_pred = tf.stop_gradient(tf.image.resize_images(inp_pred, [hi, wi], method=0))
                    inp_all = tf.concat([inp_blur, inp_pred], axis=3, name='inp')
                    if self.args.model == 'lstm':
                        rnn_state = tf.image.resize_images(rnn_state, [hi // 4, wi // 4], method=0)

                    # encoder
                    conv1_1 = slim.conv2d(inp_all, 32, [5, 5], scope='enc1_1')
                    conv1_2 = ResnetBlock(conv1_1, 32, 5, scope='enc1_2')
                    conv1_3 = ResnetBlock(conv1_2, 32, 5, scope='enc1_3')
                    conv1_4 = ResnetBlock(conv1_3, 32, 5, scope='enc1_4')
                    conv2_1 = slim.conv2d(conv1_4, 64, [5, 5], stride=2, scope='enc2_1')
                    conv2_2 = ResnetBlock(conv2_1, 64, 5, scope='enc2_2')
                    conv2_3 = ResnetBlock(conv2_2, 64, 5, scope='enc2_3')
                    conv2_4 = ResnetBlock(conv2_3, 64, 5, scope='enc2_4')
                    conv3_1 = slim.conv2d(conv2_4, 128, [5, 5], stride=2, scope='enc3_1')
                    conv3_2 = ResnetBlock(conv3_1, 128, 5, scope='enc3_2')
                    conv3_3 = ResnetBlock(conv3_2, 128, 5, scope='enc3_3')
                    conv3_4 = ResnetBlock(conv3_3, 128, 5, scope='enc3_4')

                    if self.args.model == 'lstm':
                        deconv3_4, rnn_state = cell(conv3_4, rnn_state)
                    else:
                        deconv3_4 = conv3_4

                    # decoder
                    deconv3_3 = ResnetBlock(deconv3_4, 128, 5, scope='dec3_3')
                    deconv3_2 = ResnetBlock(deconv3_3, 128, 5, scope='dec3_2')
                    deconv3_1 = ResnetBlock(deconv3_2, 128, 5, scope='dec3_1')
                    deconv2_4 = slim.conv2d_transpose(deconv3_1, 64, [4, 4], stride=2, scope='dec2_4')
                    # Skip connections are additive, not concatenated.
                    cat2 = deconv2_4 + conv2_4
                    deconv2_3 = ResnetBlock(cat2, 64, 5, scope='dec2_3')
                    deconv2_2 = ResnetBlock(deconv2_3, 64, 5, scope='dec2_2')
                    deconv2_1 = ResnetBlock(deconv2_2, 64, 5, scope='dec2_1')
                    deconv1_4 = slim.conv2d_transpose(deconv2_1, 32, [4, 4], stride=2, scope='dec1_4')
                    cat1 = deconv1_4 + conv1_4
                    deconv1_3 = ResnetBlock(cat1, 32, 5, scope='dec1_3')
                    deconv1_2 = ResnetBlock(deconv1_3, 32, 5, scope='dec1_2')
                    deconv1_1 = ResnetBlock(deconv1_2, 32, 5, scope='dec1_1')
                    inp_pred = slim.conv2d(deconv1_1, self.chns, [5, 5], activation_fn=None, scope='dec1_0')

                    # Every level contributes an output (the former
                    # `if i >= 0` guard was always true).
                    x_unwrap.append(inp_pred)
                    if i == 0:
                        # Variables exist after the first level; share them
                        # with all remaining levels.
                        tf.get_variable_scope().reuse_variables()

            return x_unwrap
Exemple #34
0
    def build_model(self, inputs, keep_prob):
        '''
        Reproduce the U-Net model.

        :param inputs:[None,height,width,channel]
        :param keep_prob: installed as the default keep_prob of every slim.dropout call
        :return:[None,height,width,classes] (the last convolution has 2 outputs)
        '''
        # Contracting path, one row per resolution:
        # (pooling scope or None, ((depth, conv scope), ...), skip name or None).
        # Trailing comments carry the original size annotations.
        # NOTE(review): "max_pool3" is used for three different pooling
        # layers; kept as-is so graph names do not change -- confirm whether
        # distinct names were intended.
        encoder_plan = (
            (None, ((16, 'conv1'), (32, 'conv2'), (64, 'conv3')), "crop1"),  # 1024
            ("max_pool1", ((128, 'conv4'), (128, 'conv5')), "crop2"),  # 512
            ("max_pool2", ((256, 'conv6'), (256, 'conv7')), "crop3"),  # 256
            ("max_pool3", ((512, 'conv8'), (512, 'conv9')), "crop4"),  # 128
            ("max_pool3", ((1024, 'conv10'), (1024, 'conv11')), "crop5"),  # 64
            ("max_pool3", ((2048, 'conv12'), (2048, 'conv13')), None),  # 32
        )
        # Expanding path, one row per resolution:
        # (deconv scope, deconv depth, ((depth, conv scope), ...), dropout?).
        decoder_plan = (
            ("deconv1", 1024, ((1024, 'upconv1'), (1024, 'upconv2')), True),  # 64
            ("deconv2", 512, ((512, 'upconv3'), (512, 'upconv4')), True),  # 128
            ("deconv3", 256, ((256, 'upconv5'), (256, 'upconv6')), True),  # 256
            ("deconv4", 128, ((128, 'upconv7'), (128, 'upconv8')), True),  # 512
            ("deconv5", 64, ((64, 'upconv9'), (2, 'upconv10')), False),  # 1024
        )

        conv_defaults = dict(
            padding="SAME",
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.005),
        )

        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            **conv_defaults):
            # The inner scope overrides the stride for transposed
            # convolutions only, so each of them doubles the resolution.
            with slim.arg_scope([slim.conv2d_transpose], stride=2), \
                    slim.arg_scope([slim.dropout], keep_prob=keep_prob), \
                    tf.name_scope("Unet"):
                x = inputs
                skips = []  # crop1 .. crop5, finest first

                with tf.variable_scope("downsampling"):
                    # downsampling
                    for pool_scope, conv_specs, skip_name in encoder_plan:
                        if pool_scope is not None:
                            x = slim.max_pool2d(x, [2, 2],
                                                2,
                                                padding="VALID",
                                                scope=pool_scope)
                        for depth, conv_scope in conv_specs:
                            x = slim.conv2d(x, depth, scope=conv_scope)
                        if skip_name is not None:
                            # Snapshot taken before dropout for the skip path.
                            skips.append(tf.identity(x, name=skip_name))
                        x = slim.dropout(x)

                with tf.variable_scope("upsampling"):
                    # upsampling; skips are consumed coarsest first
                    for stage, skip in zip(decoder_plan, reversed(skips)):
                        deconv_scope, deconv_depth, conv_specs, with_dropout = stage
                        x = slim.conv2d_transpose(x, deconv_depth,
                                                  scope=deconv_scope)
                        x = tf.concat((x, skip), axis=3)
                        for depth, conv_scope in conv_specs:
                            x = slim.conv2d(x, depth, scope=conv_scope)
                        if with_dropout:
                            x = slim.dropout(x)

        return x
Exemple #35
0
def fcn_model(inputs,
              num_classes=21,
              is_training=True,
              dropout_keep_prob=0.8,
              reuse=None):
    """Build a VGG-16-style encoder followed by a transposed-conv decoder.

    All variables are created under the ``'vgg_16'`` variable scope. Every
    ``slim.conv2d`` / ``slim.conv2d_transpose`` layer uses SAME padding, SELU
    activation and a Glorot-normal weight initializer.

    Args:
        inputs: Input tensor handed to the first convolution stage.
        num_classes: Number of output channels of the final ``conv6`` layer.
        is_training: When falsy, the dropout keep probability is forced to 1.0.
        dropout_keep_prob: Keep probability passed to ``alpha_dropout``.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        The output tensor of the ``conv6`` layer.
    """
    keep_prob = dropout_keep_prob if is_training else 1.0

    # (repeat count, output depth, apply alpha-dropout after the pooling step)
    encoder_stages = (
        (2, 64, False),
        (2, 128, False),
        (3, 256, True),
        (3, 512, False),
        (3, 512, False),
    )
    # (output depth, stride along both spatial axes, apply alpha-dropout after)
    decoder_stages = (
        (256, 2, True),
        (128, 2, False),
        (64, 4, True),
        (32, 2, False),
    )

    with tf.variable_scope('vgg_16', reuse=reuse), slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
            padding='SAME',
            activation_fn=tf.nn.selu,
            weights_initializer=tf.glorot_normal_initializer()):
        features = inputs

        # Encoder: conv1/pool1 ... conv5/pool5.
        for index, (repeats, depth, drop_after) in enumerate(encoder_stages, 1):
            features = slim.repeat(features, repeats, slim.conv2d, depth,
                                   [3, 3], scope='conv%d' % index)
            features = slim.max_pool2d(features, [2, 2],
                                       scope='pool%d' % index)
            if drop_after:
                features = tf.contrib.nn.alpha_dropout(features, keep_prob)

        # Decoder: deconv1 ... deconv4.
        for index, (depth, factor, drop_after) in enumerate(decoder_stages, 1):
            features = slim.conv2d_transpose(features,
                                             depth,
                                             kernel_size=(3, 3),
                                             stride=(factor, factor),
                                             scope='deconv%d' % index)
            if drop_after:
                features = tf.contrib.nn.alpha_dropout(features, keep_prob)

        # NOTE(review): conv6 inherits the SELU activation from the arg_scope;
        # confirm that activated class scores are what callers expect.
        return slim.conv2d(features, num_classes, [2, 2], scope="conv6")
Exemple #36
0
def deconv2d(x, o_dim, data_format='NHWC', name=None, k=4, s=2, act=None):
    """Apply ``slim.conv2d_transpose`` to ``x`` using short argument names.

    Args:
        x: Input tensor.
        o_dim: Number of output channels.
        data_format: Forwarded as ``data_format``.
        name: Forwarded as the layer ``scope``.
        k: Kernel size.
        s: Stride.
        act: Activation function (``None`` by default).

    Returns:
        The output tensor of the transposed convolution.
    """
    layer_options = {
        'stride': s,
        'activation_fn': act,
        'scope': name,
        'data_format': data_format,
    }
    return slim.conv2d_transpose(x, o_dim, k, **layer_options)
Exemple #37
0
def add_conv_transpose_layer(*args, **kwargs):
    """Create a ``slim.conv2d_transpose`` layer and register its output.

    All positional and keyword arguments are forwarded unchanged to
    ``slim.conv2d_transpose``. The resulting tensor is added to the
    ``tf.GraphKeys.ACTIVATIONS`` collection. If a ``scope`` keyword was
    supplied, the scope and the output's static shape are printed.

    Returns:
        The output tensor of the transposed convolution.
    """
    layer_output = slim.conv2d_transpose(*args, **kwargs)
    tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, layer_output)

    if 'scope' not in kwargs:
        return layer_output

    print('\t\t{scope}'.format(scope=kwargs['scope']), layer_output.get_shape())
    return layer_output
Exemple #38
0
def disp_net(tgt_image, is_training=True, do_edge=False):
    """Build the ``depth_net`` encoder-decoder and return multi-scale outputs.

    The input is bilinearly resized to 127x415 and passed through seven pairs
    of convolutions (``cnv1`` .. ``cnv7b``; ``cnv2`` .. ``cnv7`` use stride 2).
    A decoder of transposed convolutions with skip connections then produces
    four sigmoid disparity heads, each multiplied by ``DISP_SCALING``, offset
    by ``MIN_DISP`` and resized relative to the original input size. When
    ``do_edge`` is true, a second decoder under the ``edge`` variable scope
    is fed from the same encoder features and produces four sigmoid edge
    maps offset by ``MIN_EDGE``.

    ``resize_like``, ``DISP_SCALING``, ``MIN_DISP``, ``MIN_EDGE`` and ``utils``
    are defined elsewhere in the file.

    Args:
        tgt_image: Input tensor. Its static dimensions 1 and 2 are read as the
            height ``H`` and width ``W`` and used in integer arithmetic, so
            they must be statically known.
        is_training: Forwarded to batch normalisation as ``is_training``.
        do_edge: Whether to build the additional edge decoder.

    Returns:
        A 3-tuple ``(disparities, edges, end_points)`` where ``disparities`` is
        ``[disp1, disp2, disp3, disp4]`` (sized ``[H, W]``, ``[H//2, W//2]``,
        ``[H//4, W//4]``, ``[H//8, W//8]``), ``edges`` is
        ``[edge1, edge2, edge3, edge4]`` (all ``None`` when ``do_edge`` is
        false) and ``end_points`` is the dict built from the layer outputs
        collection.
    """
    batch_norm_params = {'is_training': is_training, 'decay': 0.999}
    # Remember the caller's resolution; outputs are resized back relative to it.
    H = tgt_image.get_shape()[1].value
    W = tgt_image.get_shape()[2].value
    tgt_image = tf.image.resize_bilinear(tgt_image, [127, 415])
    with tf.variable_scope('depth_net') as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
                # normalizer_fn = None,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(0.05),
                activation_fn=tf.nn.relu,
                outputs_collections=end_points_collection):
            # ---- Shared encoder ----
            cnv1 = slim.conv2d(tgt_image, 32, [7, 7], stride=1, scope='cnv1')
            cnv1b = slim.conv2d(cnv1, 32, [7, 7], stride=1, scope='cnv1b')
            cnv2 = slim.conv2d(cnv1b, 64, [5, 5], stride=2, scope='cnv2')
            cnv2b = slim.conv2d(cnv2, 64, [5, 5], stride=1, scope='cnv2b')
            cnv3 = slim.conv2d(cnv2b, 128, [3, 3], stride=2, scope='cnv3')
            cnv3b = slim.conv2d(cnv3, 128, [3, 3], stride=1, scope='cnv3b')
            cnv4 = slim.conv2d(cnv3b, 256, [3, 3], stride=2, scope='cnv4')
            cnv4b = slim.conv2d(cnv4, 256, [3, 3], stride=1, scope='cnv4b')
            cnv5 = slim.conv2d(cnv4b, 512, [3, 3], stride=2, scope='cnv5')
            cnv5b = slim.conv2d(cnv5, 512, [3, 3], stride=1, scope='cnv5b')
            cnv6 = slim.conv2d(cnv5b, 512, [3, 3], stride=2, scope='cnv6')
            cnv6b = slim.conv2d(cnv6, 512, [3, 3], stride=1, scope='cnv6b')
            cnv7 = slim.conv2d(cnv6b, 512, [3, 3], stride=2, scope='cnv7')
            cnv7b = slim.conv2d(cnv7, 512, [3, 3], stride=1, scope='cnv7b')

            # ---- Disparity decoder ----
            upcnv7 = slim.conv2d_transpose(cnv7b,
                                           512, [3, 3],
                                           stride=2,
                                           scope='upcnv7')
            # There might be dimension mismatch due to uneven down/up-sampling
            upcnv7 = resize_like(upcnv7, cnv6b)
            i7_in = tf.concat([upcnv7, cnv6b], axis=3)
            icnv7 = slim.conv2d(i7_in, 512, [3, 3], stride=1, scope='icnv7')

            upcnv6 = slim.conv2d_transpose(icnv7,
                                           512, [3, 3],
                                           stride=2,
                                           scope='upcnv6')
            upcnv6 = resize_like(upcnv6, cnv5b)
            i6_in = tf.concat([upcnv6, cnv5b], axis=3)
            icnv6 = slim.conv2d(i6_in, 512, [3, 3], stride=1, scope='icnv6')

            upcnv5 = slim.conv2d_transpose(icnv6,
                                           256, [3, 3],
                                           stride=2,
                                           scope='upcnv5')
            upcnv5 = resize_like(upcnv5, cnv4b)
            i5_in = tf.concat([upcnv5, cnv4b], axis=3)
            icnv5 = slim.conv2d(i5_in, 256, [3, 3], stride=1, scope='icnv5')

            upcnv4 = slim.conv2d_transpose(icnv5,
                                           128, [3, 3],
                                           stride=2,
                                           scope='upcnv4')
            i4_in = tf.concat([upcnv4, cnv3b], axis=3)
            icnv4 = slim.conv2d(i4_in, 128, [3, 3], stride=1, scope='icnv4')
            disp4 = DISP_SCALING * slim.conv2d(icnv4,
                                               1, [3, 3],
                                               stride=1,
                                               activation_fn=tf.sigmoid,
                                               normalizer_fn=None,
                                               scope='disp4') + MIN_DISP
            disp4 = tf.image.resize_bilinear(disp4, [H // 8, W // 8])
            # Integer floor division replaces np.int(H / 4): the np.int alias
            # was removed from NumPy and H, W are plain non-negative ints.
            disp4_up = tf.image.resize_bilinear(disp4, [H // 4, W // 4])

            upcnv3 = slim.conv2d_transpose(icnv4,
                                           64, [3, 3],
                                           stride=2,
                                           scope='upcnv3')
            i3_in = tf.concat([upcnv3, cnv2b, disp4_up], axis=3)
            icnv3 = slim.conv2d(i3_in, 64, [3, 3], stride=1, scope='icnv3')
            disp3 = DISP_SCALING * slim.conv2d(icnv3,
                                               1, [3, 3],
                                               stride=1,
                                               activation_fn=tf.sigmoid,
                                               normalizer_fn=None,
                                               scope='disp3') + MIN_DISP
            disp3 = tf.image.resize_bilinear(disp3, [H // 4, W // 4])
            # Static spatial size of cnv1b, reused to align tensors before
            # concatenating with cnv1b (also used by the edge decoder below).
            cnv1b_shape = cnv1b.get_shape().as_list()
            disp3_up = tf.image.resize_bilinear(
                disp3, [cnv1b_shape[1], cnv1b_shape[2]])

            upcnv2 = slim.conv2d_transpose(icnv3,
                                           32, [3, 3],
                                           stride=2,
                                           scope='upcnv2')
            upcnv2 = tf.image.resize_bilinear(upcnv2,
                                              [cnv1b_shape[1], cnv1b_shape[2]])
            i2_in = tf.concat([upcnv2, cnv1b, disp3_up], axis=3)
            icnv2 = slim.conv2d(i2_in, 32, [3, 3], stride=1, scope='icnv2')
            disp2 = DISP_SCALING * slim.conv2d(icnv2,
                                               1, [3, 3],
                                               stride=1,
                                               activation_fn=tf.sigmoid,
                                               normalizer_fn=None,
                                               scope='disp2') + MIN_DISP
            disp2 = tf.image.resize_bilinear(disp2, [H // 2, W // 2])
            disp2_up = tf.image.resize_bilinear(disp2, [H, W])

            upcnv1 = slim.conv2d_transpose(icnv2,
                                           16, [3, 3],
                                           stride=2,
                                           scope='upcnv1')
            # disp2_up is resized a second time so it matches upcnv1 for the
            # concatenation below.
            disp2_up = tf.image.resize_bilinear(disp2_up, [
                upcnv1.get_shape().as_list()[1],
                upcnv1.get_shape().as_list()[2]
            ])
            i1_in = tf.concat([upcnv1, disp2_up], axis=3)
            icnv1 = slim.conv2d(i1_in, 16, [3, 3], stride=1, scope='icnv1')
            disp1 = DISP_SCALING * slim.conv2d(icnv1,
                                               1, [3, 3],
                                               stride=1,
                                               activation_fn=tf.sigmoid,
                                               normalizer_fn=None,
                                               scope='disp1') + MIN_DISP
            disp1 = tf.image.resize_bilinear(disp1, [H, W])

            # Edge mask layers
            if do_edge:
                # Second decoder; it reuses the encoder features above but
                # keeps its own variables under the 'edge' scope and uses
                # 4x4 transposed-conv kernels and nearest-neighbour resizing.
                with tf.variable_scope('edge'):
                    upcnv7_e = slim.conv2d_transpose(cnv7b,
                                                     512, [4, 4],
                                                     stride=2,
                                                     scope='upcnv7')
                    # There might be dimension mismatch due to uneven down/up-sampling
                    upcnv7_e = resize_like(upcnv7_e, cnv6b)
                    i7_in_e = tf.concat([upcnv7_e, cnv6b], axis=3)
                    icnv7_e = slim.conv2d(i7_in_e,
                                          512, [3, 3],
                                          stride=1,
                                          scope='icnv7')

                    upcnv6_e = slim.conv2d_transpose(icnv7_e,
                                                     512, [4, 4],
                                                     stride=2,
                                                     scope='upcnv6')
                    upcnv6_e = resize_like(upcnv6_e, cnv5b)
                    i6_in_e = tf.concat([upcnv6_e, cnv5b], axis=3)
                    icnv6_e = slim.conv2d(i6_in_e,
                                          512, [3, 3],
                                          stride=1,
                                          scope='icnv6')

                    upcnv5_e = slim.conv2d_transpose(icnv6_e,
                                                     256, [4, 4],
                                                     stride=2,
                                                     scope='upcnv5')
                    upcnv5_e = resize_like(upcnv5_e, cnv4b)
                    i5_in_e = tf.concat([upcnv5_e, cnv4b], axis=3)
                    icnv5_e = slim.conv2d(i5_in_e,
                                          256, [3, 3],
                                          stride=1,
                                          scope='icnv5')

                    upcnv4_e = slim.conv2d_transpose(icnv5_e,
                                                     128, [4, 4],
                                                     stride=2,
                                                     scope='upcnv4')
                    i4_in_e = tf.concat([upcnv4_e, cnv3b], axis=3)
                    icnv4_e = slim.conv2d(i4_in_e,
                                          128, [3, 3],
                                          stride=1,
                                          scope='icnv4')
                    edge4 = slim.conv2d(icnv4_e,
                                        1, [3, 3],
                                        stride=1,
                                        activation_fn=tf.sigmoid,
                                        normalizer_fn=None,
                                        scope='edge4') + MIN_EDGE
                    edge4 = tf.image.resize_nearest_neighbor(
                        edge4, [H // 8, W // 8])
                    # edge4_up = tf.image.resize_bilinear(edge4, [H // 4, W // 4])
                    edge4_up = tf.image.resize_nearest_neighbor(
                        edge4, [H // 4, W // 4])

                    upcnv3_e = slim.conv2d_transpose(icnv4_e,
                                                     64, [4, 4],
                                                     stride=2,
                                                     scope='upcnv3')
                    i3_in_e = tf.concat([upcnv3_e, cnv2b, edge4_up], axis=3)
                    # i3_in_e  = tf.concat([upcnv3_e, cnv2b], axis=3)
                    icnv3_e = slim.conv2d(i3_in_e,
                                          64, [3, 3],
                                          stride=1,
                                          scope='icnv3')
                    edge3 = slim.conv2d(icnv3_e,
                                        1, [3, 3],
                                        stride=1,
                                        activation_fn=tf.sigmoid,
                                        normalizer_fn=None,
                                        scope='edge3') + MIN_EDGE
                    edge3 = tf.image.resize_nearest_neighbor(
                        edge3, [H // 4, W // 4])
                    # edge3_up = tf.image.resize_bilinear(edge3, [H // 2, W // 2])
                    edge3_up = tf.image.resize_nearest_neighbor(
                        edge3, [H // 2, W // 2])
                    # Second resize aligns edge3_up with cnv1b for the concat.
                    edge3_up = tf.image.resize_nearest_neighbor(
                        edge3_up, [cnv1b_shape[1], cnv1b_shape[2]])
                    upcnv2_e = slim.conv2d_transpose(icnv3_e,
                                                     32, [4, 4],
                                                     stride=2,
                                                     scope='upcnv2')
                    upcnv2_e = tf.image.resize_nearest_neighbor(
                        upcnv2_e, [cnv1b_shape[1], cnv1b_shape[2]])
                    i2_in_e = tf.concat([upcnv2_e, cnv1b, edge3_up], axis=3)
                    # i2_in_e  = tf.concat([upcnv2_e, cnv1b], axis=3)
                    icnv2_e = slim.conv2d(i2_in_e,
                                          32, [3, 3],
                                          stride=1,
                                          scope='icnv2')
                    edge2 = slim.conv2d(icnv2_e,
                                        1, [3, 3],
                                        stride=1,
                                        activation_fn=tf.sigmoid,
                                        normalizer_fn=None,
                                        scope='edge2') + MIN_EDGE
                    edge2 = tf.image.resize_nearest_neighbor(
                        edge2, [H // 2, W // 2])
                    # edge2_up = tf.image.resize_bilinear(edge2, [H, W])
                    # NOTE(review): this value is overwritten below before it
                    # is read; kept so the set of created graph ops is
                    # unchanged.
                    edge2_up = tf.image.resize_nearest_neighbor(edge2, [H, W])

                    upcnv1_e = slim.conv2d_transpose(icnv2_e,
                                                     16, [4, 4],
                                                     stride=2,
                                                     scope='upcnv1')
                    edge2_up = tf.image.resize_nearest_neighbor(
                        edge2, [
                            upcnv1_e.get_shape().as_list()[1],
                            upcnv1_e.get_shape().as_list()[2]
                        ])
                    i1_in_e = tf.concat([upcnv1_e, edge2_up], axis=3)
                    # i1_in_e  = tf.concat([upcnv1_e], axis=3)
                    icnv1_e = slim.conv2d(i1_in_e,
                                          16, [3, 3],
                                          stride=1,
                                          scope='icnv1')
                    edge1 = slim.conv2d(icnv1_e,
                                        1, [3, 3],
                                        stride=1,
                                        activation_fn=tf.sigmoid,
                                        normalizer_fn=None,
                                        scope='edge1') + MIN_EDGE
                    edge1 = tf.image.resize_nearest_neighbor(edge1, [H, W])

                    # down-scale the edges at lower scale from highest resolution edge results
                    # edge2 = slim.max_pool2d(edge1, 2)
                    # edge3 = slim.max_pool2d(edge2, 2)
                    # edge4 = slim.max_pool2d(edge3, 2)
            else:
                edge1 = None
                edge2 = None
                edge3 = None
                edge4 = None

            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return [disp1, disp2, disp3, disp4], [edge1, edge2, edge3,
                                                  edge4], end_points
Exemple #39
0
    def buildNet(self,
                 images,
                 category_classes,
                 attribute_classes,
                 weight_decay=0.0005,
                 is_training=False,
                 dropout_keep_prob=0.5,
                 stage='landmark'):
        """Build the landmark, attention and classification graph on VGG features.

        A VGG base network is built via ``self.vgg.buildNet`` up to its
        ``'conv4'`` endpoint. Eight heat maps are predicted from it, refined by
        eight ``self.BCRNNBlock`` calls, max-merged per landmark, stacked and
        passed through a sigmoid. Unless ``stage`` is ``'landmark'``, a
        landmark-attention branch and a clothing-attention branch are added to
        the base features, which then feed a classification head.

        Args:
            images: Input passed to the VGG base network.
            category_classes: Output channels of ``fc8_category``; also passed
                to the VGG builder.
            attribute_classes: Output channels of ``fc8_attribute``.
            weight_decay: L2 regularisation scale for the conv layers.
            is_training: Forwarded to the VGG builder and the dropout layers.
            dropout_keep_prob: Keep probability for the dropout layers.
            stage: If its lower-cased value is ``'landmark'``, only the
                landmark part is built.

        Returns:
            ``(refined_heatmaps_sigmoid, None)`` for the landmark stage,
            otherwise ``(net_category, net_attribute)``.
        """
        # VGG base net; the returned end points are not used here.
        base_features, _ = self.vgg.buildNet(
            'VGG_16',
            images,
            category_classes,
            is_training=is_training,
            weight_decay=weight_decay,
            dropout_keep_prob=dropout_keep_prob,
            final_endpoint='conv4')

        # Landmark grammar: (block scope, heat-map channel indices).
        # Channel order: l-collar, l-sleeve, l-waistline, l-hem, r-...
        #   RK: collar <-> waistline <-> hem and collar <-> sleeve, per side.
        #   RS: left <-> right for each landmark type.
        grammar = (
            ('RK_1', [0, 2, 3]),
            ('RK_2', [0, 1]),
            ('RK_3', [4, 6, 7]),
            ('RK_4', [4, 5]),
            ('RS_1', [0, 4]),
            ('RS_2', [1, 5]),
            ('RS_3', [2, 6]),
            ('RS_4', [3, 7]),
        )
        # For each output landmark, the (block, position) pairs to max-merge.
        merge_plan = (
            (('RK_1', 0), ('RK_2', 0), ('RS_1', 0)),  # l_collar
            (('RK_2', 1), ('RS_2', 0)),               # l_sleeve
            (('RK_1', 1), ('RS_3', 0)),               # l_waistline
            (('RK_1', 2), ('RS_4', 0)),               # l_hem
            (('RK_3', 0), ('RK_4', 0), ('RS_1', 1)),  # r_collar
            (('RK_4', 1), ('RS_2', 1)),               # r_sleeve
            (('RK_3', 1), ('RS_3', 1)),               # r_waistline
            (('RK_3', 2), ('RS_4', 1)),               # r_hem
        )

        with tf.variable_scope('BCRNN'):
            with slim.arg_scope(
                    [slim.conv2d],
                    activation_fn=None,
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    padding='SAME'):
                # Only the 8 landmarks are predicted (no background channel).
                raw_heat_maps = slim.conv2d(base_features,
                                            8, [1, 1],
                                            scope='ConstructHeatMaps')
            heat_maps = tf.sigmoid(raw_heat_maps, name='sigmoid')

            # The second BCRNNBlock argument equals the number of channel
            # indices handed to that block.
            refined = {}
            for block_scope, channels in grammar:
                refined[block_scope] = self.BCRNNBlock(
                    heat_maps, len(channels), channels, block_scope)

            # Max-merge the refined maps that describe the same landmark.
            merged_landmarks = []
            for sources in merge_plan:
                candidates = [refined[block][position]
                              for block, position in sources]
                merged_landmarks.append(tf.reduce_max(candidates, axis=0))

            refined_heatmaps = tf.stack(merged_landmarks, axis=3)

            # Sigmoid rather than softmax: each pixel carries its own
            # keypoint probability.
            output = tf.sigmoid(refined_heatmaps, name='RefinedHeatMaps')

        if stage.lower() == 'landmark':
            return output, None

        with tf.variable_scope('LandmarkAttention'):
            # NOTE(review): this drops the last channel although only 8
            # landmark channels exist -- possibly left over from a background
            # channel; confirm.
            output = output[:, :, :, :-1]
            # TODO: the original author notes this should be a max over
            # channels rather than a mean.
            AL = tf.tile(
                tf.reduce_mean(output, axis=-1, keep_dims=True),
                [1, 1, 1, base_features.shape[-1]])
            GL = tf.multiply(AL, base_features)

        with tf.variable_scope('ClothingAttention'):
            with slim.arg_scope(
                    [slim.conv2d],
                    activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    biases_initializer=tf.zeros_initializer(),
                    scope='ClothingAttention'):
                # Two pool/conv steps, then a stride-4 transposed conv.
                AC = slim.max_pool2d(base_features, [2, 2], scope='AC_pool1')
                AC = slim.conv2d(AC, 512, [3, 3], scope='AC_conv1')
                AC = slim.max_pool2d(AC, [2, 2], scope='AC_pool2')
                AC = slim.conv2d(AC, 512, [3, 3], scope='AC_conv2')
                AC = slim.conv2d_transpose(AC,
                                           num_outputs=512,
                                           stride=4,
                                           kernel_size=[3, 3],
                                           padding='SAME',
                                           scope='AC_upsample')
                AC = tf.sigmoid(AC, 'sigmoid')
                GC = tf.multiply(AC, base_features)

        with tf.variable_scope('Classification'):
            with slim.arg_scope(
                    [slim.conv2d],
                    activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    biases_initializer=tf.zeros_initializer()):
                # Fuse base features with both attention-weighted copies.
                fused = base_features + GL + GC
                fused = slim.max_pool2d(fused, [2, 2], scope='pool4')
                fused = slim.repeat(fused,
                                    3,
                                    slim.conv2d,
                                    512, [3, 3],
                                    scope='conv5')
                fused = slim.max_pool2d(fused, [2, 2], scope='pool5')

                # Use conv2d instead of fully_connected layers.
                fused = slim.conv2d(fused,
                                    4096, [7, 7],
                                    padding='VALID',
                                    scope='fc6')
                fused = slim.dropout(fused,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout6')
                fused = slim.conv2d(fused, 4096, [1, 1], scope='fc7')
                fused = slim.dropout(fused,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout7')

                # Category head.
                net_category = tf.squeeze(
                    slim.conv2d(fused,
                                category_classes, [1, 1],
                                scope='fc8_category'),
                    [1, 2],
                    name='fc8_category/squeezed')

                # Attribute head with a per-attribute sigmoid.
                net_attribute = tf.squeeze(
                    slim.conv2d(fused,
                                attribute_classes, [1, 1],
                                activation_fn=tf.nn.sigmoid,
                                scope='fc8_attribute'),
                    [1, 2],
                    name='fc8_attribute/squeezed')

        return net_category, net_attribute