Example #1
# Imports assumed by the examples below (not present in the original snippet).
# `net_utils` is a project-local helper module, and FLAGS (used by the later
# examples) is assumed to be the project's tf.flags configuration object.
import numpy as np
import tensorflow as tf

import net_utils

FLAGS = tf.flags.FLAGS  # assumes a batch_size flag is defined elsewhere in the project


def pre_trained_net(weights, image):

    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 
    )

    net = {} # net stores the output tensor of each layer
    cur_data = image
    for i, name in enumerate(layers):
        layer_type = name[:4]
        
        if layer_type == 'conv':
            kernels = weights[name]["weights"][0][0]
            bias = weights[name]["bias"][0][0]

            # obtain kernel and bias from vgg net (note the difference in layout)
            # matconvnet: weights are [out_channels, in_channels, height, width]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels  = net_utils.get_variable(np.transpose(kernels, (2, 3, 1, 0)), name=name + "_w")
            bias     = net_utils.get_variable(bias.reshape(-1), name=name + "_b")
            cur_data = net_utils.conv2d_basic(cur_data, kernels, bias)
            
        elif layer_type == 'relu':
            cur_data = tf.nn.relu(cur_data, name=name)

        elif layer_type == 'pool':
            cur_data = net_utils.max_pool_2x2(cur_data)
        
        net[name] = cur_data

    return net
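The `net_utils` helpers used above (`get_variable`, `conv2d_basic`, `max_pool_2x2`) are project-local and not shown in the example. A minimal sketch of what they might look like, assuming TF 1.x, stride-1 SAME-padded convolutions, and 2x2 max pooling:

# Hypothetical sketch of the net_utils helpers used above (an assumption,
# not the original module).
import numpy as np
import tensorflow as tf


def get_variable(initial_value, name):
    # Wrap a pre-trained numpy array in a TF variable so it can be fine-tuned.
    init = tf.constant_initializer(initial_value, dtype=tf.float32)
    return tf.get_variable(name=name, initializer=init, shape=initial_value.shape)


def conv2d_basic(x, w, bias):
    # Stride-1 convolution with SAME padding, followed by a bias add.
    conv = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
    return tf.nn.bias_add(conv, bias)


def max_pool_2x2(x):
    # 2x2 max pooling with stride 2; halves the spatial resolution.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")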
Example #2
def inference(image, weights):

    # start a new variable scope "inference"
    with tf.variable_scope("inference"):

        ##################################################
        # obtain the forward result of each layer in vgg
        ##################################################
        # as well as the tensor after last conv layer in each stage
        # NOTE: we do not make use of the result of conv layers in stage 1
        image_net = pre_trained_net(weights, image)

        output_stage_2 = image_net['conv2_2']
        output_stage_3 = image_net['conv3_3']
        output_stage_4 = image_net['conv4_3']
        output_stage_5 = image_net['conv5_3']


        ##############################
        # Preparation for upsampling
        ##############################
        # Prep 2
        output_stage_2_shape = output_stage_2.get_shape()
        nChannels_in = output_stage_2_shape[3].value
        kernels = weights['conv2_2_16']['weights'][0][0]
        bias = weights["conv2_2_16"]["bias"][0][0]
        prep2_w = net_utils.get_variable(np.transpose(kernels, (2, 3, 1, 0)), name="prep2_w")
        prep2_b = net_utils.get_variable(bias.reshape(-1), name="prep2_b")
        prep_2  = net_utils.conv2d_basic(output_stage_2, prep2_w, prep2_b)

        # Prep 3
        output_stage_3_shape = output_stage_3.get_shape()
        nChannels_in = output_stage_3_shape[3].value
        kernels = weights['conv3_3_16']['weights'][0][0]
        bias = weights['conv3_3_16']['bias'][0][0]
        prep3_w = net_utils.get_variable(np.transpose(kernels, (2, 3, 1, 0)), name="prep3_w")
        prep3_b = net_utils.get_variable(bias.reshape(-1), name="prep3_b")
        prep_3  = net_utils.conv2d_basic(output_stage_3, prep3_w, prep3_b)

        # Prep 4
        output_stage_4_shape = output_stage_4.get_shape()
        nChannels_in = output_stage_4_shape[3].value
        kernels = weights['conv4_3_16']['weights'][0][0]
        bias = weights['conv4_3_16']['bias'][0][0]
        prep4_w = net_utils.get_variable(np.transpose(kernels, (2, 3, 1, 0)), name="prep4_w")
        prep4_b = net_utils.get_variable(bias.reshape(-1), name="prep4_b")
        prep_4  = net_utils.conv2d_basic(output_stage_4, prep4_w, prep4_b)

        # Prep 5
        output_stage_5_shape = output_stage_5.get_shape()
        nChannels_in = output_stage_5_shape[3].value
        kernels = weights['conv5_3_16']['weights'][0][0]
        bias = weights['conv5_3_16']['bias'][0][0]
        prep5_w = net_utils.get_variable(np.transpose(kernels, (2, 3, 1, 0)), name="prep5_w")
        prep5_b = net_utils.get_variable(bias.reshape(-1), name="prep5_b")
        prep_5  = net_utils.conv2d_basic(output_stage_5, prep5_w, prep5_b)

        #############################
        # Upsampling for each stage
        #############################
        # matconvnet: weights are [out_channels, in_channels, height, width]
        # tensorflow: weights are [height, width, out_channels, in_channels]

        image_shape = tf.shape(image)
        upsample_shape = tf.stack([FLAGS.batch_size, 480, 854, 16])

        # upsample output_stage_2: upsample by ratio of 2
        # t2_w = net_utils.weight_variable([4, 4, 16, 16], name="t2_w")
        # Directly assign a bilinear kernel filter to the weight
        t2_w = weights['upsample2_']['weights'][0][0]
        t2_w = np.transpose(t2_w, (2, 3, 0, 1))
        upsample_2 = net_utils.conv2d_transpose_strided(prep_2, t2_w, output_shape=upsample_shape, stride=2)

        # upsample output_stage_3: upsample by ratio of 4
        # t3_w = net_utils.weight_variable([8, 8, 16, 16], name="t3_w")
        # Directly assign a bilinear kernel filter to the weight
        t3_w = weights['upsample4_']['weights'][0][0]
        t3_w = np.transpose(t3_w, (2, 3, 0, 1))
        upsample_3 = net_utils.conv2d_transpose_strided(prep_3, t3_w, output_shape=upsample_shape, stride=4)

        # upsample output_stage_4: upsample by ratio of 8
        # t4_w = net_utils.weight_variable([16, 16, 16, 16], name="t4_w")
        # Directly assign a bilinear kernel filter to the weight
        t4_w = weights['upsample8_']['weights'][0][0]
        t4_w = np.transpose(t4_w, (2, 3, 0, 1))
        upsample_4 = net_utils.conv2d_transpose_strided(prep_4, t4_w, output_shape=upsample_shape, stride=8)

        # upsample output_stage_5: upsample by ratio of 16
        # t5_w = net_utils.weight_variable([32, 32, 16, 16], name="t5_w")
        # Directly assign a bilinear kernel filter to the weight
        t5_w = weights['upsample16_']['weights'][0][0]
        t5_w = np.transpose(t5_w, (2, 3, 0, 1))
        upsample_5 = net_utils.conv2d_transpose_strided(prep_5, t5_w, output_shape=upsample_shape, stride=16)

        ########################################
        # Concatenation and Weighted Summation
        ########################################
        fuse = tf.concat([upsample_2, upsample_3, upsample_4, upsample_5], 3)
        fuse_shape = fuse.get_shape()
        kernels = weights['new_score_weighting']['weights'][0][0]
        bias = weights['new_score_weighting']['bias'][0][0]
        fuse_w = net_utils.get_variable(np.transpose(kernels, (2, 3, 1, 0)), name="fuse_w")
        fuse_b = net_utils.get_variable(bias.reshape(-1), name="fuse_b")
        output_fuse = net_utils.conv2d_basic(fuse, fuse_w, fuse_b)

    return output_fuse
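`conv2d_transpose_strided` is also project-local. A plausible sketch, assuming it wraps a SAME-padded `tf.nn.conv2d_transpose` with an optional bias (the signature is inferred from the calls above, so treat it as an assumption):

import tensorflow as tf


def conv2d_transpose_strided(x, w, b=None, output_shape=None, stride=2):
    # w is laid out as [height, width, out_channels, in_channels], which is
    # the filter layout tf.nn.conv2d_transpose expects.
    conv = tf.nn.conv2d_transpose(x, w, output_shape,
                                  strides=[1, stride, stride, 1],
                                  padding="SAME")
    if b is not None:
        conv = tf.nn.bias_add(conv, b)
    return conv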
Example #3
def inference(image, model_data):

    weights = np.squeeze(model_data['layers'])

    # start a new variable scope "inference"
    with tf.variable_scope("inference"):

        ##################################################
        # obtain the forward result of each layer in vgg
        ##################################################
        # as well as the tensor after last conv layer in each stage
        # NOTE: we do not make use of the result of conv layers in stage 1
        image_net = pre_trained_net(weights, image)

        output_stage_2 = image_net['conv2_2']
        output_stage_3 = image_net['conv3_3']
        output_stage_4 = image_net['conv4_3']
        output_stage_5 = image_net['conv5_3']

        ##############################
        # Preparation for upsampling
        ##############################
        # Prep 2
        output_stage_2_shape = output_stage_2.get_shape()
        nChannels_in = output_stage_2_shape[3].value
        prep2_w = net_utils.weight_variable([3, 3, nChannels_in, 16],
                                            name="prep2_w")
        prep2_b = net_utils.bias_variable([16], name="prep2_b")
        prep_2 = net_utils.conv2d_basic(output_stage_2, prep2_w, prep2_b)

        # Prep 3
        output_stage_3_shape = output_stage_3.get_shape()
        nChannels_in = output_stage_3_shape[3].value
        prep3_w = net_utils.weight_variable([3, 3, nChannels_in, 16],
                                            name="prep3_w")
        prep3_b = net_utils.bias_variable([16], name="prep3_b")
        prep_3 = net_utils.conv2d_basic(output_stage_3, prep3_w, prep3_b)

        # Prep 4
        output_stage_4_shape = output_stage_4.get_shape()
        nChannels_in = output_stage_4_shape[3].value
        prep4_w = net_utils.weight_variable([3, 3, nChannels_in, 16],
                                            name="prep4_w")
        prep4_b = net_utils.bias_variable([16], name="prep4_b")
        prep_4 = net_utils.conv2d_basic(output_stage_4, prep4_w, prep4_b)

        # Prep 5
        output_stage_5_shape = output_stage_5.get_shape()
        nChannels_in = output_stage_5_shape[3].value
        prep5_w = net_utils.weight_variable([3, 3, nChannels_in, 16],
                                            name="prep5_w")
        prep5_b = net_utils.bias_variable([16], name="prep5_b")
        prep_5 = net_utils.conv2d_basic(output_stage_5, prep5_w, prep5_b)

        #############################
        # Upsampling for each stage
        #############################
        image_shape = tf.shape(image)
        # upsample_shape = tf.stack([FLAGS.batch_size, image_shape[1], image_shape[2], 16])
        upsample_shape = tf.stack([FLAGS.batch_size, 480, 854, 16])
        # upsample output_stage_2: upsample by ratio of 2
        # t2_w = net_utils.weight_variable([4, 4, 16, 16], name="t2_w")
        # Directly assign a bilinear kernel filter to the weight
        t2_w = net_utils.bilinear_upsample_weights(2, 16, 't2_w')
        # t2_b = net_utils.bias_variable([16], name="t2_b")
        upsample_2 = net_utils.conv2d_transpose_strided(
            prep_2, t2_w, output_shape=upsample_shape, stride=2)

        # upsample output_stage_3: upsample by ratio of 4
        # t3_w = net_utils.weight_variable([8, 8, 16, 16], name="t3_w")
        # Directly assign a bilinear kernel filter to the weight
        t3_w = net_utils.bilinear_upsample_weights(4, 16, 't3_w')
        # t3_b = net_utils.bias_variable([16], name="t3_b")
        upsample_3 = net_utils.conv2d_transpose_strided(
            prep_3, t3_w, output_shape=upsample_shape, stride=4)

        # upsample output_stage_4: upsample by ratio of 8
        # t4_w = net_utils.weight_variable([16, 16, 16, 16], name="t4_w")
        # Directly assign a bilinear kernel filter to the weight
        t4_w = net_utils.bilinear_upsample_weights(8, 16, 't4_w')
        # t4_b = net_utils.bias_variable([16], name="t4_b")
        upsample_4 = net_utils.conv2d_transpose_strided(
            prep_4, t4_w, output_shape=upsample_shape, stride=8)

        # upsample output_stage_5: upsample by ratio of 16
        # t5_w = net_utils.weight_variable([32, 32, 16, 16], name="t5_w")
        # Directly assign a bilinear kernel filter to the weight
        t5_w = net_utils.bilinear_upsample_weights(16, 16, 't5_w')
        print(t5_w.get_shape())
        # t5_b = net_utils.bias_variable([16], name="t5_b")
        upsample_5 = net_utils.conv2d_transpose_strided(
            prep_5, t5_w, output_shape=upsample_shape, stride=16)
        ########################################
        # Concatenation and Weighted Summation
        ########################################
        fuse = tf.concat([upsample_2, upsample_3, upsample_4, upsample_5], 3)
        fuse_shape = fuse.get_shape()
        print(fuse_shape)
        fuse_w = net_utils.weight_variable([1, 1, fuse_shape[3].value, 1],
                                           name="fuse_w")
        fuse_b = net_utils.bias_variable([1], name="fuse_b")
        output_fuse = net_utils.conv2d_basic(fuse, fuse_w, fuse_b)

    return output_fuse
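`net_utils.bilinear_upsample_weights`, which this variant uses to initialize the transposed-convolution kernels, is likewise not shown. Below is a sketch of the standard FCN-style bilinear initialization (an assumption, not the original helper), followed by a hedged usage example; the file name "vgg16.mat" is a guess about the project's matconvnet weight file.

import numpy as np
import scipy.io
import tensorflow as tf


def bilinear_upsample_weights(factor, channels, name):
    # Build a [2*factor, 2*factor, channels, channels] kernel that performs
    # bilinear interpolation channel-by-channel (the usual FCN initialization).
    size = 2 * factor - factor % 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    kernel = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
    weights = np.zeros((size, size, channels, channels), dtype=np.float32)
    for c in range(channels):
        weights[:, :, c, c] = kernel
    init = tf.constant_initializer(weights, dtype=tf.float32)
    return tf.get_variable(name=name, initializer=init, shape=weights.shape)


# Hedged usage: load a matconvnet-style weight file and build the graph.
model_data = scipy.io.loadmat("vgg16.mat")  # file name is an assumption
image = tf.placeholder(tf.float32, shape=[None, 480, 854, 3], name="input_image")
prediction = inference(image, model_data)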