def vgg_net(weights, image):
    """Build the VGG-style conv/relu/pool stack on top of ``image``.

    NOTE: a second ``vgg_net`` is defined later in this file and rebinds
    the name, so this version is only used if that definition is removed.

    Args:
        weights: Indexable by layer position; for every conv layer index
            ``i``, ``weights[i][0][0][0][0]`` must unpack to
            ``(kernels, bias)``. Presumably the squeezed ``layers`` array of
            the pretrained VGG model -- confirm against the caller.
        image: Input tensor fed to the first layer.

    Returns:
        dict mapping each layer name (e.g. ``'conv1_1'``, ``'relu3_2'``,
        ``'pool4'``) to the tensor produced by that layer.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
        'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4',
    )

    net = {}
    current = image
    for idx, layer_name in enumerate(layers):
        # The four-character prefix of the name identifies the layer type.
        if layer_name.startswith('conv'):
            kernels, bias = weights[idx][0][0][0][0]
            # Swap the first two kernel axes before creating the variable.
            kernel_var = utils.get_variable(
                np.transpose(kernels, (1, 0, 2, 3)), name=layer_name + "_w")
            bias_var = utils.get_variable(
                bias.reshape(-1), name=layer_name + "_b")
            conv_out = utils.conv2d_basic(current, kernel_var, bias_var)
            # Batch normalisation is applied directly to the conv output.
            current = utils.batch_norm_layer(
                conv_out, FLAGS.mode, scope_bn=layer_name)
        elif layer_name.startswith('relu'):
            current = tf.nn.relu(current, name=layer_name)
        elif layer_name.startswith('pool'):
            # Average pooling is used here rather than max pooling.
            current = utils.avg_pool_2x2(current)
        net[layer_name] = current

    return net
def vgg_net(weights, image):
    """Build the VGG-style stack with atrous convolutions in block 5.

    Blocks 1-4 use ``utils.conv2d_basic``; block 5 uses
    ``utils.atrous_conv2d_basic`` with rate 2. Every conv output goes
    through ``utils.batch_norm_layer``. ``pool4`` uses
    ``utils.max_pool_1x1``; the other pools use ``utils.max_pool_3x3``.

    Args:
        weights: Indexable by layer position; for every conv layer index
            ``i``, ``weights[i][0][0][0][0]`` must unpack to
            ``(kernels, bias)``. Presumably the squeezed ``layers`` array of
            the pretrained VGG model -- confirm against the caller.
        image: Input tensor fed to the first layer.

    Returns:
        dict mapping each layer name (e.g. ``'conv5_4'``) to the tensor
        produced by that layer.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
        'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4',
    )

    net = {}
    current = image
    for idx, layer_name in enumerate(layers):
        layer_type = layer_name[:4]   # 'conv', 'relu' or 'pool'
        block_id = layer_name[4:5]    # single digit after the type prefix

        if layer_type == 'conv':
            kernels, bias = weights[idx][0][0][0][0]
            # Swap the first two kernel axes before creating the variable.
            kernel_var = utils.get_variable(
                np.transpose(kernels, (1, 0, 2, 3)), name=layer_name + "_w")
            bias_var = utils.get_variable(
                bias.reshape(-1), name=layer_name + "_b")

            if block_id == '5':
                # Block 5 switches to atrous convolution with rate 2.
                current = utils.atrous_conv2d_basic(
                    current, kernel_var, bias_var, 2)
            else:
                # Blocks 1-4 use the plain convolution.
                current = utils.conv2d_basic(current, kernel_var, bias_var)

            # Batch normalisation follows the convolution in every block.
            current = utils.batch_norm_layer(
                current, FLAGS.mode, scope_bn=layer_name)

        elif layer_type == 'relu':
            current = tf.nn.relu(current, name=layer_name)
            if FLAGS.debug:
                utils.add_activation_summary(current)

        elif layer_type == 'pool':
            # pool4 gets its own helper; the other pools share max_pool_3x3.
            pool_fn = (utils.max_pool_1x1 if block_id == '4'
                       else utils.max_pool_3x3)
            current = pool_fn(current)

        net[layer_name] = current

    return net
def inference(image, keep_prob, mean):
    """Assemble the segmentation graph: VGG backbone plus four atrous heads.

    The ``conv5_4`` output of ``vgg_net`` is pooled and fed to four parallel
    branches that share the W6/W7/W8 weights and differ only in the atrous
    rate of the first convolution (6, 12, 18, 24). The four branch outputs
    are summed, resized to the spatial size of ``image`` and passed through
    a softmax.

    Args:
        image: Input image tensor. ``tf.shape(image)[1:3]`` is used as the
            resize target, so it is presumably laid out as
            (batch, height, width, channels) -- confirm against the caller.
        keep_prob: Keep probability passed to every ``tf.nn.dropout`` call
            (the original comment describes it as the dropout placeholder).
        mean: Mean pixel value passed to ``utils.process_image``.

    Returns:
        A 3-tuple ``(prediction, resize_Fc8, softmax)`` where ``prediction``
        is the argmax over axis 3 of ``softmax`` with that axis re-added,
        ``resize_Fc8`` is the resized summed branch output, and ``softmax``
        is ``tf.nn.softmax(resize_Fc8)``.
    """
    print("setting up vgg initialized conv layers ...")
    # Fetch the pretrained VGG model data.
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    weights = np.squeeze(model_data['layers'])
    # Per the original comment: image pixel values minus the mean pixel.
    processed_image = utils.process_image(image, mean)

    atrous_rates = (6, 12, 18, 24)

    def _bn_relu_dropout(branches, stage):
        """Apply batch norm, then ReLU, then dropout to every branch.

        Each step is applied across all branches before the next step
        starts, so ops are created in the same order as the unrolled form.
        """
        # Batch norm has to come before the ReLU.
        # NOTE(review): every call passes scope_bn='Bn'; whether the layers
        # share variables depends on utils.batch_norm_layer -- confirm.
        normed = [
            utils.batch_norm_layer(branch, FLAGS.mode, scope_bn='Bn')
            for branch in branches
        ]
        activated = [
            tf.nn.relu(branch, name="relu%d_%d" % (stage, position))
            for position, branch in enumerate(normed, start=1)
        ]
        return [
            tf.nn.dropout(branch, keep_prob=keep_prob)
            for branch in activated
        ]

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        pool5 = utils.max_pool_1x1(image_net["conv5_4"])

        # Open question from the original author: could W6-W8 be
        # regularised, since they play the role of fully connected layers?
        # Also from the author: the kernel was reduced from 7x7 to 3x3 and
        # the channel count was cut down (512 here) -- possibly too few
        # features?
        # The new W6-W8 / b6-b8 variables are created by the utils helpers
        # rather than loaded from the pretrained weights.
        W6 = utils.weight_variable([3, 3, 512, 512], name="W6")
        b6 = utils.bias_variable([512], name="b6")
        # One atrous convolution per rate, all sharing W6 / b6.
        fc6 = [
            utils.atrous_conv2d_basic(pool5, W6, b6, rate)
            for rate in atrous_rates
        ]
        dropout6 = _bn_relu_dropout(fc6, 6)

        W7 = utils.weight_variable([1, 1, 512, 512], name="W7")
        b7 = utils.bias_variable([512], name="b7")
        fc7 = [utils.conv2d_basic(branch, W7, b7) for branch in dropout6]
        dropout7 = _bn_relu_dropout(fc7, 7)

        W8 = utils.weight_variable([1, 1, 512, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        fc8 = [utils.conv2d_basic(branch, W8, b8) for branch in dropout7]
        # Per the original comment: the branch outputs have the same size
        # and differ only in receptive field, so they are simply summed.
        Fc8 = tf.add_n(fc8, name="Fc8")

        shape = tf.shape(image)
        # Resize back to the input's spatial size (the original comment
        # describes this as a bilinear 8x upscale to 256x256).
        resize_Fc8 = tf.image.resize_images(Fc8, (shape[1], shape[2]))
        # Turn the forward-pass scores into a probability distribution.
        softmax = tf.nn.softmax(resize_Fc8)
        annotation_pred = tf.argmax(softmax, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), resize_Fc8, softmax