def encode_decode(image, keep_prob):
    """Build a small convolutional encoder with a skip-connected decoder.

    Four 3x3 conv + ReLU stages (16, 32, 64 and 128 filters) are each
    followed by ``max_pool_2x2`` (applied twice after the second stage).
    The decoder then runs three transposed convolutions: the first two are
    sized to, and summed with, the third and second pooled encoder outputs;
    the last one is sized to the spatial dimensions of ``image``.

    :param image: input tensor; the first kernel expects 3 input channels.
    :param keep_prob: accepted but never used by this function.
    :return: output of the last transposed convolution, requested with shape
        ``[batch, height, width, OUTPUT_CHANNELS]`` taken from ``image``.
    """
    # (scope name, input channels, output channels, number of 2x2 poolings)
    encoder_plan = (
        ('conv1', 3, 16, 1),
        ('conv2', 16, 32, 2),
        ('conv3', 32, 64, 1),
        ('conv4', 64, 128, 1),
    )

    with tf.variable_scope("encode_decode"):
        pooled_outputs = []
        features = image
        for stage_scope, in_channels, out_channels, n_pools in encoder_plan:
            with tf.variable_scope(stage_scope):
                kernel = weight_variable([3, 3, in_channels, out_channels])
                bias = bias_variable([out_channels])
                features = tf.nn.relu(conv2d(features, kernel) + bias)
                for _ in range(n_pools):
                    features = max_pool_2x2(features)
            pooled_outputs.append(features)

        # The first pooled output is not used as a skip connection.
        _, skip_mid, skip_deep, bottleneck = pooled_outputs

        # Upscale
        with tf.variable_scope('deconv1'):
            deep_channels = skip_deep.get_shape()[3].value
            kernel_up1 = weight_variable([3, 3, deep_channels, 128])
            bias_up1 = utils.bias_variable([deep_channels], name="b_t1")
            up1 = utils.conv2d_transpose_strided(
                bottleneck, kernel_up1, bias_up1,
                output_shape=tf.shape(skip_deep))
            merged1 = tf.add(up1, skip_deep, name="fuse_1")

        with tf.variable_scope('deconv2'):
            mid_channels = skip_mid.get_shape()[3].value
            kernel_up2 = weight_variable(
                [3, 3, mid_channels, deep_channels])
            bias_up2 = utils.bias_variable([mid_channels], name="b_t2")
            up2 = utils.conv2d_transpose_strided(
                merged1, kernel_up2, bias_up2,
                output_shape=tf.shape(skip_mid))
            merged2 = tf.add(up2, skip_mid, name="fuse_2")

        with tf.variable_scope('deconv3'):
            image_dims = tf.shape(image)
            full_shape = tf.stack(
                [image_dims[0], image_dims[1], image_dims[2],
                 OUTPUT_CHANNELS])
            kernel_up3 = weight_variable(
                [16, 16, OUTPUT_CHANNELS, mid_channels])
            bias_up3 = utils.bias_variable([OUTPUT_CHANNELS], name="b_t3")
            output = utils.conv2d_transpose_strided(
                merged2, kernel_up3, bias_up3,
                output_shape=full_shape, stride=8)

    return output
def upsample(pool5, pool4, pool3, pool2, conv9, image, scope, output_class):
    """Upscale ``conv9`` through four fuse layers and a final deconvolution.

    Each fuse step applies a 4x4 transposed convolution sized to one of the
    skip tensors (``pool5``, ``pool4``, ``pool3``, ``pool2`` in that order)
    and adds that skip tensor to the result. A last 16x16 transposed
    convolution is sized to the spatial dimensions of ``image``.

    :param pool5: first skip tensor (fused with the upscaled ``conv9``).
    :param pool4: second skip tensor.
    :param pool3: third skip tensor.
    :param pool2: fourth skip tensor.
    :param conv9: tensor to upscale; the first kernel is built for
        ``NUM_OF_CLASSESS`` input channels.
    :param image: tensor whose batch/height/width define the output shape.
    :param scope: variable scope name under which all variables are created.
    :param output_class: number of channels of the returned tensor.
    :return: output of the final transposed convolution.
    """
    with tf.variable_scope(scope):
        # do the upscaling using the fuse layers, deepest skip first
        fuse_plan = (
            (pool5, "W_t1", "b_t1", "fuse_1"),
            (pool4, "W_t2", "b_t2", "fuse_2"),
            (pool3, "W_t3", "b_t3", "fuse_3"),
            (pool2, "W_t4", "b_t4", "fuse_4"),
        )
        features = conv9
        in_channels = NUM_OF_CLASSESS
        for skip, kernel_name, bias_name, fuse_name in fuse_plan:
            out_channels = skip.get_shape()[3].value
            kernel = utils.weight_variable(
                [4, 4, out_channels, in_channels], name=kernel_name)
            bias = utils.bias_variable([out_channels], name=bias_name)
            upscaled = utils.conv2d_transpose_strided(
                features, kernel, bias, output_shape=tf.shape(skip))
            features = tf.add(upscaled, skip, name=fuse_name)
            in_channels = out_channels

        # do the final upscaling
        # NOTE(review): stride=2 with a 16x16 kernel targets the full image
        # size -- confirm pool2 is at half the resolution of `image`.
        image_dims = tf.shape(image)
        final_shape = tf.stack(
            [image_dims[0], image_dims[1], image_dims[2], output_class])
        final_kernel = utils.weight_variable(
            [16, 16, output_class, in_channels], name="W_t5")
        final_bias = utils.bias_variable([output_class], name="b_t5")
        result = utils.conv2d_transpose_strided(
            features, final_kernel, final_bias,
            output_shape=final_shape, stride=2)

    return result
def inference(image, keep_prob):
    """Build the segmentation graph on top of ``vgg_net`` activations.

    The image is normalised with the mean pixel from the downloaded model
    data, passed through ``vgg_net``, then through three extra conv layers
    (W6/W7/W8) with dropout, and finally upscaled with three transposed
    convolutions fused with the ``pool4`` and ``pool3`` activations.

    :param image: input image tensor.
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``.
    :return: tuple ``(prediction, logits)`` where ``logits`` is the last
        transposed-convolution output with ``NUM_OF_CLASSESS`` channels and
        ``prediction`` is its argmax over axis 3 with that axis re-added.
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer)

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # Upscale to pool4's shape and fuse with it.
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        # Upscale to pool3's shape and fuse with it.
        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        # Final upscale to the input image's spatial size.
        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def fcn(inputs, keep_prob):
    """Build a segmentation graph from a slim conv stack plus fuse layers.

    Five groups of 3x3 ``slim.conv2d`` layers (64, 128, 256, 512, 512
    filters), each followed by a 2x2 ``slim.max_pool2d``, feed three extra
    conv layers (W6/W7/W8) with dropout. The result is upscaled with three
    transposed convolutions fused with ``pool4`` and ``pool3``.

    :param inputs: input image tensor.
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``.
    :return: tuple ``(prediction, logits)`` where ``logits`` is the last
        transposed-convolution output with ``NUM_OF_CLASSESS`` channels and
        ``prediction`` is its argmax over axis 3 with that axis re-added.
    """
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
            weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        pool1 = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(pool1, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        pool2 = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(pool2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        pool3 = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(pool3, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        pool4 = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(pool4, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        pool5 = slim.max_pool2d(net, [2, 2], scope='pool5')

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # now to upscale to actual image size
        deconv_shape1 = pool4.get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1, output_shape=tf.shape(pool4))
        fuse_1 = tf.add(conv_t1, pool4, name="fuse_1")

        deconv_shape2 = pool3.get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(pool3))
        fuse_2 = tf.add(conv_t2, pool3, name="fuse_2")

        shape = tf.shape(inputs)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def generator(images, train_phase):
    """Build the generator graph: ``vgg_net`` encoder plus fused upscaling.

    ``images`` goes through a 3x3 conv (1 -> 64 channels) and ReLU, then
    through ``vgg_net``. The pooled ``relu5_3`` activation is upscaled with
    three transposed convolutions, fused with ``pool4`` and ``pool3``, to a
    2-channel prediction at the spatial size of ``images``.

    :param images: input tensor; the first kernel expects 1 input channel.
    :param train_phase: accepted but never used by this function.
    :return: ``images`` concatenated with the 2-channel prediction along
        axis 3 (op name ``pred_image``).
    """
    print("setting up vgg initialized conv layers ...")
    vgg_layers = np.squeeze(
        utils.get_model_data(FLAGS.model_dir, MODEL_URL)['layers'])

    with tf.variable_scope("generator"):
        stem_kernel = utils.weight_variable([3, 3, 1, 64], name="W0")
        stem_bias = utils.bias_variable([64], name="b0")
        stem = tf.nn.relu(
            utils.conv2d_basic(images, stem_kernel, stem_bias), name="relu")

        image_net = vgg_net(vgg_layers, stem)
        bottleneck = utils.max_pool_2x2(image_net["relu5_3"])
        skip4 = image_net["pool4"]
        skip3 = image_net["pool3"]

        # now to upscale to actual image size
        skip4_channels = skip4.get_shape()[3].value
        kernel_1 = utils.weight_variable(
            [4, 4, skip4_channels, bottleneck.get_shape()[3].value],
            name="W_t1")
        bias_1 = utils.bias_variable([skip4_channels], name="b_t1")
        up_1 = utils.conv2d_transpose_strided(
            bottleneck, kernel_1, bias_1, output_shape=tf.shape(skip4))
        merged_1 = tf.add(up_1, skip4, name="fuse_1")

        skip3_channels = skip3.get_shape()[3].value
        kernel_2 = utils.weight_variable(
            [4, 4, skip3_channels, skip4_channels], name="W_t2")
        bias_2 = utils.bias_variable([skip3_channels], name="b_t2")
        up_2 = utils.conv2d_transpose_strided(
            merged_1, kernel_2, bias_2, output_shape=tf.shape(skip3))
        merged_2 = tf.add(up_2, skip3, name="fuse_2")

        input_dims = tf.shape(images)
        pred_shape = tf.stack(
            [input_dims[0], input_dims[1], input_dims[2], 2])
        kernel_3 = utils.weight_variable(
            [16, 16, 2, skip3_channels], name="W_t3")
        bias_3 = utils.bias_variable([2], name="b_t3")
        pred = utils.conv2d_transpose_strided(
            merged_2, kernel_3, bias_3, output_shape=pred_shape, stride=8)

    return tf.concat([images, pred], 3, "pred_image")
def decoder_conv(embedding):
    """Decode an embedding into an image with transposed convolutions.

    A fully connected layer expands the embedding to a
    ``(IMAGE_SIZE // 16) x (IMAGE_SIZE // 16) x 256`` feature map, which is
    then passed through four transposed convolutions (128, 64, 32 and 3
    output channels). ReLU follows every layer except the last.

    :param embedding: 2-D tensor multiplied by a ``[512, ...]`` weight
        matrix, so its second dimension must be 512.
    :return: output of the last transposed convolution, requested with shape
        ``[batch, IMAGE_SIZE, IMAGE_SIZE, 3]``.
    """
    image_size = IMAGE_SIZE // 16

    with tf.name_scope("dec_fc"):
        W_fc1 = utils.weight_variable(
            [512, image_size * image_size * 256], name="W_fc1")
        b_fc1 = utils.bias_variable(
            [image_size * image_size * 256], name="b_fc1")
        h_fc1 = tf.nn.relu(tf.matmul(embedding, W_fc1) + b_fc1)

    # tf.stack replaces tf.pack, which was removed from TensorFlow 1.0.
    with tf.name_scope("dec_conv1"):
        h_reshaped = tf.reshape(
            h_fc1,
            tf.stack([tf.shape(h_fc1)[0], image_size, image_size, 256]))
        W_conv_t1 = utils.weight_variable([3, 3, 128, 256], name="W_conv_t1")
        b_conv_t1 = utils.bias_variable([128], name="b_conv_t1")
        deconv_shape = tf.stack(
            [tf.shape(h_fc1)[0], 2 * image_size, 2 * image_size, 128])
        h_conv_t1 = tf.nn.relu(
            utils.conv2d_transpose_strided(
                h_reshaped, W_conv_t1, b_conv_t1,
                output_shape=deconv_shape))

    with tf.name_scope("dec_conv2"):
        W_conv_t2 = utils.weight_variable([3, 3, 64, 128], name="W_conv_t2")
        b_conv_t2 = utils.bias_variable([64], name="b_conv_t2")
        deconv_shape = tf.stack(
            [tf.shape(h_conv_t1)[0], 4 * image_size, 4 * image_size, 64])
        h_conv_t2 = tf.nn.relu(
            utils.conv2d_transpose_strided(
                h_conv_t1, W_conv_t2, b_conv_t2,
                output_shape=deconv_shape))

    with tf.name_scope("dec_conv3"):
        W_conv_t3 = utils.weight_variable([3, 3, 32, 64], name="W_conv_t3")
        b_conv_t3 = utils.bias_variable([32], name="b_conv_t3")
        deconv_shape = tf.stack(
            [tf.shape(h_conv_t2)[0], 8 * image_size, 8 * image_size, 32])
        h_conv_t3 = tf.nn.relu(
            utils.conv2d_transpose_strided(
                h_conv_t2, W_conv_t3, b_conv_t3,
                output_shape=deconv_shape))

    with tf.name_scope("dec_conv4"):
        W_conv_t4 = utils.weight_variable([3, 3, 3, 32], name="W_conv_t4")
        b_conv_t4 = utils.bias_variable([3], name="b_conv_t4")
        deconv_shape = tf.stack(
            [tf.shape(h_conv_t3)[0], IMAGE_SIZE, IMAGE_SIZE, 3])
        # No activation on the final layer.
        pred_image = utils.conv2d_transpose_strided(
            h_conv_t3, W_conv_t4, b_conv_t4, output_shape=deconv_shape)

    return pred_image
def generator(z, train_mode):
    """Build the generator graph mapping a latent batch to images.

    ``z`` is projected by a fully connected layer, reshaped to a
    ``(IMAGE_SIZE // 16)``-sized feature map and upscaled by four transposed
    convolutions. Batch norm + ReLU follow every layer except the last,
    which is followed by ``tanh``. Activation summaries are added for each
    stage.

    All sizes use floor division so they stay integers on Python 3 as well
    as Python 2; IMAGE_SIZE is assumed to be a multiple of 16.

    :param z: latent tensor with ``FLAGS.z_dim`` columns.
    :param train_mode: forwarded to ``utils.batch_norm``.
    :return: ``tanh`` of the last transposed convolution, requested with
        shape ``[batch, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS]``.
    """
    base_size = IMAGE_SIZE // 16
    ch_0 = 64 * GEN_DIMENSION // 2
    ch_2 = 64 * GEN_DIMENSION // 4
    ch_3 = 64 * GEN_DIMENSION // 8
    ch_4 = 64 * GEN_DIMENSION // 16

    with tf.variable_scope("generator"):
        W_0 = utils.weight_variable(
            [FLAGS.z_dim, ch_0 * base_size * base_size], name="W_0")
        b_0 = utils.bias_variable(
            [ch_0 * base_size * base_size], name="b_0")
        z_0 = tf.matmul(z, W_0) + b_0
        h_0 = tf.reshape(z_0, [-1, base_size, base_size, ch_0])
        h_bn0 = utils.batch_norm(h_0, ch_0, train_mode, scope="gen_bn0")
        h_relu0 = tf.nn.relu(h_bn0, name='relu0')
        utils.add_activation_summary(h_relu0)

        # Variable names jump from *_0 to *_2: there is no stage 1.
        # tf.stack replaces tf.pack, which was removed from TensorFlow 1.0.
        W_2 = utils.weight_variable([5, 5, ch_2, ch_0], name="W_2")
        b_2 = utils.bias_variable([ch_2], name="b_2")
        deconv_shape = tf.stack(
            [tf.shape(h_relu0)[0], IMAGE_SIZE // 8, IMAGE_SIZE // 8, ch_2])
        h_conv_t2 = utils.conv2d_transpose_strided(
            h_relu0, W_2, b_2, output_shape=deconv_shape)
        h_bn2 = utils.batch_norm(h_conv_t2, ch_2, train_mode, scope="gen_bn2")
        h_relu2 = tf.nn.relu(h_bn2, name='relu2')
        utils.add_activation_summary(h_relu2)

        W_3 = utils.weight_variable([5, 5, ch_3, ch_2], name="W_3")
        b_3 = utils.bias_variable([ch_3], name="b_3")
        deconv_shape = tf.stack(
            [tf.shape(h_relu2)[0], IMAGE_SIZE // 4, IMAGE_SIZE // 4, ch_3])
        h_conv_t3 = utils.conv2d_transpose_strided(
            h_relu2, W_3, b_3, output_shape=deconv_shape)
        h_bn3 = utils.batch_norm(h_conv_t3, ch_3, train_mode, scope="gen_bn3")
        h_relu3 = tf.nn.relu(h_bn3, name='relu3')
        utils.add_activation_summary(h_relu3)

        W_4 = utils.weight_variable([5, 5, ch_4, ch_3], name="W_4")
        b_4 = utils.bias_variable([ch_4], name="b_4")
        deconv_shape = tf.stack(
            [tf.shape(h_relu3)[0], IMAGE_SIZE // 2, IMAGE_SIZE // 2, ch_4])
        h_conv_t4 = utils.conv2d_transpose_strided(
            h_relu3, W_4, b_4, output_shape=deconv_shape)
        h_bn4 = utils.batch_norm(h_conv_t4, ch_4, train_mode, scope="gen_bn4")
        h_relu4 = tf.nn.relu(h_bn4, name='relu4')
        utils.add_activation_summary(h_relu4)

        W_5 = utils.weight_variable(
            [5, 5, NUM_OF_CHANNELS, ch_4], name="W_5")
        b_5 = utils.bias_variable([NUM_OF_CHANNELS], name="b_5")
        deconv_shape = tf.stack(
            [tf.shape(h_relu4)[0], IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS])
        h_conv_t5 = utils.conv2d_transpose_strided(
            h_relu4, W_5, b_5, output_shape=deconv_shape)
        pred_image = tf.nn.tanh(h_conv_t5, name='pred_image')
        utils.add_activation_summary(pred_image)

    return pred_image
def generator(z, train_mode):
    """Build the generator graph mapping a latent batch to images.

    ``z`` is projected by a fully connected layer, reshaped to a
    ``(IMAGE_SIZE // 16)``-sized feature map and upscaled by four transposed
    convolutions. Batch norm + ReLU follow every layer except the last,
    which is followed by ``tanh``. Activation summaries are added for each
    stage.

    All sizes use floor division so they stay integers on Python 3 as well
    as Python 2; IMAGE_SIZE is assumed to be a multiple of 16.

    :param z: latent tensor with ``FLAGS.z_dim`` columns.
    :param train_mode: forwarded to ``utils.batch_norm``.
    :return: ``tanh`` of the last transposed convolution, requested with
        shape ``[batch, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS]``.
    """
    base_size = IMAGE_SIZE // 16
    ch_0 = 64 * GEN_DIMENSION // 2
    ch_2 = 64 * GEN_DIMENSION // 4
    ch_3 = 64 * GEN_DIMENSION // 8
    ch_4 = 64 * GEN_DIMENSION // 16

    with tf.variable_scope("generator"):
        W_0 = utils.weight_variable(
            [FLAGS.z_dim, ch_0 * base_size * base_size], name="W_0")
        b_0 = utils.bias_variable(
            [ch_0 * base_size * base_size], name="b_0")
        z_0 = tf.matmul(z, W_0) + b_0
        h_0 = tf.reshape(z_0, [-1, base_size, base_size, ch_0])
        h_bn0 = utils.batch_norm(h_0, ch_0, train_mode, scope="gen_bn0")
        h_relu0 = tf.nn.relu(h_bn0, name='relu0')
        utils.add_activation_summary(h_relu0)

        # Variable names jump from *_0 to *_2: there is no stage 1.
        # tf.stack replaces tf.pack, which was removed from TensorFlow 1.0.
        W_2 = utils.weight_variable([5, 5, ch_2, ch_0], name="W_2")
        b_2 = utils.bias_variable([ch_2], name="b_2")
        deconv_shape = tf.stack(
            [tf.shape(h_relu0)[0], IMAGE_SIZE // 8, IMAGE_SIZE // 8, ch_2])
        h_conv_t2 = utils.conv2d_transpose_strided(
            h_relu0, W_2, b_2, output_shape=deconv_shape)
        h_bn2 = utils.batch_norm(h_conv_t2, ch_2, train_mode, scope="gen_bn2")
        h_relu2 = tf.nn.relu(h_bn2, name='relu2')
        utils.add_activation_summary(h_relu2)

        W_3 = utils.weight_variable([5, 5, ch_3, ch_2], name="W_3")
        b_3 = utils.bias_variable([ch_3], name="b_3")
        deconv_shape = tf.stack(
            [tf.shape(h_relu2)[0], IMAGE_SIZE // 4, IMAGE_SIZE // 4, ch_3])
        h_conv_t3 = utils.conv2d_transpose_strided(
            h_relu2, W_3, b_3, output_shape=deconv_shape)
        h_bn3 = utils.batch_norm(h_conv_t3, ch_3, train_mode, scope="gen_bn3")
        h_relu3 = tf.nn.relu(h_bn3, name='relu3')
        utils.add_activation_summary(h_relu3)

        W_4 = utils.weight_variable([5, 5, ch_4, ch_3], name="W_4")
        b_4 = utils.bias_variable([ch_4], name="b_4")
        deconv_shape = tf.stack(
            [tf.shape(h_relu3)[0], IMAGE_SIZE // 2, IMAGE_SIZE // 2, ch_4])
        h_conv_t4 = utils.conv2d_transpose_strided(
            h_relu3, W_4, b_4, output_shape=deconv_shape)
        h_bn4 = utils.batch_norm(h_conv_t4, ch_4, train_mode, scope="gen_bn4")
        h_relu4 = tf.nn.relu(h_bn4, name='relu4')
        utils.add_activation_summary(h_relu4)

        W_5 = utils.weight_variable(
            [5, 5, NUM_OF_CHANNELS, ch_4], name="W_5")
        b_5 = utils.bias_variable([NUM_OF_CHANNELS], name="b_5")
        deconv_shape = tf.stack(
            [tf.shape(h_relu4)[0], IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS])
        h_conv_t5 = utils.conv2d_transpose_strided(
            h_relu4, W_5, b_5, output_shape=deconv_shape)
        pred_image = tf.nn.tanh(h_conv_t5, name='pred_image')
        utils.add_activation_summary(pred_image)

    return pred_image
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    The image is normalised with the mean pixel from the downloaded model
    data and passed through ``vgg_net``. Two 1x1 conv layers (W6/W7) are
    applied to the ``conv4_3`` activation, and the result is upscaled with
    three transposed convolutions fused with ``pool2`` and ``pool1``.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``.
    :return: output of the last transposed convolution, requested with shape
        ``[batch, height, width, NUM_OF_CLASSESS]`` taken from ``image``.
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(model_dir, MODEL_URL)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv4_3"]

        W6 = utils.weight_variable(
            [1, 1, 512, 1024], name="W6", init=weight_init)
        b6 = utils.bias_variable([1024], name="b6")
        conv6 = utils.conv2d_basic(conv_final_layer, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable(
            [1, 1, 1024, NUM_OF_CLASSESS], name="W7", init=weight_init)
        b7 = utils.bias_variable([NUM_OF_CLASSESS], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)

        # now to upscale to actual image size
        deconv_shape1 = image_net["pool2"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS],
            name="W_t1", init=weight_init)
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv7, W_t1, b_t1, output_shape=tf.shape(image_net["pool2"]))
        fuse_1 = tf.add(conv_t1, image_net["pool2"], name="fuse_1")

        deconv_shape2 = image_net["pool1"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2", init=weight_init)
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool1"]))
        # Named "fuse_2": reusing "fuse_1" here made TensorFlow silently
        # rename this op, hiding which fuse step it belongs to.
        fuse_2 = tf.add(conv_t2, image_net["pool1"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [4, 4, NUM_OF_CLASSESS, deconv_shape2[3].value],
            name="W_t3", init=weight_init)
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=2)

    return conv_t3
def FCN(image, keep_prob):
    """Build a small fully convolutional segmentation network.

    Four 3x3 conv + ReLU stages (32, 128, 256 and 512 filters) are each
    followed by ``max_pool_2x2`` (applied twice after the second stage).
    Three transposed convolutions then upscale the result, fused with the
    third and second pooled outputs, to the spatial size of ``image``.

    :param image: input tensor; the first kernel expects 3 input channels.
    :param keep_prob: accepted but never used by this function.
    :return: tuple ``(prediction, logits)`` where ``logits`` is the last
        transposed-convolution output with ``NUM_OF_CLASSES_FCN`` channels
        and ``prediction`` is its argmax over axis 3 with that axis re-added.
    """
    with tf.variable_scope("FCN"):
        # conv1
        with tf.variable_scope('conv1'):
            W_conv1 = weight_variable([3, 3, 3, 32])
            b_conv1 = bias_variable([32])
            h_conv1 = tf.nn.relu(conv2d(image, W_conv1) + b_conv1)
            h_pool1 = max_pool_2x2(h_conv1)

        # conv2
        with tf.variable_scope('conv2'):
            W_conv2 = weight_variable([3, 3, 32, 128])
            b_conv2 = bias_variable([128])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
            # Pooled twice; the final stride of 8 below relies on this.
            h_pool2 = max_pool_2x2(max_pool_2x2(h_conv2))

        # conv3
        with tf.variable_scope('conv3'):
            W_conv3 = weight_variable([3, 3, 128, 256])
            b_conv3 = bias_variable([256])
            h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
            h_pool3 = max_pool_2x2(h_conv3)

        # conv4
        with tf.variable_scope('conv4'):
            W_conv4 = weight_variable([3, 3, 256, 512])
            b_conv4 = bias_variable([512])
            h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
            h_pool4 = max_pool_2x2(h_conv4)

        # Upscale
        deconv_shape1 = h_pool3.get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, 512], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            h_pool4, W_t1, b_t1, output_shape=tf.shape(h_pool3))
        fuse_1 = tf.add(conv_t1, h_pool3, name="fuse_1")

        deconv_shape2 = h_pool2.get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(h_pool2))
        fuse_2 = tf.add(conv_t2, h_pool2, name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSES_FCN])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSES_FCN, deconv_shape2[3].value],
            name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSES_FCN], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    Calls ``inception_v3`` on the image and then builds three transposed
    convolutions fused with ``pool4`` and ``pool3`` activations.

    NOTE(review): this function looks unfinished. ``image_net`` and
    ``conv8`` are never assigned in this function, ``end_points['']`` uses
    an empty key, and ``net``/``up_sampling`` are never read. Confirm the
    intended inception end points before relying on it.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: forwarded as the fourth argument of ``inception_v3``.
    :return: tuple ``(prediction, logits)`` where ``logits`` is the last
        transposed-convolution output with ``NUM_OF_CLASSESS`` channels and
        ``prediction`` is its argmax over axis 3 with that axis re-added.
    """
    print("setting up inception_v3 initialized conv layers ...")

    with tf.variable_scope("inference"):
        net, end_points = inception_v3(image, NUM_OF_CLASSESS, True, keep_prob)

        # now to upscale to actual image size
        with tf.variable_scope('Upsampling'):
            with slim.arg_scope([slim.conv2d_transpose],
                                stride=2, padding='SAME'):
                # NOTE(review): empty end-point key -- placeholder?
                up_sampling = end_points['']

                # NOTE(review): `image_net` and `conv8` are not defined in
                # this function; they must come from module scope to work.
                deconv_shape1 = image_net["pool4"].get_shape()
                W_t1 = utils.weight_variable(
                    [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS],
                    name="W_t1")
                b_t1 = utils.bias_variable(
                    [deconv_shape1[3].value], name="b_t1")
                conv_t1 = utils.conv2d_transpose_strided(
                    conv8, W_t1, b_t1,
                    output_shape=tf.shape(image_net["pool4"]))
                fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

                deconv_shape2 = image_net["pool3"].get_shape()
                W_t2 = utils.weight_variable(
                    [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
                    name="W_t2")
                b_t2 = utils.bias_variable(
                    [deconv_shape2[3].value], name="b_t2")
                conv_t2 = utils.conv2d_transpose_strided(
                    fuse_1, W_t2, b_t2,
                    output_shape=tf.shape(image_net["pool3"]))
                fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

                shape = tf.shape(image)
                deconv_shape3 = tf.stack(
                    [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
                W_t3 = utils.weight_variable(
                    [16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value],
                    name="W_t3")
                b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
                conv_t3 = utils.conv2d_transpose_strided(
                    fuse_2, W_t3, b_t3,
                    output_shape=deconv_shape3, stride=8)

                # `axis` replaces the deprecated `dimension` keyword.
                annotation_pred = tf.argmax(
                    conv_t3, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def build_centers_layers(self, image, keep_prob):
    """Build the "centers" head on top of ``self.image_net``.

    Three conv layers (W6/W7/W8) with dropout are applied to the pooled
    ``conv5_3`` activation, then three transposed convolutions fused with
    ``pool4`` and ``pool3`` upscale the result to the spatial size of
    ``image`` with ``3 * (self.n_classes - 1)`` channels.

    :param image: tensor whose batch/height/width define the output shape.
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``.
    :return: tensor with ``3 * (self.n_classes - 1)`` channels; within each
        consecutive group of three channels the first two are passed through
        a sigmoid and the third through a ReLU.
    """
    with tf.variable_scope("centers"):
        pool5 = utils.max_pool_2x2(self.image_net["conv5_3"])

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if self.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if self.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, self.n_classes], name="W8")
        b8 = utils.bias_variable([self.n_classes], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        deconv_shape1 = self.image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, self.n_classes], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1,
            output_shape=tf.shape(self.image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, self.image_net["pool4"], name="fuse_1")

        deconv_shape2 = self.image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2,
            output_shape=tf.shape(self.image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, self.image_net["pool3"], name="fuse_2")

        # Three output channels per non-background class.
        out_channels = 3 * (self.n_classes - 1)
        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], out_channels])
        W_t3 = utils.weight_variable(
            [16, 16, out_channels, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([out_channels], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # Collect the activated slices and concatenate once, instead of
        # re-concatenating the growing tensor on every iteration.
        pieces = []
        for i in range(0, out_channels, 3):
            pieces.append(tf.math.sigmoid(conv_t3[:, :, :, i:i + 2]))
            pieces.append(tf.nn.relu(conv_t3[:, :, :, i + 2:i + 3]))
        activated = tf.concat(pieces, axis=-1)

    return activated
def deconv2d_layer_concat(x, name, W_s, concat_x, output_shape=None,
                          stride=2, stddev=0.02, if_relu=False):
    '''
    Deconv2d operator for U-Net concat.

    Applies a transposed convolution to ``x`` and passes the result, with
    ``concat_x``, to ``utils.crop_and_concat``.

    Args:
        x: inputs
        name: suffix for the variable names (``'W_' + name``,
            ``'b_' + name``) and prefix for the optional ReLU op.
        W_s: shape of weight; ``W_s[2]`` is also used as the bias size.
        concat_x: tensor handed to ``utils.crop_and_concat`` together with
            the deconvolution output.
        output_shape: shape after deconv2d. When ``None``, it is derived
            from ``x``: height and width doubled, channels halved.
        stride: stride forwarded to ``utils.conv2d_transpose_strided``.
        stddev: forwarded to ``utils.weight_variable``.
        if_relu: when true, apply ReLU before the concat.
    Returns:
        The result of ``utils.crop_and_concat(concat_x, conv_t)``.
    '''
    # Identity check: `== None` would go through the tensor's `__eq__` when
    # a tensor is passed as output_shape.
    if output_shape is None:
        x_shape = tf.shape(x)
        output_shape = tf.stack(
            [x_shape[0], x_shape[1] * 2, x_shape[2] * 2, x_shape[3] // 2])

    W_t = utils.weight_variable(W_s, stddev=stddev, name='W_' + name)
    b_t = utils.bias_variable([W_s[2]], name='b_' + name)
    conv_t = utils.conv2d_transpose_strided(x, W_t, b_t, output_shape, stride)

    if if_relu:
        conv_t = tf.nn.relu(conv_t, name=name + '_relu')

    conv_concat = utils.crop_and_concat(concat_x, conv_t)

    return conv_concat
def build_labels_layers(self, image, keep_prob):
    """Build the "labels" segmentation head on top of ``self.image_net``.

    Three conv layers (W6/W7/W8) with dropout are applied to the pooled
    ``conv5_3`` activation, then three transposed convolutions fused with
    ``pool4`` and ``pool3`` upscale the result to the spatial size of
    ``image`` with ``self.n_classes`` channels.

    :param image: tensor whose batch/height/width define the output shape.
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``.
    :return: tuple ``(prediction, logits)`` where ``logits`` is the last
        transposed-convolution output and ``prediction`` is its argmax over
        axis 3 with that axis re-added.
    """
    with tf.variable_scope("labels"):
        pool5 = utils.max_pool_2x2(self.image_net["conv5_3"])

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if self.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if self.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, self.n_classes], name="W8")
        b8 = utils.bias_variable([self.n_classes], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        deconv_shape1 = self.image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, self.n_classes], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1,
            output_shape=tf.shape(self.image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, self.image_net["pool4"], name="fuse_1")

        deconv_shape2 = self.image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2,
            output_shape=tf.shape(self.image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, self.image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], self.n_classes])
        W_t3 = utils.weight_variable(
            [16, 16, self.n_classes, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([self.n_classes], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def build_pose_layers(self, image, keep_prob, rois):
    """Build the "pose" regression head on top of ``self.image_net``.

    ``conv5_3`` and ``conv4_3`` are each upscaled by a transposed
    convolution to a 56x56x512 map, ROI-pooled with ``rois`` and passed
    through dropout. The two pooled results are added, summed over axis 1,
    flattened, and fed through two fully connected layers that use the
    ``fc6``/``fc7`` entries of ``self.vgg_fc`` as weights and biases. A
    final trainable layer plus ``tanh`` produces the output.

    :param image: accepted but never used by this function.
    :param keep_prob: keep probability forwarded to ``tf.nn.dropout``.
    :param rois: regions of interest handed to ``ROIPoolingLayer`` together
        with each upscaled feature map.
    :return: ``tanh`` of the final layer, with ``4 * (self.n_classes - 1)``
        columns.
    """
    with tf.variable_scope("pose"):
        # Branch 1: conv5_3 upscaled with stride 4.
        shape6 = tf.shape(self.image_net["conv5_3"])
        deconv_shape6 = tf.stack([shape6[0], 56, 56, 512])
        W_6 = utils.weight_variable([4, 4, 512, 512], name="W_6")
        b_6 = utils.bias_variable([512], name="b_6")
        conv_6 = utils.conv2d_transpose_strided(
            self.image_net["conv5_3"], W_6, b_6,
            output_shape=deconv_shape6, stride=4)

        roi_layer6 = ROIPoolingLayer(self.roi_pool_h, self.roi_pool_w)
        pooled_features6 = roi_layer6([conv_6, rois])
        pooled_features6 = tf.nn.dropout(pooled_features6,
                                         keep_prob=keep_prob)

        # Branch 2: conv4_3 upscaled with stride 2.
        shape7 = tf.shape(self.image_net["conv4_3"])
        deconv_shape7 = tf.stack([shape7[0], 56, 56, 512])
        W_7 = utils.weight_variable([2, 2, 512, 512], name="W_7")
        b_7 = utils.bias_variable([512], name="b_7")
        conv_7 = utils.conv2d_transpose_strided(
            self.image_net["conv4_3"], W_7, b_7,
            output_shape=deconv_shape7, stride=2)

        roi_layer7 = ROIPoolingLayer(self.roi_pool_h, self.roi_pool_w)
        pooled_features7 = roi_layer7([conv_7, rois])
        pooled_features7 = tf.nn.dropout(pooled_features7,
                                         keep_prob=keep_prob)

        # Merge both branches, then collapse axis 1.
        roi_add8 = tf.keras.layers.Add()([pooled_features6, pooled_features7])
        roi_add9 = tf.reduce_sum(roi_add8, axis=1)

        # Flatten everything after the leading dimension; this needs those
        # dimensions to be statically known.
        shape = roi_add9.get_shape().as_list()
        dim = 1
        for d in shape[1:]:
            dim *= d
        roi_add9 = tf.reshape(roi_add9, [-1, dim])

        fc9_w = tf.reshape(self.vgg_fc["fc6"][0], [dim, 4096])
        fc9 = tf.nn.bias_add(tf.matmul(roi_add9, fc9_w),
                             self.vgg_fc["fc6"][1])
        fc_dropout9 = tf.nn.dropout(fc9, keep_prob=keep_prob)

        fc10_w = tf.reshape(self.vgg_fc["fc7"][0], [4096, 4096])
        fc10 = tf.nn.bias_add(tf.matmul(fc_dropout9, fc10_w),
                              self.vgg_fc["fc7"][1])
        fc_dropout10 = tf.nn.dropout(fc10, keep_prob=keep_prob)

        W11 = utils.weight_variable(
            [4096, 4 * (self.n_classes - 1)], name="W11")
        b11 = utils.bias_variable([4 * (self.n_classes - 1)], name="b11")
        fc11 = tf.nn.bias_add(tf.matmul(fc_dropout10, W11), b11)
        tanh11 = tf.math.tanh(fc11)

    return tanh11
def inference(image, keep_prob):
    """
    Semantic segmentation graph built on a VGG trunk cut at ``conv4_4``.

    The trunk output goes through dropout and a 1x1 scoring convolution, is
    upsampled to the shape of ``pool2`` and added to it, and is then
    upsampled with stride 4 to the spatial size of ``image``.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability for the dropout on the trunk output
    :return: ``(prediction, logits)`` where ``prediction`` is the int32
        argmax over axis 3 with a trailing axis added, and ``logits`` is the
        final transposed-convolution output
    """
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    # Mean taken over the first two axes of the stored normalization image.
    stored_mean = vgg_data['normalization'][0][0][0]
    mean_pixel = np.mean(stored_mean, axis=(0, 1))
    vgg_weights = np.squeeze(vgg_data['layers'])

    net_input = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        last_vgg_layer = 'conv4_4'
        net = vgg_net(vgg_weights, net_input, end_layer=last_vgg_layer)
        trunk_out = tf.nn.dropout(net[last_vgg_layer], keep_prob=keep_prob)

        # 1x1 convolution producing one score map per class.
        score_kernel = utils.weight_variable([1, 1, 512, NUM_OF_CLASSES],
                                             name="W_final")
        score_bias = utils.bias_variable([NUM_OF_CLASSES], name="b_final")
        scores = utils.conv2d_basic(trunk_out, score_kernel, score_bias)
        if FLAGS.debug:
            utils.add_activation_summary(scores)

        # Upsample the scores to pool2's shape and fuse with pool2.
        pool2 = net["pool2"]
        pool2_channels = pool2.get_shape()[3].value
        up2_kernel = utils.weight_variable(
            [4, 4, pool2_channels, NUM_OF_CLASSES], name="W_t2")
        up2_bias = utils.bias_variable([pool2_channels], name="b_t2")
        up2 = utils.conv2d_transpose_strided(
            scores, up2_kernel, up2_bias, output_shape=tf.shape(pool2))
        fused = tf.add(up2, pool2, name="fuse_2")

        # Final stride-4 upsampling to the input's height and width.
        in_shape = tf.shape(image)
        full_shape = tf.stack(
            [in_shape[0], in_shape[1], in_shape[2], NUM_OF_CLASSES])
        up3_kernel = utils.weight_variable(
            [8, 8, NUM_OF_CLASSES, pool2_channels], name="W_t3")
        up3_bias = utils.bias_variable([NUM_OF_CLASSES], name="b_t3")
        logits = utils.conv2d_transpose_strided(
            fused, up3_kernel, up3_bias, output_shape=full_shape, stride=4)

        labels = tf.argmax(logits, axis=3, name="prediction",
                           output_type=tf.int32)

    return tf.expand_dims(labels, axis=3), logits
def inference_strided(input_image):
    """Build a small strided encoder/decoder that maps an image to an image.

    Three convolutions (the 2nd and 3rd strided) are followed by two strided
    transposed convolutions and a final 9x9 convolution with ``tanh``.
    A histogram summary is registered for every weight and bias.

    Histogram summaries use ``tf.summary.histogram``; the former
    ``tf.histogram_summary`` entry point no longer exists in TensorFlow 1.x,
    which the rest of this file targets (``tf.stack``, ``tf.keras`` ...).

    :param input_image: input to the first convolution; the first filter is
        ``[9, 9, 3, 32]``, so 3 input channels are expected.
    :return: ``tanh`` of the last convolution (3 output channels).
    """

    def _params(tag, filter_shape, bias_size):
        # Create weight then bias, and register both histograms, in the same
        # order as the original straight-line code.
        weights = utils.weight_variable(filter_shape)
        bias = utils.bias_variable([bias_size])
        tf.summary.histogram("W" + tag, weights)
        tf.summary.histogram("b" + tag, bias)
        return weights, bias

    # Encoder.
    W1, b1 = _params("1", [9, 9, 3, 32], 32)
    h_conv1 = tf.nn.relu(utils.conv2d_basic(input_image, W1, b1))

    W2, b2 = _params("2", [3, 3, 32, 64], 64)
    h_conv2 = tf.nn.relu(utils.conv2d_strided(h_conv1, W2, b2))

    W3, b3 = _params("3", [3, 3, 64, 128], 128)
    h_conv3 = tf.nn.relu(utils.conv2d_strided(h_conv2, W3, b3))

    # Decoder ("upstrides"): the bias length equals the third filter entry.
    W4, b4 = _params("4", [3, 3, 64, 128], 64)
    h_conv4 = tf.nn.relu(utils.conv2d_transpose_strided(h_conv3, W4, b4))

    W5, b5 = _params("5", [3, 3, 32, 64], 32)
    h_conv5 = tf.nn.relu(utils.conv2d_transpose_strided(h_conv4, W5, b5))

    W6, b6 = _params("6", [9, 9, 32, 3], 3)
    pred_image = tf.nn.tanh(utils.conv2d_basic(h_conv5, W6, b6))

    return pred_image
def deconv_layer(input, r_field, in_channels, out_channels, out_shape, nr, stride=2):
    """Create a strided transposed convolution and its variables.

    The ``stride`` argument is now forwarded to
    ``utils.conv2d_transpose_strided``; previously it was accepted but
    ignored, so any caller asking for a stride other than the helper's
    default silently got the default.

    :param input: tensor fed to the transposed convolution.
    :param r_field: height and width of the square filter.
    :param in_channels: channel count of ``input`` (4th filter dimension).
    :param out_channels: channel count of the result (3rd filter dimension
        and bias length).
    :param out_shape: output shape passed to the transposed convolution.
    :param nr: string suffix for the variable names ``W_t<nr>`` / ``b_t<nr>``.
    :param stride: stride of the transposed convolution.
    :return: result of ``utils.conv2d_transpose_strided``.
    """
    W = utils.weight_variable([r_field, r_field, out_channels, in_channels],
                              name="W_t" + nr)
    b = utils.bias_variable([out_channels], name="b_t" + nr)
    return utils.conv2d_transpose_strided(input, W, b, out_shape, stride=stride)
def deconv2d_layer(x, name, W_s, output_shape=None, stride=2):
    """Create a strided transposed convolution together with its variables.

    Args:
        x: input passed to the transposed convolution.
        name: suffix used for the variable names (``W_<name>``, ``b_<name>``)
            and in the printed shape message.
        W_s: shape of the weight; ``W_s[2]`` is used as the bias length.
        output_shape: forwarded to ``utils.conv2d_transpose_strided``.
        stride: forwarded to ``utils.conv2d_transpose_strided``.

    Returns:
        The result of ``utils.conv2d_transpose_strided``.
    """
    bias_len = W_s[2]
    kernel = utils.weight_variable(W_s, name='W_' + name)
    bias = utils.bias_variable([bias_len], name='b_' + name)
    result = utils.conv2d_transpose_strided(x, kernel, bias, output_shape, stride)
    # Report the static shape of the new layer while the graph is built.
    print('conv_%s: ' % name, result.get_shape())
    return result
def inpainter(embedding, train_mode):
    """Decode an embedding into an image: one dense layer, then four
    transposed convolutions.

    Shape tensors are built with ``tf.stack``; the ``tf.pack`` name used
    before was removed in TensorFlow 1.0, which the rest of this file targets.

    :param embedding: input to the dense layer; it is multiplied with a
        ``[1024, ...]`` weight matrix, so its last dimension must be 1024.
    :param train_mode: forwarded to ``utils.batch_norm``.
    :return: output of the fourth transposed convolution (3 channels; no
        batch norm or activation is applied to it).
    """
    with tf.variable_scope("context_inpainter"):
        image_size = IMAGE_SIZE // 32

        def _upsample(inputs, idx, out_channels, in_channels, scale):
            # Third filter entry is the output channel count (it matches the
            # bias length and the last entry of the requested output shape).
            W = utils.weight_variable_xavier_initialized(
                [3, 3, out_channels, in_channels], name="W_conv_t%d" % idx)
            b = utils.bias_variable([out_channels], name="b_conv_t%d" % idx)
            deconv_shape = tf.stack([tf.shape(inputs)[0],
                                     scale * image_size,
                                     scale * image_size,
                                     out_channels])
            return utils.conv2d_transpose_strided(
                inputs, W, b, output_shape=deconv_shape)

        def _bn_relu(inputs, idx, channels):
            normed = utils.batch_norm(inputs, channels, train_mode,
                                      scope="conv_t%d_bn" % idx)
            return tf.nn.relu(normed)

        with tf.name_scope("dec_fc"):
            fc_units = image_size * image_size * 512
            W_fc = utils.weight_variable([1024, fc_units], name="W_fc")
            b_fc = utils.bias_variable([fc_units], name="b_fc")
            h_fc = tf.nn.relu(tf.matmul(embedding, W_fc) + b_fc)

        with tf.name_scope("dec_conv1"):
            # Turn the dense output into a [batch, s, s, 512] feature map.
            h_reshaped = tf.reshape(
                h_fc,
                tf.stack([tf.shape(h_fc)[0], image_size, image_size, 512]))
            h_relu_t1 = _bn_relu(_upsample(h_reshaped, 1, 256, 512, 2), 1, 256)

        with tf.name_scope("dec_conv2"):
            h_relu_t2 = _bn_relu(_upsample(h_relu_t1, 2, 128, 256, 4), 2, 128)

        with tf.name_scope("dec_conv3"):
            h_relu_t3 = _bn_relu(_upsample(h_relu_t2, 3, 64, 128, 8), 3, 64)

        with tf.name_scope("dec_conv4"):
            pred_image = _upsample(h_relu_t3, 4, 3, 64, 16)

    return pred_image
def decoder_conv(embedding):
    """Decode an embedding into an image: one dense layer, then four
    transposed convolutions (ReLU after the first three).

    Shape tensors are built with ``tf.stack``; the ``tf.pack`` name used
    before was removed in TensorFlow 1.0, which the rest of this file targets.

    :param embedding: input to the dense layer; it is multiplied with a
        ``[512, ...]`` weight matrix, so its last dimension must be 512.
    :return: output of the fourth transposed convolution, requested with
        shape ``[batch, IMAGE_SIZE, IMAGE_SIZE, 3]``; no activation applied.
    """
    image_size = IMAGE_SIZE // 16

    def _upsample(inputs, batch_ref, idx, out_channels, in_channels, size):
        # ``batch_ref`` supplies the dynamic batch dimension of the output.
        W = utils.weight_variable([3, 3, out_channels, in_channels],
                                  name="W_conv_t%d" % idx)
        b = utils.bias_variable([out_channels], name="b_conv_t%d" % idx)
        deconv_shape = tf.stack(
            [tf.shape(batch_ref)[0], size, size, out_channels])
        return utils.conv2d_transpose_strided(
            inputs, W, b, output_shape=deconv_shape)

    with tf.name_scope("dec_fc"):
        fc_units = image_size * image_size * 256
        W_fc1 = utils.weight_variable([512, fc_units], name="W_fc1")
        b_fc1 = utils.bias_variable([fc_units], name="b_fc1")
        h_fc1 = tf.nn.relu(tf.matmul(embedding, W_fc1) + b_fc1)

    with tf.name_scope("dec_conv1"):
        # Turn the dense output into a [batch, s, s, 256] feature map.
        h_reshaped = tf.reshape(
            h_fc1,
            tf.stack([tf.shape(h_fc1)[0], image_size, image_size, 256]))
        h_conv_t1 = tf.nn.relu(
            _upsample(h_reshaped, h_fc1, 1, 128, 256, 2 * image_size))

    with tf.name_scope("dec_conv2"):
        h_conv_t2 = tf.nn.relu(
            _upsample(h_conv_t1, h_conv_t1, 2, 64, 128, 4 * image_size))

    with tf.name_scope("dec_conv3"):
        h_conv_t3 = tf.nn.relu(
            _upsample(h_conv_t2, h_conv_t2, 3, 32, 64, 8 * image_size))

    with tf.name_scope("dec_conv4"):
        # The last stage targets IMAGE_SIZE directly.
        pred_image = _upsample(h_conv_t3, h_conv_t3, 4, 3, 32, IMAGE_SIZE)

    return pred_image
def inference(self, images, inference_name, channel, keep_prob):
    """
    Semantic segmentation network definition (VGG trunk + two skip fusions).

    An extra 3x3 convolution adapts ``channel`` input channels to 64 before
    the VGG layers; the scores are then fused with ``pool4`` and ``pool3``
    and upsampled with stride 8 to the spatial size of ``images``.

    :param images: input images. Should have values in range 0-255
    :param inference_name: name of the variable scope the graph is built in
    :param channel: number of input channels of the first convolution
    :param keep_prob: keep probability of the two dropout layers
    :return: output of the last transposed convolution (the logits)
    """
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(cfgs.model_dir, MODEL_URL)

    # A single scalar mean over the whole stored normalization image.
    stored_mean = vgg_data['normalization'][0][0][0]
    mean_pixel = np.mean(stored_mean)
    self.mean_ = mean_pixel
    vgg_weights = np.squeeze(vgg_data['layers'])

    net_input = utils.process_image(images, mean_pixel)

    with tf.variable_scope(inference_name):
        # Input adapter: `channel` -> 64 feature maps.
        in_kernel = utils.weight_variable([3, 3, channel, 64], name="W1")
        in_bias = utils.bias_variable([64], name="b1")
        adapted = tf.nn.relu(
            utils.conv2d_basic(net_input, in_kernel, in_bias), name='relu1')

        # Layers built from the loaded weights.
        net = self.vgg_net(vgg_weights, adapted)
        pool5 = utils.max_pool_2x2(net["conv5_3"])

        k6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        bias6 = utils.bias_variable([4096], name="b6")
        act6 = tf.nn.relu(utils.conv2d_basic(pool5, k6, bias6), name="relu6")
        drop6 = tf.nn.dropout(act6, keep_prob=keep_prob)

        k7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        bias7 = utils.bias_variable([4096], name="b7")
        act7 = tf.nn.relu(utils.conv2d_basic(drop6, k7, bias7), name="relu7")
        drop7 = tf.nn.dropout(act7, keep_prob=keep_prob)

        # 1x1 convolution producing one score map per class.
        k8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        bias8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        scores = utils.conv2d_basic(drop7, k8, bias8)

        # First fusion: upsample the scores to pool4's shape and add pool4.
        pool4 = net["pool4"]
        pool4_channels = pool4.get_shape()[3].value
        up1_kernel = utils.weight_variable(
            [4, 4, pool4_channels, NUM_OF_CLASSESS], name="W_t1")
        up1_bias = utils.bias_variable([pool4_channels], name="b_t1")
        up1 = utils.conv2d_transpose_strided(
            scores, up1_kernel, up1_bias, output_shape=tf.shape(pool4))
        fused_pool4 = tf.add(up1, pool4, name="fuse_1")

        # Second fusion: upsample to pool3's shape and add pool3.
        pool3 = net["pool3"]
        pool3_channels = pool3.get_shape()[3].value
        up2_kernel = utils.weight_variable(
            [4, 4, pool3_channels, pool4_channels], name="W_t2")
        up2_bias = utils.bias_variable([pool3_channels], name="b_t2")
        up2 = utils.conv2d_transpose_strided(
            fused_pool4, up2_kernel, up2_bias, output_shape=tf.shape(pool3))
        fused_pool3 = tf.add(up2, pool3, name="fuse_2")

        # Final stride-8 upsampling to the input's height and width.
        in_shape = tf.shape(images)
        full_shape = tf.stack(
            [in_shape[0], in_shape[1], in_shape[2], NUM_OF_CLASSESS])
        up3_kernel = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, pool3_channels], name="W_t3")
        up3_bias = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fused_pool3, up3_kernel, up3_bias,
            output_shape=full_shape, stride=8)

        # Not returned, but keeps an op named "prediction" in the graph.
        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return conv_t3
def segment(image, keep_prob_conv, input_channels, output_channels, scope):
    """Build an encoder/decoder segmentation graph with four skip fusions.

    Five conv -> ReLU -> 2x2 max-pool -> dropout stages ("2" to "6") are
    followed by a 3x3 convolution to 4096 channels.  The result is upsampled
    step by step to the shapes of pool5, pool4, pool3 and pool2 (adding each
    of them), and finally with stride 2 to the spatial size of ``image``.

    :param image: input tensor of the first convolution.
    :param keep_prob_conv: keep probability of the dropout after each pool.
    :param input_channels: channel count of ``image``.
    :param output_channels: channel count of the returned logits.
    :param scope: name of the variable scope the graph is built in.
    :return: ``(prediction, logits)``: argmax over axis 3 with a trailing
        axis added, and the output of the last transposed convolution.
    """
    with tf.variable_scope(scope):
        # ---- downsample -------------------------------------------------
        # NOTE: the original comments assume a 224x224 input that is halved
        # by every pooling step (224 -> 112 -> 56 -> 28 -> 14 -> 7).
        pools = {}
        features = image
        in_ch = input_channels
        for idx, out_ch in zip(range(2, 7), (64, 128, 256, 512, 512)):
            kernel = utils.weight_variable([3, 3, in_ch, out_ch],
                                           name="W%d" % idx)
            bias = utils.bias_variable([out_ch], name="b%d" % idx)
            conv = utils.conv2d_basic(features, kernel, bias,
                                      name="conv%d" % idx)
            activated = tf.nn.relu(conv, name="relu%d" % idx)
            pools[idx] = utils.max_pool_2x2(activated)
            features = tf.nn.dropout(pools[idx], keep_prob=keep_prob_conv)
            in_ch = out_ch

        # Seventh layer: no pooling, so the spatial size is unchanged.
        W7 = utils.weight_variable([3, 3, 512, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(features, W7, b7, name="conv7")

        # ---- upsample ---------------------------------------------------
        # Each step upsamples to a skip tensor's shape and adds the skip
        # (the un-dropped pooling output).
        upsampled = conv7
        prev_ch = 4096
        skips = (pools[5], pools[4], pools[3], pools[2])
        for step, skip in enumerate(skips, start=1):
            skip_ch = skip.get_shape()[3].value
            kernel_t = utils.weight_variable([4, 4, skip_ch, prev_ch],
                                             name="W_t%d" % step)
            bias_t = utils.bias_variable([skip_ch], name="b_t%d" % step)
            conv_t = utils.conv2d_transpose_strided(
                upsampled, kernel_t, bias_t, output_shape=tf.shape(skip))
            upsampled = tf.add(conv_t, skip, name="fuse_%d" % step)
            prev_ch = skip_ch

        # Final upscaling to the input's height and width.
        in_shape = tf.shape(image)
        full_shape = tf.stack(
            [in_shape[0], in_shape[1], in_shape[2], output_channels])
        W_t5 = utils.weight_variable(
            [16, 16, output_channels, prev_ch], name="W_t5")
        b_t5 = utils.bias_variable([output_channels], name="b_t5")
        conv_t5 = utils.conv2d_transpose_strided(
            upsampled, W_t5, b_t5, output_shape=full_shape, stride=2)

        annotation_pred = tf.argmax(conv_t5, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t5
def __init__(self, image, mask):
    """Build the segmentation graph, its loss and its metric.

    Five conv + 2x2 max-pool stages (ReLU on the first four only) are
    followed by five transposed convolutions; the first four are each added
    to the pooling output of matching shape, the last one produces
    ``NUM_OF_CLASSES`` channels at the spatial size of ``image``.

    Attributes set: ``predictions``, ``last_conv_layer``, ``softmax``,
    ``original_image``, ``mask``, ``loss`` and ``accuracy`` (the value
    returned by ``tf.metrics.mean_iou`` with ``num_classes=2``).

    :param image: input tensor; the first filter expects 3 channels.
    :param mask: label tensor; its axis 3 is squeezed before the loss.
    """
    # ---- encoder: scopes conv1 .. conv5 ----------------------------------
    pools = [None]  # index 0 unused so that pools[k] is the k-th pooling output
    features = image
    in_ch = 3
    for idx, out_ch in enumerate((8, 16, 32, 64, 128), start=1):
        with tf.variable_scope('conv%d' % idx):
            kernel = weight_variable([3, 3, in_ch, out_ch])
            bias = bias_variable([out_ch])
            pre_activation = conv2d(features, kernel) + bias
            # conv5 is pooled without a ReLU, as in the original code.
            if idx < 5:
                activated = tf.nn.relu(pre_activation)
            else:
                activated = pre_activation
            features = max_pool_2x2(activated)
        pools.append(features)
        in_ch = out_ch

    # ---- decoder: scopes tconv4 .. tconv1 --------------------------------
    upsampled = pools[5]
    for step in range(1, 5):
        skip = pools[5 - step]
        deeper = pools[6 - step]
        with tf.variable_scope('tconv%d' % (5 - step)):
            kernel_t = utils.weight_variable(
                [4, 4,
                 skip.get_shape()[3].value,
                 deeper.get_shape()[3].value],
                name="W_t%d" % step)
            bias_t = utils.bias_variable([skip.get_shape()[3].value],
                                         name="b_t%d" % step)
            conv_t = utils.conv2d_transpose_strided(
                upsampled, kernel_t, bias_t, output_shape=tf.shape(skip))
            # The fused tensor has the shape of the skip connection.
            upsampled = tf.add(conv_t, skip, name="fuse_%d" % step)

    with tf.variable_scope('tconv0'):
        # One tf.shape op per axis, as in the original graph.
        output_shape = tf.stack(
            [tf.shape(image)[axis] for axis in range(3)] + [NUM_OF_CLASSES])
        W_t5 = utils.weight_variable(
            [4, 4, NUM_OF_CLASSES, upsampled.get_shape()[3].value],
            name="W_t5")
        b_t5 = utils.bias_variable([NUM_OF_CLASSES], name="b_t5")
        conv_t5 = utils.conv2d_transpose_strided(
            upsampled, W_t5, b_t5, output_shape=output_shape)

    annotation_pred = tf.argmax(conv_t5, dimension=3, name="prediction")

    self.predictions = tf.expand_dims(annotation_pred, dim=3)
    self.last_conv_layer = conv_t5
    self.softmax = tf.nn.softmax(conv_t5)
    self.original_image = image
    self.mask = mask

    per_pixel_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=conv_t5,
        labels=tf.squeeze(mask, squeeze_dims=[3]),
        name="entropy")
    self.loss = tf.reduce_mean(per_pixel_loss)

    # Mean IoU rather than plain accuracy.
    # NOTE(review): num_classes is fixed to 2 here while the logits use
    # NUM_OF_CLASSES - confirm they agree.
    self.accuracy = tf.metrics.mean_iou(self.mask,
                                        self.predictions,
                                        num_classes=2,
                                        weights=None,
                                        metrics_collections=None,
                                        updates_collections=None,
                                        name=None)
def u_net(image, phase_train):
    """Build a U-Net style graph.

    Five encoder levels (two 3x3 conv+ReLU each, 32 to 512 channels; the
    first four are max-pooled, all are batch-normalised) are mirrored by
    four decoder levels.  Each decoder level upsamples with a 2x2
    transposed convolution to the shape of the matching encoder activation,
    concatenates the two on axis 3, and applies two 3x3 conv+ReLU and batch
    norm.  A 1x1 convolution produces ``NUM_OF_CLASSESS`` score maps.

    :param image: input tensor; its static ``shape[3]`` gives the input
        channel count.
    :param phase_train: forwarded to ``utils.batch_norm``.
    :return: ``(prediction, scores)``: argmax of the scores over axis 3 (no
        extra axis added) and the score maps.
    """

    def _conv_relu(inputs, tag, in_ch, out_ch):
        # 3x3 convolution + ReLU; variables/ops are named w<tag>, b<tag>,
        # relu<tag>.
        kernel = utils.weight_variable([3, 3, in_ch, out_ch], name="w" + tag)
        bias = utils.bias_variable([out_ch], name="b" + tag)
        conv = utils.conv2d_basic(inputs, kernel, bias)
        return tf.nn.relu(conv, name="relu" + tag)

    def _batch_norm(inputs, level):
        return utils.batch_norm(inputs, inputs.get_shape()[3], phase_train,
                                scope="bn%d" % level)

    with tf.variable_scope("u_net"):
        # ---- contracting path (levels 1-5) ------------------------------
        skips = []  # pre-pooling activations relu1_2 .. relu4_2
        features = image
        in_ch = int(image.shape[3])
        for level, out_ch in enumerate((32, 64, 128, 256, 512), start=1):
            features = _conv_relu(features, "%d_1" % level, in_ch, out_ch)
            features = _conv_relu(features, "%d_2" % level, out_ch, out_ch)
            if level < 5:
                skips.append(features)
                features = utils.max_pool_2x2(features)
            features = _batch_norm(features, level)
            in_ch = out_ch

        # ---- expanding path (levels 6-9, "up6" .. "up9") ----------------
        decoder_plan = zip((256, 128, 64, 32), reversed(skips))
        for step, (out_ch, skip) in enumerate(decoder_plan, start=1):
            level = step + 5
            kernel_t = utils.weight_variable([2, 2, out_ch, 2 * out_ch],
                                             name="W_t%d" % step)
            bias_t = utils.bias_variable([out_ch], name="b_t%d" % step)
            upsampled = utils.conv2d_transpose_strided(
                features, kernel_t, bias_t, output_shape=tf.shape(skip))
            merged = tf.concat([upsampled, skip], 3)
            features = _conv_relu(merged, "%d_1" % level, 2 * out_ch, out_ch)
            features = _conv_relu(features, "%d_2" % level, out_ch, out_ch)
            features = _batch_norm(features, level)

        # ---- output score map -------------------------------------------
        w10 = utils.weight_variable([1, 1, 32, NUM_OF_CLASSESS], name="w10")
        b10 = utils.bias_variable([NUM_OF_CLASSESS], name="b10")
        conv10 = utils.conv2d_basic(features, w10, b10)

        annotation_pred = tf.argmax(conv10, dimension=3, name="prediction")

    return annotation_pred, conv10
def inference(self, image, keep_prob):
    """
    Semantic segmentation network definition (VGG trunk + two skip fusions).

    The layers added on top of the VGG trunk are created inside the nested
    variable scope ``inference/FCN``.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability of the two dropout layers
    :return: ``(prediction, logits)``: argmax over axis 3 with a trailing
        axis added, and the output of the last transposed convolution
    """
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(self.model_dir, MODEL_URL)

    # Mean taken over the first two axes of the stored normalization image.
    stored_mean = vgg_data['normalization'][0][0][0]
    mean_pixel = np.mean(stored_mean, axis=(0, 1))
    vgg_weights = np.squeeze(vgg_data['layers'])

    net_input = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        net = self.vgg_net(vgg_weights, net_input)
        pool5 = utils.max_pool_2x2(net["conv5_3"])

        with tf.variable_scope("FCN"):
            k6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
            bias6 = utils.bias_variable([4096], name="b6")
            act6 = tf.nn.relu(utils.conv2d_basic(pool5, k6, bias6),
                              name="relu6")
            drop6 = tf.nn.dropout(act6, keep_prob=keep_prob)

            k7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
            bias7 = utils.bias_variable([4096], name="b7")
            act7 = tf.nn.relu(utils.conv2d_basic(drop6, k7, bias7),
                              name="relu7")
            drop7 = tf.nn.dropout(act7, keep_prob=keep_prob)

            # 1x1 convolution producing one score map per class.
            k8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSES],
                                       name="W8")
            bias8 = utils.bias_variable([NUM_OF_CLASSES], name="b8")
            scores = utils.conv2d_basic(drop7, k8, bias8)

            # First fusion: upsample to pool4's shape and add pool4.
            pool4 = net["pool4"]
            pool4_channels = pool4.get_shape()[3].value
            up1_kernel = utils.weight_variable(
                [4, 4, pool4_channels, NUM_OF_CLASSES], name="W_t1")
            up1_bias = utils.bias_variable([pool4_channels], name="b_t1")
            up1 = utils.conv2d_transpose_strided(
                scores, up1_kernel, up1_bias, output_shape=tf.shape(pool4))
            fused_pool4 = tf.add(up1, pool4, name="fuse_1")

            # Second fusion: upsample to pool3's shape and add pool3.
            pool3 = net["pool3"]
            pool3_channels = pool3.get_shape()[3].value
            up2_kernel = utils.weight_variable(
                [4, 4, pool3_channels, pool4_channels], name="W_t2")
            up2_bias = utils.bias_variable([pool3_channels], name="b_t2")
            up2 = utils.conv2d_transpose_strided(
                fused_pool4, up2_kernel, up2_bias,
                output_shape=tf.shape(pool3))
            fused_pool3 = tf.add(up2, pool3, name="fuse_2")

            # Final stride-8 upsampling to the input's height and width.
            in_shape = tf.shape(image)
            full_shape = tf.stack(
                [in_shape[0], in_shape[1], in_shape[2], NUM_OF_CLASSES])
            up3_kernel = utils.weight_variable(
                [16, 16, NUM_OF_CLASSES, pool3_channels], name="W_t3")
            up3_bias = utils.bias_variable([NUM_OF_CLASSES], name="b_t3")
            conv_t3 = utils.conv2d_transpose_strided(
                fused_pool3, up3_kernel, up3_bias,
                output_shape=full_shape, stride=8)

            annotation_pred = tf.argmax(conv_t3, dimension=3,
                                        name="prediction")
            # Debug output; prints the symbolic shape tensors at graph
            # construction time.
            print("anno, conv_t3 shape",
                  tf.shape(annotation_pred),
                  tf.shape(conv_t3))

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def inference(image, keep_prob):
    """
    Define the FCN graph structure.

    arguments:
    image: input image; it must hold values between 0 and 255.
    keep_prob: fraction of nodes that are NOT dropped by dropout.

    returns:
    ``(prediction, logits)``: argmax over axis 3 with a trailing axis
    added, and the output of the last transposed convolution.
    """
    # Load the downloaded VGGNet.
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    stored_mean = vgg_data['normalization'][0][0][0]
    mean_pixel = np.mean(stored_mean, axis=(0, 1))
    vgg_weights = np.squeeze(vgg_data['layers'])

    # Apply mean normalization to the image.
    net_input = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        net = vgg_net(vgg_weights, net_input)

        # Fetch VGGNet's conv5 (conv5_3) layer and define pool5 on top of it.
        pool5 = utils.max_pool_2x2(net["conv5_3"])

        # conv6
        k6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        bias6 = utils.bias_variable([4096], name="b6")
        act6 = tf.nn.relu(utils.conv2d_basic(pool5, k6, bias6), name="relu6")
        drop6 = tf.nn.dropout(act6, keep_prob=keep_prob)

        # conv7 (1x1 conv)
        k7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        bias7 = utils.bias_variable([4096], name="b7")
        act7 = tf.nn.relu(utils.conv2d_basic(drop6, k7, bias7), name="relu7")
        drop7 = tf.nn.dropout(act7, keep_prob=keep_prob)

        # conv8 (1x1 conv)
        k8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        bias8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        scores = utils.conv2d_basic(drop7, k8, bias8)

        # Skip-layer fusion for FCN-8s: define the deconv layers that
        # upsample back to the original image size.
        pool4 = net["pool4"]
        pool4_channels = pool4.get_shape()[3].value
        up1_kernel = utils.weight_variable(
            [4, 4, pool4_channels, NUM_OF_CLASSESS], name="W_t1")
        up1_bias = utils.bias_variable([pool4_channels], name="b_t1")
        # Upsample conv8 by 2x.
        up1 = utils.conv2d_transpose_strided(
            scores, up1_kernel, up1_bias, output_shape=tf.shape(pool4))
        # Add 2x conv8 and pool4 to obtain fuse_1.
        fused_pool4 = tf.add(up1, pool4, name="fuse_1")

        pool3 = net["pool3"]
        pool3_channels = pool3.get_shape()[3].value
        up2_kernel = utils.weight_variable(
            [4, 4, pool3_channels, pool4_channels], name="W_t2")
        up2_bias = utils.bias_variable([pool3_channels], name="b_t2")
        # Upsample fuse_1 by 2x.
        up2 = utils.conv2d_transpose_strided(
            fused_pool4, up2_kernel, up2_bias, output_shape=tf.shape(pool3))
        # Add 2x fuse_1 and pool3 to obtain fuse_2.
        fused_pool3 = tf.add(up2, pool3, name="fuse_2")

        in_shape = tf.shape(image)
        full_shape = tf.stack(
            [in_shape[0], in_shape[1], in_shape[2], NUM_OF_CLASSESS])
        up3_kernel = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, pool3_channels], name="W_t3")
        up3_bias = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        # Upsample fuse_2 by 8x.
        conv_t3 = utils.conv2d_transpose_strided(
            fused_pool3, up3_kernel, up3_bias,
            output_shape=full_shape, stride=8)

        # The final prediction picks, via argmax, the class whose last-layer
        # activation is largest.
        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def inference(image, keep_prob):
    """Build the "seg_inference" segmentation graph (VGG-style encoder with RA units).

    Each encoder stage is two or three 3x3 conv+ReLU layers, a call to
    ``utils.RA_unit``, a 3x3 conv+ReLU that brings the RA output back to the
    stage's channel count, and a 2x2 max-pool. The decoder fuses the class
    scores with ``pool3`` and ``pool2`` and upsamples to the input size.

    Mean subtraction is not applied here (the original ``IMG_MEAN``
    preprocessing was commented out), so ``image`` is used as given.

    Args:
        image: input tensor with 3 channels; its static height and width
            must be known because they are read via ``.shape[i].value``.
        keep_prob: keep probability passed to ``tf.nn.dropout``.

    Returns:
        ``(annotation_pred, conv_t3)``: the argmax of ``conv_t3`` over axis 3
        (no singleton axis is re-added), and the ``conv_t3`` activations.
    """

    def conv_relu(inputs, in_ch, out_ch, tag):
        # 3x3 conv + bias + ReLU; names are "W<tag>", "b<tag>", "relu<tag>".
        kernel = utils.weight_variable([3, 3, in_ch, out_ch], name="W" + tag)
        bias = utils.bias_variable([out_ch], name="b" + tag)
        conv = utils.conv2d_basic(inputs, kernel, bias)
        return tf.nn.relu(conv, name="relu" + tag)

    def ra_pool(features, channels, num, stage):
        # RA unit -> 3x3 conv back to `channels` -> 2x2 max-pool.
        # The conv after the RA unit takes channels * (1 + num) input
        # channels. The second RA_unit return value is not used.
        ra, _ = utils.RA_unit(features,
                              features.shape[1].value,
                              features.shape[2].value,
                              num)
        fused = conv_relu(ra, channels * (1 + num), channels, "_s" + stage)
        return utils.max_pool_2x2(fused)

    with tf.variable_scope("seg_inference"):
        # Stage 1
        net = conv_relu(image, 3, 64, "1_1")
        net = conv_relu(net, 64, 64, "1_2")
        pool1 = ra_pool(net, 64, 16, "1")

        # Stage 2
        net = conv_relu(pool1, 64, 128, "2_1")
        net = conv_relu(net, 128, 128, "2_2")
        pool2 = ra_pool(net, 128, 16, "2")

        # Stage 3
        net = conv_relu(pool2, 128, 256, "3_1")
        net = conv_relu(net, 256, 256, "3_2")
        net = conv_relu(net, 256, 256, "3_3")
        pool3 = ra_pool(net, 256, 16, "3")

        # Stage 4
        net = conv_relu(pool3, 256, 512, "4_1")
        net = conv_relu(net, 512, 512, "4_2")
        net = conv_relu(net, 512, 512, "4_3")
        pool4 = ra_pool(net, 512, 16, "4")

        # Stage 5 (RA unit called with 8 instead of 16)
        net = conv_relu(pool4, 512, 512, "5_1")
        net = conv_relu(net, 512, 512, "5_2")
        net = conv_relu(net, 512, 512, "5_3")
        pool5 = ra_pool(net, 512, 8, "5")

        # NOTE(review): conv6 is fed pool4, not pool5; pool5 only supplies
        # W6's input depth. The decoder below upsamples conv8 straight to
        # pool3's shape, which presumably relies on conv8 being at pool4
        # resolution, so this wiring is kept. Stage 5 still creates its
        # variables but is not on the path to the output. Confirm intent
        # before changing.
        W6 = utils.weight_variable([7, 7, pool5.shape[3].value, 4096],
                                   name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool4, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # Class scores; per the original note NUM_OF_CLASSESS is 2 here
        # (road / non-road).
        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # Upscale towards the input image size, fusing with skip layers.
        deconv_shape1 = pool3.get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1, output_shape=tf.shape(pool3))
        fuse_1 = tf.add(conv_t1, pool3, name="fuse_1")

        deconv_shape2 = pool2.get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(pool2))
        fuse_2 = tf.add(conv_t2, pool2, name="fuse_2")
        print("fuse_2 shape:")
        print(fuse_2.shape)

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, fuse_2.shape[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3,
            stride=4, stride_y=4)

        # `axis=` replaces the deprecated `dimension=` keyword.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    # conv_t3 is the final result.
    return annotation_pred, conv_t3
def inference(image, keep_prob):
    """Semantic segmentation network definition (FCN on VGG features).

    NOTE(review): mean-pixel normalisation (``utils.process_image``) is
    disabled in this variant and the raw ``image`` is passed to ``vgg_net``.
    The VGG weights were presumably trained on mean-subtracted input, so
    confirm this is intentional.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability passed to ``tf.nn.dropout``
    :return: ``(annotation_pred, conv_t3)``: the argmax of ``conv_t3`` over
        axis 3 with a singleton axis re-added at position 3, and the
        ``conv_t3`` activations
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    # Only the layer weights are needed here; the stored normalisation
    # mean is unused because preprocessing is disabled (see docstring).
    weights = np.squeeze(model_data['layers'])

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer)

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # Now upscale to the actual image size, fusing with pool4 and pool3.
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # `axis=` replaces the deprecated `dimension=` keyword.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    # `axis=` replaces the deprecated `dim=` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def build(self, rgb, ROIMap, NUM_CLASSES, keep_prob):
    """Build the fully convolutional network with a ROI-gated first layer.

    The encoder is assembled from ``self.conv_layer`` / ``self.max_pool``
    (VGG16 layer names); the decoder is created from fresh variables.
    All intermediate tensors are stored as attributes on ``self``.

    :param rgb: rgb image [batch, height, width, 3], values 0-255
    :param ROIMap: binary map with ROI pixels marked 1 and background
        marked zero; it is cast to float32 and convolved with a filter
        whose input depth is 1, so it must have a single channel
    :param NUM_CLASSES: depth of the final score map. The output is split
        into groups of sizes [2, 3, 4, 15] along axis 3, so this is
        expected to be 24
    :param keep_prob: keep probability passed to ``tf.nn.dropout``
    """
    # Sum of weights of all filters, for the weight decay loss.
    self.SumWeights = tf.constant(0.0, name="SumFiltersWeights")
    print("build model started")

    # Convert RGB to BGR and subtract the per-channel VGG mean.
    red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb)
    bgr = tf.concat(axis=3, values=[
        blue - VGG_MEAN[0],
        green - VGG_MEAN[1],
        red - VGG_MEAN[2],
    ])

    # ---- Encoder (VGG16 layer names) ----
    # Layer 1: conv1_1 without ReLU, multiplied element-wise by a learned
    # 64-channel map derived from ROIMap, then ReLU and conv1_2.
    self.conv1_1 = self.conv_layer_NoRelu(bgr, "conv1_1")
    W = tf.Variable(
        tf.truncated_normal([3, 3, 1, 64], mean=0.0, stddev=0.01,
                            dtype=tf.float32),
        name="W0")
    B = tf.Variable(
        tf.truncated_normal([64], mean=0.0, stddev=0.01, dtype=tf.float32),
        name="B0")
    self.RelevanceMap = tf.nn.bias_add(
        tf.nn.conv2d(tf.cast(ROIMap, tf.float32), W, [1, 1, 1, 1],
                     padding="SAME"),
        B)
    self.conv1_2 = self.conv_layer(
        tf.nn.relu(self.conv1_1 * self.RelevanceMap), "conv1_2")
    self.pool1 = self.max_pool(self.conv1_2, 'pool1')

    # Layer 2
    self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
    self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
    self.pool2 = self.max_pool(self.conv2_2, 'pool2')

    # Layer 3
    self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
    self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
    self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
    self.pool3 = self.max_pool(self.conv3_3, 'pool3')

    # Layer 4
    self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
    self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
    self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
    self.pool4 = self.max_pool(self.conv4_3, 'pool4')

    # Layer 5
    self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
    self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
    self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
    self.pool5 = self.max_pool(self.conv5_3, 'pool5')

    # ---- Fully convolutional layers (new variables) ----
    W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
    b6 = utils.bias_variable([4096], name="b6")
    self.conv6 = utils.conv2d_basic(self.pool5, W6, b6)
    self.relu6 = tf.nn.relu(self.conv6, name="relu6")
    # Dropout is only meaningful during training; callers control it
    # through keep_prob.
    self.relu_dropout6 = tf.nn.dropout(self.relu6, keep_prob=keep_prob)

    W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")  # 1x1 conv
    b7 = utils.bias_variable([4096], name="b7")
    self.conv7 = utils.conv2d_basic(self.relu_dropout6, W7, b7)
    self.relu7 = tf.nn.relu(self.conv7, name="relu7")
    self.relu_dropout7 = tf.nn.dropout(self.relu7, keep_prob=keep_prob)

    # 1x1 conv down to NUM_CLASSES channels (coarse class scores).
    W8 = utils.weight_variable([1, 1, 4096, NUM_CLASSES], name="W8")
    b8 = utils.bias_variable([NUM_CLASSES], name="b8")
    self.conv8 = utils.conv2d_basic(self.relu_dropout7, W8, b8)

    # ---- Decoder: upscale to the input image size ----
    # Only the depth of pool4 is read from the static shape; the transpose
    # convolution must output pool4's depth for the element-wise addition.
    deconv_shape1 = self.pool4.get_shape()
    W_t1 = utils.weight_variable(
        [4, 4, deconv_shape1[3].value, NUM_CLASSES], name="W_t1")
    b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
    self.conv_t1 = utils.conv2d_transpose_strided(
        self.conv8, W_t1, b_t1, output_shape=tf.shape(self.pool4))
    self.fuse_1 = tf.add(self.conv_t1, self.pool4, name="fuse_1")

    deconv_shape2 = self.pool3.get_shape()
    W_t2 = utils.weight_variable(
        [4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
    b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
    self.conv_t2 = utils.conv2d_transpose_strided(
        self.fuse_1, W_t2, b_t2, output_shape=tf.shape(self.pool3))
    self.fuse_2 = tf.add(self.conv_t2, self.pool3, name="fuse_2")

    shape = tf.shape(rgb)
    W_t3 = utils.weight_variable(
        [16, 16, NUM_CLASSES, deconv_shape2[3].value], name="W_t3")
    b_t3 = utils.bias_variable([NUM_CLASSES], name="b_t3")
    self.conv_t3 = utils.conv2d_transpose_strided(
        self.fuse_2, W_t3, b_t3,
        output_shape=[shape[0], shape[1], shape[2], NUM_CLASSES],
        stride=8)

    # Split the final score map into category groups of different
    # granularity: 2 + 3 + 4 + 15 channels along axis 3.
    (self.VesselProb, self.PhaseProb,
     self.LiquidSolidProb, self.ExactPhaseProb) = tf.split(
        self.conv_t3, [2, 3, 4, 15], 3)

    # Turn each score group into a label map. `axis=` replaces the
    # deprecated `dimension=` keyword of tf.argmax.
    self.ExactPhasePred = tf.argmax(self.ExactPhaseProb, axis=3,
                                    name="ExactPhasePred")
    self.LiquidSolidPred = tf.argmax(self.LiquidSolidProb, axis=3,
                                     name="LiquidSolidPred")
    self.PhasePred = tf.argmax(self.PhaseProb, axis=3, name="PhasePred")
    self.VesselPred = tf.argmax(self.VesselProb, axis=3, name="VesselPred")
    print("FCN model built")
def unet_upsample(image, dw_h_convs, variables, layer_id, weight_id,
                  filter_size, num_of_feature, num_of_layers, keep_prob,
                  name, debug, restore=False, weights=None):
    """Build the upsampling (decoder) half of a U-Net style network.

    Starting from ``dw_h_convs[num_of_layers - 1]``, each step applies a
    transposed convolution + ReLU, concatenates the result with the matching
    encoder feature map ``dw_h_convs[layer]`` via ``utils.crop_and_concat``,
    and applies two conv + ReLU layers.

    Args:
        image: unused; kept for interface compatibility.
        dw_h_convs: mapping from encoder layer index to its feature map.
        variables: unused; kept for interface compatibility.
        layer_id: running layer counter used in variable names; incremented
            once per decoder step.
        weight_id: index of the next entry of ``weights`` to consume when
            restoring; advanced by 6 per decoder step in that case.
        filter_size: spatial size of the two conv kernels in each step.
        num_of_feature: base feature count; step ``layer`` works with
            ``2 ** (layer + 1) * num_of_feature`` input features.
        num_of_layers: number of encoder layers; ``num_of_layers - 1``
            decoder steps are built (none when it is 1).
        keep_prob: forwarded to the ``utils`` conv helpers.
        name: prefix for all variable and op names.
        debug: when truthy, activation and image summaries are added.
        restore: when truthy, variables are created from ``weights`` instead
            of being freshly initialised.
        weights: indexable collection of stored values, consumed per step in
            the order wd, bd, w1, w2, b1, b2. Required when ``restore``.

    Returns:
        ``(in_node, new_variables, layer_id, weight_id)``: the last decoder
        output, the variables created here (six per step, in the order
        above), and the updated counters.
    """
    new_variables = []
    in_node = dw_h_convs[num_of_layers - 1]
    stddev = 0.02  # same initialiser spread for every decoder step

    for layer in range(num_of_layers - 2, -1, -1):
        features = 2 ** (layer + 1) * num_of_feature
        half = features // 2

        up_prefix = name + '_layer_up' + str(layer_id)
        conv_prefix = name + '_layer_up_conv' + str(layer_id)
        wd_name = up_prefix + '_w'
        bd_name = up_prefix + '_b'
        w1_name = conv_prefix + '_w0'
        w2_name = conv_prefix + '_w1'
        b1_name = conv_prefix + '_b0'
        b2_name = conv_prefix + '_b1'
        relu_name = conv_prefix + '_feat'

        if restore:
            # Stored order is wd, bd, w1, w2, b1, b2.
            var_names = (wd_name, bd_name, w1_name, w2_name, b1_name, b2_name)
            wd, bd, w1, w2, b1, b2 = [
                utils.get_variable(weights[weight_id + offset], var_name)
                for offset, var_name in enumerate(var_names)
            ]
            weight_id += len(var_names)
        else:
            # 2x2 transposed-conv kernel (the original notes "pooling size
            # is 2").
            wd = utils.weight_variable([2, 2, half, features], stddev,
                                       wd_name)
            bd = utils.bias_variable([half], bd_name)
            w1 = utils.weight_variable(
                [filter_size, filter_size, features, half], stddev, w1_name)
            w2 = utils.weight_variable(
                [filter_size, filter_size, half, half], stddev, w2_name)
            b1 = utils.bias_variable([half], b1_name)
            b2 = utils.bias_variable([half], b2_name)

        h_deconv = tf.nn.relu(
            utils.conv2d_transpose_strided(in_node, wd, bd,
                                           keep_prob=keep_prob))
        h_deconv_concat = utils.crop_and_concat(dw_h_convs[layer], h_deconv)

        conv1 = utils.conv2d_basic(h_deconv_concat, w1, b1, keep_prob)
        h_conv = tf.nn.relu(conv1)
        conv2 = utils.conv2d_basic(h_conv, w2, b2, keep_prob)
        in_node = tf.nn.relu(conv2, relu_name)

        if debug:
            utils.add_activation_summary(in_node)
            utils.add_to_image_summary(
                utils.get_image_summary(in_node, relu_name + '_image'))

        new_variables.extend((wd, bd, w1, w2, b1, b2))
        layer_id += 1

    return in_node, new_variables, layer_id, weight_id
def AutoencorderCLustering(image, filter_size, num_of_feature, num_of_layers,
                           keep_prob, name, debug, Class, restore=False,
                           weights=None):
    """Build a shared encoder with one U-Net style decoder per class.

    The encoder stacks ``num_of_layers`` blocks of two conv + ReLU layers,
    with a 2x2 max-pool between blocks. For each of the ``Class`` classes a
    separate decoder is built from the deepest encoder output: per step a
    transposed conv + ReLU, ``utils.crop_and_concat`` with the matching
    encoder feature map, and two conv + ReLU layers. Each decoder ends in a
    1x1 conv + ReLU back to the input's channel count.

    Args:
        image: input tensor; its last static dimension is the channel count.
        filter_size: spatial size of the conv kernels.
        num_of_feature: feature count of the first encoder block; block
            ``layer`` uses ``2 ** layer * num_of_feature``.
        num_of_layers: number of encoder blocks.
        keep_prob: forwarded to ``utils.conv2d_basic``.
        name: prefix for all variable and op names.
        debug: when truthy, activation and image summaries are added.
        Class: number of per-class decoders to build.
        restore: when truthy, variables are created from ``weights`` instead
            of being freshly initialised.
        weights: indexable collection of stored values, consumed
            sequentially in variable-creation order. Required when
            ``restore``.

    Returns:
        ``(Representation, variables, dw_h_convs)``: the list of per-class
        decoder outputs, all variables in creation order, and the dict of
        encoder feature maps keyed by layer index.
    """
    channels = image.get_shape().as_list()[-1]
    dw_h_convs = {}
    variables = []
    in_node = image

    layer_id = 0
    weight_id = 0

    # ---- Downsampling (shared encoder) ----
    for layer in range(0, num_of_layers):
        features = 2 ** layer * num_of_feature
        stddev = np.sqrt(float(2) / (filter_size ** 2 * features))
        prefix = name + '_layer_' + str(layer_id)
        w1_name = prefix + '_w_0'
        w2_name = prefix + '_w_1'
        b1_name = prefix + '_b_0'
        b2_name = prefix + '_b_1'
        relu_name = prefix + '_feat'

        if restore:
            # Stored order is w1, w2, b1, b2.
            w1 = utils.get_variable(weights[weight_id], w1_name)
            weight_id += 1
            w2 = utils.get_variable(weights[weight_id], w2_name)
            weight_id += 1
            b1 = utils.get_variable(weights[weight_id], b1_name)
            weight_id += 1
            b2 = utils.get_variable(weights[weight_id], b2_name)
            weight_id += 1
        else:
            # The first block reads the image channels; later blocks read
            # the previous block's feature count.
            in_channels = channels if layer == 0 else features // 2
            w1 = utils.weight_variable(
                [filter_size, filter_size, in_channels, features],
                stddev, w1_name)
            w2 = utils.weight_variable(
                [filter_size, filter_size, features, features],
                stddev, w2_name)
            b1 = utils.bias_variable([features], b1_name)
            b2 = utils.bias_variable([features], b2_name)

        conv1 = utils.conv2d_basic(in_node, w1, b1, keep_prob)
        tmp_h_conv = tf.nn.relu(conv1)
        conv2 = utils.conv2d_basic(tmp_h_conv, w2, b2, keep_prob)
        dw_h_convs[layer] = tf.nn.relu(conv2, relu_name)

        # No pooling after the deepest block.
        if layer < num_of_layers - 1:
            in_node = utils.max_pool_2x2(dw_h_convs[layer])

        if debug:
            utils.add_activation_summary(dw_h_convs[layer])
            utils.add_to_image_summary(
                utils.get_image_summary(dw_h_convs[layer],
                                        relu_name + '_image'))

        variables.extend((w1, w2, b1, b2))
        layer_id += 1

    EncodedNode = dw_h_convs[num_of_layers - 1]

    # ---- Upsampling (one decoder per class) ----
    Representation = []
    for k in range(Class):
        class_tag = 'Class' + str(k)
        in_node = EncodedNode

        for layer in range(num_of_layers - 2, -1, -1):
            features = 2 ** (layer + 1) * num_of_feature
            half = features // 2
            stddev = np.sqrt(float(2) / (filter_size ** 2 * features))

            # layer_id keeps counting across classes, so names stay unique.
            up_prefix = name + '_layer_up' + str(layer_id)
            conv_prefix = name + '_layer_up_conv' + str(layer_id)
            wd_name = up_prefix + '_w' + class_tag
            bd_name = up_prefix + '_b' + class_tag
            w1_name = conv_prefix + '_w0' + class_tag
            w2_name = conv_prefix + '_w1' + class_tag
            b1_name = conv_prefix + '_b0' + class_tag
            b2_name = conv_prefix + '_b1' + class_tag
            relu_name = conv_prefix + '_feat' + class_tag

            if restore:
                # Stored order is wd, bd, w1, w2, b1, b2.
                wd = utils.get_variable(weights[weight_id], wd_name)
                weight_id += 1
                bd = utils.get_variable(weights[weight_id], bd_name)
                weight_id += 1
                w1 = utils.get_variable(weights[weight_id], w1_name)
                weight_id += 1
                w2 = utils.get_variable(weights[weight_id], w2_name)
                weight_id += 1
                b1 = utils.get_variable(weights[weight_id], b1_name)
                weight_id += 1
                b2 = utils.get_variable(weights[weight_id], b2_name)
                weight_id += 1
            else:
                # 2x2 transposed-conv kernel (the original notes "pooling
                # size is 2").
                wd = utils.weight_variable([2, 2, half, features],
                                           stddev, wd_name)
                bd = utils.bias_variable([half], bd_name)
                w1 = utils.weight_variable(
                    [filter_size, filter_size, features, half],
                    stddev, w1_name)
                w2 = utils.weight_variable(
                    [filter_size, filter_size, half, half],
                    stddev, w2_name)
                b1 = utils.bias_variable([half], b1_name)
                b2 = utils.bias_variable([half], b2_name)

            h_deconv = tf.nn.relu(
                utils.conv2d_transpose_strided(in_node, wd, bd))
            h_deconv_concat = utils.crop_and_concat(dw_h_convs[layer],
                                                    h_deconv)

            conv1 = utils.conv2d_basic(h_deconv_concat, w1, b1, keep_prob)
            h_conv = tf.nn.relu(conv1)
            conv2 = utils.conv2d_basic(h_conv, w2, b2, keep_prob)
            in_node = tf.nn.relu(conv2, relu_name)

            if debug:
                # Both summaries use the same name, as in the original.
                utils.add_to_image_summary(
                    utils.get_image_summary(in_node, relu_name + '_image'))
                utils.add_to_image_summary(
                    utils.get_image_summary(conv2, relu_name + '_image'))

            variables.extend((wd, bd, w1, w2, b1, b2))
            layer_id += 1

        # Final 1x1 conv back to the input channel count for this class.
        final_prefix = name + '_final_layer_' + str(layer_id)
        w_name = final_prefix + '_w' + str(k)
        b_name = final_prefix + '_b' + str(k)
        relu_name = final_prefix + '_feat' + str(k)

        if restore:
            w = utils.get_variable(weights[weight_id], w_name)
            weight_id += 1
            b = utils.get_variable(weights[weight_id], b_name)
            weight_id += 1
        else:
            # stddev is whatever the most recent block computed.
            w = utils.weight_variable([1, 1, num_of_feature, channels],
                                      stddev, w_name)
            b = utils.bias_variable([channels], b_name)

        y_conv = tf.nn.relu(utils.conv2d_basic(in_node, w, b), relu_name)
        variables.extend((w, b))

        if debug:
            utils.add_activation_summary(y_conv)
            utils.add_to_image_summary(
                utils.get_image_summary(y_conv, relu_name))

        Representation.append(y_conv)

    return Representation, variables, dw_h_convs
def inference(image, keep_prob):
    """Semantic segmentation network definition (FCN on VGG features).

    The inline shape notes (7*7, 14*14, 224*224, 151 classes) are the
    original author's and presumably assume a 224x224 input with
    NUM_OF_CLASSESS == 151.

    :param image: input image. Should have values in range 0-255, shape of
        [None, IMAGE_SIZE, IMAGE_SIZE, 3]
    :param keep_prob: keep probability passed to ``tf.nn.dropout``
    :return: ``(annotation_pred, conv_t3)``: the argmax of ``conv_t3`` over
        axis 3 with a singleton axis re-added at position 3, and the
        ``conv_t3`` activations
    """
    print("setting up vgg initialized conv layers ...")
    # Load the pretrained VGG parameters (the original comment says VGG-19).
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        # Get the output of each layer in VGG.
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]  # 14*14*512
        print('get conv5_3 from vgg ', conv_final_layer)

        pool5 = utils.max_pool_2x2(conv_final_layer)  # 7*7*512
        print('get pool5 ', pool5)

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        # Per the original note: same padding, stride=1 -> n*7*7*4096
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        print('conv6', conv6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        # Per the original note: same padding, stride=1 -> n*7*7*4096
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        print('conv7', conv7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        # Per the original note: same padding, stride=1 -> n*7*7*151
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
        print('conv8', conv8)

        # Now upscale to the actual image size.
        deconv_shape1 = image_net["pool4"].get_shape()  # n*14*14*512
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        # n*7*7*151 => n*14*14*512 (filter [4,4,512,151], stride=2)
        conv_t1 = utils.conv2d_transpose_strided(
            conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        print('conv_t1', conv_t1)
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        deconv_shape2 = image_net["pool3"].get_shape()  # n*28*28*256
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        # n*14*14*512 => n*28*28*256 (filter [4,4,256,512], stride=2)
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        print('conv_t2', conv_t2)
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        # n*28*28*256 => n*224*224*151 (filter [16,16,151,256], stride=8)
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)
        print('conv_t3', conv_t3)

        # `axis=` replaces the deprecated `dimension=` keyword.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")
        print('annotation_pred', annotation_pred)

    # `axis=` replaces the deprecated `dim=` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def create(self):
    """Build the encoder/decoder graph and store the result in ``self.logits``.

    Encoder: five stages of 5x5 conv -> ReLU -> batch norm -> 2x2 max-pool
    applied to ``self.X``. Decoder: four stages of transposed conv up to the
    shape of the matching encoder pool, channel-wise concat with that pool,
    ReLU and batch norm; then a final transposed conv to
    ``self.NUM_CLASSES`` channels at the spatial size of ``self.X``.

    Reads ``self.X``, ``self.is_train`` and ``self.NUM_CLASSES``.
    """

    def batch_norm(tensor):
        # Same batch-norm configuration at every site in this network.
        return tf.contrib.layers.batch_norm(
            tensor, scale=True, is_training=self.is_train,
            updates_collections=None)

    # (input channels, output channels) for encoder stages 1..5. The
    # original notes give the pooled outputs as 112x112x64, 56x56x128,
    # 28x28x128, 14x14x128 and 7x7x128 (presumably for a 224x224 input).
    encoder_channels = [(3, 64), (64, 128), (128, 128), (128, 128),
                        (128, 128)]

    with tf.variable_scope("inference"):
        pooled = []
        net = self.X
        for stage, (ch_in, ch_out) in enumerate(encoder_channels, 1):
            kernel = utils.weight_variable([5, 5, ch_in, ch_out],
                                           name="W%d" % stage)
            bias = utils.bias_variable([ch_out], name="b%d" % stage)
            conv = utils.conv2d_basic(net, kernel, bias)
            activated = tf.nn.relu(conv, name="relu%d" % stage)
            net = utils.max_pool_2x2(batch_norm(activated))
            pooled.append(net)

        pool1, pool2, pool3, pool4, pool5 = pooled

        # Upscale back towards the image size. Each entry is
        # (output channels, input channels, skip tensor). The input channel
        # count doubles after the first step because of the concat.
        decoder_plan = [
            (128, 128, pool4),
            (128, 256, pool3),
            (128, 256, pool2),
            (64, 256, pool1),
        ]
        net = pool5
        for stage, (ch_out, ch_in, skip) in enumerate(decoder_plan, 1):
            kernel = utils.weight_variable([5, 5, ch_out, ch_in],
                                           name="W_t%d" % stage)
            bias = utils.bias_variable([ch_out], name="b_t%d" % stage)
            upsampled = utils.conv2d_transpose_strided(
                net, kernel, bias, output_shape=tf.shape(skip))
            fused = tf.concat([upsampled, skip], 3, name="fuse_%d" % stage)
            activated = tf.nn.relu(fused, name="relu_%d" % stage)
            net = batch_norm(activated)

        # Final transposed conv to per-class scores at input resolution.
        input_shape = tf.shape(self.X)
        logits_shape = tf.stack([input_shape[0], input_shape[1],
                                 input_shape[2], self.NUM_CLASSES])
        final_kernel = utils.weight_variable(
            [5, 5, self.NUM_CLASSES, net.get_shape()[3].value], name="W_t5")
        final_bias = utils.bias_variable([self.NUM_CLASSES], name="b_t5")
        self.logits = utils.conv2d_transpose_strided(
            net, final_kernel, final_bias, output_shape=logits_shape)