def inference_conv(image):
    """Build a four-layer tanh convolutional stack on a one-channel image.

    The input is reshaped to ``[-1, IMAGE_SIZE, IMAGE_SIZE, 1]``.  ``conv1``
    goes through ``utils.conv2d_basic`` and ``conv2``..``conv4`` through
    ``utils.conv2d_strided``.  Every layer uses a 3x3 kernel, hands its
    weights and bias to ``add_to_reg_loss_and_summary`` and is followed by
    ``tanh``.  Channel widths are 1 -> 32 -> 64 -> 128 -> 256.

    NOTE(review): the function is unfinished - it only builds graph nodes and
    has no ``return`` statement, so callers receive ``None``.

    :param image: tensor reshapeable to ``[-1, IMAGE_SIZE, IMAGE_SIZE, 1]``.
    """
    # incomplete :/
    # (convolution helper, input channels, output channels) for conv1..conv4.
    layer_plan = (
        (utils.conv2d_basic, 1, 32),
        (utils.conv2d_strided, 32, 64),
        (utils.conv2d_strided, 64, 128),
        (utils.conv2d_strided, 128, 256),
    )

    activation = tf.reshape(image, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])
    for index, (conv_fn, in_channels, out_channels) in enumerate(layer_plan,
                                                                 start=1):
        with tf.name_scope("conv%d" % index):
            kernel = utils.weight_variable(
                [3, 3, in_channels, out_channels], name="W_conv%d" % index)
            offset = utils.bias_variable(
                [out_channels], name="b_conv%d" % index)
            add_to_reg_loss_and_summary(kernel, offset)
            activation = tf.nn.tanh(conv_fn(activation, kernel, offset))
def inference(data):
    """Build a fully connected classifier: four ReLU layers plus softmax.

    Each weight is created as a flat vector and reshaped into a matrix just
    before the matmul.  The first layer maps ``IMAGE_SIZE * IMAGE_SIZE``
    inputs to 50 units, the next three map 50 -> 50, and the final layer maps
    50 -> 10 followed by ``tf.nn.softmax``.  An activation summary is
    registered for every hidden layer (not for the output).

    :param data: 2-D tensor whose second dimension matches
        ``IMAGE_SIZE * IMAGE_SIZE`` (required by the first matmul).
    :return: softmax output tensor named ``h_final``.
    """
    hidden_units = 50

    with tf.variable_scope("inference"):
        activation = data
        fan_in = IMAGE_SIZE * IMAGE_SIZE
        for layer in range(1, 5):
            flat_weights = utils.weight_variable(
                [fan_in * hidden_units], name="W_%d" % layer)
            bias = utils.bias_variable([hidden_units], name="b_%d" % layer)
            weight_matrix = tf.reshape(flat_weights, [fan_in, hidden_units])
            activation = tf.nn.relu(
                tf.matmul(activation, weight_matrix) + bias,
                name='h_%d' % layer)
            utils.add_activation_summary(activation)
            fan_in = hidden_units

        flat_out_weights = utils.weight_variable([50 * 10], name="W_final")
        out_bias = utils.bias_variable([10], name="b_final")
        out_matrix = tf.reshape(flat_out_weights, [50, 10])
        pred = tf.nn.softmax(
            tf.matmul(activation, out_matrix) + out_bias, name='h_final')
        # utils.add_activation_summary(pred)
    return pred
def inference_conv(image):
    """Build a four-layer tanh convolutional stack on a one-channel image.

    The input is reshaped to ``[-1, IMAGE_SIZE, IMAGE_SIZE, 1]``.  ``conv1``
    goes through ``utils.conv2d_basic`` and ``conv2``..``conv4`` through
    ``utils.conv2d_strided``.  Every layer uses a 3x3 kernel, hands its
    weights and bias to ``add_to_reg_loss_and_summary`` and is followed by
    ``tanh``.  Channel widths are 1 -> 32 -> 64 -> 128 -> 256.

    NOTE(review): the function is unfinished - it only builds graph nodes and
    has no ``return`` statement, so callers receive ``None``.

    :param image: tensor reshapeable to ``[-1, IMAGE_SIZE, IMAGE_SIZE, 1]``.
    """
    # incomplete :/
    # (convolution helper, input channels, output channels) for conv1..conv4.
    layer_plan = (
        (utils.conv2d_basic, 1, 32),
        (utils.conv2d_strided, 32, 64),
        (utils.conv2d_strided, 64, 128),
        (utils.conv2d_strided, 128, 256),
    )

    activation = tf.reshape(image, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])
    for index, (conv_fn, in_channels, out_channels) in enumerate(layer_plan,
                                                                 start=1):
        with tf.name_scope("conv%d" % index):
            kernel = utils.weight_variable(
                [3, 3, in_channels, out_channels], name="W_conv%d" % index)
            offset = utils.bias_variable(
                [out_channels], name="b_conv%d" % index)
            add_to_reg_loss_and_summary(kernel, offset)
            activation = tf.nn.tanh(conv_fn(activation, kernel, offset))
def encode_decode(image, keep_prob):
    """Build a small convolutional encoder with a skip-connected decoder.

    Encoder: four 3x3 conv + ReLU stages (3 -> 16 -> 32 -> 64 -> 128
    channels), each followed by ``max_pool_2x2``; the second stage pools
    twice.  Decoder: two transpose convolutions whose outputs are added to the
    third and second encoder outputs, then a 16x16 transpose convolution with
    ``stride=8`` that targets the spatial shape of ``image`` with
    ``OUTPUT_CHANNELS`` channels.

    :param image: input tensor; the first conv kernel expects 3 channels.
    :param keep_prob: accepted for interface compatibility; not used here.
    :return: output of the last transpose convolution.
    """

    def conv_stage(scope_name, inputs, in_channels, out_channels,
                   pool_count=1):
        # 3x3 conv + bias + ReLU, then ``pool_count`` 2x2 max-pools, all
        # created inside the stage's variable scope.
        with tf.variable_scope(scope_name):
            kernel = weight_variable([3, 3, in_channels, out_channels])
            offset = bias_variable([out_channels])
            result = tf.nn.relu(conv2d(inputs, kernel) + offset)
            for _ in range(pool_count):
                result = max_pool_2x2(result)
        return result

    with tf.variable_scope("encode_decode"):
        pooled_1 = conv_stage('conv1', image, 3, 16)
        pooled_2 = conv_stage('conv2', pooled_1, 16, 32, pool_count=2)
        pooled_3 = conv_stage('conv3', pooled_2, 32, 64)
        pooled_4 = conv_stage('conv4', pooled_3, 64, 128)

        # Upscale
        with tf.variable_scope('deconv1'):
            channels_3 = pooled_3.get_shape()[3].value
            kernel_t1 = weight_variable([3, 3, channels_3, 128])
            bias_t1 = utils.bias_variable([channels_3], name="b_t1")
            up_1 = utils.conv2d_transpose_strided(
                pooled_4, kernel_t1, bias_t1,
                output_shape=tf.shape(pooled_3))
            fused_1 = tf.add(up_1, pooled_3, name="fuse_1")

        with tf.variable_scope('deconv2'):
            channels_2 = pooled_2.get_shape()[3].value
            kernel_t2 = weight_variable([3, 3, channels_2, channels_3])
            bias_t2 = utils.bias_variable([channels_2], name="b_t2")
            up_2 = utils.conv2d_transpose_strided(
                fused_1, kernel_t2, bias_t2,
                output_shape=tf.shape(pooled_2))
            fused_2 = tf.add(up_2, pooled_2, name="fuse_2")

        with tf.variable_scope('deconv3'):
            image_shape = tf.shape(image)
            full_shape = tf.stack(
                [image_shape[0], image_shape[1], image_shape[2],
                 OUTPUT_CHANNELS])
            kernel_t3 = weight_variable(
                [16, 16, OUTPUT_CHANNELS, channels_2])
            bias_t3 = utils.bias_variable([OUTPUT_CHANNELS], name="b_t3")
            conv_t3 = utils.conv2d_transpose_strided(
                fused_2, kernel_t3, bias_t3,
                output_shape=full_shape, stride=8)
    return conv_t3
def inference_simple(dataset):
    """Build a two-conv-layer classifier that returns unnormalised logits.

    ``conv1`` is 5x5 with 1 -> 32 channels and ``conv2`` is 3x3 with
    32 -> 64 channels; each is followed by ReLU and ``utils.max_pool_2x2``.
    The result is flattened to ``(IMAGE_SIZE // 4) ** 2 * 64`` features and
    projected to ``NUM_LABELS`` outputs.  Histogram summaries are recorded for
    every weight and bias.

    Summaries use ``tf.summary.histogram``; the former
    ``tf.histogram_summary`` was removed in TensorFlow 1.0.  Unlike the old
    call, the summary name is prefixed with the enclosing name scope.

    :param dataset: input tensor; the first kernel expects 1 channel.
    :return: logits tensor (no softmax applied).
    """
    with tf.name_scope("conv1"):
        W1 = utils.weight_variable([5, 5, 1, 32], name="W1")
        b1 = utils.bias_variable([32], name="b1")
        tf.summary.histogram("W1", W1)
        tf.summary.histogram("b1", b1)
        h_conv1 = tf.nn.relu(utils.conv2d_basic(dataset, W1, b1),
                             name="h_conv1")
        h_pool1 = utils.max_pool_2x2(h_conv1)

    with tf.name_scope("conv2"):
        W2 = utils.weight_variable([3, 3, 32, 64], name="W2")
        b2 = utils.bias_variable([64], name="b2")
        tf.summary.histogram("W2", W2)
        tf.summary.histogram("b2", b2)
        h_conv2 = tf.nn.relu(utils.conv2d_basic(h_pool1, W2, b2),
                             name="h_conv2")
        h_pool2 = utils.max_pool_2x2(h_conv2)

    with tf.name_scope("fc"):
        # Two 2x2 pools: the flatten below assumes each halves the spatial
        # size (IMAGE_SIZE // 4 per side).
        image_size = IMAGE_SIZE // 4
        flat_features = image_size * image_size * 64
        h_flat = tf.reshape(h_pool2, [-1, flat_features])
        W_fc = utils.weight_variable([flat_features, NUM_LABELS],
                                     name="W_fc")
        b_fc = utils.bias_variable([NUM_LABELS], name="b_fc")
        tf.summary.histogram("W_fc", W_fc)
        tf.summary.histogram("b_fc", b_fc)
        pred = tf.matmul(h_flat, W_fc) + b_fc
    return pred
def up_layer(x1, x2, in_size1, in_size2, out_size, i):
    """Build one 3-D up-sampling step: transpose conv, concat, two convs.

    ``x1`` is up-sampled with a 2x2x2 transpose convolution to the shape of
    ``x2``, passed through ReLU and concatenated with ``x2`` along the last
    axis.  Two 3x3x3 convolutions follow, each with ReLU and dropout.

    NOTE(review): ``keep_prob`` is not a parameter; it is resolved from an
    enclosing/module scope - confirm it is defined where this is called.

    :param x1: tensor to up-sample (``in_size1`` channels).
    :param x2: skip tensor (``in_size2`` channels) that fixes the output shape
        of the transpose convolution.
    :param in_size1: channel count of ``x1``.
    :param in_size2: channel count of ``x2`` and of the up-sampled tensor.
    :param out_size: channel count produced by both 3x3x3 convolutions.
    :param i: layer identifier embedded in every variable/op name.
    :return: the activation after the second conv + ReLU + dropout.
    """
    tag = str(i)

    # Up 1
    up_kernel = utils.weight_variable(
        [2, 2, 2, in_size2, in_size1], name="W_u_" + tag + "_1")
    up_bias = utils.bias_variable([in_size2], name="b_u_" + tag + "_1")
    upsampled = utils.conv3d_transpose_strided(
        x1, up_kernel, up_bias, output_shape=tf.shape(x2))
    upsampled = tf.nn.relu(upsampled, name="relu_d_" + tag + "_1")

    # Concat along the channels dimension
    features = tf.concat([upsampled, x2], -1)

    # Conv1 (step 2) consumes the concatenated channels, Conv2 (step 3)
    # keeps the width at out_size.
    for step, in_channels in ((2, in_size2 * 2), (3, out_size)):
        step_tag = tag + "_" + str(step)
        kernel = utils.weight_variable(
            [3, 3, 3, in_channels, out_size], name="W_u_" + step_tag)
        bias = utils.bias_variable([out_size], name="b_u_" + step_tag)
        convolved = utils.conv3d_basic(features, kernel, bias)
        activated = tf.nn.relu(convolved, name="relu_u_" + step_tag)
        features = tf.nn.dropout(activated, keep_prob=keep_prob)

    return features
def encoder_conv(image):
    """Encode an image into a 512-dimensional vector.

    Four 3x3 ``utils.conv2d_strided`` layers with ``tanh`` (3 -> 32 -> 64 ->
    128 -> 256 channels) are followed by a linear layer.  The flatten step
    assumes a spatial size of ``IMAGE_SIZE // 16`` per side after the four
    strided layers.

    :param image: input tensor; the first kernel expects 3 channels.
    :return: ``[batch, 512]`` tensor (no non-linearity on the final layer).
    """
    channel_plan = (3, 32, 64, 128, 256)

    features = image
    stages = zip(channel_plan[:-1], channel_plan[1:])
    for stage, (in_channels, out_channels) in enumerate(stages, start=1):
        with tf.name_scope("enc_conv%d" % stage):
            kernel = utils.weight_variable(
                [3, 3, in_channels, out_channels], name="W_conv%d" % stage)
            bias = utils.bias_variable(
                [out_channels], name="b_conv%d" % stage)
            features = tf.nn.tanh(
                utils.conv2d_strided(features, kernel, bias))

    with tf.name_scope("enc_fc"):
        side = IMAGE_SIZE // 16
        flat_features = side * side * 256
        flattened = tf.reshape(features, [-1, flat_features])
        fc_weights = utils.weight_variable([flat_features, 512],
                                           name="W_fc5")
        fc_bias = utils.bias_variable([512], name="b_fc5")
        encoder_val = tf.matmul(flattened, fc_weights) + fc_bias
    return encoder_val
def inference_resnet(dataset):
    """Build a small residual classifier for 28x28 single-channel inputs.

    A 5x5 conv (1 -> 32) with ReLU and local response normalisation feeds two
    ``utils.bottleneck_unit`` blocks built with ``down_stride=True``.  The
    result is flattened to ``7 * 7 * 64`` features and projected to 10 logits.

    Summaries use ``tf.summary.histogram``; the former
    ``tf.histogram_summary`` was removed in TensorFlow 1.0.  Unlike the old
    call, the summary name is prefixed with the enclosing name scope.

    :param dataset: tensor reshapeable to ``[-1, 28, 28, 1]``.
    :return: ``[batch, 10]`` logits tensor (no softmax applied).
    """
    dataset_reshaped = tf.reshape(dataset, [-1, 28, 28, 1])

    with tf.name_scope("conv1"):
        W_conv1 = utils.weight_variable([5, 5, 1, 32], name="W_conv1")
        bias1 = utils.bias_variable([32], name="bias1")
        tf.summary.histogram("W_conv1", W_conv1)
        tf.summary.histogram("bias1", bias1)
        h_conv1 = tf.nn.relu(
            utils.conv2d_basic(dataset_reshaped, W_conv1, bias1))
        h_norm1 = utils.local_response_norm(h_conv1)

    bottleneck_1 = utils.bottleneck_unit(h_norm1, 32, 32, down_stride=True,
                                         name="res1")
    bottleneck_2 = utils.bottleneck_unit(bottleneck_1, 64, 64,
                                         down_stride=True, name="res2")

    with tf.name_scope("fc1"):
        # The flatten assumes the two down-strided units leave 7x7x64.
        h_flat = tf.reshape(bottleneck_2, [-1, 7 * 7 * 64])
        W_fc1 = utils.weight_variable([7 * 7 * 64, 10], name="W_fc1")
        bias_fc1 = utils.bias_variable([10], name="bias_fc1")
        tf.summary.histogram("W_fc1", W_fc1)
        tf.summary.histogram("bias_fc1", bias_fc1)
        logits = tf.matmul(h_flat, W_fc1) + bias_fc1
    return logits
def inference_conv(dataset):
    """Build a two-conv-layer classifier for 28x28 single-channel inputs.

    Each conv layer (5x5, 1 -> 32 and 3x3, 32 -> 64) is followed by ReLU,
    local response normalisation and ``utils.max_pool_2x2``.  The result is
    flattened to ``7 * 7 * 64`` features and projected to 10 logits.

    Summaries use ``tf.summary.histogram``; the former
    ``tf.histogram_summary`` was removed in TensorFlow 1.0.  Unlike the old
    call, the summary name is prefixed with the enclosing name scope.

    :param dataset: tensor reshapeable to ``[-1, 28, 28, 1]``.
    :return: ``[batch, 10]`` logits tensor (no softmax applied).
    """
    dataset_reshaped = tf.reshape(dataset, [-1, 28, 28, 1])

    with tf.name_scope("conv1"):
        W_conv1 = utils.weight_variable([5, 5, 1, 32], name="W_conv1")
        bias1 = utils.bias_variable([32], name="bias1")
        tf.summary.histogram("W_conv1", W_conv1)
        tf.summary.histogram("bias1", bias1)
        h_conv1 = tf.nn.relu(
            utils.conv2d_basic(dataset_reshaped, W_conv1, bias1))
        h_norm1 = utils.local_response_norm(h_conv1)
        h_pool1 = utils.max_pool_2x2(h_norm1)

    with tf.name_scope("conv2"):
        W_conv2 = utils.weight_variable([3, 3, 32, 64], name="W_conv2")
        bias2 = utils.bias_variable([64], name="bias2")
        tf.summary.histogram("W_conv2", W_conv2)
        tf.summary.histogram("bias2", bias2)
        h_conv2 = tf.nn.relu(utils.conv2d_basic(h_pool1, W_conv2, bias2))
        h_norm2 = utils.local_response_norm(h_conv2)
        h_pool2 = utils.max_pool_2x2(h_norm2)

    with tf.name_scope("fc1"):
        # The flatten assumes the two 2x2 pools reduce 28x28 to 7x7.
        h_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        W_fc1 = utils.weight_variable([7 * 7 * 64, 10], name="W_fc1")
        bias_fc1 = utils.bias_variable([10], name="bias_fc1")
        tf.summary.histogram("W_fc1", W_fc1)
        tf.summary.histogram("bias_fc1", bias_fc1)
        logits = tf.matmul(h_flat, W_fc1) + bias_fc1
    return logits
def upsample(pool5, pool4, pool3, pool2, conv9, image, scope, output_class):
    """Up-sample ``conv9`` through four skip fusions to the image size.

    Starting from ``conv9``, each step applies a 4x4 transpose convolution
    whose output shape and channel count match the next skip tensor
    (``pool5``, ``pool4``, ``pool3``, ``pool2`` in that order) and adds the
    skip tensor to it.  A final 16x16 transpose convolution with ``stride=2``
    targets the spatial shape of ``image`` with ``output_class`` channels.

    NOTE(review): the first kernel takes its input-channel count from the
    module constant ``NUM_OF_CLASSESS`` rather than from ``output_class`` or
    ``conv9`` - confirm these always agree.
    NOTE(review): the last layer requests the full image shape with
    ``stride=2``; whether that is consistent depends on the resolution of
    ``pool2`` - confirm against ``utils.conv2d_transpose_strided``.

    :param pool5, pool4, pool3, pool2: skip tensors, used coarsest first.
    :param conv9: tensor to up-sample.
    :param image: tensor whose first three dimensions give the output shape.
    :param scope: variable-scope name for all variables created here.
    :param output_class: channel count of the returned tensor.
    :return: output of the final transpose convolution.
    """
    with tf.variable_scope(scope):
        # do the upscaling using the fuse layers
        fused = conv9
        fused_channels = NUM_OF_CLASSESS
        for level, skip in enumerate((pool5, pool4, pool3, pool2), start=1):
            skip_channels = skip.get_shape()[3].value
            kernel = utils.weight_variable(
                [4, 4, skip_channels, fused_channels],
                name="W_t%d" % level)
            bias = utils.bias_variable([skip_channels],
                                       name="b_t%d" % level)
            upsampled = utils.conv2d_transpose_strided(
                fused, kernel, bias, output_shape=tf.shape(skip))
            fused = tf.add(upsampled, skip, name="fuse_%d" % level)
            fused_channels = skip_channels

        # do the final upscaling
        image_shape = tf.shape(image)
        final_shape = tf.stack(
            [image_shape[0], image_shape[1], image_shape[2], output_class])
        final_kernel = utils.weight_variable(
            [16, 16, output_class, fused_channels], name="W_t5")
        final_bias = utils.bias_variable([output_class], name="b_t5")
        conv_t5 = utils.conv2d_transpose_strided(
            fused, final_kernel, final_bias,
            output_shape=final_shape, stride=2)
    return conv_t5
def generator(images, train_phase):
    """Build a VGG-based generator that appends two predicted channels.

    A 3x3 conv (1 -> 64) with ReLU feeds ``vgg_net`` (weights loaded through
    ``utils.get_model_data``).  ``relu5_3`` is max-pooled, then fused with
    ``pool4`` and ``pool3`` via 4x4 transpose convolutions and additions.  A
    final 16x16 transpose convolution with ``stride=8`` produces 2 channels
    at the spatial shape of ``images``; these are concatenated after
    ``images`` on axis 3.

    :param images: input tensor; the first kernel expects 1 channel.
    :param train_phase: accepted for interface compatibility; not used here.
    :return: tensor named ``pred_image`` holding ``images`` followed by the
        two predicted channels.
    """
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    vgg_weights = np.squeeze(vgg_data['layers'])

    with tf.variable_scope("generator"):
        stem_kernel = utils.weight_variable([3, 3, 1, 64], name="W0")
        stem_bias = utils.bias_variable([64], name="b0")
        stem = utils.conv2d_basic(images, stem_kernel, stem_bias)
        stem = tf.nn.relu(stem, name="relu")

        image_net = vgg_net(vgg_weights, stem)
        pool5 = utils.max_pool_2x2(image_net["relu5_3"])

        # now to upscale to actual image size
        fused = pool5
        fused_channels = pool5.get_shape()[3].value
        for level, skip_name in enumerate(("pool4", "pool3"), start=1):
            skip = image_net[skip_name]
            skip_channels = skip.get_shape()[3].value
            kernel = utils.weight_variable(
                [4, 4, skip_channels, fused_channels],
                name="W_t%d" % level)
            bias = utils.bias_variable([skip_channels],
                                       name="b_t%d" % level)
            upsampled = utils.conv2d_transpose_strided(
                fused, kernel, bias, output_shape=tf.shape(skip))
            fused = tf.add(upsampled, skip, name="fuse_%d" % level)
            fused_channels = skip_channels

        image_shape = tf.shape(images)
        out_shape = tf.stack(
            [image_shape[0], image_shape[1], image_shape[2], 2])
        out_kernel = utils.weight_variable([16, 16, 2, fused_channels],
                                           name="W_t3")
        out_bias = utils.bias_variable([2], name="b_t3")
        pred = utils.conv2d_transpose_strided(
            fused, out_kernel, out_bias, output_shape=out_shape, stride=8)

    # Positional order is (values, axis, name), i.e. the TF >= 1.0 signature.
    return tf.concat([images, pred], 3, "pred_image")
def decoder_conv(embedding):
    """Decode a 512-dimensional embedding back into a 3-channel image.

    A fully connected ReLU layer expands the embedding to
    ``(IMAGE_SIZE // 16) ** 2 * 256`` features, which are reshaped to a
    feature map and passed through four 3x3 transpose convolutions
    (256 -> 128 -> 64 -> 32 -> 3 channels).  The requested output side doubles
    at each of the first three (2x, 4x, 8x ``IMAGE_SIZE // 16``) and is
    ``IMAGE_SIZE`` for the last.  The first three are followed by ReLU; the
    last is returned without a non-linearity.

    Dynamic shapes are assembled with ``tf.stack``; ``tf.pack`` was removed in
    TensorFlow 1.0.

    :param embedding: ``[batch, 512]`` tensor (required by the first matmul).
    :return: tensor requested with shape ``[batch, IMAGE_SIZE, IMAGE_SIZE, 3]``.
    """
    image_size = IMAGE_SIZE // 16

    with tf.name_scope("dec_fc"):
        W_fc1 = utils.weight_variable(
            [512, image_size * image_size * 256], name="W_fc1")
        b_fc1 = utils.bias_variable(
            [image_size * image_size * 256], name="b_fc1")
        h_fc1 = tf.nn.relu(tf.matmul(embedding, W_fc1) + b_fc1)

    with tf.name_scope("dec_conv1"):
        h_reshaped = tf.reshape(
            h_fc1,
            tf.stack([tf.shape(h_fc1)[0], image_size, image_size, 256]))
        W_conv_t1 = utils.weight_variable([3, 3, 128, 256],
                                          name="W_conv_t1")
        b_conv_t1 = utils.bias_variable([128], name="b_conv_t1")
        deconv_shape = tf.stack(
            [tf.shape(h_fc1)[0], 2 * image_size, 2 * image_size, 128])
        h_conv_t1 = tf.nn.relu(
            utils.conv2d_transpose_strided(h_reshaped, W_conv_t1, b_conv_t1,
                                           output_shape=deconv_shape))

    with tf.name_scope("dec_conv2"):
        W_conv_t2 = utils.weight_variable([3, 3, 64, 128],
                                          name="W_conv_t2")
        b_conv_t2 = utils.bias_variable([64], name="b_conv_t2")
        deconv_shape = tf.stack(
            [tf.shape(h_conv_t1)[0], 4 * image_size, 4 * image_size, 64])
        h_conv_t2 = tf.nn.relu(
            utils.conv2d_transpose_strided(h_conv_t1, W_conv_t2, b_conv_t2,
                                           output_shape=deconv_shape))

    with tf.name_scope("dec_conv3"):
        W_conv_t3 = utils.weight_variable([3, 3, 32, 64],
                                          name="W_conv_t3")
        b_conv_t3 = utils.bias_variable([32], name="b_conv_t3")
        deconv_shape = tf.stack(
            [tf.shape(h_conv_t2)[0], 8 * image_size, 8 * image_size, 32])
        h_conv_t3 = tf.nn.relu(
            utils.conv2d_transpose_strided(h_conv_t2, W_conv_t3, b_conv_t3,
                                           output_shape=deconv_shape))

    with tf.name_scope("dec_conv4"):
        W_conv_t4 = utils.weight_variable([3, 3, 3, 32], name="W_conv_t4")
        b_conv_t4 = utils.bias_variable([3], name="b_conv_t4")
        deconv_shape = tf.stack(
            [tf.shape(h_conv_t3)[0], IMAGE_SIZE, IMAGE_SIZE, 3])
        pred_image = utils.conv2d_transpose_strided(
            h_conv_t3, W_conv_t4, b_conv_t4, output_shape=deconv_shape)
    return pred_image
def generator(z, train_mode):
    """Build a DCGAN-style generator from a latent vector to an image.

    ``z`` is projected linearly, reshaped to an
    ``IMAGE_SIZE // 16`` x ``IMAGE_SIZE // 16`` map with
    ``64 * GEN_DIMENSION // 2`` channels, batch-normalised and passed through
    ReLU.  Three 5x5 transpose convolutions with batch norm and ReLU then
    halve the channel count while the requested output side goes to
    ``IMAGE_SIZE // 8``, ``// 4`` and ``// 2``.  A last 5x5 transpose
    convolution produces ``NUM_OF_CHANNELS`` channels at ``IMAGE_SIZE`` and is
    followed by ``tanh``.  Activation summaries are added for every layer.

    All size arithmetic uses floor division (``//``): with ``/`` the shape
    lists contain floats under Python 3, which TensorFlow rejects.  Dynamic
    shapes are assembled with ``tf.stack``; ``tf.pack`` was removed in
    TensorFlow 1.0.

    NOTE(review): the projection width is computed as
    ``channels * side * side`` with ``side = IMAGE_SIZE // 16`` so that it
    always matches the reshape; this equals the previous expression whenever
    ``IMAGE_SIZE`` is a multiple of 16.

    :param z: ``[batch, FLAGS.z_dim]`` latent tensor.
    :param train_mode: forwarded to ``utils.batch_norm``.
    :return: ``tanh`` output tensor named ``pred_image``.
    """
    base_channels = 64 * GEN_DIMENSION

    with tf.variable_scope("generator"):
        side = IMAGE_SIZE // 16
        channels = base_channels // 2
        projected_width = channels * side * side

        W_0 = utils.weight_variable([FLAGS.z_dim, projected_width],
                                    name="W_0")
        b_0 = utils.bias_variable([projected_width], name="b_0")
        z_0 = tf.matmul(z, W_0) + b_0
        h_0 = tf.reshape(z_0, [-1, side, side, channels])
        h_bn0 = utils.batch_norm(h_0, channels, train_mode, scope="gen_bn0")
        hidden = tf.nn.relu(h_bn0, name='relu0')
        utils.add_activation_summary(hidden)

        # A first transpose-conv layer (W_1 / gen_bn1) is intentionally not
        # built, so layer numbering continues at 2.
        # (output side divisor, output channel divisor) for layers 2..4.
        layer_plan = ((8, 4), (4, 8), (2, 16))
        for layer, (side_div, channel_div) in enumerate(layer_plan, start=2):
            out_side = IMAGE_SIZE // side_div
            out_channels = base_channels // channel_div
            kernel = utils.weight_variable(
                [5, 5, out_channels, channels], name="W_%d" % layer)
            bias = utils.bias_variable([out_channels], name="b_%d" % layer)
            deconv_shape = tf.stack(
                [tf.shape(hidden)[0], out_side, out_side, out_channels])
            h_conv_t = utils.conv2d_transpose_strided(
                hidden, kernel, bias, output_shape=deconv_shape)
            h_bn = utils.batch_norm(h_conv_t, out_channels, train_mode,
                                    scope="gen_bn%d" % layer)
            hidden = tf.nn.relu(h_bn, name='relu%d' % layer)
            utils.add_activation_summary(hidden)
            channels = out_channels

        W_5 = utils.weight_variable([5, 5, NUM_OF_CHANNELS, channels],
                                    name="W_5")
        b_5 = utils.bias_variable([NUM_OF_CHANNELS], name="b_5")
        deconv_shape = tf.stack(
            [tf.shape(hidden)[0], IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS])
        h_conv_t5 = utils.conv2d_transpose_strided(
            hidden, W_5, b_5, output_shape=deconv_shape)
        pred_image = tf.nn.tanh(h_conv_t5, name='pred_image')
        utils.add_activation_summary(pred_image)

    return pred_image
def inference(image, keep_prob):
    """
    Semantic segmentation network definition (inception_v3 variant).

    Calls ``inception_v3`` on ``image`` and then runs an FCN-style decoder:
    two 4x4 transpose convolutions fused with skip tensors and a final 16x16
    transpose convolution with ``stride=8`` that targets the spatial shape of
    ``image`` with ``NUM_OF_CLASSESS`` channels.

    NOTE(review): this function looks unfinished.  ``image_net`` and ``conv8``
    are read but never assigned here, so unless they exist at module level the
    call fails with ``NameError``.  ``net`` and ``up_sampling`` are assigned
    but never used, and ``end_points`` is indexed with an empty-string key.
    NOTE(review): the original indentation was lost; the nesting of the
    ``Upsampling`` / ``arg_scope`` blocks below is a best guess - confirm.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: forwarded to ``inception_v3`` as its fourth argument.
    :return: tuple ``(annotation, logits)`` - the argmax over axis 3 of the
        final transpose-convolution output with a trailing axis added, and
        that output itself.
    """
    print("setting up inception_v3 initialized conv layers ...")
    with tf.variable_scope("inference"):
        net, end_points = inception_v3(image, NUM_OF_CLASSESS, True, keep_prob)

        # now to upscale to actual image size
        with tf.variable_scope('Upsampling'):
            with slim.arg_scope([slim.conv2d_transpose], stride=2, padding='SAME'):
                # NOTE(review): empty key - the intended end point is unknown.
                up_sampling = end_points['']

        # NOTE(review): ``image_net`` is not defined in this function.
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        # NOTE(review): ``conv8`` is not defined in this function.
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(
            image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(
            image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        # Final layer: output takes its batch/height/width from the input.
        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def FCN(image, keep_prob):
    """Build a small fully convolutional segmentation network.

    Encoder: four 3x3 conv + ReLU stages (3 -> 32 -> 128 -> 256 -> 512
    channels), each followed by ``max_pool_2x2``; the second stage pools
    twice.  Decoder: two 4x4 transpose convolutions whose outputs are added
    to the third and second encoder outputs, then a 16x16 transpose
    convolution with ``stride=8`` that targets the spatial shape of ``image``
    with ``NUM_OF_CLASSES_FCN`` channels.

    :param image: input tensor; the first conv kernel expects 3 channels.
    :param keep_prob: accepted for interface compatibility; not used here.
    :return: tuple ``(annotation, logits)`` - the argmax over axis 3 of the
        final output with a trailing axis added, and the final output itself.
    """

    def conv_stage(scope_name, inputs, in_channels, out_channels,
                   pool_count=1):
        # 3x3 conv + bias + ReLU, then ``pool_count`` 2x2 max-pools, all
        # created inside the stage's variable scope.
        with tf.variable_scope(scope_name):
            kernel = weight_variable([3, 3, in_channels, out_channels])
            offset = bias_variable([out_channels])
            result = tf.nn.relu(conv2d(inputs, kernel) + offset)
            for _ in range(pool_count):
                result = max_pool_2x2(result)
        return result

    with tf.variable_scope("FCN"):
        pooled_1 = conv_stage('conv1', image, 3, 32)
        pooled_2 = conv_stage('conv2', pooled_1, 32, 128, pool_count=2)
        pooled_3 = conv_stage('conv3', pooled_2, 128, 256)
        pooled_4 = conv_stage('conv4', pooled_3, 256, 512)

        # Upscale: fuse with the third, then the second encoder output.
        fused = pooled_4
        fused_channels = 512
        for level, skip in enumerate((pooled_3, pooled_2), start=1):
            skip_channels = skip.get_shape()[3].value
            kernel = utils.weight_variable(
                [4, 4, skip_channels, fused_channels],
                name="W_t%d" % level)
            bias = utils.bias_variable([skip_channels],
                                       name="b_t%d" % level)
            upsampled = utils.conv2d_transpose_strided(
                fused, kernel, bias, output_shape=tf.shape(skip))
            fused = tf.add(upsampled, skip, name="fuse_%d" % level)
            fused_channels = skip_channels

        image_shape = tf.shape(image)
        full_shape = tf.stack(
            [image_shape[0], image_shape[1], image_shape[2],
             NUM_OF_CLASSES_FCN])
        out_kernel = utils.weight_variable(
            [16, 16, NUM_OF_CLASSES_FCN, fused_channels], name="W_t3")
        out_bias = utils.bias_variable([NUM_OF_CLASSES_FCN], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fused, out_kernel, out_bias, output_shape=full_shape, stride=8)

        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def inference(image, keep_prob):
    """
    Semantic segmentation network definition (VGG ``conv4_3`` variant).

    The image is mean-subtracted via ``utils.process_image`` and passed
    through ``vgg_net``.  Two 1x1 convolutions (512 -> 1024 with ReLU and
    dropout, then 1024 -> ``NUM_OF_CLASSESS``) sit on top of ``conv4_3``.  The
    result is up-sampled with 4x4 transpose convolutions, fused by addition
    with ``pool2`` and then ``pool1``, and finally brought to the spatial
    shape of ``image`` with a ``stride=2`` transpose convolution.

    The second fusion op is named ``fuse_2``; it previously reused the name
    ``fuse_1``, which TensorFlow silently uniquified.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability for the dropout after ``relu6``.
    :return: output of the final transpose convolution, with
        ``NUM_OF_CLASSESS`` channels.
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(model_dir, MODEL_URL)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv4_3"]

        W6 = utils.weight_variable([1, 1, 512, 1024], name="W6",
                                   init=weight_init)
        b6 = utils.bias_variable([1024], name="b6")
        conv6 = utils.conv2d_basic(conv_final_layer, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 1024, NUM_OF_CLASSESS], name="W7",
                                   init=weight_init)
        b7 = utils.bias_variable([NUM_OF_CLASSESS], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)

        # now to upscale to actual image size
        deconv_shape1 = image_net["pool2"].get_shape()
        W_t1 = utils.weight_variable(
            [4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS],
            name="W_t1", init=weight_init)
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(
            conv7, W_t1, b_t1, output_shape=tf.shape(image_net["pool2"]))
        fuse_1 = tf.add(conv_t1, image_net["pool2"], name="fuse_1")

        deconv_shape2 = image_net["pool1"].get_shape()
        W_t2 = utils.weight_variable(
            [4, 4, deconv_shape2[3].value, deconv_shape1[3].value],
            name="W_t2", init=weight_init)
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool1"]))
        # Fixed copy-paste: this op used to be named "fuse_1" as well.
        fuse_2 = tf.add(conv_t2, image_net["pool1"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack(
            [shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable(
            [4, 4, NUM_OF_CLASSESS, deconv_shape2[3].value],
            name="W_t3", init=weight_init)
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=2)

    return conv_t3
def inference(inputs):
    """Build a nine-layer fully connected network on 2-feature inputs.

    The ``input`` layer maps 2 -> ``NEURONS_PER_LAYER``, layers ``hidden2``
    to ``hidden8`` map ``NEURONS_PER_LAYER`` -> ``NEURONS_PER_LAYER`` (all
    with ReLU), and the ``output`` layer maps to ``channels`` without a
    non-linearity.  A histogram summary is recorded for every weight and
    bias.

    Fixes relative to the previous version:

    * the summaries tagged ``W7``/``b7``/``W8``/``b8`` now record layers 7
      and 8; they used to record ``W6``/``b6`` again (copy-paste error);
    * summaries use ``tf.summary.histogram``; ``tf.histogram_summary`` was
      removed in TensorFlow 1.0.  Unlike the old call, the summary name is
      prefixed with the enclosing name scope.

    :param inputs: ``[batch, 2]`` tensor (required by the first matmul).
    :return: ``[batch, channels]`` prediction tensor.
    """
    with tf.name_scope("input"):
        W1 = utils.weight_variable([2, NEURONS_PER_LAYER], name="weights_1")
        b1 = utils.bias_variable([NEURONS_PER_LAYER], name="bias_1")
        tf.summary.histogram("W1", W1)
        tf.summary.histogram("b1", b1)
        hidden = tf.nn.relu(tf.nn.bias_add(tf.matmul(inputs, W1), b1))

    # hidden2 .. hidden8 are identical square layers; generating them in a
    # loop keeps each summary tied to the variables of its own layer.
    for layer in range(2, 9):
        with tf.name_scope("hidden%d" % layer):
            weights = utils.weight_variable(
                [NEURONS_PER_LAYER, NEURONS_PER_LAYER],
                name="weights_%d" % layer)
            bias = utils.bias_variable([NEURONS_PER_LAYER],
                                       name="bias_%d" % layer)
            tf.summary.histogram("W%d" % layer, weights)
            tf.summary.histogram("b%d" % layer, bias)
            hidden = tf.nn.relu(tf.matmul(hidden, weights) + bias)

    with tf.name_scope("output"):
        W9 = utils.weight_variable([NEURONS_PER_LAYER, channels],
                                   name="weights_9")
        b9 = utils.bias_variable([channels], name="bias_9")
        tf.summary.histogram("W9", W9)
        tf.summary.histogram("b9", b9)
        pred = tf.matmul(hidden, W9) + b9

    return pred
def inference_res(input_image):
    """Build an image-to-image network from four bottleneck units.

    A 3x3 conv (3 -> 32) with ReLU and local response normalisation is
    followed by four ``utils.bottleneck_unit`` calls: two with
    ``down_stride=True`` (widths 16, 8) and two with ``up_stride=True``
    (widths 16, 32).  A final 3x3 conv (32 -> 3) with ``tanh`` produces the
    output.

    :param input_image: input tensor; the first kernel expects 3 channels.
    :return: ``tanh`` output tensor with 3 channels.
    """
    stem_kernel = utils.weight_variable([3, 3, 3, 32])
    stem_bias = utils.bias_variable([32])
    stem = tf.nn.relu(utils.conv2d_basic(input_image, stem_kernel, stem_bias))
    features = utils.local_response_norm(stem)

    # (width passed twice to bottleneck_unit, stride keyword) per unit.
    unit_plan = (
        (16, {"down_stride": True}),
        (8, {"down_stride": True}),
        (16, {"up_stride": True}),
        (32, {"up_stride": True}),
    )
    for index, (width, stride_kwargs) in enumerate(unit_plan, start=1):
        features = utils.bottleneck_unit(
            features, width, width, name="res_%d" % index, **stride_kwargs)

    head_kernel = utils.weight_variable([3, 3, 32, 3])
    head_bias = utils.bias_variable([3])
    return tf.nn.tanh(utils.conv2d_basic(features, head_kernel, head_bias))
def decoder_fc(z):
    """Decode a latent vector into a flattened image with three dense layers.

    Two hidden layers of 50 units use the module-level
    ``activation_function``; the output layer maps 50 ->
    ``IMAGE_SIZE * IMAGE_SIZE`` and has no non-linearity.

    :param z: ``[batch, FLAGS.z_dim]`` latent tensor.
    :return: ``[batch, IMAGE_SIZE * IMAGE_SIZE]`` tensor.
    """
    with tf.variable_scope("decoder"):
        hidden = z
        fan_in = FLAGS.z_dim
        for layer in (1, 2):
            weights = utils.weight_variable([fan_in, 50],
                                            name="Wd_fc%d" % layer)
            bias = utils.bias_variable([50], name="bd_fc%d" % layer)
            hidden = activation_function(
                tf.matmul(hidden, weights) + bias, name="hdfc_%d" % layer)
            fan_in = 50

        pixel_count = IMAGE_SIZE * IMAGE_SIZE
        out_weights = utils.weight_variable([50, pixel_count], name="Wd_fc3")
        out_bias = utils.bias_variable([pixel_count], name="bd_fc3")
        pred_image = tf.matmul(hidden, out_weights) + out_bias
    return pred_image
def deconv2d_layer_concat(x, name, W_s, concat_x, output_shape=None,
                          stride=2, stddev=0.02, if_relu=False):
    '''
    Deconv2d operator for U-Net concat.

    Applies ``utils.conv2d_transpose_strided`` to ``x``, optionally ReLU, and
    then merges the result with ``concat_x`` via ``utils.crop_and_concat``.

    The default-shape check uses ``is None``: comparing with ``== None`` is
    elementwise (and ambiguous in an ``if``) when ``output_shape`` is an
    array-like or a tensor with overloaded equality.

    Args:
        x: inputs
        name: suffix used for the variable names (``W_<name>``, ``b_<name>``)
            and for the optional ReLU op (``<name>_relu``).
        W_s: shape of weight; ``W_s[2]`` is used as the bias length.
        concat_x: tensor passed as first argument to
            ``utils.crop_and_concat``.
        output_shape: shape after deconv2d.  When ``None`` it defaults to the
            shape of ``x`` with height and width doubled and channels halved
            (independent of ``stride``).
        stride: stride forwarded to the transpose convolution.
        stddev: forwarded to ``utils.weight_variable``.
        if_relu: apply ReLU after the transpose convolution when true.

    Returns:
        The result of ``utils.crop_and_concat(concat_x, conv_t)``.
    '''
    if output_shape is None:
        x_shape = tf.shape(x)
        output_shape = tf.stack(
            [x_shape[0], x_shape[1] * 2, x_shape[2] * 2, x_shape[3] // 2])

    W_t = utils.weight_variable(W_s, stddev=stddev, name='W_' + name)
    b_t = utils.bias_variable([W_s[2]], name='b_' + name)
    conv_t = utils.conv2d_transpose_strided(x, W_t, b_t, output_shape,
                                            stride)

    if if_relu:
        conv_t = tf.nn.relu(conv_t, name=name + '_relu')

    conv_concat = utils.crop_and_concat(concat_x, conv_t)
    return conv_concat
def inference(image, keep_prob):
    """
    Build a ResNet-backed classifier head.

    Model parameters are loaded with ``utils.get_model_data`` and handed to
    ``res_net`` together with ``image``.  A 1x1 convolution maps the 2048
    backbone channels to ``FLAGS.NUM_OF_CLASSESS``, dropout is applied, and
    axes 1 and 2 are squeezed away to obtain the logits.

    :param image: input tensor passed unchanged to ``res_net`` (no mean
        subtraction is performed here).
    :param keep_prob: keep probability for the dropout on the 1x1 conv output.
    :return: tuple ``(prediction, logits)`` - the argmax over axis 1 of the
        logits with an extra axis inserted at position 1, and the logits.
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, FLAGS.MODEL_NAME)
    backbone_weights = np.squeeze(model_data['params'][0])

    with tf.variable_scope("inference"):
        backbone_out = res_net(backbone_weights, image)

        head_kernel = utils.weight_variable(
            [1, 1, 2048, FLAGS.NUM_OF_CLASSESS], name="fc_w")
        head_bias = utils.bias_variable([FLAGS.NUM_OF_CLASSESS], name="fc_b")
        head_out = utils.conv2d_basic(backbone_out, head_kernel, head_bias)
        head_out = tf.nn.dropout(head_out, keep_prob)

        # Squeezing axes 1 and 2 requires both to have size 1 at this point.
        logits = tf.squeeze(head_out, [1, 2])
        print('logits shape', logits.shape)

        annotation_pred = tf.argmax(logits, dimension=1, name="prediction")

    return tf.expand_dims(annotation_pred, dim=1), logits
def conv2d_layer(x, name, W_s, pool_, if_relu=False, stride=2, stddev=0.02,
                 if_dropout=False, keep_prob_=1):
    '''Conv2d operator with optional dropout, ReLU and max-pooling.

    The steps run in this order: strided convolution, dropout (if
    requested), ReLU (if requested), max-pool (if requested).  The shape
    after the convolution and after the pooling is printed.

    Args:
        x: input tensor.
        name: suffix for the variable names (``W<name>``, ``b<name>``) and
            the ReLU op (``<name>_relu``).
        W_s: shape of the weight; ``W_s[3]`` is used as the bias length.
        pool_: falsy (e.g. 0) skips pooling; otherwise it is forwarded to
            ``utils.max_pool`` as its second argument, with 2 as the third.
        if_relu: apply ReLU when true.
        stride: stride forwarded to ``utils.conv2d_strided``.
        stddev: forwarded to ``utils.weight_variable``.
        if_dropout: apply dropout when true.
        keep_prob_: keep probability for the dropout.

    Returns:
        The resulting tensor.
    '''
    kernel = utils.weight_variable(W_s, stddev=stddev, name='W' + name)
    offset = utils.bias_variable([W_s[3]], name='b' + name)

    out = utils.conv2d_strided(x, kernel, offset, stride)
    print('shape after conv: ', out.shape)
    print('--------------------------------')

    if if_dropout:
        out = tf.nn.dropout(out, keep_prob_)
    if if_relu:
        out = tf.nn.relu(out, name=name + '_relu')

    if not pool_:
        return out

    out = utils.max_pool(out, pool_, 2)
    print('shape after pool: ', out.shape)
    return out
def conv_layer(input, r_field, input_c, out_c, nr):
    """Square-kernel convolution followed by a ReLU.

    Creates a weight of shape [r_field, r_field, input_c, out_c] and a bias
    of size out_c, named "W<nr>" / "b<nr>"; the convolution and activation
    ops are named "conv<nr>" / "relu<nr>".

    Returns the ReLU output tensor.
    """
    suffix = str(nr)
    kernel_shape = [r_field, r_field, input_c, out_c]

    weights = utils.weight_variable(kernel_shape, name="W" + suffix)
    biases = utils.bias_variable([out_c], name="b" + suffix)

    pre_activation = utils.conv2d_basic(input, weights, biases,
                                        name="conv" + suffix)
    return tf.nn.relu(pre_activation, name="relu" + suffix)
def build_pose_layers(self, image, keep_prob, rois):
    """Build the pose head on top of self.image_net feature maps.

    Two feature maps ("conv5_3" and "conv4_3") are each upsampled by a
    transposed convolution to a [batch, 56, 56, 512] output shape,
    ROI-pooled with `rois`, passed through dropout and added together. The
    sum is reduced over axis 1, flattened, and fed through two dense layers
    that reuse the weights in self.vgg_fc["fc6"] / ["fc7"], then a new
    dense layer with a tanh.

    :param image: not used by this method; kept so the signature is unchanged
    :param keep_prob: keep probability for every dropout in the head
    :param rois: regions of interest passed to ROIPoolingLayer alongside
        each upsampled feature map
    :return: tanh output with 4 * (self.n_classes - 1) columns
    """
    with tf.variable_scope("pose"):
        # Branch 6: conv5_3 upsampled with stride 4.
        shape6 = tf.shape(self.image_net["conv5_3"])
        deconv_shape6 = tf.stack([shape6[0], 56, 56, 512])
        W_6 = utils.weight_variable([4, 4, 512, 512], name="W_6")
        b_6 = utils.bias_variable([512], name="b_6")
        conv_6 = utils.conv2d_transpose_strided(
            self.image_net["conv5_3"], W_6, b_6,
            output_shape=deconv_shape6, stride=4)
        roi_layer6 = ROIPoolingLayer(self.roi_pool_h, self.roi_pool_w)
        pooled_features6 = roi_layer6([conv_6, rois])
        pooled_features6 = tf.nn.dropout(pooled_features6,
                                         keep_prob=keep_prob)

        # Branch 7: conv4_3 upsampled with stride 2.
        shape7 = tf.shape(self.image_net["conv4_3"])
        deconv_shape7 = tf.stack([shape7[0], 56, 56, 512])
        W_7 = utils.weight_variable([2, 2, 512, 512], name="W_7")
        b_7 = utils.bias_variable([512], name="b_7")
        conv_7 = utils.conv2d_transpose_strided(
            self.image_net["conv4_3"], W_7, b_7,
            output_shape=deconv_shape7, stride=2)
        roi_layer7 = ROIPoolingLayer(self.roi_pool_h, self.roi_pool_w)
        pooled_features7 = roi_layer7([conv_7, rois])
        pooled_features7 = tf.nn.dropout(pooled_features7,
                                         keep_prob=keep_prob)

        # Fuse both branches, then collapse axis 1 of the pooled features.
        roi_add8 = tf.keras.layers.Add()([pooled_features6, pooled_features7])
        roi_add9 = tf.reduce_sum(roi_add8, axis=1)

        # Flatten everything after the leading axis; requires those static
        # dimensions to be known (a None entry would fail the product).
        shape = roi_add9.get_shape().as_list()
        dim = 1
        for d in shape[1:]:
            dim *= d
        roi_add9 = tf.reshape(roi_add9, [-1, dim])

        # Dense layers reusing the stored VGG fc6 / fc7 weights and biases.
        fc9_w = tf.reshape(self.vgg_fc["fc6"][0], [dim, 4096])
        fc9 = tf.nn.bias_add(tf.matmul(roi_add9, fc9_w),
                             self.vgg_fc["fc6"][1])
        fc_dropout9 = tf.nn.dropout(fc9, keep_prob=keep_prob)

        fc10_w = tf.reshape(self.vgg_fc["fc7"][0], [4096, 4096])
        fc10 = tf.nn.bias_add(tf.matmul(fc_dropout9, fc10_w),
                              self.vgg_fc["fc7"][1])
        fc_dropout10 = tf.nn.dropout(fc10, keep_prob=keep_prob)

        # Final regression layer: 4 outputs per class, one class excluded.
        W11 = utils.weight_variable([4096, 4 * (self.n_classes - 1)],
                                    name="W11")
        b11 = utils.bias_variable([4 * (self.n_classes - 1)], name="b11")
        fc11 = tf.nn.bias_add(tf.matmul(fc_dropout10, W11), b11)
        tanh11 = tf.math.tanh(fc11)

    return tanh11
def inference_fully_convolutional(dataset):
    '''
    Fully convolutional inference on notMNIST dataset

    Four strided 3x3 convolutions with ReLU, an average pool, and a 1x1
    convolution to 10 channels whose (ReLU'd) output is reshaped to
    [-1, 10].

    :param dataset: [batch_size, 28*28*1] tensor
    :return: logits
    '''
    dataset_reshaped = tf.reshape(dataset, [-1, 28, 28, 1])

    with tf.name_scope("conv1"):
        W_conv1 = utils.weight_variable_xavier_initialized([3, 3, 1, 32], name="W_conv1")
        b_conv1 = utils.bias_variable([32], name="b_conv1")
        h_conv1 = tf.nn.relu(utils.conv2d_strided(dataset_reshaped, W_conv1, b_conv1))

    with tf.name_scope("conv2"):
        W_conv2 = utils.weight_variable_xavier_initialized([3, 3, 32, 64], name="W_conv2")
        b_conv2 = utils.bias_variable([64], name="b_conv2")
        h_conv2 = tf.nn.relu(utils.conv2d_strided(h_conv1, W_conv2, b_conv2))

    with tf.name_scope("conv3"):
        W_conv3 = utils.weight_variable_xavier_initialized([3, 3, 64, 128], name="W_conv3")
        b_conv3 = utils.bias_variable([128], name="b_conv3")
        h_conv3 = tf.nn.relu(utils.conv2d_strided(h_conv2, W_conv3, b_conv3))

    with tf.name_scope("conv4"):
        W_conv4 = utils.weight_variable_xavier_initialized([3, 3, 128, 256], name="W_conv4")
        b_conv4 = utils.bias_variable([256], name="b_conv4")
        h_conv4 = tf.nn.relu(utils.conv2d_strided(h_conv3, W_conv4, b_conv4))

    with tf.name_scope("conv5"):
        # This stage is an average pool rather than a learned strided conv,
        # so the channel count stays at 256.
        h_conv5 = utils.avg_pool_2x2(h_conv4)

    with tf.name_scope("conv6"):
        W_conv6 = utils.weight_variable_xavier_initialized([1, 1, 256, 10], name="W_conv6")
        b_conv6 = utils.bias_variable([10], name="b_conv6")
        logits = tf.nn.relu(utils.conv2d_basic(h_conv5, W_conv6, b_conv6))

    # Single-argument call form prints the same on Python 2 and Python 3.
    print(logits.get_shape())
    logits = tf.reshape(logits, [-1, 10])
    return logits
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    VGG features up to 'conv4_4' are classified by a 1x1 convolution, then
    upsampled in two transposed-convolution steps: first to the shape of
    'pool2' (fused with it by addition), then to the input image size.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability of the dropout applied to the VGG
        end-layer features
    :return: (annotation, logits): the int32 argmax over axis 3 with that
        axis restored as size 1, and the final upsampled score map
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    mean_pixel = np.mean(model_data['normalization'][0][0][0], axis=(0, 1))
    vgg_weights = np.squeeze(model_data['layers'])
    normalized_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        # NOTE(review): relies on a vgg_net that accepts `end_layer` --
        # confirm the definition in scope has that parameter.
        end_layer = 'conv4_4'
        net = vgg_net(vgg_weights, normalized_image, end_layer=end_layer)

        features = tf.nn.dropout(net[end_layer], keep_prob=keep_prob)

        # Per-location class scores via a 1x1 convolution.
        W_final = utils.weight_variable([1, 1, 512, NUM_OF_CLASSES], name="W_final")
        b_final = utils.bias_variable([NUM_OF_CLASSES], name="b_final")
        conv_final = utils.conv2d_basic(features, W_final, b_final)
        if FLAGS.debug:
            utils.add_activation_summary(conv_final)

        # First upscale: to the shape of pool2, so the two can be added.
        pool2 = net["pool2"]
        pool2_channels = pool2.get_shape()[3].value
        W_t2 = utils.weight_variable([4, 4, pool2_channels, NUM_OF_CLASSES], name="W_t2")
        b_t2 = utils.bias_variable([pool2_channels], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(
            conv_final, W_t2, b_t2, output_shape=tf.shape(pool2))
        fuse_2 = tf.add(conv_t2, pool2, name="fuse_2")

        # Second upscale: back to the spatial size of the input image.
        image_shape = tf.shape(image)
        full_shape = tf.stack(
            [image_shape[0], image_shape[1], image_shape[2], NUM_OF_CLASSES])
        W_t3 = utils.weight_variable([8, 8, NUM_OF_CLASSES, pool2_channels], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSES], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(
            fuse_2, W_t3, b_t3, output_shape=full_shape, stride=4)

        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction",
                                    output_type=tf.int32)

    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def vgg_net(weights, image):
    """Build the VGG-19 style layer stack and return every layer's output.

    Layers are created in the order of the `layers` tuple below. 'conv1_1'
    gets a freshly created [3, 3, 4, 64] kernel (4 input channels) instead
    of the stored one; every other conv layer loads its kernel and bias from
    `weights` at the same index as its position in `layers`. Pooling layers
    use utils.avg_pool_2x2.

    Returns a dict mapping each layer name to its output tensor.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4',
    )
    # Author's note on weights[i][0][0][0][0], listing the variables built
    # for the first layers:
    #   inference/conv1_1_w  shape=(3, 3, 3, 64)     inference/conv1_1_b  shape=(64,)
    #   inference/conv1_2_w  shape=(3, 3, 64, 64)    inference/conv1_2_b  shape=(64,)
    #   inference/conv2_1_w  shape=(3, 3, 64, 128)   inference/conv2_1_b  shape=(128,)
    #   inference/conv2_2_w  shape=(3, 3, 128, 128)  inference/conv2_2_b  shape=(128,)

    net = {}
    current = image
    # enumerate yields (index, value) pairs; the index is needed because it
    # also addresses the matching entry in `weights`.
    for i, name in enumerate(layers):
        kind, num = name[:4], name[4:]

        if kind == 'conv':
            if num == '1_1':
                # First layer: new trainable parameters, 4 input channels.
                W = utils.weight_variable([3, 3, 4, 64], name=name + "_w")
                b = utils.bias_variable([64], name=name + "_b")
                current = utils.conv2d_basic(current, W, b)
            else:
                kernels, bias = weights[i][0][0][0][0]
                # matconvnet: weights are [width, height, in_channels, out_channels]
                # tensorflow: weights are [height, width, in_channels, out_channels]
                kernels = utils.get_variable(
                    np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
                bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
                current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)

        net[name] = current

    return net
def deepscores_cnn(image, nr_class):
    """Five conv stages followed by two fully connected layers.

    Each stage is a 3x3 convolution, ReLU, 2x2 max pool and dropout, with
    channel widths 1 -> 32 -> 64 -> 128 -> 256 -> 512. The flattened size
    is hard-coded as 7*4*512.

    Returns (y_conv, keep_prob): the output of the last fully connected
    layer (nr_class columns) and the dropout placeholder created here,
    which the caller must feed.
    """
    # placeholder for dropout input
    keep_prob = tf.placeholder(tf.float32)

    widths = [1, 32, 64, 128, 256, 512]
    features = image
    for idx, (c_in, c_out) in enumerate(zip(widths[:-1], widths[1:]), 1):
        tag = str(idx)
        kernel = utils.weight_variable([3, 3, c_in, c_out], name="W" + tag)
        offset = utils.bias_variable([c_out], name="b" + tag)
        conv = utils.conv2d_basic(features, kernel, offset, name="conv" + tag)
        activated = tf.nn.relu(conv, name="relu" + tag)
        pooled = utils.max_pool_2x2(activated)
        features = tf.nn.dropout(pooled, keep_prob=keep_prob)

    # to fully connected layers
    # downsampled 5 times so feature maps should be 32 times smaller
    # size is 7*4*512
    flat_size = 7*4*512
    W_fc1 = utils.weight_variable([flat_size, 1024])
    b_fc1 = utils.bias_variable([1024])
    flattened = tf.reshape(features, [-1, flat_size])
    h_fc1 = tf.nn.relu(tf.matmul(flattened, W_fc1) + b_fc1)

    W_fc2 = utils.weight_variable([1024, nr_class])
    b_fc2 = utils.bias_variable([nr_class])
    y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2

    return y_conv, keep_prob
def discriminator(input_images, train_mode):
    """Four strided 5x5 conv layers with leaky ReLU, then one linear unit.

    Layer 0 has no batch norm; layers 1-3 use utils.batch_norm with
    `train_mode`. The last activation is reshaped to
    [FLAGS.batch_size, (IMAGE_SIZE // 16)**2 * channels], so the batch size
    is fixed by the flag.

    Returns (sigmoid(h_4), h_4, h_relu3): the probability, the raw score
    and the last convolutional activation.
    """
    widths = [64 * 1, 64 * 2, 64 * 4, 64 * 8]

    # Layer 0: convolution + leaky ReLU only (batch norm is skipped here).
    W_conv0 = utils.weight_variable([5, 5, NUM_OF_CHANNELS, widths[0]], name="W_conv0")
    b_conv0 = utils.bias_variable([widths[0]], name="b_conv0")
    h_conv0 = utils.conv2d_strided(input_images, W_conv0, b_conv0)
    hidden = utils.leaky_relu(h_conv0, 0.2, name="h_relu0")
    utils.add_activation_summary(hidden)

    # Layers 1-3: convolution + batch norm + leaky ReLU.
    for idx in (1, 2, 3):
        c_in, c_out = widths[idx - 1], widths[idx]
        kernel = utils.weight_variable([5, 5, c_in, c_out], name="W_conv%d" % idx)
        offset = utils.bias_variable([c_out], name="b_conv%d" % idx)
        conv = utils.conv2d_strided(hidden, kernel, offset)
        normed = utils.batch_norm(conv, c_out, train_mode, scope="disc_bn%d" % idx)
        hidden = utils.leaky_relu(normed, 0.2, name="h_relu%d" % idx)
        utils.add_activation_summary(hidden)
    h_relu3 = hidden

    # Flatten with the static channel count and the configured batch size.
    channels = h_relu3.get_shape().as_list()[3]
    side = IMAGE_SIZE // 16
    h_3 = tf.reshape(h_relu3, [FLAGS.batch_size, side * side * channels])

    W_4 = utils.weight_variable([h_3.get_shape().as_list()[1], 1], name="W_4")
    b_4 = utils.bias_variable([1], name="b_4")
    h_4 = tf.matmul(h_3, W_4) + b_4

    return tf.nn.sigmoid(h_4), h_4, h_relu3
def encoder_fc(images):
    """Fully connected encoder producing the latent mean and log-variance.

    Two hidden layers of 50 units (using `activation_function`) feed two
    separate linear heads of FLAGS.z_dim units each.

    Returns (mu, log_var).
    """
    with tf.variable_scope("encoder"):
        input_dim = IMAGE_SIZE * IMAGE_SIZE

        W_fc1 = utils.weight_variable([input_dim, 50], name="W_fc1")
        b_fc1 = utils.bias_variable([50], name="b_fc1")
        hidden1 = activation_function(tf.matmul(images, W_fc1) + b_fc1, name="hfc_1")

        W_fc2 = utils.weight_variable([50, 50], name="W_fc2")
        b_fc2 = utils.bias_variable([50], name="b_fc2")
        hidden2 = activation_function(tf.matmul(hidden1, W_fc2) + b_fc2, name="hfc_2")

        # Mean head.
        W_fc3 = utils.weight_variable([50, FLAGS.z_dim], name="W_fc3")
        b_fc3 = utils.bias_variable([FLAGS.z_dim], name="b_fc3")
        mu = tf.add(tf.matmul(hidden2, W_fc3), b_fc3, name="mu")
        utils.add_activation_summary(mu)

        # Log-variance head, fed by the same hidden layer.
        W_fc4 = utils.weight_variable([50, FLAGS.z_dim], name="W_fc4")
        b_fc4 = utils.bias_variable([FLAGS.z_dim], name="b_fc4")
        log_var = tf.add(tf.matmul(hidden2, W_fc4), b_fc4, name="log_var")
        utils.add_activation_summary(log_var)

    return mu, log_var
def deconv_layer(input, r_field, in_channels, out_channels, out_shape, nr, stride=2):
    """Transposed convolution with a square kernel.

    Creates a weight of shape [r_field, r_field, out_channels, in_channels]
    and a bias of size out_channels, named "W_t<nr>" / "b_t<nr>".

    :param input: input tensor
    :param r_field: kernel height and width
    :param in_channels: channel count of `input`
    :param out_channels: channel count of the result
    :param out_shape: output shape handed to utils.conv2d_transpose_strided
    :param nr: layer tag appended to the variable names; converted with
        str(), so both strings and numbers are accepted
    :param stride: stride of the transposed convolution
    :return: the transposed-convolution output
    """
    suffix = str(nr)
    W = utils.weight_variable([r_field, r_field, out_channels, in_channels],
                              name="W_t" + suffix)
    b = utils.bias_variable([out_channels], name="b_t" + suffix)
    # Forward `stride`; it used to be accepted but silently ignored.
    conv_t1 = utils.conv2d_transpose_strided(input, W, b, out_shape, stride)
    return conv_t1
def inference_fully_convolutional(dataset):
    '''
    Fully convolutional inference on notMNIST dataset

    Four strided 3x3 convolutions with ReLU, an average pool, and a 1x1
    convolution to 10 channels whose (ReLU'd) output is reshaped to
    [-1, 10].

    :param dataset: [batch_size, 28*28*1] tensor
    :return: logits
    '''
    dataset_reshaped = tf.reshape(dataset, [-1, 28, 28, 1])

    with tf.name_scope("conv1"):
        W_conv1 = utils.weight_variable_xavier_initialized([3, 3, 1, 32], name="W_conv1")
        b_conv1 = utils.bias_variable([32], name="b_conv1")
        h_conv1 = tf.nn.relu(utils.conv2d_strided(dataset_reshaped, W_conv1, b_conv1))

    with tf.name_scope("conv2"):
        W_conv2 = utils.weight_variable_xavier_initialized([3, 3, 32, 64], name="W_conv2")
        b_conv2 = utils.bias_variable([64], name="b_conv2")
        h_conv2 = tf.nn.relu(utils.conv2d_strided(h_conv1, W_conv2, b_conv2))

    with tf.name_scope("conv3"):
        W_conv3 = utils.weight_variable_xavier_initialized([3, 3, 64, 128], name="W_conv3")
        b_conv3 = utils.bias_variable([128], name="b_conv3")
        h_conv3 = tf.nn.relu(utils.conv2d_strided(h_conv2, W_conv3, b_conv3))

    with tf.name_scope("conv4"):
        W_conv4 = utils.weight_variable_xavier_initialized([3, 3, 128, 256], name="W_conv4")
        b_conv4 = utils.bias_variable([256], name="b_conv4")
        h_conv4 = tf.nn.relu(utils.conv2d_strided(h_conv3, W_conv4, b_conv4))

    with tf.name_scope("conv5"):
        # This stage is an average pool rather than a learned strided conv,
        # so the channel count stays at 256.
        h_conv5 = utils.avg_pool_2x2(h_conv4)

    with tf.name_scope("conv6"):
        W_conv6 = utils.weight_variable_xavier_initialized([1, 1, 256, 10], name="W_conv6")
        b_conv6 = utils.bias_variable([10], name="b_conv6")
        logits = tf.nn.relu(utils.conv2d_basic(h_conv5, W_conv6, b_conv6))

    # Single-argument call form prints the same on Python 2 and Python 3.
    print(logits.get_shape())
    logits = tf.reshape(logits, [-1, 10])
    return logits
def down_layer(x, in_size, out_size, i):
    """Down-sampling stage: two 3x3x3 conv + ReLU + dropout, then a pool.

    The first convolution maps in_size -> out_size // 2 channels and the
    second out_size // 2 -> out_size. Dropout reads `keep_prob` from the
    enclosing scope; it is not a parameter of this function.

    Returns (relu2, pool): the pre-pooling activation and the output of
    utils.max_pool_2x2x2.
    """
    prefix = "_d_" + str(i)
    half = out_size // 2

    # Down 1
    W1 = utils.weight_variable([3, 3, 3, in_size, half], name="W" + prefix + "_1")
    b1 = utils.bias_variable([half], name="b" + prefix + "_1")
    first = tf.nn.relu(utils.conv3d_basic(x, W1, b1), name="relu" + prefix + "_1")
    first = tf.nn.dropout(first, keep_prob=keep_prob)

    # Down 2
    W2 = utils.weight_variable([3, 3, 3, half, out_size], name="W" + prefix + "_2")
    b2 = utils.bias_variable([out_size], name="b" + prefix + "_2")
    second = tf.nn.relu(utils.conv3d_basic(first, W2, b2), name="relu" + prefix + "_2")
    second = tf.nn.dropout(second, keep_prob=keep_prob)

    # Pool 1
    return second, utils.max_pool_2x2x2(second)
def discriminator(input_images, train_mode):
    """Four strided 5x5 conv layers with leaky ReLU, then one linear unit.

    Layer 0 has no batch norm; layers 1-3 use utils.batch_norm with
    `train_mode`. The last activation is reshaped to
    [FLAGS.batch_size, (IMAGE_SIZE // 16)**2 * channels], so the batch size
    is fixed by the flag.

    Returns (sigmoid(h_4), h_4, h_relu3): the probability, the raw score
    and the last convolutional activation.
    """
    widths = [64 * 1, 64 * 2, 64 * 4, 64 * 8]

    # Layer 0: convolution + leaky ReLU only (batch norm is skipped here).
    W_conv0 = utils.weight_variable([5, 5, NUM_OF_CHANNELS, widths[0]], name="W_conv0")
    b_conv0 = utils.bias_variable([widths[0]], name="b_conv0")
    h_conv0 = utils.conv2d_strided(input_images, W_conv0, b_conv0)
    hidden = utils.leaky_relu(h_conv0, 0.2, name="h_relu0")
    utils.add_activation_summary(hidden)

    # Layers 1-3: convolution + batch norm + leaky ReLU.
    for idx in (1, 2, 3):
        c_in, c_out = widths[idx - 1], widths[idx]
        kernel = utils.weight_variable([5, 5, c_in, c_out], name="W_conv%d" % idx)
        offset = utils.bias_variable([c_out], name="b_conv%d" % idx)
        conv = utils.conv2d_strided(hidden, kernel, offset)
        normed = utils.batch_norm(conv, c_out, train_mode, scope="disc_bn%d" % idx)
        hidden = utils.leaky_relu(normed, 0.2, name="h_relu%d" % idx)
        utils.add_activation_summary(hidden)
    h_relu3 = hidden

    # Flatten with the static channel count and the configured batch size.
    channels = h_relu3.get_shape().as_list()[3]
    side = IMAGE_SIZE // 16
    h_3 = tf.reshape(h_relu3, [FLAGS.batch_size, side * side * channels])

    W_4 = utils.weight_variable([h_3.get_shape().as_list()[1], 1], name="W_4")
    b_4 = utils.bias_variable([1], name="b_4")
    h_4 = tf.matmul(h_3, W_4) + b_4

    return tf.nn.sigmoid(h_4), h_4, h_relu3
def inpainter(embedding, train_mode):
    """Decode a 1024-wide embedding into a 3-channel image.

    A fully connected layer expands the embedding to a
    [batch, s, s, 512] map with s = IMAGE_SIZE // 32; four transposed
    convolutions then produce output shapes of 2s, 4s, 8s and 16s per side
    with 256, 128, 64 and 3 channels. The first three are followed by batch
    norm and ReLU; the last one is returned as is.

    Returns the predicted image tensor.
    """
    with tf.variable_scope("context_inpainter"):
        image_size = IMAGE_SIZE // 32

        with tf.name_scope("dec_fc"):
            fc_units = image_size * image_size * 512
            W_fc = utils.weight_variable([1024, fc_units], name="W_fc")
            b_fc = utils.bias_variable([fc_units], name="b_fc")
            h_fc = tf.nn.relu(tf.matmul(embedding, W_fc) + b_fc)

        with tf.name_scope("dec_conv1"):
            # Restore the spatial layout; the batch size is read at run time.
            spatial_shape = tf.pack([tf.shape(h_fc)[0], image_size, image_size, 512])
            h_reshaped = tf.reshape(h_fc, spatial_shape)
            W_conv_t1 = utils.weight_variable_xavier_initialized([3, 3, 256, 512], name="W_conv_t1")
            b_conv_t1 = utils.bias_variable([256], name="b_conv_t1")
            target_shape = tf.pack([tf.shape(h_reshaped)[0], 2 * image_size, 2 * image_size, 256])
            up1 = utils.conv2d_transpose_strided(h_reshaped, W_conv_t1, b_conv_t1,
                                                 output_shape=target_shape)
            up1 = utils.batch_norm(up1, 256, train_mode, scope="conv_t1_bn")
            h_relu_t1 = tf.nn.relu(up1)

        with tf.name_scope("dec_conv2"):
            W_conv_t2 = utils.weight_variable_xavier_initialized([3, 3, 128, 256], name="W_conv_t2")
            b_conv_t2 = utils.bias_variable([128], name="b_conv_t2")
            target_shape = tf.pack([tf.shape(h_relu_t1)[0], 4 * image_size, 4 * image_size, 128])
            up2 = utils.conv2d_transpose_strided(h_relu_t1, W_conv_t2, b_conv_t2,
                                                 output_shape=target_shape)
            up2 = utils.batch_norm(up2, 128, train_mode, scope="conv_t2_bn")
            h_relu_t2 = tf.nn.relu(up2)

        with tf.name_scope("dec_conv3"):
            W_conv_t3 = utils.weight_variable_xavier_initialized([3, 3, 64, 128], name="W_conv_t3")
            b_conv_t3 = utils.bias_variable([64], name="b_conv_t3")
            target_shape = tf.pack([tf.shape(h_relu_t2)[0], 8 * image_size, 8 * image_size, 64])
            up3 = utils.conv2d_transpose_strided(h_relu_t2, W_conv_t3, b_conv_t3,
                                                 output_shape=target_shape)
            up3 = utils.batch_norm(up3, 64, train_mode, scope="conv_t3_bn")
            h_relu_t3 = tf.nn.relu(up3)

        with tf.name_scope("dec_conv4"):
            # Final layer: no batch norm or activation on the prediction.
            W_conv_t4 = utils.weight_variable_xavier_initialized([3, 3, 3, 64], name="W_conv_t4")
            b_conv_t4 = utils.bias_variable([3], name="b_conv_t4")
            target_shape = tf.pack([tf.shape(h_relu_t3)[0], 16 * image_size, 16 * image_size, 3])
            pred_image = utils.conv2d_transpose_strided(h_relu_t3, W_conv_t4, b_conv_t4,
                                                        output_shape=target_shape)

    return pred_image
def deconv2d_layer(x, name, W_s, output_shape=None, stride=2):
    '''Transposed-convolution (deconv2d) layer.

    Args:
        x: inputs
        name: suffix for the variable names ('W_' + name, 'b_' + name)
        W_s: shape of weight; W_s[2] is used as the bias size
        output_shape: shape after deconv2d, forwarded as given (None
            included) to utils.conv2d_transpose_strided
        stride: stride forwarded to utils.conv2d_transpose_strided

    Returns:
        The transposed-convolution output; its static shape is printed.
    '''
    kernel = utils.weight_variable(W_s, name='W_' + name)
    offset = utils.bias_variable([W_s[2]], name='b_' + name)
    upsampled = utils.conv2d_transpose_strided(x, kernel, offset,
                                               output_shape, stride)
    print('conv_%s: ' % name, upsampled.get_shape())
    return upsampled
def inferece(dataset, prob):
    """Two conv + max-pool stages followed by two fully connected layers.

    :param dataset: input tensor fed to the first 5x5 convolution (one
        input channel)
    :param prob: keep probability of the dropout applied to the first
        fully connected layer
    :return: output of the last layer, with NUM_LABELS columns
    """
    with tf.name_scope("conv1"):
        W_conv1 = utils.weight_variable([5, 5, 1, 32])
        b_conv1 = utils.bias_variable([32])
        tf.histogram_summary("W_conv1", W_conv1)
        tf.histogram_summary("b_conv1", b_conv1)
        h_conv1 = utils.conv2d_basic(dataset, W_conv1, b_conv1)
        h_1 = tf.nn.relu(h_conv1)
        h_pool1 = utils.max_pool_2x2(h_1)
        add_to_regularization_loss(W_conv1, b_conv1)

    with tf.name_scope("conv2"):
        W_conv2 = utils.weight_variable([3, 3, 32, 64])
        b_conv2 = utils.bias_variable([64])
        tf.histogram_summary("W_conv2", W_conv2)
        tf.histogram_summary("b_conv2", b_conv2)
        h_conv2 = utils.conv2d_basic(h_pool1, W_conv2, b_conv2)
        h_2 = tf.nn.relu(h_conv2)
        h_pool2 = utils.max_pool_2x2(h_2)
        add_to_regularization_loss(W_conv2, b_conv2)

    with tf.name_scope("fc_1"):
        # Floor division keeps the size an int on Python 3 as well; a float
        # here would produce invalid shapes below.
        image_size = IMAGE_SIZE // 4
        h_flat = tf.reshape(h_pool2, [-1, image_size * image_size * 64])
        W_fc1 = utils.weight_variable([image_size * image_size * 64, 256])
        b_fc1 = utils.bias_variable([256])
        tf.histogram_summary("W_fc1", W_fc1)
        tf.histogram_summary("b_fc1", b_fc1)
        h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)
        h_fc1_dropout = tf.nn.dropout(h_fc1, prob)

    with tf.name_scope("fc_2"):
        W_fc2 = utils.weight_variable([256, NUM_LABELS])
        b_fc2 = utils.bias_variable([NUM_LABELS])
        tf.histogram_summary("W_fc2", W_fc2)
        tf.histogram_summary("b_fc2", b_fc2)
        # Feed the dropout output: the previous code read h_fc1 here, so
        # the dropout op (and `prob`) had no effect on the prediction.
        pred = tf.matmul(h_fc1_dropout, W_fc2) + b_fc2

    return pred
def inference(dataset):
    """Four conv stages with local response norm, then two dense layers.

    Stages use kernel sizes 5, 3, 3, 3 and channel widths
    1 -> 32 -> 64 -> 128 -> 256; the first three are followed by a 2x2 max
    pool, the fourth is not. The flattened size is computed from
    IMAGE_SIZE // 8.

    Returns the output of the last dense layer, with NUM_LABELS columns.
    """
    widths = [1, 32, 64, 128, 256]
    kernel_sizes = [5, 3, 3, 3]
    last_stage = len(kernel_sizes)

    features = dataset
    for idx, ksize in enumerate(kernel_sizes, 1):
        with tf.name_scope("conv%d" % idx):
            kernel = utils.weight_variable(
                [ksize, ksize, widths[idx - 1], widths[idx]], name="W%d" % idx)
            offset = utils.bias_variable([widths[idx]], name="b%d" % idx)
            tf.histogram_summary("W%d" % idx, kernel)
            tf.histogram_summary("b%d" % idx, offset)
            conv = utils.conv2d_basic(features, kernel, offset)
            normed = utils.local_response_norm(conv)
            features = tf.nn.relu(normed, name="conv%d" % idx)
            # Only the first three stages are pooled.
            if idx != last_stage:
                features = utils.max_pool_2x2(features)

    with tf.name_scope("fc1"):
        image_size = IMAGE_SIZE // 8
        flat_size = image_size * image_size * 256
        h_flat = tf.reshape(features, [-1, flat_size])
        W_fc1 = utils.weight_variable([flat_size, 512], name="W_fc1")
        b_fc1 = utils.bias_variable([512], name="b_fc1")
        tf.histogram_summary("W_fc1", W_fc1)
        tf.histogram_summary("b_fc1", b_fc1)
        h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)

    with tf.name_scope("fc2"):
        W_fc2 = utils.weight_variable([512, NUM_LABELS], name="W_fc2")
        b_fc2 = utils.bias_variable([NUM_LABELS], name="b_fc2")
        tf.histogram_summary("W_fc2", W_fc2)
        tf.histogram_summary("b_fc2", b_fc2)
        pred = tf.matmul(h_fc1, W_fc2) + b_fc2

    return pred
def decoder_conv(embedding):
    """Decode a 512-wide embedding into a 3-channel image.

    A fully connected layer expands the embedding to a [batch, s, s, 256]
    map with s = IMAGE_SIZE // 16. Four transposed convolutions then target
    sides of 2s, 4s, 8s and IMAGE_SIZE with 128, 64, 32 and 3 channels; the
    first three are followed by a ReLU.

    Returns the predicted image tensor.
    """
    image_size = IMAGE_SIZE // 16

    with tf.name_scope("dec_fc"):
        fc_units = image_size * image_size * 256
        W_fc1 = utils.weight_variable([512, fc_units], name="W_fc1")
        b_fc1 = utils.bias_variable([fc_units], name="b_fc1")
        h_fc1 = tf.nn.relu(tf.matmul(embedding, W_fc1) + b_fc1)

    with tf.name_scope("dec_conv1"):
        # Restore the spatial layout; the batch size is read at run time.
        spatial_shape = tf.pack([tf.shape(h_fc1)[0], image_size, image_size, 256])
        h_reshaped = tf.reshape(h_fc1, spatial_shape)
        W_conv_t1 = utils.weight_variable([3, 3, 128, 256], name="W_conv_t1")
        b_conv_t1 = utils.bias_variable([128], name="b_conv_t1")
        target_shape = tf.pack([tf.shape(h_fc1)[0], 2 * image_size, 2 * image_size, 128])
        up1 = utils.conv2d_transpose_strided(h_reshaped, W_conv_t1, b_conv_t1,
                                             output_shape=target_shape)
        h_conv_t1 = tf.nn.relu(up1)

    with tf.name_scope("dec_conv2"):
        W_conv_t2 = utils.weight_variable([3, 3, 64, 128], name="W_conv_t2")
        b_conv_t2 = utils.bias_variable([64], name="b_conv_t2")
        target_shape = tf.pack([tf.shape(h_conv_t1)[0], 4 * image_size, 4 * image_size, 64])
        up2 = utils.conv2d_transpose_strided(h_conv_t1, W_conv_t2, b_conv_t2,
                                             output_shape=target_shape)
        h_conv_t2 = tf.nn.relu(up2)

    with tf.name_scope("dec_conv3"):
        W_conv_t3 = utils.weight_variable([3, 3, 32, 64], name="W_conv_t3")
        b_conv_t3 = utils.bias_variable([32], name="b_conv_t3")
        target_shape = tf.pack([tf.shape(h_conv_t2)[0], 8 * image_size, 8 * image_size, 32])
        up3 = utils.conv2d_transpose_strided(h_conv_t2, W_conv_t3, b_conv_t3,
                                             output_shape=target_shape)
        h_conv_t3 = tf.nn.relu(up3)

    with tf.name_scope("dec_conv4"):
        # Final layer targets the full image size and has no activation.
        W_conv_t4 = utils.weight_variable([3, 3, 3, 32], name="W_conv_t4")
        b_conv_t4 = utils.bias_variable([3], name="b_conv_t4")
        target_shape = tf.pack([tf.shape(h_conv_t3)[0], IMAGE_SIZE, IMAGE_SIZE, 3])
        pred_image = utils.conv2d_transpose_strided(h_conv_t3, W_conv_t4, b_conv_t4,
                                                    output_shape=target_shape)

    return pred_image
def encoder(dataset, train_mode):
    """Five strided conv + batch norm + ReLU stages and a linear layer.

    Channel widths run 3 -> 32 -> 64 -> 128 -> 256 -> 512 with 3x3
    kernels. The final activation is flattened using IMAGE_SIZE // 32 as
    the side length and projected to 1024 units without an activation.

    Returns the 1024-wide encoding tensor.
    """
    widths = [3, 32, 64, 128, 256, 512]

    with tf.variable_scope("Encoder"):
        activation = dataset
        for idx in range(1, len(widths)):
            c_in, c_out = widths[idx - 1], widths[idx]
            with tf.name_scope("enc_conv%d" % idx):
                kernel = utils.weight_variable_xavier_initialized(
                    [3, 3, c_in, c_out], name="W_conv%d" % idx)
                offset = utils.bias_variable([c_out], name="b_conv%d" % idx)
                conv = utils.conv2d_strided(activation, kernel, offset)
                normed = utils.batch_norm(conv, c_out, train_mode,
                                          scope="conv%d_bn" % idx)
                activation = tf.nn.relu(normed)

        with tf.name_scope("enc_fc"):
            image_size = IMAGE_SIZE // 32
            flat_size = image_size * image_size * 512
            flattened = tf.reshape(activation, [-1, flat_size])
            W_fc = utils.weight_variable([flat_size, 1024], name="W_fc")
            b_fc = utils.bias_variable([1024], name="b_fc")
            encoder_val = tf.matmul(flattened, W_fc) + b_fc

    return encoder_val
def inference_strided(input_image):
    """Image-to-image network: conv, two strided convs, two transposed
    convs, and a final conv with tanh.

    Every layer's weight and bias get a histogram summary tagged
    "W<k>" / "b<k>". The transposed convolutions are called without an
    explicit output shape.

    Returns the tanh-activated 3-channel prediction.
    """
    def _params(tag, weight_shape, bias_size):
        # Create one layer's parameters and register their summaries.
        weights = utils.weight_variable(weight_shape)
        biases = utils.bias_variable([bias_size])
        tf.histogram_summary("W" + tag, weights)
        tf.histogram_summary("b" + tag, biases)
        return weights, biases

    W1, b1 = _params("1", [9, 9, 3, 32], 32)
    h_conv1 = tf.nn.relu(utils.conv2d_basic(input_image, W1, b1))

    W2, b2 = _params("2", [3, 3, 32, 64], 64)
    h_conv2 = tf.nn.relu(utils.conv2d_strided(h_conv1, W2, b2))

    W3, b3 = _params("3", [3, 3, 64, 128], 128)
    h_conv3 = tf.nn.relu(utils.conv2d_strided(h_conv2, W3, b3))

    # upstrides: weight shapes list the output channels before the input
    # channels, and the bias matches the output channels.
    W4, b4 = _params("4", [3, 3, 64, 128], 64)
    h_conv4 = tf.nn.relu(utils.conv2d_transpose_strided(h_conv3, W4, b4))

    W5, b5 = _params("5", [3, 3, 32, 64], 32)
    h_conv5 = tf.nn.relu(utils.conv2d_transpose_strided(h_conv4, W5, b5))

    W6, b6 = _params("6", [9, 9, 32, 3], 3)
    return tf.nn.tanh(utils.conv2d_basic(h_conv5, W6, b6))
def inference_fc(image):
    """Fully connected autoencoder with a 3-unit bottleneck.

    Layer widths run IMAGE_SIZE*IMAGE_SIZE -> 50 -> 50 -> 3 -> 50 -> 50 ->
    IMAGE_SIZE*IMAGE_SIZE. Layers fc1-fc5 use tanh; fc6 is linear. Each
    layer's weight and bias are passed to add_to_reg_loss_and_summary.

    Returns (h_fc3, pred): the bottleneck activation and the
    reconstruction.
    """
    def _dense(idx, inputs, n_in, n_out, squash=True):
        # One dense layer inside its own name scope, named by `idx`.
        with tf.name_scope("fc%d" % idx):
            weights = utils.weight_variable([n_in, n_out], name="W_fc%d" % idx)
            biases = utils.bias_variable([n_out], name="b_fc%d" % idx)
            add_to_reg_loss_and_summary(weights, biases)
            pre_activation = tf.matmul(inputs, weights) + biases
            if squash:
                return tf.nn.tanh(pre_activation)
            return pre_activation

    n_pixels = IMAGE_SIZE * IMAGE_SIZE

    # Encoder half, down to the bottleneck.
    h_fc1 = _dense(1, image, n_pixels, 50)
    h_fc2 = _dense(2, h_fc1, 50, 50)
    h_fc3 = _dense(3, h_fc2, 50, 3)

    # Decoder half; the output layer has no activation.
    h_fc4 = _dense(4, h_fc3, 3, 50)
    h_fc5 = _dense(5, h_fc4, 50, 50)
    pred = _dense(6, h_fc5, 50, n_pixels, squash=False)

    return h_fc3, pred
def main(argv=None):
    """Build the image-analogy graph and run the training loop.

    Four image placeholders (A, B, C and the ground truth D) are encoded by
    ``encoder_conv`` with shared variables. Two ReLU layers map the
    concatenation of ``enc_B - enc_A`` and ``enc_C`` to an offset that is
    added to ``enc_C`` and decoded by ``decoder_conv`` into the predicted
    image.

    The total loss is the image loss, plus ``ANALOGY_COEFF`` times the
    analogy loss, plus ``REGULARIZER`` times the sum of the ``"reg_loss"``
    collection.

    Training runs for ``MAX_ITERATIONS`` steps. Every 1000 steps the losses
    are printed for a fixed evaluation batch. Every 100 steps the training
    loss is printed, summaries are written and a checkpoint is saved under
    ``FLAGS.logs_dir``. An existing checkpoint in that directory is restored
    before training starts.

    :param argv: not used by this function.
    """
    # Function-scope import: only needed to build the checkpoint path.
    import os

    global_step = tf.Variable(0, trainable=False)

    img_A = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3])
    img_B = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3])
    img_C = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3])
    img_D = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3])

    tf.image_summary("A", img_A, max_images=2)
    tf.image_summary("B", img_B, max_images=2)
    tf.image_summary("C", img_C, max_images=2)
    tf.image_summary("Ground_truth", img_D, max_images=2)

    print("Setting up encoder..")
    with tf.variable_scope("encoder") as scope:
        enc_A = encoder_conv(img_A)
        # Variables are created by the first call; the remaining encoders
        # reuse them so all four images share one set of weights.
        scope.reuse_variables()
        enc_B = encoder_conv(img_B)
        enc_C = encoder_conv(img_C)
        enc_D = encoder_conv(img_D)

    print("Setting up analogy calc..")
    # Analogy calculation: the concatenated input is multiplied by a
    # [1024, 512] matrix, so each encoding is presumably 512 wide
    # (TODO confirm against encoder_conv).
    analogy_input = tf.concat(1, [enc_B - enc_A, enc_C])
    W_analogy1 = utils.weight_variable([1024, 512], name="W_analogy1")
    b_analogy1 = utils.bias_variable([512], name="b_analogy1")
    analogy_fc1 = tf.nn.relu(tf.matmul(analogy_input, W_analogy1) + b_analogy1)

    W_analogy2 = utils.weight_variable([512, 512], name="W_analogy2")
    b_analogy2 = utils.bias_variable([512], name="b_analogy2")
    analogy_fc2 = tf.nn.relu(tf.matmul(analogy_fc1, W_analogy2) + b_analogy2)

    pred = decoder_conv(enc_C + analogy_fc2)
    tf.image_summary("Pred_image", pred, max_images=2)

    print("Setting up regularization/ summary variables...")
    for var in tf.trainable_variables():
        add_to_regularization_and_summary(var)

    print("Loss and train setup...")
    # tf.nn.l2_loss is half the sum of squares, so sqrt(2 * l2_loss) is the
    # Euclidean norm of the residual; it is then divided by the batch size.
    loss1 = tf.sqrt(2 * tf.nn.l2_loss(pred - img_D)) / FLAGS.batch_size
    tf.scalar_summary("image_loss", loss1)

    # The predicted offset should match the encoding difference enc_D - enc_C.
    loss2 = tf.sqrt(2 * tf.nn.l2_loss(enc_D - enc_C - analogy_fc2)) / FLAGS.batch_size
    tf.scalar_summary("analogy_loss", loss2)

    loss3 = tf.add_n(tf.get_collection("reg_loss"))
    tf.scalar_summary("regularization", loss3)

    total_loss = loss1 + ANALOGY_COEFF * loss2 + REGULARIZER * loss3
    tf.scalar_summary("Total_loss", total_loss)
    train_op = train(total_loss, global_step)

    summary_op = tf.merge_all_summaries()

    utils.maybe_download_and_extract(FLAGS.data_dir, DATA_URL, is_tarfile=True)
    print("Initializing Loader class...")
    loader = AnalogyDataLoader.Loader(FLAGS.data_dir, FLAGS.batch_size)

    # The evaluation batch is read once and reused for every evaluation.
    eval_A, eval_B, eval_C, eval_D = read_eval_inputs(loader)
    eval_feed = {img_A: eval_A, img_B: eval_B, img_C: eval_C, img_D: eval_D}

    # Join with os.path so the checkpoint lands inside logs_dir whether or
    # not the flag ends with a path separator. Plain concatenation would
    # write "<logs_dir>model.ckpt" next to the directory, where the restore
    # lookup below never finds it.
    checkpoint_path = os.path.join(FLAGS.logs_dir, "model.ckpt")

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        print("Setting up summary and saver...")
        summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)
        saver = tf.train.Saver()

        # Restoring after initialization overwrites the fresh values.
        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored!")

        for step in xrange(MAX_ITERATIONS):
            A, B, C, D = read_train_inputs(loader)
            feed_dict = {img_A: A, img_B: B, img_C: C, img_D: D}

            if step % 1000 == 0:
                eval_loss = sess.run([loss1, loss2, loss3, total_loss], feed_dict=eval_feed)
                print("Evaluation: (Image loss %f, Variation loss %f, Reg loss %f) total loss %f" % tuple(eval_loss))

            sess.run(train_op, feed_dict=feed_dict)

            if step % 100 == 0:
                [loss_val, summary_str] = sess.run([total_loss, summary_op], feed_dict=feed_dict)
                print("%s Step %d: Training loss %f" % (datetime.now(), step, loss_val))
                summary_writer.add_summary(summary_str, global_step=step)
                saver.save(sess, checkpoint_path, global_step=step)
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    A ``vgg_net`` backbone built from downloaded model data is followed by
    three convolutions (``W6``/``W7``/``W8``) with dropout after the first
    two. Three transposed convolutions then upsample the class scores,
    adding the ``pool4`` and ``pool3`` backbone outputs on the way, until the
    result has the spatial size of ``image``.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability passed to both dropout layers
    :return: tuple ``(annotation, logits)`` - ``annotation`` is the argmax of
        the logits over axis 3 with that axis re-added by ``expand_dims``;
        ``logits`` is the output of the last transposed convolution
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    # NOTE: the mean pixel is computed but not applied; ``image`` goes to
    # vgg_net unmodified (the utils.process_image step is disabled).
    normalization_mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(normalization_mean, axis=(0, 1))

    vgg_weights = np.squeeze(model_data['layers'])

    with tf.variable_scope("inference"):
        image_net = vgg_net(vgg_weights, image)
        pool5 = utils.max_pool_2x2(image_net["conv5_3"])

        # Layer 6: 7x7 convolution, ReLU, dropout.
        kernel6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        bias6 = utils.bias_variable([4096], name="b6")
        relu6 = tf.nn.relu(utils.conv2d_basic(pool5, kernel6, bias6),
                           name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        dropped6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        # Layer 7: 1x1 convolution, ReLU, dropout.
        kernel7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        bias7 = utils.bias_variable([4096], name="b7")
        relu7 = tf.nn.relu(utils.conv2d_basic(dropped6, kernel7, bias7),
                           name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        dropped7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # Layer 8: 1x1 convolution down to one channel per class.
        kernel8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS],
                                        name="W8")
        bias8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        class_scores = utils.conv2d_basic(dropped7, kernel8, bias8)

        # Now upscale to the actual image size.
        # Step 1: upsample to pool4's shape and add pool4.
        pool4 = image_net["pool4"]
        pool4_channels = pool4.get_shape()[3].value
        kernel_t1 = utils.weight_variable(
            [4, 4, pool4_channels, NUM_OF_CLASSESS], name="W_t1")
        bias_t1 = utils.bias_variable([pool4_channels], name="b_t1")
        up1 = utils.conv2d_transpose_strided(
            class_scores, kernel_t1, bias_t1,
            output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(up1, image_net["pool4"], name="fuse_1")

        # Step 2: upsample to pool3's shape and add pool3.
        pool3 = image_net["pool3"]
        pool3_channels = pool3.get_shape()[3].value
        kernel_t2 = utils.weight_variable(
            [4, 4, pool3_channels, pool4_channels], name="W_t2")
        bias_t2 = utils.bias_variable([pool3_channels], name="b_t2")
        up2 = utils.conv2d_transpose_strided(
            fuse_1, kernel_t2, bias_t2,
            output_shape=tf.shape(image_net["pool3"]))
        fuse_2 = tf.add(up2, image_net["pool3"], name="fuse_2")

        # Step 3: stride-8 upsample to the input's batch/height/width with
        # one channel per class.
        input_shape = tf.shape(image)
        full_shape = tf.stack([input_shape[0], input_shape[1],
                               input_shape[2], NUM_OF_CLASSESS])
        kernel_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, pool3_channels], name="W_t3")
        bias_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        logits = utils.conv2d_transpose_strided(
            fuse_2, kernel_t3, bias_t3, output_shape=full_shape, stride=8)

        annotation_pred = tf.argmax(logits, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), logits
def mlp(inputs, output_dimension, scope=""):
    """Apply one affine layer (no activation) to ``inputs``.

    :param inputs: tensor whose static shape has a known second dimension;
        that dimension is used as the weight matrix's row count.
    :param output_dimension: number of output units.
    :param scope: accepted for interface compatibility; not used here.
    :return: ``inputs * W + b`` with freshly created ``W`` and ``b``.
    """
    static_dims = inputs.get_shape().as_list()
    input_width = static_dims[1]

    weights = utils.weight_variable([input_width, output_dimension])
    biases = utils.bias_variable([output_dimension])

    return tf.matmul(inputs, weights) + biases