def vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
def global_soft_ncut(reference_map, image_segment):
    """
    Args:
        reference_map: [B, H, W]
        image_segment: [B, H, W, K]
    Returns:
        soft_ncut: [B] (per-image soft N-cut value)
    """
    batch_size = tf.shape(reference_map)[0]                    # B
    num_class = image_segment.get_shape()[-1].value            # K
    image_shape = reference_map.get_shape()
    weight_size = image_shape[1].value * image_shape[2].value  # H*W

    # Bring the class axis forward before flattening: a plain reshape of
    # [B, H, W, K] into [B, K, H*W] would scramble the classes (cf. soft_ncut below).
    image_segment = tf.transpose(image_segment, [0, 3, 1, 2])  # [B, K, H, W]
    image_segment = tf.reshape(image_segment, tf.stack([batch_size, num_class, weight_size]))  # [B, K, H*W]
    image_weights = dense_global_weight(reference_map)         # [B, H*W, H*W]

    # Dis-association
    # [B, K, H*W] @ [B, H*W, H*W] batch matmul = [B, K, H*W]
    W_Ak = tf.einsum('aij,ajk->aik', image_segment, image_weights)  # [B, K, H*W]
    dis_assoc = tf.einsum('ijk,ijk->ij', W_Ak, image_segment)       # [B, K]
    dis_assoc = tf.identity(dis_assoc, name="dis_assoc")

    # Association
    sum_W = tf.reduce_sum(image_weights, axis=2)               # [B, H*W]
    assoc = tf.einsum('ijk,ik->ij', image_segment, sum_W)      # [B, K]
    assoc = tf.identity(assoc, name="assoc")

    utils.add_activation_summary(dis_assoc)
    utils.add_activation_summary(assoc)

    # Soft N-cut
    eps = 1e-6
    soft_ncut = tf.cast(num_class, tf.float32) - \
        tf.reduce_sum((dis_assoc + eps) / (assoc + eps), axis=1)
    return soft_ncut
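# Hedged usage sketch (not from the source): builds a scalar loss from
# global_soft_ncut. The 64x64 spatial size and K=4 classes are illustrative
# assumptions; `dense_global_weight` and `utils` are taken from this repo.
def soft_ncut_loss_example():
    reference_map = tf.placeholder(tf.float32, [None, 64, 64], name="reference_map")
    image_segment = tf.placeholder(tf.float32, [None, 64, 64, 4], name="image_segment")
    per_image = global_soft_ncut(reference_map, image_segment)  # [B]
    return tf.reduce_mean(per_image, name="soft_ncut_loss")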
def activation_function(x, name=""): activation_dict = {'relu': tf.nn.relu(x, name), 'elu': tf.nn.elu(x, name), 'lrelu': utils.leaky_relu(x, 0.2, name), 'tanh': tf.nn.tanh(x, name), 'sigmoid': tf.nn.sigmoid(x, name)} act = activation_dict[FLAGS.activation] utils.add_activation_summary(act) return act
def inference(image, keep_prob): print("setting up vgg initialized conv layers ...") model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL) mean = model_data['normalization'][0][0][0] mean_pixel = np.mean(mean, axis=(0, 1)) weights = np.squeeze(model_data['layers']) processed_image = utils.process_image(image, mean_pixel) with tf.variable_scope("inference"): image_net = vgg_net(weights, processed_image) conv_final_layer = image_net["conv5_3"] pool5 = utils.max_pool_2x2(conv_final_layer) W6 = utils.weight_variable([7, 7, 512, 4096], name="W6") b6 = utils.bias_variable([4096], name="b6") conv6 = utils.conv2d_basic(pool5, W6, b6) relu6 = tf.nn.relu(conv6, name="relu6") if FLAGS.debug: utils.add_activation_summary(relu6) relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob) W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7") b7 = utils.bias_variable([4096], name="b7") conv7 = utils.conv2d_basic(relu_dropout6, W7, b7) relu7 = tf.nn.relu(conv7, name="relu7") if FLAGS.debug: utils.add_activation_summary(relu7) relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob) W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8") b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8") conv8 = utils.conv2d_basic(relu_dropout7, W8, b8) deconv_shape1 = image_net["pool4"].get_shape() W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1") b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1") conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"])) fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1") deconv_shape2 = image_net["pool3"].get_shape() W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2") b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2") conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"])) fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2") shape = tf.shape(image) deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS]) W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3") b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3") conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8) annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction") return tf.expand_dims(annotation_pred, dim=3), conv_t3
def vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )
    '''
    weights[i][0][0][0][0] yields variables such as:
        <tf.Variable 'inference/conv1_1_w:0' shape=(3, 3, 3, 64) dtype=float32_ref>
        <tf.Variable 'inference/conv1_1_b:0' shape=(64,) dtype=float32_ref>
        <tf.Variable 'inference/conv1_2_w:0' shape=(3, 3, 64, 64) dtype=float32_ref>
        <tf.Variable 'inference/conv1_2_b:0' shape=(64,) dtype=float32_ref>
        <tf.Variable 'inference/conv2_1_w:0' shape=(3, 3, 64, 128) dtype=float32_ref>
        <tf.Variable 'inference/conv2_1_b:0' shape=(128,) dtype=float32_ref>
        <tf.Variable 'inference/conv2_2_w:0' shape=(3, 3, 128, 128) dtype=float32_ref>
        <tf.Variable 'inference/conv2_2_b:0' shape=(128,) dtype=float32_ref>
    '''
    net = {}
    current = image
    for i, name in enumerate(layers):  # enumerate yields (index, value) pairs for any iterable
        kind = name[:4]
        num = name[4:]
        if kind == 'conv' and num == '1_1':
            # conv1_1 is re-initialized from scratch with a 4-channel input,
            # so the pretrained 3-channel kernels cannot be reused here:
            # 3x3 kernels, 4 input channels, 64 output channels.
            W = utils.weight_variable([3, 3, 4, 64], name=name + "_w")
            b = utils.bias_variable([64], name=name + "_b")
            current = utils.conv2d_basic(current, W, b)
        elif kind == 'conv' and num != '1_1':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
def activation_function(x, name=""): activation_dict = { 'relu': tf.nn.relu(x, name), 'elu': tf.nn.elu(x, name), 'lrelu': utils.leaky_relu(x, 0.2, name), 'tanh': tf.nn.tanh(x, name), 'sigmoid': tf.nn.sigmoid(x, name) } act = activation_dict[FLAGS.activation] utils.add_activation_summary(act) return act
def generator(z, train_mode):
    with tf.variable_scope("generator") as scope:
        # `//` keeps the shape arithmetic integral under both Python 2 and 3;
        # tf.pack was renamed tf.stack, matching the rest of this codebase.
        W_0 = utils.weight_variable([FLAGS.z_dim, 64 * GEN_DIMENSION // 2 * IMAGE_SIZE // 16 * IMAGE_SIZE // 16],
                                    name="W_0")
        b_0 = utils.bias_variable([64 * GEN_DIMENSION // 2 * IMAGE_SIZE // 16 * IMAGE_SIZE // 16], name="b_0")
        z_0 = tf.matmul(z, W_0) + b_0
        h_0 = tf.reshape(z_0, [-1, IMAGE_SIZE // 16, IMAGE_SIZE // 16, 64 * GEN_DIMENSION // 2])
        h_bn0 = utils.batch_norm(h_0, 64 * GEN_DIMENSION // 2, train_mode, scope="gen_bn0")
        h_relu0 = tf.nn.relu(h_bn0, name='relu0')
        utils.add_activation_summary(h_relu0)

        # W_1 = utils.weight_variable([5, 5, 64 * GEN_DIMENSION // 2, 64 * GEN_DIMENSION], name="W_1")
        # b_1 = utils.bias_variable([64 * GEN_DIMENSION // 2], name="b_1")
        # deconv_shape = tf.stack([tf.shape(h_relu0)[0], IMAGE_SIZE // 16, IMAGE_SIZE // 16, 64 * GEN_DIMENSION // 2])
        # h_conv_t1 = utils.conv2d_transpose_strided(h_relu0, W_1, b_1, output_shape=deconv_shape)
        # h_bn1 = utils.batch_norm(h_conv_t1, 64 * GEN_DIMENSION // 2, train_mode, scope="gen_bn1")
        # h_relu1 = tf.nn.relu(h_bn1, name='relu1')
        # utils.add_activation_summary(h_relu1)

        W_2 = utils.weight_variable([5, 5, 64 * GEN_DIMENSION // 4, 64 * GEN_DIMENSION // 2], name="W_2")
        b_2 = utils.bias_variable([64 * GEN_DIMENSION // 4], name="b_2")
        deconv_shape = tf.stack([tf.shape(h_relu0)[0], IMAGE_SIZE // 8, IMAGE_SIZE // 8, 64 * GEN_DIMENSION // 4])
        h_conv_t2 = utils.conv2d_transpose_strided(h_relu0, W_2, b_2, output_shape=deconv_shape)
        h_bn2 = utils.batch_norm(h_conv_t2, 64 * GEN_DIMENSION // 4, train_mode, scope="gen_bn2")
        h_relu2 = tf.nn.relu(h_bn2, name='relu2')
        utils.add_activation_summary(h_relu2)

        W_3 = utils.weight_variable([5, 5, 64 * GEN_DIMENSION // 8, 64 * GEN_DIMENSION // 4], name="W_3")
        b_3 = utils.bias_variable([64 * GEN_DIMENSION // 8], name="b_3")
        deconv_shape = tf.stack([tf.shape(h_relu2)[0], IMAGE_SIZE // 4, IMAGE_SIZE // 4, 64 * GEN_DIMENSION // 8])
        h_conv_t3 = utils.conv2d_transpose_strided(h_relu2, W_3, b_3, output_shape=deconv_shape)
        h_bn3 = utils.batch_norm(h_conv_t3, 64 * GEN_DIMENSION // 8, train_mode, scope="gen_bn3")
        h_relu3 = tf.nn.relu(h_bn3, name='relu3')
        utils.add_activation_summary(h_relu3)

        W_4 = utils.weight_variable([5, 5, 64 * GEN_DIMENSION // 16, 64 * GEN_DIMENSION // 8], name="W_4")
        b_4 = utils.bias_variable([64 * GEN_DIMENSION // 16], name="b_4")
        deconv_shape = tf.stack([tf.shape(h_relu3)[0], IMAGE_SIZE // 2, IMAGE_SIZE // 2, 64 * GEN_DIMENSION // 16])
        h_conv_t4 = utils.conv2d_transpose_strided(h_relu3, W_4, b_4, output_shape=deconv_shape)
        h_bn4 = utils.batch_norm(h_conv_t4, 64 * GEN_DIMENSION // 16, train_mode, scope="gen_bn4")
        h_relu4 = tf.nn.relu(h_bn4, name='relu4')
        utils.add_activation_summary(h_relu4)

        W_5 = utils.weight_variable([5, 5, NUM_OF_CHANNELS, 64 * GEN_DIMENSION // 16], name="W_5")
        b_5 = utils.bias_variable([NUM_OF_CHANNELS], name="b_5")
        deconv_shape = tf.stack([tf.shape(h_relu4)[0], IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS])
        h_conv_t5 = utils.conv2d_transpose_strided(h_relu4, W_5, b_5, output_shape=deconv_shape)
        pred_image = tf.nn.tanh(h_conv_t5, name='pred_image')
        utils.add_activation_summary(pred_image)

    return pred_image
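# Hedged sketch (helper name is an assumption): sample a uniform latent batch
# and run the generator, mirroring what main() below does during training.
def sample_generator_images(sess, gen_images, z_vec, train_phase):
    batch_z = np.random.uniform(-1.0, 1.0,
                                size=[FLAGS.batch_size, FLAGS.z_dim]).astype(np.float32)
    return sess.run(gen_images, feed_dict={z_vec: batch_z, train_phase: False})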
def vgg_net(weights, image):
    # def vgg_net(image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image  # [n, 224, 224, 3]
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
            # Disabled alternative (was toggled off with a string literal in the
            # original): build the convolutions from scratch per block, e.g.
            #   current = tf.layers.conv2d(current, 64, 3, padding='same')   # conv1_*
            #   current = tf.layers.conv2d(current, 128, 3, padding='same')  # conv2_*
            #   current = tf.layers.conv2d(current, 256, 3, padding='same')  # conv3_*
            #   current = tf.layers.conv2d(current, 512, 3, padding='same')  # conv4_*, conv5_*
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net  # final feature map: [n, 14, 14, 512]
def inference(image, keep_prob): """ Semantic segmentation network definition :param image: input image. Should have values in range 0-255 :param keep_prob: :return: """ print("setting up vgg initialized conv layers ...") model_data = utils.get_model_data(model_dir, MODEL_URL) mean = model_data['normalization'][0][0][0] mean_pixel = np.mean(mean, axis=(0, 1)) weights = np.squeeze(model_data['layers']) processed_image = utils.process_image(image, mean_pixel) with tf.variable_scope("inference"): image_net = vgg_net(weights, processed_image) conv_final_layer = image_net["conv4_3"] W6 = utils.weight_variable([1, 1, 512, 1024], name="W6", init=weight_init) b6 = utils.bias_variable([1024], name="b6") conv6 = utils.conv2d_basic(conv_final_layer, W6, b6) relu6 = tf.nn.relu(conv6, name="relu6") if debug: utils.add_activation_summary(relu6) relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob) W7 = utils.weight_variable([1, 1, 1024, NUM_OF_CLASSESS], name="W7", init=weight_init) b7 = utils.bias_variable([NUM_OF_CLASSESS], name="b7") conv7 = utils.conv2d_basic(relu_dropout6, W7, b7) # now to upscale to actual image size deconv_shape1 = image_net["pool2"].get_shape() W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1", init=weight_init) b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1") conv_t1 = utils.conv2d_transpose_strided(conv7, W_t1, b_t1, output_shape=tf.shape(image_net["pool2"])) fuse_1 = tf.add(conv_t1, image_net["pool2"], name="fuse_1") deconv_shape2 = image_net["pool1"].get_shape() W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2", init=weight_init) b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2") conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool1"])) fuse_2 = tf.add(conv_t2, image_net["pool1"], name="fuse_1") shape = tf.shape(image) deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS]) W_t3 = utils.weight_variable([4, 4, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3", init=weight_init) b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3") conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=2) return conv_t3
def build_centers_layers(self, image, keep_prob):
    with tf.variable_scope("centers"):
        pool5 = utils.max_pool_2x2(self.image_net["conv5_3"])

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if self.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if self.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, self.n_classes], name="W8")
        b8 = utils.bias_variable([self.n_classes], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        deconv_shape1 = self.image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, self.n_classes], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(self.image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, self.image_net["pool4"], name="fuse_1")

        deconv_shape2 = self.image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(self.image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, self.image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        # deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], self.n_classes])
        # Three output maps per foreground class (hence 3 * (n_classes - 1)).
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], 3 * (self.n_classes - 1)])
        W_t3 = utils.weight_variable([16, 16, 3 * (self.n_classes - 1), deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([3 * (self.n_classes - 1)], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # tanh_t3 = tf.math.sigmoid(conv_t3)
        # Per class triple: sigmoid on the first two channels, ReLU on the third.
        for i in range(0, 3 * (self.n_classes - 1), 3):
            current = tf.math.sigmoid(conv_t3[:, :, :, i:i + 2])
            if i == 0:
                tanh_t3 = current
            else:
                tanh_t3 = tf.concat([tanh_t3, current], axis=-1)
            tanh_t3 = tf.concat([tanh_t3, tf.nn.relu(conv_t3[:, :, :, i + 2:i + 3])], axis=-1)

    return tanh_t3
def unet(cls, image, keep_prob, phase_train, output_channel, num_layers, is_debug=False):
    net = {}
    batch_size = tf.shape(image)[0]
    current = image
    net['image'] = current

    for index_module in range(num_layers):
        # Check type of module
        is_encoder = index_module < num_layers // 2
        is_decoder = index_module > num_layers // 2
        is_classifier = index_module == num_layers // 2

        # Set number of input and output channels
        in_ch = current.get_shape()[-1]
        mod_output = 'mod%d_out'

        if is_encoder:
            current = cls.unet_encode(current, keep_prob, phase_train, index_module)
            name = mod_output % index_module
            net[name] = current
            current = slim.max_pool2d(current, [2, 2], stride=2, padding='SAME')

        if is_classifier:
            current = cls.unet_encode(current, keep_prob, phase_train, index_module)
            name = mod_output % index_module
            net[name] = current
            current = cls.upconv(current, index_module)

        if is_decoder:
            fuse_pool = mod_output % (num_layers - 1 - index_module)
            print(index_module, num_layers - 1 - index_module)
            print(net[fuse_pool].get_shape())
            print(current.get_shape())
            current = tf.concat([current, net[fuse_pool]], axis=3, name="fuse_%d" % index_module)
            current = cls.unet_decode(current, keep_prob, phase_train, index_module)
            name = mod_output % index_module
            net[name] = current
            if index_module != num_layers - 1:
                current = cls.upconv(current, index_module)

        if is_debug:
            print(name)
            print(net[name].get_shape())
            utils.add_activation_summary(current)

    # conv1x1
    current = slim.conv2d(current, output_channel, 1)
    name = 'segment'
    net[name] = current
    if is_debug:
        print(name)
        print(net[name].get_shape())

    print('unet complete')
    return net
def soft_ncut(image, image_segment, image_weights):
    """
    Args:
        image: [B, H, W, C]
        image_segment: [B, H, W, K]
        image_weights: [B, H*W, H*W] (sparse)
    Returns:
        soft_ncut: [B] (per-image soft N-cut value)
    """
    batch_size = tf.shape(image)[0]
    num_class = tf.shape(image_segment)[-1]
    image_shape = image.get_shape()
    weight_size = image_shape[1].value * image_shape[2].value
    image_segment = tf.transpose(image_segment, [0, 3, 1, 2])  # [B, K, H, W]
    image_segment = tf.reshape(image_segment, tf.stack([batch_size, num_class, weight_size]))  # [B, K, H*W]

    # Dis-association
    # [B0, H*W, H*W] @ [B1, K1, H*W] contracted on [[2], [2]] = [B0, H*W, B1, K1]
    W_Ak = sparse_tensor_dense_tensordot(image_weights, image_segment, axes=[[2], [2]])
    W_Ak = tf.transpose(W_Ak, [0, 2, 3, 1])               # [B0, B1, K1, H*W]
    W_Ak = synchronize_axes(W_Ak, [0, 1], tensor_dims=4)  # [B0=B1, K1, H*W]
    # [B1, K1, H*W] @ [B2, K2, H*W] contracted on [[2], [2]] = [B1, K1, B2, K2]
    dis_assoc = tf.tensordot(W_Ak, image_segment, axes=[[2], [2]])
    dis_assoc = synchronize_axes(dis_assoc, [0, 2], tensor_dims=4)  # [B1=B2, K1, K2]
    dis_assoc = synchronize_axes(dis_assoc, [1, 2], tensor_dims=3)  # [K1=K2, B1=B2]
    dis_assoc = tf.transpose(dis_assoc, [1, 0])                     # [B1=B2, K1=K2]
    dis_assoc = tf.identity(dis_assoc, name="dis_assoc")

    # Association
    # image_segment: [B0, K0, H*W]
    sum_W = tf.sparse_reduce_sum(image_weights, axis=2)      # [B1, H*W]
    assoc = tf.tensordot(image_segment, sum_W, axes=[2, 1])  # [B0, K0, B1]
    assoc = synchronize_axes(assoc, [0, 2], tensor_dims=3)   # [B0=B1, K0]
    assoc = tf.identity(assoc, name="assoc")

    utils.add_activation_summary(dis_assoc)
    utils.add_activation_summary(assoc)

    # Soft N-cut
    eps = 1e-6
    soft_ncut = tf.cast(num_class, tf.float32) - \
        tf.reduce_sum((dis_assoc + eps) / (assoc + eps), axis=1)
    return soft_ncut
def inference(data):
    with tf.variable_scope("inference") as scope:
        W_1 = utils.weight_variable([IMAGE_SIZE * IMAGE_SIZE * 50], name="W_1")
        b_1 = utils.bias_variable([50], name="b_1")
        h_1 = tf.nn.relu(tf.matmul(data, tf.reshape(W_1, [IMAGE_SIZE * IMAGE_SIZE, 50])) + b_1, name='h_1')
        utils.add_activation_summary(h_1)

        W_2 = utils.weight_variable([50 * 50], name="W_2")
        b_2 = utils.bias_variable([50], name="b_2")
        h_2 = tf.nn.relu(tf.matmul(h_1, tf.reshape(W_2, [50, 50])) + b_2, name='h_2')
        utils.add_activation_summary(h_2)

        W_3 = utils.weight_variable([50 * 50], name="W_3")
        b_3 = utils.bias_variable([50], name="b_3")
        h_3 = tf.nn.relu(tf.matmul(h_2, tf.reshape(W_3, [50, 50])) + b_3, name='h_3')
        utils.add_activation_summary(h_3)

        W_4 = utils.weight_variable([50 * 50], name="W_4")
        b_4 = utils.bias_variable([50], name="b_4")
        h_4 = tf.nn.relu(tf.matmul(h_3, tf.reshape(W_4, [50, 50])) + b_4, name='h_4')
        utils.add_activation_summary(h_4)

        W_final = utils.weight_variable([50 * 10], name="W_final")
        b_final = utils.bias_variable([10], name="b_final")
        pred = tf.nn.softmax(tf.matmul(h_4, tf.reshape(W_final, [50, 10])) + b_final, name='h_final')
        # utils.add_activation_summary(pred)
    return pred
def vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # Only the *shapes* of the pretrained kernels are reused; the variables
            # themselves are freshly initialized. conv1_1 is widened to 4 input channels.
            if name == 'conv1_1':
                kernel_shape = kernels.shape[:2] + (4,) + kernels.shape[3:]
            else:
                kernel_shape = kernels.shape
            bias_shape = bias.shape
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            new_kernel = np.zeros(kernel_shape)
            new_kernel_shape = np.transpose(new_kernel, (1, 0, 2, 3)).shape
            # print(f"new kernel shape: {new_kernel_shape}")
            new_bias = np.zeros(bias_shape)
            new_bias_shape = new_bias.reshape(-1).shape[0]
            # print(f"new bias shape: {new_bias_shape}")
            kernels = utils.weight_variable(shape=new_kernel_shape, name=name + "_w")
            bias = utils.bias_variable(shape=[new_bias_shape], name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current
        # print(f"VGG-19 {name} layer: {current.shape}")

    return net
def vgg_net(weights, image):
    # NOTE: 'conv1_1' is deliberately absent from this tuple; it is built by hand
    # in the GRAY_MODE block below, and the loop indexes weights[i + 1] to stay
    # aligned with the pretrained layer order.
    layers = ('relu1_1', 'conv1_2', 'relu1_2', 'pool1',
              'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
              'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
              'conv3_4', 'relu3_4', 'pool3',
              'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
              'conv4_4', 'relu4_4', 'pool4',
              'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
              'conv5_4', 'relu5_4')

    net = {}
    current = image
    # grayiterstart = 0
    if GRAY_MODE:
        i = 0
        name = 'conv1_1'  # layers[0] equivalent
        kind = name[:4]   # 'conv'
        bias = weights[0][0][0][0][0][1]
        kernels = weights[0][0][0][0][0][0]
        # Put in_channels first and select the 1st channel only:
        # [w, h, in_channels, out_channels] -> [1, w, h, out_channels]
        kernelstrans = np.array([np.transpose(kernels, (2, 0, 1, 3))[0]])
        # ... then back to [h, w, in_channels=1, out_channels]
        kernels = np.transpose(kernelstrans, (2, 1, 0, 3))
        kernels = utils.get_variable(kernels, name=name + "_w")
        bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
        current = utils.conv2d_basic(current, kernels, bias)

    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i + 1][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
def create_unet(image, label, n_class, filter_size, num_of_feature, num_of_layers,
                keep_prob, name, reg_weight=0.001, debug=True, restore=False,
                weights=None, ClassWeights=[1, 1, 1]):
    with tf.name_scope("u-net"):
        y_conv, variables, layer_id, dw_h_convs = unet(
            image, n_class, filter_size, num_of_feature, num_of_layers,
            keep_prob, name, debug, restore, weights)
        # softmax y output
        clean_y_out = tf.reshape(
            tf.nn.softmax(tf.reshape(y_conv, [-1, n_class])),
            tf.shape(y_conv), 'segmentation_map')

        # summary
        if debug:
            utils.add_activation_summary(clean_y_out)
            utils.add_to_image_summary(clean_y_out)
            for var in variables:
                utils.add_to_regularization_and_summary(var)

    with tf.name_scope("loss"):
        # adding class weight
        # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        #     labels=tf.reshape(label, [-1]),
        #     logits=tf.multiply(tf.reshape(y_conv, [-1, n_class]), ClassWeights)))
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.reshape(label, [-1, n_class]),
                logits=tf.reshape(y_conv, [-1, n_class])))
        weight_decay = 0
        if reg_weight is not None:
            for var in variables:
                weight_decay = weight_decay + tf.nn.l2_loss(var)
        loss = tf.reduce_sum(loss + reg_weight * weight_decay, name='loss')
        if debug:
            utils.add_scalar_summary(loss)

    return loss, clean_y_out, variables, dw_h_convs
def build_labels_layers(self, image, keep_prob):
    with tf.variable_scope("labels"):
        pool5 = utils.max_pool_2x2(self.image_net["conv5_3"])

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if self.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if self.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, self.n_classes], name="W8")
        b8 = utils.bias_variable([self.n_classes], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        deconv_shape1 = self.image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, self.n_classes], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(self.image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, self.image_net["pool4"], name="fuse_1")

        deconv_shape2 = self.image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(self.image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, self.image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], self.n_classes])
        W_t3 = utils.weight_variable([16, 16, self.n_classes, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([self.n_classes], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def inference(image, keep_prob): """ Semantic segmentation network definition :param image: input image. Should have values in range 0-255 :param keep_prob: :return: """ print("setting up vgg initialized conv layers ...") model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL) mean = model_data['normalization'][0][0][0] mean_pixel = np.mean(mean, axis=(0, 1)) weights = np.squeeze(model_data['layers']) processed_image = utils.process_image(image, mean_pixel) with tf.variable_scope("inference"): vgg_end_layer = 'conv4_4' image_net = vgg_net(weights, processed_image, end_layer=vgg_end_layer) conv_final_layer = image_net[vgg_end_layer] dropout = tf.nn.dropout(conv_final_layer, keep_prob=keep_prob) W_final = utils.weight_variable([1, 1, 512, NUM_OF_CLASSES], name="W_final") b_final = utils.bias_variable([NUM_OF_CLASSES], name="b_final") conv_final = utils.conv2d_basic(dropout, W_final, b_final) if FLAGS.debug: utils.add_activation_summary(conv_final) # now to upscale to actual image size deconv_shape2 = image_net["pool2"].get_shape() W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, NUM_OF_CLASSES], name="W_t2") b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2") conv_t2 = utils.conv2d_transpose_strided(conv_final, W_t2, b_t2, output_shape=tf.shape(image_net["pool2"])) fuse_2 = tf.add(conv_t2, image_net["pool2"], name="fuse_2") shape = tf.shape(image) deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSES]) W_t3 = utils.weight_variable([8, 8, NUM_OF_CLASSES, deconv_shape2[3].value], name="W_t3") b_t3 = utils.bias_variable([NUM_OF_CLASSES], name="b_t3") conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=4) annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction", output_type=tf.int32) return tf.expand_dims(annotation_pred, axis=3), conv_t3
def vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]  # layer type: conv, relu, or pool
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            if name[4:5] == '5':
                # From conv5 on, use atrous (dilated) convolution with rate=2
                # (i.e. pad=2), followed by batch normalization.
                current = utils.atrous_conv2d_basic(current, kernels, bias, 2)
                current = utils.batch_norm_layer(current, FLAGS.mode, scope_bn=name)
            else:
                # conv1-4: plain convolution followed by batch normalization.
                current = utils.conv2d_basic(current, kernels, bias)
                current = utils.batch_norm_layer(current, FLAGS.mode, scope_bn=name)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            if name[4:5] == '4':
                current = utils.max_pool_1x1(current)
            else:
                current = utils.max_pool_3x3(current)
        net[name] = current

    return net
def inference(data, keep_prob):
    with tf.variable_scope("inference") as scope:
        # All layer parameters live in one flat variable that is sliced into
        # per-layer weights and biases below.
        weight_variable_size = IMAGE_SIZE * IMAGE_SIZE * 50 + 50 * 50 * 3 + 50 * 10
        bias_variable_size = 4 * 50 + 10
        print(weight_variable_size + bias_variable_size)
        variable = utils.weight_variable([weight_variable_size + bias_variable_size], name="variables")
        weight_variable = tf.slice(variable, [0], [weight_variable_size], name="weights")
        bias_variable = tf.slice(variable, [weight_variable_size], [bias_variable_size], name="biases")
        weight_offset = 0
        bias_offset = 0

        W_1 = tf.slice(weight_variable, [weight_offset], [IMAGE_SIZE * IMAGE_SIZE * 50], name="W_1")
        b_1 = tf.slice(bias_variable, [bias_offset], [50], name="b_1")
        h_1_relu = tf.nn.relu(tf.matmul(data, tf.reshape(W_1, [IMAGE_SIZE * IMAGE_SIZE, 50])) + b_1, name='h_1')
        h_1 = tf.nn.dropout(h_1_relu, keep_prob)
        utils.add_activation_summary(h_1)

        weight_offset += IMAGE_SIZE * IMAGE_SIZE * 50
        bias_offset += 50
        W_2 = tf.slice(weight_variable, [weight_offset], [50 * 50], name="W_2")
        b_2 = tf.slice(bias_variable, [bias_offset], [50], name="b_2")
        h_2_relu = tf.nn.relu(tf.matmul(h_1, tf.reshape(W_2, [50, 50])) + b_2, name='h_2')
        h_2 = tf.nn.dropout(h_2_relu, keep_prob)
        utils.add_activation_summary(h_2)

        weight_offset += 50 * 50
        bias_offset += 50
        W_3 = tf.slice(weight_variable, [weight_offset], [50 * 50], name="W_3")
        b_3 = tf.slice(bias_variable, [bias_offset], [50], name="b_3")
        h_3_relu = tf.nn.relu(tf.matmul(h_2, tf.reshape(W_3, [50, 50])) + b_3, name='h_3')
        h_3 = tf.nn.dropout(h_3_relu, keep_prob)
        utils.add_activation_summary(h_3)

        weight_offset += 50 * 50
        bias_offset += 50
        W_4 = tf.slice(weight_variable, [weight_offset], [50 * 50], name="W_4")
        b_4 = tf.slice(bias_variable, [bias_offset], [50], name="b_4")
        h_4_relu = tf.nn.relu(tf.matmul(h_3, tf.reshape(W_4, [50, 50])) + b_4, name='h_4')
        h_4 = tf.nn.dropout(h_4_relu, keep_prob)
        utils.add_activation_summary(h_4)

        weight_offset += 50 * 50
        bias_offset += 50
        W_final = tf.slice(weight_variable, [weight_offset], [50 * 10], name="W_final")
        b_final = tf.slice(bias_variable, [bias_offset], [10], name="b_final")
        pred = tf.nn.softmax(tf.matmul(h_4, tf.reshape(W_final, [50, 10])) + b_final, name='h_final')
        # utils.add_activation_summary(pred)
    return pred
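# Hedged aside (toy sizes, illustrative names): the pattern above packs all
# parameters into one flat variable and carves it up with tf.slice.
def flat_param_slicing_example():
    flat = utils.weight_variable([2 * 2 + 2], name="flat_params")  # 4 weights + 2 biases
    w = tf.reshape(tf.slice(flat, [0], [4]), [2, 2])  # first 4 entries -> 2x2 weight
    b = tf.slice(flat, [4], [2])                      # last 2 entries  -> bias vector
    return w, b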
def encoder_fc(images):
    with tf.variable_scope("encoder") as scope:
        W_fc1 = utils.weight_variable([IMAGE_SIZE * IMAGE_SIZE, 50], name="W_fc1")
        b_fc1 = utils.bias_variable([50], name="b_fc1")
        h_relu1 = activation_function(tf.matmul(images, W_fc1) + b_fc1, name="hfc_1")

        W_fc2 = utils.weight_variable([50, 50], name="W_fc2")
        b_fc2 = utils.bias_variable([50], name="b_fc2")
        h_relu2 = activation_function(tf.matmul(h_relu1, W_fc2) + b_fc2, name="hfc_2")

        W_fc3 = utils.weight_variable([50, FLAGS.z_dim], name="W_fc3")
        b_fc3 = utils.bias_variable([FLAGS.z_dim], name="b_fc3")
        mu = tf.add(tf.matmul(h_relu2, W_fc3), b_fc3, name="mu")
        utils.add_activation_summary(mu)

        W_fc4 = utils.weight_variable([50, FLAGS.z_dim], name="W_fc4")
        b_fc4 = utils.bias_variable([FLAGS.z_dim], name="b_fc4")
        log_var = tf.add(tf.matmul(h_relu2, W_fc4), b_fc4, name="log_var")
        utils.add_activation_summary(log_var)

    return mu, log_var
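# Hedged companion sketch (standard VAE step, not shown in the source): the
# reparameterization that usually consumes mu/log_var from encoder_fc.
def sample_latent(images):
    mu, log_var = encoder_fc(images)
    epsilon = tf.random_normal(tf.shape(mu), name="epsilon")
    return mu + tf.exp(0.5 * log_var) * epsilon  # z ~ N(mu, exp(log_var))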
def vgg_net(weights, image, debug):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    restore_vars = []
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            # Utilizing stored weights of the ImageNet pretrained network to provide the correct shapes
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.weight_variable(np.transpose(kernels, (1, 0, 2, 3)).shape, name=name + "_w")
            bias = utils.bias_variable(bias.reshape(-1).shape, name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
            restore_vars += [kernels, bias]
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net, restore_vars
def vgg_net(weights, image):
    # The first five blocks of the VGG network
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image  # input image tensor
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")  # e.g. conv1_1_w
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")  # e.g. conv1_1_b
            current = utils.conv2d_basic(current, kernels, bias)  # forward result
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)  # e.g. relu1_1
            if FLAGS.debug:  # debug mode: True / False
                utils.add_activation_summary(current)  # add to summaries
        elif kind == 'pool':
            # Each of VGG's first five blocks halves the spatial size (stride 2).
            # The first four pools are handled here with average pooling; the
            # fifth pool is applied outside this function with max pooling.
            #   pool1: size / 2, pool2: size / 4, pool3: size / 8, pool4: size / 16
            current = utils.avg_pool_2x2(current)
        net[name] = current  # every layer's forward output is kept in the net dict

    return net
def vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        if name in ['conv3_4', 'relu3_4', 'conv4_4', 'relu4_4', 'conv5_4', 'relu5_4']:
            continue
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
def vgg_net(weights, image): """ :param weights: np matrix :param image: tf place holder <- fed with np arrays :return: a dict. key is the name of layer, value is the corresponding opration node in tf graph. """ layers = ('conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4') net = {} current = image for i, name in enumerate(layers): kind = name[:4] if kind == 'conv': kernels, bias = weights[i][0][0][0][0] # matconvnet: weights are [width, height, in_channels, out_channels] # tensorflow: weights are [height, width, in_channels, out_channels] # weight tf var kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w") # bias tf var bias = utils.get_variable(bias.reshape(-1), name=name + "_b") # the output tf layer node current = utils.conv2d_basic(current, kernels, bias) elif kind == 'relu': current = tf.nn.relu(current, name=name) if FLAGS.debug: utils.add_activation_summary(current) elif kind == 'pool': # average pooling is this correct????? current = utils.avg_pool_2x2(current) net[name] = current return net
def vgg_net(weights, image):
    ## The first five blocks of the FCN are exactly the VGG network
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
        'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3',
        'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'conv5_4', 'relu5_4'
    )

    net = {}  # dict of layer outputs
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            ## Each of VGG's first five blocks halves the spatial size (stride 2).
            ## The first four pools are handled here with average pooling; the
            ## fifth pool is applied outside this function with max pooling.
            ##   pool1: size / 2, pool2: size / 4, pool3: size / 8, pool4: size / 16
            current = utils.avg_pool_2x2(current)  ## average pooling
        net[name] = current

    return net  ## every VGG layer's output is stored in net
def vgg_net(weights, image):
    # First, the names of the VGG-16 layers reused by FCN16S, so the same
    # network structure can be rebuilt here
    layers = ("conv1_1", "relu1_1", "conv1_2", "relu1_2", "pool1",
              "conv2_1", "relu2_1", "conv2_2", "relu2_2", "pool2",
              "conv3_1", "relu3_1", "conv3_2", "relu3_2", "conv3_3", "relu3_3", "pool3",
              "conv4_1", "relu4_1", "conv4_2", "relu4_2", "conv4_3", "relu4_3", "pool4",
              "conv5_1", "relu5_1", "conv5_2", "relu5_2", "conv5_3", "relu5_3", "pool5")

    # all exposed outputs of the shared layers
    net = {}
    # current input
    current = image
    for i, name in enumerate(layers):
        # layer type from the first four characters of the name
        kind = name[:4]
        if kind == "conv":
            kernels = weights[i][0][0][0][0][0]
            bias = weights[i][0][0][0][0][1]
            print(weights[i][0][0][0][0][0].shape)
            print(weights[i][0][0][0][0][1].shape)
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            # create the variables
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == "relu":
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == "pool":
            current = utils.max_pool_2x2(current)
        net[name] = current

    return net
def discriminator(input_images, train_mode):
    # dropout_prob = 1.0
    # if train_mode:
    #     dropout_prob = 0.5
    W_conv0 = utils.weight_variable([5, 5, NUM_OF_CHANNELS, 64 * 1], name="W_conv0")
    b_conv0 = utils.bias_variable([64 * 1], name="b_conv0")
    h_conv0 = utils.conv2d_strided(input_images, W_conv0, b_conv0)
    h_bn0 = h_conv0  # utils.batch_norm(h_conv0, 64 * 1, train_mode, scope="disc_bn0")
    h_relu0 = utils.leaky_relu(h_bn0, 0.2, name="h_relu0")
    utils.add_activation_summary(h_relu0)

    W_conv1 = utils.weight_variable([5, 5, 64 * 1, 64 * 2], name="W_conv1")
    b_conv1 = utils.bias_variable([64 * 2], name="b_conv1")
    h_conv1 = utils.conv2d_strided(h_relu0, W_conv1, b_conv1)
    h_bn1 = utils.batch_norm(h_conv1, 64 * 2, train_mode, scope="disc_bn1")
    h_relu1 = utils.leaky_relu(h_bn1, 0.2, name="h_relu1")
    utils.add_activation_summary(h_relu1)

    W_conv2 = utils.weight_variable([5, 5, 64 * 2, 64 * 4], name="W_conv2")
    b_conv2 = utils.bias_variable([64 * 4], name="b_conv2")
    h_conv2 = utils.conv2d_strided(h_relu1, W_conv2, b_conv2)
    h_bn2 = utils.batch_norm(h_conv2, 64 * 4, train_mode, scope="disc_bn2")
    h_relu2 = utils.leaky_relu(h_bn2, 0.2, name="h_relu2")
    utils.add_activation_summary(h_relu2)

    W_conv3 = utils.weight_variable([5, 5, 64 * 4, 64 * 8], name="W_conv3")
    b_conv3 = utils.bias_variable([64 * 8], name="b_conv3")
    h_conv3 = utils.conv2d_strided(h_relu2, W_conv3, b_conv3)
    h_bn3 = utils.batch_norm(h_conv3, 64 * 8, train_mode, scope="disc_bn3")
    h_relu3 = utils.leaky_relu(h_bn3, 0.2, name="h_relu3")
    utils.add_activation_summary(h_relu3)

    shape = h_relu3.get_shape().as_list()
    h_3 = tf.reshape(h_relu3, [FLAGS.batch_size, (IMAGE_SIZE // 16) * (IMAGE_SIZE // 16) * shape[3]])
    W_4 = utils.weight_variable([h_3.get_shape().as_list()[1], 1], name="W_4")
    b_4 = utils.bias_variable([1], name="b_4")
    h_4 = tf.matmul(h_3, W_4) + b_4

    return tf.nn.sigmoid(h_4), h_4, h_relu3
def region_pooling(birdview_feat, frontview_feat, rgbview_feat,
                   birdview_rois, frontview_rois, rgbview_rois,
                   birdview_rois_ind, frontview_rois_ind, rgbview_rois_ind,
                   ROI_H, ROI_W, debug):
    # dynamic region pooling
    birdview_channel = birdview_feat.get_shape().as_list()[3]
    frontview_channel = frontview_feat.get_shape().as_list()[3]
    rgbview_channel = rgbview_feat.get_shape().as_list()[3]

    birdview_region_list = []
    frontview_region_list = []
    rgbview_region_list = []

    birdview_pooling_ROI = tf.image.crop_and_resize(
        birdview_feat, birdview_rois, birdview_rois_ind, [ROI_H, ROI_W],
        name='birdview_pooling_ROI')
    frontview_pooling_ROI = tf.image.crop_and_resize(
        frontview_feat, frontview_rois, frontview_rois_ind, [ROI_H, ROI_W],
        name='frontview_pooling_ROI')
    rgbview_pooling_ROI = tf.image.crop_and_resize(
        rgbview_feat, rgbview_rois, rgbview_rois_ind, [ROI_H, ROI_W],
        name='rgbview_pooling_ROI')

    if debug:
        utils.add_activation_summary(birdview_pooling_ROI)
        utils.add_activation_summary(frontview_pooling_ROI)
        utils.add_activation_summary(rgbview_pooling_ROI)

    return birdview_pooling_ROI, frontview_pooling_ROI, rgbview_pooling_ROI
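# Hedged usage note (toy shapes): tf.image.crop_and_resize expects boxes in
# normalized [y1, x1, y2, x2] coordinates plus one batch index per ROI.
def crop_and_resize_example():
    feat = tf.placeholder(tf.float32, [None, 32, 32, 64])
    rois = tf.constant([[0.0, 0.0, 0.5, 0.5]], dtype=tf.float32)  # top-left quarter
    rois_ind = tf.constant([0], dtype=tf.int32)                   # crop from image 0
    return tf.image.crop_and_resize(feat, rois, rois_ind, [7, 7])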
def inference(image, keep_prob): """ Semantic segmentation network definition :param image: input image. Should have values in range 0-255 :param keep_prob: :return: """ print("setting up vgg initialized conv layers ...") model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL) mean = model_data['normalization'][0][0][0] mean_pixel = np.mean(mean, axis=(0, 1)) weights = np.squeeze(model_data['layers']) #processed_image = utils.process_image(image, mean_pixel) with tf.variable_scope("inference"): image_net = vgg_net(weights, image) conv_final_layer = image_net["conv5_3"] pool5 = utils.max_pool_2x2(conv_final_layer) W6 = utils.weight_variable([7, 7, 512, 4096], name="W6") b6 = utils.bias_variable([4096], name="b6") conv6 = utils.conv2d_basic(pool5, W6, b6) relu6 = tf.nn.relu(conv6, name="relu6") if FLAGS.debug: utils.add_activation_summary(relu6) relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob) W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7") b7 = utils.bias_variable([4096], name="b7") conv7 = utils.conv2d_basic(relu_dropout6, W7, b7) relu7 = tf.nn.relu(conv7, name="relu7") if FLAGS.debug: utils.add_activation_summary(relu7) relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob) W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8") b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8") conv8 = utils.conv2d_basic(relu_dropout7, W8, b8) # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1") # now to upscale to actual image size deconv_shape1 = image_net["pool4"].get_shape() W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1") b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1") conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"])) fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1") deconv_shape2 = image_net["pool3"].get_shape() W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2") b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2") conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"])) fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2") shape = tf.shape(image) deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS]) W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3") b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3") conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8) annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction") return tf.expand_dims(annotation_pred, dim=3), conv_t3
def main(argv=None):
    print("Setting up image reader...")
    train_images, valid_images, test_images = flowers.read_dataset(FLAGS.data_dir)
    # image_options = {"crop": True, "crop_size": MODEL_IMAGE_SIZE, "resize": True, "resize_size": IMAGE_SIZE}
    # dataset_reader = dataset.BatchDatset(train_images, image_options)
    # images = tf.placeholder(tf.float32, [FLAGS.batch_size, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS])
    filename_queue = tf.train.string_input_producer(train_images)
    images = read_input_queue(filename_queue)

    train_phase = tf.placeholder(tf.bool)
    z_vec = tf.placeholder(tf.float32, [None, FLAGS.z_dim], name="z")

    print("Setting up network model...")
    tf.histogram_summary("z", z_vec)
    tf.image_summary("image_real", images, max_images=1)
    gen_images = generator(z_vec, train_phase)
    tf.image_summary("image_generated", gen_images, max_images=3)

    with tf.variable_scope("discriminator") as scope:
        discriminator_real_prob, logits_real, feature_real = discriminator(images, train_phase)
        utils.add_activation_summary(tf.identity(discriminator_real_prob, name='disc_real_prob'))
        scope.reuse_variables()
        discriminator_fake_prob, logits_fake, feature_fake = discriminator(gen_images, train_phase)
        utils.add_activation_summary(tf.identity(discriminator_fake_prob, name='disc_fake_prob'))

    discriminator_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits_real, tf.ones_like(logits_real)))
    discriminator_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits_fake, tf.zeros_like(logits_fake)))
    discriminator_loss = discriminator_loss_fake + discriminator_loss_real
    gen_loss_1 = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits_fake, tf.ones_like(logits_fake)))
    gen_loss_2 = tf.reduce_mean(tf.nn.l2_loss(feature_real - feature_fake)) / (IMAGE_SIZE * IMAGE_SIZE)
    gen_loss = gen_loss_1 + 0.1 * gen_loss_2

    tf.scalar_summary("Discriminator_loss_real", discriminator_loss_real)
    tf.scalar_summary("Discriminator_loss_fake", discriminator_loss_fake)
    tf.scalar_summary("Discriminator_loss", discriminator_loss)
    tf.scalar_summary("Generator_loss", gen_loss)

    train_variables = tf.trainable_variables()
    generator_variables = [v for v in train_variables if v.name.startswith("generator")]
    # print(map(lambda x: x.op.name, generator_variables))
    discriminator_variables = [v for v in train_variables if v.name.startswith("discriminator")]
    # print(map(lambda x: x.op.name, discriminator_variables))

    generator_train_op = train(gen_loss, generator_variables)
    discriminator_train_op = train(discriminator_loss, discriminator_variables)

    for v in train_variables:
        utils.add_to_regularization_and_summary(var=v)

    def visualize():
        count = 20
        z_feed = np.random.uniform(-1.0, 1.0, size=(count, FLAGS.z_dim)).astype(np.float32)
        # z_feed = np.tile(np.random.uniform(-1.0, 1.0, size=(1, FLAGS.z_dim)).astype(np.float32), (count, 1))
        # z_feed[:, 25] = sorted(10.0 * np.random.randn(count))
        image = sess.run(gen_images, feed_dict={z_vec: z_feed, train_phase: False})
        for iii in xrange(count):
            print(image.shape)
            utils.save_image(image[iii, :, :, :], IMAGE_SIZE, FLAGS.logs_dir, name=str(iii))
            print("Saving image" + str(iii))

    sess = tf.Session()
    summary_op = tf.merge_all_summaries()
    saver = tf.train.Saver()
    summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
        visualize()
        return

    coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess, coord) try: for itr in xrange(MAX_ITERATIONS): batch_z = np.random.uniform(-1.0, 1.0, size=[FLAGS.batch_size, FLAGS.z_dim]).astype(np.float32) # feed_dict = {images: dataset_reader.next_batch(FLAGS.batch_size), z_vec: batch_z, train_phase: True} feed_dict = {z_vec: batch_z, train_phase: True} sess.run(discriminator_train_op, feed_dict=feed_dict) sess.run(generator_train_op, feed_dict=feed_dict) sess.run(generator_train_op, feed_dict=feed_dict) if itr % 10 == 0: g_loss_val, d_loss_val, summary_str = sess.run([gen_loss, discriminator_loss, summary_op], feed_dict=feed_dict) print("Step: %d, generator loss: %g, discriminator_loss: %g" % (itr, g_loss_val, d_loss_val)) summary_writer.add_summary(summary_str, itr) if itr % 500 == 0: saver.save(sess, FLAGS.logs_dir + "model.ckpt", global_step=itr) except tf.errors.OutOfRangeError: print('Done training -- epoch limit reached') except KeyboardInterrupt: print("Ending Training...") finally: coord.request_stop() # Wait for threads to finish. coord.join(threads)
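
# Note on the losses above: tf.nn.l2_loss(x) computes sum(x ** 2) / 2, so
# gen_loss_2 is half the squared L2 distance between real and fake
# discriminator features, normalized by pixel count (a feature-matching term).
# A minimal NumPy sketch of that term; _feature_matching_loss_np is a
# hypothetical helper for illustration only and is not used by the graph:
def _feature_matching_loss_np(feat_real, feat_fake, image_size):
    diff = (np.asarray(feat_real) - np.asarray(feat_fake)).ravel()
    return 0.5 * np.dot(diff, diff) / float(image_size * image_size)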
def autoencorder_antn(image, n_class, filter_size, num_of_feature, num_of_layers,
                      keep_prob, debug, restore=False, shared_weights=None,
                      M_weights=None, AE_weights=None):
    stddev = 0.02
    channels = image.get_shape().as_list()[-1]

    with tf.name_scope("shared-network"):
        name = 'shared'
        inter_feat, shared_variables, layer_id, weight_id = unet_downsample(
            image, filter_size, num_of_feature, num_of_layers, keep_prob,
            name, debug, restore, shared_weights)

    with tf.name_scope("main-network"):
        name = 'main-network'
        M_feat, M_variables, M_layer_id, M_weight_id = unet_upsample(
            image, inter_feat, shared_variables, layer_id, weight_id,
            filter_size, num_of_feature, num_of_layers, keep_prob, name,
            debug, restore, M_weights)

        w_name = name + '_final_layer_' + str(M_layer_id) + '_w'
        b_name = name + '_final_layer_' + str(M_layer_id) + '_b'
        relu_name = name + '_final_layer_' + str(M_layer_id) + '_feat'

        if restore:
            w = utils.get_variable(M_weights[M_weight_id], w_name)
            M_weight_id += 1
            b = utils.get_variable(M_weights[M_weight_id], b_name)
            M_weight_id += 1
        else:
            w = utils.weight_variable([1, 1, num_of_feature, n_class], stddev, w_name)
            M_weight_id += 1
            b = utils.bias_variable([n_class], b_name)
            M_weight_id += 1

        y_conv = utils.conv2d_basic(M_feat, w, b, keep_prob)
        y_conv_relu = tf.nn.relu(y_conv)
        clean_y_out = tf.reshape(
            tf.nn.softmax(tf.reshape(y_conv, [-1, n_class])),
            tf.shape(y_conv), 'segmentation_map')
        M_variables.extend((w, b))

        if debug:
            utils.add_activation_summary(clean_y_out)
            utils.add_to_image_summary(clean_y_out)
        M_layer_id += 1

    with tf.name_scope("auto-encoder"):
        name = 'auto-encoder'
        # AE_conv, AE_variables, AE_layer_id, AE_weight_id = unet_upsample(image, inter_feat, shared_variables, layer_id, weight_id, filter_size,
        #                                                                  num_of_feature, num_of_layers, keep_prob, name, debug,
        #                                                                  restore, weights)
        w_name = name + '_final_layer_' + str(M_layer_id) + '_w'
        b_name = name + '_final_layer_' + str(M_layer_id) + '_b'
        relu_name = name + '_final_layer_' + str(M_layer_id) + '_feat'

        # contracting layer of main network as input
        # if restore:
        #     w = utils.get_variable(weights[AE_weight_id], w_name)
        #     AE_weight_id += 1
        #     b = utils.get_variable(weights[AE_weight_id], b_name)
        #     AE_weight_id += 1
        # else:
        #     w = utils.weight_variable([1, 1, num_of_feature, channels], stddev, w_name)
        #     AE_weight_id += 1
        #     b = utils.bias_variable([channels], b_name)
        #     AE_weight_id += 1
        # AE_feat = tf.nn.relu(utils.conv2d_basic(AE_conv, w, b, keep_prob), relu_name)
        # AE_variables.extend((w, b))
        # AE_layer_id += 1

        # last layer of main network as input
        AE_variables = []
        w = utils.weight_variable([1, 1, num_of_feature, channels], stddev, w_name)
        b = utils.bias_variable([channels], b_name)
        AE_feat = tf.nn.relu(utils.conv2d_basic(M_feat, w, b, keep_prob), relu_name)
        AE_variables.extend((w, b))
        if debug:
            utils.add_activation_summary(AE_feat)
            utils.add_to_image_summary(
                utils.get_image_summary(AE_feat, relu_name + '_image'))

    with tf.name_scope("trans-layer"):
        # trans_variables = []
        name = 'trans_layer'
        # wd_name = name + str(layer_id) + '_w'
        # bd_name = name + str(layer_id) + '_b'
        # features = 2 ** (num_of_layers - 1) * num_of_feature
        # wd = utils.weight_variable([2, 2, n_class * n_class, features], stddev, wd_name)
        # bd = utils.bias_variable([features // 2], bd_name)
        # output_shape = [tf.shape(inter_feat)[0], tf.shape(inter_feat)[1] * 4, tf.shape(inter_feat)[2] * 4, n_class * n_class]
        # tran_y_feat = tf.nn.relu(utils.conv2d_transpose_strided(in_node, wd, bd, output_shape=output_shape, keep_prob=keep_prob))
        # trans_variables.extend((wd, bd))
        tran_y_feat = _trans_layer(
            inter_feat[0], n_class * n_class,
            [image.get_shape().as_list()[1], image.get_shape().as_list()[2]])

        class_tran_y_out = []
        for i in range(n_class):
            class_tran_y_out.append(
                tf.reshape(tf.nn.softmax(
                    tf.reshape(
                        tran_y_feat[:, :, :, i * n_class:(i * n_class + n_class)],
                        [-1, n_class])),
                    tf.shape(clean_y_out), name='tran_map' + str(i)))
        tran_map = tf.concat(class_tran_y_out, 3, name='tran_map')

        if debug:
            for i in range(n_class):
                for j in range(n_class):
                    # utils.add_activation_summary(utils.get_image_summary(tran_map, str(i) + '_to_' + str(j), i * n_class + j))
                    utils.add_activation_summary(tran_map[:, :, :, i * n_class + j])
                    utils.add_to_image_summary(
                        utils.get_image_summary(tran_map, str(i) + '_to_' + str(j), i * n_class + j))

    with tf.name_scope("noisy-map-layer"):
        noise_y_out = tf.reshape(tf.matmul(
            tf.reshape(clean_y_out, [-1, 1, n_class]),
            tf.reshape(tran_map, [-1, n_class, n_class])),
            tf.shape(clean_y_out), name='noise_output')

        # summary
        if debug:
            utils.add_activation_summary(noise_y_out)
            utils.add_to_image_summary(noise_y_out)

    return (noise_y_out, clean_y_out, y_conv, tran_y_feat, tran_map, AE_feat,
            shared_variables, AE_variables, M_variables, inter_feat)
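
# The noisy-map layer above applies, at every pixel, a learned K x K class
# transition matrix T to the clean softmax output: noise_y = clean_y @ T.
# Per-pixel NumPy sketch (_noisy_label_pixel_np is a hypothetical helper,
# illustration only):
def _noisy_label_pixel_np(clean_probs, trans_matrix):
    # clean_probs: (K,) softmax output for one pixel
    # trans_matrix: (K, K) row-stochastic matrix, T[i, j] being the
    # probability that true class i is observed as noisy label j
    return clean_probs.dot(trans_matrix)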
def unet_upsample(image, dw_h_convs, variables, layer_id, weight_id, filter_size,
                  num_of_feature, num_of_layers, keep_prob, name, debug,
                  restore=False, weights=None):
    new_variables = []
    in_node = dw_h_convs[num_of_layers - 1]

    # upsampling layers
    for layer in range(num_of_layers - 2, -1, -1):
        features = 2 ** (layer + 1) * num_of_feature
        stddev = 0.02

        wd_name = name + '_layer_up' + str(layer_id) + '_w'
        bd_name = name + '_layer_up' + str(layer_id) + '_b'
        w1_name = name + '_layer_up_conv' + str(layer_id) + '_w0'
        w2_name = name + '_layer_up_conv' + str(layer_id) + '_w1'
        b1_name = name + '_layer_up_conv' + str(layer_id) + '_b0'
        b2_name = name + '_layer_up_conv' + str(layer_id) + '_b1'
        relu_name = name + '_layer_up_conv' + str(layer_id) + '_feat'

        # pooling size is 2
        if restore:
            wd = utils.get_variable(weights[weight_id], wd_name)
            weight_id += 1
            bd = utils.get_variable(weights[weight_id], bd_name)
            weight_id += 1
            w1 = utils.get_variable(weights[weight_id], w1_name)
            weight_id += 1
            w2 = utils.get_variable(weights[weight_id], w2_name)
            weight_id += 1
            b1 = utils.get_variable(weights[weight_id], b1_name)
            weight_id += 1
            b2 = utils.get_variable(weights[weight_id], b2_name)
            weight_id += 1
        else:
            wd = utils.weight_variable([2, 2, features // 2, features], stddev, wd_name)
            bd = utils.bias_variable([features // 2], bd_name)
            w1 = utils.weight_variable(
                [filter_size, filter_size, features, features // 2], stddev, w1_name)
            w2 = utils.weight_variable(
                [filter_size, filter_size, features // 2, features // 2], stddev, w2_name)
            b1 = utils.bias_variable([features // 2], b1_name)
            b2 = utils.bias_variable([features // 2], b2_name)

        h_deconv = tf.nn.relu(
            utils.conv2d_transpose_strided(in_node, wd, bd, keep_prob=keep_prob))
        h_deconv_concat = utils.crop_and_concat(dw_h_convs[layer], h_deconv)
        conv1 = utils.conv2d_basic(h_deconv_concat, w1, b1, keep_prob)
        h_conv = tf.nn.relu(conv1)
        conv2 = utils.conv2d_basic(h_conv, w2, b2, keep_prob)
        in_node = tf.nn.relu(conv2, relu_name)

        if debug:
            utils.add_activation_summary(in_node)
            utils.add_to_image_summary(
                utils.get_image_summary(in_node, relu_name + '_image'))

        new_variables.extend((wd, bd, w1, w2, b1, b2))
        layer_id += 1

    return in_node, new_variables, layer_id, weight_id
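
# unet_upsample fuses each skip connection with the upsampled feature map via
# utils.crop_and_concat. A center-crop-and-concat sketch in NumPy under the
# assumed semantics (the actual utils implementation may differ):
def _crop_and_concat_np(skip, up):
    # skip: [B, H1, W1, C1], up: [B, H2, W2, C2] with H1 >= H2 and W1 >= W2
    dh = (skip.shape[1] - up.shape[1]) // 2
    dw = (skip.shape[2] - up.shape[2]) // 2
    cropped = skip[:, dh:dh + up.shape[1], dw:dw + up.shape[2], :]
    return np.concatenate([cropped, up], axis=3)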
def NTN(image, n_class, filter_size, num_of_feature, num_of_layers, keep_prob,
        name, debug, restore=False, weights=None, Unsupervised=False):
    with tf.name_scope("u-net"):
        y_conv, variables, layer_id, dw_h_convs = unet(
            image, n_class, filter_size, num_of_feature, num_of_layers,
            keep_prob, name, debug, restore, weights)
        # softmax output of the clean segmentation map
        clean_y_out = tf.reshape(
            tf.nn.softmax(tf.reshape(y_conv, [-1, n_class])),
            tf.shape(y_conv), 'segmentation_map')

        # summary
        if debug:
            utils.add_activation_summary(clean_y_out)
            utils.add_to_image_summary(clean_y_out)

    with tf.name_scope("trans-layer"):
        if not Unsupervised:
            # supervised: initialize the class-transition matrix to the identity
            w = utils.get_variable(
                np.reshape(np.eye(n_class), [1, 1, n_class, n_class]),
                'trans-prob-weight')
        else:
            # unsupervised: initialize each row to the uniform distribution
            w = utils.weight_constant(
                np.reshape(np.ones((n_class, n_class)),
                           [1, 1, n_class, n_class]) * 1. / n_class,
                'trans-prob-weight')
        TransProbVar = []
        noise_y_out = tf.nn.conv2d(clean_y_out, w, strides=[1, 1, 1, 1],
                                   padding="SAME", name="NoisySegMap")
        TransProbVar.append(w)
        if debug:
            utils.add_activation_summary(noise_y_out)
            utils.add_to_image_summary(noise_y_out)

    with tf.name_scope("MapTransProb"):
        if not Unsupervised:
            # Project the transition weights back onto row-stochastic matrices:
            # clamp entries to [0, 1], then renormalize each row to sum to 1.
            MIN = tf.zeros([n_class, n_class], dtype=tf.float32)
            MAX = tf.ones([n_class, n_class], dtype=tf.float32)
            I_MIN = tf.maximum(w[0, 0], MIN, name="MAXIMUM")
            I_MAX = tf.minimum(I_MIN, MAX, name="MINIMUM")
            B = tf.reshape(
                I_MAX / tf.reshape(tf.reduce_sum(I_MAX, 1), [n_class, 1]),
                [1, 1, n_class, n_class])
            MapTransProb = tf.assign(w, B)
        else:
            MapTransProb = None

    return noise_y_out, clean_y_out, MapTransProb, variables, TransProbVar, dw_h_convs
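
# The MapTransProb op in NTN projects the transition weights back onto the
# set of row-stochastic matrices after each update. Equivalent NumPy sketch
# of the projection (_project_row_stochastic_np is a hypothetical helper,
# illustration only):
def _project_row_stochastic_np(T):
    T = np.clip(T, 0.0, 1.0)                  # clamp entries to [0, 1]
    return T / T.sum(axis=1, keepdims=True)   # renormalize each row to sum to 1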
def unet_downsample(image, filter_size, num_of_feature, num_of_layers, keep_prob,
                    name, debug, restore=False, weights=None):
    channels = image.get_shape().as_list()[-1]
    dw_h_convs = {}
    variables = []
    pools = {}
    in_node = image

    # downsampling layers
    layer_id = 0
    weight_id = 0
    for layer in range(0, num_of_layers):
        features = 2 ** layer * num_of_feature
        stddev = 0.02

        w1_name = name + '_layer_' + str(layer_id) + '_w_0'
        w2_name = name + '_layer_' + str(layer_id) + '_w_1'
        b1_name = name + '_layer_' + str(layer_id) + '_b_0'
        b2_name = name + '_layer_' + str(layer_id) + '_b_1'
        relu_name = name + '_layer_' + str(layer_id) + '_feat'

        if layer == 0:
            if restore:
                w1 = utils.get_variable(weights[weight_id], w1_name)
                weight_id += 1
            else:
                w1 = utils.weight_variable(
                    [filter_size, filter_size, channels, features], stddev, w1_name)
        else:
            if restore:
                w1 = utils.get_variable(weights[weight_id], w1_name)
                weight_id += 1
            else:
                w1 = utils.weight_variable(
                    [filter_size, filter_size, features // 2, features], stddev, w1_name)

        if restore:
            w2 = utils.get_variable(weights[weight_id], w2_name)
            weight_id += 1
            b1 = utils.get_variable(weights[weight_id], b1_name)
            weight_id += 1
            b2 = utils.get_variable(weights[weight_id], b2_name)
            weight_id += 1
        else:
            w2 = utils.weight_variable(
                [filter_size, filter_size, features, features], stddev, w2_name)
            b1 = utils.bias_variable([features], b1_name)
            b2 = utils.bias_variable([features], b2_name)

        conv1 = utils.conv2d_basic(in_node, w1, b1, keep_prob)
        tmp_h_conv = tf.nn.relu(conv1)
        conv2 = utils.conv2d_basic(tmp_h_conv, w2, b2, keep_prob)
        dw_h_convs[layer] = tf.nn.relu(conv2, relu_name)

        if layer < num_of_layers - 1:
            pools[layer] = utils.max_pool_2x2(dw_h_convs[layer])
            in_node = pools[layer]

        if debug:
            utils.add_activation_summary(dw_h_convs[layer])
            utils.add_to_image_summary(
                utils.get_image_summary(dw_h_convs[layer], relu_name + '_image'))

        variables.extend((w1, w2, b1, b2))
        layer_id += 1

    return dw_h_convs, variables, layer_id, weight_id
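
# Sanity check for the encoder schedule in unet_downsample: the channel count
# doubles with depth (2 ** layer * num_of_feature) while every level except
# the last halves the spatial resolution via max pooling. _encoder_shapes is
# a hypothetical helper, illustration only:
def _encoder_shapes(height, width, num_of_feature, num_of_layers):
    shapes, h, w = [], height, width
    for layer in range(num_of_layers):
        shapes.append((h, w, 2 ** layer * num_of_feature))
        if layer < num_of_layers - 1:
            h, w = h // 2, w // 2
    return shapes
# e.g. _encoder_shapes(64, 64, 16, 3) -> [(64, 64, 16), (32, 32, 32), (16, 16, 64)]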