def des():
    """Segment a sample image with a fully convolutional VGG-16 and plot it.

    Steps performed:
      1. Download a school-bus JPEG and decode it inside a fresh TF graph.
      2. Subtract the per-channel mean and run ``vgg.vgg_16`` with
         ``spatial_squeeze=False`` so the logits keep their spatial layout.
      3. Take the argmax over the class axis (axis 3) and plot the coarse
         prediction map next to the input image.
      4. Upsample the logits by a factor of 32 with ``upsample_tf``, take the
         argmax again and plot the result with class names on the colorbar.

    Side effects: performs a network download, restores weights from the
    relative path ``'vgg_16.ckpt'``, prints intermediate tensors/shapes and
    opens matplotlib figures. Returns ``None``.

    Relies on names defined outside this function: ``tf``, ``slim``, ``vgg``,
    ``imagenet``, ``plt``, ``np``, ``urllib2``, ``upsample_tf``,
    ``_mean_image_subtraction`` and the ``_R_MEAN``/``_G_MEAN``/``_B_MEAN``
    constants.
    """

    def discrete_matshow(data, labels_names=None, title=""):
        """Show an integer-valued matrix with one colorbar tick per value.

        Args:
            data: 2-D array of integer class ids to display.
            labels_names: optional sequence of tick labels for the colorbar;
                nothing is relabelled when it is empty or ``None``.
            title: optional figure title; skipped when empty.
        """
        fig_size = [7, 6]
        plt.rcParams["figure.figsize"] = fig_size

        # Compute the value range once instead of on every use.
        lowest = np.min(data)
        highest = np.max(data)

        # Discrete colormap with exactly one colour per integer value.
        cmap = plt.get_cmap('Paired', highest - lowest + 1)

        # Limits sit .5 outside the true range so each integer is centred
        # inside its colour band.
        mat = plt.matshow(data, cmap=cmap,
                          vmin=lowest - .5, vmax=highest + .5)

        # Tick the colorbar at the integers only.
        cax = plt.colorbar(mat, ticks=np.arange(lowest, highest + 1))

        # The names to be printed beside the colorbar.
        if labels_names:
            cax.ax.set_yticklabels(labels_names)

        if title:
            plt.suptitle(title, fontsize=15, fontweight='bold')

        plt.show()

    def show_input_image(np_image):
        """Display the downloaded image without axes."""
        plt.figure()
        plt.imshow(np_image.astype(np.uint8))
        plt.suptitle("Input Image", fontsize=14, fontweight='bold')
        plt.axis('off')
        plt.show()

    def relabel_from_zero(class_map):
        """Return (unique class ids, class_map renumbered 0..k-1)."""
        unique_ids, inverse = np.unique(class_map, return_inverse=True)
        return unique_ids, inverse.reshape(class_map.shape)

    with tf.Graph().as_default():
        url = ("https://upload.wikimedia.org/wikipedia/commons/d/d9/"
               "First_Student_IC_school_bus_202076.jpg")

        # Close the HTTP response explicitly so the connection is released
        # even if the read fails.
        response = urllib2.urlopen(url)
        try:
            image_string = response.read()
        finally:
            response.close()

        image = tf.image.decode_jpeg(image_string, channels=3)

        # Convert image to float32 before subtracting the mean pixel value.
        image_float = tf.to_float(image, name='ToFloat')

        # Subtract the mean pixel value from each pixel.
        processed_image = _mean_image_subtraction(
            image_float, [_R_MEAN, _G_MEAN, _B_MEAN])

        input_image = tf.expand_dims(processed_image, 0)

        with slim.arg_scope(vgg.vgg_arg_scope()):
            # spatial_squeeze=False lets the network be used in a fully
            # convolutional manner.
            logits, _ = vgg.vgg_16(input_image,
                                   num_classes=1000,
                                   is_training=False,
                                   spatial_squeeze=False)

        # For each location we get a score for each of the 1000 classes.
        # These are not probabilities because softmax was not applied, but
        # picking the class with the highest score is equivalent to picking
        # the highest probability after softmax.
        print(logits)
        pred = tf.argmax(logits, axis=3)
        print(pred)

        init_fn = slim.assign_from_checkpoint_fn(
            'vgg_16.ckpt',
            slim.get_model_variables('vgg_16'))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            init_fn(sess)
            segmentation, np_image, np_logits = sess.run(
                [pred, image, logits])

            # NOTE(review): the post-processing below is kept inside the
            # session scope because upsample_tf is defined elsewhere and may
            # depend on the active graph/session -- confirm and dedent if not.
            print(segmentation)
            print(segmentation.shape)
            print(np_logits.shape)

            # Remove the leading batch dimension of size one.
            segmentation = np.squeeze(segmentation)

            names = imagenet.create_readable_names_for_imagenet_labels()

            # Collect the unique predicted classes and relabel the
            # predictions so that classes are numbered starting from zero.
            segmentation_size = segmentation.shape
            print(segmentation_size)
            unique_classes, relabeled_image = relabel_from_zero(segmentation)

            show_input_image(np_image)
            discrete_matshow(data=relabeled_image,
                             labels_names=[],
                             title="Segmentation")

            # Upsample the coarse logits by 32x and repeat the argmax.
            upsampled_logits = upsample_tf(factor=32,
                                           input_img=np_logits.squeeze())
            upsampled_predictions = upsampled_logits.squeeze().argmax(axis=2)

            unique_classes, relabeled_image = relabel_from_zero(
                upsampled_predictions)

            # names is indexed with an offset of one relative to the
            # network's class ids (as in the original code).
            labels_names = [
                str(index) + ' ' + names[current_class_number + 1]
                for index, current_class_number in enumerate(unique_classes)
            ]

            show_input_image(np_image)
            discrete_matshow(data=relabeled_image,
                             labels_names=labels_names,
                             title="Segmentation")
# Fragment: tail of a matshow/colorbar plotting helper (its header is outside this view), followed by VGG-19 graph construction on a float placeholder and creation of init_fn via slim.assign_from_checkpoint_fn. NOTE(review): tf.argmax's `dimension` keyword is the deprecated spelling of `axis` -- confirm against the TF version in use.
mat = plt.matshow(img, cmap=cmap, vmin=minval - 0.5, vmax=maxval + 0.5) # Define the colorbar cax = plt.colorbar(mat, ticks=np.arange(minval, maxval + 1), shrink=2) # Add the class names if labels_str: cax.ax.set_yticklabels(labels_str) if title: plt.suptitle(title, fontsize=14, fontweight='bold') input_imgs = tf.placeholder("float", [None, None, 3]) # Subtract the mean pixel value from each pixel processed_image = _mean_image_subtraction(input_imgs, [_R_MEAN, _G_MEAN, _B_MEAN]) input_image = tf.expand_dims(processed_image, 0) #print(input_image.shape) with slim.arg_scope( vgg.vgg_arg_scope()): # The spatial_squeeze option specifies whether to squeeze the spatial dimensions of the result, removing unnecessary spatial dimensions logits, _ = vgg.vgg_19(input_image, num_classes=1000, is_training=False, spatial_squeeze=False) pred = tf.argmax(logits, dimension=3) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'vgg_19.ckpt'), slim.get_model_variables('vgg_19'))
# Download a sample image, mean-centre it and build a two-class VGG-16 whose
# logits keep their spatial layout (spatial_squeeze=False).
url = ("https://upload.wikimedia.org/wikipedia/commons/d/d9/"
       "First_Student_IC_school_bus_202076.jpg")

# Open the specified url and load the image as a byte string. The response is
# closed explicitly so the connection is released even if the read fails.
response = urllib2.urlopen(url)
try:
    image_string = response.read()
finally:
    response.close()

# Decode the byte string into a matrix of intensity values.
image = tf.image.decode_jpeg(image_string, channels=3)

# Convert image to float32 before subtracting the mean pixel value.
image_float = tf.to_float(image, name='ToFloat')

# Subtract the mean pixel value from each pixel.
mean_centered_image = _mean_image_subtraction(
    image_float, [_R_MEAN, _G_MEAN, _B_MEAN])

# Add a leading batch dimension of size one.
processed_images = tf.expand_dims(mean_centered_image, 0)

# upsample_filter_np is defined elsewhere; wrap it as a graph constant.
upsample_filter_tensor = tf.constant(upsample_filter_np)

# Define the model that we want to use -- only two classes at the last layer.
with slim.arg_scope(vgg.vgg_arg_scope()):
    logits, end_points = vgg.vgg_16(processed_images,
                                    num_classes=2,
                                    is_training=False,
                                    spatial_squeeze=False,
                                    fc_conv_padding='SAME')

# Dynamic shape of the (downsampled) logits tensor.
downsampled_logits_shape = tf.shape(logits)
# Fragment (incomplete at both ends in this view): tail of a function that runs tf.nn.conv2d_transpose and returns the squeezed result, followed by a VGG-16 graph on "apple.jpg" resized to 224x224 and the start of a sess.run over pool3/pool4 endpoints. NOTE(review): tf.argmax's `dimension` keyword is the deprecated spelling of `axis` -- confirm against the TF version in use.
res = tf.nn.conv2d_transpose( expanded_img, upsample_filter_np, output_shape=[1, new_height, new_width, number_of_classes], strides=[1, factor, factor, 1]) res = sess.run(res) return np.squeeze(res) with tf.Graph().as_default(): image = tf.image.decode_jpeg(tf.read_file("apple.jpg"), channels=3) image = tf.image.resize_images(image, [224, 224]) # Convert pixel values to 32-bit float before subtracting the mean image_float = tf.to_float(image, name='ToFloat') # Subtract the mean pixel value from each pixel processed_image = _mean_image_subtraction(image_float, [_R_MEAN, _G_MEAN, _B_MEAN]) input_image = tf.expand_dims(processed_image, 0) with slim.arg_scope(vgg.vgg_arg_scope()): logits, endpoints = vgg.vgg_16(input_image, num_classes=1000, is_training=False, spatial_squeeze=False) pred = tf.argmax(logits, dimension=3) # Compare the output channels one by one and take, at each position, the index of the largest score init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_path, 'vgg_16.ckpt'), slim.get_model_variables('vgg_16')) with tf.Session() as sess: init_fn(sess) fcn8s, fcn16s, fcn32s = sess.run([ endpoints["vgg_16/pool3"], endpoints["vgg_16/pool4"],