def run():
    img_path = input('Input the path and image name:')
    img_ready = utils.load_image(img_path)
    fig = plt.figure(u"Top-5 prediction results")
    with tf.Session() as sess:
        images = tf.placeholder(tf.float32, [1, 224, 224, 3])
        vgg = vgg16.Vgg16()
        vgg.forward(images)
        probability = sess.run(vgg.prob, feed_dict={images: img_ready})
        top5 = np.argsort(probability[0])[-1:-6:-1]
        print("top5:", top5)
        values = []
        bar_label = []
        for n, i in enumerate(top5):
            print("n:", n)
            print("i:", i)
            values.append(probability[0][i])
            bar_label.append(labels[i])
            print(i, ":", labels[i], "----", utils.percent(probability[0][i]))
        ax = fig.add_subplot(111)
        ax.bar(range(len(values)), values, tick_label=bar_label, width=0.5, fc='g')
        ax.set_ylabel(u'probability')
        ax.set_title(u'Top-5')
        for a, b in zip(range(len(values)), values):
            ax.text(a, b + 0.0005, utils.percent(b), ha='center', va='bottom', fontsize=7)
        plt.show()
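# A minimal, self-contained check (not part of the snippet above) of the indexing trick
# np.argsort(probability[0])[-1:-6:-1]: argsort is ascending, so reverse-slicing the last
# five entries yields the top-5 class indices in descending order of probability.
import numpy as np

probs = np.array([0.05, 0.40, 0.10, 0.25, 0.02, 0.18])
top5 = np.argsort(probs)[-1:-6:-1]
print(top5)  # -> [1 3 5 2 0], i.e. 0.40, 0.25, 0.18, 0.10, 0.05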
def call_vgg_16(input_bgr):
    '''Call the vgg_16 model.

    Args:
        input_bgr: BGR image [batch, height, width, 3], values scaled to [-1, 1].
    Returns:
        f1, f2, f3, f4: feature maps from conv1_2, conv2_2, conv3_3 and conv4_3.
    '''
    input_bgr_scaled = data_generator.Data_Generator.de_scale_img(input_bgr)
    input_bgr_224 = tf.image.resize_images(input_bgr_scaled, [224, 224])
    vgg_16_model = vgg_16.Vgg16(vgg16_npy_path=flags.FLAGS.vgg_model_dir)
    vgg_16_model.build(input_bgr_224)
    f1, f2, f3, f4 = (vgg_16_model.conv1_2, vgg_16_model.conv2_2,
                      vgg_16_model.conv3_3, vgg_16_model.conv4_3)
    return f1, f2, f3, f4
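# Hedged usage sketch (assumes the same vgg_16 / data_generator modules and the
# vgg_model_dir flag used above). Since inputs are resized to 224x224 internally,
# the four returned feature maps come out at the standard VGG16 resolutions:
#   f1 (conv1_2): [batch, 224, 224,  64]
#   f2 (conv2_2): [batch, 112, 112, 128]
#   f3 (conv3_3): [batch,  56,  56, 256]
#   f4 (conv4_3): [batch,  28,  28, 512]
import tensorflow as tf

input_bgr = tf.placeholder(tf.float32, [None, 256, 256, 3], name="input_bgr")
f1, f2, f3, f4 = call_vgg_16(input_bgr)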
def init_model(train=True):
    """
    Init model for both training and testing.
    :param train: indicate if currently training
    :return: everything this model needs
    """
    # Create training summary folder if not exist
    create_folder("summary/train/images")

    # Create testing summary folder if not exist
    create_folder("summary/test/images")

    # Use gpu if exist
    with tf.device('/device:GPU:0'):
        # Init image data file path
        print("⏳ Init input file path...")
        if train:
            file_paths = init_file_path(training_dir)
        else:
            file_paths = init_file_path(testing_dir)

        # Init training flag and global step
        print("⏳ Init placeholder and variables...")
        is_training = tf.placeholder(tf.bool, name="is_training")
        global_step = tf.train.get_or_create_global_step()

        # Load vgg16 model
        print("🤖 Load vgg16 model...")
        vgg = vgg16.Vgg16()

        # Build residual encoder model
        print("🤖 Build residual encoder model...")
        residual_encoder = ResidualEncoder()

        # Get dataset iterator
        iterator = get_dataset_iterator(file_paths, batch_size, shuffle=True)

        # Get color image
        color_image_rgb = iterator.get_next(name="color_image_rgb")
        color_image_yuv = rgb_to_yuv(color_image_rgb, "color_image_yuv")

        # Get gray image
        gray_image_one_channel = tf.image.rgb_to_grayscale(color_image_rgb, name="gray_image_one_channel")
        gray_image_three_channels = tf.image.grayscale_to_rgb(gray_image_one_channel, name="gray_image_three_channels")
        gray_image_yuv = rgb_to_yuv(gray_image_three_channels, "gray_image_yuv")

        # Build vgg model
        with tf.name_scope("vgg16"):
            vgg.build(gray_image_three_channels)

        # Predict model
        predict = residual_encoder.build(input_data=gray_image_three_channels, vgg=vgg, is_training=is_training)
        predict_yuv = tf.concat(axis=3,
                                values=[tf.slice(gray_image_yuv, [0, 0, 0, 0], [-1, -1, -1, 1], name="gray_image_y"),
                                        predict],
                                name="predict_yuv")
        predict_rgb = yuv_to_rgb(predict_yuv, "predict_rgb")

        # Get loss
        loss = residual_encoder.get_loss(predict_val=predict,
                                         real_val=tf.slice(color_image_yuv, [0, 0, 0, 1], [-1, -1, -1, 2], name="color_image_uv"))

        # Prepare optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer().minimize(loss, global_step=global_step, name='optimizer')

        # Init tensorflow summaries
        print("⏳ Init tensorflow summaries...")
        tf.summary.histogram("loss", loss)
        tf.summary.image("gray_image", gray_image_three_channels, max_outputs=1)
        tf.summary.image("predict_image", predict_rgb, max_outputs=1)
        tf.summary.image("color_image", color_image_rgb, max_outputs=1)

    return is_training, global_step, optimizer, loss, predict_rgb, color_image_rgb, gray_image_three_channels, file_paths
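# Hedged training-loop sketch (not in the source) for the tuple returned by init_model()
# above. It assumes TF1-style sessions, that get_dataset_iterator() feeds data without an
# explicit iterator initializer, and reuses the "summary/train" folder created above.
import tensorflow as tf

(is_training, global_step, optimizer, loss, predict_rgb,
 color_image_rgb, gray_image_three_channels, file_paths) = init_model(train=True)
merged = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("summary/train", sess.graph)
    for _ in range(10):  # a few illustrative steps
        _, step, loss_val, summary = sess.run(
            [optimizer, global_step, loss, merged],
            feed_dict={is_training: True})
        writer.add_summary(summary, step)
        print("step", step, "loss", loss_val)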
def train_op(loss, var_list, optimizer):  # enclosing definition reconstructed; name and signature are assumed
    grads = optimizer.compute_gradients(loss, var_list=var_list)
    for grad, var in grads:
        utils.add_gradient_summary(grad, var)
    return optimizer.apply_gradients(grads)


with tf.name_scope("input"):
    x = tf.placeholder(tf.float32, [BATCH_SIZE, HEIGHT, WIDTH, 3], name='x_input')
    y = tf.placeholder(tf.float32, [BATCH_SIZE, HEIGHT, WIDTH, 2], name='ground_truth')

is_training = tf.placeholder(tf.bool, name="is_training")
global_step = tf.train.get_or_create_global_step()

# Load vgg16 model
print("🤖 Load vgg16 model...")
vgg = vgg16.Vgg16()

# Build residual decoder model
print("🤖 Build residual decoder model...")
residual_decoder = ResidualDecoder()

with tf.name_scope("vgg16"):
    vgg.build(x)

logits = residual_decoder.build(input_data=x, vgg=vgg, is_training=is_training)

with tf.name_scope("loss"):
    # Get loss
    loss = residual_decoder.get_loss(predict_val=logits, real_val=y)
    tf.summary.histogram("loss", loss)
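# Plausible sketch of the utils.add_gradient_summary helper used above (assumption: the
# project's real helper may differ). It simply records one histogram per gradient so the
# gradients show up in TensorBoard alongside the loss summary.
import tensorflow as tf

def add_gradient_summary(grad, var):
    if grad is not None:
        tf.summary.histogram(var.op.name + "/gradient", grad)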
# with tf.Session(
#         config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.7))) as sess:
#     images = tf.placeholder("float", [2, 224, 224, 3])
#     feed_dict = {images: batch}
#
#     vgg = vgg16.Vgg16()
#     with tf.name_scope("content_vgg"):
#         vgg.build(images)
#
#     prob = sess.run(vgg.prob, feed_dict=feed_dict)
#     print(prob)
#     utils.print_prob(prob[0], './synset.txt')
#     utils.print_prob(prob[1], './synset.txt')

with tf.device('/cpu:0'):
    with tf.Session() as sess:
        images = tf.placeholder("float", [2, 224, 224, 3])
        feed_dict = {images: batch}

        vgg = vgg16.Vgg16()  # instantiate the Vgg16 class defined in vgg16.py
        with tf.name_scope("content_vgg"):
            vgg.build(images)

        # vgg.prob is a member set inside build(): self.prob = tf.nn.softmax(self.fc8, name="prob")
        prob = sess.run(vgg.prob, feed_dict=feed_dict)
        # Prints the (2, 1000) prediction matrix; as in handwritten-digit recognition,
        # a row like [1, 0, 0, 0, ...] means class 0.
        print(prob)
        utils.print_prob(prob[0], './synset.txt')
        utils.print_prob(prob[1], './synset.txt')
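# Hedged sketch of a top-1 lookup in the spirit of utils.print_prob above (assumption:
# the real helper also prints the top-5; synset.txt is assumed to hold one ImageNet
# class description per line, in class-id order). Usage: print_top1(prob[0]).
import numpy as np

def print_top1(prob, synset_path='./synset.txt'):
    with open(synset_path) as f:
        synset = [line.strip() for line in f]
    top1 = int(np.argmax(prob))  # prob has shape (1000,) for a single image
    print("Top-1:", synset[top1], "with probability", prob[top1])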
def init_model(train=True):
    """
    Initialize the model.
    :param train: indicates whether this is training or testing
    :return: everything this model needs
    """
    create_folder("summary/train/images")
    create_folder("summary/test/images")

    # Use the GPU
    with tf.device('/device:GPU:0'):
        # Init image data file path
        print("⏳ Init input file path...")
        if train:
            file_paths = init_file_path(training_dir)  # training set: an array with the paths of all images
        else:
            # testing = input("Path of the test set:")
            file_paths = init_file_path(testing_dir)  # testing set path

        # Init training flag and global step
        print("⏳ Init placeholder and variables...")
        is_training = tf.placeholder(tf.bool, name="is_training")
        global_step = tf.train.get_or_create_global_step()

        # Load vgg16 model
        print("🤖 Load vgg16 model...")
        vgg = vgg16.Vgg16()

        # Build residual encoder model
        print("🤖 Build residual encoder model...")
        residual_encoder = ResidualEncoder()

        # Get dataset iterator
        iterator = get_dataset_iterator(file_paths, batch_size, shuffle=True)

        # Get color image
        color_image_rgb = iterator.get_next(name="color_image_rgb")       # fetch the next color image
        color_image_yuv = rgb_to_yuv(color_image_rgb, "color_image_yuv")  # convert the RGB image to YUV

        # Get gray image
        gray_image_one_channel = tf.image.rgb_to_grayscale(
            color_image_rgb, name="gray_image_one_channel")  # single-channel grayscale image
        gray_image_three_channels = tf.image.grayscale_to_rgb(
            gray_image_one_channel, name="gray_image_three_channels")  # replicate to a 3-channel (RGB-format) grayscale image
        gray_image_yuv = rgb_to_yuv(gray_image_three_channels,
                                    "gray_image_yuv")  # convert the RGB-format grayscale image to YUV

        # Build vgg model
        with tf.name_scope("vgg16"):
            vgg.build(gray_image_three_channels)  # feed the 3-channel grayscale image through VGG to extract features

        # Predict model
        # Build the residual encoder: input_data is the input to the first layer, vgg is the VGG model,
        # is_training flags whether we are training
        predict = residual_encoder.build(input_data=gray_image_three_channels, vgg=vgg, is_training=is_training)
        # predict holds the two predicted channels (U and V)
        predict_yuv = tf.concat(axis=3,
                                values=[tf.slice(gray_image_yuv, [0, 0, 0, 0], [-1, -1, -1, 1], name="gray_image_y"),
                                        predict],
                                name="predict_yuv")  # concatenate the Y, U and V channels
        predict_rgb = yuv_to_rgb(predict_yuv, "predict_rgb")

        # Get loss
        # Loss between the predicted UV channels and the ground-truth UV channels
        loss = residual_encoder.get_loss(
            predict_val=predict,
            real_val=tf.slice(color_image_yuv, [0, 0, 0, 1], [-1, -1, -1, 2], name="color_image_uv"))

        # Prepare optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # global_step counts training iterations; it is also the x-axis value used when
            # plotting loss and accuracy
            lr = tf.train.exponential_decay(0.001, global_step, 1000, 0.96)
            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
                loss, global_step=global_step, name='optimizer')  # global_step is incremented by 1 per training batch

        # Init tensorflow summaries
        print("⏳ Init tensorflow summaries...")
        tf.summary.histogram("loss", loss)
        tf.summary.image("gray_image", gray_image_three_channels, max_outputs=1)
        tf.summary.image("predict_image", predict_rgb, max_outputs=1)
        tf.summary.image("color_image", color_image_rgb, max_outputs=1)

    return is_training, global_step, optimizer, loss, predict_rgb, color_image_rgb, gray_image_three_channels, file_paths
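# Quick check (not part of the model code) of the decay schedule configured above:
# with the default staircase=False, tf.train.exponential_decay computes
#   lr = 0.001 * 0.96 ** (global_step / 1000)
for step in (0, 1000, 5000, 10000):
    print(step, 0.001 * 0.96 ** (step / 1000))
# -> 0.001, 0.00096, ~0.000815, ~0.000665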
def init_model(train=True):
    """
    Init model for both training and testing.
    :param train: indicate if currently training
    :return: everything this model needs
    """
    # Create training summary folder if not exist
    create_folder("summary/train/images")

    # Create testing summary folder if not exist
    create_folder("summary/test/images")

    # Use gpu if exist
    with tf.device('/device:GPU:0'):
        # Init image data file path
        testdir = input("Enter the image path:")
        # file_paths = init_file_path(testing_dir)

        # Init training flag and global step
        print("⏳ Init placeholder and variables...")
        is_training = tf.placeholder(tf.bool, name="is_training")
        global_step = tf.train.get_or_create_global_step()

        # Load vgg16 model
        print("🤖 Load vgg16 model...")
        vgg = vgg16.Vgg16()

        # Build residual encoder model
        print("🤖 Build residual encoder model...")
        residual_encoder = ResidualEncoder()

        # Get dataset iterator
        # iterator = get_dataset_iterator(file_paths, batch_size, shuffle=True)

        # Get color image
        # color_image_rgb = iterator.get_next(name="color_image_rgb")  # fetch the next color image
        color_image_rgb = read_image(testdir)
        color_image_yuv = rgb_to_yuv(color_image_rgb, "color_image_yuv")  # convert the RGB image to YUV

        # Get gray image
        gray_image_one_channel = tf.image.rgb_to_grayscale(
            color_image_rgb, name="gray_image_one_channel")  # single-channel grayscale image
        gray_image_three_channels = tf.image.grayscale_to_rgb(
            gray_image_one_channel, name="gray_image_three_channels")  # 3-channel (RGB-format) grayscale image
        gray_image_yuv = rgb_to_yuv(gray_image_three_channels,
                                    "gray_image_yuv")  # convert the grayscale image to YUV

        # Build vgg model
        with tf.name_scope("vgg16"):
            vgg.build(gray_image_three_channels)  # build the VGG model

        # Predict model
        # Build the residual encoder: input_data is the input to the first layer, vgg is the VGG model,
        # is_training flags whether we are training
        predict = residual_encoder.build(input_data=gray_image_three_channels, vgg=vgg, is_training=is_training)
        # predict holds the two predicted channels (U and V)
        predict_yuv = tf.concat(axis=3,
                                values=[tf.slice(gray_image_yuv, [0, 0, 0, 0], [-1, -1, -1, 1], name="gray_image_y"),
                                        predict],
                                name="predict_yuv")  # concatenate the Y, U and V channels
        predict_rgb = yuv_to_rgb(predict_yuv, "predict_rgb")

        # Get loss
        # Loss between the predicted UV channels and the ground-truth UV channels
        loss = residual_encoder.get_loss(
            predict_val=predict,
            real_val=tf.slice(color_image_yuv, [0, 0, 0, 1], [-1, -1, -1, 2], name="color_image_uv"))

        # Prepare optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # global_step counts training iterations; it is also the x-axis value used when
            # plotting loss and accuracy
            lr = tf.train.exponential_decay(0.001, global_step, 1000, 0.96)
            optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
                loss, global_step=global_step, name='optimizer')  # global_step is incremented by 1 per training batch

        # Init tensorflow summaries
        print("⏳ Init tensorflow summaries...")
        tf.summary.histogram("loss", loss)
        tf.summary.image("gray_image", gray_image_three_channels, max_outputs=1)
        tf.summary.image("predict_image", predict_rgb, max_outputs=1)
        tf.summary.image("color_image", color_image_rgb, max_outputs=1)

    return is_training, global_step, optimizer, loss, predict_rgb, color_image_rgb, gray_image_three_channels, testdir
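# Hedged inference sketch for the single-image variant above. Assumptions: the tuple
# order shown, a hypothetical checkpoint at "summary/train/model.ckpt", a leading batch
# dimension of 1 on predict_rgb, and predict_rgb values already scaled to [0, 1].
import numpy as np
import tensorflow as tf
from PIL import Image

(is_training, global_step, optimizer, loss, predict_rgb,
 color_image_rgb, gray_image_three_channels, testdir) = init_model(train=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, "summary/train/model.ckpt")  # hypothetical checkpoint path
    colored = sess.run(predict_rgb, feed_dict={is_training: False})
    Image.fromarray(np.uint8(np.clip(colored[0], 0.0, 1.0) * 255)).save("colorized.png")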
def init_model(train=True):
    """
    Init model for both training and testing
    :param train: indicate if currently training
    :return: everything this model needs
    """
    # Create training summary folder if not exist
    create_folder("summary/train/images")

    # Create testing summary folder if not exist
    create_folder("summary/test/images")

    # Init image data file path
    print "Init file path"
    if train:
        file_paths = init_file_path(train_dir)
    else:
        file_paths = init_file_path(test_dir)

    # Init placeholder and global step
    print "Init placeholder"
    is_training = tf.placeholder(tf.bool, name="training_flag")
    global_step = tf.Variable(0, name='global_step', trainable=False)
    uv = tf.placeholder(tf.uint8, name='uv')

    # Init vgg16 model
    print "Init vgg16 model"
    vgg = vgg16.Vgg16()

    # Init residual encoder model
    print "Init residual encoder model"
    residual_encoder = ResidualEncoder()

    # Color image
    color_image_rgb = input_pipeline(file_paths, batch_size, test=not train)
    color_image_yuv = rgb_to_yuv(color_image_rgb, "rgb2yuv_for_color_image")

    # Gray image
    gray_image = tf.image.rgb_to_grayscale(color_image_rgb, name="gray_image")
    gray_image_rgb = tf.image.grayscale_to_rgb(gray_image, name="gray_image_rgb")
    gray_image_yuv = rgb_to_yuv(gray_image_rgb, "rgb2yuv_for_gray_image")
    gray_image = tf.concat(concat_dim=3, values=[gray_image, gray_image, gray_image], name="gray_image_input")

    # Build vgg model
    with tf.name_scope("content_vgg"):
        vgg.build(gray_image)

    # Predict model
    predict = residual_encoder.build(input_data=gray_image, vgg=vgg, is_training=is_training)
    predict_yuv = tf.concat(concat_dim=3,
                            values=[tf.slice(gray_image_yuv, [0, 0, 0, 0], [-1, -1, -1, 1], name="gray_image_y"),
                                    predict],
                            name="predict_yuv")
    predict_rgb = yuv_to_rgb(predict_yuv, "yuv2rgb_for_pred_image")

    # Cost
    cost = residual_encoder.get_cost(predict_val=predict,
                                     real_val=tf.slice(color_image_yuv, [0, 0, 0, 1], [-1, -1, -1, 2], name="color_image_uv"))
    u_channel_cost = tf.slice(cost, [0, 0, 0, 0], [-1, -1, -1, 1], name="u_channel_cost")
    v_channel_cost = tf.slice(cost, [0, 0, 0, 1], [-1, -1, -1, 1], name="v_channel_cost")
    cost = tf.case({tf.equal(uv, 1): lambda: u_channel_cost,
                    tf.equal(uv, 2): lambda: v_channel_cost},
                   default=lambda: (u_channel_cost + v_channel_cost) / 2,
                   exclusive=True,
                   name="cost")

    # Using different learning rate in different training steps
    # lr = tf.div(learning_rate, tf.cast(tf.pow(2, tf.div(global_step, 160000)), tf.float32), name="learning_rate")

    # Optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost, global_step=global_step)

    # Summaries
    print "Init summaries"
    tf.histogram_summary("cost", tf.reduce_mean(cost))
    tf.image_summary("color_image_rgb", color_image_rgb, max_images=1)
    tf.image_summary("predict_rgb", predict_rgb, max_images=1)
    tf.image_summary("gray_image", gray_image_rgb, max_images=1)

    return is_training, global_step, uv, optimizer, cost, predict, predict_rgb, color_image_rgb, gray_image_rgb, file_paths
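# Tiny standalone illustration (not project code) of the tf.case pattern used above:
# the uv placeholder selects the U-channel cost (uv=1), the V-channel cost (uv=2),
# or their average otherwise. Written for the same TF 1.x-era API as the snippet.
import tensorflow as tf

uv = tf.placeholder(tf.uint8, name='uv_demo')
u_cost = tf.constant(0.2)
v_cost = tf.constant(0.6)
selected = tf.case({tf.equal(uv, 1): lambda: u_cost,
                    tf.equal(uv, 2): lambda: v_cost},
                   default=lambda: (u_cost + v_cost) / 2,
                   exclusive=True)

with tf.Session() as sess:
    print(sess.run(selected, feed_dict={uv: 1}))  # 0.2
    print(sess.run(selected, feed_dict={uv: 2}))  # 0.6
    print(sess.run(selected, feed_dict={uv: 0}))  # 0.4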
def main(classify=True):
    """
    Main entry point for program.
    """
    logger.debug(os.path.realpath(__file__))
    logger.debug("Function is going to classify: {}".format(classify))

    if classify:
        # I dislike this
        from vgg import vgg16
        from keras.utils import get_file
        from keras.preprocessing.image import ImageDataGenerator
        from numpy import save

    # Generate file paths
    logger.info("Generating temp filepath for image...")

    # Temp folder
    chdir(TEMP_ROOT)
    rmtree(path.split(TEMP_DIRECTORY)[0], ignore_errors=True)
    makedirs(path.join(TEMP_ROOT, TEMP_DIRECTORY))

    # Output folder
    logger.info("Generating output filepath for storage...")
    if not path.exists(ARCHIVE_DIRECTORY):
        makedirs(ARCHIVE_DIRECTORY)
    logger.info("Done.")

    if pi_platform:
        logger.info("Running on Raspberry Pi Platform.")

        # Take image
        logger.info("Taking image with camera...")
        with picamera.PiCamera() as camera:
            # Get her up and running
            camera.start_preview()
            logger.debug("Zzzz. Camera warm-up.")
            sleep(2)  # Warm up time
            camera.resolution = (1024, 768)
            camera.vflip = True
            filename = strftime("%Y-%m-%d %H:%M:%S", gmtime())
            camera.capture(path.join(TEMP_ROOT, TEMP_DIRECTORY, filename + ".jpg"), 'jpeg')
        logger.info("Done.")
    else:
        logger.info("Not running on Raspberry Pi Platform.")
        logger.info("Using dummy image.")
        filename = "doggo"  # Because I'm a professional like that.
        copy(path.join(EXECUTION_PATH, filename + ".jpg"),
             path.join(TEMP_ROOT, TEMP_DIRECTORY, filename + ".jpg"))
        logger.info("Done.")

    if classify:
        # Create VGG
        logging.info("Generating VGG model... patience please.")
        vgg = vgg16.Vgg16()
        vgg.compile()
        logging.info("Your patience is rewarded. Done.")

        # Classify image
        logging.info("Classifying the image...")
        gen = ImageDataGenerator()
        test = gen.flow_from_directory(
            TEMP_DIRECTORY.split("/")[0],
            target_size=(224, 224),
            batch_size=1,
            class_mode=None,
            shuffle=False,
        )
        predictions = vgg.model.predict_generator(generator=test, val_samples=1)
        logging.info("Done. Most likely outcome was #{}".format(predictions.argmax()))

        # Load the classes
        imagenet_classes_location = get_file("imagenet_class_index.json",
                                             "files.fast.ai/models/imagenet_class_index.json",
                                             cache_subdir='models')
        with open(imagenet_classes_location, 'r') as f:
            imagenet_classes = json.load(f)
        logging.info("Classname: {}".format(imagenet_classes[str(predictions.argmax())]))

        # Archive results and predictions
        logging.info("Archiving prediction results.")
        save(path.join(ARCHIVE_DIRECTORY, filename + ".npy"), predictions)
        with open(path.join(ARCHIVE_DIRECTORY, filename + " results.csv"), 'a') as f:
            f.write("index, class, probability\n")
            for i in range(1000):
                f.write("{},{},{}\n".format(str(i), imagenet_classes[str(i)][1], predictions[0][i]))

    # Archive
    logger.info("Archiving captured image...")
    copy(path.join(TEMP_ROOT, TEMP_DIRECTORY, filename + ".jpg"),
         path.join(ARCHIVE_DIRECTORY, filename))
    logger.info("Done.")
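# Illustration (not part of the program) of the imagenet_class_index.json file fetched
# above: it maps string indices to [wordnet_id, human_readable_name] pairs, which is why
# imagenet_classes[str(i)][1] is used as the class name when writing the CSV.
import json

sample = json.loads('{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"]}')
print(sample["1"][1])  # -> goldfish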