2、如果想要保存,利用r_image.save("img.jpg")即可保存。 3、如果想要原图和分割图不混合,可以把blend参数设置成False。 4、如果想根据mask获取对应的区域,可以参考detect_image中,利用预测结果绘图的部分。 seg_img = np.zeros((np.shape(pr)[0],np.shape(pr)[1],3)) for c in range(self.num_classes): seg_img[:, :, 0] += ((pr == c)*( self.colors[c][0] )).astype('uint8') seg_img[:, :, 1] += ((pr == c)*( self.colors[c][1] )).astype('uint8') seg_img[:, :, 2] += ((pr == c)*( self.colors[c][2] )).astype('uint8') ''' import tensorflow as tf from PIL import Image from pspnet import Pspnet gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) pspnet = Pspnet() while True: img = input('Input image filename:') try: image = Image.open(img) except: print('Open Error! Try again!') continue else: r_image = pspnet.detect_image(image) r_image.show()
# 打印以下超参数 for key in args.__dict__: if key.find('__') == -1: offset = 20 - key.__len__() print(key + ' ' * offset, args.__dict__[key]) # 使用那一块显卡 os.environ["CUDA_VISIBLE_DEVICES"] = "6" data_path_df = pd.read_csv('dataset/path_list.csv') data_path_df = data_path_df.sample(frac=1) # 第一次打乱 dataset = DataSet(image_path=data_path_df['image'].values, label_path=data_path_df['label'].values) model = Pspnet(batch_norm_decay=args.batch_norm_decay) image = tf.placeholder(tf.float32, [None, 1024, 1024, 3], name='input_x') label = tf.placeholder(tf.int32, [None, 1024, 1024]) lr = tf.placeholder(tf.float32, ) logits = model.forward_pass(image) logits_prob = tf.nn.softmax(logits=logits, name='logits_prob') predicts = tf.argmax(logits, axis=-1, name='predicts') variables_to_restore = tf.trainable_variables(scope='resnet_v2_50') # finetune resnet_v2_50的参数(block1到block4) restorer = tf.train.Saver(variables_to_restore) # cross_entropy cross_entropy = tf.reduce_mean(
#-------------------------------------# # 调用摄像头或者视频进行检测 # 调用摄像头直接运行即可 # 调用视频可以将cv2.VideoCapture()指定路径 # 视频的保存并不难,可以百度一下看看 #-------------------------------------# import time import cv2 import numpy as np from PIL import Image from pspnet import Pspnet pspnet = Pspnet() #-------------------------------------# # 调用摄像头 # capture=cv2.VideoCapture("1.mp4") #-------------------------------------# capture = cv2.VideoCapture(0) fps = 0.0 while (True): t1 = time.time() # 读取某一帧 ref, frame = capture.read() # 格式转变,BGRtoRGB frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # 转变成Image frame = Image.fromarray(np.uint8(frame)) # 进行检测
def configure_networks_single(self):
    """Build the single-device training graph, metrics, saver and summary writer.

    Creates the ``inputs`` / ``annotations`` / ``is_train`` placeholders,
    instantiates the network selected by ``self.conf.network_name``, and
    defines the loss, train op, accuracy and mean-IoU ops. It then runs the
    global variable initializer on ``self.sess`` and creates ``self.saver``
    and ``self.writer``.

    Attributes set on ``self``: ``inputs``, ``annotations``, ``is_train``,
    ``predictions``, ``rates`` (plus ``pred2`` and ``loss_op2`` for pspnet),
    ``loss_op``, ``train_op``, ``decoded_predictions``, ``accuracy_op``,
    ``m_iou``, ``miou_op``, ``out``, ``gt``, ``saver``, ``writer``.

    Raises:
        ValueError: if ``self.conf.network_name`` is not a supported network.
    """
    # The graph-level seed only influences ops created AFTER it is set, so it
    # must be set before any variable initializer / random op is built.
    tf.set_random_seed(self.conf.random_seed)

    # ---------------- step 1 ----------------
    # Placeholders for inputs/labels; convert the labels to one-hot form for
    # the loss computation below.
    self.inputs = tf.placeholder(tf.float32, self.input_shape, name='inputs')
    self.annotations = tf.placeholder(tf.int64, self.output_shape,
                                      name='annotations')
    self.is_train = tf.placeholder(tf.bool, name='is_train')
    expand_annotations = tf.expand_dims(self.annotations, -1,
                                        name='annotations/expand_dims')
    squeezed_annotations = tf.squeeze(expand_annotations,
                                      axis=[self.channel_axis],
                                      name='annotations/squeeze')
    one_hot_annotations = tf.one_hot(squeezed_annotations,
                                     depth=self.conf.class_num,
                                     axis=self.channel_axis,
                                     name='annotations/one_hot')

    # ---------------- step 2 ----------------
    # Build the selected model and compute its predictions (and any extra
    # outputs the model exposes).
    network_name = self.conf.network_name
    if network_name == "ascnet":
        model = Ascnet(self.sess, self.conf, self.is_train)
    elif network_name == "segnet":
        model = Segnet(self.sess, self.conf, self.is_train)
    elif network_name == "deeplabv3":
        model = Deeplabv3(self.sess, self.conf, self.is_train)
    elif network_name == "deeplabv3plus":
        model = Deeplabv3plus(self.sess, self.conf, self.is_train)
    elif network_name == "unet":
        model = Unet(self.sess, self.conf, self.is_train)
    elif network_name == "pspnet":
        model = Pspnet(self.sess, self.conf, self.is_train)
    else:
        # Fail early instead of hitting an AttributeError on
        # self.predictions further down.
        raise ValueError("Unsupported network_name: %r" % (network_name,))

    outputs = model.inference(self.inputs)
    if network_name == "pspnet":
        # PSPNet additionally returns the auxiliary-branch prediction.
        self.predictions, self.rates, self.pred2 = outputs
    else:
        self.predictions, self.rates = outputs

    # ---------------- step 3 ----------------
    # Softmax cross-entropy between the one-hot labels and the predictions.
    # Optional class-weighted variant (CamVid weights), kept for reference:
    # weights = [0.01, 0.007, 0.161, 0.007, 0.02, 0.016, 0.18, 0.15, 0.04, 0.29, 1.0, 0.04]
    # weights = tf.convert_to_tensor(weights)
    # weights = tf.reduce_sum(tf.multiply(one_hot_annotations, weights), -1, name='loss/weights')
    # losses = tf.losses.softmax_cross_entropy(one_hot_annotations, self.predictions, weights=weights, scope='loss/losses')
    losses = tf.losses.softmax_cross_entropy(one_hot_annotations,
                                             self.predictions,
                                             scope='loss/losses')
    self.loss_op = tf.reduce_mean(losses, name='loss/loss_op')

    # PSPNet has an extra auxiliary loss, added with a weight of 0.4.
    if network_name == "pspnet":
        # Weighted variant:
        # losses2 = tf.losses.softmax_cross_entropy(one_hot_annotations, self.pred2, weights=weights, scope='loss/losses2')
        losses2 = tf.losses.softmax_cross_entropy(one_hot_annotations,
                                                  self.pred2,
                                                  scope='loss/losses2')
        self.loss_op2 = tf.reduce_mean(losses2, name='loss/loss_op2')
        self.loss_op = self.loss_op + self.loss_op2 * 0.4

    # ---------------- step 4 ----------------
    # Optimizer and train op.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.conf.learning_rate,
        beta1=self.conf.beta1,
        beta2=self.conf.beta2,
        epsilon=self.conf.epsilon)
    # Make the train op depend on UPDATE_OPS so that batch-norm moving
    # statistics are updated on every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = optimizer.minimize(self.loss_op, name='train_op')

    # ---------------- step 5 ----------------
    # Metrics: accuracy and mean IoU. There is no built-in dice op, so the
    # decoded predictions and labels are exposed (self.out / self.gt) and
    # dice is computed by the caller at validation time.
    # 1) accuracy
    self.decoded_predictions = tf.argmax(self.predictions, self.channel_axis,
                                         name='accuracy/decode_pred')
    correct_prediction = tf.equal(self.annotations, self.decoded_predictions,
                                  name='accuracy/correct_pred')
    self.accuracy_op = tf.reduce_mean(
        tf.cast(correct_prediction, tf.float32, name='accuracy/cast'),
        name='accuracy/accuracy_op')
    # 2) mean IoU; pixels predicted as class 0 get weight 0 in the metric.
    weights = tf.cast(
        tf.greater(self.decoded_predictions, 0, name='m_iou/greater'),
        tf.int32, name='m_iou/weights')
    self.m_iou, self.miou_op = tf.metrics.mean_iou(
        self.annotations, self.decoded_predictions, self.conf.class_num,
        weights, name='m_iou/m_ious')
    # 3) dice inputs: keep prediction and ground truth as float tensors.
    self.out = tf.cast(self.decoded_predictions, tf.float32)
    self.gt = tf.cast(self.annotations, tf.float32)

    # ---------------- step 6 ----------------
    # Initialize all global variables before any training step is run.
    self.sess.run(tf.global_variables_initializer())

    # ---------------- step 7 ----------------
    # Saver and summary writer. Batch-norm moving mean/variance are not
    # trainable, so they are collected by name and saved explicitly.
    trainable_vars = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [
        g for g in g_list if 'batch_norm/moving_mean' in g.name
    ]
    bn_moving_vars += [
        g for g in g_list if 'batch_norm/moving_variance' in g.name
    ]
    trainable_vars += bn_moving_vars
    # max_to_keep=0 keeps every checkpoint (none are deleted).
    self.saver = tf.train.Saver(var_list=trainable_vars, max_to_keep=0)
    self.writer = tf.summary.FileWriter(self.conf.logdir, self.sess.graph)
def configure_networks_multi(self):
    """Build the multi-GPU (tower) training graph, metrics, saver and writer.

    The ``inputs`` / ``annotations`` placeholders are sliced into
    ``self.conf.gpu_num`` consecutive chunks of ``self.conf.batch`` samples.
    Each chunk is run through a copy of the selected network on its own GPU
    with shared variables. Per-tower gradients are averaged with
    ``self.average_gradients`` and applied by one train op. Tower predictions
    are concatenated along ``self.batch_axis`` to compute the reported loss,
    accuracy and mean IoU on the whole batch.

    Attributes set on ``self``: ``inputs``, ``annotations``, ``is_train``,
    ``x``, ``y`` (last tower's slices), ``pred2`` (pspnet only, last tower),
    ``train_op``, ``predictions``, ``rates``, ``loss_op``,
    ``decoded_predictions``, ``accuracy_op``, ``m_iou``, ``miou_op``,
    ``out``, ``gt``, ``saver``, ``writer``.

    Raises:
        ValueError: if ``self.conf.network_name`` is not a supported network.
    """
    # The graph-level seed only influences ops created AFTER it is set, so it
    # must be set before any variable initializer / random op is built.
    tf.set_random_seed(self.conf.random_seed)

    # ---------------- step 1 ----------------
    # Placeholders for inputs/labels; one-hot labels for the whole batch are
    # used for the merged (reported) loss in step 6.
    self.inputs = tf.placeholder(tf.float32, self.input_shape, name='inputs')
    self.annotations = tf.placeholder(tf.int64, self.output_shape,
                                      name='annotations')
    self.is_train = tf.placeholder(tf.bool, name='is_train')
    expand_annotations = tf.expand_dims(self.annotations, -1,
                                        name='annotations/expand_dims')
    squeezed_annotations = tf.squeeze(expand_annotations,
                                      axis=[self.channel_axis],
                                      name='annotations/squeeze')
    one_hot_annotations = tf.one_hot(squeezed_annotations,
                                     depth=self.conf.class_num,
                                     axis=self.channel_axis,
                                     name='annotations/one_hot')

    # ---------------- step 2 ----------------
    # Per-GPU results, concatenated into one batch afterwards.
    tower_grads = []
    tower_predictions = []
    tower_rate = []

    # ---------------- step 3 ----------------
    # Optimizer shared by all towers.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.conf.learning_rate,
        beta1=self.conf.beta1,
        beta2=self.conf.beta2,
        epsilon=self.conf.epsilon)

    # ---------------- step 4 ----------------
    # Build one tower per GPU. The enclosing variable_scope is what allows
    # the towers to share variables once reuse is switched on.
    network_name = self.conf.network_name
    batch = self.conf.batch
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(self.conf.gpu_num):
            print("this is %d gpu" % i)
            with tf.device("/gpu:%d" % i):
                with tf.name_scope("tower_%d" % i):
                    # Slice this tower's share of the batch and one-hot
                    # encode its labels for the tower loss.
                    self.x = self.inputs[i * batch:(i + 1) * batch]
                    self.y = self.annotations[i * batch:(i + 1) * batch]
                    expand_y = tf.expand_dims(self.y, -1,
                                              name='y/expand_dims')
                    squeezed_y = tf.squeeze(expand_y,
                                            axis=[self.channel_axis],
                                            name='y/squeeze')
                    one_hot_y = tf.one_hot(squeezed_y,
                                           depth=self.conf.class_num,
                                           axis=self.channel_axis,
                                           name='y/one_hot')

                    # Instantiate the selected model.
                    if network_name == "ascnet":
                        model = Ascnet(self.sess, self.conf, self.is_train)
                    elif network_name == "segnet":
                        model = Segnet(self.sess, self.conf, self.is_train)
                    elif network_name == "deeplabv3":
                        model = Deeplabv3(self.sess, self.conf,
                                          self.is_train)
                    elif network_name == "deeplabv3plus":
                        model = Deeplabv3plus(self.sess, self.conf,
                                              self.is_train)
                    elif network_name == "unet":
                        model = Unet(self.sess, self.conf, self.is_train)
                    elif network_name == "pspnet":
                        model = Pspnet(self.sess, self.conf, self.is_train)
                    else:
                        raise ValueError(
                            "Unsupported network_name: %r" % (network_name,))

                    # Run the model on THIS tower's slice (not the full
                    # batch), so predictions match one_hot_y in shape.
                    outputs = model.inference(self.x)
                    if network_name == "pspnet":
                        # PSPNet also returns the auxiliary prediction.
                        prediction, rate, self.pred2 = outputs
                    else:
                        prediction, rate = outputs

                    # Tower loss.
                    # Optional class-weighted variant (CamVid weights):
                    # weights = [0.01, 0.007, 0.161, 0.007, 0.02, 0.016, 0.18, 0.15, 0.04, 0.29, 1.0, 0.04]
                    # weights = tf.convert_to_tensor(weights)
                    # weights = tf.reduce_sum(tf.multiply(one_hot_y, weights), -1, name='loss/weights')
                    # losses = tf.losses.softmax_cross_entropy(one_hot_y, prediction, weights=weights, scope='loss/losses')
                    losses = tf.losses.softmax_cross_entropy(
                        one_hot_y, prediction, scope='loss/losses')
                    loss_each = tf.reduce_mean(losses, name='loss/loss_each')
                    if network_name == "pspnet":
                        # Auxiliary loss on the tower's own labels, added
                        # with a weight of 0.4.
                        losses2 = tf.losses.softmax_cross_entropy(
                            one_hot_y, self.pred2, scope='loss/losses2')
                        loss_each2 = tf.reduce_mean(losses2,
                                                    name='loss/loss_op2')
                        loss_each = loss_each + loss_each2 * 0.4

                    # After the first tower has declared the variables,
                    # switch reuse on so later towers share the same
                    # parameters. name_scope does not affect the names
                    # produced by tf.get_variable.
                    tf.get_variable_scope().reuse_variables()

                    # Gradients and outputs of this tower.
                    grads = optimizer.compute_gradients(loss_each)
                    tower_grads.append(grads)
                    tower_predictions.append(prediction)
                    tower_rate.append(rate)

    # ---------------- step 5 ----------------
    # Average the tower gradients and build the train op. apply_gradients is
    # the second half of optimizer.minimize. Depending on UPDATE_OPS keeps
    # the batch-norm moving statistics updated on every training step.
    grads = self.average_gradients(tower_grads)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = optimizer.apply_gradients(grads, name='train_op')

    # ---------------- step 6 ----------------
    # Metrics on the re-assembled batch.
    # 1) concatenate tower outputs along the batch axis
    preds = tower_predictions[0]
    r = tower_rate[0]
    for i in range(1, self.conf.gpu_num):
        preds = tf.concat([preds, tower_predictions[i]], self.batch_axis,
                          name='preds/concat' + str(i))
        r = tf.concat([r, tower_rate[i]], self.batch_axis,
                      name='r/concat' + str(i))
    self.predictions = preds
    self.rates = r
    # 2) loss over the whole batch (for reporting)
    loss_merge = tf.losses.softmax_cross_entropy(one_hot_annotations,
                                                 self.predictions,
                                                 scope='loss/loss_merge')
    self.loss_op = tf.reduce_mean(loss_merge, name='loss/loss_op')
    # 3) accuracy
    self.decoded_predictions = tf.argmax(self.predictions, self.channel_axis,
                                         name='accuracy/decode_pred')
    correct_prediction = tf.equal(self.annotations, self.decoded_predictions,
                                  name='accuracy/correct_pred')
    self.accuracy_op = tf.reduce_mean(
        tf.cast(correct_prediction, tf.float32, name='accuracy/cast'),
        name='accuracy/accuracy_op')
    # 4) mean IoU; pixels predicted as class 0 get weight 0 in the metric.
    weights = tf.cast(
        tf.greater(self.decoded_predictions, 0, name='m_iou/greater'),
        tf.int32, name='m_iou/weights')
    self.m_iou, self.miou_op = tf.metrics.mean_iou(
        self.annotations, self.decoded_predictions, self.conf.class_num,
        weights, name='m_iou/m_ious')
    # 5) dice inputs: expose prediction and ground truth to the caller.
    self.out = self.decoded_predictions
    self.gt = self.annotations

    # ---------------- step 7 ----------------
    # Initialize all global variables and list them for inspection.
    self.sess.run(tf.global_variables_initializer())
    for v in tf.global_variables():
        print(v.name)

    # ---------------- step 8 ----------------
    # Saver and summary writer. Batch-norm moving mean/variance are not
    # trainable, so they are collected by name and saved explicitly.
    trainable_vars = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [
        g for g in g_list if 'batch_norm/moving_mean' in g.name
    ]
    bn_moving_vars += [
        g for g in g_list if 'batch_norm/moving_variance' in g.name
    ]
    trainable_vars += bn_moving_vars
    # max_to_keep=0 keeps every checkpoint (none are deleted).
    self.saver = tf.train.Saver(var_list=trainable_vars, max_to_keep=0)
    self.writer = tf.summary.FileWriter(self.conf.logdir, self.sess.graph)
#-------------------------------------------------------# VOCdevkit_path = 'VOCdevkit' image_ids = open( os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Segmentation/val.txt"), 'r').read().splitlines() gt_dir = os.path.join(VOCdevkit_path, "VOC2007/SegmentationClass/") miou_out_path = "miou_out" pred_dir = os.path.join(miou_out_path, 'detection-results') if miou_mode == 0 or miou_mode == 1: if not os.path.exists(pred_dir): os.makedirs(pred_dir) print("Load model.") pspnet = Pspnet() print("Load model done.") print("Get predict result.") for image_id in tqdm(image_ids): image_path = os.path.join( VOCdevkit_path, "VOC2007/JPEGImages/" + image_id + ".jpg") image = Image.open(image_path) image = pspnet.get_miou_png(image) image.save(os.path.join(pred_dir, image_id + ".png")) print("Get predict result done.") if miou_mode == 0 or miou_mode == 2: print("Get miou.") hist, IoUs, PA_Recall, Precision = compute_mIoU( gt_dir, pred_dir, image_ids, num_classes,
# 将单张图片预测、摄像头检测和FPS测试功能 # 整合到了一个py文件中,通过指定mode进行模式的修改。 #----------------------------------------------------# import time import cv2 import numpy as np from PIL import Image from pspnet import Pspnet if __name__ == "__main__": #-------------------------------------------------------------------------# # 如果想要修改对应种类的颜色,到__init__函数里修改self.colors即可 #-------------------------------------------------------------------------# pspnet = Pspnet() #----------------------------------------------------------------------------------------------------------# # mode用于指定测试的模式: # 'predict'表示单张图片预测,如果想对预测过程进行修改,如保存图片,截取对象等,可以先看下方详细的注释 # 'video'表示视频检测,可调用摄像头或者视频进行检测,详情查看下方注释。 # 'fps'表示测试fps,使用的图片是img里面的street.jpg,详情查看下方注释。 # 'dir_predict'表示遍历文件夹进行检测并保存。默认遍历img文件夹,保存img_out文件夹,详情查看下方注释。 #----------------------------------------------------------------------------------------------------------# mode = "predict" #----------------------------------------------------------------------------------------------------------# # video_path用于指定视频的路径,当video_path=0时表示检测摄像头 # 想要检测视频,则设置如video_path = "xxx.mp4"即可,代表读取出根目录下的xxx.mp4文件。 # video_save_path表示视频保存的路径,当video_save_path=""时表示不保存 # 想要保存视频,则设置如video_save_path = "yyy.mp4"即可,代表保存为根目录下的yyy.mp4文件。 # video_fps用于保存的视频的fps # video_path、video_save_path和video_fps仅在mode='video'时有效