def process_image(select_threshold=0.15, nms_threshold=.1, net_shape=(300, 300)):
    # select_threshold: box score threshold -- the classification score of each
    # box prediction is compared against it; a score above it means the box is
    # considered to have captured an object.
    # nms_threshold: overlap threshold -- if two boxes for the same object
    # overlap by more than this value, the deduplication step below removes one.
    print("Starting detection...")
    while True:
        # Read one frame from the video.
        ret, image_np = cap.read()
        # Run the SSD model to get the 4-D input tensor, class predictions and
        # box predictions; rbbox_img is the maximum detection extent, fixed
        # here to [0, 0, 1, 1], i.e. the whole image.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: image_np})
        # ssd_bboxes_select() applies the threshold to each feature layer's
        # classification scores, normalised mapped coordinates and anchor-box
        # sizes, yielding the objects detected on each layer with their
        # classes and coordinates.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        # Clip boxes that extend beyond the detection boundary.
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        # Deduplicate: remove targets detected more than once.
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Map box coordinates back onto the original image (all coordinates
        # above are normalised, so this reverses that step).
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        bboxes_draw_on_img(image_np, rclasses, rscores, rbboxes,
                           colors_plasma, thickness=8)
        cv2.imshow('object detection', cv2.resize(image_np, (1000, 800)))
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    print("Detection finished...")
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, select_threshold=0.6, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    # Note: these two tensors are fetched by name but not used below.
    softmax = isess.graph.get_tensor_by_name(
        "ssd_300_vgg/softmax_5/Reshape_1:0")
    conv62 = isess.graph.get_tensor_by_name(
        "ssd_300_vgg/block11_box/Reshape:0")
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=2, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, tensors_dict, select_threshold=0.5, nms_threshold=.45,
                  net_shape=(300, 300)):
    # Read tensors in tensors_dict.
    img_input = tensors_dict['img_input']
    image_4d = tensors_dict['image_4d']
    predictions = tensors_dict['predictions']
    localisations = tensors_dict['localisations']
    bbox_img = tensors_dict['bbox_img']
    ssd_anchors = tensors_dict['ssd_anchors']
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = sess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_tensor(self, tensors, net_shape=(150, 496), select_threshold=0.9,
                   nms_threshold=0.3):
    rpredictions, rlocalisations, rbbox_img = tensors
    ssd_anchors = self.ssd.anchors(net_shape)
    rclasses, rscores, rbboxes, ridxes, rlayers = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=self.ssd.params.num_classes, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes, ridxes, rlayers = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, ridxes, rlayers, top_k=400)
    rclasses, rscores, rbboxes, ridxes, rlayers = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, ridxes, rlayers, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes, ridxes, rlayers
def process_image(img, select_threshold=0.55, nms_threshold=.45,
                  net_shape=(512, 512)):
    # Process a single image frame.
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    # Sort bounding boxes by score and keep the top_k.
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    # Non-maximum suppression.
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    # Boxes are [ymin, xmin, ymax, xmax], normalised to [0, 1].
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def run_net(self, img, bboxes_sort_top_k=400):
    # Run SSD network.
    r_img, r_predictions, r_localisations, r_bbox_img = self.sess.run(
        [self.image_4d, self.predictions, self.localisations, self.bbox_img],
        feed_dict={self.img_input: img})
    # Select the classes, scores and bounding boxes of anchors whose
    # non-background score exceeds select_threshold.
    r_classes, r_scores, r_bboxes = np_methods.ssd_bboxes_select(
        r_predictions, r_localisations, self.ssd_anchors,
        select_threshold=self.select_threshold, img_shape=self.net_shape,
        num_classes=self.num_class, decode=True)
    # Clip bboxes to lie within the reference window bbox_ref.
    r_bboxes = np_methods.bboxes_clip(r_bbox_img, r_bboxes)
    # Sort by score and keep the top_k.
    r_classes, r_scores, r_bboxes = np_methods.bboxes_sort(
        r_classes, r_scores, r_bboxes, top_k=bboxes_sort_top_k)
    # Non-maximum suppression.
    r_classes, r_scores, r_bboxes = np_methods.bboxes_nms(
        r_classes, r_scores, r_bboxes, self.nms_threshold)
    # Resize bboxes to original image shape.
    r_bboxes = np_methods.bboxes_resize(r_bbox_img, r_bboxes)
    return r_classes, r_scores, r_bboxes
def process_image(img, select_threshold=0.01, nms_threshold=.45,
                  net_shape=net_shape):
    # Run txt network and time the forward pass.
    start_time = time.time()
    rimg, rpredictions, rlogits, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, logits, localisations, bbox_img],
        feed_dict={img_input: img})
    end_time = time.time()
    print(end_time - start_time)
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, txt_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=2, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    # print(rscores)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, select_threshold=0.05, nms_threshold=0.3):
    # Run PyramidBox network.
    # Upscale so that the shorter side is at least 640 pixels.
    h, w = img.shape[:2]
    if h < w and h < 640:
        scale = 640. / h
        h = 640
        w = int(w * scale)
    elif h >= w and w < 640:
        scale = 640. / w
        w = 640
        h = int(h * scale)
    img_tmp = Image.fromarray(np.uint8(img))
    resized_img = img_tmp.resize((w, h))
    net_shape = np.array(resized_img).shape[:2]
    rimg, rpredictions, rlocalisations, rbbox_img, e_ps = isess.run(
        [image_4d, predictions, localisations, bbox_img, end_points],
        feed_dict={img_input: resized_img})
    layer_shape = [e_ps['block3'].shape[1:3], e_ps['block4'].shape[1:3],
                   e_ps['block5'].shape[1:3], e_ps['block7'].shape[1:3],
                   e_ps['block8'].shape[1:3], e_ps['block9'].shape[1:3]]
    # SSD default anchor boxes.
    ssd_anchors = g_ssd_model.ssd_anchors_all_layers(feat_shapes=layer_shape,
                                                     img_shape=net_shape)
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations[0], ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=2, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=1000)
    return rclasses, rscores, rbboxes
def process_image(self, img, select_threshold=0.3, nms_threshold=0.35):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = self.isess.run(
        [self.image_4d, self.predictions, self.localisations, self.bbox_img],
        feed_dict={self.img_input: img})
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, self.layers_anchors,
        select_threshold=select_threshold, img_shape=self.net_shape,
        num_classes=self.num_classes, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape.
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def get_detection(self, cv_bgr, select_threshold=0.5, nms_threshold=.45,
                  net_shape=(300, 300)):
    # Run SSD network inference.
    rpredictions, rlocalisations, rbbox_img = self.sess.run(
        [self.predictions, self.localisations, self.bbox_img],
        feed_dict={self.input_x: cv_bgr})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, self.ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=5, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, select_threshold=0.3, nms_threshold=.8, net_shape=(300, 300)):
    # Run the SSD network to obtain the layer outputs.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get the classification results and box locations.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Map boxes back to the original image scale and draw them on it.
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    bboxes_draw_on_img(img, rclasses, rscores, rbboxes, colors_plasma, thickness=2)
    return img
def Apply(self):
    if not self.input:
        return
    rimg, rpredictions, rlocalisations, rbbox_img = self.isess.run(
        [self.image_4d, self.pred, self.loc, self.bbx],
        feed_dict={self.img_input: self.input['img']})
    self.rclasses, self.rscores, self.rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, self.ssd_anchors,
        select_threshold=self.thres, img_shape=self.net_shape,
        num_classes=self.total_classes, decode=True)
    self.rbboxes = np_methods.bboxes_clip(rbbox_img, self.rbboxes)
    self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_sort(
        self.rclasses, self.rscores, self.rbboxes, top_k=400)
    self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_nms(
        self.rclasses, self.rscores, self.rbboxes, nms_threshold=self.nms_thres)
    self.rbboxes = np_methods.bboxes_resize(rbbox_img, self.rbboxes)
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    print('rimg', rimg)
    print('rpredictions', rpredictions)
    print('rlocalisations', rlocalisations)
    print('rbbox_img', rbbox_img)
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, select_threshold=0.5, nms_threshold=.45,
                  net_shape=(300, 300), islarge=True, ismedium=True):
    # Run SSD network.
    starttime = time.time()
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img, large: islarge, medium: ismedium})
    # Pick the prediction layers and anchor set matching the network variant.
    if not islarge and not ismedium:
        rpredictions = rpredictions[0:4]
        rlocalisations = rlocalisations[0:4]
        ssd_anchors = ssd_anchors2
    elif ismedium:
        rpredictions = rpredictions[0:5]
        rlocalisations = rlocalisations[0:5]
        ssd_anchors = ssd_anchors3
    else:
        ssd_anchors = ssd_anchors1
    networktime = time.time() - starttime
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes, networktime
def select_single_result(bti_rpredictions, bti_rlocalisations, ssd_anchors=None,
                         select_threshold=0.5, nms_threshold=.45,
                         net_shape=(300, 300)):
    if ssd_anchors is None:
        raise Exception('ssd_anchors is None')
    rbbox_img = np.array([0., 0., 1., 1.])
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        bti_rpredictions, bti_rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(self, img, select_threshold=0.5, nms_threshold=.45):
    """Main image processing routine."""
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = self.isess.run(
        [self.cropper_model.image_4d, self.cropper_model.predictions,
         self.cropper_model.localisations, self.cropper_model.bbox_img],
        feed_dict={self.cropper_model.img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, self.cropper_model.ssd_anchors,
        select_threshold=select_threshold, img_shape=self.net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(video_path, outPutDirName, select_threshold=0.2,
                  nms_threshold=.1, net_shape=(300, 300)):
    print("Starting recognition...")
    # select_threshold: box score threshold -- the classification score of each
    # box prediction is compared against it; a score above it means the box is
    # considered to have captured an object.
    # nms_threshold: overlap threshold -- if two boxes for the same object
    # overlap by more than this value, the deduplication step below removes one.
    times = 0
    # Save one frame out of every `frameFrequency` frames.
    frameFrequency = 4
    cap = cv2.VideoCapture(video_path)
    while True:
        times += 1
        ret, image_np = cap.read()
        if not ret:
            print('no frame returned, stopping')
            break
        # Run the SSD model to get the 4-D input tensor, class predictions and
        # box predictions; rbbox_img is the maximum detection extent, fixed
        # here to [0, 0, 1, 1], i.e. the whole image.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: image_np})
        # ssd_bboxes_select applies the threshold to each feature layer's
        # classification scores, normalised mapped coordinates and anchor-box
        # sizes, yielding the detected objects with classes and coordinates.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        # Clip boxes that extend beyond the detection boundary.
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        # Deduplicate: remove targets detected more than once.
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Map box coordinates back onto the original image (all coordinates
        # above are normalised, so this reverses that step).
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        bboxes_draw_on_img(image_np, rclasses, rscores, rbboxes,
                           colors_plasma, thickness=8)
        if times % frameFrequency == 0:
            print("Saving frame {}...".format(times))
            cv2.imwrite(outPutDirName + str(times) + '.jpg', image_np)
    print("Image recognition finished...")
    cap.release()
    return times
def process_image(img, select_threshold=0.5, nms_threshold=.01, net_shape=(300, 300)):
    # select_threshold: box score threshold -- the classification score of each
    # box prediction is compared against it; a score above it means the box is
    # considered to have captured an object.
    # nms_threshold: overlap threshold -- if two boxes for the same object
    # overlap by more than this value, the deduplication function below removes one.
    # Run the SSD model to get the 4-D input tensor, class predictions and box
    # predictions; rbbox_img is the maximum detection extent, fixed here to
    # [0, 0, 1, 1], i.e. the whole image.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # ssd_bboxes_select() applies the threshold to each feature layer's
    # classification scores, normalised mapped coordinates and anchor-box
    # sizes, yielding the objects detected on each layer with their classes
    # and coordinates.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    """
    This function does a lot, so in more detail.

    Its inputs, for each of the six feature layers (see above), are:
        rpredictions:   the classification predictions,
        rlocalisations: the localisation predictions,
        ssd_anchors:    the anchor-box data
    where:
        the classification predictions hold, for every cell of the current
        feature layer, the class scores of each of its boxes;
        the localisation predictions hold, for every cell, the coordinate
        offsets of each of its boxes;
        the anchor-box data holds, for every cell, the reference geometry used
        to correct each box.

    From the localisation predictions and the anchor boxes, the function
    computes the centre and width/height of every box; centre and size are
    then slightly corrected by a decoding step to give a more accurate box.
    The decoding scheme is explained in detail later; it is an empirical
    recipe, so it can be skipped if you only care about the overall flow.

    After decoding, the function scores each box from the classification
    predictions; when a score exceeds the threshold (0.5 here), the box is
    considered to have captured an object, and its coordinates, class and
    score are exported, giving:
        rclasses: the predicted classes
        rscores:  the class scores
        rbboxes:  the coordinates

    Finally, note that the same target may be detected on several feature
    layers with slightly different box coordinates. Duplicates are not removed
    here; a dedicated function below handles deduplication.
    """
    # Clip boxes that extend beyond the detection boundary.
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    # Deduplicate: remove targets detected more than once.
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Map box coordinates back onto the original image (all coordinates above
    # are normalised, so this reverses that step).
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
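# The deduplication step that the commentary above defers to is greedy
# non-maximum suppression. Below is a minimal NumPy sketch of what
# np_methods.bboxes_nms does, written from the behaviour described above --
# an approximation for illustration, not the library's exact source.
import numpy as np

def iou(box, boxes):
    # Boxes are (ymin, xmin, ymax, xmax) in normalised coordinates.
    ymin = np.maximum(box[0], boxes[:, 0])
    xmin = np.maximum(box[1], boxes[:, 1])
    ymax = np.minimum(box[2], boxes[:, 2])
    xmax = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(ymax - ymin, 0.) * np.maximum(xmax - xmin, 0.)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_a + area_b - inter)

def greedy_nms(classes, scores, bboxes, nms_threshold=0.45):
    # Assumes the arrays are already sorted by descending score,
    # which is how bboxes_sort leaves them.
    keep = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep[i]:
            overlap = iou(bboxes[i], bboxes[i + 1:])
            # Suppress a later box only if it overlaps this one heavily
            # AND predicts the same class.
            keep[i + 1:] &= np.logical_or(overlap < nms_threshold,
                                          classes[i + 1:] != classes[i])
    return classes[keep], scores[keep], bboxes[keep]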
def Apply(self):
    self.internal_result = self.istub.Predict(self.internal_request, 10.0)
    # The serving reply carries one prediction and one localisation tensor per
    # feature layer (seven layers here), named predictions0..6 and
    # localisations0..6.
    rpredictions = [
        tensor_util.MakeNdarray(
            self.internal_result.outputs['predictions%d' % i])
        for i in range(7)
    ]
    rlocalisations = [
        tensor_util.MakeNdarray(
            self.internal_result.outputs['localisations%d' % i])
        for i in range(7)
    ]
    rbbox_img = tensor_util.MakeNdarray(
        self.internal_result.outputs['bbox_img'])
    self.rclasses, self.rscores, self.rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, SSD.ssd_anchors,
        select_threshold=SSD.thres, img_shape=SSD.net_shape,
        num_classes=SSD.total_classes, decode=True)
    self.rbboxes = np_methods.bboxes_clip(rbbox_img, self.rbboxes)
    self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_sort(
        self.rclasses, self.rscores, self.rbboxes, top_k=400)
    self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_nms(
        self.rclasses, self.rscores, self.rbboxes, nms_threshold=SSD.nms_thres)
    self.rbboxes = np_methods.bboxes_resize(rbbox_img, self.rbboxes)
def process_image(imgs, select_threshold=0.5, nms_threshold=.45,
                  net_shape=(300, 300)):
    # Run SSD network.
    mtime.start()
    rpredictions, rlocalisations = isess.run([predictions, localisations],
                                             feed_dict={img_4d: imgs})
    mtime.consume('run_foward')
    bat_rpredictions, bat_rlocalisations = re_construct_result(
        rpredictions, rlocalisations)
    mtime.consume('reconstuct_result')
    bat_rclasses = []
    bat_rscores = []
    bat_rbboxes = []
    rbbox_img = np.array([0., 0., 1., 1.])
    mtime.start()
    for bindex in range(batch_size):
        bti_rpredictions = bat_rpredictions[bindex]
        bti_rlocalisations = bat_rlocalisations[bindex]
        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            bti_rpredictions, bti_rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        # Collect per-image results for the batch.
        bat_rclasses.append(rclasses)
        bat_rscores.append(rscores)
        bat_rbboxes.append(rbboxes)
    mtime.consume('find_box')
    return bat_rclasses, bat_rscores, bat_rbboxes
def process_image(img, select_threshold=0.5, nms_threshold=.45,
                  net_shape=(shapeWidth, shapeHeight)):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def select_batch_result(bat_rpredictions, bat_rlocalisations, ssd_anchors=None,
                        select_threshold=0.5, nms_threshold=.45,
                        net_shape=(300, 300)):
    if ssd_anchors is None:
        raise Exception('ssd_anchors is None')
    bat_rclasses = []
    bat_rscores = []
    bat_rbboxes = []
    rbbox_img = np.array([0., 0., 1., 1.])
    batch_size = len(bat_rpredictions)
    for bindex in range(batch_size):
        bti_rpredictions = bat_rpredictions[bindex]
        bti_rlocalisations = bat_rlocalisations[bindex]
        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            bti_rpredictions, bti_rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        # Collect per-image results for the batch.
        bat_rclasses.append(rclasses)
        bat_rscores.append(rscores)
        bat_rbboxes.append(rbboxes)
    return bat_rclasses, bat_rscores, bat_rbboxes
def execute(data: list, **kwargs):
    for row in data:
        rpredictions = [
            np.array(row['ssd_300_vgg/softmax/Reshape_1:0']),
            np.array(row['ssd_300_vgg/softmax_1/Reshape_1:0']),
            np.array(row['ssd_300_vgg/softmax_2/Reshape_1:0']),
            np.array(row['ssd_300_vgg/softmax_3/Reshape_1:0']),
            np.array(row['ssd_300_vgg/softmax_4/Reshape_1:0']),
            np.array(row['ssd_300_vgg/softmax_5/Reshape_1:0'])
        ]
        rlocalisations = [
            np.array(row['ssd_300_vgg/block4_box/Reshape:0']),
            np.array(row['ssd_300_vgg/block7_box/Reshape:0']),
            np.array(row['ssd_300_vgg/block8_box/Reshape:0']),
            np.array(row['ssd_300_vgg/block9_box/Reshape:0']),
            np.array(row['ssd_300_vgg/block10_box/Reshape:0']),
            np.array(row['ssd_300_vgg/block11_box/Reshape:0'])
        ]
        rbbox_img = row['bbox_img']
        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=SELECT_TRESHOLD, img_shape=NET_SHAPE,
            num_classes=21, decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=NMS_TRESHOLD)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        row["classes"] = rclasses.tolist()
        row["scores"] = rscores.tolist()
        row["boxes"] = rbboxes.tolist()
    return data
def post_process(img, select_thresh=0.5, nms_thresh=0.45):
    rimg, rpredictions, rlocalizations, rbbox_img = isess.run(
        [image_4d, predictions, localizations, bbox_img],
        feed_dict={img_input: img})
    # Get the classes and bboxes from the output.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalizations, ssd_anchors,
        select_threshold=select_thresh, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_thresh)
    # Resize the bboxes to the original image size; useless for Resize.WARP.
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    # After feeding one image through the SSD network, rpredictions holds the
    # 21-class predictions of six feature layers: the first layer has shape
    # 1 (batch size) x 38 (grid width) x 38 (grid height) x 4 (anchors per
    # cell) x 21 (classes); the second is (1, 19, 19, 6, 21), ..., and the
    # sixth is (1, 1, 1, 4, 21).
    # rlocalisations holds the predicted cx, cy, w, h for the anchors, again
    # over six layers; the first layer has shape (1, 38, 38, 4, 4). Note that
    # these are not the actual box values, only deformation parameters
    # relative to the default anchors; recovering the real box geometry
    # requires a transform, computed in np_methods' ssd_bboxes_decode method
    # and referred to as "decode" in this code.
    # Why does SSD output deformation parameters rather than the centre x, y
    # and w, h directly? The network does not predict the absolute difference
    # between the correct box and the default anchor; it predicts normalised
    # deformation parameters instead, mainly for better generalisation. See
    # "bounding box regression" for the details.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Get classes and bboxes from the net outputs: select the anchors whose
    # 21-class prediction score exceeds the threshold. rclasses and rscores
    # hold one entry per selected anchor; rbboxes has shape (num_selected, 4).
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    # Clip anchors that extend beyond the image boundary.
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    # Sort and keep the top_k anchor boxes with the highest scores.
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    # Non-maximum suppression over the anchors: keep boxes that overlap little
    # or belong to different classes (i.e. remove boxes that predict the same
    # class and overlap heavily).
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
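# The "decode" step described in the comments above maps the regression
# offsets back to box corners, using the anchor as a reference. A minimal
# NumPy sketch of the usual SSD decoding follows; the prior_scaling constants
# are the common defaults in SSD-style implementations and are an assumption
# here, not taken from this file.
import numpy as np

def decode_boxes(locs, anchor, prior_scaling=(0.1, 0.1, 0.2, 0.2)):
    """Map SSD regression offsets to (ymin, xmin, ymax, xmax), normalised."""
    yref, xref, href, wref = anchor  # default anchor geometry for one layer
    # Centre offsets scale with the anchor size; width and height are
    # predicted in log-space relative to the anchor.
    cx = locs[..., 0] * wref * prior_scaling[0] + xref
    cy = locs[..., 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(locs[..., 2] * prior_scaling[2])
    h = href * np.exp(locs[..., 3] * prior_scaling[3])
    return np.stack([cy - h / 2., cx - w / 2.,
                     cy + h / 2., cx + w / 2.], axis=-1)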
def predict(img, select_threshold=0., nms_threshold=0.):
    """
    Detect scene text.

    Parameters
    ----------
    img : np.ndarray with shape (None, None, 3) and dtype np.float32
    select_threshold : Only return results with score larger than this number
    nms_threshold : Threshold of non-maximum suppression applied to bounding boxes

    Returns
    -------
    (classes, scores, bboxes) : tuple
        classes : shape=(N,) class of objects predicted
        scores : shape=(N,) confidence of prediction
        bboxes : shape=(N, 12) bounding box of objects
    """
    result = _predict(tf.convert_to_tensor(img))
    rbbox_img = result['bbox']
    rpredictions = [result[f"prediction_{i}"] for i in range(6)]
    rlocalisations = [result[f"localisation_{i}"] for i in range(6)]
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, _txt_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=2, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    # print(rscores)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
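# A hypothetical call site for predict(); the image path and loading helper
# are placeholders for illustration, not part of the original code.
import numpy as np
import matplotlib.image as mpimg

img = mpimg.imread('scene.jpg').astype(np.float32)  # placeholder path
classes, scores, bboxes = predict(img, select_threshold=0.5, nms_threshold=0.45)
for cls, score, box in zip(classes, scores, bboxes):
    # Each box carries 12 values (text-box geometry), per the docstring above.
    print(cls, score, box)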
def postprocess(self, rpredictions, rlocalisations, rbbox_img):
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, SSDServer.ssd_anchors,
        select_threshold=SSDServer.SELECT_TRESHOLD,
        img_shape=SSDServer.NET_SHAPE, num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=SSDServer.TOP_K)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=SSDServer.NMS_TRESHOLD)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    # Map numeric class ids to VOC label strings.
    rclasses = list(map(lambda c: SSDServer.VOC_MAP.get(c, "NA"), rclasses))
    class_arr = [bytes(x, "utf-8") for x in rclasses]
    classes_tensor = hs.TensorProto(
        dtype=hs.DT_STRING,
        tensor_shape=hs.TensorShapeProto(
            dim=[hs.TensorShapeProto.Dim(size=-1)]),
        string_val=class_arr)
    scores_tensor = hs.TensorProto(
        dtype=hs.DT_DOUBLE,
        tensor_shape=hs.TensorShapeProto(
            dim=[hs.TensorShapeProto.Dim(size=-1)]),
        double_val=rscores)
    bboxes_tensor = hs.TensorProto(
        dtype=hs.DT_DOUBLE,
        tensor_shape=hs.TensorShapeProto(dim=[
            hs.TensorShapeProto.Dim(size=-1),
            hs.TensorShapeProto.Dim(size=4)
        ]),
        double_val=rbboxes.flatten())
    return classes_tensor, scores_tensor, bboxes_tensor
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    # Resize image to height 300.
    factor = 300. / float(img.shape[0])
    img = cv2.resize(img, (0, 0), fx=factor, fy=factor)
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Recompute feature shapes and anchors for the resized input.
    ssd_net.update_feature_shapes(rpredictions)
    anchors = ssd_net.anchors(img.shape, dtype=np.float32)
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=8, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400, priority_inside=True, margin=0.0)
    rclasses_nms, rscores_nms, rbboxes_nms = bboxes_nms_intersection_avg(
        rclasses, rscores, rbboxes, threshold=nms_threshold)
    return rclasses_nms, rscores_nms, rbboxes_nms
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Freeze the graph to ssd_net_frozen.pb / .pbtxt as a side effect.
    output_names = [re.sub(':0', '', image_4d.name)]
    output_names.extend([re.sub(':0', '', i.name) for i in localisations])
    output_names.extend([re.sub(':0', '', i.name) for i in predictions])
    print(output_names)
    frzdef = tf.compat.v1.graph_util.convert_variables_to_constants(
        isess, isess.graph_def, output_names)
    prefix = 'ssd_net_'
    with open(prefix + 'frozen.pb', 'wb') as f:
        f.write(frzdef.SerializeToString())
    tf.train.write_graph(isess.graph_def, '.', prefix + 'frozen.pbtxt',
                         as_text=True)
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes
def process_image(img, select_threshold=0.2, nms_threshold=.1, net_shape=(300, 300)):
    # select_threshold: box score threshold -- the classification score of each
    # box prediction is compared against it; a score above it means the box is
    # considered to have captured an object.
    # nms_threshold: overlap threshold -- if two boxes for the same object
    # overlap by more than this value, the deduplication function below removes one.
    # Run the SSD model to get the 4-D input tensor, class predictions and box
    # predictions; rbbox_img is the maximum detection extent, fixed here to
    # [0, 0, 1, 1], i.e. the whole image.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # ssd_bboxes_select applies the threshold to each feature layer's
    # classification scores, normalised mapped coordinates and anchor-box
    # sizes, yielding the detected objects with classes and coordinates.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    # Clip boxes that extend beyond the detection boundary.
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    # Deduplicate: remove targets detected more than once.
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Map box coordinates back onto the original image (all coordinates above
    # are normalised, so this reverses that step).
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    bboxes_draw_on_img(img, rclasses, rscores, rbboxes, colors_plasma, thickness=8)
    return img
def eval_ssd(image_path=None):
    print('=> Evaluating SSD...')
    if image_path is None:
        print("Image path not specified")
        sys.exit(1)
    img = mpimg.imread(image_path)
    # Run SSD network.
    starttime = time.time()
    rimg, rpredictions, rlocalizations, rbbox_img = isess.run(
        [image_4d, predictions, localizations, bbox_img],
        feed_dict={img_input: img})
    print("--- %s seconds ---" % (time.time() - starttime))
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalizations, ssd_anchors,
        select_threshold=0.0, img_shape=net_shape,
        num_classes=num_classes, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=0.0)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes,
    #                                  visualization.colors_plasma)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes, dataset_name)
    pred_list = visualization.save_as_JSON(img, rclasses, rscores, rbboxes,
                                           dataset_name)
    return pred_list