def detected_bboxes(self, predictions, localisations,
                    select_threshold=None, nms_threshold=0.5,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Steps, in order: candidate selection, truncation to ``top_k`` via
    ``tfe.bboxes_sort``, optional clipping, batched NMS, and a final sort
    that keeps at most ``keep_top_k`` results.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Threshold forwarded to ``tfe.bboxes_nms_batch``.
        clipping_bbox: When not None, the boxes are clipped against it with
            ``tfe.bboxes_clip`` before NMS is applied.
        top_k: Number of candidates requested from the first sort.
        keep_top_k: Upper bound on the number of results requested from the
            final sort. Defaults to 200.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes)`` as produced by the final
        ``tfe.bboxes_sort`` call.
    """
    # Select top_k bboxes from predictions, and clip.
    rclasses, rscores, rbboxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold)
    rclasses, rscores, rbboxes = tfe.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=top_k)
    if clipping_bbox is not None:
        rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)

    # Apply NMS algorithm.
    rclasses, rscores, rbboxes = tfe.bboxes_nms_batch(
        rclasses, rscores, rbboxes,
        nms_threshold=nms_threshold,
        num_classes=self.params.num_classes)

    # Never request more entries than axis 1 of `rclasses` currently holds.
    final_top_k = tf.minimum(keep_top_k, tf.shape(rclasses)[1])
    rclasses, rscores, rbboxes = tfe.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=final_top_k)
    return rclasses, rscores, rbboxes
def detected_bboxes(self, predictions, localisations,
                    select_threshold=None, nms_threshold=0.5,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Steps, in order: candidate selection, score-ordered truncation to
    ``top_k``, batched non-maximum suppression keeping ``keep_top_k``
    boxes, and finally optional clipping.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Overlap threshold forwarded to
            ``tfe.bboxes_nms_batch`` (0.5 by default).
        clipping_bbox: When not None, the boxes that survive NMS are clipped
            against it with ``tfe.bboxes_clip``.
        top_k: Number of boxes kept by the sort (400 by default).
        keep_top_k: Number of boxes kept by NMS (200 by default).

    Returns:
        Tuple ``(scores, bboxes)`` left after NMS, with the boxes clipped
        when ``clipping_bbox`` was given.
    """
    # Scores and boxes of the candidate detections for each class.
    scores, boxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold,
        num_classes=self.params.num_classes)

    # Order by score and keep the `top_k` best boxes with their scores.
    scores, boxes = tfe.bboxes_sort(scores, boxes, top_k=top_k)

    # Non-maximum suppression: drop boxes that overlap a better-scoring box
    # by more than `nms_threshold`, keeping `keep_top_k` of them.
    scores, boxes = tfe.bboxes_nms_batch(
        scores, boxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)

    if clipping_bbox is None:
        return scores, boxes
    # Trim the parts of the boxes that fall outside `clipping_bbox`.
    return scores, tfe.bboxes_clip(clipping_bbox, boxes)
def detected_bboxes(self, predictions, localisations,
                    select_threshold=None, nms_threshold=0.5,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the RON network output.

    Unlike a plain select/sort/NMS pipeline, this variant clips the boxes
    *before* filtering and sorting, and runs them through
    ``self.bboxes_filter_min`` ahead of the sort.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Threshold forwarded to ``tfe.bboxes_nms_batch``.
        clipping_bbox: When not None, the selected boxes are clipped against
            it with ``tfe.bboxes_clip`` right after selection.
        top_k: Passed both to ``self.bboxes_filter_min`` (positionally) and
            to ``tfe.bboxes_sort``.
        keep_top_k: Forwarded to ``tfe.bboxes_nms_batch``.

    Returns:
        Tuple ``(scores, bboxes)`` as produced by ``tfe.bboxes_nms_batch``.
    """
    # Select candidate boxes from the predictions.
    det_scores, det_boxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold,
        num_classes=self.params.num_classes)

    # Clipping happens up front so every later stage sees clipped boxes.
    if clipping_bbox is not None:
        det_boxes = tfe.bboxes_clip(clipping_bbox, det_boxes)

    det_scores, det_boxes = self.bboxes_filter_min(
        det_scores, det_boxes, top_k)
    det_scores, det_boxes = tfe.bboxes_sort(
        det_scores, det_boxes, top_k=top_k)

    # Apply NMS algorithm.
    det_scores, det_boxes = tfe.bboxes_nms_batch(
        det_scores, det_boxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)
    return det_scores, det_boxes
def detected_bboxes(predictions, localisations, num_classes,
                    select_threshold=0.01, nms_threshold=0.45,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Steps, in order: ``ssd_utils.bboxes_select``, ``tfe.bboxes_sort``,
    ``tfe.bboxes_nms_batch`` and, when requested, ``tfe.bboxes_clip``.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_utils.bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_utils.bboxes_select``.
        num_classes: Number of classes, forwarded to the selection helper.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Threshold forwarded to ``tfe.bboxes_nms_batch``.
        clipping_bbox: When not None, the boxes that survive NMS are clipped
            against it.
        top_k: Forwarded to ``tfe.bboxes_sort``.
        keep_top_k: Forwarded to ``tfe.bboxes_nms_batch``.

    Returns:
        Tuple ``(scores, bboxes)`` after NMS, boxes clipped if requested.
    """
    # Select top_k bboxes from predictions.
    selected = ssd_utils.bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold,
        num_classes=num_classes)
    out_scores, out_boxes = selected
    out_scores, out_boxes = tfe.bboxes_sort(
        out_scores, out_boxes, top_k=top_k)

    # Apply NMS algorithm.
    out_scores, out_boxes = tfe.bboxes_nms_batch(
        out_scores, out_boxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)

    # Clipping is the last step and only touches the boxes.
    if clipping_bbox is None:
        return out_scores, out_boxes
    return out_scores, tfe.bboxes_clip(clipping_bbox, out_boxes)
def detected_bboxes(self, predictions, localisations,
                    select_threshold=None, nms_threshold=0.5,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Overlap threshold forwarded to
            ``tfe.bboxes_nms_batch`` (0.5 by default).
        clipping_bbox: When not None, the boxes that survive NMS are clipped
            against it with ``tfe.bboxes_clip``.
        top_k: Number of boxes kept by the sort (400 by default).
        keep_top_k: Number of boxes kept by NMS (200 by default).

    Returns:
        Tuple ``(scores, bboxes)`` after NMS, boxes clipped if requested.
    """
    # Select top_k bboxes from predictions.
    cand_scores, cand_boxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold,
        num_classes=self.params.num_classes)

    # Rank by score and keep the `top_k` boxes with their scores.
    cand_scores, cand_boxes = tfe.bboxes_sort(
        cand_scores, cand_boxes, top_k=top_k)

    # Apply NMS: discard boxes whose overlap with the best-scoring box
    # exceeds `nms_threshold`, and keep `keep_top_k` of the rest.
    cand_scores, cand_boxes = tfe.bboxes_nms_batch(
        cand_scores, cand_boxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)

    clipped = (cand_boxes if clipping_bbox is None
               else tfe.bboxes_clip(clipping_bbox, cand_boxes))
    return cand_scores, clipped
def detected_bboxes(self, predictions, locations, selected_threshold=None,
                    nms_threshold=0.5, clipping_bbox=None, top_k=400,
                    keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Steps, in order: candidate selection, truncation to ``top_k`` via
    ``tfe.bboxes_sort``, batched NMS keeping ``keep_top_k`` boxes, and
    optional clipping.

    Args:
        predictions: Network class predictions; forwarded unchanged to the
            selection helper.
        locations: Network box localisations; forwarded unchanged to the
            selection helper.
        selected_threshold: Threshold forwarded to the selection helper as
            its ``select_threshold`` argument.
        nms_threshold: Threshold forwarded to ``tfe.bboxes_nms_batch``.
        clipping_bbox: When not None, the boxes that survive NMS are clipped
            against it with ``tfe.bboxes_clip``. The default (None) performs
            no clipping.
        top_k: Forwarded to ``tfe.bboxes_sort``.
        keep_top_k: Forwarded to ``tfe.bboxes_nms_batch``.

    Returns:
        Tuple ``(rscores, rbboxes)`` after NMS, boxes clipped if requested.
    """
    # NOTE(review): helper name and keyword follow the
    # `ssd_common.tf_ssd_bboxes_select(..., select_threshold=...)` convention
    # -- confirm against the ssd_common module used by this project.
    rscores, rbboxes = ssd_common.tf_ssd_bboxes_select(
        predictions, locations,
        select_threshold=selected_threshold,
        num_classes=self.num_classes)
    rscores, rbboxes = tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)

    # Apply NMS algorithm.
    rscores, rbboxes = tfe.bboxes_nms_batch(
        rscores, rbboxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)

    # Honour the clipping box instead of silently ignoring the argument.
    if clipping_bbox is not None:
        rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
    return rscores, rbboxes
def detected_bboxes(self, predictions, localisations,
                    select_threshold=None, nms_threshold=0.5,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Steps, in order: candidate selection, truncation to ``top_k`` via
    ``tfe.bboxes_sort``, and batched NMS keeping ``keep_top_k`` boxes.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Threshold forwarded to ``tfe.bboxes_nms_batch``.
        clipping_bbox: Accepted but not used; this implementation performs
            no clipping.
        top_k: Forwarded to ``tfe.bboxes_sort``.
        keep_top_k: Forwarded to ``tfe.bboxes_nms_batch``.

    Returns:
        Tuple ``(scores, bboxes)`` as produced by ``tfe.bboxes_nms_batch``.
    """
    # Select top_k bboxes from predictions.
    picked_scores, picked_boxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold,
        num_classes=self.params.num_classes)
    picked_scores, picked_boxes = tfe.bboxes_sort(
        picked_scores, picked_boxes, top_k=top_k)

    # Apply NMS algorithm.
    picked_scores, picked_boxes = tfe.bboxes_nms_batch(
        picked_scores, picked_boxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)

    # NOTE: clipping against `clipping_bbox` is disabled here, so the boxes
    # are returned exactly as NMS produced them.
    return picked_scores, picked_boxes
def detected_bboxes(self, predictions, localisations, clipping_bbox=None):
    """Get the detected bounding boxes from the SSD network output.

    All tuning values come from the instance rather than from arguments:
    ``self.select_threshold``, ``self.num_classes``, ``self.select_top_k``,
    ``self.nms_threshold`` and ``self.keep_top_k``.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        clipping_bbox: When not None, the boxes that survive NMS are clipped
            against it with ``tfe.bboxes_clip``.

    Returns:
        Tuple ``(scores, bboxes)`` after NMS, boxes clipped if requested.
    """
    # Select top_k bboxes from predictions.
    kept_scores, kept_boxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=self.select_threshold,
        num_classes=self.num_classes)
    kept_scores, kept_boxes = tfe.bboxes_sort(
        kept_scores, kept_boxes, top_k=self.select_top_k)

    # Apply NMS algorithm.
    kept_scores, kept_boxes = tfe.bboxes_nms_batch(
        kept_scores, kept_boxes,
        nms_threshold=self.nms_threshold,
        keep_top_k=self.keep_top_k)

    # Clipping is the last step and only touches the boxes.
    if clipping_bbox is None:
        return kept_scores, kept_boxes
    return kept_scores, tfe.bboxes_clip(clipping_bbox, kept_boxes)
def detected_bboxes(self, predictions, localisations,
                    select_threshold=None, nms_threshold=0.5,
                    clipping_bbox=None, top_k=400, keep_top_k=200):
    """Get the detected bounding boxes from the SSD network output.

    Args:
        predictions: Network class predictions; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        localisations: Network box localisations; forwarded unchanged to
            ``ssd_common.tf_ssd_bboxes_select``.
        select_threshold: Threshold forwarded to the selection helper.
        nms_threshold: Threshold forwarded to ``tfe.bboxes_nms_batch``.
        clipping_bbox: Accepted but not used; this implementation performs
            no clipping.
        top_k: Forwarded to ``tfe.bboxes_sort``.
        keep_top_k: Forwarded to ``tfe.bboxes_nms_batch``.

    Returns:
        Tuple ``(scores, bboxes)`` as produced by ``tfe.bboxes_nms_batch``.
    """
    # Stage 1: select candidate boxes from the predictions.
    stage_scores, stage_boxes = ssd_common.tf_ssd_bboxes_select(
        predictions, localisations,
        select_threshold=select_threshold,
        num_classes=self.params.num_classes)

    # Stage 2: sort and keep `top_k`.
    stage_scores, stage_boxes = tfe.bboxes_sort(
        stage_scores, stage_boxes, top_k=top_k)

    # Stage 3: apply NMS algorithm.
    stage_scores, stage_boxes = tfe.bboxes_nms_batch(
        stage_scores, stage_boxes,
        nms_threshold=nms_threshold,
        keep_top_k=keep_top_k)

    # NOTE: clipping against `clipping_bbox` is disabled in this variant.
    return stage_scores, stage_boxes
class SSDNet(object): """Implementation of the SSD VGG-based 300 network. The default features layers with 300x300 image input are: conv4 ==> 38 x 38 conv7 ==> 19 x 19 conv8 ==> 10 x 10 conv9 ==> 5 x 5 conv10 ==> 3 x 3 conv11 ==> 1 x 1 The default image size used to train this network is 300x300. #训练输入图像尺寸默认为300x300 """ default_params = SSDParams( #默认参数 img_shape=(300, 300), num_classes=21, #包含背景在内,共21类目标类别 no_annotation_label=21, feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'], #特征层名字 feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], #特征层尺寸 anchor_size_bounds=[0.15, 0.90], # anchor_size_bounds=[0.20, 0.90], #论文中初始预测框大小为0.2x300~0.9x300;实际代码是[45,270] anchor_sizes=[(21., 45.), #直接给出的每个特征图上起初的锚点框大小;如第一个特征层框大小是h:21;w:45; 共6个特征图用于回归 (45., 99.), #越小的框能够得到原图上更多的局部信息,反之得到更多的全局信息; (99., 153.), (153., 207.), (207., 261.), (261., 315.)], # anchor_sizes=[(30., 60.), # (60., 111.), # (111., 162.), # (162., 213.), # (213., 264.), # (264., 315.)], anchor_ratios=[[2, .5], #每个特征层上的每个特征点预测的box长宽比及数量;如:block4: def_boxes:4 [2, .5, 3, 1./3], #block7: def_boxes:6 (ratios中的4个+默认的1:1+额外增加的一个=6) [2, .5, 3, 1./3], #block8: def_boxes:6 [2, .5, 3, 1./3], #block9: def_boxes:6 [2, .5], #block10: def_boxes:4 [2, .5]], #block11: def_boxes:4 #备注:实际上略去了默认的ratio=1以及多加了一个sqrt(初始框宽*初始框高),后面代码有 anchor_steps=[8, 16, 32, 64, 100, 300], #特征图锚点框放大到原始图的缩放比例; anchor_offset=0.5, #每个锚点框中心点在该特征图cell中心,因此offset=0.5 normalizations=[20, -1, -1, -1, -1, -1], #是否归一化,大于0则进行,否则不做归一化;目前看来只对block_4进行正则化,因为该层比较靠前,其norm较大,需做L2正则化(仅仅对每个像素在channel维度做归一化)以保证和后面检测层差异不是很大; prior_scaling=[0.1, 0.1, 0.2, 0.2] #特征图上每个目标与参考框间的尺寸缩放(y,x,h,w)解码时用到 ) def __init__(self, params=None): #网络参数的初始化 """Init the SSD net with some parameters. Use the default ones if none provided. 
""" if isinstance(params, SSDParams): #是否有参数输入,是则用输入的,否则使用默认的 self.params = params #isinstance是python的內建函数,如果参数1与参数2的类型相同则返回true; else: self.params = SSDNet.default_params # ======================================================================= # def net(self, inputs, #定义网络模型 is_training=True, #是否训练 update_feat_shapes=True, #是否更新特征层的尺寸 dropout_keep_prob=0.5, #dropout=0.5 prediction_fn=slim.softmax, #采用softmax预测结果 reuse=None, scope='ssd_300_vgg'): #网络名:ssd_300_vgg (基础网络时VGG,输入训练图像size是300x300) """SSD network definition. """ r = ssd_net(inputs, #网络输入参数r num_classes=self.params.num_classes, feat_layers=self.params.feat_layers, anchor_sizes=self.params.anchor_sizes, anchor_ratios=self.params.anchor_ratios, normalizations=self.params.normalizations, is_training=is_training, dropout_keep_prob=dropout_keep_prob, prediction_fn=prediction_fn, reuse=reuse, scope=scope) # Update feature shapes (try at least!) #下面这步我的理解就是让读者自行更改特征层的输入,未必论文中介绍的那几个block if update_feat_shapes: #是否更新特征层图像尺寸? shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes) #输入特征层图像尺寸以及inputs(应该是预测的特征尺寸),输出更新后的特征图尺寸列表 self.params = self.params._replace(feat_shapes=shapes) #将更新的特征图尺寸shapes替换当前的特征图尺寸 return r #更新网络输入参数r def arg_scope(self, weight_decay=0.0005, data_format='NHWC'): #定义权重衰减=0.0005,L2正则化项系数;数据类型是NHWC """Network arg_scope. """ return ssd_arg_scope(weight_decay, data_format=data_format) def arg_scope_caffe(self, caffe_scope): """Caffe arg_scope used for weights importing. """ return ssd_arg_scope_caffe(caffe_scope) # ======================================================================= # def update_feature_shapes(self, predictions): #更新特征形状尺寸(来自预测结果) """Update feature shapes from predictions collection (Tensor or Numpy array). 
""" shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes) self.params = self.params._replace(feat_shapes=shapes) def anchors(self, img_shape, dtype=np.float32): #输入原始图像尺寸;返回每个特征层每个参考锚点框的位置及尺寸信息(x,y,h,w) """Compute the default anchor boxes, given an image shape. """ return ssd_anchors_all_layers(img_shape, #这是个关键函数;检测所有特征层中的参考锚点框位置和尺寸信息 self.params.feat_shapes, self.params.anchor_sizes, self.params.anchor_ratios, self.params.anchor_steps, self.params.anchor_offset, dtype) def bboxes_encode(self, labels, bboxes, anchors, #编码,用于将标签信息,真实目标信息和锚点框信息编码在一起;得到预测真实框到参考框的转换值 scope=None): """Encode labels and bounding boxes. """ return ssd_common.tf_ssd_bboxes_encode( labels, bboxes, anchors, self.params.num_classes, self.params.no_annotation_label, #未标注的标签(应该代表背景) ignore_threshold=0.5, #IOU筛选阈值 prior_scaling=self.params.prior_scaling, #特征图目标与参考框间的尺寸缩放(0.1,0.1,0.2,0.2) scope=scope) def bboxes_decode(self, feat_localizations, anchors, #解码,用锚点框信息,锚点框与预测真实框间的转换值,得到真是的预测框(ymin,xmin,ymax,xmax) scope='ssd_bboxes_decode'): """Encode labels and bounding boxes. """ return ssd_common.tf_ssd_bboxes_decode( feat_localizations, anchors, prior_scaling=self.params.prior_scaling, scope=scope) def detected_bboxes(self, predictions, localisations, #通过SSD网络,得到检测到的bbox select_threshold=None, nms_threshold=0.5, clipping_bbox=None, top_k=400, keep_top_k=200): """Get the detected bounding boxes from the SSD network output. """ # Select top_k bboxes from predictions, and clip #选取top_k=400个框,并对框做修建(超出原图尺寸范围的切掉) rscores, rbboxes = \ #得到对应某个类别的得分值以及bbox ssd_common.tf_ssd_bboxes_select(predictions, localisations, select_threshold=select_threshold, num_classes=self.params.num_classes) rscores, rbboxes = \ #按照得分高低,筛选出400个bbox和对应得分 tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)