forked from VisualComputingInstitute/SiamR-CNN
/
model_rpn.py
executable file
·161 lines (139 loc) · 7 KB
/
model_rpn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorpack.models import Conv2D, layer_register
from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope, under_name_scope
from tensorpack.tfutils.summary import add_moving_summary
from config import config as cfg
from model_box import clip_boxes
@layer_register(log_shape=True)
@auto_reuse_variable_scope
def rpn_head(featuremap, channel, num_anchors):
#头部,传入feature map,输出boxes和它们的label,背景还是物体
"""
Returns:
label_logits: fHxfWxNA
box_logits: fHxfWxNAx4
"""
with argscope(Conv2D, data_format='channels_first',
kernel_initializer=tf.random_normal_initializer(stddev=0.01)):
hidden = Conv2D('conv0', featuremap, channel, 3, activation=tf.nn.relu)
label_logits = Conv2D('class', hidden, num_anchors, 1)
box_logits = Conv2D('box', hidden, 4 * num_anchors, 1)
# 1, NA(*4), im/16, im/16 (NCHW)
label_logits = tf.transpose(label_logits, [0, 2, 3, 1]) # 1xfHxfWxNA
label_logits = tf.squeeze(label_logits, 0) # fHxfWxNA
shp = tf.shape(box_logits) # 1x(NAx4)xfHxfW
box_logits = tf.transpose(box_logits, [0, 2, 3, 1]) # 1xfHxfWx(NAx4)
box_logits = tf.reshape(box_logits, tf.stack([shp[2], shp[3], num_anchors, 4])) # fHxfWxNAx4
return label_logits, box_logits
@under_name_scope()
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
#字面意思(box和label的损失函数
"""
Args:
anchor_labels: fHxfWxNA
anchor_boxes: fHxfWxNAx4, encoded
label_logits: fHxfWxNA
box_logits: fHxfWxNAx4
Returns:
label_loss, box_loss
"""
with tf.device('/cpu:0'):
valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32), name='num_pos_anchor')
# nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.
valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
valid_label_logits = tf.boolean_mask(label_logits, valid_mask)
with tf.name_scope('label_metrics'):
valid_label_prob = tf.nn.sigmoid(valid_label_logits)
summaries = []
with tf.device('/cpu:0'):
for th in [0.5, 0.2, 0.1]:
valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction')
pos_prediction_corr = tf.count_nonzero(
tf.logical_and(
valid_label_prob > th,
tf.equal(valid_prediction, valid_anchor_labels)),
dtype=tf.int32)
placeholder = 0.5 # A small value will make summaries appear lower.
recall = tf.cast(tf.truediv(pos_prediction_corr, nr_pos), tf.float32)
recall = tf.where(tf.equal(nr_pos, 0), placeholder, recall, name='recall_th{}'.format(th))
precision = tf.cast(tf.truediv(pos_prediction_corr, nr_pos_prediction), tf.float32)
precision = tf.where(tf.equal(nr_pos_prediction, 0),
placeholder, precision, name='precision_th{}'.format(th))
summaries.extend([precision, recall])
add_moving_summary(*summaries)
#这里是loss summary,底下算label和boxes的loss
# Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
# But the total RPN loss will be fine. TODO make the summary op smarter
placeholder = 0.
label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.cast(valid_anchor_labels, tf.float32), logits=valid_label_logits)
label_loss = tf.reduce_sum(label_loss) * (1. / cfg.RPN.BATCH_PER_IM)
label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss')
#这里用cross entropy算labels的loss
pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
delta = 1.0 / 9
box_loss = tf.losses.huber_loss(
pos_anchor_boxes, pos_box_logits, delta=delta,
reduction=tf.losses.Reduction.SUM) / delta
box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM)
box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss')
#这里是huber loss for boxes
add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
return [label_loss, box_loss]
@under_name_scope()
def generate_rpn_proposals(boxes, scores, img_shape,
pre_nms_topk, post_nms_topk=None):
#输出的是rpn预测的boxes和score
"""
Sample RPN proposals by the following steps:
1. Pick top k1 by scores
2. NMS them
3. Pick top k2 by scores. Default k2 == k1, i.e. does not filter the NMS output.
Args:
boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
scores: n float, the logits
img_shape: [h, w]
pre_nms_topk, post_nms_topk (int): See above.
Returns:
boxes: kx4 float
scores: k logits
"""
assert boxes.shape.ndims == 2, boxes.shape
if post_nms_topk is None:
post_nms_topk = pre_nms_topk
topk = tf.minimum(pre_nms_topk, tf.size(scores))
topk_scores, topk_indices = tf.nn.top_k(scores, k=topk, sorted=False)
topk_boxes = tf.gather(boxes, topk_indices)
topk_boxes = clip_boxes(topk_boxes, img_shape)
#找top k个score和对应的boxes
topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
# nx1x2 each
wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1) # n,
topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
topk_valid_scores = tf.boolean_mask(topk_scores, valid)
#找出valid boxes和它们的score
# TODO not needed
topk_valid_boxes_y1x1y2x2 = tf.reshape(
tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
(-1, 4), name='nms_input_boxes')
nms_indices = tf.image.non_max_suppression(
topk_valid_boxes_y1x1y2x2,
topk_valid_scores,
max_output_size=post_nms_topk,
iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
#nms排除多余的框
topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
proposal_boxes = tf.gather(topk_valid_boxes, nms_indices)
proposal_scores = tf.gather(topk_valid_scores, nms_indices)
#要的boxes和score的结果,这里没再筛,“Default k2 == k1, i.e. does not filter the NMS output.”
tf.sigmoid(proposal_scores, name='probs') # for visualization
return tf.stop_gradient(proposal_boxes, name='boxes'), tf.stop_gradient(proposal_scores, name='scores')