def __loss_per_scale(self, name, conv, pred, label, bboxes, anchors, stride):
    """
    Build the loss of one detection scale from separate xy, wh, confidence
    and class terms.

    NOTE(review): another ``__loss_per_scale`` is defined later in this file;
    if both live in the same class body the later definition replaces this
    one -- confirm which variant is intended.

    :param name: name of the loss (used as the ``tf.name_scope``)
    :param conv: raw output of the YOLO conv layer, documented shape
        (batch_size, output_size, output_size, anchor_per_scale * (5 + num_class))
    :param pred: predicted bbox information (x, y, w, h, conf, prob) where
        (x, y, w, h) are relative to input_size, e.g. for input_size=416
        (x, y, w, h) = (120, 200, 50, 70); documented shape
        (batch_size, output_size, output_size, anchor_per_scale, 5 + num_class)
    :param label: documented shape
        (batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes);
        only the best-anchor positions hold (x, y, w, h, 1, classes), with
        (x, y, w, h) being the original size after bbox correction
    :param bboxes: documented shape (batch_size, max_bbox_per_scale, 4),
        stored as (x, y, w, h) in original size after bbox correction; used
        to compute the IOU between this detector's predicted boxes and all
        bboxes this detector is responsible for
    :param anchors: anchors of this detector
    :param stride: stride of this detector
    :return: scalar tensor; the loss is summed over axes 1-4 and then
        averaged over the remaining (batch) axis
    """
    with tf.name_scope(name):
        shape = tf.shape(conv)
        batch_size, output_size = shape[0], shape[1]
        conv = tf.reshape(
            conv,
            (batch_size, output_size, output_size,
             self.__anchor_per_scale, 5 + self.__num_classes))

        # Split the raw network output along its last axis.
        conv_raw_dxdy = conv[..., 0:2]
        conv_raw_dwdh = conv[..., 2:4]
        conv_raw_conf = conv[..., 4:5]
        conv_raw_prob = conv[..., 5:]

        pred_xywh = pred[..., 0:4]
        pred_conf = pred[..., 4:5]

        label_xy = label[..., 0:2]
        label_wh = label[..., 2:4]
        respond_bbox = label[..., 4:5]
        label_prob = label[..., 5:]

        # (1) xywh loss
        # Integer (x, y) index of every grid cell, repeated for each batch
        # entry and each anchor.
        cell_index = tf.range(output_size, dtype=tf.int32)
        row_index = tf.tile(cell_index[:, tf.newaxis], [1, output_size])
        col_index = tf.tile(cell_index[tf.newaxis, :], [output_size, 1])
        xy_grid = tf.stack([col_index, row_index], axis=-1)
        xy_grid = tf.tile(
            xy_grid[tf.newaxis, :, :, tf.newaxis, :],
            [batch_size, 1, 1, self.__anchor_per_scale, 1])
        xy_grid = tf.cast(xy_grid, tf.float32)

        # Regression targets: offset inside the cell, and log size ratio
        # against the anchors.
        label_txty = 1.0 * label_xy / stride - xy_grid
        label_raw_twth = tf.log((1.0 * label_wh / stride) / anchors)
        # Infinite targets (e.g. log(0) for zero-sized labels) are reset to 0.
        label_raw_twth = tf.where(
            tf.is_inf(label_raw_twth),
            tf.zeros_like(label_raw_twth),
            label_raw_twth)

        # Weight that decreases as the label box area grows relative to the
        # input area.
        input_size = tf.cast(stride * output_size, tf.float32)
        label_w = label_wh[..., 0:1]
        label_h = label_wh[..., 1:2]
        bbox_loss_scale = 2.0 - 1.0 * label_w * label_h / (input_size ** 2)

        xy_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_txty, logits=conv_raw_dxdy)
        xy_loss = respond_bbox * bbox_loss_scale * xy_cross_entropy
        wh_loss = 0.5 * respond_bbox * bbox_loss_scale * tf.square(
            label_raw_twth - conv_raw_dwdh)

        # (2) confidence loss
        iou = utils.iou_calc4(
            pred_xywh[..., np.newaxis, :],
            bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        max_iou = tf.reduce_max(iou, axis=-1)[..., np.newaxis]
        # Background = no object assigned AND best IOU below the threshold.
        below_thresh = tf.cast(max_iou < self.__iou_loss_thresh, tf.float32)
        respond_bgd = (1.0 - respond_bbox) * below_thresh

        conf_focal = self.__focal(respond_bbox, pred_conf)
        conf_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf)
        conf_loss = conf_focal * (
            respond_bbox * conf_cross_entropy
            + respond_bgd * conf_cross_entropy)

        # (3) classes loss
        prob_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_prob, logits=conv_raw_prob)
        prob_loss = respond_bbox * prob_cross_entropy

        per_cell = tf.concat(
            [xy_loss, wh_loss, conf_loss, prob_loss], axis=-1)
        per_sample = tf.reduce_sum(per_cell, axis=[1, 2, 3, 4])
        return tf.reduce_mean(per_sample)
def __loss_per_scale(self, name, conv, pred, label, bboxes, anchors, stride):
    """
    Build the loss of one detection scale from a ``utils.GIOU``-based box
    term plus confidence and class terms.

    NOTE(review): an earlier ``__loss_per_scale`` is defined above in this
    file; if both live in the same class body this later definition is the
    one that takes effect -- confirm that is intended.

    :param name: name of the loss (used as the ``tf.name_scope``)
    :param conv: raw output of the YOLO conv layer, documented shape
        (batch_size, output_size, output_size, anchor_per_scale * (5 + num_class))
    :param pred: predicted bbox information (x, y, w, h, conf, prob) where
        (x, y, w, h) are relative to input_size, e.g. for input_size=416
        (x, y, w, h) = (120, 200, 50, 70); documented shape
        (batch_size, output_size, output_size, anchor_per_scale, 5 + num_class)
    :param label: documented shape
        (batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes);
        only the best-anchor positions hold (x, y, w, h, 1, classes), with
        (x, y, w, h) being the original size after bbox correction
    :param bboxes: documented shape (batch_size, max_bbox_per_scale, 4),
        stored as (x, y, w, h) in original size after bbox correction; used
        to compute the IOU between this detector's predicted boxes and all
        bboxes this detector is responsible for
    :param anchors: anchors of this detector (not read by this variant)
    :param stride: stride of this detector
    :return: scalar tensor; the loss is summed over axes 1-4 and then
        averaged over the remaining (batch) axis
    """
    with tf.name_scope(name):
        shape = tf.shape(conv)
        batch_size, output_size = shape[0], shape[1]
        conv = tf.reshape(
            conv,
            (batch_size, output_size, output_size,
             self.__anchor_per_scale, 5 + self.__num_classes))

        # Only the confidence and class logits of the raw output are needed.
        conv_raw_conf = conv[..., 4:5]
        conv_raw_prob = conv[..., 5:]

        pred_xywh = pred[..., 0:4]
        pred_conf = pred[..., 4:5]

        label_xywh = label[..., 0:4]
        respond_bbox = label[..., 4:5]
        label_prob = label[..., 5:]

        # (1) box loss from utils.GIOU between predicted and label boxes
        giou = utils.GIOU(pred_xywh, label_xywh)
        giou = giou[..., np.newaxis]

        # Weight that decreases as the label box area grows relative to the
        # input area.
        input_size = tf.cast(stride * output_size, tf.float32)
        label_w = label_xywh[..., 2:3]
        label_h = label_xywh[..., 3:4]
        bbox_loss_scale = 2.0 - 1.0 * label_w * label_h / (input_size ** 2)
        giou_loss = respond_bbox * bbox_loss_scale * (1.0 - giou)

        # (2) confidence loss
        iou = utils.iou_calc4(
            pred_xywh[..., np.newaxis, :],
            bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        max_iou = tf.reduce_max(iou, axis=-1)[..., np.newaxis]
        # Background = no object assigned AND best IOU below the threshold.
        below_thresh = tf.cast(max_iou < self.__iou_loss_thresh, tf.float32)
        respond_bgd = (1.0 - respond_bbox) * below_thresh

        conf_focal = self.__focal(respond_bbox, pred_conf)
        conf_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf)
        conf_loss = conf_focal * (
            respond_bbox * conf_cross_entropy
            + respond_bgd * conf_cross_entropy)

        # (3) classes loss
        prob_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_prob, logits=conv_raw_prob)
        prob_loss = respond_bbox * prob_cross_entropy

        per_cell = tf.concat([giou_loss, conf_loss, prob_loss], axis=-1)
        per_sample = tf.reduce_sum(per_cell, axis=[1, 2, 3, 4])
        return tf.reduce_mean(per_sample)