def __loss_per_scale(self, name, conv, pred, label, bboxes, stride):
    """Compute the training loss of a single detection scale.

    The loss has three parts, concatenated on the last axis and weighted by
    the mixup weight stored in the last channel of ``label``:

    1. a GIoU box-regression term,
    2. a focal-weighted confidence (objectness) term,
    3. a per-class sigmoid cross-entropy term.

    :param name: name passed to ``tf.name_scope`` for the ops created here.
    :param conv: raw output of the detection head. It is reshaped here to
        (batch_size, output_size, output_size, gt_per_grid, 5 + num_classes);
        channel 4 is used as the confidence logit and channels 5: as the
        class logits.
    :param pred: decoded predictions, indexed as a rank-5 tensor; channels
        0:4 are the box coordinates and channel 4 is the confidence.
        NOTE(review): the coordinates are compared directly with the label
        boxes, so they are presumably in the same (xmin, ymin, xmax, ymax)
        format -- confirm against the decoder.
    :param label: training target; channels 0:4 are the box (the code takes
        ``[2:] - [:2]`` as width/height, i.e. corner format), channel 4 is
        the "responsible for an object" mask, channels 5:-1 are the class
        targets and the last channel is the mixup weight.
    :param bboxes: ground-truth boxes, indexed as a rank-3 tensor
        (presumably (batch_size, max_boxes, 4) -- confirm); used to find the
        best IoU of every prediction with any ground-truth box.
    :param stride: stride of this scale; ``stride * output_size`` is taken
        as the network input size.
    :return: scalar tensor, the per-sample loss (summed over axes 1-4)
        averaged over the batch.
    """
    # NOTE(review): this file contains a second definition with the same
    # name further down; if both live in the same scope the later one wins.
    with tf.name_scope(name):
        conv_shape = tf.shape(conv)
        batch_size = conv_shape[0]
        output_size = conv_shape[1]
        input_size = stride * output_size
        conv = tf.reshape(conv, (batch_size, output_size, output_size,
                                 self.__gt_per_grid, 5 + self.__num_classes))
        conv_raw_conf = conv[..., 4:5]
        conv_raw_prob = conv[..., 5:]

        pred_coor = pred[..., 0:4]
        pred_conf = pred[..., 4:5]

        label_coor = label[..., 0:4]
        respond_bbox = label[..., 4:5]
        label_prob = label[..., 5:-1]
        label_mixw = label[..., -1:]

        # (1) GIoU loss.
        GIOU = tools.GIOU(pred_coor, label_coor)
        GIOU = GIOU[..., np.newaxis]
        input_size = tf.cast(input_size, tf.float32)
        bbox_wh = label_coor[..., 2:] - label_coor[..., :2]
        # Width and height live on the LAST axis. Indexing with [0] / [1]
        # would pick batch samples 0 and 1 instead, so slice the channels
        # explicitly (keeping a trailing axis of size 1 for broadcasting).
        # The scale is 2 - box_area / input_area, so smaller boxes weigh more.
        bbox_area = bbox_wh[..., 0:1] * bbox_wh[..., 1:2]
        bbox_loss_scale = 2.0 - 1.0 * bbox_area / (input_size ** 2)
        GIOU_loss = respond_bbox * bbox_loss_scale * (1.0 - GIOU)

        # (2) Confidence loss.
        # The added axes let every prediction broadcast against every
        # ground-truth box of the same sample.
        iou = tools.iou_calc3(
            pred_coor[:, :, :, :, np.newaxis, :],
            bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        max_iou = tf.reduce_max(iou, axis=-1)
        max_iou = max_iou[..., np.newaxis]
        # Background = not responsible for an object AND best IoU below the
        # threshold; predictions in between contribute no confidence loss.
        respond_bgd = (1.0 - respond_bbox) * tf.cast(
            max_iou < self.__iou_loss_thresh, tf.float32)

        conf_focal = self.__focal(respond_bbox, pred_conf)
        # Both the object and the background term use the same labels and
        # logits, so the cross-entropy is computed once and masked twice.
        conf_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=conv_raw_conf)
        conf_loss = conf_focal * (
            respond_bbox * conf_xent + respond_bgd * conf_xent)

        # (3) Classification loss, only for cells responsible for an object.
        prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_prob, logits=conv_raw_prob)

        loss = tf.concat([GIOU_loss, conf_loss, prob_loss], axis=-1)
        loss = loss * label_mixw
        loss = tf.reduce_mean(tf.reduce_sum(loss, axis=[1, 2, 3, 4]))
        return loss
def __loss_per_scale(self, name, conv, pred, label, bboxes, stride):
    """
    Build the loss of one detector (one output scale).

    :param name: name of the loss (used as the ``tf.name_scope``)
    :param conv: raw output of the yolo conv layer,
        shape (batch_size, output_size, output_size,
        anchor_per_scale * (5 + num_class))
    :param pred: predicted bbox information produced by yolo
        (x, y, w, h, conf, prob), where (x, y, w, h) are relative to
        input_size, e.g. input_size=416, (x, y, w, h) = (120, 200, 50, 70);
        shape (batch_size, output_size, output_size, anchor_per_scale,
        5 + num_class)
        NOTE(review): the boxes are compared directly with the corner-format
        label boxes below -- confirm the coordinate format of pred.
    :param label: shape (batch_size, output_size, output_size,
        anchor_per_scale, 6 + num_classes); only positions responsible for
        predicting a GT hold (xmin, ymin, xmax, ymax, 1, classes,
        mixup_weights), every other position holds (0, 0, 0, 0, 0, 0..., 1)
    :param bboxes: shape (batch_size, max_bbox_per_scale, 4), coordinates
        stored as (xmin, ymin, xmax, ymax); used to compute the IOU between
        this detector's predicted boxes and all bboxes it is responsible for
    :param stride: stride of this detector
    :return: scalar loss: summed over axes 1-4, averaged over the batch
    """
    with tf.name_scope(name):
        raw_shape = tf.shape(conv)
        n_batch = raw_shape[0]
        grid_size = raw_shape[1]
        net_input_size = stride * grid_size

        head_shape = (n_batch, grid_size, grid_size,
                      self.__gt_per_grid, 5 + self.__num_classes)
        conv = tf.reshape(conv, head_shape)
        raw_conf_logits = conv[..., 4:5]
        raw_class_logits = conv[..., 5:]

        pred_boxes = pred[..., 0:4]
        pred_objectness = pred[..., 4:5]

        gt_boxes = label[..., 0:4]
        object_mask = label[..., 4:5]
        gt_classes = label[..., 5:-1]
        mixup_weight = label[..., -1:]

        # (1) GIoU loss
        giou = tf.expand_dims(tools.GIOU(pred_boxes, gt_boxes), axis=-1)
        net_input_size = tf.cast(net_input_size, tf.float32)
        gt_wh = gt_boxes[..., 2:] - gt_boxes[..., :2]
        gt_area = gt_wh[..., 0:1] * gt_wh[..., 1:2]
        # Smaller ground-truth boxes receive a larger regression weight.
        box_weight = 2.0 - gt_area / (net_input_size ** 2)
        giou_term = object_mask * box_weight * (1.0 - giou)

        # (2) confidence loss
        # Extra axes make each prediction broadcast against each GT box.
        pairwise_iou = tools.iou_calc3(
            pred_boxes[:, :, :, :, np.newaxis, :],
            bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        best_iou = tf.expand_dims(tf.reduce_max(pairwise_iou, axis=-1),
                                  axis=-1)
        below_thresh = tf.cast(best_iou < self.__iou_loss_thresh, tf.float32)
        background_mask = (1.0 - object_mask) * below_thresh

        focal_weight = self.__focal(object_mask, pred_objectness)
        object_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=object_mask, logits=raw_conf_logits)
        background_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=object_mask, logits=raw_conf_logits)
        conf_term = focal_weight * (
            object_mask * object_xent + background_mask * background_xent)

        # (3) classes loss
        class_xent = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=gt_classes, logits=raw_class_logits)
        class_term = object_mask * class_xent

        stacked = tf.concat([giou_term, conf_term, class_term], axis=-1)
        weighted = stacked * mixup_weight
        per_sample = tf.reduce_sum(weighted, axis=[1, 2, 3, 4])
        return tf.reduce_mean(per_sample)