def __call__(self, y_true, y_pred):
    """Calculate the loss of the model prediction for one scale.

    Args:
        y_true: ground-truth tensor. Its last axis is split into
            xy (2), wh (2), objectness (1) and ``self.num_classes`` class
            channels.
        y_pred: raw network output for the same scale. Channels 0:2 are
            passed through a sigmoid and channels 2:4 are used as-is for
            the box regression losses.

    Returns:
        ``(total, (xy_loss, wh_loss, class_loss, obj_loss))`` where ``total``
        is the sum of the four components.
    """
    # --- ground truth -----------------------------------------------------
    gt_xy, gt_wh, gt_obj, gt_class = tf.split(
        y_true, (2, 2, 1, self.num_classes), axis=-1)
    gt_box = xywh_to_x1x2y1y2(tf.concat([gt_xy, gt_wh], axis=-1))
    gt_rel = get_relative_yolo_box(y_true, self.valid_anchors_wh)
    gt_xy_rel, gt_wh_rel = gt_rel[..., 0:2], gt_rel[..., 2:4]

    # --- prediction -------------------------------------------------------
    out_xy_rel = tf.sigmoid(y_pred[..., 0:2])
    out_wh_rel = y_pred[..., 2:4]
    out_box, out_obj, out_class = get_absolute_yolo_box(
        y_pred, self.valid_anchors_wh, self.num_classes)
    out_box = xywh_to_x1x2y1y2(out_box)

    # Per-box scale built from the ground-truth width and height
    # (taken from the split above, i.e. before any format conversion).
    box_scale = 2 - gt_wh[..., 0] * gt_wh[..., 1]

    xy_loss = self.calc_xy_loss(gt_obj, gt_xy_rel, out_xy_rel, box_scale)
    wh_loss = self.calc_wh_loss(gt_obj, gt_wh_rel, out_wh_rel, box_scale)
    class_loss = self.calc_class_loss(gt_obj, gt_class, out_class)

    # The converted boxes are only used for the ignore mask, which in turn
    # feeds the objectness loss.
    skip_mask = self.calc_ignore_mask(gt_obj, gt_box, out_box)
    obj_loss = self.calc_obj_loss(gt_obj, out_obj, skip_mask)

    components = (xy_loss, wh_loss, class_loss, obj_loss)
    return xy_loss + wh_loss + class_loss + obj_loss, components
def forward(self, y_true, y_pred):
    """Compute the box / landmark / objectness loss for one output scale.

    Args:
        y_true: ground-truth tensor, decoded by ``get_relative_yolo_box``.
        y_pred: raw network output, decoded by ``get_absolute_yolo_box``.

    Returns:
        ``(total, (xy_loss, wh_loss, landmark_loss, obj_loss))`` where
        ``total`` is the sum of the four components.
    """
    # Prediction side: the absolute box is used for the ignore mask, the
    # relative box for the regression losses.
    pred_box_abs, pred_obj, pred_landmark, pred_box_rel = get_absolute_yolo_box(
        y_pred, self.valid_anchors_wh, self.num_landmarks)
    pred_box_abs = xywh_to_x1x2y1y2(pred_box_abs)
    pred_xy_rel = pred_box_rel[..., 0:2]
    pred_wh_rel = pred_box_rel[..., 2:4]

    # Ground-truth side, same split into relative and absolute boxes.
    true_box_rel, true_obj, true_landmark, true_box_abs = get_relative_yolo_box(
        y_true, self.valid_anchors_wh, self.num_landmarks)
    true_xy_rel = true_box_rel[..., 0:2]
    true_wh_rel = true_box_rel[..., 2:4]

    # Small-box weight (2 - w * h). It must be built from the xywh box:
    # once the box has gone through xywh_to_x1x2y1y2, channels 2:4 no longer
    # hold width/height. Computing the product here (a new tensor) also keeps
    # the weight independent of whether that helper works in place.
    true_wh_abs = true_box_abs[..., 2:4]
    weight = 2 - true_wh_abs[..., 0] * true_wh_abs[..., 1]

    true_box_abs = xywh_to_x1x2y1y2(true_box_abs)

    # xy, wh and landmarks all go through the same loss routine.
    xy_loss = self.calc_xy_loss(true_xy_rel, pred_xy_rel, true_obj, weight)
    wh_loss = self.calc_xy_loss(true_wh_rel, pred_wh_rel, true_obj, weight)
    landmark_loss = self.calc_xy_loss(true_landmark, pred_landmark, true_obj, weight)

    ignore_mask = self.calc_ignore_mask(true_box_abs, pred_box_abs, true_obj)
    obj_loss = self.calc_obj_loss(true_obj, pred_obj, ignore_mask)

    return (xy_loss + wh_loss + landmark_loss + obj_loss,
            (xy_loss, wh_loss, landmark_loss, obj_loss))
def __call__(self, raw_yolo_outputs):
    """Merge the per-scale YOLO outputs and run batched NMS on them.

    Args:
        raw_yolo_outputs: iterable of per-scale outputs. For each item,
            index 0 holds the boxes (4 values each), index 1 the objectness
            and index 2 the class probabilities.

    Returns:
        ``(final_boxes, final_scores, final_classes, valid_detections)`` as
        produced by ``self.batch_non_maximum_suppression``.
    """
    per_scale_boxes = []
    per_scale_obj = []
    per_scale_cls = []
    for scale_out in raw_yolo_outputs:
        box_t, obj_t, cls_t = scale_out[0], scale_out[1], scale_out[2]
        n_batch = tf.shape(box_t)[0]
        n_classes = tf.shape(cls_t)[-1]
        # Flatten every grid cell / anchor into one candidate axis.
        per_scale_boxes.append(tf.reshape(box_t, (n_batch, -1, 4)))
        per_scale_obj.append(tf.reshape(obj_t, (n_batch, -1, 1)))
        per_scale_cls.append(tf.reshape(cls_t, (n_batch, -1, n_classes)))

    # Boxes arrive as xywh and are converted once, after concatenation.
    boxes = xywh_to_x1x2y1y2(tf.concat(per_scale_boxes, axis=1))
    objectness = tf.concat(per_scale_obj, axis=1)
    class_probs = tf.concat(per_scale_cls, axis=1)

    # Objectness alone is used as the NMS score.
    score_dims = tf.shape(objectness)
    scores = tf.reshape(objectness, (score_dims[0], -1, score_dims[-1]))

    nms_boxes, nms_scores, nms_classes, n_valid = self.batch_non_maximum_suppression(
        boxes, scores, class_probs,
        self.iou_thresh, self.score_thresh, self.max_detection)
    return nms_boxes, nms_scores, nms_classes, n_valid
def forward(self, raw_yolo_outputs):
    """Flatten the per-scale YOLO outputs and run batched NMS on them.

    Args:
        raw_yolo_outputs: iterable of per-scale outputs, each laid out as
            ``(bbox_abs, objectness, landmarks, bbox_rel)``. Only the first
            three entries are read here.

    Returns:
        The result of ``self.batch_non_maximum_suppression(boxes,
        objectness, landmark_coord)``.
    """
    boxes, objectness, landmarks_coord = [], [], []
    for raw_yolo_out in raw_yolo_outputs:
        batch_size = raw_yolo_out[0].size(0)
        num_landmarks = raw_yolo_out[2].size(-1)

        # .view() requires a compatible memory layout; make every tensor
        # contiguous first (the boxes previously skipped this step and
        # would raise on a non-contiguous input).
        boxes.append(
            raw_yolo_out[0].contiguous().view(batch_size, -1, 4))
        objectness.append(
            raw_yolo_out[1].contiguous().view(batch_size, -1, 1))
        landmarks_coord.append(
            raw_yolo_out[2].contiguous().view(batch_size, -1, num_landmarks))

    # Concatenate all scales along the candidate axis; boxes are converted
    # from xywh once, after concatenation.
    boxes = xywh_to_x1x2y1y2(torch.cat(boxes, dim=1))
    objectness = torch.cat(objectness, dim=1)
    landmark_coord = torch.cat(landmarks_coord, dim=1)

    return self.batch_non_maximum_suppression(boxes, objectness, landmark_coord)
def forward(self, y_true, y_pred):
    """Compute the box / class / objectness loss for one output scale.

    Args:
        y_true: ground-truth tensor, decoded by ``get_relative_yolo_box``.
        y_pred: raw network output, decoded by ``get_absolute_yolo_box``.

    Returns:
        ``(total, (xy_loss, wh_loss, class_loss, obj_loss))`` where ``total``
        is the sum of the four components.
    """
    # Needed for the IoU / ignore-mask computation.
    pred_box_abs, pred_obj, pred_class, pred_box_rel = get_absolute_yolo_box(
        y_pred, self.valid_anchors_wh, self.num_classes)
    pred_box_abs = xywh_to_x1x2y1y2(pred_box_abs)
    pred_xy_rel = pred_box_rel[..., 0:2]
    pred_wh_rel = pred_box_rel[..., 2:4]

    # Needed for the loss computation.
    true_box_rel, true_obj, true_class, true_box_abs = get_relative_yolo_box(
        y_true, self.valid_anchors_wh, self.num_classes)
    true_xy_rel = true_box_rel[..., 0:2]
    true_wh_rel = true_box_rel[..., 2:4]

    # Adjustment based on w, h to help detect small boxes. The weight has
    # to be derived from the xywh box: after xywh_to_x1x2y1y2, channels 2:4
    # no longer hold width/height. It is computed before the conversion so
    # it does not depend on whether that helper works in place.
    true_wh_abs = true_box_abs[..., 2:4]
    weight = 2 - true_wh_abs[..., 0] * true_wh_abs[..., 1]

    true_box_abs = xywh_to_x1x2y1y2(true_box_abs)

    xy_loss = self.calc_xywh_loss(true_xy_rel, pred_xy_rel, true_obj, weight)
    wh_loss = self.calc_xywh_loss(true_wh_rel, pred_wh_rel, true_obj, weight)
    class_loss = self.calc_class_loss(true_obj, true_class, pred_class)

    ignore_mask = self.calc_ignore_mask(true_box_abs, pred_box_abs, true_obj)
    obj_loss = self.calc_obj_loss(true_obj, pred_obj, ignore_mask)

    return (xy_loss + wh_loss + class_loss + obj_loss,
            (xy_loss, wh_loss, class_loss, obj_loss))
def forward(self, y_true, y_pred):
    """Return the total YOLO loss and its components for one output scale.

    Args:
        y_true: ground-truth tensor, decoded by ``get_relative_yolo_box``.
        y_pred: raw network output, decoded by ``get_absolute_yolo_box``.

    Returns:
        A pair ``(total, parts)``: ``parts`` is
        ``(xy_loss, wh_loss, class_loss, obj_loss)`` and ``total`` is their
        sum.
    """
    # Prediction: the absolute box is needed for the IoU / ignore mask, the
    # relative box for the regression losses.
    pred_box_abs, pred_obj, pred_class, pred_box_rel = get_absolute_yolo_box(
        y_pred, self.valid_anchors_wh, self.num_classes)
    pred_box_abs = xywh_to_x1x2y1y2(pred_box_abs)
    pred_xy_rel = pred_box_rel[..., 0:2]
    pred_wh_rel = pred_box_rel[..., 2:4]

    # Ground truth: needed for the loss computation.
    true_box_rel, true_obj, true_class, true_box_abs = get_relative_yolo_box(
        y_true, self.valid_anchors_wh, self.num_classes)
    true_xy_rel = true_box_rel[..., 0:2]
    true_wh_rel = true_box_rel[..., 2:4]

    # Small-box adjustment from the ground-truth w and h. Read w/h while
    # the box is still in xywh form; slicing 2:4 after xywh_to_x1x2y1y2
    # would pick up converted coordinates instead. The product yields a new
    # tensor, so a later in-place conversion cannot affect it.
    true_wh_abs = true_box_abs[..., 2:4]
    weight = 2 - true_wh_abs[..., 0] * true_wh_abs[..., 1]

    true_box_abs = xywh_to_x1x2y1y2(true_box_abs)

    xy_loss = self.calc_xywh_loss(true_xy_rel, pred_xy_rel, true_obj, weight)
    wh_loss = self.calc_xywh_loss(true_wh_rel, pred_wh_rel, true_obj, weight)
    class_loss = self.calc_class_loss(true_obj, true_class, pred_class)

    ignore_mask = self.calc_ignore_mask(true_box_abs, pred_box_abs, true_obj)
    obj_loss = self.calc_obj_loss(true_obj, pred_obj, ignore_mask)

    return (xy_loss + wh_loss + class_loss + obj_loss,
            (xy_loss, wh_loss, class_loss, obj_loss))
def forward(self, raw_yolo_output):
    """Flatten the per-scale YOLO outputs and run batched NMS on them.

    Args:
        raw_yolo_output: iterable of per-scale outputs, each laid out as
            ``(bbox_abs, objectness, class_probs, bbox_rel)``. Only the
            first three entries are read here.

    Returns:
        The result of ``self.batch_non_maximum_suppression(boxes, scores,
        class_prob)``.
    """
    boxes, objectness, class_prob = [], [], []
    for raw_yolo_out in raw_yolo_output:
        batch_size = raw_yolo_out[0].size(0)
        num_classes = raw_yolo_out[2].size(-1)

        # .view() requires a compatible memory layout; make every tensor
        # contiguous first (the boxes previously skipped this step and
        # would raise on a non-contiguous input).
        boxes.append(
            raw_yolo_out[0].contiguous().view(batch_size, -1, 4))
        objectness.append(
            raw_yolo_out[1].contiguous().view(batch_size, -1, 1))
        class_prob.append(
            raw_yolo_out[2].contiguous().view(batch_size, -1, num_classes))

    # Concatenate all scales along the candidate axis; boxes are converted
    # from xywh once, after concatenation.
    boxes = xywh_to_x1x2y1y2(torch.cat(boxes, dim=1))
    objectness = torch.cat(objectness, dim=1)
    class_prob = torch.cat(class_prob, dim=1)

    # Objectness alone is used as the NMS score.
    scores = objectness
    scores_shape = scores.shape
    scores = torch.reshape(scores, [scores_shape[0], -1, scores_shape[-1]])

    return self.batch_non_maximum_suppression(boxes, scores, class_prob)
def __call__(self, y_true, y_pred):
    """Calculate the loss of the model prediction for one scale.

    Naming convention used below:

    * ``*_rel`` (relative): coordinates relative to the grid cell, i.e. the
      (tx, ty, tw, th) form from the paper. These are used for the loss.
    * ``*_abs`` (absolute): coordinates within the whole image, i.e. the
      (bx, by, bw, bh) form from the paper. These are used for the IoU and
      the ignore mask.

    Args:
        y_true: ground truth; last axis is split into xy (2), wh (2),
            objectness (1) and ``self.num_classes`` one-hot class channels.
        y_pred: raw network output for the same scale.

    Returns:
        ``(total, (xy_loss, wh_loss, class_loss, obj_loss))`` where ``total``
        is the sum of the four components (YoloV1, function (3)).
    """
    # ---- ground truth ----------------------------------------------------
    # true_xy_abs / true_wh_abs: (batch, grid, grid, anchor, 2)
    # true_obj:                  (batch, grid, grid, anchor, 1)
    # true_class:                (batch, grid, grid, anchor, num_classes)
    split_sizes = (2, 2, 1, self.num_classes)
    true_xy_abs, true_wh_abs, true_obj, true_class = tf.split(
        y_true, split_sizes, axis=-1)
    true_corners = xywh_to_x1x2y1y2(
        tf.concat([true_xy_abs, true_wh_abs], axis=-1))

    # YoloV2: "This ground truth value can be easily computed by inverting
    # the equations above." The loss is taken in cell-offset space, so the
    # ground truth is transformed into that space first, as in
    # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/yolo_layer.c#L93
    # true_rel: (batch, grid, grid, anchor, 4)
    true_rel = get_relative_yolo_box(y_true, self.valid_anchors_wh)
    true_xy_rel = true_rel[..., 0:2]
    true_wh_rel = true_rel[..., 2:4]

    # ---- prediction ------------------------------------------------------
    # pred_xy_rel / pred_wh_rel: (batch, grid, grid, anchor, 2)
    # NOTE(review): xy is squashed with a sigmoid while wh stays raw; the
    # original carried a TODO to document why - still to be confirmed.
    pred_xy_rel = tf.sigmoid(y_pred[..., 0:2])
    pred_wh_rel = y_pred[..., 2:4]

    # YoloV2: "If the cell is offset from the top left corner of the image
    # by (cx, cy) and the bounding box prior has width and height pw, ph,
    # then the predictions correspond to: ..."
    # This decoded box is used for the IoU, NOT for the loss, so it has to
    # be in real image coordinates rather than cell offsets. pred_obj and
    # pred_class come back already sigmoid'ed.
    #   pred_obj:   (batch, grid, grid, anchor, 1)
    #   pred_class: (batch, grid, grid, anchor, num_classes)
    pred_decoded, pred_obj, pred_class = get_absolute_yolo_box(
        y_pred, self.valid_anchors_wh, self.num_classes)
    pred_corners = xywh_to_x1x2y1y2(pred_decoded)

    # ---- losses ----------------------------------------------------------
    # Adjustment that improves small-box detection, mirroring the
    # (2 - truth.w * truth.h) factor in
    # https://github.com/pjreddie/darknet/blob/f6d861736038da22c9eb0739dca84003c5a5e275/src/yolo_layer.c#L190
    small_box_weight = 2 - true_wh_abs[..., 0] * true_wh_abs[..., 1]

    xy_loss = self.calc_xy_loss(
        true_obj, true_xy_rel, pred_xy_rel, small_box_weight)
    wh_loss = self.calc_wh_loss(
        true_obj, true_wh_rel, pred_wh_rel, small_box_weight)
    class_loss = self.calc_class_loss(true_obj, true_class, pred_class)

    # The absolute boxes are used to calculate the IoU and the ignore mask.
    ignore_mask = self.calc_ignore_mask(true_obj, true_corners, pred_corners)
    obj_loss = self.calc_obj_loss(true_obj, pred_obj, ignore_mask)

    parts = (xy_loss, wh_loss, class_loss, obj_loss)
    return xy_loss + wh_loss + class_loss + obj_loss, parts