def forward(self, outputs, tg):
    """
    Compute the weighted sum of all head losses.

    The two heatmap heads and the depth head are activated in place, i.e. the
    entries 'hm_mc', 'hm_ver' and 'depth' of ``outputs`` are overwritten.

    :param outputs: dict of raw head outputs; reads 'hm_mc', 'hm_ver', 'vercoor',
        'cenoff', 'veroff', 'dim', 'rot', 'depth' and 'wh'
    :param tg: dict of targets, masks and gather indices
    :return: (total_loss, loss_stats); loss_stats maps each loss name to a Python
        scalar obtained through to_cpu(...).item()
    """
    for heatmap_key in ('hm_mc', 'hm_ver'):
        outputs[heatmap_key] = _sigmoid(outputs[heatmap_key])
    # Depth decoding follows the CenterNet depth loss: 1 / sigmoid(d) - 1
    outputs['depth'] = 1. / (_sigmoid(outputs['depth']) + 1e-9) - 1.

    def center_l1(head_key, target_key):
        # L1 loss gathered at the object centers
        return self.l1_loss(outputs[head_key], tg['obj_mask'], tg['indices_center'], tg[target_key])

    l_hm_mc = self.focal_loss(outputs['hm_mc'], tg['hm_mc'])
    l_hm_ver = self.focal_loss(outputs['hm_ver'], tg['hm_ver'])
    # arguments: output, mask, ind, target
    l_vercoor = self.vercoor_l1(outputs['vercoor'], tg['ver_coor_mask'], tg['indices_center'],
                                tg['ver_coor'])
    l_cenoff = center_l1('cenoff', 'cen_offset')
    l_veroff = self.l1_loss(outputs['veroff'], tg['ver_offset_mask'], tg['indices_vertexes'],
                            tg['ver_offset'])
    # Dimension and depth use the CenterNet-style l1_loss rather than the l2_loss of the paper
    l_dim = center_l1('dim', 'dim')
    # arguments: output, mask, ind, rotbin, rotres
    l_rot = self.rot_loss(outputs['rot'], tg['obj_mask'], tg['indices_center'], tg['rotbin'],
                          tg['rotres'])
    l_depth = center_l1('depth', 'depth')
    l_boxwh = center_l1('wh', 'wh')

    weighted_terms = (
        (l_hm_mc, self.weight_hm_mc),
        (l_hm_ver, self.weight_hm_ver),
        (l_vercoor, self.weight_vercoor),
        (l_cenoff, self.weight_cenoff),
        (l_veroff, self.weight_veroff),
        (l_dim, self.weight_dim),
        (l_rot, self.weight_rot),
        (l_depth, self.weight_depth),
        (l_boxwh, self.weight_wh),
    )
    total_loss = None
    for loss_value, weight in weighted_terms:
        term = loss_value * weight
        total_loss = term if total_loss is None else total_loss + term

    named_losses = (
        ('total_loss', total_loss),
        ('hm_mc_loss', l_hm_mc),
        ('hm_ver_loss', l_hm_ver),
        ('ver_coor_loss', l_vercoor),
        ('cen_offset_loss', l_cenoff),
        ('ver_offset_loss', l_veroff),
        ('dim_loss', l_dim),
        ('rot_loss', l_rot),
        ('depth_loss', l_depth),
        ('wh_loss', l_boxwh),
    )
    loss_stats = {name: to_cpu(value).item() for name, value in named_losses}

    return total_loss, loss_stats
def iou_pred_vs_target_boxes(pred_boxes, target_boxes, nG, GIoU=False, DIoU=False, CIoU=False):
    """
    Compute the rotated IoU, and the matching IoU/GIoU loss, of every predicted
    box against the target box in the same row.

    :param pred_boxes: [num_boxes, 6] tensor with columns (x, y, w, l, im, re)
    :param target_boxes: [num_boxes, 6] tensor with the same columns; x, y, w, l
        are multiplied by nG on a private copy, the caller's tensor is not modified
    :param nG: factor used to scale up the target x, y, w, l
    :param GIoU: if True the loss is 1 - GIoU, otherwise 1 - IoU
    :param DIoU: not implemented
    :param CIoU: not implemented
    :return: (ious, iou_losses), two float tensors on pred_boxes.device with one
        entry per box; they are rebuilt from Python floats
    :raises NotImplementedError: if DIoU or CIoU is requested
    """
    assert pred_boxes.size() == target_boxes.size(), "Unmatch size of pred_boxes and target_boxes"
    # Fail fast instead of doing the polygon work of the first box before raising
    if DIoU or CIoU:
        raise NotImplementedError

    device = pred_boxes.device
    pred_boxes_cpu = to_cpu(pred_boxes).numpy()
    # .numpy() can share memory with a tensor that already lives on the CPU, so the
    # in-place scaling below must run on a copy or it would rescale the caller's targets
    target_boxes_cpu = to_cpu(target_boxes).numpy().copy()
    target_boxes_cpu[:, :4] *= nG  # scale up x, y, w, l

    def box_geometry(boxes):
        # Corners, polygons and polygon areas of an array of (x, y, w, l, im, re) rows
        x, y, w, l, im, re = boxes.transpose(1, 0)
        yaw = np.arctan2(im, re)
        conners = bev_utils.get_corners_vectorize(x, y, w, l, yaw)
        polygons = cvt_box_2_polygon(conners)
        return conners, polygons, [polygon_.area for polygon_ in polygons]

    target_conners, target_polygons, target_areas = box_geometry(target_boxes_cpu)
    pred_conners, pred_polygons, pred_areas = box_geometry(pred_boxes_cpu)

    ious = []
    iou_losses = []
    boxes_geometry = zip(pred_conners, target_conners, pred_polygons, target_polygons,
                         pred_areas, target_areas)
    for pred_cons, t_cons, pred_poly, t_poly, pred_area, t_area in boxes_geometry:
        intersection = pred_poly.intersection(t_poly).area
        union = pred_area + t_area - intersection
        iou = intersection / (union + 1e-16)

        if GIoU:
            # Smallest convex polygon enclosing the corners of both boxes
            all_conners = np.concatenate((pred_cons, t_cons))
            hull_conners = all_conners[ConvexHull(all_conners).vertices]
            convex_polygon = Polygon([(conner[0], conner[1]) for conner in hull_conners]).buffer(0)
            convex_area = convex_polygon.area
            l_iou = 1. - (iou - (convex_area - union) / (convex_area + 1e-16))
        else:
            l_iou = 1. - iou

        ious.append(iou)
        iou_losses.append(l_iou)

    return torch.tensor(ious, device=device, dtype=torch.float), \
        torch.tensor(iou_losses, device=device, dtype=torch.float)
def compute_grid_offsets(self, grid_size):
    """
    Cache everything that depends on the grid size: the stride, the per-cell
    x / y offsets, the anchors expressed in grid units and their polygons / areas.

    :param grid_size: number of cells along one side of the square grid
    :return: None, the results are stored on self
    """
    self.grid_size = grid_size
    g = grid_size
    self.stride = self.img_size / g
    stride = self.stride

    # Cell offsets; grid_x varies along the last axis and grid_y along the one before it
    cell_index = torch.arange(g, device=self.device, dtype=torch.float)
    offset_shape = [1, 1, g, g]
    self.grid_x = cell_index.repeat(g, 1).view(offset_shape)
    self.grid_y = cell_index.repeat(g, 1).t().view(offset_shape)

    # Anchor width / height are divided by the stride, im / re are kept unchanged
    anchors_in_cells = [(a_w / stride, a_h / stride, im, re) for a_w, a_h, im, re in self.anchors]
    self.scaled_anchors = torch.tensor(anchors_in_cells, device=self.device, dtype=torch.float)
    per_anchor_shape = (1, self.num_anchors, 1, 1)
    self.anchor_w = self.scaled_anchors[:, 0:1].view(per_anchor_shape)
    self.anchor_h = self.scaled_anchors[:, 1:2].view(per_anchor_shape)

    # Pre compute polygons and areas of anchors
    anchors_np = to_cpu(self.scaled_anchors).numpy()
    self.scaled_anchors_polygons = get_polygons_fix_xy(anchors_np, fix_xy=100)
    self.scaled_anchors_areas = [anchor_polygon.area for anchor_polygon in self.scaled_anchors_polygons]
def iou_pred_vs_target_boxes(pred_boxes, target_boxes, nG, GIoU=False, DIoU=False, CIoU=False):
    """
    Compute the rotated IoU of every predicted box against the target box in the
    same row.

    :param pred_boxes: [num_boxes, 6] tensor, passed row by row to iou_rotated_11_boxes
    :param target_boxes: tensor of the same size; its first four columns
        (x, y, w, l) are multiplied by nG on a private copy, the caller's tensor
        is not modified
    :param nG: factor used to scale up the target x, y, w, l
    :param GIoU: not implemented
    :param DIoU: not implemented
    :param CIoU: not implemented
    :return: float tensor on pred_boxes.device with one IoU per box
    :raises NotImplementedError: if GIoU, DIoU or CIoU is requested
    """
    assert pred_boxes.size() == target_boxes.size(), "Unmatch size of pred_boxes and target_boxes"
    # Fail fast instead of computing the first IoU before rejecting the options
    if GIoU or DIoU or CIoU:
        raise NotImplementedError

    device = pred_boxes.device
    pred_boxes_cpu = to_cpu(pred_boxes).numpy()
    # .numpy() can share memory with a tensor that already lives on the CPU, so the
    # in-place scaling below must run on a copy or it would rescale the caller's targets
    target_boxes_cpu = to_cpu(target_boxes).numpy().copy()
    target_boxes_cpu[:, :4] *= nG  # scale up x, y, w, l

    ious = [
        iou_rotated_11_boxes(pred_box, target_box)
        for pred_box, target_box in zip(pred_boxes_cpu, target_boxes_cpu)
    ]

    return torch.tensor(ious, device=device, dtype=torch.float)
def forward(self, outputs, tg):
    """
    Compute the weighted sum of all head losses.

    The 'hm_cen', 'hm_conners', 'cen_offset' and 'direction' entries of
    ``outputs`` are passed through a sigmoid in place (the dict is modified).

    :param outputs: dict of raw head outputs; also reads 'z_coor' and 'dim'
    :param tg: dict of targets, the object mask 'obj_mask' and 'indices_center'
    :return: (total_loss, loss_stats); loss_stats maps each loss name to a Python
        scalar obtained through to_cpu(...).item()
    """
    for activated_key in ('hm_cen', 'hm_conners', 'cen_offset', 'direction'):
        outputs[activated_key] = _sigmoid(outputs[activated_key])

    def at_centers(loss_fn, key):
        # Regression loss gathered at the object centers; output and target share the key
        return loss_fn(outputs[key], tg['obj_mask'], tg['indices_center'], tg[key])

    l_hm_cen = self.focal_loss(outputs['hm_cen'], tg['hm_cen'])
    l_hm_conners = self.focal_loss(outputs['hm_conners'], tg['hm_conners'])
    l_cen_offset = at_centers(self.l1_loss, 'cen_offset')
    l_direction = at_centers(self.l1_loss, 'direction')
    # Apply the L1_loss balanced for z coor and dimension regression
    l_z_coor = at_centers(self.l1_loss_balanced, 'z_coor')
    l_dim = at_centers(self.l1_loss_balanced, 'dim')

    weighted_terms = (
        (l_hm_cen, self.weight_hm_cen),
        (l_cen_offset, self.weight_cenoff),
        (l_dim, self.weight_dim),
        (l_direction, self.weight_direction),
        (l_z_coor, self.weight_z_coor),
        (l_hm_conners, self.weight_hm_conners),
    )
    total_loss = None
    for loss_value, weight in weighted_terms:
        term = loss_value * weight
        total_loss = term if total_loss is None else total_loss + term

    named_losses = (
        ('total_loss', total_loss),
        ('hm_cen_loss', l_hm_cen),
        ('hm_conners_loss', l_hm_conners),
        ('cen_offset_loss', l_cen_offset),
        ('dim_loss', l_dim),
        ('direction_loss', l_direction),
        ('z_coor_loss', l_z_coor),
    )
    loss_stats = {name: to_cpu(value).item() for name, value in named_losses}

    return total_loss, loss_stats
def forward(self, input, source_image, targets=None):
    """
    Run the YOLO layers on pre-computed head inputs and gather their outputs.

    :param input: indexable collection; input[ind] is fed to self.models[ind]
    :param source_image: tensor whose size along dim 2 is used as img_size
    :param targets: passed through to every YOLO layer; when None only the
        detections are returned
    :return: the concatenated YOLO outputs (moved through to_cpu) when targets is
        None, otherwise the tuple (loss, yolo_outputs)
    """
    # batch_size, c, h, w
    img_size = source_image.size(2)
    ind = 0
    #self.loss = None
    outputs = dict()  # never read in this function
    loss = 0.
    yolo_outputs = []
    for block_wrapper in self.blocks:
        block = block_wrapper.dict_block
        if block['type'] == 'yolo':
            x, layer_loss = self.models[ind](input[ind], targets, img_size, self.use_giou_loss)
            loss += layer_loss
            yolo_outputs.append(x)
            # NOTE(review): the nesting of this increment could not be recovered from the
            # flattened source; it is assumed to count YOLO blocks only - confirm
            ind = ind + 1
    # Concatenate the per-layer outputs along dim 1
    yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
    #yolo_outputs = torch.cat(yolo_outputs, 1)
    #yolo_outputs = x
    return yolo_outputs if targets is None else (loss, yolo_outputs)
def build_targets(self, out_boxes, pred_cls, target, anchors):
    """
    Build the YOLO targets used to compute the loss.

    :param out_boxes: [num_samples or batch, num_anchors, grid_size, grid_size, 6]
    :param pred_cls: [num_samples or batch, num_anchors, grid_size, grid_size, num_classes]
    :param target: [num_boxes, 8]; columns 0-1 are read as (sample index, class label)
        and columns 2-7 as (x, y, w, l, im, re)
    :param anchors: [num_anchors, 4]; columns 0 and 1 are used as anchor width and height
    :return: iou_scores, iou_losses, class_mask, obj_mask (bool), noobj_mask (bool),
        tx, ty, tw, th, tim, tre, tcls, tconf
    """
    nB, nA, nG, _, nC = pred_cls.size()
    n_target_boxes = target.size(0)
    # Create output tensors on "device"
    obj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.uint8)
    noobj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=1, device=self.device, dtype=torch.uint8)
    class_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    iou_scores = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    tx = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    ty = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    tw = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    th = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    tim = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    tre = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float)
    tcls = torch.full(size=(nB, nA, nG, nG, nC), fill_value=0, device=self.device, dtype=torch.float)
    # Defaults returned unchanged when there is no target box
    tconf = obj_mask.float()
    iou_losses = torch.zeros(size=(1, ), device=self.device, dtype=torch.float)

    if n_target_boxes > 0:  # Make sure that there is at least 1 box
        # Convert to position relative to box
        target_boxes = target[:, 2:8]
        gxy = target_boxes[:, :2] * nG  # scale up x, y
        gwh = target_boxes[:, 2:4] * nG  # scale up w, l
        gimre = target_boxes[:, 4:]

        # Columns 2:6 are (w, l, im, re); all four are multiplied by nG here
        targets_polygons = get_polygons_fix_xy(to_cpu(
            target_boxes[:, 2:6] * nG).numpy(), fix_xy=100)
        targets_areas = [polygon_.area for polygon_ in targets_polygons]

        # Get anchors with best iou
        # NOTE(review): ious is presumably [num_anchors, num_boxes], so max(0) picks
        # one anchor per target - confirm against iou_rotated_boxes_vs_anchors
        ious = iou_rotated_boxes_vs_anchors(self.scaled_anchors_polygons, self.scaled_anchors_areas,
                                            targets_polygons, targets_areas)
        best_ious, best_n = ious.max(0)

        b, target_labels = target[:, :2].long().t()

        gx, gy = gxy.t()
        gw, gh = gwh.t()
        gim, gre = gimre.t()
        # Integer cell indices of every target (truncation of the scaled x, y)
        gi, gj = gxy.long().t()
        # Set masks
        obj_mask[b, best_n, gj, gi] = 1
        noobj_mask[b, best_n, gj, gi] = 0

        # Set noobj mask to zero where iou exceeds ignore threshold
        for i, anchor_ious in enumerate(ious.t()):
            noobj_mask[b[i], anchor_ious > self.ignore_thresh, gj[i], gi[i]] = 0

        # Coordinates: fractional offset of the target inside its cell
        tx[b, best_n, gj, gi] = gx - gx.floor()
        ty[b, best_n, gj, gi] = gy - gy.floor()
        # Width and height: log of the ratio to the selected anchor
        tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
        th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
        # Im and real part
        tim[b, best_n, gj, gi] = gim
        tre[b, best_n, gj, gi] = gre
        # One-hot encoding of label
        tcls[b, best_n, gj, gi, target_labels] = 1
        # 1 where the highest-scoring predicted class equals the target label
        class_mask[b, best_n, gj, gi] = (
            pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
        ious, iou_losses = iou_pred_vs_target_boxes(out_boxes[b, best_n, gj, gi], target_boxes, nG,
                                                    GIoU=True)
        iou_scores[b, best_n, gj, gi] = ious
        # Refresh the confidence target now that obj_mask has been filled
        tconf = obj_mask.float()

    return iou_scores, iou_losses, class_mask, obj_mask.type(torch.bool), noobj_mask.type(torch.bool), \
        tx, ty, tw, th, tim, tre, tcls, tconf
def forward(self, x, targets=None, img_size=608):
    """
    Decode the raw YOLO head output into boxes and, when targets are given,
    compute the training loss and store the metrics in self.metrics.

    :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
    :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
    :param img_size: default 608
    :return: (output, 0) when targets is None, otherwise (output, total_loss);
        output size: [num_samples, num boxes, 7 + num_classes]
    """
    self.img_size = img_size
    self.device = x.device
    num_samples, _, _, grid_size = x.size()

    prediction = x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size, grid_size)
    prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()
    # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7]

    # Get outputs
    pred_x = torch.sigmoid(prediction[..., 0])
    pred_y = torch.sigmoid(prediction[..., 1])
    pred_w = prediction[..., 2]  # Width
    pred_h = prediction[..., 3]  # Height
    pred_im = prediction[..., 4]  # angle imaginary part
    pred_re = prediction[..., 5]  # angle real part
    pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size)

    # Add offset and scale with anchors
    # out_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6]
    out_boxes = torch.empty(prediction[..., :6].shape, device=self.device, dtype=torch.float)
    out_boxes[..., 0] = pred_x.detach() + self.grid_x
    out_boxes[..., 1] = pred_y.detach() + self.grid_y
    # clamp(max=...) caps exp() against overflow; a positional clamp(1E3) would set
    # the MINIMUM and force every decoded size to at least 1000 x anchor
    out_boxes[..., 2] = torch.exp(pred_w.detach()).clamp(max=1E3) * self.anchor_w
    out_boxes[..., 3] = torch.exp(pred_h.detach()).clamp(max=1E3) * self.anchor_h
    out_boxes[..., 4] = pred_im.detach()
    out_boxes[..., 5] = pred_re.detach()

    output = torch.cat((
        out_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
        out_boxes[..., 4:6].view(num_samples, -1, 2),
        pred_conf.view(num_samples, -1, 1),
        pred_cls.view(num_samples, -1, self.num_classes),
    ), dim=-1)
    # output size: [num_samples, num boxes, 7 + num_classes]

    if targets is None:
        return output, 0

    reduction = 'mean'
    iou_scores, iou_losses, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = \
        self.build_targets(out_boxes=out_boxes, pred_cls=pred_cls, target=targets,
                           anchors=self.scaled_anchors)

    loss_box = iou_losses.sum() if reduction == 'sum' else iou_losses.mean()

    loss_im = F.mse_loss(pred_im[obj_mask], tim[obj_mask], reduction=reduction)
    loss_re = F.mse_loss(pred_re[obj_mask], tre[obj_mask], reduction=reduction)
    # Pull (im, re) towards the unit circle, as tim^2 + tre^2 = 1
    loss_im_re = (1. - torch.sqrt(pred_im[obj_mask] ** 2 + pred_re[obj_mask] ** 2)) ** 2
    loss_im_re_red = loss_im_re.sum() if reduction == 'sum' else loss_im_re.mean()
    loss_eular = loss_im + loss_re + loss_im_re_red

    loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask], tconf[obj_mask], reduction=reduction)
    loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask], tconf[noobj_mask],
                                             reduction=reduction)
    loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = F.binary_cross_entropy(pred_cls[obj_mask], tcls[obj_mask], reduction=reduction)
    total_loss = loss_box * self.lbox_scale + loss_obj * self.lobj_scale + \
        loss_cls * self.lcls_scale + loss_eular * self.leular_scale

    # Metrics (store loss values using tensorboard)
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        'loss_box': to_cpu(loss_box).item(),
        'loss_eular': to_cpu(loss_eular).item(),
        'loss_im': to_cpu(loss_im).item(),
        'loss_re': to_cpu(loss_re).item(),
        "loss_obj": to_cpu(loss_obj).item(),
        "loss_cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item()
    }

    return output, total_loss
def forward(self, x, targets=None, img_size=608):
    """
    Decode the raw YOLO head output and, when targets are given, compute the
    MSE / BCE training loss and store its parts in self.metrics.

    :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
    :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
    :param img_size: default 608
    :return: (output, 0) when targets is None, otherwise (output, total_loss);
        output size: [num_samples, num boxes, 7 + num_classes]
    """
    self.img_size = img_size
    self.device = x.device
    batch, _, _, grid_size = x.size()

    # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7]
    raw = x.view(batch, self.num_anchors, self.num_classes + 7, grid_size, grid_size)
    prediction = raw.permute(0, 1, 3, 4, 2).contiguous()

    # Split the channels
    center_shift = 0.5 * (self.scale_x_y - 1)
    cx = torch.sigmoid(prediction[..., 0]) * self.scale_x_y - center_shift  # Center x
    cy = torch.sigmoid(prediction[..., 1]) * self.scale_x_y - center_shift  # Center y
    raw_w = prediction[..., 2]  # Width
    raw_h = prediction[..., 3]  # Height
    raw_im = prediction[..., 4]  # angle imaginary part
    raw_re = prediction[..., 5]  # angle real part
    pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

    # Offsets are cached per grid size
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size)

    # Decoded boxes, size: [num_samples, num_anchors, grid_size, grid_size, 6]
    decoded_channels = (
        cx.detach() + self.grid_x,
        cy.detach() + self.grid_y,
        torch.exp(raw_w.detach()) * self.anchor_w,
        torch.exp(raw_h.detach()) * self.anchor_h,
        raw_im.detach(),
        raw_re.detach(),
    )
    pred_boxes = torch.empty(prediction[..., :6].shape, device=self.device, dtype=torch.float)
    for channel, values in enumerate(decoded_channels):
        pred_boxes[..., channel] = values

    # output size: [num_samples, num boxes, 7 + num_classes]
    output_parts = (
        pred_boxes[..., :4].view(batch, -1, 4) * self.stride,
        pred_boxes[..., 4:].view(batch, -1, 2),
        pred_conf.view(batch, -1, 1),
        pred_cls.view(batch, -1, self.num_classes),
    )
    output = torch.cat(output_parts, dim=-1)

    if targets is None:
        return output, 0

    obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets(
        pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors)

    def masked_mse(predicted, expected):
        # Only cells that own an object contribute to the regression losses
        return self.mse_loss(predicted[obj_mask], expected[obj_mask])

    loss_x = masked_mse(cx, tx)
    loss_y = masked_mse(cy, ty)
    loss_w = masked_mse(raw_w, tw)
    loss_h = masked_mse(raw_h, th)
    loss_im = masked_mse(raw_im, tim)
    loss_re = masked_mse(raw_re, tre)
    loss_eular = loss_im + loss_re

    # The confidence loss also covers the cells without an object
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_conf + loss_cls

    # Metrics (store loss values using tensorboard)
    reported = (
        ("loss", total_loss),
        ("x", loss_x),
        ("y", loss_y),
        ("w", loss_w),
        ("h", loss_h),
        ("im", loss_im),
        ("re", loss_re),
        ("conf", loss_conf),
        ("cls", loss_cls),
    )
    self.metrics = {name: to_cpu(value).item() for name, value in reported}

    return output, total_loss
def forward(self, x, targets=None):
    """
    Run the network described by self.blocks and collect the YOLO layer outputs.

    :param x: input batch; its size along dim 2 is passed to the YOLO layers as img_size
    :param targets: passed through to every YOLO layer; when None only the
        detections are returned
    :return: the concatenated YOLO outputs (moved through to_cpu) when targets is
        None, otherwise the tuple (loss, yolo_outputs)
    """
    # batch_size, c, h, w
    img_size = x.size(2)
    # NOTE(review): starting at -2 gives the second block model index 0; presumably
    # the first block is the 'net' header - confirm
    ind = -2
    self.loss = None
    outputs = dict()  # layer index -> activation, read back by route / shortcut
    loss = 0.
    yolo_outputs = []

    def absolute_index(raw_index, current):
        # Negative layer references are relative to the current layer, all others
        # (including 0, the first layer) are absolute
        layer_index = int(raw_index)
        return layer_index if layer_index >= 0 else layer_index + current

    for block in self.blocks:
        ind = ind + 1
        block_type = block['type']

        if block_type in ('net', 'cost'):
            continue

        if block_type in ('convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax',
                          'connected'):
            x = self.models[ind](x)
            outputs[ind] = x
        elif block_type == 'route':
            layers = [absolute_index(i, ind) for i in block['layers'].split(',')]
            if len(layers) == 1:
                x = outputs[layers[0]]
                if 'groups' in block and int(block['groups']) != 1:
                    # Grouped route: keep only the channel slice of the requested group
                    groups = int(block['groups'])
                    group_id = int(block['group_id'])
                    channels_per_group = x.shape[1] // groups
                    x = x[:, channels_per_group * group_id:channels_per_group * (group_id + 1)]
            else:
                # Concatenate any number of source layers along the channel axis
                x = torch.cat([outputs[layer] for layer in layers], 1)
            outputs[ind] = x
        elif block_type == 'shortcut':
            from_layer = absolute_index(block['from'], ind)
            activation = block['activation']
            x = outputs[from_layer] + outputs[ind - 1]
            if activation == 'leaky':
                x = F.leaky_relu(x, 0.1, inplace=True)
            elif activation == 'relu':
                x = F.relu(x, inplace=True)
            outputs[ind] = x
        elif block_type == 'yolo':
            x, layer_loss = self.models[ind](x, targets, img_size, self.use_giou_loss)
            loss += layer_loss
            yolo_outputs.append(x)
        else:
            print('unknown type %s' % (block['type']))

    yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
    return yolo_outputs if targets is None else (loss, yolo_outputs)
def forward(self, x, targets=None, img_size=608, use_giou_loss=False):
    """
    Decode the raw 3D YOLO head output and, when targets are given, compute the
    training loss and store the metrics in self.metrics.

    :param x: [num_samples or batch, num_anchors * (8 + 1 + num_classes), grid_size, grid_size]
    :param targets: [num boxes, 9] (box_idx, class, x, y, z, h, w, l, yaw)
    :param img_size: default 608
    :param use_giou_loss: when True the box term of the loss is the GIoU loss
        returned by build_targets instead of the summed MSE terms
    :return: (output, 0) when targets is None, otherwise (output, total_loss);
        output size: [num_samples, num boxes, 9 + num_classes]
    """
    self.img_size = img_size
    self.use_giou_loss = use_giou_loss
    self.device = x.device
    batch, _, _, grid_size = x.size()

    # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 9]
    raw = x.view(batch, self.num_anchors, self.num_classes + 9, grid_size, grid_size)
    prediction = raw.permute(0, 1, 3, 4, 2).contiguous()

    # Split the channels
    pred_x = torch.sigmoid(prediction[..., 0])
    pred_y = torch.sigmoid(prediction[..., 1])
    pred_z = torch.sigmoid(prediction[..., 2])
    pred_h = prediction[..., 3]  # Height
    pred_w = prediction[..., 4]  # Width
    pred_l = prediction[..., 5]  # Length
    pred_im = prediction[..., 6]  # angle imaginary part
    pred_re = prediction[..., 7]  # angle real part
    pred_conf = torch.sigmoid(prediction[..., 8])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 9:])  # Cls pred.

    # Offsets are cached per grid size
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size)

    # Decoded boxes, size: [num_samples, num_anchors, grid_size, grid_size, 8]
    decoded_channels = (
        pred_x + self.grid_x,
        pred_y + self.grid_y,
        pred_z,  # Only 1 grid
        torch.exp(pred_h).clamp(max=1E3) * self.anchor_h,
        torch.exp(pred_w).clamp(max=1E3) * self.anchor_w,
        torch.exp(pred_l).clamp(max=1E3) * self.anchor_l,
        pred_im,
        pred_re,
    )
    pred_boxes = torch.empty(prediction[..., :8].shape, device=self.device, dtype=torch.float)
    for channel, values in enumerate(decoded_channels):
        pred_boxes[..., channel] = values

    # output size: [num_samples, num boxes, 9 + num_classes]
    output_parts = (
        pred_boxes[..., :2].view(batch, -1, 2) * self.stride,  # x, y
        pred_boxes[..., 2:3].view(batch, -1, 1),  # z
        pred_boxes[..., 3:6].view(batch, -1, 3) * self.stride,  # h, w, l
        pred_boxes[..., 6:8].view(batch, -1, 2),  # im, re
        pred_conf.view(batch, -1, 1),  # conf
        pred_cls.view(batch, -1, self.num_classes),  # classes
    )
    output = torch.cat(output_parts, dim=-1)

    if targets is None:
        return output, 0

    self.reduction = 'mean'
    (iou_scores, giou_loss, class_mask, obj_mask, noobj_mask,
     tx, ty, tz, th, tw, tl, tim, tre, tcls, tconf) = self.build_targets(
        pred_boxes=pred_boxes, pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors)

    def masked_mse(predicted, expected):
        # Only cells that own an object contribute to the regression losses
        return F.mse_loss(predicted[obj_mask], expected[obj_mask], reduction=self.reduction)

    def masked_bce(predicted, expected, mask):
        return F.binary_cross_entropy(predicted[mask], expected[mask], reduction=self.reduction)

    loss_x = masked_mse(pred_x, tx)
    loss_y = masked_mse(pred_y, ty)
    loss_z = masked_mse(pred_z, tz)
    loss_h = masked_mse(pred_h, th)
    loss_w = masked_mse(pred_w, tw)
    loss_l = masked_mse(pred_l, tl)
    loss_im = masked_mse(pred_im, tim)
    loss_re = masked_mse(pred_re, tre)
    loss_box = loss_x + loss_y + loss_z + loss_h + loss_w + loss_l + loss_re + loss_im

    loss_conf_obj = masked_bce(pred_conf, tconf, obj_mask)
    loss_conf_noobj = masked_bce(pred_conf, tconf, noobj_mask)
    loss_cls = masked_bce(pred_cls, tcls, obj_mask)

    if not self.use_giou_loss:
        loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        total_loss = loss_box + loss_obj + loss_cls
    else:
        loss_obj = loss_conf_obj + loss_conf_noobj
        total_loss = giou_loss * self.lgiou_scale + loss_obj * self.lobj_scale + \
            loss_cls * self.lcls_scale

    # Metrics (store loss values using tensorboard)
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    reported = (
        ("loss", total_loss),
        ("iou_score", iou_scores[obj_mask].mean()),
        ('giou_loss', giou_loss),
        ('loss_x', loss_x),
        ('loss_y', loss_y),
        ('loss_z', loss_z),
        ('loss_h', loss_h),
        ('loss_w', loss_w),
        ('loss_l', loss_l),
        ('loss_im', loss_im),
        ('loss_re', loss_re),
        ("loss_obj", loss_obj),
        ("loss_cls", loss_cls),
        ("cls_acc", cls_acc),
        ("recall50", recall50),
        ("recall75", recall75),
        ("precision", precision),
        ("conf_obj", conf_obj),
        ("conf_noobj", conf_noobj),
    )
    self.metrics = {name: to_cpu(value).item() for name, value in reported}

    return output, total_loss
def forward(self, x, targets=None, img_size=608):
    """
    Decode the raw YOLO head output and, when targets are given, compute the
    IoU / objectness / class training loss and store its parts in self.metrics.

    :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
    :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw))
    :param img_size: default 608
    :return: (output, 0) when targets is None, otherwise (output, total_loss);
        output size: [num_samples, num boxes, 7 + num_classes]
    """
    self.img_size = img_size
    self.device = x.device
    batch, _, _, grid_size = x.size()

    # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7]
    raw = x.view(batch, self.num_anchors, self.num_classes + 7, grid_size, grid_size)
    prediction = raw.permute(0, 1, 3, 4, 2).contiguous()

    # Split the channels
    pred_x = torch.sigmoid(prediction[..., 0])
    pred_y = torch.sigmoid(prediction[..., 1])
    pred_w = prediction[..., 2]  # Width
    pred_h = prediction[..., 3]  # Height
    pred_im = prediction[..., 4]  # angle imaginary part
    pred_re = prediction[..., 5]  # angle real part
    pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

    # Offsets are cached per grid size
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size)

    # Decoded boxes, detached from the graph,
    # size: [num_samples, num_anchors, grid_size, grid_size, 6]
    decoded_channels = (
        pred_x.detach() + self.grid_x,
        pred_y.detach() + self.grid_y,
        torch.exp(pred_w.detach()) * self.anchor_w,
        torch.exp(pred_h.detach()) * self.anchor_h,
        pred_im.detach(),
        pred_re.detach(),
    )
    out_boxes = torch.empty(prediction[..., :6].shape, device=self.device, dtype=torch.float)
    for channel, values in enumerate(decoded_channels):
        out_boxes[..., channel] = values

    # output size: [num_samples, num boxes, 7 + num_classes]
    output_parts = (
        out_boxes[..., :4].view(batch, -1, 4) * self.stride,
        out_boxes[..., 4:6].view(batch, -1, 2),
        pred_conf.view(batch, -1, 1),
        pred_cls.view(batch, -1, self.num_classes),
    )
    output = torch.cat(output_parts, dim=-1)

    if targets is None:
        return output, 0

    reduction = 'mean'
    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = \
        self.build_targets(out_boxes=out_boxes, pred_cls=pred_cls, target=targets,
                           anchors=self.scaled_anchors)

    # One IoU per cell that owns a target; the box loss is 1 - IoU
    iou_masked = iou_scores[obj_mask]
    box_error = 1. - iou_masked
    if reduction == 'sum':
        loss_box = box_error.sum()
    else:
        loss_box = box_error.mean()

    def masked_bce(predicted, expected, mask):
        return F.binary_cross_entropy(predicted[mask], expected[mask], reduction=reduction)

    loss_conf_obj = masked_bce(pred_conf, tconf, obj_mask)
    loss_conf_noobj = masked_bce(pred_conf, tconf, noobj_mask)
    loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = masked_bce(pred_cls, tcls, obj_mask)
    total_loss = loss_box * self.lbox_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale

    # Metrics (store loss values using tensorboard)
    reported = (
        ("loss", total_loss),
        ('loss_box', loss_box),
        ("loss_obj", loss_obj),
        ("loss_cls", loss_cls),
    )
    self.metrics = {name: to_cpu(value).item() for name, value in reported}

    return output, total_loss
def forward(self, x, targets=None, img_size=608):
    """
    Decode the raw YOLO head output and, when targets are given, compute the
    MSE / BCE training loss and store the metrics in self.metrics.

    :param x: [num_samples, num_anchors * (6 + 1 + num_classes), grid_size, grid_size]
    :param targets: forwarded to build_targets; when None only the detections
        are returned
    :param img_size: not used by this implementation, kept for interface compatibility
    :return: (output, 0) when targets is None, otherwise (output, total_loss);
        output size: [num_samples, num boxes, 7 + num_classes]
    """
    device = x.device
    is_cuda = x.is_cuda
    num_samples = x.size(0)
    grid_size = x.size(2)

    prediction = (x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size,
                         grid_size).permute(0, 1, 3, 4, 2).contiguous())

    # Get outputs
    center_shift = 0.5 * (self.scale_x_y - 1)
    x = torch.sigmoid(prediction[..., 0]) * self.scale_x_y - center_shift  # Center x
    y = torch.sigmoid(prediction[..., 1]) * self.scale_x_y - center_shift  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    im = prediction[..., 4]  # angle imaginary part
    re = prediction[..., 5]  # angle real part
    pred_conf = torch.sigmoid(prediction[..., 6])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 7:])  # Cls pred.

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=is_cuda)

    # Add offset and scale with anchors.
    # Allocate on the input's own device: the legacy torch.cuda.FloatTensor(shape)
    # constructor is deprecated and always uses the *current* CUDA device.
    pred_boxes = torch.empty(prediction[..., :6].shape, device=device, dtype=torch.float)
    pred_boxes[..., 0] = x.detach() + self.grid_x
    pred_boxes[..., 1] = y.detach() + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h
    # im / re are intentionally left attached to the graph, as before
    pred_boxes[..., 4] = im
    pred_boxes[..., 5] = re

    output = torch.cat(
        (
            pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride,
            pred_boxes[..., 4:].view(num_samples, -1, 2),
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        dim=-1,
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = \
        self.build_targets(pred_boxes=pred_boxes, pred_cls=pred_cls, target=targets,
                           anchors=self.scaled_anchors)

    # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_im = self.mse_loss(im[obj_mask], tim[obj_mask])
    loss_re = self.mse_loss(re[obj_mask], tre[obj_mask])
    loss_eular = loss_im + loss_re
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "im": to_cpu(loss_im).item(),
        "re": to_cpu(loss_re).item(),
        "conf": to_cpu(loss_conf).item(),
        "cls": to_cpu(loss_cls).item(),
        "cls_acc": to_cpu(cls_acc).item(),
        "recall50": to_cpu(recall50).item(),
        "recall75": to_cpu(recall75).item(),
        "precision": to_cpu(precision).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss