def visualize_training(self, batched_inputs, results):
    """
    A function used to visualize ground truth images and final network predictions.
    It shows ground truth bounding boxes on the original image and up to 20
    predicted object bounding boxes on the original image.

    Args:
        batched_inputs (list): a list that contains input to the model.
        results (list[Instances]): a list of #images elements.
    """
    from detectron2.utils.visualizer import Visualizer

    assert len(batched_inputs) == len(
        results
    ), "Cannot visualize inputs and results of different sizes"
    storage = get_event_storage()
    max_boxes = 20

    image_index = 0  # only visualize a single image
    img = batched_inputs[image_index]["image"].cpu().numpy()
    assert img.shape[0] == 3, "Images should have 3 channels."
    if self.input_format == "BGR":
        img = img[::-1, :, :]
    img = img.transpose(1, 2, 0)
    v_gt = Visualizer(img, None)
    v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes)
    anno_img = v_gt.get_image()
    processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1])
    predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy()

    v_pred = Visualizer(img, None)
    v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes])
    prop_img = v_pred.get_image()
    vis_img = np.vstack((anno_img, prop_img))
    vis_img = vis_img.transpose(2, 0, 1)
    vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results"
    storage.put_image(vis_name, vis_img)
def _log_accuracy(self):
    """
    Log the accuracy metrics to EventStorage.
    """
    num_instances = self.gt_classes.numel()
    pred_classes = self.pred_class_logits.argmax(dim=1)
    bg_class_ind = self.pred_class_logits.shape[1] - 1

    fg_inds = (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)
    num_fg = fg_inds.nonzero().numel()
    fg_gt_classes = self.gt_classes[fg_inds]
    fg_pred_classes = pred_classes[fg_inds]

    num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel()
    num_accurate = (pred_classes == self.gt_classes).nonzero().numel()
    fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel()

    storage = get_event_storage()
    storage.put_scalar("fast_rcnn/cls_accuracy", num_accurate / num_instances)
    if num_fg > 0:
        storage.put_scalar("fast_rcnn/fg_cls_accuracy", fg_num_accurate / num_fg)
        storage.put_scalar("fast_rcnn/false_negative", num_false_negative / num_fg)
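# A minimal usage sketch: the metrics above are written through detectron2's
# global EventStorage, which must be open when _log_accuracy runs. Outside a
# trainer you can open one explicitly; the scalar value below is illustrative.
from detectron2.utils.events import EventStorage

with EventStorage(start_iter=0) as storage:
    storage.put_scalar("fast_rcnn/cls_accuracy", 0.85)
    storage.step()  # advance to the next iteration
    # latest() maps each scalar name to its most recent (value, iteration) pair
    print(storage.latest()["fast_rcnn/cls_accuracy"])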
def losses(self):
    """
    Return the losses from a set of RPN predictions and their associated ground-truth.

    Returns:
        dict[loss name -> loss value]: A dict mapping from loss name to loss value.
        Loss names are: `loss_rpn_cls` for objectness classification and
        `loss_rpn_loc` for proposal localization.
    """
    gt_labels = torch.stack(self.gt_labels)
    anchors = self.anchors[0].cat(self.anchors).tensor  # Ax(4 or 5)
    gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in self.gt_boxes]
    gt_anchor_deltas = torch.stack(gt_anchor_deltas)

    # Log the number of positive/negative anchors per-image that's used in training
    num_pos_anchors = (gt_labels == 1).sum().item()
    num_neg_anchors = (gt_labels == 0).sum().item()
    storage = get_event_storage()
    storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / self.num_images)
    storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / self.num_images)

    objectness_loss, localization_loss = rpn_losses(
        gt_labels,
        gt_anchor_deltas,
        # concat on the Hi*Wi*A dimension
        cat(self.pred_objectness_logits, dim=1),
        cat(self.pred_anchor_deltas, dim=1),
        self.smooth_l1_beta,
    )
    normalizer = self.batch_size_per_image * self.num_images
    return {
        "loss_rpn_cls": objectness_loss / normalizer,
        "loss_rpn_loc": localization_loss / normalizer,
    }
def __init__(self, images, pred_logits, pred_offsets, gt_sizes=None, strides=None):
    self.image_sizes = images.image_sizes
    self.pred_logits = pred_logits
    self.pred_offsets = pred_offsets
    self.pred_coordinates = [
        offsets2coordinates(offset, images.tensor.shape[-2:]) for offset in pred_offsets
    ]
    device = self.pred_logits[0].device
    # Reduce each ground-truth size (w, h) to one scalar scale: sqrt(w^2 + h^2).
    # gt_sizes defaults to None, so guard before dereferencing it.
    if gt_sizes is not None:
        self.gt_sizes = torch.sqrt(torch.pow(gt_sizes.tensor.to(device), 2).sum(1))
    else:
        self.gt_sizes = None
    self.strides = strides
    self.num_feature_maps = len(pred_logits)

    storage = get_event_storage()
    if self.gt_sizes is not None:
        storage.put_image("sizes", self.gt_sizes[0:1] / 512)
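# `offsets2coordinates` is not defined in this excerpt. A plausible sketch of
# the decoding it likely performs (an assumption, not the repo's code): turn a
# (N, 2, h, w) per-pixel offset map into absolute (x, y) coordinates on the
# padded input image by adding each pixel's own grid location.
import torch

def offsets2coordinates_sketch(offsets: torch.Tensor, image_hw) -> torch.Tensor:
    H, W = image_hw
    n, _, h, w = offsets.shape
    ys, xs = torch.meshgrid(
        torch.arange(h, device=offsets.device, dtype=offsets.dtype),
        torch.arange(w, device=offsets.device, dtype=offsets.dtype),
        indexing="ij",
    )
    grid = torch.stack((xs, ys))  # (2, h, w), ordered (x, y)
    # scale grid cells to input-image pixels if the feature map is strided
    scale = torch.tensor([W / w, H / h], device=offsets.device, dtype=offsets.dtype)
    grid = grid * scale.view(2, 1, 1)
    return grid.unsqueeze(0) + offsets  # (N, 2, h, w) absolute coordinates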
def visualize_training(self, batched_inputs, proposals):
    """
    A function used to visualize images and proposals. It shows ground truth
    bounding boxes on the original image and up to 20 predicted object
    proposals on the original image. Users can implement different
    visualization functions for different models.

    Args:
        batched_inputs (list): a list that contains input to the model.
        proposals (list): a list that contains predicted proposals. Both
            batched_inputs and proposals should have the same length.
    """
    from detectron2.utils.visualizer import Visualizer

    storage = get_event_storage()
    max_vis_prop = 20

    for input, prop in zip(batched_inputs, proposals):
        img = input["image"].cpu().numpy()
        assert img.shape[0] == 3, "Images should have 3 channels."
        if self.input_format == "BGR":
            img = img[::-1, :, :]
        img = img.transpose(1, 2, 0)
        v_gt = Visualizer(img, None)
        v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes)
        anno_img = v_gt.get_image()
        box_size = min(len(prop.proposal_boxes), max_vis_prop)
        v_pred = Visualizer(img, None)
        v_pred = v_pred.overlay_instances(
            boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy()
        )
        prop_img = v_pred.get_image()
        vis_img = np.concatenate((anno_img, prop_img), axis=1)
        vis_img = vis_img.transpose(2, 0, 1)
        vis_name = "Left: GT bounding boxes; Right: Predicted proposals"
        storage.put_image(vis_name, vis_img)
        break  # only visualize one image in a batch
def forward(self, batched_inputs):
    if not self.training:
        return self.inference(batched_inputs)

    images = self.preprocess_image(batched_inputs)
    gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    # Tag each image's instances with the dataset it came from, so downstream
    # heads can treat multi-dataset batches differently.
    for i in range(len(gt_instances)):
        dataset_source = batched_inputs[i]["dataset_source"]
        gt_instances[i]._dataset_source = dataset_source

    features = self.backbone(images.tensor)  # one feature map per level
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
    _, detector_losses = self.roi_heads(images, features, proposals, gt_instances)

    if self.vis_period > 0:
        storage = get_event_storage()
        if storage.iter % self.vis_period == 0:
            self.visualize_training(batched_inputs, proposals)

    losses = {}
    losses.update(proposal_losses)
    losses.update(detector_losses)
    return losses
def get_clustering_loss(self, input_features, proposals):
    if not self.enable_clustering:
        return 0

    storage = get_event_storage()
    c_loss = 0
    if storage.iter == self.clustering_start_iter:
        # Initialize the class means from everything accumulated in the
        # feature store so far.
        items = self.feature_store.retrieve(-1)
        for index, item in enumerate(items):
            if len(item) == 0:
                self.means[index] = None
            else:
                mu = torch.tensor(item).mean(dim=0)
                self.means[index] = mu
        c_loss = self.clstr_loss_l2_cdist(input_features, proposals)
        # Freeze the parameters when clustering starts
        # for param in self.ae_model.parameters():
        #     param.requires_grad = False
    elif storage.iter > self.clustering_start_iter:
        if storage.iter % self.clustering_update_mu_iter == 0:
            # Compute new MUs
            items = self.feature_store.retrieve(-1)
            new_means = [None for _ in range(self.num_classes + 1)]
            for index, item in enumerate(items):
                if len(item) == 0:
                    new_means[index] = None
                else:
                    new_means[index] = torch.tensor(item).mean(dim=0)
            # Update the MUs with an exponential moving average
            for i, mean in enumerate(self.means):
                if mean is not None and new_means[i] is not None:
                    self.means[i] = (
                        self.clustering_momentum * mean
                        + (1 - self.clustering_momentum) * new_means[i]
                    )
        c_loss = self.clstr_loss_l2_cdist(input_features, proposals)
    return c_loss
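# The mean update above is a standard exponential moving average. A tiny
# standalone sketch (the values are illustrative, not from the repo):
import torch

momentum = 0.99
old_mean = torch.tensor([1.0, 0.0])
batch_mean = torch.tensor([0.0, 1.0])
new_mean = momentum * old_mean + (1 - momentum) * batch_mean
# new_mean stays close to old_mean; each update moves it 1% toward batch_mean
print(new_mean)  # tensor([0.9900, 0.0100])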
def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (C, H, W) format.
            * instances: Instances

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model,
              used in inference. See :meth:`postprocess` for details.

    Returns:
        In training, dict[str, Tensor]: mapping from a named loss to a tensor
        storing the loss. Used during training only.
        In inference, the standard output format, described in
        :doc:`/tutorials/models`.
    """
    images = self.preprocess_image(batched_inputs)
    features = self.backbone(images.tensor)
    features = [features[f] for f in self.head_in_features]

    anchors = self.anchor_generator(features)
    pred_logits, pred_anchor_deltas = self.head(features)
    # Transpose the Hi*Wi*A dimension to the middle:
    pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits]
    pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas]

    if self.training:
        assert not torch.jit.is_scripting(), "Not supported"
        assert "instances" in batched_inputs[0], "Instance annotations are missing in training!"
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]

        gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances)
        losses = self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes)

        if self.vis_period > 0:
            storage = get_event_storage()
            if storage.iter % self.vis_period == 0:
                results = self.inference(
                    anchors, pred_logits, pred_anchor_deltas, images.image_sizes
                )
                self.visualize_training(batched_inputs, results)

        return losses
    else:
        results = self.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes)
        if torch.jit.is_scripting():
            return results
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
            results, batched_inputs, images.image_sizes
        ):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
def forward(self, images, features, gt_instances=None):
    """
    Args:
        images (ImageList): input images of length `N`
        features (dict[str, Tensor]): input data as a mapping from feature map
            name to tensor. Axis 0 represents the number of images `N` in the
            input data; axes 1-3 are channels, height, and width, which may
            vary between feature maps (e.g., if a feature pyramid is used).
        gt_instances (list[Instances], optional): a length `N` list of
            `Instances`. Each `Instances` stores ground-truth instances for
            the corresponding image.

    Returns:
        proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits"
        loss: dict[Tensor] or None
    """
    features = [features[f] for f in self.in_features]
    pred_objectness_logits, pred_deltas = self.init_head(features)
    torch.cuda.synchronize()
    point_centers, strides = self.get_center_grid(features)
    point_centers = point_centers.to(pred_deltas[0].device)
    strides = strides.to(pred_deltas[0].device)
    pred_boxes = self.init_head.points2bbox(point_centers, pred_deltas)

    if self.training:
        # (N, H*W*L), (N, H*W*L, 4)
        gt_labels, gt_boxes = self.label_and_sample_points(point_centers, gt_instances, strides)
    else:
        gt_labels, gt_boxes = None, None

    outputs = RepPointsGeneratorResult(pred_objectness_logits, pred_boxes, gt_labels, gt_boxes)

    if self.training:
        losses = {k: v * self.loss_weight for k, v in outputs.losses(strides).items()}
    else:
        losses = {}

    proposals = outputs.predict_proposals()
    logits = outputs.predict_objectness_logits()

    if self.debug:
        # Dump the center grid, strides, labels, and logits of every level as
        # images so they can be inspected in TensorBoard.
        storage = get_event_storage()
        start = 0
        for i, f in enumerate(features):
            h, w = f.shape[-2:]
            centers = point_centers[start:start + h * w].view(h, w, 2)
            stride = strides[start:start + h * w].view(h, w)
            storage.put_image(
                "centers_x-%d" % i,
                (centers[..., 0:1] / centers[..., 0:1].max()).permute(2, 0, 1),
            )
            storage.put_image(
                "centers_y-%d" % i,
                (centers[..., 1:] / centers[..., 1:].max()).permute(2, 0, 1),
            )
            storage.put_image("strides-%d" % i, (stride[None] / 64).float())
            gt_label = gt_labels[0, start:start + h * w].view(1, h, w)
            storage.put_image("gt-labels-%d" % i, gt_label.float())
            storage.put_image("pred-logits-%d" % i, torch.sigmoid(logits[i][0].view(1, h, w)))
            start += h * w
        # storage.clear_images()

    with torch.no_grad():
        # Find the top proposals by applying NMS and removing boxes that
        # are too small. The proposals are treated as fixed for approximate
        # joint training with roi heads. This approach ignores the derivative
        # w.r.t. the proposal boxes' coordinates that are also network
        # responses, so is approximate.
        proposals = find_top_rpn_proposals(
            proposals,
            logits,
            images,
            self.nms_thresh,
            self.pre_nms_topk[self.training],
            self.post_nms_topk[self.training],
            self.min_box_side_len,
            self.training,
        )
    return proposals, losses
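# `get_center_grid` is not shown in this excerpt. A plausible sketch (an
# assumption, not the repo's code): for each feature level, place one point at
# the center of every cell, in input-image coordinates, and record the level's
# stride for each point.
import torch

def get_center_grid_sketch(features, strides_per_level=(8, 16, 32, 64, 128)):
    centers, strides = [], []
    for f, s in zip(features, strides_per_level):
        h, w = f.shape[-2:]
        ys, xs = torch.meshgrid(
            torch.arange(h, dtype=torch.float32),
            torch.arange(w, dtype=torch.float32),
            indexing="ij",
        )
        # (H*W, 2) points at cell centers, ordered (x, y)
        centers.append(torch.stack(((xs + 0.5) * s, (ys + 0.5) * s), dim=-1).view(-1, 2))
        strides.append(torch.full((h * w,), float(s)))
    return torch.cat(centers), torch.cat(strides)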
def losses(self, predictions, proposals, void_predictions, void_proposals,
           image_path=None, flips=None, use_exemplar=False):
    """
    Args:
        predictions: return values of :meth:`forward()`.
        proposals (list[Instances]): proposals that match the features that
            were used to compute predictions.
    """
    if utils.get_rank() == 0:
        storage = get_event_storage()
        storage.put_scalar(
            "exemplar/num_pseudo_gt",
            len(self.pseudo_gt) if self.pseudo_gt is not None else 0)
    scores, proposal_deltas, feature = predictions
    void_scores, _, void_feature = void_predictions
    if len(void_scores) > 0:
        neg_sample = void_scores
        storage = get_event_storage()
        storage.put_scalar("exemplar/num_neg_sample", len(neg_sample))
        # Push down the probability that a void proposal is classified as any
        # known (non-background) class.
        void_neg_loss = -torch.log(
            1 - neg_sample.softmax(-1)[:, :self.num_classes - 1] + 1e-8)
        if len(void_neg_loss) > 0:
            void_neg_loss = void_neg_loss.sum() / len(void_neg_loss)
        else:
            void_neg_loss = void_neg_loss.sum()
    else:
        void_neg_loss = scores.sum() * 0
    void_loss = {'loss_void_neg': void_neg_loss}
    if use_exemplar:
        a, b, c = void_predictions
        l = sum([len(x) for x in void_proposals[:-1]])
        self.add_feature(predictions, proposals, (a[:l], b[:l], c[:l]),
                         void_proposals[:-1], image_path[:-1], flips[:-1])
    else:
        self.add_feature(predictions, proposals, void_predictions,
                         void_proposals, image_path, flips)
    frcnn_outputs = FastRCNNOutputs(
        self.box2box_transform,
        scores,
        proposal_deltas,
        proposals,
        self.smooth_l1_beta,
        self.box_reg_loss_type,
        self.box_reg_loss_weight,
        self.label_converter,
        add_unlabeled_class=self.add_unlabeled_class,
        cls_weight=self.cls_weight.weight.view(-1),
        bg_class_ind=self.num_classes - 1)
    losses = frcnn_outputs.losses()
    self.step += 1
    losses.update(void_loss)
    return losses
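# A tiny numeric sketch of the void negative loss above (shapes are
# illustrative, not the repo's data): for each void proposal, penalize
# probability mass placed on the known foreground classes.
import torch

num_classes = 4                                       # 3 known classes + background
void_scores = torch.randn(5, num_classes)             # 5 void proposals
probs = void_scores.softmax(-1)[:, :num_classes - 1]  # drop the background column
loss = -torch.log(1 - probs + 1e-8)  # large when a known class gets high probability
loss = loss.sum() / len(loss)        # sum over classes, averaged over the 5 proposals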
def add_exemplar(self, exemplar_info, void_features, void_proposals,
                 image_path, flips, dir_name='pseudo_gts'):
    exemplar_features, exemplar_labels, exemplar_length = exemplar_info
    p = image_path[0].split('/')[-1].split('.')[0]
    template = image_path[0].replace(p, '{:012}')
    if self.step % 100 == 0:
        # sync multi-gpus
        self.sync_pseudo_gt(template, dir_name)
    if len(exemplar_features) == 0 or len(void_features) == 0:
        if utils.get_rank() == 0:
            storage = get_event_storage()
            storage.put_scalar("exemplar/add_exemplar", 0)
        return None
    boxes = [x.proposal_boxes.tensor for x in void_proposals]
    l = [len(b) for b in boxes]
    sizes = [x._image_size for x in void_proposals]
    cos = get_cos_sim(void_features, exemplar_features)
    # Loosen the threshold as more pseudo ground-truths accumulate.
    th = max(0.01, self.cos_thresh - (0.01 * self.n_pseudo_gt / 200))
    if float(cos.max()) < 1 - th:
        if utils.get_rank() == 0:
            storage = get_event_storage()
            storage.put_scalar("exemplar/add_exemplar", 0)
        return None
    cos = cos.split(l)
    data = []
    cos_log = []
    label_log = []
    new_label = [
        -torch.ones((len(x), ), device=cos[0].device) for x in void_proposals
    ]
    for i, (c, bbox, p, s) in enumerate(zip(cos, boxes, image_path, sizes)):
        H, W = s
        area = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
        ind = size_condition(area, self.size_opt)
        bbox = bbox[ind]
        nonzero_ind = ind.nonzero()
        if len(bbox) == 0:
            continue
        c = c[ind]
        # For every exemplar, take the best-matching void box.
        score, ind = c.view(len(bbox), -1).max(dim=0)
        bbox = bbox[ind]
        cc = score
        labels = exemplar_labels
        nonzero_ind = nonzero_ind[ind]
        ind = cc > 1 - th
        cc = cc[ind]
        bbox = bbox[ind]
        labels = labels[ind]  # keep labels aligned with the filtered boxes
        nonzero_ind = nonzero_ind[ind]
        keep = nms(bbox, cc, self.nms_thresh)
        bbox = bbox[keep]
        cc = cc[keep]
        l = labels[keep]
        nonzero_ind = nonzero_ind[keep]
        bbox = bbox.div(torch.as_tensor([[W, H, W, H]], device=bbox.device))
        if flips[i] == 1:
            # Undo the horizontal flip in normalized coordinates, then swap
            # the x columns so the box keeps x_min < x_max.
            bbox[:, 0] = 1 - bbox[:, 0]
            bbox[:, 2] = 1 - bbox[:, 2]
            bbox = torch.index_select(
                bbox, -1, torch.as_tensor([2, 1, 0, 3], device=bbox.device))
        labels = l.view(-1, 1).float()
        new_label[i][nonzero_ind] = labels
        path = int(p.split('/')[-1].split('.')[0])
        pa = torch.ones((len(bbox), 1), device=bbox.device) * path
        datum = torch.cat((pa, labels, bbox), dim=-1)
        data.append(datum)
        cos_log.append(cc)
        label_log.append(l)
    if len(data) > 0:
        dir_name = os.path.join(self.output_dir, dir_name)
        data = torch.cat(data)
        self.pseudo_gt = torch.cat((self.pseudo_gt, data))
        if utils.get_rank() == 0:
            storage = get_event_storage()
            storage.put_scalar("exemplar/add_exemplar", len(data))
    return new_label
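# A quick check of the flip-undo logic above (illustrative numbers): flipping
# x0 and x2 in normalized coordinates reverses their order, so columns 0 and 2
# must be swapped to keep x_min < x_max.
import torch

box = torch.tensor([[0.2, 0.3, 0.6, 0.8]])  # (x0, y0, x1, y1), normalized
box[:, 0] = 1 - box[:, 0]  # 0.8
box[:, 2] = 1 - box[:, 2]  # 0.4
box = torch.index_select(box, -1, torch.tensor([2, 1, 0, 3]))
print(box)  # tensor([[0.4000, 0.3000, 0.8000, 0.8000]]) -- valid again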
def clustering(self, image_path=None):
    # sync data
    self.sync_pseudo_gt()
    feature = self.gather(self.feature_memory)
    obj_score = self.gather(self.obj_score_memory)
    paths = self.gather(self.path_memory)
    bbox = self.gather(self.bbox_memory)
    self.feature_memory = []
    self.obj_score_memory = []
    self.path_memory = []
    self.bbox_memory = []
    if utils.get_rank() == 0 and self.cls_weight.weight.sum() < len(self.cls_weight.weight):
        ids, centroid, var = clustering(feature,
                                        K=self.num_centroid,
                                        step=self.step,
                                        device=feature.device,
                                        tol=1e-3,
                                        Niter=150)
        count = torch.bincount(ids)
        mean_obj_score = torch.bincount(
            ids, weights=obj_score.to(ids.device)) / (count + 1e-6)
        # top 10% densest clusters
        dist_topk_bound = -torch.topk(
            -var.view(-1), k=min(len(mean_obj_score), 13)).values[-1]
        mask = var < dist_topk_bound
        # number of found unknown classes
        cls_weight = sum(self.cls_weight.weight) - self.num_classes
        # high-objectness clusters
        cluster_obj_thresh = min(
            self.cluster_obj_thresh *
            (1 + cls_weight / len(self.cls_weight.weight)), 0.99)
        obj_mask = mean_obj_score.to(mask.device) > cluster_obj_thresh
        mask = torch.logical_and(mask, obj_mask.to(mask.device))
        mask = mask.bool().view(-1)
        ids = ids.long().view(-1)
        # keep only the samples that fall into a selected cluster
        paths = paths[mask[ids]]
        bbox = bbox[mask[ids]]
        feature = feature[mask[ids]]
        obj_score = obj_score[mask[ids]]
        ids = ids[mask[ids]]
        centroid = centroid[mask]
        if len(obj_score) > 0:
            obj_thresh = min(self.coupled_obj_thresh, max(obj_score))
        else:
            obj_thresh = self.coupled_obj_thresh
        # raise the threshold as pseudo ground-truths accumulate
        obj_thresh = obj_thresh + (self.n_pseudo_gt * 0.01 / 100)
        obj_thresh = min(obj_thresh, 0.99)
        idx = obj_score >= obj_thresh
        bbox = bbox[idx]
        feature = feature[idx]
        paths = paths[idx]
        obj_score = obj_score[idx]
        ids = ids[idx]

        feats = []
        boxes = []
        ps = []
        obj_scores = []
        new_ids = []
        cls_weight = sum(self.cls_weight.weight) - self.num_classes
        coupled_cos_thresh = self.coupled_cos_thresh * (
            1 - cls_weight / len(self.cls_weight.weight))
        coupled_cos_thresh = max(coupled_cos_thresh, 0.01)
        for i, l in enumerate(sorted(ids.unique())):
            idx = ids == l
            feat = feature[idx]
            bb = bbox[idx]
            path = paths[idx]
            obj = obj_score[idx]
            cos_sim = get_cos_sim(feat, feat).view(-1)
            cos_dist = 1 - cos_sim
            idx = cos_dist.argsort()
            used = []
            used_path = []
            printer = cos_sim[idx]
            printer = printer[printer < 0.99999]  # eliminate same-element pairs
            # Greedily pair samples from different images whose features are
            # close enough in cosine distance.
            for v in idx:
                x, y = v // len(feat), v % len(feat)
                if cos_dist[v] > coupled_cos_thresh:
                    break
                if path[x] != path[y] and path[x] not in used_path and path[y] not in used_path:
                    used.append(x)
                    used.append(y)
                    used_path.append(path[x])
                    used_path.append(path[y])
            if len(used) > 0:
                idx = torch.as_tensor(used, device=feat.device)
                temp_ids = torch.ones((len(used), ), device=feat.device) * l
                feats.append(feat[idx])
                boxes.append(bb[idx])
                ps.append(path[idx])
                obj_scores.append(obj[idx])
                new_ids.append(temp_ids)
        if len(feats) > 0:
            feature = torch.cat(feats)
            bbox = torch.cat(boxes)
            paths = torch.cat(ps)
            obj_score = torch.cat(obj_scores)
            ids = torch.cat(new_ids)
            cls_weight = self.cls_weight.weight
            start_l = int(cls_weight.sum()) + self.original_num_classes - self.num_classes
            # Map cluster ids to fresh class labels starting at start_l.
            labels = -ids - 1
            unique_label = labels.unique()
            unique_label = unique_label[:cls_weight.shape[0] - int(cls_weight.sum())]
            for i, p in enumerate(unique_label):
                if i + start_l - self.original_num_classes == self.num_centroid:
                    break
                labels[labels == p] = i + start_l
            idx = labels > 0
            obj_score = obj_score[idx]
            labels = labels[idx]
            paths = paths[idx]
            feature = feature[idx]
            bbox = bbox[idx]
            data = torch.cat(
                (paths.unsqueeze(1), labels.unsqueeze(1).float(), bbox), dim=-1)
        else:
            data = torch.zeros((0, 6), device=feature.device)
        if image_path is not None and len(data) > 0:
            utils.save_boxes(data, feature.detach(), obj_score.detach(),
                             image_path, self.pal, self.step,
                             self.num_classes, self.output_dir)
        # Keep the broadcast buffer 2-D so its shape matches the (1, 2) buffer
        # allocated on the other ranks below.
        size = torch.as_tensor([[len(data), len(centroid)]],
                               device=feature.device).float()
        storage = get_event_storage()
        storage.put_scalar("exemplar/obj_th", float(obj_thresh))
        storage.put_scalar("exemplar/cluster_obj_th", float(cluster_obj_thresh))
        storage.put_scalar("exemplar/sel_cluster", int(mask.sum()))
        storage.put_scalar("exemplar/coupled_cos_th", float(coupled_cos_thresh))
        storage.put_scalar("exemplar/new", len(data))
    else:
        size = torch.empty(size=(1, 2), device=feature.device)
    # gather
    if utils.get_world_size() > 1:
        torch.cuda.synchronize()
        dist.broadcast(size, 0)
        if utils.get_rank() > 0:
            data = torch.empty(size=(int(size[0, 0]), 6), device=feature.device)
        torch.cuda.synchronize()
        dist.broadcast(data, 0)
    l_cls = self.original_num_classes - 1
    l_new = int(data[:, 1].max() - l_cls) if len(data) > 0 else 0
    cls_weight = self.cls_weight.weight.data
    cls_weight[:self.num_classes + l_new] = 1
    self.cls_weight.weight.data = cls_weight
    if self.pseudo_gt is None:
        self.pseudo_gt = data
    else:
        self.pseudo_gt = torch.cat((self.pseudo_gt, data))
    self.n_pseudo_gt = len(self.pseudo_gt)
    # flush
    if utils.get_rank() == 0:
        try:
            torch.save(
                self.pseudo_gt.cpu(),
                os.path.join(self.output_dir,
                             'pseudo_gts/{}.pth'.format(self.step)))
        except Exception:
            pass
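# `get_cos_sim` is not defined in this excerpt. A minimal sketch of a pairwise
# cosine-similarity matrix (an assumption about its contract: rows index the
# first argument, columns the second):
import torch
import torch.nn.functional as F

def get_cos_sim_sketch(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # L2-normalize the rows, then an inner product gives cosine similarity.
    return F.normalize(a, dim=1) @ F.normalize(b, dim=1).T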
def fcos_losses(self, instances):
    num_classes = instances.logits_pred.size(1)
    assert num_classes == self.num_classes

    labels = instances.labels.flatten()
    gt_object = instances.gt_inds

    pos_inds = torch.nonzero(labels != num_classes).squeeze(1)
    neg_inds = torch.nonzero(labels == num_classes).squeeze(1)
    num_pos_local = pos_inds.numel()
    num_gpus = get_world_size()
    total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
    num_pos_avg = max(total_num_pos / num_gpus, 1.0)

    # prepare one_hot
    class_target = torch.zeros_like(instances.logits_pred)
    class_target[pos_inds, labels[pos_inds]] = 1

    class_loss = sigmoid_focal_loss_jit(
        instances.logits_pred,
        class_target,
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="none",
    )  # / num_pos_avg

    # Split the focal loss into easy/hard positives and negatives, using a
    # mean + std boundary on the per-sample probability error.
    positive_diff = (1 - instances.logits_pred[class_target == 1].sigmoid()).abs()
    negative_diff = (0 - instances.logits_pred[class_target == 0].sigmoid()).abs()
    positive_mean = positive_diff.mean().detach()
    positive_std = positive_diff.std().detach()
    negative_mean = negative_diff.mean().detach()
    negative_std = negative_diff.std().detach()

    upper_true_loss = class_loss.flatten()[(class_target == 1).flatten()][
        positive_diff > (positive_mean + positive_std)].sum() / num_pos_avg
    under_true_loss = class_loss.flatten()[(class_target == 1).flatten()][
        positive_diff <= (positive_mean + positive_std)].sum() / num_pos_avg
    upper_false_loss = class_loss.flatten()[(class_target == 0).flatten()][
        negative_diff > (negative_mean + negative_std)].sum() / num_pos_avg
    under_false_loss = class_loss.flatten()[(class_target == 0).flatten()][
        negative_diff <= (negative_mean + negative_std)].sum() / num_pos_avg

    storage = get_event_storage()
    if storage.iter % 20 == 0:
        logger.info(
            "upper_true {}, under_true {} upper_false {} under_false {}".format(
                (positive_diff > positive_mean + positive_std).sum(),
                (positive_diff <= positive_mean + positive_std).sum(),
                (negative_diff > negative_mean + negative_std).sum(),
                (negative_diff <= negative_mean + negative_std).sum()))

    instances = instances[pos_inds]
    instances.pos_inds = pos_inds
    # assert (instances.gt_inds.unique() != gt_object.unique()).sum() == 0

    ctrness_targets = compute_ctrness_targets(instances.reg_targets)
    ctrness_targets_sum = ctrness_targets.sum()
    loss_denorm = max(reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)
    instances.gt_ctrs = ctrness_targets

    if pos_inds.numel() > 0:
        reg_loss = self.loc_loss_func(instances.reg_pred,
                                      instances.reg_targets,
                                      ctrness_targets) / loss_denorm
        ctrness_loss = torch.nn.MSELoss(reduction="sum")(
            instances.ctrness_pred.sigmoid(), ctrness_targets) / num_pos_avg
    else:
        reg_loss = instances.reg_pred.sum() * 0
        ctrness_loss = instances.ctrness_pred.sum() * 0

    losses = {
        "loss_upper_true_cls": upper_true_loss,
        "loss_under_true_cls": under_true_loss,
        "loss_upper_false_cls": upper_false_loss,
        "loss_under_false_cls": under_false_loss,
        "loss_fcos_loc": reg_loss,
        "loss_fcos_ctr": ctrness_loss,
        # "loss_negative_identity_mean": negative_identity_mean_loss,
        # "loss_negative_identity_std": negative_identity_std_loss,
        # "loss_positive_identity": positive_identity_loss,
    }
    extras = {"instances": instances, "loss_denorm": loss_denorm}
    return extras, losses
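# `compute_ctrness_targets` is not shown here. For reference, the standard
# FCOS centerness on (l, t, r, b) regression targets looks like this (a sketch
# following the FCOS paper, not necessarily this repo's exact code):
import torch

def compute_ctrness_targets_sketch(reg_targets: torch.Tensor) -> torch.Tensor:
    if len(reg_targets) == 0:
        return reg_targets.new_zeros(0)
    left_right = reg_targets[:, [0, 2]]
    top_bottom = reg_targets[:, [1, 3]]
    ctrness = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \
              (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])
    return torch.sqrt(ctrness)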
def losses(
    self,
    anchors: List[Boxes],
    pred_objectness_logits: List[torch.Tensor],
    gt_labels: List[torch.Tensor],
    pred_anchor_deltas: List[torch.Tensor],
    gt_boxes: List[torch.Tensor],
) -> Dict[str, torch.Tensor]:
    """
    Return the losses from a set of RPN predictions and their associated ground-truth.

    Args:
        anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each
            has shape (Hi*Wi*A, B), where B is box dimension (4 or 5).
        pred_objectness_logits (list[Tensor]): A list of L elements. Element i is a
            tensor of shape (N, Hi*Wi*A) representing the predicted objectness logits
            for all anchors.
        gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`.
        pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a
            tensor of shape (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas"
            used to transform anchors to proposals.
        gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`.

    Returns:
        dict[loss name -> loss value]: A dict mapping from loss name to loss value.
        Loss names are: `loss_rpn_cls` for objectness classification and
        `loss_rpn_loc` for proposal localization.
    """
    num_images = len(gt_labels)
    gt_labels = torch.stack(gt_labels)  # (N, sum(Hi*Wi*Ai))

    # Log the number of positive/negative anchors per-image that's used in training
    pos_mask = gt_labels == 1
    num_pos_anchors = pos_mask.sum().item()
    num_neg_anchors = (gt_labels == 0).sum().item()
    storage = get_event_storage()
    storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images)
    storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images)

    if self.box_reg_loss_type == "smooth_l1":
        anchors = type(anchors[0]).cat(anchors).tensor  # Ax(4 or 5)
        gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes]
        gt_anchor_deltas = torch.stack(gt_anchor_deltas)  # (N, sum(Hi*Wi*Ai), 4 or 5)
        localization_loss = smooth_l1_loss(
            cat(pred_anchor_deltas, dim=1)[pos_mask],
            gt_anchor_deltas[pos_mask],
            self.smooth_l1_beta,
            reduction="sum",
        )
    elif self.box_reg_loss_type == "giou":
        pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
        pred_proposals = cat(pred_proposals, dim=1)
        pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
        pos_mask = pos_mask.view(-1)
        localization_loss = giou_loss(
            pred_proposals[pos_mask], cat(gt_boxes)[pos_mask], reduction="sum"
        )
    elif self.box_reg_loss_type == "diou":
        anchors = type(anchors[0]).cat(anchors).tensor  # Ax(4 or 5)
        gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes]
        gt_anchor_deltas = torch.stack(gt_anchor_deltas)  # (N, sum(Hi*Wi*Ai), 4 or 5)
        localization_loss = compute_diou(
            cat(pred_anchor_deltas, dim=1)[pos_mask],
            gt_anchor_deltas[pos_mask],
            self.box2box_transform.weights,
            self.box2box_transform.scale_clamp,
        )
    # elif self.box_reg_loss_type == "diou_bbox":
    #     pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
    #     pred_proposals = cat(pred_proposals, dim=1)
    #     pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
    #     pos_mask = pos_mask.view(-1)
    #     localization_loss = giou_loss(
    #         pred_proposals[pos_mask], cat(gt_boxes)[pos_mask]
    #     )
    elif self.box_reg_loss_type == "diou_mmdet":
        pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
        pred_proposals = cat(pred_proposals, dim=1)
        pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
        pos_mask = pos_mask.view(-1)
        localization_loss = compute_diou_mmdet(pred_proposals[pos_mask], cat(gt_boxes)[pos_mask])
    elif self.box_reg_loss_type == "ciou_mmdet":
        pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
        pred_proposals = cat(pred_proposals, dim=1)
        pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
        pos_mask = pos_mask.view(-1)
        localization_loss = compute_ciou_mmdet(pred_proposals[pos_mask], cat(gt_boxes)[pos_mask])
    else:
        raise ValueError(f"Invalid rpn box reg loss type '{self.box_reg_loss_type}'")

    valid_mask = gt_labels >= 0
    objectness_loss = F.binary_cross_entropy_with_logits(
        cat(pred_objectness_logits, dim=1)[valid_mask],
        gt_labels[valid_mask].to(torch.float32),
        reduction="sum",
    )
    normalizer = self.batch_size_per_image * num_images
    losses = {
        "loss_rpn_cls": objectness_loss / normalizer,
        "loss_rpn_loc": localization_loss / normalizer,
    }
    losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()}
    return losses
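# `compute_diou_mmdet` / `compute_ciou_mmdet` are not shown in this excerpt.
# For orientation, a minimal DIoU loss on (x1, y1, x2, y2) boxes (a sketch of
# the standard definition, not necessarily the helpers used above):
import torch

def diou_loss_sketch(pred: torch.Tensor, target: torch.Tensor, eps: float = 1e-7):
    # intersection / union
    lt = torch.max(pred[:, :2], target[:, :2])
    rb = torch.min(pred[:, 2:], target[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_p = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_t = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
    iou = inter / (area_p + area_t - inter + eps)
    # squared center distance over squared enclosing-box diagonal
    c_p = (pred[:, :2] + pred[:, 2:]) / 2
    c_t = (target[:, :2] + target[:, 2:]) / 2
    rho2 = ((c_p - c_t) ** 2).sum(dim=1)
    enc_lt = torch.min(pred[:, :2], target[:, :2])
    enc_rb = torch.max(pred[:, 2:], target[:, 2:])
    diag2 = ((enc_rb - enc_lt) ** 2).sum(dim=1) + eps
    return (1 - iou + rho2 / diag2).sum()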
def _forward_box(
    self, features: Dict[str, torch.Tensor], proposals: List[Instances]
) -> Union[Dict[str, torch.Tensor], List[Instances]]:
    """
    Forward logic of the box prediction branch. If `self.train_on_pred_boxes`
    is True, the function puts predicted boxes in the `proposal_boxes` field
    of the `proposals` argument.

    Args:
        features (dict[str, Tensor]): mapping from feature map names to tensor.
            Same as in :meth:`ROIHeads.forward`.
        proposals (list[Instances]): the per-image object proposals with their
            matching ground truth. Each has fields "proposal_boxes",
            "objectness_logits", "gt_classes", "gt_boxes".

    Returns:
        In training, a dict of losses. In inference, a list of `Instances`,
        the predicted instances.
    """
    features = [features[f] for f in self.box_in_features]
    box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])

    # Reweight each proposal's pooled features by its (shifted) objectness score.
    objectness_logits = torch.cat([x.objectness_logits + 1 for x in proposals], dim=0)
    box_features = box_features * objectness_logits.view(-1, 1, 1, 1)
    if self.training:
        storage = get_event_storage()
        storage.put_scalar("proposals/objectness_logits+1 mean", objectness_logits.mean())
        storage.put_scalar("proposals/objectness_logits+1 max", objectness_logits.max())
        storage.put_scalar("proposals/objectness_logits+1 min", objectness_logits.min())
    # torch.cuda.empty_cache()

    box_features = self.box_head(box_features)
    predictions = self.box_predictor(box_features, proposals)
    # del box_features

    if self.training:
        losses = self.box_predictor.losses(predictions, proposals, self.gt_classes_img_oh)

        self.pred_class_img_logits = (
            self.box_predictor.predict_probs_img(predictions, proposals).clone().detach()
        )

        prev_pred_scores = predictions[0].detach()
        prev_pred_boxes = [p.proposal_boxes for p in proposals]
        for k in range(self.refine_K):
            suffix = "_r" + str(k)
            targets, target_weights = self.get_pgt(
                prev_pred_boxes, prev_pred_scores, proposals, suffix
            )

            proposal_append_gt = self.proposal_append_gt
            self.proposal_append_gt = False
            proposals_k, matched_idxs = self.label_and_sample_proposals(
                proposals, targets, ret_MI=True, suffix=suffix
            )
            self.proposal_append_gt = proposal_append_gt

            proposal_weights = torch.cat(
                [
                    torch.index_select(target_weight, 0, matched_idx)
                    for target_weight, matched_idx in zip(target_weights, matched_idxs)
                ],
                dim=0,
            )

            predictions_k = self.box_refinery[k](box_features)
            losses_k = self.box_refinery[k].losses(predictions_k, proposals_k, proposal_weights)

            prev_pred_scores = self.box_refinery[k].predict_probs(predictions_k, proposals_k)
            prev_pred_boxes = self.box_refinery[k].predict_boxes(predictions_k, proposals_k)
            prev_pred_scores = [prev_pred_score.detach() for prev_pred_score in prev_pred_scores]
            prev_pred_boxes = [prev_pred_box.detach() for prev_pred_box in prev_pred_boxes]

            losses.update(losses_k)

        # proposals is modified in-place below, so losses must be computed first.
        if self.train_on_pred_boxes:
            with torch.no_grad():
                pred_boxes = self.box_predictor.predict_boxes_for_gt_classes(
                    predictions, proposals
                )
                for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes):
                    proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image)
        return losses
    else:
        if self.refine_reg[-1]:
            predictions_k = self.box_refinery[-1](box_features)
            pred_instances, _, all_scores, all_boxes = self.box_refinery[-1].inference(
                predictions_k, proposals
            )
        else:
            predictions_K = []
            for k in range(self.refine_K):
                predictions_k = self.box_refinery[k](box_features)
                predictions_K.append(predictions_k)
            pred_instances, _, all_scores, all_boxes = self.box_refinery[-1].inference(
                predictions_K, proposals
            )
        return pred_instances, all_scores, all_boxes
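# A small sanity sketch of the objectness reweighting above (shapes are
# illustrative): one scalar per proposal is broadcast over its pooled
# C x H x W feature map.
import torch

box_features = torch.randn(6, 256, 7, 7)  # 6 proposals from the pooler
objectness_logits = torch.randn(6) + 1    # shifted so the weights center at 1
weighted = box_features * objectness_logits.view(-1, 1, 1, 1)
assert weighted.shape == box_features.shape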
def test(cls, cfg, model, mapper_object, evaluators=None):
    """
    Args:
        cfg (CfgNode):
        model (nn.Module):
        evaluators (list[DatasetEvaluator] or None): if None, will call
            :meth:`build_evaluator`. Otherwise, must have the same length as
            `cfg.DATASETS.TEST`.

    Returns:
        dict: a dict of result metrics
    """
    logger = logging.getLogger(__name__)
    if isinstance(evaluators, DatasetEvaluator):
        evaluators = [evaluators]
    if evaluators is not None:
        assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
            len(cfg.DATASETS.TEST), len(evaluators))

    results = OrderedDict()
    for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
        data_loader = cls.build_test_loader(cfg, dataset_name, mapper_object)
        # When evaluators are passed in as arguments,
        # implicitly assume that evaluators can be created before data_loader.
        if evaluators is not None:
            evaluator = evaluators[idx]
        else:
            try:
                evaluator = cls.build_evaluator(cfg, dataset_name)
            except NotImplementedError:
                logger.warning(
                    "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                    "or implement its `build_evaluator` method.")
                results[dataset_name] = {}
                continue
        results_i = evaluate.inference_on_dataset(model, data_loader, evaluator)
        accuracy_test = round(results_i["accuracy"] * 100, 2)
        storage = get_event_storage()
        storage.put_scalar("accuracy_" + dataset_name, accuracy_test, smoothing_hint=False)
        results[dataset_name] = results_i
        if comm.is_main_process():
            assert isinstance(results_i, dict), (
                "Evaluator must return a dict on the main process. "
                "Got {} instead.".format(results_i))
            logger.info("Evaluation results for {} in csv format:".format(dataset_name))
            print_csv_format(results_i)

    if len(results) == 1:
        results = list(results.values())[0]
    return results
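# For context, an evaluator compatible with the loop above must implement
# detectron2's DatasetEvaluator interface and return a metrics dict containing
# an "accuracy" key. A minimal sketch (the metric and the field names inside
# process() are illustrative assumptions):
from detectron2.evaluation import DatasetEvaluator

class AccuracyEvaluator(DatasetEvaluator):
    def reset(self):
        self._correct = 0
        self._total = 0

    def process(self, inputs, outputs):
        # Compare each prediction against its label; "pred_class" and "label"
        # are hypothetical field names for this sketch.
        for inp, out in zip(inputs, outputs):
            self._total += 1
            self._correct += int(out["pred_class"] == inp["label"])

    def evaluate(self):
        return {"accuracy": self._correct / max(self._total, 1)}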
def losses(
    self,
    anchors: List[Boxes],
    pred_objectness_logits: List[torch.Tensor],
    gt_labels: List[torch.Tensor],
    pred_anchor_deltas: List[torch.Tensor],
    gt_boxes: List[torch.Tensor],
) -> Dict[str, torch.Tensor]:
    """
    Return the losses from a set of RPN predictions and their associated ground-truth.

    Args:
        anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each
            has shape (Hi*Wi*A, B), where B is box dimension (4 or 5).
        pred_objectness_logits (list[Tensor]): A list of L elements. Element i is a
            tensor of shape (N, Hi*Wi*A) representing the predicted objectness logits
            for all anchors.
        gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`.
        pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a
            tensor of shape (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas"
            used to transform anchors to proposals.
        gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`.

    Returns:
        dict[loss name -> loss value]: A dict mapping from loss name to loss value.
        Loss names are: `loss_rpn_cls` for objectness classification and
        `loss_rpn_loc` for proposal localization.
    """
    num_images = len(gt_labels)
    gt_labels = torch.stack(gt_labels)  # (N, sum(Hi*Wi*Ai))

    # Log the number of positive/negative anchors per-image that's used in training
    pos_mask = gt_labels == 1
    num_pos_anchors = pos_mask.sum().item()
    num_neg_anchors = (gt_labels == 0).sum().item()
    storage = get_event_storage()
    storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images)
    storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images)

    localization_loss = _dense_box_regression_loss(
        anchors,
        self.box2box_transform,
        pred_anchor_deltas,
        gt_boxes,
        pos_mask,
        box_reg_loss_type=self.box_reg_loss_type,
        smooth_l1_beta=self.smooth_l1_beta,
    )

    valid_mask = gt_labels >= 0
    objectness_loss = F.binary_cross_entropy_with_logits(
        cat(pred_objectness_logits, dim=1)[valid_mask],
        gt_labels[valid_mask].to(torch.float32),
        reduction="sum",
    )
    normalizer = self.batch_size_per_image * num_images
    losses = {
        "loss_rpn_cls": objectness_loss / normalizer,
        "loss_rpn_loc": localization_loss / normalizer,
    }
    losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()}
    return losses