def forward(self, outputs, targets):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization purposes.
    # Note: this is a Paddle port, so the tensor is created with paddle.to_tensor (using
    # `place` rather than torch's `device`) and reduced with the torch2paddle wrapper.
    num_boxes = sum(len(t['labels']) for t in targets)
    num_boxes = paddle.to_tensor([num_boxes], dtype='float32',
                                 place=next(iter(outputs.values())).place)
    if is_dist_avail_and_initialized():
        torch2paddle.all_reduce(num_boxes)
    num_boxes = paddle.clip(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'masks':
                    # Intermediate mask losses are too costly to compute, so we skip them
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
def forward(self, outputs, targets):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    # Compute the average number of target boxes across all nodes, for normalization purposes.
    # TODO: this is reserved for negative-sample training to improve robustness, as in DaSiamRPN.
    num_boxes = sum(t['valid'].item() for t in targets)
    num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses. In this variant self.losses holds callables, not names.
    losses = {}
    for loss in self.losses:
        losses.update(loss(outputs, targets, num_boxes))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            for loss in self.losses:
                l_dict = loss(aux_outputs, targets, num_boxes)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
def forward(self, outputs, targets):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    outputs_without_aux = {k: v for k, v in outputs.items()
                           if k != 'aux_outputs' and k != 'enc_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization purposes
    num_boxes = sum(len(t["labels"]) for t in targets)
    num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        kwargs = {}
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes, **kwargs))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'masks':
                    # Intermediate mask losses are too costly to compute, so we skip them
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs['log'] = False
                l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)

    # The encoder's output proposals are matched against class-agnostic (all-zero) labels
    if 'enc_outputs' in outputs:
        enc_outputs = outputs['enc_outputs']
        bin_targets = copy.deepcopy(targets)
        for bt in bin_targets:
            bt['labels'] = torch.zeros_like(bt['labels'])
        indices = self.matcher(enc_outputs, bin_targets)
        for loss in self.losses:
            if loss == 'masks':
                # Intermediate mask losses are too costly to compute, so we skip them
                continue
            kwargs = {}
            if loss == 'labels':
                # Logging is enabled only for the last layer
                kwargs['log'] = False
            l_dict = self.get_loss(loss, enc_outputs, bin_targets, indices, num_boxes, **kwargs)
            l_dict = {k + '_enc': v for k, v in l_dict.items()}
            losses.update(l_dict)
    return losses
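# A hedged sketch (not taken verbatim from any single implementation in this collection)
# of the get_loss dispatcher that the variants above call into: it computes nothing
# itself and only maps a loss name to the bound method that computes it. The loss_*
# names, stub bodies, and the _DemoCriterion class are assumptions modeled on
# DETR-style criteria, included so the dispatch can be exercised standalone.
class _DemoCriterion:
    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
        loss_map = {
            'labels': self.loss_labels,
            'boxes': self.loss_boxes,
            'cardinality': self.loss_cardinality,
        }
        assert loss in loss_map, f'unknown loss: {loss}'
        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)

    # Stub losses (hypothetical) so the dispatch can be demonstrated without a model
    def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
        return {'loss_ce': 0.0}

    def loss_boxes(self, outputs, targets, indices, num_boxes):
        return {'loss_bbox': 0.0, 'loss_giou': 0.0}

    def loss_cardinality(self, outputs, targets, indices, num_boxes):
        return {'cardinality_error': 0.0}

print(_DemoCriterion().get_loss('boxes', {}, [], [], 1.0))  # {'loss_bbox': 0.0, 'loss_giou': 0.0}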
def get_num_boxes(self, num_samples):
    num_boxes = torch.as_tensor(num_samples, dtype=torch.float, device=self.sample_device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
    return num_boxes
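# Minimal runnable sketch of the num_boxes normalization shared by every variant here,
# assuming a single process so the all_reduce and world-size division are no-ops: the
# count is clamped to a minimum of 1 so a batch with no ground-truth boxes cannot
# produce a division by zero when the losses are averaged.
import torch

targets = [{"labels": torch.tensor([3, 7])},
           {"labels": torch.tensor([], dtype=torch.long)}]
num_boxes = sum(len(t["labels"]) for t in targets)           # 2
num_boxes = torch.as_tensor([num_boxes], dtype=torch.float)
world_size = 1                                               # assumed: no distributed setup
num_boxes = torch.clamp(num_boxes / world_size, min=1).item()
print(num_boxes)                                             # 2.0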
def forward(self, outputs):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
    """
    # Since we are doing meta-learning over our constructed meta-tasks, the targets for
    # these meta-tasks are stored in outputs['meta_targets']. We don't use the original targets.
    targets = outputs['meta_targets']

    outputs_without_aux = {k: v for k, v in outputs.items()
                           if k != 'aux_outputs' and k != 'enc_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization purposes
    num_boxes = sum(len(t["labels"]) for t in targets)
    num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        kwargs = {}
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes, **kwargs))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'category_codes_cls':
                    # The meta-attention classification loss is not computed for aux_outputs
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs['log'] = False
                l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
def forward(self, outputs, targets):
    """Loss computation.

    Args:
        outputs (dict): Dict of RTD outputs, which are tensors.
        targets (list): List of dicts, such that len(targets) == batch_size.
            The expected keys in each dict depend on the losses applied.

    Returns:
        losses (dict): Dict of losses.
    """
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization purposes
    num_boxes = sum(len(t['labels']) for t in targets)
    num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs and 'iou' not in self.losses:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'masks':
                    # Intermediate mask losses are too costly to compute, so we skip them
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
def forward(self, outputs, targets, origin_indices=None):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Note: any origin_indices passed in are recomputed here from the last-layer outputs
    origin_indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target items across all nodes, for normalization purposes
    num_items = sum(len(t["labels"]) for t in targets)
    num_items = torch.as_tensor([num_items], dtype=torch.float,
                                device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_items)
    num_items = torch.clamp(num_items / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        losses.update(self.get_loss(loss, outputs, targets, num_items,
                                    origin_indices=origin_indices))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    aux_name = 'aux_outputs'
    if aux_name in outputs:
        for i, aux_outputs in enumerate(outputs[aux_name]):
            origin_indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, num_items,
                                       origin_indices=origin_indices, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
def forward(self, outputs, targets):
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target interactions across all nodes, for normalization purposes
    num_interactions = sum(len(t['obj_labels']) for t in targets)
    num_interactions = torch.as_tensor([num_interactions], dtype=torch.float,
                                       device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_interactions)
    num_interactions = torch.clamp(num_interactions / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        losses.update(self.get_loss(loss, outputs, targets, indices, num_interactions))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                kwargs = {}
                if loss == 'obj_labels':
                    # Logging is enabled only for the last layer
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                       num_interactions, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
def forward(self, outputs, targets):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    # The forward pass below shows how the loss is computed. First, be clear about the
    # form of the model outputs (outputs) and the ground truth (targets): targets is a
    # list of dicts whose length equals the batch size, each dict following the COCO
    # annotation format, while outputs is the DETR model output, a dict of the form:
    # {'pred_logits': (b, num_queries=100, num_classes),
    #  'pred_boxes':  (b, num_queries=100, 4),
    #  'aux_outputs': [{'pred_logits': ..., 'pred_boxes': ...}, {...}, ...]}

    # Filter out the intermediate-layer outputs, keeping only the last layer's predictions
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets.
    # A key prerequisite of the loss computation is matching the predictions to the
    # ground truth, done by self.matcher() below. The returned indices is a list of
    # tuples whose length equals the batch size; each tuple is (index_i, index_j),
    # where index_i are the matched prediction indices and index_j the ground-truth
    # indices, with len(index_i) == len(index_j) == min(num_queries, num_targets_in_image).
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization
    # purposes: count the objects in this batch and synchronize across distributed nodes
    num_boxes = sum(len(t["labels"]) for t in targets)
    num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                device=next(iter(outputs.values())).device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses and collect the results in the losses dict;
    # self.get_loss() returns each computed loss.
    losses = {}
    for loss in self.losses:
        # Compute one loss type (loss is a string naming the type: 'labels', 'boxes',
        # 'cardinality', 'masks'). get_loss does no computation itself; it merely
        # dispatches each loss type to the corresponding method and returns the result.
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes))

    # In case of auxiliary losses, we repeat this process with the output of each
    # intermediate layer.
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'masks':
                    # Intermediate mask losses are too costly to compute, so we skip them
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)
    return losses
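# Illustrative construction of the outputs/targets formats described in the annotated
# variant above. The shapes and values are assumptions for the sketch (batch=2,
# num_queries=100, num_classes=92 as in COCO), not pulled from a config; a criterion
# built as above would then be invoked as criterion(outputs, targets).
import torch

outputs = {
    'pred_logits': torch.randn(2, 100, 92),  # (batch, num_queries, num_classes)
    'pred_boxes': torch.rand(2, 100, 4),     # (batch, num_queries, 4), cxcywh in [0, 1]
}
targets = [
    {'labels': torch.tensor([17]), 'boxes': torch.rand(1, 4)},     # image with 1 object
    {'labels': torch.tensor([3, 58]), 'boxes': torch.rand(2, 4)},  # image with 2 objects
]
# criterion(outputs, targets) then returns a dict such as
# {'loss_ce': ..., 'loss_bbox': ..., 'loss_giou': ..., 'cardinality_error': ...}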
def forward(self, outputs, targets, indices_track=None, track_on=False):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    if track_on:
        track_exists = "pred_tracks" in outputs_without_aux.keys()
        assert track_exists

        # Track match
        indices_track, targets = self.track_matcher(outputs_without_aux, targets,
                                                    indices_track=indices_track)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses, plus the track loss
        losses = {}
        losses_type = self.losses + ['tracks']
        for loss in losses_type:
            losses.update(self.get_loss(loss, outputs, targets, indices_track, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                assert track_exists
                for loss in losses_type:
                    if loss == 'masks':
                        # Intermediate mask losses are too costly to compute, so we skip them
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    # For tracking targets we reuse the track-matcher indices for every auxiliary layer
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices_track,
                                           num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)
    else:
        track_exists = "pred_tracks" in outputs_without_aux.keys()
        if track_exists:
            outputs_without_aux.pop("pred_tracks")

        # Retrieve the matching between the outputs of the last layer and the targets
        indices_track = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(self.get_loss(loss, outputs, targets, indices_track, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                if track_exists:
                    aux_outputs.pop("pred_tracks")
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'masks':
                        # Intermediate mask losses are too costly to compute, so we skip them
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)
    return losses, indices_track
def forward(self, outputs, bbox_tgts, clas_tgts):
    """ This performs the loss computation.
    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         bbox_tgts: per-image padded target boxes in y, x, y2, x2 format, scaled to (-1, 1)
         clas_tgts: per-image padded target classes, where class 0 marks padding
    """
    device = next(iter(outputs.values())).device
    targets = []
    # For each image, build a DETR-style target dict
    for bbox_gt, class_gt in zip(bbox_tgts, clas_tgts):
        # Extract the non-padded boxes and labels (torch.nonzero avoids the
        # NumPy round-trip, which would fail on GPU tensors)
        bbox_gt = bbox_gt[torch.nonzero(class_gt)].squeeze(dim=1).cpu()
        class_gt = class_gt[class_gt > 0] - 1
        # Convert y, x, y2, x2 -> x, y, w, h
        bbox_gt[:, 2:] = bbox_gt[:, 2:] - bbox_gt[:, :2]
        bbox_gt = bbox_gt[:, [1, 0, 3, 2]]
        # Convert x, y, w, h -> cx, cy, w, h
        bbox_gt[:, :2] = bbox_gt[:, :2] + 0.5 * bbox_gt[:, 2:]
        # Scale from the input range (-1, 1) to the expected (0, 1)
        bbox_gt[:, 2:] = bbox_gt[:, 2:] / 2.
        bbox_gt[:, :2] = (bbox_gt[:, :2] + 1) / 2.
        targets.append({
            "boxes": bbox_gt.to(device),
            "labels": class_gt.to(device),
        })

    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization purposes
    num_boxes = sum(len(t["labels"]) for t in targets)
    num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=device)
    if is_dist_avail_and_initialized():
        torch.distributed.all_reduce(num_boxes)
    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes))

    # In case of auxiliary losses, we repeat this process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'masks':
                    # Intermediate mask losses are too costly to compute, so we skip them
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                losses.update(l_dict)

    # Record the tracked metrics (weighted where a weight exists)
    self.metrics = {}
    for name in losses:
        if name in self.metric_names:
            self.metrics[name] = (losses[name] if name not in self.weight_dict
                                  else losses[name] * self.weight_dict[name])

    # Reduce to a single scalar: only losses listed in weight_dict contribute
    losses = sum(losses[k] * self.weight_dict[k] for k in losses.keys() if k in self.weight_dict)
    return losses
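# A small runnable sketch of the final weighted reduction in the variant above; the
# loss names and weights here are made up for illustration. Only entries present in
# weight_dict contribute to the scalar objective, so diagnostics such as
# cardinality_error can be logged without being optimized.
import torch

losses = {'loss_ce': torch.tensor(0.7),
          'loss_bbox': torch.tensor(0.2),
          'cardinality_error': torch.tensor(3.0)}
weight_dict = {'loss_ce': 1.0, 'loss_bbox': 5.0}
total = sum(losses[k] * weight_dict[k] for k in losses if k in weight_dict)
print(total)  # tensor(1.7000); cardinality_error is excluded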