Example #1
 def forward(self, outputs, targets):
     """ This performs the loss computation.
     Parameters:
          outputs: dict of tensors, see the output specification of the model for the format
          targets: list of dicts, such that len(targets) == batch_size.
                   The expected keys in each dict depend on the losses applied, see each loss's doc
     """
     outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}
     indices = self.matcher(outputs_without_aux, targets)
     num_boxes = sum(len(t['labels']) for t in targets)
     num_boxes = paddle.to_tensor([num_boxes], dtype='float32',
         place=next(iter(outputs.values())).place)
     if is_dist_avail_and_initialized():
         torch2paddle.all_reduce(num_boxes)
     num_boxes = paddle.clip(num_boxes / get_world_size(), min=1).item()
     losses = {}
     for loss in self.losses:
         losses.update(self.get_loss(loss, outputs, targets, indices,
             num_boxes))
     if 'aux_outputs' in outputs:
         for i, aux_outputs in enumerate(outputs['aux_outputs']):
             indices = self.matcher(aux_outputs, targets)
             for loss in self.losses:
                 if loss == 'masks':
                     continue
                 kwargs = {}
                 if loss == 'labels':
                     kwargs = {'log': False}
                 l_dict = self.get_loss(loss, aux_outputs, targets,
                     indices, num_boxes, **kwargs)
                 l_dict = {(k + f'_{i}'): v for k, v in l_dict.items()}
                 losses.update(l_dict)
     return losses
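Every one of these examples relies on the same pair of distributed helpers to normalize num_boxes; they are not shown in the snippets. A minimal sketch, following the definitions in DETR's util/misc.py:

import torch.distributed as dist

def is_dist_avail_and_initialized():
    # True only when torch.distributed is both available and initialized
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True

def get_world_size():
    # Number of participating processes; 1 for single-GPU or CPU runs
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()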
Example #2
    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied, see each loss's doc
        """

        # Compute the average number of target boxes across all nodes, for normalization purposes
        # TODO: this function is reserved for negative-sample training to improve robustness, as in DaSiamRPN
        num_boxes = sum(t['valid'].item() for t in targets)
        # print("num of valid boxes: {}".format(num_boxes)) # debug
        num_boxes = torch.as_tensor([num_boxes],
                                    dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(loss(outputs, targets, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                for loss in self.losses:
                    l_dict = loss(aux_outputs, targets, num_boxes)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        return losses
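Unlike the other examples, Example #2 stores callables in self.losses and invokes each as loss(outputs, targets, num_boxes), rather than dispatching string names through self.get_loss. A hypothetical callable matching that convention, modeled on DETR's cardinality error (which needs no matching indices):

import torch
import torch.nn.functional as F

def loss_cardinality(outputs, targets, num_boxes):
    # Hypothetical loss callable: L1 error between the number of predictions
    # that are not "no-object" and the number of ground-truth boxes.
    pred_logits = outputs['pred_logits']  # (batch, num_queries, num_classes + 1)
    tgt_lengths = torch.as_tensor([len(t['labels']) for t in targets],
                                  device=pred_logits.device)
    # predictions whose argmax is not the last ("no-object") class
    card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum(1)
    card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
    return {'cardinality_error': card_err}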
Example #3
    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied, see each loss's doc
        """
        outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs' and k != 'enc_outputs'}

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            kwargs = {}
            losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes, **kwargs))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'masks':
                        # Intermediate masks losses are too costly to compute, we ignore them.
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs['log'] = False
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices, num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        if 'enc_outputs' in outputs:
            enc_outputs = outputs['enc_outputs']
            bin_targets = copy.deepcopy(targets)
            for bt in bin_targets:
                bt['labels'] = torch.zeros_like(bt['labels'])
            indices = self.matcher(enc_outputs, bin_targets)
            for loss in self.losses:
                if loss == 'masks':
                    # Intermediate masks losses are too costly to compute, we ignore them.
                    continue
                kwargs = {}
                if loss == 'labels':
                    # Logging is enabled only for the last layer
                    kwargs['log'] = False
                l_dict = self.get_loss(loss, enc_outputs, bin_targets, indices, num_boxes, **kwargs)
                l_dict = {k + '_enc': v for k, v in l_dict.items()}
                losses.update(l_dict)

        return losses
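The enc_outputs branch in Example #3 is the two-stage Deformable DETR pattern: encoder proposals are matched class-agnostically, so every target label is zeroed to a single foreground class before matching. A small self-contained illustration:

import copy
import torch

targets = [{'labels': torch.tensor([3, 17, 17]), 'boxes': torch.rand(3, 4)}]
bin_targets = copy.deepcopy(targets)
for bt in bin_targets:
    bt['labels'] = torch.zeros_like(bt['labels'])  # every object -> class 0
print(bin_targets[0]['labels'])  # tensor([0, 0, 0]); boxes stay unchanged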
Example #4
 def get_num_boxes(self, num_samples):
     num_boxes = torch.as_tensor(num_samples,
                                 dtype=torch.float,
                                 device=self.sample_device)
     if is_dist_avail_and_initialized():
         torch.distributed.all_reduce(num_boxes)
     num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
     return num_boxes
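The normalization that Example #4 factors into a helper (and the other examples inline) gives every rank the same divisor: all_reduce sums the per-rank counts, dividing by the world size yields the average per rank, and clamp(min=1) guards against an all-empty batch. A worked sketch with hypothetical counts:

local_counts = [3, 0, 5, 2]         # one count per rank (hypothetical values)
global_sum = sum(local_counts)      # all_reduce leaves 10 on every rank
num_boxes = max(global_sum / 4, 1)  # 10 / world_size=4 -> 2.5 on every rank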
Example #5
    def forward(self, outputs):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
        """
        # Since we are doing meta-learning over our constructed meta-tasks, the targets for these meta-tasks are
        # stored in outputs['meta_targets']. We don't use the original targets.
        targets = outputs['meta_targets']

        outputs_without_aux = {
            k: v
            for k, v in outputs.items()
            if k != 'aux_outputs' and k != 'enc_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes],
                                    dtype=torch.float,
                                    device=next(iter(outputs.values())).device)

        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            kwargs = {}
            losses.update(
                self.get_loss(loss, outputs, targets, indices, num_boxes,
                              **kwargs))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'category_codes_cls':
                        # meta-attention cls loss not for aux_outputs
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs['log'] = False
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        return losses
Example #6
    def forward(self, outputs, targets):
        """Loss computation.

        Args:
            outputs (dict): Dict of RTD outputs, which are tensors.
            targets (list): List of dicts, such that len(targets) == batch_size.
                The expected keys in each dict depend on the losses applied.

        Returns:
            losses (dict): Dict of losses.
        """
        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t['labels']) for t in targets)
        num_boxes = torch.as_tensor([num_boxes],
                                    dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(
                self.get_loss(loss, outputs, targets, indices, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs and 'iou' not in self.losses:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'masks':
                        # Intermediate masks losses are too costly to compute, we ignore them.
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)
        return losses
Example #7
    def forward(self, outputs, targets, origin_indices=None):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied, see each loss's doc
        """
        outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

        # Retrieve the matching between the outputs of the last layer and the targets
        # (any origin_indices passed in is recomputed here)
        origin_indices = self.matcher(outputs_without_aux, targets)
        num_items = sum(len(t["labels"]) for t in targets)

        num_items = torch.as_tensor([num_items], dtype=torch.float, device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_items)
        num_items = torch.clamp(num_items / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(self.get_loss(loss, outputs, targets, num_items, origin_indices=origin_indices))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        aux_name = 'aux_outputs'
        if aux_name in outputs:
            for i, aux_outputs in enumerate(outputs[aux_name]):
                origin_indices = self.matcher(aux_outputs, targets)

                for loss in self.losses:
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, num_items, origin_indices=origin_indices, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        return losses
Example #8
    def forward(self, outputs, targets):
        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        num_interactions = sum(len(t['obj_labels']) for t in targets)
        num_interactions = torch.as_tensor([num_interactions],
                                           dtype=torch.float,
                                           device=next(iter(
                                               outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_interactions)
        num_interactions = torch.clamp(num_interactions / get_world_size(),
                                       min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(
                self.get_loss(loss, outputs, targets, indices,
                              num_interactions))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    kwargs = {}
                    if loss == 'obj_labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_interactions, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        return losses
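Example #8 is a human-object interaction variant: normalization counts annotated interactions via obj_labels instead of boxes, but the all_reduce/clamp pattern is identical. A toy illustration of the count (hypothetical values):

import torch

targets = [{'obj_labels': torch.tensor([5, 12])},   # image with 2 interactions
           {'obj_labels': torch.tensor([3])}]       # image with 1 interaction
num_interactions = sum(len(t['obj_labels']) for t in targets)  # -> 3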
Example #9
    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied, see each loss's doc
        """
        # Now let's walk through the forward pass to see how the loss is computed. First be clear about the
        # formats of the model output (outputs) and the ground truth (targets). For outputs, see the comment below;
        # targets is a list of dicts whose length equals the batch size, where each dict follows the format of a
        # COCO-style annotation.
        # outputs is the output of the DETR model, a dict of the form:
        # {'pred_logits': (b, num_queries=100, num_classes),
        #  'pred_boxes': (b, num_queries=100, 4),
        #  'aux_outputs': [{'pred_logits': .., 'pred_boxes': ...}, {...}, ...]}
        # Filter out the intermediate-layer outputs, keeping only the final layer's predictions
        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        # A key prerequisite for computing the loss is matching the model's predictions against the GT, done by
        # self.matcher() below; the format of the returned indices is described here.
        # Matching predictions to GT yields indices, a list of tuples with length equal to the batch size; each
        # tuple is (index_i, index_j), where index_i are the indices of the matched predictions and index_j the
        # GT indices, with len(index_i) == len(index_j) == min(num_queries, num_targets_in_image)
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        # Count the number of target objects in this batch of images, synchronized across all distributed nodes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes],
                                    dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        # Next, compute each requested type of loss and store the results in a dict (the losses variable
        # below); the self.get_loss() method returns the computed loss.
        losses = {}
        for loss in self.losses:
            # Compute one specific type of loss (the loss variable here is a string naming the loss type:
            # 'labels', 'boxes', 'cardinality' or 'masks');
            # get_loss does not compute any loss itself: it merely dispatches each loss type to the
            # corresponding method and returns the result.
            losses.update(
                self.get_loss(loss, outputs, targets, indices, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        # If the model output includes intermediate-layer outputs, compute their corresponding losses as well
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'masks':
                        # Intermediate masks losses are too costly to compute, we ignore them.
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        return losses
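The indices format described in Example #9's comments is easy to check by hand. A sketch with hypothetical values, showing how a (pred_idx, tgt_idx) pair aligns matched predictions with their targets:

import torch

# One (pred_idx, tgt_idx) tuple per image; here queries 17 and 42 were
# matched to ground-truth objects 1 and 0 respectively (hypothetical values)
indices = [(torch.tensor([17, 42]), torch.tensor([1, 0]))]

pred_boxes = torch.rand(100, 4)           # num_queries = 100, single image
gt_boxes = torch.rand(2, 4)               # two annotated objects
matched_pred = pred_boxes[indices[0][0]]  # (2, 4), in matching order
matched_gt = gt_boxes[indices[0][1]]      # (2, 4), aligned row by row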
Example #10
    def forward(self, outputs, targets, indices_track=None, track_on=False):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depend on the losses applied, see each loss's doc
        """
        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }

        if track_on:
            track_exists = "pred_tracks" in outputs_without_aux.keys()
            assert track_exists is True
            # Track Match.
            indices_track, targets = self.track_matcher(
                outputs_without_aux, targets, indices_track=indices_track)
            # Compute the average number of target boxes across all nodes, for normalization purposes
            num_boxes = sum(len(t["labels"]) for t in targets)
            num_boxes = torch.as_tensor([num_boxes],
                                        dtype=torch.float,
                                        device=next(iter(
                                            outputs.values())).device)
            if is_dist_avail_and_initialized():
                torch.distributed.all_reduce(num_boxes)
            num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
            # Compute all the requested losses
            losses = {}
            losses_type = self.losses + ['tracks']
            for loss in losses_type:
                losses.update(
                    self.get_loss(loss, outputs, targets, indices_track,
                                  num_boxes))
            # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
            if 'aux_outputs' in outputs:
                for i, aux_outputs in enumerate(outputs['aux_outputs']):
                    assert track_exists is True
                    for loss in losses_type:
                        if loss == 'masks':
                            # Intermediate masks losses are too costly to compute, we ignore them.
                            continue
                        kwargs = {}
                        if loss == 'labels':
                            # Logging is enabled only for the last layer
                            kwargs = {'log': False}
                        # we use the default matcher in tracking target.
                        l_dict = self.get_loss(loss, aux_outputs, targets,
                                               indices_track, num_boxes,
                                               **kwargs)
                        l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                        losses.update(l_dict)
        else:
            track_exists = "pred_tracks" in outputs_without_aux.keys()
            if track_exists:
                outputs_without_aux.pop("pred_tracks")
            # Retrieve the matching between the outputs of the last layer and the targets
            indices_track = self.matcher(outputs_without_aux, targets)
            # Compute the average number of target boxes across all nodes, for normalization purposes
            num_boxes = sum(len(t["labels"]) for t in targets)
            num_boxes = torch.as_tensor([num_boxes],
                                        dtype=torch.float,
                                        device=next(iter(
                                            outputs.values())).device)
            if is_dist_avail_and_initialized():
                torch.distributed.all_reduce(num_boxes)
            num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()
            # Compute all the requested losses
            losses = {}
            for loss in self.losses:
                losses.update(
                    self.get_loss(loss, outputs, targets, indices_track,
                                  num_boxes))
            # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
            if 'aux_outputs' in outputs:
                for i, aux_outputs in enumerate(outputs['aux_outputs']):
                    if track_exists:
                        aux_outputs.pop("pred_tracks")
                    indices = self.matcher(aux_outputs, targets)
                    for loss in self.losses:
                        if loss == 'masks':
                            # Intermediate masks losses are too costly to compute, we ignore them.
                            continue
                        kwargs = {}
                        if loss == 'labels':
                            # Logging is enabled only for the last layer
                            kwargs = {'log': False}
                        l_dict = self.get_loss(loss, aux_outputs, targets,
                                               indices, num_boxes, **kwargs)
                        l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                        losses.update(l_dict)

        return losses, indices_track
Example #11
    def forward(self, outputs, bbox_tgts, clas_tgts):
        """ This performs the loss computation.
        Parameters:
             outputs: dict of tensors, see the output specification of the model for the format
             targets: list of dicts, such that len(targets) == batch_size.
                      The expected keys in each dict depends on the losses applied, see each loss' doc
        """
        device = next(iter(outputs.values())).device

        targets = []
        # for each image
        for bbox_gt, class_gt in zip(bbox_tgts, clas_tgts):
            # extract non zero boxes and labels
            bbox_gt = bbox_gt[torch.nonzero(class_gt)].squeeze(dim=1).cpu()
            class_gt = class_gt[class_gt > 0] - 1
            # change gt from y,x,y2,x2 -> x,y,w,h
            bbox_gt[:, 2:] = bbox_gt[:, 2:] - bbox_gt[:, :2]
            bbox_gt = bbox_gt[:, [1, 0, 3, 2]]
            # change gt from x,y,w,h -> cxcywh
            bbox_gt[:, :2] = bbox_gt[:, :2] + 0.5 * bbox_gt[:, 2:]

            # scale from input range (-1, 1) to expected (0, 1)
            bbox_gt[:, 2:] = bbox_gt[:, 2:] / 2.
            bbox_gt[:, :2] = (bbox_gt[:, :2] + 1) / 2.

            targets.append({
                "boxes": bbox_gt.to(device),
                "labels": class_gt.to(device),
            })

        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = torch.as_tensor([num_boxes],
                                    dtype=torch.float,
                                    device=next(iter(outputs.values())).device)
        if is_dist_avail_and_initialized():
            torch.distributed.all_reduce(num_boxes)
        num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(
                self.get_loss(loss, outputs, targets, indices, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    if loss == 'masks':
                        # Intermediate masks losses are too costly to compute, we ignore them.
                        continue
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_boxes, **kwargs)
                    l_dict = {k + f'_{i}': v for k, v in l_dict.items()}
                    losses.update(l_dict)

        self.metrics = {}
        for name in losses:
            if name in self.metric_names:
                self.metrics[name] = (losses[name] if name not in self.weight_dict
                                      else losses[name] * self.weight_dict[name])

        losses = sum(losses[k] * self.weight_dict[k] for k in losses.keys()
                     if k in self.weight_dict)
        return losses
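This last example goes one step further than the others and reduces the loss dict to a single scalar with self.weight_dict; keys without a weight (pure metrics such as the cardinality error) are reported in self.metrics but excluded from the total. A worked sketch with hypothetical weights:

import torch

weight_dict = {'loss_ce': 1.0, 'loss_bbox': 5.0, 'loss_giou': 2.0}  # hypothetical
losses = {'loss_ce': torch.tensor(0.7),
          'loss_bbox': torch.tensor(0.2),
          'loss_giou': torch.tensor(0.4),
          'cardinality_error': torch.tensor(3.0)}  # metric only, never weighted
total = sum(losses[k] * weight_dict[k] for k in losses if k in weight_dict)
# 0.7*1.0 + 0.2*5.0 + 0.4*2.0 = 2.5; cardinality_error does not contribute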