Exemple #1
0
def evaluate(model, dataloader, crf, criterion, f1=False):
    """Run ``model`` over ``dataloader`` and report accuracy and summed loss.

    Args:
        model: Network to evaluate. It is switched to eval mode first. When
            ``crf`` is truthy it is called as ``model(features, lengths)``
            and must also provide ``forward_alg(features, labels, lengths)``;
            otherwise it is called as ``model(features)``.
        dataloader: Iterable yielding ``(features, labels, _, lengths)``
            batches.
        crf: Truthy to use the CRF code path (predictions and loss both come
            from the model itself).
        criterion: Loss used on the non-CRF path; it receives the output
            flattened to ``(-1, 8)`` and the labels flattened to ``(-1)``.
        f1: When true, an 8-class confusion matrix is accumulated and
            precision, recall and F1 are returned as well.

    Returns:
        ``(accuracy, loss_total)``, or
        ``(accuracy, loss_total, precision, recall, f1)`` when ``f1`` is
        true. ``loss_total`` is the sum over batches of the batch loss times
        the first dimension of ``features``. ``accuracy`` is ``0.0`` when no
        element was counted (e.g. an empty dataloader) instead of raising
        ``ZeroDivisionError``.
    """
    model.eval()
    confusion_matrix = None
    if f1:
        confusion_matrix = ConfusionMeter(8)
        confusion_matrix.reset()
    correct, total = 0, 0
    loss_total = 0
    for features, labels, _, lengths in dataloader:
        # Trim every tensor to the largest length present in this batch.
        max_length = max(lengths)
        features = cuda_var_wrapper(features[:, :, :max_length], volatile=True)
        labels = cuda_var_wrapper(labels[:, :max_length], volatile=True)
        lengths = cuda_var_wrapper(lengths)
        if crf:
            # The CRF model returns its predictions directly as the second
            # element; its loss comes from forward_alg.
            _, output = model(features, lengths)
            loss = model.forward_alg(features, labels, lengths)
            preds = output
        else:
            output = model(features)
            loss = criterion(output.contiguous().view(-1, 8),
                             labels.contiguous().view(-1))
            _, preds = torch.max(F.softmax(output, 2), 2)
        correct_batch, total_batch = get_batch_accuracy(
            labels, output, lengths)
        # Weight the batch loss by the batch size (first dim of features).
        loss_total += loss.data[0] * features.size()[0]
        correct += correct_batch
        total += total_batch
        if f1:
            if isinstance(preds, Variable):
                preds = preds.data
            # Only the first `length` positions of each sequence are scored.
            for pred, truth, length in zip(preds, labels.data, lengths.data):
                confusion_matrix.add(pred[:length], truth[:length])

    # Guard against an empty dataloader: nothing counted means accuracy 0.0.
    accuracy = correct / total if total else 0.0
    if f1:
        # Use a separate name so the boolean flag `f1` is not rebound.
        precision, recall, f1_score = precision_recall_f1(confusion_matrix)
        return accuracy, loss_total, precision, recall, f1_score
    return accuracy, loss_total
Exemple #2
0
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters handed to _fast_rcnn_loc_loss when the
        # localization losses are computed.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        # Original author's note: picks 256 of the ~20000 candidate anchors
        # for binary classification and box regression when training the RPN.
        self.anchor_target_creator = AnchorTargetCreator()
        # Original author's note: picks 128 of the ~2000 filtered RoIs for
        # training the RoI head.
        self.proposal_target_creator = ProposalTargetCreator()
        # Mean/std of the location targets; they are passed to the proposal
        # target creator, which normalizes the locations fed to the network.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (first dim of ``bboxes``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)
        # Extract the image features.
        features = self.faster_rcnn.extractor(imgs)
        # Proposal creation inside the RPN (original author's summary):
        # 1. For the feature map, compute the foreground probability and the
        #    location parameters of (H/16) x (W/16) x 9 (roughly 20000)
        #    anchors.
        # 2. Keep the 12000 anchors with the highest probability.
        # 3. Refine those anchors with the regressed locations to get RoIs.
        # 4. Apply non-maximum suppression (NMS) and keep the 2000 most
        #    probable RoIs.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # proposal_target_creator yields the sampled RoIs together with
        # their ground-truth locations and labels.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # Run the head network to get the predictions.
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Anchor sampling for the RPN loss (original author's summary):
        # 1. For every ground-truth box, the anchor with the highest IoU is
        #    a positive sample.
        # 2. Of the remaining anchors, those whose IoU with any ground-truth
        #    box exceeds 0.7 are positive too (at most 128 positives).
        # 3. Anchors with IoU below 0.3 are randomly chosen as negatives;
        #    positives and negatives total 256.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # Localization loss (original author's note: an L1-style loss).
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Classification loss is cross entropy; label -1 is ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Only anchors with a label > -1 feed the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample pick the 4 location values of its ground-truth
        # class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())
        # Two kinds of loss (location and label), each for the RPN and for
        # the RoI head: four losses in total, plus their sum.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses of that step.

        The arguments are forwarded unchanged to :meth:`forward`.
        """
        # Clear the accumulated gradients.
        self.optimizer.zero_grad()
        # Compute all losses with the forward pass.
        losses = self.forward(imgs, bboxes, labels, scale)
        # Back-propagate the total loss.
        losses.total_loss.backward()
        # Update the parameters once.
        self.optimizer.step()
        # Feed the losses into the running meters.
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoint_caffe/fasterrcnn_%s' % timestr
            # Every extra keyword value becomes a suffix of the file name.
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        # dirname is '' for a bare file name; os.makedirs('') would raise,
        # so only create a directory when the path actually names one.
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore the model (and optionally config/optimizer) from ``path``.

        Args:
            path: Checkpoint file readable by ``t.load``.
            load_optimizer (bool): Restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): Apply the stored ``'config'`` entry through
                ``opt._parse``.

        Returns:
            self, so calls can be chained.
        """
        # NOTE: t.load unpickles the file; only load trusted checkpoints.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state; nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    # update_meters, reset_meters and get_meter_data update, reset and read
    # the running statistics that are shown in the visualization; they are
    # not part of the core training logic.

    def update_meters(self, losses):
        """Add every loss in ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #3
0
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
        nclasses (int): Number of classes tracked by the RoI confusion
            matrix (it is passed straight to ``ConfusionMeter``).
    """
    def __init__(self, faster_rcnn, nclasses):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters handed to _fast_rcnn_loc_loss.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Passed to the proposal target creator on every forward call.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(nclasses)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (first dim of ``bboxes``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), at.tonumpy(anchor), img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Only anchors with a label > -1 feed the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample pick the 4 location values of its ground-truth
        # class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Four individual losses followed by their sum (total_loss).
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses of that step.

        The arguments are forwarded unchanged to :meth:`forward`.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            # Every extra keyword value becomes a suffix of the file name.
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        # dirname is '' for a bare file name; os.makedirs('') would raise,
        # so only create a directory when the path actually names one.
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore the model (and optionally config/optimizer) from ``path``.

        Args:
            path: Checkpoint file readable by ``t.load``.
            load_optimizer (bool): Restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): Apply the stored ``'config'`` entry through
                ``opt._parse``.

        Returns:
            self, so calls can be chained.
        """
        # NOTE: t.load unpickles the file; only load trusted checkpoints.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state; nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss in ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #4
0
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training Faster R-CNN; returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters handed to _fast_rcnn_loc_loss.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Passed to the proposal target creator on every forward call.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    # Original author's walkthrough (refers to figure pic6.png):
    # The figure shows what happens during training before the losses are
    # computed. It is only loosely a "forward pass", because it calls
    # proposal_target_creator(), whose sole purpose is to supply the 128
    # sample_roi together with their ground-truth locations and labels for
    # training the RoI head; it is not used at test time. In the flow chart
    # red circles are values produced while the network runs and blue boxes
    # are parameters that exist once the network is defined.
    # Step by step: n = bboxes.shape[0] reads the batch size and an error is
    # raised when it is not 1, since only batch_size=1 is supported. The
    # image height and width are then read from imgs (laid out as
    # n, c, hh, ww). self.faster_rcnn.extractor(imgs) extracts the features,
    # which go through self.faster_rcnn.rpn(features, img_size, scale) to
    # produce rpn_locs, rpn_scores, rois, roi_indices and anchor. Next,
    # proposal_target_creator yields the sampled sample_roi with the matching
    # gt_roi_loc and gt_roi_label, and finally the head network completes
    # the prediction.
    # The file is called "trainer" because it also covers what back-
    # propagation needs, i.e. the loss computation (figure pic7.png):
    # the rest of the code computes two groups of losses, the RPN losses and
    # the RoI losses. Faster R-CNN uses learned networks in two places. One
    # is proposal generation (anchors + RPN); the RPN losses train it by
    # gradient descent. The other is the RoI head, which predicts class and
    # location offsets from the feature map and the RoIs; the RoI losses
    # train that part. Note that rpn_cm and roi_cm are confusion matrices
    # used later for visualization.
    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (first dim of ``bboxes``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Only anchors with a label > -1 feed the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample pick the 4 location values of its ground-truth
        # class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # Four individual losses followed by their sum (total_loss).
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    # One parameter update: self.optimizer.zero_grad() clears the gradients,
    # self.forward(imgs, bboxes, labels, scale) computes all losses,
    # losses.total_loss.backward() back-propagates, self.optimizer.step()
    # updates the parameters, self.update_meters(losses) records the losses
    # for visualization, and the losses are returned.
    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses of that step.

        The arguments are forwarded unchanged to :meth:`forward`.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            # Every extra keyword value becomes a suffix of the file name.
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        # dirname is '' for a bare file name; os.makedirs('') would raise,
        # so only create a directory when the path actually names one.
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore the model (and optionally config/optimizer) from ``path``.

        Args:
            path: Checkpoint file readable by ``t.load``.
            load_optimizer (bool): Restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): Apply the stored ``'config'`` entry through
                ``opt._parse``.

        Returns:
            self, so calls can be chained.
        """
        # NOTE: t.load unpickles the file; only load trusted checkpoints.
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            # The whole file is the model state; nothing else to restore.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss in ``losses`` to the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #5
0
class FasterRCNNTrainer(nn.Module):
    """Bundle a Faster R-CNN network with the pieces needed to train it.

    The trainer owns the optimizer, the anchor/proposal target creators, one
    running-average meter per loss and two confusion matrices, and offers
    train/validation steps plus checkpoint save/load helpers.
    """

    def __init__(self, faster_rcnn):
        """Build the trainer around ``faster_rcnn``.

        Args:
            faster_rcnn: the detection network. It must expose ``extractor``,
                ``rpn``, ``head``, ``get_optimizer()``,
                ``loc_normalize_mean`` and ``loc_normalize_std``.
        """
        super(FasterRCNNTrainer, self).__init__()
        self.faster_rcnn = faster_rcnn

        # Produces ground-truth offsets and foreground/background labels for
        # the anchors.
        self.anchor_target_creator = AnchorTargetCreator()
        # Produces ground-truth offsets and class labels for the proposals.
        self.proposal_target_creator = ProposalTargetCreator()

        # Mean and standard deviation used for the location estimates.
        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std

        # Optimizer supplied by the network itself.
        self.optimizer = self.faster_rcnn.get_optimizer()

        # Hyper-parameters of the localisation losses.
        self.rpn_sigma = OPT.rpn_sigma
        self.roi_sigma = OPT.roi_sigma

        # Training indicators.
        # RPN confusion matrix: two classes (foreground / background).
        self.rpn_cm = ConfusionMeter(2)
        # Head confusion matrix: foreground classes plus one background class.
        self.roi_cm = ConfusionMeter(OPT.n_fg_class + 1)
        # One running average per field of LossTuple.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Run the network on one image and compute the training losses.

        Args:
            imgs: 4-D image batch; the last two dimensions are read as
                height and width.
            bboxes: ground-truth boxes; the first dimension is the batch
                size (the original notes give the shape as (N, R, 4)).
            labels: ground-truth labels (original notes: shape (N, R)).
            scale: scale value forwarded to the RPN.

        Returns:
            LossTuple: RPN localisation loss, RPN classification loss, RoI
            localisation loss, RoI classification loss and their sum.

        Raises:
            ValueError: if the batch size is not 1.
        """
        num_batch = bboxes.shape[0]
        if num_batch != 1:
            raise ValueError("仅支持batch_size=1")

        # Image size (H, W).
        _, _, H, W = imgs.shape
        img_size = (H, W)
        # Feature map.
        features = self.faster_rcnn.extractor(imgs)
        # RPN: predicted anchor offsets and scores, proposals and anchors.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)
        # The batch size is 1, so take the single element of each batch.
        bbox = bboxes[0]
        label = labels[0]
        rpn_loc = rpn_locs[0]
        rpn_score = rpn_scores[0]
        roi = rois

        # Ground-truth offsets and labels for the anchors.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox=tonumpy(data=bbox), anchor=anchor, img_size=img_size)

        # Sampled proposals with their ground-truth offsets and labels.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi=roi,
            bbox=tonumpy(bbox),
            label=tonumpy(label),
            loc_normalize_mean=self.loc_normalize_mean,
            loc_normalize_std=self.loc_normalize_std)
        # With batch_size=1 every sampled RoI belongs to image 0.
        sample_roi_index = torch.zeros(len(sample_roi))
        # Per-class offsets and scores predicted for the sampled proposals.
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            x=features, rois=sample_roi, roi_indices=sample_roi_index)

        # ------------------------rpn loss----------------------------------#
        gt_rpn_label = totensor(data=gt_rpn_label).long()
        gt_rpn_loc = totensor(data=gt_rpn_loc)
        rpn_loc_loss = _faster_rcnn_loc_loss(pred_loc=rpn_loc,
                                             gt_loc=gt_rpn_loc,
                                             gt_label=gt_rpn_label.data,
                                             sigma=self.rpn_sigma)
        rpn_cls_loss = F.cross_entropy(input=rpn_score,
                                       target=gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Keep only the anchors whose label is not the ignored value -1.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = tonumpy(data=rpn_score)[tonumpy(data=gt_rpn_label) > -1]
        self.rpn_cm.add(predicted=totensor(data=_rpn_score, cuda=False),
                        target=_gt_rpn_label.data.long())

        # ---------------------roi loss---------------------------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick, for every sample, the offsets predicted for its ground-truth
        # class.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample),
                              totensor(data=gt_roi_label).long()]
        gt_roi_loc = totensor(data=gt_roi_loc)
        gt_roi_label = totensor(data=gt_roi_label).long()
        roi_loc_loss = _faster_rcnn_loc_loss(pred_loc=roi_loc.contiguous(),
                                             gt_loc=gt_roi_loc,
                                             gt_label=gt_roi_label.data,
                                             sigma=self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        self.roi_cm.add(predicted=totensor(roi_score, False),
                        target=gt_roi_label.data.long())
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        # Clear the gradients of the previous step.
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        # Back-propagate the total loss and update the parameters.
        losses.total_loss.backward()
        self.optimizer.step()
        # Accumulate the losses into the running averages.
        self.update_meters(losses)
        return losses

    def val_step(self, imgs, sizes, bboxes, labels):
        """Compute the losses without gradients and record them in the meters.

        The scale is derived as ``imgs.shape[2] / sizes[0].item()``.
        """
        self.optimizer.zero_grad()
        scale = imgs.shape[2] / (sizes[0].item())
        with torch.no_grad():
            losses = self.forward(imgs, bboxes, labels, scale)
            self.update_meters(losses)
        return losses

    def update_meters(self, losses):
        """Add each loss of the ``losses`` namedtuple to its meter."""
        # The losses arrive as a namedtuple; convert it to a dict of scalars.
        loss_dict = {k: scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_dict[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.rpn_cm.reset()
        self.roi_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first item of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` when it has one."""
        parent = os.path.dirname(path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Write a checkpoint and return the path it was written to.

        Args:
            save_optimizer (bool): also store the optimizer state.
            save_path: destination file. When None, a name is built under
                ``checkpoints/`` from a timestamp and the ``kwargs`` values.
            **kwargs: extra information stored under ``"other_info"``.

        Returns:
            The path the checkpoint was saved to.
        """
        save_dict = dict()

        # Model weights, configuration and any extra information.
        save_dict["model"] = self.faster_rcnn.state_dict()
        save_dict["config"] = OPT._state_dict()
        save_dict["other_info"] = kwargs

        if save_optimizer:
            save_dict["optimizer"] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime("%m%d%H%M")
            save_path = "checkpoints/fasterrcnn_%s" % timestr
            for k_, v_ in kwargs.items():
                save_path += "_%s" % v_

        self._ensure_parent_dir(save_path)
        torch.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False):
        """Restore the model (and optionally config/optimizer) from ``path``.

        A checkpoint without a ``"model"`` key is treated as a bare model
        state dict. Returns ``self``.
        """
        state_dict = torch.load(path)
        if "model" in state_dict:
            self.faster_rcnn.load_state_dict(state_dict["model"])
        else:
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        # Optionally restore the stored configuration.
        if parse_opt:
            OPT._parse(state_dict["config"])
        # Optionally restore the optimizer state.
        if "optimizer" in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict["optimizer"])
        return self
Exemple #6
0
def train(
        main_options: MainOptions,
        train_options: TrainOptions
) -> None:
    """Train the multi-agent classifier and log the run to MLflow.

    The dataset is split 85% / 15% into train and evaluation subsets. After
    every epoch the confusion matrices, the model arguments (JSON) and the
    model weights are written under ``train_options.output_dir`` and logged
    as MLflow artifacts. Once training is done, one random evaluation image
    is passed to ``visualize_steps``.

    Args:
        main_options: run-level options (``run_id``, ``nb_agent``, ``step``,
            ``cuda``).
        train_options: training hyper-parameters and output location.

    Raises:
        Exception: if ``<output_dir>/models`` exists but is not a directory.
    """
    assert train_options.dim == 2 or train_options.dim == 3, \
        "Only 2D is supported at the moment " \
        "for data loading and observation / transition. " \
        "See torchvision.datasets.ImageFolder"

    output_dir = train_options.output_dir

    model_dir = "models"
    if not exists(join(output_dir, model_dir)):
        mkdir(join(output_dir, model_dir))
    if exists(join(output_dir, model_dir)) \
            and not isdir(join(output_dir, model_dir)):
        raise Exception(f"\"{join(output_dir, model_dir)}\""
                        f" is not a directory.")

    exp_name = "MARLClassification"
    mlflow.set_experiment(exp_name)

    mlflow.start_run(run_name=f"train_{main_options.run_id}")

    mlflow.log_param("output_dir", output_dir)
    mlflow.log_param("model_dir", join(output_dir, model_dir))

    img_pipeline = tr.Compose([
        tr.ToTensor(),
        custom_tr.NormalNorm()
    ])

    # The dataset is selected from the feature extractor name prefix.
    if train_options.ft_extr_str.startswith("resisc"):
        dataset_constructor = RESISC45Dataset
    elif train_options.ft_extr_str.startswith("mnist"):
        dataset_constructor = MNISTDataset
    else:
        dataset_constructor = KneeMRIDataset

    nn_models = ModelsWrapper(
        train_options.ft_extr_str,
        train_options.window_size,
        train_options.hidden_size_belief,
        train_options.hidden_size_action,
        train_options.hidden_size_msg,
        train_options.hidden_size_state,
        train_options.dim,
        train_options.action,
        train_options.nb_class,
        train_options.hidden_size_linear_belief,
        train_options.hidden_size_linear_action
    )

    dataset = dataset_constructor(img_pipeline)

    marl_m = MultiAgent(
        main_options.nb_agent,
        nn_models,
        train_options.hidden_size_belief,
        train_options.hidden_size_action,
        train_options.window_size,
        train_options.hidden_size_msg,
        train_options.action,
        obs_generic,
        trans_generic
    )

    mlflow.log_params({
        "ft_extractor": train_options.ft_extr_str,
        "window_size": train_options.window_size,
        "hidden_size_belief": train_options.hidden_size_belief,
        "hidden_size_action": train_options.hidden_size_action,
        "hidden_size_msg": train_options.hidden_size_msg,
        "hidden_size_state": train_options.hidden_size_state,
        "dim": train_options.dim,
        "action": train_options.action,
        "nb_class": train_options.nb_class,
        "hidden_size_linear_belief":
            train_options.hidden_size_linear_belief,
        "hidden_size_linear_action":
            train_options.hidden_size_linear_action,
        "nb_agent": main_options.nb_agent,
        "frozen_modules": train_options.frozen_modules,
        "epsilon": train_options.epsilon,
        "epsilon_decay": train_options.epsilon_decay,
        "nb_epoch": train_options.nb_epoch,
        "learning_rate": train_options.learning_rate,
        "img_size": train_options.img_size,
        "retry_number": train_options.retry_number,
        "step": main_options.step,
        "batch_size": train_options.batch_size
    })

    # The context manager closes the file even if json.dump raises.
    class_to_idx_path = join(output_dir, "class_to_idx.json")
    with open(class_to_idx_path, "w") as json_f:
        json.dump(dataset.class_to_idx, json_f)
    mlflow.log_artifact(class_to_idx_path)

    cuda = main_options.cuda
    device_str = "cpu"

    # Pass pytorch stuff to GPU
    # for agents hidden tensors (belief etc.)
    if cuda:
        nn_models.cuda()
        marl_m.cuda()
        device_str = "cuda"

    mlflow.log_param("device", device_str)

    module_to_train = ModelsWrapper.module_list \
        .difference(train_options.frozen_modules)

    # for RL agent models parameters
    optim = th.optim.Adam(
        nn_models.get_params(list(module_to_train)),
        lr=train_options.learning_rate
    )

    # Random 85% / 15% train / evaluation split.
    idx = th.randperm(len(dataset))
    idx_train = idx[:int(0.85 * idx.size(0))]
    idx_test = idx[int(0.85 * idx.size(0)):]

    train_dataset = Subset(dataset, idx_train)
    test_dataset = Subset(dataset, idx_test)

    train_dataloader = DataLoader(
        train_dataset, batch_size=train_options.batch_size,
        shuffle=True, num_workers=3, drop_last=False
    )

    test_dataloader = DataLoader(
        test_dataset, batch_size=train_options.batch_size,
        shuffle=True, num_workers=3, drop_last=False
    )

    epsilon = train_options.epsilon

    curr_step = 0

    for e in range(train_options.nb_epoch):
        nn_models.train()

        sum_loss = 0.
        i = 0

        conf_meter = ConfusionMeter(train_options.nb_class)

        tqdm_bar = tqdm(train_dataloader)
        for x_train, y_train in tqdm_bar:
            x_train, y_train = x_train.to(th.device(device_str)), \
                               y_train.to(th.device(device_str))

            # pred = [Nr, Ns, Nb, Nc]
            # prob = [Nr, Ns, Nb]
            retry_pred, retry_prob = episode_retry(
                marl_m, x_train, epsilon,
                main_options.step,
                train_options.retry_number,
                train_options.nb_class, device_str
            )

            # Class one hot encoding
            y_eye = th.eye(
                train_options.nb_class,
                device=th.device(device_str)
            )[y_train.unsqueeze(0)].unsqueeze(1).repeat(
                1, main_options.step, 1, 1)

            # Update confusion meter
            # mean between trials
            conf_meter.add(
                retry_pred.detach()[:, -1, :, :].mean(dim=0),
                y_train
            )

            # L2 Loss - Classification error / reward
            # reward = -error(y_true, y_step_pred).mean(class_dim)
            r = -th.pow(y_eye - retry_pred, 2.).mean(dim=-1)

            # Compute loss
            losses = retry_prob * r.detach() + r

            # Losses mean on images batch and trials
            # maximize(E[reward]) -> minimize(-E[reward])
            loss = -losses.mean()

            # Reset gradient
            optim.zero_grad()

            # Backward on compute graph
            loss.backward()

            # Update weights
            optim.step()

            # Update epoch loss sum
            sum_loss += loss.item()

            # Compute global score
            precs, recs = prec_rec(conf_meter)

            if curr_step % 100 == 0:
                mlflow.log_metrics(
                    {"loss": loss.item(),
                     "train_prec": precs.mean().item(),
                     "train_rec": recs.mean().item(),
                     "epsilon": epsilon},
                    step=curr_step
                )

            tqdm_bar.set_description(
                f"Epoch {e} - Train, "
                f"loss = {sum_loss / (i + 1):.4f}, "
                f"eps = {epsilon:.4f}, "
                f"train_prec = {precs.mean():.3f}, "
                f"train_rec = {recs.mean():.3f}"
            )

            # Decay the exploration rate after every batch.
            epsilon *= train_options.epsilon_decay
            epsilon = max(epsilon, 0.)

            i += 1
            curr_step += 1

        sum_loss /= len(train_dataloader)

        save_conf_matrix(conf_meter, e, output_dir, "train")

        mlflow.log_artifact(
            join(output_dir, f"confusion_matrix_epoch_{e}_train.png")
        )

        nn_models.eval()
        conf_meter.reset()

        with th.no_grad():
            tqdm_bar = tqdm(test_dataloader)
            for x_test, y_test in tqdm_bar:
                x_test, y_test = x_test.to(th.device(device_str)), \
                                 y_test.to(th.device(device_str))

                # Epsilon is 0 during evaluation.
                preds, _ = episode(marl_m, x_test, 0., main_options.step)

                conf_meter.add(preds.detach(), y_test)

                # Compute score
                precs, recs = prec_rec(conf_meter)

                tqdm_bar.set_description(
                    f"Epoch {e} - Eval, "
                    f"eval_prec = {precs.mean():.4f}, "
                    f"eval_rec = {recs.mean():.4f}"
                )

        # Compute score
        precs, recs = prec_rec(conf_meter)

        save_conf_matrix(conf_meter, e, output_dir, "eval")

        # Log plain Python floats, as done for the training metrics.
        mlflow.log_metrics(
            {"eval_prec": precs.mean().item(),
             "eval_recs": recs.mean().item()},
            step=curr_step
        )

        nn_models.json_args(
            join(output_dir,
                 model_dir,
                 f"marl_epoch_{e}.json")
        )
        th.save(
            nn_models.state_dict(),
            join(output_dir, model_dir,
                 f"nn_models_epoch_{e}.pt")
        )

        mlflow.log_artifact(
            join(output_dir,
                 model_dir,
                 f"marl_epoch_{e}.json")
        )
        mlflow.log_artifact(
            join(output_dir, model_dir,
                 f"nn_models_epoch_{e}.pt")
        )
        mlflow.log_artifact(
            join(output_dir,
                 f"confusion_matrix_epoch_{e}_eval.png")
        )

    # Same evaluation images without normalisation, for visualisation.
    empty_pipe = tr.Compose([
        tr.ToTensor()
    ])

    dataset_tmp = dataset_constructor(empty_pipe)

    test_dataloader_ori = Subset(dataset_tmp, idx_test)
    test_dataloader = Subset(dataset, idx_test)

    # th.randint excludes the upper bound, so the index is always valid
    # (an inclusive randint(0, len) could return len and overflow).
    test_idx = th.randint(len(test_dataloader_ori), (1,)).item()

    visualize_steps(
        marl_m, test_dataloader[test_idx][0],
        test_dataloader_ori[test_idx][0],
        main_options.step, train_options.window_size,
        output_dir, train_options.nb_class, device_str,
        dataset.class_to_idx
    )

    mlflow.end_run()
Exemple #7
0
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model.

    ``forward`` takes images, bounding boxes, labels and the preprocessing
    scale and returns the training losses:

    * ``rpn_loc_loss``: localisation loss of the region proposal network.
    * ``rpn_cls_loss``: classification loss of the region proposal network.
    * ``roi_loc_loss``: localisation loss of the RoI head.
    * ``roi_cls_loss``: classification loss of the RoI head.
    * the total loss, i.e. the sum of the four losses above.
    """

    def __init__(self, faster_rcnn):
        """
        :type faster_rcnn: FasterRCNN
        """
        super(FasterRCNNTrainer, self).__init__()

        # The detection network being trained.
        self.faster_rcnn = faster_rcnn

        # Sigma parameters of the RPN and RoI localisation losses.
        self.rpn_sigma = faster_rcnn_config.rpn_sigma
        self.roi_sigma = faster_rcnn_config.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, bbox, label, self.loc_normalize_mean, self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox, anchor, img_size)
        gt_rpn_label = Variable(gt_rpn_label).long()
        gt_rpn_loc = Variable(gt_rpn_loc)
        rpn_loc_loss = fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                          gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label,
                                       ignore_index=-1)
        # Only anchors whose label is not the ignored value -1 feed the
        # confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = rpn_score[gt_rpn_label > -1]
        self.rpn_cm.add(_rpn_score, _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick, for every sample, the offsets predicted for its
        # ground-truth class.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long(),
                              gt_roi_label.long()]
        gt_roi_label = Variable(gt_roi_label).long()
        gt_roi_loc = Variable(gt_roi_loc)

        roi_loc_loss = fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                          gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)

        self.roi_cm.add(roi_score, gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` when it has one."""
        # Local import: this class did not rely on a module-level ``os``.
        import os

        parent = os.path.dirname(path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = faster_rcnn_config.state_dict()
        save_dict['other_info'] = kwargs

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        # The default path lives under ``checkpoints/``; make sure the
        # target directory exists before writing.
        self._ensure_parent_dir(save_path)
        torch.save(save_dict, save_path)
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore the model (and optionally config/optimizer) from ``path``.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict. Returns ``self``.
        """
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            faster_rcnn_config.parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss of the ``losses`` namedtuple to its meter."""
        # NOTE(review): the loss values are added to the meters as-is, with
        # no scalar conversion -- confirm the meters accept that type.
        loss_d = {k: v for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first item of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model.

    Owns the optimizer, the anchor/proposal target creators, a visualizer,
    one running-average meter per loss and two confusion matrices, and
    offers a training step plus checkpoint save/load helpers.
    """

    def __init__(self, faster_rcnn):
        """Build the trainer around ``faster_rcnn``.

        Args:
            faster_rcnn: the detection network. It must expose ``extractor``,
                ``rpn``, ``head``, ``get_optimizer()``,
                ``loc_normalize_mean`` and ``loc_normalize_std``.
        """
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Hyper-parameters passed to _fast_rcnn_loc_loss for the two
        # localisation losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        # Supplies the ground-truth offsets and labels the RPN predictions
        # are compared against.
        self.anchor_target_creator = AnchorTargetCreator()
        # Supplies the sampled RoIs with their ground-truth offsets and
        # labels for the RoI head.
        self.proposal_target_creator = ProposalTargetCreator()
        # Original author's note: (0., 0., 0., 0.) -- TODO confirm.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        # Original author's note: (0.1, 0.1, 0.2, 0.2) -- TODO confirm.
        self.loc_normalize_std = faster_rcnn.loc_normalize_std
        # Optimizer supplied by the network itself.
        self.optimizer = self.faster_rcnn.get_optimizer()
        # Visualisation helper.
        self.vis = Visualizer(env=opt.env)

        # Confusion matrices; the constructor argument is the number of
        # classes.
        self.rpn_cm = ConfusionMeter(2)
        # Original author's note: 20 object classes + 1 background class.
        self.roi_cm = ConfusionMeter(21)
        # One running average per field of LossTuple.
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Run the network on one image and compute the training losses.

        Args:
            imgs: 4-D image batch; the last two dimensions are read as
                height and width.
            bboxes: ground-truth boxes; the first dimension is the batch
                size (original notes: shape (N, R, 4)).
            labels: ground-truth labels (original notes: shape (N, R)).
            scale: scale value forwarded to the RPN.

        Returns:
            LossTuple: RPN localisation loss, RPN classification loss, RoI
            localisation loss, RoI classification loss and their sum.

        Raises:
            ValueError: if the batch size is not 1.
        """
        # Batch size.
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Feature extraction.
        features = self.faster_rcnn.extractor(imgs)

        # RPN: predicted anchor offsets and scores, proposals and anchors.
        # roi_indices is not used below.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # The proposal target creator returns the sampled RoIs together with
        # the ground-truth offsets and labels the RoI head is trained on.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # Per-class offsets and class scores for the sampled RoIs.
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Ground-truth offsets and labels for the anchors.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        # Localisation loss between the RPN offsets and the anchor targets.
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Cross entropy between the RPN scores and the anchor labels;
        # anchors labelled -1 are ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Drop the ignored anchors (label -1) before updating the confusion
        # matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Reshape to (n_sample, n_class, 4).
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # Pick, for every sample, the offsets predicted for its
        # ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)
        # Localisation loss of the RoI head.
        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)
        # Classification loss of the RoI head.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())
        # The total loss is the sum of the four losses.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses of that step.

        Clears the gradients, computes the losses with ``forward``,
        back-propagates the total loss, updates the parameters and records
        the losses in the meters.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` when it has one."""
        parent = os.path.dirname(path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Write a checkpoint and return the path it was written to.

        Args:
            save_optimizer (bool): also store the optimizer state.
            save_path: destination file. When None, a name is built under
                ``checkpoints/`` from a timestamp and the ``kwargs`` values.
            **kwargs: extra information stored under ``'other_info'``.

        Returns:
            The path the checkpoint was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore the model (and optionally config/optimizer) from ``path``.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict. Returns ``self``.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss of the ``losses`` namedtuple to its meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first item of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class Head_Detector_Trainer(nn.Module):
    """Training wrapper around a head detector; computes the RPN losses.

    The losses returned by :meth:`forward` are the RPN localization loss,
    the RPN classification loss and their sum, packed into a ``LossTuple``.

    Args:
        head_detector: model exposing ``extractor``, ``rpn`` and
            ``get_optimizer()``.
    """

    def __init__(self, head_detector):
        super(Head_Detector_Trainer, self).__init__()
        self.head_detector = head_detector
        self.rpn_sigma = opt.rpn_sigma
        self.anchor_target_creator = AnchorTargetCreator()
        self.optimizer = self.head_detector.get_optimizer()
        # No visualizer is created here, so ``self.vis`` normally does not
        # exist; ``save`` treats it as optional for that reason.
        self.rpn_cm = ConfusionMeter(2)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxs, scale):
        """Run the extractor and RPN on one image and compute the RPN losses.

        Args:
            imgs: image batch; ``imgs.size()`` must unpack into 4 values,
                the last two being height and width.
            bboxs: ground-truth boxes; ``bboxs.size()`` must unpack into 3
                values and the first (batch size) must be 1.
            scale: scaling factor forwarded to the RPN.

        Returns:
            tuple: ``(LossTuple, rois, rois_scores)``.

        Raises:
            ValueError: if the batch size is not 1.
        """
        n, _, _ = bboxs.size()
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')
        _, _, H, W = imgs.size()
        img_size = (H, W)
        features = self.head_detector.extractor(imgs)
        rpn_locs, rpn_scores, rois, rois_scores, anchor = self.head_detector.rpn(features, img_size, scale)
        # Batch size is one, so work with the single sample directly.
        bbox = bboxs[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = head_detector_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # Anchors labelled -1 are excluded from the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors with a label greater than -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())
        losses = [rpn_loc_loss, rpn_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses), rois, rois_scores

    def train_step(self, imgs, bboxes, scale):
        """Run one optimization step and return ``(losses, rois, rois_scores)``."""
        self.optimizer.zero_grad()
        losses, rois, rois_scores = self.forward(imgs, bboxes, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses, rois, rois_scores

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, config and optional extras to disk.

        Args:
            save_optimizer (bool): also store the optimizer state dict.
            save_path (str): destination file. When ``None`` the path is
                ``opt.model_save_path/head_detector<MMDDHHMM>`` with every
                value in ``kwargs`` appended as ``_<value>``.
            **kwargs: stored verbatim under the ``'other_info'`` key.

        Returns:
            str: the path the checkpoint was written to.
        """
        # __init__ does not create a visualizer; only use one if it was
        # attached afterwards, instead of failing with AttributeError.
        vis = getattr(self, 'vis', None)

        save_dict = dict()
        save_dict['model'] = self.head_detector.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        if vis is not None:
            save_dict['vis_info'] = vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = os.path.join(opt.model_save_path, 'head_detector%s' % timestr)
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # Make sure the target directory exists; a bare file name has an
        # empty dirname and needs no directory.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        if vis is not None:
            vis.save([vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config/optimizer) state from ``path``.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict. Returns the trainer itself.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.head_detector.load_state_dict(state_dict['model'])
        else:
            self.head_detector.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add the scalar value of every loss to its matching meter."""
        loss_d = {k: at.scalar(at.tonumpy(v)) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and the RPN confusion meter."""
        for meter in self.meters.values():
            meter.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return a dict mapping each meter name to ``meter.value()[0]``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #10
0
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build the ground-truth targets used in training:
        # AnchorTargetCreator supplies labels/offsets for the RPN outputs,
        # ProposalTargetCreator supplies sampled RoIs with labels/offsets
        # for the head.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)   # confusion matrix over 2 classes
        self.roi_cm = ConfusionMeter(21)  # confusion matrix over 21 classes
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        # NOTE(review): the original author's notes give, for a 600x800
        # input, features (1, 512, 38, 50), rpn_locs (1, 38*50*9, 4),
        # rpn_scores (1, 38*50*9, 2), rois (2000, 4) and
        # anchor (38*50*9, 4) -- confirm against the RPN implementation.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]            # (R, 4)
        label = labels[0]           # (R,)
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # The creator returns the sampled RoIs together with the location
        # and label targets that the head's outputs are compared against.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Offsets and labels of the anchors with respect to the gt boxes.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Anchors labelled -1 are ignored; keep only those with a label
        # greater than -1 for the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat per-RoI regression output as (n_sample, -1, 4)
        # and pick, for every RoI, the 4 values of its ground-truth class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # The fifth entry is the sum of the four individual losses.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # A bare file name has an empty dirname and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config/optimizer) state from ``path``.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict. Returns the trainer itself.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add the scalar value of every loss to its matching meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return a dict mapping each meter name to ``meter.value()[0]``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #11
0
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters passed to _fast_rcnn_loc_loss for the RPN
        # and the head localization losses respectively.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        # AnchorTargetCreator supplies the ground truth for the RPN's
        # predicted labels and offsets.
        self.anchor_target_creator = AnchorTargetCreator()
        # ProposalTargetCreator supplies sampled RoIs with the ground truth
        # for the head; both creators are used only by this trainer's
        # forward pass.
        self.proposal_target_creator = ProposalTargetCreator()
        # NOTE(review): original notes give mean (0., 0., 0., 0.) and
        # std (0.1, 0.1, 0.2, 0.2) -- confirm against the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        # Confusion matrices comparing predictions with ground truth; the
        # argument is the number of classes.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]  # batch size
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        # NOTE(review): per the original author's notes the RPN scores
        # roughly (H/16)*(W/16)*9 anchors and returns about 2000 RoIs after
        # NMS, i.e. rois is (2000, 4) and anchor is (hh*ww*9, 4); confirm
        # against the RPN implementation. roi_indices is not used here.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]  # (R, 4)
        label = labels[0]  # (R,)
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # The head takes the sampled RoIs plus the feature map and outputs
        # per-class box regressions and class scores; gt_roi_loc and
        # gt_roi_label are the targets those outputs are compared against.
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Offsets and labels of the anchors with respect to the gt boxes.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,  # offsets regressed by the RPN
            gt_rpn_loc,  # target offsets from anchor_target_creator
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(
            rpn_score,
            gt_rpn_label.cuda(),
            ignore_index=-1)
        # Anchors labelled -1 are ignored; keep only those with a label
        # greater than -1 for the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat per-RoI regression output as (n_sample, -1, 4)
        # and pick, for every RoI, the 4 values of its ground-truth class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        # Cross-entropy classification loss of the head.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # The fifth entry is the sum of the four individual losses.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses."""
        self.optimizer.zero_grad()  # clear old gradients
        losses = self.forward(imgs, bboxes, labels, scale)  # compute all losses
        losses.total_loss.backward()  # back-propagate
        self.optimizer.step()  # update parameters
        self.update_meters(losses)  # record the losses in the meters
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # A bare file name has an empty dirname and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore model (and optionally config/optimizer) state from ``path``.

        A checkpoint without a ``'model'`` key is treated as a bare model
        state dict. Returns the trainer itself.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add the scalar value of every loss to its matching meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return a dict mapping each meter name to ``meter.value()[0]``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        """Wire up the model, its target creators, optimizer and meters.

        Args:
            faster_rcnn: model to train; must expose ``loc_normalize_mean``,
                ``loc_normalize_std`` and ``get_optimizer()``.
        """
        super(FasterRCNNTrainer, self).__init__()

        # Model under training and the sigma hyper-parameters later handed
        # to the localization losses.
        self.faster_rcnn = faster_rcnn
        self.rpn_sigma, self.roi_sigma = opt.rpn_sigma, opt.roi_sigma

        # Builders of the ground-truth targets used during training.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Normalization constants forwarded to the proposal target creator.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        self.vis = Visualizer(env=opt.env)  # visdom wrapper

        # Training-status indicators: one confusion meter per stage and one
        # running-average meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(4)
        loss_meters = {}
        for field in LossTuple._fields:
            loss_meters[field] = AverageValueMeter()
        self.meters = loss_meters
        self.sparse = False

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses: the two RPN losses, the two RoI-head
            losses and their sum, in that order.

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        # roi_indices is returned by the RPN but not used below.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # ignore_index=-1 keeps anchors labelled -1 out of the loss.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Only anchors with a label greater than -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat regression output as (n_sample, -1, 4) and pick,
        # for every sampled RoI, the 4 values of its ground-truth class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # The fifth entry is the sum of the four individual losses.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale, prune_train=False):
        """Run one optimization step and return the losses.

        Args:
            imgs, bboxes, labels, scale: forwarded unchanged to ``forward``.
            prune_train (bool): when true, the weight gradient of every
                sub-module that has both a ``mask`` and a ``weight``
                attribute is multiplied by that mask before the optimizer
                step, so entries where the mask is zero are not updated by
                the gradient.

        Returns:
            The namedtuple of losses produced by ``forward``.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        if prune_train:
            for _, m in self.named_modules():
                if not (hasattr(m, 'mask') and hasattr(m, 'weight')):
                    continue
                grad = m.weight.grad
                if grad is None:
                    # This weight took no part in the backward pass, so
                    # there is no gradient to mask.
                    continue
                m.weight.grad.data = grad.data * m.mask.data
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self,
             save_optimizer=False,
             save_path=None,
             prune=False,
             **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.
            prune (bool): when true, ``"_prune"`` is appended to the path
                (to an explicitly supplied ``save_path`` as well).
            **kwargs: stored verbatim under the ``'other_info'`` key.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()
        save_dict['sparse_list'] = []
        if self.sparse:
            for _, m in self.named_modules():
                if hasattr(m, "sparse"):
                    if m.sparse and hasattr(m, 'weight'):
                        # Densify the weight in place (kept on its device)
                        # before the state dict is taken below.
                        w_dev = m.weight.device
                        w = m.weight.data.coalesce().to_dense()
                        m.weight.data = w.to(w_dev)
                    # Every module carrying a ``sparse`` attribute is
                    # recorded by its string representation.
                    save_dict['sparse_list'].append(str(m))

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()
        save_dict['sparse'] = self.sparse
        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_
        if prune:
            save_path += "_prune"

        # A bare file name has an empty dirname and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def generate_simple_state_dict(self, pre_trained, debug=False):
        """Copy pre-trained weights into the current state dict by position.

        Entries of the current state dict are visited in order. Keys
        containing ``"mask"`` are skipped without consuming a pre-trained
        entry; keys containing ``"head.cls_loc"`` or ``"head.score"`` keep
        their current value but still consume one pre-trained entry; every
        other key receives the value of the next pre-trained entry.

        Args:
            pre_trained: mapping whose values are taken in iteration order.
            debug (bool): print the key names of both mappings first.

        Returns:
            The current model's state dict with the values replaced.
        """
        pretrained_items = list(pre_trained.items())
        merged = self.faster_rcnn.state_dict()
        if debug:
            for name in merged:
                print("curr :", str(name))
            for name, _ in pretrained_items:
                print("new :", str(name))

        cursor = 0
        for name in merged:
            if "mask" in name:
                continue
            keeps_own_value = ("head.cls_loc" in str(name)
                               or "head.score" in str(name))
            if not keeps_own_value:
                merged[name] = pretrained_items[cursor][1]
            cursor += 1
        return merged

    def generate_state_dict(self, pre_trained, simple=False, debug=False):
        """Merge pre-trained weights into the current state dict by key.

        NOTE(review): an identical definition of this method appears later
        in the class and is the one that takes effect.

        Args:
            pre_trained: mapping of parameter name to weights.
            simple (bool): delegate to ``generate_simple_state_dict``
                (positional matching) instead of matching by key.
            debug (bool): print the key names of both mappings first.

        Returns:
            The current model's state dict with matching keys overwritten;
            pre-trained keys unknown to the model are reported and skipped.
        """
        self.set_dense()
        if simple:
            return self.generate_simple_state_dict(pre_trained, debug)

        incoming = list(pre_trained.items())
        merged = self.faster_rcnn.state_dict()
        if debug:
            for name in merged:
                print("curr :", str(name))
            for entry in incoming:
                print("new :", str(entry[0]))

        for name, weights in incoming:
            if name not in merged:
                print(f"Key Weight Mismatch at: {str(name)} -- Not Loading")
                continue
            merged[name] = weights
        return merged

    def to_sparse(self, sparse_mx, n, m):
        """Convert a SciPy sparse matrix into a float32 sparse COO tensor.

        Args:
            sparse_mx: object providing ``tocoo()`` (e.g. a SciPy sparse
                matrix).
            n: module name, used only in the progress message.
            m: module description, used only in the progress message.

        Returns:
            A sparse COO tensor with the same shape and non-zero entries.
        """
        print(f"Turning Sparse: {n}: {m}")
        sparse_mx = sparse_mx.tocoo().astype(np.float32)
        indices = t.from_numpy(np.vstack(
            (sparse_mx.row, sparse_mx.col))).long()
        values = t.from_numpy(sparse_mx.data)
        shape = t.Size(sparse_mx.shape)
        # t.sparse.FloatTensor is a deprecated legacy constructor;
        # sparse_coo_tensor builds the same float32 COO tensor.
        return t.sparse_coo_tensor(indices, values, shape)

    def revert_to_sparse(self, sparse_list):
        self.sparse = True
        for n, m in self.named_modules():
            if str(m) in sparse_list:
                m.sparse = True
                if hasattr(m, 'weight') and not m.weight.is_sparse:
                    try:
                        dev = m.weight.device
                        weight = m.weight.data.cpu().numpy()
                        matrix = coo_matrix(weight)
                        tensor = self.to_sparse(matrix, n, str(m))
                        m.weight.data = tensor.to(dev)
                    except:
                        raise ValueError(
                            f"Couldn't convert {n},{str(m)} to sparse")
        return self

    def generate_state_dict(self, pre_trained, simple=False, debug=False):
        self.set_dense()
        if simple:
            return self.generate_simple_state_dict(pre_trained, debug)
        new = list(pre_trained.items())
        curr_model_kvpair = self.faster_rcnn.state_dict()
        if debug:
            for k, v in curr_model_kvpair.items():
                print("curr :", str(k))
            for i in new:
                print("new :", str(i[0]))
        for k, v in new:
            if k in curr_model_kvpair:
                curr_model_kvpair[k] = v
            else:
                print(f"Key Weight Mismatch at: {str(k)} -- Not Loading")
        return curr_model_kvpair

    def to_sparse(self, sparse_mx, n, m):
        print(f"Turning Sparse: {n}: {m}")
        sparse_mx = sparse_mx.tocoo().astype(np.float32)
        indices = t.from_numpy(np.vstack(
            (sparse_mx.row, sparse_mx.col))).long()
        values = t.from_numpy(sparse_mx.data)
        shape = t.Size(sparse_mx.shape)
        return t.sparse.FloatTensor(indices, values, shape)

    def revert_to_sparse(self, sparse_list):
        self.sparse = True
        for n, m in self.named_modules():
            if str(m) in sparse_list:
                m.sparse = True
                if hasattr(m, 'weight') and not m.weight.is_sparse:
                    try:
                        dev = m.weight.device
                        weight = m.weight.data.cpu().numpy()
                        matrix = coo_matrix(weight)
                        tensor = self.to_sparse(matrix, n, str(m))
                        m.weight.data = tensor.to(dev)
                    except:
                        raise ValueError(
                            f"Couldn't convert {n},{str(m)} to sparse")
        return self

    def load(
        self,
        path,
        load_optimizer=False,
        parse_opt=False,
        debug=False,
        simple=opt.use_simple,
    ):
        """Load model weights, and optionally more, from a saved file.

        The object read by ``t.load(path)`` is treated as a full checkpoint
        when it has a ``'model'`` key; otherwise the whole object is taken as
        the model weights and the method returns right after loading them.

        Args:
            path: file to read with ``t.load``.
            load_optimizer: also restore ``self.optimizer`` when the
                checkpoint has an ``'optimizer'`` entry.
            parse_opt: feed the checkpoint's ``'config'`` to ``opt._parse``.
            debug: forwarded to ``generate_state_dict``.
            simple: forwarded to ``generate_state_dict``.

        Returns:
            ``self``.
        """
        checkpoint = t.load(path)
        has_model_entry = 'model' in checkpoint
        pretrained = checkpoint['model'] if has_model_entry else checkpoint
        merged = self.generate_state_dict(pretrained, simple, debug)
        self.faster_rcnn.load_state_dict(merged)
        if not has_model_entry:
            # Bare weights: nothing else is restored for this kind of file.
            return self

        if parse_opt:
            opt._parse(checkpoint['config'])
        if load_optimizer and 'optimizer' in checkpoint:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        was_sparse = 'sparse' in checkpoint and checkpoint['sparse'] == True
        if was_sparse:
            print("Reverting to Sparse")
            self.revert_to_sparse(checkpoint['sparse_list'])
        print(f"Successfully Loaded Model: {path}")
        return self

    def update_meters(self, losses):
        """Add each loss in ``losses`` to the meter registered under its name.

        ``losses`` must offer ``_asdict()``; every value is converted with
        ``at.scalar`` before being added to ``self.meters[<field>]``.
        """
        scalars = {}
        for field, value in losses._asdict().items():
            scalars[field] = at.scalar(value)
        for field, meter in self.meters.items():
            meter.add(scalars[field])

    def reset_meters(self):
        """Reset every meter in ``self.meters``, then ``roi_cm`` and ``rpn_cm``."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every meter in ``self.meters``."""
        snapshot = {}
        for name, meter in self.meters.items():
            snapshot[name] = meter.value()[0]
        return snapshot

    def quantize(self, bits=5, verbose=False):
        """Replace ``self.faster_rcnn`` with the result of ``quantization.quantize``.

        The trainer is flagged as sparse before quantizing.

        Args:
            bits: forwarded to ``quantization.quantize``.
            verbose: forwarded to ``quantization.quantize``.
        """
        self.sparse = True
        quantized = quantization.quantize(
            self.faster_rcnn, bits=bits, verbose=verbose)
        self.faster_rcnn = quantized

    def replace_with_sparsedense(self):
        """Delegate to ``self.faster_rcnn.replace_with_sparsedense()``."""
        model = self.faster_rcnn
        model.replace_with_sparsedense()

    def set_sparse(self):
        """Set ``self.sparse`` to True and call ``self.faster_rcnn.set_sparse()``."""
        self.sparse = True
        model = self.faster_rcnn
        model.set_sparse()

    def set_dense(self):
        """Set ``self.sparse`` to False and call ``self.faster_rcnn.set_dense()``."""
        self.sparse = False
        model = self.faster_rcnn
        model.set_dense()
Exemple #13
0
class Relation_Trainer:
    def __init__(self, args):
        """Store the configuration and run every setup step in order.

        Args:
            args: configuration object; kept as ``self.conf`` and also
                passed to ``load_data``.
        """
        self.conf = args
        self.check_cuda()
        # Check with eval tools
        recorded_keys = ['epoch_loss', 'test_loss', 'training_loss', 'mAP']
        self.tp = TrainingProgress(
            global_config['progress_dir'],
            self.conf.model + self.conf.opt_header + '-progress',
            data_key_list=recorded_keys)
        self.load_net()
        self.load_data(args)
        self.create_data_loader()
        # Optimizer/Loss function
        self.prepare_training()
    def check_cuda(self):
        """Pick the compute device and publish it as ``at.gpu_dev``.

        With CUDA available, GPU ``self.conf.dev`` becomes the current
        device and ``self.device`` is ``'cuda:<dev>'``; otherwise
        ``self.device`` is ``'cpu'``.
        """
        if not torch.cuda.is_available():
            self.device = 'cpu'
        else:
            torch.cuda.set_device(self.conf.dev)  # default 0
            print("Use CUDA,device=", torch.cuda.current_device())
            self.device = 'cuda:' + str(self.conf.dev)
        at.gpu_dev = self.device

    def load_net(self):
        """
        Build the relationship predictor and move it to ``self.device``.

        The ``pretrained`` flag is switched off when ``self.conf.res`` is
        set (i.e. when weights are going to be restored instead).
        """
        options = {
            'n_rel_class': NUM_OF_CLASS,
            'use_extractor': True,
            'pretrained': not self.conf.res,  # if not loading weight
            'spatial_feature': self.conf.use_spatial_feature,
        }
        self.net = Relationship_Predictor(**options)
        self.net = self.net.to(self.device)

    def load_data(self,args):  # batch_files, data_conf, cuda_dev):
        """
        Create ``self.train_set`` ('trainval' split) and ``self.test_set``
        ('test' split) of the dataset named by ``args.trd``.

        Both splits share the same normalisation; only the training
        preprocess uses horizontal flipping (``p_hflip=0.5``).
        """
        means = (0.485, 0.456, 0.406)
        stds = (0.229, 0.224, 0.225)
        pp_train = Preprocess(normalize_means=list(means),
                              normalize_stds=list(stds),
                              p_hflip=0.5)
        pp_test = Preprocess(normalize_means=list(means),
                             normalize_stds=list(stds),
                             p_hflip=0)

        data_dir = global_config['data_dir']
        self.train_set = VMRD_dataset(data_dir=data_dir,
                                      dataset_name=args.trd,
                                      split='trainval',
                                      preprocess=pp_train)
        self.test_set = VMRD_dataset(data_dir=global_config['data_dir'],
                                     dataset_name=args.trd,
                                     split='test',
                                     preprocess=pp_test)
        print('Data loader: Training set: ', len(self.train_set), ' Testing set: ', len(self.test_set))

    def create_data_loader(self):
        """Wrap the train and test sets in ``DataLoader`` objects.

        Batch sizes come from ``self.conf.tr_bat`` / ``self.conf.ts_bat``.
        """
        # NOTE(review): the training loader does not shuffle; confirm this
        # is intentional.
        train_options = dict(batch_size=self.conf.tr_bat, shuffle=False,
                             pin_memory=True, num_workers=8)
        test_options = dict(batch_size=self.conf.ts_bat, shuffle=False,
                            pin_memory=True, num_workers=12)
        self.train_loader = DataLoader(dataset=self.train_set, **train_options)
        self.test_loader = DataLoader(dataset=self.test_set, **test_options)


    def prepare_training(self):
        """
        Register the configuration as meta data, then either restore a
        previous run (``self.conf.res`` set) or start at epoch 1 with a
        fresh optimizer and LR scheduler. Meters are created last.
        """
        self.tp.add_meta({'conf': self.conf, 'global_conf': global_config})
        if not self.conf.res:
            self.epoch = 1
            self.set_optimizer()
            self.set_lr_scheduler()
        else:
            # restore_progress sets the optimizer and lr_scheduler itself
            self.restore_progress()
        self.init_meters()

    def train(self):
        """
        Train until ``self.epoch`` reaches ``self.conf.max_epoch``.

        Per epoch: reset the meters, then for every batch of
        ``self.train_loader`` with at least two boxes run forward,
        backward and an optimizer step while accumulating the loss.
        Afterwards the epoch loss (sum divided by the dataset size) is
        recorded on ``self.tp`` and passed to the LR scheduler, and every
        ``self.conf.se`` epochs ``summary()`` is called. When the loop ends
        a final summary is written with the optimizer state included.

        A ``KeyboardInterrupt`` stops training and asks on stdin whether
        the current progress should be saved.
        """
        time_start = time.time()
        self.net.train()
        try:
            while self.epoch < self.conf.max_epoch:
                self.epoch_loss = 0
                self.reset_meters()
                # Original author's note: bbox shape is [1, 2, 4],
                # label is [1, n], label_attr is [1, m, n].
                for step, (img_name, img, bbox, label, index, relation, scale, _) in tqdm(enumerate(self.train_loader)):
                    # Batches with fewer than two boxes are skipped.
                    if bbox.size(1) < 2:
                        continue

                    self.optimizer.zero_grad()
                    # Use the device chosen by check_cuda() so the CPU
                    # fallback is honoured (conf.dev is only a GPU index).
                    img, bbox, label, index, relation = (
                        tensor.to(self.device)
                        for tensor in (img, bbox, label, index, relation))
                    scale = at.scalar(scale)
                    loss = self.train_forward_net(img_name, img, bbox, label, index, relation, scale)
                    loss.total_loss.backward()
                    self.optimizer.step()
                    self.update_meters(loss)
                    self.epoch_loss += loss.total_loss.detach().cpu().numpy() * img.size(0)

                # NOTE(review): skipped batches still count in the divisor.
                self.epoch_loss = self.epoch_loss / len(self.train_loader.dataset)
                self.tp.record_data({'epoch_loss': self.epoch_loss})
                self.lr_scheduler.step({'loss': self.epoch_loss, 'epoch': self.epoch})
                if self.epoch % self.conf.se == 0:
                    print(timeSince(time_start), ': Trainer Summary Epoch=', self.epoch)
                    self.summary()
                self.epoch += 1
            print(timeSince(time_start), ': Trainer Summary Epoch=', self.epoch)
            self.summary(save_optim=True)  # for resume training
        except KeyboardInterrupt:
            save = input('Save Current Progress ? y for yes: ')
            if 'y' in save:
                print('Saving Progress...')
                self.save_progress(save_optim=True, display=True)
    def train_forward_net(self, img_names, imgs, bboxes, labels, indexs, gt_relations, scale):
        """Run one forward pass and return the relation classification loss.

        Only batch size 1 is supported. Features are extracted from
        ``imgs``, combined per candidate via ``get_combined_feature`` and
        scored by ``self.net``; the stacked scores are compared with the
        ground-truth relations using cross entropy.

        Where ``get_combined_feature`` reports a flipped pair, ground-truth
        classes 1 and 2 are swapped. This is done in place on
        ``gt_relations[0]``, so the caller's tensor is modified.

        ``img_names``, ``labels``, ``indexs`` and ``scale`` are accepted but
        not used here.

        Returns:
            ``LossTuple`` with ``total_loss`` set to the relation loss.

        Raises:
            ValueError: if the batch size is not 1.
        """
        batch_size = bboxes.shape[0]
        if batch_size != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        # Since batch size is one, convert variables to singular form
        bboxes = bboxes[0]
        gt_relations = gt_relations[0]

        feature = self.net.extractor(imgs)
        combined_features, rel_flip = get_combined_feature(feature, bboxes, use_spatial_feature=self.conf.use_spatial_feature)
        relation_scores = [self.net(*combined_feature)
                           for combined_feature in combined_features]
        relation_scores = torch.stack(relation_scores, dim=1).squeeze(0)

        # Swap relation classes 1 <-> 2 wherever the pair was flipped.
        for pair_idx, flip in enumerate(rel_flip):
            if not flip:
                continue
            if gt_relations[pair_idx] == 1:
                gt_relations[pair_idx] = 2
            elif gt_relations[pair_idx] == 2:
                gt_relations[pair_idx] = 1
        # Follow the device chosen by check_cuda() instead of forcing CUDA.
        gt_relations = at.to_tensor(gt_relations).long().to(self.device)
        # Scores are per-class logits; targets are class indices (not one-hot).
        relation_loss = nn.CrossEntropyLoss()(relation_scores, gt_relations)
        return LossTuple(total_loss=relation_loss)

    # Meters
    def init_meters(self):
        """Create the relation confusion meter and one average meter per loss field."""
        # 3 = num of class (including background), per the original note
        self.rel_cm = ConfusionMeter(3)
        average_meters = {}
        for field in LossTuple._fields:
            average_meters[field] = AverageValueMeter()  # average loss
        self.meters = average_meters

    def update_meters(self, losses):
        """Feed every field of ``losses`` (via ``at.scalar``) to its meter."""
        as_scalars = dict(
            (field, at.scalar(value))
            for field, value in losses._asdict().items())
        for field in self.meters:
            self.meters[field].add(as_scalars[field])

    def reset_meters(self):
        """Reset all loss meters, then the relation confusion meter."""
        for meter in self.meters.values():
            meter.reset()
        self.rel_cm.reset()

    def get_meter_data(self):
        """Map each meter name to the first element of its ``value()``."""
        return dict(
            (name, meter.value()[0]) for name, meter in self.meters.items())
    def set_optimizer(self):
        """
        Create ``self.optimizer`` over the parameters of ``self.net`` that
        require gradients.

        The optimizer class is looked up in ``optim`` by ``self.conf.optim``;
        'SGD' additionally gets momentum 0.9 with Nesterov enabled. When
        resuming (``self.conf.res``) and the saved run used the same
        optimizer type, a stored ``'optim_state'`` is loaded back.
        """
        optimizer_name = self.conf.optim
        trainable = filter(lambda p: p.requires_grad, self.net.parameters())
        settings = {'lr': self.conf.lr_init,
                    'weight_decay': self.conf.w_decay}
        if optimizer_name == 'SGD':
            settings.update(momentum=0.9, nesterov=True)
        self.optimizer = getattr(optim, optimizer_name)(trainable, **settings)

        if not self.conf.res:
            return
        same_optimizer = self.tp.get_meta('optim') == self.conf.optim
        if same_optimizer and 'optim_state' in self.tp.meta_dict.keys():
            self.optimizer.load_state_dict(self.tp.get_meta('optim_state'))
            print('Optimizer Internal State Restored')
    def set_lr_scheduler(self, restore_dict=None):
        """Create ``self.lr_scheduler`` and optionally replay a saved step.

        Args:
            restore_dict: when not None, passed to the new scheduler's
                ``step`` right after construction.
        """
        conf = self.conf
        scheduler_args = (conf.lrs, self.optimizer, conf.lr_rates,
                          conf.lr_epochs, conf.lr_loss, conf.lr_init, None)
        self.lr_scheduler = LearningRateScheduler(*scheduler_args)
        if restore_dict is None:
            return
        self.lr_scheduler.step(restore_dict)

    def summary(self, save_optim=False):  # Do the tests
        """
        Evaluate on the test set, record the mAP and save the progress.

        Args:
            save_optim: forwarded to ``save_progress``.
        """
        evaluation = self.eval_relation_predictor()
        progress = self.tp
        progress.record_data({'mAP': evaluation['map']}, display=True)
        progress.add_meta({
            'saved_epoch': self.epoch,
            'epoch_loss': self.epoch_loss,
        })
        self.save_progress(display=True, save_optim=save_optim)

    def save_progress(self, display=False, save_optim=False):
        """
        Store the network weights (and optionally the optimizer state) as
        meta data and save the training progress for the current epoch.

        The network is moved to the CPU while its state dict is captured
        and moved back to ``self.device`` afterwards.

        Args:
            display: print a confirmation line when done.
            save_optim: also store the optimizer state (it is stored as
                well whenever ``self.conf.save_opt`` is set).
        """
        progress = self.tp
        self.net = self.net.to('cpu')
        progress.add_meta({
            'net_weight': self.net.state_dict(),
            'optim': self.conf.optim,
        })
        include_optimizer = self.conf.save_opt or save_optim
        if include_optimizer:
            print('Saving Optimizer Sate')
            progress.add_meta({'optim_state': self.optimizer.state_dict()})
        progress.save_progress(self.epoch)

        self.net = self.net.to(self.device)
        if not display:
            return
        print('Progress Saved, current epoch=', self.epoch)

    def restore_progress(self):
        """
        Restore a saved run: network weights, epoch counter, epoch loss,
        optimizer and LR scheduler.

        After restoring, ``self.epoch`` is advanced by one so training
        continues with the epoch after the saved one.
        """
        progress = self.tp
        progress.restore_progress(self.conf.tps)

        # Weights are loaded on the CPU, then the net goes back to the device.
        self.net = self.net.to('cpu')
        saved_weights = progress.get_meta('net_weight')
        self.net.load_state_dict(saved_weights)
        self.net = self.net.to(self.device)

        # restore all the meta data and variables
        self.epoch = progress.get_meta('saved_epoch')
        self.epoch_loss = progress.get_meta('epoch_loss')
        print('Restore Progress,epoch=', self.epoch, ' epoch loss=', self.epoch_loss)
        self.set_optimizer()
        scheduler_state = {'epoch': self.epoch}
        self.set_lr_scheduler(restore_dict=scheduler_state)
        self.epoch += 1
    def eval_relation_predictor(self):
        """Evaluate relation prediction on ``self.test_loader``.

        Samples with fewer than two boxes are skipped. For every remaining
        sample ``self.net.predict`` is called and its predicted relations
        are compared element-wise with the ground truth.

        Returns:
            dict with
            ``'ap'``: array of length ``NUM_OF_CLASS`` holding, per
            ground-truth class, the fraction of correctly predicted
            relations (NaN for classes that never occurred), and
            ``'map'``: the NaN-ignoring mean of ``'ap'``.
        """
        pred_relations, gt_relations = [], []
        # gt_label is numpy (1, N objects), per the original note
        for ii, (img_name, imgs_, bboxes_, _, _, gt_relations_, _, _) \
                in tqdm(enumerate(self.test_loader), total=len(self.test_loader)):
            if bboxes_.size(1) < 2:
                continue
            # Use the device chosen by check_cuda() (honours the CPU fallback).
            imgs_, bboxes_ = imgs_.to(self.device), bboxes_.to(self.device)
            # Predictions come back in numpy form, not one-hot (original
            # note); the scores are not needed for the accuracy below.
            pred_relations_, _ = self.net.predict(imgs_, bboxes_[0])

            gt_relations.append(gt_relations_[0].numpy())
            pred_relations.append(pred_relations_)
            assert gt_relations[-1].shape == pred_relations[-1].shape, "gt:{0}, pre:{1}".format(gt_relations[-1].shape,pred_relations[-1].shape)

        # Column 0: correct predictions, column 1: total, per ground-truth class.
        accu_arr = np.zeros(shape=(NUM_OF_CLASS, 2), dtype=int)
        for sample_preds, sample_truths in zip(pred_relations, gt_relations):
            for pred, truth in zip(sample_preds, sample_truths):
                if pred == truth:
                    # count the hit under the ground-truth class
                    accu_arr[truth, 0] += 1
                accu_arr[truth, 1] += 1

        # Classes that were never tested stay NaN and are ignored by nanmean.
        ap = np.full(NUM_OF_CLASS, np.nan, dtype=float)
        tested = accu_arr[:, 1] > 0
        ap[tested] = accu_arr[tested, 0] / accu_arr[tested, 1]
        result = {'ap': ap, 'map': np.nanmean(ap)}
        print("ap: {0}".format(result['ap']))
        print('map: {0}'.format(result['map']))
        return result
Exemple #14
0
class Trainer:
    def __init__(self, args):
        """Store the configuration and run every setup step in order.

        A per-model progress directory
        (``global_config['progress_dir'] + conf.model``) is created if it
        does not exist yet, including missing parent directories.

        Args:
            args: configuration object; kept as ``self.conf`` and also
                passed to ``load_data``.
        """
        self.conf = args
        self.check_cuda()
        model_dir = global_config['progress_dir'] + self.conf.model
        # makedirs(exist_ok=True) avoids the isdir/mkdir race and also
        # works when the parent progress directory is missing.
        os.makedirs(model_dir, exist_ok=True)
        self.tp = TrainingProgress(
            model_dir + '/',
            self.conf.net_model + '-progress',
            data_key_list=['epoch_loss', 'test_loss', 'training_loss', 'mAP'])
        self.load_net()
        self.load_data(args)
        self.create_data_loader()
        self.prepare_training()  # Optimizer/Loss function

    def check_cuda(self):
        """Select ``self.device`` and mirror it into ``at.gpu_dev``.

        GPU ``self.conf.dev`` is used when CUDA is available, the CPU
        otherwise.
        """
        use_gpu = torch.cuda.is_available()
        if use_gpu:
            torch.cuda.set_device(self.conf.dev)  # default 0
            print("Use CUDA,device=", torch.cuda.current_device())
        self.device = 'cuda:' + str(self.conf.dev) if use_gpu else 'cpu'
        at.gpu_dev = self.device

    def load_net(self):
        """
        Build the detector named by ``self.conf.net_model``, move it to
        ``self.device`` and create the training target generators.

        VGG names select ``FasterRCNN_VGG``; ResNet names select
        ``FasterRCNN_ResNet``. The pretrained flag is off when
        ``self.conf.res`` is set (weights will be restored instead).

        Raises:
            ValueError: if ``self.conf.net_model`` is not a known name.
        """
        vgg_versions = ('VGG_16', 'VGG_16_bn', 'VGG_19', 'VGG_19_bn')
        resnet_versions = ('ResNet_18', 'ResNet_34', 'ResNet_50',
                           'ResNet_101', 'ResNet_152')
        if_pretrained = not self.conf.res  # if not loading weight

        # The two builders name their "pretrained" switch differently.
        if self.conf.net_model in vgg_versions:
            builder, pretrained_kw = FasterRCNN_VGG, 'vgg_pre'
        elif self.conf.net_model in resnet_versions:
            builder, pretrained_kw = FasterRCNN_ResNet, 'pretrained'
        else:
            raise ValueError('The net model not exist')

        options = {
            'version': self.conf.net_model,
            'n_names': N_NAMES,
            'n_colors': N_COLORS,
            'freeze_idx': self.conf.freeze_idx,
            pretrained_kw: if_pretrained,
            'use_drop': self.conf.use_drop,
        }
        # contains rpn and head(classification)
        self.net = builder(**options)
        self.net = self.net.to(self.device)
        # Load Target Generators for Training
        self.anchor_target = AnchorTargetCreator()
        self.proposal_target = ProposalTargetCreator()
        print(self.conf.net_model + ' is used')

    def load_data(self, args):  # batch_files, data_conf, cuda_dev):
        """
        Create ``self.train_set`` ('trainval' split) and ``self.test_set``
        ('test' split).

        Both use the same normalisation. The training preprocess flips
        horizontally with probability 0.5; the test preprocess never flips
        and is created with ``bbox_resize=False``.
        """
        means = (0.485, 0.456, 0.406)
        stds = (0.229, 0.224, 0.225)
        pp_train = Preprocess(normalize_means=list(means),
                              normalize_stds=list(stds),
                              p_hflip=0.5)
        pp_test = Preprocess(normalize_means=list(means),
                             normalize_stds=list(stds),
                             p_hflip=0,
                             bbox_resize=False)

        self.train_set = RD_dataset(data_dir=global_config['data_dir'],
                                    split='trainval',
                                    preprocess=pp_train)
        self.test_set = RD_dataset(data_dir=global_config['data_dir'],
                                   split='test',
                                   preprocess=pp_test)
        n_train, n_test = len(self.train_set), len(self.test_set)
        print('Data loader: Training set: ', n_train,
              ' Testing set: ', n_test)

    def create_data_loader(self):
        """Wrap the train and test sets in ``DataLoader`` objects.

        The training loader shuffles; the test loader keeps dataset order.
        Batch sizes come from ``self.conf.tr_bat`` / ``self.conf.ts_bat``.
        """
        train_options = dict(batch_size=self.conf.tr_bat, shuffle=True,
                             pin_memory=True, num_workers=8)
        test_options = dict(batch_size=self.conf.ts_bat, shuffle=False,
                            pin_memory=True, num_workers=12)
        self.train_loader = DataLoader(dataset=self.train_set, **train_options)
        self.test_loader = DataLoader(dataset=self.test_set, **test_options)

    def prepare_training(self):
        """
        Register the configuration as meta data, then either restore a
        previous run (``self.conf.res`` set) or start at epoch 1 with a
        fresh optimizer and LR scheduler. Meters are created last.
        """
        meta = {'conf': self.conf, 'global_conf': global_config}
        self.tp.add_meta(meta)
        resuming = self.conf.res
        if resuming:
            # restore_progress sets the optimizer and lr_scheduler itself
            self.restore_progress()
        if not resuming:
            self.epoch = 1
            self.set_optimizer()
            self.set_lr_scheduler()
        self.init_meters()

    def train(self):
        """
        Train until ``self.epoch`` reaches ``self.conf.max_epoch``.

        Per epoch: reset the meters, then for every batch of
        ``self.train_loader`` run forward, backward and an optimizer step
        while accumulating the loss. After the first epoch a text file with
        the main hyper-parameters is written to the progress directory.
        The epoch loss (sum divided by the dataset size) is recorded on
        ``self.tp`` and passed to the LR scheduler, and every
        ``self.conf.se`` epochs ``summary()`` is called. When the loop ends
        a final summary is written with the optimizer state included.

        A ``KeyboardInterrupt`` stops training and asks on stdin whether
        the current progress should be saved.
        """
        time_start = time.time()
        self.net.train()
        try:
            while self.epoch < self.conf.max_epoch:
                self.epoch_loss = 0
                self.reset_meters()
                # Original author's note: bbox shape is [1, 2, 4],
                # label is [1, n], label_attr is [1, m, n].
                for step, (
                        img_name, img, index, bbox, name, shape, color, _,
                        scale
                ) in tqdm(enumerate(self.train_loader)):
                    self.optimizer.zero_grad()
                    # Only img and bbox are consumed as device tensors; use
                    # the device chosen by check_cuda() so the CPU fallback
                    # is honoured (conf.dev is only a GPU index).
                    img, bbox = img.to(self.device), bbox.to(self.device)
                    scale = at.scalar(scale)
                    loss = self.train_forward_net(img_name, img, index, bbox,
                                                  name, shape, color, _, scale)
                    loss.total_loss.backward()
                    self.optimizer.step()
                    self.update_meters(loss)
                    self.epoch_loss += loss.total_loss.detach().cpu().numpy(
                    ) * img.size(0)

                if self.epoch == 1:  # create record file
                    record_path = (global_config['progress_dir'] +
                                   self.conf.net_model + '.txt')
                    with open(record_path, 'w') as f:
                        f.write('lr_rates: {0}, lr interval: {1}, use_drop: {2}\n'
                                'freeze_idx: {3}, opt: {4}, w_decay: {5}\n'\
                                .format(self.conf.lr_rates,self.conf.lr_epochs, self.conf.use_drop,
                                        self.conf.freeze_idx, self.conf.optim, self.conf.w_decay))
                self.epoch_loss = self.epoch_loss / len(
                    self.train_loader.dataset)
                self.tp.record_data({'epoch_loss': self.epoch_loss})
                self.lr_scheduler.step({
                    'loss': self.epoch_loss,
                    'epoch': self.epoch
                })
                if self.epoch % self.conf.se == 0:
                    print(timeSince(time_start), ': Trainer Summary Epoch=',
                          self.epoch)
                    self.summary()
                self.epoch += 1
            print(timeSince(time_start), ': Trainer Summary Epoch=',
                  self.epoch)
            self.summary(save_optim=True)  # for resume training
        except KeyboardInterrupt:
            save = input('Save Current Progress ? y for yes: ')
            if 'y' in save:
                print('Saving Progress...')
                self.save_progress(save_optim=True, display=True)

    def train_forward_net(self, img_names, imgs, indexs, bboxes, names, shapes,
                          colors, _, scale):
        """Run one training forward pass and return all losses.

        Only batch size 1 is supported. The pass extracts features, runs
        the RPN, samples RoIs with ``self.proposal_target``, runs the name
        and colour heads on the sampled RoIs, and computes five losses:
        RPN localisation, RPN classification, RoI localisation (from the
        name head), RoI name classification and RoI colour classification.
        The RPN / name / colour confusion meters are updated on the way.

        ``img_names``, ``indexs``, ``shapes`` and ``_`` are accepted but not
        used in this method.

        Returns:
            ``LossTuple`` built from the five losses followed by their sum.

        Raises:
            ValueError: if the batch size is not 1.
        """

        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)
        #print("img_name {0}".format(img_name))
        #vis_image(imgs.cpu()[0])
        #plt.show()
        features = self.net.extractor(imgs)  ##vgg16 (1,512,37,50)
        #print(features.shape)
        rpn_locs, rpn_scores, rois, _, anchor = self.net.rpn(
            features, img_size, scale)
        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        name = names[0]
        color = colors[0]

        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs(For the training of head(classification network)) and forward

        sample_roi, gt_roi_loc, gt_roi_name, gt_roi_color = self.proposal_target(
            roi, at.to_np(bbox), at.to_np(name), at.to_np(color))
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_name_loc, roi_name_score = self.net.head_name(
            features, sample_roi, sample_roi_index)

        # Only the class scores of the colour head are used; its first
        # output is discarded.
        _, roi_color_score = self.net.head_color(features, sample_roi,
                                                 sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Target for RPN => Anchor Target
        gt_rpn_loc, gt_rpn_label = self.anchor_target(at.to_np(bbox), anchor,
                                                      img_size)

        gt_rpn_label = at.to_tensor(gt_rpn_label).long()
        gt_rpn_loc = at.to_tensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data,
                                           self.conf.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Labels equal to -1 are excluded from the classification loss.
        # NOTE(review): .cuda() is hard-coded here and below, while
        # check_cuda() allows a CPU fallback via self.device - confirm.
        rpn_cls_loss = torch.nn.functional.cross_entropy(rpn_score,
                                                         gt_rpn_label.cuda(),
                                                         ignore_index=-1)
        # Drop the ignored (-1) entries before updating the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.to_np(rpn_score)[at.to_np(gt_rpn_label) > -1]
        self.rpn_cm.add(at.to_tensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        ##name##
        n_sample = roi_name_loc.shape[0]
        # Regroup the localisation output into groups of 4 values and pick,
        # for every sample, the group indexed by its ground-truth name.
        roi_name_loc = roi_name_loc.view(n_sample, -1, 4)
        name_roi_loc = roi_name_loc[torch.arange(0, n_sample).long().cuda(), \
                              at.to_tensor(gt_roi_name).long()] ## not one-hot
        gt_roi_name = at.to_tensor(gt_roi_name).long()
        gt_roi_loc = at.to_tensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(name_roi_loc.contiguous(),
                                           gt_roi_loc, gt_roi_name.data,
                                           self.conf.roi_sigma)
        roi_name_loss = nn.CrossEntropyLoss()(roi_name_score,
                                              gt_roi_name.cuda())

        self.roi_name_cm.add(at.to_tensor(roi_name_score, False),
                             gt_roi_name.data.long())

        ##color##
        gt_roi_color = at.to_tensor(gt_roi_color).long()
        roi_color_loss = nn.CrossEntropyLoss()(roi_color_score,
                                               gt_roi_color.cuda())

        self.roi_color_cm.add(at.to_tensor(roi_color_score, False),
                              gt_roi_color.data.long())

        ##sum up all loss##
        losses = [
            rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_name_loss,
            roi_color_loss
        ]
        # The total is appended as the last element.
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    # Meters
    def init_meters(self):
        """Create the three confusion meters and one average meter per loss field."""
        # foreground and background
        self.rpn_cm = ConfusionMeter(2)
        # num of class (including background)
        name_classes = N_NAMES + 1
        self.roi_name_cm = ConfusionMeter(name_classes)
        # num of class (including background)
        color_classes = N_COLORS + 1
        self.roi_color_cm = ConfusionMeter(color_classes)
        average_meters = {}
        for field in LossTuple._fields:
            average_meters[field] = AverageValueMeter()  # average loss
        self.meters = average_meters

    def update_meters(self, losses):
        """Add each loss in ``losses`` (converted by ``at.scalar``) to its meter."""
        loss_values = {}
        for field, value in losses._asdict().items():
            loss_values[field] = at.scalar(value)
        for field, meter in self.meters.items():
            meter.add(loss_values[field])

    def reset_meters(self):
        """Reset all loss meters, then the RPN, name and colour confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.rpn_cm.reset()
        self.roi_name_cm.reset()
        self.roi_color_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        readings = {}
        for name in self.meters:
            readings[name] = self.meters[name].value()[0]
        return readings

    def set_optimizer(self):
        """
        Create ``self.optimizer`` over the parameters of ``self.net`` that
        require gradients.

        The optimizer class is looked up in ``optim`` by ``self.conf.optim``;
        'SGD' additionally gets momentum 0.9 with Nesterov enabled. When
        resuming (``self.conf.res``) and the saved run used the same
        optimizer type, a stored ``'optim_state'`` is loaded back.
        """
        trainable = filter(lambda p: p.requires_grad, self.net.parameters())
        if self.conf.optim == 'SGD':
            extra = {'momentum': 0.9, 'nesterov': True}
        else:
            extra = {}
        optimizer_cls = getattr(optim, self.conf.optim)
        self.optimizer = optimizer_cls(trainable,
                                       lr=self.conf.lr_init,
                                       weight_decay=self.conf.w_decay,
                                       **extra)

        can_restore = (self.conf.res
                       and self.tp.get_meta('optim') == self.conf.optim
                       and 'optim_state' in self.tp.meta_dict.keys())
        if can_restore:
            self.optimizer.load_state_dict(self.tp.get_meta('optim_state'))
            print('Optimizer Internal State Restored')

    def set_lr_scheduler(self, restore_dict=None):
        """Create ``self.lr_scheduler`` and optionally replay a saved step.

        Args:
            restore_dict: when not None, passed to the new scheduler's
                ``step`` right after construction.
        """
        conf = self.conf
        self.lr_scheduler = LearningRateScheduler(
            conf.lrs, self.optimizer, conf.lr_rates, conf.lr_epochs,
            conf.lr_loss, conf.lr_init, None)
        if restore_dict is None:
            return
        self.lr_scheduler.step(restore_dict)

    def summary(self, save_optim=False):
        """
        Evaluate the detector, record both mAP values and save the progress.

        Args:
            save_optim (bool): forwarded to ``save_progress``.
        """
        scores = self.eval_faster_rcnn()

        record = {
            'mAP_name': scores['map_name'],
            'mAP_color': scores['map_color'],
        }
        self.tp.record_data(record, display=True)

        meta = {'saved_epoch': self.epoch, 'epoch_loss': self.epoch_loss}
        self.tp.add_meta(meta)
        self.save_progress(display=True, save_optim=save_optim)

    def save_progress(self, display=False, save_optim=False):
        """
        Save training weight/progress/meta data.

        The network is moved to the CPU while its weights (and optionally the
        optimizer state) are stored through ``self.tp``, then moved back to
        ``self.device``. Afterwards the network's ``state_dict`` is also
        written with ``torch.save`` to
        ``<progress_dir><net_model>_<epoch>.pkl``.

        Args:
            display (bool): print a confirmation line when done.
            save_optim (bool): store the optimizer state even when
                ``self.conf.save_opt`` is not set.
        """
        self.net = self.net.to('cpu')
        try:
            self.tp.add_meta({
                'net_weight': self.net.state_dict(),
                'optim': self.conf.optim
            })
            if self.conf.save_opt or save_optim:
                print('Saving Optimizer Sate')
                self.tp.add_meta({'optim_state': self.optimizer.state_dict()})
            self.tp.save_progress(self.epoch)
        finally:
            # Put the network back on its device even if saving failed, so a
            # caller that handles the error does not continue on the CPU.
            self.net = self.net.to(self.device)

        torch.save(
            self.net.state_dict(),
            '{0}{1}_{2}.pkl'.format(global_config['progress_dir'],
                                    self.conf.net_model, self.epoch))
        if display:
            print('Progress Saved, current epoch=', self.epoch)

    def restore_progress(self):
        """
        Reload a saved training session.

        Restores the progress named by ``self.conf.tps``, loads the saved
        weights into ``self.net``, sets ``self.epoch`` and ``self.epoch_loss``
        from the stored meta data and rebuilds the optimizer and LR scheduler.
        ``self.epoch`` is incremented by one at the end.
        """
        progress = self.tp
        progress.restore_progress(self.conf.tps)

        # Weights are loaded while the network is on the CPU; it is moved to
        # the training device afterwards.
        self.net = self.net.to('cpu')
        self.net.load_state_dict(progress.get_meta('net_weight'))
        self.net = self.net.to(self.device)

        for attr, meta_key in (('epoch', 'saved_epoch'),
                               ('epoch_loss', 'epoch_loss')):
            setattr(self, attr, progress.get_meta(meta_key))
        print('Restore Progress,epoch=', self.epoch, ' epoch loss=',
              self.epoch_loss)

        self.set_optimizer()
        # The scheduler is stepped with the saved epoch, before the increment.
        self.set_lr_scheduler(restore_dict={'epoch': self.epoch})
        self.epoch += 1

    def eval_faster_rcnn(self, visualize=False):
        """
        Run the detector over ``self.test_loader`` and score the predictions
        with ``eval_detection_voc``.

        Predictions and ground truth are collected per loader item. The
        resulting ``map_name`` / ``map_color`` values are printed and appended
        to ``<progress_dir><net_model>.txt`` together with the current epoch
        and epoch loss.

        Args:
            visualize (bool): when True, draw ground truth and predictions
                for every image that has at least one predicted name.

        Returns:
            The object returned by ``eval_detection_voc``; this method reads
            its ``'map_name'`` and ``'map_color'`` entries.
        """
        # Per-image lists: the first index is the image, each entry holds that
        # image's boxes, labels or scores.
        pred_bboxes, pred_names, pred_name_scores = [], [], []
        pred_colors, pred_color_scores = [], []
        gt_bboxes, gt_names, gt_colors = [], [], []

        # Only the image, ground-truth boxes/names/colors and the scale are
        # used; the remaining loader fields are ignored.
        for _, (_img_names, img, _indexs, gt_bboxes_, gt_names_, _gt_shapes_, gt_colors_, _, scale) \
                in tqdm(enumerate(self.test_loader), total=len(self.test_loader)):

            # NOTE(review): predict is always called with visualize=True,
            # independent of this method's ``visualize`` flag -- confirm that
            # this is intended.
            pred_bboxes_, pred_names_, pred_name_scores_, pred_colors_, pred_color_scores_ = \
                self.net.predict(img, scale, visualize=True)

            # Index 0 selects the single sample of the batch.
            # (original note: the ground-truth label is numpy with shape
            # (1, N objects))
            gt_bboxes.append(gt_bboxes_[0].numpy())
            gt_names.append(gt_names_[0].numpy())
            gt_colors.append(gt_colors_[0].numpy())

            pred_bboxes.append(pred_bboxes_)
            pred_names.append(pred_names_)
            pred_name_scores.append(pred_name_scores_)
            pred_colors.append(pred_colors_)
            pred_color_scores.append(pred_color_scores_)

            # The input images are deliberately not accumulated: nothing
            # reads them after the loop, and keeping them would hold the
            # whole test set in memory.
            if visualize and pred_names_.shape[0] > 0:
                gt_arg_list = [gt_bboxes[-1], gt_names[-1], gt_colors[-1]]
                pred_arg_list = [
                    pred_bboxes_, pred_names_, pred_name_scores_,
                    pred_colors_, pred_color_scores_
                ]
                vis_bbox(img[0], *gt_arg_list, *pred_arg_list)
                plt.show()

        # Labels are passed as class indices, not one-hot vectors.
        result = eval_detection_voc(
            pred_bboxes,
            pred_names,
            pred_name_scores,
            pred_colors,
            pred_color_scores,
            gt_bboxes,
            gt_names,
            gt_colors,
            use_07_metric=False,
            show_prec=self.conf.show_prec,
            show_rec=self.conf.show_rec)

        print('map_name: {0}, map_color: {1}'.format(result['map_name'],
                                                     result['map_color']))

        # Append the scores to the run's text log.
        with open(global_config['progress_dir'] + self.conf.net_model + '.txt',
                  'a') as f:
            f.write('epoch= {0}, epoch_loss= {1}\n'.format(
                self.epoch, self.epoch_loss))
            f.write('map_name: {0}, map_color: {1}\n'.format(
                result['map_name'], result['map_color']))
        return result
Exemple #15
0
class FasterRCNNTrainer(nn.Module):
    """
    Wrapper for conveniently training. returns losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn, n_fg_class=20):
        super().__init__()

        # Model being trained and its number of foreground classes.
        self.faster_rcnn = faster_rcnn
        self.n_fg_class = n_fg_class

        # Sigma values handed to the RPN / RoI localization losses.
        self.rpn_sigma, self.roi_sigma = opt.rpn_sigma, opt.roi_sigma

        # Target creators produce the training targets (gt_bbox, gt_label, ...).
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Location normalization statistics are taken from the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Training-status indicators: a 2-class confusion meter for the RPN,
        # one with n_fg_class + 1 classes for the RoI head, and one average
        # meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(n_fg_class + 1)
        self.meters = {}
        for field in LossTuple._fields:
            self.meters[field] = AverageValueMeter()


    def forward(self, imgs, bboxes, labels, scale):
        """
        Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported. Images without any
        ground-truth box are handled: both localization losses are then zero
        and every anchor is labelled as background.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            LossTuple: the four individual losses followed by their sum.

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, _roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        # A 0-dim bbox tensor holds no boxes; treat it like an empty one
        # instead of comparing its (empty) shape with an int.
        n_bbox = bbox.shape[0] if len(bbox.shape) > 0 else 0
        if n_bbox > 0:
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                at.tonumpy(bbox),
                anchor,
                img_size)
            gt_rpn_label = at.tovariable(gt_rpn_label).long()
            gt_rpn_loc = at.tovariable(gt_rpn_loc)
            rpn_loc_loss = _fast_rcnn_loc_loss(
                rpn_loc,
                gt_rpn_loc,
                gt_rpn_label.data,
                self.rpn_sigma)
        else:
            # No boxes: no RPN localization loss, and every anchor is
            # labelled 0 (background).
            rpn_loc_loss = t.tensor(0.)
            if opt.use_cuda:
                rpn_loc_loss = rpn_loc_loss.cuda()
            gt_rpn_label = t.tensor([0] * anchor.shape[0])

        # NOTE: default value of ignore_index is -100 ...
        if opt.reduce_bg_weight:
            # Reweight foreground / background for the case we couldn't
            # sample identical numbers. Count labels 0 and 1 explicitly so
            # the weight vector always has one entry per class, even when the
            # ignored label (-1) or one of the classes does not occur.
            rpn_labels_np = gt_rpn_label.detach().cpu().numpy()
            bg_fg_count = np.bincount(
                rpn_labels_np[rpn_labels_np > -1], minlength=2)[:2]
            # An absent class contributes no loss terms; clamping its count
            # to 1 only avoids a division by zero.
            bg_fg_count = np.maximum(bg_fg_count, 1)
            rpn_class_weights = 1.0 / bg_fg_count
            rpn_class_weights = t.FloatTensor(
                rpn_class_weights / np.sum(rpn_class_weights) * 2)
        else:
            rpn_class_weights = None
        if opt.use_cuda:
            rpn_cls_loss = F.cross_entropy(
                rpn_score, gt_rpn_label.cuda(), ignore_index=-1,
                weight=rpn_class_weights.cuda()
                if rpn_class_weights is not None else None)
        else:
            rpn_cls_loss = F.cross_entropy(
                rpn_score, gt_rpn_label, ignore_index=-1,
                weight=rpn_class_weights)
        # Only anchors that are not ignored (-1) enter the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For each sampled RoI pick the 4 values belonging to its target class.
        if opt.use_cuda:
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), at.totensor(gt_roi_label).long()]
        else:
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long(), at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        if n_bbox > 0:
            roi_loc_loss = _fast_rcnn_loc_loss(
                roi_loc.contiguous(),
                gt_roi_loc,
                gt_roi_label.data,
                self.roi_sigma)
        else:
            # no roi loc loss if no gt bboxes
            roi_loc_loss = t.tensor(0.)
            if opt.use_cuda:
                roi_loc_loss = roi_loc_loss.cuda()

        if opt.reduce_bg_weight:
            # Background (class 0) is weighted by 1 / number of sampled RoIs,
            # each foreground class by 1.
            bg_weight = 1.0 / gt_roi_label.size()[0]
            class_weights = t.FloatTensor(np.hstack([bg_weight, np.ones((self.n_fg_class,))]))
        else:
            class_weights = None

        if opt.use_cuda:
            roi_cls_loss = nn.CrossEntropyLoss(
                weight=class_weights.cuda()
                if class_weights is not None else None)(roi_score, gt_roi_label.cuda())
        else:
            roi_cls_loss = nn.CrossEntropyLoss(weight=class_weights)(roi_score, gt_roi_label)

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        sum_losses = sum(losses)
        losses = losses + [sum_losses]

        return LossTuple(*losses)

    # ...def forward(self, imgs, bboxes, labels, scale)


    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses of that step.

        Gradients are cleared, the losses are computed with ``forward``,
        the total loss is back-propagated, the optimizer is stepped and the
        loss meters are updated.
        """
        self.optimizer.zero_grad()
        step_losses = self.forward(imgs, bboxes, labels, scale)
        step_losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(step_losses)
        return step_losses


    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """
        Serialize the model, config and optional optimizer state.

        ``kwargs`` are stored under ``'other_info'`` and additionally copied
        to the top level of the saved dict, where they overwrite entries of
        the same name.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated from a time string and, when given,
                ``kwargs['best_map']``.

        Returns:
            save_path(str): the path to save models.
        """
        import os

        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict.update(kwargs)

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            if 'best_map' in kwargs:
                save_path += '_%s' % kwargs['best_map']

        # Create the target directory on first use; without it t.save fails
        # when e.g. 'checkpoints/' does not exist yet. A bare file name has an
        # empty dirname and needs no directory.
        if isinstance(save_path, str):
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        return save_path


    def load(self, state_dict, load_optimizer=True, parse_opt=False, ):
        """Load model (and optionally config / optimizer) from ``state_dict``.

        Args:
            state_dict (dict): either a dict as written by ``save`` (with a
                ``'model'`` entry) or, for backward compatibility, a bare
                model state dict.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): feed the saved ``'config'`` entry to ``opt``.

        Returns:
            ``self`` for a bare model state dict, otherwise ``state_dict``.
        """
        if 'model' not in state_dict:
            # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self

        self.faster_rcnn.load_state_dict(state_dict['model'])
        if parse_opt:
            opt._parse(state_dict['config'])
        if load_optimizer and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return state_dict


    def update_meters(self, losses):
        """Add every loss in ``losses`` to its meter as a detached numpy value."""
        scalars = {}
        for name, value in losses._asdict().items():
            scalars[name] = at.scalar(value)
        for name in self.meters:
            as_numpy = scalars[name].detach().cpu().numpy()
            self.meters[name].add(as_numpy)


    def reset_meters(self):
        """Reset all loss meters, then the RoI and RPN confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        for attr in ('roi_cm', 'rpn_cm'):
            getattr(self, attr).reset()


    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return dict(
            (name, meter.value()[0]) for name, meter in self.meters.items())
Exemple #16
0
class BaseTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super().__init__()

        # Model being trained.
        self.faster_rcnn = faster_rcnn

        # Sigma values for the RPN / RoI losses, read from the config.
        self.rpn_sigma, self.roi_sigma = opt.rpn_sigma, opt.roi_sigma

        # Target creators produce the training targets (gt_bbox, gt_label, ...).
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Location normalization statistics are taken from the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Training-status indicators: two 2-class confusion meters and one
        # average meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(2)
        self.meters = {}
        for field in LossTuple._fields:
            self.meters[field] = AverageValueMeter()

    def forward(self, imgs, *args, **kwargs):
        """Compute the training losses; must be implemented by a subclass.

        ``train_step`` calls this as ``forward(imgs, bboxes, labels, scale)``
        and expects an object with a ``total_loss`` attribute and
        ``_asdict()`` (a ``LossTuple``). The extra positional and keyword
        arguments are accepted so that call reaches this method and fails
        with a clear error instead of a signature TypeError.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            '%s must implement forward() and return a LossTuple'
            % type(self).__name__)

    def train_step(self, imgs, bboxes, labels, scale):
        """Perform a single optimization step and return its losses.

        Order of operations: clear gradients, compute the losses with
        ``forward``, back-propagate the total loss, step the optimizer and
        update the loss meters.
        """
        self.optimizer.zero_grad()
        current = self.forward(imgs, bboxes, labels, scale)
        current.total_loss.backward()
        self.optimizer.step()
        self.update_meters(current)
        return current

    def save(self,
             name,
             epoch,
             ap=None,
             save_optimizer=True,
             save_path=None,
             best_map=True):
        """Serialize the model, config, epoch, score and visualizer state.

        Args:
            name: not used by this method.
            epoch: stored under ``'epoch'``.
            ap: stored under ``'map'``.
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, a path
                under ``saved_models/`` is generated from a time string.
            best_map (bool): when true, the saved file is also copied to
                ``model_best.pth`` in the current directory.

        Returns:
            save_path(str): the path to save models.
        """
        import os

        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['epoch'] = epoch
        save_dict['map'] = ap
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'saved_models/fasterrcnn_%s.pth' % timestr

        # Create the target directory on first use; without it t.save fails
        # when e.g. 'saved_models/' does not exist yet. A bare file name has
        # an empty dirname and needs no directory.
        if isinstance(save_path, str):
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)

        if best_map:
            shutil.copyfile(save_path, 'model_best.pth')

        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Load a checkpoint from ``path`` with ``t.load`` and apply it.

        A checkpoint without a ``'model'`` entry is treated as a bare model
        state dict (legacy format); nothing else is restored in that case.

        Args:
            path: passed unchanged to ``t.load``.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): feed the saved ``'config'`` entry to ``opt``.

        Returns:
            ``self``.
        """
        checkpoint = t.load(path)
        if 'model' not in checkpoint:
            # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(checkpoint)
            return self

        self.faster_rcnn.load_state_dict(checkpoint['model'])
        if parse_opt:
            opt._parse(checkpoint['config'])
        if load_optimizer and 'optimizer' in checkpoint:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        return self

    def update_meters(self, losses):
        """Add the ``.item()`` value of every loss in ``losses`` to its meter."""
        values = losses._asdict()
        scalars = {name: values[name].item() for name in values}
        for name, meter in self.meters.items():
            meter.add(scalars[name])

    def reset_meters(self):
        """Clear every loss meter and both confusion meters (RoI, then RPN)."""
        for name in self.meters:
            self.meters[name].reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Map each loss name to the first element of its meter's ``value()``."""
        data = {}
        for name in self.meters:
            data[name] = self.meters[name].value()[0]
        return data
Exemple #17
0
class fasterrcnn_train(nn.Module):
    def __init__(self, faster_rcnn):
        super().__init__()

        # Model being trained.
        self.faster_rcnn = faster_rcnn

        # Sigma values for the RPN / RoI localization losses, from the config.
        self.rpn_sigma, self.roi_sigma = opt.rpn_sigma, opt.roi_sigma

        # Target creators produce the training targets (gt_bbox, gt_label, ...).
        self.anchortarget = gtor.Anchortarget_generator()
        self.propasaltarget = gtor.ProposalTargetCreator()

        # Location normalization statistics are taken from the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Training-status indicators: a 2-class confusion meter for the RPN,
        # a 21-class one for the RoI head (presumably 20 classes plus
        # background -- confirm), and one average meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {}
        for field in LossTuple._fields:
            self.meters[field] = AverageValueMeter()

    def forward(self,imgs,bbox,label,n_pre_nms,n_post_nms,scale):
        """Run extractor, RPN and RoI head on ``imgs`` and compute the losses.

        Both confusion meters are updated as a side effect. Labels are moved
        with ``.cuda()``, so a CUDA device is required.

        Returns:
            LossTuple: RPN localization, RPN classification, RoI localization
            and RoI classification loss, followed by their sum.
        """
        _, _, height, width = imgs.shape
        img_size = (height, width)

        feature = self.faster_rcnn.extractor(imgs)
        rpn_loc, rpn_cls, roi, _roi_indices, anchor = self.faster_rcnn.rpn(
            feature, img_size, n_pre_nms, n_post_nms, scale)

        # Training targets for the RPN and for the sampled RoIs.
        gt_rpn_loc, gt_rpn_label = self.anchortarget(
            at.tonumpy(bbox), anchor, img_size)
        sample_roi, gt_roi_loc, gt_roi_label = self.propasaltarget(
            roi, at.tonumpy(bbox), at.tonumpy(label))

        # Batch size is 1, so every RoI index is 0.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls, roi_loc = self.faster_rcnn.roihead(
            feature, sample_roi, sample_roi_index)

        # ---- RPN losses ----
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc[0, :, :], gt_rpn_loc, gt_rpn_label.data, self.rpn_sigma)
        # Anchors labelled -1 are ignored by the classification loss.
        rpn_cls_loss = F.cross_entropy(
            rpn_cls[0, :, :], gt_rpn_label.cuda(), ignore_index=-1)
        kept_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        kept_rpn_score = at.tonumpy(rpn_cls[0, :, :])[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(kept_rpn_score, False),
                        kept_rpn_label.data.long())

        # ---- RoI losses ----
        n_sample = roi_loc.shape[0]
        roi_loc = roi_loc.view(n_sample, -1, 4)
        # For each sampled RoI pick the 4 values belonging to its target class.
        sample_idx = t.arange(0, n_sample).long().cuda()
        roi_loc = roi_loc[sample_idx, at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(), gt_roi_loc, gt_roi_label.data, self.roi_sigma)
        roi_cls_loss = F.cross_entropy(roi_cls, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_cls, False), gt_roi_label.data.long())

        parts = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        parts = parts + [sum(parts)]

        return LossTuple(*parts)

    def train_step(self, imgs, bboxes, labels,n_pre_nms,n_post_nms, scale):
        """Run one optimization step and return the losses of that step.

        Gradients are cleared, ``forward`` computes the losses, the total
        loss is back-propagated, the optimizer is stepped and the loss meters
        are updated.
        """
        self.optimizer.zero_grad()
        step_losses = self.forward(
            imgs, bboxes, labels, n_pre_nms, n_post_nms, scale)
        step_losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(step_losses)
        return step_losses


    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, config, visualizer state and extra info.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generated from a time string and the values of kwargs.
            **kwargs: stored under ``'other_info'``.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # A bare file name has an empty dirname; os.makedirs('') would raise,
        # so a directory is only created when the path actually names one.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Load a checkpoint from ``path`` with ``t.load`` and apply it.

        A checkpoint without a ``'model'`` entry is treated as a bare model
        state dict (legacy format); nothing else is restored in that case.

        Args:
            path: passed unchanged to ``t.load``.
            load_optimizer (bool): restore the optimizer state when present.
            parse_opt (bool): feed the saved ``'config'`` entry to ``opt``.

        Returns:
            ``self``.
        """
        saved = t.load(path)
        has_model = 'model' in saved
        self.faster_rcnn.load_state_dict(saved['model'] if has_model else saved)
        if not has_model:
            # legacy way, for backward compatibility
            return self
        if parse_opt:
            opt._parse(saved['config'])
        if load_optimizer and 'optimizer' in saved:
            self.optimizer.load_state_dict(saved['optimizer'])
        return self

    def update_meters(self, losses):
        """Add the scalar value of every loss in ``losses`` to its meter."""
        as_scalars = dict(
            (name, at.scalar(value))
            for name, value in losses._asdict().items())
        for name, meter in self.meters.items():
            meter.add(as_scalars[name])

    def reset_meters(self):
        """Reset the loss meters and then the RoI and RPN confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return the first element of ``value()`` for every loss meter, by name."""
        names = list(self.meters)
        return {name: self.meters[name].value()[0] for name in names}
Exemple #18
0
class FasterRCNNTrainer(nn.Module):
    def __init__(self, faster_rcnn):
        super().__init__()

        # Model being trained.
        self.faster_rcnn = faster_rcnn

        # Sigma values for the RPN / RoI localization losses, from the config.
        self.rpn_sigma, self.roi_sigma = opt.rpn_sigma, opt.roi_sigma

        # Target creators produce the training targets (gt_bbox, gt_label, ...).
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        # Location normalization statistics are taken from the model.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Training-status indicators: a 2-class confusion meter for the RPN,
        # a 21-class one for the RoI head (presumably 20 classes plus
        # background -- confirm), and one average meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {}
        for field in LossTuple._fields:
            self.meters[field] = AverageValueMeter()

    def forward(self,
                imgs,
                scale,
                paste_scale,
                copy_img,
                copy_bboxes,
                copy_labels,
                paste_img,
                paste_bboxes,
                paste_labels,
                cutmix_flag=False,
                plot_flag=False,
                drop_flag=False,
                crop_flag=False,
                keep_flag=False,
                device='cuda'):

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        features, attentive_maps, attention_maps = self.faster_rcnn.aug(
            features, img_size)

        info = {}

        if cutmix_flag:
            new_imgs, new_bboxes, new_labels, new_scale, info = cutmix_generate(
                copy_img, scale, paste_scale, paste_img, attention_maps,
                copy_bboxes, copy_labels, paste_bboxes, paste_labels, info)

            if new_imgs is None:

                bboxes = copy_bboxes
                labels = copy_labels
            else:

                imgs = new_imgs
                bboxes = new_bboxes
                labels = new_labels
                scale = new_scale
                _, _, H, W = imgs.shape
                img_size = (H, W)
                center = generate_map(bboxes, img_size)
                imgs = imgs.to(device)
                bboxes = bboxes.to(device)
                cutmix_features = self.faster_rcnn.extractor(imgs)

                cutmix_features, attentive_maps, attention_maps = self.faster_rcnn.aug(
                    cutmix_features, img_size)
        else:
            info["use_cutmix"] = 0
            bboxes = copy_bboxes
            labels = copy_labels

        center = generate_map(bboxes, list(imgs.shape[2:]))
        #         if cutmix_flag:
        #             features = cutmix_features

        with t.no_grad():
            if crop_flag:
                crop_image = self.faster_rcnn.batch_aug(
                    imgs,
                    attentive_maps[:, :1, :, ],
                    'crop',
                    theta=(0.4, 0.6),
                    padding_ratio=0.1)
            else:
                crop_image = None
            if drop_flag:
                drop_image = self.faster_rcnn.batch_aug(
                    imgs, attentive_maps[:, 1:, :, ], 'drop', theta=(0.2, 0.5))
            else:
                drop_image = None

        feature_map = []

        if not cutmix_flag or keep_flag:
            if cutmix_flag:
                features = t.nn.functional.interpolate(
                    features, cutmix_features.shape[2:])
            feature_map.append(features)
        if cutmix_flag:
            feature_map.append(cutmix_features)
        if crop_flag:
            crop_features = self.faster_rcnn.extractor(crop_image)
            feature_map.append(crop_features)
        if drop_flag:
            drop_features = self.faster_rcnn.extractor(drop_image)
            feature_map.append(drop_features)

        features2 = t.stack(feature_map, dim=0)
        rpn_locs = list(range(len(feature_map)))
        rpn_scores = list(range(len(feature_map)))
        rois = list(range(len(feature_map)))
        roi_indices = list(range(len(feature_map)))
        anchors = list(range(len(feature_map)))
        rpn_loc_loss_s = 0
        rpn_cls_loss_s = 0
        roi_loc_loss_s = 0
        roi_cls_loss_s = 0
        center_loss_s = 0
        center_loss_f = nn.MSELoss()
        for i in range(len(feature_map)):
            rpn_locs[i], rpn_scores[i], rois[i], roi_indices[i], anchors[i] = \
            self.faster_rcnn.rpn(features2[i], img_size, scale)

            rpn_locs[i], rpn_scores[i], rois[i], roi_indices[i], anchors[i] = \
                self.faster_rcnn.rpn(features2[i], img_size, scale)

            rpn_locs[i], rpn_scores[i], rois[i], roi_indices[i], anchors[i] = \
                self.faster_rcnn.rpn(features2[i], img_size, scale)

        rpn_locs = t.cat(rpn_locs, dim=0)
        rpn_scores = t.cat(rpn_scores, dim=0)
        roi_indices = np.concatenate(roi_indices, axis=0)
        # 仅当保留原feature map,cutmix有效时,有不同的bbox, labels
        if keep_flag and cutmix_flag and info["use_cutmix"] == 1:
            t1 = [t.from_numpy(paste_bboxes).to(device)]
            for i in range(len(feature_map) - 1):
                t1.append(bboxes.squeeze(dim=0))

            bboxs = t1

            t2 = [
                t.from_numpy(paste_labels).to(device).reshape([
                    len(paste_bboxes),
                ])
            ]

            for i in range(len(feature_map) - 1):
                t2.append(labels)
            labels = t2
        else:
            if len(bboxes) != 3:
                bboxes = bboxes.unsqueeze(dim=0)
            bboxs = t.stack([bboxes] * len(feature_map), dim=0)
            labels = t.stack([labels] * len(feature_map), dim=0)

        for i in range(len(feature_map)):
            bbox = bboxs[i].reshape(-1, 4)
            label = labels[i].reshape(-1, )
            rpn_score = rpn_scores[i]
            rpn_loc = rpn_locs[i]
            roi = rois[i]
            features = features2[i]
            anchor = anchors[i]

            # Sample RoIs and forward
            # it's fine to break the computation graph of rois,
            # consider them as constant input
            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
                roi, at.tonumpy(bbox), at.tonumpy(label),
                self.loc_normalize_mean, self.loc_normalize_std)
            # NOTE it's all zero because now it only support for batch=1 now
            sample_roi_index = t.zeros(len(sample_roi))

            roi_cls_loc, roi_score = self.faster_rcnn.head(
                features, sample_roi, sample_roi_index.cuda())

            # ------------------ RPN losses -------------------#
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                at.tonumpy(bbox), anchor, img_size)
            gt_rpn_label = at.totensor(gt_rpn_label).long()
            gt_rpn_loc = at.totensor(gt_rpn_loc)
            rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                               gt_rpn_label.data,
                                               self.rpn_sigma)

            rpn_cls_loss = F.cross_entropy(rpn_score,
                                           gt_rpn_label.to(device),
                                           ignore_index=-1)
            _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
            _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
            self.rpn_cm.add(at.totensor(_rpn_score, False),
                            _gt_rpn_label.data.long())

            # ------------------ ROI losses (fast rcnn loss) -------------------#
            n_sample = roi_cls_loc.shape[0]
            roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long().to(device), \
                                  at.totensor(gt_roi_label).long()]
            gt_roi_label = at.totensor(gt_roi_label).long()
            gt_roi_loc = at.totensor(gt_roi_loc)

            roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(),
                                               gt_roi_loc, gt_roi_label.data,
                                               self.roi_sigma)

            roi_cls_loss = nn.CrossEntropyLoss()(roi_score,
                                                 gt_roi_label.to(device))

            self.roi_cm.add(at.totensor(roi_score, False),
                            gt_roi_label.data.long())

            rpn_cls_loss_s += rpn_cls_loss / len(feature_map)
            rpn_loc_loss_s += rpn_loc_loss / len(feature_map)
            roi_loc_loss_s += roi_loc_loss / len(feature_map)
            roi_cls_loss_s += roi_cls_loss / len(feature_map)

        attention_map = t.tanh(attention_maps)
        #       center_loss = center_loss_f(attention_map, center).float()
        #        losses = [rpn_loc_loss_s, rpn_cls_loss_s, roi_loc_loss_s, roi_cls_loss_s, center_loss]
        losses = [
            rpn_loc_loss_s, rpn_cls_loss_s, roi_loc_loss_s, roi_cls_loss_s
        ]
        losses = losses + [sum(losses)]
        #         print("-"*100)
        #         for i in losses:
        #             print(i)
        #         print("-"*100)

        if plot_flag:

            res_visual(copy_img, copy_bboxes, copy_labels, paste_img,
                       paste_bboxes, paste_labels, crop_image, drop_image,
                       attention_map, center, imgs, bboxes, labels, features,
                       True, self.faster_rcnn)

        return LossTuple(*losses), info, imgs, bboxes, labels

    def train_step(self, *args, **kwargs):
        """Run a single optimisation step.

        Every positional and keyword argument is forwarded unchanged to
        ``self.forward``.

        Returns:
            tuple: ``(losses, info, imgs, bboxes, labels)`` exactly as
            produced by ``self.forward``.
        """
        # Gradients accumulate by default, so clear them before the new pass.
        self.optimizer.zero_grad()
        forward_out = self.forward(*args, **kwargs)
        loss_tuple, aug_info, out_imgs, out_bboxes, out_labels = forward_out
        # Only the summed loss is back-propagated.
        loss_tuple.total_loss.backward()
        self.optimizer.step()
        self.update_meters(loss_tuple)
        return loss_tuple, aug_info, out_imgs, out_bboxes, out_labels

    def cutmix_process(self,
                       imgs,
                       scale,
                       paste_scale,
                       copy_img,
                       copy_bboxes,
                       copy_labels,
                       paste_img,
                       paste_bboxes,
                       paste_labels,
                       threshold,
                       overlap_threshold,
                       device='cuda',
                       crop=True,
                       drop=True):
        """Build cut-mix samples (and optional crop/drop images) for ``imgs``.

        The images are passed through ``faster_rcnn.extractor`` and
        ``faster_rcnn.aug``; the resulting attention maps are handed to
        ``cutmix_generate`` together with the copy/paste images, boxes and
        labels.

        Args:
            imgs: image batch; its shape must unpack into four values, the
                last two being used as ``(H, W)``.
            scale, paste_scale: forwarded to ``cutmix_generate``.
            copy_img, copy_bboxes, copy_labels: source sample for cut-mix.
            paste_img, paste_bboxes, paste_labels: target sample for cut-mix.
            threshold, overlap_threshold: forwarded to ``cutmix_generate``.
            device: forwarded to ``cutmix_generate``.
            crop (bool): when true, store a ``'crop'`` augmentation of
                ``imgs`` under ``info["crop_image"]``.
            drop (bool): when true, store a ``'drop'`` augmentation of
                ``imgs`` under ``info["drop_image"]``.

        Returns:
            tuple: ``(new_imgs, new_bboxes, new_labels, fourth, info,
            attention_maps)`` where ``fourth`` is the fourth value returned
            by ``cutmix_generate``, passed through untouched.
        """
        backbone_features = self.faster_rcnn.extractor(imgs)
        _, _, height, width = imgs.shape
        # The refined features returned by ``aug`` are not needed here.
        _, attentive_maps, attention_maps = self.faster_rcnn.aug(
            backbone_features, (height, width))

        mixed = cutmix_generate(
            copy_img,
            scale,
            paste_scale,
            paste_img,
            attention_maps,
            copy_bboxes,
            copy_labels,
            paste_bboxes,
            paste_labels,
            {},
            device=device,
            threshold=threshold,
            overlap_threshold=overlap_threshold)
        new_imgs, new_bboxes, new_labels, fourth, info = mixed

        if crop:
            # The crop is driven by the first attentive-map channel only.
            info["crop_image"] = self.faster_rcnn.batch_aug(
                imgs,
                attentive_maps[:, :1, :, ],
                'crop',
                theta=(0.4, 0.6),
                padding_ratio=0.1)
        if drop:
            # The drop is driven by every channel after the first.
            info["drop_image"] = self.faster_rcnn.batch_aug(
                imgs,
                attentive_maps[:, 1:, :, ],
                'drop',
                theta=(0.2, 0.5))
        return new_imgs, new_bboxes, new_labels, fourth, info, attention_maps

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, config and optional optimizer state to disk.

        Args:
            save_optimizer (bool): whether to also store
                ``optimizer.state_dict()``.
            save_path (str): destination file. When ``None`` the path is
                ``checkpoints/fasterrcnn_<MMDDHHMM>`` followed by
                ``_<value>`` for every value in ``kwargs``.
            **kwargs: extra information stored under ``'other_info'``.

        Returns:
            str: the path the checkpoint was written to.
        """
        save_dict = {
            'model': self.faster_rcnn.state_dict(),
            'config': opt._state_dict(),
            'other_info': kwargs,
        }

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname, and os.makedirs('') raises;
        # only create the directory when the path actually names one.
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, device='cuda'):
        """Restore model (and optionally optimizer/config) from a checkpoint.

        Args:
            path: checkpoint file readable by ``t.load``.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.
            device: device the stored tensors are mapped onto.

        Returns:
            The trainer itself, so calls can be chained.
        """
        checkpoint = t.load(path, map_location=t.device(device))

        # Legacy checkpoints are a bare model state dict without any wrapper.
        if 'model' not in checkpoint:
            self.faster_rcnn.load_state_dict(checkpoint)
            return self

        self.faster_rcnn.load_state_dict(checkpoint['model'])
        if parse_opt:
            opt._parse(checkpoint['config'])
        if 'optimizer' in checkpoint and load_optimizer:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        return self

    def update_lr(self, lr):
        """Ask the model to apply learning rate ``lr`` and keep its optimizer.

        Whatever ``faster_rcnn.update_lr`` returns replaces
        ``self.optimizer``. Nothing is returned.
        """
        refreshed_optimizer = self.faster_rcnn.update_lr(lr)
        self.optimizer = refreshed_optimizer


#         return self

    def update_meters(self, losses):
        """Feed each loss in ``losses`` into the meter of the same name.

        Args:
            losses: a namedtuple whose fields cover every key of
                ``self.meters``.
        """
        # Convert every loss to a scalar first, then update the meters.
        scalars = {}
        for name, value in losses._asdict().items():
            scalars[name] = at.scalar(value)
        for name, meter in self.meters.items():
            meter.add(scalars[name])

    def reset_meters(self):
        """Reset every loss meter and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        for confusion in (self.roi_cm, self.rpn_cm):
            confusion.reset()

    def get_meter_data(self):
        """Return ``{meter name: first element of meter.value()}``."""
        snapshot = {}
        for name, meter in self.meters.items():
            snapshot[name] = meter.value()[0]
        return snapshot
class FasterRCNNTrainer(nn.Module):
    """Training wrapper for a Faster R-CNN model with an FPN neck.

    ``forward`` returns a ``LossTuple`` with the RPN localisation and
    classification losses, the RoI-head localisation and classification
    losses, and their sum. ``train_step`` performs one optimiser update.

    Args:
        faster_rcnn: the detection model to train. This class uses its
            ``C2``/``C3``/``C4``/``fpn``/``rpn``/``head`` sub-modules, its
            ``get_optimizer`` method and its ``loc_normalize_mean`` /
            ``loc_normalize_std`` attributes.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters handed to _fast_rcnn_loc_loss when computing
        # the RPN and RoI localisation losses respectively.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Builds the RPN training targets (original note: picks 256 anchors
        # out of tens of thousands, at most 128 positive -- not verified here).
        self.anchor_target_creator = AnchorTargetCreator()
        # Builds the RoI-head training targets (original note: picks 128 of
        # the ~2000 RPN proposals, at most 32 positive -- not verified here).
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # Visualisation wrapper.
        self.vis = Visualizer(env=opt.env)

        # Confusion matrices comparing predictions with ground truth; the
        # constructor argument is the number of classes.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Run the detector on one image and compute the training losses.

        Args:
            imgs: a batch of images; its shape must unpack into four values,
                the last two being used as ``(H, W)``.
            bboxes: ground-truth boxes, documented as ``(N, R, 4)``.
            labels: ground-truth labels, documented as ``(N, R)`` with values
                in ``[0, L - 1]`` where ``L`` is the number of classes.
            scale (float): scaling applied to the raw image during
                preprocessing.

        Returns:
            LossTuple: the four individual losses followed by their sum.

        Raises:
            ValueError: if the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]  # batch size
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        c2_out = self.faster_rcnn.C2(imgs)
        c3_out = self.faster_rcnn.C3(c2_out)
        c4_out = self.faster_rcnn.C4(c3_out)

        p2, p3, p4, p5 = self.faster_rcnn.fpn(c2_out, c3_out, c4_out)
        # The RPN sees every pyramid level; the RoI head only p2-p4.
        feature_maps = [p2, p3, p4, p5]
        rcnn_maps = [p2, p3, p4]

        # Original notes (not verified here): rpn_locs is (hh*ww*9, 4),
        # rpn_scores is (hh*ww*9, 2), rois is (2000, 4) after NMS and anchor
        # is (hh*ww*9, 4); H and W are the preprocessed image size.
        # roi_indices is not used below.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            feature_maps, img_size, scale)

        # Batch size is one, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and build the RoI-head regression/classification
        # targets (original note: 128 samples -- not verified here).
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            array_tool.tonumpy(bbox),
            array_tool.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)

        # All zeros because only batch size 1 is supported.
        sample_roi_index = torch.zeros(len(sample_roi))

        roi_cls_loc, roi_score = self.faster_rcnn.head(
            rcnn_maps,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN loss ------------------ #
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            array_tool.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = array_tool.totensor(gt_rpn_label).long()
        gt_rpn_loc = array_tool.totensor(gt_rpn_loc)
        # RPN localisation loss.
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)
        # RPN classification loss; anchors labelled -1 are ignored.
        rpn_cls_loss = functional.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors with a label > -1 enter the confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = array_tool.tonumpy(rpn_score)[array_tool.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(array_tool.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ RoI loss ------------------ #
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat regression output into one 4-vector per class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the 4-vector of its ground-truth class.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(), \
                                array_tool.totensor(gt_roi_label).long()]
        gt_roi_label = array_tool.totensor(gt_roi_label).long()
        gt_roi_loc = array_tool.totensor(gt_roi_loc)

        # RoI localisation loss.
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        # RoI classification loss.
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        self.roi_cm.add(array_tool.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # Append the total so the list matches the five LossTuple fields.
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the resulting ``LossTuple``."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize model, config, visualiser state and extra info to disk.

        Args:
            save_optimizer (bool): whether to also store
                ``optimizer.state_dict()``.
            save_path (str): destination file. When ``None`` the path is
                ``checkpoints/fasterrcnn_<MMDDHHMM>`` followed by
                ``_<value>`` for every value in ``kwargs``.
            **kwargs: extra information stored under ``'other_info'``.

        Returns:
            str: the path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()
        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname, and os.makedirs('') raises;
        # only create the directory when the path actually names one.
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally optimizer/config) from ``path``.

        Args:
            path: checkpoint file readable by ``torch.load``.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.

        Returns:
            The trainer itself.
        """
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Feed each loss in ``losses`` into the meter of the same name."""
        loss_d = {k: array_tool.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{meter name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #20
0
class Trainer(nn.Module):
    """Training wrapper for a head detector that only trains its RPN.

    ``forward`` returns a ``LossTuple`` with the RPN regression loss, the RPN
    classification loss and their sum.

    Args:
        head_detector: the model to train. This class uses its ``extractor``
            and ``rpn`` sub-modules and its ``get_optimizer`` method.
    """

    def __init__(self, head_detector):
        super(Trainer, self).__init__()
        self.head_detector = head_detector
        self.optimizer = self.head_detector.get_optimizer()
        self.anchor_target_layer = AnchorTargetLayer()
        loss_fields = ['rpn_regr_loss', 'rpn_cls_loss', 'total_loss']
        self.loss_tuple = namedtuple('LossTuple', loss_fields)
        self.vis = Visualizer(env=cfg.VISDOM_ENV)
        # Two-class confusion matrix for the RPN classifier.
        self.rpn_cm = ConfusionMeter(2)
        # One running-average meter per loss field.
        meters = {}
        for field in self.loss_tuple._fields:
            meters[field] = AverageValueMeter()
        self.meters = meters

    def forward(self, x, gt_boxes, scale):
        """Compute the RPN losses for a single image.

        Args:
            x: image batch; must have batch size 1.
            gt_boxes: ground-truth boxes with a leading batch dimension; must
                support ``.numpy()`` after indexing.
            scale: forwarded to the RPN.

        Returns:
            LossTuple: ``(rpn_regr_loss, rpn_cls_loss, total_loss)``.
        """
        assert x.size()[0] == 1, 'Currently only batch size 1 is supported.'
        img_size = x.size()[2:]

        # Forward pass through the backbone and the RPN.
        feature_map = self.head_detector.extractor(x)
        rpn_out = self.head_detector.rpn(feature_map, img_size, scale)
        regr_batch, cls_batch, _, _, anchors = rpn_out

        # Drop the batch dimension.
        boxes = gt_boxes[0]
        regr_pred = regr_batch[0]
        cls_pred = cls_batch[0]

        # Build regression targets and labels for every anchor.
        regr_np, cls_np = self.anchor_target_layer(
            boxes.numpy(), anchors, img_size)
        regr_target = torch.from_numpy(regr_np).cuda().float()
        cls_target = torch.from_numpy(cls_np).cuda().long()

        # Losses; anchors labelled -1 are ignored by the classifier loss.
        regr_loss = losses.rpn_regr_loss(regr_pred, regr_target, cls_target)
        cls_loss = F.cross_entropy(cls_pred, cls_target, ignore_index=-1)
        combined = regr_loss + cls_loss

        # Only anchors with a label > -1 enter the confusion matrix.
        keep = cls_target > -1
        kept_targets = cls_target[keep]
        kept_preds = cls_pred[keep]
        self.rpn_cm.add(kept_preds.detach(), kept_targets.detach())

        return self.loss_tuple(regr_loss, cls_loss, combined)

    def train_step(self, x, boxes, scale):
        """Run one optimisation step; nothing is returned."""
        step_losses = self.forward(x, boxes, scale)
        self.optimizer.zero_grad()
        step_losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(step_losses)

    def update_meters(self, loss_tuple):
        """Feed each loss value into the meter of the same name."""
        values = {}
        for name, tensor in loss_tuple._asdict().items():
            values[name] = tensor.item()
        for name, meter in self.meters.items():
            meter.add(values[name])

    def reset_meters(self):
        """Reset every loss meter and the RPN confusion matrix."""
        for name in self.meters:
            self.meters[name].reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{meter name: first element of meter.value()}``."""
        snapshot = {}
        for name, meter in self.meters.items():
            snapshot[name] = meter.value()[0]
        return snapshot

    def save(self, path, save_optimizer=False):
        """Write model and visualiser state (optionally optimizer) to ``path``."""
        checkpoint = {
            'model': self.head_detector.state_dict(),
            'vis_info': self.vis.state_dict(),
        }
        if save_optimizer:
            checkpoint['optimizer'] = self.optimizer.state_dict()

        torch.save(checkpoint, path)
        self.vis.save([self.vis.env])

    def load(self, path, load_optimizer=True):
        """Restore the model (and optionally optimizer) state from ``path``."""
        checkpoint = torch.load(path)
        self.head_detector.load_state_dict(checkpoint['model'])
        if not load_optimizer:
            return
        if 'optimizer' in checkpoint:
            self.optimizer.load_state_dict(checkpoint['optimizer'])

    def scale_lr(self, decay=0.1):
        """Multiply the learning rate of every parameter group by ``decay``."""
        for group in self.optimizer.param_groups:
            group['lr'] *= decay
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training a Faster R-CNN; returns losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for Region Proposal
      Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()
        self.faster_rcnn = faster_rcnn
        # Sigma hyper-parameters for the smooth-L1 localisation losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label style training targets:
        # one for the RPN (anchors), one for the RoI head (proposals).
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()
        # Mean and std used to normalise localisation targets.
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Optimizer supplied by the model itself.
        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # Indicators for training status: a 2-class confusion matrix for the
        # RPN and a 21-class one for the RoI head (original note: 20 object
        # classes plus background).
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of ground-truth
                bounding boxes. Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """

        n = bboxes.shape[0]
        # Only batch size 1 is supported.
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')
        # img_size is the (height, width) of the input image.
        _, _, H, W = imgs.shape
        img_size = (H, W)
        # Extract features with the backbone.
        features = self.faster_rcnn.extractor(imgs)
        # Region proposal network. Example shapes from the original notes
        # (not verified here): rpn_locs [1, 17316, 4], rpn_scores
        # [1, 17316, 2], rois [2000, 4], roi_indices [2000] (all zero),
        # anchor [17316, 4]. roi_indices is not used below.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        # (i.e. drop the leading batch dimension).
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois, consider them as constant input
        # proposal_target_creator builds the RoI-head training targets from
        # the proposals and the ground truth (original note: 128 of ~2000
        # RoIs are sampled -- not verified here). gt_roi_label is later used
        # to index the per-class regression output, so it holds class
        # indices.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        # The RoI head classifies each sampled RoI and refines its box; it
        # receives the extracted features and the sampled RoIs.
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Assign the ground-truth boxes to the anchors to obtain the RPN
        # regression targets and labels.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        # RPN localisation loss.
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # RPN classification loss (cross entropy); label -1 is ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)

        # Only anchors with a label > -1 enter the RPN confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        # Regroup the flat regression output into one 4-vector per class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sample keep only the 4-vector of its ground-truth class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)
        # RoI localisation loss.
        roi_loc_loss = _fast_rcnn_loc_loss(
            # The indexed tensor may be non-contiguous; make it contiguous.
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        # RoI classification loss (cross entropy).
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        # Update the RoI confusion matrix.
        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())
        # Four individual losses plus their sum.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]
        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the resulting ``LossTuple``."""
        # Clear gradients from the previous step.
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        # Back-propagate the total loss only.
        losses.total_loss.backward()
        # Update the learnable parameters.
        self.optimizer.step()
        # Record the losses in the running-average meters.
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.
        
        Returns:
            save_path(str): the path to save models.
        """
        # Imported locally so this method does not depend on a module-level
        # ``import os`` being present.
        import os

        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # Create the target directory first; otherwise the default
        # ``checkpoints/...`` path fails when the directory is missing.
        # A bare file name has an empty dirname, which must be skipped.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore model (and optionally optimizer/config) from ``path``.

        Args:
            path: checkpoint file readable by ``t.load``.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): feed the stored ``'config'`` entry to
                ``opt._parse``.

        Returns:
            The trainer itself.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Feed each loss in ``losses`` into the meter of the same name."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{meter name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Training wrapper around a Faster R-CNN model; ``forward`` returns losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the
      Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        # Sigma handed to _fast_rcnn_loc_loss for the RoI localization loss.
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # Original author's note (sizes cannot be confirmed from this class):
        # the anchor target creator picks 256 of roughly 20000 candidate
        # anchors for binary classification and box regression, i.e. it
        # supplies the ground truth for the RPN's predictions.
        self.anchor_target_creator = AnchorTargetCreator()
        # Original author's note: both target creators only generate training
        # targets and are used during training only, whereas ProposalCreator
        # (inside the RPN) produces RoIs in both training and testing.
        self.proposal_target_creator = ProposalTargetCreator()

        # Original author's note: (0., 0., 0., 0.) and (0.1, 0.1, 0.2, 0.2).
        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # Original author's note: SGD.
        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        # Confusion matrices compare predictions with ground truth; the
        # constructor argument is the number of classes.
        self.rpn_cm = ConfusionMeter(2)
        # Original author's note: 21 = 20 object classes + 1 background.
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss field.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]         # batch size, must be 1
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape     # N, C, H, W
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form.
        # The shape remarks below are the original author's notes.
        bbox = bboxes[0]                # bboxes is (N, R, 4)
        label = labels[0]               # labels is (N, R)
        rpn_score = rpn_scores[0]       # hh*ww*9 rows
        rpn_loc = rpn_locs[0]           # (hh*ww*9, 4)
        roi = rois                      # (2000, 4)

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        # Cross entropy between the RPN scores and the anchor labels;
        # anchors labelled -1 are ignored.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Keep only the anchors that are not ignored (label > -1) for the
        # confusion matrix.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]

        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        # Original author's note: the head output is (128, 84), so
        # n_sample = 128 and the view below is (128, 21, 4).
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sampled RoI pick the box regression of its ground-truth
        # class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        # Regression targets produced by proposal_target_creator.
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        # The fifth entry is the sum of the four individual losses.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses from ``forward``."""
        self.optimizer.zero_grad()      # clear accumulated gradients
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()           # update the parameters
        self.update_meters(losses)      # record the losses in the meters
        return losses

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if it is missing."""
        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname: os.makedirs('') would raise,
        # and the current directory already exists, so there is nothing to do.
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        self._ensure_parent_dir(save_path)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Restore model (and optionally config / optimizer) state from ``path``.

        Args:
            path: checkpoint location handed to ``t.load``.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): pass the stored ``'config'`` to ``opt._parse``.

        Returns:
            The trainer itself.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add every loss in ``losses`` (a namedtuple) to its running meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #23
0
class BRFasterRcnnTrainer(nn.Module):
    """Faster R-CNN trainer with an optional attacker module.

    Args:
        faster_rcnn: the Faster R-CNN model whose losses are computed.
        attacker: optional object; ``train_step`` calls its ``forward`` and
            ``save`` calls its ``save``.
        layer_idx: stored on the instance; not used inside this class.
        attack_mode (bool): when true, ``train_step`` runs the attacker.
    """

    def __init__(self, faster_rcnn, attacker=None, layer_idx=None, attack_mode=False):
        super(BRFasterRcnnTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.attacker = attacker
        self.layer_idx = layer_idx
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma
        self.attack_mode = attack_mode

        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        self.vis = Visualizer(env=opt.env)

        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        # One running-average meter per loss field, for the detector losses
        # and for the attacker ("BR") losses respectively.
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}
        self.BR_meters = {k: AverageValueMeter() for k in LossTupleBR._fields}

    def forward(self, imgs, bboxes, labels, scale, attack=False):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd..Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.
            attack (bool): when true, skip the loss computation and return
                ``(roi_score, gt_roi_label, feature_maps)`` instead.

        Returns:
            Normally a ``LossTuple`` of 5 losses. NOTE(review): the fallback
            paths return other shapes -- a plain list of five zero losses
            when the features sum to zero, and ``(list, features)`` when no
            RoIs are produced. Callers must cope with all of them.

        Raises:
            ValueError: if the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # feature_maps is read from the model right after the extractor ran;
        # presumably it is filled by forward hooks -- confirm in the model.
        features = self.faster_rcnn.extractor(imgs)
        feature_maps = self.faster_rcnn.feature_maps

        if not features.sum()[0] == 0:
            rpn_locs, rpn_scores, rois, roi_indices, anchor = \
                self.faster_rcnn.rpn(features, img_size, scale)
            # Since batch size is one, convert variables to singular form
            bbox = bboxes[0]
            label = labels[0]
            rpn_score = rpn_scores[0]
            rpn_loc = rpn_locs[0]
            roi = rois

            # Sample RoIs and forward
            # it's fine to break the computation graph of rois,
            # consider them as constant input
            if rois.size == 0:
                # NOTE(review): this branch is about empty RoIs although the
                # message talks about features; it also returns a tuple.
                print("Features are 0 for some reason")
                losses = [Variable(torch.zeros(1)).cuda(), Variable(torch.zeros(1)).cuda(), \
                          Variable(torch.zeros(1)).cuda(), Variable(torch.zeros(1)).cuda()]
                losses = losses + [sum(losses)]
                return losses, features

            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
                roi,
                at.tonumpy(bbox),
                at.tonumpy(label),
                self.loc_normalize_mean,
                self.loc_normalize_std)
            # NOTE it's all zero because now it only support for batch=1 now
            sample_roi_index = t.zeros(len(sample_roi))
            roi_cls_loc, roi_score = self.faster_rcnn.head(
                features,
                sample_roi,
                sample_roi_index)

            # ------------------ RPN losses -------------------#
            # Skipped entirely in attack mode, which returns before the
            # losses are assembled.
            if not attack:
                if anchor.size != 0:
                    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                        at.tonumpy(bbox),
                        anchor,
                        img_size)
                    gt_rpn_label = at.tovariable(gt_rpn_label).long()
                    gt_rpn_loc = at.tovariable(gt_rpn_loc)
                    rpn_loc_loss = _fast_rcnn_loc_loss(
                        rpn_loc,
                        gt_rpn_loc,
                        gt_rpn_label.data,
                        self.rpn_sigma)

                    # NOTE: default value of ignore_index is -100 ...
                    rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
                    # Only anchors that are not ignored (label > -1) enter
                    # the confusion matrix.
                    _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
                    _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
                    self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())
                else:
                    # No anchors: the RPN contributes nothing to the total.
                    rpn_cls_loss = 0
                    rpn_loc_loss = 0

            # ------------------ ROI losses (fast rcnn loss) -------------------#
            n_sample = roi_cls_loc.shape[0]
            roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
            # For every sampled RoI pick the regression of its gt class.
            roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                                  at.totensor(gt_roi_label).long()]
            gt_roi_label = at.tovariable(gt_roi_label).long()
            gt_roi_loc = at.tovariable(gt_roi_loc)
            if attack:
                return roi_score, gt_roi_label, feature_maps
            else:
                roi_loc_loss = _fast_rcnn_loc_loss(
                    roi_loc.contiguous(),
                    gt_roi_loc,
                    gt_roi_label.data,
                    self.roi_sigma)

                roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

                self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

                # The fifth entry is the sum of the four individual losses.
                losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
                losses = losses + [sum(losses)]

                return LossTuple(*losses)
        else:
            print("Features are 0 for some reason")
            losses = [Variable(torch.zeros(1)).cuda(), Variable(torch.zeros(1)).cuda(), \
                      Variable(torch.zeros(1)).cuda(), Variable(torch.zeros(1)).cuda()]
            losses = losses + [sum(losses)]
            return losses

    def train_step(self, imgs, bboxes, labels, scale, target_feature=None, rois=None, roi_scores=None):
        """Run one attacker step when ``attack_mode`` is set.

        Outside attack mode this only prints a placeholder; the detector
        itself is never optimized here. Always returns ``None``.
        """
        if not self.attack_mode:
            print ('....')
        else:
            BR_losses = self.attacker.forward(imgs, self, labels, bboxes, scale, target_feature, rois, roi_scores)
            BR_losses = LossTupleBR(*BR_losses)
            self.update_meters(BR_losses, BR=True)

    def save(self, save_optimizer=False, save_path=None, save_rcnn=False, **kwargs):
        """Save the attacker and, optionally, the detector checkpoint.

        Args:
            save_optimizer (bool): include ``optimizer.state_dict()``.
            save_path (str): destination of the detector checkpoint; when
                ``None`` a time-stamped path under ``checkpoints/`` is built.
            save_rcnn (bool): write the detector checkpoint. Defaults to
                False because the R-CNN parameters are left untouched while
                the generator is being trained.
            **kwargs: stored under ``'other_info'``. ``kwargs['epochs']`` is
                required whenever an attacker is set and ``save_path`` is
                ``None``.

        Returns:
            str: the detector checkpoint path (whether or not it was written).
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/faterrcnn_full_%s' % timestr
            if not self.attack_mode:
                for k_, v_ in kwargs.items():
                    save_path += '%s' % v_
            # NOTE(review): the attacker is only saved when no explicit
            # save_path is given -- confirm that this is intended.
            if self.attacker is not None:
                self.attacker.save('checkpoints/attack_%s_%d.path' % (timestr, kwargs['epochs']))
        if save_rcnn:
            t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False):
        """Restore model (and optionally config / optimizer) state from ``path``.

        Args:
            path: checkpoint location handed to ``t.load``.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            parse_opt (bool): pass the stored ``'config'`` to ``opt._parse``.

        Returns:
            The trainer itself.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:
            # Legacy checkpoint: the file is the bare model state dict.
            self.faster_rcnn.load_state_dict(state_dict)
            return self

        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            # The optimizer state belongs to the optimizer, not to this
            # nn.Module (whose load_state_dict expects module parameters).
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses, BR=False):
        """Add ``losses`` (a namedtuple) to the detector or the BR meters."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        if not BR:
            for key, meter in self.meters.items():
                meter.add(loss_d[key])
        else:
            for key, meter in self.BR_meters.items():
                meter.add(loss_d[key])

    def reset_meters(self, BR=False):
        """Reset the detector meters and confusion matrices; BR meters on request."""
        for key, meter in self.meters.items():
            meter.reset()
        if BR:
            for key, meter in self.BR_meters.items():
                meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self, BR=False):
        """Return ``{name: meter.value()[0]}`` for the BR or the detector meters."""
        if BR:
            return {k: v.value()[0] for k, v in self.BR_meters.items()}
        else:
            return {k: v.value()[0] for k, v in self.meters.items()}
Exemple #24
0
class FasterRCNNTrainer(nn.Module):
    """Computes the Faster R-CNN training losses and applies optimizer steps.

    Args:
        faster_rcnn: the model to train; it must expose ``extractor``,
            ``rpn``, ``head``, ``get_optimizer`` and the two
            ``loc_normalize_*`` attributes used below.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn

        # Sigmas handed to _fast_rcnn_loc_loss for the two localization losses.
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Builders of the training targets for the RPN and for the head.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Training-status indicators: confusion matrices plus one
        # running-average meter per loss field.
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = dict(
            (field, AverageValueMeter()) for field in LossTuple._fields)

    def forward(self, imgs, bboxes, labels, scale):
        """Compute the four detection losses and their sum for one image.

        Args:
            imgs: batch of images; its shape must unpack into four values.
            bboxes: batch of ground-truth boxes; the first axis is the batch.
            labels: batch of ground-truth labels, indexed like ``bboxes``.
            scale: passed through to the RPN.

        Returns:
            ``LossTuple`` with the RPN localization / classification losses,
            the RoI localization / classification losses and their sum.

        Raises:
            ValueError: if ``bboxes.shape[0]`` is not 1.
        """
        if bboxes.shape[0] != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _batch, _channels, height, width = imgs.shape
        img_size = (height, width)

        feature_map = self.faster_rcnn.extractor(imgs)
        rpn_outputs = self.faster_rcnn.rpn(feature_map, img_size, scale)
        rpn_locs, rpn_scores, rois, _roi_indices, anchor = rpn_outputs

        # Batch size is one, so work with the single sample directly.
        gt_box = bboxes[0]
        gt_class = labels[0]
        anchor_scores = rpn_scores[0]
        anchor_locs = rpn_locs[0]

        # ------------------ sample RoIs and run the head ------------------#
        sampled = self.proposal_target_creator(
            rois,
            at.tonumpy(gt_box),
            at.tonumpy(gt_class),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        sample_roi, head_loc_target, head_label_target = sampled

        # All zeros: every sampled RoI belongs to the only image in the batch.
        roi_batch_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            feature_map, sample_roi, roi_batch_index)

        # ------------------ RPN losses -------------------#
        rpn_loc_target, rpn_label_target = self.anchor_target_creator(
            at.tonumpy(gt_box), anchor, img_size)
        rpn_label_target = at.totensor(rpn_label_target).long()
        rpn_loc_target = at.totensor(rpn_loc_target)

        rpn_loc_loss = _fast_rcnn_loc_loss(
            anchor_locs, rpn_loc_target, rpn_label_target.data, self.rpn_sigma)
        rpn_cls_loss = F.cross_entropy(
            anchor_scores, rpn_label_target.cuda(), ignore_index=-1)

        # Only anchors that are not ignored (label > -1) enter the
        # confusion matrix.
        kept_labels = rpn_label_target[rpn_label_target > -1]
        kept_scores = at.tonumpy(anchor_scores)[
            at.tonumpy(rpn_label_target) > -1]
        self.rpn_cm.add(
            at.totensor(kept_scores, False), kept_labels.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        per_class_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For every sampled RoI pick the regression of its ground-truth class.
        row_index = t.arange(0, n_sample).long().cuda()
        class_index = at.totensor(head_label_target).long()
        roi_loc = per_class_loc[row_index, class_index]

        head_label_target = at.totensor(head_label_target).long()
        head_loc_target = at.totensor(head_loc_target)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            head_loc_target,
            head_label_target.data,
            self.roi_sigma)

        criterion = nn.CrossEntropyLoss()
        roi_cls_loss = criterion(roi_score, head_label_target.cuda())

        self.roi_cm.add(
            at.totensor(roi_score, False), head_label_target.data.long())

        partial_losses = [
            rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        return LossTuple(*(partial_losses + [sum(partial_losses)]))

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses from ``forward``."""
        optimizer = self.optimizer
        optimizer.zero_grad()
        step_losses = self.forward(imgs, bboxes, labels, scale)
        step_losses.total_loss.backward()
        optimizer.step()
        self.update_meters(step_losses)
        return step_losses

    def update_meters(self, losses):
        """Add every field of ``losses`` (a namedtuple) to its running meter."""
        scalars = {}
        for field, value in losses._asdict().items():
            scalars[field] = at.scalar(value)
        for field in self.meters:
            self.meters[field].add(scalars[field])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for meter in self.meters.values():
            meter.reset()
        for confusion in (self.roi_cm, self.rpn_cm):
            confusion.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        snapshot = {}
        for name, meter in self.meters.items():
            snapshot[name] = meter.value()[0]
        return snapshot
Exemple #25
0
class RFCN_Trainer(nn.Module):
    """
    trainer for RFCN, return loss:
    rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss

    params: r_fcn  --RFCN model
    """

    def __init__(self, r_fcn: 'RFCN'):
        super(RFCN_Trainer, self).__init__()

        self.r_fcn = r_fcn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # generate anchor for RPN training
        self.anchor_target_creator = AnchorTargetCreator()

        # With OHEM more RoIs are sampled so hard examples can be mined
        # from them afterwards.
        proposal_target_num = 300 if opt.use_OHEM else 128
        self.proposal_target_creator = ProposalTargetCreator(n_sample=proposal_target_num)

        self.loc_normalize_mean = r_fcn.loc_normalize_mean
        self.loc_normalize_std = r_fcn.loc_normalize_std

        # Must come after self.r_fcn is assigned: get_optimizer walks
        # self.named_parameters().
        self.optimizer = self.get_optimizer()

        # visdom wrapper
        self.viz = visdom.Visdom(env=opt.viz_env)
        self.viz_index = 0
        self.log_text = ''

        # record training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(self.r_fcn.class_num)
        # When the head is fixed only the RPN losses are tracked.
        if opt.FIX_HEAD:
            self.meters = {k: AverageValueMeter() for k in RPN_LossTuple._fields}
        else:
            self.meters = {k: AverageValueMeter() for k in RFCN_LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """
        :param imgs:        variable with a batch of images.
        :param bboxes:      A batch of GT bounding boxes.
        :param labels:      labels of gt bboxes.
            Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
        :param scale:       Amount of scaling applied to the raw image during
                            preprocessing.
        :return:
            namedtuple of losses: ``RPN_LossTuple`` when ``opt.FIX_HEAD`` is
            set, otherwise ``RFCN_LossTuple``
        :raises ValueError: if the batch size (``bboxes.shape[0]``) is not 1
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.r_fcn.extractor_phase1(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.r_fcn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]  # shape: (gt_num,)
        rpn_score = rpn_scores[0]  # shape: (anchor_num, 2)
        rpn_loc = rpn_locs[0]  # shape: (anchor_num, 4)
        roi = rois[np.where(roi_indices == 0)[0]]  # shape(R, 4)

        # --------------- rpn losses ------------ #
        anchor_loc_gt, anchor_label_gt = self.anchor_target_creator(
            tonumpy(bbox),
            anchor,
            img_size)

        anchor_loc_gt = totensor(anchor_loc_gt)
        anchor_label_gt = totensor(anchor_label_gt).long()
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            anchor_loc_gt,
            anchor_label_gt.data,
            self.rpn_sigma)

        # Anchors labelled -1 are ignored by the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score, anchor_label_gt.cuda(), ignore_index=-1)

        with torch.no_grad():
            # Only non-ignored anchors enter the confusion matrix.
            _anchor_label_gt = anchor_label_gt[anchor_label_gt > -1]
            _rpn_score = rpn_score[anchor_label_gt > -1]
            self.rpn_cm.add(_rpn_score, _anchor_label_gt.data.long())

        # **************** for head ****************
        if opt.FIX_HEAD:
            losses = [rpn_loc_loss, rpn_cls_loss]
            losses = losses + [sum(losses)]

            return RPN_LossTuple(*losses)
        else:
            # sample rois for Head training
            sample_roi, roi_loc_gt, roi_label_gt = self.proposal_target_creator(
                roi,
                tonumpy(bbox),
                tonumpy(label),
                self.loc_normalize_mean,
                self.loc_normalize_std)

            # Note: set all value to zero(batch_size == 1)
            sample_roi_index = torch.zeros(len(sample_roi), dtype=torch.float).cuda()
            sample_roi = totensor(sample_roi).float()

            roi_locs, roi_scores = self.r_fcn.head(
                features,
                sample_roi,
                sample_roi_index)

            # ----------- PsROI losses ----------- #
            roi_label_gt = totensor(roi_label_gt).long()
            roi_loc_gt = totensor(roi_loc_gt)

            n_sample = roi_locs.shape[0]
            roi_locs = roi_locs.view(n_sample, -1, 4)
            if opt.cls_reg_specific:
                # Class-specific regression: take the box of the gt class.
                roi_locs = roi_locs[torch.arange(n_sample), roi_label_gt]
            else:
                # Class-agnostic regression: always take slot 1.
                roi_locs = roi_locs[torch.arange(n_sample), 1]

            roi_loc_loss = _fast_rcnn_loc_loss(
                roi_locs.contiguous(),
                roi_loc_gt,
                roi_label_gt.data,
                self.roi_sigma,
                ohem=opt.use_OHEM)

            if opt.use_OHEM:
                # Per-RoI losses are needed so the hardest ones can be picked.
                roi_cls_loss = F.cross_entropy(roi_scores, roi_label_gt.cuda(), reduction='none')
                roi_cls_loss, roi_loc_loss = self.ohem_dectect_loss(roi_cls_loss, roi_loc_loss,
                                                                    roi_label_gt, sample_roi,
                                                                    use_nms=True,
                                                                    hard_num=opt.hard_num)
            else:
                roi_cls_loss = F.cross_entropy(roi_scores, roi_label_gt.cuda())

            with torch.no_grad():
                self.roi_cm.add(roi_scores, roi_label_gt.data.long())

            losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
            losses = losses + [sum(losses)]

            return RFCN_LossTuple(*losses)

    def ohem_dectect_loss(self, cls_loss, loc_loss, gt_label, rois, hard_num=128,
                          use_nms=True, nms_thresh=0.7):
        """
        Online hard example mining over the per-RoI losses.

        :param cls_loss:       cls loss
        :param loc_loss:       reg loss
        :param gt_label:       gt label of rois
        :param rois:           sampled rois by proposalTarget module
        :param hard_num:       the number of rois for backward
        :param use_nms:        filter ROI with excessive overlap
        :param nms_thresh:     IoU threshold used by the NMS
        :return:               (mean cls loss, mean loc loss) of the kept rois
        """
        bbox_loss = cls_loss + loc_loss

        if use_nms:
            # nms based on loss
            keep = torchvision.ops.nms(rois, bbox_loss, iou_threshold=nms_thresh)
            bbox_loss = bbox_loss[keep]
            cls_loss = cls_loss[keep]
            loc_loss = loc_loss[keep]
            gt_label = gt_label[keep]

        # the number of rois for backward
        back_size = min(len(bbox_loss), hard_num)

        # hard example mining
        if back_size < len(bbox_loss):
            _, top_idx = torch.topk(bbox_loss, back_size)
            top_cls_loss = cls_loss[top_idx]
            # The localization loss is averaged over foreground RoIs only.
            isFg = (gt_label > 0)[top_idx]
            top_fg_idx = top_idx[isFg]
            top_loc_loss = loc_loss[top_fg_idx] if len(top_fg_idx) > 0 else torch.tensor([0.]).cuda()
        else:
            # NOTE(review): here the localization loss is averaged over all
            # kept RoIs, not only foreground ones as above -- confirm intent.
            top_cls_loss = cls_loss
            top_loc_loss = loc_loss

        top_cls_loss_normal = top_cls_loss.mean()
        top_loc_loss_normal = top_loc_loss.mean()

        return top_cls_loss_normal, top_loc_loss_normal

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the losses from ``forward``."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.update_meters(losses)
        self.optimizer.step()

        return losses

    def update_meters(self, losses):
        """Add every field of ``losses`` (a namedtuple) to its running meter."""
        loss_d = {k: toscalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion matrices."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{name: meter.value()[0]}`` for every loss meter."""
        return {k: v.value()[0] for k, v in self.meters.items()}

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if it is missing."""
        save_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname: os.makedirs('') would raise,
        # and the current directory already exists, so there is nothing to do.
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    def save(self, save_optimizer=False, save_path=None, best_map=0., **kwargs):
        """Write a checkpoint and return the path it was written to.

        Args:
            save_optimizer (bool): include ``optimizer.state_dict()``.
            save_path (str): destination; when ``None`` a time-stamped path
                under ``./checkPoints/`` is built.
            best_map: stored in the checkpoint and used in the default name.
            **kwargs: stored under ``'other_info'``.

        Returns:
            str: the path the checkpoint was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.r_fcn.state_dict()
        save_dict['config'] = opt.state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = {'viz_index': self.viz_index}
        save_dict['best_map'] = best_map

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            save_path = './checkPoints/rfcn_'
            if opt.head_ver is not None:
                save_path += 'vgg_roi_'
            time_str = time.strftime('%m%d%H%M')
            save_path += '{}_{}.pth'.format(time_str, best_map)

        self._ensure_parent_dir(save_path)

        torch.save(save_dict, save_path)
        return save_path

    def load(self, path, load_optimizer=True,
             load_viz_idx=False,
             parse_opt=False):
        """Restore trainer state from the checkpoint at ``path``.

        Args:
            path: checkpoint location handed to ``torch.load``.
            load_optimizer (bool): restore the optimizer state when the
                checkpoint contains an ``'optimizer'`` entry.
            load_viz_idx (bool): restore ``viz_index`` from ``'vis_info'``.
            parse_opt (bool): pass the stored ``'config'`` to ``opt.parse``.

        Returns:
            The trainer itself.

        Raises:
            ValueError: if the checkpoint has no ``'model'`` entry.
        """
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.r_fcn.load_state_dict(state_dict['model'])
        else:
            # One formatted message instead of two exception args, which
            # rendered as a tuple.
            raise ValueError(
                "Cannot find the model parameters of RFCN, load_path:\n{}".format(path))

        if load_viz_idx:
            self.viz_index = state_dict['vis_info']['viz_index']

        if parse_opt:
            print("Load history configuration!")
            opt.parse(state_dict['config'])

        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def scale_lr(self, epoch, gamma=0.1):
        """Multiply every learning rate by ``gamma`` when ``epoch + 1`` is a milestone.

        Returns the optimizer in either case.
        """
        if (epoch + 1) in opt.LrMilestones:
            for param_group in self.optimizer.param_groups:
                param_group['lr'] *= gamma
        return self.optimizer

    def get_optimizer(self):
        """
        return optimizer

        Builds an SGD optimizer (momentum 0.9) with one parameter group per
        trainable parameter: biases get twice the base learning rate and no
        weight decay, everything else gets the base rate and
        ``opt.weight_decay``.
        """
        lr = opt.rfcn_init_lr
        params = []
        for key, value in self.named_parameters():
            if not value.requires_grad:
                continue
            if 'bias' in key:
                params.append({'params': [value], 'lr': lr * 2, 'weight_decay': 0})
            else:
                params.append({'params': [value], 'lr': lr, 'weight_decay': opt.weight_decay})

        return torch.optim.SGD(params=params, momentum=0.9)

    def log(self, info, win='log_text'):
        """
        Append a time-stamped line to the running log and show it in visdom.

        self.log({'loss':1, 'lr':0.0001})
        """
        self.log_text += ('[{time}] {info} <br>'.format(
            time=time.strftime('%m%d_%H%M%S'),
            info=info))
        self.viz.text(self.log_text, win, opts={"title": 'log_text'})
class Trainer(nn.Module):
    """Training wrapper around an R-FCN style detector.

    Owns the target generators, the optimizer, the confusion/average meters
    and the visualizer, and turns one image plus its ground truth into the
    four detection losses and their sum.

    Args:
        rfcn: detector exposing ``extractor``, ``rpn`` and ``RoIhead``.
        config: object providing ``rpn_sigma``, ``roi_sigma``,
            ``num_classes``, ``lr`` and ``weight_decay``.
    """

    def __init__(self, rfcn, config):
        super().__init__()
        self.rfcn = rfcn
        self.rpn_target_generator = RPNTargetGenerator()
        self.roi_target_generator = RoITargetGenerator()
        self.rpn_sigma = config.rpn_sigma
        self.roi_sigma = config.roi_sigma
        self.loc_normalize_mean = (0., 0., 0., 0.)
        self.loc_normalize_std = (.1, .1, .2, .2)
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(config.num_classes)
        self.loss_avgmeter = {
            k: AverageValueMeter()
            for k in [
                'rpn_loc_loss', 'rpn_fg_loss', 'roi_loc_loss', 'roi_cls_loss',
                'tot_loss'
            ]
        }
        self.optimizer = self._get_optimizer(config)
        self.vis = Visualizer()
        self.train()

    def forward(self, imgs, bboxes, labels, scale):
        """Compute the RPN and RoI-head losses for a single image.

        Args:
            imgs: (N, C, H, W)
            bboxes: (N, R, 4)
            labels: (N, R)
            scale: scale factor of preprocessing (not used here).

        Returns:
            dict with keys ``rpn_loc_loss``, ``rpn_fg_loss``,
            ``roi_loc_loss``, ``roi_cls_loss`` and ``tot_loss`` (their sum).

        Raises:
            ValueError: if the batch size is not 1.
        """
        if imgs.size(0) != 1:
            raise ValueError("Only batch_size 1 is supported.")
        img_size = imgs.size()[2:]

        features = self.rfcn.extractor(imgs)
        rpn_scores, rpn_locs, rois, roi_indices, anchors = self.rfcn.rpn(
            features, img_size)

        # Batch size is one, so drop the batch dimension.
        bbox = bboxes[0]
        label = labels[0]
        rpn_locs = rpn_locs[0]
        rpn_scores = rpn_scores[0]

        sample_roi, gt_roi_loc, gt_roi_label = self.roi_target_generator(
            rois, bbox, label, self.loc_normalize_mean, self.loc_normalize_std)
        # All sampled RoIs belong to image 0, hence the all-zero index.
        roi_score, roi_loc = self.rfcn.RoIhead(features, sample_roi,
                                               torch.zeros(len(sample_roi)))

        # RPN losses
        gt_rpn_locs, gt_rpn_labels = self.rpn_target_generator(
            anchors, bbox, img_size)
        rpn_loc_loss = _loc_loss(rpn_locs, gt_rpn_locs, gt_rpn_labels,
                                 self.rpn_sigma)
        # Anchors labelled -1 are neither positive nor negative: ignore them.
        rpn_fg_loss = F.cross_entropy(rpn_scores,
                                      gt_rpn_labels,
                                      ignore_index=-1)
        valid_anchor = gt_rpn_labels > -1
        self.rpn_cm.add(rpn_scores[valid_anchor].detach(),
                        gt_rpn_labels[valid_anchor].detach())

        # RoI losses
        roi_loc = roi_loc.view(roi_loc.size(0), -1, 4)
        # Pick, for every sampled RoI, the box predicted for its own
        # ground-truth class. Pairing an arange with the labels gives
        # (S, 4); `roi_loc[:, gt_roi_label]` would instead give (S, S, 4).
        sample_idx = torch.arange(roi_loc.size(0), device=gt_roi_label.device)
        roi_loc = roi_loc[sample_idx, gt_roi_label].contiguous()
        roi_loc_loss = _loc_loss(roi_loc, gt_roi_loc, gt_roi_label,
                                 self.roi_sigma)
        roi_cls_loss = F.cross_entropy(roi_score, gt_roi_label)
        self.roi_cm.add(roi_score.detach(), gt_roi_label)

        tot_loss = rpn_loc_loss + rpn_fg_loss + roi_loc_loss + roi_cls_loss
        return {
            'rpn_loc_loss': rpn_loc_loss,
            'rpn_fg_loss': rpn_fg_loss,
            'roi_loc_loss': roi_loc_loss,
            'roi_cls_loss': roi_cls_loss,
            'tot_loss': tot_loss
        }

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step and return the loss dict of `forward`."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        for k, v in losses.items():
            # Record plain floats: adding the graph-attached tensors would
            # keep every step's autograd graph alive inside the meters.
            self.loss_avgmeter[k].add(float(v))
        losses['tot_loss'].backward()
        self.optimizer.step()
        return losses

    def save(self, save_path):
        """Write ``{'model': <rfcn state dict>}`` to ``save_path``."""
        torch.save({'model': self.rfcn.state_dict()}, save_path)

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for meter in self.loss_avgmeter.values():
            meter.reset()
        self.rpn_cm.reset()
        self.roi_cm.reset()

    def get_meter(self):
        """Return the loss meters as a set of ``(name, meter)`` tuples.

        NOTE(review): a set of pairs (not a dict) is what the original code
        returned; kept as-is for callers -- confirm whether a mapping of
        name -> mean value was intended.
        """
        return {(k, v) for k, v in self.loss_avgmeter.items()}

    def _get_optimizer(self, config):
        """Build an Adam optimizer with one param group per parameter.

        Parameters whose name contains ``'bias'`` use ``2 * config.lr`` and
        no weight decay; the others use ``config.lr`` and
        ``config.weight_decay``. Frozen parameters are skipped.
        """
        lr = config.lr
        params = []
        for key, value in self.rfcn.named_parameters():
            if not value.requires_grad:
                continue
            if 'bias' in key:
                params.append({
                    'params': [value],
                    'lr': lr * 2,
                    'weight_decay': 0
                })
            else:
                params.append({
                    'params': [value],
                    'lr': lr,
                    'weight_decay': config.weight_decay
                })
        return torch.optim.Adam(params)
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because now it only support for batch=1 now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Only anchors with a label other than -1 feed the confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For each sampled RoI keep the box predicted for its own gt class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step on ``total_loss`` and return the losses."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """serialize models include optimizer and other info
        return path where the model-file is stored.

        Args:
            save_optimizer (bool): whether save optimizer.state_dict().
            save_path (string): where to save model, if it's None, save_path
                is generate using time str and info from kwargs.

        Returns:
            save_path(str): the path to save models.
        """
        import os  # local import: the file-level import block is not visible here

        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # Create the target directory if needed; without this the default
        # 'checkpoints/...' path fails when the directory does not exist.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(self, path, load_optimizer=True, parse_opt=False, ):
        """Load a checkpoint written by `save`, or a bare model state dict.

        Args:
            path: checkpoint location.
            load_optimizer: restore the optimizer if its state was saved.
            parse_opt: pass the saved ``'config'`` entry to ``opt._parse``.

        Returns:
            ``self``.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add each loss in ``losses`` (as a scalar) to its average meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """wrapper for conveniently training. return losses

    This variant trains in four alternating steps (`step1` .. `step4`)
    instead of through `forward`: steps 1 and 3 optimize the RPN losses,
    steps 2 and 4 optimize the RoI-head losses. The steps differ only in
    which extractor (``extractor1`` / ``extractor2``) feeds the RPN and
    the head.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = configurations.rpn_sigma
        self.roi_sigma = configurations.roi_sigma

        # target creator create gt_bbox gt_label etc as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        # One param group per trainable parameter: biases get twice the
        # base learning rate and no weight decay.
        lr = configurations.lr
        params = []
        for key, value in faster_rcnn.named_parameters():
            if not value.requires_grad:
                continue
            if 'bias' in key:
                params.append({
                    'params': [value],
                    'lr': lr * 2,
                    'weight_decay': 0
                })
            else:
                params.append({
                    'params': [value],
                    'lr': lr,
                    'weight_decay': configurations.weight_decay
                })

        self.optimizer = t.optim.SGD(params, momentum=0.9)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale, epoch):
        """Intentionally a no-op; training goes through `step1`..`step4`."""
        pass

    def step1(self, imgs, bboxes, labels, scale, epoch):  #train RPN alone
        """Train the RPN on ``extractor1`` features; returns the LossTuple."""
        return self._rpn_step(imgs, bboxes, scale, 'extractor1')

    def step2(self, imgs, bboxes, labels, scale, epoch):
        """Train the head on ``extractor2`` features, using proposals from
        the RPN run on ``extractor1`` features; returns the LossTuple."""
        return self._roi_step(imgs, bboxes, labels, scale,
                              'extractor1', 'extractor2')

    def step3(self, imgs, bboxes, labels, scale, epoch):
        """Train the RPN on ``extractor2`` features; returns the LossTuple."""
        return self._rpn_step(imgs, bboxes, scale, 'extractor2')

    def step4(self, imgs, bboxes, labels, scale, epoch):
        """Train the head with both RPN and head fed by ``extractor2``;
        returns the LossTuple."""
        return self._roi_step(imgs, bboxes, labels, scale,
                              'extractor2', 'extractor2')

    def _rpn_step(self, imgs, bboxes, scale, extractor_name):
        """One optimizer step on the RPN losses (shared by step1/step3)."""
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        features = getattr(self.faster_rcnn, extractor_name)(imgs)

        ############ RPN STEP #######################
        rpn_locs, rpn_scores, _, _, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)

        # Only the first image of the batch is used.
        bbox = bboxes[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)

        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)

        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)
        # Anchors labelled -1 are excluded from the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)

        # The head is not trained in this step: report zero RoI losses.
        roi_loc_loss = t.tensor([0]).cuda()
        roi_cls_loss = t.tensor([0]).cuda()

        return self._backward_and_record(rpn_loc_loss, rpn_cls_loss,
                                         roi_loc_loss, roi_cls_loss,
                                         'total_rpn')

    def _roi_step(self, imgs, bboxes, labels, scale, rpn_extractor_name,
                  head_extractor_name):
        """One optimizer step on the RoI-head losses (shared by step2/step4)."""
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        rpn_features = getattr(self.faster_rcnn, rpn_extractor_name)(imgs)
        if head_extractor_name == rpn_extractor_name:
            # Same extractor for both: run it once and share the features.
            head_features = rpn_features
        else:
            head_features = getattr(self.faster_rcnn,
                                    head_extractor_name)(imgs)

        ############ RPN STEP #######################
        _, _, rois, _, _ = self.faster_rcnn.rpn(rpn_features, img_size, scale)

        # Only the first image of the batch is used.
        bbox = bboxes[0]
        label = labels[0]

        ############ HEAD STEP #######################
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            rois, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # All sampled RoIs come from image 0, hence the all-zero index.
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(head_features,
                                                       sample_roi,
                                                       sample_roi_index)

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For each sampled RoI keep the box predicted for its own gt class.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        # The RPN is not trained in this step: report zero RPN losses.
        rpn_loc_loss = t.tensor([0]).cuda()
        rpn_cls_loss = t.tensor([0]).cuda()

        return self._backward_and_record(rpn_loc_loss, rpn_cls_loss,
                                         roi_loc_loss, roi_cls_loss,
                                         'total_roi')

    def _backward_and_record(self, rpn_loc_loss, rpn_cls_loss, roi_loc_loss,
                             roi_cls_loss, target_field):
        """Pack the losses, backprop ``target_field``, step, update meters."""
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        # Field order: the four losses, their sum, RPN total, RoI total.
        losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss
                                           ] + [roi_loc_loss + roi_cls_loss]
        all_losses = LossTuple(*losses)
        getattr(all_losses, target_field).backward()
        self.optimizer.step()
        self.update_meters(all_losses)
        return all_losses

######################################################################################

    def update_meters(self, losses):
        """Add each loss in ``losses`` (as a scalar) to its average meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Training wrapper that computes the Faster R-CNN losses.

    `forward` returns a ``LossTuple`` holding the RPN localization and
    classification losses, the RoI-head localization and classification
    losses, and their sum. `train_step` additionally runs the optimizer.

    Args:
        faster_rcnn: model exposing ``extractor``, ``rpn``, ``head``,
            ``loc_normalize_mean``, ``loc_normalize_std`` and
            ``get_optimizer()``.
    """

    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = 3.
        self.roi_sigma = 1.

        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}

    def forward(self, imgs, bboxes, labels, scale):
        """Compute all losses for a single image.

        Args:
            imgs: image batch; its last two dimensions are taken as (H, W).
            bboxes: ground-truth boxes, batch dimension first.
            labels: ground-truth labels, batch dimension first.
            scale: preprocessing scale, forwarded to the RPN.

        Returns:
            ``LossTuple`` of the four losses followed by their sum.

        Raises:
            ValueError: if the batch size is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)
        # rpn_locs: [N, H*W*A, 4]
        # rpn_scores: [N, H*W*A, 2]
        # rois: [R, 4]
        # roi_indices: which images [R, ]
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)

        # batch size is one
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, to_numpy(bbox), to_numpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)

        # All sampled RoIs come from image 0, hence the all-zero index.
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ---- RPN losses ----
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            to_numpy(bbox), anchor, img_size)
        gt_rpn_label = to_tensor(gt_rpn_label).long()
        gt_rpn_loc = to_tensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # Anchors labelled -1 are excluded from the classification loss.
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
        self.rpn_cm.add(to_tensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ---- RoI-head losses ----
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        # For each sampled RoI keep the box predicted for its own gt class.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(),
                              to_tensor(gt_roi_label).long()]
        gt_roi_label = to_tensor(gt_roi_label).long()
        gt_roi_loc = to_tensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        self.roi_cm.add(to_tensor(roi_score, False), gt_roi_label.data.long())
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimization step on ``total_loss`` and return the losses."""
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)

        return losses

    def save(self, save_optimizer=False, save_path=None, **kwags):
        """Serialize the model (and optionally the optimizer) to disk.

        Args:
            save_optimizer: also store ``self.optimizer.state_dict()``.
            save_path: destination file. When ``None`` a name of the form
                ``checkpoints/fasterrcnn_<MMDDHHMM>[_<value>...]`` is built
                from the current time and the values of ``kwags``.
            **kwags: extra info stored under ``'other_info'``.

        Returns:
            The path the checkpoint was written to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['other_info'] = kwags

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwags.values():
                # Append to save_path (the original appended to an
                # undefined name `save`, raising NameError).
                save_path += '_%s' % v_

        # A bare file name has no directory part; os.makedirs('') would fail.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        torch.save(save_dict, save_path)
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Load a checkpoint written by `save`, or a bare model state dict.

        Args:
            path: checkpoint location.
            load_optimizer: restore the optimizer if its state was saved.
            parse_opt: accepted for interface compatibility; not used here.

        Returns:
            ``self``.
        """
        state_dict = torch.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:
            # No 'model' key: treat the file as a raw model state dict.
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])

        return self

    def update_meters(self, losses):
        """Add each loss in ``losses`` (as a scalar) to its average meter."""
        loss_d = {k: scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset all loss meters and both confusion meters."""
        for meter in self.meters.values():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{loss name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}
class FasterRCNNTrainer(nn.Module):
    """Wrapper for conveniently training; ``forward`` returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of 4 loss above.

    Args:
        faster_rcnn (model.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
    """
    def __init__(self, faster_rcnn):
        super(FasterRCNNTrainer, self).__init__()

        self.faster_rcnn = faster_rcnn
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators build gt_bbox / gt_label etc. as training targets.
        # Author's note: anchor_target_creator picks 256 anchors out of the
        # 20000+ candidates for classification and location regression.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()
        # visdom wrapper
        self.vis = Visualizer(env=opt.env)

        # indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter()
                       for k in LossTuple._fields}  # average loss

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses

        Raises:
            ValueError: If the batch size (``bboxes.shape[0]``) is not 1.
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Author's note: the extractor is the first 10 layers of VGG16; it
        # turns the image into the feature map.
        features = self.faster_rcnn.extractor(imgs)

        # ------------------ RPN Network -------------------#
        # ------------------ RPN prediction -------------------#
        # Extract RoIs through the RPN (shapes below are the author's notes):
        # rpn_locs: per-anchor location offsets, [1, 9*hh*ww, 4]
        # rpn_scores: per-anchor binary (object / not object) scores,
        #     [1, 9*hh*ww, 2]
        # rois: proposals produced by the RPN, roughly 2000 during training,
        #     [2000, 4]
        # roi_indices: all zeros, same length as rois; it is the index of
        #     each RoI inside the batch, and only batch size 1 is supported.
        #     It is not used below.
        # rpn_locs and rpn_scores feed the RPN losses; rois go to the R-CNN
        # head below for classification.
        # Note that location and class are predicted for every one of the
        # 9*hh*ww anchors.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]  # [n_anchor,2]
        rpn_loc = rpn_locs[0]  # [n_anchor,4]
        roi = rois

        # ------------------ RPN labelling -------------------#
        # Author's note: the RPN predicts for all 9*hh*ww anchors, so
        # gt_rpn_loc / gt_rpn_label cover every anchor, but only 256 sampled
        # positive and negative anchors actually contribute to the loss.
        # Convention: positive label = 1, negative label = 0, invalid or
        # ignored label = -1; the loss treats them differently.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)

        # ------------------ RPN losses -------------------#
        # loc loss (location regression loss)
        # Author's note: only positive samples contribute to the loc loss.
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # cls loss (classification loss, two classes here)
        # samples labelled -1 are ignored
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        # Only anchors with a label above -1 go into the RPN confusion meter.
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI Network -------------------#
        # ------------------ ROI labelling -------------------#
        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # Author's note: a number of positive and negative RoIs are sampled
        # to train the RoI head (R-CNN) classifier.
        # gt_roi_loc: location offsets, i.e. the second round of location
        #     regression
        # gt_roi_label: N+1 classes, with one extra background class
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE it's all zero because only batch size 1 is supported for now
        # (this is the same reason roi_indices above is all zeros)
        sample_roi_index = t.zeros(len(sample_roi))

        # ------------------ ROI prediction -------------------#
        # Not every RoI needs a prediction, so the head only runs on the
        # samples chosen during labelling. It yields the predicted class
        # scores roi_score and predicted location offsets roi_cls_loc.
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1,
                                       4)  # [n_sample, n_class+1, 4]
        # roi_cls_loc holds a coordinate prediction for every class, but the
        # loss only needs the prediction for the ground-truth class;
        # roi_loc is exactly that selection.
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]  # [n_sample, 4]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        # loc loss (location regression loss)
        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        # cls loss (classification loss; author's note: 21 classes here)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        # The fifth entry is the total: the sum of the four losses.
        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def train_step(self, imgs, bboxes, labels, scale):
        """Run one optimisation step and return the losses from ``forward``.

        Clears gradients, runs ``forward``, back-propagates ``total_loss``,
        steps the optimizer and updates the loss meters.
        """
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, config, visualizer state and extra info.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model; if it is None,
                save_path is generated from a time string and the values
                of kwargs.
            **kwargs: extra info stored under ``'other_info'``.

        Returns:
            save_path (str): the path the checkpoint was written to.
        """
        # Local import so this method does not depend on a module-level one.
        import os

        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for v_ in kwargs.values():
                save_path += '_%s' % v_

        # Make sure the target directory exists before writing; dirname is
        # '' for a bare file name, in which case nothing needs creating.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        t.save(save_dict, save_path)
        self.vis.save([self.vis.env])
        return save_path

    def load(
        self,
        path,
        load_optimizer=True,
        parse_opt=False,
    ):
        """Restore state from a checkpoint written by :meth:`save`.

        Args:
            path: Location passed to ``t.load``.
            load_optimizer (bool): Load the ``'optimizer'`` entry into
                ``self.optimizer`` when the checkpoint has one.
            parse_opt (bool): When true, pass the checkpoint's ``'config'``
                entry to ``opt._parse``.

        Returns:
            ``self``.
        """
        state_dict = t.load(path)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def update_meters(self, losses):
        """Add the scalar value of every loss to its meter."""
        loss_d = {k: at.scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        """Reset every loss meter and both confusion meters."""
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        """Return ``{meter name: first element of meter.value()}``."""
        return {k: v.value()[0] for k, v in self.meters.items()}